From f5586a7922838c38acc83f87670f7325c0514c3d Mon Sep 17 00:00:00 2001
From: dijunkun
Date: Fri, 19 Apr 2024 17:35:19 +0800
Subject: [PATCH] Implementation for av1 codec

---
 .../video/decode/dav1d/dav1d_av1_decoder.cpp  | 237 ++++++
 .../video/decode/dav1d/dav1d_av1_decoder.h    |  68 +++
 .../video/decode/video_decoder_factory.cpp    |  31 +-
 .../video/decode/video_decoder_factory.h      |   2 +-
 .../video/encode/aom/aom_av1_encoder.cpp      | 418 ++++++++++++++++++
 src/media/video/encode/aom/aom_av1_encoder.h  |  84 ++++
 .../encode/openh264/openh264_encoder.cpp      |   6 +-
 .../video/encode/video_encoder_factory.cpp    |  29 +-
 .../video/encode/video_encoder_factory.h      |   2 +-
 src/pc/peer_connection.cpp                    |  74 ++--
 src/pc/peer_connection.h                      |   2 +
 src/rtp/rtp_codec.cpp                         |  88 ++++
 src/rtp/rtp_packet.cpp                        |  94 ++++
 src/rtp/rtp_packet.h                          |  64 +++
 src/transmission/ice_transmission.cpp         |  10 +-
 src/transmission/ice_transmission.h           |   3 +-
 xmake.lua                                     |  11 +-
 17 files changed, 1156 insertions(+), 67 deletions(-)
 create mode 100644 src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
 create mode 100644 src/media/video/decode/dav1d/dav1d_av1_decoder.h
 create mode 100644 src/media/video/encode/aom/aom_av1_encoder.cpp
 create mode 100644 src/media/video/encode/aom/aom_av1_encoder.h

diff --git a/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp b/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
new file mode 100644
index 0000000..fbddecd
--- /dev/null
+++ b/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
@@ -0,0 +1,237 @@
+#include "dav1d_av1_decoder.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+#include "log.h"
+
+#define SAVE_DECODER_STREAM 0
+
+namespace {
+
+// Capacity in bytes of the reusable output frame: one 8-bit 4:2:0 picture of
+// up to 1280x720 pixels. Larger pictures are rejected rather than overflowing.
+constexpr int kDecodedFrameCapacity = 1280 * 720 * 3 / 2;
+
+// Owns a Dav1dPicture and drops its reference when leaving scope.
+class ScopedDav1dPicture {
+ public:
+  ScopedDav1dPicture() = default;
+  ~ScopedDav1dPicture() { dav1d_picture_unref(&picture_); }
+
+  ScopedDav1dPicture(const ScopedDav1dPicture &) = delete;
+  ScopedDav1dPicture &operator=(const ScopedDav1dPicture &) = delete;
+
+  Dav1dPicture &Picture() { return picture_; }
+
+ private:
+  Dav1dPicture picture_ = {};
+};
+
+// Owns a Dav1dData and drops its reference when leaving scope.
+class ScopedDav1dData {
+ public:
+  ScopedDav1dData() = default;
+  ~ScopedDav1dData() { dav1d_data_unref(&data_); }
+
+  ScopedDav1dData(const ScopedDav1dData &) = delete;
+  ScopedDav1dData &operator=(const ScopedDav1dData &) = delete;
+
+  Dav1dData &Data() { return data_; }
+
+ private:
+  Dav1dData data_ = {};
+};
+
+// Calling `dav1d_data_wrap` requires a `free_callback` to be registered. The
+// wrapped buffer stays owned by the caller of Decode(), so nothing is freed.
+void NullFreeCallback(const uint8_t * /*buffer*/, void * /*opaque*/) {}
+
+// Copies `rows` rows of `row_bytes` bytes from a strided plane into a tightly
+// packed destination.
+void CopyPlane(uint8_t *dst, const uint8_t *src, ptrdiff_t src_stride,
+               int row_bytes, int rows) {
+  for (int row = 0; row < rows; ++row) {
+    memcpy(dst, src, row_bytes);
+    dst += row_bytes;
+    src += src_stride;
+  }
+}
+
+// Interleaves separate U and V planes that share `src_stride` into one packed
+// UVUV... plane (the chroma half of an NV12 picture).
+void InterleaveChroma(uint8_t *dst, const uint8_t *src_u, const uint8_t *src_v,
+                      ptrdiff_t src_stride, int chroma_width,
+                      int chroma_height) {
+  for (int row = 0; row < chroma_height; ++row) {
+    for (int col = 0; col < chroma_width; ++col) {
+      dst[2 * col] = src_u[col];
+      dst[2 * col + 1] = src_v[col];
+    }
+    dst += 2 * chroma_width;
+    src_u += src_stride;
+    src_v += src_stride;
+  }
+}
+
+}  // namespace
+
+Dav1dAv1Decoder::Dav1dAv1Decoder() {}
+
+Dav1dAv1Decoder::~Dav1dAv1Decoder() {
+  if (SAVE_DECODER_STREAM && file_) {
+    fflush(file_);
+    fclose(file_);
+    file_ = nullptr;
+  }
+
+  if (decoded_frame_) {
+    delete decoded_frame_;
+    decoded_frame_ = nullptr;
+  }
+
+  // Close the context opened by Init(); dav1d_close() resets the pointer.
+  if (context_) {
+    dav1d_close(&context_);
+  }
+}
+
+// Opens the dav1d context and allocates the reusable output frame.
+// Returns 0 on success, -1 if dav1d could not be opened.
+int Dav1dAv1Decoder::Init() {
+  Dav1dSettings s;
+  dav1d_default_settings(&s);
+
+  s.n_threads = 4;
+  s.max_frame_delay = 1;   // For low latency decoding.
+  s.all_layers = 0;        // Don't output a frame for every spatial layer.
+  s.operating_point = 31;  // Decode all operating points.
+
+  // Init() may be called again: release the previous context first.
+  if (context_) {
+    dav1d_close(&context_);
+  }
+
+  const int ret = dav1d_open(&context_, &s);
+  if (ret) {
+    LOG_ERROR("Dav1d AV1 decoder open failed");
+    context_ = nullptr;
+    return -1;
+  }
+
+  if (!decoded_frame_) {
+    decoded_frame_ = new VideoFrame(kDecodedFrameCapacity);
+  }
+
+  if (SAVE_DECODER_STREAM && !file_) {
+    file_ = fopen("decode_stream.yuv", "w+b");
+    if (!file_) {
+      LOG_WARN("Fail to open decode_stream.yuv");
+    }
+  }
+  return 0;
+}
+
+// Decodes one chunk of AV1 data and hands the resulting 8-bit 4:2:0 picture,
+// packed as Y plane followed by interleaved UV, to `on_receive_decoded_frame`.
+// Returns 0 when a frame was delivered and -1 otherwise.
+int Dav1dAv1Decoder::Decode(
+    const uint8_t *data, int size,
+    std::function<void(VideoFrame)> on_receive_decoded_frame) {
+  if (!context_ || !decoded_frame_) {
+    LOG_ERROR("Dav1dAv1Decoder::Decode called before a successful Init");
+    return -1;
+  }
+  if (!data || size <= 0) {
+    return -1;
+  }
+
+  if (!first_) {
+    // NOTE(review): testing the low five bits of the fifth byte against 0x07
+    // looks like an H.264 SPS check carried over from another decoder;
+    // confirm this is the intended start condition for AV1 input.
+    if (size < 5 || (data[4] & 0x1f) != 0x07) {
+      return -1;
+    }
+    first_ = true;
+  }
+
+  ScopedDav1dData scoped_dav1d_data;
+  Dav1dData &dav1d_data = scoped_dav1d_data.Data();
+  if (dav1d_data_wrap(&dav1d_data, data, size,
+                      /*free_callback=*/&NullFreeCallback,
+                      /*user_data=*/nullptr)) {
+    LOG_ERROR("Dav1dAv1Decoder::Decode failed to wrap input data");
+    return -1;
+  }
+
+  if (int decode_res = dav1d_send_data(context_, &dav1d_data)) {
+    LOG_ERROR("Dav1dAv1Decoder::Decode decoding failed with error code {}",
+              decode_res);
+
+    return -1;
+  }
+
+  ScopedDav1dPicture scoped_dav1d_picture;
+  Dav1dPicture &dav1d_picture = scoped_dav1d_picture.Picture();
+  if (int get_picture_res = dav1d_get_picture(context_, &dav1d_picture)) {
+    LOG_ERROR("Dav1dDecoder::Decode getting picture failed with error code {}",
+              get_picture_res);
+    return -1;
+  }
+
+  if (dav1d_picture.p.bpc != 8) {
+    // Only accept 8 bit depth.
+    LOG_ERROR("Dav1dDecoder::Decode unhandled bit depth: {}",
+              dav1d_picture.p.bpc);
+    return -1;
+  }
+
+  if (dav1d_picture.p.layout != DAV1D_PIXEL_LAYOUT_I420) {
+    // Only 4:2:0 pictures carry the half-size U and V planes read below.
+    LOG_ERROR("Dav1dDecoder::Decode unhandled pixel layout: {}",
+              static_cast<int>(dav1d_picture.p.layout));
+    return -1;
+  }
+
+  const int width = dav1d_picture.p.w;
+  const int height = dav1d_picture.p.h;
+  if (width <= 0 || height <= 0) {
+    return -1;
+  }
+  const int chroma_width = (width + 1) / 2;
+  const int chroma_height = (height + 1) / 2;
+  const size_t luma_size = static_cast<size_t>(width) * height;
+  const size_t chroma_size =
+      static_cast<size_t>(chroma_width) * chroma_height * 2;
+  if (luma_size + chroma_size > static_cast<size_t>(kDecodedFrameCapacity)) {
+    LOG_ERROR("Dav1dDecoder::Decode picture {}x{} exceeds the output buffer",
+              width, height);
+    return -1;
+  }
+
+  // dav1d rows are `stride` bytes apart and U/V live in separate planes, so
+  // copy row by row instead of assuming one contiguous block per plane.
+  // NOTE(review): chroma is written interleaved (NV12) because the encoder in
+  // this patch consumes NV12; confirm the frame consumer expects NV12 too.
+  uint8_t *dst = reinterpret_cast<uint8_t *>(decoded_frame_->GetBuffer());
+  CopyPlane(dst, static_cast<const uint8_t *>(dav1d_picture.data[0]),
+            dav1d_picture.stride[0], width, height);
+  InterleaveChroma(dst + luma_size,
+                   static_cast<const uint8_t *>(dav1d_picture.data[1]),
+                   static_cast<const uint8_t *>(dav1d_picture.data[2]),
+                   dav1d_picture.stride[1], chroma_width, chroma_height);
+
+  LOG_INFO("dav1d decode size {}", decoded_frame_->Size());
+  if (on_receive_decoded_frame) {
+    on_receive_decoded_frame(*decoded_frame_);
+  }
+  if (SAVE_DECODER_STREAM && file_) {
+    fwrite((unsigned char *)decoded_frame_->Buffer(), 1, decoded_frame_->Size(),
+           file_);
+  }
+
+  return 0;
+}
diff --git a/src/media/video/decode/dav1d/dav1d_av1_decoder.h b/src/media/video/decode/dav1d/dav1d_av1_decoder.h
new file mode 100644
index 0000000..2a797b4
--- /dev/null
+++ b/src/media/video/decode/dav1d/dav1d_av1_decoder.h
@@ -0,0 +1,68 @@
+/*
+ * @Author: DI JUNKUN
+ * @Date: 2024-03-04
+ * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
+ */
+
+#ifndef _DAV1D_AV1_DECODER_H_
+#define _DAV1D_AV1_DECODER_H_
+
+#include "dav1d/dav1d.h"
+
+#ifdef _WIN32
+extern "C" {
+#include "libavcodec/avcodec.h"
+};
+#else
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include <libavcodec/avcodec.h>
+#ifdef __cplusplus
+};
+#endif
+#endif
+
+#include <cstdint>
+#include <cstdio>
+#include <functional>
+
+#include "video_decoder.h"
+
+// AV1 video decoder backed by dav1d.
+//
+// Call Init() once, then Decode() for each received chunk of AV1 data.
+class Dav1dAv1Decoder : public VideoDecoder {
+ public:
+  Dav1dAv1Decoder();
+  virtual ~Dav1dAv1Decoder();
+
+ public:
+  // Returns 0 on success, -1 if the dav1d context could not be opened.
+  int Init();
+  // Decodes `size` bytes at `data`; on success passes the decoded frame to
+  // `on_receive_decoded_frame` and returns 0, otherwise returns -1.
+  int Decode(const uint8_t *data, int size,
+             std::function<void(VideoFrame)> on_receive_decoded_frame);
+
+ private:
+  // FFmpeg state; not referenced by dav1d_av1_decoder.cpp.
+  AVCodecID codec_id_ = AV_CODEC_ID_NONE;
+  const AVCodec *codec_ = nullptr;
+  AVCodecContext *codec_ctx_ = nullptr;
+  AVPacket *packet_ = nullptr;
+  AVFrame *frame_ = nullptr;
+  AVFrame *frame_nv12_ = nullptr;
+  struct SwsContext *img_convert_ctx = nullptr;
+
+  // Reusable output frame handed to the decode callback.
+  VideoFrame *decoded_frame_ = nullptr;
+
+  FILE *file_ = nullptr;  // Debug dump, used when SAVE_DECODER_STREAM is set.
+  bool first_ = false;    // True once the first accepted chunk has been seen.
+
+  // dav1d
+  Dav1dContext *context_ = nullptr;
+};
+
+#endif
diff --git
a/src/media/video/decode/video_decoder_factory.cpp b/src/media/video/decode/video_decoder_factory.cpp index ceee0da..2d5859a 100644 --- a/src/media/video/decode/video_decoder_factory.cpp +++ b/src/media/video/decode/video_decoder_factory.cpp @@ -4,6 +4,7 @@ #include "ffmpeg/ffmpeg_video_decoder.h" #include "openh264/openh264_decoder.h" #else +#include "dav1d/dav1d_av1_decoder.h" #include "ffmpeg/ffmpeg_video_decoder.h" #include "nvcodec/nvidia_video_decoder.h" #include "openh264/openh264_decoder.h" @@ -16,22 +17,26 @@ VideoDecoderFactory::VideoDecoderFactory() {} VideoDecoderFactory::~VideoDecoderFactory() {} std::unique_ptr VideoDecoderFactory::CreateVideoDecoder( - bool hardware_acceleration) { -#if __APPLE__ - return std::make_unique(OpenH264Decoder()); - // return std::make_unique(FfmpegVideoDecoder()); -#else - if (hardware_acceleration) { - if (CheckIsHardwareAccerlerationSupported()) { - return std::make_unique(NvidiaVideoDecoder()); - } else { - return nullptr; - } + bool hardware_acceleration, bool av1_encoding) { + if (av1_encoding) { + return std::make_unique(Dav1dAv1Decoder()); } else { - // return std::make_unique(FfmpegVideoDecoder()); +#if __APPLE__ return std::make_unique(OpenH264Decoder()); - } + // return std::make_unique(FfmpegVideoDecoder()); +#else + if (hardware_acceleration) { + if (CheckIsHardwareAccerlerationSupported()) { + return std::make_unique(NvidiaVideoDecoder()); + } else { + return nullptr; + } + } else { + // return std::make_unique(FfmpegVideoDecoder()); + return std::make_unique(OpenH264Decoder()); + } #endif + } } bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() { diff --git a/src/media/video/decode/video_decoder_factory.h b/src/media/video/decode/video_decoder_factory.h index c4e0e51..e052359 100644 --- a/src/media/video/decode/video_decoder_factory.h +++ b/src/media/video/decode/video_decoder_factory.h @@ -10,7 +10,7 @@ class VideoDecoderFactory { ~VideoDecoderFactory(); static std::unique_ptr 
CreateVideoDecoder(
-      bool hardware_acceleration);
+      bool hardware_acceleration, bool av1_encoding);
 
   static bool CheckIsHardwareAccerlerationSupported();
 };
diff --git a/src/media/video/encode/aom/aom_av1_encoder.cpp b/src/media/video/encode/aom/aom_av1_encoder.cpp
new file mode 100644
index 0000000..141a7bc
--- /dev/null
+++ b/src/media/video/encode/aom/aom_av1_encoder.cpp
@@ -0,0 +1,382 @@
+#include "aom_av1_encoder.h"
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+#include "log.h"
+
+#define SAVE_NV12_STREAM 0
+// NOTE(review): enabled, so every encoded frame is also appended to
+// encoded_stream.ivf in the working directory; confirm this is intended.
+#define SAVE_H264_STREAM 1
+
+#define SET_ENCODER_PARAM_OR_RETURN_ERROR(param_id, param_value) \
+  do {                                                           \
+    if (!SetEncoderControlParameters(param_id, param_value)) {   \
+      return -1;                                                 \
+    }                                                            \
+  } while (0)
+
+// Capacity in bytes of the buffer that receives one encoded frame.
+constexpr size_t kEncodedFrameCapacity = 1280 * 720 * 3 / 2;
+
+constexpr int kQpMin = 10;
+constexpr int kQpMax = 40;
+constexpr int kUsageProfile = AOM_USAGE_REALTIME;
+constexpr int kMinQindex = 145;  // Min qindex threshold for QP scaling.
+constexpr int kMaxQindex = 205;  // Max qindex threshold for QP scaling.
+constexpr int kBitDepth = 8;
+constexpr int kLagInFrames = 0;  // No look ahead.
+constexpr int kRtpTicksPerSecond = 90000;
+constexpr double kMinimumFrameRate = 1.0;
+
+constexpr uint8_t kObuSizePresentBit = 0b0'0000'010;
+
+// Picks the superblock size passed to AV1E_SET_SUPERBLOCK_SIZE.
+static aom_superblock_size_t GetSuperblockSize(int width, int height,
+                                               int threads) {
+  int resolution = width * height;
+  if (threads >= 4 && resolution >= 960 * 540 && resolution < 1920 * 1080)
+    return AOM_SUPERBLOCK_SIZE_64X64;
+  else
+    return AOM_SUPERBLOCK_SIZE_DYNAMIC;
+}
+
+// Forwards one control to aom_codec_control(); logs and returns false when the
+// codec rejects it.
+template <typename P>
+bool AomAv1Encoder::SetEncoderControlParameters(int param_id, P param_value) {
+  aom_codec_err_t error_code =
+      aom_codec_control(&aom_av1_encoder_ctx_, param_id, param_value);
+  if (error_code != AOM_CODEC_OK) {
+    LOG_ERROR(
+        "AomAv1Encoder::SetEncoderControlParameters returned {} on id: {}",
+        error_code, param_id);
+  }
+  return error_code == AOM_CODEC_OK;
+}
+
+int AomAv1Encoder::NumberOfThreads(int width, int height, int number_of_cores) {
+  // Keep the number of encoder threads equal to the possible number of
+  // column/row tiles, which is (1, 2, 4, 8). See comments below for
+  // AV1E_SET_TILE_COLUMNS/ROWS.
+  if (width * height > 1280 * 720 && number_of_cores > 8) {
+    return 8;
+  } else if (width * height >= 640 * 360 && number_of_cores > 4) {
+    return 4;
+  } else if (width * height >= 320 * 180 && number_of_cores > 2) {
+    return 2;
+  } else {
+// Use 2 threads for low res on ARM.
+#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
+    defined(WEBRTC_ANDROID)
+    if (width * height >= 320 * 180 && number_of_cores > 2) {
+      return 2;
+    }
+#endif
+    // 1 thread less than VGA.
+    return 1;
+  }
+}
+
+int AomAv1Encoder::GetCpuSpeed(int width, int height) {
+  if (width * height <= 320 * 180)
+    return 6;
+  else if (width * height <= 640 * 360)
+    return 7;
+  else if (width * height <= 1280 * 720)
+    return 8;
+  else
+    return 9;
+}
+
+AomAv1Encoder::AomAv1Encoder() {}
+
+AomAv1Encoder::~AomAv1Encoder() {
+  if (SAVE_NV12_STREAM && file_nv12_) {
+    fflush(file_nv12_);
+    fclose(file_nv12_);
+    file_nv12_ = nullptr;
+  }
+
+  if (SAVE_H264_STREAM && file_ivf_) {
+    fflush(file_ivf_);
+    fclose(file_ivf_);
+    file_ivf_ = nullptr;
+  }
+
+  // Allocated with new[] in Init(), so it must be released with delete[].
+  delete[] encoded_frame_;
+  encoded_frame_ = nullptr;
+
+  Release();
+}
+
+// Configures libaom for real-time, one-pass CBR encoding of frame_width_ x
+// frame_height_ NV12 input. Returns 0 on success, -1 on any libaom failure.
+int AomAv1Encoder::Init() {
+  // Init() may be called again: drop the codec instance and image wrapper of
+  // the previous call instead of leaking them.
+  Release();
+  if (!encoded_frame_) {
+    encoded_frame_ = new uint8_t[kEncodedFrameCapacity];
+  }
+
+  // Initialize encoder configuration structure with default values
+  aom_codec_err_t ret = aom_codec_enc_config_default(
+      aom_codec_av1_cx(), &aom_av1_encoder_config_, kUsageProfile);
+  if (ret != AOM_CODEC_OK) {
+    LOG_ERROR(
+        "AomAv1Encoder::EncodeInit returned {} on aom_codec_enc_config_default",
+        ret);
+    return -1;
+  }
+
+  // Overwrite default config with input encoder settings & RTC-relevant values.
+  aom_av1_encoder_config_.g_w = frame_width_;
+  aom_av1_encoder_config_.g_h = frame_height_;
+  aom_av1_encoder_config_.g_threads =
+      NumberOfThreads(frame_width_, frame_height_, number_of_cores_);
+  aom_av1_encoder_config_.g_timebase.num = 1;
+  aom_av1_encoder_config_.g_timebase.den = kRtpTicksPerSecond;
+  aom_av1_encoder_config_.rc_target_bitrate = target_bitrate_;  // kilobits/sec.
+  aom_av1_encoder_config_.rc_dropframe_thresh =
+      (!disable_frame_dropping_) ? 30 : 0;
+  aom_av1_encoder_config_.g_input_bit_depth = kBitDepth;
+  aom_av1_encoder_config_.kf_mode = AOM_KF_DISABLED;
+  aom_av1_encoder_config_.rc_min_quantizer = kQpMin;
+  aom_av1_encoder_config_.rc_max_quantizer = kQpMax;
+  aom_av1_encoder_config_.rc_undershoot_pct = 50;
+  aom_av1_encoder_config_.rc_overshoot_pct = 50;
+  aom_av1_encoder_config_.rc_buf_initial_sz = 600;
+  aom_av1_encoder_config_.rc_buf_optimal_sz = 600;
+  aom_av1_encoder_config_.rc_buf_sz = 1000;
+  aom_av1_encoder_config_.g_usage = kUsageProfile;
+  aom_av1_encoder_config_.g_error_resilient = 0;
+  // Low-latency settings.
+  aom_av1_encoder_config_.rc_end_usage = AOM_CBR;          // cbr mode
+  aom_av1_encoder_config_.g_pass = AOM_RC_ONE_PASS;        // One-pass rate control
+  aom_av1_encoder_config_.g_lag_in_frames = kLagInFrames;  // No look ahead
+
+  // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH
+  aom_codec_flags_t flags = 0;
+
+  // Initialize an encoder instance.
+  ret = aom_codec_enc_init(&aom_av1_encoder_ctx_, aom_codec_av1_cx(),
+                           &aom_av1_encoder_config_, flags);
+  if (ret != AOM_CODEC_OK) {
+    LOG_ERROR("AomAv1Encoder::EncodeInit returned {} on aom_codec_enc_init",
+              ret);
+    return -1;
+  }
+  inited_ = true;
+
+  // Set control parameters
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, 4);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CDEF, 1);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MODE_COST_UPD_FREQ, 3);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MV_COST_UPD_FREQ, 3);
+
+  // if (codec_settings->mode == VideoCodecMode::kScreensharing) {
+  //   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TUNE_CONTENT,
+  //                                     AOM_CONTENT_SCREEN);
+  //   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 1);
+  // } else {
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 0);
+  // }
+
+  if (aom_av1_encoder_config_.g_threads == 8) {
+    // Values passed to AV1E_SET_TILE_ROWS and AV1E_SET_TILE_COLUMNS are log2()
+    // based.
+    // Use 4 tile columns x 2 tile rows for 8 threads.
+    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, 1);
+    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, 2);
+  } else if (aom_av1_encoder_config_.g_threads == 4) {
+    // Use 2 tile columns x 2 tile rows for 4 threads.
+    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, 1);
+    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, 1);
+  } else {
+    SET_ENCODER_PARAM_OR_RETURN_ERROR(
+        AV1E_SET_TILE_COLUMNS,
+        static_cast<int>(log2(aom_av1_encoder_config_.g_threads)));
+  }
+
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ROW_MT, 1);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_OBMC, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_NOISE_SENSITIVITY, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_WARPED_MOTION, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(
+      AV1E_SET_SUPERBLOCK_SIZE,
+      GetSuperblockSize(frame_width_, frame_height_,
+                        aom_av1_encoder_config_.g_threads));
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CFL_INTRA, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_FILTER_INTRA, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DUAL_FILTER, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRABC, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_MASKED_COMP, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PAETH_INTRA, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_QM, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RESTORATION, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TX64, 0);
+  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MAX_REFERENCE_FRAMES, 3);
+
+  // Plane pointers and strides are assigned per frame in Encode().
+  frame_for_encode_ = aom_img_wrap(nullptr, AOM_IMG_FMT_NV12, frame_width_,
+                                   frame_height_, 1, nullptr);
+  if (!frame_for_encode_) {
+    LOG_ERROR("AomAv1Encoder::EncodeInit failed to allocate the input image");
+    return -1;
+  }
+
+  if (SAVE_H264_STREAM && !file_ivf_) {
+    file_ivf_ = fopen("encoded_stream.ivf", "w+b");
+    if (!file_ivf_) {
+      LOG_ERROR("Fail to open encoded_stream.ivf");
+    }
+  }
+
+  if (SAVE_NV12_STREAM && !file_nv12_) {
+    file_nv12_ = fopen("raw_stream.yuv", "w+b");
+    if (!file_nv12_) {
+      LOG_ERROR("Fail to open raw_stream.yuv");
+    }
+  }
+
+  return 0;
+}
+
+// Encodes one NV12 frame of frame_width_ x frame_height_ pixels. Every encoded
+// frame packet is passed to `on_encoded_image`, or to OnEncodedImage() when no
+// callback is given. Returns 0 on success and -1 on failure.
+int AomAv1Encoder::Encode(
+    const uint8_t *pData, int nSize,
+    std::function<int(char *encoded_packets, size_t size)> on_encoded_image) {
+  if (!inited_ || !frame_for_encode_ || !encoded_frame_) {
+    LOG_ERROR("AomAv1Encoder::Encode called before a successful Init");
+    return -1;
+  }
+
+  // The encoder reads a full Y plane plus an interleaved UV plane from pData.
+  const int luma_size = frame_width_ * frame_height_;
+  if (!pData || nSize < luma_size * 3 / 2) {
+    LOG_ERROR("AomAv1Encoder::Encode input of {} bytes is too small", nSize);
+    return -1;
+  }
+
+  if (SAVE_NV12_STREAM && file_nv12_) {
+    fwrite(pData, 1, nSize, file_nv12_);
+  }
+
+  const uint32_t duration =
+      kRtpTicksPerSecond / static_cast<uint32_t>(max_frame_rate_);
+  timestamp_ += duration;
+
+  frame_for_encode_->planes[AOM_PLANE_Y] = const_cast<unsigned char *>(pData);
+  frame_for_encode_->planes[AOM_PLANE_U] =
+      const_cast<unsigned char *>(pData + luma_size);
+  frame_for_encode_->planes[AOM_PLANE_V] = nullptr;
+  frame_for_encode_->stride[AOM_PLANE_Y] = frame_width_;
+  frame_for_encode_->stride[AOM_PLANE_U] = frame_width_;
+  frame_for_encode_->stride[AOM_PLANE_V] = 0;
+
+  // Force a key frame every key_frame_interval_ frames, and also when
+  // ForceIdr() asked for one since the previous frame. The request is consumed
+  // here so that it applies to exactly one frame.
+  aom_enc_frame_flags_t frame_flags = force_i_frame_flags_;
+  force_i_frame_flags_ = 0;
+  if (0 == seq_++ % key_frame_interval_) {
+    frame_flags |= AOM_EFLAG_FORCE_KF;
+  }
+
+  // Encode a frame. The presentation timestamp `pts` should not use real
+  // timestamps from frames or the wall clock, as that can cause the rate
+  // controller to misbehave.
+  aom_codec_err_t ret =
+      aom_codec_encode(&aom_av1_encoder_ctx_, frame_for_encode_, timestamp_,
+                       duration, frame_flags);
+  if (ret != AOM_CODEC_OK) {
+    LOG_ERROR("AomAv1Encoder::Encode returned {} on aom_codec_encode", ret);
+    return -1;
+  }
+
+  aom_codec_iter_t iter = nullptr;
+  while (const aom_codec_cx_pkt_t *pkt =
+             aom_codec_get_cx_data(&aom_av1_encoder_ctx_, &iter)) {
+    if (pkt->kind != AOM_CODEC_CX_FRAME_PKT || pkt->data.frame.sz == 0) {
+      continue;
+    }
+    if (pkt->data.frame.sz > kEncodedFrameCapacity) {
+      // Never copy past the end of encoded_frame_.
+      LOG_ERROR("AomAv1Encoder::Encode packet of {} bytes exceeds the buffer",
+                pkt->data.frame.sz);
+      return -1;
+    }
+
+    memcpy(encoded_frame_, pkt->data.frame.buf, pkt->data.frame.sz);
+    encoded_frame_size_ = static_cast<int>(pkt->data.frame.sz);
+
+    // The quantizer is only logged, so a failed query does not drop the frame.
+    int qp = -1;
+    if (SetEncoderControlParameters(AOME_GET_LAST_QUANTIZER, &qp)) {
+      LOG_INFO("Encoded frame qp = {}", qp);
+    }
+
+    if (on_encoded_image) {
+      on_encoded_image((char *)encoded_frame_, encoded_frame_size_);
+      if (SAVE_H264_STREAM && file_ivf_) {
+        fwrite(encoded_frame_, 1, encoded_frame_size_, file_ivf_);
+      }
+    } else {
+      OnEncodedImage((char *)encoded_frame_, encoded_frame_size_);
+    }
+  }
+
+  return 0;
+}
+
+int AomAv1Encoder::OnEncodedImage(char *encoded_packets, size_t size) {
+  LOG_INFO("OnEncodedImage not implemented");
+  return 0;
+}
+
+// Requests that the next encoded frame be a key frame.
+void AomAv1Encoder::ForceIdr() { force_i_frame_flags_ = AOM_EFLAG_FORCE_KF; }
+
+// Frees the input image wrapper and destroys the codec instance, if present.
+// Returns -1 when libaom fails to destroy the codec, 0 otherwise.
+int AomAv1Encoder::Release() {
+  if (frame_for_encode_ != nullptr) {
+    aom_img_free(frame_for_encode_);
+    frame_for_encode_ = nullptr;
+  }
+  if (inited_) {
+    if (aom_codec_destroy(&aom_av1_encoder_ctx_)) {
+      return -1;
+    }
+    inited_ = false;
+  }
+
+  return 0;
+}
diff --git a/src/media/video/encode/aom/aom_av1_encoder.h b/src/media/video/encode/aom/aom_av1_encoder.h
new file mode 100644
index 0000000..572c1c8
--- /dev/null
+++ b/src/media/video/encode/aom/aom_av1_encoder.h
@@ -0,0 +1,96 @@
+/*
+ * @Author: DI JUNKUN
+ * @Date: 2024-03-01
+ * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
+ */
+
+#ifndef _AOM_AV1_ENCODER_H_
+#define _AOM_AV1_ENCODER_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "aom/aom_codec.h"
+#include "aom/aom_encoder.h"
+#include "aom/aomcx.h"
+#include "video_encoder.h"
+
+// OBU header fields (not referenced by aom_av1_encoder.cpp).
+typedef struct {
+  uint64_t obu_header_size;
+  unsigned obu_type;
+  uint64_t
+      obu_size;  // leb128(), contains the size in bytes of the OBU not
+                 // including the bytes within obu_header or the obu_size syntax
+  int extension_flag;
+  int has_size_field;
+
+  // extension_flag == 1
+  int temporal_id;
+  int spatial_id;
+} OBU_t;
+
+// AV1 video encoder backed by libaom, configured for real-time encoding of
+// NV12 input.
+class AomAv1Encoder : public VideoEncoder {
+ public:
+  AomAv1Encoder();
+  virtual ~AomAv1Encoder();
+
+ public:
+  // Returns 0 on success, -1 when libaom rejects the configuration.
+  int Init();
+  // Encodes one NV12 frame; each encoded frame is passed to
+  // `on_encoded_image`, or to OnEncodedImage() when the callback is empty.
+  int Encode(
+      const uint8_t* pData, int nSize,
+      std::function<int(char* encoded_packets, size_t size)> on_encoded_image);
+
+  virtual int OnEncodedImage(char* encoded_packets, size_t size);
+
+  // Requests a key frame for the next call to Encode().
+  void ForceIdr();
+
+ private:
+  template <typename P>
+  bool SetEncoderControlParameters(int param_id, P param_value);
+  int NumberOfThreads(int width, int height, int number_of_cores);
+  int GetCpuSpeed(int width, int height);
+
+  int Release();
+
+ private:
+  int frame_width_ = 1280;
+  int frame_height_ = 720;
+  int key_frame_interval_ = 300;
+  int target_bitrate_ = 1000;
+  int max_bitrate_ = 2500000;
+  int max_payload_size_ = 1400;
+  int max_frame_rate_ = 30;
+  int number_of_cores_ = 4;
+
+  std::vector<std::vector<uint8_t>> encoded_packets_;
+  unsigned char* encoded_image_ = nullptr;
+  FILE* file_ivf_ = nullptr;
+  FILE* file_nv12_ = nullptr;
+  unsigned char* nv12_data_ = nullptr;
+  unsigned int seq_ = 0;
+
+  // aom av1 encoder
+  aom_image_t* frame_for_encode_ = nullptr;
+  aom_codec_ctx_t aom_av1_encoder_ctx_ = {};
+  aom_codec_enc_cfg_t aom_av1_encoder_config_ = {};
+  bool disable_frame_dropping_ = false;
+  bool inited_ = false;
+  int64_t timestamp_ = 0;
+  // Key-frame request set by ForceIdr() and consumed by the next Encode().
+  aom_enc_frame_flags_t force_i_frame_flags_ = 0;
+  // Buffer holding the most recent encoded frame and its size in bytes.
+  uint8_t* encoded_frame_ = nullptr;
+  int encoded_frame_size_ = 0;
+};
+
+#endif
diff --git a/src/media/video/encode/openh264/openh264_encoder.cpp
b/src/media/video/encode/openh264/openh264_encoder.cpp index fc7ceb8..1a4a4bc 100644 --- a/src/media/video/encode/openh264/openh264_encoder.cpp +++ b/src/media/video/encode/openh264/openh264_encoder.cpp @@ -49,7 +49,9 @@ static int NV12ToYUV420PFFmpeg(unsigned char *src_buffer, int width, int height, return 0; } -OpenH264Encoder::OpenH264Encoder() { +OpenH264Encoder::OpenH264Encoder() {} + +OpenH264Encoder::~OpenH264Encoder() { if (SAVE_NV12_STREAM && file_nv12_) { fflush(file_nv12_); fclose(file_nv12_); @@ -62,8 +64,8 @@ OpenH264Encoder::OpenH264Encoder() { file_h264_ = nullptr; } delete encoded_frame_; + Release(); } -OpenH264Encoder::~OpenH264Encoder() { Release(); } SEncParamExt OpenH264Encoder::CreateEncoderParams() const { SEncParamExt encoder_params; diff --git a/src/media/video/encode/video_encoder_factory.cpp b/src/media/video/encode/video_encoder_factory.cpp index a3396b8..e980a2b 100644 --- a/src/media/video/encode/video_encoder_factory.cpp +++ b/src/media/video/encode/video_encoder_factory.cpp @@ -4,6 +4,7 @@ #include "ffmpeg/ffmpeg_video_encoder.h" #include "openh264/openh264_encoder.h" #else +#include "aom/aom_av1_encoder.h" #include "ffmpeg/ffmpeg_video_encoder.h" #include "nvcodec/nvidia_video_encoder.h" #include "openh264/openh264_encoder.h" @@ -16,22 +17,26 @@ VideoEncoderFactory::VideoEncoderFactory() {} VideoEncoderFactory::~VideoEncoderFactory() {} std::unique_ptr VideoEncoderFactory::CreateVideoEncoder( - bool hardware_acceleration) { -#if __APPLE__ - // return std::make_unique(FFmpegVideoEncoder()); - return std::make_unique(OpenH264Encoder()); -#else - if (hardware_acceleration) { - if (CheckIsHardwareAccerlerationSupported()) { - return std::make_unique(NvidiaVideoEncoder()); - } else { - return nullptr; - } + bool hardware_acceleration, bool av1_encoding) { + if (av1_encoding) { + return std::make_unique(AomAv1Encoder()); } else { +#if __APPLE__ // return std::make_unique(FFmpegVideoEncoder()); return std::make_unique(OpenH264Encoder()); - 
} +#else + if (hardware_acceleration) { + if (CheckIsHardwareAccerlerationSupported()) { + return std::make_unique(NvidiaVideoEncoder()); + } else { + return nullptr; + } + } else { + // return std::make_unique(FFmpegVideoEncoder()); + return std::make_unique(OpenH264Encoder()); + } #endif + } } bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() { diff --git a/src/media/video/encode/video_encoder_factory.h b/src/media/video/encode/video_encoder_factory.h index 638b7f1..94036d5 100644 --- a/src/media/video/encode/video_encoder_factory.h +++ b/src/media/video/encode/video_encoder_factory.h @@ -10,7 +10,7 @@ class VideoEncoderFactory { ~VideoEncoderFactory(); static std::unique_ptr CreateVideoEncoder( - bool hardware_acceleration); + bool hardware_acceleration, bool av1_encoding); static bool CheckIsHardwareAccerlerationSupported(); }; diff --git a/src/pc/peer_connection.cpp b/src/pc/peer_connection.cpp index bc2f25e..0ae3d6e 100644 --- a/src/pc/peer_connection.cpp +++ b/src/pc/peer_connection.cpp @@ -38,6 +38,7 @@ int PeerConnection::Init(PeerConnectionParams params, cfg_turn_server_password_ = reader.Get("turn server", "password", ""); cfg_hardware_acceleration_ = reader.Get("hardware acceleration", "turn_on", "false"); + cfg_av1_encoding_ = reader.Get("av1 encoding", "turn_on", "false"); std::regex regex("\n"); @@ -62,6 +63,9 @@ int PeerConnection::Init(PeerConnectionParams params, LOG_INFO("Hardware accelerated codec [{}]", hardware_acceleration_ ? "ON" : "OFF"); + av1_encoding_ = cfg_av1_encoding_ == "true" ? true : false; + LOG_INFO("av1 encoding [{}]", hardware_acceleration_ ? 
"ON" : "OFF"); + on_receive_video_buffer_ = params.on_receive_video_buffer; on_receive_audio_buffer_ = params.on_receive_audio_buffer; on_receive_data_buffer_ = params.on_receive_data_buffer; @@ -95,7 +99,7 @@ int PeerConnection::Init(PeerConnectionParams params, (uint8_t *)data, size, [this, user_id, user_id_size](VideoFrame video_frame) { if (on_receive_video_buffer_) { - // LOG_ERROR("Receive video, size {}", video_frame.Size()); + LOG_ERROR("Receive video, size {}", video_frame.Size()); on_receive_video_buffer_((const char *)video_frame.Buffer(), video_frame.Size(), user_id, user_id_size); } @@ -177,44 +181,54 @@ int PeerConnection::CreateVideoCodec(bool hardware_acceleration) { #else #endif - video_encoder_ = - VideoEncoderFactory::CreateVideoEncoder(hardware_acceleration_); - if (hardware_acceleration_ && !video_encoder_) { - LOG_WARN( - "Hardware accelerated encoder not available, use default software " - "encoder"); - video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false); - if (!video_encoder_) { - LOG_ERROR( - "Hardware accelerated encoder and software encoder both not " - "available"); - return -1; + if (av1_encoding_) { + video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, true); + LOG_INFO("Only support software encoding for AV1"); + } else { + video_encoder_ = + VideoEncoderFactory::CreateVideoEncoder(hardware_acceleration_, false); + if (hardware_acceleration_ && !video_encoder_) { + LOG_WARN( + "Hardware accelerated encoder not available, use default software " + "encoder"); + video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false); + if (!video_encoder_) { + LOG_ERROR( + "Hardware accelerated encoder and software encoder both not " + "available"); + return -1; + } } } if (0 != video_encoder_->Init()) { - video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false); + video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false); if (!video_encoder_ || 0 != video_encoder_->Init()) { LOG_ERROR("Encoder 
init failed"); return -1; } } - video_decoder_ = - VideoDecoderFactory::CreateVideoDecoder(hardware_acceleration_); - if (hardware_acceleration_ && !video_decoder_) { - LOG_WARN( - "Hardware accelerated decoder not available, use default software " - "decoder"); - video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false); - if (!video_decoder_) { - LOG_ERROR( - "Hardware accelerated decoder and software decoder both not " - "available"); - return -1; + if (av1_encoding_) { + video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, true); + LOG_INFO("Only support software decoding for AV1"); + } else { + video_decoder_ = + VideoDecoderFactory::CreateVideoDecoder(hardware_acceleration_, false); + if (hardware_acceleration_ && !video_decoder_) { + LOG_WARN( + "Hardware accelerated decoder not available, use default software " + "decoder"); + video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false); + if (!video_decoder_) { + LOG_ERROR( + "Hardware accelerated decoder and software decoder both not " + "available"); + return -1; + } } } if (0 != video_decoder_->Init()) { - video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false); + video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false); if (!video_decoder_ || video_decoder_->Init()) { LOG_ERROR("Decoder init failed"); return -1; @@ -376,7 +390,8 @@ void PeerConnection::ProcessSignal(const std::string &signal) { ice_transmission_list_[remote_user_id]->InitIceTransmission( cfg_stun_server_ip_, stun_server_port_, cfg_turn_server_ip_, turn_server_port_, cfg_turn_server_username_, - cfg_turn_server_password_); + cfg_turn_server_password_, + av1_encoding_ ? 
RtpPacket::AV1 : RtpPacket::H264);
 
           ice_transmission_list_[remote_user_id]->JoinTransmission();
         }
@@ -430,7 +445,8 @@ void PeerConnection::ProcessSignal(const std::string &signal) {
           ice_transmission_list_[remote_user_id]->InitIceTransmission(
               cfg_stun_server_ip_, stun_server_port_, cfg_turn_server_ip_,
               turn_server_port_, cfg_turn_server_username_,
-              cfg_turn_server_password_);
+              cfg_turn_server_password_,
+              av1_encoding_ ? RtpPacket::AV1 : RtpPacket::H264);
 
           ice_transmission_list_[remote_user_id]->SetTransmissionId(
               transmission_id_);
diff --git a/src/pc/peer_connection.h b/src/pc/peer_connection.h
index e5ea40a..a97ab52 100644
--- a/src/pc/peer_connection.h
+++ b/src/pc/peer_connection.h
@@ -76,10 +76,12 @@ class PeerConnection {
   std::string cfg_turn_server_username_;
   std::string cfg_turn_server_password_;
   std::string cfg_hardware_acceleration_;
+  std::string cfg_av1_encoding_;
   int signal_server_port_ = 0;
   int stun_server_port_ = 0;
   int turn_server_port_ = 0;
   bool hardware_acceleration_ = false;
+  bool av1_encoding_ = false;
 
  private:
   std::shared_ptr ws_transport_ = nullptr;
diff --git a/src/rtp/rtp_codec.cpp b/src/rtp/rtp_codec.cpp
index e759bfb..471b30e 100644
--- a/src/rtp/rtp_codec.cpp
+++ b/src/rtp/rtp_codec.cpp
@@ -9,6 +9,16 @@
 #define FU_A 28
 #define FU_B 29
 
+constexpr int kObuTypeSequenceHeader = 1;
+constexpr int kObuTypeTemporalDelimiter = 2;
+constexpr int kObuTypeFrameHeader = 3;
+constexpr int kObuTypeTileGroup = 4;
+constexpr int kObuTypeMetadata = 5;
+constexpr int kObuTypeFrame = 6;
+constexpr int kObuTypeRedundantFrameHeader = 7;
+constexpr int kObuTypeTileList = 8;
+constexpr int kObuTypePadding = 15;
+
 RtpCodec ::RtpCodec(RtpPacket::PAYLOAD_TYPE payload_type)
     : version_(RTP_VERSION),
       has_padding_(false),
@@ -216,6 +226,96 @@ void RtpCodec::Encode(uint8_t* buffer, size_t size,
         packets.emplace_back(rtp_packet);
       }
     }
+  } else if (RtpPacket::PAYLOAD_TYPE::AV1 == payload_type_) {
+    if (size <= MAX_NALU_LEN) {
+      RtpPacket rtp_packet;
+      rtp_packet.SetVerion(version_);
+      rtp_packet.SetHasPadding(has_padding_);
+      rtp_packet.SetHasExtension(has_extension_);
+      rtp_packet.SetMarker(1);
+      rtp_packet.SetPayloadType(RtpPacket::PAYLOAD_TYPE(payload_type_));
+      rtp_packet.SetSequenceNumber(sequence_number_++);
+
+      timestamp_ =
+          std::chrono::high_resolution_clock::now().time_since_epoch().count();
+      rtp_packet.SetTimestamp(timestamp_);
+      rtp_packet.SetSsrc(ssrc_);
+
+      if (!csrcs_.empty()) {
+        rtp_packet.SetCsrcs(csrcs_);
+      }
+
+      if (has_extension_) {
+        rtp_packet.SetExtensionProfile(extension_profile_);
+        rtp_packet.SetExtensionData(extension_data_, extension_len_);
+      }
+
+      // int obu_header;
+      // memcpy(&obu_header, buffer, sizeof(char));
+      // int obu_type = (obu_header & 0b0'1111'000) >> 3;
+      // LOG_ERROR("OBU type {}", obu_type);
+      // if (obu_type == kObuTypeTemporalDelimiter ||
+      //     obu_type == kObuTypeTileList || obu_type == kObuTypePadding) {
+      //   LOG_ERROR("Unsupported OBU type", obu_type);
+      // }
+
+      // The whole buffer travels as one unfragmented OBU element: neither
+      // continuation flag is set and no length field precedes the element.
+      // N stays clear because key-frame information is not available here.
+      RtpPacket::AV1_AGGR_HEADER av1_aggr_header = {};
+      av1_aggr_header.z = 0;
+      av1_aggr_header.y = 0;
+      av1_aggr_header.w = 1;
+      av1_aggr_header.n = 0;
+
+      rtp_packet.SetAv1AggrHeader(av1_aggr_header);
+
+      rtp_packet.EncodeAv1(buffer, size);
+      packets.emplace_back(rtp_packet);
+
+    } else {
+      size_t last_packet_size = size % MAX_NALU_LEN;
+      size_t packet_num = size / MAX_NALU_LEN + (last_packet_size ? 1 : 0);
+      timestamp_ =
+          std::chrono::high_resolution_clock::now().time_since_epoch().count();
+
+      for (size_t index = 0; index < packet_num; index++) {
+        RtpPacket rtp_packet;
+        rtp_packet.SetVerion(version_);
+        rtp_packet.SetHasPadding(has_padding_);
+        rtp_packet.SetHasExtension(has_extension_);
+        rtp_packet.SetMarker(index == packet_num - 1 ? 1 : 0);
+        rtp_packet.SetPayloadType(RtpPacket::PAYLOAD_TYPE(payload_type_));
+        rtp_packet.SetSequenceNumber(sequence_number_++);
+        rtp_packet.SetTimestamp(timestamp_);
+        rtp_packet.SetSsrc(ssrc_);
+
+        if (!csrcs_.empty()) {
+          rtp_packet.SetCsrcs(csrcs_);
+        }
+
+        if (has_extension_) {
+          rtp_packet.SetExtensionProfile(extension_profile_);
+          rtp_packet.SetExtensionData(extension_data_, extension_len_);
+        }
+
+        // Z marks a continuation of the previous packet's fragment, Y marks a
+        // fragment that continues in the next packet.
+        RtpPacket::AV1_AGGR_HEADER av1_aggr_header = {};
+        av1_aggr_header.z = index > 0 ? 1 : 0;
+        av1_aggr_header.y = index < packet_num - 1 ? 1 : 0;
+        av1_aggr_header.w = 1;
+        av1_aggr_header.n = 0;
+        rtp_packet.SetAv1AggrHeader(av1_aggr_header);
+
+        if (index == packet_num - 1 && last_packet_size > 0) {
+          rtp_packet.EncodeAv1(buffer + index * MAX_NALU_LEN, last_packet_size);
+        } else {
+          rtp_packet.EncodeAv1(buffer + index * MAX_NALU_LEN, MAX_NALU_LEN);
+        }
+        packets.emplace_back(rtp_packet);
+      }
+    }
   } else if (RtpPacket::PAYLOAD_TYPE::OPUS == payload_type_) {
     RtpPacket rtp_packet;
     rtp_packet.SetVerion(version_);
diff --git a/src/rtp/rtp_packet.cpp b/src/rtp/rtp_packet.cpp
index 91dbd7d..10b8025 100644
--- a/src/rtp/rtp_packet.cpp
+++ b/src/rtp/rtp_packet.cpp
@@ -17,6 +17,8 @@ void RtpPacket::TryToDecodeRtpPacket() {
     DecodeH264FecSource();
   } else if (PAYLOAD_TYPE::H264_FEC_REPAIR == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
     DecodeH264FecRepair();
+  } else if (PAYLOAD_TYPE::AV1 == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
+    DecodeAv1();
   } else if (PAYLOAD_TYPE::OPUS == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
     DecodeOpus();
   } else if (PAYLOAD_TYPE::DATA == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
@@ -389,6 +391,55 @@ const uint8_t *RtpPacket::EncodeH264FecRepair(
   return buffer_;
 }
 
+const uint8_t *RtpPacket::EncodeAv1(uint8_t *payload, size_t payload_size) {
+  buffer_[0] = (version_ << 6) | (has_padding_ << 5) | (has_extension_ << 4) |
+               total_csrc_number_;
+  buffer_[1] = (marker_ << 7) | payload_type_;
+  buffer_[2] = (sequence_number_ >> 8) & 0xFF;
+  buffer_[3] = sequence_number_ & 0xFF;
+  buffer_[4] = (timestamp_ >> 24) & 0xFF;
+  buffer_[5] = (timestamp_ >> 16) & 0xFF;
+  buffer_[6] = (timestamp_ >> 8) & 0xFF;
+  buffer_[7] = timestamp_ & 0xFF;
+  buffer_[8] = (ssrc_ >> 24) & 0xFF;
+  buffer_[9] = (ssrc_ >> 16) & 0xFF;
+  buffer_[10] =
(ssrc_ >> 8) & 0xFF;
+  buffer_[11] = ssrc_ & 0xFF;
+
+  // Every CSRC identifier occupies four consecutive bytes.
+  for (uint32_t index = 0; index < total_csrc_number_ && !csrcs_.empty();
+       index++) {
+    buffer_[12 + index * 4] = (csrcs_[index] >> 24) & 0xFF;
+    buffer_[13 + index * 4] = (csrcs_[index] >> 16) & 0xFF;
+    buffer_[14 + index * 4] = (csrcs_[index] >> 8) & 0xFF;
+    buffer_[15 + index * 4] = csrcs_[index] & 0xFF;
+  }
+
+  uint32_t payload_offset =
+      total_csrc_number_ && !csrcs_.empty() ? total_csrc_number_ * 4 : 0;
+  if (has_extension_ && extension_data_) {
+    buffer_[12 + payload_offset] = extension_profile_ >> 8;
+    buffer_[13 + payload_offset] = extension_profile_ & 0xff;
+    buffer_[14 + payload_offset] = (extension_len_ >> 8) & 0xFF;
+    buffer_[15 + payload_offset] = extension_len_ & 0xFF;
+    memcpy(buffer_ + 16 + payload_offset, extension_data_, extension_len_);
+    // Skip the 4-byte extension header as well as the extension data.
+    payload_offset += 4 + extension_len_;
+  }
+
+  // AV1 aggregation header byte: |Z|Y| W |N|-|-|-|.
+  buffer_[12 + payload_offset] =
+      (av1_aggr_header_.z << 7) | (av1_aggr_header_.y << 6) |
+      (av1_aggr_header_.w << 4) | (av1_aggr_header_.n << 3);
+
+  memcpy(buffer_ + 13 + payload_offset, payload, payload_size);
+  size_ = payload_size + (13 + payload_offset);
+
+  return buffer_;
+}
+
+// ----------------------------------------------------------------------------
+
 size_t RtpPacket::DecodeOpus(uint8_t *payload) {
   version_ = (buffer_[0] >> 6) & 0x03;
   has_padding_ = (buffer_[0] >> 5) & 0x01;
@@ -672,4 +723,64 @@ size_t RtpPacket::DecodeH264FecRepair(uint8_t *payload) {
     memcpy(payload, payload_, payload_size_);
   }
   return payload_size_;
+}
+
+size_t RtpPacket::DecodeAv1(uint8_t *payload) {
+  version_ = (buffer_[0] >> 6) & 0x03;
+  has_padding_ = (buffer_[0] >> 5) & 0x01;
+  has_extension_ = (buffer_[0] >> 4) & 0x01;
+  total_csrc_number_ = buffer_[0] & 0x0f;
+  marker_ = (buffer_[1] >> 7) & 0x01;
+  payload_type_ = buffer_[1] & 0x7f;
+  sequence_number_ = (buffer_[2] << 8) | buffer_[3];
+  timestamp_ =
+      (buffer_[4] << 24) | (buffer_[5] << 16) | (buffer_[6] << 8) | buffer_[7];
+  ssrc_ = (buffer_[8] << 24) | (buffer_[9] << 16) | (buffer_[10] << 8) |
+          buffer_[11];
+
+  // Every CSRC identifier occupies four consecutive bytes.
+  for (uint32_t index = 0; index < total_csrc_number_; index++) {
+    uint32_t csrc = (buffer_[12 + index * 4] << 24) |
+                    (buffer_[13 + index * 4] << 16) |
+                    (buffer_[14 + index * 4] << 8) | buffer_[15 + index * 4];
+    csrcs_.push_back(csrc);
+  }
+
+  uint32_t extension_offset = total_csrc_number_ * 4;
+  if (has_extension_) {
+    extension_profile_ =
+        (buffer_[12 + extension_offset] << 8) | buffer_[13 + extension_offset];
+    extension_len_ =
+        (buffer_[14 + extension_offset] << 8) | buffer_[15 + extension_offset];
+
+    // extension_data_ = new uint8_t[extension_len_];
+    // memcpy(extension_data_, buffer_ + 16 + extension_offset,
+    // extension_len_);
+    extension_data_ = buffer_ + 16 + extension_offset;
+  }
+
+  // The extension, when present, spans its 4-byte header plus its data.
+  uint32_t payload_offset =
+      (has_extension_ ? 4 + extension_len_ : 0) + extension_offset;
+
+  // Reject packets too short to carry the aggregation header byte.
+  if (size_ < 13 + payload_offset) {
+    payload_ = nullptr;
+    payload_size_ = 0;
+    return 0;
+  }
+
+  // AV1 aggregation header byte: |Z|Y| W |N|-|-|-|.
+  const uint8_t aggr_header = buffer_[12 + payload_offset];
+  av1_aggr_header_.z = (aggr_header >> 7) & 0x01;
+  av1_aggr_header_.y = (aggr_header >> 6) & 0x01;
+  av1_aggr_header_.w = (aggr_header >> 4) & 0x03;
+  av1_aggr_header_.n = (aggr_header >> 3) & 0x01;
+
+  payload_size_ = size_ - (13 + payload_offset);
+  payload_ = buffer_ + 13 + payload_offset;
+  if (payload) {
+    memcpy(payload, payload_, payload_size_);
+  }
+  return payload_size_;
 }
\ No newline at end of file
diff --git a/src/rtp/rtp_packet.h b/src/rtp/rtp_packet.h
index e243176..9b3524e 100644
--- a/src/rtp/rtp_packet.h
+++ b/src/rtp/rtp_packet.h
@@ -115,6 +115,44 @@
 // | padding | Padding size |
 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
+// AV1
+//  0                   1                   2                   3
+//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |V=2|P|X|  CC   |M|     PT      |       sequence number         |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |                           timestamp                           |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+// |           synchronization source (SSRC) identifier            |
+// +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+// |            contributing source (CSRC) identifiers             |x
+// |                             ....
|x +// +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +// | 0x100 | 0x0 | extensions length |x +// +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +// | ID | hdr_length | |x +// +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ |x +// | |x +// | dependency descriptor (hdr_length #bytes) |x +// | |x +// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | | Other rtp header extensions...|x +// +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +// | AV1 aggr hdr | | +// +-+-+-+-+-+-+-+-+ | +// | | +// | Bytes 2..N of AV1 payload | +// | | +// | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | : OPTIONAL RTP padding | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +// | AV1 aggr hdr | +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |Z|Y| W |N|-|-|-| +// +-+-+-+-+-+-+-+-+ + #define DEFAULT_MTU 1500 #define MAX_NALU_LEN 1400 @@ -124,6 +162,7 @@ class RtpPacket { H264 = 96, H264_FEC_SOURCE = 97, H264_FEC_REPAIR = 98, + AV1 = 99, OPUS = 111, DATA = 127 } PAYLOAD_TYPE; @@ -179,6 +218,20 @@ class RtpPacket { uint8_t nal_unit_type : 5; } FU_HEADER; + typedef struct { + uint8_t z : 1; + uint8_t y : 1; + uint8_t w : 2; + uint8_t n : 1; + } AV1_AGGR_HEADER; + + // typedef struct Obu { + // uint8_t header; + // uint8_t extension_header; // undefined if (header & kXbit) == 0 + // rtc::ArrayView payload; + // int size; // size of the header and payload combined. 
+ // } OBU; + void SetFuIndicator(FU_INDICATOR fu_indicator) { fu_indicator_.forbidden_bit = fu_indicator.forbidden_bit; fu_indicator_.nal_reference_idc = fu_indicator.nal_reference_idc; @@ -192,6 +245,13 @@ class RtpPacket { fu_header_.nal_unit_type = fu_header.nal_unit_type; } + void SetAv1AggrHeader(AV1_AGGR_HEADER av1_aggr_header) { + av1_aggr_header_.z = av1_aggr_header.z; + av1_aggr_header_.y = av1_aggr_header.y; + av1_aggr_header_.w = av1_aggr_header.w; + av1_aggr_header_.n = av1_aggr_header.n; + } + void SetFecSymbolId(uint8_t fec_symbol_id) { fec_symbol_id_ = fec_symbol_id; } public: @@ -204,11 +264,14 @@ class RtpPacket { const uint8_t *EncodeH264FecRepair(uint8_t *payload, size_t payload_size, unsigned int fec_symbol_id, unsigned int fec_source_symbol_num); + const uint8_t *EncodeAv1(uint8_t *payload, size_t payload_size); + size_t DecodeData(uint8_t *payload = nullptr); size_t DecodeH264Nalu(uint8_t *payload = nullptr); size_t DecodeH264Fua(uint8_t *payload = nullptr); size_t DecodeH264FecSource(uint8_t *payload = nullptr); size_t DecodeH264FecRepair(uint8_t *payload = nullptr); + size_t DecodeAv1(uint8_t *payload = nullptr); size_t DecodeOpus(uint8_t *payload = nullptr); public: @@ -314,6 +377,7 @@ class RtpPacket { FU_HEADER fu_header_; uint8_t fec_symbol_id_ = 0; uint8_t fec_source_symbol_num_ = 0; + AV1_AGGR_HEADER av1_aggr_header_; // Payload uint8_t *payload_ = nullptr; diff --git a/src/transmission/ice_transmission.cpp b/src/transmission/ice_transmission.cpp index 4fd22e3..b40f787 100644 --- a/src/transmission/ice_transmission.cpp +++ b/src/transmission/ice_transmission.cpp @@ -42,11 +42,11 @@ IceTransmission::~IceTransmission() { } } -int IceTransmission::InitIceTransmission(std::string &stun_ip, int stun_port, - std::string &turn_ip, int turn_port, - std::string &turn_username, - std::string &turn_password) { - video_rtp_codec_ = std::make_unique(RtpPacket::PAYLOAD_TYPE::H264); +int IceTransmission::InitIceTransmission( + std::string &stun_ip, 
int stun_port, std::string &turn_ip, int turn_port,
+    std::string &turn_username, std::string &turn_password,
+    RtpPacket::PAYLOAD_TYPE video_codec_payload_type) {
+  video_rtp_codec_ = std::make_unique<RtpCodec>(video_codec_payload_type);
   audio_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::OPUS);
   data_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::DATA);
 
diff --git a/src/transmission/ice_transmission.h b/src/transmission/ice_transmission.h
index d0eeb08..506d4d0 100644
--- a/src/transmission/ice_transmission.h
+++ b/src/transmission/ice_transmission.h
@@ -37,7 +37,8 @@ class IceTransmission {
   int InitIceTransmission(std::string &stun_ip, int stun_port,
                           std::string &turn_ip, int turn_port,
                           std::string &turn_username,
-                          std::string &turn_password);
+                          std::string &turn_password,
+                          RtpPacket::PAYLOAD_TYPE video_codec_payload_type);
 
   int DestroyIceTransmission();
 
diff --git a/xmake.lua b/xmake.lua
index 8df3652..b1d1eea 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -11,7 +11,7 @@ add_defines("ASIO_STANDALONE", "ASIO_HAS_STD_TYPE_TRAITS", "ASIO_HAS_STD_SHARED_
     "ASIO_HAS_STD_ADDRESSOF", "ASIO_HAS_STD_ATOMIC", "ASIO_HAS_STD_CHRONO",
     "ASIO_HAS_CSTDINT", "ASIO_HAS_STD_ARRAY", "ASIO_HAS_STD_SYSTEM_ERROR")
 
-add_requires("asio 1.24.0", "nlohmann_json", "spdlog 1.11.0", "openfec", "libopus 1.4")
+add_requires("asio 1.24.0", "nlohmann_json", "spdlog 1.11.0", "openfec", "libopus 1.4", "dav1d 1.1.0")
 add_packages("asio", "nlohmann_json", "spdlog", "openfec", "libopus")
 
 includes("thirdparty")
@@ -19,8 +19,9 @@ includes("thirdparty")
 if is_os("windows") then
     add_requires("vcpkg::ffmpeg 5.1.2", {configs = {shared = false}})
     add_requires("vcpkg::libnice 0.1.21")
+    add_requires("vcpkg::aom")
     add_requires("openh264 2.1.1", {configs = {shared = false}})
-    add_packages("vcpkg::ffmpeg", "vcpkg::libnice", "openh264", "cuda")
+    add_packages("vcpkg::ffmpeg", "vcpkg::libnice", "vcpkg::aom", "openh264", "dav1d", "cuda")
     add_defines("_WEBSOCKETPP_CPP11_INTERNAL_")
     add_requires("cuda")
 elseif is_os("linux") then
@@ -119,7 +120,9 @@ target("media")
             "src/media/video/encode/ffmpeg/*.cpp",
             "src/media/video/decode/ffmpeg/*.cpp",
             "src/media/video/encode/openh264/*.cpp",
-            "src/media/video/decode/openh264/*.cpp")
+            "src/media/video/decode/openh264/*.cpp",
+            "src/media/video/encode/aom/*.cpp",
+            "src/media/video/decode/dav1d/*.cpp")
         add_includedirs("src/media/video/encode",
             "src/media/video/decode",
             "src/media/video/encode/nvcodec",
@@ -128,6 +131,8 @@ target("media")
             "src/media/video/decode/ffmpeg",
             "src/media/video/encode/openh264",
             "src/media/video/decode/openh264",
+            "src/media/video/encode/aom",
+            "src/media/video/decode/dav1d",
             "thirdparty/nvcodec/Interface",
             "thirdparty/nvcodec/Samples", {public = true})
     elseif is_os(("linux")) then