Implementation for av1 codec

2025-12-17 20:47:01 +08:00 · 2024-04-19 17:35:19 +08:00
parent a309627ca3
commit f5586a7922
17 changed files with 1037 additions and 67 deletions
--- a/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
+++ b/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
@@ -0,0 +1,128 @@
 #include "dav1d_av1_decoder.h"
 #include "log.h"
 #define SAVE_DECODER_STREAM 0
 extern "C" {
 #include <libavformat/avformat.h>
 #include <libavutil/imgutils.h>
 #include <libswscale/swscale.h>
 };
 class ScopedDav1dPicture : public std::shared_ptr<ScopedDav1dPicture> {
 public:
  ~ScopedDav1dPicture() { dav1d_picture_unref(&picture_); }
  Dav1dPicture &Picture() { return picture_; }
 private:
  Dav1dPicture picture_ = {};
 };
 // Owns one `Dav1dData` and releases it with dav1d_data_unref() when the
 // wrapper is destroyed.
 //
 // The wrapper is non-copyable: a copy would refer to the same data and unref
 // it a second time on destruction.
 class ScopedDav1dData {
 public:
  ScopedDav1dData() = default;
  ~ScopedDav1dData() { dav1d_data_unref(&data_); }
  ScopedDav1dData(const ScopedDav1dData &) = delete;
  ScopedDav1dData &operator=(const ScopedDav1dData &) = delete;
  // Returns the wrapped data descriptor so it can be filled by the caller.
  Dav1dData &Data() { return data_; }
 private:
  Dav1dData data_ = {};
 };
 // `dav1d_data_wrap` needs a `free_callback`; this one deliberately does
 // nothing, so the wrapped buffer is never freed from here.
 void NullFreeCallback(const uint8_t * /*buffer*/, void * /*opaque*/) {
  // Intentionally empty.
 }
 // Nothing to set up here; all real initialization happens in Init().
 Dav1dAv1Decoder::Dav1dAv1Decoder() = default;
 // Releases everything Init() acquired: the optional debug dump file, the
 // output frame buffer and the dav1d decoder context.
 Dav1dAv1Decoder::~Dav1dAv1Decoder() {
  if (SAVE_DECODER_STREAM && file_) {
    fflush(file_);
    fclose(file_);
    file_ = nullptr;
  }
  // `delete` on a null pointer is a no-op, so no guard is needed.
  delete decoded_frame_;
  decoded_frame_ = nullptr;
  // The context opened by dav1d_open() must be closed, otherwise the decoder
  // and its worker threads are leaked.
  if (context_) {
    dav1d_close(&context_);
    context_ = nullptr;
  }
 }
 int Dav1dAv1Decoder::Init() {
  Dav1dSettings s;
  dav1d_default_settings(&s);
  s.n_threads = std::max(2, 4);
  s.max_frame_delay = 1;   // For low latency decoding.
  s.all_layers = 0;        // Don't output a frame for every spatial layer.
  s.operating_point = 31;  // Decode all operating points.
  int ret = dav1d_open(&context_, &s);
  if (ret) {
    LOG_ERROR("Dav1d AV1 decoder open failed");
  }
  decoded_frame_ = new VideoFrame(1280 * 720 * 3 / 2);
  if (SAVE_DECODER_STREAM) {
    file_ = fopen("decode_stream.yuv", "w+b");
    if (!file_) {
      LOG_WARN("Fail to open stream.yuv");
    }
  }
  return 0;
 }
 int Dav1dAv1Decoder::Decode(
    const uint8_t *data, int size,
    std::function<void(VideoFrame)> on_receive_decoded_frame) {
  if (!first_) {
    if ((*(data + 4) & 0x1f) != 0x07) {
      return -1;
    } else {
      first_ = true;
    }
  }
  ScopedDav1dData scoped_dav1d_data;
  Dav1dData &dav1d_data = scoped_dav1d_data.Data();
  dav1d_data_wrap(&dav1d_data, data, size,
                  /*free_callback=*/&NullFreeCallback,
                  /*user_data=*/nullptr);
  if (int decode_res = dav1d_send_data(context_, &dav1d_data)) {
    LOG_ERROR("Dav1dAv1Decoder::Decode decoding failed with error code {}",
              decode_res);
    return -1;
  }
  std::shared_ptr<ScopedDav1dPicture> scoped_dav1d_picture(
      new ScopedDav1dPicture{});
  Dav1dPicture &dav1d_picture = scoped_dav1d_picture->Picture();
  if (int get_picture_res = dav1d_get_picture(context_, &dav1d_picture)) {
    LOG_ERROR("Dav1dDecoder::Decode getting picture failed with error code {}",
              get_picture_res);
    return -1;
  }
  if (dav1d_picture.p.bpc != 8) {
    // Only accept 8 bit depth.
    LOG_ERROR("Dav1dDecoder::Decode unhandled bit depth: {}",
              dav1d_picture.p.bpc);
    return -1;
  }
  memcpy(decoded_frame_->GetBuffer(), dav1d_picture.data[0],
         dav1d_picture.p.w * dav1d_picture.p.h);
  memcpy(decoded_frame_->GetBuffer() + dav1d_picture.p.w * dav1d_picture.p.h,
         dav1d_picture.data[1], dav1d_picture.p.w * dav1d_picture.p.h / 2);
  LOG_INFO("dav1d decode size {}", decoded_frame_->Size());
  on_receive_decoded_frame(*decoded_frame_);
  if (SAVE_DECODER_STREAM) {
    fwrite((unsigned char *)decoded_frame_->Buffer(), 1, decoded_frame_->Size(),
           file_);
  }
  return 0;
 }
--- a/src/media/video/decode/dav1d/dav1d_av1_decoder.h
+++ b/src/media/video/decode/dav1d/dav1d_av1_decoder.h
@@ -0,0 +1,58 @@
 /*
 * @Author: DI JUNKUN
 * @Date: 2024-03-04
 * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
 */
 #ifndef _DAV1D_AV1_DECODER_H_
 #define _DAV1D_AV1_DECODER_H_
 #include "dav1d/dav1d.h"
 #ifdef _WIN32
 extern "C" {
 #include "libavcodec/avcodec.h"
 };
 #else
 #ifdef __cplusplus
 extern "C" {
 #endif
 #include <libavcodec/avcodec.h>
 #ifdef __cplusplus
 };
 #endif
 #endif
 #include <functional>
 #include "video_decoder.h"
 // AV1 video decoder built on dav1d.
 //
 // Usage: construct, call Init() once, then call Decode() per encoded chunk.
 // Every member has an initializer because the decoder factory copies a
 // freshly constructed instance.
 class Dav1dAv1Decoder : public VideoDecoder {
 public:
  Dav1dAv1Decoder();
  virtual ~Dav1dAv1Decoder();
 public:
  // Opens the dav1d context and allocates the output frame.
  int Init();
  // Decodes `size` bytes at `data`; a decoded frame is passed to
  // `on_receive_decoded_frame`.
  int Decode(const uint8_t *data, int size,
             std::function<void(VideoFrame)> on_receive_decoded_frame);
 private:
  // FFmpeg handles; not referenced by the dav1d implementation file.
  AVCodecID codec_id_ = AV_CODEC_ID_NONE;
  const AVCodec *codec_ = nullptr;
  AVCodecContext *codec_ctx_ = nullptr;
  AVPacket *packet_ = nullptr;
  AVFrame *frame_ = nullptr;
  AVFrame *frame_nv12_ = nullptr;
  struct SwsContext *img_convert_ctx = nullptr;
  // Output buffer handed to the decode callback; owned by this object.
  VideoFrame *decoded_frame_ = nullptr;
  // Optional debug dump of decoded frames.
  FILE *file_ = nullptr;
  // Set once the first accepted input chunk has been seen.
  bool first_ = false;
  // dav1d
  Dav1dContext *context_ = nullptr;
 };
 #endif
--- a/src/media/video/decode/video_decoder_factory.cpp
+++ b/src/media/video/decode/video_decoder_factory.cpp
@@ -4,6 +4,7 @@
 #include "ffmpeg/ffmpeg_video_decoder.h"
 #include "openh264/openh264_decoder.h"
 #else
 #include "dav1d/dav1d_av1_decoder.h"
 #include "ffmpeg/ffmpeg_video_decoder.h"
 #include "nvcodec/nvidia_video_decoder.h"
 #include "openh264/openh264_decoder.h"
@@ -16,22 +17,26 @@ VideoDecoderFactory::VideoDecoderFactory() {}
 VideoDecoderFactory::~VideoDecoderFactory() {}
 std::unique_ptr<VideoDecoder> VideoDecoderFactory::CreateVideoDecoder(
-    bool hardware_acceleration) {
+    bool hardware_acceleration, bool av1_encoding) {
-#if __APPLE__
+  if (av1_encoding) {
-  return std::make_unique<OpenH264Decoder>(OpenH264Decoder());
+    return std::make_unique<Dav1dAv1Decoder>(Dav1dAv1Decoder());
  // return std::make_unique<FfmpegVideoDecoder>(FfmpegVideoDecoder());
 #else
  if (hardware_acceleration) {
    if (CheckIsHardwareAccerlerationSupported()) {
      return std::make_unique<NvidiaVideoDecoder>(NvidiaVideoDecoder());
    } else {
      return nullptr;
    }
  } else {
-    // return std::make_unique<FfmpegVideoDecoder>(FfmpegVideoDecoder());
+#if __APPLE__
    return std::make_unique<OpenH264Decoder>(OpenH264Decoder());
-  }
+    // return std::make_unique<FfmpegVideoDecoder>(FfmpegVideoDecoder());
 #else
    if (hardware_acceleration) {
      if (CheckIsHardwareAccerlerationSupported()) {
        return std::make_unique<NvidiaVideoDecoder>(NvidiaVideoDecoder());
      } else {
        return nullptr;
      }
    } else {
      // return std::make_unique<FfmpegVideoDecoder>(FfmpegVideoDecoder());
      return std::make_unique<OpenH264Decoder>(OpenH264Decoder());
    }
 #endif
  }
 }
 bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() {
--- a/src/media/video/decode/video_decoder_factory.h
+++ b/src/media/video/decode/video_decoder_factory.h
@@ -10,7 +10,7 @@ class VideoDecoderFactory {
  ~VideoDecoderFactory();
  static std::unique_ptr<VideoDecoder> CreateVideoDecoder(
-      bool hardware_acceleration);
+      bool hardware_acceleration, bool av1_encoding);
  static bool CheckIsHardwareAccerlerationSupported();
 };
--- a/src/media/video/encode/aom/aom_av1_encoder.cpp
+++ b/src/media/video/encode/aom/aom_av1_encoder.cpp
@@ -0,0 +1,418 @@
 #include "aom_av1_encoder.h"
 #include <chrono>
 #include <cmath>
 #include "log.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libavdevice/avdevice.h>
 #include <libavfilter/avfilter.h>
 #include <libavformat/avformat.h>
 #include <libavutil/imgutils.h>
 #include <libswscale/swscale.h>
 };
 #ifdef __cplusplus
 };
 #endif
 // Debug switches: when non-zero, the raw input / encoded output is dumped to
 // a file in the working directory.
 #define SAVE_NV12_STREAM 0
 #define SAVE_H264_STREAM 1
 // Bytes in one 1280x720 frame with 4:2:0 chroma. Parenthesized so the macro
 // expands as a single value inside larger expressions.
 #define YUV420P_BUFFER_SIZE (1280 * 720 * 3 / 2)
 static unsigned char yuv420p_buffer[YUV420P_BUFFER_SIZE];
 // Applies one encoder control through SetEncoderControlParameters() and makes
 // the enclosing function return -1 as soon as a control is rejected.
 #define SET_ENCODER_PARAM_OR_RETURN_ERROR(param_id, param_value) \
  do {                                                           \
    if (SetEncoderControlParameters(param_id, param_value)) {    \
      break;                                                     \
    }                                                            \
    return -1;                                                   \
  } while (false)
 // Quantizer range handed to the rate controller.
 constexpr int kQpMin{10};
 constexpr int kQpMax{40};
 // libaom usage profile used for the encoder configuration.
 constexpr int kUsageProfile{AOM_USAGE_REALTIME};
 constexpr int kMinQindex{145};  // Min qindex threshold for QP scaling.
 constexpr int kMaxQindex{205};  // Max qindex threshold for QP scaling.
 constexpr int kBitDepth{8};
 constexpr int kLagInFrames{0};  // No look ahead.
 // Timebase denominator: timestamps and durations are counted in these ticks.
 constexpr int kRtpTicksPerSecond{90000};
 constexpr double kMinimumFrameRate{1.0};
 // Mask of the "has size field" bit inside an OBU header byte.
 constexpr uint8_t kObuSizePresentBit{0b0000'0010};
 static int NV12ToYUV420PFFmpeg(unsigned char *src_buffer, int width, int height,
                               unsigned char *dst_buffer) {
  AVFrame *Input_pFrame = av_frame_alloc();
  AVFrame *Output_pFrame = av_frame_alloc();
  struct SwsContext *img_convert_ctx = sws_getContext(
      width, height, AV_PIX_FMT_NV12, 1280, 720, AV_PIX_FMT_YUV420P,
      SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
  av_image_fill_arrays(Input_pFrame->data, Input_pFrame->linesize, src_buffer,
                       AV_PIX_FMT_NV12, width, height, 1);
  av_image_fill_arrays(Output_pFrame->data, Output_pFrame->linesize, dst_buffer,
                       AV_PIX_FMT_YUV420P, 1280, 720, 1);
  sws_scale(img_convert_ctx, (uint8_t const **)Input_pFrame->data,
            Input_pFrame->linesize, 0, height, Output_pFrame->data,
            Output_pFrame->linesize);
  if (Input_pFrame) av_free(Input_pFrame);
  if (Output_pFrame) av_free(Output_pFrame);
  if (img_convert_ctx) sws_freeContext(img_convert_ctx);
  return 0;
 }
 // Chooses the superblock size: fixed 64x64 when at least four threads are
 // used and the pixel count lies in [960x540, 1920x1080); otherwise the
 // encoder picks dynamically.
 static aom_superblock_size_t GetSuperblockSize(int width, int height,
                                               int threads) {
  const int pixel_count = width * height;
  const bool in_fixed_range =
      pixel_count >= 960 * 540 && pixel_count < 1920 * 1080;
  return (threads >= 4 && in_fixed_range) ? AOM_SUPERBLOCK_SIZE_64X64
                                          : AOM_SUPERBLOCK_SIZE_DYNAMIC;
 }
 template <typename P>
 bool AomAv1Encoder::SetEncoderControlParameters(int param_id, P param_value) {
  aom_codec_err_t error_code =
      aom_codec_control(&aom_av1_encoder_ctx_, param_id, param_value);
  if (error_code != AOM_CODEC_OK) {
    LOG_ERROR(
        "AomAv1Encoder::SetEncoderControlParameters returned {} on id: {}",
        error_code, param_id);
  }
  return error_code == AOM_CODEC_OK;
 }
 // Picks the encoder thread count from the frame size and available cores.
 //
 // The result is always one of 1, 2, 4 or 8 so it matches the tile layouts
 // configured in Init() (see AV1E_SET_TILE_COLUMNS/ROWS there).
 //
 // @return 8 above 1280x720 with more than 8 cores, 4 from 640x360 with more
 //         than 4 cores, 2 from 320x180 with more than 2 cores, otherwise 1.
 int AomAv1Encoder::NumberOfThreads(int width, int height, int number_of_cores) {
  const int pixel_count = width * height;
  if (pixel_count > 1280 * 720 && number_of_cores > 8) {
    return 8;
  }
  if (pixel_count >= 640 * 360 && number_of_cores > 4) {
    return 4;
  }
  if (pixel_count >= 320 * 180 && number_of_cores > 2) {
    return 2;
  }
  // The former ARM-only branch repeated the 320x180 test above and could
  // never be reached, so it has been dropped.
  return 1;
 }
 // Maps the frame area to a speed value: 6 up to 320x180, 7 up to 640x360,
 // 8 up to 1280x720 and 9 for anything larger.
 int AomAv1Encoder::GetCpuSpeed(int width, int height) {
  const int area = width * height;
  if (area > 1280 * 720) {
    return 9;
  }
  if (area > 640 * 360) {
    return 8;
  }
  if (area > 320 * 180) {
    return 7;
  }
  return 6;
 }
 // Nothing to set up here; the encoder is configured in Init().
 AomAv1Encoder::AomAv1Encoder() = default;
 // Closes the optional debug dump files, frees the encoded-frame buffer and
 // releases the libaom resources.
 AomAv1Encoder::~AomAv1Encoder() {
  if (SAVE_NV12_STREAM && file_nv12_) {
    fflush(file_nv12_);
    fclose(file_nv12_);
    file_nv12_ = nullptr;
  }
  if (SAVE_H264_STREAM && file_ivf_) {
    fflush(file_ivf_);
    fclose(file_ivf_);
    file_ivf_ = nullptr;
  }
  // The buffer comes from `new uint8_t[...]` in Init(), so it must be freed
  // with the array form of delete.
  delete[] encoded_frame_;
  encoded_frame_ = nullptr;
  Release();
 }
 int AomAv1Encoder::Init() {
  encoded_frame_ = new uint8_t[YUV420P_BUFFER_SIZE];
  // Initialize encoder configuration structure with default values
  aom_codec_err_t ret = aom_codec_enc_config_default(
      aom_codec_av1_cx(), &aom_av1_encoder_config_, kUsageProfile);
  if (ret != AOM_CODEC_OK) {
    LOG_ERROR(
        "AomAv1Encoder::EncodeInit returned {} on aom_codec_enc_config_default",
        ret);
    return -1;
  }
  // Overwrite default config with input encoder settings & RTC-relevant values.
  aom_av1_encoder_config_.g_w = frame_width_;
  aom_av1_encoder_config_.g_h = frame_height_;
  aom_av1_encoder_config_.g_threads =
      NumberOfThreads(frame_width_, frame_height_, number_of_cores_);
  aom_av1_encoder_config_.g_timebase.num = 1;
  aom_av1_encoder_config_.g_timebase.den = kRtpTicksPerSecond;
  aom_av1_encoder_config_.rc_target_bitrate = target_bitrate_;  // kilobits/sec.
  aom_av1_encoder_config_.rc_dropframe_thresh =
      (!disable_frame_dropping_) ? 30 : 0;
  aom_av1_encoder_config_.g_input_bit_depth = kBitDepth;
  aom_av1_encoder_config_.kf_mode = AOM_KF_DISABLED;
  aom_av1_encoder_config_.rc_min_quantizer = kQpMin;
  aom_av1_encoder_config_.rc_max_quantizer = kQpMax;
  aom_av1_encoder_config_.rc_undershoot_pct = 50;
  aom_av1_encoder_config_.rc_overshoot_pct = 50;
  aom_av1_encoder_config_.rc_buf_initial_sz = 600;
  aom_av1_encoder_config_.rc_buf_optimal_sz = 600;
  aom_av1_encoder_config_.rc_buf_sz = 1000;
  aom_av1_encoder_config_.g_usage = kUsageProfile;
  aom_av1_encoder_config_.g_error_resilient = 0;
  // Low-latency settings.
  aom_av1_encoder_config_.rc_end_usage = AOM_CBR;    // cbr mode
  aom_av1_encoder_config_.g_pass = AOM_RC_ONE_PASS;  // One-pass rate control
  aom_av1_encoder_config_.g_lag_in_frames = kLagInFrames;  // No look ahead
  if (frame_for_encode_ != nullptr) {
    aom_img_free(frame_for_encode_);
    frame_for_encode_ = nullptr;
  }
  // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH
  aom_codec_flags_t flags = 0;
  // Initialize an encoder instance.
  ret = aom_codec_enc_init(&aom_av1_encoder_ctx_, aom_codec_av1_cx(),
                           &aom_av1_encoder_config_, flags);
  if (ret != AOM_CODEC_OK) {
    LOG_ERROR("AomAv1Encoder::EncodeInit returned {} on aom_codec_enc_init",
              ret);
    return -1;
  }
  inited_ = true;
  // Set control parameters
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, 4);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CDEF, 1);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MODE_COST_UPD_FREQ, 3);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MV_COST_UPD_FREQ, 3);
  // if (codec_settings->mode == VideoCodecMode::kScreensharing) {
  //   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TUNE_CONTENT,
  //                                     AOM_CONTENT_SCREEN);
  //   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 1);
  // } else {
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 0);
  // }
  if (aom_av1_encoder_config_.g_threads == 8) {
    // Values passed to AV1E_SET_TILE_ROWS and AV1E_SET_TILE_COLUMNS are log2()
    // based.
    // Use 4 tile columns x 2 tile rows for 8 threads.
    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, 1);
    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, 2);
  } else if (aom_av1_encoder_config_.g_threads == 4) {
    // Use 2 tile columns x 2 tile rows for 4 threads.
    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, 1);
    SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, 1);
  } else {
    SET_ENCODER_PARAM_OR_RETURN_ERROR(
        AV1E_SET_TILE_COLUMNS,
        static_cast<int>(log2(aom_av1_encoder_config_.g_threads)));
  }
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ROW_MT, 1);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_OBMC, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_NOISE_SENSITIVITY, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_WARPED_MOTION, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(
      AV1E_SET_SUPERBLOCK_SIZE,
      GetSuperblockSize(frame_width_, frame_height_,
                        aom_av1_encoder_config_.g_threads));
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CFL_INTRA, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_FILTER_INTRA, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DUAL_FILTER, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRABC, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_MASKED_COMP, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PAETH_INTRA, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_QM, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RESTORATION, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TX64, 0);
  SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MAX_REFERENCE_FRAMES, 3);
  frame_for_encode_ = aom_img_wrap(nullptr, AOM_IMG_FMT_NV12, frame_width_,
                                   frame_height_, 1, nullptr);
  if (SAVE_H264_STREAM) {
    file_ivf_ = fopen("encoded_stream.ivf", "w+b");
    if (!file_ivf_) {
      LOG_ERROR("Fail to open encoded_stream.ivf");
    }
  }
  if (SAVE_NV12_STREAM) {
    file_nv12_ = fopen("raw_stream.yuv", "w+b");
    if (!file_nv12_) {
      LOG_ERROR("Fail to open raw_stream.yuv");
    }
  }
  return 0;
 }
 int AomAv1Encoder::Encode(
    const uint8_t *pData, int nSize,
    std::function<int(char *encoded_packets, size_t size)> on_encoded_image) {
  if (SAVE_NV12_STREAM) {
    fwrite(pData, 1, nSize, file_nv12_);
  }
  const uint32_t duration =
      kRtpTicksPerSecond / static_cast<float>(max_frame_rate_);
  timestamp_ += duration;
  frame_for_encode_->planes[AOM_PLANE_Y] = const_cast<unsigned char *>(pData);
  frame_for_encode_->planes[AOM_PLANE_U] =
      const_cast<unsigned char *>(pData + frame_width_ * frame_height_);
  frame_for_encode_->planes[AOM_PLANE_V] = nullptr;
  frame_for_encode_->stride[AOM_PLANE_Y] = frame_width_;
  frame_for_encode_->stride[AOM_PLANE_U] = frame_width_;
  frame_for_encode_->stride[AOM_PLANE_V] = 0;
  // NV12ToYUV420PFFmpeg((unsigned char *)pData, frame_width_, frame_height_,
  //                     (unsigned char *)yuv420p_buffer);
  if (0 == seq_++ % 300) {
    force_i_frame_flags_ = AOM_EFLAG_FORCE_KF;
  } else {
    force_i_frame_flags_ = 0;
  }
  // Encode a frame. The presentation timestamp `pts` should not use real
  // timestamps from frames or the wall clock, as that can cause the rate
  // controller to misbehave.
  aom_codec_err_t ret =
      aom_codec_encode(&aom_av1_encoder_ctx_, frame_for_encode_, timestamp_,
                       duration, force_i_frame_flags_);
  if (ret != AOM_CODEC_OK) {
    LOG_ERROR("AomAv1Encoder::Encode returned {} on aom_codec_encode", ret);
    return -1;
  }
  aom_codec_iter_t iter = nullptr;
  int data_pkt_count = 0;
  while (const aom_codec_cx_pkt_t *pkt =
             aom_codec_get_cx_data(&aom_av1_encoder_ctx_, &iter)) {
    if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
      memcpy(encoded_frame_, pkt->data.frame.buf, pkt->data.frame.sz);
      encoded_frame_size_ = pkt->data.frame.sz;
      int qp = -1;
      SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_GET_LAST_QUANTIZER, &qp);
      LOG_INFO("Encoded frame qp = {}", qp);
      // int consumed_size = 0;
      // int offset = 0;
      // int unit = 0;
      // while (consumed_size < encoded_frame_size_) {
      //   int obu_size = 0;
      //   uint8_t obu_header;
      //   memcpy(&obu_header, encoded_frame_ + offset, sizeof(char));
      //   obu_size = 1;
      //   offset += 1;
      //   // memcpy(reinterpret_cast<char *>(&obu_header), encoded_frame_, 1);
      //   int obu_type = (obu_header & 0b0'1111'000) >> 3;
      //   bool obu_has_ext = obu_header & 0b0'0000'100;
      //   // LOG_ERROR("OBU has ext {}", obu_has_ext);
      //   bool obu_has_size = obu_header & kObuSizePresentBit;
      //   LOG_ERROR("OBU has size {}", obu_has_size);
      //   if (!obu_has_size) {
      //     consumed_size = encoded_frame_size_;
      //     offset = encoded_frame_size_;
      //   } else {
      //     uint64_t size = 0;
      //     if (!ReadLeb128((char *)(encoded_frame_ + offset), &size) ||
      //         size > encoded_frame_size_ - consumed_size) {
      //       LOG_ERROR(
      //           "Malformed AV1 input: declared size {} is larger than "
      //           "remaining buffer size {}",
      //           size, encoded_frame_size_ - consumed_size);
      //       return -1;
      //     }
      //     LOG_ERROR("leb128 get size = {}, offset = {}", size, offset);
      //     consumed_size += size;
      //     offset += size;
      //     obu_size += size;
      //   }
      //   LOG_ERROR("Temporal unit [{}], OBU size [{}], OBU type [{}]", unit++,
      //             obu_size, obu_type);
      // }
      if (on_encoded_image) {
        on_encoded_image((char *)encoded_frame_, encoded_frame_size_);
        if (SAVE_H264_STREAM) {
          fwrite(encoded_frame_, 1, encoded_frame_size_, file_ivf_);
        }
      } else {
        OnEncodedImage((char *)encoded_frame_, encoded_frame_size_);
      }
    }
  }
  return 0;
 }
 // Fallback sink used by Encode() when no callback is supplied. It only logs
 // that it is not implemented and reports success.
 int AomAv1Encoder::OnEncodedImage(char * /*encoded_packets*/, size_t /*size*/) {
  LOG_INFO("OnEncodedImage not implemented");
  return 0;
 }
 // Stores the force-key-frame flag in `force_i_frame_flags_`.
 void AomAv1Encoder::ForceIdr() {
  force_i_frame_flags_ = AOM_EFLAG_FORCE_KF;
 }
 int AomAv1Encoder::Release() {
  if (frame_for_encode_ != nullptr) {
    aom_img_free(frame_for_encode_);
    frame_for_encode_ = nullptr;
  }
  if (inited_) {
    if (aom_codec_destroy(&aom_av1_encoder_ctx_)) {
      return -1;
    }
    inited_ = false;
  }
  return 0;
 }
--- a/src/media/video/encode/aom/aom_av1_encoder.h
+++ b/src/media/video/encode/aom/aom_av1_encoder.h
@@ -0,0 +1,84 @@
 /*
 * @Author: DI JUNKUN
 * @Date: 2024-03-01
 * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
 */
 #ifndef _AOM_AV1_ENCODER_H_
 #define _AOM_AV1_ENCODER_H_
 #include <functional>
 #include <vector>
 #include "aom/aom_codec.h"
 #include "aom/aom_encoder.h"
 #include "aom/aomcx.h"
 #include "video_encoder.h"
 // Parsed fields of one OBU header.
 struct OBU_t {
  uint64_t obu_header_size;
  unsigned obu_type;
  // leb128(): size in bytes of the OBU, not including the bytes within
  // obu_header or the obu_size syntax element.
  uint64_t obu_size;
  int extension_flag;
  int has_size_field;
  // The two fields below apply when extension_flag == 1.
  int temporal_id;
  int spatial_id;
 };
 // AV1 video encoder built on libaom.
 //
 // Usage: construct, call Init() once, then call Encode() per NV12 frame.
 // Every member has an initializer because the encoder factory copies a
 // freshly constructed instance.
 class AomAv1Encoder : public VideoEncoder {
 public:
  AomAv1Encoder();
  virtual ~AomAv1Encoder();
 public:
  // Configures and opens the libaom encoder.
  int Init();
  // Encodes one frame; encoded packets are passed to `on_encoded_image`, or
  // to OnEncodedImage() when the callback is empty.
  int Encode(
      const uint8_t* pData, int nSize,
      std::function<int(char* encoded_packets, size_t size)> on_encoded_image);
  virtual int OnEncodedImage(char* encoded_packets, size_t size);
  // Requests a key frame.
  void ForceIdr();
 private:
  template <typename P>
  bool SetEncoderControlParameters(int param_id, P param_value);
  int NumberOfThreads(int width, int height, int number_of_cores);
  int GetCpuSpeed(int width, int height);
  int Release();
 private:
  int frame_width_ = 1280;
  int frame_height_ = 720;
  int key_frame_interval_ = 300;
  int target_bitrate_ = 1000;  // Passed to rc_target_bitrate (kilobits/sec).
  int max_bitrate_ = 2500000;
  int max_payload_size_ = 1400;
  int max_frame_rate_ = 30;
  int number_of_cores_ = 4;
  std::vector<std::vector<uint8_t>> encoded_packets_;
  unsigned char* encoded_image_ = nullptr;
  // Optional debug dumps of the encoded output / raw input.
  FILE* file_ivf_ = nullptr;
  FILE* file_nv12_ = nullptr;
  unsigned char* nv12_data_ = nullptr;
  unsigned int seq_ = 0;  // Count of frames submitted to Encode().
  // aom av1 encoder
  aom_image_t* frame_for_encode_ = nullptr;
  // Value-initialized so a copy made before Init() never reads
  // indeterminate memory.
  aom_codec_ctx_t aom_av1_encoder_ctx_ = {};
  aom_codec_enc_cfg_t aom_av1_encoder_config_ = {};
  bool disable_frame_dropping_ = false;
  bool inited_ = false;  // True once aom_codec_enc_init() has succeeded.
  int64_t timestamp_ = 0;
  aom_enc_frame_flags_t force_i_frame_flags_ = 0;
  // Holds the most recent encoded packet; allocated in Init().
  uint8_t* encoded_frame_ = nullptr;
  int encoded_frame_size_ = 0;
 };
 #endif
--- a/src/media/video/encode/openh264/openh264_encoder.cpp
+++ b/src/media/video/encode/openh264/openh264_encoder.cpp
@@ -49,7 +49,9 @@ static int NV12ToYUV420PFFmpeg(unsigned char *src_buffer, int width, int height,
  return 0;
 }
-OpenH264Encoder::OpenH264Encoder() {
+OpenH264Encoder::OpenH264Encoder() {}
 OpenH264Encoder::~OpenH264Encoder() {
  if (SAVE_NV12_STREAM && file_nv12_) {
    fflush(file_nv12_);
    fclose(file_nv12_);
@@ -62,8 +64,8 @@ OpenH264Encoder::OpenH264Encoder() {
    file_h264_ = nullptr;
  }
  delete encoded_frame_;
  Release();
 }
 OpenH264Encoder::~OpenH264Encoder() { Release(); }
 SEncParamExt OpenH264Encoder::CreateEncoderParams() const {
  SEncParamExt encoder_params;
--- a/src/media/video/encode/video_encoder_factory.cpp
+++ b/src/media/video/encode/video_encoder_factory.cpp
@@ -4,6 +4,7 @@
 #include "ffmpeg/ffmpeg_video_encoder.h"
 #include "openh264/openh264_encoder.h"
 #else
 #include "aom/aom_av1_encoder.h"
 #include "ffmpeg/ffmpeg_video_encoder.h"
 #include "nvcodec/nvidia_video_encoder.h"
 #include "openh264/openh264_encoder.h"
@@ -16,22 +17,26 @@ VideoEncoderFactory::VideoEncoderFactory() {}
 VideoEncoderFactory::~VideoEncoderFactory() {}
 std::unique_ptr<VideoEncoder> VideoEncoderFactory::CreateVideoEncoder(
-    bool hardware_acceleration) {
+    bool hardware_acceleration, bool av1_encoding) {
-#if __APPLE__
+  if (av1_encoding) {
-  // return std::make_unique<FFmpegVideoEncoder>(FFmpegVideoEncoder());
+    return std::make_unique<AomAv1Encoder>(AomAv1Encoder());
  return std::make_unique<OpenH264Encoder>(OpenH264Encoder());
 #else
  if (hardware_acceleration) {
    if (CheckIsHardwareAccerlerationSupported()) {
      return std::make_unique<NvidiaVideoEncoder>(NvidiaVideoEncoder());
    } else {
      return nullptr;
    }
  } else {
 #if __APPLE__
    // return std::make_unique<FFmpegVideoEncoder>(FFmpegVideoEncoder());
    return std::make_unique<OpenH264Encoder>(OpenH264Encoder());
-  }
+#else
    if (hardware_acceleration) {
      if (CheckIsHardwareAccerlerationSupported()) {
        return std::make_unique<NvidiaVideoEncoder>(NvidiaVideoEncoder());
      } else {
        return nullptr;
      }
    } else {
      // return std::make_unique<FFmpegVideoEncoder>(FFmpegVideoEncoder());
      return std::make_unique<OpenH264Encoder>(OpenH264Encoder());
    }
 #endif
  }
 }
 bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
--- a/src/media/video/encode/video_encoder_factory.h
+++ b/src/media/video/encode/video_encoder_factory.h
@@ -10,7 +10,7 @@ class VideoEncoderFactory {
  ~VideoEncoderFactory();
  static std::unique_ptr<VideoEncoder> CreateVideoEncoder(
-      bool hardware_acceleration);
+      bool hardware_acceleration, bool av1_encoding);
  static bool CheckIsHardwareAccerlerationSupported();
 };
--- a/src/pc/peer_connection.cpp
+++ b/src/pc/peer_connection.cpp
@@ -38,6 +38,7 @@ int PeerConnection::Init(PeerConnectionParams params,
  cfg_turn_server_password_ = reader.Get("turn server", "password", "");
  cfg_hardware_acceleration_ =
      reader.Get("hardware acceleration", "turn_on", "false");
  cfg_av1_encoding_ = reader.Get("av1 encoding", "turn_on", "false");
  std::regex regex("\n");
@@ -62,6 +63,9 @@ int PeerConnection::Init(PeerConnectionParams params,
  LOG_INFO("Hardware accelerated codec [{}]",
           hardware_acceleration_ ? "ON" : "OFF");
  // The AV1 switch is on only when the config value is exactly "true".
  av1_encoding_ = cfg_av1_encoding_ == "true";
  // Log the AV1 flag itself (this line used to print the hardware
  // acceleration flag).
  LOG_INFO("av1 encoding [{}]", av1_encoding_ ? "ON" : "OFF");
  on_receive_video_buffer_ = params.on_receive_video_buffer;
  on_receive_audio_buffer_ = params.on_receive_audio_buffer;
  on_receive_data_buffer_ = params.on_receive_data_buffer;
@@ -95,7 +99,7 @@ int PeerConnection::Init(PeerConnectionParams params,
        (uint8_t *)data, size,
        [this, user_id, user_id_size](VideoFrame video_frame) {
          if (on_receive_video_buffer_) {
-            // LOG_ERROR("Receive video, size {}", video_frame.Size());
+            LOG_ERROR("Receive video, size {}", video_frame.Size());
            on_receive_video_buffer_((const char *)video_frame.Buffer(),
                                     video_frame.Size(), user_id, user_id_size);
          }
@@ -177,44 +181,54 @@ int PeerConnection::CreateVideoCodec(bool hardware_acceleration) {
 #else
 #endif
-  video_encoder_ =
+  if (av1_encoding_) {
-      VideoEncoderFactory::CreateVideoEncoder(hardware_acceleration_);
+    video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, true);
-  if (hardware_acceleration_ && !video_encoder_) {
+    LOG_INFO("Only support software encoding for AV1");
-    LOG_WARN(
+  } else {
-        "Hardware accelerated encoder not available, use default software "
+    video_encoder_ =
-        "encoder");
+        VideoEncoderFactory::CreateVideoEncoder(hardware_acceleration_, false);
-    video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false);
+    if (hardware_acceleration_ && !video_encoder_) {
-    if (!video_encoder_) {
+      LOG_WARN(
-      LOG_ERROR(
+          "Hardware accelerated encoder not available, use default software "
-          "Hardware accelerated encoder and software encoder both not "
+          "encoder");
-          "available");
+      video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
-      return -1;
+      if (!video_encoder_) {
        LOG_ERROR(
            "Hardware accelerated encoder and software encoder both not "
            "available");
        return -1;
      }
    }
  }
  if (0 != video_encoder_->Init()) {
-    video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false);
+    video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
    if (!video_encoder_ || 0 != video_encoder_->Init()) {
      LOG_ERROR("Encoder init failed");
      return -1;
    }
  }
-  video_decoder_ =
+  if (av1_encoding_) {
-      VideoDecoderFactory::CreateVideoDecoder(hardware_acceleration_);
+    video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, true);
-  if (hardware_acceleration_ && !video_decoder_) {
+    LOG_INFO("Only support software decoding for AV1");
-    LOG_WARN(
+  } else {
-        "Hardware accelerated decoder not available, use default software "
+    video_decoder_ =
-        "decoder");
+        VideoDecoderFactory::CreateVideoDecoder(hardware_acceleration_, false);
-    video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false);
+    if (hardware_acceleration_ && !video_decoder_) {
-    if (!video_decoder_) {
+      LOG_WARN(
-      LOG_ERROR(
+          "Hardware accelerated decoder not available, use default software "
-          "Hardware accelerated decoder and software decoder both not "
+          "decoder");
-          "available");
+      video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
-      return -1;
+      if (!video_decoder_) {
        LOG_ERROR(
            "Hardware accelerated decoder and software decoder both not "
            "available");
        return -1;
      }
    }
  }
  if (0 != video_decoder_->Init()) {
-    video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false);
+    video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
    if (!video_decoder_ || video_decoder_->Init()) {
      LOG_ERROR("Decoder init failed");
      return -1;
@@ -376,7 +390,8 @@ void PeerConnection::ProcessSignal(const std::string &signal) {
          ice_transmission_list_[remote_user_id]->InitIceTransmission(
              cfg_stun_server_ip_, stun_server_port_, cfg_turn_server_ip_,
              turn_server_port_, cfg_turn_server_username_,
-              cfg_turn_server_password_);
+              cfg_turn_server_password_,
              av1_encoding_ ? RtpPacket::AV1 : RtpPacket::H264);
          ice_transmission_list_[remote_user_id]->JoinTransmission();
        }
@@ -430,7 +445,8 @@ void PeerConnection::ProcessSignal(const std::string &signal) {
        ice_transmission_list_[remote_user_id]->InitIceTransmission(
            cfg_stun_server_ip_, stun_server_port_, cfg_turn_server_ip_,
            turn_server_port_, cfg_turn_server_username_,
-            cfg_turn_server_password_);
+            cfg_turn_server_password_,
            av1_encoding_ ? RtpPacket::AV1 : RtpPacket::H264);
        ice_transmission_list_[remote_user_id]->SetTransmissionId(
            transmission_id_);
--- a/src/pc/peer_connection.h
+++ b/src/pc/peer_connection.h
@@ -76,10 +76,12 @@ class PeerConnection {
  std::string cfg_turn_server_username_;
  std::string cfg_turn_server_password_;
  std::string cfg_hardware_acceleration_;
  std::string cfg_av1_encoding_;
  int signal_server_port_ = 0;
  int stun_server_port_ = 0;
  int turn_server_port_ = 0;
  bool hardware_acceleration_ = false;
  bool av1_encoding_ = false;
 private:
  std::shared_ptr<WsTransmission> ws_transport_ = nullptr;
--- a/src/rtp/rtp_codec.cpp
+++ b/src/rtp/rtp_codec.cpp
@@ -9,6 +9,16 @@
 #define FU_A 28
 #define FU_B 29
 // AV1 OBU type codes, i.e. the values carried in the 4-bit obu_type field of
 // an OBU header byte ((header & 0b0'1111'000) >> 3).
 // NOTE(review): the numbers look like the OBU type table of the AV1 bitstream
 // specification -- confirm against the spec before relying on them.
 constexpr int kObuTypeSequenceHeader = 1;
 constexpr int kObuTypeTemporalDelimiter = 2;
 constexpr int kObuTypeFrameHeader = 3;
 constexpr int kObuTypeTileGroup = 4;
 constexpr int kObuTypeMetadata = 5;
 constexpr int kObuTypeFrame = 6;
 constexpr int kObuTypeRedundantFrameHeader = 7;
 constexpr int kObuTypeTileList = 8;
 constexpr int kObuTypePadding = 15;
 RtpCodec ::RtpCodec(RtpPacket::PAYLOAD_TYPE payload_type)
    : version_(RTP_VERSION),
      has_padding_(false),
@@ -216,6 +226,84 @@ void RtpCodec::Encode(uint8_t* buffer, size_t size,
        packets.emplace_back(rtp_packet);
      }
    }
  } else if (RtpPacket::PAYLOAD_TYPE::AV1 == payload_type_) {
    if (size <= MAX_NALU_LEN) {
      RtpPacket rtp_packet;
      rtp_packet.SetVerion(version_);
      rtp_packet.SetHasPadding(has_padding_);
      rtp_packet.SetHasExtension(has_extension_);
      rtp_packet.SetMarker(1);
      rtp_packet.SetPayloadType(RtpPacket::PAYLOAD_TYPE(payload_type_));
      rtp_packet.SetSequenceNumber(sequence_number_++);
      timestamp_ =
          std::chrono::high_resolution_clock::now().time_since_epoch().count();
      rtp_packet.SetTimestamp(timestamp_);
      rtp_packet.SetSsrc(ssrc_);
      if (!csrcs_.empty()) {
        rtp_packet.SetCsrcs(csrcs_);
      }
      if (has_extension_) {
        rtp_packet.SetExtensionProfile(extension_profile_);
        rtp_packet.SetExtensionData(extension_data_, extension_len_);
      }
      // int obu_header;
      // memcpy(&obu_header, buffer, sizeof(char));
      // int obu_type = (obu_header & 0b0'1111'000) >> 3;
      // LOG_ERROR("OBU type {}", obu_type);
      // if (obu_type == kObuTypeTemporalDelimiter ||
      //     obu_type == kObuTypeTileList || obu_type == kObuTypePadding) {
      //   LOG_ERROR("Unsupported OBU type", obu_type);
      // }
      // Value-initialize the aggregation header so that Z, Y, W and N are all a
      // well-defined 0 before the header is handed to the packet.
      // NOTE(review): confirm that all-zero flags are what the receiver should
      // see for a frame that fits into a single RTP packet.
      RtpPacket::AV1_AGGR_HEADER av1_aggr_header = {};
      rtp_packet.SetAv1AggrHeader(av1_aggr_header);
      rtp_packet.EncodeAv1(buffer, size);
      packets.emplace_back(rtp_packet);
    } else {
      size_t last_packet_size = size % MAX_NALU_LEN;
      size_t packet_num = size / MAX_NALU_LEN + (last_packet_size ? 1 : 0);
      timestamp_ =
          std::chrono::high_resolution_clock::now().time_since_epoch().count();
      for (size_t index = 0; index < packet_num; index++) {
        RtpPacket rtp_packet;
        rtp_packet.SetVerion(version_);
        rtp_packet.SetHasPadding(has_padding_);
        rtp_packet.SetHasExtension(has_extension_);
        rtp_packet.SetMarker(index == packet_num - 1 ? 1 : 0);
        rtp_packet.SetPayloadType(RtpPacket::PAYLOAD_TYPE(payload_type_));
        rtp_packet.SetSequenceNumber(sequence_number_++);
        rtp_packet.SetTimestamp(timestamp_);
        rtp_packet.SetSsrc(ssrc_);
        if (!csrcs_.empty()) {
          rtp_packet.SetCsrcs(csrcs_);
        }
        if (has_extension_) {
          rtp_packet.SetExtensionProfile(extension_profile_);
          rtp_packet.SetExtensionData(extension_data_, extension_len_);
        }
        if (index == packet_num - 1 && last_packet_size > 0) {
          rtp_packet.EncodeAv1(buffer + index * MAX_NALU_LEN, last_packet_size);
        } else {
          rtp_packet.EncodeAv1(buffer + index * MAX_NALU_LEN, MAX_NALU_LEN);
        }
        packets.emplace_back(rtp_packet);
      }
    }
  } else if (RtpPacket::PAYLOAD_TYPE::OPUS == payload_type_) {
    RtpPacket rtp_packet;
    rtp_packet.SetVerion(version_);
--- a/src/rtp/rtp_packet.cpp
+++ b/src/rtp/rtp_packet.cpp
@@ -17,6 +17,8 @@ void RtpPacket::TryToDecodeRtpPacket() {
    DecodeH264FecSource();
  } else if (PAYLOAD_TYPE::H264_FEC_REPAIR == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
    DecodeH264FecRepair();
  } else if (PAYLOAD_TYPE::AV1 == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
    DecodeAv1();
  } else if (PAYLOAD_TYPE::OPUS == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
    DecodeOpus();
  } else if (PAYLOAD_TYPE::DATA == PAYLOAD_TYPE(buffer_[1] & 0x7F)) {
@@ -389,6 +391,55 @@ const uint8_t *RtpPacket::EncodeH264FecRepair(
  return buffer_;
 }
 const uint8_t *RtpPacket::EncodeAv1(uint8_t *payload, size_t payload_size) {
  buffer_[0] = (version_ << 6) | (has_padding_ << 5) | (has_extension_ << 4) |
               total_csrc_number_;
  buffer_[1] = (marker_ << 7) | payload_type_;
  buffer_[2] = (sequence_number_ >> 8) & 0xFF;
  buffer_[3] = sequence_number_ & 0xFF;
  buffer_[4] = (timestamp_ >> 24) & 0xFF;
  buffer_[5] = (timestamp_ >> 16) & 0xFF;
  buffer_[6] = (timestamp_ >> 8) & 0xFF;
  buffer_[7] = timestamp_ & 0xFF;
  buffer_[8] = (ssrc_ >> 24) & 0xFF;
  buffer_[9] = (ssrc_ >> 16) & 0xFF;
  buffer_[10] = (ssrc_ >> 8) & 0xFF;
  buffer_[11] = ssrc_ & 0xFF;
  for (uint32_t index = 0; index < total_csrc_number_ && !csrcs_.empty();
       index++) {
    buffer_[12 + index] = (csrcs_[index] >> 24) & 0xFF;
    buffer_[13 + index] = (csrcs_[index] >> 16) & 0xFF;
    buffer_[14 + index] = (csrcs_[index] >> 8) & 0xFF;
    buffer_[15 + index] = csrcs_[index] & 0xFF;
  }
  uint32_t extension_offset =
      total_csrc_number_ && !csrcs_.empty() ? total_csrc_number_ * 4 : 0;
  if (has_extension_ && extension_data_) {
    buffer_[12 + extension_offset] = extension_profile_ >> 8;
    buffer_[13 + extension_offset] = extension_profile_ & 0xff;
    buffer_[14 + extension_offset] = (extension_len_ >> 8) & 0xFF;
    buffer_[15 + extension_offset] = extension_len_ & 0xFF;
    memcpy(buffer_ + 16 + extension_offset, extension_data_, extension_len_);
  }
  uint32_t payload_offset =
      (has_extension_ && extension_data_ ? extension_len_ : 0) +
      extension_offset;
  buffer_[12 + payload_offset] = fu_indicator_.forbidden_bit << 7 |
                                 fu_indicator_.nal_reference_idc << 6 |
                                 fu_indicator_.nal_unit_type;
  memcpy(buffer_ + 13 + payload_offset, payload, payload_size);
  size_ = payload_size + (13 + payload_offset);
  return buffer_;
 }
 // ----------------------------------------------------------------------------
 size_t RtpPacket::DecodeOpus(uint8_t *payload) {
  version_ = (buffer_[0] >> 6) & 0x03;
  has_padding_ = (buffer_[0] >> 5) & 0x01;
@@ -672,4 +723,47 @@ size_t RtpPacket::DecodeH264FecRepair(uint8_t *payload) {
    memcpy(payload, payload_, payload_size_);
  }
  return payload_size_;
 }
 size_t RtpPacket::DecodeAv1(uint8_t *payload) {
  version_ = (buffer_[0] >> 6) & 0x03;
  has_padding_ = (buffer_[0] >> 5) & 0x01;
  has_extension_ = (buffer_[0] >> 4) & 0x01;
  total_csrc_number_ = buffer_[0] & 0x0f;
  marker_ = (buffer_[1] >> 7) & 0x01;
  payload_type_ = buffer_[1] & 0x7f;
  sequence_number_ = (buffer_[2] << 8) | buffer_[3];
  timestamp_ =
      (buffer_[4] << 24) | (buffer_[5] << 16) | (buffer_[6] << 8) | buffer_[7];
  ssrc_ = (buffer_[8] << 24) | (buffer_[9] << 16) | (buffer_[10] << 8) |
          buffer_[11];
  for (uint32_t index = 0; index < total_csrc_number_; index++) {
    uint32_t csrc = (buffer_[12 + index] << 24) | (buffer_[13 + index] << 16) |
                    (buffer_[14 + index] << 8) | buffer_[15 + index];
    csrcs_.push_back(csrc);
  }
  uint32_t extension_offset = total_csrc_number_ * 4;
  if (has_extension_) {
    extension_profile_ =
        (buffer_[12 + extension_offset] << 8) | buffer_[13 + extension_offset];
    extension_len_ =
        (buffer_[14 + extension_offset] << 8) | buffer_[15 + extension_offset];
    // extension_data_ = new uint8_t[extension_len_];
    // memcpy(extension_data_, buffer_ + 16 + extension_offset,
    // extension_len_);
    extension_data_ = buffer_ + 16 + extension_offset;
  }
  uint32_t payload_offset =
      (has_extension_ ? extension_len_ : 0) + extension_offset;
  payload_size_ = size_ - (13 + payload_offset);
  payload_ = buffer_ + 13 + payload_offset;
  if (payload) {
    memcpy(payload, payload_, payload_size_);
  }
  return payload_size_;
 }
--- a/src/rtp/rtp_packet.h
+++ b/src/rtp/rtp_packet.h
@@ -115,6 +115,44 @@
 // |               padding         | Padding size  |
 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // AV1
 //  0                   1                   2                   3
 //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // |V=2|P|X|  CC   |M|     PT      |       sequence number         |
 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // |                           timestamp                           |
 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // |           synchronization source (SSRC) identifier            |
 // +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
 // |            contributing source (CSRC) identifiers             |x
 // |                             ....                              |x
 // +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
 // |         0x100         |  0x0  |       extensions length       |x
 // +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
 // |      ID       |  hdr_length   |                               |x
 // +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+                               |x
 // |                                                               |x
 // |          dependency descriptor (hdr_length #bytes)            |x
 // |                                                               |x
 // |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // |                               | Other rtp header extensions...|x
 // +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
 // | AV1 aggr hdr  |                                               |
 // +-+-+-+-+-+-+-+-+                                               |
 // |                                                               |
 // |                   Bytes 2..N of AV1 payload                   |
 // |                                                               |
 // |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // |                               :    OPTIONAL RTP padding       |
 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 // | AV1 aggr hdr  |
 //  0 1 2 3 4 5 6 7
 // +-+-+-+-+-+-+-+-+
 // |Z|Y| W |N|-|-|-|
 // +-+-+-+-+-+-+-+-+
 #define DEFAULT_MTU 1500
 #define MAX_NALU_LEN 1400
@@ -124,6 +162,7 @@ class RtpPacket {
    H264 = 96,
    H264_FEC_SOURCE = 97,
    H264_FEC_REPAIR = 98,
    AV1 = 99,
    OPUS = 111,
    DATA = 127
  } PAYLOAD_TYPE;
@@ -179,6 +218,20 @@ class RtpPacket {
    uint8_t nal_unit_type : 5;
  } FU_HEADER;
  // AV1 aggregation header: the single byte drawn as |Z|Y| W |N|-|-|-| in the
  // packet layout comment of this header. Five bits are used; the remaining
  // three are not represented here.
  // NOTE(review): field meanings below are taken from the AV1 RTP payload
  // format, not from this file -- confirm against the spec:
  //   z: first OBU element continues a fragment from the previous packet
  //   y: last OBU element continues in the next packet
  //   w: number of OBU elements in the packet (0 = each is length-prefixed)
  //   n: packet is the first of a new coded video sequence
  typedef struct {
    uint8_t z : 1;
    uint8_t y : 1;
    uint8_t w : 2;
    uint8_t n : 1;
  } AV1_AGGR_HEADER;
  // typedef struct Obu {
  //   uint8_t header;
  //   uint8_t extension_header;  // undefined if (header & kXbit) == 0
  //   rtc::ArrayView<const uint8_t> payload;
  //   int size;  // size of the header and payload combined.
  // } OBU;
  void SetFuIndicator(FU_INDICATOR fu_indicator) {
    fu_indicator_.forbidden_bit = fu_indicator.forbidden_bit;
    fu_indicator_.nal_reference_idc = fu_indicator.nal_reference_idc;
@@ -192,6 +245,13 @@ class RtpPacket {
    fu_header_.nal_unit_type = fu_header.nal_unit_type;
  }
  // Stores the AV1 aggregation header (z, y, w, n) to use for this packet.
  void SetAv1AggrHeader(AV1_AGGR_HEADER av1_aggr_header) {
    // The header is a plain bit-field struct, so a single struct assignment
    // carries all four fields across at once.
    av1_aggr_header_ = av1_aggr_header;
  }
  void SetFecSymbolId(uint8_t fec_symbol_id) { fec_symbol_id_ = fec_symbol_id; }
 public:
@@ -204,11 +264,14 @@ class RtpPacket {
  const uint8_t *EncodeH264FecRepair(uint8_t *payload, size_t payload_size,
                                     unsigned int fec_symbol_id,
                                     unsigned int fec_source_symbol_num);
  // Writes the RTP header, one header byte and `payload_size` bytes of
  // `payload` into the packet buffer for an AV1 packet; returns that buffer.
  const uint8_t *EncodeAv1(uint8_t *payload, size_t payload_size);
  size_t DecodeData(uint8_t *payload = nullptr);
  size_t DecodeH264Nalu(uint8_t *payload = nullptr);
  size_t DecodeH264Fua(uint8_t *payload = nullptr);
  size_t DecodeH264FecSource(uint8_t *payload = nullptr);
  size_t DecodeH264FecRepair(uint8_t *payload = nullptr);
  // Parses the buffered packet as AV1. When `payload` is non-null the payload
  // bytes are also copied into it. Returns the payload size in bytes.
  size_t DecodeAv1(uint8_t *payload = nullptr);
  size_t DecodeOpus(uint8_t *payload = nullptr);
 public:
@@ -314,6 +377,7 @@ class RtpPacket {
  FU_HEADER fu_header_;
  uint8_t fec_symbol_id_ = 0;
  uint8_t fec_source_symbol_num_ = 0;
  // AV1 aggregation header for this packet. Zero-initialized so that z, y, w
  // and n hold defined values even if SetAv1AggrHeader() is never called.
  AV1_AGGR_HEADER av1_aggr_header_ = {};
  // Payload
  uint8_t *payload_ = nullptr;
--- a/src/transmission/ice_transmission.cpp
+++ b/src/transmission/ice_transmission.cpp
@@ -42,11 +42,11 @@ IceTransmission::~IceTransmission() {
  }
 }
-int IceTransmission::InitIceTransmission(std::string &stun_ip, int stun_port,
+int IceTransmission::InitIceTransmission(
-                                         std::string &turn_ip, int turn_port,
+    std::string &stun_ip, int stun_port, std::string &turn_ip, int turn_port,
-                                         std::string &turn_username,
+    std::string &turn_username, std::string &turn_password,
-                                         std::string &turn_password) {
+    RtpPacket::PAYLOAD_TYPE video_codec_payload_type) {
-  video_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::H264);
+  video_rtp_codec_ = std::make_unique<RtpCodec>(video_codec_payload_type);
  audio_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::OPUS);
  data_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::DATA);
--- a/src/transmission/ice_transmission.h
+++ b/src/transmission/ice_transmission.h
@@ -37,7 +37,8 @@ class IceTransmission {
  int InitIceTransmission(std::string &stun_ip, int stun_port,
                          std::string &turn_ip, int turn_port,
                          std::string &turn_username,
-                          std::string &turn_password);
+                          std::string &turn_password,
                          RtpPacket::PAYLOAD_TYPE video_codec_payload_type);
  int DestroyIceTransmission();
--- a/xmake.lua
+++ b/xmake.lua
@@ -11,7 +11,7 @@ add_defines("ASIO_STANDALONE", "ASIO_HAS_STD_TYPE_TRAITS", "ASIO_HAS_STD_SHARED_
    "ASIO_HAS_STD_ADDRESSOF", "ASIO_HAS_STD_ATOMIC", "ASIO_HAS_STD_CHRONO", 
    "ASIO_HAS_CSTDINT", "ASIO_HAS_STD_ARRAY",  "ASIO_HAS_STD_SYSTEM_ERROR")
-add_requires("asio 1.24.0", "nlohmann_json", "spdlog 1.11.0", "openfec", "libopus 1.4")
+add_requires("asio 1.24.0", "nlohmann_json", "spdlog 1.11.0", "openfec", "libopus 1.4", "dav1d 1.1.0")
 add_packages("asio", "nlohmann_json", "spdlog", "openfec", "libopus")
 includes("thirdparty")
@@ -19,8 +19,9 @@ includes("thirdparty")
 if is_os("windows") then
    add_requires("vcpkg::ffmpeg 5.1.2", {configs = {shared = false}})
    add_requires("vcpkg::libnice 0.1.21")
    add_requires("vcpkg::aom")
    add_requires("openh264 2.1.1", {configs = {shared = false}})
-    add_packages("vcpkg::ffmpeg", "vcpkg::libnice", "openh264", "cuda")
+    add_packages("vcpkg::ffmpeg", "vcpkg::libnice", "vcpkg::aom", "openh264", "dav1d", "cuda")
    add_defines("_WEBSOCKETPP_CPP11_INTERNAL_")
    add_requires("cuda")
 elseif is_os("linux") then
@@ -119,7 +120,9 @@ target("media")
        "src/media/video/encode/ffmpeg/*.cpp",
        "src/media/video/decode/ffmpeg/*.cpp",
        "src/media/video/encode/openh264/*.cpp",
-        "src/media/video/decode/openh264/*.cpp")
+        "src/media/video/decode/openh264/*.cpp",
        "src/media/video/encode/aom/*.cpp",
        "src/media/video/decode/dav1d/*.cpp")
        add_includedirs("src/media/video/encode",
        "src/media/video/decode",
        "src/media/video/encode/nvcodec",
@@ -128,6 +131,8 @@ target("media")
        "src/media/video/decode/ffmpeg",
        "src/media/video/encode/openh264",
        "src/media/video/decode/openh264",
        "src/media/video/encode/aom",
        "src/media/video/decode/dav1d",
        "thirdparty/nvcodec/Interface",
        "thirdparty/nvcodec/Samples", {public = true})
    elseif is_os(("linux")) then