Implement AV1 codec (aom encoder, dav1d decoder)

This commit is contained in:
dijunkun
2024-04-19 17:35:19 +08:00
parent a309627ca3
commit f5586a7922
17 changed files with 1037 additions and 67 deletions

View File

@@ -0,0 +1,128 @@
#include "dav1d_av1_decoder.h"
#include "log.h"
#define SAVE_DECODER_STREAM 0
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
};
class ScopedDav1dPicture {
public:
~ScopedDav1dPicture() { dav1d_picture_unref(&picture_); }
Dav1dPicture &Picture() { return picture_; }
private:
Dav1dPicture picture_ = {};
};
class ScopedDav1dData {
public:
~ScopedDav1dData() { dav1d_data_unref(&data_); }
Dav1dData &Data() { return data_; }
private:
Dav1dData data_ = {};
};
// Calling `dav1d_data_wrap` requires a `free_callback` to be registered.
void NullFreeCallback(const uint8_t *buffer, void *opaque) {}
Dav1dAv1Decoder::Dav1dAv1Decoder() {}
Dav1dAv1Decoder::~Dav1dAv1Decoder() {
if (SAVE_DECODER_STREAM && file_) {
fflush(file_);
fclose(file_);
file_ = nullptr;
}
if (decoded_frame_) {
delete decoded_frame_;
decoded_frame_ = nullptr;
}
}
int Dav1dAv1Decoder::Init() {
Dav1dSettings s;
dav1d_default_settings(&s);
s.n_threads = 4;
s.max_frame_delay = 1;   // For low latency decoding.
s.all_layers = 0;        // Don't output a frame for every spatial layer.
s.operating_point = 31;  // Decode all operating points.
int ret = dav1d_open(&context_, &s);
if (ret) {
  LOG_ERROR("Dav1d AV1 decoder open failed");
  return -1;
}
decoded_frame_ = new VideoFrame(1280 * 720 * 3 / 2);
if (SAVE_DECODER_STREAM) {
file_ = fopen("decode_stream.yuv", "w+b");
if (!file_) {
LOG_WARN("Fail to open stream.yuv");
}
}
return 0;
}
int Dav1dAv1Decoder::Decode(
const uint8_t *data, int size,
std::function<void(VideoFrame)> on_receive_decoded_frame) {
if (!first_) {
  // Wait for the stream start: the first decodable temporal unit must begin
  // with a temporal delimiter or sequence header OBU (obu_type lives in
  // bits 6..3 of the first OBU header byte).
  int obu_type = (data[0] >> 3) & 0x0F;
  if (obu_type != 1 /* OBU_SEQUENCE_HEADER */ &&
      obu_type != 2 /* OBU_TEMPORAL_DELIMITER */) {
    return -1;
  }
  first_ = true;
}
ScopedDav1dData scoped_dav1d_data;
Dav1dData &dav1d_data = scoped_dav1d_data.Data();
dav1d_data_wrap(&dav1d_data, data, size,
/*free_callback=*/&NullFreeCallback,
/*user_data=*/nullptr);
if (int decode_res = dav1d_send_data(context_, &dav1d_data)) {
LOG_ERROR("Dav1dAv1Decoder::Decode decoding failed with error code {}",
decode_res);
return -1;
}
std::shared_ptr<ScopedDav1dPicture> scoped_dav1d_picture(
new ScopedDav1dPicture{});
Dav1dPicture &dav1d_picture = scoped_dav1d_picture->Picture();
if (int get_picture_res = dav1d_get_picture(context_, &dav1d_picture)) {
LOG_ERROR("Dav1dDecoder::Decode getting picture failed with error code {}",
get_picture_res);
return -1;
}
if (dav1d_picture.p.bpc != 8) {
// Only accept 8 bit depth.
LOG_ERROR("Dav1dDecoder::Decode unhandled bit depth: {}",
dav1d_picture.p.bpc);
return -1;
}
// dav1d outputs planar I420 (data[0] = Y, data[1] = U, data[2] = V); assumes
// tight strides. Each chroma plane is w * h / 4 bytes.
const int luma_size = dav1d_picture.p.w * dav1d_picture.p.h;
const int chroma_size = luma_size / 4;
memcpy(decoded_frame_->GetBuffer(), dav1d_picture.data[0], luma_size);
memcpy(decoded_frame_->GetBuffer() + luma_size, dav1d_picture.data[1],
       chroma_size);
memcpy(decoded_frame_->GetBuffer() + luma_size + chroma_size,
       dav1d_picture.data[2], chroma_size);
LOG_INFO("dav1d decode size {}", decoded_frame_->Size());
on_receive_decoded_frame(*decoded_frame_);
if (SAVE_DECODER_STREAM) {
fwrite((unsigned char *)decoded_frame_->GetBuffer(), 1,
       decoded_frame_->Size(), file_);
}
return 0;
}
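A hypothetical call site for this decoder, as a minimal sketch (the `packet` / `packet_size` names below stand in for one received AV1 temporal unit and are not part of this diff):

Dav1dAv1Decoder decoder;
decoder.Init();
decoder.Decode(packet, packet_size, [](VideoFrame frame) {
  // Hand the decoded I420 frame to the renderer / sink.
});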

View File

@@ -0,0 +1,58 @@
/*
* @Author: DI JUNKUN
* @Date: 2024-03-04
* Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
*/
#ifndef _DAV1D_AV1_DECODER_H_
#define _DAV1D_AV1_DECODER_H_
#include "dav1d/dav1d.h"
#include <functional>
#include "video_decoder.h"
class Dav1dAv1Decoder : public VideoDecoder {
public:
Dav1dAv1Decoder();
virtual ~Dav1dAv1Decoder();
public:
int Init();
int Decode(const uint8_t *data, int size,
std::function<void(VideoFrame)> on_receive_decoded_frame);
private:
VideoFrame *decoded_frame_ = nullptr;
FILE *file_ = nullptr;
bool first_ = false;
// dav1d
Dav1dContext *context_ = nullptr;
};
#endif

View File

@@ -4,6 +4,7 @@
#include "ffmpeg/ffmpeg_video_decoder.h"
#include "openh264/openh264_decoder.h"
#else
#include "dav1d/dav1d_av1_decoder.h"
#include "ffmpeg/ffmpeg_video_decoder.h"
#include "nvcodec/nvidia_video_decoder.h"
#include "openh264/openh264_decoder.h"
@@ -16,22 +17,26 @@ VideoDecoderFactory::VideoDecoderFactory() {}
VideoDecoderFactory::~VideoDecoderFactory() {}
std::unique_ptr<VideoDecoder> VideoDecoderFactory::CreateVideoDecoder(
    bool hardware_acceleration, bool av1_encoding) {
  if (av1_encoding) {
    return std::make_unique<Dav1dAv1Decoder>();
  } else {
#if __APPLE__
    // return std::make_unique<FfmpegVideoDecoder>();
    return std::make_unique<OpenH264Decoder>();
#else
    if (hardware_acceleration) {
      if (CheckIsHardwareAccerlerationSupported()) {
        return std::make_unique<NvidiaVideoDecoder>();
      } else {
        return nullptr;
      }
    } else {
      // return std::make_unique<FfmpegVideoDecoder>();
      return std::make_unique<OpenH264Decoder>();
    }
#endif
  }
}
bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() {

View File

@@ -10,7 +10,7 @@ class VideoDecoderFactory {
~VideoDecoderFactory();
static std::unique_ptr<VideoDecoder> CreateVideoDecoder(
bool hardware_acceleration, bool av1_encoding);
static bool CheckIsHardwareAccerlerationSupported();
};
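A minimal sketch of the updated factory call, assuming the caller negotiates AV1 out of band:

// Prefer AV1 when negotiated; otherwise fall back to H.264, optionally
// hardware accelerated.
auto decoder = VideoDecoderFactory::CreateVideoDecoder(
    /*hardware_acceleration=*/false, /*av1_encoding=*/true);
if (!decoder) {
  // nullptr is returned when hardware acceleration was requested but is
  // unsupported.
}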

View File

@@ -0,0 +1,418 @@
#include "aom_av1_encoder.h"
#include <chrono>
#include <cmath>
#include "log.h"
#include <cstring>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#define SAVE_NV12_STREAM 0
#define SAVE_IVF_STREAM 1
#define YUV420P_BUFFER_SIZE (1280 * 720 * 3 / 2)
static unsigned char yuv420p_buffer[YUV420P_BUFFER_SIZE];
#define SET_ENCODER_PARAM_OR_RETURN_ERROR(param_id, param_value) \
do { \
if (!SetEncoderControlParameters(param_id, param_value)) { \
return -1; \
} \
} while (0)
constexpr int kQpMin = 10;
constexpr int kQpMax = 40;
constexpr int kUsageProfile = AOM_USAGE_REALTIME;
constexpr int kMinQindex = 145; // Min qindex threshold for QP scaling.
constexpr int kMaxQindex = 205; // Max qindex threshold for QP scaling.
constexpr int kBitDepth = 8;
constexpr int kLagInFrames = 0; // No look ahead.
constexpr int kRtpTicksPerSecond = 90000;
constexpr double kMinimumFrameRate = 1.0;
constexpr uint8_t kObuSizePresentBit = 0b0'0000'010;
static int NV12ToYUV420PFFmpeg(unsigned char *src_buffer, int width, int height,
                               unsigned char *dst_buffer) {
  AVFrame *input_frame = av_frame_alloc();
  AVFrame *output_frame = av_frame_alloc();
  struct SwsContext *img_convert_ctx = sws_getContext(
      width, height, AV_PIX_FMT_NV12, width, height, AV_PIX_FMT_YUV420P,
      SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
  av_image_fill_arrays(input_frame->data, input_frame->linesize, src_buffer,
                       AV_PIX_FMT_NV12, width, height, 1);
  av_image_fill_arrays(output_frame->data, output_frame->linesize, dst_buffer,
                       AV_PIX_FMT_YUV420P, width, height, 1);
  sws_scale(img_convert_ctx, (uint8_t const **)input_frame->data,
            input_frame->linesize, 0, height, output_frame->data,
            output_frame->linesize);
  av_frame_free(&input_frame);
  av_frame_free(&output_frame);
  if (img_convert_ctx) sws_freeContext(img_convert_ctx);
  return 0;
}
static aom_superblock_size_t GetSuperblockSize(int width, int height,
int threads) {
int resolution = width * height;
if (threads >= 4 && resolution >= 960 * 540 && resolution < 1920 * 1080)
return AOM_SUPERBLOCK_SIZE_64X64;
else
return AOM_SUPERBLOCK_SIZE_DYNAMIC;
}
template <typename P>
bool AomAv1Encoder::SetEncoderControlParameters(int param_id, P param_value) {
aom_codec_err_t error_code =
aom_codec_control(&aom_av1_encoder_ctx_, param_id, param_value);
if (error_code != AOM_CODEC_OK) {
LOG_ERROR(
"AomAv1Encoder::SetEncoderControlParameters returned {} on id: {}",
error_code, param_id);
}
return error_code == AOM_CODEC_OK;
}
int AomAv1Encoder::NumberOfThreads(int width, int height, int number_of_cores) {
// Keep the number of encoder threads equal to the possible number of
// column/row tiles, which is (1, 2, 4, 8). See comments below for
// AV1E_SET_TILE_COLUMNS/ROWS.
if (width * height > 1280 * 720 && number_of_cores > 8) {
return 8;
} else if (width * height >= 640 * 360 && number_of_cores > 4) {
return 4;
} else if (width * height >= 320 * 180 && number_of_cores > 2) {
return 2;
} else {
// Use 2 threads for low res on ARM.
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
defined(WEBRTC_ANDROID)
if (width * height >= 320 * 180 && number_of_cores > 2) {
return 2;
}
#endif
// Use 1 thread for resolutions below VGA.
return 1;
}
}
int AomAv1Encoder::GetCpuSpeed(int width, int height) {
if (width * height <= 320 * 180)
return 6;
else if (width * height <= 640 * 360)
return 7;
else if (width * height <= 1280 * 720)
return 8;
else
return 9;
}
AomAv1Encoder::AomAv1Encoder() {}
AomAv1Encoder::~AomAv1Encoder() {
if (SAVE_NV12_STREAM && file_nv12_) {
fflush(file_nv12_);
fclose(file_nv12_);
file_nv12_ = nullptr;
}
if (SAVE_IVF_STREAM && file_ivf_) {
fflush(file_ivf_);
fclose(file_ivf_);
file_ivf_ = nullptr;
}
delete[] encoded_frame_;
Release();
}
int AomAv1Encoder::Init() {
encoded_frame_ = new uint8_t[YUV420P_BUFFER_SIZE];
// Initialize encoder configuration structure with default values
aom_codec_err_t ret = aom_codec_enc_config_default(
aom_codec_av1_cx(), &aom_av1_encoder_config_, kUsageProfile);
if (ret != AOM_CODEC_OK) {
LOG_ERROR(
"AomAv1Encoder::EncodeInit returned {} on aom_codec_enc_config_default",
ret);
return -1;
}
// Overwrite default config with input encoder settings & RTC-relevant values.
aom_av1_encoder_config_.g_w = frame_width_;
aom_av1_encoder_config_.g_h = frame_height_;
aom_av1_encoder_config_.g_threads =
NumberOfThreads(frame_width_, frame_height_, number_of_cores_);
aom_av1_encoder_config_.g_timebase.num = 1;
aom_av1_encoder_config_.g_timebase.den = kRtpTicksPerSecond;
aom_av1_encoder_config_.rc_target_bitrate = target_bitrate_; // kilobits/sec.
aom_av1_encoder_config_.rc_dropframe_thresh =
(!disable_frame_dropping_) ? 30 : 0;
aom_av1_encoder_config_.g_input_bit_depth = kBitDepth;
aom_av1_encoder_config_.kf_mode = AOM_KF_DISABLED;
aom_av1_encoder_config_.rc_min_quantizer = kQpMin;
aom_av1_encoder_config_.rc_max_quantizer = kQpMax;
aom_av1_encoder_config_.rc_undershoot_pct = 50;
aom_av1_encoder_config_.rc_overshoot_pct = 50;
aom_av1_encoder_config_.rc_buf_initial_sz = 600;
aom_av1_encoder_config_.rc_buf_optimal_sz = 600;
aom_av1_encoder_config_.rc_buf_sz = 1000;
aom_av1_encoder_config_.g_usage = kUsageProfile;
aom_av1_encoder_config_.g_error_resilient = 0;
// Low-latency settings.
aom_av1_encoder_config_.rc_end_usage = AOM_CBR; // cbr mode
aom_av1_encoder_config_.g_pass = AOM_RC_ONE_PASS; // One-pass rate control
aom_av1_encoder_config_.g_lag_in_frames = kLagInFrames; // No look ahead
if (frame_for_encode_ != nullptr) {
aom_img_free(frame_for_encode_);
frame_for_encode_ = nullptr;
}
// Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH
aom_codec_flags_t flags = 0;
// Initialize an encoder instance.
ret = aom_codec_enc_init(&aom_av1_encoder_ctx_, aom_codec_av1_cx(),
&aom_av1_encoder_config_, flags);
if (ret != AOM_CODEC_OK) {
LOG_ERROR("AomAv1Encoder::EncodeInit returned {} on aom_codec_enc_init",
ret);
return -1;
}
inited_ = true;
// Set control parameters
SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED, 4);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CDEF, 1);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MODE_COST_UPD_FREQ, 3);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MV_COST_UPD_FREQ, 3);
// if (codec_settings->mode == VideoCodecMode::kScreensharing) {
// SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TUNE_CONTENT,
// AOM_CONTENT_SCREEN);
// SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 1);
// } else {
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 0);
// }
if (aom_av1_encoder_config_.g_threads == 8) {
// Values passed to AV1E_SET_TILE_ROWS and AV1E_SET_TILE_COLUMNS are log2()
// based.
// Use 4 tile columns x 2 tile rows for 8 threads.
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, 1);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, 2);
} else if (aom_av1_encoder_config_.g_threads == 4) {
// Use 2 tile columns x 2 tile rows for 4 threads.
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS, 1);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS, 1);
} else {
SET_ENCODER_PARAM_OR_RETURN_ERROR(
AV1E_SET_TILE_COLUMNS,
static_cast<int>(log2(aom_av1_encoder_config_.g_threads)));
}
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ROW_MT, 1);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_OBMC, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_NOISE_SENSITIVITY, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_WARPED_MOTION, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(
AV1E_SET_SUPERBLOCK_SIZE,
GetSuperblockSize(frame_width_, frame_height_,
aom_av1_encoder_config_.g_threads));
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CFL_INTRA, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_FILTER_INTRA, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DUAL_FILTER, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRABC, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_MASKED_COMP, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PAETH_INTRA, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_QM, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RESTORATION, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TX64, 0);
SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MAX_REFERENCE_FRAMES, 3);
frame_for_encode_ = aom_img_wrap(nullptr, AOM_IMG_FMT_NV12, frame_width_,
frame_height_, 1, nullptr);
if (SAVE_IVF_STREAM) {
file_ivf_ = fopen("encoded_stream.ivf", "w+b");
if (!file_ivf_) {
LOG_ERROR("Fail to open encoded_stream.ivf");
}
}
if (SAVE_NV12_STREAM) {
file_nv12_ = fopen("raw_stream.yuv", "w+b");
if (!file_nv12_) {
LOG_ERROR("Fail to open raw_stream.yuv");
}
}
return 0;
}
int AomAv1Encoder::Encode(
const uint8_t *pData, int nSize,
std::function<int(char *encoded_packets, size_t size)> on_encoded_image) {
if (SAVE_NV12_STREAM) {
fwrite(pData, 1, nSize, file_nv12_);
}
const uint32_t duration = static_cast<uint32_t>(
    kRtpTicksPerSecond / static_cast<double>(max_frame_rate_));
timestamp_ += duration;
frame_for_encode_->planes[AOM_PLANE_Y] = const_cast<unsigned char *>(pData);
frame_for_encode_->planes[AOM_PLANE_U] =
const_cast<unsigned char *>(pData + frame_width_ * frame_height_);
frame_for_encode_->planes[AOM_PLANE_V] = nullptr;
frame_for_encode_->stride[AOM_PLANE_Y] = frame_width_;
frame_for_encode_->stride[AOM_PLANE_U] = frame_width_;
frame_for_encode_->stride[AOM_PLANE_V] = 0;
// NV12ToYUV420PFFmpeg((unsigned char *)pData, frame_width_, frame_height_,
// (unsigned char *)yuv420p_buffer);
if (0 == seq_++ % key_frame_interval_) {
force_i_frame_flags_ = AOM_EFLAG_FORCE_KF;
} else {
force_i_frame_flags_ = 0;
}
// Encode a frame. The presentation timestamp `pts` should not use real
// timestamps from frames or the wall clock, as that can cause the rate
// controller to misbehave.
aom_codec_err_t ret =
aom_codec_encode(&aom_av1_encoder_ctx_, frame_for_encode_, timestamp_,
duration, force_i_frame_flags_);
if (ret != AOM_CODEC_OK) {
LOG_ERROR("AomAv1Encoder::Encode returned {} on aom_codec_encode", ret);
return -1;
}
aom_codec_iter_t iter = nullptr;
int data_pkt_count = 0;
while (const aom_codec_cx_pkt_t *pkt =
aom_codec_get_cx_data(&aom_av1_encoder_ctx_, &iter)) {
if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
memcpy(encoded_frame_, pkt->data.frame.buf, pkt->data.frame.sz);
encoded_frame_size_ = pkt->data.frame.sz;
int qp = -1;
SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_GET_LAST_QUANTIZER, &qp);
LOG_INFO("Encoded frame qp = {}", qp);
// int consumed_size = 0;
// int offset = 0;
// int unit = 0;
// while (consumed_size < encoded_frame_size_) {
// int obu_size = 0;
// uint8_t obu_header;
// memcpy(&obu_header, encoded_frame_ + offset, sizeof(char));
// obu_size = 1;
// offset += 1;
// // memcpy(reinterpret_cast<char *>(&obu_header), encoded_frame_, 1);
// int obu_type = (obu_header & 0b0'1111'000) >> 3;
// bool obu_has_ext = obu_header & 0b0'0000'100;
// // LOG_ERROR("OBU has ext {}", obu_has_ext);
// bool obu_has_size = obu_header & kObuSizePresentBit;
// LOG_ERROR("OBU has size {}", obu_has_size);
// if (!obu_has_size) {
// consumed_size = encoded_frame_size_;
// offset = encoded_frame_size_;
// } else {
// uint64_t size = 0;
// if (!ReadLeb128((char *)(encoded_frame_ + offset), &size) ||
// size > encoded_frame_size_ - consumed_size) {
// LOG_ERROR(
// "Malformed AV1 input: declared size {} is larger than "
// "remaining buffer size {}",
// size, encoded_frame_size_ - consumed_size);
// return -1;
// }
// LOG_ERROR("leb128 get size = {}, offset = {}", size, offset);
// consumed_size += size;
// offset += size;
// obu_size += size;
// }
// LOG_ERROR("Temporal unit [{}], OBU size [{}], OBU type [{}]", unit++,
// obu_size, obu_type);
// }
if (on_encoded_image) {
on_encoded_image((char *)encoded_frame_, encoded_frame_size_);
if (SAVE_IVF_STREAM) {
  // Writes raw OBUs; no IVF container header is emitted.
  fwrite(encoded_frame_, 1, encoded_frame_size_, file_ivf_);
}
} else {
OnEncodedImage((char *)encoded_frame_, encoded_frame_size_);
}
}
}
return 0;
}
int AomAv1Encoder::OnEncodedImage(char *encoded_packets, size_t size) {
LOG_INFO("OnEncodedImage not implemented");
return 0;
}
void AomAv1Encoder::ForceIdr() { force_i_frame_flags_ = AOM_EFLAG_FORCE_KF; }
int AomAv1Encoder::Release() {
if (frame_for_encode_ != nullptr) {
aom_img_free(frame_for_encode_);
frame_for_encode_ = nullptr;
}
if (inited_) {
if (aom_codec_destroy(&aom_av1_encoder_ctx_)) {
return -1;
}
inited_ = false;
}
return 0;
}
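The commented-out OBU walk in Encode() calls a ReadLeb128 helper that is not part of this diff. A minimal sketch of such a reader, following the AV1 leb128 rules (7 payload bits per byte, MSB as continuation flag, at most 8 bytes), might look like:

// Hypothetical helper, not in this commit. Returns bytes consumed, or 0 on
// malformed/truncated input.
static size_t ReadLeb128(const uint8_t *data, size_t available,
                         uint64_t *value) {
  *value = 0;
  for (size_t i = 0; i < 8 && i < available; ++i) {
    *value |= static_cast<uint64_t>(data[i] & 0x7F) << (7 * i);
    if (!(data[i] & 0x80)) return i + 1;  // Continuation bit clear: done.
  }
  return 0;  // Exceeded the 8-byte limit or ran out of input.
}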

View File

@@ -0,0 +1,84 @@
/*
* @Author: DI JUNKUN
* @Date: 2024-03-01
* Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
*/
#ifndef _AOM_AV1_ENCODER_H_
#define _AOM_AV1_ENCODER_H_
#include <functional>
#include <vector>
#include "aom/aom_codec.h"
#include "aom/aom_encoder.h"
#include "aom/aomcx.h"
#include "video_encoder.h"
typedef struct {
  uint64_t obu_header_size;
  unsigned obu_type;
  uint64_t obu_size;  // leb128-coded size in bytes of the OBU payload, not
                      // including the obu_header or the obu_size field itself.
  int extension_flag;
  int has_size_field;
  // Valid only when extension_flag == 1.
  int temporal_id;
  int spatial_id;
} OBU_t;
class AomAv1Encoder : public VideoEncoder {
public:
AomAv1Encoder();
virtual ~AomAv1Encoder();
public:
int Init();
int Encode(
const uint8_t* pData, int nSize,
std::function<int(char* encoded_packets, size_t size)> on_encoded_image);
virtual int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
private:
template <typename P>
bool SetEncoderControlParameters(int param_id, P param_value);
int NumberOfThreads(int width, int height, int number_of_cores);
int GetCpuSpeed(int width, int height);
int Release();
private:
int frame_width_ = 1280;
int frame_height_ = 720;
int key_frame_interval_ = 300;
int target_bitrate_ = 1000;
int max_bitrate_ = 2500000;
int max_payload_size_ = 1400;
int max_frame_rate_ = 30;
int number_of_cores_ = 4;
std::vector<std::vector<uint8_t>> encoded_packets_;
unsigned char* encoded_image_ = nullptr;
FILE* file_ivf_ = nullptr;
FILE* file_nv12_ = nullptr;
unsigned char* nv12_data_ = nullptr;
unsigned int seq_ = 0;
// aom av1 encoder
aom_image_t* frame_for_encode_ = nullptr;
aom_codec_ctx_t aom_av1_encoder_ctx_;
aom_codec_enc_cfg_t aom_av1_encoder_config_;
bool disable_frame_dropping_ = false;
bool inited_ = false;
int64_t timestamp_ = 0;
aom_enc_frame_flags_t force_i_frame_flags_ = 0;
uint8_t* encoded_frame_ = nullptr;
int encoded_frame_size_ = 0;
};
#endif
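Given the OBU_t layout above, a hedged sketch of filling it from the fixed OBU header (bit layout per the AV1 spec: forbidden bit, 4-bit obu_type, extension flag, has_size_field flag, reserved bit):

// Hypothetical parser, not in this commit; assumes `data` points at the
// start of an OBU. obu_size itself would still need a separate leb128 read.
static bool ParseObuHeader(const uint8_t *data, size_t size, OBU_t *obu) {
  if (size < 1 || (data[0] & 0x80)) return false;  // forbidden_bit must be 0.
  obu->obu_type = (data[0] >> 3) & 0x0F;
  obu->extension_flag = (data[0] >> 2) & 0x01;
  obu->has_size_field = (data[0] >> 1) & 0x01;
  obu->obu_header_size = 1;
  if (obu->extension_flag) {
    if (size < 2) return false;
    obu->temporal_id = (data[1] >> 5) & 0x07;
    obu->spatial_id = (data[1] >> 3) & 0x03;
    obu->obu_header_size = 2;
  }
  return true;
}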

View File

@@ -49,7 +49,9 @@ static int NV12ToYUV420PFFmpeg(unsigned char *src_buffer, int width, int height,
return 0;
}
OpenH264Encoder::OpenH264Encoder() {}
OpenH264Encoder::~OpenH264Encoder() {
if (SAVE_NV12_STREAM && file_nv12_) {
fflush(file_nv12_);
fclose(file_nv12_);
@@ -62,8 +64,8 @@ OpenH264Encoder::OpenH264Encoder() {
file_h264_ = nullptr;
}
delete encoded_frame_;
Release();
}
SEncParamExt OpenH264Encoder::CreateEncoderParams() const {
SEncParamExt encoder_params;

View File

@@ -4,6 +4,7 @@
#include "ffmpeg/ffmpeg_video_encoder.h"
#include "openh264/openh264_encoder.h"
#else
#include "aom/aom_av1_encoder.h"
#include "ffmpeg/ffmpeg_video_encoder.h"
#include "nvcodec/nvidia_video_encoder.h"
#include "openh264/openh264_encoder.h"
@@ -16,22 +17,26 @@ VideoEncoderFactory::VideoEncoderFactory() {}
VideoEncoderFactory::~VideoEncoderFactory() {}
std::unique_ptr<VideoEncoder> VideoEncoderFactory::CreateVideoEncoder(
    bool hardware_acceleration, bool av1_encoding) {
  if (av1_encoding) {
    return std::make_unique<AomAv1Encoder>();
  } else {
#if __APPLE__
    // return std::make_unique<FFmpegVideoEncoder>();
    return std::make_unique<OpenH264Encoder>();
#else
    if (hardware_acceleration) {
      if (CheckIsHardwareAccerlerationSupported()) {
        return std::make_unique<NvidiaVideoEncoder>();
      } else {
        return nullptr;
      }
    } else {
      // return std::make_unique<FFmpegVideoEncoder>();
      return std::make_unique<OpenH264Encoder>();
    }
#endif
  }
}
bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {

View File

@@ -10,7 +10,7 @@ class VideoEncoderFactory {
~VideoEncoderFactory();
static std::unique_ptr<VideoEncoder> CreateVideoEncoder(
bool hardware_acceleration, bool av1_encoding);
static bool CheckIsHardwareAccerlerationSupported();
};
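Tying the two factories together, a hypothetical encode path (assuming the VideoEncoder base class exposes the same Init/Encode interface as AomAv1Encoder; `nv12` is a stand-in for one captured frame):

auto encoder = VideoEncoderFactory::CreateVideoEncoder(
    /*hardware_acceleration=*/false, /*av1_encoding=*/true);
encoder->Init();
// `nv12` points at a 1280x720 NV12 frame: Y plane followed by interleaved
// UV, 1280 * 720 * 3 / 2 bytes in total.
encoder->Encode(nv12, 1280 * 720 * 3 / 2,
                [](char *encoded_packets, size_t size) {
                  // `encoded_packets` holds the raw AV1 OBUs for this frame.
                  return 0;
                });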