[feat] add new classes EncodedFrame/DecodedFrame/ReceivedFrame for video frame module

2025-10-26 20:25:34 +08:00 · 2025-03-19 18:36:55 +08:00
parent 1cd9ea1b0e
commit b50175f943
49 changed files with 485 additions and 203 deletions
--- a/src/common/clock/system_clock.h
+++ b/src/common/clock/system_clock.h
@@ -33,6 +33,8 @@ class SystemClock {
  int64_t ConvertToNtpTime(int64_t time_us);

  int64_t NtpToUtc(int64_t ntp_time);
+
+  int64_t CurrentNtpInMilliseconds() { return CurrentNtpTimeMs(); }  // Thin forwarder: returns CurrentNtpTimeMs() unchanged.
 };

 #endif
--- a/src/frame/decoded_frame.h
+++ b/src/frame/decoded_frame.h
@@ -0,0 +1,61 @@
+/*
+ * @Author: DI JUNKUN
+ * @Date: 2025-03-19
+ * Copyright (c) 2025 by DI JUNKUN, All Rights Reserved.
+ */
+
+#ifndef _DECODED_FRAME_H_
+#define _DECODED_FRAME_H_
+
+#include "video_frame.h"
+
+// Frame emitted by a video decoder; all extra fields start at 0 until set.
+class DecodedFrame : public VideoFrame {
+ public:
+  DecodedFrame(const uint8_t *buffer, size_t size, uint32_t width,
+               uint32_t height)
+      : VideoFrame(buffer, size, width, height) {}
+  DecodedFrame(size_t size, uint32_t width, uint32_t height)
+      : VideoFrame(size, width, height) {}
+  DecodedFrame(const uint8_t *buffer, size_t size) : VideoFrame(buffer, size) {}
+  DecodedFrame() = default;
+  ~DecodedFrame() = default;
+
+  // Timestamps; the `_us` member suffix suggests microseconds (TODO confirm).
+  int64_t ReceivedTimestamp() const { return received_timestamp_us_; }
+  void SetReceivedTimestamp(int64_t received_timestamp_us) {
+    received_timestamp_us_ = received_timestamp_us;
+  }
+  int64_t CapturedTimestamp() const { return captured_timestamp_us_; }
+  void SetCapturedTimestamp(int64_t captured_timestamp_us) {
+    captured_timestamp_us_ = captured_timestamp_us;
+  }
+  int64_t DecodedTimestamp() const { return decoded_timestamp_us_; }
+  void SetDecodedTimestamp(int64_t decoded_timestamp_us) {
+    decoded_timestamp_us_ = decoded_timestamp_us;
+  }
+
+  uint32_t DecodedWidth() const { return decoded_width_; }
+  void SetDecodedWidth(uint32_t decoded_width) {
+    decoded_width_ = decoded_width;
+  }
+  uint32_t DecodedHeight() const { return decoded_height_; }
+  void SetDecodedHeight(uint32_t decoded_height) {
+    decoded_height_ = decoded_height;
+  }
+
+  // Original mis-cased spellings, kept so existing callers still compile.
+  uint32_t decodedHeight() const { return DecodedHeight(); }
+  void SetdecodedHeight(uint32_t decoded_height) {
+    SetDecodedHeight(decoded_height);
+  }
+
+ private:
+  int64_t received_timestamp_us_ = 0;
+  int64_t captured_timestamp_us_ = 0;
+  int64_t decoded_timestamp_us_ = 0;
+  uint32_t decoded_width_ = 0;
+  uint32_t decoded_height_ = 0;
+};
+
+#endif
--- a/src/frame/encoded_frame.h
+++ b/src/frame/encoded_frame.h
@@ -0,0 +1,59 @@
+/*
+ * @Author: DI JUNKUN
+ * @Date: 2025-03-19
+ * Copyright (c) 2025 by DI JUNKUN, All Rights Reserved.
+ */
+
+#ifndef _ENCODED_FRAME_H_
+#define _ENCODED_FRAME_H_
+
+#include "video_frame.h"
+
+// Compressed frame an encoder hands to its on_encoded_image callback, tagged
+// with capture/encode timestamps, a frame type and the encoded picture size.
+class EncodedFrame : public VideoFrame {
+ public:
+  EncodedFrame() = default;
+  EncodedFrame(const uint8_t *buffer, size_t size) : VideoFrame(buffer, size) {}
+  EncodedFrame(size_t size, uint32_t width, uint32_t height)
+      : VideoFrame(size, width, height) {}
+  EncodedFrame(const uint8_t *buffer, size_t size, uint32_t width,
+               uint32_t height)
+      : VideoFrame(buffer, size, width, height) {}
+  ~EncodedFrame() = default;
+
+  int64_t CapturedTimestamp() const { return this->captured_timestamp_us_; }
+  void SetCapturedTimestamp(int64_t captured_timestamp_us) {
+    this->captured_timestamp_us_ = captured_timestamp_us;
+  }
+
+  int64_t EncodedTimestamp() const { return this->encoded_timestamp_us_; }
+  void SetEncodedTimestamp(int64_t encoded_timestamp_us) {
+    this->encoded_timestamp_us_ = encoded_timestamp_us;
+  }
+
+  // Frame type tag; defaults to kVideoFrameDelta.
+  VideoFrameType FrameType() const { return this->frame_type_; }
+  void SetFrameType(VideoFrameType frame_type) {
+    this->frame_type_ = frame_type;
+  }
+
+  // Encoded picture dimensions (0 until set).
+  uint32_t EncodedWidth() const { return this->encoded_width_; }
+  void SetEncodedWidth(uint32_t encoded_width) {
+    this->encoded_width_ = encoded_width;
+  }
+  uint32_t EncodedHeight() const { return this->encoded_height_; }
+  void SetEncodedHeight(uint32_t encoded_height) {
+    this->encoded_height_ = encoded_height;
+  }
+
+ private:
+  int64_t captured_timestamp_us_{0};
+  int64_t encoded_timestamp_us_{0};
+  VideoFrameType frame_type_{VideoFrameType::kVideoFrameDelta};
+  uint32_t encoded_width_{0};
+  uint32_t encoded_height_{0};
+};
+
+#endif
--- a/src/frame/received_frame.h
+++ b/src/frame/received_frame.h
@@ -0,0 +1,36 @@
+/*
+ * @Author: DI JUNKUN
+ * @Date: 2025-03-19
+ * Copyright (c) 2025 by DI JUNKUN, All Rights Reserved.
+ */
+
+#ifndef _RECEIVED_FRAME_H_
+#define _RECEIVED_FRAME_H_
+
+#include "video_frame.h"
+
+// Encoded frame passed into VideoDecoder::Decode(); its receive and capture
+// timestamps are what decoders copy onto the DecodedFrame they emit.
+class ReceivedFrame : public VideoFrame {
+ public:
+  ReceivedFrame() = default;
+  ReceivedFrame(const uint8_t *buffer, size_t size)
+      : VideoFrame(buffer, size) {}
+  ~ReceivedFrame() = default;
+
+  int64_t ReceivedTimestamp() const { return this->received_timestamp_us_; }
+  void SetReceivedTimestamp(int64_t received_timestamp_us) {
+    this->received_timestamp_us_ = received_timestamp_us;
+  }
+
+  int64_t CapturedTimestamp() const { return this->captured_timestamp_us_; }
+  void SetCapturedTimestamp(int64_t captured_timestamp_us) {
+    this->captured_timestamp_us_ = captured_timestamp_us;
+  }
+
+ private:
+  int64_t received_timestamp_us_{0};
+  int64_t captured_timestamp_us_{0};
+};
+
+#endif
--- a/src/frame/video_frame.h
+++ b/src/frame/video_frame.h
@@ -10,6 +10,12 @@
 #include <cstddef>
 #include <cstdint>

+enum VideoFrameType {  // Frame kind tag stored on encoded frames.
+  kEmptyFrame = 0,
+  kVideoFrameKey = 3,  // NOTE(review): values skip 1-2, presumably to match an external definition; confirm before renumbering.
+  kVideoFrameDelta = 4,  // Default frame_type_ of EncodedFrame and VideoFrameWrapper.
+};
+
 class VideoFrame {
 public:
  VideoFrame();
@@ -26,10 +32,10 @@ class VideoFrame {
  ~VideoFrame();

 public:
-  const uint8_t *Buffer() { return buffer_; }
-  size_t Size() { return size_; }
-  uint32_t Width() { return width_; }
-  uint32_t Height() { return height_; }
+  const uint8_t *Buffer() const { return buffer_; }
+  size_t Size() const { return size_; }
+  uint32_t Width() const { return width_; }
+  uint32_t Height() const { return height_; }

  void SetSize(size_t size) { size_ = size; }
  void SetWidth(uint32_t width) { width_ = width; }
--- a/src/frame/video_frame_wrapper.h
+++ b/src/frame/video_frame_wrapper.h
@@ -9,44 +9,56 @@

 #include "video_frame.h"

-enum VideoFrameType {
-  kEmptyFrame = 0,
-  kVideoFrameKey = 3,
-  kVideoFrameDelta = 4,
-};
-
 class VideoFrameWrapper : public VideoFrame {
 public:
  VideoFrameWrapper(const uint8_t *buffer, size_t size, uint32_t width,
                    uint32_t height)
      : VideoFrame(buffer, size, width, height) {}
-  VideoFrameWrapper() = delete;
+  VideoFrameWrapper(size_t size, uint32_t width, uint32_t height)
+      : VideoFrame(size, width, height) {}
+  VideoFrameWrapper(const uint8_t *buffer, size_t size)
+      : VideoFrame(buffer, size) {}
+  VideoFrameWrapper() = default;
  ~VideoFrameWrapper() = default;

-  int64_t CaptureTimestamp() { return capture_timestamp_us_; }
+  int64_t CapturedTimestamp() const { return captured_timestamp_us_; }

-  void SetCaptureTimestamp(int64_t capture_timestamp_us) {
-    capture_timestamp_us_ = capture_timestamp_us;
+  void SetCapturedTimestamp(int64_t captured_timestamp_us) {
+    captured_timestamp_us_ = captured_timestamp_us;
  }

-  VideoFrameType FrameType() { return frame_type_; }
+  int64_t ReceivedTimestamp() const { return received_timestamp_us_; }
+
+  void SetReceivedTimestamp(int64_t received_timestamp_us) {
+    received_timestamp_us_ = received_timestamp_us;
+  }
+
+  int64_t DecodedTimestamp() const { return decoded_timestamp_us_; }
+
+  void SetDecodedTimestamp(int64_t decoded_timestamp_us) {
+    decoded_timestamp_us_ = decoded_timestamp_us;
+  }
+
+  VideoFrameType FrameType() const { return frame_type_; }

  void SetFrameType(VideoFrameType frame_type) { frame_type_ = frame_type; }

-  uint32_t EncodedWidth() { return encoded_width_; }
+  uint32_t EncodedWidth() const { return encoded_width_; }

  void SetEncodedWidth(uint32_t encoded_width) {
    encoded_width_ = encoded_width;
  }

-  uint32_t EncodedHeight() { return encoded_height_; }
+  uint32_t EncodedHeight() const { return encoded_height_; }

  void SetEncodedHeight(uint32_t encoded_height) {
    encoded_height_ = encoded_height;
  }

 private:
-  int64_t capture_timestamp_us_ = 0;
+  int64_t captured_timestamp_us_ = 0;
+  int64_t received_timestamp_us_ = 0;
+  int64_t decoded_timestamp_us_ = 0;
  VideoFrameType frame_type_ = VideoFrameType::kVideoFrameDelta;
  uint32_t encoded_width_ = 0;
  uint32_t encoded_height_ = 0;
--- a/src/interface/x.h
+++ b/src/interface/x.h
@@ -44,7 +44,10 @@ typedef struct {
  size_t size;
  uint32_t width;
  uint32_t height;
-  uint64_t timestamp;
+  uint64_t captured_timestamp;
+  uint64_t received_timestamp;
+  uint64_t decoded_timestamp;
+  uint64_t rendered_timestamp;
 } XVideoFrame;

 typedef struct {
--- a/src/media/video/decode/aom/aom_av1_decoder.cpp
+++ b/src/media/video/decode/aom/aom_av1_decoder.cpp
@@ -5,7 +5,8 @@
 // #define SAVE_DECODED_NV12_STREAM
 // #define SAVE_RECEIVED_AV1_STREAM

-AomAv1Decoder::AomAv1Decoder() {}
+AomAv1Decoder::AomAv1Decoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Decode() dereferences clock_ unchecked to stamp decoded frames.

 AomAv1Decoder::~AomAv1Decoder() {
 #ifdef SAVE_DECODED_NV12_STREAM
@@ -65,8 +66,11 @@ int AomAv1Decoder::Init() {
 }

 int AomAv1Decoder::Decode(
-    const uint8_t *data, size_t size,
-    std::function<void(VideoFrame)> on_receive_decoded_frame) {
+    const ReceivedFrame &received_frame,
+    std::function<void(const DecodedFrame &)> on_receive_decoded_frame) {
+  const uint8_t *data = received_frame.Buffer();
+  size_t size = received_frame.Size();
+
 #ifdef SAVE_RECEIVED_AV1_STREAM
  fwrite((unsigned char *)data, 1, size, file_av1_);
 #endif
@@ -124,14 +128,14 @@ int AomAv1Decoder::Decode(
    if (!nv12_frame_) {
      nv12_frame_capacity_ = nv12_frame_size_;
      nv12_frame_ =
-          new VideoFrame(nv12_frame_capacity_, frame_width_, frame_height_);
+          new DecodedFrame(nv12_frame_capacity_, frame_width_, frame_height_);
    }

    if (nv12_frame_capacity_ < nv12_frame_size_) {
      nv12_frame_capacity_ = nv12_frame_size_;
      delete nv12_frame_;
      nv12_frame_ =
-          new VideoFrame(nv12_frame_capacity_, frame_width_, frame_height_);
+          new DecodedFrame(nv12_frame_capacity_, frame_width_, frame_height_);
    }

    if (nv12_frame_->Size() != nv12_frame_size_ ||
@@ -142,6 +146,9 @@ int AomAv1Decoder::Decode(
      nv12_frame_->SetHeight(frame_height_);
    }

+    nv12_frame_->SetReceivedTimestamp(received_frame.ReceivedTimestamp());
+    nv12_frame_->SetCapturedTimestamp(received_frame.CapturedTimestamp());
+    nv12_frame_->SetDecodedTimestamp(clock_->CurrentTime());
    on_receive_decoded_frame(*nv12_frame_);

 #ifdef SAVE_DECODED_NV12_STREAM
--- a/src/media/video/decode/aom/aom_av1_decoder.h
+++ b/src/media/video/decode/aom/aom_av1_decoder.h
@@ -16,19 +16,21 @@

 class AomAv1Decoder : public VideoDecoder {
 public:
-  AomAv1Decoder();
+  AomAv1Decoder(std::shared_ptr<SystemClock> clock);
  virtual ~AomAv1Decoder();

 public:
  int Init();

-  int Decode(const uint8_t *data, size_t size,
-             std::function<void(VideoFrame)> on_receive_decoded_frame);
+  int Decode(
+      const ReceivedFrame &received_frame,
+      std::function<void(const DecodedFrame &)> on_receive_decoded_frame);

  std::string GetDecoderName() { return "AomAv1"; }

 private:
-  VideoFrame *nv12_frame_ = 0;
+  std::shared_ptr<SystemClock> clock_ = nullptr;
+  DecodedFrame *nv12_frame_ = 0;
  int nv12_frame_capacity_ = 0;
  int nv12_frame_size_ = 0;

--- a/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
+++ b/src/media/video/decode/dav1d/dav1d_av1_decoder.cpp
@@ -47,7 +47,8 @@ void Yuv420pToNv12(unsigned char *SrcY, unsigned char *SrcU,
  }
 }

-Dav1dAv1Decoder::Dav1dAv1Decoder() {}
+Dav1dAv1Decoder::Dav1dAv1Decoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Decode() dereferences clock_ unchecked to stamp decoded frames.

 Dav1dAv1Decoder::~Dav1dAv1Decoder() {
 #ifdef SAVE_DECODED_NV12_STREAM
@@ -106,8 +107,11 @@ int Dav1dAv1Decoder::Init() {
 }

 int Dav1dAv1Decoder::Decode(
-    const uint8_t *data, size_t size,
-    std::function<void(VideoFrame)> on_receive_decoded_frame) {
+    const ReceivedFrame &received_frame,
+    std::function<void(const DecodedFrame &)> on_receive_decoded_frame) {
+  const uint8_t *data = received_frame.Buffer();
+  size_t size = received_frame.Size();
+
 #ifdef SAVE_RECEIVED_AV1_STREAM
  fwrite((unsigned char *)data, 1, size, file_av1_);
 #endif
@@ -159,14 +163,14 @@ int Dav1dAv1Decoder::Decode(
  if (!nv12_frame_) {
    nv12_frame_capacity_ = nv12_frame_size_;
    nv12_frame_ =
-        new VideoFrame(nv12_frame_capacity_, frame_width_, frame_height_);
+        new DecodedFrame(nv12_frame_capacity_, frame_width_, frame_height_);
  }

  if (nv12_frame_capacity_ < nv12_frame_size_) {
    nv12_frame_capacity_ = nv12_frame_size_;
    delete nv12_frame_;
    nv12_frame_ =
-        new VideoFrame(nv12_frame_capacity_, frame_width_, frame_height_);
+        new DecodedFrame(nv12_frame_capacity_, frame_width_, frame_height_);
  }

  if (nv12_frame_->Size() != nv12_frame_size_ ||
@@ -194,6 +198,9 @@ int Dav1dAv1Decoder::Decode(
        frame_width_, frame_width_, frame_height_);
  }

+  nv12_frame_->SetReceivedTimestamp(received_frame.ReceivedTimestamp());
+  nv12_frame_->SetCapturedTimestamp(received_frame.CapturedTimestamp());
+  nv12_frame_->SetDecodedTimestamp(clock_->CurrentTime());
  on_receive_decoded_frame(*nv12_frame_);

 #ifdef SAVE_DECODED_NV12_STREAM
--- a/src/media/video/decode/dav1d/dav1d_av1_decoder.h
+++ b/src/media/video/decode/dav1d/dav1d_av1_decoder.h
@@ -14,19 +14,21 @@

 class Dav1dAv1Decoder : public VideoDecoder {
 public:
-  Dav1dAv1Decoder();
+  Dav1dAv1Decoder(std::shared_ptr<SystemClock> clock);
  virtual ~Dav1dAv1Decoder();

 public:
  int Init();

-  int Decode(const uint8_t *data, size_t size,
-             std::function<void(VideoFrame)> on_receive_decoded_frame);
+  int Decode(
+      const ReceivedFrame &received_frame,
+      std::function<void(const DecodedFrame &)> on_receive_decoded_frame);

  std::string GetDecoderName() { return "Dav1dAv1"; }

 private:
-  VideoFrame *nv12_frame_ = 0;
+  std::shared_ptr<SystemClock> clock_ = nullptr;
+  DecodedFrame *nv12_frame_ = 0;
  size_t nv12_frame_capacity_ = 0;
  size_t nv12_frame_size_ = 0;

--- a/src/media/video/decode/nvcodec/nvidia_video_decoder.cpp
+++ b/src/media/video/decode/nvcodec/nvidia_video_decoder.cpp
@@ -6,7 +6,8 @@
 // #define SAVE_DECODED_NV12_STREAM
 // #define SAVE_RECEIVED_H264_STREAM

-NvidiaVideoDecoder::NvidiaVideoDecoder() {}
+NvidiaVideoDecoder::NvidiaVideoDecoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Decode() dereferences clock_ unchecked to stamp decoded frames.
 NvidiaVideoDecoder::~NvidiaVideoDecoder() {
 #ifdef SAVE_DECODED_NV12_STREAM
  if (file_nv12_) {
@@ -65,11 +66,15 @@ int NvidiaVideoDecoder::Init() {
 }

 int NvidiaVideoDecoder::Decode(
-    const uint8_t *data, size_t size,
-    std::function<void(VideoFrame)> on_receive_decoded_frame) {
+    const ReceivedFrame &received_frame,
+    std::function<void(const DecodedFrame &)> on_receive_decoded_frame) {
  if (!decoder) {
    return -1;
  }
+
+  const uint8_t *data = received_frame.Buffer();
+  size_t size = received_frame.Size();
+
 #ifdef SAVE_RECEIVED_H264_STREAM
  fwrite((unsigned char *)data, 1, size, file_h264_);
 #endif
@@ -86,10 +91,15 @@ int NvidiaVideoDecoder::Decode(
      decoded_frame_buffer = decoder->GetFrame();
      if (decoded_frame_buffer) {
        if (on_receive_decoded_frame) {
-          VideoFrame decoded_frame(
+          DecodedFrame decoded_frame(
              decoded_frame_buffer,
              decoder->GetWidth() * decoder->GetHeight() * 3 / 2,
              decoder->GetWidth(), decoder->GetHeight());
+          decoded_frame.SetReceivedTimestamp(
+              received_frame.ReceivedTimestamp());
+          decoded_frame.SetCapturedTimestamp(
+              received_frame.CapturedTimestamp());
+          decoded_frame.SetDecodedTimestamp(clock_->CurrentTime());
          on_receive_decoded_frame(decoded_frame);
 #ifdef SAVE_DECODED_NV12_STREAM
          fwrite((unsigned char *)decoded_frame.Buffer(), 1,
--- a/src/media/video/decode/nvcodec/nvidia_video_decoder.h
+++ b/src/media/video/decode/nvcodec/nvidia_video_decoder.h
@@ -8,18 +8,19 @@

 class NvidiaVideoDecoder : public VideoDecoder {
 public:
-  NvidiaVideoDecoder();
+  NvidiaVideoDecoder(std::shared_ptr<SystemClock> clock);
  virtual ~NvidiaVideoDecoder();

 public:
  int Init();

-  int Decode(const uint8_t* data, size_t size,
-             std::function<void(VideoFrame)> on_receive_decoded_frame);
+  int Decode(const ReceivedFrame& received_frame,
+             std::function<void(const DecodedFrame&)> on_receive_decoded_frame);

  std::string GetDecoderName() { return "NvidiaH264"; }

 private:
+  std::shared_ptr<SystemClock> clock_ = nullptr;
  NvDecoder* decoder = nullptr;
  bool get_first_keyframe_ = false;
  bool skip_frame_ = false;
--- a/src/media/video/decode/openh264/openh264_decoder.cpp
+++ b/src/media/video/decode/openh264/openh264_decoder.cpp
@@ -50,7 +50,8 @@ void ConvertYuv420pToNv12(const unsigned char *yuv_data,
  }
 }

-OpenH264Decoder::OpenH264Decoder() {}
+OpenH264Decoder::OpenH264Decoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Decode() dereferences clock_ unchecked to stamp decoded frames.
 OpenH264Decoder::~OpenH264Decoder() {
  if (openh264_decoder_) {
    openh264_decoder_->Uninitialize();
@@ -119,12 +120,15 @@ int OpenH264Decoder::Init() {
 }

 int OpenH264Decoder::Decode(
-    const uint8_t *data, size_t size,
-    std::function<void(VideoFrame)> on_receive_decoded_frame) {
+    const ReceivedFrame &received_frame,
+    std::function<void(const DecodedFrame &)> on_receive_decoded_frame) {
  if (!openh264_decoder_) {
    return -1;
  }

+  const uint8_t *data = received_frame.Buffer();
+  size_t size = received_frame.Size();
+
 #ifdef SAVE_RECEIVED_H264_STREAM
  fwrite((unsigned char *)data, 1, size, h264_stream_);
 #endif
@@ -158,14 +162,14 @@ int OpenH264Decoder::Decode(
  if (!nv12_frame_) {
    nv12_frame_capacity_ = yuv420p_frame_size_;
    nv12_frame_ =
-        new VideoFrame(nv12_frame_capacity_, frame_width_, frame_height_);
+        new DecodedFrame(nv12_frame_capacity_, frame_width_, frame_height_);
  }

  if (nv12_frame_capacity_ < yuv420p_frame_size_) {
    nv12_frame_capacity_ = yuv420p_frame_size_;
    delete nv12_frame_;
    nv12_frame_ =
-        new VideoFrame(nv12_frame_capacity_, frame_width_, frame_height_);
+        new DecodedFrame(nv12_frame_capacity_, frame_width_, frame_height_);
  }

  if (nv12_frame_->Size() != nv12_frame_size_ ||
@@ -202,6 +206,9 @@ int OpenH264Decoder::Decode(
            frame_width_, frame_width_, frame_height_);
      }

+      nv12_frame_->SetReceivedTimestamp(received_frame.ReceivedTimestamp());
+      nv12_frame_->SetCapturedTimestamp(received_frame.CapturedTimestamp());
+      nv12_frame_->SetDecodedTimestamp(clock_->CurrentTime());
      on_receive_decoded_frame(*nv12_frame_);

 #ifdef SAVE_DECODED_NV12_STREAM
--- a/src/media/video/decode/openh264/openh264_decoder.h
+++ b/src/media/video/decode/openh264/openh264_decoder.h
@@ -18,18 +18,19 @@

 class OpenH264Decoder : public VideoDecoder {
 public:
-  OpenH264Decoder();
+  OpenH264Decoder(std::shared_ptr<SystemClock> clock);
  virtual ~OpenH264Decoder();

 public:
  int Init();

-  int Decode(const uint8_t* data, size_t size,
-             std::function<void(VideoFrame)> on_receive_decoded_frame);
+  int Decode(const ReceivedFrame& received_frame,
+             std::function<void(const DecodedFrame&)> on_receive_decoded_frame);

  std::string GetDecoderName() { return "OpenH264"; }

 private:
+  std::shared_ptr<SystemClock> clock_ = nullptr;
  ISVCDecoder* openh264_decoder_ = nullptr;
  bool get_first_keyframe_ = false;
  bool skip_frame_ = false;
@@ -45,7 +46,7 @@ class OpenH264Decoder : public VideoDecoder {
  int yuv420p_frame_capacity_ = 0;
  int yuv420p_frame_size_ = 0;

-  VideoFrame* nv12_frame_ = 0;
+  DecodedFrame* nv12_frame_ = 0;
  int nv12_frame_capacity_ = 0;
  int nv12_frame_size_ = 0;
 };
--- a/src/media/video/decode/video_decoder.h
+++ b/src/media/video/decode/video_decoder.h
@@ -13,15 +13,17 @@
 #include <functional>
 #include <string>

-#include "video_frame.h"
+#include "clock/system_clock.h"
+#include "decoded_frame.h"
+#include "received_frame.h"

 class VideoDecoder {
 public:
  virtual int Init() = 0;

  virtual int Decode(
-      const uint8_t *data, size_t size,
-      std::function<void(VideoFrame)> on_receive_decoded_frame) = 0;
+      const ReceivedFrame& received_frame,
+      std::function<void(const DecodedFrame&)> on_receive_decoded_frame) = 0;

  virtual std::string GetDecoderName() = 0;

--- a/src/media/video/decode/video_decoder_factory.cpp
+++ b/src/media/video/decode/video_decoder_factory.cpp
@@ -17,26 +17,27 @@ VideoDecoderFactory::VideoDecoderFactory() {}
 VideoDecoderFactory::~VideoDecoderFactory() {}

 std::unique_ptr<VideoDecoder> VideoDecoderFactory::CreateVideoDecoder(
-    bool hardware_acceleration, bool av1_encoding) {
+    std::shared_ptr<SystemClock> clock, bool hardware_acceleration,
+    bool av1_encoding) {
  if (av1_encoding) {
    LOG_INFO("Use dav1d decoder");
-    return std::make_unique<Dav1dAv1Decoder>(Dav1dAv1Decoder());
+    return std::make_unique<Dav1dAv1Decoder>(Dav1dAv1Decoder(clock));
    // LOG_INFO("Use aom decoder");
    // return std::make_unique<AomAv1Decoder>(AomAv1Decoder());
  } else {
 #if __APPLE__
-    return std::make_unique<OpenH264Decoder>(OpenH264Decoder());
+    return std::make_unique<OpenH264Decoder>(OpenH264Decoder(clock));
 #else
    if (hardware_acceleration) {
      if (CheckIsHardwareAccerlerationSupported()) {
        LOG_INFO("Use nvidia decoder");
-        return std::make_unique<NvidiaVideoDecoder>(NvidiaVideoDecoder());
+        return std::make_unique<NvidiaVideoDecoder>(NvidiaVideoDecoder(clock));
      } else {
        return nullptr;
      }
    } else {
      LOG_INFO("Use openh264 decoder");
-      return std::make_unique<OpenH264Decoder>(OpenH264Decoder());
+      return std::make_unique<OpenH264Decoder>(OpenH264Decoder(clock));
    }
 #endif
  }
--- a/src/media/video/decode/video_decoder_factory.h
+++ b/src/media/video/decode/video_decoder_factory.h
@@ -10,7 +10,8 @@ class VideoDecoderFactory {
  ~VideoDecoderFactory();

  static std::unique_ptr<VideoDecoder> CreateVideoDecoder(
-      bool hardware_acceleration, bool av1_encoding);
+      std::shared_ptr<SystemClock> clock, bool hardware_acceleration,
+      bool av1_encoding);

  static bool CheckIsHardwareAccerlerationSupported();
 };
--- a/src/media/video/encode/aom/aom_av1_encoder.cpp
+++ b/src/media/video/encode/aom/aom_av1_encoder.cpp
@@ -101,7 +101,8 @@ int AomAv1Encoder::ResetEncodeResolution(unsigned int width,
                                  &aom_av1_encoder_config_);
 }

-AomAv1Encoder::AomAv1Encoder() {}
+AomAv1Encoder::AomAv1Encoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Encode() dereferences clock_ unchecked to stamp encoded frames.

 AomAv1Encoder::~AomAv1Encoder() {
 #ifdef SAVE_RECEIVED_NV12_STREAM
@@ -268,7 +269,7 @@ int AomAv1Encoder::Init() {

 int AomAv1Encoder::Encode(
    const XVideoFrame *video_frame,
-    std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
+    std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
        on_encoded_image) {
 #ifdef SAVE_RECEIVED_NV12_STREAM
  fwrite(video_frame->data, 1, video_frame->size, file_nv12_);
@@ -342,14 +343,15 @@ int AomAv1Encoder::Encode(
      // LOG_INFO("Encoded frame qp = {}", qp);

      if (on_encoded_image) {
-        std::shared_ptr<VideoFrameWrapper> encoded_frame =
-            std::make_shared<VideoFrameWrapper>(
-                encoded_frame_, encoded_frame_size_, video_frame->width,
-                video_frame->height);
+        std::shared_ptr<EncodedFrame> encoded_frame =
+            std::make_shared<EncodedFrame>(encoded_frame_, encoded_frame_size_,
+                                           video_frame->width,
+                                           video_frame->height);
        encoded_frame->SetFrameType(frame_type);
-        encoded_frame->SetCaptureTimestamp(video_frame->timestamp);
        encoded_frame->SetEncodedWidth(video_frame->width);
        encoded_frame->SetEncodedHeight(video_frame->height);
+        encoded_frame->SetCapturedTimestamp(video_frame->captured_timestamp);
+        encoded_frame->SetEncodedTimestamp(clock_->CurrentTime());
        on_encoded_image(encoded_frame);
 #ifdef SAVE_ENCODED_AV1_STREAM
        fwrite(encoded_frame_, 1, encoded_frame_size_, file_av1_);
--- a/src/media/video/encode/aom/aom_av1_encoder.h
+++ b/src/media/video/encode/aom/aom_av1_encoder.h
@@ -31,16 +31,15 @@ typedef struct {

 class AomAv1Encoder : public VideoEncoder {
 public:
-  AomAv1Encoder();
+  AomAv1Encoder(std::shared_ptr<SystemClock> clock);
  virtual ~AomAv1Encoder();

 public:
  int Init();

-  int Encode(
-      const XVideoFrame* video_frame,
-      std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
-          on_encoded_image);
+  int Encode(const XVideoFrame* video_frame,
+             std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
+                 on_encoded_image);

  int ForceIdr();

@@ -65,6 +64,7 @@ class AomAv1Encoder : public VideoEncoder {
  int Release();

 private:
+  std::shared_ptr<SystemClock> clock_ = nullptr;
  uint32_t frame_width_ = 1280;
  uint32_t frame_height_ = 720;
  int key_frame_interval_ = I_FRAME_INTERVAL;
--- a/src/media/video/encode/nvcodec/nvidia_video_encoder.cpp
+++ b/src/media/video/encode/nvcodec/nvidia_video_encoder.cpp
@@ -9,7 +9,8 @@
 // #define SAVE_RECEIVED_NV12_STREAM
 // #define SAVE_ENCODED_H264_STREAM

-NvidiaVideoEncoder::NvidiaVideoEncoder() {}
+NvidiaVideoEncoder::NvidiaVideoEncoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Encode() dereferences clock_ unchecked to stamp encoded frames.
 NvidiaVideoEncoder::~NvidiaVideoEncoder() {
 #ifdef SAVE_RECEIVED_NV12_STREAM
  if (file_nv12_) {
@@ -130,7 +131,7 @@ int NvidiaVideoEncoder::Init() {

 int NvidiaVideoEncoder::Encode(
    const XVideoFrame *video_frame,
-    std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
+    std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
        on_encoded_image) {
  if (!encoder_) {
    LOG_ERROR("Invalid encoder");
@@ -181,14 +182,15 @@ int NvidiaVideoEncoder::Encode(

  for (const auto &packet : encoded_packets_) {
    if (on_encoded_image) {
-      std::shared_ptr<VideoFrameWrapper> encoded_frame =
-          std::make_shared<VideoFrameWrapper>(packet.data(), packet.size(),
-                                              encoder_->GetEncodeWidth(),
-                                              encoder_->GetEncodeHeight());
+      std::shared_ptr<EncodedFrame> encoded_frame =
+          std::make_shared<EncodedFrame>(packet.data(), packet.size(),
+                                         encoder_->GetEncodeWidth(),
+                                         encoder_->GetEncodeHeight());
      encoded_frame->SetFrameType(frame_type);
-      encoded_frame->SetCaptureTimestamp(video_frame->timestamp);
      encoded_frame->SetEncodedWidth(encoder_->GetEncodeWidth());
      encoded_frame->SetEncodedHeight(encoder_->GetEncodeHeight());
+      encoded_frame->SetCapturedTimestamp(video_frame->captured_timestamp);
+      encoded_frame->SetEncodedTimestamp(clock_->CurrentTime());
      on_encoded_image(encoded_frame);
 #ifdef SAVE_ENCODED_H264_STREAM
      fwrite((unsigned char *)packet.data(), 1, packet.size(), file_h264_);
--- a/src/media/video/encode/nvcodec/nvidia_video_encoder.h
+++ b/src/media/video/encode/nvcodec/nvidia_video_encoder.h
@@ -9,15 +9,14 @@

 class NvidiaVideoEncoder : public VideoEncoder {
 public:
-  NvidiaVideoEncoder();
+  NvidiaVideoEncoder(std::shared_ptr<SystemClock> clock);
  virtual ~NvidiaVideoEncoder();

  int Init();

-  int Encode(
-      const XVideoFrame* video_frame,
-      std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
-          on_encoded_image);
+  int Encode(const XVideoFrame* video_frame,
+             std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
+                 on_encoded_image);

  int ForceIdr();

@@ -35,6 +34,7 @@ class NvidiaVideoEncoder : public VideoEncoder {
  int ResetEncodeResolution(unsigned int width, unsigned int height);

 private:
+  std::shared_ptr<SystemClock> clock_ = nullptr;
  int index_of_gpu_ = 0;
  CUdevice cuda_device_ = 0;

--- a/src/media/video/encode/openh264/openh264_encoder.cpp
+++ b/src/media/video/encode/openh264/openh264_encoder.cpp
@@ -38,7 +38,8 @@ void Nv12ToI420(unsigned char *Src_data, int src_width, int src_height,
      Dst_Stride_U, (uint8_t *)V_data_Dst, Dst_Stride_V, src_width, src_height);
 }

-OpenH264Encoder::OpenH264Encoder() {}
+OpenH264Encoder::OpenH264Encoder(std::shared_ptr<SystemClock> clock)
+    : clock_(clock) {}  // Non-null expected: Encode() dereferences clock_ unchecked to stamp encoded frames.

 OpenH264Encoder::~OpenH264Encoder() {
 #ifdef SAVE_RECEIVED_NV12_STREAM
@@ -181,7 +182,7 @@ int OpenH264Encoder::Init() {

 int OpenH264Encoder::Encode(
    const XVideoFrame *video_frame,
-    std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
+    std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
        on_encoded_image) {
  if (!openh264_encoder_) {
    LOG_ERROR("Invalid openh264 encoder");
@@ -281,14 +282,15 @@ int OpenH264Encoder::Encode(
  encoded_frame_size_ = encoded_frame_size;

  if (on_encoded_image) {
-    std::shared_ptr<VideoFrameWrapper> encoded_frame =
-        std::make_shared<VideoFrameWrapper>(encoded_frame_, encoded_frame_size_,
-                                            raw_frame_.iPicWidth,
-                                            raw_frame_.iPicHeight);
+    std::shared_ptr<EncodedFrame> encoded_frame =
+        std::make_shared<EncodedFrame>(encoded_frame_, encoded_frame_size_,
+                                       raw_frame_.iPicWidth,
+                                       raw_frame_.iPicHeight);
    encoded_frame->SetFrameType(frame_type);
-    encoded_frame->SetCaptureTimestamp(video_frame->timestamp);
    encoded_frame->SetEncodedWidth(raw_frame_.iPicWidth);
    encoded_frame->SetEncodedHeight(raw_frame_.iPicHeight);
+    encoded_frame->SetCapturedTimestamp(video_frame->captured_timestamp);
+    encoded_frame->SetEncodedTimestamp(clock_->CurrentTime());
    on_encoded_image(encoded_frame);
 #ifdef SAVE_ENCODED_H264_STREAM
    fwrite(encoded_frame_, 1, encoded_frame_size_, file_h264_);
@@ -333,6 +335,11 @@ int OpenH264Encoder::Encode(
    encoded_frame_size_ = encoded_frame_size;

    if (on_encoded_image) {
+      encoded_frame->SetFrameType(frame_type);
+      encoded_frame->SetEncodedWidth(raw_frame_.iPicWidth);
+      encoded_frame->SetEncodedHeight(raw_frame_.iPicHeight);
+      encoded_frame->SetCapturedTimestamp(video_frame->captured_timestamp);
+      encoded_frame->SetEncodedTimestamp(clock_->CurrentTime());
      on_encoded_image((char *)encoded_frame_, frame_type);
 #ifdef SAVE_ENCODED_H264_STREAM
      fwrite(encoded_frame_, 1, encoded_frame_size_, file_h264_);
--- a/src/media/video/encode/openh264/openh264_encoder.h
+++ b/src/media/video/encode/openh264/openh264_encoder.h
@@ -19,15 +19,14 @@

 class OpenH264Encoder : public VideoEncoder {
 public:
-  OpenH264Encoder();
+  OpenH264Encoder(std::shared_ptr<SystemClock> clock);
  virtual ~OpenH264Encoder();

  int Init();

-  int Encode(
-      const XVideoFrame* video_frame,
-      std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
-          on_encoded_image);
+  int Encode(const XVideoFrame* video_frame,
+             std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
+                 on_encoded_image);

  int ForceIdr();

@@ -48,6 +47,7 @@ class OpenH264Encoder : public VideoEncoder {
  int Release();

 private:
+  std::shared_ptr<SystemClock> clock_ = nullptr;
  uint32_t frame_width_ = 1280;
  uint32_t frame_height_ = 720;
  int key_frame_interval_ = 3000;
--- a/src/media/video/encode/video_encoder.h
+++ b/src/media/video/encode/video_encoder.h
@@ -8,7 +8,8 @@
 #include <memory>
 #include <string>

-#include "video_frame_wrapper.h"
+#include "clock/system_clock.h"
+#include "encoded_frame.h"
 #include "x.h"

 #define I_FRAME_INTERVAL 3000
@@ -18,7 +19,7 @@ class VideoEncoder {

  virtual int Encode(
      const XVideoFrame* video_frame,
-      std::function<int(std::shared_ptr<VideoFrameWrapper> encoded_frame)>
+      std::function<int(std::shared_ptr<EncodedFrame> encoded_frame)>
          on_encoded_image) = 0;

  virtual int ForceIdr() = 0;
--- a/src/media/video/encode/video_encoder_factory.cpp
+++ b/src/media/video/encode/video_encoder_factory.cpp
@@ -17,25 +17,26 @@ VideoEncoderFactory::VideoEncoderFactory() {}
 VideoEncoderFactory::~VideoEncoderFactory() {}

 std::unique_ptr<VideoEncoder> VideoEncoderFactory::CreateVideoEncoder(
-    bool hardware_acceleration, bool av1_encoding) {
+    std::shared_ptr<SystemClock> clock, bool hardware_acceleration,
+    bool av1_encoding) {
  if (av1_encoding) {
    LOG_INFO("Use AOM encoder");
-    return std::make_unique<AomAv1Encoder>(AomAv1Encoder());
+    return std::make_unique<AomAv1Encoder>(clock);
  } else {
 #if __APPLE__
    LOG_INFO("Use OpenH264 encoder");
-    return std::make_unique<OpenH264Encoder>(OpenH264Encoder());
+    return std::make_unique<OpenH264Encoder>(clock);
 #else
    if (hardware_acceleration) {
      if (CheckIsHardwareAccerlerationSupported()) {
        LOG_INFO("Use Nvidia encoder");
-        return std::make_unique<NvidiaVideoEncoder>(NvidiaVideoEncoder());
+        return std::make_unique<NvidiaVideoEncoder>(clock);
      } else {
        return nullptr;
      }
    } else {
      LOG_INFO("Use OpenH264 encoder");
-      return std::make_unique<OpenH264Encoder>(OpenH264Encoder());
+      return std::make_unique<OpenH264Encoder>(clock);
    }
 #endif
  }
--- a/src/media/video/encode/video_encoder_factory.h
+++ b/src/media/video/encode/video_encoder_factory.h
@@ -10,7 +10,8 @@ class VideoEncoderFactory {
  ~VideoEncoderFactory();

  static std::unique_ptr<VideoEncoder> CreateVideoEncoder(
-      bool hardware_acceleration, bool av1_encoding);
+      std::shared_ptr<SystemClock> clock, bool hardware_acceleration,
+      bool av1_encoding);

  static bool CheckIsHardwareAccerlerationSupported();
 };
--- a/src/rtp/rtp_packet/rtp_defines.h
+++ b/src/rtp/rtp_packet/rtp_defines.h
@@ -41,5 +41,7 @@ typedef struct {
 typedef enum { UNKNOWN = 0, NALU = 1, FU_A = 28, FU_B = 29 } NAL_UNIT_TYPE;

 const int kVideoPayloadTypeFrequency = 90000;
+
+const int kMsToRtpTimestamp = kVideoPayloadTypeFrequency / 1000;  // ticks/ms
 }  // namespace rtp
 #endif
--- a/src/rtp/rtp_packetizer/rtp_packetizer.h
+++ b/src/rtp/rtp_packetizer/rtp_packetizer.h
@@ -22,11 +22,11 @@ class RtpPacketizer {
  virtual ~RtpPacketizer() = default;

  virtual std::vector<std::unique_ptr<RtpPacket>> Build(
-      uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+      uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) = 0;

  virtual std::vector<std::unique_ptr<RtpPacket>> BuildPadding(
-      uint32_t payload_size, int64_t capture_timestamp_us,
+      uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) = 0;
 };

--- a/src/rtp/rtp_packetizer/rtp_packetizer_av1.cpp
+++ b/src/rtp/rtp_packetizer/rtp_packetizer_av1.cpp
@@ -5,7 +5,7 @@ RtpPacketizerAv1::RtpPacketizerAv1(uint32_t ssrc) {}
 RtpPacketizerAv1::~RtpPacketizerAv1() {}

 std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerAv1::Build(
-    uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+    uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
    bool use_rtp_packet_to_send) {
  std::vector<std::unique_ptr<RtpPacket>> rtp_packets;

--- a/src/rtp/rtp_packetizer/rtp_packetizer_av1.h
+++ b/src/rtp/rtp_packetizer/rtp_packetizer_av1.h
@@ -16,11 +16,11 @@ class RtpPacketizerAv1 : public RtpPacketizer {
  virtual ~RtpPacketizerAv1();

  std::vector<std::unique_ptr<RtpPacket>> Build(
-      uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+      uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) override;

  std::vector<std::unique_ptr<RtpPacket>> BuildPadding(
-      uint32_t payload_size, int64_t capture_timestamp_us,
+      uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) override {
    return std::vector<std::unique_ptr<RtpPacket>>{};
  };
--- a/src/rtp/rtp_packetizer/rtp_packetizer_generic.cpp
+++ b/src/rtp/rtp_packetizer/rtp_packetizer_generic.cpp
@@ -47,7 +47,7 @@ void RtpPacketizerGeneric::AddAbsSendTimeExtension(
 }

 std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerGeneric::Build(
-    uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+    uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
    bool use_rtp_packet_to_send) {
  uint32_t last_packet_size = payload_size % MAX_NALU_LEN;
  uint32_t packet_num =
--- a/src/rtp/rtp_packetizer/rtp_packetizer_generic.h
+++ b/src/rtp/rtp_packetizer/rtp_packetizer_generic.h
@@ -16,11 +16,11 @@ class RtpPacketizerGeneric : public RtpPacketizer {
  virtual ~RtpPacketizerGeneric();

  std::vector<std::unique_ptr<RtpPacket>> Build(
-      uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+      uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) override;

  std::vector<std::unique_ptr<RtpPacket>> BuildPadding(
-      uint32_t payload_size, int64_t capture_timestamp_us,
+      uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) override {
    return std::vector<std::unique_ptr<RtpPacket>>{};
  };
--- a/src/rtp/rtp_packetizer/rtp_packetizer_h264.cpp
+++ b/src/rtp/rtp_packetizer/rtp_packetizer_h264.cpp
@@ -1,7 +1,5 @@
 #include "rtp_packetizer_h264.h"

-static int kMsToRtpTimestamp = 90;
-
 RtpPacketizerH264::RtpPacketizerH264(uint32_t ssrc)
    : version_(kRtpVersion),
      has_padding_(false),
@@ -61,19 +59,19 @@ void RtpPacketizerH264::AddAbsSendTimeExtension(
 }

 std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::Build(
-    uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+    uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
    bool use_rtp_packet_to_send) {
  if (payload_size <= MAX_NALU_LEN) {
-    return BuildNalu(payload, payload_size, capture_timestamp_us,
+    return BuildNalu(payload, payload_size, rtp_timestamp,
                     use_rtp_packet_to_send);
  } else {
-    return BuildFua(payload, payload_size, capture_timestamp_us,
+    return BuildFua(payload, payload_size, rtp_timestamp,
                    use_rtp_packet_to_send);
  }
 }

 std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildNalu(
-    uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+    uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
    bool use_rtp_packet_to_send) {
  std::vector<std::unique_ptr<RtpPacket>> rtp_packets;

@@ -84,7 +82,7 @@ std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildNalu(
  marker_ = 1;
  payload_type_ = rtp::PAYLOAD_TYPE(payload_type_);
  sequence_number_++;
-  timestamp_ = kMsToRtpTimestamp * static_cast<uint32_t>(capture_timestamp_us);
+  timestamp_ = rtp::kMsToRtpTimestamp * rtp_timestamp;

  if (!csrc_count_) {
  }
@@ -142,7 +140,7 @@ std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildNalu(
 }

 std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildFua(
-    uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+    uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
    bool use_rtp_packet_to_send) {
  std::vector<std::unique_ptr<RtpPacket>> rtp_packets;

@@ -242,7 +240,7 @@ std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildFua(
 }

 std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildPadding(
-    uint32_t payload_size, int64_t capture_timestamp_us,
+    uint32_t payload_size, uint32_t rtp_timestamp,
    bool use_rtp_packet_to_send) {
  std::vector<std::unique_ptr<RtpPacket>> rtp_packets;

@@ -258,8 +256,7 @@ std::vector<std::unique_ptr<RtpPacket>> RtpPacketizerH264::BuildPadding(
    marker_ = 0;
    uint8_t payload_type = rtp::PAYLOAD_TYPE(payload_type_ - 1);
    sequence_number_++;
-    timestamp_ =
-        kMsToRtpTimestamp * static_cast<uint32_t>(capture_timestamp_us);
+    timestamp_ = rtp::kMsToRtpTimestamp * rtp_timestamp;

    rtp_packet_frame_.clear();
    rtp_packet_frame_.push_back((version_ << 6) | (has_padding_ << 5) |
--- a/src/rtp/rtp_packetizer/rtp_packetizer_h264.h
+++ b/src/rtp/rtp_packetizer/rtp_packetizer_h264.h
@@ -16,20 +16,20 @@ class RtpPacketizerH264 : public RtpPacketizer {
  virtual ~RtpPacketizerH264();

  std::vector<std::unique_ptr<RtpPacket>> Build(
-      uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+      uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) override;

  std::vector<std::unique_ptr<RtpPacket>> BuildNalu(
-      uint8_t* payload, uint32_t payload_size, int64_t capture_timestamp_us,
+      uint8_t* payload, uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send);

  std::vector<std::unique_ptr<RtpPacket>> BuildFua(uint8_t* payload,
                                                   uint32_t payload_size,
-                                                   int64_t capture_timestamp_us,
+                                                   uint32_t rtp_timestamp,
                                                   bool use_rtp_packet_to_send);

  std::vector<std::unique_ptr<RtpPacket>> BuildPadding(
-      uint32_t payload_size, int64_t capture_timestamp_us,
+      uint32_t payload_size, uint32_t rtp_timestamp,
      bool use_rtp_packet_to_send) override;

 private:
--- a/src/transport/channel/rtp_video_receiver.cpp
+++ b/src/transport/channel/rtp_video_receiver.cpp
@@ -29,6 +29,8 @@ RtpVideoReceiver::RtpVideoReceiver(std::shared_ptr<SystemClock> clock)
          },
          1200)),
      nack_(std::make_unique<NackRequester>(clock_, this, this)),
+      delta_ntp_internal_ms_(clock->CurrentNtpInMilliseconds() -
+                             clock->CurrentTimeMs()),
      clock_(webrtc::Clock::GetWebrtcClockShared(clock)) {
  SetPeriod(std::chrono::milliseconds(5));
  rtcp_thread_ = std::thread(&RtpVideoReceiver::RtcpThread, this);
@@ -217,8 +219,15 @@ void RtpVideoReceiver::ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264) {
    if (rtp::PAYLOAD_TYPE::H264 == rtp_packet_h264.PayloadType()) {
      rtp::NAL_UNIT_TYPE nalu_type = rtp_packet_h264.NalUnitType();
      if (rtp::NAL_UNIT_TYPE::NALU == nalu_type) {
-        compelete_video_frame_queue_.push(VideoFrame(
-            rtp_packet_h264.Payload(), rtp_packet_h264.PayloadSize()));
+        ReceivedFrame received_frame(rtp_packet_h264.Payload(),
+                                     rtp_packet_h264.PayloadSize());
+        received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
+        received_frame.SetCapturedTimestamp(
+            (static_cast<int64_t>(rtp_packet_h264.Timestamp()) /
+                 rtp::kMsToRtpTimestamp -
+             delta_ntp_internal_ms_) *
+            1000);
+        compelete_video_frame_queue_.push(received_frame);
      } else if (rtp::NAL_UNIT_TYPE::FU_A == nalu_type) {
        incomplete_h264_frame_list_[rtp_packet_h264.SequenceNumber()] =
            rtp_packet_h264;
@@ -409,8 +418,15 @@ bool RtpVideoReceiver::CheckIsH264FrameCompleted(
          incomplete_h264_frame_list_.erase(seq);
          frame_fragment_count++;
        }
-        compelete_video_frame_queue_.push(
-            VideoFrame(nv12_data_, complete_frame_size));
+
+        ReceivedFrame received_frame(nv12_data_, complete_frame_size);
+        received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
+        received_frame.SetCapturedTimestamp(
+            (static_cast<int64_t>(rtp_packet_h264.Timestamp()) /
+                 rtp::kMsToRtpTimestamp -
+             delta_ntp_internal_ms_) *
+            1000);
+        compelete_video_frame_queue_.push(received_frame);

        return true;
      } else {
@@ -461,8 +477,14 @@ bool RtpVideoReceiver::CheckIsAv1FrameCompleted(RtpPacketAv1& rtp_packet_av1) {
        incomplete_av1_frame_list_.erase(start);
      }

-      compelete_video_frame_queue_.push(
-          VideoFrame(nv12_data_, complete_frame_size));
+      ReceivedFrame received_frame(nv12_data_, complete_frame_size);
+      received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
+      received_frame.SetCapturedTimestamp(
+          (static_cast<int64_t>(rtp_packet_av1.Timestamp()) /
+               rtp::kMsToRtpTimestamp -
+           delta_ntp_internal_ms_) *
+          1000);
+      compelete_video_frame_queue_.push(received_frame);

      return true;
    }
@@ -546,14 +568,15 @@ bool RtpVideoReceiver::CheckIsTimeSendRR() {

 bool RtpVideoReceiver::Process() {
  if (!compelete_video_frame_queue_.isEmpty()) {
-    std::optional<VideoFrame> video_frame = compelete_video_frame_queue_.pop();
+    std::optional<ReceivedFrame> video_frame =
+        compelete_video_frame_queue_.pop();
    if (on_receive_complete_frame_ && video_frame) {
      // auto now_complete_frame_ts =
      //     std::chrono::duration_cast<std::chrono::milliseconds>(
      //         std::chrono::system_clock::now().time_since_epoch())
      //         .count();
-      // uint32_t duration = now_complete_frame_ts - last_complete_frame_ts_;
-      // LOG_ERROR("Duration {}", duration);
+      // uint32_t duration = now_complete_frame_ts - last_complete_frame_ts_;
+      // LOG_ERROR("Duration {}", duration);
      // last_complete_frame_ts_ = now_complete_frame_ts;

      on_receive_complete_frame_(*video_frame);
@@ -577,13 +600,13 @@ void RtpVideoReceiver::ReviseFrequencyAndJitter(int payload_type_frequency) {
    if (last_payload_type_frequency_ != 0) {
      // Value in "jitter_q4_" variable is a number of samples.
      // I.e. jitter = timestamp (s) * frequency (Hz).
-      // Since the frequency has changed we have to update the number of samples
-      // accordingly. The new value should rely on a new frequency.
+      // Since the frequency has changed we have to update the number of
+      // samples accordingly. The new value should rely on a new frequency.

-      // If we don't do such procedure we end up with the number of samples that
-      // cannot be converted into TimeDelta correctly
-      // (i.e. jitter = jitter_q4_ >> 4 / payload_type_frequency).
-      // In such case, the number of samples has a "mix".
+      // If we don't do such procedure we end up with the number of samples
+      // that cannot be converted into TimeDelta correctly (i.e. jitter =
+      // jitter_q4_ >> 4 / payload_type_frequency). In such case, the number
+      // of samples has a "mix".

      // Doing so we pretend that everything prior and including the current
      // packet were computed on packet's frequency.
--- a/src/transport/channel/rtp_video_receiver.h
+++ b/src/transport/channel/rtp_video_receiver.h
@@ -13,6 +13,7 @@
 #include "io_statistics.h"
 #include "nack_requester.h"
 #include "receive_side_congestion_controller.h"
+#include "received_frame.h"
 #include "receiver_report.h"
 #include "ringbuffer.h"
 #include "rtcp_sender.h"
@@ -22,7 +23,6 @@
 #include "rtp_statistics.h"
 #include "sender_report.h"
 #include "thread_base.h"
-#include "video_frame.h"

 using namespace webrtc;

@@ -42,7 +42,7 @@ class RtpVideoReceiver : public ThreadBase,
  void SetSendDataFunc(std::function<int(const char*, size_t)> data_send_func);

  void SetOnReceiveCompleteFrame(
-      std::function<void(VideoFrame&)> on_receive_complete_frame) {
+      std::function<void(const ReceivedFrame&)> on_receive_complete_frame) {
    on_receive_complete_frame_ = on_receive_complete_frame;
  }
  uint32_t GetSsrc() { return ssrc_; }
@@ -89,9 +89,10 @@ class RtpVideoReceiver : public ThreadBase,
  std::map<uint16_t, RtpPacketAv1> incomplete_av1_frame_list_;
  std::map<uint16_t, RtpPacket> incomplete_frame_list_;
  uint8_t* nv12_data_ = nullptr;
-  std::function<void(VideoFrame&)> on_receive_complete_frame_ = nullptr;
+  std::function<void(const ReceivedFrame&)> on_receive_complete_frame_ =
+      nullptr;
  uint32_t last_complete_frame_ts_ = 0;
-  RingBuffer<VideoFrame> compelete_video_frame_queue_;
+  RingBuffer<ReceivedFrame> compelete_video_frame_queue_;

 private:
  std::unique_ptr<RtpStatistics> rtp_statistics_ = nullptr;
@@ -162,6 +163,7 @@ class RtpVideoReceiver : public ThreadBase,

 private:
  FILE* file_rtp_recv_ = nullptr;
+  int64_t delta_ntp_internal_ms_;
 };

 #endif
--- a/src/transport/channel/rtp_video_sender.cpp
+++ b/src/transport/channel/rtp_video_sender.cpp
@@ -45,7 +45,7 @@ RtpVideoSender::~RtpVideoSender() {

 void RtpVideoSender::Enqueue(
    std::vector<std::unique_ptr<RtpPacket>>& rtp_packets,
-    int64_t capture_timestamp_us) {
+    int64_t captured_timestamp_us) {
  if (!rtp_statistics_) {
    rtp_statistics_ = std::make_unique<RtpStatistics>();
    rtp_statistics_->Start();
--- a/src/transport/channel/rtp_video_sender.h
+++ b/src/transport/channel/rtp_video_sender.h
@@ -24,7 +24,7 @@ class RtpVideoSender : public ThreadBase {

 public:
  void Enqueue(std::vector<std::unique_ptr<RtpPacket>> &rtp_packets,
-               int64_t capture_timestamp_us);
+               int64_t captured_timestamp_us);
  void SetSendDataFunc(std::function<int(const char *, size_t)> data_send_func);
  void SetOnSentPacketFunc(
      std::function<void(const webrtc::RtpPacketToSend &)> on_sent_packet_func);
--- a/src/transport/channel/video_channel_receive.cpp
+++ b/src/transport/channel/video_channel_receive.cpp
@@ -7,7 +7,7 @@ VideoChannelReceive::VideoChannelReceive() {}
 VideoChannelReceive::VideoChannelReceive(
    std::shared_ptr<SystemClock> clock, std::shared_ptr<IceAgent> ice_agent,
    std::shared_ptr<IOStatistics> ice_io_statistics,
-    std::function<void(VideoFrame &)> on_receive_complete_frame)
+    std::function<void(const ReceivedFrame &)> on_receive_complete_frame)
    : ice_agent_(ice_agent),
      ice_io_statistics_(ice_io_statistics),
      on_receive_complete_frame_(on_receive_complete_frame),
@@ -19,8 +19,8 @@ void VideoChannelReceive::Initialize(rtp::PAYLOAD_TYPE payload_type) {
  rtp_video_receiver_ =
      std::make_unique<RtpVideoReceiver>(clock_, ice_io_statistics_);
  rtp_video_receiver_->SetOnReceiveCompleteFrame(
-      [this](VideoFrame &video_frame) -> void {
-        on_receive_complete_frame_(video_frame);
+      [this](const ReceivedFrame &received_frame) -> void {
+        on_receive_complete_frame_(received_frame);
      });

  rtp_video_receiver_->SetSendDataFunc([this](const char *data,
--- a/src/transport/channel/video_channel_receive.h
+++ b/src/transport/channel/video_channel_receive.h
@@ -17,7 +17,7 @@ class VideoChannelReceive {
  VideoChannelReceive(
      std::shared_ptr<SystemClock> clock, std::shared_ptr<IceAgent> ice_agent,
      std::shared_ptr<IOStatistics> ice_io_statistics,
-      std::function<void(VideoFrame &)> on_receive_complete_frame);
+      std::function<void(const ReceivedFrame &)> on_receive_complete_frame);

  ~VideoChannelReceive();

@@ -51,7 +51,8 @@ class VideoChannelReceive {
  std::shared_ptr<IceAgent> ice_agent_ = nullptr;
  std::shared_ptr<IOStatistics> ice_io_statistics_ = nullptr;
  std::unique_ptr<RtpVideoReceiver> rtp_video_receiver_ = nullptr;
-  std::function<void(VideoFrame &)> on_receive_complete_frame_ = nullptr;
+  std::function<void(const ReceivedFrame &)> on_receive_complete_frame_ =
+      nullptr;

 private:
  std::shared_ptr<SystemClock> clock_;
--- a/src/transport/channel/video_channel_send.cpp
+++ b/src/transport/channel/video_channel_send.cpp
@@ -17,6 +17,8 @@ VideoChannelSend::VideoChannelSend(
      packet_sender_(packet_sender),
      ice_io_statistics_(ice_io_statistics),
      on_sent_packet_func_(on_sent_packet_func),
+      delta_ntp_internal_ms_(clock->CurrentNtpInMilliseconds() -
+                             clock->CurrentTimeMs()),
      clock_(clock){};

 void VideoChannelSend::Initialize(rtp::PAYLOAD_TYPE payload_type) {
@@ -57,9 +59,9 @@ void VideoChannelSend::SetEnqueuePacketsFunc(
 }

 std::vector<std::unique_ptr<RtpPacket>> VideoChannelSend::GeneratePadding(
-    uint32_t payload_size, int64_t capture_timestamp_us) {
+    uint32_t payload_size, int64_t captured_timestamp_us) {
  if (rtp_packetizer_) {
-    return rtp_packetizer_->BuildPadding(payload_size, capture_timestamp_us,
+    return rtp_packetizer_->BuildPadding(payload_size, captured_timestamp_us,
                                         true);
  }
  return std::vector<std::unique_ptr<RtpPacket>>{};
@@ -71,15 +73,16 @@ void VideoChannelSend::Destroy() {
  }
 }

-int VideoChannelSend::SendVideo(
-    std::shared_ptr<VideoFrameWrapper> encoded_frame) {
+int VideoChannelSend::SendVideo(std::shared_ptr<EncodedFrame> encoded_frame) {
  if (rtp_video_sender_ && rtp_packetizer_) {
+    int64_t rtp_timestamp =
+        delta_ntp_internal_ms_ +
+        static_cast<uint32_t>(encoded_frame->CapturedTimestamp() / 1000);
    std::vector<std::unique_ptr<RtpPacket>> rtp_packets =
        rtp_packetizer_->Build((uint8_t*)encoded_frame->Buffer(),
-                               (uint32_t)encoded_frame->Size(),
-                               encoded_frame->CaptureTimestamp(), true);
-    packet_sender_->EnqueueRtpPacket(std::move(rtp_packets),
-                                     encoded_frame->CaptureTimestamp());
+                               (uint32_t)encoded_frame->Size(), rtp_timestamp,
+                               true);
+    packet_sender_->EnqueueRtpPacket(std::move(rtp_packets), rtp_timestamp);
  }

  return 0;
--- a/src/transport/channel/video_channel_send.h
+++ b/src/transport/channel/video_channel_send.h
@@ -12,12 +12,12 @@
 #include "clock/system_clock.h"
 #include "congestion_control.h"
 #include "congestion_control_feedback.h"
+#include "encoded_frame.h"
 #include "ice_agent.h"
 #include "packet_sender.h"
 #include "rtp_packetizer.h"
 #include "rtp_video_sender.h"
 #include "transport_feedback_adapter.h"
-#include "video_frame_wrapper.h"

 class VideoChannelSend {
 public:
@@ -36,7 +36,7 @@ class VideoChannelSend {
          enqueue_packets_func);

  std::vector<std::unique_ptr<RtpPacket>> GeneratePadding(
-      uint32_t payload_size, int64_t capture_timestamp_us);
+      uint32_t payload_size, int64_t captured_timestamp_us);

  int64_t GetTransportSeqAndIncrement() {
    int64_t transport_seq = rtp_video_sender_->GetTransportSequenceNumber();
@@ -55,7 +55,7 @@ class VideoChannelSend {
    return 0;
  }

-  int SendVideo(std::shared_ptr<VideoFrameWrapper> encoded_frame);
+  int SendVideo(std::shared_ptr<EncodedFrame> encoded_frame);

  void OnCongestionControlFeedback(
      Timestamp recv_ts,
@@ -84,6 +84,7 @@ class VideoChannelSend {

 private:
  std::shared_ptr<SystemClock> clock_;
+  int64_t delta_ntp_internal_ms_;
 };

 #endif
--- a/src/transport/ice_transport_controller.cpp
+++ b/src/transport/ice_transport_controller.cpp
@@ -49,7 +49,7 @@ void IceTransportController::Create(
  on_receive_data_ = on_receive_data;
  user_data_ = user_data;

-  CreateVideoCodec(video_codec_payload_type, hardware_acceleration);
+  CreateVideoCodec(clock_, video_codec_payload_type, hardware_acceleration);
  CreateAudioCodec();

  controller_ = std::make_unique<CongestionControl>();
@@ -75,9 +75,10 @@ void IceTransportController::Create(
      });

  packet_sender_->SetGeneratePaddingFunc(
-      [this](uint32_t size, int64_t capture_timestamp_us)
+      [this](uint32_t size, int64_t captured_timestamp_us)
          -> std::vector<std::unique_ptr<RtpPacket>> {
-        return video_channel_send_->GeneratePadding(size, capture_timestamp_us);
+        return video_channel_send_->GeneratePadding(size,
+                                                    captured_timestamp_us);
      });

  audio_channel_send_ = std::make_unique<AudioChannelSend>(
@@ -96,9 +97,9 @@ void IceTransportController::Create(
  std::weak_ptr<IceTransportController> weak_self = shared_from_this();
  video_channel_receive_ = std::make_unique<VideoChannelReceive>(
      clock_, ice_agent, ice_io_statistics,
-      [this, weak_self](VideoFrame& video_frame) {
+      [this, weak_self](const ReceivedFrame& received_frame) {
        if (auto self = weak_self.lock()) {
-          OnReceiveCompleteFrame(video_frame);
+          OnReceiveCompleteFrame(received_frame);
        }
      });

@@ -170,7 +171,7 @@ int IceTransportController::SendVideo(const XVideoFrame* video_frame) {
  new_frame.width = video_frame->width;
  new_frame.height = video_frame->height;
  new_frame.size = video_frame->size;
-  new_frame.timestamp = video_frame->timestamp;
+  new_frame.captured_timestamp = video_frame->captured_timestamp;
  if (target_width_.has_value() && target_height_.has_value()) {
    if (target_width_.value() < video_frame->width &&
        target_height_.value() < video_frame->height) {
@@ -183,7 +184,7 @@ int IceTransportController::SendVideo(const XVideoFrame* video_frame) {

  int ret = video_encoder_->Encode(
      need_to_release ? &new_frame : video_frame,
-      [this](std::shared_ptr<VideoFrameWrapper> encoded_frame) -> int {
+      [this](std::shared_ptr<EncodedFrame> encoded_frame) -> int {
        if (video_channel_send_) {
          video_channel_send_->SendVideo(encoded_frame);
        }
@@ -268,16 +269,19 @@ int IceTransportController::OnReceiveDataRtpPacket(const char* data,
  return -1;
 }

-void IceTransportController::OnReceiveCompleteFrame(VideoFrame& video_frame) {
+void IceTransportController::OnReceiveCompleteFrame(
+    const ReceivedFrame& received_frame) {
  int num_frame_returned = video_decoder_->Decode(
-      (uint8_t*)video_frame.Buffer(), video_frame.Size(),
-      [this](VideoFrame video_frame) {
+      received_frame, [this](DecodedFrame decoded_frame) {
        if (on_receive_video_) {
          XVideoFrame x_video_frame;
-          x_video_frame.data = (const char*)video_frame.Buffer();
-          x_video_frame.width = video_frame.Width();
-          x_video_frame.height = video_frame.Height();
-          x_video_frame.size = video_frame.Size();
+          x_video_frame.data = (const char*)decoded_frame.Buffer();
+          x_video_frame.width = decoded_frame.Width();
+          x_video_frame.height = decoded_frame.Height();
+          x_video_frame.size = decoded_frame.Size();
+          x_video_frame.captured_timestamp = decoded_frame.CapturedTimestamp();
+          x_video_frame.received_timestamp = decoded_frame.ReceivedTimestamp();
+          x_video_frame.decoded_timestamp = decoded_frame.DecodedTimestamp();
          on_receive_video_(&x_video_frame, remote_user_id_.data(),
                            remote_user_id_.size(), user_data_);
        }
@@ -303,7 +307,8 @@ void IceTransportController::OnReceiveCompleteData(const char* data,
  }
 }

-int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
+int IceTransportController::CreateVideoCodec(std::shared_ptr<SystemClock> clock,
+                                             rtp::PAYLOAD_TYPE video_pt,
                                             bool hardware_acceleration) {
  if (video_codec_inited_) {
    return 0;
@@ -316,8 +321,10 @@ int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
      hardware_acceleration_ = false;
      LOG_WARN("Only support software codec for AV1");
    }
-    video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, true);
-    video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, true);
+    video_encoder_ =
+        VideoEncoderFactory::CreateVideoEncoder(clock, false, true);
+    video_decoder_ =
+        VideoDecoderFactory::CreateVideoDecoder(clock, false, true);
  } else if (rtp::PAYLOAD_TYPE::H264 == video_pt) {
 #ifdef __APPLE__
    if (hardware_acceleration_) {
@@ -325,34 +332,45 @@ int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
      LOG_WARN(
          "MacOS not support hardware acceleration, use default software "
          "codec");
-      video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
-      video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
+      video_encoder_ =
+          VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
+      video_decoder_ =
+          VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
    } else {
-      video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
-      video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
+      video_encoder_ =
+          VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
+      video_decoder_ =
+          VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
    }
 #else
    if (hardware_acceleration_) {
      if (0 == LoadNvCodecDll()) {
        load_nvcodec_dll_success_ = true;
-        video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(true, false);
-        video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(true, false);
+        video_encoder_ =
+            VideoEncoderFactory::CreateVideoEncoder(clock, true, false);
+        video_decoder_ =
+            VideoDecoderFactory::CreateVideoDecoder(clock, true, false);
      } else {
        LOG_WARN(
            "Hardware accelerated codec not available, use default software "
            "codec");
-        video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
-        video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
+        video_encoder_ =
+            VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
+        video_decoder_ =
+            VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
      }
    } else {
-      video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
-      video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
+      video_encoder_ =
+          VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
+      video_decoder_ =
+          VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
    }
 #endif
  }

  if (!video_encoder_) {
-    video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
+    video_encoder_ =
+        VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
    LOG_ERROR("Create encoder failed, try to use software H.264 encoder");
  }
  if (!video_encoder_ || 0 != video_encoder_->Init()) {
@@ -361,7 +379,8 @@ int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
  }

  if (!video_decoder_) {
-    video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
+    video_decoder_ =
+        VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
    LOG_ERROR("Create decoder failed, try to use software H.264 decoder");
  }
  if (!video_decoder_ || video_decoder_->Init()) {
--- a/src/transport/ice_transport_controller.h
+++ b/src/transport/ice_transport_controller.h
@@ -65,7 +65,7 @@ class IceTransportController
  int OnReceiveAudioRtpPacket(const char *data, size_t size);
  int OnReceiveDataRtpPacket(const char *data, size_t size);

-  void OnReceiveCompleteFrame(VideoFrame &video_frame);
+  void OnReceiveCompleteFrame(const ReceivedFrame &received_frame);
  void OnReceiveCompleteAudio(const char *data, size_t size);
  void OnReceiveCompleteData(const char *data, size_t size);

@@ -76,7 +76,8 @@ class IceTransportController
      const webrtc::rtcp::CongestionControlFeedback &feedback);

 private:
-  int CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt, bool hardware_acceleration);
+  int CreateVideoCodec(std::shared_ptr<SystemClock> clock,
+                       rtp::PAYLOAD_TYPE video_pt, bool hardware_acceleration);
  int CreateAudioCodec();

 private:
--- a/src/transport/packet_sender/packet_sender.h
+++ b/src/transport/packet_sender/packet_sender.h
@@ -20,7 +20,7 @@ class PacketSender {
  virtual int Send() = 0;
  virtual int EnqueueRtpPacket(
      std::vector<std::unique_ptr<RtpPacket>> &rtp_packets,
-      int64_t capture_timestamp_us) = 0;
+      int64_t captured_timestamp_us) = 0;
 };

 #endif
--- a/src/transport/packet_sender/packet_sender_imp.cpp
+++ b/src/transport/packet_sender/packet_sender_imp.cpp
@@ -250,13 +250,12 @@ PacketSenderImp::Stats PacketSenderImp::GetStats() const {

 int PacketSenderImp::EnqueueRtpPacket(
    std::vector<std::unique_ptr<RtpPacket>> &rtp_packets,
-    int64_t capture_timestamp_us) {
+    int64_t captured_timestamp_us) {
  std::vector<std::unique_ptr<webrtc::RtpPacketToSend>> to_send_rtp_packets;
  for (auto &rtp_packet : rtp_packets) {
    std::unique_ptr<webrtc::RtpPacketToSend> rtp_packet_to_send(
        static_cast<webrtc::RtpPacketToSend *>(rtp_packet.release()));
-    rtp_packet_to_send->set_capture_time(
-        webrtc::Timestamp::Micros(capture_timestamp_us));
+    rtp_packet_to_send->set_capture_time(clock_->CurrentTime());
    rtp_packet_to_send->set_transport_sequence_number(transport_seq_++);

    switch (rtp_packet_to_send->PayloadType()) {
--- a/src/transport/packet_sender/packet_sender_imp.h
+++ b/src/transport/packet_sender/packet_sender_imp.h
@@ -38,7 +38,7 @@ class PacketSenderImp : public PacketSender,
  int Send() { return 0; }

  int EnqueueRtpPacket(std::vector<std::unique_ptr<RtpPacket>>& rtp_packets,
-                       int64_t capture_timestamp_us);
+                       int64_t captured_timestamp_us);

  void SetOnSentPacketFunc(
      std::function<void(const webrtc::RtpPacketToSend&)> on_sent_packet_func) {