[feat] refactor h264 frame assember

2025-12-18 13:19:24 +08:00 · 2025-03-26 17:39:47 +08:00
parent 28dc9bf62d
commit 0f07058fb9
7 changed files with 194 additions and 4 deletions
--- a/src/media/video/assemble_frame/h264_frame_assember.cpp
+++ b/src/media/video/assemble_frame/h264_frame_assember.cpp
@@ -0,0 +1,123 @@
 #include "h264_frame_assember.h"
 H264FrameAssembler::H264FrameAssembler() {}
 H264FrameAssembler::~H264FrameAssembler() {}
 int64_t EuclideanMod(int64_t n, int64_t div) {
  return (n %= div) < 0 ? n + div : n;
 }
 template <typename T>
 inline bool AheadOrAt(T a, T b) {
  const T maxDist = std::numeric_limits<T>::max() / 2 + T(1);
  if (a - b == maxDist) return b < a;
  return b - a < maxDist;
 }
 int64_t* GetContinuousSequence(
    std::array<int64_t, MAX_TRACKED_SEQUENCE_SIZE>& last_continuous,
    int64_t unwrapped_seq_num) {
  for (int64_t& last : last_continuous) {
    if (unwrapped_seq_num - 1 == last) {
      return &last;
    }
  }
  return nullptr;
 }
 int64_t H264FrameAssembler::Unwrap(uint16_t seq_num) {
  return (int64_t)seq_num;
 }
 std::vector<std::unique_ptr<RtpPacketH264>>& H264FrameAssembler::InsertPacket(
    std::unique_ptr<RtpPacketH264> rtp_packet) {
  std::vector<std::unique_ptr<RtpPacketH264>> result;
  int64_t unwrapped_seq_num = Unwrap(rtp_packet->SequenceNumber());
  auto& packet_slotted = GetPacketFromBuffer(unwrapped_seq_num);
  if (packet_slotted != nullptr &&
      AheadOrAt(packet_slotted->Timestamp(), rtp_packet->Timestamp())) {
    // The incoming `packet` is old or a duplicate.
    return std::move(result);
  } else {
    packet_slotted = std::move(rtp_packet);
  }
  return std::move(FindFrames(unwrapped_seq_num));
 }
 std::unique_ptr<RtpPacketH264>& H264FrameAssembler::GetPacketFromBuffer(
    int64_t unwrapped_seq_num) {
  return packet_buffer_[EuclideanMod(unwrapped_seq_num,
                                     MAX_PACKET_BUFFER_SIZE)];
 }
 std::vector<std::unique_ptr<RtpPacketH264>> H264FrameAssembler::FindFrames(
    int64_t unwrapped_seq_num) {
  std::vector<std::unique_ptr<RtpPacketH264>> result;
  RtpPacketH264* packet_slotted = GetPacketFromBuffer(unwrapped_seq_num).get();
  int64_t* last_continuous_unwrapped_seq_num =
      GetContinuousSequence(last_continuous_in_sequence_, unwrapped_seq_num);
  // not continuous or the beginning of a new coded video sequence.
  if (last_continuous_unwrapped_seq_num == nullptr) {
    // if (packet_slotted->FuAStart()) {
    //   return result;
    // }
    last_continuous_in_sequence_[last_continuous_in_sequence_index_] =
        unwrapped_seq_num;
    last_continuous_unwrapped_seq_num =
        &last_continuous_in_sequence_[last_continuous_in_sequence_index_];
    last_continuous_in_sequence_index_ =
        (last_continuous_in_sequence_index_ + 1) %
        last_continuous_in_sequence_.size();
  }
  for (int64_t seq_num = unwrapped_seq_num;
       seq_num < unwrapped_seq_num + MAX_PACKET_BUFFER_SIZE;) {
    // Packets that were never assembled into a completed frame will stay in
    // the 'buffer_'. Check that the `packet` sequence number match the expected
    // unwrapped sequence number.
    if (seq_num != Unwrap(packet_slotted->SequenceNumber())) {
      return result;
    }
    *last_continuous_unwrapped_seq_num = seq_num;
    // Last packet of the frame, try to assemble the frame.
    if (packet_slotted->FuAEnd()) {
      uint32_t rtp_timestamp = packet_slotted->Timestamp();
      // Iterate backwards to find where the frame starts.
      for (int64_t seq_num_start = seq_num;
           seq_num_start > seq_num - MAX_PACKET_BUFFER_SIZE; --seq_num_start) {
        auto& prev_packet = GetPacketFromBuffer(seq_num_start - 1);
        if (prev_packet == nullptr ||
            prev_packet->Timestamp() != rtp_timestamp) {
          const auto& current_packet = GetPacketFromBuffer(seq_num_start);
          if (!current_packet->FuAStart()) {
            // First packet of the frame is missing.
            return result;
          }
          for (int64_t seq_num = seq_num_start; seq_num <= seq_num; ++seq_num) {
            auto& packet = GetPacketFromBuffer(seq_num);
            result.push_back(std::move(packet));
          }
          break;
        }
      }
    }
    seq_num++;
    packet_slotted = GetPacketFromBuffer(seq_num).get();
    if (packet_slotted == nullptr) {
      return result;
    }
  }
  return result;
 }
--- a/src/media/video/assemble_frame/h264_frame_assember.h
+++ b/src/media/video/assemble_frame/h264_frame_assember.h
@@ -0,0 +1,42 @@
 /*
 * @Author: DI JUNKUN
 * @Date: 2025-03-26
 * Copyright (c) 2025 by DI JUNKUN, All Rights Reserved.
 */
 #ifndef _H264_FRAME_ASSEMBER_H_
 #define _H264_FRAME_ASSEMBER_H_
 #include <array>
 #include "rtp_packet_h264.h"
 #define MAX_PACKET_BUFFER_SIZE 2048
 #define MAX_TRACKED_SEQUENCE_SIZE 5
 class H264FrameAssembler {
 public:
  H264FrameAssembler();
  ~H264FrameAssembler();
 public:
  std::vector<std::unique_ptr<RtpPacketH264>>& InsertPacket(
      std::unique_ptr<RtpPacketH264> rtp_packet);
 private:
  int64_t Unwrap(uint16_t seq_num);
  std::unique_ptr<RtpPacketH264>& GetPacketFromBuffer(
      int64_t unwrapped_seq_num);
  std::vector<std::unique_ptr<RtpPacketH264>> FindFrames(
      int64_t unwrapped_seq_num);
 private:
  std::array<std::unique_ptr<RtpPacketH264>, MAX_PACKET_BUFFER_SIZE>
      packet_buffer_;
  std::array<int64_t, MAX_TRACKED_SEQUENCE_SIZE> last_continuous_in_sequence_;
  int64_t last_continuous_in_sequence_index_ = 0;
 };
 #endif
--- a/src/rtp/rtp_packet/rtp_packet.h
+++ b/src/rtp/rtp_packet/rtp_packet.h
@@ -248,7 +248,7 @@ class RtpPacket {
    return rtp::PAYLOAD_TYPE(payload_type_);
  }
  uint16_t SequenceNumber() const { return sequence_number_; }
-  uint64_t Timestamp() const { return timestamp_; }
+  uint32_t Timestamp() const { return timestamp_; }
  uint32_t Ssrc() const { return ssrc_; }
  std::vector<uint32_t> Csrcs() const { return csrcs_; };
  uint16_t ExtensionProfile() const { return extension_profile_; }
--- a/src/rtp/rtp_packet/rtp_packet.x
+++ b/src/rtp/rtp_packet/rtp_packet.x
@@ -329,7 +329,7 @@ class RtpPacket {
    // ParseRtpData();
    return sequence_number_;
  }
-  uint64_t Timestamp() const {
+  uint32_t Timestamp() const {
    // ParseRtpData();
    return timestamp_;
  }
--- a/src/transport/channel/rtp_video_receiver.cpp
+++ b/src/transport/channel/rtp_video_receiver.cpp
@@ -195,6 +195,27 @@ void RtpVideoReceiver::InsertRtpPacket(RtpPacket& rtp_packet) {
    ProcessAv1RtpPacket(rtp_packet_av1);
  } else if (rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::H264 ||
             rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::H264 - 1) {
    // std::unique_ptr<RtpPacketH264> rtp_packet_h264 =
    //     std::make_unique<RtpPacketH264>();
    // if (rtp_packet_h264->Build(rtp_packet.Buffer().data(),
    // rtp_packet.Size())) {
    //   std::vector<std::unique_ptr<RtpPacketH264>> complete_frame = std::move(
    //       h264_frame_assembler_.InsertPacket(std::move(rtp_packet_h264)));
    //   if (!complete_frame.empty()) {
    //     for (auto& frame : complete_frame) {
    //       ReceivedFrame received_frame(frame->Payload(),
    //       frame->PayloadSize());
    //       received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
    //       received_frame.SetCapturedTimestamp(
    //           (static_cast<int64_t>(frame->Timestamp()) /
    //                rtp::kMsToRtpTimestamp -
    //            delta_ntp_internal_ms_) *
    //           1000);
    //       compelete_video_frame_queue_.push(received_frame);
    //     }
    //   }
    // }
    RtpPacketH264 rtp_packet_h264;
    if (rtp_packet_h264.Build(rtp_packet.Buffer().data(), rtp_packet.Size())) {
      rtp_packet_h264.GetFrameHeaderInfo();
--- a/src/transport/channel/rtp_video_receiver.h
+++ b/src/transport/channel/rtp_video_receiver.h
@@ -11,6 +11,7 @@
 #include "api/clock/clock.h"
 #include "clock/system_clock.h"
 #include "fec_decoder.h"
 #include "h264_frame_assember.h"
 #include "io_statistics.h"
 #include "nack_requester.h"
 #include "receive_side_congestion_controller.h"
@@ -125,6 +126,7 @@ class RtpVideoReceiver : public ThreadBase,
      missing_sequence_numbers_;
  std::unordered_map<uint64_t, uint16_t> fua_end_sequence_numbers_;
  std::unordered_map<uint64_t, int64_t> missing_sequence_numbers_wait_time_;
  H264FrameAssembler h264_frame_assembler_;
 private:
  std::thread rtcp_thread_;
--- a/xmake.lua
+++ b/xmake.lua
@@ -149,7 +149,7 @@ target("transport")
 target("media")
    set_kind("object")
-    add_deps("log", "frame", "common")
+    add_deps("log", "frame", "common", "rtp")
    if is_os("windows") then
        add_files("src/media/video/encode/*.cpp",
        "src/media/video/decode/*.cpp",
@@ -214,10 +214,12 @@ target("media")
    end
    add_files("src/media/audio/encode/*.cpp",
        "src/media/audio/decode/*.cpp",
-        "src/media/resolution_adapter/*.cpp")
+        "src/media/resolution_adapter/*.cpp",
        "src/media/video/assemble_frame/*.cpp")
    add_includedirs("src/media/audio/encode",
        "src/media/audio/decode",
        "src/media/resolution_adapter",
        "src/media/video/assemble_frame",
        "src/interface", {public = true})
 target("pc")