From f361347795e6d8f255826d3f67251f08c63341cf Mon Sep 17 00:00:00 2001 From: dijunkun Date: Tue, 1 Apr 2025 18:12:15 +0800 Subject: [PATCH] [feat] h264 frame assember refactoring --- .../assemble_frame/h264_frame_assember.cpp | 1 + src/rtp/rtp_packet/rtp_packet_h264.cpp | 24 +- src/rtp/rtp_packet/rtp_packet_h264.h | 3 + src/rtp/rtp_packet/rtp_packet_to_send.cpp | 4 +- src/rtp/rtp_packet/rtx_packet.h | 2 +- src/transport/channel/rtp_video_receiver.cpp | 298 ++++++++++-------- src/transport/channel/rtp_video_receiver.h | 5 +- src/transport/channel/video_channel_send.cpp | 1 - 8 files changed, 186 insertions(+), 152 deletions(-) diff --git a/src/media/video/assemble_frame/h264_frame_assember.cpp b/src/media/video/assemble_frame/h264_frame_assember.cpp index 9de02a7..323114b 100644 --- a/src/media/video/assemble_frame/h264_frame_assember.cpp +++ b/src/media/video/assemble_frame/h264_frame_assember.cpp @@ -109,6 +109,7 @@ std::vector> H264FrameAssembler::FindFrames( for (int64_t seq = seq_num_start; seq <= seq_num; ++seq) { auto& packet = GetPacketFromBuffer(seq); + LOG_INFO("2 seq:{}", seq); result.push_back(std::move(packet)); } break; diff --git a/src/rtp/rtp_packet/rtp_packet_h264.cpp b/src/rtp/rtp_packet/rtp_packet_h264.cpp index bca37c1..3bdcefd 100644 --- a/src/rtp/rtp_packet/rtp_packet_h264.cpp +++ b/src/rtp/rtp_packet/rtp_packet_h264.cpp @@ -10,19 +10,25 @@ bool RtpPacketH264::GetFrameHeaderInfo() { } const uint8_t* frame_buffer = Payload(); + size_t offset = 0; - fu_indicator_.forbidden_bit = (frame_buffer[0] >> 7) & 0x01; - fu_indicator_.nal_reference_idc = (frame_buffer[0] >> 5) & 0x03; - fu_indicator_.nal_unit_type = frame_buffer[0] & 0x1F; + if (rtp::PAYLOAD_TYPE::RTX == PayloadType()) { + osn_ = frame_buffer[0] << 8 | frame_buffer[0 + 1]; + offset = 2; + } + + fu_indicator_.forbidden_bit = (frame_buffer[0 + offset] >> 7) & 0x01; + fu_indicator_.nal_reference_idc = (frame_buffer[0 + offset] >> 5) & 0x03; + fu_indicator_.nal_unit_type = frame_buffer[0 + offset] & 0x1F; if (rtp::NAL_UNIT_TYPE::NALU == fu_indicator_.nal_unit_type) { - add_offset_to_payload(1); + add_offset_to_payload(1 + offset); } else if (rtp::NAL_UNIT_TYPE::FU_A == fu_indicator_.nal_unit_type) { - fu_header_.start = (frame_buffer[1] >> 7) & 0x01; - fu_header_.end = (frame_buffer[1] >> 6) & 0x01; - fu_header_.remain_bit = (frame_buffer[1] >> 5) & 0x01; - fu_header_.nal_unit_type = frame_buffer[1] & 0x1F; - add_offset_to_payload(2); + fu_header_.start = (frame_buffer[1 + offset] >> 7) & 0x01; + fu_header_.end = (frame_buffer[1 + offset] >> 6) & 0x01; + fu_header_.remain_bit = (frame_buffer[1 + offset] >> 5) & 0x01; + fu_header_.nal_unit_type = frame_buffer[1 + offset] & 0x1F; + add_offset_to_payload(2 + offset); } fu_info_got_ = true; diff --git a/src/rtp/rtp_packet/rtp_packet_h264.h b/src/rtp/rtp_packet/rtp_packet_h264.h index f9890e9..2e3ec94 100644 --- a/src/rtp/rtp_packet/rtp_packet_h264.h +++ b/src/rtp/rtp_packet/rtp_packet_h264.h @@ -23,7 +23,10 @@ class RtpPacketH264 : public RtpPacket { bool FuAStart() { return fu_header_.start; } bool FuAEnd() { return fu_header_.end; } + uint16_t GetOsn() { return osn_; } + private: + uint16_t osn_; rtp::FU_INDICATOR fu_indicator_; rtp::FU_HEADER fu_header_; bool fu_info_got_ = false; diff --git a/src/rtp/rtp_packet/rtp_packet_to_send.cpp b/src/rtp/rtp_packet/rtp_packet_to_send.cpp index fb1f166..80f4521 100644 --- a/src/rtp/rtp_packet/rtp_packet_to_send.cpp +++ b/src/rtp/rtp_packet/rtp_packet_to_send.cpp @@ -77,9 +77,7 @@ bool RtpPacketToSend::BuildRtxPacket() { uint32_t ssrc = Ssrc(); std::vector csrcs = Csrcs(); - uint32_t timestamp = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + uint32_t timestamp = Timestamp(); if (!csrc_count) { } diff --git a/src/rtp/rtp_packet/rtx_packet.h b/src/rtp/rtp_packet/rtx_packet.h index 690d385..0920775 100644 --- a/src/rtp/rtp_packet/rtx_packet.h +++ b/src/rtp/rtp_packet/rtx_packet.h @@ -19,7 +19,7 @@ // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | // | Original RTP Packet Payload | // | | -//+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ class RtxPacket { public: diff --git a/src/transport/channel/rtp_video_receiver.cpp b/src/transport/channel/rtp_video_receiver.cpp index a3f61ed..ca3daff 100644 --- a/src/transport/channel/rtp_video_receiver.cpp +++ b/src/transport/channel/rtp_video_receiver.cpp @@ -196,128 +196,103 @@ void RtpVideoReceiver::InsertRtpPacket(RtpPacket& rtp_packet) { } else if (rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::H264 || rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::H264 - 1 || rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::RTX) { - // RtpPacketH264 rtp_packet_h264; - // if (rtp_packet_h264.Build(rtp_packet.Buffer().data(), rtp_packet.Size())) - // { - // rtp_packet_h264.GetFrameHeaderInfo(); - - // ProcessH264RtpPacket(rtp_packet_h264); - // if (rtp_packet.PayloadType() != rtp::PAYLOAD_TYPE::RTX) { - // receive_side_congestion_controller_.OnReceivedPacket( - // rtp_packet_received, MediaType::VIDEO); - // nack_->OnReceivedPacket(rtp_packet.SequenceNumber(), true); - // } else { - // nack_->OnReceivedPacket(rtp_packet.SequenceNumber(), false); - // } - // } - - std::unique_ptr rtp_packet_h264 = - std::make_unique(); - if (rtp_packet.Buffer().data() != nullptr && rtp_packet.Size() > 0 && - rtp_packet_h264->Build(rtp_packet.Buffer().data(), rtp_packet.Size())) { - rtp_packet_h264->GetFrameHeaderInfo(); - - if (rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::RTX) { + RtpPacketH264 rtp_packet_h264; + if (rtp_packet_h264.Build(rtp_packet.Buffer().data(), rtp_packet.Size())) { + rtp_packet_h264.GetFrameHeaderInfo(); + if (rtp_packet.PayloadType() != rtp::PAYLOAD_TYPE::RTX) { receive_side_congestion_controller_.OnReceivedPacket( rtp_packet_received, MediaType::VIDEO); - - size_t osn_offset = rtp_packet.HeaderSize(); - uint16_t osn = rtp_packet.Buffer().data()[osn_offset] << 8 | - rtp_packet.Buffer().data()[osn_offset + 1]; - nack_->OnReceivedPacket(osn, true); - } else { nack_->OnReceivedPacket(rtp_packet.SequenceNumber(), false); - } - - rtp::NAL_UNIT_TYPE nalu_type = rtp_packet_h264->NalUnitType(); - if (rtp::NAL_UNIT_TYPE::NALU == nalu_type) { - ReceivedFrame received_frame(rtp_packet_h264->Payload(), - rtp_packet_h264->PayloadSize()); - received_frame.SetReceivedTimestamp(clock_->CurrentTime().us()); - received_frame.SetCapturedTimestamp( - (static_cast(rtp_packet_h264->Timestamp()) / - rtp::kMsToRtpTimestamp - - delta_ntp_internal_ms_) * - 1000); - compelete_video_frame_queue_.push(received_frame); - } else if (rtp::NAL_UNIT_TYPE::FU_A == nalu_type) { - std::vector> complete_frame = - h264_frame_assembler_.InsertPacket(std::move(rtp_packet_h264)); - if (!complete_frame.empty()) { - uint8_t* nv12_data_ = new uint8_t[NV12_BUFFER_SIZE]; - uint8_t* dest = nv12_data_; - size_t complete_frame_size = 0; - for (auto& frame : complete_frame) { - memcpy(dest, frame->Payload(), frame->PayloadSize()); - dest += frame->PayloadSize(); - complete_frame_size += frame->PayloadSize(); - } - - ReceivedFrame received_frame(nv12_data_, complete_frame_size); - received_frame.SetReceivedTimestamp(clock_->CurrentTime().us()); - received_frame.SetCapturedTimestamp( - (static_cast(complete_frame[0]->Timestamp()) / - rtp::kMsToRtpTimestamp - - delta_ntp_internal_ms_) * - 1000); - compelete_video_frame_queue_.push(received_frame); - - delete[] nv12_data_; - } + } else { + nack_->OnReceivedPacket(rtp_packet_h264.GetOsn(), true); } } + ProcessH264RtpPacket(rtp_packet_h264); + + // std::unique_ptr rtp_packet_h264 = + // std::make_unique(); + // if (rtp_packet.Buffer().data() != nullptr && rtp_packet.Size() > 0 && + // rtp_packet_h264->Build(rtp_packet.Buffer().data(), + // rtp_packet.Size())) { + // rtp_packet_h264->GetFrameHeaderInfo(); + + // if (rtp_packet.PayloadType() == rtp::PAYLOAD_TYPE::RTX) { + // size_t osn_offset = rtp_packet.HeaderSize(); + // uint16_t osn = rtp_packet.Buffer().data()[osn_offset] << 8 | + // rtp_packet.Buffer().data()[osn_offset + 1]; + // nack_->OnReceivedPacket(osn, true); + // } else { + // receive_side_congestion_controller_.OnReceivedPacket( + // rtp_packet_received, MediaType::VIDEO); + // nack_->OnReceivedPacket(rtp_packet.SequenceNumber(), false); + // } + + // rtp::NAL_UNIT_TYPE nalu_type = rtp_packet_h264->NalUnitType(); + // if (rtp::NAL_UNIT_TYPE::NALU == nalu_type) { + // ReceivedFrame received_frame(rtp_packet_h264->Payload(), + // rtp_packet_h264->PayloadSize()); + // received_frame.SetReceivedTimestamp(clock_->CurrentTime().us()); + // received_frame.SetCapturedTimestamp( + // (static_cast(rtp_packet_h264->Timestamp()) / + // rtp::kMsToRtpTimestamp - + // delta_ntp_internal_ms_) * + // 1000); + // compelete_video_frame_queue_.push(received_frame); + // } else if (rtp::NAL_UNIT_TYPE::FU_A == nalu_type) { + // std::vector> complete_frame = + // h264_frame_assembler_.InsertPacket(std::move(rtp_packet_h264)); + // if (!complete_frame.empty()) { + // uint8_t* nv12_data_ = new uint8_t[NV12_BUFFER_SIZE]; + // uint8_t* dest = nv12_data_; + // size_t complete_frame_size = 0; + // for (auto& frame : complete_frame) { + // memcpy(dest, frame->Payload(), frame->PayloadSize()); + // dest += frame->PayloadSize(); + // complete_frame_size += frame->PayloadSize(); + // } + + // ReceivedFrame received_frame(nv12_data_, complete_frame_size); + // received_frame.SetReceivedTimestamp(clock_->CurrentTime().us()); + // received_frame.SetCapturedTimestamp( + // (static_cast(complete_frame[0]->Timestamp()) / + // rtp::kMsToRtpTimestamp - + // delta_ntp_internal_ms_) * + // 1000); + // compelete_video_frame_queue_.push(received_frame); + + // delete[] nv12_data_; + // } + // } + // } } } -bool RtpVideoReceiver::ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264) { - bool is_missing_packet = false; - - if (rtp_packet_h264.PayloadType() == rtp::PAYLOAD_TYPE::RTX) { - is_missing_packet = true; - } - +void RtpVideoReceiver::ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264) { if (!fec_enable_) { - if (rtp::PAYLOAD_TYPE::H264 == rtp_packet_h264.PayloadType()) { - rtp::NAL_UNIT_TYPE nalu_type = rtp_packet_h264.NalUnitType(); - if (rtp::NAL_UNIT_TYPE::NALU == nalu_type) { - ReceivedFrame received_frame(rtp_packet_h264.Payload(), - rtp_packet_h264.PayloadSize()); - received_frame.SetReceivedTimestamp(clock_->CurrentTime().us()); - received_frame.SetCapturedTimestamp( - (static_cast(rtp_packet_h264.Timestamp()) / - rtp::kMsToRtpTimestamp - - delta_ntp_internal_ms_) * - 1000); - compelete_video_frame_queue_.push(received_frame); - } else if (rtp::NAL_UNIT_TYPE::FU_A == nalu_type) { + rtp::NAL_UNIT_TYPE nalu_type = rtp_packet_h264.NalUnitType(); + if (rtp::NAL_UNIT_TYPE::NALU == nalu_type) { + ReceivedFrame received_frame(rtp_packet_h264.Payload(), + rtp_packet_h264.PayloadSize()); + received_frame.SetReceivedTimestamp(clock_->CurrentTime().us()); + received_frame.SetCapturedTimestamp( + (static_cast(rtp_packet_h264.Timestamp()) / + rtp::kMsToRtpTimestamp - + delta_ntp_internal_ms_) * + 1000); + compelete_video_frame_queue_.push(received_frame); + } else if (rtp::NAL_UNIT_TYPE::FU_A == nalu_type) { + if (rtp::PAYLOAD_TYPE::H264 == rtp_packet_h264.PayloadType()) { incomplete_h264_frame_list_[rtp_packet_h264.SequenceNumber()] = rtp_packet_h264; - if (rtp_packet_h264.FuAEnd()) { - CheckIsH264FrameCompletedFuaEndReceived(rtp_packet_h264); - } else { - auto missing_seqs_iter = - missing_sequence_numbers_.find(rtp_packet_h264.Timestamp()); - auto missing_seqs_wait_ts_iter = - missing_sequence_numbers_wait_time_.find( - rtp_packet_h264.Timestamp()); - if (missing_seqs_wait_ts_iter != - missing_sequence_numbers_wait_time_.end()) { - if (clock_->CurrentTime().ms() - - missing_seqs_wait_ts_iter->second <= - MAX_WAIT_TIME_MS) { - CheckIsH264FrameCompletedMissSeqReceived(rtp_packet_h264); - is_missing_packet = true; - - } else { - missing_sequence_numbers_wait_time_.erase( - missing_seqs_wait_ts_iter); - missing_sequence_numbers_.erase(missing_seqs_iter); - } - } - } + CheckIsH264FrameCompleted(rtp_packet_h264, rtp_packet_h264.FuAStart(), + rtp_packet_h264.FuAEnd(), false); + } else if (rtp::PAYLOAD_TYPE::RTX == rtp_packet_h264.PayloadType()) { + incomplete_h264_frame_list_[rtp_packet_h264.GetOsn()] = rtp_packet_h264; + CheckIsH264FrameCompleted(rtp_packet_h264, rtp_packet_h264.FuAStart(), + rtp_packet_h264.FuAEnd(), true); + } else if (rtp::PAYLOAD_TYPE::H264 - 1 == rtp_packet_h264.PayloadType()) { + padding_sequence_numbers_.insert(rtp_packet_h264.SequenceNumber()); } - } else if (rtp::PAYLOAD_TYPE::H264 - 1 == rtp_packet_h264.PayloadType()) { - padding_sequence_numbers_.insert(rtp_packet_h264.SequenceNumber()); } } // else { @@ -429,8 +404,6 @@ bool RtpVideoReceiver::ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264) { // } // } // } - - return is_missing_packet; } void RtpVideoReceiver::ProcessAv1RtpPacket(RtpPacketAv1& rtp_packet_av1) { @@ -454,6 +427,69 @@ void RtpVideoReceiver::ProcessAv1RtpPacket(RtpPacketAv1& rtp_packet_av1) { // } } +bool RtpVideoReceiver::CheckIsH264FrameCompleted(RtpPacketH264& rtp_packet_h264, + bool is_start, bool is_end, + bool is_rtx) { + uint32_t timestamp = rtp_packet_h264.Timestamp(); + uint16_t seq, start_seq, end_seq; + + if (is_rtx) { + seq = rtp_packet_h264.GetOsn(); + } else { + seq = rtp_packet_h264.SequenceNumber(); + } + + if (is_start) { + fua_start_sequence_numbers_[timestamp] = seq; + } + + if (is_end) { + fua_end_sequence_numbers_[timestamp] = seq; + if (missing_sequence_numbers_wait_time_.find(timestamp) == + missing_sequence_numbers_wait_time_.end()) { + missing_sequence_numbers_wait_time_[timestamp] = + clock_->CurrentTime().ms(); + } + } + + if (fua_end_sequence_numbers_.find(timestamp) == + fua_end_sequence_numbers_.end()) { + return false; + } + end_seq = fua_end_sequence_numbers_[timestamp]; + + if (fua_start_sequence_numbers_.find(timestamp) == + fua_start_sequence_numbers_.end()) { + return false; + } + start_seq = fua_start_sequence_numbers_[timestamp]; + + if (is_rtx && fua_end_sequence_numbers_.find(timestamp) != + fua_end_sequence_numbers_.end()) { + auto missing_seqs_wait_ts_iter = + missing_sequence_numbers_wait_time_.find(timestamp); + if (missing_seqs_wait_ts_iter != + missing_sequence_numbers_wait_time_.end()) { + if (clock_->CurrentTime().ms() - missing_seqs_wait_ts_iter->second > + MAX_WAIT_TIME_MS) { + missing_sequence_numbers_wait_time_.erase(missing_seqs_wait_ts_iter); + LOG_WARN("rtx packet seq {} is timeout", seq); + return false; + } + } + } + + for (uint16_t sequence_number = start_seq; sequence_number <= end_seq; + ++sequence_number) { + if (incomplete_h264_frame_list_.find(sequence_number) == + incomplete_h264_frame_list_.end()) { + return false; + } + } + + return PopCompleteFrame(start_seq, end_seq, timestamp); +} + bool RtpVideoReceiver::CheckIsH264FrameCompletedFuaEndReceived( RtpPacketH264& rtp_packet_h264) { uint32_t timestamp = rtp_packet_h264.Timestamp(); @@ -462,16 +498,14 @@ bool RtpVideoReceiver::CheckIsH264FrameCompletedFuaEndReceived( uint16_t start_seq = 0; bool has_start = false; bool has_missing = false; - missing_sequence_numbers_wait_time_[timestamp] = clock_->CurrentTime().ms(); + if (missing_sequence_numbers_wait_time_.find(timestamp) == + missing_sequence_numbers_wait_time_.end()) { + missing_sequence_numbers_wait_time_[timestamp] = clock_->CurrentTime().ms(); + } for (uint16_t seq = end_seq; seq > 0; --seq) { auto it = incomplete_h264_frame_list_.find(seq); - if (it == incomplete_h264_frame_list_.end()) { - if (padding_sequence_numbers_.find(seq) == - padding_sequence_numbers_.end()) { - missing_sequence_numbers_[timestamp].insert(seq); - } - } else if (it->second.FuAStart()) { + if (it->second.FuAStart()) { start_seq = seq; has_start = true; break; @@ -482,13 +516,6 @@ bool RtpVideoReceiver::CheckIsH264FrameCompletedFuaEndReceived( return false; } - if (missing_sequence_numbers_.find(timestamp) != - missing_sequence_numbers_.end()) { - if (!missing_sequence_numbers_[timestamp].empty()) { - return false; - } - } - return PopCompleteFrame(start_seq, end_seq, timestamp); } @@ -523,19 +550,16 @@ bool RtpVideoReceiver::CheckIsH264FrameCompletedMissSeqReceived( return false; } - if (missing_sequence_numbers_.find(timestamp) != - missing_sequence_numbers_.end() && - missing_sequence_numbers_wait_time_.find(timestamp) != - missing_sequence_numbers_wait_time_.end()) { - if (!missing_sequence_numbers_[timestamp].empty()) { - int64_t wait_time = clock_->CurrentTime().us() - - missing_sequence_numbers_wait_time_[timestamp]; - if (wait_time < MAX_WAIT_TIME_MS) { - return false; - } + if (missing_sequence_numbers_wait_time_.find(timestamp) != + missing_sequence_numbers_wait_time_.end()) { + int64_t wait_time = clock_->CurrentTime().us() - + missing_sequence_numbers_wait_time_[timestamp]; + if (wait_time < MAX_WAIT_TIME_MS) { + return false; } } + LOG_WARN("complete frame"); return PopCompleteFrame(start_seq, end_seq, timestamp); } @@ -582,9 +606,9 @@ bool RtpVideoReceiver::PopCompleteFrame(uint16_t start_seq, uint16_t end_seq, delta_ntp_internal_ms_) * 1000); + fua_start_sequence_numbers_.erase(timestamp); fua_end_sequence_numbers_.erase(timestamp); missing_sequence_numbers_wait_time_.erase(timestamp); - missing_sequence_numbers_.erase(timestamp); compelete_video_frame_queue_.push(received_frame); return true; diff --git a/src/transport/channel/rtp_video_receiver.h b/src/transport/channel/rtp_video_receiver.h index fad14e5..bbda3f3 100644 --- a/src/transport/channel/rtp_video_receiver.h +++ b/src/transport/channel/rtp_video_receiver.h @@ -58,7 +58,9 @@ class RtpVideoReceiver : public ThreadBase, bool CheckIsAv1FrameCompleted(RtpPacketAv1& rtp_packet_av1); private: - bool ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264); + void ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264); + bool CheckIsH264FrameCompleted(RtpPacketH264& rtp_packet_h264, bool is_start, + bool is_end, bool is_rtx); bool CheckIsH264FrameCompletedFuaEndReceived(RtpPacketH264& rtp_packet_h264); bool CheckIsH264FrameCompletedMissSeqReceived(RtpPacketH264& rtp_packet_h264); bool PopCompleteFrame(uint16_t start_seq, uint16_t end_seq, @@ -125,6 +127,7 @@ class RtpVideoReceiver : public ThreadBase, std::unordered_map> missing_sequence_numbers_; std::unordered_map fua_end_sequence_numbers_; + std::unordered_map fua_start_sequence_numbers_; std::unordered_map missing_sequence_numbers_wait_time_; H264FrameAssembler h264_frame_assembler_; diff --git a/src/transport/channel/video_channel_send.cpp b/src/transport/channel/video_channel_send.cpp index 189c5bd..9c0a05a 100644 --- a/src/transport/channel/video_channel_send.cpp +++ b/src/transport/channel/video_channel_send.cpp @@ -102,7 +102,6 @@ int VideoChannelSend::SendVideo(const EncodedFrame& encoded_frame) { fwrite((unsigned char*)encoded_frame.Buffer(), 1, encoded_frame.Size(), file_rtp_sent_); #endif - packet_sender_->EnqueueRtpPackets(std::move(rtp_packets), rtp_timestamp); }