[feat] add new classes EncodedFrame/DecodedFrame/ReceivedFrame for video frame module

This commit is contained in:
dijunkun
2025-03-19 18:36:55 +08:00
parent 1cd9ea1b0e
commit b50175f943
49 changed files with 485 additions and 203 deletions

View File

@@ -29,6 +29,8 @@ RtpVideoReceiver::RtpVideoReceiver(std::shared_ptr<SystemClock> clock)
},
1200)),
nack_(std::make_unique<NackRequester>(clock_, this, this)),
delta_ntp_internal_ms_(clock->CurrentNtpInMilliseconds() -
clock->CurrentTimeMs()),
clock_(webrtc::Clock::GetWebrtcClockShared(clock)) {
SetPeriod(std::chrono::milliseconds(5));
rtcp_thread_ = std::thread(&RtpVideoReceiver::RtcpThread, this);
@@ -217,8 +219,15 @@ void RtpVideoReceiver::ProcessH264RtpPacket(RtpPacketH264& rtp_packet_h264) {
if (rtp::PAYLOAD_TYPE::H264 == rtp_packet_h264.PayloadType()) {
rtp::NAL_UNIT_TYPE nalu_type = rtp_packet_h264.NalUnitType();
if (rtp::NAL_UNIT_TYPE::NALU == nalu_type) {
compelete_video_frame_queue_.push(VideoFrame(
rtp_packet_h264.Payload(), rtp_packet_h264.PayloadSize()));
ReceivedFrame received_frame(rtp_packet_h264.Payload(),
rtp_packet_h264.PayloadSize());
received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
received_frame.SetCapturedTimestamp(
(static_cast<int64_t>(rtp_packet_h264.Timestamp()) /
rtp::kMsToRtpTimestamp -
delta_ntp_internal_ms_) *
1000);
compelete_video_frame_queue_.push(received_frame);
} else if (rtp::NAL_UNIT_TYPE::FU_A == nalu_type) {
incomplete_h264_frame_list_[rtp_packet_h264.SequenceNumber()] =
rtp_packet_h264;
@@ -409,8 +418,15 @@ bool RtpVideoReceiver::CheckIsH264FrameCompleted(
incomplete_h264_frame_list_.erase(seq);
frame_fragment_count++;
}
compelete_video_frame_queue_.push(
VideoFrame(nv12_data_, complete_frame_size));
ReceivedFrame received_frame(nv12_data_, complete_frame_size);
received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
received_frame.SetCapturedTimestamp(
(static_cast<int64_t>(rtp_packet_h264.Timestamp()) /
rtp::kMsToRtpTimestamp -
delta_ntp_internal_ms_) *
1000);
compelete_video_frame_queue_.push(received_frame);
return true;
} else {
@@ -461,8 +477,14 @@ bool RtpVideoReceiver::CheckIsAv1FrameCompleted(RtpPacketAv1& rtp_packet_av1) {
incomplete_av1_frame_list_.erase(start);
}
compelete_video_frame_queue_.push(
VideoFrame(nv12_data_, complete_frame_size));
ReceivedFrame received_frame(nv12_data_, complete_frame_size);
received_frame.SetReceivedTimestamp(clock_->CurrentTime().us());
received_frame.SetCapturedTimestamp(
(static_cast<int64_t>(rtp_packet_av1.Timestamp()) /
rtp::kMsToRtpTimestamp -
delta_ntp_internal_ms_) *
1000);
compelete_video_frame_queue_.push(received_frame);
return true;
}
@@ -546,14 +568,15 @@ bool RtpVideoReceiver::CheckIsTimeSendRR() {
bool RtpVideoReceiver::Process() {
if (!compelete_video_frame_queue_.isEmpty()) {
std::optional<VideoFrame> video_frame = compelete_video_frame_queue_.pop();
std::optional<ReceivedFrame> video_frame =
compelete_video_frame_queue_.pop();
if (on_receive_complete_frame_ && video_frame) {
// auto now_complete_frame_ts =
// std::chrono::duration_cast<std::chrono::milliseconds>(
// std::chrono::system_clock::now().time_since_epoch())
// .count();
// uint32_t duration = now_complete_frame_ts - last_complete_frame_ts_;
// LOG_ERROR("Duration {}", duration);
// uint32_t duration = now_complete_frame_ts -
// last_complete_frame_ts_; LOG_ERROR("Duration {}", duration);
// last_complete_frame_ts_ = now_complete_frame_ts;
on_receive_complete_frame_(*video_frame);
@@ -577,13 +600,13 @@ void RtpVideoReceiver::ReviseFrequencyAndJitter(int payload_type_frequency) {
if (last_payload_type_frequency_ != 0) {
// Value in "jitter_q4_" variable is a number of samples.
// I.e. jitter = timestamp (s) * frequency (Hz).
// Since the frequency has changed we have to update the number of samples
// accordingly. The new value should rely on a new frequency.
// Since the frequency has changed we have to update the number of
// samples accordingly. The new value should rely on a new frequency.
// If we don't do such procedure we end up with the number of samples that
// cannot be converted into TimeDelta correctly
// (i.e. jitter = jitter_q4_ >> 4 / payload_type_frequency).
// In such case, the number of samples has a "mix".
// If we don't do such procedure we end up with the number of samples
// that cannot be converted into TimeDelta correctly (i.e. jitter =
// jitter_q4_ >> 4 / payload_type_frequency). In such case, the number
// of samples has a "mix".
// Doing so we pretend that everything prior and including the current
// packet were computed on packet's frequency.

View File

@@ -13,6 +13,7 @@
#include "io_statistics.h"
#include "nack_requester.h"
#include "receive_side_congestion_controller.h"
#include "received_frame.h"
#include "receiver_report.h"
#include "ringbuffer.h"
#include "rtcp_sender.h"
@@ -22,7 +23,6 @@
#include "rtp_statistics.h"
#include "sender_report.h"
#include "thread_base.h"
#include "video_frame.h"
using namespace webrtc;
@@ -42,7 +42,7 @@ class RtpVideoReceiver : public ThreadBase,
void SetSendDataFunc(std::function<int(const char*, size_t)> data_send_func);
void SetOnReceiveCompleteFrame(
std::function<void(VideoFrame&)> on_receive_complete_frame) {
std::function<void(const ReceivedFrame&)> on_receive_complete_frame) {
on_receive_complete_frame_ = on_receive_complete_frame;
}
uint32_t GetSsrc() { return ssrc_; }
@@ -89,9 +89,10 @@ class RtpVideoReceiver : public ThreadBase,
std::map<uint16_t, RtpPacketAv1> incomplete_av1_frame_list_;
std::map<uint16_t, RtpPacket> incomplete_frame_list_;
uint8_t* nv12_data_ = nullptr;
std::function<void(VideoFrame&)> on_receive_complete_frame_ = nullptr;
std::function<void(const ReceivedFrame&)> on_receive_complete_frame_ =
nullptr;
uint32_t last_complete_frame_ts_ = 0;
RingBuffer<VideoFrame> compelete_video_frame_queue_;
RingBuffer<ReceivedFrame> compelete_video_frame_queue_;
private:
std::unique_ptr<RtpStatistics> rtp_statistics_ = nullptr;
@@ -162,6 +163,7 @@ class RtpVideoReceiver : public ThreadBase,
private:
FILE* file_rtp_recv_ = nullptr;
int64_t delta_ntp_internal_ms_;
};
#endif

View File

@@ -45,7 +45,7 @@ RtpVideoSender::~RtpVideoSender() {
void RtpVideoSender::Enqueue(
std::vector<std::unique_ptr<RtpPacket>>& rtp_packets,
int64_t capture_timestamp_us) {
int64_t captured_timestamp_us) {
if (!rtp_statistics_) {
rtp_statistics_ = std::make_unique<RtpStatistics>();
rtp_statistics_->Start();

View File

@@ -24,7 +24,7 @@ class RtpVideoSender : public ThreadBase {
public:
void Enqueue(std::vector<std::unique_ptr<RtpPacket>> &rtp_packets,
int64_t capture_timestamp_us);
int64_t captured_timestamp_us);
void SetSendDataFunc(std::function<int(const char *, size_t)> data_send_func);
void SetOnSentPacketFunc(
std::function<void(const webrtc::RtpPacketToSend &)> on_sent_packet_func);

View File

@@ -7,7 +7,7 @@ VideoChannelReceive::VideoChannelReceive() {}
VideoChannelReceive::VideoChannelReceive(
std::shared_ptr<SystemClock> clock, std::shared_ptr<IceAgent> ice_agent,
std::shared_ptr<IOStatistics> ice_io_statistics,
std::function<void(VideoFrame &)> on_receive_complete_frame)
std::function<void(const ReceivedFrame &)> on_receive_complete_frame)
: ice_agent_(ice_agent),
ice_io_statistics_(ice_io_statistics),
on_receive_complete_frame_(on_receive_complete_frame),
@@ -19,8 +19,8 @@ void VideoChannelReceive::Initialize(rtp::PAYLOAD_TYPE payload_type) {
rtp_video_receiver_ =
std::make_unique<RtpVideoReceiver>(clock_, ice_io_statistics_);
rtp_video_receiver_->SetOnReceiveCompleteFrame(
[this](VideoFrame &video_frame) -> void {
on_receive_complete_frame_(video_frame);
[this](const ReceivedFrame &received_frame) -> void {
on_receive_complete_frame_(received_frame);
});
rtp_video_receiver_->SetSendDataFunc([this](const char *data,

View File

@@ -17,7 +17,7 @@ class VideoChannelReceive {
VideoChannelReceive(
std::shared_ptr<SystemClock> clock, std::shared_ptr<IceAgent> ice_agent,
std::shared_ptr<IOStatistics> ice_io_statistics,
std::function<void(VideoFrame &)> on_receive_complete_frame);
std::function<void(const ReceivedFrame &)> on_receive_complete_frame);
~VideoChannelReceive();
@@ -51,7 +51,8 @@ class VideoChannelReceive {
std::shared_ptr<IceAgent> ice_agent_ = nullptr;
std::shared_ptr<IOStatistics> ice_io_statistics_ = nullptr;
std::unique_ptr<RtpVideoReceiver> rtp_video_receiver_ = nullptr;
std::function<void(VideoFrame &)> on_receive_complete_frame_ = nullptr;
std::function<void(const ReceivedFrame &)> on_receive_complete_frame_ =
nullptr;
private:
std::shared_ptr<SystemClock> clock_;

View File

@@ -17,6 +17,8 @@ VideoChannelSend::VideoChannelSend(
packet_sender_(packet_sender),
ice_io_statistics_(ice_io_statistics),
on_sent_packet_func_(on_sent_packet_func),
delta_ntp_internal_ms_(clock->CurrentNtpInMilliseconds() -
clock->CurrentTimeMs()),
clock_(clock){};
void VideoChannelSend::Initialize(rtp::PAYLOAD_TYPE payload_type) {
@@ -57,9 +59,9 @@ void VideoChannelSend::SetEnqueuePacketsFunc(
}
std::vector<std::unique_ptr<RtpPacket>> VideoChannelSend::GeneratePadding(
uint32_t payload_size, int64_t capture_timestamp_us) {
uint32_t payload_size, int64_t captured_timestamp_us) {
if (rtp_packetizer_) {
return rtp_packetizer_->BuildPadding(payload_size, capture_timestamp_us,
return rtp_packetizer_->BuildPadding(payload_size, captured_timestamp_us,
true);
}
return std::vector<std::unique_ptr<RtpPacket>>{};
@@ -71,15 +73,16 @@ void VideoChannelSend::Destroy() {
}
}
int VideoChannelSend::SendVideo(
std::shared_ptr<VideoFrameWrapper> encoded_frame) {
int VideoChannelSend::SendVideo(std::shared_ptr<EncodedFrame> encoded_frame) {
if (rtp_video_sender_ && rtp_packetizer_) {
int64_t rtp_timestamp =
delta_ntp_internal_ms_ +
static_cast<uint32_t>(encoded_frame->CapturedTimestamp() / 1000);
std::vector<std::unique_ptr<RtpPacket>> rtp_packets =
rtp_packetizer_->Build((uint8_t*)encoded_frame->Buffer(),
(uint32_t)encoded_frame->Size(),
encoded_frame->CaptureTimestamp(), true);
packet_sender_->EnqueueRtpPacket(std::move(rtp_packets),
encoded_frame->CaptureTimestamp());
(uint32_t)encoded_frame->Size(), rtp_timestamp,
true);
packet_sender_->EnqueueRtpPacket(std::move(rtp_packets), rtp_timestamp);
}
return 0;

View File

@@ -12,12 +12,12 @@
#include "clock/system_clock.h"
#include "congestion_control.h"
#include "congestion_control_feedback.h"
#include "encoded_frame.h"
#include "ice_agent.h"
#include "packet_sender.h"
#include "rtp_packetizer.h"
#include "rtp_video_sender.h"
#include "transport_feedback_adapter.h"
#include "video_frame_wrapper.h"
class VideoChannelSend {
public:
@@ -36,7 +36,7 @@ class VideoChannelSend {
enqueue_packets_func);
std::vector<std::unique_ptr<RtpPacket>> GeneratePadding(
uint32_t payload_size, int64_t capture_timestamp_us);
uint32_t payload_size, int64_t captured_timestamp_us);
int64_t GetTransportSeqAndIncrement() {
int64_t transport_seq = rtp_video_sender_->GetTransportSequenceNumber();
@@ -55,7 +55,7 @@ class VideoChannelSend {
return 0;
}
int SendVideo(std::shared_ptr<VideoFrameWrapper> encoded_frame);
int SendVideo(std::shared_ptr<EncodedFrame> encoded_frame);
void OnCongestionControlFeedback(
Timestamp recv_ts,
@@ -84,6 +84,7 @@ class VideoChannelSend {
private:
std::shared_ptr<SystemClock> clock_;
int64_t delta_ntp_internal_ms_;
};
#endif

View File

@@ -49,7 +49,7 @@ void IceTransportController::Create(
on_receive_data_ = on_receive_data;
user_data_ = user_data;
CreateVideoCodec(video_codec_payload_type, hardware_acceleration);
CreateVideoCodec(clock_, video_codec_payload_type, hardware_acceleration);
CreateAudioCodec();
controller_ = std::make_unique<CongestionControl>();
@@ -75,9 +75,10 @@ void IceTransportController::Create(
});
packet_sender_->SetGeneratePaddingFunc(
[this](uint32_t size, int64_t capture_timestamp_us)
[this](uint32_t size, int64_t captured_timestamp_us)
-> std::vector<std::unique_ptr<RtpPacket>> {
return video_channel_send_->GeneratePadding(size, capture_timestamp_us);
return video_channel_send_->GeneratePadding(size,
captured_timestamp_us);
});
audio_channel_send_ = std::make_unique<AudioChannelSend>(
@@ -96,9 +97,9 @@ void IceTransportController::Create(
std::weak_ptr<IceTransportController> weak_self = shared_from_this();
video_channel_receive_ = std::make_unique<VideoChannelReceive>(
clock_, ice_agent, ice_io_statistics,
[this, weak_self](VideoFrame& video_frame) {
[this, weak_self](const ReceivedFrame& received_frame) {
if (auto self = weak_self.lock()) {
OnReceiveCompleteFrame(video_frame);
OnReceiveCompleteFrame(received_frame);
}
});
@@ -170,7 +171,7 @@ int IceTransportController::SendVideo(const XVideoFrame* video_frame) {
new_frame.width = video_frame->width;
new_frame.height = video_frame->height;
new_frame.size = video_frame->size;
new_frame.timestamp = video_frame->timestamp;
new_frame.captured_timestamp = video_frame->captured_timestamp;
if (target_width_.has_value() && target_height_.has_value()) {
if (target_width_.value() < video_frame->width &&
target_height_.value() < video_frame->height) {
@@ -183,7 +184,7 @@ int IceTransportController::SendVideo(const XVideoFrame* video_frame) {
int ret = video_encoder_->Encode(
need_to_release ? &new_frame : video_frame,
[this](std::shared_ptr<VideoFrameWrapper> encoded_frame) -> int {
[this](std::shared_ptr<EncodedFrame> encoded_frame) -> int {
if (video_channel_send_) {
video_channel_send_->SendVideo(encoded_frame);
}
@@ -268,16 +269,19 @@ int IceTransportController::OnReceiveDataRtpPacket(const char* data,
return -1;
}
void IceTransportController::OnReceiveCompleteFrame(VideoFrame& video_frame) {
void IceTransportController::OnReceiveCompleteFrame(
const ReceivedFrame& received_frame) {
int num_frame_returned = video_decoder_->Decode(
(uint8_t*)video_frame.Buffer(), video_frame.Size(),
[this](VideoFrame video_frame) {
received_frame, [this](DecodedFrame decoded_frame) {
if (on_receive_video_) {
XVideoFrame x_video_frame;
x_video_frame.data = (const char*)video_frame.Buffer();
x_video_frame.width = video_frame.Width();
x_video_frame.height = video_frame.Height();
x_video_frame.size = video_frame.Size();
x_video_frame.data = (const char*)decoded_frame.Buffer();
x_video_frame.width = decoded_frame.Width();
x_video_frame.height = decoded_frame.Height();
x_video_frame.size = decoded_frame.Size();
x_video_frame.captured_timestamp = decoded_frame.CapturedTimestamp();
x_video_frame.received_timestamp = decoded_frame.ReceivedTimestamp();
x_video_frame.decoded_timestamp = decoded_frame.DecodedTimestamp();
on_receive_video_(&x_video_frame, remote_user_id_.data(),
remote_user_id_.size(), user_data_);
}
@@ -303,7 +307,8 @@ void IceTransportController::OnReceiveCompleteData(const char* data,
}
}
int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
int IceTransportController::CreateVideoCodec(std::shared_ptr<SystemClock> clock,
rtp::PAYLOAD_TYPE video_pt,
bool hardware_acceleration) {
if (video_codec_inited_) {
return 0;
@@ -316,8 +321,10 @@ int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
hardware_acceleration_ = false;
LOG_WARN("Only support software codec for AV1");
}
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, true);
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, true);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, false, true);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, false, true);
} else if (rtp::PAYLOAD_TYPE::H264 == video_pt) {
#ifdef __APPLE__
if (hardware_acceleration_) {
@@ -325,34 +332,45 @@ int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
LOG_WARN(
"MacOS not support hardware acceleration, use default software "
"codec");
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
} else {
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
}
#else
if (hardware_acceleration_) {
if (0 == LoadNvCodecDll()) {
load_nvcodec_dll_success_ = true;
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(true, false);
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(true, false);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, true, false);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, true, false);
} else {
LOG_WARN(
"Hardware accelerated codec not available, use default software "
"codec");
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
}
} else {
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
}
#endif
}
if (!video_encoder_) {
video_encoder_ = VideoEncoderFactory::CreateVideoEncoder(false, false);
video_encoder_ =
VideoEncoderFactory::CreateVideoEncoder(clock, false, false);
LOG_ERROR("Create encoder failed, try to use software H.264 encoder");
}
if (!video_encoder_ || 0 != video_encoder_->Init()) {
@@ -361,7 +379,8 @@ int IceTransportController::CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt,
}
if (!video_decoder_) {
video_decoder_ = VideoDecoderFactory::CreateVideoDecoder(false, false);
video_decoder_ =
VideoDecoderFactory::CreateVideoDecoder(clock, false, false);
LOG_ERROR("Create decoder failed, try to use software H.264 decoder");
}
if (!video_decoder_ || video_decoder_->Init()) {

View File

@@ -65,7 +65,7 @@ class IceTransportController
int OnReceiveAudioRtpPacket(const char *data, size_t size);
int OnReceiveDataRtpPacket(const char *data, size_t size);
void OnReceiveCompleteFrame(VideoFrame &video_frame);
void OnReceiveCompleteFrame(const ReceivedFrame &received_frame);
void OnReceiveCompleteAudio(const char *data, size_t size);
void OnReceiveCompleteData(const char *data, size_t size);
@@ -76,7 +76,8 @@ class IceTransportController
const webrtc::rtcp::CongestionControlFeedback &feedback);
private:
int CreateVideoCodec(rtp::PAYLOAD_TYPE video_pt, bool hardware_acceleration);
int CreateVideoCodec(std::shared_ptr<SystemClock> clock,
rtp::PAYLOAD_TYPE video_pt, bool hardware_acceleration);
int CreateAudioCodec();
private:

View File

@@ -20,7 +20,7 @@ class PacketSender {
virtual int Send() = 0;
virtual int EnqueueRtpPacket(
std::vector<std::unique_ptr<RtpPacket>> &rtp_packets,
int64_t capture_timestamp_us) = 0;
int64_t captured_timestamp_us) = 0;
};
#endif

View File

@@ -250,13 +250,12 @@ PacketSenderImp::Stats PacketSenderImp::GetStats() const {
int PacketSenderImp::EnqueueRtpPacket(
std::vector<std::unique_ptr<RtpPacket>> &rtp_packets,
int64_t capture_timestamp_us) {
int64_t captured_timestamp_us) {
std::vector<std::unique_ptr<webrtc::RtpPacketToSend>> to_send_rtp_packets;
for (auto &rtp_packet : rtp_packets) {
std::unique_ptr<webrtc::RtpPacketToSend> rtp_packet_to_send(
static_cast<webrtc::RtpPacketToSend *>(rtp_packet.release()));
rtp_packet_to_send->set_capture_time(
webrtc::Timestamp::Micros(capture_timestamp_us));
rtp_packet_to_send->set_capture_time(clock_->CurrentTime());
rtp_packet_to_send->set_transport_sequence_number(transport_seq_++);
switch (rtp_packet_to_send->PayloadType()) {

View File

@@ -38,7 +38,7 @@ class PacketSenderImp : public PacketSender,
int Send() { return 0; }
int EnqueueRtpPacket(std::vector<std::unique_ptr<RtpPacket>>& rtp_packets,
int64_t capture_timestamp_us);
int64_t captured_timestamp_us);
void SetOnSentPacketFunc(
std::function<void(const webrtc::RtpPacketToSend&)> on_sent_packet_func) {