[feat] audio capture supported on MacOSX

This commit is contained in:
dijunkun
2025-07-02 19:25:21 +08:00
parent e52ec6cde2
commit c13e2613b6
8 changed files with 225 additions and 26 deletions

View File

@@ -370,8 +370,9 @@ int Render::StopScreenCapturer() {
int Render::StartSpeakerCapturer() { int Render::StartSpeakerCapturer() {
if (!speaker_capturer_) { if (!speaker_capturer_) {
speaker_capturer_ = (SpeakerCapturer*)speaker_capturer_factory_->Create(); speaker_capturer_ = (SpeakerCapturer*)speaker_capturer_factory_->Create();
int speaker_capturer_init_ret = speaker_capturer_->Init( int speaker_capturer_init_ret =
[this](unsigned char* data, size_t size) -> void { speaker_capturer_->Init([this](unsigned char* data, size_t size,
const char* audio_name) -> void {
SendAudioFrame(peer_, (const char*)data, size, audio_label_.c_str()); SendAudioFrame(peer_, (const char*)data, size, audio_label_.c_str());
}); });

View File

@@ -1,14 +0,0 @@
#include "speaker_capturer_macosx.h"
// Placeholder macOS speaker capturer: every operation below is a no-op, and
// each int-returning method reports success by returning 0.

// Nothing to set up.
SpeakerCapturerMacosx::SpeakerCapturerMacosx() {
}

// Nothing to tear down.
SpeakerCapturerMacosx::~SpeakerCapturerMacosx() {
}

// Ignores the callback and reports success.
int SpeakerCapturerMacosx::Init(speaker_data_cb cb) {
  return 0;
}

// No resources are held; reports success.
int SpeakerCapturerMacosx::Destroy() {
  return 0;
}

// Does not start anything; reports success.
int SpeakerCapturerMacosx::Start() {
  return 0;
}

// Does not stop anything; reports success.
int SpeakerCapturerMacosx::Stop() {
  return 0;
}

// No-op; reports success.
int SpeakerCapturerMacosx::Pause() {
  return 0;
}

// No-op; reports success.
int SpeakerCapturerMacosx::Resume() {
  return 0;
}

View File

@@ -26,13 +26,12 @@ class SpeakerCapturerMacosx : public SpeakerCapturer {
int Pause(); int Pause();
int Resume(); int Resume();
private: public:
speaker_data_cb cb_ = nullptr; speaker_data_cb cb_ = nullptr;
private:
bool inited_ = false; bool inited_ = false;
// thread
std::thread capture_thread_; class Impl;
Impl* impl_ = nullptr;
}; };
#endif #endif

View File

@@ -0,0 +1,210 @@
#import <AVFoundation/AVFoundation.h>
#import <Foundation/Foundation.h>
#import <ScreenCaptureKit/ScreenCaptureKit.h>
#include "speaker_capturer_macosx.h"
// Delegate object that receives the SCStream callbacks; it adopts both
// SCStreamDelegate and SCStreamOutput so one instance can serve as the
// stream's delegate and as its sample-buffer output.
@interface SpeakerCaptureDelegate : NSObject <SCStreamDelegate, SCStreamOutput>
// Back-pointer to the C++ capturer whose cb_ receives the captured audio.
@property(nonatomic, assign) SpeakerCapturerMacosx* owner; // assign (not weak) because this is a C++ pointer
// Creates the delegate and stores `owner` as the back-pointer.
- (instancetype)initWithOwner:(SpeakerCapturerMacosx*)owner;
@end
// Number of mono 16-bit samples handed to the owner's callback per call
// (480 samples == 960 bytes).
static const size_t kSpeakerFrameSamples = 480;

// Converts one float sample to signed 16-bit PCM, clamping to [-1, 1] first.
// NaN maps to silence because converting NaN to an integer is undefined.
static inline short SpeakerFloatToPcm16(float value) {
  if (value != value) return 0;
  if (value > 1.0f) value = 1.0f;
  if (value < -1.0f) value = -1.0f;
  return (short)(value * 32767.0f);
}

// Reads sample `index` of `buffer` as a 16-bit PCM value, converting from
// float32 when `isFloat32` is set.
static inline int SpeakerSampleAsPcm16(const AudioBuffer& buffer, size_t index, bool isFloat32) {
  if (isFloat32) {
    return SpeakerFloatToPcm16(((const float*)buffer.mData)[index]);
  }
  return ((const short*)buffer.mData)[index];
}

// Downmixes every channel found in `list` to mono 16-bit PCM and appends the
// result to `out`. Works for interleaved data (one buffer holding N channels)
// and for planar data (N buffers holding one channel each), because it simply
// averages all channels of all buffers frame by frame.
static void SpeakerAppendMonoPcm16(const AudioBufferList* list, bool isFloat32,
                                   std::vector<short>& out) {
  const size_t bytesPerSample = isFloat32 ? sizeof(float) : sizeof(short);
  size_t frames = 0;
  size_t channels = 0;
  bool haveBuffer = false;
  for (UInt32 b = 0; b < list->mNumberBuffers; ++b) {
    const AudioBuffer& buffer = list->mBuffers[b];
    if (!buffer.mData || buffer.mNumberChannels == 0) continue;
    const size_t bufferFrames = buffer.mDataByteSize / (bytesPerSample * buffer.mNumberChannels);
    // Use the shortest buffer so no read runs past the end of any plane.
    if (!haveBuffer || bufferFrames < frames) frames = bufferFrames;
    haveBuffer = true;
    channels += buffer.mNumberChannels;
  }
  if (!haveBuffer || frames == 0 || channels == 0) return;

  out.reserve(out.size() + frames);
  for (size_t f = 0; f < frames; ++f) {
    int sum = 0;
    for (UInt32 b = 0; b < list->mNumberBuffers; ++b) {
      const AudioBuffer& buffer = list->mBuffers[b];
      if (!buffer.mData || buffer.mNumberChannels == 0) continue;
      for (UInt32 c = 0; c < buffer.mNumberChannels; ++c) {
        sum += SpeakerSampleAsPcm16(buffer, f * buffer.mNumberChannels + c, isFloat32);
      }
    }
    out.push_back((short)(sum / (int)channels));
  }
}

@implementation SpeakerCaptureDelegate {
  // Mono samples left over from earlier buffers (fewer than one output frame
  // after each callback). They are prepended to the next buffer so that audio
  // which does not fill a whole 480-sample frame is not discarded.
  std::vector<short> _pendingPcm16;
}

// Stores the non-owning back-pointer to the C++ capturer.
- (instancetype)initWithOwner:(SpeakerCapturerMacosx*)owner {
  self = [super init];
  if (self) {
    _owner = owner;
  }
  return self;
}

// SCStreamOutput callback. For audio buffers: converts the samples to mono
// 16-bit PCM (float32 and 16-bit integer input are supported; other formats
// are ignored) and forwards them to the owner's cb_ in 960-byte frames
// labelled "audio". Non-audio buffers are ignored.
- (void)stream:(SCStream*)stream
    didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
                   ofType:(SCStreamOutputType)type {
  if (type != SCStreamOutputTypeAudio) return;
  if (!_owner || !_owner->cb_) return;
  if (!sampleBuffer || !CMSampleBufferIsValid(sampleBuffer)) return;

  // Determine the input sample format.
  CMFormatDescriptionRef formatDesc = CMSampleBufferGetFormatDescription(sampleBuffer);
  if (!formatDesc) return;
  const AudioStreamBasicDescription* asbd =
      CMAudioFormatDescriptionGetStreamBasicDescription(formatDesc);
  if (!asbd) return;

  const bool flaggedFloat = (asbd->mFormatFlags & kAudioFormatFlagIsFloat) != 0;
  const bool isFloat32 = flaggedFloat && asbd->mBitsPerChannel == 32;
  const bool isInt16 = !flaggedFloat && asbd->mBitsPerChannel == 16;
  if (!isFloat32 && !isInt16) return;

  // Ask Core Media for the AudioBufferList instead of reading the block buffer
  // directly: this copes with non-contiguous block buffers and exposes planar
  // (non-interleaved) channels as separate buffers.
  size_t listSize = 0;
  OSStatus status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
      sampleBuffer, &listSize, NULL, 0, NULL, NULL, 0, NULL);
  if (status != noErr || listSize == 0) return;

  std::vector<unsigned char> listStorage(listSize);
  AudioBufferList* bufferList = (AudioBufferList*)listStorage.data();
  CMBlockBufferRef retainedBlock = NULL;
  status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
      sampleBuffer, NULL, bufferList, listSize, kCFAllocatorDefault, kCFAllocatorDefault,
      kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment, &retainedBlock);
  if (status != noErr) {
    if (retainedBlock) CFRelease(retainedBlock);
    return;
  }

  SpeakerAppendMonoPcm16(bufferList, isFloat32, _pendingPcm16);

  // The buffer list points into the retained block buffer; release it only
  // after the samples have been copied out.
  if (retainedBlock) CFRelease(retainedBlock);

  // Deliver complete 480-sample (960-byte) frames; keep the remainder for the
  // next callback.
  const size_t frameBytes = kSpeakerFrameSamples * sizeof(short);
  size_t consumed = 0;
  while (_pendingPcm16.size() - consumed >= kSpeakerFrameSamples && _owner->cb_) {
    _owner->cb_((unsigned char*)(_pendingPcm16.data() + consumed), frameBytes, "audio");
    consumed += kSpeakerFrameSamples;
  }
  _pendingPcm16.erase(_pendingPcm16.begin(), _pendingPcm16.begin() + consumed);
}
@end
// Implementation details of the C++ class, kept here so that they stay hidden
// inside the .mm file (the header only forward-declares Impl).
class SpeakerCapturerMacosx::Impl {
public:
// Stream configuration; created and filled in by Init().
SCStreamConfiguration* config = nil;
// The ScreenCaptureKit stream; created by Init(), started/stopped by Start()/Stop().
SCStream* stream = nil;
// Receives the stream's callbacks and forwards audio to the owner's cb_.
SpeakerCaptureDelegate* delegate = nil;
};
// Allocates the private implementation object; the stream itself is only
// created later by Init().
SpeakerCapturerMacosx::SpeakerCapturerMacosx() : impl_(new Impl()) {}
// Shuts the capturer down via Destroy() and then frees the private
// implementation object.
SpeakerCapturerMacosx::~SpeakerCapturerMacosx() {
  Destroy();

  if (impl_ != nullptr) {
    delete impl_;
  }
  impl_ = nullptr;
}
// Prepares (but does not start) audio capture through ScreenCaptureKit.
//
// Stores `cb` as the sink for captured audio, builds an audio-enabled stream
// configuration (48 kHz, one channel), creates a stream filtered on the main
// display and registers the delegate as its audio output on a dedicated
// serial queue. The calling thread blocks until the shareable-content query
// has completed.
//
// Returns 0 on success or when already initialised. Returns -1 on failure; in
// that case the partially built stream, delegate, configuration and the
// stored callback are released again.
int SpeakerCapturerMacosx::Init(speaker_data_cb cb) {
  if (inited_) return 0;
  if (!impl_) return -1;

  // Undo everything a failed attempt has set up so far.
  auto fail = [this]() -> int {
    impl_->stream = nil;
    impl_->delegate = nil;
    impl_->config = nil;
    cb_ = nullptr;
    return -1;
  };

  // The audio-capture parts of ScreenCaptureKit used below (capturesAudio,
  // sampleRate, channelCount, SCStreamOutputTypeAudio) need macOS 13.
  if (@available(macOS 13.0, *)) {
    cb_ = cb;

    impl_->config = [[SCStreamConfiguration alloc] init];
    impl_->config.capturesAudio = YES;
    impl_->config.sampleRate = 48000;
    impl_->config.channelCount = 1;

    impl_->delegate = [[SpeakerCaptureDelegate alloc] initWithOwner:this];

    // The shareable-content query is asynchronous; wait for it synchronously.
    dispatch_semaphore_t contentReady = dispatch_semaphore_create(0);
    __block SCShareableContent* content = nil;
    __block NSError* contentError = nil;
    [SCShareableContent
        getShareableContentWithCompletionHandler:^(SCShareableContent* c, NSError* e) {
          content = c;
          contentError = e;
          dispatch_semaphore_signal(contentReady);
        }];
    dispatch_semaphore_wait(contentReady, DISPATCH_TIME_FOREVER);
    // Decide on the result, not on the error pointer (Cocoa convention).
    if (!content) {
      NSLog(@"getShareableContent error: %@", contentError);
      return fail();
    }

    // Pick the main display.
    const CGDirectDisplayID mainDisplayId = CGMainDisplayID();
    SCDisplay* mainDisplay = nil;
    for (SCDisplay* display in content.displays) {
      if (display.displayID == mainDisplayId) {
        mainDisplay = display;
        break;
      }
    }
    if (!mainDisplay) return fail();

    // Wrap the display in a content filter that excludes no windows.
    SCContentFilter* filter = [[SCContentFilter alloc] initWithDisplay:mainDisplay
                                                      excludingWindows:@[]];
    impl_->stream = [[SCStream alloc] initWithFilter:filter
                                       configuration:impl_->config
                                            delegate:impl_->delegate];
    if (!impl_->stream) return fail();

    NSError* addOutputError = nil;
    dispatch_queue_t queue = dispatch_queue_create("SpeakerAudio.Queue", DISPATCH_QUEUE_SERIAL);
    BOOL ok = [impl_->stream addStreamOutput:impl_->delegate
                                        type:SCStreamOutputTypeAudio
                          sampleHandlerQueue:queue
                                       error:&addOutputError];
    // Only the BOOL result signals failure; the error may be set on success.
    if (!ok) {
      NSLog(@"addStreamOutput error: %@", addOutputError);
      return fail();
    }

    inited_ = true;
    return 0;
  }

  NSLog(@"SpeakerCapturerMacosx: audio capture requires macOS 13.0 or later");
  return -1;
}
// Starts capturing and blocks until ScreenCaptureKit reports the outcome.
// Returns 0 on success, -1 when the capturer is not initialised or the stream
// reports an error.
int SpeakerCapturerMacosx::Start() {
  if (!inited_) return -1;
  // A message sent to a nil stream would never run the completion handler,
  // which would leave the wait below blocked forever.
  if (!impl_ || !impl_->stream) return -1;

  dispatch_semaphore_t started = dispatch_semaphore_create(0);
  __block int ret = 0;
  [impl_->stream startCaptureWithCompletionHandler:^(NSError* _Nullable error) {
    if (error) {
      NSLog(@"startCapture error: %@", error);
      ret = -1;
    }
    dispatch_semaphore_signal(started);
  }];
  dispatch_semaphore_wait(started, DISPATCH_TIME_FOREVER);
  return ret;
}
// Stops capturing and blocks until ScreenCaptureKit has acknowledged it.
// Returns -1 when the capturer is not initialised (or has no stream),
// otherwise 0; an error reported by the stream is logged but does not change
// the return value.
//
// NOTE(review): inited_ is cleared here, so a later Start() or Resume()
// returns -1 until Init() has been called again — confirm this is intended.
int SpeakerCapturerMacosx::Stop() {
  if (!inited_) return -1;
  // A message sent to a nil stream would never run the completion handler,
  // which would leave the wait below blocked forever.
  if (!impl_ || !impl_->stream) {
    inited_ = false;
    return -1;
  }

  dispatch_semaphore_t stopped = dispatch_semaphore_create(0);
  [impl_->stream stopCaptureWithCompletionHandler:^(NSError* error) {
    if (error) {
      NSLog(@"stopCapture error: %@", error);
    }
    dispatch_semaphore_signal(stopped);
  }];
  dispatch_semaphore_wait(stopped, DISPATCH_TIME_FOREVER);

  inited_ = false;
  return 0;
}
// Stops the stream, forgets the data callback and drops the references to the
// ScreenCaptureKit objects. Always returns 0.
int SpeakerCapturerMacosx::Destroy() {
  Stop();

  cb_ = nullptr;

  Impl* state = impl_;
  if (state != nullptr) {
    // Same release order as before: delegate, then stream, then config.
    state->delegate = nil;
    state->stream = nil;
    state->config = nil;
  }

  inited_ = false;
  return 0;
}
// ScreenCaptureKit offers no pause API, so pausing is not implemented for
// now: this is a no-op that reports success.
int SpeakerCapturerMacosx::Pause() {
  return 0;
}
// Resuming is implemented as a plain Start(); its result is returned as is.
int SpeakerCapturerMacosx::Resume() {
  const int start_result = Start();
  return start_result;
}

View File

@@ -11,7 +11,8 @@
class SpeakerCapturer { class SpeakerCapturer {
public: public:
typedef std::function<void(unsigned char *, size_t)> speaker_data_cb; typedef std::function<void(unsigned char *, size_t, const char *)>
speaker_data_cb;
public: public:
virtual ~SpeakerCapturer() {} virtual ~SpeakerCapturer() {}

View File

@@ -24,7 +24,8 @@ void data_callback(ma_device* pDevice, void* pOutput, const void* pInput,
} }
ptr->GetCallback()((unsigned char*)pInput, ptr->GetCallback()((unsigned char*)pInput,
frameCount * ma_get_bytes_per_frame(format_, channels_)); frameCount * ma_get_bytes_per_frame(format_, channels_),
"audio");
} }
(void)pOutput; (void)pOutput;

View File

@@ -37,7 +37,7 @@ elseif is_os("macosx") then
add_ldflags("-Wl,-ld_classic") add_ldflags("-Wl,-ld_classic")
add_cxflags("-Wno-unused-variable") add_cxflags("-Wno-unused-variable")
add_frameworks("OpenGL", "IOSurface", "ScreenCaptureKit", "AVFoundation", add_frameworks("OpenGL", "IOSurface", "ScreenCaptureKit", "AVFoundation",
"CoreMedia", "CoreVideo") "CoreMedia", "CoreVideo", "CoreAudio", "AudioToolbox")
end end
add_packages("spdlog", "imgui") add_packages("spdlog", "imgui")
@@ -83,7 +83,8 @@ target("speaker_capturer")
add_files("src/speaker_capturer/windows/*.cpp") add_files("src/speaker_capturer/windows/*.cpp")
add_includedirs("src/speaker_capturer/windows", {public = true}) add_includedirs("src/speaker_capturer/windows", {public = true})
elseif is_os("macosx") then elseif is_os("macosx") then
add_files("src/speaker_capturer/macosx/*.cpp") add_files("src/speaker_capturer/macosx/*.cpp",
"src/speaker_capturer/macosx/*.mm")
add_includedirs("src/speaker_capturer/macosx", {public = true}) add_includedirs("src/speaker_capturer/macosx", {public = true})
elseif is_os("linux") then elseif is_os("linux") then
add_files("src/speaker_capturer/linux/*.cpp") add_files("src/speaker_capturer/linux/*.cpp")