From 65927c2091e6cdb6e29af95e7606a00a14721daf Mon Sep 17 00:00:00 2001 From: dijunkun Date: Thu, 15 Aug 2024 11:04:06 +0800 Subject: [PATCH] [feat] support speaker capture on Windows --- src/single_window/render.cpp | 10 +- src/single_window/render_callback_func.cpp | 2 +- .../windows/speaker_capturer_wasapi.cpp | 199 +++++------------- .../windows/speaker_capturer_wasapi.h | 32 +-- test/audio_capture/miniaudio.cpp | 46 ++-- xmake.lua | 3 +- 6 files changed, 93 insertions(+), 199 deletions(-) diff --git a/src/single_window/render.cpp b/src/single_window/render.cpp index ab59936..155da50 100644 --- a/src/single_window/render.cpp +++ b/src/single_window/render.cpp @@ -168,7 +168,7 @@ int Render::StartScreenCapture() { std::chrono::duration duration = now_time - last_frame_time_; auto tc = duration.count() * 1000; - if (tc >= 0) { + if (tc >= 0 && connection_established_) { SendData(peer_, DATA_TYPE::VIDEO, (const char *)data, NV12_BUFFER_SIZE); last_frame_time_ = now_time; @@ -203,7 +203,9 @@ int Render::StartSpeakerCapture() { int speaker_capturer_init_ret = speaker_capturer_->Init([this](unsigned char *data, size_t size) -> void { - SendData(peer_, DATA_TYPE::AUDIO, (const char *)data, size); + if (connection_established_) { + SendData(peer_, DATA_TYPE::AUDIO, (const char *)data, size); + } }); if (0 == speaker_capturer_init_ret) { @@ -421,13 +423,13 @@ int Render::Run() { screen_capturer_factory_ = new ScreenCapturerFactory(); // Speaker capture - // speaker_capturer_factory_ = new SpeakerCapturerFactory(); + speaker_capturer_factory_ = new SpeakerCapturerFactory(); // Mouse control device_controller_factory_ = new DeviceControllerFactory(); } - // StartSpeakerCapture(); + StartSpeakerCapture(); // Main loop while (!exit_) { diff --git a/src/single_window/render_callback_func.cpp b/src/single_window/render_callback_func.cpp index 965db97..725e471 100644 --- a/src/single_window/render_callback_func.cpp +++ b/src/single_window/render_callback_func.cpp @@ -13,7 +13,7 @@ #endif int Render::ProcessMouseKeyEven(SDL_Event &ev) { - if (!control_mouse_) { + if (!control_mouse_ || !connection_established_) { return 0; } diff --git a/src/speaker_capturer/windows/speaker_capturer_wasapi.cpp b/src/speaker_capturer/windows/speaker_capturer_wasapi.cpp index 37cddd0..61e5b9e 100644 --- a/src/speaker_capturer/windows/speaker_capturer_wasapi.cpp +++ b/src/speaker_capturer/windows/speaker_capturer_wasapi.cpp @@ -1,187 +1,94 @@ #include "speaker_capturer_wasapi.h" -#include -#include -#include +#include "rd_log.h" -#define REFTIMES_PER_SEC 10000000 -#define REFTIMES_PER_MILLISEC 10000 +#define MINIAUDIO_IMPLEMENTATION +#include "miniaudio.h" #define SAVE_AUDIO_FILE 0 -#define CHECK_HR(hres) \ - if (FAILED(hres)) { \ - return -1; \ +static ma_device_config device_config_; +static ma_device device_; +static ma_format format_ = ma_format_s16; +static ma_uint32 sample_rate_ = ma_standard_sample_rate_48000; +static ma_uint32 channels_ = 1; +static FILE* fp_ = nullptr; + +void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, + ma_uint32 frameCount) { + SpeakerCapturerWasapi* ptr = (SpeakerCapturerWasapi*)pDevice->pUserData; + if (ptr) { + if (SAVE_AUDIO_FILE) { + fwrite(pInput, frameCount * ma_get_bytes_per_frame(format_, channels_), 1, + fp_); + } + + ptr->GetCallback()((unsigned char*)pInput, + frameCount * ma_get_bytes_per_frame(format_, channels_)); } -#define SAFE_RELEASE(punk) \ - if ((punk) != nullptr) { \ - (punk)->Release(); \ - (punk) = nullptr; \ - } + (void)pOutput; +} -const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator); -const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator); -const IID IID_IAudioClient = __uuidof(IAudioClient); -const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient); +SpeakerCapturerWasapi::speaker_data_cb SpeakerCapturerWasapi::GetCallback() { + return cb_; +} SpeakerCapturerWasapi::SpeakerCapturerWasapi() {} SpeakerCapturerWasapi::~SpeakerCapturerWasapi() { - if (inited_ && capture_thread_->joinable()) { - capture_thread_->join(); - inited_ = false; - } - - CoTaskMemFree(pwfx); - SAFE_RELEASE(pEnumerator) - SAFE_RELEASE(pDevice) - SAFE_RELEASE(pAudioClient) - SAFE_RELEASE(pCaptureClient) - if (SAVE_AUDIO_FILE) { - fclose(fp); + fclose(fp_); } - - // if (pData_dst) delete pData_dst; - // pData_dst = nullptr; } int SpeakerCapturerWasapi::Init(speaker_data_cb cb) { + if (inited_) { + return 0; + } + cb_ = cb; if (SAVE_AUDIO_FILE) { - fopen_s(&fp, "system_audio.pcm", "wb"); + fopen_s(&fp_, "system_audio.pcm", "wb"); } - HRESULT hr; + ma_result result; + ma_backend backends[] = {ma_backend_wasapi}; - hr = CoCreateInstance(CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL, - IID_IMMDeviceEnumerator, (void **)&pEnumerator); - CHECK_HR(hr) + device_config_ = ma_device_config_init(ma_device_type_loopback); + device_config_.capture.pDeviceID = NULL; + device_config_.capture.format = format_; + device_config_.capture.channels = channels_; + device_config_.sampleRate = sample_rate_; + device_config_.dataCallback = data_callback; + device_config_.pUserData = this; - hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, - &pDevice); // 输出 - CHECK_HR(hr) - - hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, - (void **)&pAudioClient); - CHECK_HR(hr) - - hr = pAudioClient->GetMixFormat(&pwfx); - CHECK_HR(hr) - - // Change to 16bit - if (pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) { - pwfx->wFormatTag = WAVE_FORMAT_PCM; - pwfx->wBitsPerSample = 16; - pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8; - pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec; - } else if (pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) { - PWAVEFORMATEXTENSIBLE pEx = reinterpret_cast(pwfx); - if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, pEx->SubFormat)) { - pEx->SubFormat = KSDATAFORMAT_SUBTYPE_PCM; - pEx->Samples.wValidBitsPerSample = 16; - pwfx->wBitsPerSample = 16; - pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8; - pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec; - } + result = ma_device_init_ex(backends, sizeof(backends) / sizeof(backends[0]), + NULL, &device_config_, &device_); + if (result != MA_SUCCESS) { + LOG_ERROR("Failed to initialize loopback device"); + return -1; } - hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, - AUDCLNT_STREAMFLAGS_LOOPBACK, 0, 0, pwfx, - nullptr); - CHECK_HR(hr) - - // Get the size of the allocated buffer. - hr = pAudioClient->GetBufferSize(&bufferFrameCount); - CHECK_HR(hr) - - hr = pAudioClient->GetService(IID_IAudioCaptureClient, - (void **)&pCaptureClient); - CHECK_HR(hr) - - // Show audio info - { - printf("wFormatTag is %x\n", pwfx->wFormatTag); - printf("nChannels is %x\n", pwfx->nChannels); - printf("nSamplesPerSec is %d\n", pwfx->nSamplesPerSec); - printf("nAvgBytesPerSec is %d\n", pwfx->nAvgBytesPerSec); - printf("wBitsPerSample is %d\n", pwfx->wBitsPerSample); - } - - hnsActualDuration = - (double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec; - - // pData_dst = new BYTE[960]; - inited_ = true; return 0; } int SpeakerCapturerWasapi::Start() { - HRESULT hr; - hr = pAudioClient->Start(); - CHECK_HR(hr) - - capture_thread_.reset(new std::thread([this]() { - HRESULT hr; - - // Each loop fills about half of the shared buffer. - while (1) { - // Sleep for half the buffer duration. - Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 4); - - hr = pCaptureClient->GetNextPacketSize(&packetLength); - CHECK_HR(hr) - - while (packetLength != 0) { - // Get the available data in the shared buffer. - hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, - nullptr, &ts); - CHECK_HR(hr) - - // flags equals to 2 means silence, set data to nullptr - if (flags == AUDCLNT_BUFFERFLAGS_SILENT) { - pData = nullptr; - } - - if (pData != nullptr) { - size_t size = numFramesAvailable * pwfx->nBlockAlign; - - for (int i = 0; i < size / 2; i++) { - BYTE left = pData[i * 2]; - BYTE right = pData[i * 2 + 1]; - // Right channel only? - BYTE monoSample = right; - - pData_dst[i] = static_cast(monoSample); - } - - cb_(pData_dst, size / 2); - - if (SAVE_AUDIO_FILE) { - fwrite(pData_dst, size / 2, 1, fp); - } - } - - hr = pCaptureClient->ReleaseBuffer(numFramesAvailable); - CHECK_HR(hr) - - hr = pCaptureClient->GetNextPacketSize(&packetLength); - CHECK_HR(hr) - } - } - })); + ma_result result = ma_device_start(&device_); + if (result != MA_SUCCESS) { + ma_device_uninit(&device_); + LOG_ERROR("Failed to start device"); + return -1; + } return 0; } int SpeakerCapturerWasapi::Stop() { - HRESULT hr; - hr = pAudioClient->Stop(); - CHECK_HR(hr) + ma_device_uninit(&device_); return 0; } diff --git a/src/speaker_capturer/windows/speaker_capturer_wasapi.h b/src/speaker_capturer/windows/speaker_capturer_wasapi.h index 3bfafab..e2a2a9e 100644 --- a/src/speaker_capturer/windows/speaker_capturer_wasapi.h +++ b/src/speaker_capturer/windows/speaker_capturer_wasapi.h @@ -1,20 +1,12 @@ /* * @Author: DI JUNKUN - * @Date: 2024-07-22 + * @Date: 2024-08-15 * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved. */ #ifndef _SPEAKER_CAPTURER_WASAPI_H_ #define _SPEAKER_CAPTURER_WASAPI_H_ -#include -#include -#include -#include - -#include -#include - #include "speaker_capturer.h" class SpeakerCapturerWasapi : public SpeakerCapturer { @@ -31,31 +23,13 @@ class SpeakerCapturerWasapi : public SpeakerCapturer { int Pause(); int Resume(); + speaker_data_cb GetCallback(); + private: speaker_data_cb cb_ = nullptr; private: - REFERENCE_TIME hnsActualDuration; - UINT32 bufferFrameCount; - UINT32 numFramesAvailable; - BYTE *pData; - // std::vector pData_dst; - BYTE pData_dst[960]; - DWORD flags; - - // REFERENCE_TIME hnsRequestedDuration = 10000000; - IMMDeviceEnumerator *pEnumerator = NULL; - IMMDevice *pDevice = NULL; - IAudioClient *pAudioClient = NULL; - IAudioCaptureClient *pCaptureClient = NULL; - WAVEFORMATEX *pwfx = NULL; - UINT32 packetLength = 0; - UINT64 pos, ts; - FILE *fp; - bool inited_ = false; - // thread - std::unique_ptr capture_thread_ = nullptr; }; #endif \ No newline at end of file diff --git a/test/audio_capture/miniaudio.cpp b/test/audio_capture/miniaudio.cpp index 8169e13..03aaa43 100644 --- a/test/audio_capture/miniaudio.cpp +++ b/test/audio_capture/miniaudio.cpp @@ -18,12 +18,16 @@ buffer in the callback will be null whereas the input buffer will be valid. #include #include +FILE* fp; + void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount) { - ma_encoder* pEncoder = (ma_encoder*)pDevice->pUserData; - MA_ASSERT(pEncoder != NULL); + // ma_encoder* pEncoder = (ma_encoder*)pDevice->pUserData; + // MA_ASSERT(pEncoder != NULL); - ma_encoder_write_pcm_frames(pEncoder, pInput, frameCount, NULL); + // ma_encoder_write_pcm_frames(pEncoder, pInput, frameCount, NULL); + + fwrite(pInput, frameCount * ma_get_bytes_per_frame(ma_format_s16, 1), 1, fp); (void)pOutput; } @@ -35,32 +39,36 @@ int main(int argc, char** argv) { ma_device_config deviceConfig; ma_device device; + fopen_s(&fp, "miniaudio.pcm", "wb"); + /* Loopback mode is currently only supported on WASAPI. */ ma_backend backends[] = {ma_backend_wasapi}; - if (argc < 2) { - printf("No output file.\n"); - return -1; - } + // if (argc < 2) { + // printf("No output file.\n"); + // return -1; + // } - encoderConfig = - ma_encoder_config_init(ma_encoding_format_wav, ma_format_s16, 1, 48000); + // encoderConfig = + // ma_encoder_config_init(ma_encoding_format_wav, ma_format_s16, 1, + // 48000); - if (ma_encoder_init_file(argv[1], &encoderConfig, &encoder) != MA_SUCCESS) { - printf("Failed to initialize output file.\n"); - return -1; - } + // if (ma_encoder_init_file(argv[1], &encoderConfig, &encoder) != MA_SUCCESS) + // { + // printf("Failed to initialize output file.\n"); + // return -1; + // } deviceConfig = ma_device_config_init(ma_device_type_loopback); deviceConfig.capture.pDeviceID = NULL; /* Use default device for this example. Set this to the ID of a _playback_ device if you want to capture from a specific device. */ - deviceConfig.capture.format = encoder.config.format; - deviceConfig.capture.channels = encoder.config.channels; - deviceConfig.sampleRate = encoder.config.sampleRate; + deviceConfig.capture.format = ma_format_s16; + deviceConfig.capture.channels = 1; + deviceConfig.sampleRate = 48000; deviceConfig.dataCallback = data_callback; - deviceConfig.pUserData = &encoder; + deviceConfig.pUserData = nullptr; result = ma_device_init_ex(backends, sizeof(backends) / sizeof(backends[0]), NULL, &deviceConfig, &device); @@ -79,8 +87,10 @@ int main(int argc, char** argv) { printf("Press Enter to stop recording...\n"); getchar(); + fclose(fp); + ma_device_uninit(&device); - ma_encoder_uninit(&encoder); + // ma_encoder_uninit(&encoder); return 0; } \ No newline at end of file diff --git a/xmake.lua b/xmake.lua index 51ebecb..c3a4b69 100644 --- a/xmake.lua +++ b/xmake.lua @@ -3,7 +3,6 @@ set_license("LGPL-3.0") set_version("0.0.1") add_defines("RD_VERSION=\"0.0.1\""); -add_defines("MINIAUDIO_IMPLEMENTATION") add_rules("mode.release", "mode.debug") set_languages("c++17") @@ -20,6 +19,7 @@ end add_requires("spdlog 1.14.1", {system = false}) add_requires("imgui v1.91.0", {configs = {sdl2 = true, sdl2_renderer = true}}) add_requires("libyuv") +add_requires("miniaudio") if is_os("windows") then add_links("Shell32", "windowsapp", "dwmapi", "User32", "kernel32", @@ -83,6 +83,7 @@ target("speaker_capturer") add_deps("rd_log") add_includedirs("src/speaker_capturer", {public = true}) if is_os("windows") then + add_packages("miniaudio") add_files("src/speaker_capturer/windows/*.cpp") add_includedirs("src/speaker_capturer/windows", {public = true}) elseif is_os("macosx") then