[feat] support speaker capture on Windows

This commit is contained in:
dijunkun
2024-08-15 11:04:06 +08:00
parent 574b9d10ab
commit 65927c2091
6 changed files with 93 additions and 199 deletions

View File

@@ -168,7 +168,7 @@ int Render::StartScreenCapture() {
std::chrono::duration<double> duration = now_time - last_frame_time_; std::chrono::duration<double> duration = now_time - last_frame_time_;
auto tc = duration.count() * 1000; auto tc = duration.count() * 1000;
if (tc >= 0) { if (tc >= 0 && connection_established_) {
SendData(peer_, DATA_TYPE::VIDEO, (const char *)data, SendData(peer_, DATA_TYPE::VIDEO, (const char *)data,
NV12_BUFFER_SIZE); NV12_BUFFER_SIZE);
last_frame_time_ = now_time; last_frame_time_ = now_time;
@@ -203,7 +203,9 @@ int Render::StartSpeakerCapture() {
int speaker_capturer_init_ret = int speaker_capturer_init_ret =
speaker_capturer_->Init([this](unsigned char *data, size_t size) -> void { speaker_capturer_->Init([this](unsigned char *data, size_t size) -> void {
SendData(peer_, DATA_TYPE::AUDIO, (const char *)data, size); if (connection_established_) {
SendData(peer_, DATA_TYPE::AUDIO, (const char *)data, size);
}
}); });
if (0 == speaker_capturer_init_ret) { if (0 == speaker_capturer_init_ret) {
@@ -421,13 +423,13 @@ int Render::Run() {
screen_capturer_factory_ = new ScreenCapturerFactory(); screen_capturer_factory_ = new ScreenCapturerFactory();
// Speaker capture // Speaker capture
// speaker_capturer_factory_ = new SpeakerCapturerFactory(); speaker_capturer_factory_ = new SpeakerCapturerFactory();
// Mouse control // Mouse control
device_controller_factory_ = new DeviceControllerFactory(); device_controller_factory_ = new DeviceControllerFactory();
} }
// StartSpeakerCapture(); StartSpeakerCapture();
// Main loop // Main loop
while (!exit_) { while (!exit_) {

View File

@@ -13,7 +13,7 @@
#endif #endif
int Render::ProcessMouseKeyEven(SDL_Event &ev) { int Render::ProcessMouseKeyEven(SDL_Event &ev) {
if (!control_mouse_) { if (!control_mouse_ || !connection_established_) {
return 0; return 0;
} }

View File

@@ -1,187 +1,94 @@
#include "speaker_capturer_wasapi.h" #include "speaker_capturer_wasapi.h"
#include <algorithm> #include "rd_log.h"
#include <climits>
#include <iostream>
#define REFTIMES_PER_SEC 10000000 #define MINIAUDIO_IMPLEMENTATION
#define REFTIMES_PER_MILLISEC 10000 #include "miniaudio.h"
#define SAVE_AUDIO_FILE 0 #define SAVE_AUDIO_FILE 0
#define CHECK_HR(hres) \ static ma_device_config device_config_;
if (FAILED(hres)) { \ static ma_device device_;
return -1; \ static ma_format format_ = ma_format_s16;
static ma_uint32 sample_rate_ = ma_standard_sample_rate_48000;
static ma_uint32 channels_ = 1;
static FILE* fp_ = nullptr;
void data_callback(ma_device* pDevice, void* pOutput, const void* pInput,
ma_uint32 frameCount) {
SpeakerCapturerWasapi* ptr = (SpeakerCapturerWasapi*)pDevice->pUserData;
if (ptr) {
if (SAVE_AUDIO_FILE) {
fwrite(pInput, frameCount * ma_get_bytes_per_frame(format_, channels_), 1,
fp_);
}
ptr->GetCallback()((unsigned char*)pInput,
frameCount * ma_get_bytes_per_frame(format_, channels_));
} }
#define SAFE_RELEASE(punk) \ (void)pOutput;
if ((punk) != nullptr) { \ }
(punk)->Release(); \
(punk) = nullptr; \
}
const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator); SpeakerCapturerWasapi::speaker_data_cb SpeakerCapturerWasapi::GetCallback() {
const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator); return cb_;
const IID IID_IAudioClient = __uuidof(IAudioClient); }
const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
SpeakerCapturerWasapi::SpeakerCapturerWasapi() {} SpeakerCapturerWasapi::SpeakerCapturerWasapi() {}
SpeakerCapturerWasapi::~SpeakerCapturerWasapi() { SpeakerCapturerWasapi::~SpeakerCapturerWasapi() {
if (inited_ && capture_thread_->joinable()) {
capture_thread_->join();
inited_ = false;
}
CoTaskMemFree(pwfx);
SAFE_RELEASE(pEnumerator)
SAFE_RELEASE(pDevice)
SAFE_RELEASE(pAudioClient)
SAFE_RELEASE(pCaptureClient)
if (SAVE_AUDIO_FILE) { if (SAVE_AUDIO_FILE) {
fclose(fp); fclose(fp_);
} }
// if (pData_dst) delete pData_dst;
// pData_dst = nullptr;
} }
int SpeakerCapturerWasapi::Init(speaker_data_cb cb) { int SpeakerCapturerWasapi::Init(speaker_data_cb cb) {
if (inited_) {
return 0;
}
cb_ = cb; cb_ = cb;
if (SAVE_AUDIO_FILE) { if (SAVE_AUDIO_FILE) {
fopen_s(&fp, "system_audio.pcm", "wb"); fopen_s(&fp_, "system_audio.pcm", "wb");
} }
HRESULT hr; ma_result result;
ma_backend backends[] = {ma_backend_wasapi};
hr = CoCreateInstance(CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL, device_config_ = ma_device_config_init(ma_device_type_loopback);
IID_IMMDeviceEnumerator, (void **)&pEnumerator); device_config_.capture.pDeviceID = NULL;
CHECK_HR(hr) device_config_.capture.format = format_;
device_config_.capture.channels = channels_;
device_config_.sampleRate = sample_rate_;
device_config_.dataCallback = data_callback;
device_config_.pUserData = this;
hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, result = ma_device_init_ex(backends, sizeof(backends) / sizeof(backends[0]),
&pDevice); // 输出 NULL, &device_config_, &device_);
CHECK_HR(hr) if (result != MA_SUCCESS) {
LOG_ERROR("Failed to initialize loopback device");
hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, return -1;
(void **)&pAudioClient);
CHECK_HR(hr)
hr = pAudioClient->GetMixFormat(&pwfx);
CHECK_HR(hr)
// Change to 16bit
if (pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) {
pwfx->wFormatTag = WAVE_FORMAT_PCM;
pwfx->wBitsPerSample = 16;
pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
} else if (pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
PWAVEFORMATEXTENSIBLE pEx = reinterpret_cast<PWAVEFORMATEXTENSIBLE>(pwfx);
if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, pEx->SubFormat)) {
pEx->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
pEx->Samples.wValidBitsPerSample = 16;
pwfx->wBitsPerSample = 16;
pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
}
} }
hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_LOOPBACK, 0, 0, pwfx,
nullptr);
CHECK_HR(hr)
// Get the size of the allocated buffer.
hr = pAudioClient->GetBufferSize(&bufferFrameCount);
CHECK_HR(hr)
hr = pAudioClient->GetService(IID_IAudioCaptureClient,
(void **)&pCaptureClient);
CHECK_HR(hr)
// Show audio info
{
printf("wFormatTag is %x\n", pwfx->wFormatTag);
printf("nChannels is %x\n", pwfx->nChannels);
printf("nSamplesPerSec is %d\n", pwfx->nSamplesPerSec);
printf("nAvgBytesPerSec is %d\n", pwfx->nAvgBytesPerSec);
printf("wBitsPerSample is %d\n", pwfx->wBitsPerSample);
}
hnsActualDuration =
(double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
// pData_dst = new BYTE[960];
inited_ = true; inited_ = true;
return 0; return 0;
} }
int SpeakerCapturerWasapi::Start() { int SpeakerCapturerWasapi::Start() {
HRESULT hr; ma_result result = ma_device_start(&device_);
hr = pAudioClient->Start(); if (result != MA_SUCCESS) {
CHECK_HR(hr) ma_device_uninit(&device_);
LOG_ERROR("Failed to start device");
capture_thread_.reset(new std::thread([this]() { return -1;
HRESULT hr; }
// Each loop fills about half of the shared buffer.
while (1) {
// Sleep for half the buffer duration.
Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 4);
hr = pCaptureClient->GetNextPacketSize(&packetLength);
CHECK_HR(hr)
while (packetLength != 0) {
// Get the available data in the shared buffer.
hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags,
nullptr, &ts);
CHECK_HR(hr)
// flags equals to 2 means silence, set data to nullptr
if (flags == AUDCLNT_BUFFERFLAGS_SILENT) {
pData = nullptr;
}
if (pData != nullptr) {
size_t size = numFramesAvailable * pwfx->nBlockAlign;
for (int i = 0; i < size / 2; i++) {
BYTE left = pData[i * 2];
BYTE right = pData[i * 2 + 1];
// Right channel only?
BYTE monoSample = right;
pData_dst[i] = static_cast<BYTE>(monoSample);
}
cb_(pData_dst, size / 2);
if (SAVE_AUDIO_FILE) {
fwrite(pData_dst, size / 2, 1, fp);
}
}
hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
CHECK_HR(hr)
hr = pCaptureClient->GetNextPacketSize(&packetLength);
CHECK_HR(hr)
}
}
}));
return 0; return 0;
} }
int SpeakerCapturerWasapi::Stop() { int SpeakerCapturerWasapi::Stop() {
HRESULT hr; ma_device_uninit(&device_);
hr = pAudioClient->Stop();
CHECK_HR(hr)
return 0; return 0;
} }

View File

@@ -1,20 +1,12 @@
/* /*
* @Author: DI JUNKUN * @Author: DI JUNKUN
* @Date: 2024-07-22 * @Date: 2024-08-15
* Copyright (c) 2024 by DI JUNKUN, All Rights Reserved. * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
*/ */
#ifndef _SPEAKER_CAPTURER_WASAPI_H_ #ifndef _SPEAKER_CAPTURER_WASAPI_H_
#define _SPEAKER_CAPTURER_WASAPI_H_ #define _SPEAKER_CAPTURER_WASAPI_H_
#include <Audioclient.h>
#include <Devicetopology.h>
#include <Endpointvolume.h>
#include <Mmdeviceapi.h>
#include <thread>
#include <vector>
#include "speaker_capturer.h" #include "speaker_capturer.h"
class SpeakerCapturerWasapi : public SpeakerCapturer { class SpeakerCapturerWasapi : public SpeakerCapturer {
@@ -31,31 +23,13 @@ class SpeakerCapturerWasapi : public SpeakerCapturer {
int Pause(); int Pause();
int Resume(); int Resume();
speaker_data_cb GetCallback();
private: private:
speaker_data_cb cb_ = nullptr; speaker_data_cb cb_ = nullptr;
private: private:
REFERENCE_TIME hnsActualDuration;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
BYTE *pData;
// std::vector<BYTE> pData_dst;
BYTE pData_dst[960];
DWORD flags;
// REFERENCE_TIME hnsRequestedDuration = 10000000;
IMMDeviceEnumerator *pEnumerator = NULL;
IMMDevice *pDevice = NULL;
IAudioClient *pAudioClient = NULL;
IAudioCaptureClient *pCaptureClient = NULL;
WAVEFORMATEX *pwfx = NULL;
UINT32 packetLength = 0;
UINT64 pos, ts;
FILE *fp;
bool inited_ = false; bool inited_ = false;
// thread
std::unique_ptr<std::thread> capture_thread_ = nullptr;
}; };
#endif #endif

View File

@@ -18,12 +18,16 @@ buffer in the callback will be null whereas the input buffer will be valid.
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
FILE* fp;
void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, void data_callback(ma_device* pDevice, void* pOutput, const void* pInput,
ma_uint32 frameCount) { ma_uint32 frameCount) {
ma_encoder* pEncoder = (ma_encoder*)pDevice->pUserData; // ma_encoder* pEncoder = (ma_encoder*)pDevice->pUserData;
MA_ASSERT(pEncoder != NULL); // MA_ASSERT(pEncoder != NULL);
ma_encoder_write_pcm_frames(pEncoder, pInput, frameCount, NULL); // ma_encoder_write_pcm_frames(pEncoder, pInput, frameCount, NULL);
fwrite(pInput, frameCount * ma_get_bytes_per_frame(ma_format_s16, 1), 1, fp);
(void)pOutput; (void)pOutput;
} }
@@ -35,32 +39,36 @@ int main(int argc, char** argv) {
ma_device_config deviceConfig; ma_device_config deviceConfig;
ma_device device; ma_device device;
fopen_s(&fp, "miniaudio.pcm", "wb");
/* Loopback mode is currently only supported on WASAPI. */ /* Loopback mode is currently only supported on WASAPI. */
ma_backend backends[] = {ma_backend_wasapi}; ma_backend backends[] = {ma_backend_wasapi};
if (argc < 2) { // if (argc < 2) {
printf("No output file.\n"); // printf("No output file.\n");
return -1; // return -1;
} // }
encoderConfig = // encoderConfig =
ma_encoder_config_init(ma_encoding_format_wav, ma_format_s16, 1, 48000); // ma_encoder_config_init(ma_encoding_format_wav, ma_format_s16, 1,
// 48000);
if (ma_encoder_init_file(argv[1], &encoderConfig, &encoder) != MA_SUCCESS) { // if (ma_encoder_init_file(argv[1], &encoderConfig, &encoder) != MA_SUCCESS)
printf("Failed to initialize output file.\n"); // {
return -1; // printf("Failed to initialize output file.\n");
} // return -1;
// }
deviceConfig = ma_device_config_init(ma_device_type_loopback); deviceConfig = ma_device_config_init(ma_device_type_loopback);
deviceConfig.capture.pDeviceID = deviceConfig.capture.pDeviceID =
NULL; /* Use default device for this example. Set this to the ID of a NULL; /* Use default device for this example. Set this to the ID of a
_playback_ device if you want to capture from a specific device. _playback_ device if you want to capture from a specific device.
*/ */
deviceConfig.capture.format = encoder.config.format; deviceConfig.capture.format = ma_format_s16;
deviceConfig.capture.channels = encoder.config.channels; deviceConfig.capture.channels = 1;
deviceConfig.sampleRate = encoder.config.sampleRate; deviceConfig.sampleRate = 48000;
deviceConfig.dataCallback = data_callback; deviceConfig.dataCallback = data_callback;
deviceConfig.pUserData = &encoder; deviceConfig.pUserData = nullptr;
result = ma_device_init_ex(backends, sizeof(backends) / sizeof(backends[0]), result = ma_device_init_ex(backends, sizeof(backends) / sizeof(backends[0]),
NULL, &deviceConfig, &device); NULL, &deviceConfig, &device);
@@ -79,8 +87,10 @@ int main(int argc, char** argv) {
printf("Press Enter to stop recording...\n"); printf("Press Enter to stop recording...\n");
getchar(); getchar();
fclose(fp);
ma_device_uninit(&device); ma_device_uninit(&device);
ma_encoder_uninit(&encoder); // ma_encoder_uninit(&encoder);
return 0; return 0;
} }

View File

@@ -3,7 +3,6 @@ set_license("LGPL-3.0")
set_version("0.0.1") set_version("0.0.1")
add_defines("RD_VERSION=\"0.0.1\""); add_defines("RD_VERSION=\"0.0.1\"");
add_defines("MINIAUDIO_IMPLEMENTATION")
add_rules("mode.release", "mode.debug") add_rules("mode.release", "mode.debug")
set_languages("c++17") set_languages("c++17")
@@ -20,6 +19,7 @@ end
add_requires("spdlog 1.14.1", {system = false}) add_requires("spdlog 1.14.1", {system = false})
add_requires("imgui v1.91.0", {configs = {sdl2 = true, sdl2_renderer = true}}) add_requires("imgui v1.91.0", {configs = {sdl2 = true, sdl2_renderer = true}})
add_requires("libyuv") add_requires("libyuv")
add_requires("miniaudio")
if is_os("windows") then if is_os("windows") then
add_links("Shell32", "windowsapp", "dwmapi", "User32", "kernel32", add_links("Shell32", "windowsapp", "dwmapi", "User32", "kernel32",
@@ -83,6 +83,7 @@ target("speaker_capturer")
add_deps("rd_log") add_deps("rd_log")
add_includedirs("src/speaker_capturer", {public = true}) add_includedirs("src/speaker_capturer", {public = true})
if is_os("windows") then if is_os("windows") then
add_packages("miniaudio")
add_files("src/speaker_capturer/windows/*.cpp") add_files("src/speaker_capturer/windows/*.cpp")
add_includedirs("src/speaker_capturer/windows", {public = true}) add_includedirs("src/speaker_capturer/windows", {public = true})
elseif is_os("macosx") then elseif is_os("macosx") then