[feat] support speaker capture on Windows

2025-12-17 20:47:01 +08:00 · 2024-08-15 11:04:06 +08:00
parent 574b9d10ab
commit 65927c2091
6 changed files with 93 additions and 199 deletions
--- a/src/speaker_capturer/windows/speaker_capturer_wasapi.cpp
+++ b/src/speaker_capturer/windows/speaker_capturer_wasapi.cpp
@@ -1,187 +1,94 @@
 #include "speaker_capturer_wasapi.h"

-#include <algorithm>
-#include <climits>
-#include <iostream>
+#include "rd_log.h"

-#define REFTIMES_PER_SEC 10000000
-#define REFTIMES_PER_MILLISEC 10000
+#define MINIAUDIO_IMPLEMENTATION
+#include "miniaudio.h"

 #define SAVE_AUDIO_FILE 0

-#define CHECK_HR(hres) \
-  if (FAILED(hres)) {  \
-    return -1;         \
+static ma_device_config device_config_;  // filled in by Init() before the device is created
+static ma_device device_;  // file-scope, so shared by every SpeakerCapturerWasapi in this translation unit
+static ma_format format_ = ma_format_s16;  // capture sample format requested in Init()
+static ma_uint32 sample_rate_ = ma_standard_sample_rate_48000;  // sample rate requested in Init()
+static ma_uint32 channels_ = 1;  // capture channel count requested in Init()
+static FILE* fp_ = nullptr;  // debug dump file; only touched when SAVE_AUDIO_FILE is non-zero
+
+// miniaudio data callback: hands each captured buffer to the callback
+// registered on the owning SpeakerCapturerWasapi.
+//
+// pDevice    - device whose pUserData holds the SpeakerCapturerWasapi* that
+//              Init() stored there.
+// pOutput    - unused.
+// pInput     - captured frames.
+// frameCount - number of frames available in pInput.
+void data_callback(ma_device* pDevice, void* pOutput, const void* pInput,
+                   ma_uint32 frameCount) {
+  (void)pOutput;
+
+  auto* capturer = static_cast<SpeakerCapturerWasapi*>(pDevice->pUserData);
+  // Nothing can be delivered without an owner or without input data.
+  if (capturer == nullptr || pInput == nullptr) {
+    return;
  }

-#define SAFE_RELEASE(punk) \
-  if ((punk) != nullptr) { \
-    (punk)->Release();     \
-    (punk) = nullptr;      \
-  }
+  // Size of the buffer in bytes, computed once for both consumers below.
+  const ma_uint32 byte_count =
+      frameCount * ma_get_bytes_per_frame(format_, channels_);
+
+  // fp_ is still null if the dump file was never opened successfully.
+  if (SAVE_AUDIO_FILE && fp_ != nullptr) {
+    fwrite(pInput, byte_count, 1, fp_);
+  }
+
+  // The stored callback starts out null, so never invoke it blindly.
+  auto on_data = capturer->GetCallback();
+  if (on_data) {
+    on_data(const_cast<unsigned char*>(
+                static_cast<const unsigned char*>(pInput)),
+            byte_count);
+  }
+}

-const CLSID CLSID_MMDeviceEnumerator = __uuidof(MMDeviceEnumerator);
-const IID IID_IMMDeviceEnumerator = __uuidof(IMMDeviceEnumerator);
-const IID IID_IAudioClient = __uuidof(IAudioClient);
-const IID IID_IAudioCaptureClient = __uuidof(IAudioCaptureClient);
+// Accessor for the data callback that Init() stored in cb_.
+auto SpeakerCapturerWasapi::GetCallback() -> speaker_data_cb {
+  return this->cb_;
+}

 SpeakerCapturerWasapi::SpeakerCapturerWasapi() {}

 SpeakerCapturerWasapi::~SpeakerCapturerWasapi() {
-  if (inited_ && capture_thread_->joinable()) {
-    capture_thread_->join();
-    inited_ = false;
-  }
-
-  CoTaskMemFree(pwfx);
-  SAFE_RELEASE(pEnumerator)
-  SAFE_RELEASE(pDevice)
-  SAFE_RELEASE(pAudioClient)
-  SAFE_RELEASE(pCaptureClient)
-
+  // Tear the device down before this object disappears: Init() stores `this`
+  // as the device's pUserData and data_callback dereferences it.
+  if (inited_) {
+    ma_device_uninit(&device_);
+    inited_ = false;
+  }
+
  if (SAVE_AUDIO_FILE) {
-    fclose(fp);
+    // fp_ is null when the dump file was never opened; fclose needs a valid
+    // stream.
+    if (fp_ != nullptr) {
+      fclose(fp_);
+      fp_ = nullptr;
+    }
  }
-
-  // if (pData_dst) delete pData_dst;
-  // pData_dst = nullptr;
 }

 int SpeakerCapturerWasapi::Init(speaker_data_cb cb) {
+  // Sets up a WASAPI loopback capture device that reports data through `cb`.
+  // Returns 0 on success (or when already initialized) and -1 on failure.
+  if (inited_) {
+    return 0;
+  }
+
+  // A null callback could never receive any data, so refuse to set up
+  // capture without one.
+  if (!cb) {
+    LOG_ERROR("Speaker data callback is null");
+    return -1;
+  }
+
  cb_ = cb;

  if (SAVE_AUDIO_FILE) {
-    fopen_s(&fp, "system_audio.pcm", "wb");
+    fopen_s(&fp_, "system_audio.pcm", "wb");
  }

-  HRESULT hr;
+  ma_result result;
+  // Restrict miniaudio to the WASAPI backend.
+  ma_backend backends[] = {ma_backend_wasapi};

-  hr = CoCreateInstance(CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL,
-                        IID_IMMDeviceEnumerator, (void **)&pEnumerator);
-  CHECK_HR(hr)
+  // Loopback device configured with the file-level format, channel count and
+  // sample rate; `this` is handed to data_callback through pUserData.
+  device_config_ = ma_device_config_init(ma_device_type_loopback);
+  device_config_.capture.pDeviceID = NULL;
+  device_config_.capture.format = format_;
+  device_config_.capture.channels = channels_;
+  device_config_.sampleRate = sample_rate_;
+  device_config_.dataCallback = data_callback;
+  device_config_.pUserData = this;

-  hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole,
-                                            &pDevice);  // 输出
-  CHECK_HR(hr)
-
-  hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr,
-                         (void **)&pAudioClient);
-  CHECK_HR(hr)
-
-  hr = pAudioClient->GetMixFormat(&pwfx);
-  CHECK_HR(hr)
-
-  // Change to 16bit
-  if (pwfx->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) {
-    pwfx->wFormatTag = WAVE_FORMAT_PCM;
-    pwfx->wBitsPerSample = 16;
-    pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
-    pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
-  } else if (pwfx->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
-    PWAVEFORMATEXTENSIBLE pEx = reinterpret_cast<PWAVEFORMATEXTENSIBLE>(pwfx);
-    if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, pEx->SubFormat)) {
-      pEx->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
-      pEx->Samples.wValidBitsPerSample = 16;
-      pwfx->wBitsPerSample = 16;
-      pwfx->nBlockAlign = pwfx->nChannels * pwfx->wBitsPerSample / 8;
-      pwfx->nAvgBytesPerSec = pwfx->nBlockAlign * pwfx->nSamplesPerSec;
-    }
+  result = ma_device_init_ex(backends, sizeof(backends) / sizeof(backends[0]),
+                             NULL, &device_config_, &device_);
+  if (result != MA_SUCCESS) {
+    LOG_ERROR("Failed to initialize loopback device");
+    return -1;
  }

-  hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED,
-                                AUDCLNT_STREAMFLAGS_LOOPBACK, 0, 0, pwfx,
-                                nullptr);
-  CHECK_HR(hr)
-
-  // Get the size of the allocated buffer.
-  hr = pAudioClient->GetBufferSize(&bufferFrameCount);
-  CHECK_HR(hr)
-
-  hr = pAudioClient->GetService(IID_IAudioCaptureClient,
-                                (void **)&pCaptureClient);
-  CHECK_HR(hr)
-
-  // Show audio info
-  {
-    printf("wFormatTag is %x\n", pwfx->wFormatTag);
-    printf("nChannels is %x\n", pwfx->nChannels);
-    printf("nSamplesPerSec is %d\n", pwfx->nSamplesPerSec);
-    printf("nAvgBytesPerSec is %d\n", pwfx->nAvgBytesPerSec);
-    printf("wBitsPerSample is %d\n", pwfx->wBitsPerSample);
-  }
-
-  hnsActualDuration =
-      (double)REFTIMES_PER_SEC * bufferFrameCount / pwfx->nSamplesPerSec;
-
-  // pData_dst = new BYTE[960];
-
  inited_ = true;

  return 0;
 }

 int SpeakerCapturerWasapi::Start() {
-  HRESULT hr;
-  hr = pAudioClient->Start();
-  CHECK_HR(hr)
-
-  capture_thread_.reset(new std::thread([this]() {
-    HRESULT hr;
-
-    // Each loop fills about half of the shared buffer.
-    while (1) {
-      // Sleep for half the buffer duration.
-      Sleep(hnsActualDuration / REFTIMES_PER_MILLISEC / 4);
-
-      hr = pCaptureClient->GetNextPacketSize(&packetLength);
-      CHECK_HR(hr)
-
-      while (packetLength != 0) {
-        // Get the available data in the shared buffer.
-        hr = pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags,
-                                       nullptr, &ts);
-        CHECK_HR(hr)
-
-        // flags equals to 2 means silence, set data to nullptr
-        if (flags == AUDCLNT_BUFFERFLAGS_SILENT) {
-          pData = nullptr;
-        }
-
-        if (pData != nullptr) {
-          size_t size = numFramesAvailable * pwfx->nBlockAlign;
-
-          for (int i = 0; i < size / 2; i++) {
-            BYTE left = pData[i * 2];
-            BYTE right = pData[i * 2 + 1];
-            // Right channel only?
-            BYTE monoSample = right;
-
-            pData_dst[i] = static_cast<BYTE>(monoSample);
-          }
-
-          cb_(pData_dst, size / 2);
-
-          if (SAVE_AUDIO_FILE) {
-            fwrite(pData_dst, size / 2, 1, fp);
-          }
-        }
-
-        hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
-        CHECK_HR(hr)
-
-        hr = pCaptureClient->GetNextPacketSize(&packetLength);
-        CHECK_HR(hr)
-      }
-    }
-  }));
+  // Starts the device created by Init(). Returns 0 on success, -1 on failure.
+  const ma_result start_result = ma_device_start(&device_);
+  if (start_result != MA_SUCCESS) {
+    // The device is torn down on failure, so clear inited_ as well; otherwise
+    // a later Init() would return early and never create a new device.
+    ma_device_uninit(&device_);
+    inited_ = false;
+    LOG_ERROR("Failed to start device");
+    return -1;
+  }

  return 0;
 }

 int SpeakerCapturerWasapi::Stop() {
-  HRESULT hr;
-  hr = pAudioClient->Stop();
-  CHECK_HR(hr)
+  // Stops capture by uninitializing the device. Always returns 0.
+  // Only tear down a device this instance set up, and clear inited_ so a
+  // later Init() creates a new device instead of returning early.
+  if (inited_) {
+    ma_device_uninit(&device_);
+    inited_ = false;
+  }
  return 0;
 }

--- a/src/speaker_capturer/windows/speaker_capturer_wasapi.h
+++ b/src/speaker_capturer/windows/speaker_capturer_wasapi.h
@@ -1,20 +1,12 @@
 /*
 * @Author: DI JUNKUN
- * @Date: 2024-07-22
+ * @Date: 2024-08-15
 * Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
 */

 #ifndef _SPEAKER_CAPTURER_WASAPI_H_
 #define _SPEAKER_CAPTURER_WASAPI_H_

-#include <Audioclient.h>
-#include <Devicetopology.h>
-#include <Endpointvolume.h>
-#include <Mmdeviceapi.h>
-
-#include <thread>
-#include <vector>
-
 #include "speaker_capturer.h"

 class SpeakerCapturerWasapi : public SpeakerCapturer {
@@ -31,31 +23,13 @@ class SpeakerCapturerWasapi : public SpeakerCapturer {
  int Pause();
  int Resume();

+  speaker_data_cb GetCallback();
+
 private:
  speaker_data_cb cb_ = nullptr;

 private:
-  REFERENCE_TIME hnsActualDuration;
-  UINT32 bufferFrameCount;
-  UINT32 numFramesAvailable;
-  BYTE *pData;
-  // std::vector<BYTE> pData_dst;
-  BYTE pData_dst[960];
-  DWORD flags;
-
-  // REFERENCE_TIME hnsRequestedDuration = 10000000;
-  IMMDeviceEnumerator *pEnumerator = NULL;
-  IMMDevice *pDevice = NULL;
-  IAudioClient *pAudioClient = NULL;
-  IAudioCaptureClient *pCaptureClient = NULL;
-  WAVEFORMATEX *pwfx = NULL;
-  UINT32 packetLength = 0;
-  UINT64 pos, ts;
-  FILE *fp;
-
  bool inited_ = false;
-  // thread
-  std::unique_ptr<std::thread> capture_thread_ = nullptr;
 };

 #endif