Enable audio transmission

2025-12-17 12:42:51 +08:00 · 2023-11-29 22:04:53 -08:00
parent 733434f9b3
commit 4a65a59803
12 changed files with 1520 additions and 271 deletions
--- a/test/audio_capture/audio_capture.cpp
+++ b/test/audio_capture/audio_capture.cpp
@@ -1,83 +0,0 @@
-#include <stdio.h>
-
-#ifdef _WIN32
-// Windows
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavdevice/avdevice.h>
-#include <libavformat/avformat.h>
-#include <libavutil/imgutils.h>
-#include <libswscale/swscale.h>
-};
-#else
-// Linux...
-#ifdef __cplusplus
-extern "C" {
-#endif
-#include <libavcodec/avcodec.h>
-#include <libavdevice/avdevice.h>
-#include <libavformat/avformat.h>
-#include <libavutil/imgutils.h>
-#include <libswscale/swscale.h>
-#ifdef __cplusplus
-};
-#endif
-#endif
-
-int main(int argc, char **argv) {
-  int ret = 0;
-  char errors[1024] = {0};
-  // context
-  AVFormatContext *fmt_ctx = NULL;  // ffmpeg<65>µġ<C2B5><C4A1>ļ<EFBFBD><C4BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
-
-  // paket
-  int count = 0;
-  AVPacket pkt;
-
-  // create file
-  char *out = "audio_old.pcm";
-  FILE *outfile = fopen(out, "wb+");
-
-  char *devicename = "default";
-  // register audio device
-  avdevice_register_all();
-
-  // get format
-  AVInputFormat *iformat = (AVInputFormat *)av_find_input_format("sndio");
-
-  // open audio
-  if ((ret = avformat_open_input(&fmt_ctx, devicename, iformat, NULL)) < 0) {
-    av_strerror(ret, errors, 1024);
-    printf("Failed to open audio device, [%d]%s\n", ret, errors);
-    return -1;
-  }
-
-  // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD><C6B5><EFBFBD><EFBFBD>Ϣ
-  if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
-    printf("111\n");
-    return -1;
-  }
-
-  // Ѱ<>ҵ<EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD>Ƶ<EFBFBD><C6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
-  int audioStreamIndex =
-      av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
-  if (audioStreamIndex < 0) {
-    printf("222\n");
-    return -1;
-  }
-
-  av_init_packet(&pkt);
-  // read data form audio
-  while (ret = (av_read_frame(fmt_ctx, &pkt)) == 0 && count++ < 10000) {
-    av_log(NULL, AV_LOG_INFO, "pkt size is %d(%p), count=%d\n", pkt.size,
-           pkt.data, count);
-    fwrite(pkt.data, 1, pkt.size, outfile);
-    fflush(outfile);
-    av_packet_unref(&pkt);  // release pkt
-  }
-
-  fclose(outfile);
-  avformat_close_input(&fmt_ctx);  // releas ctx
-
-  return 0;
-}
--- a/test/audio_capture/ffmpeg_audio.cpp
+++ b/test/audio_capture/ffmpeg_audio.cpp
@@ -0,0 +1,232 @@
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavdevice/avdevice.h>
+#include <libavfilter/avfilter.h>
+#include <libavformat/avformat.h>
+#include <libavutil/channel_layout.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/opt.h>
+#include <libavutil/samplefmt.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+};
+
+static int get_format_from_sample_fmt(const char **fmt,
+                                      enum AVSampleFormat sample_fmt) {
+  // Packed sample formats paired with their big-/little-endian format names.
+  static const struct {
+    enum AVSampleFormat sample_fmt;
+    const char *fmt_be, *fmt_le;
+  } known_formats[] = {
+      {AV_SAMPLE_FMT_U8, "u8", "u8"},
+      {AV_SAMPLE_FMT_S16, "s16be", "s16le"},
+      {AV_SAMPLE_FMT_S32, "s32be", "s32le"},
+      {AV_SAMPLE_FMT_FLT, "f32be", "f32le"},
+      {AV_SAMPLE_FMT_DBL, "f64be", "f64le"},
+  };
+  *fmt = NULL;
+
+  for (size_t k = 0; k < FF_ARRAY_ELEMS(known_formats); k++) {
+    if (known_formats[k].sample_fmt != sample_fmt) continue;
+    // Table hit: report the name via AV_NE(big-endian, little-endian).
+    *fmt = AV_NE(known_formats[k].fmt_be, known_formats[k].fmt_le);
+    return 0;
+  }
+
+  // No table entry for this format: *fmt stays NULL, AVERROR(EINVAL) results.
+  fprintf(stderr, "Sample format %s not supported as output format\n",
+          av_get_sample_fmt_name(sample_fmt));
+  return AVERROR(EINVAL);
+}
+
+/**
+ * Fill dst with nb_samples interleaved frames of a 440 Hz sine tone that
+ * continues from time *t; *t is advanced by 1/sample_rate per frame. */
+static void fill_samples(double *dst, int nb_samples, int nb_channels,
+                         int sample_rate, double *t) {
+  int i, j;
+  double tincr = 1.0 / sample_rate, *dstp = dst;
+  const double c = 2 * M_PI * 440.0;
+
+  /* generate sin tone with 440Hz frequency and duplicated channels */
+  for (i = 0; i < nb_samples; i++) {
+    *dstp = sin(c * *t);
+    for (j = 1; j < nb_channels; j++) dstp[j] = dstp[0];
+    dstp += nb_channels;
+    *t += tincr;
+  }
+}
+
+int main(int argc, char **argv) {
+  // Resample about 10 s of a generated 440 Hz tone and write it to res.pcm.
+  int64_t src_ch_layout = AV_CH_LAYOUT_MONO;  // input (source) parameters
+  int src_rate = 44100;
+  enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL;
+  int src_nb_channels = 0;
+  uint8_t **src_data = NULL;  // pointer to pointer (array of data pointers)
+  int src_linesize;
+  int src_nb_samples = 1024;
+
+  // Output (destination) parameters
+  int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
+  int dst_rate = 48000;
+  enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
+  int dst_nb_channels = 0;
+  uint8_t **dst_data = NULL;  // pointer to pointer (array of data pointers)
+  int dst_linesize;
+  int dst_nb_samples;
+  int max_dst_nb_samples;
+
+  // Output file
+  const char *dst_filename = NULL;  // PCM saved locally, then played back to verify
+  FILE *dst_file;
+
+  int dst_bufsize;
+  const char *fmt;
+
+  // Resampler instance
+  struct SwrContext *swr_ctx;
+
+  double t;
+  int ret;
+
+  dst_filename = "res.pcm";
+
+  dst_file = fopen(dst_filename, "wb");
+  if (!dst_file) {
+    fprintf(stderr, "Could not open destination file %s\n", dst_filename);
+    exit(1);
+  }
+
+  // Create the resampler
+  /* create resampler context */
+  swr_ctx = swr_alloc();
+  if (!swr_ctx) {
+    fprintf(stderr, "Could not allocate resampler context\n");
+    ret = AVERROR(ENOMEM);
+    goto end;
+  }
+
+  // Set the resampling options
+  /* set options */
+  // Input parameters
+  av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
+  av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
+  // Output parameters
+  av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
+  av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
+
+  // Initialize the resampler
+  /* initialize the resampling context */
+  if ((ret = swr_init(swr_ctx)) < 0) {
+    fprintf(stderr, "Failed to initialize the resampling context\n");
+    goto end;
+  }
+
+  /* allocate source and destination samples buffers */
+  // Number of channels in the source layout
+  src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
+  // Allocate memory for the source samples
+  ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize,
+                                           src_nb_channels, src_nb_samples,
+                                           src_sample_fmt, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Could not allocate source samples\n");
+    goto end;
+  }
+
+  /* compute the number of converted samples: buffering is avoided
+   * ensuring that the output buffer will contain at least all the
+   * converted input samples */
+  // Number of output samples
+  max_dst_nb_samples = dst_nb_samples =
+      av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
+
+  /* buffer is going to be directly written to a rawaudio file, no alignment
+   */
+  dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
+  // Allocate memory for the output samples
+  ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize,
+                                           dst_nb_channels, dst_nb_samples,
+                                           dst_sample_fmt, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Could not allocate destination samples\n");
+    goto end;
+  }
+
+  t = 0;
+  do {
+    /* generate synthetic audio */
+    // Generate the input source
+    fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels,
+                 src_rate, &t);
+
+    /* compute destination number of samples */
+    int64_t delay = swr_get_delay(swr_ctx, src_rate);
+    dst_nb_samples =
+        av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
+    if (dst_nb_samples > max_dst_nb_samples) {
+      av_freep(&dst_data[0]);
+      ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
+                             dst_nb_samples, dst_sample_fmt, 1);
+      if (ret < 0) goto end;  // buffer is gone: skip the flush below, keep ret
+      max_dst_nb_samples = dst_nb_samples;
+    }
+    //        int fifo_size = swr_get_out_samples(swr_ctx,src_nb_samples);
+    //        printf("fifo_size:%d\n", fifo_size);
+    //        if(fifo_size < 1024)
+    //            continue;
+
+    /* convert to destination format */
+    //        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const
+    //        uint8_t **)src_data, src_nb_samples);
+    ret = swr_convert(swr_ctx, dst_data, dst_nb_samples,
+                      (const uint8_t **)src_data, src_nb_samples);
+    if (ret < 0) {
+      fprintf(stderr, "Error while converting\n");
+      goto end;
+    }
+    dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
+                                             ret, dst_sample_fmt, 1);
+    if (dst_bufsize < 0) {
+      fprintf(stderr, "Could not get sample buffer size\n");
+      goto end;
+    }
+    printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
+    fwrite(dst_data[0], 1, dst_bufsize, dst_file);
+  } while (t < 10);
+  // Flush: no new input, only drain what the resampler still holds.
+  ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, NULL, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Error while converting\n");
+    goto end;
+  }
+  dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret,
+                                           dst_sample_fmt, 1);
+  if (dst_bufsize < 0) {
+    fprintf(stderr, "Could not get sample buffer size\n");
+    goto end;
+  }
+  printf("flush in:%d out:%d\n", 0, ret);
+  fwrite(dst_data[0], 1, dst_bufsize, dst_file);
+
+  if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0) goto end;
+  fprintf(stderr,
+          "Resampling succeeded. Play the output file with the command:\n"
+          "ffplay -f %s -channel_layout %" PRId64 " -channels %d -ar %d %s\n",
+          fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);
+
+end:
+  fclose(dst_file);
+
+  if (src_data) av_freep(&src_data[0]);
+  av_freep(&src_data);
+
+  if (dst_data) av_freep(&dst_data[0]);
+  av_freep(&dst_data);
+
+  swr_free(&swr_ctx);
+  return ret < 0;
+}
--- a/test/audio_capture/output_audio.cpp
+++ b/test/audio_capture/output_audio.cpp
@@ -0,0 +1,53 @@
+#include <SDL2/SDL.h>
+
+int main(int argc, char *argv[]) {
+  // Sketch: queue one PCM buffer and wait until SDL has consumed it.
+  SDL_AudioSpec wanted_spec, obtained_spec;
+
+  // Initialize SDL
+  if (SDL_Init(SDL_INIT_AUDIO) < 0) {
+    SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Failed to initialize SDL: %s",
+                 SDL_GetError());
+    return -1;
+  }
+
+  // Set audio format (zero first so no field is left indeterminate)
+  SDL_zero(wanted_spec);
+  wanted_spec.freq = 44100;  // Sample rate
+  wanted_spec.format = AUDIO_F32SYS;  // 32-bit float, system byte order
+  wanted_spec.channels = 2;  // Number of channels (stereo)
+  wanted_spec.samples = 1024;   // Buffer size (in samples)
+  wanted_spec.callback = NULL;  // Audio callback function (not used here)
+
+  // Open audio device
+  int ret = SDL_OpenAudio(&wanted_spec, &obtained_spec);
+  if (ret < 0) {
+    SDL_LogError(SDL_LOG_CATEGORY_APPLICATION,
+                 "Failed to open audio device: %s", SDL_GetError());
+    return -1;
+  }
+
+  // Start playing audio
+  SDL_PauseAudio(0);
+
+  // Queue PCM data. SDL_OpenAudio() always opens the legacy device, ID 1.
+  float *pcm_data = ...;    // PCM data buffer (float, interleaved)
+  int pcm_data_size = ...;  // Size of PCM data buffer (in bytes)
+  int queue_status = SDL_QueueAudio(1, pcm_data, pcm_data_size);
+
+  // Wait until audio buffer is empty
+  while (SDL_GetQueuedAudioSize(1) > 0) {
+    SDL_Delay(100);
+  }
+
+  // Stop playing audio
+  SDL_PauseAudio(1);
+
+  // Close audio device
+  SDL_CloseAudio();
+
+  // Quit SDL
+  SDL_Quit();
+
+  return 0;
+}
--- a/test/audio_capture/play_audio.cpp
+++ b/test/audio_capture/play_audio.cpp
@@ -0,0 +1,89 @@
+#include <SDL2/SDL.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char* argv[]) {
+  // Plays raw PCM (48 kHz, stereo, S16LE) from ls.pcm through SDL_QueueAudio.
+  if (SDL_Init(SDL_INIT_AUDIO)) {
+    printf("SDL init error\n");
+    return -1;
+  }
+
+  // SDL_AudioSpec
+  SDL_AudioSpec wanted_spec;
+  SDL_zero(wanted_spec);
+  wanted_spec.freq = 48000;
+  wanted_spec.format = AUDIO_S16LSB;
+  wanted_spec.channels = 2;
+  wanted_spec.silence = 0;
+  wanted_spec.samples = 960;
+  wanted_spec.callback = NULL;
+
+  SDL_AudioDeviceID deviceID = 0;
+  // Open the device
+  if ((deviceID = SDL_OpenAudioDevice(NULL, 0, &wanted_spec, NULL,
+                                      SDL_AUDIO_ALLOW_FREQUENCY_CHANGE)) < 2) {
+    printf("could not open audio device: %s\n", SDL_GetError());
+
+    // Shut down all SDL subsystems
+    SDL_Quit();
+    return -1;
+  }
+
+  SDL_PauseAudioDevice(deviceID, 0);
+
+  FILE* fp = nullptr;
+  Uint32 buffer_size = 4096;
+  char* buffer = (char*)malloc(buffer_size);
+
+  fopen_s(&fp, "ls.pcm", "rb+");
+  if (fp == NULL) {
+    printf("cannot open this file\n");
+    free(buffer);
+    SDL_CloseAudioDevice(deviceID);
+    SDL_Quit();
+    return -1;
+  }
+
+  if (buffer == NULL) {
+    printf("out of memory\n");
+    fclose(fp);
+    SDL_CloseAudioDevice(deviceID);
+    SDL_Quit();
+    return -1;
+  }
+
+  // Queue the whole file, including a final chunk shorter than buffer_size.
+  while (true) {
+    size_t got = fread(buffer, 1, buffer_size, fp);
+    if (got > 0) SDL_QueueAudio(deviceID, buffer, (Uint32)got);
+    if (got != buffer_size) {
+      printf("end of file\n");
+      break;
+    }
+  }
+
+  printf("Play...\n");
+
+  // Fixed 10 s wait while SDL plays the queue; the disabled loop below polls
+  // the queue size instead.
+  SDL_Delay(10000);
+
+  // Uint32 residueAudioLen = 0;
+
+  // while (true) {
+  //   residueAudioLen = SDL_GetQueuedAudioSize(deviceID);
+  //   printf("%10d\n", residueAudioLen);
+  //   if (residueAudioLen <= 0) break;
+  //   SDL_Delay(1);
+  // }
+
+  // SDL_CloseAudio() only closes the legacy device; close ours by ID.
+  SDL_CloseAudioDevice(deviceID);
+  SDL_Quit();
+  fclose(fp);
+  free(buffer);
+
+  return 0;
+}
--- a/test/audio_capture/play_loopback.cpp
+++ b/test/audio_capture/play_loopback.cpp
@@ -0,0 +1,225 @@
+#include <SDL2/SDL.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavdevice/avdevice.h>
+#include <libavfilter/avfilter.h>
+#include <libavformat/avformat.h>
+#include <libavutil/channel_layout.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/opt.h>
+#include <libavutil/samplefmt.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+};
+
+static SDL_AudioDeviceID input_dev;
+static SDL_AudioDeviceID output_dev;
+
+static Uint8 *buffer = 0;
+static int in_pos = 0;
+static int out_pos = 0;
+// Input (source) parameters for the resampler
+int64_t src_ch_layout = AV_CH_LAYOUT_MONO;
+int src_rate = 48000;
+enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_S16;
+int src_nb_channels = 0;
+uint8_t **src_data = NULL;  // pointer to pointer (array of data pointers)
+int src_linesize;
+int src_nb_samples = 480;
+
+// Output (destination) parameters
+int64_t dst_ch_layout = AV_CH_LAYOUT_MONO;
+int dst_rate = 48000;
+enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
+int dst_nb_channels = 0;
+uint8_t **dst_data = NULL;  // pointer to pointer (array of data pointers)
+int dst_linesize;
+int dst_nb_samples;
+int max_dst_nb_samples;
+static unsigned char audio_buffer[960 * 3];
+static int audio_len = 0;
+
+// Output file
+const char *dst_filename = NULL;  // PCM saved locally, then played back to verify
+FILE *dst_file;
+
+int dst_bufsize;
+const char *fmt;
+
+// Resampler instance
+struct SwrContext *swr_ctx;
+
+double t;
+int ret;
+
+const char *out = "audio_old.pcm";  // string literals are const in C++
+FILE *outfile = fopen(out, "wb+");
+
+void cb_in(void *userdata, Uint8 *stream, int len) {
+  // Capture callback: runs src_nb_samples from stream through swr_ctx and
+  // keeps the converted bytes in audio_buffer for cb_out.
+  int64_t delay = swr_get_delay(swr_ctx, src_rate);
+  dst_nb_samples =
+      av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
+  if (dst_nb_samples > max_dst_nb_samples) {
+    av_freep(&dst_data[0]);
+    ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
+                           dst_nb_samples, dst_sample_fmt, 1);
+    if (ret < 0) return;
+    max_dst_nb_samples = dst_nb_samples;
+  }
+
+  ret = swr_convert(swr_ctx, dst_data, dst_nb_samples,
+                    (const uint8_t **)&stream, src_nb_samples);
+  if (ret < 0) {
+    fprintf(stderr, "Error while converting\n");
+    return;
+  }
+  dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels, ret,
+                                           dst_sample_fmt, 1);
+  if (dst_bufsize < 0) {
+    fprintf(stderr, "Could not get sample buffer size\n");
+    return;
+  }
+  printf("t:%f in:%d out:%d %d\n", t, src_nb_samples, ret, len);
+  // Store only what was converted, never more than audio_buffer can hold.
+  if (dst_bufsize > (int)sizeof(audio_buffer))
+    dst_bufsize = (int)sizeof(audio_buffer);
+  memcpy(audio_buffer, dst_data[0], dst_bufsize);
+  audio_len = dst_bufsize;
+}
+
+void cb_out(void *userdata, Uint8 *stream, int len) {
+  // Playback callback: clear the stream, then mix in the last captured chunk.
+  printf("cb_out len = %d\n", len);
+  SDL_memset(stream, 0, len);
+  if (audio_len == 0) return;
+  const int mix_len = len > audio_len ? audio_len : len;
+  SDL_MixAudioFormat(stream, audio_buffer, AUDIO_S16LSB, mix_len,
+                     SDL_MIX_MAXVOLUME);
+}
+
+// Opens res.pcm, configures swr_ctx and allocates the src/dst sample buffers.
+// Returns 0 on success and -1 on failure (exits if res.pcm cannot be opened).
+int init() {
+  dst_filename = "res.pcm";
+
+  dst_file = fopen(dst_filename, "wb");
+  if (!dst_file) {
+    fprintf(stderr, "Could not open destination file %s\n", dst_filename);
+    exit(1);
+  }
+
+  /* create resampler context */
+  swr_ctx = swr_alloc();
+  if (!swr_ctx) {
+    fprintf(stderr, "Could not allocate resampler context\n");
+    ret = AVERROR(ENOMEM);
+    return -1;
+  }
+
+  /* set options */
+  // input parameters
+  av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
+  av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
+  // output parameters
+  av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
+  av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
+
+  /* initialize the resampling context */
+  if ((ret = swr_init(swr_ctx)) < 0) {
+    fprintf(stderr, "Failed to initialize the resampling context\n");
+    return -1;
+  }
+
+  /* allocate source and destination samples buffers */
+  // number of channels in the source layout
+  src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
+  // allocate memory for the source samples
+  ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize,
+                                           src_nb_channels, src_nb_samples,
+                                           src_sample_fmt, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Could not allocate source samples\n");
+    return -1;
+  }
+
+  /* compute the number of converted samples: buffering is avoided
+   * ensuring that the output buffer will contain at least all the
+   * converted input samples */
+  // number of output samples
+  max_dst_nb_samples = dst_nb_samples =
+      av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
+
+  /* buffer is going to be directly written to a rawaudio file, no alignment */
+  dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
+  // allocate memory for the output samples
+  ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize,
+                                           dst_nb_channels, dst_nb_samples,
+                                           dst_sample_fmt, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Could not allocate destination samples\n");
+    return -1;
+  }
+  return 0;
+}
+
+int main() {
+  // Loopback test: opens a capture device (cb_in) and a playback device (cb_out).
+  init();
+  SDL_Init(SDL_INIT_AUDIO);
+
+  // Scratch buffer kept from the capture test; the callbacks here do not use it
+  buffer = (Uint8 *)malloc(16777215);
+
+  SDL_AudioSpec want_in, want_out, have_in, have_out;
+
+  SDL_zero(want_in);
+  want_in.freq = 48000;
+  want_in.format = AUDIO_S16LSB;
+  want_in.channels = 1;
+  want_in.samples = 480;
+  want_in.callback = cb_in;
+
+  input_dev = SDL_OpenAudioDevice(NULL, 1, &want_in, &have_in, 0);
+  // Only print have_in once the open is known to have succeeded.
+  if (input_dev == 0) {
+    SDL_Log("Failed to open input: %s", SDL_GetError());
+    return 1;
+  }
+  printf("%d %d %d %d\n", have_in.freq, have_in.format, have_in.channels,
+         have_in.samples);
+
+  SDL_zero(want_out);
+  want_out.freq = 48000;
+  want_out.format = AUDIO_S16LSB;
+  want_out.channels = 1;
+  want_out.samples = 480;
+  want_out.callback = cb_out;
+
+  output_dev = SDL_OpenAudioDevice(NULL, 0, &want_out, &have_out, 0);
+  // Only print have_out once the open is known to have succeeded.
+  if (output_dev == 0) {
+    SDL_Log("Failed to open output: %s", SDL_GetError());
+    return 1;
+  }
+  printf("%d %d %d %d\n", have_out.freq, have_out.format, have_out.channels,
+         have_out.samples);
+
+  SDL_PauseAudioDevice(input_dev, 0);
+  SDL_PauseAudioDevice(output_dev, 0);
+
+  // Runs until killed; sleep instead of spinning in an empty loop.
+  while (1) SDL_Delay(100);
+
+  SDL_CloseAudioDevice(output_dev);
+  SDL_CloseAudioDevice(input_dev);
+  free(buffer);
+
+  fclose(outfile);
+}
--- a/test/audio_capture/resample.cpp
+++ b/test/audio_capture/resample.cpp
@@ -0,0 +1,95 @@
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libswresample/swresample.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[]) {
+  // Converts one frame of silence (44.1 kHz stereo FLTP) to 48 kHz stereo S16.
+  int ret = AVERROR(ENOMEM);  // stays negative if an allocation below fails
+  char err[AV_ERROR_MAX_STRING_SIZE] = {0};
+  AVFrame *frame = NULL;
+  AVFrame *resampled_frame = NULL;
+  SwrContext *swr_ctx = NULL;
+
+  av_log_set_level(AV_LOG_INFO);
+
+  frame = av_frame_alloc();
+  if (!frame) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to allocate input frame\n");
+    goto end;
+  }
+
+  resampled_frame = av_frame_alloc();
+  if (!resampled_frame) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to allocate output frame\n");
+    goto end;
+  }
+
+  // Set input frame properties
+  frame->format = AV_SAMPLE_FMT_FLTP;  // Input sample format (float planar)
+  frame->channel_layout = AV_CH_LAYOUT_STEREO;  // Input channel layout (stereo)
+  frame->channels = av_get_channel_layout_nb_channels(frame->channel_layout);
+  frame->sample_rate = 44100;                   // Input sample rate (44100 Hz)
+  frame->nb_samples = 1024;                     // Number of input samples
+
+  // Set output frame properties
+  resampled_frame->format = AV_SAMPLE_FMT_S16;  // Output format (signed 16-bit)
+  resampled_frame->channel_layout = AV_CH_LAYOUT_STEREO;  // Output layout
+  resampled_frame->channels =
+      av_get_channel_layout_nb_channels(resampled_frame->channel_layout);
+  resampled_frame->sample_rate = 48000;  // Output sample rate (48000 Hz)
+  resampled_frame->nb_samples = av_rescale_rnd(
+      frame->nb_samples, resampled_frame->sample_rate, frame->sample_rate,
+      AV_ROUND_UP);  // Number of output samples
+
+  // Initialize resampler context; layouts and formats are passed through as-is
+  swr_ctx = swr_alloc_set_opts(
+      NULL, resampled_frame->channel_layout,
+      (enum AVSampleFormat)resampled_frame->format,
+      resampled_frame->sample_rate, frame->channel_layout,
+      (enum AVSampleFormat)frame->format, frame->sample_rate, 0, NULL);
+  if (!swr_ctx) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to allocate resampler context\n");
+    goto end;
+  }
+
+  if ((ret = swr_init(swr_ctx)) < 0) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to initialize resampler context: %s\n",
+           av_make_error_string(err, sizeof(err), ret));
+    goto end;
+  }
+
+  // Allocate the input buffer and fill it with silence so there is data to read
+  if ((ret = av_frame_get_buffer(frame, 0)) < 0) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to allocate input samples buffer: %s\n",
+           av_make_error_string(err, sizeof(err), ret));
+    goto end;
+  }
+  av_samples_set_silence(frame->extended_data, 0, frame->nb_samples,
+                         frame->channels, (enum AVSampleFormat)frame->format);
+
+  // Allocate buffer for output samples (owned by the frame, freed with it)
+  if ((ret = av_frame_get_buffer(resampled_frame, 0)) < 0) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to allocate output samples buffer: %s\n",
+           av_make_error_string(err, sizeof(err), ret));
+    goto end;
+  }
+
+  // Resample the input data
+  ret = swr_convert(swr_ctx, resampled_frame->data, resampled_frame->nb_samples,
+                    (const uint8_t **)frame->data, frame->nb_samples);
+  if (ret < 0) {
+    av_log(NULL, AV_LOG_ERROR, "Failed to resample input data: %s\n",
+           av_make_error_string(err, sizeof(err), ret));
+  }
+
+end:
+  swr_free(&swr_ctx);
+  av_frame_free(&frame);
+  av_frame_free(&resampled_frame);
+  return ret < 0 ? -1 : 0;
+}
--- a/test/audio_capture/sdl2_audio_capture.cpp
+++ b/test/audio_capture/sdl2_audio_capture.cpp
@@ -2,6 +2,19 @@
 #include <stdio.h>
 #include <stdlib.h>

+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavdevice/avdevice.h>
+#include <libavfilter/avfilter.h>
+#include <libavformat/avformat.h>
+#include <libavutil/channel_layout.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/opt.h>
+#include <libavutil/samplefmt.h>
+#include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
+};
+
 static SDL_AudioDeviceID input_dev;
 static SDL_AudioDeviceID output_dev;

@@ -9,54 +22,152 @@ static Uint8 *buffer = 0;
 static int in_pos = 0;
 static int out_pos = 0;

+int64_t src_ch_layout = AV_CH_LAYOUT_MONO;  // NOTE(review): device is requested with channels = 2 — confirm
+int src_rate = 48000;
+enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_FLT;
+int src_nb_channels = 0;
+uint8_t **src_data = NULL;  // pointer to pointer (array of data pointers)
+int src_linesize;
+int src_nb_samples = 480;  // NOTE(review): device is requested with samples = 960 — confirm
+
+// Output (destination) parameters
+int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
+int dst_rate = 48000;
+enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
+int dst_nb_channels = 0;
+uint8_t **dst_data = NULL;  // pointer to pointer (array of data pointers)
+int dst_linesize;
+int dst_nb_samples;
+int max_dst_nb_samples;
+
+// Output file
+const char *dst_filename = NULL;  // PCM saved locally, then played back to verify
+FILE *dst_file;
+
+int dst_bufsize;
+const char *fmt;
+
+// Resampler instance
+struct SwrContext *swr_ctx;
+
+double t;
+int ret;
+
 char *out = "audio_old.pcm";
 FILE *outfile = fopen(out, "wb+");

 void cb_in(void *userdata, Uint8 *stream, int len) {
  // If len < 4, the printf below will probably segfault
+  {
+    fwrite(stream, 1, len, outfile);
+    fflush(outfile);
+  }
+  {
+    int64_t delay = swr_get_delay(swr_ctx, src_rate);
+    dst_nb_samples =
+        av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
+    if (dst_nb_samples > max_dst_nb_samples) {
+      av_freep(&dst_data[0]);
+      ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
+                             dst_nb_samples, dst_sample_fmt, 1);
+      if (ret < 0) return;
+      max_dst_nb_samples = dst_nb_samples;
+    }

-  // fwrite(stream, 1, len, outfile);
-  // fflush(outfile);
-
-  // SDL_memcpy(stream, buffer + in_pos, len);
-  // in_pos += len;
-  // printf("IN:  %d\t%d %d %d %d\n", in_pos, stream[0], stream[1], stream[2],
-  //        stream[3]);
+    ret = swr_convert(swr_ctx, dst_data, dst_nb_samples,
+                      (const uint8_t **)&stream, src_nb_samples);
+    if (ret < 0) {
+      fprintf(stderr, "Error while converting\n");
+      return;
+    }
+    dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
+                                             ret, dst_sample_fmt, 1);
+    if (dst_bufsize < 0) {
+      fprintf(stderr, "Could not get sample buffer size\n");
+      return;
+    }
+    printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
+    fwrite(dst_data[0], 1, dst_bufsize, dst_file);
+  }
 }

 void cb_out(void *userdata, Uint8 *stream, int len) {
  // If len < 4, the printf below will probably segfault
-  fwrite(stream, 1, len, outfile);
-  fflush(outfile);

-  // if (out_pos >= in_pos) {
-  //   // Output is way ahead of input; fill with emptiness
-  //   memset(buffer + out_pos, 0, len * sizeof(Uint8));
-  //   printf("OUT: %d\t(Empty)\n", out_pos);
-  // } else if (out_pos + len > in_pos) {
-  //   // Output is reaching input; read until reaching input, and leave the
-  //   rest
-  //   // empty
-  //   memset(buffer + out_pos, 0, len * sizeof(Uint8));
-  //   SDL_memcpy(buffer + out_pos, stream, in_pos - out_pos);
-  //   out_pos = in_pos;
-  //   printf("OUT: %d\t%d %d %d %d (Partial)\n", out_pos, stream[0], stream[1],
-  //          stream[2], stream[3]);
-  // } else {
-  //   // Input is way ahead of output; read as much as requested
-  //   SDL_memcpy(buffer + out_pos, stream, len);
-  //   out_pos += len;
-  //   printf("OUT: %d\t%d %d %d %d\n", out_pos, stream[0], stream[1],
-  //   stream[2],
-  //          stream[3]);
-  // }
+  SDL_memcpy(buffer + out_pos, stream, len);
+  out_pos += len;
+}

-  // This is to make sure the output device works
-  // for (int i = 0; i < len; i++)
-  //    stream[i] = (Uint8) random();
+// Opens res.pcm, configures swr_ctx and allocates the src/dst sample buffers.
+// Returns 0 on success and -1 on failure (exits if res.pcm cannot be opened).
+int init() {
+  dst_filename = "res.pcm";
+
+  dst_file = fopen(dst_filename, "wb");
+  if (!dst_file) {
+    fprintf(stderr, "Could not open destination file %s\n", dst_filename);
+    exit(1);
+  }
+
+  /* create resampler context */
+  swr_ctx = swr_alloc();
+  if (!swr_ctx) {
+    fprintf(stderr, "Could not allocate resampler context\n");
+    ret = AVERROR(ENOMEM);
+    return -1;
+  }
+
+  /* set options */
+  // input parameters
+  av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
+  av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
+  // output parameters
+  av_opt_set_int(swr_ctx, "out_channel_layout", dst_ch_layout, 0);
+  av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);
+
+  /* initialize the resampling context */
+  if ((ret = swr_init(swr_ctx)) < 0) {
+    fprintf(stderr, "Failed to initialize the resampling context\n");
+    return -1;
+  }
+
+  /* allocate source and destination samples buffers */
+  // number of channels in the source layout
+  src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
+  // allocate memory for the source samples
+  ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize,
+                                           src_nb_channels, src_nb_samples,
+                                           src_sample_fmt, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Could not allocate source samples\n");
+    return -1;
+  }
+
+  /* compute the number of converted samples: buffering is avoided
+   * ensuring that the output buffer will contain at least all the
+   * converted input samples */
+  // number of output samples
+  max_dst_nb_samples = dst_nb_samples =
+      av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
+
+  /* buffer is going to be directly written to a rawaudio file, no alignment */
+  dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
+  // allocate memory for the output samples
+  ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize,
+                                           dst_nb_channels, dst_nb_samples,
+                                           dst_sample_fmt, 0);
+  if (ret < 0) {
+    fprintf(stderr, "Could not allocate destination samples\n");
+    return -1;
+  }
+  return 0;
 }

 int main() {
+  init();
+
  SDL_Init(SDL_INIT_AUDIO);

  // 16Mb should be enough; the test lasts 5 seconds
@@ -64,29 +175,18 @@ int main() {

  SDL_AudioSpec want_in, want_out, have_in, have_out;

-  SDL_zero(want_out);
-  want_out.freq = 48000;
-  want_out.format = AUDIO_U16LSB;
-  want_out.channels = 2;
-  want_out.samples = 960;
-  want_out.callback = cb_out;
-
-  output_dev = SDL_OpenAudioDevice(NULL, 0, &want_out, &have_out,
-                                   SDL_AUDIO_ALLOW_ANY_CHANGE);
-  if (output_dev == 0) {
-    SDL_Log("Failed to open output: %s", SDL_GetError());
-    return 1;
-  }
-
  SDL_zero(want_in);
  want_in.freq = 48000;
-  want_in.format = AUDIO_U16LSB;
+  want_in.format = AUDIO_F32LSB;
  want_in.channels = 2;
  want_in.samples = 960;
  want_in.callback = cb_in;

  input_dev = SDL_OpenAudioDevice(NULL, 1, &want_in, &have_in,
                                  SDL_AUDIO_ALLOW_ANY_CHANGE);
+
+  printf("%d %d %d %d\n", have_in.freq, have_in.format, have_in.channels,
+         have_in.samples);
  if (input_dev == 0) {
    SDL_Log("Failed to open input: %s", SDL_GetError());
    return 1;
--- a/test/audio_capture/windows_capture.cpp
+++ b/test/audio_capture/windows_capture.cpp
@@ -0,0 +1,123 @@
+#define __STDC_CONSTANT_MACROS
+extern "C" {
+#include <libavdevice/avdevice.h>
+#include <libavformat/avformat.h>
+#include <libavutil/log.h>
+#include <libswresample/swresample.h>
+}
+
+#include <windows.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#pragma comment(lib, "avutil.lib")
+#pragma comment(lib, "avdevice.lib")
+#pragma comment(lib, "avformat.lib")
+#pragma comment(lib, "avcodec.lib")
+
+#pragma comment(lib, "Winmm.lib")
+
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+// Capture a few packets from the first Windows audio input device through
+// FFmpeg's dshow demuxer, resample them to 48 kHz stereo S16 and write the
+// raw interleaved PCM to "dst.pcm".
+//
+// The resampler input is configured as 44.1 kHz mono S16; this is an
+// assumption about what the device delivers -- TODO confirm against the
+// parameters of the opened stream. Every failure is logged through av_log and
+// ends the capture early; all acquired resources are released on every path.
+void capture_audio() {
+  // Enumerate capture devices through the Windows waveIn API (FFmpeg does not
+  // seem to offer an API for listing audio/video devices).
+  const UINT nDeviceNum = waveInGetNumDevs();
+  vector<string> vecDeviceName;
+  for (UINT i = 0; i < nDeviceNum; ++i) {
+    // Use the wide-character variant explicitly so that the UTF-16 -> UTF-8
+    // conversion below is valid whether or not UNICODE is defined.
+    WAVEINCAPSW wic;
+    if (waveInGetDevCapsW(i, &wic, sizeof(wic)) != MMSYSERR_NOERROR) {
+      av_log(NULL, AV_LOG_WARNING, "waveInGetDevCaps fail for device %u.\n",
+             i);
+      continue;
+    }
+
+    // Convert the device name to UTF-8, writing straight into a std::string
+    // (no manual new[]/delete[] pairing to get wrong).
+    const int nWide = (int)wcslen(wic.szPname);
+    const int nSize = WideCharToMultiByte(CP_UTF8, 0, wic.szPname, nWide, NULL,
+                                          0, NULL, NULL);
+    if (nSize <= 0) {
+      continue;
+    }
+    string deviceName(nSize, '\0');
+    WideCharToMultiByte(CP_UTF8, 0, wic.szPname, nWide, &deviceName[0], nSize,
+                        NULL, NULL);
+    av_log(NULL, AV_LOG_DEBUG, "audio input device : %s \n",
+           deviceName.c_str());
+    vecDeviceName.push_back(deviceName);
+  }
+  if (vecDeviceName.empty()) {
+    av_log(NULL, AV_LOG_ERROR, "not find audio input device.\n");
+    return;
+  }
+  // Use the first audio device.
+  // NOTE(review): szPname is a short fixed-size field, so long device names
+  // may be truncated and then not match the dshow name -- verify.
+  string sDeviceName = "audio=" + vecDeviceName[0];
+
+  // ffmpeg
+  avdevice_register_all();  // register the capture devices
+  AVInputFormat* ifmt =
+      (AVInputFormat*)av_find_input_format("dshow");  // capture via dshow
+  if (ifmt == NULL) {
+    av_log(NULL, AV_LOG_ERROR, "av_find_input_format for dshow fail.\n");
+    return;
+  }
+
+  AVFormatContext* fmt_ctx = NULL;  // format context
+  int ret = avformat_open_input(&fmt_ctx, sDeviceName.c_str(), ifmt,
+                                NULL);  // open the audio device
+  if (ret != 0) {
+    av_log(NULL, AV_LOG_ERROR, "avformat_open_input fail. return %d.\n", ret);
+    return;
+  }
+
+  const int64_t src_rate = 44100;
+  const int64_t dst_rate = 48000;
+  const AVSampleFormat sample_fmt = AV_SAMPLE_FMT_S16;
+  const int src_channels = 1;  // matches AV_CH_LAYOUT_MONO below
+  const int dst_channels = 2;  // matches AV_CH_LAYOUT_STEREO below
+  const int bytes_per_sample = av_get_bytes_per_sample(sample_fmt);
+
+  SwrContext* swr_ctx = NULL;
+  FILE* fp = NULL;
+  uint8_t** dst_data = NULL;
+  int dst_linesize = 0;
+  int max_dst_nb_samples = 0;  // capacity of dst_data, in samples per channel
+
+  // Release the destination sample buffer and its pointer array.
+  auto free_dst = [&]() {
+    if (dst_data != NULL) {
+      av_freep(&dst_data[0]);
+    }
+    av_freep(&dst_data);
+    max_dst_nb_samples = 0;
+  };
+  // Release everything acquired from here on; safe to call at any point.
+  auto cleanup = [&]() {
+    free_dst();
+    if (fp != NULL) {
+      fclose(fp);  // fclose flushes the stream
+      fp = NULL;
+    }
+    swr_free(&swr_ctx);
+    avformat_close_input(&fmt_ctx);
+  };
+
+  swr_ctx = swr_alloc();
+  if (swr_ctx == NULL) {
+    av_log(NULL, AV_LOG_ERROR, "swr_alloc fail.\n");
+    cleanup();
+    return;
+  }
+
+  // input parameters
+  av_opt_set_int(swr_ctx, "in_channel_layout", AV_CH_LAYOUT_MONO, 0);
+  av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", sample_fmt, 0);
+  // output parameters
+  av_opt_set_int(swr_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
+  av_opt_set_int(swr_ctx, "out_sample_rate", dst_rate, 0);
+  av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", sample_fmt, 0);
+  // initialize the SwrContext
+  ret = swr_init(swr_ctx);
+  if (ret < 0) {
+    av_log(NULL, AV_LOG_ERROR, "swr_init fail, return %d .\n", ret);
+    cleanup();
+    return;
+  }
+
+  fp = fopen("dst.pcm", "wb");
+  if (fp == NULL) {
+    av_log(NULL, AV_LOG_ERROR, "open dst.pcm fail.\n");
+    cleanup();
+    return;
+  }
+
+  AVPacket pkt;
+  int count = 0;
+  while (count++ < 10) {
+    ret = av_read_frame(fmt_ctx, &pkt);
+    if (ret != 0) {
+      av_log(NULL, AV_LOG_ERROR, "av_read_frame fail, return %d .\n", ret);
+      break;
+    }
+
+    // Derive the input sample count from the packet instead of assuming a
+    // fixed 1024, so the resampler never reads past the packet payload.
+    const int in_samples = pkt.size / (src_channels * bytes_per_sample);
+    if (in_samples <= 0) {
+      av_packet_unref(&pkt);
+      continue;
+    }
+
+    // Upper bound of output samples, including what the resampler still
+    // holds from earlier calls.
+    const int dst_nb_samples = (int)av_rescale_rnd(
+        swr_get_delay(swr_ctx, src_rate) + in_samples, dst_rate, src_rate,
+        AV_ROUND_UP);
+    if (dst_nb_samples > max_dst_nb_samples) {
+      // Grow the destination buffer only when needed; it is reused across
+      // iterations rather than re-allocated (and leaked) for every packet.
+      free_dst();
+      ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize,
+                                               dst_channels, dst_nb_samples,
+                                               sample_fmt, 0);
+      if (ret < 0) {
+        av_log(NULL, AV_LOG_ERROR,
+               "av_samples_alloc_array_and_samples fail, return %d .\n", ret);
+        av_packet_unref(&pkt);
+        break;
+      }
+      max_dst_nb_samples = dst_nb_samples;
+    }
+
+    // resample
+    const uint8_t* in_data[1] = {pkt.data};
+    const int converted =
+        swr_convert(swr_ctx, dst_data, dst_nb_samples, in_data, in_samples);
+    av_packet_unref(&pkt);  // release the packet payload to avoid a leak
+    if (converted < 0) {
+      av_log(NULL, AV_LOG_ERROR, "swr_convert fail, return %d .\n", converted);
+      break;
+    }
+
+    // S16 is a packed format: all channels are interleaved in plane 0, so
+    // write dst_data[0] and account for every output channel.
+    const size_t out_bytes =
+        (size_t)converted * (size_t)dst_channels * (size_t)bytes_per_sample;
+    if (fwrite(dst_data[0], 1, out_bytes, fp) != out_bytes) {
+      av_log(NULL, AV_LOG_ERROR, "write dst.pcm fail.\n");
+      break;
+    }
+  }
+
+  cleanup();
+}
+
+// Entry point of the capture test: raise FFmpeg's log level to debug, run a
+// single capture pass, then pause for one millisecond before exiting.
+int main(int argc, char** argv) {
+  av_log_set_level(AV_LOG_DEBUG);
+
+  capture_audio();
+  Sleep(1);
+
+  return 0;
+}