[feat] mouse/keyboard control and screen capture supported by using X11 on Linux platform

2025-12-21 15:09:48 +08:00 · 2025-05-07 19:37:41 +08:00
parent 93bd5b2660
commit 250fd49406
12 changed files with 363 additions and 454 deletions
--- a/src/screen_capturer/linux/screen_capturer_x11.cpp
+++ b/src/screen_capturer/linux/screen_capturer_x11.cpp
@@ -1,160 +1,118 @@
 #include "screen_capturer_x11.h"

-#include <iostream>
+#include <chrono>
+#include <thread>

+#include "libyuv.h"
 #include "rd_log.h"

-#define NV12_BUFFER_SIZE 1280 * 720 * 3 / 2
-unsigned char nv12_buffer_[NV12_BUFFER_SIZE];
-
+// Default constructor: performs no work here. The X connection and the frame
+// buffers are set up later by Init().
 ScreenCapturerX11::ScreenCapturerX11() {}

-ScreenCapturerX11::~ScreenCapturerX11() {
-  if (inited_ && capture_thread_.joinable()) {
-    capture_thread_.join();
-    inited_ = false;
-  }
-}
+// Tears the capturer down through Destroy(), which stops the capture thread
+// and then releases the X display connection.
+ScreenCapturerX11::~ScreenCapturerX11() {
+  Destroy();
+}

 int ScreenCapturerX11::Init(const int fps, cb_desktop_data cb) {
-  if (cb) {
-    _on_data = cb;
+  display_ = XOpenDisplay(nullptr);
+  if (!display_) {
+    LOG_ERROR("Cannot connect to X server");
+    return -1;
+  }
+
+  root_ = DefaultRootWindow(display_);
+  XWindowAttributes attr;
+  XGetWindowAttributes(display_, root_, &attr);
+
+  width_ = attr.width;
+  height_ = attr.height;
+
+  if (width_ % 2 != 0 || height_ % 2 != 0) {
+    LOG_ERROR("Width and height must be even numbers");
+    return -2;
  }

  fps_ = fps;
+  callback_ = cb;

-  av_log_set_level(AV_LOG_QUIET);
-
-  pFormatCtx_ = avformat_alloc_context();
-
-  avdevice_register_all();
-
-  // grabbing frame rate
-  av_dict_set(&options_, "framerate", "30", 0);
-  // show remote cursor
-  av_dict_set(&options_, "capture_cursor", "0", 0);
-  // Make the grabbed area follow the mouse
-  // av_dict_set(&options_, "follow_mouse", "centered", 0);
-  // Video frame size. The default is to capture the full screen
-  // av_dict_set(&options_, "video_size", "1280x720", 0);
-  std::string capture_method = "x11grab";
-  ifmt_ = (AVInputFormat *)av_find_input_format(capture_method.c_str());
-  if (!ifmt_) {
-    LOG_ERROR("Couldn't find_input_format [{}]", capture_method.c_str());
-  }
-
-  const char *display = std::getenv("DISPLAY");
-  // Grab at position 10,20
-  if (display) {
-    if (avformat_open_input(&pFormatCtx_, display, ifmt_, &options_) != 0) {
-      LOG_ERROR("Couldn't open input stream {}", display);
-      return -1;
-    } else {
-      LOG_INFO("Open input stream [{}]", display);
-    }
-  } else {
-    LOG_ERROR("DISPLAY environment variable not set");
-    return -1;
-  }
-
-  if (avformat_find_stream_info(pFormatCtx_, NULL) < 0) {
-    LOG_ERROR("Couldn't find stream information");
-    return -1;
-  }
-
-  videoindex_ = -1;
-  for (i_ = 0; i_ < pFormatCtx_->nb_streams; i_++)
-    if (pFormatCtx_->streams[i_]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
-      videoindex_ = i_;
-      break;
-    }
-  if (videoindex_ == -1) {
-    LOG_ERROR("Didn't find a video stream");
-    return -1;
-  }
-
-  pCodecParam_ = pFormatCtx_->streams[videoindex_]->codecpar;
-
-  pCodecCtx_ = avcodec_alloc_context3(NULL);
-  avcodec_parameters_to_context(pCodecCtx_, pCodecParam_);
-
-  pCodec_ = const_cast<AVCodec *>(avcodec_find_decoder(pCodecCtx_->codec_id));
-  if (pCodec_ == NULL) {
-    LOG_ERROR("Codec not found");
-    return -1;
-  }
-  if (avcodec_open2(pCodecCtx_, pCodec_, NULL) < 0) {
-    LOG_ERROR("Could not open codec");
-    return -1;
-  }
-
-  const int screen_w = pFormatCtx_->streams[videoindex_]->codecpar->width;
-  const int screen_h = pFormatCtx_->streams[videoindex_]->codecpar->height;
-
-  pFrame_ = av_frame_alloc();
-  pFrameNv12_ = av_frame_alloc();
-
-  pFrame_->width = screen_w;
-  pFrame_->height = screen_h;
-  pFrameNv12_->width = 1280;
-  pFrameNv12_->height = 720;
-
-  packet_ = (AVPacket *)av_malloc(sizeof(AVPacket));
-
-  img_convert_ctx_ = sws_getContext(
-      pFrame_->width, pFrame_->height, pCodecCtx_->pix_fmt, pFrameNv12_->width,
-      pFrameNv12_->height, AV_PIX_FMT_NV12, SWS_BICUBIC, NULL, NULL, NULL);
-
-  inited_ = true;
+  y_plane_.resize(width_ * height_);
+  uv_plane_.resize((width_ / 2) * (height_ / 2) * 2);

  return 0;
 }

+// Shuts the capturer down: stops the capture thread first, then releases the
+// X display through CleanUp(). Always returns 0.
 int ScreenCapturerX11::Destroy() {
-  running_ = false;
+  // Stop() must run before CleanUp() so the capture thread is no longer
+  // calling OnFrame() when the display is closed.
+  Stop();
+  CleanUp();
  return 0;
 }

 int ScreenCapturerX11::Start() {
+  if (running_) return 0;
  running_ = true;
-  capture_thread_ = std::thread([this]() {
+  paused_ = false;
+  thread_ = std::thread([this]() {
    while (running_) {
-      if (av_read_frame(pFormatCtx_, packet_) >= 0) {
-        if (packet_->stream_index == videoindex_) {
-          avcodec_send_packet(pCodecCtx_, packet_);
-          av_packet_unref(packet_);
-          got_picture_ = avcodec_receive_frame(pCodecCtx_, pFrame_);
-
-          if (!got_picture_) {
-            av_image_fill_arrays(pFrameNv12_->data, pFrameNv12_->linesize,
-                                 nv12_buffer_, AV_PIX_FMT_NV12,
-                                 pFrameNv12_->width, pFrameNv12_->height, 1);
-
-            sws_scale(img_convert_ctx_, pFrame_->data, pFrame_->linesize, 0,
-                      pFrame_->height, pFrameNv12_->data,
-                      pFrameNv12_->linesize);
-
-            _on_data((unsigned char *)nv12_buffer_,
-                     pFrameNv12_->width * pFrameNv12_->height * 3 / 2,
-                     pFrameNv12_->width, pFrameNv12_->height);
-          }
-        }
-      }
+      if (!paused_) OnFrame();
    }
  });
-
  return 0;
 }

+// Stops the capture thread and waits for it to finish. A call while the
+// capturer is not running is a no-op. Always returns 0.
 int ScreenCapturerX11::Stop() {
+  if (!running_) return 0;
  running_ = false;
+  // The capture loop tests running_ on every iteration; join until it has
+  // seen the flag and returned.
+  if (thread_.joinable()) thread_.join();
  return 0;
 }

-int ScreenCapturerX11::Pause() { return 0; }
+// Suspends frame delivery: while paused_ is set the capture loop skips
+// OnFrame(), but the capture thread itself keeps running. Always returns 0.
+int ScreenCapturerX11::Pause() {
+  paused_ = true;
+  return 0;
+}

-int ScreenCapturerX11::Resume() { return 0; }
+// Resumes frame delivery after Pause() by clearing paused_, so the capture
+// loop calls OnFrame() again. Always returns 0.
+int ScreenCapturerX11::Resume() {
+  paused_ = false;
+  return 0;
+}

-void ScreenCapturerX11::OnFrame() {}
+// Captures one frame of the root window, converts it to NV12 and passes it to
+// callback_ as (data, byte count, width, height).
+//
+// The frame is skipped when the display is not open, no callback is set, the
+// Y/UV planes do not have the size expected for width_ x height_, XGetImage
+// fails, or the returned image is not 32 bits per pixel.
+void ScreenCapturerX11::OnFrame() {
+  if (!display_ || !callback_) return;

-void ScreenCapturerX11::CleanUp() {}
+  // Never convert into planes that do not match the current dimensions (for
+  // example when Init() did not complete); libyuv would write out of bounds.
+  const size_t y_size = static_cast<size_t>(width_) * height_;
+  const size_t uv_size = static_cast<size_t>(width_ / 2) * (height_ / 2) * 2;
+  if (width_ <= 0 || height_ <= 0 || y_plane_.size() != y_size ||
+      uv_plane_.size() != uv_size) {
+    return;
+  }
+
+  XImage* image =
+      XGetImage(display_, root_, 0, 0, width_, height_, AllPlanes, ZPixmap);
+  if (!image) return;
+
+  // ARGBToNV12 reads four bytes per pixel. A 16- or 24-bpp image has shorter
+  // rows, so converting it would read past the end of the image data.
+  if (image->bits_per_pixel != 32) {
+    LOG_ERROR("Unsupported XImage format: {} bits per pixel",
+              image->bits_per_pixel);
+    XDestroyImage(image);
+    return;
+  }
+
+  // Hand libyuv the image's own row stride; rows padded beyond width_ * 4
+  // bytes then need no intermediate repacking copy.
+  libyuv::ARGBToNV12(reinterpret_cast<const uint8_t*>(image->data),
+                     image->bytes_per_line, y_plane_.data(), width_,
+                     uv_plane_.data(), width_, width_, height_);
+  XDestroyImage(image);
+
+  // The callback takes one contiguous NV12 buffer: Y plane followed by the
+  // interleaved UV plane.
+  std::vector<uint8_t> nv12;
+  nv12.reserve(y_size + uv_size);
+  nv12.insert(nv12.end(), y_plane_.begin(), y_plane_.end());
+  nv12.insert(nv12.end(), uv_plane_.begin(), uv_plane_.end());
+
+  callback_(nv12.data(), width_ * height_ * 3 / 2, width_, height_);
+}
+
+// Closes the X display connection if one is open and clears display_, so a
+// repeated call does nothing.
+void ScreenCapturerX11::CleanUp() {
+  if (!display_) return;
+
+  XCloseDisplay(display_);
+  display_ = nullptr;
+}