Use kcp as QoS module

dijunkun
2023-08-30 17:44:22 +08:00
parent a4cd77dcb0
commit 3c1f7973d0
79 changed files with 14442 additions and 3150 deletions

View File

@@ -1,85 +0,0 @@
//*********************************************************
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
// THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//*********************************************************
#include "pch.h"
#include "App.h"
#include "SimpleCapture.h"
using namespace winrt;
using namespace Windows::System;
using namespace Windows::Foundation;
using namespace Windows::UI;
using namespace Windows::UI::Composition;
using namespace Windows::Graphics::Capture;
void App::Initialize(ContainerVisual const &root) {
auto queue = DispatcherQueue::GetForCurrentThread();
m_compositor = root.Compositor();
m_root = m_compositor.CreateContainerVisual();
m_content = m_compositor.CreateSpriteVisual();
m_brush = m_compositor.CreateSurfaceBrush();
m_root.RelativeSizeAdjustment({1, 1});
root.Children().InsertAtTop(m_root);
m_content.AnchorPoint({0.5f, 0.5f});
m_content.RelativeOffsetAdjustment({0.5f, 0.5f, 0});
m_content.RelativeSizeAdjustment({1, 1});
m_content.Size({-80, -80});
m_content.Brush(m_brush);
m_brush.HorizontalAlignmentRatio(0.5f);
m_brush.VerticalAlignmentRatio(0.5f);
m_brush.Stretch(CompositionStretch::Uniform);
auto shadow = m_compositor.CreateDropShadow();
shadow.Mask(m_brush);
m_content.Shadow(shadow);
m_root.Children().InsertAtTop(m_content);
auto d3dDevice = CreateD3DDevice();
auto dxgiDevice = d3dDevice.as<IDXGIDevice>();
m_device = CreateDirect3DDevice(dxgiDevice.get());
}
void App::StartCapture(HWND hwnd) {
if (m_capture) {
m_capture->Close();
m_capture = nullptr;
}
auto item = CreateCaptureItemForWindow(hwnd);
m_capture = std::make_unique<SimpleCapture>(m_device, item);
auto surface = m_capture->CreateSurface(m_compositor);
m_brush.Surface(surface);
m_capture->StartCapture();
}
void App::StartCapture(HMONITOR hmonitor) {
if (m_capture) {
m_capture->Close();
m_capture = nullptr;
}
auto item = CreateCaptureItemForMonitor(hmonitor);
m_capture = std::make_unique<SimpleCapture>(m_device, item);
auto surface = m_capture->CreateSurface(m_compositor);
m_brush.Surface(surface);
m_capture->StartCapture();
}

View File

@@ -1,24 +0,0 @@
#pragma once
class SimpleCapture;
class App {
public:
App() {}
~App() {}
void Initialize(winrt::Windows::UI::Composition::ContainerVisual const &root);
void StartCapture(HWND hwnd);
void StartCapture(HMONITOR hmonitor);
private:
winrt::Windows::UI::Composition::Compositor m_compositor{nullptr};
winrt::Windows::UI::Composition::ContainerVisual m_root{nullptr};
winrt::Windows::UI::Composition::SpriteVisual m_content{nullptr};
winrt::Windows::UI::Composition::CompositionSurfaceBrush m_brush{nullptr};
winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice m_device{
nullptr};
std::unique_ptr<SimpleCapture> m_capture{nullptr};
};

View File

@@ -1,217 +0,0 @@
//*********************************************************
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
// THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//*********************************************************
#include "SimpleCapture.h"
#include "pch.h"
using namespace winrt;
using namespace Windows;
using namespace Windows::Foundation;
using namespace Windows::System;
using namespace Windows::Graphics;
using namespace Windows::Graphics::Capture;
using namespace Windows::Graphics::DirectX;
using namespace Windows::Graphics::DirectX::Direct3D11;
using namespace Windows::Foundation::Numerics;
using namespace Windows::UI;
using namespace Windows::UI::Composition;
SimpleCapture::SimpleCapture(IDirect3DDevice const &device,
GraphicsCaptureItem const &item) {
m_item = item;
m_device = device;
// Set up
auto d3dDevice = GetDXGIInterfaceFromObject<ID3D11Device>(m_device);
d3dDevice->GetImmediateContext(m_d3dContext.put());
auto size = m_item.Size();
m_swapChain = CreateDXGISwapChain(
d3dDevice, static_cast<uint32_t>(size.Width),
static_cast<uint32_t>(size.Height),
static_cast<DXGI_FORMAT>(DirectXPixelFormat::B8G8R8A8UIntNormalized), 2);
// Create framepool, define pixel format (DXGI_FORMAT_B8G8R8A8_UNORM), and
// frame size.
m_framePool = Direct3D11CaptureFramePool::Create(
m_device, DirectXPixelFormat::B8G8R8A8UIntNormalized, 2, size);
m_session = m_framePool.CreateCaptureSession(m_item);
m_lastSize = size;
m_frameArrived = m_framePool.FrameArrived(
auto_revoke, {this, &SimpleCapture::OnFrameArrived});
}
// Start sending capture frames
void SimpleCapture::StartCapture() {
CheckClosed();
m_session.StartCapture();
}
ICompositionSurface SimpleCapture::CreateSurface(Compositor const &compositor) {
CheckClosed();
return CreateCompositionSurfaceForSwapChain(compositor, m_swapChain.get());
}
// Process captured frames
void SimpleCapture::Close() {
auto expected = false;
if (m_closed.compare_exchange_strong(expected, true)) {
m_frameArrived.revoke();
m_framePool.Close();
m_session.Close();
m_swapChain = nullptr;
m_framePool = nullptr;
m_session = nullptr;
m_item = nullptr;
}
}
void SimpleCapture::OnFrameArrived(
Direct3D11CaptureFramePool const &sender,
winrt::Windows::Foundation::IInspectable const &) {
auto newSize = false;
{
auto frame = sender.TryGetNextFrame();
auto frameContentSize = frame.ContentSize();
if (frameContentSize.Width != m_lastSize.Width ||
frameContentSize.Height != m_lastSize.Height) {
// The thing we have been capturing has changed size.
// We need to resize our swap chain first, then blit the pixels.
// After we do that, retire the frame and then recreate our frame pool.
newSize = true;
m_lastSize = frameContentSize;
m_swapChain->ResizeBuffers(
2, static_cast<uint32_t>(m_lastSize.Width),
static_cast<uint32_t>(m_lastSize.Height),
static_cast<DXGI_FORMAT>(DirectXPixelFormat::B8G8R8A8UIntNormalized),
0);
}
// copy to swapChain
{
auto frameSurface =
GetDXGIInterfaceFromObject<ID3D11Texture2D>(frame.Surface());
com_ptr<ID3D11Texture2D> backBuffer;
check_hresult(m_swapChain->GetBuffer(0, guid_of<ID3D11Texture2D>(),
backBuffer.put_void()));
m_d3dContext->CopyResource(backBuffer.get(), frameSurface.get());
DXGI_PRESENT_PARAMETERS presentParameters = {0};
m_swapChain->Present1(1, 0, &presentParameters);
}
// copy to mapped texture
{
auto frameSurface =
GetDXGIInterfaceFromObject<ID3D11Texture2D>(frame.Surface());
if (!m_mappedTexture || newSize) CreateMappedTexture(frameSurface);
m_d3dContext->CopyResource(m_mappedTexture.get(), frameSurface.get());
D3D11_MAPPED_SUBRESOURCE mapInfo = {};  // zero-init so pData stays null if Map fails
m_d3dContext->Map(m_mappedTexture.get(), 0, D3D11_MAP_READ,
D3D11_MAP_FLAG_DO_NOT_WAIT, &mapInfo);
// copy data from mapInfo.pData
#if 1
if (mapInfo.pData) {
static unsigned char *buffer = nullptr;
if (buffer && newSize) {
delete[] buffer;
buffer = nullptr;  // otherwise the stale pointer is reused after a resize
}
if (!buffer)
buffer = new unsigned char[frameContentSize.Width *
frameContentSize.Height * 4];
int dstRowPitch = frameContentSize.Width * 4;
for (int h = 0; h < frameContentSize.Height; h++) {
memcpy_s(buffer + h * dstRowPitch, dstRowPitch,
(BYTE *)mapInfo.pData + h * mapInfo.RowPitch,
min(mapInfo.RowPitch, dstRowPitch));
}
BITMAPINFOHEADER bi;
bi.biSize = sizeof(BITMAPINFOHEADER);
bi.biWidth = frameContentSize.Width;
bi.biHeight = frameContentSize.Height * (-1);
bi.biPlanes = 1;
bi.biBitCount = 32; // should get from system color bits
bi.biCompression = BI_RGB;
bi.biSizeImage = 0;
bi.biXPelsPerMeter = 0;
bi.biYPelsPerMeter = 0;
bi.biClrUsed = 0;
bi.biClrImportant = 0;
BITMAPFILEHEADER bf;
bf.bfType = 0x4d42;
bf.bfReserved1 = 0;
bf.bfReserved2 = 0;
bf.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
bf.bfSize =
bf.bfOffBits + frameContentSize.Width * frameContentSize.Height * 4;
FILE *fp = nullptr;
fopen_s(&fp, ".\\save.bmp", "wb+");
if (fp) {  // fopen_s can fail, e.g. when the working directory is read-only
fwrite(&bf, 1, sizeof(bf), fp);
fwrite(&bi, 1, sizeof(bi), fp);
fwrite(buffer, 1, frameContentSize.Width * frameContentSize.Height * 4,
fp);
fflush(fp);
fclose(fp);
}
}
#endif
m_d3dContext->Unmap(m_mappedTexture.get(), 0);
}
}
if (newSize) {
m_framePool.Recreate(m_device, DirectXPixelFormat::B8G8R8A8UIntNormalized,
2, m_lastSize);
}
}
HRESULT
SimpleCapture::CreateMappedTexture(winrt::com_ptr<ID3D11Texture2D> src_texture,
UINT width, UINT height) {
D3D11_TEXTURE2D_DESC src_desc;
src_texture->GetDesc(&src_desc);
D3D11_TEXTURE2D_DESC map_desc;
map_desc.Width = width == 0 ? src_desc.Width : width;
map_desc.Height = height == 0 ? src_desc.Height : height;
map_desc.MipLevels = src_desc.MipLevels;
map_desc.ArraySize = src_desc.ArraySize;
map_desc.Format = src_desc.Format;
map_desc.SampleDesc = src_desc.SampleDesc;
map_desc.Usage = D3D11_USAGE_STAGING;
map_desc.BindFlags = 0;
map_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
map_desc.MiscFlags = 0;
auto d3dDevice = GetDXGIInterfaceFromObject<ID3D11Device>(m_device);
return d3dDevice->CreateTexture2D(&map_desc, nullptr, m_mappedTexture.put());
}

View File

@@ -1,49 +0,0 @@
#pragma once
class SimpleCapture {
public:
SimpleCapture(
winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice const
&device,
winrt::Windows::Graphics::Capture::GraphicsCaptureItem const &item);
~SimpleCapture() { Close(); }
void StartCapture();
winrt::Windows::UI::Composition::ICompositionSurface
CreateSurface(winrt::Windows::UI::Composition::Compositor const &compositor);
void Close();
private:
void OnFrameArrived(
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const
&sender,
winrt::Windows::Foundation::IInspectable const &args);
void CheckClosed() {
if (m_closed.load() == true) {
throw winrt::hresult_error(RO_E_CLOSED);
}
}
HRESULT
CreateMappedTexture(winrt::com_ptr<ID3D11Texture2D> src_texture,
UINT width = 0, UINT height = 0);
private:
winrt::Windows::Graphics::Capture::GraphicsCaptureItem m_item{nullptr};
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool m_framePool{
nullptr};
winrt::Windows::Graphics::Capture::GraphicsCaptureSession m_session{nullptr};
winrt::Windows::Graphics::SizeInt32 m_lastSize;
winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice m_device{
nullptr};
winrt::com_ptr<IDXGISwapChain1> m_swapChain{nullptr};
winrt::com_ptr<ID3D11DeviceContext> m_d3dContext{nullptr};
winrt::com_ptr<ID3D11Texture2D> m_mappedTexture{nullptr};
std::atomic<bool> m_closed = false;
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool::
FrameArrived_revoker m_frameArrived;
};

View File

@@ -1,50 +0,0 @@
#pragma once
#include <dwmapi.h>
struct Monitor {
public:
Monitor(nullptr_t) {}
Monitor(HMONITOR hmonitor, std::wstring &className, bool isPrimary) {
m_hmonitor = hmonitor;
m_className = className;
m_bIsPrimary = isPrimary;
}
HMONITOR Hmonitor() const noexcept { return m_hmonitor; }
std::wstring ClassName() const noexcept { return m_className; }
bool IsPrimary() const noexcept { return m_bIsPrimary; }
private:
HMONITOR m_hmonitor;
std::wstring m_className;
bool m_bIsPrimary;
};
BOOL WINAPI EnumMonitorProc(HMONITOR hmonitor, HDC hdc, LPRECT lprc,
LPARAM data) {
MONITORINFOEX info_ex;
info_ex.cbSize = sizeof(MONITORINFOEX);
GetMonitorInfo(hmonitor, &info_ex);
if (info_ex.dwFlags == DISPLAY_DEVICE_MIRRORING_DRIVER)
return true;
auto monitors = ((std::vector<Monitor> *)data);
std::wstring name = info_ex.szDevice;
auto monitor =
Monitor(hmonitor, name, info_ex.dwFlags & MONITORINFOF_PRIMARY);
monitors->emplace_back(monitor);
return true;
}
std::vector<Monitor> EnumerateMonitors() {
std::vector<Monitor> monitors;
::EnumDisplayMonitors(NULL, NULL, EnumMonitorProc, (LPARAM)&monitors);
return monitors;
}

View File

@@ -1,101 +0,0 @@
#pragma once
#include <dwmapi.h>
struct Window {
public:
Window(nullptr_t) {}
Window(HWND hwnd, std::wstring const &title, std::wstring &className) {
m_hwnd = hwnd;
m_title = title;
m_className = className;
}
HWND Hwnd() const noexcept { return m_hwnd; }
std::wstring Title() const noexcept { return m_title; }
std::wstring ClassName() const noexcept { return m_className; }
private:
HWND m_hwnd;
std::wstring m_title;
std::wstring m_className;
};
std::wstring GetClassName(HWND hwnd) {
std::array<WCHAR, 1024> className;
::GetClassName(hwnd, className.data(), (int)className.size());
std::wstring title(className.data());
return title;
}
std::wstring GetWindowText(HWND hwnd) {
std::array<WCHAR, 1024> windowText;
::GetWindowText(hwnd, windowText.data(), (int)windowText.size());
std::wstring title(windowText.data());
return title;
}
bool IsAltTabWindow(Window const &window) {
HWND hwnd = window.Hwnd();
HWND shellWindow = GetShellWindow();
auto title = window.Title();
auto className = window.ClassName();
if (hwnd == shellWindow) {
return false;
}
if (title.length() == 0) {
return false;
}
if (!IsWindowVisible(hwnd)) {
return false;
}
if (GetAncestor(hwnd, GA_ROOT) != hwnd) {
return false;
}
LONG style = GetWindowLong(hwnd, GWL_STYLE);
if (style & WS_DISABLED) {
return false;
}
DWORD cloaked = FALSE;
HRESULT hrTemp =
DwmGetWindowAttribute(hwnd, DWMWA_CLOAKED, &cloaked, sizeof(cloaked));
if (SUCCEEDED(hrTemp) && cloaked == DWM_CLOAKED_SHELL) {
return false;
}
return true;
}
BOOL CALLBACK EnumWindowsProc(HWND hwnd, LPARAM lParam) {
auto class_name = GetClassName(hwnd);
auto title = GetWindowText(hwnd);
auto window = Window(hwnd, title, class_name);
if (!IsAltTabWindow(window)) {
return TRUE;
}
std::vector<Window> &windows =
*reinterpret_cast<std::vector<Window> *>(lParam);
windows.push_back(window);
return TRUE;
}
const std::vector<Window> EnumerateWindows() {
std::vector<Window> windows;
EnumWindows(EnumWindowsProc, reinterpret_cast<LPARAM>(&windows));
return windows;
}

View File

@@ -1,28 +0,0 @@
#pragma once
#include <windows.graphics.capture.h>
#include <windows.graphics.capture.interop.h>
#include <winrt/Windows.Graphics.Capture.h>
inline auto CreateCaptureItemForWindow(HWND hwnd) {
auto activation_factory = winrt::get_activation_factory<
winrt::Windows::Graphics::Capture::GraphicsCaptureItem>();
auto interop_factory = activation_factory.as<IGraphicsCaptureItemInterop>();
winrt::Windows::Graphics::Capture::GraphicsCaptureItem item = {nullptr};
interop_factory->CreateForWindow(
hwnd,
winrt::guid_of<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>(),
reinterpret_cast<void **>(winrt::put_abi(item)));
return item;
}
inline auto CreateCaptureItemForMonitor(HMONITOR hmonitor) {
auto activation_factory = winrt::get_activation_factory<
winrt::Windows::Graphics::Capture::GraphicsCaptureItem>();
auto interop_factory = activation_factory.as<IGraphicsCaptureItemInterop>();
winrt::Windows::Graphics::Capture::GraphicsCaptureItem item = {nullptr};
interop_factory->CreateForMonitor(
hmonitor,
winrt::guid_of<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>(),
reinterpret_cast<void **>(winrt::put_abi(item)));
return item;
}

View File

@@ -1,69 +0,0 @@
#pragma once
#include <d2d1_1.h>
#include <windows.ui.composition.interop.h>
#include <winrt/Windows.UI.Composition.h>
inline auto CreateCompositionGraphicsDevice(
winrt::Windows::UI::Composition::Compositor const &compositor,
::IUnknown *device) {
winrt::Windows::UI::Composition::CompositionGraphicsDevice graphicsDevice{
nullptr};
auto compositorInterop =
compositor.as<ABI::Windows::UI::Composition::ICompositorInterop>();
winrt::com_ptr<ABI::Windows::UI::Composition::ICompositionGraphicsDevice>
graphicsInterop;
winrt::check_hresult(
compositorInterop->CreateGraphicsDevice(device, graphicsInterop.put()));
winrt::check_hresult(graphicsInterop->QueryInterface(
winrt::guid_of<
winrt::Windows::UI::Composition::CompositionGraphicsDevice>(),
reinterpret_cast<void **>(winrt::put_abi(graphicsDevice))));
return graphicsDevice;
}
inline void ResizeSurface(
winrt::Windows::UI::Composition::CompositionDrawingSurface const &surface,
winrt::Windows::Foundation::Size const &size) {
auto surfaceInterop = surface.as<
ABI::Windows::UI::Composition::ICompositionDrawingSurfaceInterop>();
SIZE newSize = {};
newSize.cx = static_cast<LONG>(std::round(size.Width));
newSize.cy = static_cast<LONG>(std::round(size.Height));
winrt::check_hresult(surfaceInterop->Resize(newSize));
}
inline auto SurfaceBeginDraw(
winrt::Windows::UI::Composition::CompositionDrawingSurface const &surface) {
auto surfaceInterop = surface.as<
ABI::Windows::UI::Composition::ICompositionDrawingSurfaceInterop>();
winrt::com_ptr<ID2D1DeviceContext> context;
POINT offset = {};
winrt::check_hresult(surfaceInterop->BeginDraw(
nullptr, __uuidof(ID2D1DeviceContext), context.put_void(), &offset));
context->SetTransform(
D2D1::Matrix3x2F::Translation((FLOAT)offset.x, (FLOAT)offset.y));
return context;
}
inline void SurfaceEndDraw(
winrt::Windows::UI::Composition::CompositionDrawingSurface const &surface) {
auto surfaceInterop = surface.as<
ABI::Windows::UI::Composition::ICompositionDrawingSurfaceInterop>();
winrt::check_hresult(surfaceInterop->EndDraw());
}
inline auto CreateCompositionSurfaceForSwapChain(
winrt::Windows::UI::Composition::Compositor const &compositor,
::IUnknown *swapChain) {
winrt::Windows::UI::Composition::ICompositionSurface surface{nullptr};
auto compositorInterop =
compositor.as<ABI::Windows::UI::Composition::ICompositorInterop>();
winrt::com_ptr<ABI::Windows::UI::Composition::ICompositionSurface>
surfaceInterop;
winrt::check_hresult(compositorInterop->CreateCompositionSurfaceForSwapChain(
swapChain, surfaceInterop.put()));
winrt::check_hresult(surfaceInterop->QueryInterface(
winrt::guid_of<winrt::Windows::UI::Composition::ICompositionSurface>(),
reinterpret_cast<void **>(winrt::put_abi(surface))));
return surface;
}

View File

@@ -1,132 +0,0 @@
#pragma once
#include "composition.interop.h"
struct SurfaceContext {
public:
SurfaceContext(std::nullptr_t) {}
SurfaceContext(
winrt::Windows::UI::Composition::CompositionDrawingSurface surface) {
m_surface = surface;
m_d2dContext = SurfaceBeginDraw(m_surface);
}
~SurfaceContext() {
SurfaceEndDraw(m_surface);
m_d2dContext = nullptr;
m_surface = nullptr;
}
winrt::com_ptr<ID2D1DeviceContext> GetDeviceContext() { return m_d2dContext; }
private:
winrt::com_ptr<ID2D1DeviceContext> m_d2dContext;
winrt::Windows::UI::Composition::CompositionDrawingSurface m_surface{nullptr};
};
struct D3D11DeviceLock {
public:
D3D11DeviceLock(std::nullopt_t) {}
D3D11DeviceLock(ID3D11Multithread *pMultithread) {
m_multithread.copy_from(pMultithread);
m_multithread->Enter();
}
~D3D11DeviceLock() {
m_multithread->Leave();
m_multithread = nullptr;
}
private:
winrt::com_ptr<ID3D11Multithread> m_multithread;
};
inline auto CreateWICFactory() {
winrt::com_ptr<IWICImagingFactory2> wicFactory;
winrt::check_hresult(::CoCreateInstance(
CLSID_WICImagingFactory, nullptr, CLSCTX_INPROC_SERVER,
winrt::guid_of<IWICImagingFactory>(), wicFactory.put_void()));
return wicFactory;
}
inline auto CreateD2DDevice(winrt::com_ptr<ID2D1Factory1> const &factory,
winrt::com_ptr<ID3D11Device> const &device) {
winrt::com_ptr<ID2D1Device> result;
winrt::check_hresult(
factory->CreateDevice(device.as<IDXGIDevice>().get(), result.put()));
return result;
}
inline auto CreateD3DDevice(D3D_DRIVER_TYPE const type,
winrt::com_ptr<ID3D11Device> &device) {
WINRT_ASSERT(!device);
UINT flags = D3D11_CREATE_DEVICE_BGRA_SUPPORT;
//#ifdef _DEBUG
// flags |= D3D11_CREATE_DEVICE_DEBUG;
//#endif
return D3D11CreateDevice(nullptr, type, nullptr, flags, nullptr, 0,
D3D11_SDK_VERSION, device.put(), nullptr, nullptr);
}
inline auto CreateD3DDevice() {
winrt::com_ptr<ID3D11Device> device;
HRESULT hr = CreateD3DDevice(D3D_DRIVER_TYPE_HARDWARE, device);
if (DXGI_ERROR_UNSUPPORTED == hr) {
hr = CreateD3DDevice(D3D_DRIVER_TYPE_WARP, device);
}
winrt::check_hresult(hr);
return device;
}
inline auto CreateD2DFactory() {
D2D1_FACTORY_OPTIONS options{};
//#ifdef _DEBUG
// options.debugLevel = D2D1_DEBUG_LEVEL_INFORMATION;
//#endif
winrt::com_ptr<ID2D1Factory1> factory;
winrt::check_hresult(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED,
options, factory.put()));
return factory;
}
inline auto CreateDXGISwapChain(winrt::com_ptr<ID3D11Device> const &device,
const DXGI_SWAP_CHAIN_DESC1 *desc) {
auto dxgiDevice = device.as<IDXGIDevice2>();
winrt::com_ptr<IDXGIAdapter> adapter;
winrt::check_hresult(dxgiDevice->GetParent(winrt::guid_of<IDXGIAdapter>(),
adapter.put_void()));
winrt::com_ptr<IDXGIFactory2> factory;
winrt::check_hresult(
adapter->GetParent(winrt::guid_of<IDXGIFactory2>(), factory.put_void()));
winrt::com_ptr<IDXGISwapChain1> swapchain;
winrt::check_hresult(factory->CreateSwapChainForComposition(
device.get(), desc, nullptr, swapchain.put()));
return swapchain;
}
inline auto CreateDXGISwapChain(winrt::com_ptr<ID3D11Device> const &device,
uint32_t width, uint32_t height,
DXGI_FORMAT format, uint32_t bufferCount) {
DXGI_SWAP_CHAIN_DESC1 desc = {};
desc.Width = width;
desc.Height = height;
desc.Format = format;
desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.BufferCount = bufferCount;
desc.Scaling = DXGI_SCALING_STRETCH;
desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
desc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
return CreateDXGISwapChain(device, &desc);
}

View File

@@ -1,41 +0,0 @@
#pragma once
#include <winrt/windows.graphics.directx.direct3d11.h>
extern "C" {
HRESULT __stdcall CreateDirect3D11DeviceFromDXGIDevice(
::IDXGIDevice *dxgiDevice, ::IInspectable **graphicsDevice);
HRESULT __stdcall CreateDirect3D11SurfaceFromDXGISurface(
::IDXGISurface *dgxiSurface, ::IInspectable **graphicsSurface);
}
struct __declspec(uuid("A9B3D012-3DF2-4EE3-B8D1-8695F457D3C1"))
IDirect3DDxgiInterfaceAccess : ::IUnknown {
virtual HRESULT __stdcall GetInterface(GUID const &id, void **object) = 0;
};
inline auto CreateDirect3DDevice(IDXGIDevice *dxgi_device) {
winrt::com_ptr<::IInspectable> d3d_device;
winrt::check_hresult(
CreateDirect3D11DeviceFromDXGIDevice(dxgi_device, d3d_device.put()));
return d3d_device
.as<winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice>();
}
inline auto CreateDirect3DSurface(IDXGISurface *dxgi_surface) {
winrt::com_ptr<::IInspectable> d3d_surface;
winrt::check_hresult(
CreateDirect3D11SurfaceFromDXGISurface(dxgi_surface, d3d_surface.put()));
return d3d_surface
.as<winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface>();
}
template <typename T>
auto GetDXGIInterfaceFromObject(
winrt::Windows::Foundation::IInspectable const &object) {
auto access = object.as<IDirect3DDxgiInterfaceAccess>();
winrt::com_ptr<T> result;
winrt::check_hresult(
access->GetInterface(winrt::guid_of<T>(), result.put_void()));
return result;
}

View File

@@ -1,160 +0,0 @@
//*********************************************************
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH
// THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//*********************************************************
#include "pch.h"
#include "App.h"
#include "SimpleCapture.h"
#include "Win32MonitorEnumeration.h"
#include "Win32WindowEnumeration.h"
#include <ShObjIdl.h>
using namespace winrt;
using namespace Windows::UI;
using namespace Windows::UI::Composition;
using namespace Windows::UI::Composition::Desktop;
// Direct3D11CaptureFramePool requires a DispatcherQueue
auto CreateDispatcherQueueController() {
namespace abi = ABI::Windows::System;
DispatcherQueueOptions options{sizeof(DispatcherQueueOptions),
DQTYPE_THREAD_CURRENT, DQTAT_COM_STA};
Windows::System::DispatcherQueueController controller{nullptr};
check_hresult(CreateDispatcherQueueController(
options, reinterpret_cast<abi::IDispatcherQueueController **>(
put_abi(controller))));
return controller;
}
DesktopWindowTarget CreateDesktopWindowTarget(Compositor const &compositor,
HWND window) {
namespace abi = ABI::Windows::UI::Composition::Desktop;
auto interop = compositor.as<abi::ICompositorDesktopInterop>();
DesktopWindowTarget target{nullptr};
check_hresult(interop->CreateDesktopWindowTarget(
window, true,
reinterpret_cast<abi::IDesktopWindowTarget **>(put_abi(target))));
return target;
}
int CALLBACK WinMain(HINSTANCE instance, HINSTANCE previousInstance,
LPSTR cmdLine, int cmdShow);
auto g_app = std::make_shared<App>();
auto g_windows = EnumerateWindows();
auto g_monitors = EnumerateMonitors();
LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
int CALLBACK WinMain(HINSTANCE instance, HINSTANCE previousInstance,
LPSTR cmdLine, int cmdShow) {
// Init COM
init_apartment(apartment_type::single_threaded);
// Create the window
WNDCLASSEX wcex = {};
wcex.cbSize = sizeof(WNDCLASSEX);
wcex.style = CS_HREDRAW | CS_VREDRAW;
wcex.lpfnWndProc = WndProc;
wcex.cbClsExtra = 0;
wcex.cbWndExtra = 0;
wcex.hInstance = instance;
wcex.hIcon = LoadIcon(instance, MAKEINTRESOURCE(IDI_APPLICATION));
wcex.hCursor = LoadCursor(NULL, IDC_ARROW);
wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1);
wcex.lpszMenuName = NULL;
wcex.lpszClassName = L"ScreenCaptureforHWND";
wcex.hIconSm = LoadIcon(wcex.hInstance, MAKEINTRESOURCE(IDI_APPLICATION));
WINRT_VERIFY(RegisterClassEx(&wcex));
HWND hwnd = CreateWindow(L"ScreenCaptureforHWND", L"ScreenCaptureforHWND",
WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT,
800, 600, NULL, NULL, instance, NULL);
WINRT_VERIFY(hwnd);
ShowWindow(hwnd, cmdShow);
UpdateWindow(hwnd);
// Create combo box
HWND comboBoxHwnd =
CreateWindow(WC_COMBOBOX, L"",
CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_CHILD |
WS_OVERLAPPED | WS_VISIBLE,
10, 10, 200, 200, hwnd, NULL, instance, NULL);
WINRT_VERIFY(comboBoxHwnd);
// Populate combo box
// for (auto &window : g_windows) {
// SendMessage(comboBoxHwnd, CB_ADDSTRING, 0, (LPARAM)window.Title().c_str());
// }
for (auto &monitor : g_monitors) {
SendMessage(comboBoxHwnd, CB_ADDSTRING, 0,
(LPARAM)monitor.ClassName().c_str());
}
// SendMessage(comboBoxHwnd, CB_SETCURSEL, 0, 0);
// Create a DispatcherQueue for our thread
auto controller = CreateDispatcherQueueController();
// Initialize Composition
auto compositor = Compositor();
auto target = CreateDesktopWindowTarget(compositor, hwnd);
auto root = compositor.CreateContainerVisual();
root.RelativeSizeAdjustment({1.0f, 1.0f});
target.Root(root);
// Enqueue our capture work on the dispatcher
auto queue = controller.DispatcherQueue();
auto success = queue.TryEnqueue([=]() -> void { g_app->Initialize(root); });
WINRT_VERIFY(success);
// Message pump
MSG msg;
while (GetMessage(&msg, NULL, 0, 0)) {
TranslateMessage(&msg);
DispatchMessage(&msg);
}
return (int)msg.wParam;
}
LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) {
switch (msg) {
case WM_DESTROY:
PostQuitMessage(0);
break;
case WM_COMMAND:
if (HIWORD(wParam) == CBN_SELCHANGE) {
auto index = SendMessage((HWND)lParam, CB_GETCURSEL, 0, 0);
// if (index < g_windows.size() - 1) {
// auto window = g_windows[index];
// g_app->StartCapture(window.Hwnd());
// } else {
// auto monitor = g_monitors[index - g_windows.size()];
auto monitor = g_monitors[0];
g_app->StartCapture(monitor.Hmonitor());
// }
}
break;
default:
return DefWindowProc(hwnd, msg, wParam, lParam);
break;
}
return 0;
}

View File

@@ -1 +0,0 @@
#include "pch.h"

View File

@@ -1,34 +0,0 @@
#pragma once
#include <Unknwn.h>
#include <inspectable.h>
// WinRT
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.System.h>
#include <winrt/Windows.UI.h>
#include <winrt/Windows.UI.Composition.h>
#include <winrt/Windows.UI.Composition.Desktop.h>
#include <winrt/Windows.UI.Popups.h>
#include <winrt/Windows.Graphics.Capture.h>
#include <winrt/Windows.Graphics.DirectX.h>
#include <winrt/Windows.Graphics.DirectX.Direct3d11.h>
#include <windows.ui.composition.interop.h>
#include <DispatcherQueue.h>
// STL
#include <atomic>
#include <memory>
// D3D
#include <d3d11_4.h>
#include <dxgi1_6.h>
#include <d2d1_3.h>
#include <wincodec.h>
// Helpers
#include "composition.interop.h"
#include "d3dHelpers.h"
#include "direct3d11.interop.h"
#include "capture.interop.h"

View File

@@ -1,368 +0,0 @@
#include <stdio.h>
#define __STDC_CONSTANT_MACROS
#ifdef _WIN32
// Windows
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
#include "SDL2/SDL.h"
};
#else
// Linux...
#ifdef __cplusplus
extern "C" {
#endif
#include <SDL/SDL.h>
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
};
#endif
#endif
// Output YUV420P
#define OUTPUT_YUV420P 0
//'1' Use Dshow
//'0' Use GDIgrab
#define USE_DSHOW 0
// Refresh Event
#define SFM_REFRESH_EVENT (SDL_USEREVENT + 1)
#define SFM_BREAK_EVENT (SDL_USEREVENT + 2)
int thread_exit = 0;
SDL_Texture *sdlTexture = nullptr;
SDL_Renderer *sdlRenderer = nullptr;
SDL_Rect sdlRect;
// Note: despite its name, this converts NV12 to YUV420P (the sws context
// below takes NV12 as the source format and YUV420P as the destination).
int YUV420ToNV12FFmpeg(unsigned char *src_buffer, int width, int height,
unsigned char *des_buffer) {
AVFrame *Input_pFrame = av_frame_alloc();
AVFrame *Output_pFrame = av_frame_alloc();
struct SwsContext *img_convert_ctx = sws_getContext(
width, height, AV_PIX_FMT_NV12, width, height, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
av_image_fill_arrays(Input_pFrame->data, Input_pFrame->linesize, src_buffer,
AV_PIX_FMT_NV12, width, height, 1);
av_image_fill_arrays(Output_pFrame->data, Output_pFrame->linesize, des_buffer,
AV_PIX_FMT_YUV420P, width, height, 1);
sws_scale(img_convert_ctx, (uint8_t const **)Input_pFrame->data,
Input_pFrame->linesize, 0, height, Output_pFrame->data,
Output_pFrame->linesize);
if (Input_pFrame) av_frame_free(&Input_pFrame);
if (Output_pFrame) av_frame_free(&Output_pFrame);
if (img_convert_ctx) sws_freeContext(img_convert_ctx);
return 0;
}
int sfp_refresh_thread(void *opaque) {
thread_exit = 0;
while (!thread_exit) {
SDL_Event event;
event.type = SFM_REFRESH_EVENT;
SDL_PushEvent(&event);
SDL_Delay(40);
}
thread_exit = 0;
// Break
SDL_Event event;
event.type = SFM_BREAK_EVENT;
SDL_PushEvent(&event);
return 0;
}
// Show Dshow Device
// void show_dshow_device() {
// AVFormatContext *pFormatCtx = avformat_alloc_context();
// AVDictionary *options = NULL;
// av_dict_set(&options, "list_devices", "true", 0);
// AVInputFormat *iformat = av_find_input_format("dshow");
// printf("========Device Info=============\n");
// avformat_open_input(&pFormatCtx, "video=dummy", iformat, &options);
// printf("================================\n");
// }
// Show AVFoundation Device
void show_avfoundation_device() {
const AVFormatContext *const_pFormatCtx = avformat_alloc_context();
AVFormatContext *pFormatCtx = const_cast<AVFormatContext *>(const_pFormatCtx);
AVDictionary *options = NULL;
av_dict_set(&options, "list_devices", "true", 0);
const AVInputFormat *const_iformat = av_find_input_format("avfoundation");
AVInputFormat *iformat = const_cast<AVInputFormat *>(const_iformat);
printf("==AVFoundation Device Info===\n");
avformat_open_input(&pFormatCtx, "", iformat, &options);
printf("=============================\n");
}
int main(int argc, char *argv[]) {
AVFormatContext *pFormatCtx;
int i, videoindex;
AVCodecContext *pCodecCtx;
AVCodec *pCodec;
avformat_network_init();
pFormatCtx = avformat_alloc_context();
// Open File
// char filepath[]="src01_480x272_22.h265";
// avformat_open_input(&pFormatCtx,filepath,NULL,NULL)
// Register Device
avdevice_register_all();
// Windows
#ifdef _WIN32
#if USE_DSHOW
// Use dshow
//
// Need to Install screen-capture-recorder
// screen-capture-recorder
// Website: http://sourceforge.net/projects/screencapturer/
//
AVInputFormat *ifmt = av_find_input_format("dshow");
if (avformat_open_input(&pFormatCtx, "video=screen-capture-recorder", ifmt,
NULL) != 0) {
printf("Couldn't open input stream.\n");
return -1;
}
#else
// Use gdigrab
AVDictionary *options = NULL;
// Set some options
// grabbing frame rate
// av_dict_set(&options,"framerate","5",0);
// The distance from the left edge of the screen or desktop
// av_dict_set(&options,"offset_x","20",0);
// The distance from the top edge of the screen or desktop
// av_dict_set(&options,"offset_y","40",0);
// Video frame size. The default is to capture the full screen
// av_dict_set(&options,"video_size","640x480",0);
const AVInputFormat *ifmt = av_find_input_format("gdigrab");
if (avformat_open_input(&pFormatCtx, "desktop", ifmt, &options) != 0) {
printf("Couldn't open input stream.\n");
return -1;
}
#endif
#elif defined linux
// Linux
AVDictionary *options = NULL;
// Set some options
// grabbing frame rate
// av_dict_set(&options,"framerate","5",0);
// Make the grabbed area follow the mouse
// av_dict_set(&options,"follow_mouse","centered",0);
// Video frame size. The default is to capture the full screen
// av_dict_set(&options,"video_size","640x480",0);
AVInputFormat *ifmt = av_find_input_format("x11grab");
// Grab at position 10,20
if (avformat_open_input(&pFormatCtx, ":0.0+10,20", ifmt, &options) != 0) {
printf("Couldn't open input stream.\n");
return -1;
}
#else
show_avfoundation_device();
// Mac
AVInputFormat *ifmt = av_find_input_format("avfoundation");
// Avfoundation
//[video]:[audio]
if (avformat_open_input(&pFormatCtx, "1", ifmt, NULL) != 0) {
printf("Couldn't open input stream.\n");
return -1;
}
#endif
if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
printf("Couldn't find stream information.\n");
return -1;
}
videoindex = -1;
for (i = 0; i < pFormatCtx->nb_streams; i++)
if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
videoindex = i;
break;
}
if (videoindex == -1) {
printf("Didn't find a video stream.\n");
return -1;
}
// pCodecCtx = pFormatCtx->streams[videoindex]->codec;
pCodecCtx = avcodec_alloc_context3(NULL);
avcodec_parameters_to_context(pCodecCtx,
pFormatCtx->streams[videoindex]->codecpar);
pCodec = const_cast<AVCodec *>(avcodec_find_decoder(pCodecCtx->codec_id));
// pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
if (pCodec == NULL) {
printf("Codec not found.\n");
return -1;
}
if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0) {
printf("Could not open codec.\n");
return -1;
}
AVFrame *pFrame, *pFrameYUV;
pFrame = av_frame_alloc();
pFrameYUV = av_frame_alloc();
// unsigned char *out_buffer=(unsigned char
// *)av_malloc(avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx->width,
// pCodecCtx->height)); avpicture_fill((AVPicture *)pFrameYUV, out_buffer,
// AV_PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
// SDL----------------------------
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
printf("Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
const int pixel_w = 640, pixel_h = 360;
int screen_w = 640, screen_h = 360;
// const SDL_VideoInfo *vi = SDL_GetVideoInfo();
// Half of the Desktop's width and height.
screen_w = 640;
screen_h = 360;
// SDL_Surface *screen;
// screen = SDL_SetVideoMode(screen_w, screen_h, 0, 0);
SDL_Window *screen;
screen = SDL_CreateWindow("RTS Receiver", SDL_WINDOWPOS_UNDEFINED,
SDL_WINDOWPOS_UNDEFINED, screen_w, screen_h,
SDL_WINDOW_RESIZABLE);
if (!screen) {
printf("SDL: could not set video mode - exiting:%s\n", SDL_GetError());
return -1;
}
// SDL_Overlay *bmp;
// bmp = SDL_CreateYUVOverlay(pCodecCtx->width, pCodecCtx->height,
// SDL_YV12_OVERLAY, screen);
sdlRenderer = SDL_CreateRenderer(screen, -1, SDL_RENDERER_ACCELERATED);
Uint32 pixformat = 0;
pixformat = SDL_PIXELFORMAT_NV12;
SDL_Texture *sdlTexture = nullptr;
sdlTexture = SDL_CreateTexture(sdlRenderer, pixformat,
SDL_TEXTUREACCESS_STREAMING, pixel_w, pixel_h);
SDL_Rect rect;
rect.x = 0;
rect.y = 0;
rect.w = screen_w;
rect.h = screen_h;
// SDL End------------------------
int ret, got_picture;
AVPacket *packet = av_packet_alloc();  // av_malloc() would leave the packet fields uninitialized
#if OUTPUT_YUV420P
FILE *fp_yuv = fopen("output.yuv", "wb+");
#endif
struct SwsContext *img_convert_ctx;
img_convert_ctx = sws_getContext(
pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt, pCodecCtx->width,
pCodecCtx->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
//------------------------------
SDL_Thread *video_tid = SDL_CreateThread(sfp_refresh_thread, NULL, NULL);
//
// SDL_WM_SetCaption("Simplest FFmpeg Grab Desktop", NULL);
// Event Loop
SDL_Event event;
for (;;) {
// Wait
SDL_WaitEvent(&event);
if (event.type == SFM_REFRESH_EVENT) {
//------------------------------
if (av_read_frame(pFormatCtx, packet) >= 0) {
if (packet->stream_index == videoindex) {
ret = avcodec_send_packet(pCodecCtx, packet);
if (ret < 0) {
printf("Decode Error.\n");
return -1;
}
// avcodec_receive_frame() returns 0 when a frame is ready, so turn that
// into a flag (the old avcodec_decode_video2() API set got_picture itself).
got_picture = (avcodec_receive_frame(pCodecCtx, pFrame) == 0);
if (got_picture) {
// SDL_LockYUVOverlay(bmp);
// pFrameYUV->data[0] = bmp->pixels[0];
// pFrameYUV->data[1] = bmp->pixels[2];
// pFrameYUV->data[2] = bmp->pixels[1];
// pFrameYUV->linesize[0] = bmp->pitches[0];
// pFrameYUV->linesize[1] = bmp->pitches[2];
// pFrameYUV->linesize[2] = bmp->pitches[1];
// sws_scale(img_convert_ctx,
// (const unsigned char *const *)pFrame->data,
// pFrame->linesize, 0, pCodecCtx->height,
// pFrameYUV->data, pFrameYUV->linesize);
// #if OUTPUT_YUV420P
// int y_size = pCodecCtx->width * pCodecCtx->height;
// fwrite(pFrameYUV->data[0], 1, y_size, fp_yuv); // Y
// fwrite(pFrameYUV->data[1], 1, y_size / 4, fp_yuv); //
// U fwrite(pFrameYUV->data[2], 1, y_size / 4, fp_yuv);
// // V
// #endif
// SDL_UnlockYUVOverlay(bmp);
// SDL_DisplayYUVOverlay(bmp, &rect);
// YUV420ToNV12FFmpeg(buffer, pixel_w, pixel_h, dst_buffer);
// SDL_UpdateTexture(sdlTexture, NULL, dst_buffer, pixel_w);
// // FIX: If window is resize
// sdlRect.x = 0;
// sdlRect.y = 0;
// sdlRect.w = screen_w;
// sdlRect.h = screen_h;
// SDL_RenderClear(sdlRenderer);
// SDL_RenderCopy(sdlRenderer, sdlTexture, NULL, &sdlRect);
// SDL_RenderPresent(sdlRenderer);
}
}
av_packet_unref(packet);  // release per-frame data each iteration (av_free_packet() is deprecated)
} else {
// Exit Thread
thread_exit = 1;
}
} else if (event.type == SDL_QUIT) {
thread_exit = 1;
} else if (event.type == SFM_BREAK_EVENT) {
break;
}
}
sws_freeContext(img_convert_ctx);
#if OUTPUT_YUV420P
fclose(fp_yuv);
#endif
SDL_Quit();
// av_free(out_buffer);
av_frame_free(&pFrame);
av_frame_free(&pFrameYUV);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}

View File

@@ -0,0 +1,12 @@
#include <iostream>
#include "remote_desk_server.h"
int main() {
RemoteDeskServer remote_desk_server;
remote_desk_server.Init();
while (1) {
}
return 0;
}
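
A side note on the empty `while (1) {}` above: it keeps the process alive but spins one CPU core at 100%. A minimal sketch of a gentler keep-alive loop, assuming no shutdown path is wired up yet (a real server would wait on a signal handler or quit message instead):

#include <chrono>
#include <thread>
#include "remote_desk_server.h"
int main() {
  RemoteDeskServer remote_desk_server;
  remote_desk_server.Init();
  // Keep the process alive without busy-waiting; capture runs on its own
  // thread inside RemoteDeskServer, so this thread only needs to idle.
  while (true) {
    std::this_thread::sleep_for(std::chrono::seconds(1));
  }
  return 0;
}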

View File

@@ -0,0 +1,93 @@
#include "remote_desk_server.h"
#include <iostream>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
};
// NV12 holds 12 bits per pixel, hence width * height * 3 / 2.
#define NV12_BUFFER_SIZE (2560 * 1440 * 3 / 2)
RemoteDeskServer::RemoteDeskServer() {}
RemoteDeskServer::~RemoteDeskServer() {
if (nv12_buffer_) {
delete[] nv12_buffer_;  // allocated with new[] in Init(), so delete[] is required
nv12_buffer_ = nullptr;
}
}
int BGRAToNV12FFmpeg(unsigned char *src_buffer, int width, int height,
unsigned char *dst_buffer) {
AVFrame *Input_pFrame = av_frame_alloc();
AVFrame *Output_pFrame = av_frame_alloc();
struct SwsContext *img_convert_ctx = sws_getContext(
width, height, AV_PIX_FMT_BGRA, width, height, AV_PIX_FMT_NV12,
SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
av_image_fill_arrays(Input_pFrame->data, Input_pFrame->linesize, src_buffer,
AV_PIX_FMT_BGRA, width, height, 1);
av_image_fill_arrays(Output_pFrame->data, Output_pFrame->linesize, dst_buffer,
AV_PIX_FMT_NV12, width, height, 1);
sws_scale(img_convert_ctx, (uint8_t const **)Input_pFrame->data,
Input_pFrame->linesize, 0, height, Output_pFrame->data,
Output_pFrame->linesize);
if (Input_pFrame) av_frame_free(&Input_pFrame);
if (Output_pFrame) av_frame_free(&Output_pFrame);
if (img_convert_ctx) sws_freeContext(img_convert_ctx);
return 0;
}
void RemoteDeskServer::HostReceiveBuffer(const char *data, size_t size,
const char *user_id,
size_t user_id_size) {
std::string msg(data, size);
std::string user(user_id, user_id_size);
std::cout << "Receive: [" << user << "] " << msg << std::endl;
}
int RemoteDeskServer::Init() {
Params params;
params.cfg_path = "../../../../config/config.ini";
params.on_receive_buffer = [](const char *data, size_t size,
const char *user_id, size_t user_id_size) {
// std::string msg(data, size);
// std::string user(user_id, user_id_size);
// std::cout << "Receive: [" << user << "] " << msg << std::endl;
};
std::string transmission_id = "000000";
std::string user_id = "Server";
peer = CreatePeer(&params);
CreateConnection(peer, transmission_id.c_str(), user_id.c_str());
nv12_buffer_ = new char[NV12_BUFFER_SIZE];
screen_capture = new ScreenCaptureWgc();
RECORD_DESKTOP_RECT rect;
rect.left = 0;
rect.top = 0;
rect.right = GetSystemMetrics(SM_CXSCREEN);
rect.bottom = GetSystemMetrics(SM_CYSCREEN);
screen_capture->Init(
rect, 60,
[this](unsigned char *data, int size, int width, int height) -> void {
// std::cout << "Send" << std::endl;
BGRAToNV12FFmpeg(data, width, height, (unsigned char *)nv12_buffer_);
SendData(peer, DATA_TYPE::VIDEO, (const char *)nv12_buffer_,
NV12_BUFFER_SIZE);
std::this_thread::sleep_for(std::chrono::milliseconds(100));
});
screen_capture->Start();
return 0;
}
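
One performance note on the capture path above: BGRAToNV12FFmpeg allocates two AVFrames and a fresh SwsContext for every captured frame, at up to 60 fps. A sketch of a variant that reuses the scaler via FFmpeg's sws_getCachedContext (hypothetical name BGRAToNV12Cached; same conversion, and it still assumes the fixed NV12_BUFFER_SIZE destination buffer is large enough for the actual screen size):

int BGRAToNV12Cached(unsigned char *src_buffer, int width, int height,
                     unsigned char *dst_buffer) {
  // sws_getCachedContext() returns the old context unchanged when the
  // parameters match, so the scaler is only rebuilt on a resolution change.
  static struct SwsContext *ctx = nullptr;
  ctx = sws_getCachedContext(ctx, width, height, AV_PIX_FMT_BGRA, width,
                             height, AV_PIX_FMT_NV12, SWS_FAST_BILINEAR,
                             nullptr, nullptr, nullptr);
  if (!ctx) return -1;
  uint8_t *src_data[4];
  int src_linesize[4];
  uint8_t *dst_data[4];
  int dst_linesize[4];
  // Wrap the raw buffers in plane pointers/strides; no copies are made here.
  av_image_fill_arrays(src_data, src_linesize, src_buffer, AV_PIX_FMT_BGRA,
                       width, height, 1);
  av_image_fill_arrays(dst_data, dst_linesize, dst_buffer, AV_PIX_FMT_NV12,
                       width, height, 1);
  sws_scale(ctx, (uint8_t const **)src_data, src_linesize, 0, height,
            dst_data, dst_linesize);
  return 0;
}

The static context makes this single-threaded by design, which matches its use from the one capture callback; a multi-capture setup would hold the context as a member instead.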

View File

@@ -0,0 +1,25 @@
#ifndef _REMOTE_DESK_SERVER_H_
#define _REMOTE_DESK_SERVER_H_
#include "screen_capture_wgc.h"
#include "x.h"
class RemoteDeskServer {
public:
RemoteDeskServer();
~RemoteDeskServer();
public:
int Init();
static void HostReceiveBuffer(const char* data, size_t size,
const char* user_id, size_t user_id_size);
private:
PeerPtr* peer = nullptr;
ScreenCaptureWgc* screen_capture = nullptr;
char* nv12_buffer_ = nullptr;
};
#endif

View File

@@ -19,9 +19,7 @@ extern "C" {
 int screen_w = 2560, screen_h = 1440;
 const int pixel_w = 2560, pixel_h = 1440;
-unsigned char buffer[pixel_w * pixel_h * 3 / 2];
 unsigned char dst_buffer[pixel_w * pixel_h * 3 / 2];
-unsigned char rgbData[pixel_w * pixel_h * 4];
 SDL_Texture *sdlTexture = nullptr;
 SDL_Renderer *sdlRenderer = nullptr;
 SDL_Rect sdlRect;

View File

@@ -55,10 +55,7 @@ int ScreenCaptureWgc::Init(const RECORD_DESKTOP_RECT &rect, const int fps,
 if (_inited == true) return error;
 _fps = fps;
 _rect = rect;
-_start_time = av_gettime_relative();
-_time_base = {1, AV_TIME_BASE};
-_pixel_fmt = AV_PIX_FMT_BGRA;
 _on_data = cb;
 do {
@@ -126,71 +123,9 @@ int ScreenCaptureWgc::Stop() {
 }
 void ScreenCaptureWgc::OnFrame(const WgcSession::wgc_session_frame &frame) {
-// std::cout << "onframe" << std::endl;
-// AVFrame *av_frame = av_frame_alloc();
-// av_frame->pts = av_gettime_relative();
-// av_frame->pkt_dts = av_frame->pts;
-// // av_frame->pkt_pts = av_frame->pts;
-// av_frame->width = frame.width;
-// av_frame->height = frame.height;
-// av_frame->format = AV_PIX_FMT_BGRA;
-// av_frame->pict_type = AV_PICTURE_TYPE_NONE;
-// av_frame->pkt_size = frame.width * frame.height * 4;
-// av_image_fill_arrays(av_frame->data, av_frame->linesize, frame.data,
-// AV_PIX_FMT_BGRA, frame.width, frame.height, 1);
 if (_on_data)
 _on_data((unsigned char *)frame.data, frame.width * frame.height * 4,
 frame.width, frame.height);
-// av_frame_free(&av_frame);
-// BGRA to YUV
-// auto swrCtxBGRA2YUV = sws_getContext(
-// frame.width, frame.height, AV_PIX_FMT_BGRA, frame.width, frame.height,
-// AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
-// create BGRA
-// AVFrame *frame_bgra = av_frame;
-// AVFrame *frame_bgra = av_frame_alloc();
-// frame_bgra->format = AV_PIX_FMT_BGRA;
-// frame_bgra->width = frame.width;
-// frame_bgra->height = frame.height;
-// if (av_frame_get_buffer(frame_bgra, 32) < 0) {
-// printf("Failed: av_frame_get_buffer\n");
-// return;
-// }
-// frame_bgra->data[0] = cropImage;
-// YUV
-// AVFrame *frame_yuv = av_frame_alloc();
-// frame_yuv->width = frame.width;
-// frame_yuv->height = frame.height;
-// frame_yuv->format = AV_PIX_FMT_YUV420P;
-// uint8_t *picture_buf =
-// (uint8_t *)av_malloc(frame.width * frame.height * 3 / 2);
-// if (av_image_fill_arrays(frame_yuv->data, frame_yuv->linesize, picture_buf,
-// AV_PIX_FMT_YUV420P, frame.width, frame.height,
-// 1) < 0) {
-// std::cout << "Failed: av_image_fill_arrays" << std::endl;
-// return;
-// }
-// if (sws_scale(swrCtxBGRA2YUV, frame_bgra->data, frame_bgra->linesize, 0,
-// frame.height, frame_yuv->data, frame_yuv->linesize) < 0) {
-// std::cout << "BGRA to YUV failed" << std::endl;
-// return;
-// }
-// frame_yuv->pts = av_gettime();
-// if (_on_data)
-// _on_data((unsigned char *)frame_yuv->data,
-// frame.width * frame.height * 3 / 2, frame.width, frame.height);
 }
 void ScreenCaptureWgc::CleanUp() {

View File

@@ -3,23 +3,14 @@
 #include <Windows.h>
-#include "wgc_session.h"
-#include "wgc_session_impl.h"
-extern "C" {
-#include <libavcodec\avcodec.h>
-#include <libavfilter\avfilter.h>
-#include <libavformat\avformat.h>
-#include <libavutil\imgutils.h>
-#include <libavutil\time.h>
-#include <libswscale/swscale.h>
-}
 #include <atomic>
 #include <functional>
 #include <string>
 #include <thread>
+#include "wgc_session.h"
+#include "wgc_session_impl.h"
 typedef struct {
 int left;
 int top;
@@ -67,10 +58,6 @@ class ScreenCaptureWgc : public WgcSession::wgc_session_observer {
 cb_desktop_data _on_data;
 cb_desktop_error _on_error;
-AVRational _time_base;
-int64_t _start_time;
-AVPixelFormat _pixel_fmt;
 };
 #endif

View File

@@ -1,372 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/desktop_capture/win/wgc_capture_session.h"
#include <windows.graphics.capture.interop.h>
#include <windows.graphics.directX.direct3d11.interop.h>
#include <wrl.h>
#include <memory>
#include <utility>
#include <vector>
#include "modules/desktop_capture/win/wgc_desktop_frame.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/win/create_direct3d_device.h"
#include "rtc_base/win/get_activation_factory.h"
#include "system_wrappers/include/metrics.h"
using Microsoft::WRL::ComPtr;
namespace WGC = ABI::Windows::Graphics::Capture;
namespace webrtc {
namespace {
// We must use a BGRA pixel format that has 4 bytes per pixel, as required by
// the DesktopFrame interface.
const auto kPixelFormat = ABI::Windows::Graphics::DirectX::DirectXPixelFormat::
DirectXPixelFormat_B8G8R8A8UIntNormalized;
// We only want 1 buffer in our frame pool to reduce latency. If we had more,
// they would sit in the pool for longer and be stale by the time we are asked
// for a new frame.
const int kNumBuffers = 1;
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class StartCaptureResult {
kSuccess = 0,
kSourceClosed = 1,
kAddClosedFailed = 2,
kDxgiDeviceCastFailed = 3,
kD3dDelayLoadFailed = 4,
kD3dDeviceCreationFailed = 5,
kFramePoolActivationFailed = 6,
kFramePoolCastFailed = 7,
kGetItemSizeFailed = 8,
kCreateFreeThreadedFailed = 9,
kCreateCaptureSessionFailed = 10,
kStartCaptureFailed = 11,
kMaxValue = kStartCaptureFailed
};
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class GetFrameResult {
kSuccess = 0,
kItemClosed = 1,
kTryGetNextFrameFailed = 2,
kFrameDropped = 3,
kGetSurfaceFailed = 4,
kDxgiInterfaceAccessFailed = 5,
kTexture2dCastFailed = 6,
kCreateMappedTextureFailed = 7,
kMapFrameFailed = 8,
kGetContentSizeFailed = 9,
kResizeMappedTextureFailed = 10,
kRecreateFramePoolFailed = 11,
kMaxValue = kRecreateFramePoolFailed
};
void RecordStartCaptureResult(StartCaptureResult error) {
RTC_HISTOGRAM_ENUMERATION(
"WebRTC.DesktopCapture.Win.WgcCaptureSessionStartResult",
static_cast<int>(error), static_cast<int>(StartCaptureResult::kMaxValue));
}
void RecordGetFrameResult(GetFrameResult error) {
RTC_HISTOGRAM_ENUMERATION(
"WebRTC.DesktopCapture.Win.WgcCaptureSessionGetFrameResult",
static_cast<int>(error), static_cast<int>(GetFrameResult::kMaxValue));
}
} // namespace
WgcCaptureSession::WgcCaptureSession(ComPtr<ID3D11Device> d3d11_device,
ComPtr<WGC::IGraphicsCaptureItem> item)
: d3d11_device_(std::move(d3d11_device)), item_(std::move(item)) {}
WgcCaptureSession::~WgcCaptureSession() = default;
HRESULT WgcCaptureSession::StartCapture() {
RTC_DCHECK_RUN_ON(&sequence_checker_);
RTC_DCHECK(!is_capture_started_);
if (item_closed_) {
RTC_LOG(LS_ERROR) << "The target source has been closed.";
RecordStartCaptureResult(StartCaptureResult::kSourceClosed);
return E_ABORT;
}
RTC_DCHECK(d3d11_device_);
RTC_DCHECK(item_);
// Listen for the Closed event, to detect if the source we are capturing is
// closed (e.g. application window is closed or monitor is disconnected). If
// it is, we should abort the capture.
auto closed_handler =
Microsoft::WRL::Callback<ABI::Windows::Foundation::ITypedEventHandler<
WGC::GraphicsCaptureItem*, IInspectable*>>(
this, &WgcCaptureSession::OnItemClosed);
EventRegistrationToken item_closed_token;
HRESULT hr = item_->add_Closed(closed_handler.Get(), &item_closed_token);
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kAddClosedFailed);
return hr;
}
ComPtr<IDXGIDevice> dxgi_device;
hr = d3d11_device_->QueryInterface(IID_PPV_ARGS(&dxgi_device));
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kDxgiDeviceCastFailed);
return hr;
}
if (!ResolveCoreWinRTDirect3DDelayload()) {
RecordStartCaptureResult(StartCaptureResult::kD3dDelayLoadFailed);
return E_FAIL;
}
hr = CreateDirect3DDeviceFromDXGIDevice(dxgi_device.Get(), &direct3d_device_);
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kD3dDeviceCreationFailed);
return hr;
}
ComPtr<WGC::IDirect3D11CaptureFramePoolStatics> frame_pool_statics;
hr = GetActivationFactory<
ABI::Windows::Graphics::Capture::IDirect3D11CaptureFramePoolStatics,
RuntimeClass_Windows_Graphics_Capture_Direct3D11CaptureFramePool>(
&frame_pool_statics);
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kFramePoolActivationFailed);
return hr;
}
// Cast to FramePoolStatics2 so we can use CreateFreeThreaded and avoid the
// need to have a DispatcherQueue. We don't listen for the FrameArrived event,
// so there's no difference.
ComPtr<WGC::IDirect3D11CaptureFramePoolStatics2> frame_pool_statics2;
hr = frame_pool_statics->QueryInterface(IID_PPV_ARGS(&frame_pool_statics2));
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kFramePoolCastFailed);
return hr;
}
ABI::Windows::Graphics::SizeInt32 item_size;
hr = item_.Get()->get_Size(&item_size);
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kGetItemSizeFailed);
return hr;
}
previous_size_ = item_size;
hr = frame_pool_statics2->CreateFreeThreaded(direct3d_device_.Get(),
kPixelFormat, kNumBuffers,
item_size, &frame_pool_);
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kCreateFreeThreadedFailed);
return hr;
}
hr = frame_pool_->CreateCaptureSession(item_.Get(), &session_);
if (FAILED(hr)) {
RecordStartCaptureResult(StartCaptureResult::kCreateCaptureSessionFailed);
return hr;
}
hr = session_->StartCapture();
if (FAILED(hr)) {
RTC_LOG(LS_ERROR) << "Failed to start CaptureSession: " << hr;
RecordStartCaptureResult(StartCaptureResult::kStartCaptureFailed);
return hr;
}
RecordStartCaptureResult(StartCaptureResult::kSuccess);
is_capture_started_ = true;
return hr;
}
HRESULT WgcCaptureSession::GetFrame(
std::unique_ptr<DesktopFrame>* output_frame) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
if (item_closed_) {
RTC_LOG(LS_ERROR) << "The target source has been closed.";
RecordGetFrameResult(GetFrameResult::kItemClosed);
return E_ABORT;
}
RTC_DCHECK(is_capture_started_);
ComPtr<WGC::IDirect3D11CaptureFrame> capture_frame;
HRESULT hr = frame_pool_->TryGetNextFrame(&capture_frame);
if (FAILED(hr)) {
RTC_LOG(LS_ERROR) << "TryGetNextFrame failed: " << hr;
RecordGetFrameResult(GetFrameResult::kTryGetNextFrameFailed);
return hr;
}
if (!capture_frame) {
RecordGetFrameResult(GetFrameResult::kFrameDropped);
return hr;
}
// We need to get this CaptureFrame as an ID3D11Texture2D so that we can get
// the raw image data in the format required by the DesktopFrame interface.
ComPtr<ABI::Windows::Graphics::DirectX::Direct3D11::IDirect3DSurface>
d3d_surface;
hr = capture_frame->get_Surface(&d3d_surface);
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kGetSurfaceFailed);
return hr;
}
ComPtr<Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>
direct3DDxgiInterfaceAccess;
hr = d3d_surface->QueryInterface(IID_PPV_ARGS(&direct3DDxgiInterfaceAccess));
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kDxgiInterfaceAccessFailed);
return hr;
}
ComPtr<ID3D11Texture2D> texture_2D;
hr = direct3DDxgiInterfaceAccess->GetInterface(IID_PPV_ARGS(&texture_2D));
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kTexture2dCastFailed);
return hr;
}
if (!mapped_texture_) {
hr = CreateMappedTexture(texture_2D);
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kCreateMappedTextureFailed);
return hr;
}
}
// We need to copy |texture_2D| into |mapped_texture_| as the latter has the
// D3D11_CPU_ACCESS_READ flag set, which lets us access the image data.
// Otherwise it would only be readable by the GPU.
ComPtr<ID3D11DeviceContext> d3d_context;
d3d11_device_->GetImmediateContext(&d3d_context);
d3d_context->CopyResource(mapped_texture_.Get(), texture_2D.Get());
D3D11_MAPPED_SUBRESOURCE map_info;
hr = d3d_context->Map(mapped_texture_.Get(), /*subresource_index=*/0,
D3D11_MAP_READ, /*D3D11_MAP_FLAG_DO_NOT_WAIT=*/0,
&map_info);
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kMapFrameFailed);
return hr;
}
ABI::Windows::Graphics::SizeInt32 new_size;
hr = capture_frame->get_ContentSize(&new_size);
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kGetContentSizeFailed);
return hr;
}
// If the size has changed since the last capture, we must be sure to use
// the smaller dimensions. Otherwise we might overrun our buffer, or
// read stale data from the last frame.
int image_height = std::min(previous_size_.Height, new_size.Height);
int image_width = std::min(previous_size_.Width, new_size.Width);
int row_data_length = image_width * DesktopFrame::kBytesPerPixel;
// Make a copy of the data pointed to by |map_info.pData| so we are free to
// unmap our texture.
uint8_t* src_data = static_cast<uint8_t*>(map_info.pData);
std::vector<uint8_t> image_data;
image_data.reserve(image_height * row_data_length);
uint8_t* image_data_ptr = image_data.data();
for (int i = 0; i < image_height; i++) {
memcpy(image_data_ptr, src_data, row_data_length);
image_data_ptr += row_data_length;
src_data += map_info.RowPitch;
}
// Transfer ownership of |image_data| to the output_frame.
DesktopSize size(image_width, image_height);
*output_frame = std::make_unique<WgcDesktopFrame>(size, row_data_length,
std::move(image_data));
d3d_context->Unmap(mapped_texture_.Get(), 0);
// If the size changed, we must resize the texture and frame pool to fit the
// new size.
if (previous_size_.Height != new_size.Height ||
previous_size_.Width != new_size.Width) {
hr = CreateMappedTexture(texture_2D, new_size.Width, new_size.Height);
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kResizeMappedTextureFailed);
return hr;
}
hr = frame_pool_->Recreate(direct3d_device_.Get(), kPixelFormat,
kNumBuffers, new_size);
if (FAILED(hr)) {
RecordGetFrameResult(GetFrameResult::kRecreateFramePoolFailed);
return hr;
}
}
RecordGetFrameResult(GetFrameResult::kSuccess);
previous_size_ = new_size;
return hr;
}
HRESULT WgcCaptureSession::CreateMappedTexture(
ComPtr<ID3D11Texture2D> src_texture,
UINT width,
UINT height) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
D3D11_TEXTURE2D_DESC src_desc;
src_texture->GetDesc(&src_desc);
D3D11_TEXTURE2D_DESC map_desc;
map_desc.Width = width == 0 ? src_desc.Width : width;
map_desc.Height = height == 0 ? src_desc.Height : height;
map_desc.MipLevels = src_desc.MipLevels;
map_desc.ArraySize = src_desc.ArraySize;
map_desc.Format = src_desc.Format;
map_desc.SampleDesc = src_desc.SampleDesc;
map_desc.Usage = D3D11_USAGE_STAGING;
map_desc.BindFlags = 0;
map_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
map_desc.MiscFlags = 0;
return d3d11_device_->CreateTexture2D(&map_desc, nullptr, &mapped_texture_);
}
HRESULT WgcCaptureSession::OnItemClosed(WGC::IGraphicsCaptureItem* sender,
IInspectable* event_args) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
RTC_LOG(LS_INFO) << "Capture target has been closed.";
item_closed_ = true;
is_capture_started_ = false;
mapped_texture_ = nullptr;
session_ = nullptr;
frame_pool_ = nullptr;
direct3d_device_ = nullptr;
item_ = nullptr;
d3d11_device_ = nullptr;
return S_OK;
}
} // namespace webrtc
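The row copy in GetFrame() above is the subtle part of this file: a mapped D3D11 staging texture has a driver-chosen RowPitch that is frequently wider than width * bytes-per-pixel, so the frame must be repacked row by row. A minimal, self-contained sketch of that pattern (illustrative only; CopyMappedRows and its parameters are not part of the capturer):

#include <d3d11.h>
#include <cstdint>
#include <cstring>
#include <vector>
// Repack a mapped BGRA staging texture into a tightly packed buffer.
std::vector<uint8_t> CopyMappedRows(const D3D11_MAPPED_SUBRESOURCE& map_info,
                                    int width, int height,
                                    int bytes_per_pixel) {
  const int row_bytes = width * bytes_per_pixel;
  std::vector<uint8_t> out(static_cast<size_t>(height) * row_bytes);
  const uint8_t* src = static_cast<const uint8_t*>(map_info.pData);
  uint8_t* dst = out.data();
  for (int row = 0; row < height; ++row) {
    memcpy(dst, src, row_bytes);  // copy only the visible pixels
    dst += row_bytes;             // destination rows are tightly packed
    src += map_info.RowPitch;     // source rows include driver padding
  }
  return out;
}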

View File

@@ -1,110 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURE_SESSION_H_
#define MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURE_SESSION_H_
#include <d3d11.h>
#include <windows.graphics.capture.h>
#include <wrl/client.h>
#include <memory>
#include "api/sequence_checker.h"
#include "modules/desktop_capture/desktop_capture_options.h"
#include "modules/desktop_capture/win/wgc_capture_source.h"
namespace webrtc {
class WgcCaptureSession final {
public:
WgcCaptureSession(
Microsoft::WRL::ComPtr<ID3D11Device> d3d11_device,
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IGraphicsCaptureItem> item);
// Disallow copy and assign.
WgcCaptureSession(const WgcCaptureSession&) = delete;
WgcCaptureSession& operator=(const WgcCaptureSession&) = delete;
~WgcCaptureSession();
HRESULT StartCapture();
// Returns a frame from the frame pool, if any are present.
HRESULT GetFrame(std::unique_ptr<DesktopFrame>* output_frame);
bool IsCaptureStarted() const {
RTC_DCHECK_RUN_ON(&sequence_checker_);
return is_capture_started_;
}
private:
// Initializes |mapped_texture_| with the properties of |src_texture|,
// overriding the values of some required properties, such as the
// D3D11_CPU_ACCESS_READ flag. The optional parameters specify what size
// |mapped_texture_| should be; if they aren't provided, we use the size
// of |src_texture|.
HRESULT CreateMappedTexture(
Microsoft::WRL::ComPtr<ID3D11Texture2D> src_texture,
UINT width = 0,
UINT height = 0);
// Event handler for |item_|'s Closed event.
HRESULT OnItemClosed(
ABI::Windows::Graphics::Capture::IGraphicsCaptureItem* sender,
IInspectable* event_args);
// A Direct3D11 Device provided by the caller. We use this to create an
// IDirect3DDevice, and also to create textures that will hold the image data.
Microsoft::WRL::ComPtr<ID3D11Device> d3d11_device_;
// This item represents what we are capturing, we use it to create the
// capture session, and also to listen for the Closed event.
Microsoft::WRL::ComPtr<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>
item_;
// The IDirect3DDevice is necessary to instantiate the frame pool.
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice>
direct3d_device_;
// The frame pool is where frames are deposited during capture, we retrieve
// them from here with TryGetNextFrame().
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IDirect3D11CaptureFramePool>
frame_pool_;
// This texture holds the final image data. We made it a member so we can
// reuse it, instead of having to create a new texture every time we grab a
// frame.
Microsoft::WRL::ComPtr<ID3D11Texture2D> mapped_texture_;
// This lets us know when the source has been resized, which is important
// because we must resize the framepool and our texture to be able to hold
// enough data for the frame.
ABI::Windows::Graphics::SizeInt32 previous_size_;
// The capture session lets us set properties about the capture before it
// starts such as whether to capture the mouse cursor, and it lets us tell WGC
// to start capturing frames.
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IGraphicsCaptureSession>
session_;
bool item_closed_ = false;
bool is_capture_started_ = false;
SequenceChecker sequence_checker_;
};
} // namespace webrtc
#endif // MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURE_SESSION_H_
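Taken together, the header describes a simple lifecycle: construct with a D3D11 device and a capture item, start, then poll for frames. A hedged usage sketch under those assumptions (|d3d11_device| and |item| are supplied by the caller, as they are in WgcCapturerWin::CaptureFrame()):

// Illustrative fragment; error handling trimmed.
webrtc::WgcCaptureSession session(d3d11_device, item);
HRESULT hr = session.StartCapture();
std::unique_ptr<webrtc::DesktopFrame> frame;
if (SUCCEEDED(hr) && session.IsCaptureStarted())
  hr = session.GetFrame(&frame);  // may legitimately yield no frame yet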

View File

@@ -1,136 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/desktop_capture/win/wgc_capture_source.h"
#include <windows.graphics.capture.interop.h>
#include <windows.h>
#include <utility>
#include "modules/desktop_capture/win/screen_capture_utils.h"
#include "modules/desktop_capture/win/window_capture_utils.h"
#include "rtc_base/win/get_activation_factory.h"
using Microsoft::WRL::ComPtr;
namespace WGC = ABI::Windows::Graphics::Capture;
namespace webrtc {
WgcCaptureSource::WgcCaptureSource(DesktopCapturer::SourceId source_id)
: source_id_(source_id) {}
WgcCaptureSource::~WgcCaptureSource() = default;
bool WgcCaptureSource::IsCapturable() {
// If we can create a capture item, then we can capture it. Unfortunately,
// we can't cache this item because it may be created in a different COM
// apartment than where capture will eventually start from.
ComPtr<WGC::IGraphicsCaptureItem> item;
return SUCCEEDED(CreateCaptureItem(&item));
}
bool WgcCaptureSource::FocusOnSource() {
return false;
}
HRESULT WgcCaptureSource::GetCaptureItem(
ComPtr<WGC::IGraphicsCaptureItem>* result) {
HRESULT hr = S_OK;
if (!item_)
hr = CreateCaptureItem(&item_);
*result = item_;
return hr;
}
WgcCaptureSourceFactory::~WgcCaptureSourceFactory() = default;
WgcWindowSourceFactory::WgcWindowSourceFactory() = default;
WgcWindowSourceFactory::~WgcWindowSourceFactory() = default;
std::unique_ptr<WgcCaptureSource> WgcWindowSourceFactory::CreateCaptureSource(
DesktopCapturer::SourceId source_id) {
return std::make_unique<WgcWindowSource>(source_id);
}
WgcScreenSourceFactory::WgcScreenSourceFactory() = default;
WgcScreenSourceFactory::~WgcScreenSourceFactory() = default;
std::unique_ptr<WgcCaptureSource> WgcScreenSourceFactory::CreateCaptureSource(
DesktopCapturer::SourceId source_id) {
return std::make_unique<WgcScreenSource>(source_id);
}
WgcWindowSource::WgcWindowSource(DesktopCapturer::SourceId source_id)
: WgcCaptureSource(source_id) {}
WgcWindowSource::~WgcWindowSource() = default;
DesktopVector WgcWindowSource::GetTopLeft() {
DesktopRect window_rect;
if (!GetWindowRect(reinterpret_cast<HWND>(GetSourceId()), &window_rect))
return DesktopVector();
return window_rect.top_left();
}
bool WgcWindowSource::IsCapturable() {
if (!IsWindowValidAndVisible(reinterpret_cast<HWND>(GetSourceId())))
return false;
return WgcCaptureSource::IsCapturable();
}
bool WgcWindowSource::FocusOnSource() {
if (!IsWindowValidAndVisible(reinterpret_cast<HWND>(GetSourceId())))
return false;
return ::BringWindowToTop(reinterpret_cast<HWND>(GetSourceId())) &&
::SetForegroundWindow(reinterpret_cast<HWND>(GetSourceId()));
}
HRESULT WgcWindowSource::CreateCaptureItem(
ComPtr<WGC::IGraphicsCaptureItem>* result) {
if (!ResolveCoreWinRTDelayload())
return E_FAIL;
ComPtr<IGraphicsCaptureItemInterop> interop;
HRESULT hr = GetActivationFactory<
IGraphicsCaptureItemInterop,
RuntimeClass_Windows_Graphics_Capture_GraphicsCaptureItem>(&interop);
if (FAILED(hr))
return hr;
ComPtr<WGC::IGraphicsCaptureItem> item;
hr = interop->CreateForWindow(reinterpret_cast<HWND>(GetSourceId()),
IID_PPV_ARGS(&item));
if (FAILED(hr))
return hr;
if (!item)
return E_HANDLE;
*result = std::move(item);
return hr;
}
WgcScreenSource::WgcScreenSource(DesktopCapturer::SourceId source_id)
: WgcCaptureSource(source_id) {
// Getting the HMONITOR could fail if the source_id is invalid. In that case,
// we leave hmonitor_ uninitialized and |IsCapturable()| will fail.
HMONITOR hmon;
if (GetHmonitorFromDeviceIndex(GetSourceId(), &hmon))
hmonitor_ = hmon;
}
WgcScreenSource::~WgcScreenSource() = default;
DesktopVector WgcScreenSource::GetTopLeft() {
if (!hmonitor_)
return DesktopVector();
return GetMonitorRect(*hmonitor_).top_left();
}
bool WgcScreenSource::IsCapturable() {
if (!hmonitor_)
return false;
if (!IsMonitorValid(*hmonitor_))
return false;
return WgcCaptureSource::IsCapturable();
}
HRESULT WgcScreenSource::CreateCaptureItem(
ComPtr<WGC::IGraphicsCaptureItem>* result) {
if (!hmonitor_)
return E_ABORT;
if (!ResolveCoreWinRTDelayload())
return E_FAIL;
ComPtr<IGraphicsCaptureItemInterop> interop;
HRESULT hr = GetActivationFactory<
IGraphicsCaptureItemInterop,
RuntimeClass_Windows_Graphics_Capture_GraphicsCaptureItem>(&interop);
if (FAILED(hr))
return hr;
ComPtr<WGC::IGraphicsCaptureItem> item;
hr = interop->CreateForMonitor(*hmonitor_, IID_PPV_ARGS(&item));
if (FAILED(hr))
return hr;
if (!item)
return E_HANDLE;
*result = std::move(item);
return hr;
}
} // namespace webrtc

View File

@@ -1,105 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURE_SOURCE_H_
#define MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURE_SOURCE_H_
#include <windows.graphics.capture.h>
#include <wrl/client.h>
#include <memory>
#include "absl/types/optional.h"
#include "modules/desktop_capture/desktop_capturer.h"
#include "modules/desktop_capture/desktop_geometry.h"
namespace webrtc {
// Abstract class to represent the source that WGC-based capturers capture
// from. Could represent an application window or a screen. Consumers should use
// the appropriate Wgc*SourceFactory class to create WgcCaptureSource objects
// of the appropriate type.
class WgcCaptureSource {
public:
explicit WgcCaptureSource(DesktopCapturer::SourceId source_id);
virtual ~WgcCaptureSource();
virtual DesktopVector GetTopLeft() = 0;
virtual bool IsCapturable();
virtual bool FocusOnSource();
HRESULT GetCaptureItem(
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>* result);
DesktopCapturer::SourceId GetSourceId() { return source_id_; }
protected:
virtual HRESULT CreateCaptureItem(
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>* result) = 0;
private:
Microsoft::WRL::ComPtr<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>
item_;
const DesktopCapturer::SourceId source_id_;
};
class WgcCaptureSourceFactory {
public:
virtual ~WgcCaptureSourceFactory();
virtual std::unique_ptr<WgcCaptureSource> CreateCaptureSource(
DesktopCapturer::SourceId) = 0;
};
class WgcWindowSourceFactory final : public WgcCaptureSourceFactory {
public:
WgcWindowSourceFactory();
// Disallow copy and assign.
WgcWindowSourceFactory(const WgcWindowSourceFactory&) = delete;
WgcWindowSourceFactory& operator=(const WgcWindowSourceFactory&) = delete;
~WgcWindowSourceFactory() override;
std::unique_ptr<WgcCaptureSource> CreateCaptureSource(
DesktopCapturer::SourceId) override;
};
class WgcScreenSourceFactory final : public WgcCaptureSourceFactory {
public:
WgcScreenSourceFactory();
WgcScreenSourceFactory(const WgcScreenSourceFactory&) = delete;
WgcScreenSourceFactory& operator=(const WgcScreenSourceFactory&) = delete;
~WgcScreenSourceFactory() override;
std::unique_ptr<WgcCaptureSource> CreateCaptureSource(
DesktopCapturer::SourceId) override;
};
// Class for capturing application windows.
class WgcWindowSource final : public WgcCaptureSource {
public:
explicit WgcWindowSource(DesktopCapturer::SourceId source_id);
WgcWindowSource(const WgcWindowSource&) = delete;
WgcWindowSource& operator=(const WgcWindowSource&) = delete;
~WgcWindowSource() override;
DesktopVector GetTopLeft() override;
bool IsCapturable() override;
bool FocusOnSource() override;
private:
HRESULT CreateCaptureItem(
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>* result)
override;
};
// Class for capturing screens/monitors/displays.
class WgcScreenSource final : public WgcCaptureSource {
public:
explicit WgcScreenSource(DesktopCapturer::SourceId source_id);
WgcScreenSource(const WgcScreenSource&) = delete;
WgcScreenSource& operator=(const WgcScreenSource&) = delete;
~WgcScreenSource() override;
DesktopVector GetTopLeft() override;
bool IsCapturable() override;
private:
HRESULT CreateCaptureItem(
Microsoft::WRL::ComPtr<
ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>* result)
override;
// To maintain compatibility with other capturers, this class accepts a
// device index as it's SourceId. However, WGC requires we use an HMONITOR to
// describe which screen to capture. So, we internally convert the supplied
// device index into an HMONITOR when |IsCapturable()| is called.
absl::optional<HMONITOR> hmonitor_;
};
} // namespace webrtc
#endif // MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURE_SOURCE_H_
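A short sketch of how the factory indirection above is meant to be consumed: the capturer only ever sees the WgcCaptureSourceFactory and WgcCaptureSource interfaces, so window and screen capture share one code path. Illustrative only; |source_id| is assumed to be an HWND cast to a SourceId, and MakeItemForWindow is a hypothetical caller:

#include <memory>
#include <wrl/client.h>
void MakeItemForWindow(webrtc::DesktopCapturer::SourceId source_id) {
  std::unique_ptr<webrtc::WgcCaptureSourceFactory> factory =
      std::make_unique<webrtc::WgcWindowSourceFactory>();
  std::unique_ptr<webrtc::WgcCaptureSource> source =
      factory->CreateCaptureSource(source_id);
  if (source->IsCapturable()) {
    Microsoft::WRL::ComPtr<
        ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>
        item;
    HRESULT hr = source->GetCaptureItem(&item);
    (void)hr;  // a real caller would hand |item| to a WgcCaptureSession
  }
}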

View File

@@ -1,113 +0,0 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/desktop_capture/win/wgc_capture_source.h"
#include <windows.graphics.capture.h>
#include <wrl/client.h>
#include <utility>
#include "modules/desktop_capture/desktop_capture_types.h"
#include "modules/desktop_capture/desktop_geometry.h"
#include "modules/desktop_capture/win/screen_capture_utils.h"
#include "modules/desktop_capture/win/test_support/test_window.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/win/scoped_com_initializer.h"
#include "rtc_base/win/windows_version.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
const WCHAR kWindowTitle[] = L"WGC Capture Source Test Window";
const int kFirstXCoord = 25;
const int kFirstYCoord = 50;
const int kSecondXCoord = 50;
const int kSecondYCoord = 75;
enum SourceType { kWindowSource = 0, kScreenSource = 1 };
} // namespace
class WgcCaptureSourceTest : public ::testing::TestWithParam<SourceType> {
public:
void SetUp() override {
if (rtc::rtc_win::GetVersion() < rtc::rtc_win::Version::VERSION_WIN10_RS5) {
RTC_LOG(LS_INFO)
<< "Skipping WgcCaptureSourceTests on Windows versions < RS5.";
GTEST_SKIP();
}
com_initializer_ =
std::make_unique<ScopedCOMInitializer>(ScopedCOMInitializer::kMTA);
ASSERT_TRUE(com_initializer_->Succeeded());
}
void TearDown() override {
if (window_open_) {
DestroyTestWindow(window_info_);
}
}
void SetUpForWindowSource() {
window_info_ = CreateTestWindow(kWindowTitle);
window_open_ = true;
source_id_ = reinterpret_cast<DesktopCapturer::SourceId>(window_info_.hwnd);
source_factory_ = std::make_unique<WgcWindowSourceFactory>();
}
void SetUpForScreenSource() {
source_id_ = kFullDesktopScreenId;
source_factory_ = std::make_unique<WgcScreenSourceFactory>();
}
protected:
std::unique_ptr<ScopedCOMInitializer> com_initializer_;
std::unique_ptr<WgcCaptureSourceFactory> source_factory_;
std::unique_ptr<WgcCaptureSource> source_;
DesktopCapturer::SourceId source_id_;
WindowInfo window_info_;
bool window_open_ = false;
};
// Window specific test
TEST_F(WgcCaptureSourceTest, WindowPosition) {
SetUpForWindowSource();
source_ = source_factory_->CreateCaptureSource(source_id_);
ASSERT_TRUE(source_);
EXPECT_EQ(source_->GetSourceId(), source_id_);
MoveTestWindow(window_info_.hwnd, kFirstXCoord, kFirstYCoord);
DesktopVector source_vector = source_->GetTopLeft();
EXPECT_EQ(source_vector.x(), kFirstXCoord);
EXPECT_EQ(source_vector.y(), kFirstYCoord);
MoveTestWindow(window_info_.hwnd, kSecondXCoord, kSecondYCoord);
source_vector = source_->GetTopLeft();
EXPECT_EQ(source_vector.x(), kSecondXCoord);
EXPECT_EQ(source_vector.y(), kSecondYCoord);
}
// Screen specific test
TEST_F(WgcCaptureSourceTest, ScreenPosition) {
SetUpForScreenSource();
source_ = source_factory_->CreateCaptureSource(source_id_);
ASSERT_TRUE(source_);
EXPECT_EQ(source_id_, source_->GetSourceId());
DesktopRect screen_rect = GetFullscreenRect();
DesktopVector source_vector = source_->GetTopLeft();
EXPECT_EQ(source_vector.x(), screen_rect.left());
EXPECT_EQ(source_vector.y(), screen_rect.top());
}
// Source agnostic test
TEST_P(WgcCaptureSourceTest, CreateSource) {
if (GetParam() == SourceType::kWindowSource) {
SetUpForWindowSource();
} else {
SetUpForScreenSource();
}
source_ = source_factory_->CreateCaptureSource(source_id_);
ASSERT_TRUE(source_);
EXPECT_EQ(source_id_, source_->GetSourceId());
EXPECT_TRUE(source_->IsCapturable());
Microsoft::WRL::ComPtr<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>
item;
EXPECT_TRUE(SUCCEEDED(source_->GetCaptureItem(&item)));
EXPECT_TRUE(item);
}
INSTANTIATE_TEST_SUITE_P(SourceAgnostic,
WgcCaptureSourceTest,
::testing::Values(SourceType::kWindowSource,
SourceType::kScreenSource));
} // namespace webrtc

View File

@@ -1,218 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/desktop_capture/win/wgc_capturer_win.h"
#include <utility>
#include "modules/desktop_capture/desktop_capture_metrics_helper.h"
#include "modules/desktop_capture/desktop_capture_types.h"
#include "modules/desktop_capture/win/wgc_desktop_frame.h"
#include "rtc_base/logging.h"
#include "rtc_base/time_utils.h"
#include "system_wrappers/include/metrics.h"
namespace WGC = ABI::Windows::Graphics::Capture;
using Microsoft::WRL::ComPtr;
namespace webrtc {
namespace {
enum class WgcCapturerResult {
kSuccess = 0,
kNoDirect3dDevice = 1,
kNoSourceSelected = 2,
kItemCreationFailure = 3,
kSessionStartFailure = 4,
kGetFrameFailure = 5,
kFrameDropped = 6,
kMaxValue = kFrameDropped
};
void RecordWgcCapturerResult(WgcCapturerResult error) {
RTC_HISTOGRAM_ENUMERATION("WebRTC.DesktopCapture.Win.WgcCapturerResult",
static_cast<int>(error),
static_cast<int>(WgcCapturerResult::kMaxValue));
}
} // namespace
WgcCapturerWin::WgcCapturerWin(
std::unique_ptr<WgcCaptureSourceFactory> source_factory,
std::unique_ptr<SourceEnumerator> source_enumerator)
: source_factory_(std::move(source_factory)),
source_enumerator_(std::move(source_enumerator)) {}
WgcCapturerWin::~WgcCapturerWin() = default;
// static
std::unique_ptr<DesktopCapturer> WgcCapturerWin::CreateRawWindowCapturer(
const DesktopCaptureOptions& options) {
return std::make_unique<WgcCapturerWin>(
std::make_unique<WgcWindowSourceFactory>(),
std::make_unique<WindowEnumerator>(
options.enumerate_current_process_windows()));
}
// static
std::unique_ptr<DesktopCapturer> WgcCapturerWin::CreateRawScreenCapturer(
const DesktopCaptureOptions& options) {
return std::make_unique<WgcCapturerWin>(
std::make_unique<WgcScreenSourceFactory>(),
std::make_unique<ScreenEnumerator>());
}
bool WgcCapturerWin::GetSourceList(SourceList* sources) {
return source_enumerator_->FindAllSources(sources);
}
bool WgcCapturerWin::SelectSource(DesktopCapturer::SourceId id) {
capture_source_ = source_factory_->CreateCaptureSource(id);
return capture_source_->IsCapturable();
}
bool WgcCapturerWin::FocusOnSelectedSource() {
if (!capture_source_)
return false;
return capture_source_->FocusOnSource();
}
void WgcCapturerWin::Start(Callback* callback) {
RTC_DCHECK(!callback_);
RTC_DCHECK(callback);
RecordCapturerImpl(DesktopCapturerId::kWgcCapturerWin);
callback_ = callback;
// Create a Direct3D11 device to share amongst the WgcCaptureSessions. Many
// parameters are nullptr as the implementation uses defaults that work well for
// us.
HRESULT hr = D3D11CreateDevice(
/*adapter=*/nullptr, D3D_DRIVER_TYPE_HARDWARE,
/*software_rasterizer=*/nullptr, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
/*feature_levels=*/nullptr, /*feature_levels_size=*/0, D3D11_SDK_VERSION,
&d3d11_device_, /*feature_level=*/nullptr, /*device_context=*/nullptr);
if (hr == DXGI_ERROR_UNSUPPORTED) {
// If a hardware device could not be created, fall back to WARP, which is a
// high-speed software rasterizer.
hr = D3D11CreateDevice(
/*adapter=*/nullptr, D3D_DRIVER_TYPE_WARP,
/*software_rasterizer=*/nullptr, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
/*feature_levels=*/nullptr, /*feature_levels_size=*/0,
D3D11_SDK_VERSION, &d3d11_device_, /*feature_level=*/nullptr,
/*device_context=*/nullptr);
}
if (FAILED(hr)) {
RTC_LOG(LS_ERROR) << "Failed to create D3D11Device: " << hr;
}
}
void WgcCapturerWin::CaptureFrame() {
RTC_DCHECK(callback_);
if (!capture_source_) {
RTC_LOG(LS_ERROR) << "Source hasn't been selected";
callback_->OnCaptureResult(DesktopCapturer::Result::ERROR_PERMANENT,
/*frame=*/nullptr);
RecordWgcCapturerResult(WgcCapturerResult::kNoSourceSelected);
return;
}
if (!d3d11_device_) {
RTC_LOG(LS_ERROR) << "No D3D11Device, cannot capture.";
callback_->OnCaptureResult(DesktopCapturer::Result::ERROR_PERMANENT,
/*frame=*/nullptr);
RecordWgcCapturerResult(WgcCapturerResult::kNoDirect3dDevice);
return;
}
int64_t capture_start_time_nanos = rtc::TimeNanos();
HRESULT hr;
WgcCaptureSession* capture_session = nullptr;
std::map<SourceId, WgcCaptureSession>::iterator session_iter =
ongoing_captures_.find(capture_source_->GetSourceId());
if (session_iter == ongoing_captures_.end()) {
ComPtr<WGC::IGraphicsCaptureItem> item;
hr = capture_source_->GetCaptureItem(&item);
if (FAILED(hr)) {
RTC_LOG(LS_ERROR) << "Failed to create a GraphicsCaptureItem: " << hr;
callback_->OnCaptureResult(DesktopCapturer::Result::ERROR_PERMANENT,
/*frame=*/nullptr);
RecordWgcCapturerResult(WgcCapturerResult::kItemCreationFailure);
return;
}
std::pair<std::map<SourceId, WgcCaptureSession>::iterator, bool>
iter_success_pair = ongoing_captures_.emplace(
std::piecewise_construct,
std::forward_as_tuple(capture_source_->GetSourceId()),
std::forward_as_tuple(d3d11_device_, item));
RTC_DCHECK(iter_success_pair.second);
capture_session = &iter_success_pair.first->second;
} else {
capture_session = &session_iter->second;
}
if (!capture_session->IsCaptureStarted()) {
hr = capture_session->StartCapture();
if (FAILED(hr)) {
RTC_LOG(LS_ERROR) << "Failed to start capture: " << hr;
ongoing_captures_.erase(capture_source_->GetSourceId());
callback_->OnCaptureResult(DesktopCapturer::Result::ERROR_PERMANENT,
/*frame=*/nullptr);
RecordWgcCapturerResult(WgcCapturerResult::kSessionStartFailure);
return;
}
}
std::unique_ptr<DesktopFrame> frame;
hr = capture_session->GetFrame(&frame);
if (FAILED(hr)) {
RTC_LOG(LS_ERROR) << "GetFrame failed: " << hr;
ongoing_captures_.erase(capture_source_->GetSourceId());
callback_->OnCaptureResult(DesktopCapturer::Result::ERROR_PERMANENT,
/*frame=*/nullptr);
RecordWgcCapturerResult(WgcCapturerResult::kGetFrameFailure);
return;
}
if (!frame) {
callback_->OnCaptureResult(DesktopCapturer::Result::ERROR_TEMPORARY,
/*frame=*/nullptr);
RecordWgcCapturerResult(WgcCapturerResult::kFrameDropped);
return;
}
int capture_time_ms = (rtc::TimeNanos() - capture_start_time_nanos) /
rtc::kNumNanosecsPerMillisec;
RTC_HISTOGRAM_COUNTS_1000("WebRTC.DesktopCapture.Win.WgcCapturerFrameTime",
capture_time_ms);
frame->set_capture_time_ms(capture_time_ms);
frame->set_capturer_id(DesktopCapturerId::kWgcCapturerWin);
frame->set_may_contain_cursor(true);
frame->set_top_left(capture_source_->GetTopLeft());
RecordWgcCapturerResult(WgcCapturerResult::kSuccess);
callback_->OnCaptureResult(DesktopCapturer::Result::SUCCESS,
std::move(frame));
}
bool WgcCapturerWin::IsSourceBeingCaptured(DesktopCapturer::SourceId id) {
std::map<DesktopCapturer::SourceId, WgcCaptureSession>::iterator
session_iter = ongoing_captures_.find(id);
if (session_iter == ongoing_captures_.end())
return false;
return session_iter->second.IsCaptureStarted();
}
} // namespace webrtc
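One detail in CaptureFrame() worth calling out: WgcCaptureSession deletes its copy operations (and therefore gets no move operations), so it can be neither copied nor moved into |ongoing_captures_|. The piecewise_construct emplace builds the session in place inside the map instead. A reduced sketch of the same pattern, with a hypothetical stand-in type:

#include <map>
#include <tuple>
#include <utility>
// Stand-in for WgcCaptureSession: constructible, but neither copyable nor
// movable, which is why the map entry must be emplaced piecewise.
struct Session {
  Session(int device, int item) : device_(device), item_(item) {}
  Session(const Session&) = delete;
  Session& operator=(const Session&) = delete;
  int device_;
  int item_;
};
std::map<int, Session> captures;
void AddCapture(int source_id, int device, int item) {
  captures.emplace(std::piecewise_construct,
                   std::forward_as_tuple(source_id),
                   std::forward_as_tuple(device, item));
}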

View File

@@ -1,138 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURER_WIN_H_
#define MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURER_WIN_H_
#include <d3d11.h>
#include <wrl/client.h>
#include <map>
#include <memory>
#include "modules/desktop_capture/desktop_capture_options.h"
#include "modules/desktop_capture/desktop_capturer.h"
#include "modules/desktop_capture/win/screen_capture_utils.h"
#include "modules/desktop_capture/win/wgc_capture_session.h"
#include "modules/desktop_capture/win/wgc_capture_source.h"
#include "modules/desktop_capture/win/window_capture_utils.h"
namespace webrtc {
// WgcCapturerWin is initialized with an implementation of this base class,
// which it uses to find capturable sources of a particular type. This way,
// WgcCapturerWin can remain source-agnostic.
class SourceEnumerator {
public:
virtual ~SourceEnumerator() = default;
virtual bool FindAllSources(DesktopCapturer::SourceList* sources) = 0;
};
class WindowEnumerator final : public SourceEnumerator {
public:
explicit WindowEnumerator(bool enumerate_current_process_windows)
: enumerate_current_process_windows_(enumerate_current_process_windows) {}
WindowEnumerator(const WindowEnumerator&) = delete;
WindowEnumerator& operator=(const WindowEnumerator&) = delete;
~WindowEnumerator() override = default;
bool FindAllSources(DesktopCapturer::SourceList* sources) override {
// WGC fails to capture windows with the WS_EX_TOOLWINDOW style, so we
// provide it as a filter to ensure windows with the style are not returned.
return window_capture_helper_.EnumerateCapturableWindows(
sources, enumerate_current_process_windows_, WS_EX_TOOLWINDOW);
}
private:
WindowCaptureHelperWin window_capture_helper_;
bool enumerate_current_process_windows_;
};
class ScreenEnumerator final : public SourceEnumerator {
public:
ScreenEnumerator() = default;
ScreenEnumerator(const ScreenEnumerator&) = delete;
ScreenEnumerator& operator=(const ScreenEnumerator&) = delete;
~ScreenEnumerator() override = default;
bool FindAllSources(DesktopCapturer::SourceList* sources) override {
return webrtc::GetScreenList(sources);
}
};
// A capturer that uses the Windows.Graphics.Capture APIs. It is suitable for
// both window and screen capture (but only one type per instance). Consumers
// should not instantiate this class directly, instead they should use
// |CreateRawWindowCapturer()| or |CreateRawScreenCapturer()| to receive a
// capturer appropriate for the type of source they want to capture.
class WgcCapturerWin : public DesktopCapturer {
public:
WgcCapturerWin(std::unique_ptr<WgcCaptureSourceFactory> source_factory,
std::unique_ptr<SourceEnumerator> source_enumerator);
WgcCapturerWin(const WgcCapturerWin&) = delete;
WgcCapturerWin& operator=(const WgcCapturerWin&) = delete;
~WgcCapturerWin() override;
static std::unique_ptr<DesktopCapturer> CreateRawWindowCapturer(
const DesktopCaptureOptions& options);
static std::unique_ptr<DesktopCapturer> CreateRawScreenCapturer(
const DesktopCaptureOptions& options);
// DesktopCapturer interface.
bool GetSourceList(SourceList* sources) override;
bool SelectSource(SourceId id) override;
bool FocusOnSelectedSource() override;
void Start(Callback* callback) override;
void CaptureFrame() override;
// Used in WgcCapturerTests.
bool IsSourceBeingCaptured(SourceId id);
private:
// Factory to create a WgcCaptureSource for us whenever SelectSource is
// called. Initialized at construction with a source-specific implementation.
std::unique_ptr<WgcCaptureSourceFactory> source_factory_;
// The source enumerator helps us find capturable sources of the appropriate
// type. Initialized at construction with a source-specific implementation.
std::unique_ptr<SourceEnumerator> source_enumerator_;
// The WgcCaptureSource represents the source we are capturing. It tells us
// if the source is capturable and it creates the GraphicsCaptureItem for us.
std::unique_ptr<WgcCaptureSource> capture_source_;
// A map of all the sources we are capturing and the associated
// WgcCaptureSession. Frames for the current source (indicated via
// SelectSource) will be retrieved from the appropriate session when
// requested via CaptureFrame.
// This helps us efficiently capture multiple sources (e.g. when consumers
// are trying to display a list of available capture targets with thumbnails).
std::map<SourceId, WgcCaptureSession> ongoing_captures_;
// The callback that we deliver frames to, synchronously, before CaptureFrame
// returns.
Callback* callback_ = nullptr;
// A Direct3D11 device that is shared amongst the WgcCaptureSessions, who
// require one to perform the capture.
Microsoft::WRL::ComPtr<::ID3D11Device> d3d11_device_;
};
} // namespace webrtc
#endif // MODULES_DESKTOP_CAPTURE_WIN_WGC_CAPTURER_WIN_H_
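End to end, the public surface above is used like any other DesktopCapturer. A hedged sketch (illustrative; the COM/WinRT initialization that the tests below perform is assumed to have happened already, and FrameSinkCallback is a hypothetical consumer):

#include <memory>
class FrameSinkCallback : public webrtc::DesktopCapturer::Callback {
 public:
  void OnCaptureResult(webrtc::DesktopCapturer::Result result,
                       std::unique_ptr<webrtc::DesktopFrame> frame) override {
    // A real consumer would hand |frame| to its rendering/encoding pipeline.
  }
};
void CaptureOneScreenFrame() {
  auto capturer = webrtc::WgcCapturerWin::CreateRawScreenCapturer(
      webrtc::DesktopCaptureOptions::CreateDefault());
  webrtc::DesktopCapturer::SourceList sources;
  if (!capturer->GetSourceList(&sources) || sources.empty()) return;
  if (!capturer->SelectSource(sources[0].id)) return;
  FrameSinkCallback callback;
  capturer->Start(&callback);
  capturer->CaptureFrame();  // delivers synchronously to OnCaptureResult()
}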

View File

@@ -1,421 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/desktop_capture/win/wgc_capturer_win.h"
#include <string>
#include <utility>
#include <vector>
#include "modules/desktop_capture/desktop_capture_options.h"
#include "modules/desktop_capture/desktop_capture_types.h"
#include "modules/desktop_capture/desktop_capturer.h"
#include "modules/desktop_capture/win/test_support/test_window.h"
#include "modules/desktop_capture/win/window_capture_utils.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/thread.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/win/scoped_com_initializer.h"
#include "rtc_base/win/windows_version.h"
#include "system_wrappers/include/metrics.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
const char kWindowThreadName[] = "wgc_capturer_test_window_thread";
const WCHAR kWindowTitle[] = L"WGC Capturer Test Window";
const char kCapturerImplHistogram[] =
"WebRTC.DesktopCapture.Win.DesktopCapturerImpl";
const char kCapturerResultHistogram[] =
"WebRTC.DesktopCapture.Win.WgcCapturerResult";
const int kSuccess = 0;
const int kSessionStartFailure = 4;
const char kCaptureSessionResultHistogram[] =
"WebRTC.DesktopCapture.Win.WgcCaptureSessionStartResult";
const int kSourceClosed = 1;
const char kCaptureTimeHistogram[] =
"WebRTC.DesktopCapture.Win.WgcCapturerFrameTime";
const int kSmallWindowWidth = 200;
const int kSmallWindowHeight = 100;
const int kMediumWindowWidth = 300;
const int kMediumWindowHeight = 200;
const int kLargeWindowWidth = 400;
const int kLargeWindowHeight = 500;
// The size of the image we capture is slightly smaller than the actual size of
// the window.
const int kWindowWidthSubtrahend = 14;
const int kWindowHeightSubtrahend = 7;
// Custom message constants so we can direct our thread to close windows
// and quit running.
const UINT kNoOp = WM_APP;
const UINT kDestroyWindow = WM_APP + 1;
const UINT kQuitRunning = WM_APP + 2;
enum CaptureType { kWindowCapture = 0, kScreenCapture = 1 };
} // namespace
class WgcCapturerWinTest : public ::testing::TestWithParam<CaptureType>,
public DesktopCapturer::Callback {
public:
void SetUp() override {
if (rtc::rtc_win::GetVersion() < rtc::rtc_win::Version::VERSION_WIN10_RS5) {
RTC_LOG(LS_INFO)
<< "Skipping WgcCapturerWinTests on Windows versions < RS5.";
GTEST_SKIP();
}
com_initializer_ =
std::make_unique<ScopedCOMInitializer>(ScopedCOMInitializer::kMTA);
EXPECT_TRUE(com_initializer_->Succeeded());
}
void SetUpForWindowCapture(int window_width = kMediumWindowWidth,
int window_height = kMediumWindowHeight) {
capturer_ = WgcCapturerWin::CreateRawWindowCapturer(
DesktopCaptureOptions::CreateDefault());
CreateWindowOnSeparateThread(window_width, window_height);
StartWindowThreadMessageLoop();
source_id_ = GetTestWindowIdFromSourceList();
}
void SetUpForScreenCapture() {
capturer_ = WgcCapturerWin::CreateRawScreenCapturer(
DesktopCaptureOptions::CreateDefault());
source_id_ = GetScreenIdFromSourceList();
}
void TearDown() override {
if (window_open_) {
CloseTestWindow();
}
}
// The window must live on a separate thread so that we can run a message pump
// without blocking the test thread. This is necessary if we are interested in
// having GraphicsCaptureItem events (i.e. the Closed event) fire, and it more
// closely resembles how capture works in the wild.
void CreateWindowOnSeparateThread(int window_width, int window_height) {
window_thread_ = rtc::Thread::Create();
window_thread_->SetName(kWindowThreadName, nullptr);
window_thread_->Start();
window_thread_->Invoke<void>(RTC_FROM_HERE, [this, window_width,
window_height]() {
window_thread_id_ = GetCurrentThreadId();
window_info_ =
CreateTestWindow(kWindowTitle, window_height, window_width);
window_open_ = true;
while (!IsWindowResponding(window_info_.hwnd)) {
RTC_LOG(LS_INFO) << "Waiting for test window to become responsive in "
"WgcWindowCaptureTest.";
}
while (!IsWindowValidAndVisible(window_info_.hwnd)) {
RTC_LOG(LS_INFO) << "Waiting for test window to be visible in "
"WgcWindowCaptureTest.";
}
});
ASSERT_TRUE(window_thread_->RunningForTest());
ASSERT_FALSE(window_thread_->IsCurrent());
}
void StartWindowThreadMessageLoop() {
window_thread_->PostTask(RTC_FROM_HERE, [this]() {
MSG msg;
BOOL gm;
while ((gm = ::GetMessage(&msg, NULL, 0, 0)) != 0 && gm != -1) {
::DispatchMessage(&msg);
if (msg.message == kDestroyWindow) {
DestroyTestWindow(window_info_);
}
if (msg.message == kQuitRunning) {
PostQuitMessage(0);
}
}
});
}
void CloseTestWindow() {
::PostThreadMessage(window_thread_id_, kDestroyWindow, 0, 0);
::PostThreadMessage(window_thread_id_, kQuitRunning, 0, 0);
window_thread_->Stop();
window_open_ = false;
}
DesktopCapturer::SourceId GetTestWindowIdFromSourceList() {
// Frequently, the test window will not show up in GetSourceList because it
// was created too recently. Since we are confident the window will be found
// eventually we loop here until we find it.
intptr_t src_id;
do {
DesktopCapturer::SourceList sources;
EXPECT_TRUE(capturer_->GetSourceList(&sources));
auto it = std::find_if(
sources.begin(), sources.end(),
[&](const DesktopCapturer::Source& src) {
return src.id == reinterpret_cast<intptr_t>(window_info_.hwnd);
});
// Guard against dereferencing end() when the window hasn't appeared yet.
src_id = (it != sources.end()) ? it->id : 0;
} while (src_id != reinterpret_cast<intptr_t>(window_info_.hwnd));
return src_id;
}
DesktopCapturer::SourceId GetScreenIdFromSourceList() {
DesktopCapturer::SourceList sources;
EXPECT_TRUE(capturer_->GetSourceList(&sources));
EXPECT_GT(sources.size(), 0ULL);
return sources[0].id;
}
void DoCapture() {
// Sometimes the first few frames are empty because the capture engine is
// still starting up. We also may drop a few frames when the window is
// resized or un-minimized.
do {
capturer_->CaptureFrame();
} while (result_ == DesktopCapturer::Result::ERROR_TEMPORARY);
EXPECT_EQ(result_, DesktopCapturer::Result::SUCCESS);
EXPECT_TRUE(frame_);
EXPECT_GT(metrics::NumEvents(kCapturerResultHistogram, kSuccess),
successful_captures_);
++successful_captures_;
}
void ValidateFrame(int expected_width, int expected_height) {
EXPECT_EQ(frame_->size().width(), expected_width - kWindowWidthSubtrahend);
EXPECT_EQ(frame_->size().height(),
expected_height - kWindowHeightSubtrahend);
// Verify the buffer contains as much data as it should, and that the right
// colors are found.
int data_length = frame_->stride() * frame_->size().height();
// The first and last pixel should have the same color because they will be
// from the border of the window.
// Pixels have 4 bytes of data so the whole pixel needs a uint32_t to fit.
uint32_t first_pixel = static_cast<uint32_t>(*frame_->data());
uint32_t last_pixel = static_cast<uint32_t>(
*(frame_->data() + data_length - DesktopFrame::kBytesPerPixel));
EXPECT_EQ(first_pixel, last_pixel);
// Let's also check a pixel from the middle of the content area, which the
// TestWindow will paint a consistent color for us to verify.
uint8_t* middle_pixel = frame_->data() + (data_length / 2);
int sub_pixel_offset = DesktopFrame::kBytesPerPixel / 4;
EXPECT_EQ(*middle_pixel, kTestWindowBValue);
middle_pixel += sub_pixel_offset;
EXPECT_EQ(*middle_pixel, kTestWindowGValue);
middle_pixel += sub_pixel_offset;
EXPECT_EQ(*middle_pixel, kTestWindowRValue);
middle_pixel += sub_pixel_offset;
// The window is opaque so we expect 0xFF for the Alpha channel.
EXPECT_EQ(*middle_pixel, 0xFF);
}
// DesktopCapturer::Callback interface
// The capturer synchronously invokes this method before |CaptureFrame()|
// returns.
void OnCaptureResult(DesktopCapturer::Result result,
std::unique_ptr<DesktopFrame> frame) override {
result_ = result;
frame_ = std::move(frame);
}
protected:
std::unique_ptr<ScopedCOMInitializer> com_initializer_;
DWORD window_thread_id_;
std::unique_ptr<rtc::Thread> window_thread_;
WindowInfo window_info_;
intptr_t source_id_;
bool window_open_ = false;
DesktopCapturer::Result result_;
int successful_captures_ = 0;
std::unique_ptr<DesktopFrame> frame_;
std::unique_ptr<DesktopCapturer> capturer_;
};
TEST_P(WgcCapturerWinTest, SelectValidSource) {
if (GetParam() == CaptureType::kWindowCapture) {
SetUpForWindowCapture();
} else {
SetUpForScreenCapture();
}
EXPECT_TRUE(capturer_->SelectSource(source_id_));
}
TEST_P(WgcCapturerWinTest, SelectInvalidSource) {
if (GetParam() == CaptureType::kWindowCapture) {
capturer_ = WgcCapturerWin::CreateRawWindowCapturer(
DesktopCaptureOptions::CreateDefault());
source_id_ = kNullWindowId;
} else {
capturer_ = WgcCapturerWin::CreateRawScreenCapturer(
DesktopCaptureOptions::CreateDefault());
source_id_ = kInvalidScreenId;
}
EXPECT_FALSE(capturer_->SelectSource(source_id_));
}
TEST_P(WgcCapturerWinTest, Capture) {
if (GetParam() == CaptureType::kWindowCapture) {
SetUpForWindowCapture();
} else {
SetUpForScreenCapture();
}
EXPECT_TRUE(capturer_->SelectSource(source_id_));
capturer_->Start(this);
EXPECT_GE(metrics::NumEvents(kCapturerImplHistogram,
DesktopCapturerId::kWgcCapturerWin),
1);
DoCapture();
EXPECT_GT(frame_->size().width(), 0);
EXPECT_GT(frame_->size().height(), 0);
}
TEST_P(WgcCapturerWinTest, CaptureTime) {
if (GetParam() == CaptureType::kWindowCapture) {
SetUpForWindowCapture();
} else {
SetUpForScreenCapture();
}
EXPECT_TRUE(capturer_->SelectSource(source_id_));
capturer_->Start(this);
int64_t start_time;
do {
start_time = rtc::TimeNanos();
capturer_->CaptureFrame();
} while (result_ == DesktopCapturer::Result::ERROR_TEMPORARY);
int capture_time_ms =
(rtc::TimeNanos() - start_time) / rtc::kNumNanosecsPerMillisec;
EXPECT_TRUE(frame_);
// The test may measure the time slightly differently than the capturer, so
// we just check that it's within 5 ms.
EXPECT_NEAR(frame_->capture_time_ms(), capture_time_ms, 5);
EXPECT_GE(
metrics::NumEvents(kCaptureTimeHistogram, frame_->capture_time_ms()), 1);
}
INSTANTIATE_TEST_SUITE_P(SourceAgnostic,
WgcCapturerWinTest,
::testing::Values(CaptureType::kWindowCapture,
CaptureType::kScreenCapture));
// Monitor specific tests.
TEST_F(WgcCapturerWinTest, FocusOnMonitor) {
SetUpForScreenCapture();
EXPECT_TRUE(capturer_->SelectSource(0));
// You can't set focus on a monitor.
EXPECT_FALSE(capturer_->FocusOnSelectedSource());
}
TEST_F(WgcCapturerWinTest, CaptureAllMonitors) {
SetUpForScreenCapture();
EXPECT_TRUE(capturer_->SelectSource(kFullDesktopScreenId));
capturer_->Start(this);
DoCapture();
EXPECT_GT(frame_->size().width(), 0);
EXPECT_GT(frame_->size().height(), 0);
}
// Window specific tests.
TEST_F(WgcCapturerWinTest, FocusOnWindow) {
capturer_ = WgcCapturerWin::CreateRawWindowCapturer(
DesktopCaptureOptions::CreateDefault());
window_info_ = CreateTestWindow(kWindowTitle);
source_id_ = GetTestWindowIdFromSourceList();
EXPECT_TRUE(capturer_->SelectSource(source_id_));
EXPECT_TRUE(capturer_->FocusOnSelectedSource());
HWND hwnd = reinterpret_cast<HWND>(source_id_);
EXPECT_EQ(hwnd, ::GetActiveWindow());
EXPECT_EQ(hwnd, ::GetForegroundWindow());
EXPECT_EQ(hwnd, ::GetFocus());
DestroyTestWindow(window_info_);
}
TEST_F(WgcCapturerWinTest, SelectMinimizedWindow) {
SetUpForWindowCapture();
MinimizeTestWindow(reinterpret_cast<HWND>(source_id_));
EXPECT_FALSE(capturer_->SelectSource(source_id_));
UnminimizeTestWindow(reinterpret_cast<HWND>(source_id_));
EXPECT_TRUE(capturer_->SelectSource(source_id_));
}
TEST_F(WgcCapturerWinTest, SelectClosedWindow) {
SetUpForWindowCapture();
EXPECT_TRUE(capturer_->SelectSource(source_id_));
CloseTestWindow();
EXPECT_FALSE(capturer_->SelectSource(source_id_));
}
TEST_F(WgcCapturerWinTest, UnsupportedWindowStyle) {
// Create a window with the WS_EX_TOOLWINDOW style, which WGC does not
// support.
window_info_ = CreateTestWindow(kWindowTitle, kMediumWindowWidth,
kMediumWindowHeight, WS_EX_TOOLWINDOW);
capturer_ = WgcCapturerWin::CreateRawWindowCapturer(
DesktopCaptureOptions::CreateDefault());
DesktopCapturer::SourceList sources;
EXPECT_TRUE(capturer_->GetSourceList(&sources));
auto it = std::find_if(
sources.begin(), sources.end(), [&](const DesktopCapturer::Source& src) {
return src.id == reinterpret_cast<intptr_t>(window_info_.hwnd);
});
// We should not find the window, since we filter for unsupported styles.
EXPECT_EQ(it, sources.end());
DestroyTestWindow(window_info_);
}
TEST_F(WgcCapturerWinTest, IncreaseWindowSizeMidCapture) {
SetUpForWindowCapture(kSmallWindowWidth, kSmallWindowHeight);
EXPECT_TRUE(capturer_->SelectSource(source_id_));
capturer_->Start(this);
DoCapture();
ValidateFrame(kSmallWindowWidth, kSmallWindowHeight);
ResizeTestWindow(window_info_.hwnd, kSmallWindowWidth, kMediumWindowHeight);
DoCapture();
// We don't expect to see the new size until the next capture, as the frame
// pool hasn't had a chance to resize yet to fit the new, larger image.
DoCapture();
ValidateFrame(kSmallWindowWidth, kMediumWindowHeight);
ResizeTestWindow(window_info_.hwnd, kLargeWindowWidth, kMediumWindowHeight);
DoCapture();
DoCapture();
ValidateFrame(kLargeWindowWidth, kMediumWindowHeight);
}
TEST_F(WgcCapturerWinTest, ReduceWindowSizeMidCapture) {
SetUpForWindowCapture(kLargeWindowWidth, kLargeWindowHeight);
EXPECT_TRUE(capturer_->SelectSource(source_id_));
capturer_->Start(this);
DoCapture();
ValidateFrame(kLargeWindowWidth, kLargeWindowHeight);
ResizeTestWindow(window_info_.hwnd, kLargeWindowWidth, kMediumWindowHeight);
// We expect to see the new size immediately because the image data has shrunk
// and will fit in the existing buffer.
DoCapture();
ValidateFrame(kLargeWindowWidth, kMediumWindowHeight);
ResizeTestWindow(window_info_.hwnd, kSmallWindowWidth, kMediumWindowHeight);
DoCapture();
ValidateFrame(kSmallWindowWidth, kMediumWindowHeight);
}
TEST_F(WgcCapturerWinTest, MinimizeWindowMidCapture) {
SetUpForWindowCapture();
EXPECT_TRUE(capturer_->SelectSource(source_id_));
capturer_->Start(this);
// Minimize the window; capture should continue but return temporary errors.
MinimizeTestWindow(window_info_.hwnd);
for (int i = 0; i < 10; ++i) {
capturer_->CaptureFrame();
EXPECT_EQ(result_, DesktopCapturer::Result::ERROR_TEMPORARY);
}
// Reopen the window and the capture should continue normally.
UnminimizeTestWindow(window_info_.hwnd);
DoCapture();
// We can't verify the window size here because the test window does not
// repaint itself after it is unminimized, but capturing successfully is still
// a good test.
}
TEST_F(WgcCapturerWinTest, CloseWindowMidCapture) {
SetUpForWindowCapture();
EXPECT_TRUE(capturer_->SelectSource(source_id_));
capturer_->Start(this);
DoCapture();
ValidateFrame(kMediumWindowWidth, kMediumWindowHeight);
CloseTestWindow();
// We need to call GetMessage to trigger the Closed event and the capturer's
// event handler for it. If we are too early and the Closed event hasn't
// arrived yet we should keep trying until the capturer receives it and stops.
auto* wgc_capturer = static_cast<WgcCapturerWin*>(capturer_.get());
while (wgc_capturer->IsSourceBeingCaptured(source_id_)) {
// Since the capturer handles the Closed message, there will be no message
// for us and GetMessage will hang, unless we send ourselves a message
// first.
::PostThreadMessage(GetCurrentThreadId(), kNoOp, 0, 0);
MSG msg;
::GetMessage(&msg, NULL, 0, 0);
::DispatchMessage(&msg);
}
// Occasionally, one last frame will have made it into the frame pool before
// the window closed. The first call will consume it, and in that case we need
// to make one more call to CaptureFrame.
capturer_->CaptureFrame();
if (result_ == DesktopCapturer::Result::SUCCESS)
capturer_->CaptureFrame();
EXPECT_GE(metrics::NumEvents(kCapturerResultHistogram, kSessionStartFailure),
1);
EXPECT_GE(metrics::NumEvents(kCaptureSessionResultHistogram, kSourceClosed),
1);
EXPECT_EQ(result_, DesktopCapturer::Result::ERROR_PERMANENT);
}
} // namespace webrtc

View File

@@ -1,19 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/desktop_capture/win/wgc_desktop_frame.h"
#include <utility>
namespace webrtc {
WgcDesktopFrame::WgcDesktopFrame(DesktopSize size,
int stride,
std::vector<uint8_t>&& image_data)
: DesktopFrame(size, stride, image_data.data(), nullptr),
image_data_(std::move(image_data)) {}
WgcDesktopFrame::~WgcDesktopFrame() = default;
} // namespace webrtc
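The constructor above hands image_data.data() to the DesktopFrame base class before moving the vector into |image_data_|. This is safe because moving a std::vector transfers its heap buffer rather than reallocating, so the pointer stays valid for the frame's lifetime. A tiny sketch of the same ownership trick (illustrative; Buffer is a hypothetical type):

#include <cstdint>
#include <utility>
#include <vector>
struct Buffer {
  explicit Buffer(std::vector<uint8_t>&& bytes)
      : data_(bytes.data()),           // pointer taken before the move...
        storage_(std::move(bytes)) {}  // ...stays valid: the heap block moves
  uint8_t* data_;                      // declared first, so initialized first
  std::vector<uint8_t> storage_;
};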

View File

@@ -1,35 +0,0 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_DESKTOP_CAPTURE_WIN_WGC_DESKTOP_FRAME_H_
#define MODULES_DESKTOP_CAPTURE_WIN_WGC_DESKTOP_FRAME_H_
#include <d3d11.h>
#include <wrl/client.h>
#include <memory>
#include <vector>
#include "desktop_frame.h"
#include "desktop_geometry.h"
namespace webrtc {
// DesktopFrame implementation used by capturers that use the
// Windows.Graphics.Capture API.
class WgcDesktopFrame final : public DesktopFrame {
public:
// WgcDesktopFrame receives an rvalue reference to the |image_data| vector
// so that it can take ownership of it (and avoid a copy).
WgcDesktopFrame(DesktopSize size,
int stride,
std::vector<uint8_t>&& image_data);
WgcDesktopFrame(const WgcDesktopFrame&) = delete;
WgcDesktopFrame& operator=(const WgcDesktopFrame&) = delete;
~WgcDesktopFrame() override;
private:
std::vector<uint8_t> image_data_;
};
} // namespace webrtc
#endif // MODULES_DESKTOP_CAPTURE_WIN_WGC_DESKTOP_FRAME_H_

View File

@@ -29,16 +29,30 @@ target("log")
     add_headerfiles("../../src/log/log.h")
     add_includedirs("../../src/log", {public = true})
 
-target("remote_desk")
-    set_kind("binary")
-    add_deps("projectx")
-    add_packages("log")
-    add_packages("ffmpeg")
-    add_packages("vcpkg::sdl2")
-    add_links("avfilter", "avdevice", "avformat", "avcodec", "swscale", "swresample", "avutil")
-    add_files("dll/*.cpp")
-    add_includedirs("../../src/interface")
-    add_links("SDL2-static", "SDL2main", "Shell32", "gdi32", "winmm",
-              "setupapi", "version", "WindowsApp", "Imm32", "avutil")
+target("screen_capture")
+    set_kind("static")
+    add_packages("log")
+    add_files("screen_capture/*.cpp")
+    add_includedirs("screen_capture", {public = true})
+
+target("remote_desk_server")
+    set_kind("binary")
+    add_packages("log", "ffmpeg")
+    add_deps("projectx", "screen_capture")
+    add_files("remote_desk_server/*.cpp")
+    add_includedirs("../../src/interface")
+    -- add_links("avformat", "swscale")
+
+-- target("remote_desk")
+--     set_kind("binary")
+--     add_deps("projectx")
+--     add_packages("log")
+--     add_packages("ffmpeg")
+--     add_packages("vcpkg::sdl2")
+--     add_links("avfilter", "avdevice", "avformat", "avcodec", "swscale", "swresample", "avutil")
+--     add_files("**.cpp")
+--     add_includedirs("../../src/interface")
+--     add_links("SDL2-static", "SDL2main", "Shell32", "gdi32", "winmm",
+--               "setupapi", "version", "WindowsApp", "Imm32", "avutil")

View File

@@ -5,6 +5,7 @@
 #include <stdlib.h>
 
 enum ws_status { WS_CONNECTING = 0, WS_OPEN, WS_FAILED, WS_CLOSED, WS_UNKNOWN };
+enum DATA_TYPE { VIDEO = 0, AUDIO, USER };
 
 #ifdef __cplusplus
 extern "C" {
@@ -30,7 +31,8 @@ int CreateConnection(PeerPtr* peer_ptr, const char* transmission_id,
 int JoinConnection(PeerPtr* peer_ptr, const char* transmission_id,
                    const char* user_id);
-int SendData(PeerPtr* peer_ptr, const char* data, size_t size);
+int SendData(PeerPtr* peer_ptr, DATA_TYPE data_type, const char* data,
+             size_t size);
 
 #ifdef __cplusplus
 }
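With the new parameter, callers tag each payload so the QoS layer can treat media and user data differently. A usage sketch (illustrative; |peer_ptr|, the frame buffer, and its size are assumed to exist elsewhere in the application):

// const char* frame_buf = ...;  // an encoded video payload
// size_t frame_size = ...;
// int ret = SendData(peer_ptr, VIDEO, frame_buf, frame_size);
// SendData(peer_ptr, USER, "hello", 5);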

View File

@@ -0,0 +1,774 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "NvDecoder.h"
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include <sstream>  // std::ostringstream is used by the error macros below
#include "nvcuvid.h"
#define START_TIMER auto start = std::chrono::high_resolution_clock::now();
#define STOP_TIMER(print_message) \
std::cout << print_message \
<< std::chrono::duration_cast<std::chrono::milliseconds>( \
std::chrono::high_resolution_clock::now() - start) \
.count() \
<< " ms " << std::endl;
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char *szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVDECException::makeNVDECException( \
errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__); \
} \
} while (0)
static const char *GetVideoCodecString(cudaVideoCodec eCodec) {
static struct {
cudaVideoCodec eCodec;
const char *name;
} aCodecName[] = {
{cudaVideoCodec_MPEG1, "MPEG-1"},
{cudaVideoCodec_MPEG2, "MPEG-2"},
{cudaVideoCodec_MPEG4, "MPEG-4 (ASP)"},
{cudaVideoCodec_VC1, "VC-1/WMV"},
{cudaVideoCodec_H264, "AVC/H.264"},
{cudaVideoCodec_JPEG, "M-JPEG"},
{cudaVideoCodec_H264_SVC, "H.264/SVC"},
{cudaVideoCodec_H264_MVC, "H.264/MVC"},
{cudaVideoCodec_HEVC, "H.265/HEVC"},
{cudaVideoCodec_VP8, "VP8"},
{cudaVideoCodec_VP9, "VP9"},
{cudaVideoCodec_AV1, "AV1"},
{cudaVideoCodec_NumCodecs, "Invalid"},
{cudaVideoCodec_YUV420, "YUV 4:2:0"},
{cudaVideoCodec_YV12, "YV12 4:2:0"},
{cudaVideoCodec_NV12, "NV12 4:2:0"},
{cudaVideoCodec_YUYV, "YUYV 4:2:2"},
{cudaVideoCodec_UYVY, "UYVY 4:2:2"},
};
if (eCodec >= 0 && eCodec <= cudaVideoCodec_NumCodecs) {
return aCodecName[eCodec].name;
}
for (int i = cudaVideoCodec_NumCodecs + 1;
i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) {
if (eCodec == aCodecName[i].eCodec) {
return aCodecName[i].name;
}
}
return "Unknown";
}
static const char *GetVideoChromaFormatString(
cudaVideoChromaFormat eChromaFormat) {
static struct {
cudaVideoChromaFormat eChromaFormat;
const char *name;
} aChromaFormatName[] = {
{cudaVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)"},
{cudaVideoChromaFormat_420, "YUV 420"},
{cudaVideoChromaFormat_422, "YUV 422"},
{cudaVideoChromaFormat_444, "YUV 444"},
};
if (eChromaFormat >= 0 && eChromaFormat < sizeof(aChromaFormatName) /
sizeof(aChromaFormatName[0])) {
return aChromaFormatName[eChromaFormat].name;
}
return "Unknown";
}
static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat) {
float factor = 0.5;
switch (eSurfaceFormat) {
case cudaVideoSurfaceFormat_NV12:
case cudaVideoSurfaceFormat_P016:
factor = 0.5;
break;
case cudaVideoSurfaceFormat_YUV444:
case cudaVideoSurfaceFormat_YUV444_16Bit:
factor = 1.0;
break;
}
return factor;
}
static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat) {
int numPlane = 1;
switch (eSurfaceFormat) {
case cudaVideoSurfaceFormat_NV12:
case cudaVideoSurfaceFormat_P016:
numPlane = 1;
break;
case cudaVideoSurfaceFormat_YUV444:
case cudaVideoSurfaceFormat_YUV444_16Bit:
numPlane = 2;
break;
}
return numPlane;
}
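// Worked example (illustrative, not part of the decoder): a 1920x1080 NV12
// surface has one interleaved UV plane of 1080 * 0.5 = 540 rows, while a
// YUV444 surface has two chroma planes of 1080 * 1.0 = 1080 rows each.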
/**
* @brief This function is used to get codec string from codec id
*/
const char *NvDecoder::GetCodecString(cudaVideoCodec eCodec) {
return GetVideoCodecString(eCodec);
}
/* Called when the parser encounters sequence header for AV1 SVC content
* return value interpretation:
* < 0 : fail, >=0: succeeded (bit 0-9: currOperatingPoint, bit 10-10:
* bDispAllLayer, bit 11-30: reserved, must be set 0)
*/
int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) {
if (pOPInfo->codec == cudaVideoCodec_AV1) {
if (pOPInfo->av1.operating_points_cnt > 1) {
// clip has SVC enabled
if (m_nOperatingPoint >= pOPInfo->av1.operating_points_cnt)
m_nOperatingPoint = 0;
printf("AV1 SVC clip: operating point count %d ",
pOPInfo->av1.operating_points_cnt);
printf("Selected operating point: %d, IDC 0x%x bOutputAllLayers %d\n",
m_nOperatingPoint,
pOPInfo->av1.operating_points_idc[m_nOperatingPoint],
m_bDispAllLayers);
return (m_nOperatingPoint | (m_bDispAllLayers << 10));
}
}
return -1;
}
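// Illustrative aside (not part of NvDecoder): a caller can unpack the value
// returned above per the bit layout documented in the comment:
//   int packed = GetOperatingPoint(&op_info);
//   if (packed >= 0) {
//     int operating_point = packed & 0x3FF;         // bits 0-9
//     bool disp_all_layers = (packed >> 10) & 0x1;  // bit 10
//   }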
/* Return value from HandleVideoSequence() are interpreted as :
* 0: fail, 1: succeeded, > 1: override dpb size of parser (set by
* CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
*/
int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
START_TIMER
m_videoInfo.str("");
m_videoInfo.clear();
m_videoInfo << "Video Input Information" << std::endl
<< "\tCodec : " << GetVideoCodecString(pVideoFormat->codec)
<< std::endl
<< "\tFrame rate : " << pVideoFormat->frame_rate.numerator
<< "/" << pVideoFormat->frame_rate.denominator << " = "
<< 1.0 * pVideoFormat->frame_rate.numerator /
pVideoFormat->frame_rate.denominator
<< " fps" << std::endl
<< "\tSequence : "
<< (pVideoFormat->progressive_sequence ? "Progressive"
: "Interlaced")
<< std::endl
<< "\tCoded size : [" << pVideoFormat->coded_width << ", "
<< pVideoFormat->coded_height << "]" << std::endl
<< "\tDisplay area : [" << pVideoFormat->display_area.left << ", "
<< pVideoFormat->display_area.top << ", "
<< pVideoFormat->display_area.right << ", "
<< pVideoFormat->display_area.bottom << "]" << std::endl
<< "\tChroma : "
<< GetVideoChromaFormatString(pVideoFormat->chroma_format)
<< std::endl
<< "\tBit depth : " << pVideoFormat->bit_depth_luma_minus8 + 8;
m_videoInfo << std::endl;
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
CUVIDDECODECAPS decodecaps;
memset(&decodecaps, 0, sizeof(decodecaps));
decodecaps.eCodecType = pVideoFormat->codec;
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if (!decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU",
CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
(pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
std::ostringstream errorString;
errorString << std::endl
<< "Resolution : " << pVideoFormat->coded_width << "x"
<< pVideoFormat->coded_height << std::endl
<< "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x"
<< decodecaps.nMaxHeight << std::endl
<< "Resolution not supported on this GPU";
const std::string cErr = errorString.str();
NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
decodecaps.nMaxMBCount) {
std::ostringstream errorString;
errorString << std::endl
<< "MBCount : "
<< (pVideoFormat->coded_width >> 4) *
(pVideoFormat->coded_height >> 4)
<< std::endl
<< "Max Supported mbcnt : " << decodecaps.nMaxMBCount
<< std::endl
<< "MBCount not supported on this GPU";
const std::string cErr = errorString.str();
NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder() has been called before, and now there's possible
// config change
return ReconfigureDecoder(pVideoFormat);
}
// eCodec has been set in the constructor (for parser). Here it's set again
// for potential correction
m_eCodec = pVideoFormat->codec;
m_eChromaFormat = pVideoFormat->chroma_format;
m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;
// Set the output surface format same as chroma format
  if (m_eChromaFormat == cudaVideoChromaFormat_420 ||
      m_eChromaFormat == cudaVideoChromaFormat_Monochrome)
m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8
? cudaVideoSurfaceFormat_P016
: cudaVideoSurfaceFormat_NV12;
else if (m_eChromaFormat == cudaVideoChromaFormat_444)
m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8
? cudaVideoSurfaceFormat_YUV444_16Bit
: cudaVideoSurfaceFormat_YUV444;
else if (m_eChromaFormat == cudaVideoChromaFormat_422)
m_eOutputFormat =
        cudaVideoSurfaceFormat_NV12;  // no 4:2:2 output format supported yet,
                                      // so default to 4:2:0 (NV12)
  // Check if the output format is supported. If not, check fallback options
if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) {
if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
m_eOutputFormat = cudaVideoSurfaceFormat_P016;
else if (decodecaps.nOutputFormatMask &
(1 << cudaVideoSurfaceFormat_YUV444))
m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
else if (decodecaps.nOutputFormatMask &
(1 << cudaVideoSurfaceFormat_YUV444_16Bit))
m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
else
NVDEC_THROW_ERROR("No supported output format found",
CUDA_ERROR_NOT_SUPPORTED);
}
m_videoFormat = *pVideoFormat;
CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
if (pVideoFormat->progressive_sequence)
videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
else
videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
// With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by
// NVDEC hardware
videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
videoDecodeCreateInfo.vidLock = m_ctxLock;
videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
// AV1 has max width/height of sequence in sequence header
if (pVideoFormat->codec == cudaVideoCodec_AV1 &&
pVideoFormat->seqhdr_data_length > 0) {
    // don't overwrite if it is already set from the command line or reconfig.txt
if (!(m_nMaxWidth > pVideoFormat->coded_width ||
m_nMaxHeight > pVideoFormat->coded_height)) {
CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
m_nMaxWidth = vidFormatEx->av1.max_width;
m_nMaxHeight = vidFormatEx->av1.max_height;
}
}
if (m_nMaxWidth < (int)pVideoFormat->coded_width)
m_nMaxWidth = pVideoFormat->coded_width;
if (m_nMaxHeight < (int)pVideoFormat->coded_height)
m_nMaxHeight = pVideoFormat->coded_height;
videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;
if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
m_nWidth =
pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight =
pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
} else {
if (m_resizeDim.w && m_resizeDim.h) {
videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
videoDecodeCreateInfo.display_area.right =
pVideoFormat->display_area.right;
videoDecodeCreateInfo.display_area.bottom =
pVideoFormat->display_area.bottom;
m_nWidth = m_resizeDim.w;
m_nLumaHeight = m_resizeDim.h;
}
if (m_cropRect.r && m_cropRect.b) {
videoDecodeCreateInfo.display_area.left = m_cropRect.l;
videoDecodeCreateInfo.display_area.top = m_cropRect.t;
videoDecodeCreateInfo.display_area.right = m_cropRect.r;
videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
m_nWidth = m_cropRect.r - m_cropRect.l;
m_nLumaHeight = m_cropRect.b - m_cropRect.t;
}
videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
}
m_nChromaHeight =
(int)(ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
m_displayRect.t = videoDecodeCreateInfo.display_area.top;
m_displayRect.l = videoDecodeCreateInfo.display_area.left;
m_displayRect.r = videoDecodeCreateInfo.display_area.right;
m_videoInfo << "Video Decoding Params:" << std::endl
<< "\tNum Surfaces : "
<< videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl
<< "\tCrop : [" << videoDecodeCreateInfo.display_area.left
<< ", " << videoDecodeCreateInfo.display_area.top << ", "
<< videoDecodeCreateInfo.display_area.right << ", "
<< videoDecodeCreateInfo.display_area.bottom << "]" << std::endl
<< "\tResize : " << videoDecodeCreateInfo.ulTargetWidth
<< "x" << videoDecodeCreateInfo.ulTargetHeight << std::endl
<< "\tDeinterlace : "
<< std::vector<const char *>{
"Weave", "Bob",
"Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
STOP_TIMER("Session Initialization Time: ");
return nDecodeSurface;
}
int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
if (pVideoFormat->bit_depth_luma_minus8 !=
m_videoFormat.bit_depth_luma_minus8 ||
pVideoFormat->bit_depth_chroma_minus8 !=
m_videoFormat.bit_depth_chroma_minus8) {
NVDEC_THROW_ERROR("Reconfigure Not supported for bit depth change",
CUDA_ERROR_NOT_SUPPORTED);
}
if (pVideoFormat->chroma_format != m_videoFormat.chroma_format) {
NVDEC_THROW_ERROR("Reconfigure Not supported for chroma format change",
CUDA_ERROR_NOT_SUPPORTED);
}
bool bDecodeResChange =
!(pVideoFormat->coded_width == m_videoFormat.coded_width &&
pVideoFormat->coded_height == m_videoFormat.coded_height);
bool bDisplayRectChange = !(
pVideoFormat->display_area.bottom == m_videoFormat.display_area.bottom &&
pVideoFormat->display_area.top == m_videoFormat.display_area.top &&
pVideoFormat->display_area.left == m_videoFormat.display_area.left &&
pVideoFormat->display_area.right == m_videoFormat.display_area.right);
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
if ((pVideoFormat->coded_width > m_nMaxWidth) ||
(pVideoFormat->coded_height > m_nMaxHeight)) {
// For VP9, let driver handle the change if new width/height >
// maxwidth/maxheight
if ((m_eCodec != cudaVideoCodec_VP9) || m_bReconfigExternal) {
NVDEC_THROW_ERROR(
"Reconfigure Not supported when width/height > maxwidth/maxheight",
CUDA_ERROR_NOT_SUPPORTED);
}
return 1;
}
if (!bDecodeResChange && !m_bReconfigExtPPChange) {
    // If coded_width/coded_height haven't changed but the display resolution
    // has, update width/height for correct output without cropping.
    // Example: 1920x1080 vs 1920x1088.
if (bDisplayRectChange) {
m_nWidth =
pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight =
pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
m_nChromaHeight =
(int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
}
  // No need to call cuvidReconfigureDecoder(); just return
return 1;
}
CUVIDRECONFIGUREDECODERINFO reconfigParams = {0};
reconfigParams.ulWidth = m_videoFormat.coded_width =
pVideoFormat->coded_width;
reconfigParams.ulHeight = m_videoFormat.coded_height =
pVideoFormat->coded_height;
  // Don't change the display rect; get scaled output from the decoder.
  // This helps the display application present frames smoothly.
reconfigParams.display_area.bottom = m_displayRect.b;
reconfigParams.display_area.top = m_displayRect.t;
reconfigParams.display_area.left = m_displayRect.l;
reconfigParams.display_area.right = m_displayRect.r;
reconfigParams.ulTargetWidth = m_nSurfaceWidth;
reconfigParams.ulTargetHeight = m_nSurfaceHeight;
  // If external reconfigure is called along with a resolution change, do a
  // full reconfigure params update even if post-processing params are unchanged
if ((m_bReconfigExternal && bDecodeResChange) || m_bReconfigExtPPChange) {
    // update display rect and target resolution if requested explicitly
m_bReconfigExternal = false;
m_bReconfigExtPPChange = false;
m_videoFormat = *pVideoFormat;
if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
m_nWidth =
pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight =
pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
reconfigParams.ulTargetWidth = pVideoFormat->coded_width;
reconfigParams.ulTargetHeight = pVideoFormat->coded_height;
} else {
if (m_resizeDim.w && m_resizeDim.h) {
reconfigParams.display_area.left = pVideoFormat->display_area.left;
reconfigParams.display_area.top = pVideoFormat->display_area.top;
reconfigParams.display_area.right = pVideoFormat->display_area.right;
reconfigParams.display_area.bottom = pVideoFormat->display_area.bottom;
m_nWidth = m_resizeDim.w;
m_nLumaHeight = m_resizeDim.h;
}
if (m_cropRect.r && m_cropRect.b) {
reconfigParams.display_area.left = m_cropRect.l;
reconfigParams.display_area.top = m_cropRect.t;
reconfigParams.display_area.right = m_cropRect.r;
reconfigParams.display_area.bottom = m_cropRect.b;
m_nWidth = m_cropRect.r - m_cropRect.l;
m_nLumaHeight = m_cropRect.b - m_cropRect.t;
}
reconfigParams.ulTargetWidth = m_nWidth;
reconfigParams.ulTargetHeight = m_nLumaHeight;
}
m_nChromaHeight =
(int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
m_nSurfaceHeight = reconfigParams.ulTargetHeight;
m_nSurfaceWidth = reconfigParams.ulTargetWidth;
m_displayRect.b = reconfigParams.display_area.bottom;
m_displayRect.t = reconfigParams.display_area.top;
m_displayRect.l = reconfigParams.display_area.left;
m_displayRect.r = reconfigParams.display_area.right;
}
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
STOP_TIMER("Session Reconfigure Time: ");
return nDecodeSurface;
}
int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
m_bReconfigExternal = true;
m_bReconfigExtPPChange = false;
if (pCropRect) {
if (!((pCropRect->t == m_cropRect.t) && (pCropRect->l == m_cropRect.l) &&
(pCropRect->b == m_cropRect.b) && (pCropRect->r == m_cropRect.r))) {
m_bReconfigExtPPChange = true;
m_cropRect = *pCropRect;
}
}
if (pResizeDim) {
if (!((pResizeDim->w == m_resizeDim.w) &&
(pResizeDim->h == m_resizeDim.h))) {
m_bReconfigExtPPChange = true;
m_resizeDim = *pResizeDim;
}
}
// Clear existing output buffers of different size
uint8_t *pFrame = NULL;
while (!m_vpFrame.empty()) {
pFrame = m_vpFrame.back();
m_vpFrame.pop_back();
if (m_bUseDeviceFrame) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
} else {
      delete[] pFrame;  // array delete to match the new uint8_t[] allocation
}
}
return 1;
}
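// Example usage (hypothetical values): request a 1280x720 scaled output; the
// change is applied in ReconfigureDecoder() on the next sequence callback.
//   Dim resize = {1280, 720};
//   decoder.setReconfigParams(nullptr, &resize);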
/* The return value from HandlePictureDecode() is interpreted as:
* 0: fail, >=1: succeeded
*/
int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
if (!m_hDecoder) {
NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED);
    return 0;
}
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
return 1;
}
/* The return value from HandlePictureDisplay() is interpreted as:
* 0: fail, >=1: succeeded
*/
int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
CUVIDPROCPARAMS videoProcessingParameters = {};
videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
videoProcessingParameters.output_stream = m_cuvidStream;
CUdeviceptr dpSrcFrame = 0;
unsigned int nSrcPitch = 0;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
&dpSrcFrame, &nSrcPitch,
&videoProcessingParameters));
CUVIDGETDECODESTATUS DecodeStatus;
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
CUresult result =
cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
if (result == CUDA_SUCCESS &&
(DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
printf("Decode Error occurred for picture %d\n",
m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
}
uint8_t *pDecodedFrame = nullptr;
{
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
if ((unsigned)++m_nDecodedFrame > m_vpFrame.size()) {
// Not enough frames in stock
m_nFrameAlloc++;
uint8_t *pFrame = NULL;
if (m_bUseDeviceFrame) {
if (m_bDeviceFramePitched) {
CUDA_DRVAPI_CALL(cuMemAllocPitch(
(CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
} else {
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
}
} else {
pFrame = new uint8_t[GetFrameSize()];
}
m_vpFrame.push_back(pFrame);
}
pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1];
}
// Copy luma plane
CUDA_MEMCPY2D m = {0};
m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
m.srcDevice = dpSrcFrame;
m.srcPitch = nSrcPitch;
m.dstMemoryType =
m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
m.WidthInBytes = GetWidth() * m_nBPP;
m.Height = m_nLumaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
// Copy chroma plane
  // NVDEC output has the luma height aligned to 2. Adjust the chroma offset
  // by aligning the height
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
m.srcPitch * ((m_nSurfaceHeight + 1) & ~1));
m.dstDevice =
(CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
if (m_nNumChromaPlanes == 2) {
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
m.srcPitch * ((m_nSurfaceHeight + 1) & ~1) * 2);
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
m.dstPitch * m_nLumaHeight * 2);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
}
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
m_vTimestamp.resize(m_vpFrame.size());
}
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
return 1;
}
NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
cudaVideoCodec eCodec, bool bLowLatency,
bool bDeviceFramePitched, const Rect *pCropRect,
const Dim *pResizeDim, int maxWidth, int maxHeight,
unsigned int clkRate)
: m_cuContext(cuContext),
m_bUseDeviceFrame(bUseDeviceFrame),
m_eCodec(eCodec),
m_bDeviceFramePitched(bDeviceFramePitched),
m_nMaxWidth(maxWidth),
m_nMaxHeight(maxHeight) {
if (pCropRect) m_cropRect = *pCropRect;
if (pResizeDim) m_resizeDim = *pResizeDim;
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
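  // Dummy value; the parser adopts the real decode-surface count from the
  // return value of HandleVideoSequence().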
videoParserParameters.ulClockRate = clkRate;
videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
videoParserParameters.pUserData = this;
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
}
NvDecoder::~NvDecoder() {
START_TIMER
if (m_hParser) {
cuvidDestroyVideoParser(m_hParser);
}
cuCtxPushCurrent(m_cuContext);
if (m_hDecoder) {
cuvidDestroyDecoder(m_hDecoder);
}
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
for (uint8_t *pFrame : m_vpFrame) {
if (m_bUseDeviceFrame) {
cuMemFree((CUdeviceptr)pFrame);
} else {
delete[] pFrame;
}
}
cuCtxPopCurrent(NULL);
cuvidCtxLockDestroy(m_ctxLock);
STOP_TIMER("Session Deinitialization Time: ");
}
int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
int64_t nTimestamp) {
m_nDecodedFrame = 0;
m_nDecodedFrameReturned = 0;
CUVIDSOURCEDATAPACKET packet = {0};
packet.payload = pData;
packet.payload_size = nSize;
packet.flags = nFlags | CUVID_PKT_TIMESTAMP;
packet.timestamp = nTimestamp;
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
m_cuvidStream = 0;
return m_nDecodedFrame;
}
uint8_t *NvDecoder::GetFrame(int64_t *pTimestamp) {
if (m_nDecodedFrame > 0) {
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
m_nDecodedFrame--;
if (pTimestamp) *pTimestamp = m_vTimestamp[m_nDecodedFrameReturned];
return m_vpFrame[m_nDecodedFrameReturned++];
}
return NULL;
}
uint8_t *NvDecoder::GetLockedFrame(int64_t *pTimestamp) {
uint8_t *pFrame;
uint64_t timestamp;
if (m_nDecodedFrame > 0) {
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
m_nDecodedFrame--;
pFrame = m_vpFrame[0];
m_vpFrame.erase(m_vpFrame.begin(), m_vpFrame.begin() + 1);
timestamp = m_vTimestamp[0];
m_vTimestamp.erase(m_vTimestamp.begin(), m_vTimestamp.begin() + 1);
if (pTimestamp) *pTimestamp = timestamp;
return pFrame;
}
return NULL;
}
void NvDecoder::UnlockFrame(uint8_t **pFrame) {
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
m_vpFrame.insert(m_vpFrame.end(), &pFrame[0], &pFrame[1]);
// add a dummy entry for timestamp
uint64_t timestamp[2] = {0};
m_vTimestamp.insert(m_vTimestamp.end(), &timestamp[0], &timestamp[1]);
}
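
A minimal decode-loop sketch for the NvDecoder class implemented above (not part of the commit); ReadNextChunk() is a hypothetical demuxer helper, the CUDA context is assumed to be created by the caller, and error handling is omitted:

#include <cstdint>
#include "NvDecoder.h"

void ReadNextChunk(uint8_t **ppData, int *pnSize);  // hypothetical demuxer

void DecodeLoop(CUcontext cuContext) {
  NvDecoder dec(cuContext, /*bUseDeviceFrame=*/false, cudaVideoCodec_H264,
                /*bLowLatency=*/true);
  uint8_t *pData = nullptr;
  int nSize = 0;
  do {
    ReadNextChunk(&pData, &nSize);
    // Decode() returns how many frames are ready for display; fetch them
    // all before the next Decode() call.
    int nFrames = dec.Decode(pData, nSize);
    for (int i = 0; i < nFrames; i++) {
      int64_t ts = 0;
      uint8_t *pFrame = dec.GetFrame(&ts);
      // consume pFrame here (NV12 layout for 8-bit 4:2:0 content)
      (void)pFrame;
    }
  } while (nSize > 0);  // Decode(nullptr, 0) flushes via CUVID_PKT_ENDOFSTREAM
}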

View File

@@ -0,0 +1,395 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#include "Utils/NvCodecUtils.h"
#include "nvcuvid.h"
/**
* @brief Exception class for error reporting from the decode API.
*/
class NVDECException : public std::exception {
public:
NVDECException(const std::string &errorStr, const CUresult errorCode)
: m_errorString(errorStr), m_errorCode(errorCode) {}
virtual ~NVDECException() throw() {}
virtual const char *what() const throw() { return m_errorString.c_str(); }
CUresult getErrorCode() const { return m_errorCode; }
const std::string &getErrorString() const { return m_errorString; }
static NVDECException makeNVDECException(const std::string &errorStr,
const CUresult errorCode,
const std::string &functionName,
const std::string &fileName,
int lineNo);
private:
std::string m_errorString;
CUresult m_errorCode;
};
inline NVDECException NVDECException::makeNVDECException(
const std::string &errorStr, const CUresult errorCode,
const std::string &functionName, const std::string &fileName, int lineNo) {
std::ostringstream errorLog;
errorLog << functionName << " : " << errorStr << " at " << fileName << ":"
<< lineNo << std::endl;
NVDECException exception(errorLog.str(), errorCode);
return exception;
}
#define NVDEC_THROW_ERROR(errorStr, errorCode) \
do { \
throw NVDECException::makeNVDECException( \
errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
} while (0)
#define NVDEC_API_CALL(cuvidAPI) \
do { \
CUresult errorCode = cuvidAPI; \
if (errorCode != CUDA_SUCCESS) { \
std::ostringstream errorLog; \
errorLog << #cuvidAPI << " returned error " << errorCode; \
throw NVDECException::makeNVDECException( \
errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
} \
} while (0)
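// Usage: wrap any cuvid* API call so a non-CUDA_SUCCESS result throws, e.g.
//   NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));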
struct Rect {
int l, t, r, b;
};
struct Dim {
int w, h;
};
/**
* @brief Base class for decoder interface.
*/
class NvDecoder {
public:
/**
* @brief This function is used to initialize the decoder session.
* Application must call this function to initialize the decoder, before
* starting to decode any frames.
*/
NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec,
bool bLowLatency = false, bool bDeviceFramePitched = false,
const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000);
~NvDecoder();
/**
* @brief This function is used to get the current CUDA context.
*/
CUcontext GetContext() { return m_cuContext; }
/**
* @brief This function is used to get the output frame width.
* NV12/P016 output format width is 2 byte aligned because of U and V
* interleave
*/
int GetWidth() {
assert(m_nWidth);
return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 ||
m_eOutputFormat == cudaVideoSurfaceFormat_P016)
? (m_nWidth + 1) & ~1
: m_nWidth;
}
/**
* @brief This function is used to get the actual decode width
*/
int GetDecodeWidth() {
assert(m_nWidth);
return m_nWidth;
}
/**
* @brief This function is used to get the output frame height (Luma
* height).
*/
int GetHeight() {
assert(m_nLumaHeight);
return m_nLumaHeight;
}
/**
* @brief This function is used to get the current chroma height.
*/
int GetChromaHeight() {
assert(m_nChromaHeight);
return m_nChromaHeight;
}
/**
* @brief This function is used to get the number of chroma planes.
*/
int GetNumChromaPlanes() {
assert(m_nNumChromaPlanes);
return m_nNumChromaPlanes;
}
/**
* @brief This function is used to get the current frame size based on
* pixel format.
*/
int GetFrameSize() {
assert(m_nWidth);
return GetWidth() *
(m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP;
}
/**
* @brief This function is used to get the current frame Luma plane size.
*/
int GetLumaPlaneSize() {
assert(m_nWidth);
return GetWidth() * m_nLumaHeight * m_nBPP;
}
/**
* @brief This function is used to get the current frame chroma plane size.
*/
int GetChromaPlaneSize() {
assert(m_nWidth);
return GetWidth() * (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP;
}
/**
* @brief This function is used to get the pitch of the device buffer
* holding the decoded frame.
*/
int GetDeviceFramePitch() {
assert(m_nWidth);
return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : GetWidth() * m_nBPP;
}
/**
* @brief This function is used to get the bit depth associated with the
* pixel format.
*/
int GetBitDepth() {
assert(m_nWidth);
return m_nBitDepthMinus8 + 8;
}
/**
* @brief This function is used to get the bytes used per pixel.
*/
int GetBPP() {
assert(m_nWidth);
return m_nBPP;
}
/**
* @brief This function is used to get the YUV chroma format
*/
cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; }
/**
* @brief This function is used to get information about the video stream
* (codec, display parameters etc)
*/
CUVIDEOFORMAT GetVideoFormatInfo() {
assert(m_nWidth);
return m_videoFormat;
}
/**
* @brief This function is used to get codec string from codec id
*/
const char *GetCodecString(cudaVideoCodec eCodec);
/**
* @brief This function is used to print information about the video stream
*/
std::string GetVideoInfo() const { return m_videoInfo.str(); }
/**
* @brief This function decodes a frame and returns the number of frames
* that are available for display. All frames that are available for display
* should be read before making a subsequent decode call.
* @param pData - pointer to the data buffer that is to be decoded
* @param nSize - size of the data buffer in bytes
* @param nFlags - CUvideopacketflags for setting decode options
* @param nTimestamp - presentation timestamp
*/
int Decode(const uint8_t *pData, int nSize, int nFlags = 0,
int64_t nTimestamp = 0);
/**
* @brief This function returns a decoded frame and timestamp. This
* function should be called in a loop for fetching all the frames that are
* available for display.
*/
uint8_t *GetFrame(int64_t *pTimestamp = nullptr);
  /**
   * @brief This function decodes a frame and returns the locked frame
   * buffers. This makes the buffers available for use by the application
   * without the buffers getting overwritten, even if subsequent decode calls
   * are made. The frame buffers remain locked until UnlockFrame() is called.
   */
uint8_t *GetLockedFrame(int64_t *pTimestamp = nullptr);
/**
* @brief This function unlocks the frame buffer and makes the frame
* buffers available for write again
   * @param pFrame - pointer to the frame buffer that is to be unlocked
*/
void UnlockFrame(uint8_t **pFrame);
/**
* @brief This function allows app to set decoder reconfig params
* @param pCropRect - cropping rectangle coordinates
* @param pResizeDim - width and height of resized output
*/
int setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim);
/**
* @brief This function allows app to set operating point for AV1 SVC clips
* @param opPoint - operating point of an AV1 scalable bitstream
* @param bDispAllLayers - Output all decoded frames of an AV1 scalable
* bitstream
*/
void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) {
m_nOperatingPoint = opPoint;
m_bDispAllLayers = bDispAllLayers;
}
// start a timer
void startTimer() { m_stDecode_time.Start(); }
// stop the timer
double stopTimer() { return m_stDecode_time.Stop(); }
private:
/**
* @brief Callback function to be registered for getting a callback when
* decoding of sequence starts
*/
static int CUDAAPI HandleVideoSequenceProc(void *pUserData,
CUVIDEOFORMAT *pVideoFormat) {
return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
}
/**
* @brief Callback function to be registered for getting a callback when a
   * picture is ready to be decoded
*/
static int CUDAAPI HandlePictureDecodeProc(void *pUserData,
CUVIDPICPARAMS *pPicParams) {
return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams);
}
/**
* @brief Callback function to be registered for getting a callback when a
* decoded frame is available for display
*/
static int CUDAAPI HandlePictureDisplayProc(void *pUserData,
CUVIDPARSERDISPINFO *pDispInfo) {
return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo);
}
/**
   * @brief Callback function to be registered for getting a callback to get
   * the operating point when an AV1 SVC sequence header starts.
*/
static int CUDAAPI
HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) {
return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo);
}
/**
* @brief This function gets called when a sequence is ready to be decoded.
   * The function also gets called when there is a format change
*/
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
/**
* @brief This function gets called when a picture is ready to be decoded.
* cuvidDecodePicture is called from this function to decode the picture
*/
int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);
/**
* @brief This function gets called after a picture is decoded and available
   * for display. Frames are fetched and stored in an internal buffer
*/
int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
/**
   * @brief This function gets called when an AV1 sequence contains more than
   * one operating point
*/
int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
/**
   * @brief This function reconfigures the decoder if there is a change in
* sequence params.
*/
int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat);
private:
CUcontext m_cuContext = NULL;
CUvideoctxlock m_ctxLock;
CUvideoparser m_hParser = NULL;
CUvideodecoder m_hDecoder = NULL;
bool m_bUseDeviceFrame;
// dimension of the output
unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
unsigned int m_nNumChromaPlanes = 0;
// height of the mapped surface
int m_nSurfaceHeight = 0;
int m_nSurfaceWidth = 0;
cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
cudaVideoChromaFormat m_eChromaFormat = cudaVideoChromaFormat_420;
cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
int m_nBitDepthMinus8 = 0;
int m_nBPP = 1;
CUVIDEOFORMAT m_videoFormat = {};
Rect m_displayRect = {};
// stock of frames
std::vector<uint8_t *> m_vpFrame;
// timestamps of decoded frames
std::vector<int64_t> m_vTimestamp;
int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
bool m_bEndDecodeDone = false;
std::mutex m_mtxVPFrame;
int m_nFrameAlloc = 0;
CUstream m_cuvidStream = 0;
bool m_bDeviceFramePitched = false;
size_t m_nDeviceFramePitch = 0;
Rect m_cropRect = {};
Dim m_resizeDim = {};
std::ostringstream m_videoInfo;
unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
bool m_bReconfigExternal = false;
bool m_bReconfigExtPPChange = false;
StopWatch m_stDecode_time;
unsigned int m_nOperatingPoint = 0;
bool m_bDispAllLayers = false;
};

View File

@@ -0,0 +1,65 @@
#include "nv_decoder.h"
#include "log.h"
VideoDecoder::VideoDecoder() {}
VideoDecoder::~VideoDecoder() {}
int VideoDecoder::Init() {
ck(cuInit(0));
int nGpu = 0;
int iGpu = 0;
ck(cuDeviceGetCount(&nGpu));
if (nGpu < 1) {
return -1;
}
  CUdevice cuDevice;
  ck(cuDeviceGet(&cuDevice, iGpu));
  CUcontext cuContext = NULL;
  ck(cuCtxCreate(&cuContext, 0, cuDevice));
if (!cuContext) {
return -1;
}
decoder = new NvDecoder(cuContext, false, cudaVideoCodec_H264, true);
return 0;
}
int VideoDecoder::Decode(const uint8_t *pData, int nSize) {
if (!decoder) {
return -1;
}
  // With a 4-byte Annex-B start code, byte 4 is the NAL header; a
  // nal_unit_type of 7 (0x07) is an SPS, which precedes a key frame.
  if ((*(pData + 4) & 0x1f) == 0x07) {
    // LOG_WARN("Receive key frame");
  }
int ret = decoder->Decode(pData, nSize);
return ret;
}
int VideoDecoder::GetFrame(uint8_t *yuv_data, uint32_t &width, uint32_t &height,
uint32_t &size) {
if (nullptr == decoder) {
return -1;
}
  cudaVideoSurfaceFormat format = decoder->GetOutputFormat();
  if (format == cudaVideoSurfaceFormat_NV12) {
    uint8_t *data = decoder->GetFrame();
    if (data) {
      width = decoder->GetWidth();
      height = decoder->GetHeight();
      size = width * height * 3 / 2;  // NV12: full luma + half-size chroma
      // Copy into the caller's buffer: assigning the pointer parameter
      // (yuv_data = data) would not be visible to the caller.
      memcpy(yuv_data, data, size);
      return 0;
    }
  }
  return -1;
}
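
A short usage sketch for the VideoDecoder wrapper above (not part of the commit); it assumes Init() has already returned 0, packet/packet_size hold one Annex-B H.264 access unit, and the 1080p buffer capacity is an illustrative upper bound:

#include <cstdint>
#include "nv_decoder.h"

void DecodeOnePacket(VideoDecoder &vdec, const uint8_t *packet,
                     int packet_size) {
  vdec.Decode(packet, packet_size);  // feed one Annex-B access unit
  static uint8_t yuv[1920 * 1080 * 3 / 2];  // assumed capacity: 1080p NV12
  uint32_t w = 0, h = 0, size = 0;
  if (vdec.GetFrame(yuv, w, h, size) == 0) {
    // yuv now holds w * h * 3 / 2 bytes of NV12 data
  }
}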

View File

@@ -0,0 +1,21 @@
#ifndef _NV_DECODER_H_
#define _NV_DECODER_H_
#include "NvDecoder.h"
class VideoDecoder {
public:
VideoDecoder();
~VideoDecoder();
int Init();
int Decode(const uint8_t* pData, int nSize);
int GetFrame(uint8_t* yuv_data, uint32_t& width, uint32_t& height,
uint32_t& size);
NvDecoder* decoder = nullptr;
bool get_first_keyframe_ = false;
bool skip_frame_ = false;
};
#endif

View File

@@ -0,0 +1,909 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "NvEncoder.h"
#ifndef _WIN32
#include <cstring>
static inline bool operator==(const GUID &guid1, const GUID &guid2) {
return !memcmp(&guid1, &guid2, sizeof(GUID));
}
static inline bool operator!=(const GUID &guid1, const GUID &guid2) {
return !(guid1 == guid2);
}
#endif
NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice,
uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay, bool bMotionEstimationOnly,
bool bOutputInVideoMemory)
: m_pDevice(pDevice),
m_eDeviceType(eDeviceType),
m_nWidth(nWidth),
m_nHeight(nHeight),
m_nMaxEncodeWidth(nWidth),
m_nMaxEncodeHeight(nHeight),
m_eBufferFormat(eBufferFormat),
m_bMotionEstimationOnly(bMotionEstimationOnly),
m_bOutputInVideoMemory(bOutputInVideoMemory),
m_nExtraOutputDelay(nExtraOutputDelay),
m_hEncoder(nullptr) {
LoadNvEncApi();
if (!m_nvenc.nvEncOpenEncodeSession) {
m_nEncoderBuffer = 0;
NVENC_THROW_ERROR("EncodeAPI not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
}
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encodeSessionExParams = {
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
encodeSessionExParams.device = m_pDevice;
encodeSessionExParams.deviceType = m_eDeviceType;
encodeSessionExParams.apiVersion = NVENCAPI_VERSION;
void *hEncoder = NULL;
NVENC_API_CALL(
m_nvenc.nvEncOpenEncodeSessionEx(&encodeSessionExParams, &hEncoder));
m_hEncoder = hEncoder;
}
void NvEncoder::LoadNvEncApi() {
uint32_t version = 0;
uint32_t currentVersion =
(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
if (currentVersion > version) {
NVENC_THROW_ERROR(
"Current Driver Version does not support this NvEncodeAPI version, "
"please upgrade driver",
NV_ENC_ERR_INVALID_VERSION);
}
m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
}
NvEncoder::~NvEncoder() { DestroyHWEncoder(); }
void NvEncoder::CreateDefaultEncoderParams(
NV_ENC_INITIALIZE_PARAMS *pIntializeParams, GUID codecGuid, GUID presetGuid,
NV_ENC_TUNING_INFO tuningInfo) {
if (!m_hEncoder) {
NVENC_THROW_ERROR("Encoder Initialization failed",
NV_ENC_ERR_NO_ENCODE_DEVICE);
return;
}
if (pIntializeParams == nullptr ||
pIntializeParams->encodeConfig == nullptr) {
NVENC_THROW_ERROR(
"pInitializeParams and pInitializeParams->encodeConfig can't be NULL",
NV_ENC_ERR_INVALID_PTR);
}
memset(pIntializeParams->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
auto pEncodeConfig = pIntializeParams->encodeConfig;
memset(pIntializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
pIntializeParams->encodeConfig = pEncodeConfig;
pIntializeParams->encodeConfig->version = NV_ENC_CONFIG_VER;
pIntializeParams->version = NV_ENC_INITIALIZE_PARAMS_VER;
pIntializeParams->encodeGUID = codecGuid;
pIntializeParams->presetGUID = presetGuid;
pIntializeParams->encodeWidth = m_nWidth;
pIntializeParams->encodeHeight = m_nHeight;
pIntializeParams->darWidth = m_nWidth;
pIntializeParams->darHeight = m_nHeight;
pIntializeParams->frameRateNum = 30;
pIntializeParams->frameRateDen = 1;
pIntializeParams->enablePTD = 1;
pIntializeParams->reportSliceOffsets = 0;
pIntializeParams->enableSubFrameWrite = 0;
pIntializeParams->maxEncodeWidth = m_nWidth;
pIntializeParams->maxEncodeHeight = m_nHeight;
pIntializeParams->enableMEOnlyMode = m_bMotionEstimationOnly;
pIntializeParams->enableOutputInVidmem = m_bOutputInVideoMemory;
#if defined(_WIN32)
if (!m_bOutputInVideoMemory) {
pIntializeParams->enableEncodeAsync =
GetCapabilityValue(codecGuid, NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT);
}
#endif
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
m_nvenc.nvEncGetEncodePresetConfig(m_hEncoder, codecGuid, presetGuid,
&presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
sizeof(NV_ENC_CONFIG));
pIntializeParams->encodeConfig->frameIntervalP = 1;
pIntializeParams->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
pIntializeParams->encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_CONSTQP;
if (!m_bMotionEstimationOnly) {
pIntializeParams->tuningInfo = tuningInfo;
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, codecGuid, presetGuid,
tuningInfo, &presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
sizeof(NV_ENC_CONFIG));
} else {
m_encodeConfig.version = NV_ENC_CONFIG_VER;
m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
m_encodeConfig.rcParams.constQP = {28, 31, 25};
}
if (pIntializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config
.chromaFormatIDC = 3;
}
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
} else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
.pixelBitDepthMinus8 =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
? 2
: 0;
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
.chromaFormatIDC = 3;
}
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
}
return;
}
void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
if (!m_hEncoder) {
NVENC_THROW_ERROR("Encoder Initialization failed",
NV_ENC_ERR_NO_ENCODE_DEVICE);
}
if (!pEncoderParams) {
NVENC_THROW_ERROR("Invalid NV_ENC_INITIALIZE_PARAMS ptr",
NV_ENC_ERR_INVALID_PTR);
}
if (pEncoderParams->encodeWidth == 0 || pEncoderParams->encodeHeight == 0) {
NVENC_THROW_ERROR("Invalid encoder width and height",
NV_ENC_ERR_INVALID_PARAM);
}
if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID &&
pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID) {
NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM);
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
NVENC_THROW_ERROR("10-bit format isn't supported by H264 encoder",
NV_ENC_ERR_INVALID_PARAM);
}
}
// set other necessary params if not set yet
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) &&
(pEncoderParams->encodeConfig->encodeCodecConfig.h264Config
.chromaFormatIDC != 3)) {
NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
}
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
bool yuv10BitFormat =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
? true
: false;
if (yuv10BitFormat && pEncoderParams->encodeConfig->encodeCodecConfig
.hevcConfig.pixelBitDepthMinus8 != 2) {
NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
}
if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) &&
(pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig
.chromaFormatIDC != 3)) {
NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
}
}
memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams));
m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
if (pEncoderParams->encodeConfig) {
memcpy(&m_encodeConfig, pEncoderParams->encodeConfig,
sizeof(m_encodeConfig));
m_encodeConfig.version = NV_ENC_CONFIG_VER;
} else {
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
if (!m_bMotionEstimationOnly) {
m_nvenc.nvEncGetEncodePresetConfigEx(
m_hEncoder, pEncoderParams->encodeGUID, pEncoderParams->presetGUID,
pEncoderParams->tuningInfo, &presetConfig);
memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
} else {
m_encodeConfig.version = NV_ENC_CONFIG_VER;
m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
m_encodeConfig.rcParams.constQP = {28, 31, 25};
}
}
m_initializeParams.encodeConfig = &m_encodeConfig;
NVENC_API_CALL(
m_nvenc.nvEncInitializeEncoder(m_hEncoder, &m_initializeParams));
m_bEncoderInitialized = true;
m_nWidth = m_initializeParams.encodeWidth;
m_nHeight = m_initializeParams.encodeHeight;
m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth;
m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight;
m_nEncoderBuffer = m_encodeConfig.frameIntervalP +
m_encodeConfig.rcParams.lookaheadDepth +
m_nExtraOutputDelay;
m_nOutputDelay = m_nEncoderBuffer - 1;
m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
if (!m_bOutputInVideoMemory) {
m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr);
}
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams);
}
#endif
if (m_bMotionEstimationOnly) {
m_vMappedRefBuffers.resize(m_nEncoderBuffer, nullptr);
if (!m_bOutputInVideoMemory) {
InitializeMVOutputBuffer();
}
} else {
if (!m_bOutputInVideoMemory) {
m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr);
InitializeBitstreamBuffer();
}
}
AllocateInputBuffers(m_nEncoderBuffer);
}
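// Sizing note (illustrative): with frameIntervalP = 1 (no B-frames),
// lookaheadDepth = 0 and nExtraOutputDelay = 3, m_nEncoderBuffer is 4 and
// m_nOutputDelay is 3, so EncodeFrame() starts returning packets only after
// three frames are in flight.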
void NvEncoder::DestroyEncoder() {
if (!m_hEncoder) {
return;
}
ReleaseInputBuffers();
DestroyHWEncoder();
}
void NvEncoder::DestroyHWEncoder() {
if (!m_hEncoder) {
return;
}
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
if (m_vpCompletionEvent[i]) {
NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams);
CloseHandle(m_vpCompletionEvent[i]);
}
}
m_vpCompletionEvent.clear();
#endif
if (m_bMotionEstimationOnly) {
DestroyMVOutputBuffer();
} else {
DestroyBitstreamBuffer();
}
m_nvenc.nvEncDestroyEncoder(m_hEncoder);
m_hEncoder = nullptr;
m_bEncoderInitialized = false;
}
const NvEncInputFrame *NvEncoder::GetNextInputFrame() {
int i = m_iToSend % m_nEncoderBuffer;
return &m_vInputFrames[i];
}
const NvEncInputFrame *NvEncoder::GetNextReferenceFrame() {
int i = m_iToSend % m_nEncoderBuffer;
return &m_vReferenceFrames[i];
}
void NvEncoder::MapResources(uint32_t bfrIdx) {
NV_ENC_MAP_INPUT_RESOURCE mapInputResource = {NV_ENC_MAP_INPUT_RESOURCE_VER};
mapInputResource.registeredResource = m_vRegisteredResources[bfrIdx];
NVENC_API_CALL(m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource));
m_vMappedInputBuffers[bfrIdx] = mapInputResource.mappedResource;
if (m_bMotionEstimationOnly) {
mapInputResource.registeredResource =
m_vRegisteredResourcesForReference[bfrIdx];
NVENC_API_CALL(
m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource));
m_vMappedRefBuffers[bfrIdx] = mapInputResource.mappedResource;
}
}
void NvEncoder::EncodeFrame(std::vector<std::vector<uint8_t>> &vPacket,
NV_ENC_PIC_PARAMS *pPicParams) {
vPacket.clear();
if (!IsHWEncoderInitialized()) {
NVENC_THROW_ERROR("Encoder device not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
}
int bfrIdx = m_iToSend % m_nEncoderBuffer;
MapResources(bfrIdx);
NVENCSTATUS nvStatus = DoEncode(m_vMappedInputBuffers[bfrIdx],
m_vBitstreamOutputBuffer[bfrIdx], pPicParams);
if (nvStatus == NV_ENC_SUCCESS || nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) {
m_iToSend++;
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, true);
} else {
NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus);
}
}
void NvEncoder::RunMotionEstimation(std::vector<uint8_t> &mvData) {
if (!m_hEncoder) {
NVENC_THROW_ERROR("Encoder Initialization failed",
NV_ENC_ERR_NO_ENCODE_DEVICE);
return;
}
const uint32_t bfrIdx = m_iToSend % m_nEncoderBuffer;
MapResources(bfrIdx);
NVENCSTATUS nvStatus = DoMotionEstimation(m_vMappedInputBuffers[bfrIdx],
m_vMappedRefBuffers[bfrIdx],
m_vMVDataOutputBuffer[bfrIdx]);
if (nvStatus == NV_ENC_SUCCESS) {
m_iToSend++;
std::vector<std::vector<uint8_t>> vPacket;
GetEncodedPacket(m_vMVDataOutputBuffer, vPacket, true);
if (vPacket.size() != 1) {
NVENC_THROW_ERROR(
"GetEncodedPacket() doesn't return one (and only one) MVData",
NV_ENC_ERR_GENERIC);
}
mvData = vPacket[0];
} else {
NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus);
}
}
void NvEncoder::GetSequenceParams(std::vector<uint8_t> &seqParams) {
  uint8_t spsppsData[1024];  // Assume SPS/PPS data is at most 1 KB
memset(spsppsData, 0, sizeof(spsppsData));
NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER};
uint32_t spsppsSize = 0;
payload.spsppsBuffer = spsppsData;
payload.inBufferSize = sizeof(spsppsData);
payload.outSPSPPSPayloadSize = &spsppsSize;
NVENC_API_CALL(m_nvenc.nvEncGetSequenceParams(m_hEncoder, &payload));
seqParams.clear();
seqParams.insert(seqParams.end(), &spsppsData[0], &spsppsData[spsppsSize]);
}
NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer,
NV_ENC_OUTPUT_PTR outputBuffer,
NV_ENC_PIC_PARAMS *pPicParams) {
NV_ENC_PIC_PARAMS picParams = {};
if (pPicParams) {
picParams = *pPicParams;
}
picParams.version = NV_ENC_PIC_PARAMS_VER;
picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
picParams.inputBuffer = inputBuffer;
picParams.bufferFmt = GetPixelFormat();
picParams.inputWidth = GetEncodeWidth();
picParams.inputHeight = GetEncodeHeight();
picParams.outputBitstream = outputBuffer;
picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENCSTATUS nvStatus = m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams);
return nvStatus;
}
void NvEncoder::SendEOS() {
NV_ENC_PIC_PARAMS picParams = {NV_ENC_PIC_PARAMS_VER};
picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENC_API_CALL(m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams));
}
void NvEncoder::EndEncode(std::vector<std::vector<uint8_t>> &vPacket) {
vPacket.clear();
if (!IsHWEncoderInitialized()) {
NVENC_THROW_ERROR("Encoder device not initialized",
NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
}
SendEOS();
GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, false);
}
void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR> &vOutputBuffer,
std::vector<std::vector<uint8_t>> &vPacket,
bool bOutputDelay) {
unsigned i = 0;
int iEnd = bOutputDelay ? m_iToSend - m_nOutputDelay : m_iToSend;
for (; m_iGot < iEnd; m_iGot++) {
WaitForCompletionEvent(m_iGot % m_nEncoderBuffer);
NV_ENC_LOCK_BITSTREAM lockBitstreamData = {NV_ENC_LOCK_BITSTREAM_VER};
lockBitstreamData.outputBitstream =
vOutputBuffer[m_iGot % m_nEncoderBuffer];
lockBitstreamData.doNotWait = false;
NVENC_API_CALL(m_nvenc.nvEncLockBitstream(m_hEncoder, &lockBitstreamData));
uint8_t *pData = (uint8_t *)lockBitstreamData.bitstreamBufferPtr;
if (vPacket.size() < i + 1) {
vPacket.push_back(std::vector<uint8_t>());
}
vPacket[i].clear();
vPacket[i].insert(vPacket[i].end(), &pData[0],
&pData[lockBitstreamData.bitstreamSizeInBytes]);
i++;
NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream(
m_hEncoder, lockBitstreamData.outputBitstream));
if (m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]) {
NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource(
m_hEncoder, m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]));
m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer] = nullptr;
}
if (m_bMotionEstimationOnly &&
m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer]) {
NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource(
m_hEncoder, m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer]));
m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer] = nullptr;
}
}
}
bool NvEncoder::Reconfigure(
const NV_ENC_RECONFIGURE_PARAMS *pReconfigureParams) {
NVENC_API_CALL(m_nvenc.nvEncReconfigureEncoder(
m_hEncoder, const_cast<NV_ENC_RECONFIGURE_PARAMS *>(pReconfigureParams)));
memcpy(&m_initializeParams, &(pReconfigureParams->reInitEncodeParams),
sizeof(m_initializeParams));
if (pReconfigureParams->reInitEncodeParams.encodeConfig) {
memcpy(&m_encodeConfig, pReconfigureParams->reInitEncodeParams.encodeConfig,
sizeof(m_encodeConfig));
}
m_nWidth = m_initializeParams.encodeWidth;
m_nHeight = m_initializeParams.encodeHeight;
m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth;
m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight;
return true;
}
NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(
void *pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
NV_ENC_BUFFER_USAGE bufferUsage) {
NV_ENC_REGISTER_RESOURCE registerResource = {NV_ENC_REGISTER_RESOURCE_VER};
registerResource.resourceType = eResourceType;
registerResource.resourceToRegister = pBuffer;
registerResource.width = width;
registerResource.height = height;
registerResource.pitch = pitch;
registerResource.bufferFormat = bufferFormat;
registerResource.bufferUsage = bufferUsage;
NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, &registerResource));
return registerResource.registeredResource;
}
void NvEncoder::RegisterInputResources(std::vector<void *> inputframes,
NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
int width, int height, int pitch,
NV_ENC_BUFFER_FORMAT bufferFormat,
bool bReferenceFrame) {
for (uint32_t i = 0; i < inputframes.size(); ++i) {
NV_ENC_REGISTERED_PTR registeredPtr =
RegisterResource(inputframes[i], eResourceType, width, height, pitch,
bufferFormat, NV_ENC_INPUT_IMAGE);
std::vector<uint32_t> _chromaOffsets;
NvEncoder::GetChromaSubPlaneOffsets(bufferFormat, pitch, height,
_chromaOffsets);
NvEncInputFrame inputframe = {};
inputframe.inputPtr = (void *)inputframes[i];
inputframe.chromaOffsets[0] = 0;
inputframe.chromaOffsets[1] = 0;
for (uint32_t ch = 0; ch < _chromaOffsets.size(); ch++) {
inputframe.chromaOffsets[ch] = _chromaOffsets[ch];
}
inputframe.numChromaPlanes = NvEncoder::GetNumChromaPlanes(bufferFormat);
inputframe.pitch = pitch;
inputframe.chromaPitch = NvEncoder::GetChromaPitch(bufferFormat, pitch);
inputframe.bufferFormat = bufferFormat;
inputframe.resourceType = eResourceType;
if (bReferenceFrame) {
m_vRegisteredResourcesForReference.push_back(registeredPtr);
m_vReferenceFrames.push_back(inputframe);
} else {
m_vRegisteredResources.push_back(registeredPtr);
m_vInputFrames.push_back(inputframe);
}
}
}
void NvEncoder::FlushEncoder() {
if (!m_bMotionEstimationOnly && !m_bOutputInVideoMemory) {
    // In case of error, buffers may still be mapped to the encoder.
    // Flush the encoder queue, then unmap any surface that is still mapped.
try {
std::vector<std::vector<uint8_t>> vPacket;
EndEncode(vPacket);
} catch (...) {
}
}
}
void NvEncoder::UnregisterInputResources() {
FlushEncoder();
if (m_bMotionEstimationOnly) {
for (uint32_t i = 0; i < m_vMappedRefBuffers.size(); ++i) {
if (m_vMappedRefBuffers[i]) {
m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedRefBuffers[i]);
}
}
}
m_vMappedRefBuffers.clear();
for (uint32_t i = 0; i < m_vMappedInputBuffers.size(); ++i) {
if (m_vMappedInputBuffers[i]) {
m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedInputBuffers[i]);
}
}
m_vMappedInputBuffers.clear();
for (uint32_t i = 0; i < m_vRegisteredResources.size(); ++i) {
if (m_vRegisteredResources[i]) {
m_nvenc.nvEncUnregisterResource(m_hEncoder, m_vRegisteredResources[i]);
}
}
m_vRegisteredResources.clear();
for (uint32_t i = 0; i < m_vRegisteredResourcesForReference.size(); ++i) {
if (m_vRegisteredResourcesForReference[i]) {
m_nvenc.nvEncUnregisterResource(m_hEncoder,
m_vRegisteredResourcesForReference[i]);
}
}
m_vRegisteredResourcesForReference.clear();
}
void NvEncoder::WaitForCompletionEvent(int iEvent) {
#if defined(_WIN32)
  // Check if we are in async mode. If not, don't wait for the event.
NV_ENC_CONFIG sEncodeConfig = {0};
NV_ENC_INITIALIZE_PARAMS sInitializeParams = {0};
sInitializeParams.encodeConfig = &sEncodeConfig;
GetInitializeParams(&sInitializeParams);
if (0U == sInitializeParams.enableEncodeAsync) {
return;
}
#ifdef DEBUG
WaitForSingleObject(m_vpCompletionEvent[iEvent], INFINITE);
#else
  // Wait for 20 s, which is effectively infinite in terms of GPU time
if (WaitForSingleObject(m_vpCompletionEvent[iEvent], 20000) == WAIT_FAILED) {
NVENC_THROW_ERROR("Failed to encode frame", NV_ENC_ERR_GENERIC);
}
#endif
#endif
}
uint32_t NvEncoder::GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t width) {
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_YUV444:
return width;
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return width * 2;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return width * 4;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return 0;
}
}
uint32_t NvEncoder::GetNumChromaPlanes(
const NV_ENC_BUFFER_FORMAT bufferFormat) {
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return 1;
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return 2;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return -1;
}
}
uint32_t NvEncoder::GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t lumaPitch) {
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return lumaPitch;
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
return (lumaPitch + 1) / 2;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return -1;
}
}
void NvEncoder::GetChromaSubPlaneOffsets(
const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch,
const uint32_t height, std::vector<uint32_t> &chromaOffsets) {
chromaOffsets.clear();
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
chromaOffsets.push_back(pitch * height);
return;
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
chromaOffsets.push_back(pitch * height);
chromaOffsets.push_back(chromaOffsets[0] +
(NvEncoder::GetChromaPitch(bufferFormat, pitch) *
GetChromaHeight(bufferFormat, height)));
return;
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
chromaOffsets.push_back(pitch * height);
chromaOffsets.push_back(chromaOffsets[0] + (pitch * height));
return;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return;
}
}
uint32_t NvEncoder::GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t lumaHeight) {
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_NV12:
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return (lumaHeight + 1) / 2;
case NV_ENC_BUFFER_FORMAT_YUV444:
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return lumaHeight;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return 0;
}
}
uint32_t NvEncoder::GetChromaWidthInBytes(
const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth) {
switch (bufferFormat) {
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
return (lumaWidth + 1) / 2;
case NV_ENC_BUFFER_FORMAT_NV12:
return lumaWidth;
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return 2 * lumaWidth;
case NV_ENC_BUFFER_FORMAT_YUV444:
return lumaWidth;
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return 2 * lumaWidth;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 0;
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return 0;
}
}
int NvEncoder::GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery) {
if (!m_hEncoder) {
return 0;
}
NV_ENC_CAPS_PARAM capsParam = {NV_ENC_CAPS_PARAM_VER};
capsParam.capsToQuery = capsToQuery;
  int v = 0;
m_nvenc.nvEncGetEncodeCaps(m_hEncoder, guidCodec, &capsParam, &v);
return v;
}
int NvEncoder::GetFrameSize() const {
switch (GetPixelFormat()) {
case NV_ENC_BUFFER_FORMAT_YV12:
case NV_ENC_BUFFER_FORMAT_IYUV:
case NV_ENC_BUFFER_FORMAT_NV12:
return GetEncodeWidth() *
(GetEncodeHeight() + (GetEncodeHeight() + 1) / 2);
case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
return 2 * GetEncodeWidth() *
(GetEncodeHeight() + (GetEncodeHeight() + 1) / 2);
case NV_ENC_BUFFER_FORMAT_YUV444:
return GetEncodeWidth() * GetEncodeHeight() * 3;
case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
return 2 * GetEncodeWidth() * GetEncodeHeight() * 3;
case NV_ENC_BUFFER_FORMAT_ARGB:
case NV_ENC_BUFFER_FORMAT_ARGB10:
case NV_ENC_BUFFER_FORMAT_AYUV:
case NV_ENC_BUFFER_FORMAT_ABGR:
case NV_ENC_BUFFER_FORMAT_ABGR10:
return 4 * GetEncodeWidth() * GetEncodeHeight();
default:
NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
return 0;
}
}
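// Worked example (illustrative): for a 1280x720 NV12 frame, GetFrameSize()
// returns 1280 * (720 + (720 + 1) / 2) = 1280 * 1080 = 1,382,400 bytes:
// one full-height luma plane plus an interleaved half-height chroma plane.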
void NvEncoder::GetInitializeParams(
NV_ENC_INITIALIZE_PARAMS *pInitializeParams) {
if (!pInitializeParams || !pInitializeParams->encodeConfig) {
NVENC_THROW_ERROR(
"Both pInitializeParams and pInitializeParams->encodeConfig can't be "
"NULL",
NV_ENC_ERR_INVALID_PTR);
}
NV_ENC_CONFIG *pEncodeConfig = pInitializeParams->encodeConfig;
*pEncodeConfig = m_encodeConfig;
*pInitializeParams = m_initializeParams;
pInitializeParams->encodeConfig = pEncodeConfig;
}
void NvEncoder::InitializeBitstreamBuffer() {
for (int i = 0; i < m_nEncoderBuffer; i++) {
NV_ENC_CREATE_BITSTREAM_BUFFER createBitstreamBuffer = {
NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
NVENC_API_CALL(
m_nvenc.nvEncCreateBitstreamBuffer(m_hEncoder, &createBitstreamBuffer));
m_vBitstreamOutputBuffer[i] = createBitstreamBuffer.bitstreamBuffer;
}
}
void NvEncoder::DestroyBitstreamBuffer() {
for (uint32_t i = 0; i < m_vBitstreamOutputBuffer.size(); i++) {
if (m_vBitstreamOutputBuffer[i]) {
m_nvenc.nvEncDestroyBitstreamBuffer(m_hEncoder,
m_vBitstreamOutputBuffer[i]);
}
}
m_vBitstreamOutputBuffer.clear();
}
void NvEncoder::InitializeMVOutputBuffer() {
for (int i = 0; i < m_nEncoderBuffer; i++) {
NV_ENC_CREATE_MV_BUFFER createMVBuffer = {NV_ENC_CREATE_MV_BUFFER_VER};
NVENC_API_CALL(m_nvenc.nvEncCreateMVBuffer(m_hEncoder, &createMVBuffer));
m_vMVDataOutputBuffer.push_back(createMVBuffer.mvBuffer);
}
}
void NvEncoder::DestroyMVOutputBuffer() {
for (uint32_t i = 0; i < m_vMVDataOutputBuffer.size(); i++) {
if (m_vMVDataOutputBuffer[i]) {
m_nvenc.nvEncDestroyMVBuffer(m_hEncoder, m_vMVDataOutputBuffer[i]);
}
}
m_vMVDataOutputBuffer.clear();
}
NVENCSTATUS NvEncoder::DoMotionEstimation(
NV_ENC_INPUT_PTR inputBuffer, NV_ENC_INPUT_PTR inputBufferForReference,
NV_ENC_OUTPUT_PTR outputBuffer) {
NV_ENC_MEONLY_PARAMS meParams = {NV_ENC_MEONLY_PARAMS_VER};
meParams.inputBuffer = inputBuffer;
meParams.referenceFrame = inputBufferForReference;
meParams.inputWidth = GetEncodeWidth();
meParams.inputHeight = GetEncodeHeight();
meParams.mvBuffer = outputBuffer;
meParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENCSTATUS nvStatus =
m_nvenc.nvEncRunMotionEstimationOnly(m_hEncoder, &meParams);
return nvStatus;
}

View File

@@ -0,0 +1,482 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <stdint.h>
#include <string.h>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#include "nvEncodeAPI.h"
/**
* @brief Exception class for error reporting from NvEncodeAPI calls.
*/
class NVENCException : public std::exception {
public:
NVENCException(const std::string& errorStr, const NVENCSTATUS errorCode)
: m_errorString(errorStr), m_errorCode(errorCode) {}
virtual ~NVENCException() throw() {}
virtual const char* what() const throw() { return m_errorString.c_str(); }
NVENCSTATUS getErrorCode() const { return m_errorCode; }
const std::string& getErrorString() const { return m_errorString; }
static NVENCException makeNVENCException(const std::string& errorStr,
const NVENCSTATUS errorCode,
const std::string& functionName,
const std::string& fileName,
int lineNo);
private:
std::string m_errorString;
NVENCSTATUS m_errorCode;
};
inline NVENCException NVENCException::makeNVENCException(
const std::string& errorStr, const NVENCSTATUS errorCode,
const std::string& functionName, const std::string& fileName, int lineNo) {
std::ostringstream errorLog;
errorLog << functionName << " : " << errorStr << " at " << fileName << ":"
<< lineNo << std::endl;
NVENCException exception(errorLog.str(), errorCode);
return exception;
}
#define NVENC_THROW_ERROR(errorStr, errorCode) \
do { \
throw NVENCException::makeNVENCException( \
errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
} while (0)
#define NVENC_API_CALL(nvencAPI) \
do { \
NVENCSTATUS errorCode = nvencAPI; \
if (errorCode != NV_ENC_SUCCESS) { \
std::ostringstream errorLog; \
errorLog << #nvencAPI << " returned error " << errorCode; \
throw NVENCException::makeNVENCException( \
errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
} \
} while (0)
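// Example (illustrative): wrap any NvEncodeAPI call so failures throw, e.g.
//   NVENC_API_CALL(m_nvenc.nvEncInitializeEncoder(m_hEncoder, &initParams));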
struct NvEncInputFrame {
void* inputPtr = nullptr;
uint32_t chromaOffsets[2];
uint32_t numChromaPlanes;
uint32_t pitch;
uint32_t chromaPitch;
NV_ENC_BUFFER_FORMAT bufferFormat;
NV_ENC_INPUT_RESOURCE_TYPE resourceType;
};
/**
* @brief Shared base class for different encoder interfaces.
*/
class NvEncoder {
public:
/**
* @brief This function is used to initialize the encoder session.
* Application must call this function to initialize the encoder, before
* starting to encode any frames.
*/
void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
/**
* @brief This function is used to destroy the encoder session.
* Application must call this function to destroy the encoder session and
* clean up any allocated resources. The application must call EndEncode()
* function to get any queued encoded frames before calling DestroyEncoder().
*/
void DestroyEncoder();
/**
* @brief This function is used to reconfigure an existing encoder session.
* Application can use this function to dynamically change the bitrate,
* resolution and other QOS parameters. If the application changes the
* resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR.
*/
bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams);
/**
* @brief This function is used to get the next available input buffer.
* Applications must call this function to obtain a pointer to the next
* input buffer. The application must copy the uncompressed data to the
* input buffer and then call EncodeFrame() function to encode it.
*/
const NvEncInputFrame* GetNextInputFrame();
/**
* @brief This function is used to encode a frame.
* Applications must call EncodeFrame() function to encode the uncompressed
* data, which has been copied to an input buffer obtained from the
* GetNextInputFrame() function.
*/
void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket,
NV_ENC_PIC_PARAMS* pPicParams = nullptr);
/**
 * @brief This function is used to flush the encoder queue.
* The encoder might be queuing frames for B picture encoding or lookahead;
* the application must call EndEncode() to get all the queued encoded frames
* from the encoder. The application must call this function before
* destroying an encoder session.
*/
void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
/**
* @brief This function is used to query hardware encoder capabilities.
* Applications can call this function to query capabilities like maximum
* encode dimensions, support for lookahead or the ME-only mode etc.
*/
int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery);
/**
* @brief This function is used to get the current device on which encoder
* is running.
*/
void* GetDevice() const { return m_pDevice; }
/**
* @brief This function is used to get the current device type which encoder
* is running.
*/
NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; }
/**
* @brief This function is used to get the current encode width.
* The encode width can be modified by Reconfigure() function.
*/
int GetEncodeWidth() const { return m_nWidth; }
/**
* @brief This function is used to get the current encode height.
* The encode height can be modified by Reconfigure() function.
*/
int GetEncodeHeight() const { return m_nHeight; }
/**
* @brief This function is used to get the current frame size based on
* pixel format.
*/
int GetFrameSize() const;
/**
* @brief This function is used to initialize config parameters based on
* given codec and preset guids.
* The application can call this function to get the default configuration
* for a certain preset. The application can either use these parameters
* directly or override them with application-specific settings before
* using them in CreateEncoder() function.
*/
void CreateDefaultEncoderParams(
      NV_ENC_INITIALIZE_PARAMS* pInitializeParams, GUID codecGuid,
GUID presetGuid,
NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_UNDEFINED);
/**
* @brief This function is used to get the current initialization
* parameters, which had been used to configure the encoder session. The
* initialization parameters are modified if the application calls
* Reconfigure() function.
*/
void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams);
/**
* @brief This function is used to run motion estimation
 * This is used to run motion estimation on a pair of frames. The
* application must copy the reference frame data to the buffer obtained
* by calling GetNextReferenceFrame(), and copy the input frame data to
* the buffer obtained by calling GetNextInputFrame() before calling the
* RunMotionEstimation() function.
*/
void RunMotionEstimation(std::vector<uint8_t>& mvData);
/**
* @brief This function is used to get an available reference frame.
* Application must call this function to get a pointer to reference buffer,
* to be used in the subsequent RunMotionEstimation() function.
*/
const NvEncInputFrame* GetNextReferenceFrame();
/**
 * @brief This function is used to get the sequence and picture parameter
 * headers. The application can call this function after the encoder is
 * initialized to get the SPS and PPS NALUs for the current encoder instance.
 * The sequence header data might change when the application calls the
 * Reconfigure() function.
*/
void GetSequenceParams(std::vector<uint8_t>& seqParams);
/**
* @brief NvEncoder class virtual destructor.
*/
virtual ~NvEncoder();
public:
/**
 * @brief This is a static function to get chroma offsets for YUV planar
* formats.
*/
static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t pitch,
const uint32_t height,
std::vector<uint32_t>& chromaOffsets);
/**
 * @brief This is a static function to get the chroma plane pitch for YUV planar
* formats.
*/
static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t lumaPitch);
/**
 * @brief This is a static function to get the number of chroma planes for YUV
* planar formats.
*/
static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat);
/**
 * @brief This is a static function to get the chroma plane width in bytes for
* YUV planar formats.
*/
static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t lumaWidth);
/**
 * @brief This is a static function to get the chroma plane height in bytes for
* YUV planar formats.
*/
static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t lumaHeight);
/**
 * @brief This is a static function to get the width in bytes for the frame.
* For YUV planar format this is the width in bytes of the luma plane.
*/
static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat,
const uint32_t width);
/**
* @brief This function returns the number of allocated buffers.
*/
uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
protected:
/**
* @brief NvEncoder class constructor.
* NvEncoder class constructor cannot be called directly by the application.
*/
NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth,
uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nOutputDelay, bool bMotionEstimationOnly,
bool bOutputInVideoMemory = false);
/**
* @brief This function is used to check if hardware encoder is properly
* initialized.
*/
bool IsHWEncoderInitialized() const {
return m_hEncoder != NULL && m_bEncoderInitialized;
}
/**
* @brief This function is used to register CUDA, D3D or OpenGL input buffers
 * with NvEncodeAPI. This is a non-public function and is called by the
 * derived class for allocating and registering input buffers.
*/
void RegisterInputResources(std::vector<void*> inputframes,
NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
int width, int height, int pitch,
NV_ENC_BUFFER_FORMAT bufferFormat,
bool bReferenceFrame = false);
/**
* @brief This function is used to unregister resources which had been
* previously registered for encoding using RegisterInputResources() function.
*/
void UnregisterInputResources();
/**
* @brief This function is used to register CUDA, D3D or OpenGL input or
* output buffers with NvEncodeAPI.
*/
NV_ENC_REGISTERED_PTR RegisterResource(
void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE);
/**
* @brief This function returns maximum width used to open the encoder
* session. All encode input buffers are allocated using maximum dimensions.
*/
uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; }
/**
* @brief This function returns maximum height used to open the encoder
* session. All encode input buffers are allocated using maximum dimensions.
*/
uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; }
/**
* @brief This function returns the completion event.
*/
void* GetCompletionEvent(uint32_t eventIdx) {
return (m_vpCompletionEvent.size() == m_nEncoderBuffer)
? m_vpCompletionEvent[eventIdx]
: nullptr;
}
/**
* @brief This function returns the current pixel format.
*/
NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; }
/**
* @brief This function is used to submit the encode commands to the
* NVENC hardware.
*/
NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer,
NV_ENC_OUTPUT_PTR outputBuffer,
NV_ENC_PIC_PARAMS* pPicParams);
/**
* @brief This function is used to submit the encode commands to the
* NVENC hardware for ME only mode.
*/
NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer,
NV_ENC_INPUT_PTR inputBufferForReference,
NV_ENC_OUTPUT_PTR outputBuffer);
/**
* @brief This function is used to map the input buffers to NvEncodeAPI.
*/
void MapResources(uint32_t bfrIdx);
/**
* @brief This function is used to wait for completion of encode command.
*/
void WaitForCompletionEvent(int iEvent);
/**
* @brief This function is used to send EOS to HW encoder.
*/
void SendEOS();
private:
/**
 * @brief This is a private function which is used to check if there is any
 * buffering done by the encoder.
* The encoder generally buffers data to encode B frames or for lookahead
* or pipelining.
*/
bool IsZeroDelay() { return m_nOutputDelay == 0; }
/**
* @brief This is a private function which is used to load the encode api
* shared library.
*/
void LoadNvEncApi();
/**
* @brief This is a private function which is used to get the output packets
* from the encoder HW.
* This is called by DoEncode() function. If there is buffering enabled,
* this may return without any output data.
*/
void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer,
std::vector<std::vector<uint8_t>>& vPacket,
bool bOutputDelay);
/**
* @brief This is a private function which is used to initialize the
* bitstream buffers. This is only used in the encoding mode.
*/
void InitializeBitstreamBuffer();
/**
* @brief This is a private function which is used to destroy the bitstream
* buffers. This is only used in the encoding mode.
*/
void DestroyBitstreamBuffer();
/**
* @brief This is a private function which is used to initialize MV output
* buffers. This is only used in ME-only Mode.
*/
void InitializeMVOutputBuffer();
/**
* @brief This is a private function which is used to destroy MV output
* buffers. This is only used in ME-only Mode.
*/
void DestroyMVOutputBuffer();
/**
* @brief This is a private function which is used to destroy HW encoder.
*/
void DestroyHWEncoder();
/**
* @brief This function is used to flush the encoder queue.
*/
void FlushEncoder();
private:
/**
* @brief This is a pure virtual function which is used to allocate input
* buffers. The derived classes must implement this function.
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0;
/**
* @brief This is a pure virtual function which is used to destroy input
* buffers. The derived classes must implement this function.
*/
virtual void ReleaseInputBuffers() = 0;
protected:
bool m_bMotionEstimationOnly = false;
bool m_bOutputInVideoMemory = false;
void* m_hEncoder = nullptr;
NV_ENCODE_API_FUNCTION_LIST m_nvenc;
std::vector<NvEncInputFrame> m_vInputFrames;
std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
std::vector<NvEncInputFrame> m_vReferenceFrames;
std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResourcesForReference;
std::vector<NV_ENC_INPUT_PTR> m_vMappedInputBuffers;
std::vector<NV_ENC_INPUT_PTR> m_vMappedRefBuffers;
std::vector<void*> m_vpCompletionEvent;
int32_t m_iToSend = 0;
int32_t m_iGot = 0;
int32_t m_nEncoderBuffer = 0;
int32_t m_nOutputDelay = 0;
private:
uint32_t m_nWidth;
uint32_t m_nHeight;
NV_ENC_BUFFER_FORMAT m_eBufferFormat;
void* m_pDevice;
NV_ENC_DEVICE_TYPE m_eDeviceType;
NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
NV_ENC_CONFIG m_encodeConfig = {};
bool m_bEncoderInitialized = false;
uint32_t m_nExtraOutputDelay =
3; // To ensure encode and graphics can work in parallel,
// m_nExtraOutputDelay should be set to at least 1
std::vector<NV_ENC_OUTPUT_PTR> m_vBitstreamOutputBuffer;
std::vector<NV_ENC_OUTPUT_PTR> m_vMVDataOutputBuffer;
uint32_t m_nMaxEncodeWidth = 0;
uint32_t m_nMaxEncodeHeight = 0;
};
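// Minimal usage sketch (illustrative only, assuming a valid CUDA context and
// the NvEncoderCuda subclass added elsewhere in this commit):
//
//   NvEncoderCuda enc(cuContext, width, height, NV_ENC_BUFFER_FORMAT_NV12);
//   NV_ENC_INITIALIZE_PARAMS params = {NV_ENC_INITIALIZE_PARAMS_VER};
//   NV_ENC_CONFIG cfg = {NV_ENC_CONFIG_VER};
//   params.encodeConfig = &cfg;
//   enc.CreateDefaultEncoderParams(&params, NV_ENC_CODEC_H264_GUID,
//                                  NV_ENC_PRESET_P2_GUID);
//   enc.CreateEncoder(&params);
//   std::vector<std::vector<uint8_t>> packets;
//   const NvEncInputFrame* frame = enc.GetNextInputFrame();
//   /* copy one raw frame into frame->inputPtr, honoring frame->pitch */
//   enc.EncodeFrame(packets);  // may return zero packets while buffering
//   enc.EndEncode(packets);    // flush queued frames
//   enc.DestroyEncoder();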

View File

@@ -0,0 +1,244 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "NvEncoderCuda.h"
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay,
bool bMotionEstimationOnly,
bool bOutputInVideoMemory)
: NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight,
eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly,
bOutputInVideoMemory),
m_cuContext(cuContext) {
if (!m_hEncoder) {
NVENC_THROW_ERROR("Encoder Initialization failed",
NV_ENC_ERR_INVALID_DEVICE);
}
if (!m_cuContext) {
NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE);
}
}
NvEncoderCuda::~NvEncoderCuda() { ReleaseCudaResources(); }
void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
if (!IsHWEncoderInitialized()) {
NVENC_THROW_ERROR("Encoder intialization failed",
NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
}
  // for ME-only mode we need to allocate a separate set of buffers for the
  // reference frames
int numCount = m_bMotionEstimationOnly ? 2 : 1;
for (int count = 0; count < numCount; count++) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
std::vector<void *> inputFrames;
for (int i = 0; i < numInputBuffers; i++) {
CUdeviceptr pDeviceFrame;
uint32_t chromaHeight =
GetNumChromaPlanes(GetPixelFormat()) *
GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
CUDA_DRVAPI_CALL(cuMemAllocPitch(
(CUdeviceptr *)&pDeviceFrame, &m_cudaPitch,
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
GetMaxEncodeHeight() + chromaHeight, 16));
inputFrames.push_back((void *)pDeviceFrame);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
RegisterInputResources(
inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
GetMaxEncodeWidth(), GetMaxEncodeHeight(), (int)m_cudaPitch,
        GetPixelFormat(), count == 1);
}
}
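// Note: cuMemAllocPitch() may return a pitch wider than the requested row
// width; that padded pitch (m_cudaPitch) is what RegisterInputResources()
// hands to NvEncodeAPI, so the encoder reads each row at the padded stride.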
void NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream,
NV_ENC_CUSTREAM_PTR outputStream) {
NVENC_API_CALL(
m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream));
}
void NvEncoderCuda::ReleaseInputBuffers() { ReleaseCudaResources(); }
void NvEncoderCuda::ReleaseCudaResources() {
if (!m_hEncoder) {
return;
}
if (!m_cuContext) {
return;
}
UnregisterInputResources();
cuCtxPushCurrent(m_cuContext);
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) {
if (m_vInputFrames[i].inputPtr) {
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
}
}
m_vInputFrames.clear();
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) {
if (m_vReferenceFrames[i].inputPtr) {
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
}
}
m_vReferenceFrames.clear();
cuCtxPopCurrent(NULL);
m_cuContext = nullptr;
}
void NvEncoderCuda::CopyToDeviceFrame(
CUcontext device, void *pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy, CUstream stream) {
if (srcMemoryType != CU_MEMORYTYPE_HOST &&
srcMemoryType != CU_MEMORYTYPE_DEVICE) {
NVENC_THROW_ERROR("Invalid source memory type for copy",
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
CUDA_MEMCPY2D m = {0};
m.srcMemoryType = srcMemoryType;
if (srcMemoryType == CU_MEMORYTYPE_HOST) {
m.srcHost = pSrcFrame;
} else {
m.srcDevice = (CUdeviceptr)pSrcFrame;
}
m.srcPitch = srcPitch;
m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
m.dstDevice = pDstFrame;
m.dstPitch = dstPitch;
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
}
std::vector<uint32_t> srcChromaOffsets;
NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height,
srcChromaOffsets);
uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch);
uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
uint32_t chromaWidthInBytes =
NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
for (uint32_t i = 0; i < numChromaPlanes; ++i) {
if (chromaHeight) {
if (srcMemoryType == CU_MEMORYTYPE_HOST) {
m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
} else {
m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
}
m.srcPitch = srcChromaPitch;
m.dstDevice = (CUdeviceptr)((uint8_t *)pDstFrame + dstChromaOffsets[i]);
m.dstPitch = destChromaPitch;
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}
void NvEncoderCuda::CopyToDeviceFrame(
CUcontext device, void *pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
CUdeviceptr dstChromaDevicePtrs[], uint32_t dstChromaPitch,
uint32_t numChromaPlanes, bool bUnAlignedDeviceCopy) {
if (srcMemoryType != CU_MEMORYTYPE_HOST &&
srcMemoryType != CU_MEMORYTYPE_DEVICE) {
NVENC_THROW_ERROR("Invalid source memory type for copy",
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
CUDA_MEMCPY2D m = {0};
m.srcMemoryType = srcMemoryType;
if (srcMemoryType == CU_MEMORYTYPE_HOST) {
m.srcHost = pSrcFrame;
} else {
m.srcDevice = (CUdeviceptr)pSrcFrame;
}
m.srcPitch = srcPitch;
m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
m.dstDevice = pDstFrame;
m.dstPitch = dstPitch;
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
}
std::vector<uint32_t> srcChromaOffsets;
NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height,
srcChromaOffsets);
uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
uint32_t chromaWidthInBytes =
NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
for (uint32_t i = 0; i < numChromaPlanes; ++i) {
if (chromaHeight) {
if (srcMemoryType == CU_MEMORYTYPE_HOST) {
m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
} else {
m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
}
m.srcPitch = srcChromaPitch;
m.dstDevice = dstChromaDevicePtrs[i];
m.dstPitch = dstChromaPitch;
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}

View File

@@ -0,0 +1,106 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <cuda.h>
#include <stdint.h>
#include <mutex>
#include <vector>
#include "NvEncoder.h"
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char* szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVENCException::makeNVENCException( \
errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, \
__LINE__); \
} \
} while (0)
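// Example (illustrative): CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));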
/**
* @brief Encoder for CUDA device memory.
*/
class NvEncoderCuda : public NvEncoder {
public:
NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay = 3,
bool bMotionEstimationOnly = false,
bool bOPInVideoMemory = false);
virtual ~NvEncoderCuda();
/**
* @brief This is a static function to copy input data from host memory to
* device memory. This function assumes YUV plane is a single contiguous
* memory segment.
*/
static void CopyToDeviceFrame(
CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false, CUstream stream = NULL);
/**
* @brief This is a static function to copy input data from host memory to
 * device memory. The application must pass a separate device pointer for each YUV
* plane.
*/
static void CopyToDeviceFrame(
CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
CUdeviceptr dstChromaPtr[], uint32_t dstChromaPitch,
uint32_t numChromaPlanes, bool bUnAlignedDeviceCopy = false);
/**
* @brief This function sets input and output CUDA streams
*/
void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream,
NV_ENC_CUSTREAM_PTR outputStream);
protected:
/**
* @brief This function is used to release the input buffers allocated for
* encoding. This function is an override of virtual function
* NvEncoder::ReleaseInputBuffers().
*/
virtual void ReleaseInputBuffers() override;
private:
/**
* @brief This function is used to allocate input buffers for encoding.
* This function is an override of virtual function
* NvEncoder::AllocateInputBuffers().
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
private:
/**
* @brief This is a private function to release CUDA device memory used for
* encoding.
*/
void ReleaseCudaResources();
protected:
CUcontext m_cuContext;
private:
size_t m_cudaPitch = 0;
};

View File

@@ -0,0 +1,145 @@
#include "nv_encoder.h"
#include <chrono>
#include "log.h"
#define SAVE_ENCODER_STREAM 0
VideoEncoder::VideoEncoder() {
if (SAVE_ENCODER_STREAM) {
file_ = fopen("saved/stream.h264", "w+b");
if (!file_) {
LOG_WARN("Fail to open saved/stream.h264");
}
}
}
VideoEncoder::~VideoEncoder() {
if (SAVE_ENCODER_STREAM && file_) {
fflush(file_);
fclose(file_);
file_ = nullptr;
}
if (nv12_data_) {
free(nv12_data_);
nv12_data_ = nullptr;
}
}
int VideoEncoder::Init() {
// Init cuda context
int num_of_GPUs = 0;
CUdevice cuda_device;
bool cuda_ctx_succeed =
(index_of_GPU >= 0 && cuInit(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
cuDeviceGet(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate(&cuda_context_, 0, cuda_device) == CUresult::CUDA_SUCCESS);
  if (!cuda_ctx_succeed) {
    LOG_ERROR("Failed to create CUDA context");
    return -1;
  }
encoder_ = new NvEncoderCuda(cuda_context_, frame_width, frame_height,
NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12);
// Init encoder_ session
  NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
encoder_->CreateDefaultEncoderParams(&init_params, codec_guid, preset_guid,
tuning_info);
init_params.encodeWidth = frame_width;
init_params.encodeHeight = frame_height;
init_params.encodeConfig->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
init_params.encodeConfig->encodeCodecConfig.h264Config.level =
NV_ENC_LEVEL::NV_ENC_LEVEL_H264_31;
// TO TEST: not tested yet
// init_params.encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
init_params.encodeConfig->gopLength = keyFrameInterval_;
  // Do not use B-frames for real-time applications
init_params.encodeConfig->frameIntervalP = 1;
init_params.encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CBR;
init_params.encodeConfig->rcParams.maxBitRate = maxBitrate_ * 1000;
init_params.encodeConfig->encodeCodecConfig.h264Config.sliceMode = 1;
init_params.encodeConfig->encodeCodecConfig.h264Config.sliceModeData =
max_payload_size_;
encoder_->CreateEncoder(&init_params);
return 0;
}
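// Note (assumption): only the peak bitrate is set above; with CBR the average
// bitrate is usually configured as well, e.g.
//   init_params.encodeConfig->rcParams.averageBitRate = maxBitrate_ * 1000;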
int VideoEncoder::Encode(const uint8_t *pData, int nSize) {
if (!encoder_) {
LOG_ERROR("Invalid encoder");
return -1;
}
  // Force an IDR roughly every 150 frames (5 s at 30 fps) so receivers can
  // recover a clean decode point.
  if (0 == seq_++ % (30 * 5)) {
ForceIdr();
}
#ifdef SHOW_SUBMODULE_TIME_COST
auto start = std::chrono::steady_clock::now();
#endif
const NvEncInputFrame *encoder_inputframe = encoder_->GetNextInputFrame();
NvEncoderCuda::CopyToDeviceFrame(
cuda_context_,
(void *)pData, // NOLINT
0, (CUdeviceptr)encoder_inputframe->inputPtr, encoder_inputframe->pitch,
encoder_->GetEncodeWidth(), encoder_->GetEncodeHeight(),
CU_MEMORYTYPE_HOST, encoder_inputframe->bufferFormat,
encoder_inputframe->chromaOffsets, encoder_inputframe->numChromaPlanes);
encoder_->EncodeFrame(encoded_packets_);
  if (encoded_packets_.empty()) {
    // The encoder may buffer the first few frames (output delay), so an
    // empty result here is not necessarily a failure.
    LOG_WARN("empty encoded_packets_");
    return -1;
  }
for (const auto &packet : encoded_packets_) {
OnEncodedImage((char *)packet.data(), packet.size());
if (SAVE_ENCODER_STREAM) {
fwrite((unsigned char *)packet.data(), 1, packet.size(), file_);
}
}
#ifdef SHOW_SUBMODULE_TIME_COST
auto encode_time_cost = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - start)
.count();
LOG_INFO("Encode time cost {}ms", encode_time_cost);
#endif
return 0;
}
int VideoEncoder::OnEncodedImage(char *encoded_packets, size_t size) {
  LOG_INFO("output encoded image");
  // Default sink: write to the dump file only when stream saving is enabled.
  if (file_) {
    fwrite(encoded_packets, 1, size, file_);
  }
  return 0;
}
void VideoEncoder::ForceIdr() {
  NV_ENC_RECONFIGURE_PARAMS reconfig_params = {NV_ENC_RECONFIGURE_PARAMS_VER};
  NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
  NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
  init_params.encodeConfig = &encode_config;
encoder_->GetInitializeParams(&init_params);
reconfig_params.reInitEncodeParams = init_params;
reconfig_params.forceIDR = 1;
reconfig_params.resetEncoder = 1;
encoder_->Reconfigure(&reconfig_params);
}
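// Usage sketch (illustrative): a consumer overrides OnEncodedImage() to
// receive each encoded packet, e.g.
//   class NetworkedEncoder : public VideoEncoder {
//     int OnEncodedImage(char* pkt, size_t size) override {
//       return transport_->Send(pkt, size);  // hypothetical transport member
//     }
//   };
// PeerConnection in this commit follows exactly this pattern.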

View File

@@ -0,0 +1,36 @@
#ifndef _NV_ENCODER_H_
#define _NV_ENCODER_H_
#include "NvEncoderCuda.h"
class VideoEncoder {
public:
VideoEncoder();
~VideoEncoder();
int Init();
int Encode(const uint8_t* pData, int nSize);
virtual int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
private:
int index_of_GPU = 0;
GUID codec_guid = NV_ENC_CODEC_H264_GUID;
GUID preset_guid = NV_ENC_PRESET_P2_GUID;
NV_ENC_TUNING_INFO tuning_info =
NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
int frame_width = 1280;
int frame_height = 720;
int keyFrameInterval_ = 3000;
int maxBitrate_ = 2000;
int max_payload_size_ = 3000;
NvEncoder* encoder_ = nullptr;
CUcontext cuda_context_ = nullptr;
std::vector<std::vector<uint8_t>> encoded_packets_;
unsigned char* encoded_image_ = nullptr;
FILE* file_ = nullptr;
unsigned char* nv12_data_ = nullptr;
unsigned int seq_ = 0;
};
#endif

View File

@@ -50,6 +50,7 @@ int PeerConnection::Init(PeerConnectionParams params,
  do {
  } while (SignalStatus::Connected != GetSignalStatus());
  VideoEncoder::Init();
  return 0;
}
@@ -215,7 +216,35 @@ int PeerConnection::Destroy() {
SignalStatus PeerConnection::GetSignalStatus() { return signal_status_; }
int PeerConnection::SendVideoData(const char *data, size_t size) {
  int ret = Encode((uint8_t *)data, size);
  if (0 != ret) {
    LOG_ERROR("Encode failed");
    return -1;
  }
  // Encoded output is forwarded from the OnEncodedImage() callback below,
  // so nothing is transmitted directly here.
  return 0;
}
int PeerConnection::OnEncodedImage(char *encoded_packets, size_t size) {
  for (auto ice_trans : ice_transmission_list_) {
    ice_trans.second->SendData(encoded_packets, size);
  }
  return 0;
}
int PeerConnection::SendAudioData(const char *data, size_t size) {
  for (auto ice_trans : ice_transmission_list_) {
    ice_trans.second->SendData(data, size);
  }
  return 0;
}
int PeerConnection::SendUserData(const char *data, size_t size) {
  for (auto ice_trans : ice_transmission_list_) {
    ice_trans.second->SendData(data, size);
  }

View File

@@ -5,6 +5,8 @@
#include <map>
#include "ice_transmission.h"
#include "nv_decoder.h"
#include "nv_encoder.h"
#include "ws_transmission.h"
enum SignalStatus { Connecting = 0, Connected, Closed };
@@ -20,7 +22,7 @@ typedef struct {
  NetStatusReport net_status_report;
} PeerConnectionParams;
class PeerConnection : public VideoEncoder, VideoDecoder {
 public:
  PeerConnection(OnReceiveBuffer on_receive_buffer);
  ~PeerConnection();
@@ -37,7 +39,9 @@ class PeerConnection {
  SignalStatus GetSignalStatus();
  int SendVideoData(const char *data, size_t size);
  int SendAudioData(const char *data, size_t size);
  int SendUserData(const char *data, size_t size);
 private:
  int Init(PeerConnectionParams params, const std::string &transmission_id,
@@ -47,6 +51,9 @@ class PeerConnection {
  int RequestTransmissionMemberList(const std::string &transmission_id);
 private:
  int OnEncodedImage(char *encoded_packets, size_t size) override;
 private:
  std::string uri_ = "";
  std::string cfg_signal_server_ip_;
@@ -68,6 +75,8 @@ class PeerConnection {
  SignalStatus signal_status_ = SignalStatus::Closed;
  OnReceiveBuffer on_receive_buffer_;
};
#endif

src/qos/kcp/ikcp.c (1306 lines, new file)

File diff suppressed because it is too large.

src/qos/kcp/ikcp.h (416 lines, new file)
View File

@@ -0,0 +1,416 @@
//=====================================================================
//
// KCP - A Better ARQ Protocol Implementation
// skywind3000 (at) gmail.com, 2010-2011
//
// Features:
// + Average RTT reduced by 30%-40% vs. traditional ARQ protocols such as TCP.
// + Maximum RTT reduced by a factor of three vs. TCP.
// + Lightweight, distributed as a single source file.
//
//=====================================================================
#ifndef __IKCP_H__
#define __IKCP_H__
#include <stddef.h>
#include <stdlib.h>
#include <assert.h>
//=====================================================================
// 32BIT INTEGER DEFINITION
//=====================================================================
#ifndef __INTEGER_32_BITS__
#define __INTEGER_32_BITS__
#if defined(_WIN64) || defined(WIN64) || defined(__amd64__) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_IA64) || \
defined(_M_AMD64)
typedef unsigned int ISTDUINT32;
typedef int ISTDINT32;
#elif defined(_WIN32) || defined(WIN32) || defined(__i386__) || \
defined(__i386) || defined(_M_X86)
typedef unsigned long ISTDUINT32;
typedef long ISTDINT32;
#elif defined(__MACOS__)
typedef UInt32 ISTDUINT32;
typedef SInt32 ISTDINT32;
#elif defined(__APPLE__) && defined(__MACH__)
#include <sys/types.h>
typedef u_int32_t ISTDUINT32;
typedef int32_t ISTDINT32;
#elif defined(__BEOS__)
#include <sys/inttypes.h>
typedef u_int32_t ISTDUINT32;
typedef int32_t ISTDINT32;
#elif (defined(_MSC_VER) || defined(__BORLANDC__)) && (!defined(__MSDOS__))
typedef unsigned __int32 ISTDUINT32;
typedef __int32 ISTDINT32;
#elif defined(__GNUC__)
#include <stdint.h>
typedef uint32_t ISTDUINT32;
typedef int32_t ISTDINT32;
#else
typedef unsigned long ISTDUINT32;
typedef long ISTDINT32;
#endif
#endif
//=====================================================================
// Integer Definition
//=====================================================================
#ifndef __IINT8_DEFINED
#define __IINT8_DEFINED
typedef char IINT8;
#endif
#ifndef __IUINT8_DEFINED
#define __IUINT8_DEFINED
typedef unsigned char IUINT8;
#endif
#ifndef __IUINT16_DEFINED
#define __IUINT16_DEFINED
typedef unsigned short IUINT16;
#endif
#ifndef __IINT16_DEFINED
#define __IINT16_DEFINED
typedef short IINT16;
#endif
#ifndef __IINT32_DEFINED
#define __IINT32_DEFINED
typedef ISTDINT32 IINT32;
#endif
#ifndef __IUINT32_DEFINED
#define __IUINT32_DEFINED
typedef ISTDUINT32 IUINT32;
#endif
#ifndef __IINT64_DEFINED
#define __IINT64_DEFINED
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 IINT64;
#else
typedef long long IINT64;
#endif
#endif
#ifndef __IUINT64_DEFINED
#define __IUINT64_DEFINED
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 IUINT64;
#else
typedef unsigned long long IUINT64;
#endif
#endif
#ifndef INLINE
#if defined(__GNUC__)
#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
#define INLINE __inline__ __attribute__((always_inline))
#else
#define INLINE __inline__
#endif
#elif (defined(_MSC_VER) || defined(__BORLANDC__) || defined(__WATCOMC__))
#define INLINE __inline
#else
#define INLINE
#endif
#endif
#if (!defined(__cplusplus)) && (!defined(inline))
#define inline INLINE
#endif
//=====================================================================
// QUEUE DEFINITION
//=====================================================================
#ifndef __IQUEUE_DEF__
#define __IQUEUE_DEF__
struct IQUEUEHEAD {
struct IQUEUEHEAD *next, *prev;
};
typedef struct IQUEUEHEAD iqueue_head;
//---------------------------------------------------------------------
// queue init
//---------------------------------------------------------------------
#define IQUEUE_HEAD_INIT(name) { &(name), &(name) }
#define IQUEUE_HEAD(name) \
struct IQUEUEHEAD name = IQUEUE_HEAD_INIT(name)
#define IQUEUE_INIT(ptr) ( \
(ptr)->next = (ptr), (ptr)->prev = (ptr))
#define IOFFSETOF(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#define ICONTAINEROF(ptr, type, member) ( \
(type*)( ((char*)((type*)ptr)) - IOFFSETOF(type, member)) )
#define IQUEUE_ENTRY(ptr, type, member) ICONTAINEROF(ptr, type, member)
//---------------------------------------------------------------------
// queue operation
//---------------------------------------------------------------------
#define IQUEUE_ADD(node, head) ( \
(node)->prev = (head), (node)->next = (head)->next, \
(head)->next->prev = (node), (head)->next = (node))
#define IQUEUE_ADD_TAIL(node, head) ( \
(node)->prev = (head)->prev, (node)->next = (head), \
(head)->prev->next = (node), (head)->prev = (node))
#define IQUEUE_DEL_BETWEEN(p, n) ((n)->prev = (p), (p)->next = (n))
#define IQUEUE_DEL(entry) (\
(entry)->next->prev = (entry)->prev, \
(entry)->prev->next = (entry)->next, \
(entry)->next = 0, (entry)->prev = 0)
#define IQUEUE_DEL_INIT(entry) do { \
IQUEUE_DEL(entry); IQUEUE_INIT(entry); } while (0)
#define IQUEUE_IS_EMPTY(entry) ((entry) == (entry)->next)
#define iqueue_init IQUEUE_INIT
#define iqueue_entry IQUEUE_ENTRY
#define iqueue_add IQUEUE_ADD
#define iqueue_add_tail IQUEUE_ADD_TAIL
#define iqueue_del IQUEUE_DEL
#define iqueue_del_init IQUEUE_DEL_INIT
#define iqueue_is_empty IQUEUE_IS_EMPTY
#define IQUEUE_FOREACH(iterator, head, TYPE, MEMBER) \
for ((iterator) = iqueue_entry((head)->next, TYPE, MEMBER); \
&((iterator)->MEMBER) != (head); \
(iterator) = iqueue_entry((iterator)->MEMBER.next, TYPE, MEMBER))
#define iqueue_foreach(iterator, head, TYPE, MEMBER) \
IQUEUE_FOREACH(iterator, head, TYPE, MEMBER)
#define iqueue_foreach_entry(pos, head) \
for( (pos) = (head)->next; (pos) != (head) ; (pos) = (pos)->next )
#define __iqueue_splice(list, head) do { \
iqueue_head *first = (list)->next, *last = (list)->prev; \
iqueue_head *at = (head)->next; \
(first)->prev = (head), (head)->next = (first); \
(last)->next = (at), (at)->prev = (last); } while (0)
#define iqueue_splice(list, head) do { \
if (!iqueue_is_empty(list)) __iqueue_splice(list, head); } while (0)
#define iqueue_splice_init(list, head) do { \
iqueue_splice(list, head); iqueue_init(list); } while (0)
#ifdef _MSC_VER
#pragma warning(disable:4311)
#pragma warning(disable:4312)
#pragma warning(disable:4996)
#endif
#endif
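// Usage sketch (illustrative): this intrusive queue links nodes embedded in
// larger structs, e.g. iterating the send queue of segments defined below:
//   struct IKCPSEG *seg;
//   iqueue_foreach(seg, &kcp->snd_queue, struct IKCPSEG, node) { /* ... */ }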
//---------------------------------------------------------------------
// BYTE ORDER & ALIGNMENT
//---------------------------------------------------------------------
#ifndef IWORDS_BIG_ENDIAN
#ifdef _BIG_ENDIAN_
#if _BIG_ENDIAN_
#define IWORDS_BIG_ENDIAN 1
#endif
#endif
#ifndef IWORDS_BIG_ENDIAN
#if defined(__hppa__) || \
defined(__m68k__) || defined(mc68000) || defined(_M_M68K) || \
(defined(__MIPS__) && defined(__MIPSEB__)) || \
defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC) || \
defined(__sparc__) || defined(__powerpc__) || \
defined(__mc68000__) || defined(__s390x__) || defined(__s390__)
#define IWORDS_BIG_ENDIAN 1
#endif
#endif
#ifndef IWORDS_BIG_ENDIAN
#define IWORDS_BIG_ENDIAN 0
#endif
#endif
#ifndef IWORDS_MUST_ALIGN
#if defined(__i386__) || defined(__i386) || defined(_i386_)
#define IWORDS_MUST_ALIGN 0
#elif defined(_M_IX86) || defined(_X86_) || defined(__x86_64__)
#define IWORDS_MUST_ALIGN 0
#elif defined(__amd64) || defined(__amd64__)
#define IWORDS_MUST_ALIGN 0
#else
#define IWORDS_MUST_ALIGN 1
#endif
#endif
//=====================================================================
// SEGMENT
//=====================================================================
struct IKCPSEG
{
struct IQUEUEHEAD node;
IUINT32 conv;
IUINT32 cmd;
IUINT32 frg;
IUINT32 wnd;
IUINT32 ts;
IUINT32 sn;
IUINT32 una;
IUINT32 len;
IUINT32 resendts;
IUINT32 rto;
IUINT32 fastack;
IUINT32 xmit;
char data[1];
};
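// Note: data[1] is the classic C89 "struct hack": each segment is allocated
// with enough trailing bytes that data[] holds a variable-length payload of
// len bytes.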
//---------------------------------------------------------------------
// IKCPCB
//---------------------------------------------------------------------
struct IKCPCB
{
IUINT32 conv, mtu, mss, state;
IUINT32 snd_una, snd_nxt, rcv_nxt;
IUINT32 ts_recent, ts_lastack, ssthresh;
IINT32 rx_rttval, rx_srtt, rx_rto, rx_minrto;
IUINT32 snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe;
IUINT32 current, interval, ts_flush, xmit;
IUINT32 nrcv_buf, nsnd_buf;
IUINT32 nrcv_que, nsnd_que;
IUINT32 nodelay, updated;
IUINT32 ts_probe, probe_wait;
IUINT32 dead_link, incr;
struct IQUEUEHEAD snd_queue;
struct IQUEUEHEAD rcv_queue;
struct IQUEUEHEAD snd_buf;
struct IQUEUEHEAD rcv_buf;
IUINT32 *acklist;
IUINT32 ackcount;
IUINT32 ackblock;
void *user;
char *buffer;
int fastresend;
int fastlimit;
int nocwnd, stream;
int logmask;
int (*output)(const char *buf, int len, struct IKCPCB *kcp, void *user);
void (*writelog)(const char *log, struct IKCPCB *kcp, void *user);
};
typedef struct IKCPCB ikcpcb;
#define IKCP_LOG_OUTPUT 1
#define IKCP_LOG_INPUT 2
#define IKCP_LOG_SEND 4
#define IKCP_LOG_RECV 8
#define IKCP_LOG_IN_DATA 16
#define IKCP_LOG_IN_ACK 32
#define IKCP_LOG_IN_PROBE 64
#define IKCP_LOG_IN_WINS 128
#define IKCP_LOG_OUT_DATA 256
#define IKCP_LOG_OUT_ACK 512
#define IKCP_LOG_OUT_PROBE 1024
#define IKCP_LOG_OUT_WINS 2048
#ifdef __cplusplus
extern "C" {
#endif
//---------------------------------------------------------------------
// interface
//---------------------------------------------------------------------
// create a new kcp control object; 'conv' must be equal on both endpoints
// of the same connection. 'user' will be passed to the output callback
// output callback can be setup like this: 'kcp->output = my_udp_output'
ikcpcb* ikcp_create(IUINT32 conv, void *user);
// release kcp control object
void ikcp_release(ikcpcb *kcp);
// set output callback, which will be invoked by kcp
void ikcp_setoutput(ikcpcb *kcp, int (*output)(const char *buf, int len,
ikcpcb *kcp, void *user));
// user/upper level recv: returns size, returns below zero for EAGAIN
int ikcp_recv(ikcpcb *kcp, char *buffer, int len);
// user/upper level send, returns below zero for error
int ikcp_send(ikcpcb *kcp, const char *buffer, int len);
// update state (call it repeatedly, every 10ms-100ms), or you can ask
// ikcp_check when to call it again (when there is no ikcp_input/_send call).
// 'current' - current timestamp in millisec.
void ikcp_update(ikcpcb *kcp, IUINT32 current);
// Determine when you should invoke ikcp_update:
// returns the time (in millisec) at which ikcp_update should next be
// invoked, assuming there is no intervening ikcp_input/_send call; you can
// call ikcp_update at that time instead of calling it repeatedly.
// Important for reducing unnecessary ikcp_update invocations. Use it to
// schedule ikcp_update (eg. implementing an epoll-like mechanism,
// or optimizing ikcp_update when handling massive numbers of kcp connections)
IUINT32 ikcp_check(const ikcpcb *kcp, IUINT32 current);
// when you received a low level packet (eg. UDP packet), call it
int ikcp_input(ikcpcb *kcp, const char *data, long size);
// flush pending data
void ikcp_flush(ikcpcb *kcp);
// check the size of next message in the recv queue
int ikcp_peeksize(const ikcpcb *kcp);
// change MTU size, default is 1400
int ikcp_setmtu(ikcpcb *kcp, int mtu);
// set maximum window size: sndwnd=32, rcvwnd=32 by default
int ikcp_wndsize(ikcpcb *kcp, int sndwnd, int rcvwnd);
// get how many packets are waiting to be sent
int ikcp_waitsnd(const ikcpcb *kcp);
// fastest: ikcp_nodelay(kcp, 1, 20, 2, 1)
// nodelay: 0:disable(default), 1:enable
// interval: internal update timer interval in millisec, default is 100ms
// resend: 0:disable fast resend(default), 1:enable fast resend
// nc: 0:normal congestion control(default), 1:disable congestion control
int ikcp_nodelay(ikcpcb *kcp, int nodelay, int interval, int resend, int nc);
void ikcp_log(ikcpcb *kcp, int mask, const char *fmt, ...);
// setup allocator
void ikcp_allocator(void* (*new_malloc)(size_t), void (*new_free)(void*));
// read conv
IUINT32 ikcp_getconv(const void *ptr);
#ifdef __cplusplus
}
#endif
#endif
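// Minimal usage sketch (illustrative; my_udp_output, current_ms,
// udp_has_packet, deliver and sleep_ms are hypothetical helpers, not part
// of KCP):
//
//   ikcpcb *kcp = ikcp_create(0x11223344, user);  // same conv on both ends
//   ikcp_setoutput(kcp, my_udp_output);           // lower-level UDP send
//   ikcp_nodelay(kcp, 1, 10, 2, 1);               // low-latency profile
//   for (;;) {
//     ikcp_update(kcp, current_ms());             // drive timers and resends
//     while (udp_has_packet())                    // feed raw datagrams in
//       ikcp_input(kcp, pkt, pkt_len);
//     int n;
//     while ((n = ikcp_recv(kcp, buf, sizeof(buf))) > 0)
//       deliver(buf, n);                          // reassembled messages
//     ikcp_send(kcp, msg, msg_len);               // queue outgoing data
//     sleep_ms(10);
//   }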

View File

@@ -41,8 +41,15 @@ int JoinConnection(PeerPtr *peer_ptr, const char *transmission_id,
  return 0;
}
int SendData(PeerPtr *peer_ptr, DATA_TYPE data_type, const char *data,
             size_t size) {
  if (DATA_TYPE::VIDEO == data_type) {
    peer_ptr->peer_connection->SendVideoData(data, size);
  } else if (DATA_TYPE::AUDIO == data_type) {
    peer_ptr->peer_connection->SendAudioData(data, size);
  } else if (DATA_TYPE::USER == data_type) {
    peer_ptr->peer_connection->SendUserData(data, size);
  }
  return 0;
}

View File

@@ -1,5 +1,6 @@
#include "ice_transmission.h" #include "ice_transmission.h"
#include <chrono>
#include <map> #include <map>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
#include <thread> #include <thread>
@@ -7,7 +8,57 @@
#include "common.h" #include "common.h"
#include "log.h" #include "log.h"
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#include <windows.h>
#elif !defined(__unix)
#define __unix
#endif
#ifdef __unix
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#endif
using nlohmann::json; using nlohmann::json;
static int count = 1;
static inline void itimeofday(long *sec, long *usec) {
#if defined(__unix)
struct timeval time;
gettimeofday(&time, NULL);
if (sec) *sec = time.tv_sec;
if (usec) *usec = time.tv_usec;
#else
static long mode = 0, addsec = 0;
BOOL retval;
static IINT64 freq = 1;
IINT64 qpc;
if (mode == 0) {
retval = QueryPerformanceFrequency((LARGE_INTEGER *)&freq);
freq = (freq == 0) ? 1 : freq;
retval = QueryPerformanceCounter((LARGE_INTEGER *)&qpc);
addsec = (long)time(NULL);
addsec = addsec - (long)((qpc / freq) & 0x7fffffff);
mode = 1;
}
retval = QueryPerformanceCounter((LARGE_INTEGER *)&qpc);
  retval = retval * 2; /* reference retval to silence unused-variable warnings */
if (sec) *sec = (long)(qpc / freq) + addsec;
if (usec) *usec = (long)((qpc % freq) * 1000000 / freq);
#endif
}
static inline IINT64 iclock64(void) {
long s, u;
IINT64 value;
itimeofday(&s, &u);
value = ((IINT64)s) * 1000 + (u / 1000);
return value;
}
static inline IUINT32 iclock() { return (IUINT32)(iclock64() & 0xfffffffful); }
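// iclock() yields the 32-bit millisecond timestamp KCP expects; KCP's serial
// arithmetic tolerates the ~49-day wraparound of the truncated counter.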
const std::vector<std::string> ice_status = {
    "JUICE_STATE_DISCONNECTED", "JUICE_STATE_GATHERING",
@@ -31,9 +82,45 @@ IceTransmission::~IceTransmission() {
    delete ice_agent_;
    ice_agent_ = nullptr;
  }
  ikcp_release(kcp_);
}
int IceTransmission::InitIceTransmission(std::string &ip, int port) {
kcp_ = ikcp_create(0x11223344, (void *)this);
ikcp_setoutput(kcp_,
[](const char *buf, int len, ikcpcb *kcp, void *user) -> int {
IceTransmission *ice_transmission_obj =
static_cast<IceTransmission *>(user);
LOG_ERROR("Real send size: {}", len);
return ice_transmission_obj->ice_agent_->Send(buf, len);
});
// ikcp_wndsize(kcp_, 1280, 1280);
ikcp_nodelay(kcp_, 0, 40, 0, 0);
  // NOTE: 4000 exceeds a typical UDP path MTU (~1400); each KCP fragment is
  // handed to the ICE agent as one datagram, so this relies on the lower
  // layers tolerating large datagrams.
  ikcp_setmtu(kcp_, 4000);
// kcp_->rx_minrto = 10;
// kcp_->fastresend = 1;
  std::thread kcp_update_thread([this]() {
    while (1) {
      mtx_.lock();
      ikcp_update(kcp_, iclock());
      // Drain every reassembled message out of KCP's receive queue. The
      // write offset must advance by the accumulated total, not by the last
      // read length.
      int len = 0;
      int total_len = 0;
      while (1) {
        len = ikcp_recv(kcp_, kcp_complete_buffer_ + total_len, 1400);
        if (len <= 0) break;
        total_len += len;
      }
      mtx_.unlock();
      std::this_thread::sleep_for(std::chrono::milliseconds(2));
    }
  });
kcp_update_thread.detach();
  ice_agent_ = new IceAgent(ip, port);
  ice_agent_->CreateIceAgent(
@@ -43,6 +130,7 @@ int IceTransmission::InitIceTransmission(std::string &ip, int port) {
              static_cast<IceTransmission *>(user_ptr);
          LOG_INFO("[{}->{}] state_change: {}", ice_transmission_obj->user_id_,
                   ice_transmission_obj->remote_user_id_, ice_status[state]);
          ice_transmission_obj->state_ = state;
        } else {
          LOG_INFO("state_change: {}", ice_status[state]);
        }
@@ -74,9 +162,28 @@ int IceTransmission::InitIceTransmission(std::string &ip, int port) {
        IceTransmission *ice_transmission_obj =
            static_cast<IceTransmission *>(user_ptr);
        if (ice_transmission_obj->on_receive_ice_msg_cb_) {
          LOG_INFO("[{}] Receive size: {}", (void *)user_ptr, size);
          ice_transmission_obj->mtx_.lock();
          int ret = ikcp_input(ice_transmission_obj->kcp_, data, size);
          LOG_INFO("ikcp_input {}", ret);
          ice_transmission_obj->mtx_.unlock();
          // TODO: once the receive path is finished, hand the reassembled
          // KCP payload (drained in the update thread) to
          // on_receive_ice_msg_cb_ instead of dropping it here.
        }
      }
    },
@@ -167,6 +274,21 @@ int IceTransmission::SendAnswer() {
}
int IceTransmission::SendData(const char *data, size_t size) {
  if (JUICE_STATE_COMPLETED == state_) {
    LOG_INFO("[{}] Attempting to send size: {}", (void *)this, size);
    mtx_.lock();
    if (ikcp_waitsnd(kcp_) > kcp_->snd_wnd) {
      // The send queue has outgrown the send window: flush pending segments
      // rather than dropping the frame.
      ikcp_flush(kcp_);
    }
    int ret = ikcp_send(kcp_, data, (int)size);
    LOG_INFO("ikcp_send {}, wnd [{} | {}]", ret, ikcp_waitsnd(kcp_),
             kcp_->snd_wnd);
    mtx_.unlock();
  }
  return 0;
}

View File

@@ -5,8 +5,8 @@
#include "congestion_control.h" #include "congestion_control.h"
#include "ice_agent.h" #include "ice_agent.h"
#include "ikcp.h"
#include "ws_transmission.h" #include "ws_transmission.h"
class IceTransmission { class IceTransmission {
public: public:
IceTransmission( IceTransmission(
@@ -61,6 +61,12 @@ class IceTransmission {
std::string remote_user_id_ = ""; std::string remote_user_id_ = "";
bool offer_peer_ = true; bool offer_peer_ = true;
std::string remote_ice_username_ = ""; std::string remote_ice_username_ = "";
juice_state_t state_ = JUICE_STATE_DISCONNECTED;
private:
ikcpcb *kcp_ = nullptr;
char kcp_complete_buffer_[2560 * 1440 * 4];
std::mutex mtx_;
}; };
#endif #endif


@@ -4,10 +4,10 @@
 void GuestReceiveBuffer(const char* data, size_t size, const char* user_id,
                         size_t user_id_size) {
-  std::string msg(data, size);
-  std::string user(user_id, user_id_size);
-  std::cout << "Receive: [" << user << "] " << msg << std::endl;
+  // std::string msg(data, size);
+  // std::string user(user_id, user_id_size);
+  // std::cout << "Receive: [" << user << "] " << msg << std::endl;
 }

 int main(int argc, char** argv) {
@@ -26,12 +26,15 @@ int main(int argc, char** argv) {
   std::string msg = "Hello world";
   int i = 100;
-  while (i--) {
-    getchar();
-    std::cout << "Send msg: " << msg << std::endl;
-    SendData(peer, msg.data(), msg.size());
-  }
-  getchar();
+  // while (i--) {
+  //   getchar();
+  //   std::cout << "Send msg: " << msg << std::endl;
+  //   SendData(peer, DATA_TYPE::USER, msg.data(), msg.size());
+  // }
+  // getchar();
+  while (1) {
+  }
   return 0;
 }


@@ -22,13 +22,16 @@ int main(int argc, char** argv) {
   std::string msg = "Hello world";
-  int i = 100;
-  while (i--) {
-    getchar();
-    std::cout << "Send msg: " << msg << std::endl;
-    SendData(peer, msg.data(), msg.size());
-  }
-  getchar();
+  // int i = 100;
+  // while (i--) {
+  //   getchar();
+  //   std::cout << "Send msg: " << msg << std::endl;
+  //   SendData(peer, DATA_TYPE::USER, msg.data(), msg.size());
+  // }
+  // getchar();
+  while (1) {
+  }
   return 0;
 }


@@ -32,7 +32,7 @@ package("ffmpeg")
         add_configs("hardcoded-tables", {description = "Enable hardcoded tables.", default = true, type = "boolean"})
     end
-    add_links("avfilter", "avdevice", "avformat", "avcodec", "swscale", "swresample", "avutil")
+    -- add_links("avfilter", "avdevice", "avformat", "avcodec", "swscale", "swresample", "avutil")
     if is_plat("macosx") then
        add_frameworks("CoreFoundation", "Foundation", "CoreVideo", "CoreMedia", "AudioToolbox", "VideoToolbox", "Security")
     elseif is_plat("linux") then

thirdparty/nvcodec/Interface/cuviddec.h vendored new file (1190 lines): diff suppressed because it is too large.

thirdparty/nvcodec/Interface/nvEncodeAPI.h vendored new file (5003 lines): diff suppressed because it is too large.

thirdparty/nvcodec/Interface/nvcuvid.h vendored new file (436 lines):

@@ -0,0 +1,436 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
//! \date 2015-2020
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/
#if !defined(__NVCUVID_H__)
#define __NVCUVID_H__
#include "cuviddec.h"
#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */
/***********************************************/
//!
//! High-level helper APIs for video sources
//!
/***********************************************/
typedef void *CUvideosource;
typedef void *CUvideoparser;
typedef long long CUvideotimestamp;
/************************************************************************/
//! \enum cudaVideoState
//! Video source state enums
//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
/************************************************************************/
typedef enum {
cudaVideoState_Error = -1, /**< Error state (invalid source) */
cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */
cudaVideoState_Started = 1 /**< Source is running and delivering data */
} cudaVideoState;
/************************************************************************/
//! \enum cudaAudioCodec
//! Audio compression enums
//! Used in CUAUDIOFORMAT structure
/************************************************************************/
typedef enum {
cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */
cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */
cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */
cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */
cudaAudioCodec_LPCM, /**< PCM Audio */
cudaAudioCodec_AAC, /**< AAC Audio */
} cudaAudioCodec;
/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
//! Video format
//! Used in cuvidGetSourceVideoFormat API
/************************************************************************************************/
typedef struct
{
cudaVideoCodec codec; /**< OUT: Compression format */
/**
* OUT: frame rate = numerator / denominator (for example: 30000/1001)
*/
struct {
/**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */
unsigned int numerator;
/**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
unsigned int denominator;
} frame_rate;
unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */
unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct
decoding. The client can send this value in ulNumDecodeSurfaces
(in CUVIDDECODECREATEINFO structure).
This guarantees correct functionality and optimal video memory
usage but not necessarily the best performance, which depends on
the design of the overall application. The optimal number of
decode surfaces (in terms of performance and memory utilization)
should be decided by experimentation for each application, but it
cannot go below min_num_decode_surfaces.
If this value is used for ulNumDecodeSurfaces then it must be
returned to parser during sequence callback. */
unsigned int coded_width; /**< OUT: coded frame width in pixels */
unsigned int coded_height; /**< OUT: coded frame height in pixels */
/**
* area of the frame that should be displayed
* typical example:
* coded_width = 1920, coded_height = 1088
* display_area = { 0,0,1920,1080 }
*/
struct {
int left; /**< OUT: left position of display rect */
int top; /**< OUT: top position of display rect */
int right; /**< OUT: right position of display rect */
int bottom; /**< OUT: bottom position of display rect */
} display_area;
cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */
unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */
/**
* OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
*/
struct {
int x;
int y;
} display_aspect_ratio;
/**
* Video Signal Description
* Refer section E.2.1 (VUI parameters semantics) of H264 spec file
*/
struct {
unsigned char video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */
unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */
unsigned char reserved_zero_bits : 4; /**< Reserved bits */
unsigned char color_primaries; /**< OUT: chromaticity coordinates of source primaries */
unsigned char transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */
unsigned char matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */
} video_signal_description;
unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */
} CUVIDEOFORMAT;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDOPERATINGPOINTINFO
//! Operating point information of scalable bitstream
/****************************************************************/
typedef struct
{
cudaVideoCodec codec;
union
{
struct
{
unsigned char operating_points_cnt;
unsigned char reserved24_bits[3];
unsigned short operating_points_idc[32];
} av1;
unsigned char CodecReserved[1024];
};
} CUVIDOPERATINGPOINTINFO;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
//! AV1 specific sequence header information
/****************************************************************/
typedef struct {
unsigned int max_width;
unsigned int max_height;
unsigned char reserved[1016];
} CUVIDAV1SEQHDR;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMATEX
//! Video format including raw sequence header information
//! Used in cuvidGetSourceVideoFormat API
/****************************************************************/
typedef struct
{
CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */
union {
CUVIDAV1SEQHDR av1;
unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */
};
} CUVIDEOFORMATEX;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUAUDIOFORMAT
//! Audio formats
//! Used in cuvidGetSourceAudioFormat API
/****************************************************************/
typedef struct
{
cudaAudioCodec codec; /**< OUT: Compression format */
unsigned int channels; /**< OUT: number of audio channels */
unsigned int samplespersec; /**< OUT: sampling frequency */
unsigned int bitrate; /**< OUT: For uncompressed, can also be used to determine bits per sample */
unsigned int reserved1; /**< Reserved for future use */
unsigned int reserved2; /**< Reserved for future use */
} CUAUDIOFORMAT;
/***************************************************************/
//! \enum CUvideopacketflags
//! Data packet flags
//! Used in CUVIDSOURCEDATAPACKET structure
/***************************************************************/
typedef enum {
CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */
CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */
CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */
CUVID_PKT_ENDOFPICTURE = 0x08, /**< Set when the packet contains exactly one frame or one field */
CUVID_PKT_NOTIFY_EOS = 0x10, /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
display callback will be invoked with null value of CUVIDPARSERDISPINFO which
should be interpreted as end of the stream. */
} CUvideopacketflags;
/*****************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEDATAPACKET
//! Data Packet
//! Used in cuvidParseVideoData API
//! IN for cuvidParseVideoData
/*****************************************************************************/
typedef struct _CUVIDSOURCEDATAPACKET
{
unsigned long flags; /**< IN: Combination of CUVID_PKT_XXX flags */
unsigned long payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
CUvideotimestamp timestamp; /**< IN: Presentation time stamp (10MHz clock), only valid if
CUVID_PKT_TIMESTAMP flag is set */
} CUVIDSOURCEDATAPACKET;
// Callback for packet delivery
typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
/**************************************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEPARAMS
//! Describes parameters needed in cuvidCreateVideoSource API
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
/**************************************************************************************************************************/
typedef struct _CUVIDSOURCEPARAMS
{
unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */
unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */
unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
unsigned int uReserved1[6]; /**< Reserved for future use - set to zero */
void *pUserData; /**< IN: User private data passed in to the data handlers */
PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */
PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */
void *pvReserved2[8]; /**< Reserved for future use - set to NULL */
} CUVIDSOURCEPARAMS;
/**********************************************/
//! \ingroup ENUMS
//! \enum CUvideosourceformat_flags
//! CUvideosourceformat_flags
//! Used in cuvidGetSourceVideoFormat API
/**********************************************/
typedef enum {
CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */
} CUvideosourceformat_flags;
#if !defined(__APPLE__)
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks:
//! pfnVideoDataHandler() and pfnAudioDataHandler()
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create video source
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
/********************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
//! Destroy video source
/********************************************************************/
CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);
/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
//! Set video source state to:
//! cudaVideoState_Started - to signal the source to run and deliver data
//! cudaVideoState_Stopped - to stop the source from delivering the data
//! cudaVideoState_Error - invalid source
/******************************************************************************************/
CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
//! Get video source state
//! Returns:
//! cudaVideoState_Started - if Source is running and delivering data
//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
//! cudaVideoState_Error - if Source is in error state
/******************************************************************************************/
cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
/******************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement
/******************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
/**************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
//! Get audio source format
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported
//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed.
/**************************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
#endif
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERDISPINFO
//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
/**********************************************************************************/
typedef struct _CUVIDPARSERDISPINFO
{
int picture_index; /**< OUT: Index of the current picture */
int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */
int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */
int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling,
-1=unpaired field) */
CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */
} CUVIDPARSERDISPINFO;
/***********************************************************************************************************************/
//! Parser callbacks
//! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture
//! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS
//! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by
//! cuvidParseVideoData() to the application.
//! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is
//! -1 or invalid operating point.
//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces
//! while creating parser)
//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
/***********************************************************************************************************************/
typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
/**************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERPARAMS
//! Used in cuvidCreateVideoParser API
/**************************************/
typedef struct _CUVIDPARSERPARAMS
{
cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */
unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */
unsigned int ulClockRate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */
unsigned int ulErrorThreshold; /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */
unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display)
0=no delay (recommended values: 2..4) */
unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */
unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
unsigned int uReserved1[4]; /**< IN: Reserved for future use - set to 0 */
void *pUserData; /**< IN: User data for callbacks */
PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */
PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */
PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1
scalable bitstream */
void *pvReserved2[6]; /**< Reserved for future use - set to NULL */
CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
//! Create video parser object and initialize
/************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
//! Parse the video data from source data packet in pPacket
//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and
//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding
//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when
//! the decoder encounters a video format change
//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
/************************************************************************************************/
CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
//! Destroy the video parser
/************************************************************************************************/
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
/**********************************************************************************************/
#if defined(__cplusplus)
}
#endif /* __cplusplus */
#endif // __NVCUVID_H__
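
The callback contract documented above is easiest to see end to end. A hedged sketch of a minimal parser client, using only APIs declared in nvcuvid.h and cuviddec.h; the Handle* names are placeholders, and a real client would create and drive a CUvideodecoder inside these callbacks and check every CUresult:

#include <stdint.h>

#include "nvcuvid.h"

static int CUDAAPI HandleVideoSequence(void *user, CUVIDEOFORMAT *fmt) {
  // Create or reconfigure the decoder here. Returning a value > 1 overrides
  // the parser's DPB size, per the callback contract documented above.
  return fmt->min_num_decode_surfaces;
}

static int CUDAAPI HandlePictureDecode(void *user, CUVIDPICPARAMS *pic) {
  return 1;  // kick off cuvidDecodePicture(decoder, pic) in a real client
}

static int CUDAAPI HandlePictureDisplay(void *user, CUVIDPARSERDISPINFO *disp) {
  return 1;  // map the frame with cuvidMapVideoFrame() in a real client
}

CUvideoparser CreateParser(void *user) {
  CUVIDPARSERPARAMS params = {};
  params.CodecType = cudaVideoCodec_H264;
  params.ulMaxNumDecodeSurfaces = 1;  // raised later by the sequence callback
  params.ulMaxDisplayDelay = 0;       // low latency; 2..4 improves pipelining
  params.pUserData = user;
  params.pfnSequenceCallback = HandleVideoSequence;
  params.pfnDecodePicture = HandlePictureDecode;
  params.pfnDisplayPicture = HandlePictureDisplay;
  CUvideoparser parser = nullptr;
  cuvidCreateVideoParser(&parser, &params);
  return parser;
}

void FeedPacket(CUvideoparser parser, const uint8_t *data, int size) {
  CUVIDSOURCEDATAPACKET pkt = {};
  pkt.payload = data;
  pkt.payload_size = size;
  if (!data || size == 0) pkt.flags = CUVID_PKT_ENDOFSTREAM;
  cuvidParseVideoData(parser, &pkt);  // callbacks fire synchronously in here
}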

BIN thirdparty/nvcodec/Lib/Win32/nvcuvid.lib vendored (binary file not shown)

BIN thirdparty/nvcodec/Lib/x64/nvcuvid.lib vendored (binary file not shown)


@@ -0,0 +1,111 @@
# Copyright 2020 NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
# 3.7 is required for FindVulkan module support in CMake.
cmake_minimum_required(VERSION 3.7)
project(NvCodec)
# Set C++11 for all projects and disable non-standard extensions
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_INSTALL_PREFIX .)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Utils)
set(NV_CODEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec)
set(NV_ENC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvEncoder)
set(NV_DEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvDecoder)
set(NV_APPENC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppEncode/Common)
set(NV_APPDEC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppDecode/Common)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
else()
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
endif()
if(WIN32)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
Set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvencodeapi.lib)
else()
Set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvencodeapi.lib)
endif()
else ()
find_library(CUVID_LIB nvcuvid)
find_library(NVENCODEAPI_LIB nvidia-encode)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()
if(WIN32)
add_subdirectory(AppEncode/AppEncD3D11)
add_subdirectory(AppEncode/AppEncD3D9)
add_subdirectory(AppDecode/AppDecD3D)
else ()
#Need only linux Makefile for this
add_subdirectory(AppEncode/AppEncGL)
endif()
add_subdirectory(AppEncode/AppEncCuda)
add_subdirectory(AppEncode/AppEncDec)
add_subdirectory(AppEncode/AppEncLowLatency)
add_subdirectory(AppEncode/AppEncME)
add_subdirectory(AppEncode/AppEncPerf)
add_subdirectory(AppEncode/AppEncQual)
add_subdirectory(AppEncode/AppMotionEstimationVkCuda)
add_subdirectory(AppTranscode/AppTrans)
add_subdirectory(AppTranscode/AppTransOneToN)
add_subdirectory(AppTranscode/AppTransPerf)
add_subdirectory(AppDecode/AppDec)
add_subdirectory(AppDecode/AppDecGL)
add_subdirectory(AppDecode/AppDecImageProvider)
add_subdirectory(AppDecode/AppDecLowLatency)
add_subdirectory(AppDecode/AppDecMem)
add_subdirectory(AppDecode/AppDecMultiFiles)
add_subdirectory(AppDecode/AppDecMultiInput)
add_subdirectory(AppDecode/AppDecPerf)


@@ -0,0 +1,54 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int destStrideInPixels = nDestPitch / (sizeof(uint16_t));
*(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] };
}
static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t));
dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y;
}
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight);
}
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt16ToUInt8Kernel <<<gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight);
}
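Both converters take pitches in bytes and launch one thread per pixel on a 16x16 block grid. A small usage sketch, assuming the prototype below (normally supplied by a header) and pitched device allocations; sizes and names are illustrative:

#include <cuda_runtime.h>
#include <stdint.h>

void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch,
                          int nDestPitch, int nWidth, int nHeight);

void WidenLumaPlane(int nWidth, int nHeight) {
  uint8_t *dpSrc = nullptr;
  uint16_t *dpDst = nullptr;
  size_t srcPitch = 0, dstPitch = 0;
  // Pitched allocations; both pitches are in bytes, matching the kernels.
  cudaMallocPitch((void **)&dpSrc, &srcPitch, nWidth, nHeight);
  cudaMallocPitch((void **)&dpDst, &dstPitch, nWidth * sizeof(uint16_t),
                  nHeight);
  ConvertUInt8ToUInt16(dpSrc, dpDst, (int)srcPitch, (int)dstPitch, nWidth,
                       nHeight);
  cudaDeviceSynchronize();  // kernel launches are asynchronous
  cudaFree(dpSrc);
  cudaFree(dpDst);
}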


@@ -0,0 +1,399 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "ColorSpace.h"
__constant__ float matYuv2Rgb[3][3];
__constant__ float matRgb2Yuv[3][3];
void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) {
black = 16; white = 235;
max = 255;
switch (iMatrix)
{
case ColorSpaceStandard_BT709:
default:
wr = 0.2126f; wb = 0.0722f;
break;
case ColorSpaceStandard_FCC:
wr = 0.30f; wb = 0.11f;
break;
case ColorSpaceStandard_BT470:
case ColorSpaceStandard_BT601:
wr = 0.2990f; wb = 0.1140f;
break;
case ColorSpaceStandard_SMPTE240M:
wr = 0.212f; wb = 0.087f;
break;
case ColorSpaceStandard_BT2020:
case ColorSpaceStandard_BT2020C:
wr = 0.2627f; wb = 0.0593f;
// 10-bit only
black = 64 << 6; white = 940 << 6;
max = (1 << 16) - 1;
break;
}
}
void SetMatYuv2Rgb(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
1.0f, 0.0f, (1.0f - wr) / 0.5f,
1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
1.0f, (1.0f - wb) / 0.5f, 0.0f,
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
}
}
cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat));
}
void SetMatRgb2Yuv(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
wr, 1.0f - wb - wr, wb,
-0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
}
}
cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat));
}
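For reference, the two initializers above are the standard luma-coefficient construction. With $w_g = 1 - w_r - w_b$, the forward definitions are

$$Y = w_r R + w_g G + w_b B, \qquad U = \frac{B - Y}{2(1 - w_b)}, \qquad V = \frac{R - Y}{2(1 - w_r)},$$

and solving for R, G, B gives

$$R = Y + 2(1 - w_r)\,V, \qquad G = Y - \frac{2 w_b (1 - w_b)}{w_g}\,U - \frac{2 w_r (1 - w_r)}{w_g}\,V, \qquad B = Y + 2(1 - w_b)\,U,$$

which is row for row the mat initializer in SetMatYuv2Rgb (each division by 0.5f is a factor of 2, and wg appears as 1 - wb - wr). The loops then fold in the max / (white - black) range expansion, while the fixed-point black and mid offsets are applied per pixel in YuvToRgbForPixel.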
template<class T>
__device__ static T Clamp(T x, T lower, T upper) {
return x < lower ? lower : (x > upper ? upper : x);
}
template<class Rgb, class YuvUnit>
__device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
const int
low = 1 << (sizeof(YuvUnit) * 8 - 4),
mid = 1 << (sizeof(YuvUnit) * 8 - 1);
float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
YuvUnit
r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf),
g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf),
b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf);
Rgb rgb{};
const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
rgb.c.r = r >> nShift;
rgb.c.g = g >> nShift;
rgb.c.b = b >> nShift;
} else {
rgb.c.r = r << nShift;
rgb.c.g = g << nShift;
rgb.c.b = b << nShift;
}
return rgb;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
*(RgbIntx2 *)pDst = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
};
*(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
*(RgbIntx2 *)pDst = RgbIntx2{
YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<< <dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >> >
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
// Explicit Instantiation
template void Nv12ToColor32<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor32<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<BGRA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<RGBA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<BGRA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<RGBA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
RgbIntx2 int2a = *(RgbIntx2 *)pSrc;
RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch);
Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
decltype(Rgb::c.r)
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
*(YuvUnitx2 *)pDst = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
};
*(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
};
*(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 {
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
};
}
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) {
SetMatRgb2Yuv(iMatrix);
RgbToYuvKernel<ushort2, BGRA64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight);
}
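
A usage sketch for the conversion entry points above, assuming the matching template prototype is in scope; the explicit instantiations listed earlier in this file are what make the templates linkable from ordinary C++ translation units. Both pointers must be device memory, and the pitch and matrix choices here are illustrative:

#include <stdint.h>

#include "ColorSpace.h"  // BGRA32, ColorSpaceStandard_*

template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
                   int nBgraPitch, int nWidth, int nHeight, int iMatrix);

void Nv12FrameToBgra(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
                     int nWidth, int nHeight) {
  // Pick the matrix the bitstream signals (e.g. from CUVIDEOFORMAT's
  // video_signal_description); BT.709 is a common default for HD video.
  Nv12ToColor32<BGRA32>(dpNv12, nNv12Pitch, dpBgra, nWidth * 4, nWidth,
                        nHeight, ColorSpaceStandard_BT709);
}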


@@ -0,0 +1,48 @@
#pragma once
#include <stdint.h>
#include <cuda_runtime.h>
typedef enum ColorSpaceStandard {
ColorSpaceStandard_BT709 = 1,
ColorSpaceStandard_Unspecified = 2,
ColorSpaceStandard_Reserved = 3,
ColorSpaceStandard_FCC = 4,
ColorSpaceStandard_BT470 = 5,
ColorSpaceStandard_BT601 = 6,
ColorSpaceStandard_SMPTE240M = 7,
ColorSpaceStandard_YCgCo = 8,
ColorSpaceStandard_BT2020 = 9,
ColorSpaceStandard_BT2020C = 10
} ColorSpaceStandard;
union BGRA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t b, g, r, a;
} c;
};
union RGBA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t r, g, b, a;
} c;
};
union BGRA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t b, g, r, a;
} c;
};
union RGBA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t r, g, b, a;
} c;
};


@@ -0,0 +1,357 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
}
#include "NvCodecUtils.h"
//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from it.
//---------------------------------------------------------------------------
/**
* @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
*/
class FFmpegDemuxer {
private:
AVFormatContext *fmtc = NULL;
AVIOContext *avioc = NULL;
AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
AVBSFContext *bsfc = NULL;
int iVideoStream;
bool bMp4H264, bMp4HEVC, bMp4MPEG4;
AVCodecID eVideoCodec;
AVPixelFormat eChromaFormat;
int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
double timeBase = 0.0;
int64_t userTimeScale = 0;
uint8_t *pDataWithHeader = NULL;
unsigned int frameCount = 0;
public:
class DataProvider {
public:
virtual ~DataProvider() {}
virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
};
private:
/**
* @brief Private constructor to initialize libavformat resources.
* @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
*/
FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
if (!fmtc) {
LOG(ERROR) << "No AVFormatContext provided.";
return;
}
LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
ck(avformat_find_stream_info(fmtc, NULL));
iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (iVideoStream < 0) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file";
return;
}
//fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
nWidth = fmtc->streams[iVideoStream]->codecpar->width;
nHeight = fmtc->streams[iVideoStream]->codecpar->height;
eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
timeBase = av_q2d(rTimeBase);
userTimeScale = timeScale;
// Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
switch (eChromaFormat)
{
case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 10;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV420P12LE:
nBitDepth = 12;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P10LE:
nBitDepth = 10;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P12LE:
nBitDepth = 12;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P:
nBitDepth = 8;
nChromaHeight = nHeight << 1;
nBPP = 1;
break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
break;
default:
LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
eChromaFormat = AV_PIX_FMT_YUV420P;
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
}
bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
//Initialize packet fields with default values
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
av_init_packet(&pktFiltered);
pktFiltered.data = NULL;
pktFiltered.size = 0;
// Initialize bitstream filter and its required resources
if (bMp4H264) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
if (bMp4HEVC) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
}
AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
AVFormatContext *ctx = NULL;
if (!(ctx = avformat_alloc_context())) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
uint8_t *avioc_buffer = NULL;
int avioc_buffer_size = 8 * 1024 * 1024;
avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
if (!avioc_buffer) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
0, pDataProvider, &ReadPacket, NULL, NULL);
if (!avioc) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
ctx->pb = avioc;
ck(avformat_open_input(&ctx, NULL, NULL, NULL));
return ctx;
}
/**
* @brief Allocate and return AVFormatContext*.
* @param szFilePath - Filepath pointing to input stream.
* @return Pointer to AVFormatContext
*/
AVFormatContext *CreateFormatContext(const char *szFilePath) {
avformat_network_init();
AVFormatContext *ctx = NULL;
ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
return ctx;
}
public:
FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;}
~FFmpegDemuxer() {
if (!fmtc) {
return;
}
if (pkt.data) {
av_packet_unref(&pkt);
}
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
if (bsfc) {
av_bsf_free(&bsfc);
}
avformat_close_input(&fmtc);
if (avioc) {
av_freep(&avioc->buffer);
av_freep(&avioc);
}
if (pDataWithHeader) {
av_free(pDataWithHeader);
}
}
AVCodecID GetVideoCodec() {
return eVideoCodec;
}
AVPixelFormat GetChromaFormat() {
return eChromaFormat;
}
int GetWidth() {
return nWidth;
}
int GetHeight() {
return nHeight;
}
int GetBitDepth() {
return nBitDepth;
}
int GetFrameSize() {
return nWidth * (nHeight + nChromaHeight) * nBPP;
}
bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
if (!fmtc) {
return false;
}
*pnVideoBytes = 0;
if (pkt.data) {
av_packet_unref(&pkt);
}
int e = 0;
while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) {
av_packet_unref(&pkt);
}
if (e < 0) {
return false;
}
if (bMp4H264 || bMp4HEVC) {
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
ck(av_bsf_send_packet(bsfc, &pkt));
ck(av_bsf_receive_packet(bsfc, &pktFiltered));
*ppVideo = pktFiltered.data;
*pnVideoBytes = pktFiltered.size;
if (pts)
*pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase);
} else {
if (bMp4MPEG4 && (frameCount == 0)) {
int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
if (extraDataSize > 0) {
// extradata contains start codes 00 00 01. Subtract its size
pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t));
if (!pDataWithHeader) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return false;
}
memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t));
*ppVideo = pDataWithHeader;
*pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t);
}
} else {
*ppVideo = pkt.data;
*pnVideoBytes = pkt.size;
}
if (pts)
*pts = (int64_t)(pkt.pts * userTimeScale * timeBase);
}
frameCount++;
return true;
}
static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
}
};
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4 : return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_WMV3 :
case AV_CODEC_ID_VC1 : return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264 : return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC : return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8 : return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9 : return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG : return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1 : return cudaVideoCodec_AV1;
default : return cudaVideoCodec_NumCodecs;
}
}
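// --- Usage sketch (illustrative only, not part of the vendored sample) ---
// Demux a file and map its codec to the NVDEC enum; the decode step is
// elided. The buffer returned by Demux() is owned by the demuxer and stays
// valid only until the next Demux() call.
inline void DemuxFileSketch(const char *szInFilePath) {
  FFmpegDemuxer demuxer(szInFilePath);
  cudaVideoCodec eCodec = FFmpeg2NvCodecId(demuxer.GetVideoCodec());
  (void)eCodec;  // a real app would hand this to an NVDEC decoder
  uint8_t *pVideo = nullptr;
  int nVideoBytes = 0;
  int64_t pts = 0;
  while (demuxer.Demux(&pVideo, &nVideoBytes, &pts)) {
    // pVideo/nVideoBytes hold one compressed frame (Annex B form for MP4
    // H.264/HEVC inputs) plus its presentation timestamp.
  }
}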

View File

@@ -0,0 +1,109 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <thread>
#include <mutex>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"
extern simplelogger::Logger *logger;
class FFmpegStreamer {
private:
AVFormatContext *oc = NULL;
AVStream *vs = NULL;
int nFps = 0;
public:
FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
avformat_network_init();
oc = avformat_alloc_context();
if (!oc) {
LOG(ERROR) << "FFMPEG: avformat_alloc_context error";
return;
}
// Set format on oc
AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL);
if (!fmt) {
LOG(ERROR) << "Invalid format";
return;
}
fmt->video_codec = eCodecId;
oc->oformat = fmt;
oc->url = av_strdup(szInFilePath);
LOG(INFO) << "Streaming destination: " << oc->url;
// Add video stream to oc
vs = avformat_new_stream(oc, NULL);
if (!vs) {
LOG(ERROR) << "FFMPEG: Could not alloc video stream";
return;
}
vs->id = 0;
// Set video parameters
AVCodecParameters *vpar = vs->codecpar;
vpar->codec_id = fmt->video_codec;
vpar->codec_type = AVMEDIA_TYPE_VIDEO;
vpar->width = nWidth;
vpar->height = nHeight;
// Everything is ready. Now open the output stream.
if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
return;
}
// Write the container header
if (avformat_write_header(oc, NULL) < 0) {
LOG(ERROR) << "FFMPEG: avformat_write_header error!";
return;
}
}
~FFmpegStreamer() {
if (oc) {
av_write_trailer(oc);
avio_close(oc->pb);
avformat_free_context(oc);
}
}
bool Stream(uint8_t *pData, int nBytes, int nPts) {
AVPacket pkt = {0};
av_init_packet(&pkt);
pkt.pts = av_rescale_q(nPts, AVRational {1, nFps}, vs->time_base);
// No B-frames
pkt.dts = pkt.pts;
pkt.stream_index = vs->index;
pkt.data = pData;
pkt.size = nBytes;
if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
// Write the compressed frame into the output
int ret = av_write_frame(oc, &pkt);
av_write_frame(oc, NULL);
if (ret < 0) {
LOG(ERROR) << "FFMPEG: Error while writing video frame";
}
return ret >= 0;
}
};
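// --- Usage sketch (illustrative only) --- stream pre-encoded H.264 Annex B
// frames; the frame source, destination URL and 1080p30 geometry are
// placeholders, not values from this header.
inline void StreamAnnexBSketch(uint8_t *const *ppFrames,
                               const int *pnFrameSizes, int nFrames) {
  FFmpegStreamer streamer(AV_CODEC_ID_H264, 1920, 1080, 30,
                          "udp://127.0.0.1:8899");
  for (int i = 0; i < nFrames; i++) {
    streamer.Stream(ppFrames[i], pnFrameSizes[i], i);  // pts counted in frames
  }
}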

View File

@@ -0,0 +1,490 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
//---------------------------------------------------------------------------
//! \file NvCodecUtils.h
//! \brief Miscellaneous classes and error checking functions.
//!
//! Used by Transcode/Encode samples apps for reading input files,
//! mutithreading, performance measurement or colorspace conversion while
//! decoding.
//---------------------------------------------------------------------------
#pragma once
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <thread>
#include "Logger.h"
extern simplelogger::Logger *logger;
#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
LOG(ERROR) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
if (e != cudaSuccess) {
LOG(ERROR) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef _NV_ENCODEAPI_H_
inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
const char *aszErrName[] = {
"NV_ENC_SUCCESS",
"NV_ENC_ERR_NO_ENCODE_DEVICE",
"NV_ENC_ERR_UNSUPPORTED_DEVICE",
"NV_ENC_ERR_INVALID_ENCODERDEVICE",
"NV_ENC_ERR_INVALID_DEVICE",
"NV_ENC_ERR_DEVICE_NOT_EXIST",
"NV_ENC_ERR_INVALID_PTR",
"NV_ENC_ERR_INVALID_EVENT",
"NV_ENC_ERR_INVALID_PARAM",
"NV_ENC_ERR_INVALID_CALL",
"NV_ENC_ERR_OUT_OF_MEMORY",
"NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
"NV_ENC_ERR_UNSUPPORTED_PARAM",
"NV_ENC_ERR_LOCK_BUSY",
"NV_ENC_ERR_NOT_ENOUGH_BUFFER",
"NV_ENC_ERR_INVALID_VERSION",
"NV_ENC_ERR_MAP_FAILED",
"NV_ENC_ERR_NEED_MORE_INPUT",
"NV_ENC_ERR_ENCODER_BUSY",
"NV_ENC_ERR_EVENT_NOT_REGISTERD",
"NV_ENC_ERR_GENERIC",
"NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
"NV_ENC_ERR_UNIMPLEMENTED",
"NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
"NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
"NV_ENC_ERR_RESOURCE_NOT_MAPPED",
};
if (e != NV_ENC_SUCCESS) {
LOG(ERROR) << "NVENC error " << aszErrName[e] << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef _WINERROR_
inline bool check(HRESULT e, int iLine, const char *szFile) {
if (e != S_OK) {
std::stringstream stream;
stream << std::hex << std::uppercase << e;
LOG(ERROR) << "HRESULT error 0x" << stream.str() << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
if (e != 0) {
LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
inline bool check(int e, int iLine, const char *szFile) {
if (e < 0) {
LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#define ck(call) check(call, __LINE__, __FILE__)
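// Illustrative call sites: any of the overloaded status types can be wrapped,
// and the failing file/line is captured without extra plumbing, e.g.
//   ck(cuInit(0));          // CUresult overload
//   ck(cudaSetDevice(0));   // cudaError_t overload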
/**
* @brief Wrapper class around std::thread
*/
class NvThread {
public:
NvThread() = default;
NvThread(const NvThread &) = delete;
NvThread &operator=(const NvThread &other) = delete;
NvThread(std::thread &&thread) : t(std::move(thread)) {}
NvThread(NvThread &&thread) : t(std::move(thread.t)) {}
NvThread &operator=(NvThread &&other) {
join();  // assigning over a still-joinable thread would call std::terminate
t = std::move(other.t);
return *this;
}
~NvThread() { join(); }
void join() {
if (t.joinable()) {
t.join();
}
}
private:
std::thread t;
};
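// --- Usage sketch (illustrative only) --- because ~NvThread() joins, a
// scope-bound container of workers cleans itself up; requires <vector>,
// which this header does not pull in itself.
inline void RunWorkersSketch(int nThreads) {
  std::vector<NvThread> workers;
  for (int i = 0; i < nThreads; i++) {
    workers.push_back(NvThread(std::thread([i]() {
      // per-thread work would go here
    })));
  }
}  // each ~NvThread() joins its thread as the vector is destroyed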
#ifndef _WIN32
#define _stricmp strcasecmp
#define _stat64 stat64
#endif
/**
* @brief Utility class to allocate buffer memory. Helps avoid I/O during the
* encode/decode loop in case of performance tests.
*/
class BufferedFileReader {
public:
/**
* @brief Constructor function to allocate appropriate memory and copy file
* contents into it
*/
BufferedFileReader(const char *szFileName, bool bPartial = false) {
struct _stat64 st;
if (_stat64(szFileName, &st) != 0) {
LOG(ERROR) << szFileName << " does not exist";
return;
}
nSize = st.st_size;
while (nSize) {
try {
pBuf = new uint8_t[(size_t)nSize];
if (nSize != (uint64_t)st.st_size) {
LOG(WARNING) << "File is too large - only " << 100.0 * nSize / (uint64_t)st.st_size << "% is loaded";
}
break;
} catch (std::bad_alloc &) {
if (!bPartial) {
LOG(ERROR) << "Failed to allocate memory in BufferedFileReader";
return;
}
nSize = (uint64_t)(nSize * 0.9);
}
}
std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
if (!fpIn) {
LOG(ERROR) << "Unable to open input file: " << szFileName;
return;
}
std::streamsize nRead =
fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
fpIn.close();
assert(nRead == nSize);
}
~BufferedFileReader() {
if (pBuf) {
delete[] pBuf;
}
}
bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
if (!pBuf) {
return false;
}
*ppBuf = pBuf;
*pnSize = nSize;
return true;
}
private:
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
};
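// --- Usage sketch (illustrative only) --- load the bitstream once so a
// timed encode/decode loop never touches the filesystem.
inline void PerfLoopSketch(const char *szInFilePath) {
  BufferedFileReader reader(szInFilePath);
  uint8_t *pBuf = nullptr;
  uint64_t nSize = 0;
  if (!reader.GetBuffer(&pBuf, &nSize)) {
    return;  // file missing or allocation failed
  }
  // Iterate over (pBuf, nSize) as many times as the measurement needs.
}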
/**
* @brief Template class to facilitate color space conversion
*/
template <typename T>
class YuvConverter {
public:
YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
}
~YuvConverter() { delete[] pQuad; }
void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY;
if (nPitch == nWidth) {
memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i,
((nWidth + 1) / 2) * sizeof(T));
}
}
T *pv = puv + nSizePlaneU;
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
}
}
}
void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY, *pu = puv, *pv = puv + nSizePlaneU;
// split chroma from interleave to planar
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
}
}
if (nPitch == nWidth) {
memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i,
((nWidth + 1) / 2) * sizeof(T));
}
}
}
private:
T *pQuad;
int nWidth, nHeight;
};
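// --- Usage sketch (illustrative only) --- round-trip an NV12 frame whose
// pitch equals its width: split the interleaved UV plane into planar U/V,
// then interleave it again, both conversions in place.
inline void Nv12RoundTripSketch(uint8_t *pFrame, int nWidth, int nHeight) {
  YuvConverter<uint8_t> converter(nWidth, nHeight);
  converter.UVInterleavedToPlanar(pFrame);  // NV12 -> planar YUV420
  converter.PlanarToUVInterleaved(pFrame);  // planar YUV420 -> NV12
}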
/**
* @brief Utility class to measure elapsed time in seconds between the block of
* executed code
*/
class StopWatch {
public:
void Start() { t0 = std::chrono::high_resolution_clock::now(); }
double Stop() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now().time_since_epoch() -
t0.time_since_epoch())
.count() /
1.0e9;
}
private:
std::chrono::high_resolution_clock::time_point t0;
};
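// --- Usage sketch (illustrative only) --- Stop() returns seconds elapsed
// since Start(), so throughput is a single division.
inline double MeasureFpsSketch(int nFrames) {
  StopWatch watch;
  watch.Start();
  // ... process nFrames frames here ...
  return nFrames / watch.Stop();  // frames per second
}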
template <typename T>
class ConcurrentQueue {
public:
ConcurrentQueue() {}
ConcurrentQueue(size_t size) : maxSize(size) {}
ConcurrentQueue(const ConcurrentQueue &) = delete;
ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;
void setSize(size_t s) { maxSize = s; }
void push_back(const T &value) {
// Do not use a std::lock_guard here. We will need to explicitly
// unlock before notify_one as the other waiting thread will
// automatically try to acquire mutex once it wakes up
// (which will happen on notify_one)
std::unique_lock<std::mutex> lock(m_mutex);
auto wasEmpty = m_List.empty();
while (full()) {
m_cond.wait(lock);
}
m_List.push_back(value);
if (wasEmpty && !m_List.empty()) {
lock.unlock();
m_cond.notify_one();
}
}
T pop_front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
auto wasFull = full();
T data = std::move(m_List.front());
m_List.pop_front();
if (wasFull && !full()) {
lock.unlock();
m_cond.notify_one();
}
return data;
}
T front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
return m_List.front();
}
size_t size() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.size();
}
bool empty() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.empty();
}
void clear() {
std::unique_lock<std::mutex> lock(m_mutex);
m_List.clear();
}
private:
bool full() {
if (m_List.size() == maxSize) return true;
return false;
}
private:
std::list<T> m_List;
std::mutex m_mutex;
std::condition_variable m_cond;
size_t maxSize = SIZE_MAX;  // effectively unbounded until setSize() is called
};
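// --- Usage sketch (illustrative only) --- one bounded producer/consumer
// pair: push_back() blocks once maxSize items are queued and pop_front()
// blocks while the queue is empty.
inline int ProduceConsumeSketch() {
  ConcurrentQueue<int> queue;
  queue.setSize(8);  // bound the queue so the producer cannot run far ahead
  NvThread producer(std::thread([&queue]() {
    for (int i = 0; i < 100; i++) {
      queue.push_back(i);
    }
  }));
  int sum = 0;
  for (int i = 0; i < 100; i++) {
    sum += queue.pop_front();
  }
  return sum;  // producer is joined by ~NvThread()
}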
inline void CheckInputFile(const char *szInFilePath) {
std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
if (fpIn.fail()) {
std::ostringstream err;
err << "Unable to open input file: " << szInFilePath << std::endl;
throw std::invalid_argument(err.str());
}
}
inline void ValidateResolution(int nWidth, int nHeight) {
if (nWidth <= 0 || nHeight <= 0) {
std::ostringstream err;
err << "Please specify positive non zero resolution as -s WxH. Current "
"resolution is "
<< nWidth << "x" << nHeight << std::endl;
throw std::invalid_argument(err.str());
}
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016,
int nP016Pitch, int nWidth, int nHeight, int iMatrix = 4);
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch,
int nDestPitch, int nWidth, int nHeight);
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch,
int nDestPitch, int nWidth, int nHeight);
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth,
int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch,
int nSrcWidth, int nSrcHeight,
unsigned char *dpDstNv12UV = nullptr);
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth,
int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch,
int nSrcWidth, int nSrcHeight,
unsigned char *dpDstP016UV = nullptr);
void ScaleYUV420(unsigned char *dpDstY, unsigned char *dpDstU,
unsigned char *dpDstV, int nDstPitch, int nDstChromaPitch,
int nDstWidth, int nDstHeight, unsigned char *dpSrcY,
unsigned char *dpSrcU, unsigned char *dpSrcV, int nSrcPitch,
int nSrcChromaPitch, int nSrcWidth, int nSrcHeight,
bool bSemiplanar);
#ifdef __cuda_cuda_h__
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue,
CUstream_st *outputCUStream);
#endif

View File

@@ -0,0 +1,644 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <vector>
#include <string>
#include <algorithm>
#include <stdexcept>
#include <sstream>
#include <iterator>
#include <cstring>
#include <functional>
#include "../Utils/Logger.h"
extern simplelogger::Logger *logger;
#ifndef _WIN32
inline bool operator==(const GUID &guid1, const GUID &guid2) {
return !memcmp(&guid1, &guid2, sizeof(GUID));
}
inline bool operator!=(const GUID &guid1, const GUID &guid2) {
return !(guid1 == guid2);
}
#endif
/*
* Helper class for parsing generic encoder options and preparing encoder
* initialization parameters. This class also provides some utility methods
* which generate verbose descriptions of the provided set of encoder
* initialization parameters.
*/
class NvEncoderInitParam {
public:
NvEncoderInitParam(const char *szParam = "",
std::function<void(NV_ENC_INITIALIZE_PARAMS *pParams)> *pfuncInit = NULL, bool _bLowLatency = false)
: strParam(szParam), bLowLatency(_bLowLatency)
{
if (pfuncInit) {
funcInit = *pfuncInit;
}
std::transform(strParam.begin(), strParam.end(), strParam.begin(), tolower);
std::istringstream ss(strParam);
tokens = std::vector<std::string> {
std::istream_iterator<std::string>(ss),
std::istream_iterator<std::string>()
};
for (unsigned i = 0; i < tokens.size(); i++)
{
if (tokens[i] == "-codec" && ++i != tokens.size())
{
ParseString("-codec", tokens[i], vCodec, szCodecNames, &guidCodec);
continue;
}
if (tokens[i] == "-preset" && ++i != tokens.size()) {
ParseString("-preset", tokens[i], vPreset, szPresetNames, &guidPreset);
continue;
}
if (tokens[i] == "-tuninginfo" && ++i != tokens.size())
{
ParseString("-tuninginfo", tokens[i], vTuningInfo, szTuningInfoNames, &m_TuningInfo);
continue;
}
}
}
virtual ~NvEncoderInitParam() {}
virtual bool IsCodecH264() {
return GetEncodeGUID() == NV_ENC_CODEC_H264_GUID;
}
virtual bool IsCodecHEVC() {
return GetEncodeGUID() == NV_ENC_CODEC_HEVC_GUID;
}
std::string GetHelpMessage(bool bMeOnly = false, bool bUnbuffered = false, bool bHide444 = false, bool bOutputInVidMem = false)
{
std::ostringstream oss;
if (bOutputInVidMem && bMeOnly)
{
oss << "-codec Codec: " << "h264" << std::endl;
}
else
{
oss << "-codec Codec: " << szCodecNames << std::endl;
}
oss << "-preset Preset: " << szPresetNames << std::endl
<< "-profile H264: " << szH264ProfileNames;
if (bOutputInVidMem && bMeOnly)
{
oss << std::endl;
}
else
{
oss << "; HEVC: " << szHevcProfileNames << std::endl;
}
if (!bMeOnly)
{
if (bLowLatency == false)
oss << "-tuninginfo TuningInfo: " << szTuningInfoNames << std::endl;
else
oss << "-tuninginfo TuningInfo: " << szLowLatencyTuningInfoNames << std::endl;
oss << "-multipass Multipass: " << szMultipass << std::endl;
}
if (!bHide444 && !bLowLatency)
{
oss << "-444 (Only for RGB input) YUV444 encode" << std::endl;
}
if (bMeOnly) return oss.str();
oss << "-fps Frame rate" << std::endl;
if (!bUnbuffered && !bLowLatency)
{
oss << "-bf Number of consecutive B-frames" << std::endl;
}
if (!bLowLatency)
{
oss << "-rc Rate control mode: " << szRcModeNames << std::endl
<< "-gop Length of GOP (Group of Pictures)" << std::endl
<< "-bitrate Average bit rate, can be in unit of 1, K, M" << std::endl
<< "-maxbitrate Max bit rate, can be in unit of 1, K, M" << std::endl
<< "-vbvbufsize VBV buffer size in bits, can be in unit of 1, K, M" << std::endl
<< "-vbvinit VBV initial delay in bits, can be in unit of 1, K, M" << std::endl
<< "-aq Enable spatial AQ and set its stength (range 1-15, 0-auto)" << std::endl
<< "-temporalaq (No value) Enable temporal AQ" << std::endl
<< "-cq Target constant quality level for VBR mode (range 1-51, 0-auto)" << std::endl;
}
if (!bUnbuffered && !bLowLatency)
{
oss << "-lookahead Maximum depth of lookahead (range 0-(31 - number of B frames))" << std::endl;
}
oss << "-qmin Min QP value" << std::endl
<< "-qmax Max QP value" << std::endl
<< "-initqp Initial QP value" << std::endl;
if (!bLowLatency)
{
oss << "-constqp QP value for constqp rate control mode" << std::endl
<< "Note: QP value can be in the form of qp_of_P_B_I or qp_P,qp_B,qp_I (no space)" << std::endl;
}
if (bUnbuffered && !bLowLatency)
{
oss << "Note: Options -bf and -lookahead are unavailable for this app" << std::endl;
}
return oss.str();
}
/**
* @brief Generate and return a string describing the values of the main/common
* encoder initialization parameters
*/
std::string MainParamToString(const NV_ENC_INITIALIZE_PARAMS *pParams) {
std::ostringstream os;
os
<< "Encoding Parameters:"
<< std::endl << "\tcodec : " << ConvertValueToString(vCodec, szCodecNames, pParams->encodeGUID)
<< std::endl << "\tpreset : " << ConvertValueToString(vPreset, szPresetNames, pParams->presetGUID);
if (pParams->tuningInfo)
{
os << std::endl << "\ttuningInfo : " << ConvertValueToString(vTuningInfo, szTuningInfoNames, pParams->tuningInfo);
}
os
<< std::endl << "\tprofile : " << ConvertValueToString(vProfile, szProfileNames, pParams->encodeConfig->profileGUID)
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC : pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? 0 : pParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8) + 8
<< std::endl << "\trc : " << ConvertValueToString(vRcMode, szRcModeNames, pParams->encodeConfig->rcParams.rateControlMode)
;
if (pParams->encodeConfig->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CONSTQP) {
os << " (P,B,I=" << pParams->encodeConfig->rcParams.constQP.qpInterP << "," << pParams->encodeConfig->rcParams.constQP.qpInterB << "," << pParams->encodeConfig->rcParams.constQP.qpIntra << ")";
}
os
<< std::endl << "\tfps : " << pParams->frameRateNum << "/" << pParams->frameRateDen
<< std::endl << "\tgop : " << (pParams->encodeConfig->gopLength == NVENC_INFINITE_GOPLENGTH ? "INF" : std::to_string(pParams->encodeConfig->gopLength))
<< std::endl << "\tbf : " << pParams->encodeConfig->frameIntervalP - 1
<< std::endl << "\tmultipass : " << pParams->encodeConfig->rcParams.multiPass
<< std::endl << "\tsize : " << pParams->encodeWidth << "x" << pParams->encodeHeight
<< std::endl << "\tbitrate : " << pParams->encodeConfig->rcParams.averageBitRate
<< std::endl << "\tmaxbitrate : " << pParams->encodeConfig->rcParams.maxBitRate
<< std::endl << "\tvbvbufsize : " << pParams->encodeConfig->rcParams.vbvBufferSize
<< std::endl << "\tvbvinit : " << pParams->encodeConfig->rcParams.vbvInitialDelay
<< std::endl << "\taq : " << (pParams->encodeConfig->rcParams.enableAQ ? (pParams->encodeConfig->rcParams.aqStrength ? std::to_string(pParams->encodeConfig->rcParams.aqStrength) : "auto") : "disabled")
<< std::endl << "\ttemporalaq : " << (pParams->encodeConfig->rcParams.enableTemporalAQ ? "enabled" : "disabled")
<< std::endl << "\tlookahead : " << (pParams->encodeConfig->rcParams.enableLookahead ? std::to_string(pParams->encodeConfig->rcParams.lookaheadDepth) : "disabled")
<< std::endl << "\tcq : " << (unsigned int)pParams->encodeConfig->rcParams.targetQuality
<< std::endl << "\tqmin : P,B,I=" << (int)pParams->encodeConfig->rcParams.minQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.minQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.minQP.qpIntra
<< std::endl << "\tqmax : P,B,I=" << (int)pParams->encodeConfig->rcParams.maxQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.maxQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.maxQP.qpIntra
<< std::endl << "\tinitqp : P,B,I=" << (int)pParams->encodeConfig->rcParams.initialRCQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.initialRCQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.initialRCQP.qpIntra
;
return os.str();
}
public:
virtual GUID GetEncodeGUID() { return guidCodec; }
virtual GUID GetPresetGUID() { return guidPreset; }
virtual NV_ENC_TUNING_INFO GetTuningInfo() { return m_TuningInfo; }
/*
* @brief Set encoder initialization parameters based on input options
* This method parses the tokens formed from the command line options
* provided to the application and sets the fields from NV_ENC_INITIALIZE_PARAMS
* based on the supplied values.
*/
virtual void SetInitParams(NV_ENC_INITIALIZE_PARAMS *pParams, NV_ENC_BUFFER_FORMAT eBufferFormat)
{
NV_ENC_CONFIG &config = *pParams->encodeConfig;
for (unsigned i = 0; i < tokens.size(); i++)
{
if (
tokens[i] == "-codec" && ++i ||
tokens[i] == "-preset" && ++i ||
tokens[i] == "-tuninginfo" && ++i ||
tokens[i] == "-multipass" && ++i != tokens.size() && ParseString("-multipass", tokens[i], vMultiPass, szMultipass, &config.rcParams.multiPass) ||
tokens[i] == "-profile" && ++i != tokens.size() && (IsCodecH264() ?
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID)) ||
tokens[i] == "-rc" && ++i != tokens.size() && ParseString("-rc", tokens[i], vRcMode, szRcModeNames, &config.rcParams.rateControlMode) ||
tokens[i] == "-fps" && ++i != tokens.size() && ParseInt("-fps", tokens[i], &pParams->frameRateNum) ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP ||
tokens[i] == "-bitrate" && ++i != tokens.size() && ParseBitRate("-bitrate", tokens[i], &config.rcParams.averageBitRate) ||
tokens[i] == "-maxbitrate" && ++i != tokens.size() && ParseBitRate("-maxbitrate", tokens[i], &config.rcParams.maxBitRate) ||
tokens[i] == "-vbvbufsize" && ++i != tokens.size() && ParseBitRate("-vbvbufsize", tokens[i], &config.rcParams.vbvBufferSize) ||
tokens[i] == "-vbvinit" && ++i != tokens.size() && ParseBitRate("-vbvinit", tokens[i], &config.rcParams.vbvInitialDelay) ||
tokens[i] == "-cq" && ++i != tokens.size() && ParseInt("-cq", tokens[i], &config.rcParams.targetQuality) ||
tokens[i] == "-initqp" && ++i != tokens.size() && ParseQp("-initqp", tokens[i], &config.rcParams.initialRCQP) && (config.rcParams.enableInitialRCQP = true) ||
tokens[i] == "-qmin" && ++i != tokens.size() && ParseQp("-qmin", tokens[i], &config.rcParams.minQP) && (config.rcParams.enableMinQP = true) ||
tokens[i] == "-qmax" && ++i != tokens.size() && ParseQp("-qmax", tokens[i], &config.rcParams.maxQP) && (config.rcParams.enableMaxQP = true) ||
tokens[i] == "-constqp" && ++i != tokens.size() && ParseQp("-constqp", tokens[i], &config.rcParams.constQP) ||
tokens[i] == "-temporalaq" && (config.rcParams.enableTemporalAQ = true)
)
{
continue;
}
if (tokens[i] == "-lookahead" && ++i != tokens.size() && ParseInt("-lookahead", tokens[i], &config.rcParams.lookaheadDepth))
{
config.rcParams.enableLookahead = config.rcParams.lookaheadDepth > 0;
continue;
}
int aqStrength;
if (tokens[i] == "-aq" && ++i != tokens.size() && ParseInt("-aq", tokens[i], &aqStrength)) {
config.rcParams.enableAQ = true;
config.rcParams.aqStrength = aqStrength;
continue;
}
if (tokens[i] == "-gop" && ++i != tokens.size() && ParseInt("-gop", tokens[i], &config.gopLength))
{
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.idrPeriod = config.gopLength;
}
else
{
config.encodeCodecConfig.hevcConfig.idrPeriod = config.gopLength;
}
continue;
}
if (tokens[i] == "-444")
{
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
} else
{
config.encodeCodecConfig.hevcConfig.chromaFormatIDC = 3;
}
continue;
}
std::ostringstream errmessage;
errmessage << "Incorrect parameter: " << tokens[i] << std::endl;
errmessage << "Re-run the application with the -h option to get a list of the supported options.";
errmessage << std::endl;
throw std::invalid_argument(errmessage.str());
}
if (IsCodecHEVC())
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
config.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2;
}
}
funcInit(pParams);
LOG(INFO) << NvEncoderInitParam().MainParamToString(pParams);
LOG(TRACE) << NvEncoderInitParam().FullParamToString(pParams);
}
private:
/*
* Helper methods for parsing tokens (generated by splitting the command line)
* and performing conversions to the appropriate target type/value.
*/
template<typename T>
bool ParseString(const std::string &strName, const std::string &strValue, const std::vector<T> &vValue, const std::string &strValueNames, T *pValue) {
std::vector<std::string> vstrValueName = split(strValueNames, ' ');
auto it = std::find(vstrValueName.begin(), vstrValueName.end(), strValue);
if (it == vstrValueName.end()) {
LOG(ERROR) << strName << " options: " << strValueNames;
return false;
}
*pValue = vValue[it - vstrValueName.begin()];
return true;
}
template<typename T>
std::string ConvertValueToString(const std::vector<T> &vValue, const std::string &strValueNames, T value) {
auto it = std::find(vValue.begin(), vValue.end(), value);
if (it == vValue.end()) {
LOG(ERROR) << "Invalid value. Can't convert to one of " << strValueNames;
return std::string();
}
return split(strValueNames, ' ')[it - vValue.begin()];
}
bool ParseBitRate(const std::string &strName, const std::string &strValue, unsigned *pBitRate) {
try {
size_t l;
double r = std::stod(strValue, &l);
char c = strValue[l];
if (c != 0 && c != 'k' && c != 'm') {
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
} catch (std::invalid_argument &) {
return false;
}
return true;
}
template<typename T>
bool ParseInt(const std::string &strName, const std::string &strValue, T *pInt) {
try {
*pInt = std::stoi(strValue);
} catch (std::invalid_argument &) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
bool ParseQp(const std::string &strName, const std::string &strValue, NV_ENC_QP *pQp) {
std::vector<std::string> vQp = split(strValue, ',');
try {
if (vQp.size() == 1) {
unsigned qp = (unsigned)std::stoi(vQp[0]);
*pQp = {qp, qp, qp};
} else if (vQp.size() == 3) {
*pQp = {(unsigned)std::stoi(vQp[0]), (unsigned)std::stoi(vQp[1]), (unsigned)std::stoi(vQp[2])};
} else {
LOG(ERROR) << strName << " qp_for_P_B_I or qp_P,qp_B,qp_I (no space is allowed)";
return false;
}
} catch (std::invalid_argument &) {
return false;
}
return true;
}
std::vector<std::string> split(const std::string &s, char delim) {
std::stringstream ss(s);
std::string token;
std::vector<std::string> tokens;
while (getline(ss, token, delim)) {
tokens.push_back(token);
}
return tokens;
}
private:
std::string strParam;
std::function<void(NV_ENC_INITIALIZE_PARAMS *pParams)> funcInit = [](NV_ENC_INITIALIZE_PARAMS *pParams){};
std::vector<std::string> tokens;
GUID guidCodec = NV_ENC_CODEC_H264_GUID;
GUID guidPreset = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO m_TuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
bool bLowLatency = false;
const char *szCodecNames = "h264 hevc";
std::vector<GUID> vCodec = std::vector<GUID> {
NV_ENC_CODEC_H264_GUID,
NV_ENC_CODEC_HEVC_GUID
};
const char *szChromaNames = "yuv420 yuv444";
std::vector<uint32_t> vChroma = std::vector<uint32_t>
{
1, 3
};
const char *szPresetNames = "p1 p2 p3 p4 p5 p6 p7";
std::vector<GUID> vPreset = std::vector<GUID> {
NV_ENC_PRESET_P1_GUID,
NV_ENC_PRESET_P2_GUID,
NV_ENC_PRESET_P3_GUID,
NV_ENC_PRESET_P4_GUID,
NV_ENC_PRESET_P5_GUID,
NV_ENC_PRESET_P6_GUID,
NV_ENC_PRESET_P7_GUID,
};
const char *szH264ProfileNames = "baseline main high high444";
std::vector<GUID> vH264Profile = std::vector<GUID> {
NV_ENC_H264_PROFILE_BASELINE_GUID,
NV_ENC_H264_PROFILE_MAIN_GUID,
NV_ENC_H264_PROFILE_HIGH_GUID,
NV_ENC_H264_PROFILE_HIGH_444_GUID,
};
const char *szHevcProfileNames = "main main10 frext";
std::vector<GUID> vHevcProfile = std::vector<GUID> {
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szProfileNames = "(default) auto baseline(h264) main(h264) high(h264) high444(h264)"
" stereo(h264) progressiv_high(h264) constrained_high(h264)"
" main(hevc) main10(hevc) frext(hevc)";
std::vector<GUID> vProfile = std::vector<GUID> {
GUID{},
NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID,
NV_ENC_H264_PROFILE_BASELINE_GUID,
NV_ENC_H264_PROFILE_MAIN_GUID,
NV_ENC_H264_PROFILE_HIGH_GUID,
NV_ENC_H264_PROFILE_HIGH_444_GUID,
NV_ENC_H264_PROFILE_STEREO_GUID,
NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID,
NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID,
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szLowLatencyTuningInfoNames = "lowlatency ultralowlatency";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless";
std::vector<NV_ENC_TUNING_INFO> vTuningInfo = std::vector<NV_ENC_TUNING_INFO>{
NV_ENC_TUNING_INFO_HIGH_QUALITY,
NV_ENC_TUNING_INFO_LOW_LATENCY,
NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
NV_ENC_TUNING_INFO_LOSSLESS
};
const char *szRcModeNames = "constqp vbr cbr";
std::vector<NV_ENC_PARAMS_RC_MODE> vRcMode = std::vector<NV_ENC_PARAMS_RC_MODE> {
NV_ENC_PARAMS_RC_CONSTQP,
NV_ENC_PARAMS_RC_VBR,
NV_ENC_PARAMS_RC_CBR,
};
const char *szMultipass = "disabled qres fullres";
std::vector<NV_ENC_MULTI_PASS> vMultiPass = std::vector<NV_ENC_MULTI_PASS>{
NV_ENC_MULTI_PASS_DISABLED,
NV_ENC_TWO_PASS_QUARTER_RESOLUTION,
NV_ENC_TWO_PASS_FULL_RESOLUTION,
};
const char *szQpMapModeNames = "disabled emphasis_level_map delta_qp_map qp_map";
std::vector<NV_ENC_QP_MAP_MODE> vQpMapMode = std::vector<NV_ENC_QP_MAP_MODE> {
NV_ENC_QP_MAP_DISABLED,
NV_ENC_QP_MAP_EMPHASIS,
NV_ENC_QP_MAP_DELTA,
NV_ENC_QP_MAP,
};
public:
/*
* Generates and returns a string describing the values for each field in
* the NV_ENC_INITIALIZE_PARAMS structure (i.e. a description of the entire
* set of initialization parameters supplied to the API).
*/
std::string FullParamToString(const NV_ENC_INITIALIZE_PARAMS *pInitializeParams) {
std::ostringstream os;
os << "NV_ENC_INITIALIZE_PARAMS:" << std::endl
<< "encodeGUID: " << ConvertValueToString(vCodec, szCodecNames, pInitializeParams->encodeGUID) << std::endl
<< "presetGUID: " << ConvertValueToString(vPreset, szPresetNames, pInitializeParams->presetGUID) << std::endl;
if (pInitializeParams->tuningInfo)
{
os << "tuningInfo: " << ConvertValueToString(vTuningInfo, szTuningInfoNames, pInitializeParams->tuningInfo) << std::endl;
}
os
<< "encodeWidth: " << pInitializeParams->encodeWidth << std::endl
<< "encodeHeight: " << pInitializeParams->encodeHeight << std::endl
<< "darWidth: " << pInitializeParams->darWidth << std::endl
<< "darHeight: " << pInitializeParams->darHeight << std::endl
<< "frameRateNum: " << pInitializeParams->frameRateNum << std::endl
<< "frameRateDen: " << pInitializeParams->frameRateDen << std::endl
<< "enableEncodeAsync: " << pInitializeParams->enableEncodeAsync << std::endl
<< "reportSliceOffsets: " << pInitializeParams->reportSliceOffsets << std::endl
<< "enableSubFrameWrite: " << pInitializeParams->enableSubFrameWrite << std::endl
<< "enableExternalMEHints: " << pInitializeParams->enableExternalMEHints << std::endl
<< "enableMEOnlyMode: " << pInitializeParams->enableMEOnlyMode << std::endl
<< "enableWeightedPrediction: " << pInitializeParams->enableWeightedPrediction << std::endl
<< "maxEncodeWidth: " << pInitializeParams->maxEncodeWidth << std::endl
<< "maxEncodeHeight: " << pInitializeParams->maxEncodeHeight << std::endl
<< "maxMEHintCountsPerBlock: " << pInitializeParams->maxMEHintCountsPerBlock << std::endl
;
NV_ENC_CONFIG *pConfig = pInitializeParams->encodeConfig;
os << "NV_ENC_CONFIG:" << std::endl
<< "profile: " << ConvertValueToString(vProfile, szProfileNames, pConfig->profileGUID) << std::endl
<< "gopLength: " << pConfig->gopLength << std::endl
<< "frameIntervalP: " << pConfig->frameIntervalP << std::endl
<< "monoChromeEncoding: " << pConfig->monoChromeEncoding << std::endl
<< "frameFieldMode: " << pConfig->frameFieldMode << std::endl
<< "mvPrecision: " << pConfig->mvPrecision << std::endl
<< "NV_ENC_RC_PARAMS:" << std::endl
<< " rateControlMode: 0x" << std::hex << pConfig->rcParams.rateControlMode << std::dec << std::endl
<< " constQP: " << pConfig->rcParams.constQP.qpInterP << ", " << pConfig->rcParams.constQP.qpInterB << ", " << pConfig->rcParams.constQP.qpIntra << std::endl
<< " averageBitRate: " << pConfig->rcParams.averageBitRate << std::endl
<< " maxBitRate: " << pConfig->rcParams.maxBitRate << std::endl
<< " vbvBufferSize: " << pConfig->rcParams.vbvBufferSize << std::endl
<< " vbvInitialDelay: " << pConfig->rcParams.vbvInitialDelay << std::endl
<< " enableMinQP: " << pConfig->rcParams.enableMinQP << std::endl
<< " enableMaxQP: " << pConfig->rcParams.enableMaxQP << std::endl
<< " enableInitialRCQP: " << pConfig->rcParams.enableInitialRCQP << std::endl
<< " enableAQ: " << pConfig->rcParams.enableAQ << std::endl
<< " qpMapMode: " << ConvertValueToString(vQpMapMode, szQpMapModeNames, pConfig->rcParams.qpMapMode) << std::endl
<< " multipass: " << ConvertValueToString(vMultiPass, szMultipass, pConfig->rcParams.multiPass) << std::endl
<< " enableLookahead: " << pConfig->rcParams.enableLookahead << std::endl
<< " disableIadapt: " << pConfig->rcParams.disableIadapt << std::endl
<< " disableBadapt: " << pConfig->rcParams.disableBadapt << std::endl
<< " enableTemporalAQ: " << pConfig->rcParams.enableTemporalAQ << std::endl
<< " zeroReorderDelay: " << pConfig->rcParams.zeroReorderDelay << std::endl
<< " enableNonRefP: " << pConfig->rcParams.enableNonRefP << std::endl
<< " strictGOPTarget: " << pConfig->rcParams.strictGOPTarget << std::endl
<< " aqStrength: " << pConfig->rcParams.aqStrength << std::endl
<< " minQP: " << pConfig->rcParams.minQP.qpInterP << ", " << pConfig->rcParams.minQP.qpInterB << ", " << pConfig->rcParams.minQP.qpIntra << std::endl
<< " maxQP: " << pConfig->rcParams.maxQP.qpInterP << ", " << pConfig->rcParams.maxQP.qpInterB << ", " << pConfig->rcParams.maxQP.qpIntra << std::endl
<< " initialRCQP: " << pConfig->rcParams.initialRCQP.qpInterP << ", " << pConfig->rcParams.initialRCQP.qpInterB << ", " << pConfig->rcParams.initialRCQP.qpIntra << std::endl
<< " temporallayerIdxMask: " << pConfig->rcParams.temporallayerIdxMask << std::endl
<< " temporalLayerQP: " << (int)pConfig->rcParams.temporalLayerQP[0] << ", " << (int)pConfig->rcParams.temporalLayerQP[1] << ", " << (int)pConfig->rcParams.temporalLayerQP[2] << ", " << (int)pConfig->rcParams.temporalLayerQP[3] << ", " << (int)pConfig->rcParams.temporalLayerQP[4] << ", " << (int)pConfig->rcParams.temporalLayerQP[5] << ", " << (int)pConfig->rcParams.temporalLayerQP[6] << ", " << (int)pConfig->rcParams.temporalLayerQP[7] << std::endl
<< " targetQuality: " << pConfig->rcParams.targetQuality << std::endl
<< " lookaheadDepth: " << pConfig->rcParams.lookaheadDepth << std::endl;
if (pInitializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (H264):" << std::endl
<< " enableStereoMVC: " << pConfig->encodeCodecConfig.h264Config.enableStereoMVC << std::endl
<< " hierarchicalPFrames: " << pConfig->encodeCodecConfig.h264Config.hierarchicalPFrames << std::endl
<< " hierarchicalBFrames: " << pConfig->encodeCodecConfig.h264Config.hierarchicalBFrames << std::endl
<< " outputBufferingPeriodSEI: " << pConfig->encodeCodecConfig.h264Config.outputBufferingPeriodSEI << std::endl
<< " outputPictureTimingSEI: " << pConfig->encodeCodecConfig.h264Config.outputPictureTimingSEI << std::endl
<< " outputAUD: " << pConfig->encodeCodecConfig.h264Config.outputAUD << std::endl
<< " disableSPSPPS: " << pConfig->encodeCodecConfig.h264Config.disableSPSPPS << std::endl
<< " outputFramePackingSEI: " << pConfig->encodeCodecConfig.h264Config.outputFramePackingSEI << std::endl
<< " outputRecoveryPointSEI: " << pConfig->encodeCodecConfig.h264Config.outputRecoveryPointSEI << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.h264Config.enableIntraRefresh << std::endl
<< " enableConstrainedEncoding: " << pConfig->encodeCodecConfig.h264Config.enableConstrainedEncoding << std::endl
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.h264Config.repeatSPSPPS << std::endl
<< " enableVFR: " << pConfig->encodeCodecConfig.h264Config.enableVFR << std::endl
<< " enableLTR: " << pConfig->encodeCodecConfig.h264Config.enableLTR << std::endl
<< " qpPrimeYZeroTransformBypassFlag: " << pConfig->encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag << std::endl
<< " useConstrainedIntraPred: " << pConfig->encodeCodecConfig.h264Config.useConstrainedIntraPred << std::endl
<< " level: " << pConfig->encodeCodecConfig.h264Config.level << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.h264Config.idrPeriod << std::endl
<< " separateColourPlaneFlag: " << pConfig->encodeCodecConfig.h264Config.separateColourPlaneFlag << std::endl
<< " disableDeblockingFilterIDC: " << pConfig->encodeCodecConfig.h264Config.disableDeblockingFilterIDC << std::endl
<< " numTemporalLayers: " << pConfig->encodeCodecConfig.h264Config.numTemporalLayers << std::endl
<< " spsId: " << pConfig->encodeCodecConfig.h264Config.spsId << std::endl
<< " ppsId: " << pConfig->encodeCodecConfig.h264Config.ppsId << std::endl
<< " adaptiveTransformMode: " << pConfig->encodeCodecConfig.h264Config.adaptiveTransformMode << std::endl
<< " fmoMode: " << pConfig->encodeCodecConfig.h264Config.fmoMode << std::endl
<< " bdirectMode: " << pConfig->encodeCodecConfig.h264Config.bdirectMode << std::endl
<< " entropyCodingMode: " << pConfig->encodeCodecConfig.h264Config.entropyCodingMode << std::endl
<< " stereoMode: " << pConfig->encodeCodecConfig.h264Config.stereoMode << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.h264Config.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.h264Config.intraRefreshCnt << std::endl
<< " maxNumRefFrames: " << pConfig->encodeCodecConfig.h264Config.maxNumRefFrames << std::endl
<< " sliceMode: " << pConfig->encodeCodecConfig.h264Config.sliceMode << std::endl
<< " sliceModeData: " << pConfig->encodeCodecConfig.h264Config.sliceModeData << std::endl
<< " NV_ENC_CONFIG_H264_VUI_PARAMETERS:" << std::endl
<< " overscanInfoPresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.overscanInfoPresentFlag << std::endl
<< " overscanInfo: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.overscanInfo << std::endl
<< " videoSignalTypePresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag << std::endl
<< " videoFormat: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoFormat << std::endl
<< " videoFullRangeFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag << std::endl
<< " colourDescriptionPresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag << std::endl
<< " colourPrimaries: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics << std::endl
<< " colourMatrix: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix << std::endl
<< " chromaSampleLocationFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationFlag << std::endl
<< " chromaSampleLocationTop: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationTop << std::endl
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrNumFrames: " << pConfig->encodeCodecConfig.h264Config.ltrNumFrames << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.h264Config.ltrTrustMode << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.h264Config.chromaFormatIDC << std::endl
<< " maxTemporalLayers: " << pConfig->encodeCodecConfig.h264Config.maxTemporalLayers << std::endl;
} else if (pInitializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (HEVC):" << std::endl
<< " level: " << pConfig->encodeCodecConfig.hevcConfig.level << std::endl
<< " tier: " << pConfig->encodeCodecConfig.hevcConfig.tier << std::endl
<< " minCUSize: " << pConfig->encodeCodecConfig.hevcConfig.minCUSize << std::endl
<< " maxCUSize: " << pConfig->encodeCodecConfig.hevcConfig.maxCUSize << std::endl
<< " useConstrainedIntraPred: " << pConfig->encodeCodecConfig.hevcConfig.useConstrainedIntraPred << std::endl
<< " disableDeblockAcrossSliceBoundary: " << pConfig->encodeCodecConfig.hevcConfig.disableDeblockAcrossSliceBoundary << std::endl
<< " outputBufferingPeriodSEI: " << pConfig->encodeCodecConfig.hevcConfig.outputBufferingPeriodSEI << std::endl
<< " outputPictureTimingSEI: " << pConfig->encodeCodecConfig.hevcConfig.outputPictureTimingSEI << std::endl
<< " outputAUD: " << pConfig->encodeCodecConfig.hevcConfig.outputAUD << std::endl
<< " enableLTR: " << pConfig->encodeCodecConfig.hevcConfig.enableLTR << std::endl
<< " disableSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.disableSPSPPS << std::endl
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.repeatSPSPPS << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.hevcConfig.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC << std::endl
<< " pixelBitDepthMinus8: " << pConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.hevcConfig.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshCnt << std::endl
<< " maxNumRefFramesInDPB: " << pConfig->encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB << std::endl
<< " ltrNumFrames: " << pConfig->encodeCodecConfig.hevcConfig.ltrNumFrames << std::endl
<< " vpsId: " << pConfig->encodeCodecConfig.hevcConfig.vpsId << std::endl
<< " spsId: " << pConfig->encodeCodecConfig.hevcConfig.spsId << std::endl
<< " ppsId: " << pConfig->encodeCodecConfig.hevcConfig.ppsId << std::endl
<< " sliceMode: " << pConfig->encodeCodecConfig.hevcConfig.sliceMode << std::endl
<< " sliceModeData: " << pConfig->encodeCodecConfig.hevcConfig.sliceModeData << std::endl
<< " maxTemporalLayersMinus1: " << pConfig->encodeCodecConfig.hevcConfig.maxTemporalLayersMinus1 << std::endl
<< " NV_ENC_CONFIG_HEVC_VUI_PARAMETERS:" << std::endl
<< " overscanInfoPresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.overscanInfoPresentFlag << std::endl
<< " overscanInfo: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.overscanInfo << std::endl
<< " videoSignalTypePresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoSignalTypePresentFlag << std::endl
<< " videoFormat: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat << std::endl
<< " videoFullRangeFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag << std::endl
<< " colourDescriptionPresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag << std::endl
<< " colourPrimaries: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.transferCharacteristics << std::endl
<< " colourMatrix: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourMatrix << std::endl
<< " chromaSampleLocationFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationFlag << std::endl
<< " chromaSampleLocationTop: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationTop << std::endl
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.hevcConfig.ltrTrustMode << std::endl;
}
return os.str();
}
};
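// --- Usage sketch (illustrative only) --- the option string mirrors the
// help text above; pParams is assumed to be pre-populated with codec
// defaults elsewhere (e.g. by the NvEncoder class, which is not part of
// this header).
inline void ApplyCliOptionsSketch(NV_ENC_INITIALIZE_PARAMS *pParams) {
  NvEncoderInitParam cliParam(
      "-codec hevc -preset p5 -tuninginfo lowlatency -bitrate 4M -fps 60");
  cliParam.SetInitParams(pParams, NV_ENC_BUFFER_FORMAT_NV12);
}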

View File

@@ -0,0 +1,192 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
template<typename YuvUnitx2>
static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int ix = blockIdx.x * blockDim.x + threadIdx.x,
iy = blockIdx.y * blockDim.y + threadIdx.y;
if (ix >= nWidth / 2 || iy >= nHeight / 2) {
return;
}
int x = ix * 2, y = iy * 2;
typedef decltype(YuvUnitx2::x) YuvUnit;
const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
y++;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
*(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
}
template <typename YuvUnitx2>
static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
cudaTextureObject_t texY=0;
ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
resDesc.res.pitch2D.width = nSrcWidth / 2;
resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
cudaTextureObject_t texUv=0;
ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
Resize<YuvUnitx2><<<dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32), dim3(16, 16)>>>(texY, texUv, dpDst, dpDstUV,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
ck(cudaDestroyTextureObject(texY));
ck(cudaDestroyTextureObject(texUv));
}
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
{
unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
}
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
{
unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
}
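// --- Usage sketch (illustrative only) --- halve a 1080p NV12 surface that
// already lives in CUDA device memory; the pointers and pitches (in bytes)
// are placeholders.
void DownscaleNv12Sketch(unsigned char *dpSrc, int nSrcPitch,
                         unsigned char *dpDst, int nDstPitch) {
  ResizeNv12(dpDst, nDstPitch, 960, 540, dpSrc, nSrcPitch, 1920, 1080);
  ck(cudaStreamSynchronize(0));  // the kernel launch above is asynchronous
}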
static __global__ void Scale(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
*(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
}
static __global__ void Scale_uv(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
*(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
}
void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false)
{
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
cudaTextureObject_t texSrc = 0;
ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
if (bUVPlane)
{
Scale_uv<<<gridSize, blockSize>>>(texSrc, dpDst,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
else
{
Scale<<<gridSize, blockSize>>>(texSrc, dpDst,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
ck(cudaGetLastError());
ck(cudaDestroyTextureObject(texSrc));
}
void ScaleYUV420(unsigned char *dpDstY,
unsigned char* dpDstU,
unsigned char* dpDstV,
int nDstPitch,
int nDstChromaPitch,
int nDstWidth,
int nDstHeight,
unsigned char *dpSrcY,
unsigned char* dpSrcU,
unsigned char* dpSrcV,
int nSrcPitch,
int nSrcChromaPitch,
int nSrcWidth,
int nSrcHeight,
bool bSemiplanar)
{
int chromaWidthDst = (nDstWidth + 1) / 2;
int chromaHeightDst = (nDstHeight + 1) / 2;
int chromaWidthSrc = (nSrcWidth + 1) / 2;
int chromaHeightSrc = (nSrcHeight + 1) / 2;
ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
if (bSemiplanar)
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
}
else
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
}
}
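// --- Usage sketch (illustrative only) --- planar path (bSemiplanar = false):
// chroma planes are quarter size, so they carry their own pitch. Scales
// 1920x1080 down to 960x540 with pitches equal to the plane widths.
void ScaleI420Sketch(unsigned char *dpSrcY, unsigned char *dpSrcU,
                     unsigned char *dpSrcV, unsigned char *dpDstY,
                     unsigned char *dpDstU, unsigned char *dpDstV) {
  ScaleYUV420(dpDstY, dpDstU, dpDstV, 960, 480, 960, 540,
              dpSrcY, dpSrcU, dpSrcV, 1920, 960, 1920, 1080,
              false /* planar, not semiplanar */);
}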

thirdparty/nvcodec/Samples/Utils/crc.cu vendored Normal file
View File

@@ -0,0 +1,126 @@
/*
* Copyright 2018-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
/*
* CRC32 lookup table
* Generated by the following routine
* int i, j;
* U032 crc;
* for (i = 0; i < 256; i++)
* {
* crc = i;
* for (j = 0; j < 8; j++) { // 8 reduction
* crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320L : 0);
* }
* Crc32Table[i] = crc;
* }
*/
__device__ __constant__ uint32_t Crc32Table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
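// ---------------------------------------------------------------------------
// Host-side sanity check (not part of the vendored file): regenerating the
// table with the standard reflected CRC-32 polynomial 0xEDB88320, exactly as
// the comment above describes, reproduces the 256 constants.
// ---------------------------------------------------------------------------
static void GenerateCrc32TableHost(uint32_t table[256])
{
    for (uint32_t i = 0; i < 256; i++)
    {
        uint32_t crc = i;
        for (int j = 0; j < 8; j++)  // 8 rounds of reduction per byte value
            crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0u);
        table[i] = crc;
    }
}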
typedef struct _NV_ENC_ENCODE_OUT_PARAMS
{
    uint32_t version;              /**< [out]: Struct version. */
    uint32_t bitstreamSizeInBytes; /**< [out]: Encoded bitstream size in bytes */
    uint32_t cycleCount;           /**< [out]: Cycle count */
    uint32_t firstPassCycleCount;  /**< [out]: First pass cycle count */
    uint32_t reserved[60];         /**< [out]: Reserved and must be set to 0 */
} NV_ENC_ENCODE_OUT_PARAMS;
static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
{
    // The buffer begins with NV_ENC_ENCODE_OUT_PARAMS; the bitstream follows it.
    NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
    uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
    uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
    uint32_t crc = ~0u;
    for (uint32_t i = 0; i < bitstreamSize; i++)
    {
        crc = (crc >> 8) ^ Crc32Table[((uint8_t)crc) ^ (*pEncStream++)];
    }
    *crcValue = ~crc;
}
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
{
    // Single-thread launch: the CRC chains byte to byte, so it cannot be
    // parallelized naively; it runs on-device to avoid a readback.
    dim3 blockSize(1, 1, 1);
    dim3 gridSize(1, 1, 1);
    ComputeCRCKernel<<<gridSize, blockSize, 0, outputCUStream>>>(pBuffer, crcValue);
}
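// ---------------------------------------------------------------------------
// Usage sketch (illustration only; pDevEncOut is an assumed device pointer to
// an NVENC output buffer that begins with NV_ENC_ENCODE_OUT_PARAMS, as the
// kernel above expects): compute the CRC on the stream, then read it back.
// ---------------------------------------------------------------------------
uint32_t CrcOfEncodedFrameSketch(uint8_t *pDevEncOut, cudaStream_t stream)
{
    uint32_t *dCrc = nullptr;
    uint32_t hCrc = 0;
    ck(cudaMalloc((void **)&dCrc, sizeof(uint32_t)));
    ComputeCRC(pDevEncOut, dCrc, stream);
    ck(cudaMemcpyAsync(&hCrc, dCrc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream));
    ck(cudaStreamSynchronize(stream));  // hCrc is valid only after this point
    ck(cudaFree(dCrc));
    return hCrc;
}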

xmake.lua

@@ -17,6 +17,7 @@ add_defines("ASIO_STANDALONE", "ASIO_HAS_STD_TYPE_TRAITS", "ASIO_HAS_STD_SHARED_
 if is_os("windows") then
     add_defines("_WEBSOCKETPP_CPP11_INTERNAL_")
     add_links("ws2_32", "Bcrypt")
+    add_requires("cuda")
 elseif is_os("linux") then
     add_links("pthread")
     set_config("cxxflags", "-fPIC")
@@ -57,10 +58,24 @@ target("ws")
     add_packages("asio")
     add_includedirs("thirdparty/websocketpp/include", {public = true})
 
+target("media")
+    set_kind("static")
+    add_deps("log")
+    add_packages("cuda")
+    add_links("cuda", "nvencodeapi", "nvcuvid")
+    add_files("src/media/video/encode/nvcodec/*.cpp",
+              "src/media/video/decode/nvcodec/*.cpp")
+    add_includedirs("src/media/video/encode/nvcodec",
+                    "src/media/video/decode/nvcodec",
+                    "thirdparty/nvcodec/Interface",
+                    "thirdparty/nvcodec/Samples", {public = true})
+    add_linkdirs("thirdparty/nvcodec/Lib/x64")
+
 target("qos")
     set_kind("static")
     add_deps("log")
-    add_files("src/qos/*.cpp")
+    add_files("src/qos/kcp/*.c")
+    add_includedirs("src/qos/kcp", {public = true})
 
 target("transmission")
     set_kind("static")
@@ -72,9 +87,9 @@ target("transmission")
 target("pc")
     set_kind("static")
     add_deps("log")
-    add_deps("ws", "ice", "transmission", "inih", "common")
+    add_deps("ws", "ice", "transmission", "inih", "common", "media")
     add_files("src/pc/*.cpp")
-    add_packages("asio", "nlohmann_json")
+    add_packages("asio", "nlohmann_json", "cuda")
     add_includedirs("src/transmission", {public = true})
 
 target("projectx")
@@ -82,7 +97,7 @@ target("projectx")
     add_deps("log")
     add_deps("pc")
     add_files("src/rtc/*.cpp")
-    add_packages("asio", "nlohmann_json")
+    add_packages("asio", "nlohmann_json", "cuda")
     add_includedirs("src/rtc", "src/pc", "src/interface")
     add_rules("utils.symbols.export_all", {export_classes = true})
     -- set_policy("build.merge_archive", true)
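With the qos target now compiling the vendored KCP sources (src/qos/kcp/*.c) in place of src/qos/*.cpp, wiring KCP in as the QoS layer amounts to creating an ikcpcb, giving it a UDP output callback, and pumping its clock. A minimal C++ sketch, assuming hypothetical UdpSend() and NowMs() helpers; only the ikcp_* calls are KCP's actual public API:

#include <cstdint>
extern "C" {
#include "ikcp.h"  // from src/qos/kcp; extern "C" in case the header lacks guards
}

// Hypothetical helpers, not part of KCP: a raw UDP writer and a millisecond clock.
int UdpSend(void *udp_ctx, const char *buf, int len);
uint32_t NowMs();

// KCP invokes this whenever it has a framed segment ready for the wire.
static int KcpOutput(const char *buf, int len, ikcpcb *kcp, void *user)
{
    return UdpSend(user, buf, len);
}

ikcpcb *CreateQosSession(uint32_t conv, void *udp_ctx)
{
    ikcpcb *kcp = ikcp_create(conv, udp_ctx);  // conv must match on both peers
    ikcp_setoutput(kcp, KcpOutput);
    ikcp_nodelay(kcp, 1, 10, 2, 1);  // low-latency profile: nodelay, 10 ms tick,
                                     // fast resend after 2 dup-ACKs, no cwnd limit
    ikcp_wndsize(kcp, 128, 128);     // send/recv windows, in segments
    return kcp;
}

// Driving loop, schematically: feed every received datagram to ikcp_input(),
// call ikcp_update(kcp, NowMs()) every ~10 ms, queue outgoing data with
// ikcp_send(), drain reassembled data with ikcp_recv(), and tear the session
// down with ikcp_release().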