Use kcp as QoS module

2025-12-18 04:56:45 +08:00 · 2023-08-30 17:44:22 +08:00
parent a4cd77dcb0
commit 3c1f7973d0
79 changed files with 14442 additions and 3150 deletions
--- a/src/interface/x.h
+++ b/src/interface/x.h
@@ -5,6 +5,7 @@
 #include <stdlib.h>

 enum ws_status { WS_CONNECTING = 0, WS_OPEN, WS_FAILED, WS_CLOSED, WS_UNKNOWN };
+enum DATA_TYPE { VIDEO = 0, AUDIO, USER };

 #ifdef __cplusplus
 extern "C" {
@@ -30,7 +31,8 @@ int CreateConnection(PeerPtr* peer_ptr, const char* transmission_id,
 int JoinConnection(PeerPtr* peer_ptr, const char* transmission_id,
                   const char* user_id);

-int SendData(PeerPtr* peer_ptr, const char* data, size_t size);
+// `enum DATA_TYPE` is spelled out in full because this declaration sits in
+// the extern "C" part of the header and a bare enum tag is not a type name
+// in C. The spelling names the same type in C++.
+int SendData(PeerPtr* peer_ptr, enum DATA_TYPE data_type, const char* data,
+             size_t size);

 #ifdef __cplusplus
 }
--- a/src/media/video/decode/nvcodec/NvDecoder.cpp
+++ b/src/media/video/decode/nvcodec/NvDecoder.cpp
@@ -0,0 +1,774 @@
+/*
+ * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#include "NvDecoder.h"
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <iostream>
+
+#include "nvcuvid.h"
+
+// Timing helpers.
+// START_TIMER declares a local variable named `start` holding the current
+// high_resolution_clock time, so it can appear at most once per scope.
+// STOP_TIMER(print_message) writes print_message, the number of milliseconds
+// elapsed since `start`, and " ms " to std::cout; it requires START_TIMER to
+// have been expanded earlier in the same scope.
+#define START_TIMER auto start = std::chrono::high_resolution_clock::now();
+#define STOP_TIMER(print_message)                                     \
+  std::cout << print_message                                          \
+            << std::chrono::duration_cast<std::chrono::milliseconds>( \
+                   std::chrono::high_resolution_clock::now() - start) \
+                   .count()                                           \
+            << " ms " << std::endl;
+
+// Evaluates a CUDA driver API expression once and, when the result is not
+// CUDA_SUCCESS, throws the NVDECException produced by
+// NVDECException::makeNVDECException. The message contains the error name
+// reported by cuGetErrorName; the calling function, file and line are passed
+// along as well. Wrapped in do { } while (0) so it behaves as one statement.
+#define CUDA_DRVAPI_CALL(call)                                      \
+  do {                                                              \
+    CUresult err__ = call;                                          \
+    if (err__ != CUDA_SUCCESS) {                                    \
+      const char *szErrName = NULL;                                 \
+      cuGetErrorName(err__, &szErrName);                            \
+      std::ostringstream errorLog;                                  \
+      errorLog << "CUDA driver API error " << szErrName;            \
+      throw NVDECException::makeNVDECException(                     \
+          errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__); \
+    }                                                               \
+  } while (0)
+
+/**
+ * @brief Maps a cudaVideoCodec value to a human-readable name.
+ *
+ * Values in [0, cudaVideoCodec_NumCodecs] index the table directly. The rows
+ * listed after the cudaVideoCodec_NumCodecs entry are located by comparing
+ * their eCodec field with the requested value.
+ *
+ * @param eCodec Codec identifier to describe.
+ * @return Pointer to a string literal; "Unknown" when no row matches.
+ */
+static const char *GetVideoCodecString(cudaVideoCodec eCodec) {
+  static const struct {
+    cudaVideoCodec eCodec;
+    const char *name;
+  } aCodecName[] = {
+      {cudaVideoCodec_MPEG1, "MPEG-1"},
+      {cudaVideoCodec_MPEG2, "MPEG-2"},
+      {cudaVideoCodec_MPEG4, "MPEG-4 (ASP)"},
+      {cudaVideoCodec_VC1, "VC-1/WMV"},
+      {cudaVideoCodec_H264, "AVC/H.264"},
+      {cudaVideoCodec_JPEG, "M-JPEG"},
+      {cudaVideoCodec_H264_SVC, "H.264/SVC"},
+      {cudaVideoCodec_H264_MVC, "H.264/MVC"},
+      {cudaVideoCodec_HEVC, "H.265/HEVC"},
+      {cudaVideoCodec_VP8, "VP8"},
+      {cudaVideoCodec_VP9, "VP9"},
+      {cudaVideoCodec_AV1, "AV1"},
+      {cudaVideoCodec_NumCodecs, "Invalid"},
+      {cudaVideoCodec_YUV420, "YUV  4:2:0"},
+      {cudaVideoCodec_YV12, "YV12 4:2:0"},
+      {cudaVideoCodec_NV12, "NV12 4:2:0"},
+      {cudaVideoCodec_YUYV, "YUYV 4:2:2"},
+      {cudaVideoCodec_UYVY, "UYVY 4:2:2"},
+  };
+  const int nEntries =
+      static_cast<int>(sizeof(aCodecName) / sizeof(aCodecName[0]));
+
+  if (eCodec >= 0 && eCodec <= cudaVideoCodec_NumCodecs) {
+    return aCodecName[eCodec].name;
+  }
+  // On this path eCodec is an enum value, not a table position, so the name
+  // has to come from the row that matched (index i), never from
+  // aCodecName[eCodec].
+  for (int i = cudaVideoCodec_NumCodecs + 1; i < nEntries; i++) {
+    if (eCodec == aCodecName[i].eCodec) {
+      return aCodecName[i].name;
+    }
+  }
+  return "Unknown";
+}
+
+/**
+ * @brief Returns a printable name for a chroma format.
+ *
+ * The enum value is used directly as the index into the name table.
+ *
+ * @param eChromaFormat Chroma format to describe.
+ * @return Pointer to a string literal; "Unknown" when the value lies outside
+ *         the table.
+ */
+static const char *GetVideoChromaFormatString(
+    cudaVideoChromaFormat eChromaFormat) {
+  static struct {
+    cudaVideoChromaFormat eChromaFormat;
+    const char *name;
+  } aChromaFormatName[] = {
+      {cudaVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)"},
+      {cudaVideoChromaFormat_420, "YUV 420"},
+      {cudaVideoChromaFormat_422, "YUV 422"},
+      {cudaVideoChromaFormat_444, "YUV 444"},
+  };
+
+  // Reject anything that is not a valid row index before touching the table.
+  if (eChromaFormat < 0 ||
+      eChromaFormat >=
+          sizeof(aChromaFormatName) / sizeof(aChromaFormatName[0])) {
+    return "Unknown";
+  }
+  return aChromaFormatName[eChromaFormat].name;
+}
+
+/**
+ * @brief Ratio of chroma plane height to luma height for a surface format.
+ *
+ * @param eSurfaceFormat Output surface format.
+ * @return 1.0 for the two YUV444 formats, 0.5 for every other value
+ *         (including NV12 and P016).
+ */
+static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat) {
+  const bool bFullHeightChroma =
+      eSurfaceFormat == cudaVideoSurfaceFormat_YUV444 ||
+      eSurfaceFormat == cudaVideoSurfaceFormat_YUV444_16Bit;
+  if (bFullHeightChroma) {
+    return 1.0f;
+  }
+  return 0.5f;
+}
+
+/**
+ * @brief Number of chroma planes stored for a surface format.
+ *
+ * @param eSurfaceFormat Output surface format.
+ * @return 2 for the two YUV444 formats, 1 for every other value (including
+ *         NV12 and P016).
+ */
+static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat) {
+  const bool bSeparateChromaPlanes =
+      eSurfaceFormat == cudaVideoSurfaceFormat_YUV444 ||
+      eSurfaceFormat == cudaVideoSurfaceFormat_YUV444_16Bit;
+  if (bSeparateChromaPlanes) {
+    return 2;
+  }
+  return 1;
+}
+
+/**
+ *   @brief  Returns the printable name of a codec id by forwarding to the
+ *           file-local GetVideoCodecString() lookup.
+ */
+const char *NvDecoder::GetCodecString(cudaVideoCodec eCodec) {
+  const char *const szCodecName = GetVideoCodecString(eCodec);
+  return szCodecName;
+}
+
+/* Called when the parser encounters sequence header for AV1 SVC content.
+ *
+ * Only AV1 streams that advertise more than one operating point are handled;
+ * everything else yields -1. An out-of-range m_nOperatingPoint is reset to 0
+ * before it is reported.
+ *
+ *  return value interpretation:
+ *      < 0 : fail, >=0: succeeded (bit 0-9: currOperatingPoint, bit 10-10:
+ * bDispAllLayer, bit 11-30: reserved, must be set 0)
+ */
+int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) {
+  // The av1 member is only read once the codec is known to be AV1.
+  const bool bAv1WithSvc = pOPInfo->codec == cudaVideoCodec_AV1 &&
+                           pOPInfo->av1.operating_points_cnt > 1;
+  if (!bAv1WithSvc) {
+    return -1;
+  }
+
+  // clip has SVC enabled
+  if (m_nOperatingPoint >= pOPInfo->av1.operating_points_cnt) {
+    m_nOperatingPoint = 0;
+  }
+
+  printf("AV1 SVC clip: operating point count %d  ",
+         pOPInfo->av1.operating_points_cnt);
+  printf("Selected operating point: %d, IDC 0x%x bOutputAllLayers %d\n",
+         m_nOperatingPoint,
+         pOPInfo->av1.operating_points_idc[m_nOperatingPoint],
+         m_bDispAllLayers);
+
+  // Pack the selection: operating point in the low bits, the display-all
+  // flag at bit 10.
+  return (m_nOperatingPoint | (m_bDispAllLayers << 10));
+}
+
+/* Handles a sequence header reported by the parser.
+ *
+ * Records a textual description of the stream in m_videoInfo, checks the
+ * stream against the capabilities returned by cuvidGetDecoderCaps(), and then
+ * either creates the decoder or, when output dimensions are already cached
+ * from an earlier call, forwards to ReconfigureDecoder().
+ *
+ * Return value from HandleVideoSequence() are interpreted as   :
+ *  0: fail, 1: succeeded, > 1: override dpb size of parser (set by
+ * CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
+ *
+ * On the create path the function returns
+ * pVideoFormat->min_num_decode_surfaces. Failures are reported through
+ * NVDEC_THROW_ERROR, NVDEC_API_CALL and CUDA_DRVAPI_CALL (the latter throws
+ * NVDECException).
+ */
+int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
+  START_TIMER
+  m_videoInfo.str("");
+  m_videoInfo.clear();
+  m_videoInfo << "Video Input Information" << std::endl
+              << "\tCodec        : " << GetVideoCodecString(pVideoFormat->codec)
+              << std::endl
+              << "\tFrame rate   : " << pVideoFormat->frame_rate.numerator
+              << "/" << pVideoFormat->frame_rate.denominator << " = "
+              << 1.0 * pVideoFormat->frame_rate.numerator /
+                     pVideoFormat->frame_rate.denominator
+              << " fps" << std::endl
+              << "\tSequence     : "
+              << (pVideoFormat->progressive_sequence ? "Progressive"
+                                                     : "Interlaced")
+              << std::endl
+              << "\tCoded size   : [" << pVideoFormat->coded_width << ", "
+              << pVideoFormat->coded_height << "]" << std::endl
+              << "\tDisplay area : [" << pVideoFormat->display_area.left << ", "
+              << pVideoFormat->display_area.top << ", "
+              << pVideoFormat->display_area.right << ", "
+              << pVideoFormat->display_area.bottom << "]" << std::endl
+              << "\tChroma       : "
+              << GetVideoChromaFormatString(pVideoFormat->chroma_format)
+              << std::endl
+              << "\tBit depth    : " << pVideoFormat->bit_depth_luma_minus8 + 8;
+  m_videoInfo << std::endl;
+
+  int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
+
+  // Ask the driver what it supports for this codec / chroma / bit depth.
+  CUVIDDECODECAPS decodecaps;
+  memset(&decodecaps, 0, sizeof(decodecaps));
+
+  decodecaps.eCodecType = pVideoFormat->codec;
+  decodecaps.eChromaFormat = pVideoFormat->chroma_format;
+  decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+  NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+
+  if (!decodecaps.bIsSupported) {
+    NVDEC_THROW_ERROR("Codec not supported on this GPU",
+                      CUDA_ERROR_NOT_SUPPORTED);
+    return nDecodeSurface;
+  }
+
+  if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
+      (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
+    std::ostringstream errorString;
+    errorString << std::endl
+                << "Resolution          : " << pVideoFormat->coded_width << "x"
+                << pVideoFormat->coded_height << std::endl
+                << "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x"
+                << decodecaps.nMaxHeight << std::endl
+                << "Resolution not supported on this GPU";
+
+    const std::string cErr = errorString.str();
+    NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
+    return nDecodeSurface;
+  }
+
+  // Macroblock count, computed in 16x16 units, must also fit the GPU limit.
+  if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
+      decodecaps.nMaxMBCount) {
+    std::ostringstream errorString;
+    errorString << std::endl
+                << "MBCount             : "
+                << (pVideoFormat->coded_width >> 4) *
+                       (pVideoFormat->coded_height >> 4)
+                << std::endl
+                << "Max Supported mbcnt : " << decodecaps.nMaxMBCount
+                << std::endl
+                << "MBCount not supported on this GPU";
+
+    const std::string cErr = errorString.str();
+    NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
+    return nDecodeSurface;
+  }
+
+  if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
+    // cuvidCreateDecoder() has been called before, and now there's possible
+    // config change
+    return ReconfigureDecoder(pVideoFormat);
+  }
+
+  // eCodec has been set in the constructor (for parser). Here it's set again
+  // for potential correction
+  m_eCodec = pVideoFormat->codec;
+  m_eChromaFormat = pVideoFormat->chroma_format;
+  m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+  m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;
+
+  // Set the output surface format same as chroma format. Monochrome uses the
+  // same surface formats as 4:2:0. Both enumerators are compared against
+  // m_eChromaFormat explicitly: a bare `|| cudaVideoChromaFormat_Monochrome`
+  // would test the constant itself rather than the stream's format.
+  if (m_eChromaFormat == cudaVideoChromaFormat_420 ||
+      m_eChromaFormat == cudaVideoChromaFormat_Monochrome)
+    m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8
+                          ? cudaVideoSurfaceFormat_P016
+                          : cudaVideoSurfaceFormat_NV12;
+  else if (m_eChromaFormat == cudaVideoChromaFormat_444)
+    m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8
+                          ? cudaVideoSurfaceFormat_YUV444_16Bit
+                          : cudaVideoSurfaceFormat_YUV444;
+  else if (m_eChromaFormat == cudaVideoChromaFormat_422)
+    m_eOutputFormat =
+        cudaVideoSurfaceFormat_NV12;  // no 4:2:2 output format supported yet so
+                                      // make 420 default
+
+  // Check if output format supported. If not, check fallback options
+  if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) {
+    if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
+      m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
+    else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
+      m_eOutputFormat = cudaVideoSurfaceFormat_P016;
+    else if (decodecaps.nOutputFormatMask &
+             (1 << cudaVideoSurfaceFormat_YUV444))
+      m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
+    else if (decodecaps.nOutputFormatMask &
+             (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
+      m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+    else
+      NVDEC_THROW_ERROR("No supported output format found",
+                        CUDA_ERROR_NOT_SUPPORTED);
+  }
+  m_videoFormat = *pVideoFormat;
+
+  CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
+  videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
+  videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
+  videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
+  videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+  if (pVideoFormat->progressive_sequence)
+    videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
+  else
+    videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
+  videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
+  // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by
+  // NVDEC hardware
+  videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
+  videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
+  videoDecodeCreateInfo.vidLock = m_ctxLock;
+  videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
+  videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
+  // AV1 has max width/height of sequence in sequence header
+  if (pVideoFormat->codec == cudaVideoCodec_AV1 &&
+      pVideoFormat->seqhdr_data_length > 0) {
+    // dont overwrite if it is already set from cmdline or reconfig.txt
+    if (!(m_nMaxWidth > pVideoFormat->coded_width ||
+          m_nMaxHeight > pVideoFormat->coded_height)) {
+      CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
+      m_nMaxWidth = vidFormatEx->av1.max_width;
+      m_nMaxHeight = vidFormatEx->av1.max_height;
+    }
+  }
+  // The maximum dimensions may never be smaller than the current coded size.
+  if (m_nMaxWidth < (int)pVideoFormat->coded_width)
+    m_nMaxWidth = pVideoFormat->coded_width;
+  if (m_nMaxHeight < (int)pVideoFormat->coded_height)
+    m_nMaxHeight = pVideoFormat->coded_height;
+  videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
+  videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;
+
+  if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
+    // Neither crop nor resize requested: output follows the display area.
+    m_nWidth =
+        pVideoFormat->display_area.right - pVideoFormat->display_area.left;
+    m_nLumaHeight =
+        pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
+    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
+    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
+  } else {
+    if (m_resizeDim.w && m_resizeDim.h) {
+      videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
+      videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
+      videoDecodeCreateInfo.display_area.right =
+          pVideoFormat->display_area.right;
+      videoDecodeCreateInfo.display_area.bottom =
+          pVideoFormat->display_area.bottom;
+      m_nWidth = m_resizeDim.w;
+      m_nLumaHeight = m_resizeDim.h;
+    }
+
+    // A crop rectangle, when present, overrides the values set for resize.
+    if (m_cropRect.r && m_cropRect.b) {
+      videoDecodeCreateInfo.display_area.left = m_cropRect.l;
+      videoDecodeCreateInfo.display_area.top = m_cropRect.t;
+      videoDecodeCreateInfo.display_area.right = m_cropRect.r;
+      videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
+      m_nWidth = m_cropRect.r - m_cropRect.l;
+      m_nLumaHeight = m_cropRect.b - m_cropRect.t;
+    }
+    videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
+    videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
+  }
+
+  // Cache the derived geometry; ReconfigureDecoder() and
+  // HandlePictureDisplay() read these members.
+  m_nChromaHeight =
+      (int)(ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
+  m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
+  m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
+  m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
+  m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
+  m_displayRect.t = videoDecodeCreateInfo.display_area.top;
+  m_displayRect.l = videoDecodeCreateInfo.display_area.left;
+  m_displayRect.r = videoDecodeCreateInfo.display_area.right;
+
+  m_videoInfo << "Video Decoding Params:" << std::endl
+              << "\tNum Surfaces : "
+              << videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl
+              << "\tCrop         : [" << videoDecodeCreateInfo.display_area.left
+              << ", " << videoDecodeCreateInfo.display_area.top << ", "
+              << videoDecodeCreateInfo.display_area.right << ", "
+              << videoDecodeCreateInfo.display_area.bottom << "]" << std::endl
+              << "\tResize       : " << videoDecodeCreateInfo.ulTargetWidth
+              << "x" << videoDecodeCreateInfo.ulTargetHeight << std::endl
+              << "\tDeinterlace  : "
+              << std::vector<const char *>{
+                     "Weave", "Bob",
+                     "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
+  m_videoInfo << std::endl;
+
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+  NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+  STOP_TIMER("Session Initialization Time: ");
+  return nDecodeSurface;
+}
+
+/**
+ * Handles a sequence change after output dimensions have already been set up.
+ *
+ * Bit-depth and chroma-format changes are rejected through NVDEC_THROW_ERROR.
+ * When neither the coded size nor the externally requested post-processing
+ * parameters changed, only the cached output dimensions are refreshed.
+ * Otherwise cuvidReconfigureDecoder() is called with the new coded size.
+ *
+ * @param pVideoFormat New sequence description.
+ * @return 1 on the paths that do not call cuvidReconfigureDecoder(),
+ *         otherwise pVideoFormat->min_num_decode_surfaces.
+ */
+int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
+  if (pVideoFormat->bit_depth_luma_minus8 !=
+          m_videoFormat.bit_depth_luma_minus8 ||
+      pVideoFormat->bit_depth_chroma_minus8 !=
+          m_videoFormat.bit_depth_chroma_minus8) {
+    NVDEC_THROW_ERROR("Reconfigure Not supported for bit depth change",
+                      CUDA_ERROR_NOT_SUPPORTED);
+  }
+
+  if (pVideoFormat->chroma_format != m_videoFormat.chroma_format) {
+    NVDEC_THROW_ERROR("Reconfigure Not supported for chroma format change",
+                      CUDA_ERROR_NOT_SUPPORTED);
+  }
+
+  // Classify what changed relative to the format stored in m_videoFormat.
+  bool bDecodeResChange =
+      !(pVideoFormat->coded_width == m_videoFormat.coded_width &&
+        pVideoFormat->coded_height == m_videoFormat.coded_height);
+  bool bDisplayRectChange = !(
+      pVideoFormat->display_area.bottom == m_videoFormat.display_area.bottom &&
+      pVideoFormat->display_area.top == m_videoFormat.display_area.top &&
+      pVideoFormat->display_area.left == m_videoFormat.display_area.left &&
+      pVideoFormat->display_area.right == m_videoFormat.display_area.right);
+
+  int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
+
+  if ((pVideoFormat->coded_width > m_nMaxWidth) ||
+      (pVideoFormat->coded_height > m_nMaxHeight)) {
+    // For VP9, let driver  handle the change if new width/height >
+    // maxwidth/maxheight
+    if ((m_eCodec != cudaVideoCodec_VP9) || m_bReconfigExternal) {
+      NVDEC_THROW_ERROR(
+          "Reconfigure Not supported when width/height > maxwidth/maxheight",
+          CUDA_ERROR_NOT_SUPPORTED);
+    }
+    return 1;
+  }
+
+  if (!bDecodeResChange && !m_bReconfigExtPPChange) {
+    // if the coded_width/coded_height hasn't changed but display resolution has
+    // changed, then need to update width/height for correct output without
+    // cropping. Example : 1920x1080 vs 1920x1088
+    if (bDisplayRectChange) {
+      m_nWidth =
+          pVideoFormat->display_area.right - pVideoFormat->display_area.left;
+      m_nLumaHeight =
+          pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
+      m_nChromaHeight =
+          (int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
+      m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
+    }
+
+    // no need for reconfigureDecoder(). Just return
+    return 1;
+  }
+
+  CUVIDRECONFIGUREDECODERINFO reconfigParams = {0};
+
+  // The stored format's coded size is updated together with the request.
+  reconfigParams.ulWidth = m_videoFormat.coded_width =
+      pVideoFormat->coded_width;
+  reconfigParams.ulHeight = m_videoFormat.coded_height =
+      pVideoFormat->coded_height;
+
+  // Don't change display rect and get scaled output from decoder. This will
+  // help display app to present apps smoothly
+  reconfigParams.display_area.bottom = m_displayRect.b;
+  reconfigParams.display_area.top = m_displayRect.t;
+  reconfigParams.display_area.left = m_displayRect.l;
+  reconfigParams.display_area.right = m_displayRect.r;
+  reconfigParams.ulTargetWidth = m_nSurfaceWidth;
+  reconfigParams.ulTargetHeight = m_nSurfaceHeight;
+
+  // If external reconfigure is called along with resolution change even if post
+  // processing params is not changed, do full reconfigure params update
+  if ((m_bReconfigExternal && bDecodeResChange) || m_bReconfigExtPPChange) {
+    // update display rect and target resolution if requested explicitly
+    m_bReconfigExternal = false;
+    m_bReconfigExtPPChange = false;
+    m_videoFormat = *pVideoFormat;
+    if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
+      m_nWidth =
+          pVideoFormat->display_area.right - pVideoFormat->display_area.left;
+      m_nLumaHeight =
+          pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
+      reconfigParams.ulTargetWidth = pVideoFormat->coded_width;
+      reconfigParams.ulTargetHeight = pVideoFormat->coded_height;
+    } else {
+      if (m_resizeDim.w && m_resizeDim.h) {
+        reconfigParams.display_area.left = pVideoFormat->display_area.left;
+        reconfigParams.display_area.top = pVideoFormat->display_area.top;
+        reconfigParams.display_area.right = pVideoFormat->display_area.right;
+        reconfigParams.display_area.bottom = pVideoFormat->display_area.bottom;
+        m_nWidth = m_resizeDim.w;
+        m_nLumaHeight = m_resizeDim.h;
+      }
+
+      // A crop rectangle, when present, overrides the resize values above.
+      if (m_cropRect.r && m_cropRect.b) {
+        reconfigParams.display_area.left = m_cropRect.l;
+        reconfigParams.display_area.top = m_cropRect.t;
+        reconfigParams.display_area.right = m_cropRect.r;
+        reconfigParams.display_area.bottom = m_cropRect.b;
+        m_nWidth = m_cropRect.r - m_cropRect.l;
+        m_nLumaHeight = m_cropRect.b - m_cropRect.t;
+      }
+      reconfigParams.ulTargetWidth = m_nWidth;
+      reconfigParams.ulTargetHeight = m_nLumaHeight;
+    }
+
+    // Refresh the cached geometry from the values just chosen.
+    m_nChromaHeight =
+        (int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
+    m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
+    m_nSurfaceHeight = reconfigParams.ulTargetHeight;
+    m_nSurfaceWidth = reconfigParams.ulTargetWidth;
+    m_displayRect.b = reconfigParams.display_area.bottom;
+    m_displayRect.t = reconfigParams.display_area.top;
+    m_displayRect.l = reconfigParams.display_area.left;
+    m_displayRect.r = reconfigParams.display_area.right;
+  }
+
+  reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
+
+  START_TIMER
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+  NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+  STOP_TIMER("Session Reconfigure Time: ");
+
+  return nDecodeSurface;
+}
+
+/**
+ * Records externally requested crop / resize parameters for the next
+ * reconfiguration and discards all currently pooled output frames.
+ *
+ * m_bReconfigExternal is always set; m_bReconfigExtPPChange is set only when
+ * a supplied rectangle or dimension differs from the stored one.
+ *
+ * @param pCropRect  New crop rectangle, or NULL to leave it untouched.
+ * @param pResizeDim New output size, or NULL to leave it untouched.
+ * @return Always 1.
+ * @throws NVDECException when a CUDA driver call fails while freeing device
+ *         frames (raised by CUDA_DRVAPI_CALL).
+ */
+int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
+  m_bReconfigExternal = true;
+  m_bReconfigExtPPChange = false;
+  if (pCropRect) {
+    if (!((pCropRect->t == m_cropRect.t) && (pCropRect->l == m_cropRect.l) &&
+          (pCropRect->b == m_cropRect.b) && (pCropRect->r == m_cropRect.r))) {
+      m_bReconfigExtPPChange = true;
+      m_cropRect = *pCropRect;
+    }
+  }
+  if (pResizeDim) {
+    if (!((pResizeDim->w == m_resizeDim.w) &&
+          (pResizeDim->h == m_resizeDim.h))) {
+      m_bReconfigExtPPChange = true;
+      m_resizeDim = *pResizeDim;
+    }
+  }
+
+  // Clear existing output buffers of different size
+  uint8_t *pFrame = NULL;
+  while (!m_vpFrame.empty()) {
+    pFrame = m_vpFrame.back();
+    m_vpFrame.pop_back();
+    if (m_bUseDeviceFrame) {
+      CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+      CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
+      CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+    } else {
+      // Host frames are allocated with new uint8_t[...] in
+      // HandlePictureDisplay(), so they must be released with the array form
+      // of delete, matching the destructor.
+      delete[] pFrame;
+    }
+  }
+
+  return 1;
+}
+
+/* Submits one parsed picture to the decoder via cuvidDecodePicture() and
+ * records its decode-order number for the surface index it occupies.
+ *
+ * Return value from HandlePictureDecode() are interpreted as:
+ *  0: fail, >=1: succeeded
+ */
+int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
+  if (!m_hDecoder) {
+    NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED);
+    return 0;
+  }
+
+  // Tag the surface with the running picture counter, then advance it.
+  const auto nSurfaceIdx = pPicParams->CurrPicIdx;
+  m_nPicNumInDecodeOrder[nSurfaceIdx] = m_nDecodePicCnt;
+  ++m_nDecodePicCnt;
+
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+  NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+  return 1;
+}
+
+/* Copies one decoded picture out of the decoder into a pooled output frame.
+ *
+ * The picture is mapped with cuvidMapVideoFrame(), its decode status is
+ * checked (errors are only printed), a destination buffer is taken from
+ * m_vpFrame (allocating a new one when the pool is too small), the luma and
+ * chroma planes are copied with cuMemcpy2DAsync() on m_cuvidStream, and the
+ * picture's timestamp is stored at the same index in m_vTimestamp.
+ *
+ * Return value from HandlePictureDisplay() are interpreted as:
+ *  0: fail, >=1: succeeded
+ */
+int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
+  CUVIDPROCPARAMS videoProcessingParameters = {};
+  videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
+  videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
+  videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
+  videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
+  videoProcessingParameters.output_stream = m_cuvidStream;
+
+  CUdeviceptr dpSrcFrame = 0;
+  unsigned int nSrcPitch = 0;
+  // The context pushed here stays current until the pop after the copies.
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+  NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
+                                    &dpSrcFrame, &nSrcPitch,
+                                    &videoProcessingParameters));
+
+  // A failed status query is ignored; a reported decode error is only logged.
+  CUVIDGETDECODESTATUS DecodeStatus;
+  memset(&DecodeStatus, 0, sizeof(DecodeStatus));
+  CUresult result =
+      cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
+  if (result == CUDA_SUCCESS &&
+      (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
+       DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
+    printf("Decode Error occurred for picture %d\n",
+           m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
+  }
+
+  uint8_t *pDecodedFrame = nullptr;
+  {
+    // m_vpFrame is guarded by m_mtxVPFrame while the pool is grown and read.
+    std::lock_guard<std::mutex> lock(m_mtxVPFrame);
+    if ((unsigned)++m_nDecodedFrame > m_vpFrame.size()) {
+      // Not enough frames in stock
+      m_nFrameAlloc++;
+      uint8_t *pFrame = NULL;
+      if (m_bUseDeviceFrame) {
+        if (m_bDeviceFramePitched) {
+          CUDA_DRVAPI_CALL(cuMemAllocPitch(
+              (CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
+              m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
+        } else {
+          CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
+        }
+      } else {
+        // Host buffers are array allocations and must be freed with delete[].
+        pFrame = new uint8_t[GetFrameSize()];
+      }
+      m_vpFrame.push_back(pFrame);
+    }
+    pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1];
+  }
+
+  // Copy luma plane
+  CUDA_MEMCPY2D m = {0};
+  m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+  m.srcDevice = dpSrcFrame;
+  m.srcPitch = nSrcPitch;
+  m.dstMemoryType =
+      m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
+  // dstHost and dstDevice are both set to the same address; dstMemoryType
+  // above tells the copy which one applies.
+  m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
+  m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
+  m.WidthInBytes = GetWidth() * m_nBPP;
+  m.Height = m_nLumaHeight;
+  CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
+
+  // Copy chroma plane
+  // NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning
+  // height
+  m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
+                              m.srcPitch * ((m_nSurfaceHeight + 1) & ~1));
+  m.dstDevice =
+      (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
+  m.Height = m_nChromaHeight;
+  CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
+
+  // Formats with two chroma planes need one more copy for the second plane.
+  if (m_nNumChromaPlanes == 2) {
+    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
+                                m.srcPitch * ((m_nSurfaceHeight + 1) & ~1) * 2);
+    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
+                                            m.dstPitch * m_nLumaHeight * 2);
+    m.Height = m_nChromaHeight;
+    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
+  }
+  // Wait for the asynchronous copies before the source frame is unmapped.
+  CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+
+  // NOTE(review): m_vTimestamp is updated here without holding m_mtxVPFrame,
+  // although GetLockedFrame()/UnlockFrame() modify it under that mutex --
+  // confirm the intended threading model.
+  if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
+    m_vTimestamp.resize(m_vpFrame.size());
+  }
+  m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
+
+  NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
+  return 1;
+}
+
+/**
+ * Stores the configuration, creates the context lock for cuContext and
+ * creates the video parser, registering this object as the parser's user
+ * data together with the sequence / decode / display / operating-point
+ * callbacks.
+ *
+ * @param cuContext           CUDA context stored in m_cuContext and used to
+ *                            create m_ctxLock.
+ * @param bUseDeviceFrame     Stored in m_bUseDeviceFrame; elsewhere in this
+ *                            file it selects device versus host output
+ *                            buffers.
+ * @param eCodec              Codec handed to the parser.
+ * @param bLowLatency         When true the parser's ulMaxDisplayDelay is 0,
+ *                            otherwise 1.
+ * @param bDeviceFramePitched Stored in m_bDeviceFramePitched.
+ * @param pCropRect           Optional crop rectangle; copied when non-null.
+ * @param pResizeDim          Optional output size; copied when non-null.
+ * @param maxWidth            Initial value of m_nMaxWidth.
+ * @param maxHeight           Initial value of m_nMaxHeight.
+ * @param clkRate             Value for the parser's ulClockRate.
+ */
+NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
+                     cudaVideoCodec eCodec, bool bLowLatency,
+                     bool bDeviceFramePitched, const Rect *pCropRect,
+                     const Dim *pResizeDim, int maxWidth, int maxHeight,
+                     unsigned int clkRate)
+    : m_cuContext(cuContext),
+      m_bUseDeviceFrame(bUseDeviceFrame),
+      m_eCodec(eCodec),
+      m_bDeviceFramePitched(bDeviceFramePitched),
+      m_nMaxWidth(maxWidth),
+      m_nMaxHeight(maxHeight) {
+  if (pCropRect) m_cropRect = *pCropRect;
+  if (pResizeDim) m_resizeDim = *pResizeDim;
+
+  NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
+
+  CUVIDPARSERPARAMS videoParserParameters = {};
+  videoParserParameters.CodecType = eCodec;
+  // HandleVideoSequence() documents that its return value can override this
+  // decode-surface count.
+  videoParserParameters.ulMaxNumDecodeSurfaces = 1;
+  videoParserParameters.ulClockRate = clkRate;
+  videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
+  videoParserParameters.pUserData = this;
+  videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
+  videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
+  videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
+  videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
+  NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
+}
+
+/**
+ * Destroys the parser and the decoder (when present), releases every buffer
+ * held in m_vpFrame and destroys the context lock, then prints the elapsed
+ * time. The return codes of the CUDA / NVDEC calls are deliberately not
+ * checked here, so none of them can raise from the destructor.
+ */
+NvDecoder::~NvDecoder() {
+  START_TIMER
+
+  if (m_hParser) {
+    cuvidDestroyVideoParser(m_hParser);
+  }
+  // The context stays current for the decoder teardown and the frame frees.
+  cuCtxPushCurrent(m_cuContext);
+  if (m_hDecoder) {
+    cuvidDestroyDecoder(m_hDecoder);
+  }
+
+  std::lock_guard<std::mutex> lock(m_mtxVPFrame);
+
+  for (uint8_t *pFrame : m_vpFrame) {
+    if (m_bUseDeviceFrame) {
+      cuMemFree((CUdeviceptr)pFrame);
+    } else {
+      // Host frames come from new uint8_t[...], hence the array delete.
+      delete[] pFrame;
+    }
+  }
+  cuCtxPopCurrent(NULL);
+
+  cuvidCtxLockDestroy(m_ctxLock);
+
+  STOP_TIMER("Session Deinitialization Time: ");
+}
+
+/**
+ * Feeds one packet of bitstream data to the parser.
+ *
+ * The per-call frame counters are reset first. A null pData or a zero nSize
+ * additionally marks the packet as end of stream.
+ *
+ * @param pData      Bitstream bytes, or NULL.
+ * @param nSize      Number of bytes at pData.
+ * @param nFlags     Extra packet flags; CUVID_PKT_TIMESTAMP is always added.
+ * @param nTimestamp Timestamp attached to the packet.
+ * @return Value of m_nDecodedFrame after parsing.
+ */
+int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
+                      int64_t nTimestamp) {
+  m_nDecodedFrameReturned = 0;
+  m_nDecodedFrame = 0;
+
+  const bool bEndOfStream = !pData || nSize == 0;
+
+  CUVIDSOURCEDATAPACKET packet = {0};
+  packet.timestamp = nTimestamp;
+  packet.payload_size = nSize;
+  packet.payload = pData;
+  packet.flags = nFlags | CUVID_PKT_TIMESTAMP;
+  if (bEndOfStream) {
+    packet.flags |= CUVID_PKT_ENDOFSTREAM;
+  }
+
+  NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
+  m_cuvidStream = 0;
+
+  return m_nDecodedFrame;
+}
+
+/**
+ * Hands out the next not-yet-returned frame of the current batch.
+ *
+ * The buffer remains in m_vpFrame; only the counters move.
+ *
+ * @param pTimestamp Optional; receives the frame's stored timestamp.
+ * @return The frame buffer, or NULL when m_nDecodedFrame is not positive.
+ */
+uint8_t *NvDecoder::GetFrame(int64_t *pTimestamp) {
+  if (m_nDecodedFrame <= 0) {
+    return NULL;
+  }
+
+  std::lock_guard<std::mutex> lock(m_mtxVPFrame);
+  m_nDecodedFrame--;
+  if (pTimestamp) {
+    *pTimestamp = m_vTimestamp[m_nDecodedFrameReturned];
+  }
+  uint8_t *const pNextFrame = m_vpFrame[m_nDecodedFrameReturned];
+  m_nDecodedFrameReturned++;
+  return pNextFrame;
+}
+
+/**
+ * Removes the first frame from the pool and returns it to the caller.
+ *
+ * The first entries of m_vpFrame and m_vTimestamp are erased, so the
+ * returned buffer is no longer tracked by the pool (UnlockFrame() appends a
+ * buffer back).
+ *
+ * @param pTimestamp Optional; receives the removed frame's timestamp.
+ * @return The removed frame buffer, or NULL when m_nDecodedFrame is not
+ *         positive.
+ */
+uint8_t *NvDecoder::GetLockedFrame(int64_t *pTimestamp) {
+  if (m_nDecodedFrame <= 0) {
+    return NULL;
+  }
+
+  std::lock_guard<std::mutex> lock(m_mtxVPFrame);
+  m_nDecodedFrame--;
+
+  // Detach the oldest buffer from the pool.
+  uint8_t *pFrontFrame = m_vpFrame[0];
+  m_vpFrame.erase(m_vpFrame.begin());
+
+  // Detach its timestamp as well so both vectors stay index-aligned.
+  uint64_t frontTimestamp = m_vTimestamp[0];
+  m_vTimestamp.erase(m_vTimestamp.begin());
+
+  if (pTimestamp) {
+    *pTimestamp = frontTimestamp;
+  }
+  return pFrontFrame;
+}
+
+/**
+ * Appends the buffer *pFrame to the end of the frame pool.
+ *
+ * A zero timestamp is appended alongside it so that m_vpFrame and
+ * m_vTimestamp keep the same length.
+ *
+ * @param pFrame Pointer to the frame pointer to append; only the first
+ *               element is read.
+ */
+void NvDecoder::UnlockFrame(uint8_t **pFrame) {
+  std::lock_guard<std::mutex> lock(m_mtxVPFrame);
+  m_vpFrame.push_back(pFrame[0]);
+
+  // add a dummy entry for timestamp
+  m_vTimestamp.push_back(0);
+}
--- a/src/media/video/decode/nvcodec/NvDecoder.h
+++ b/src/media/video/decode/nvcodec/NvDecoder.h
@@ -0,0 +1,395 @@
+/*
+ * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <iostream>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "Utils/NvCodecUtils.h"
+#include "nvcuvid.h"
+
+/**
+ * @brief Exception class for error reporting from the decode API.
+ *
+ * Carries a human-readable message together with the CUresult code that
+ * caused it. Instances are normally built through makeNVDECException(), which
+ * prefixes the message with the failing function and source location.
+ */
+class NVDECException : public std::exception {
+ public:
+  NVDECException(const std::string &errorStr, const CUresult errorCode)
+      : m_errorString(errorStr), m_errorCode(errorCode) {}
+
+  // `noexcept` replaces the deprecated dynamic exception specification
+  // `throw()`; `override` lets the compiler verify the std::exception match.
+  ~NVDECException() noexcept override {}
+  /** @return the stored message; valid for the lifetime of this object. */
+  const char *what() const noexcept override { return m_errorString.c_str(); }
+  /** @return the CUresult passed at construction. */
+  CUresult getErrorCode() const { return m_errorCode; }
+  /** @return the stored message as a std::string. */
+  const std::string &getErrorString() const { return m_errorString; }
+  /**
+   * Builds an exception whose message is
+   * "<functionName> : <errorStr> at <fileName>:<lineNo>" plus a newline.
+   */
+  static NVDECException makeNVDECException(const std::string &errorStr,
+                                           const CUresult errorCode,
+                                           const std::string &functionName,
+                                           const std::string &fileName,
+                                           int lineNo);
+
+ private:
+  std::string m_errorString;
+  CUresult m_errorCode;
+};
+
+// Formats "<functionName> : <errorStr> at <fileName>:<lineNo>\n" and wraps it,
+// together with errorCode, in an NVDECException.
+inline NVDECException NVDECException::makeNVDECException(
+    const std::string &errorStr, const CUresult errorCode,
+    const std::string &functionName, const std::string &fileName, int lineNo) {
+  std::ostringstream message;
+  message << functionName << " : " << errorStr;
+  message << " at " << fileName << ":" << lineNo << std::endl;
+  return NVDECException(message.str(), errorCode);
+}
+
+/**
+ * Throws an NVDECException built from errorStr/errorCode, tagged with the
+ * calling function name, file and line. The do/while(0) wrapper makes the
+ * macro usable as a single statement.
+ */
+#define NVDEC_THROW_ERROR(errorStr, errorCode)                  \
+  do {                                                          \
+    throw NVDECException::makeNVDECException(                   \
+        errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
+  } while (0)
+
+/**
+ * Evaluates the expression cuvidAPI once and throws an NVDECException when
+ * the resulting CUresult is not CUDA_SUCCESS. The exception text contains the
+ * literal source text of the call (stringified argument) and the numeric
+ * error code, so renaming variables inside the argument changes the message.
+ */
+#define NVDEC_API_CALL(cuvidAPI)                                        \
+  do {                                                                  \
+    CUresult errorCode = cuvidAPI;                                      \
+    if (errorCode != CUDA_SUCCESS) {                                    \
+      std::ostringstream errorLog;                                      \
+      errorLog << #cuvidAPI << " returned error " << errorCode;         \
+      throw NVDECException::makeNVDECException(                         \
+          errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
+    }                                                                   \
+  } while (0)
+
+// Rectangle used for the decoder's crop and display regions.
+// Presumably l/t/r/b are the left, top, right and bottom edges.
+struct Rect {
+  int l;
+  int t;
+  int r;
+  int b;
+};
+
+// Width/height pair, used for the decoder's resize dimensions.
+struct Dim {
+  int w;
+  int h;
+};
+
+/**
+ * @brief Base class for decoder interface.
+ *
+ * Most getters below assert that the corresponding dimension member is
+ * non-zero; those members default to 0 in this class, so the getters appear
+ * to be meant for use after stream parameters are known - TODO confirm
+ * against HandleVideoSequence().
+ */
+class NvDecoder {
+ public:
+  /**
+   *  @brief This function is used to initialize the decoder session.
+   *  Application must call this function to initialize the decoder, before
+   *  starting to decode any frames.
+   */
+  NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec,
+            bool bLowLatency = false, bool bDeviceFramePitched = false,
+            const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
+            int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000);
+  ~NvDecoder();
+
+  /**
+   *  @brief  This function is used to get the current CUDA context.
+   */
+  CUcontext GetContext() { return m_cuContext; }
+
+  /**
+   *  @brief  This function is used to get the output frame width.
+   *  For NV12/P016 output the width is rounded up to the next even value
+   *  because U and V samples are interleaved.
+   */
+  int GetWidth() {
+    assert(m_nWidth);
+    return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 ||
+            m_eOutputFormat == cudaVideoSurfaceFormat_P016)
+               ? (m_nWidth + 1) & ~1
+               : m_nWidth;
+  }
+
+  /**
+   *  @brief  This function is used to get the actual decode width
+   *  (m_nWidth without the even-width rounding applied by GetWidth()).
+   */
+  int GetDecodeWidth() {
+    assert(m_nWidth);
+    return m_nWidth;
+  }
+
+  /**
+   *  @brief  This function is used to get the output frame height (Luma
+   * height).
+   */
+  int GetHeight() {
+    assert(m_nLumaHeight);
+    return m_nLumaHeight;
+  }
+
+  /**
+   *  @brief  This function is used to get the current chroma height.
+   */
+  int GetChromaHeight() {
+    assert(m_nChromaHeight);
+    return m_nChromaHeight;
+  }
+
+  /**
+   *  @brief  This function is used to get the number of chroma planes.
+   */
+  int GetNumChromaPlanes() {
+    assert(m_nNumChromaPlanes);
+    return m_nNumChromaPlanes;
+  }
+
+  /**
+   *   @brief  This function is used to get the current frame size in bytes:
+   * GetWidth() * (luma height + chroma height * chroma planes) * bytes per
+   * pixel.
+   */
+  int GetFrameSize() {
+    assert(m_nWidth);
+    return GetWidth() *
+           (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP;
+  }
+
+  /**
+   *   @brief  This function is used to get the current frame Luma plane size
+   * in bytes.
+   */
+  int GetLumaPlaneSize() {
+    assert(m_nWidth);
+    return GetWidth() * m_nLumaHeight * m_nBPP;
+  }
+
+  /**
+   *   @brief  This function is used to get the combined size in bytes of all
+   * chroma planes of the current frame.
+   */
+  int GetChromaPlaneSize() {
+    assert(m_nWidth);
+    return GetWidth() * (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP;
+  }
+
+  /**
+   *  @brief  This function is used to get the pitch of the device buffer
+   * holding the decoded frame. Falls back to GetWidth() * bytes-per-pixel
+   * when no device pitch has been recorded (m_nDeviceFramePitch == 0).
+   */
+  int GetDeviceFramePitch() {
+    assert(m_nWidth);
+    return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : GetWidth() * m_nBPP;
+  }
+
+  /**
+   *   @brief  This function is used to get the bit depth associated with the
+   * pixel format (m_nBitDepthMinus8 + 8).
+   */
+  int GetBitDepth() {
+    assert(m_nWidth);
+    return m_nBitDepthMinus8 + 8;
+  }
+
+  /**
+   *   @brief  This function is used to get the bytes used per pixel.
+   */
+  int GetBPP() {
+    assert(m_nWidth);
+    return m_nBPP;
+  }
+
+  /**
+   *   @brief  This function is used to get the output surface format
+   * (e.g. NV12 or P016).
+   */
+  cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; }
+
+  /**
+   *   @brief  This function is used to get information about the video stream
+   * (codec, display parameters etc). Returns a copy of the stored format.
+   */
+  CUVIDEOFORMAT GetVideoFormatInfo() {
+    assert(m_nWidth);
+    return m_videoFormat;
+  }
+
+  /**
+   *   @brief  This function is used to get codec string from codec id
+   */
+  const char *GetCodecString(cudaVideoCodec eCodec);
+
+  /**
+   *   @brief  This function returns the accumulated video stream description
+   * as a string (it does not print anything itself).
+   */
+  std::string GetVideoInfo() const { return m_videoInfo.str(); }
+
+  /**
+   *   @brief  This function decodes a frame and returns the number of frames
+   * that are available for display. All frames that are available for display
+   * should be read before making a subsequent decode call.
+   *   @param  pData - pointer to the data buffer that is to be decoded;
+   * a null pointer signals end of stream
+   *   @param  nSize - size of the data buffer in bytes; zero signals end of
+   * stream
+   *   @param  nFlags - CUvideopacketflags for setting decode options
+   *   @param  nTimestamp - presentation timestamp
+   */
+  int Decode(const uint8_t *pData, int nSize, int nFlags = 0,
+             int64_t nTimestamp = 0);
+
+  /**
+   *   @brief  This function returns a decoded frame and timestamp. This
+   * function should be called in a loop for fetching all the frames that are
+   * available for display. Returns NULL when no frame is pending.
+   *   @param  pTimestamp - optional; receives the timestamp of the frame
+   */
+  uint8_t *GetFrame(int64_t *pTimestamp = nullptr);
+
+  /**
+   *   @brief  This function removes the first frame buffer from the internal
+   * frame list and returns it. This makes the buffer available for use by the
+   * application without it getting overwritten, even if subsequent decode
+   * calls are made. The frame buffer remains locked until UnlockFrame() is
+   * called. Returns NULL when no frame is pending.
+   *   @param  pTimestamp - optional; receives the timestamp of the frame
+   */
+  uint8_t *GetLockedFrame(int64_t *pTimestamp = nullptr);
+
+  /**
+   *   @brief  This function unlocks a frame buffer and makes it available for
+   * write again by appending it to the internal frame list.
+   *   @param  pFrame - pointer to the frame pointer to be unlocked; only
+   * pFrame[0] is read
+   */
+  void UnlockFrame(uint8_t **pFrame);
+
+  /**
+   *   @brief  This function allows app to set decoder reconfig params
+   *   @param  pCropRect - cropping rectangle coordinates
+   *   @param  pResizeDim - width and height of resized output
+   */
+  int setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim);
+
+  /**
+   *   @brief  This function allows app to set operating point for AV1 SVC clips
+   *   @param  opPoint - operating point of an AV1 scalable bitstream
+   *   @param  bDispAllLayers - Output all decoded frames of an AV1 scalable
+   * bitstream
+   */
+  void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) {
+    m_nOperatingPoint = opPoint;
+    m_bDispAllLayers = bDispAllLayers;
+  }
+
+  // start a timer
+  void startTimer() { m_stDecode_time.Start(); }
+
+  // stop the timer and return the value reported by StopWatch::Stop()
+  double stopTimer() { return m_stDecode_time.Stop(); }
+
+ private:
+  /**
+   *   @brief  Callback function to be registered for getting a callback when
+   * decoding of sequence starts. pUserData must be the NvDecoder instance.
+   */
+  static int CUDAAPI HandleVideoSequenceProc(void *pUserData,
+                                             CUVIDEOFORMAT *pVideoFormat) {
+    return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
+  }
+
+  /**
+   *   @brief  Callback function to be registered for getting a callback when a
+   * picture is ready to be decoded. pUserData must be the NvDecoder instance.
+   */
+  static int CUDAAPI HandlePictureDecodeProc(void *pUserData,
+                                             CUVIDPICPARAMS *pPicParams) {
+    return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams);
+  }
+
+  /**
+   *   @brief  Callback function to be registered for getting a callback when a
+   * decoded frame is available for display. pUserData must be the NvDecoder
+   * instance.
+   */
+  static int CUDAAPI HandlePictureDisplayProc(void *pUserData,
+                                              CUVIDPARSERDISPINFO *pDispInfo) {
+    return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo);
+  }
+
+  /**
+   *   @brief  Callback function to be registered for getting a callback to get
+   * operating point when AV1 SVC sequence header start. pUserData must be the
+   * NvDecoder instance.
+   */
+  static int CUDAAPI
+  HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) {
+    return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo);
+  }
+
+  /**
+   *   @brief  This function gets called when a sequence is ready to be
+   * decoded. The function also gets called when there is format change
+   */
+  int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
+
+  /**
+   *   @brief  This function gets called when a picture is ready to be decoded.
+   * cuvidDecodePicture is called from this function to decode the picture
+   */
+  int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);
+
+  /**
+   *   @brief  This function gets called after a picture is decoded and
+   * available for display. Frames are fetched and stored in internal buffer
+   */
+  int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
+
+  /**
+   *   @brief  This function gets called when AV1 sequence encounter more than
+   * one operating points
+   */
+  int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
+  /**
+   *   @brief  This function reconfigure decoder if there is a change in
+   * sequence params.
+   */
+  int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat);
+
+ private:
+  CUcontext m_cuContext = NULL;
+  // NOTE(review): no in-class initializer; presumably created in the
+  // constructor - confirm. Destroyed with cuvidCtxLockDestroy in ~NvDecoder.
+  CUvideoctxlock m_ctxLock;
+  CUvideoparser m_hParser = NULL;
+  CUvideodecoder m_hDecoder = NULL;
+  // true: frames are CUDA device allocations (released with cuMemFree);
+  // false: frames are host arrays (released with delete[]).
+  bool m_bUseDeviceFrame;
+  // dimension of the output
+  unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
+  unsigned int m_nNumChromaPlanes = 0;
+  // height of the mapped surface
+  int m_nSurfaceHeight = 0;
+  int m_nSurfaceWidth = 0;
+  cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
+  cudaVideoChromaFormat m_eChromaFormat = cudaVideoChromaFormat_420;
+  cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
+  int m_nBitDepthMinus8 = 0;
+  // bytes per pixel component, as returned by GetBPP()
+  int m_nBPP = 1;
+  CUVIDEOFORMAT m_videoFormat = {};
+  Rect m_displayRect = {};
+  // stock of frames
+  std::vector<uint8_t *> m_vpFrame;
+  // timestamps of decoded frames
+  std::vector<int64_t> m_vTimestamp;
+  // m_nDecodedFrame: frames still pending from the last Decode() call;
+  // m_nDecodedFrameReturned: index of the next frame GetFrame() hands out.
+  // Both are reset to 0 at the start of every Decode() call.
+  int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
+  // NOTE(review): m_nPicNumInDecodeOrder has no initializer; confirm it is
+  // always written before being read.
+  int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
+  bool m_bEndDecodeDone = false;
+  // taken by GetFrame(), GetLockedFrame() and UnlockFrame() while they touch
+  // m_vpFrame / m_vTimestamp
+  std::mutex m_mtxVPFrame;
+  int m_nFrameAlloc = 0;
+  CUstream m_cuvidStream = 0;
+  bool m_bDeviceFramePitched = false;
+  // pitch of the device frame buffer; 0 means "not set" (see
+  // GetDeviceFramePitch())
+  size_t m_nDeviceFramePitch = 0;
+  Rect m_cropRect = {};
+  Dim m_resizeDim = {};
+
+  // text returned by GetVideoInfo()
+  std::ostringstream m_videoInfo;
+  unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
+  bool m_bReconfigExternal = false;
+  bool m_bReconfigExtPPChange = false;
+  // stopwatch driven by startTimer()/stopTimer()
+  StopWatch m_stDecode_time;
+
+  // AV1 SVC settings stored by SetOperatingPoint()
+  unsigned int m_nOperatingPoint = 0;
+  bool m_bDispAllLayers = false;
+};
--- a/src/media/video/decode/nvcodec/nv_decoder.cpp
+++ b/src/media/video/decode/nvcodec/nv_decoder.cpp
@@ -0,0 +1,65 @@
+#include "nv_decoder.h"
+
+#include "log.h"
+
+// Nothing to set up here: members use their in-class initializers and the
+// real work happens in Init().
+VideoDecoder::VideoDecoder() = default;
+// Releases the NvDecoder allocated by Init(). Previously the object was never
+// freed. Deleting a null pointer is a no-op, so this is safe when Init() was
+// never called or failed.
+// NOTE(review): the CUcontext created in Init() is not stored anywhere and is
+// therefore still not destroyed here.
+VideoDecoder::~VideoDecoder() {
+  delete decoder;
+  decoder = nullptr;
+}
+
+/**
+ * Initializes the CUDA driver, creates a context on GPU 0 and constructs a
+ * low-latency H.264 NvDecoder that outputs host-memory frames.
+ *
+ * @return 0 on success; -1 when no GPU is present or the device/context
+ *         could not be obtained
+ */
+int VideoDecoder::Init() {
+  ck(cuInit(0));
+  int nGpu = 0;
+  const int iGpu = 0;  // always use the first GPU
+
+  ck(cuDeviceGetCount(&nGpu));
+  if (nGpu < 1) {
+    return -1;
+  }
+
+  // Check the driver results explicitly: on failure cuDevice would otherwise
+  // be passed to cuCtxCreate uninitialized.
+  CUdevice cuDevice = 0;
+  if (cuDeviceGet(&cuDevice, iGpu) != CUDA_SUCCESS) {
+    return -1;
+  }
+
+  CUcontext cuContext = NULL;
+  if (cuCtxCreate(&cuContext, 0, cuDevice) != CUDA_SUCCESS || !cuContext) {
+    return -1;
+  }
+
+  decoder = new NvDecoder(cuContext, false, cudaVideoCodec_H264, true);
+  return 0;
+}
+
+/**
+ * Forwards one chunk of bitstream to the underlying NvDecoder.
+ *
+ * @param pData  bitstream bytes, passed through unchanged (NvDecoder treats a
+ *               null pointer or zero size as end of stream)
+ * @param nSize  number of bytes in pData
+ * @return -1 when Init() has not created a decoder, otherwise the value
+ *         returned by NvDecoder::Decode
+ */
+int VideoDecoder::Decode(const uint8_t *pData, int nSize) {
+  if (!decoder) {
+    return -1;
+  }
+
+  // The former key-frame probe read pData[4] without checking pData for null
+  // or nSize for length, and its branch body was empty; it has been removed.
+  return decoder->Decode(pData, nSize);
+}
+
+/**
+ * Fetches the next decoded NV12 frame from the decoder.
+ *
+ * @param yuv_data  destination buffer supplied by the caller; when non-null
+ *                  `size` bytes of frame data are copied into it. The pointer
+ *                  is passed by value, so assigning to it (as the previous
+ *                  code did) could never deliver the frame to the caller.
+ *                  NOTE(review): the buffer must hold at least
+ *                  width * height * 3 / 2 bytes - confirm at call sites.
+ * @param width     receives the frame width
+ * @param height    receives the frame (luma) height
+ * @param size      receives width * height * 3 / 2
+ * @return 0 when a frame was returned; -1 when there is no decoder, the
+ *         output format is not NV12, or no frame is pending
+ */
+int VideoDecoder::GetFrame(uint8_t *yuv_data, uint32_t &width, uint32_t &height,
+                           uint32_t &size) {
+  if (nullptr == decoder) {
+    return -1;
+  }
+  if (decoder->GetOutputFormat() != cudaVideoSurfaceFormat_NV12) {
+    return -1;
+  }
+
+  uint8_t *data = decoder->GetFrame();
+  if (!data) {
+    return -1;
+  }
+
+  width = decoder->GetWidth();
+  height = decoder->GetHeight();
+  size = width * height * 3 / 2;
+
+  // The decoder is created with host-memory frames in Init(), so a plain
+  // memcpy is sufficient. Copying also decouples the caller from the
+  // decoder's internal buffer.
+  if (yuv_data) {
+    memcpy(yuv_data, data, size);
+  }
+  return 0;
+}
--- a/src/media/video/decode/nvcodec/nv_decoder.h
+++ b/src/media/video/decode/nvcodec/nv_decoder.h
@@ -0,0 +1,21 @@
+#ifndef _NV_DECODER_H_
+#define _NV_DECODER_H_
+
+#include "NvDecoder.h"
+
+// Thin wrapper around NvDecoder: Init() creates an H.264 decoder on the first
+// GPU, Decode() feeds it bitstream and GetFrame() fetches NV12 output.
+class VideoDecoder {
+ public:
+  VideoDecoder();
+  ~VideoDecoder();
+
+  // Creates the CUDA context and the NvDecoder. Returns 0 on success, -1 on
+  // failure.
+  int Init();
+  // Forwards pData/nSize to NvDecoder::Decode. Returns -1 if Init() has not
+  // created a decoder, otherwise NvDecoder::Decode's return value.
+  int Decode(const uint8_t* pData, int nSize);
+  // Retrieves the next decoded NV12 frame. width, height and size are output
+  // parameters; yuv_data is passed by value, so the caller's pointer itself
+  // cannot be re-seated by this call. Returns 0 when a frame was available,
+  // -1 otherwise.
+  int GetFrame(uint8_t* yuv_data, uint32_t& width, uint32_t& height,
+               uint32_t& size);
+
+  // Decoder instance created by Init(); nullptr until then.
+  NvDecoder* decoder = nullptr;
+  // NOTE(review): the two flags below are not referenced by the visible
+  // implementation; their intended use is unclear.
+  bool get_first_keyframe_ = false;
+  bool skip_frame_ = false;
+};
+
+#endif
--- a/src/media/video/encode/nvcodec/NvEncoder.cpp
+++ b/src/media/video/encode/nvcodec/NvEncoder.cpp
@@ -0,0 +1,909 @@
+/*
+ * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#include "NvEncoder.h"
+
+#ifndef _WIN32
+#include <cstring>
+// Bytewise equality for GUIDs (only compiled on non-Windows builds).
+static inline bool operator==(const GUID &guid1, const GUID &guid2) {
+  const int diff = memcmp(&guid1, &guid2, sizeof(GUID));
+  return diff == 0;
+}
+
+// Bytewise inequality for GUIDs (only compiled on non-Windows builds).
+static inline bool operator!=(const GUID &guid1, const GUID &guid2) {
+  return memcmp(&guid1, &guid2, sizeof(GUID)) != 0;
+}
+#endif
+
+// Loads the NVENC API table and opens an encode session on the given device.
+// The supplied width/height also become the initial maximum encode size.
+// Errors are reported through NVENC_THROW_ERROR / NVENC_API_CALL.
+NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice,
+                     uint32_t nWidth, uint32_t nHeight,
+                     NV_ENC_BUFFER_FORMAT eBufferFormat,
+                     uint32_t nExtraOutputDelay, bool bMotionEstimationOnly,
+                     bool bOutputInVideoMemory)
+    : m_pDevice(pDevice),
+      m_eDeviceType(eDeviceType),
+      m_nWidth(nWidth),
+      m_nHeight(nHeight),
+      m_nMaxEncodeWidth(nWidth),
+      m_nMaxEncodeHeight(nHeight),
+      m_eBufferFormat(eBufferFormat),
+      m_bMotionEstimationOnly(bMotionEstimationOnly),
+      m_bOutputInVideoMemory(bOutputInVideoMemory),
+      m_nExtraOutputDelay(nExtraOutputDelay),
+      m_hEncoder(nullptr) {
+  LoadNvEncApi();
+
+  // A missing entry point means the function table was not filled in.
+  if (m_nvenc.nvEncOpenEncodeSession == nullptr) {
+    m_nEncoderBuffer = 0;
+    NVENC_THROW_ERROR("EncodeAPI not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
+  }
+
+  NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encodeSessionExParams = {
+      NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER};
+  encodeSessionExParams.apiVersion = NVENCAPI_VERSION;
+  encodeSessionExParams.deviceType = m_eDeviceType;
+  encodeSessionExParams.device = m_pDevice;
+
+  // Only publish the handle once the session has actually been opened.
+  void *hEncoder = NULL;
+  NVENC_API_CALL(
+      m_nvenc.nvEncOpenEncodeSessionEx(&encodeSessionExParams, &hEncoder));
+  m_hEncoder = hEncoder;
+}
+
+// Verifies that the driver supports the NVENC API version this code was
+// built against, then fills m_nvenc with the API function table.
+void NvEncoder::LoadNvEncApi() {
+  // Highest API version reported by the installed driver.
+  uint32_t version = 0;
+  NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
+
+  // Compile-time API version packed as (major << 4) | minor.
+  const uint32_t currentVersion =
+      (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
+  if (version < currentVersion) {
+    NVENC_THROW_ERROR(
+        "Current Driver Version does not support this NvEncodeAPI version, "
+        "please upgrade driver",
+        NV_ENC_ERR_INVALID_VERSION);
+  }
+
+  m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
+  NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
+}
+
+// Tears down the hardware encoder session; DestroyHWEncoder() is a no-op when
+// no session is open.
+NvEncoder::~NvEncoder() {
+  DestroyHWEncoder();
+}
+
+// Fills *pIntializeParams (and the NV_ENC_CONFIG it points to) with default
+// settings for the given codec, preset and tuning info, using this encoder's
+// width, height and buffer format.
+//
+// pIntializeParams and pIntializeParams->encodeConfig must both be non-null;
+// both structures are zeroed and then rebuilt, keeping only the encodeConfig
+// pointer. Errors are reported through NVENC_THROW_ERROR.
+void NvEncoder::CreateDefaultEncoderParams(
+    NV_ENC_INITIALIZE_PARAMS *pIntializeParams, GUID codecGuid, GUID presetGuid,
+    NV_ENC_TUNING_INFO tuningInfo) {
+  if (!m_hEncoder) {
+    NVENC_THROW_ERROR("Encoder Initialization failed",
+                      NV_ENC_ERR_NO_ENCODE_DEVICE);
+    return;
+  }
+
+  if (pIntializeParams == nullptr ||
+      pIntializeParams->encodeConfig == nullptr) {
+    NVENC_THROW_ERROR(
+        "pInitializeParams and pInitializeParams->encodeConfig can't be NULL",
+        NV_ENC_ERR_INVALID_PTR);
+  }
+
+  // Zero both structures; the encodeConfig pointer is saved across the memset
+  // of the outer struct and restored afterwards.
+  memset(pIntializeParams->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
+  auto pEncodeConfig = pIntializeParams->encodeConfig;
+  memset(pIntializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
+  pIntializeParams->encodeConfig = pEncodeConfig;
+
+  pIntializeParams->encodeConfig->version = NV_ENC_CONFIG_VER;
+  pIntializeParams->version = NV_ENC_INITIALIZE_PARAMS_VER;
+
+  pIntializeParams->encodeGUID = codecGuid;
+  pIntializeParams->presetGUID = presetGuid;
+  pIntializeParams->encodeWidth = m_nWidth;
+  pIntializeParams->encodeHeight = m_nHeight;
+  pIntializeParams->darWidth = m_nWidth;
+  pIntializeParams->darHeight = m_nHeight;
+  // Default frame rate: 30/1.
+  pIntializeParams->frameRateNum = 30;
+  pIntializeParams->frameRateDen = 1;
+  pIntializeParams->enablePTD = 1;
+  pIntializeParams->reportSliceOffsets = 0;
+  pIntializeParams->enableSubFrameWrite = 0;
+  pIntializeParams->maxEncodeWidth = m_nWidth;
+  pIntializeParams->maxEncodeHeight = m_nHeight;
+  pIntializeParams->enableMEOnlyMode = m_bMotionEstimationOnly;
+  pIntializeParams->enableOutputInVidmem = m_bOutputInVideoMemory;
+#if defined(_WIN32)
+  // Async encode is only considered on Windows, and only when the output is
+  // not kept in video memory.
+  if (!m_bOutputInVideoMemory) {
+    pIntializeParams->enableEncodeAsync =
+        GetCapabilityValue(codecGuid, NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT);
+  }
+#endif
+
+  // First pass: start from the plain preset config, then force P-only frames,
+  // an infinite GOP and constant-QP rate control.
+  // NOTE(review): the return value of nvEncGetEncodePresetConfig is ignored.
+  NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
+                                       {NV_ENC_CONFIG_VER}};
+  m_nvenc.nvEncGetEncodePresetConfig(m_hEncoder, codecGuid, presetGuid,
+                                     &presetConfig);
+  memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
+         sizeof(NV_ENC_CONFIG));
+  pIntializeParams->encodeConfig->frameIntervalP = 1;
+  pIntializeParams->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
+
+  pIntializeParams->encodeConfig->rcParams.rateControlMode =
+      NV_ENC_PARAMS_RC_CONSTQP;
+
+  if (!m_bMotionEstimationOnly) {
+    // NOTE(review): this memcpy replaces the whole encodeConfig, so the
+    // frameIntervalP / gopLength / rateControlMode overrides made just above
+    // are discarded in this branch. The inner presetConfig also shadows the
+    // outer one.
+    pIntializeParams->tuningInfo = tuningInfo;
+    NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
+                                         {NV_ENC_CONFIG_VER}};
+    m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, codecGuid, presetGuid,
+                                         tuningInfo, &presetConfig);
+    memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
+           sizeof(NV_ENC_CONFIG));
+  } else {
+    // NOTE(review): this branch writes the member m_encodeConfig, not the
+    // caller's pIntializeParams->encodeConfig.
+    m_encodeConfig.version = NV_ENC_CONFIG_VER;
+    m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+    m_encodeConfig.rcParams.constQP = {28, 31, 25};
+  }
+
+  // Codec-specific fix-ups: 4:4:4 input needs chromaFormatIDC = 3, 10-bit
+  // HEVC input needs pixelBitDepthMinus8 = 2, and the IDR period follows the
+  // GOP length currently stored in the config.
+  if (pIntializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
+    if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
+        m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
+      pIntializeParams->encodeConfig->encodeCodecConfig.h264Config
+          .chromaFormatIDC = 3;
+    }
+    pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod =
+        pIntializeParams->encodeConfig->gopLength;
+  } else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
+    pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
+        .pixelBitDepthMinus8 =
+        (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
+         m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
+            ? 2
+            : 0;
+    if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
+        m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
+      pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
+          .chromaFormatIDC = 3;
+    }
+    pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod =
+        pIntializeParams->encodeConfig->gopLength;
+  }
+
+  return;
+}
+
+// Validates *pEncoderParams, initializes the hardware encoder with it and
+// allocates the per-frame buffers.
+//
+// pEncoderParams must be non-null with a non-zero size and an H.264 or HEVC
+// codec GUID. pEncoderParams->encodeConfig may be null, in which case a
+// preset configuration is used. Errors are reported through
+// NVENC_THROW_ERROR / NVENC_API_CALL.
+void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
+  if (!m_hEncoder) {
+    NVENC_THROW_ERROR("Encoder Initialization failed",
+                      NV_ENC_ERR_NO_ENCODE_DEVICE);
+  }
+
+  if (!pEncoderParams) {
+    NVENC_THROW_ERROR("Invalid NV_ENC_INITIALIZE_PARAMS ptr",
+                      NV_ENC_ERR_INVALID_PTR);
+  }
+
+  if (pEncoderParams->encodeWidth == 0 || pEncoderParams->encodeHeight == 0) {
+    NVENC_THROW_ERROR("Invalid encoder width and height",
+                      NV_ENC_ERR_INVALID_PARAM);
+  }
+
+  if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID &&
+      pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID) {
+    NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM);
+  }
+
+  if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
+    if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
+        m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
+      NVENC_THROW_ERROR("10-bit format isn't supported by H264 encoder",
+                        NV_ENC_ERR_INVALID_PARAM);
+    }
+  }
+
+  // A null encodeConfig is a supported input (handled further down), so the
+  // codec-config consistency checks below only run when the caller actually
+  // supplied one; previously they dereferenced it unconditionally.
+  const bool bHasUserConfig = (pEncoderParams->encodeConfig != nullptr);
+
+  // set other necessary params if not set yet
+  if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
+    if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) && bHasUserConfig &&
+        (pEncoderParams->encodeConfig->encodeCodecConfig.h264Config
+             .chromaFormatIDC != 3)) {
+      NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
+    }
+  }
+
+  if (pEncoderParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
+    bool yuv10BitFormat =
+        (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
+         m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
+            ? true
+            : false;
+    if (yuv10BitFormat && bHasUserConfig &&
+        pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig
+                .pixelBitDepthMinus8 != 2) {
+      NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
+    }
+
+    if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
+         m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) &&
+        bHasUserConfig &&
+        (pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig
+             .chromaFormatIDC != 3)) {
+      NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
+    }
+  }
+
+  // Keep private copies so the caller's structures need not outlive this call.
+  memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams));
+  m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
+
+  if (pEncoderParams->encodeConfig) {
+    memcpy(&m_encodeConfig, pEncoderParams->encodeConfig,
+           sizeof(m_encodeConfig));
+    m_encodeConfig.version = NV_ENC_CONFIG_VER;
+  } else {
+    // No config supplied: fall back to the preset (or to constant-QP defaults
+    // in motion-estimation-only mode).
+    NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
+                                         {NV_ENC_CONFIG_VER}};
+    if (!m_bMotionEstimationOnly) {
+      m_nvenc.nvEncGetEncodePresetConfigEx(
+          m_hEncoder, pEncoderParams->encodeGUID, pEncoderParams->presetGUID,
+          pEncoderParams->tuningInfo, &presetConfig);
+      memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
+    } else {
+      m_encodeConfig.version = NV_ENC_CONFIG_VER;
+      m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+      m_encodeConfig.rcParams.constQP = {28, 31, 25};
+    }
+  }
+  m_initializeParams.encodeConfig = &m_encodeConfig;
+
+  NVENC_API_CALL(
+      m_nvenc.nvEncInitializeEncoder(m_hEncoder, &m_initializeParams));
+
+  m_bEncoderInitialized = true;
+  m_nWidth = m_initializeParams.encodeWidth;
+  m_nHeight = m_initializeParams.encodeHeight;
+  m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth;
+  m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight;
+
+  // Number of in-flight buffers: P-frame interval + lookahead depth + the
+  // extra output delay requested at construction.
+  m_nEncoderBuffer = m_encodeConfig.frameIntervalP +
+                     m_encodeConfig.rcParams.lookaheadDepth +
+                     m_nExtraOutputDelay;
+  m_nOutputDelay = m_nEncoderBuffer - 1;
+  m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
+
+  if (!m_bOutputInVideoMemory) {
+    m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr);
+  }
+
+#if defined(_WIN32)
+  // One auto-reset event per buffer, registered for async completion.
+  for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
+    m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
+    NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
+    eventParams.completionEvent = m_vpCompletionEvent[i];
+    m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams);
+  }
+#endif
+
+  if (m_bMotionEstimationOnly) {
+    m_vMappedRefBuffers.resize(m_nEncoderBuffer, nullptr);
+
+    if (!m_bOutputInVideoMemory) {
+      InitializeMVOutputBuffer();
+    }
+  } else {
+    if (!m_bOutputInVideoMemory) {
+      m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr);
+      InitializeBitstreamBuffer();
+    }
+  }
+
+  AllocateInputBuffers(m_nEncoderBuffer);
+}
+
+// Releases the input buffers and then the hardware encoder session. Does
+// nothing when no session is open.
+void NvEncoder::DestroyEncoder() {
+  if (m_hEncoder) {
+    ReleaseInputBuffers();
+    DestroyHWEncoder();
+  }
+}
+
+// Frees the output buffers (and, on Windows, the completion events), destroys
+// the encoder session and resets the handle and the initialized flag.
+// Does nothing when no session is open.
+void NvEncoder::DestroyHWEncoder() {
+  if (m_hEncoder == nullptr) {
+    return;
+  }
+
+#if defined(_WIN32)
+  for (auto hEvent : m_vpCompletionEvent) {
+    if (!hEvent) {
+      continue;
+    }
+    NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
+    eventParams.completionEvent = hEvent;
+    m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams);
+    CloseHandle(hEvent);
+  }
+  m_vpCompletionEvent.clear();
+#endif
+
+  // The output buffer kind depends on the operating mode.
+  if (!m_bMotionEstimationOnly) {
+    DestroyBitstreamBuffer();
+  } else {
+    DestroyMVOutputBuffer();
+  }
+
+  m_nvenc.nvEncDestroyEncoder(m_hEncoder);
+  m_hEncoder = nullptr;
+  m_bEncoderInitialized = false;
+}
+
+// Returns the input frame slot that the next submission will use
+// (m_iToSend modulo the number of encoder buffers).
+const NvEncInputFrame *NvEncoder::GetNextInputFrame() {
+  const int slot = m_iToSend % m_nEncoderBuffer;
+  return &m_vInputFrames[slot];
+}
+
+// Returns the reference frame slot that the next submission will use
+// (m_iToSend modulo the number of encoder buffers).
+const NvEncInputFrame *NvEncoder::GetNextReferenceFrame() {
+  const int slot = m_iToSend % m_nEncoderBuffer;
+  return &m_vReferenceFrames[slot];
+}
+
+// Maps the registered input resource for slot bfrIdx and, in
+// motion-estimation-only mode, the matching reference resource as well.
+// The mapped handles are stored in m_vMappedInputBuffers / m_vMappedRefBuffers.
+void NvEncoder::MapResources(uint32_t bfrIdx) {
+  NV_ENC_MAP_INPUT_RESOURCE mapInputResource = {NV_ENC_MAP_INPUT_RESOURCE_VER};
+
+  // Input surface for this slot.
+  mapInputResource.registeredResource = m_vRegisteredResources[bfrIdx];
+  NVENC_API_CALL(m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource));
+  m_vMappedInputBuffers[bfrIdx] = mapInputResource.mappedResource;
+
+  if (!m_bMotionEstimationOnly) {
+    return;
+  }
+
+  // Reference surface; the same parameter struct is reused for the second map.
+  mapInputResource.registeredResource =
+      m_vRegisteredResourcesForReference[bfrIdx];
+  NVENC_API_CALL(
+      m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource));
+  m_vMappedRefBuffers[bfrIdx] = mapInputResource.mappedResource;
+}
+
+// Submits the current input slot for encoding and collects whatever encoded
+// packets are ready into vPacket (which is cleared first).
+// pPicParams is forwarded to DoEncode() and may be null.
+void NvEncoder::EncodeFrame(std::vector<std::vector<uint8_t>> &vPacket,
+                            NV_ENC_PIC_PARAMS *pPicParams) {
+  vPacket.clear();
+  if (!IsHWEncoderInitialized()) {
+    NVENC_THROW_ERROR("Encoder device not found", NV_ENC_ERR_NO_ENCODE_DEVICE);
+  }
+
+  const int bfrIdx = m_iToSend % m_nEncoderBuffer;
+  MapResources(bfrIdx);
+
+  const NVENCSTATUS nvStatus = DoEncode(
+      m_vMappedInputBuffers[bfrIdx], m_vBitstreamOutputBuffer[bfrIdx],
+      pPicParams);
+
+  // NEED_MORE_INPUT is treated like success: the frame was accepted.
+  const bool bAccepted =
+      (nvStatus == NV_ENC_SUCCESS) || (nvStatus == NV_ENC_ERR_NEED_MORE_INPUT);
+  if (!bAccepted) {
+    NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus);
+  } else {
+    m_iToSend++;
+    GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, true);
+  }
+}
+
+// Runs motion estimation on the current input/reference slot and copies the
+// resulting motion-vector data into mvData.
+void NvEncoder::RunMotionEstimation(std::vector<uint8_t> &mvData) {
+  if (!m_hEncoder) {
+    NVENC_THROW_ERROR("Encoder Initialization failed",
+                      NV_ENC_ERR_NO_ENCODE_DEVICE);
+    return;
+  }
+
+  const uint32_t bfrIdx = m_iToSend % m_nEncoderBuffer;
+  MapResources(bfrIdx);
+
+  const NVENCSTATUS nvStatus = DoMotionEstimation(
+      m_vMappedInputBuffers[bfrIdx], m_vMappedRefBuffers[bfrIdx],
+      m_vMVDataOutputBuffer[bfrIdx]);
+
+  if (nvStatus != NV_ENC_SUCCESS) {
+    NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus);
+  } else {
+    m_iToSend++;
+    std::vector<std::vector<uint8_t>> vPacket;
+    GetEncodedPacket(m_vMVDataOutputBuffer, vPacket, true);
+    // Exactly one motion-vector blob is expected per call.
+    if (vPacket.size() != 1) {
+      NVENC_THROW_ERROR(
+          "GetEncodedPacket() doesn't return one (and only one) MVData",
+          NV_ENC_ERR_GENERIC);
+    }
+    mvData = vPacket[0];
+  }
+}
+
+// Queries the encoder's SPS/PPS payload and stores it in seqParams, replacing
+// any previous contents.
+void NvEncoder::GetSequenceParams(std::vector<uint8_t> &seqParams) {
+  uint8_t spsppsData[1024] = {};  // Assume maximum spspps data is 1KB or less
+  uint32_t spsppsSize = 0;
+
+  NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER};
+  payload.inBufferSize = sizeof(spsppsData);
+  payload.spsppsBuffer = spsppsData;
+  payload.outSPSPPSPayloadSize = &spsppsSize;
+  NVENC_API_CALL(m_nvenc.nvEncGetSequenceParams(m_hEncoder, &payload));
+
+  // Only the bytes reported by the API are handed back.
+  seqParams.assign(spsppsData, spsppsData + spsppsSize);
+}
+
+// Builds the picture parameters for one frame and submits it to the encoder.
+// Fields from *pPicParams (if given) are used as a base; version, picture
+// structure, buffers, format, size and completion event are always overwritten.
+// Returns the status of nvEncEncodePicture without throwing.
+NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer,
+                                NV_ENC_OUTPUT_PTR outputBuffer,
+                                NV_ENC_PIC_PARAMS *pPicParams) {
+  NV_ENC_PIC_PARAMS picParams =
+      pPicParams ? *pPicParams : NV_ENC_PIC_PARAMS{};
+
+  picParams.version = NV_ENC_PIC_PARAMS_VER;
+  picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+
+  // Source surface and its description.
+  picParams.inputBuffer = inputBuffer;
+  picParams.bufferFmt = GetPixelFormat();
+  picParams.inputWidth = GetEncodeWidth();
+  picParams.inputHeight = GetEncodeHeight();
+
+  // Destination and completion signalling for the current slot.
+  picParams.outputBitstream = outputBuffer;
+  picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
+
+  return m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams);
+}
+
+// Submits an end-of-stream picture (NV_ENC_PIC_FLAG_EOS) to the encoder,
+// using the completion event of the current slot.
+void NvEncoder::SendEOS() {
+  NV_ENC_PIC_PARAMS picParams = {NV_ENC_PIC_PARAMS_VER};
+  picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
+  picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+
+  NVENC_API_CALL(m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams));
+}
+
+/**
+ * Sends end-of-stream and collects every output still pending.
+ *
+ * @param vPacket Cleared on entry, then filled with the packets retrieved
+ *                from the bitstream output buffers.
+ * Throws NVENCException when the encoder is not initialized or an API call
+ * fails.
+ */
+void NvEncoder::EndEncode(std::vector<std::vector<uint8_t>> &vPacket) {
+  // The output is emptied before the state check, so it is empty even when
+  // the check below throws.
+  vPacket.clear();
+
+  const bool encoderReady = IsHWEncoderInitialized();
+  if (!encoderReady) {
+    NVENC_THROW_ERROR("Encoder device not initialized",
+                      NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
+  }
+
+  SendEOS();
+
+  // bOutputDelay == false: drain everything that has been sent.
+  const bool honourOutputDelay = false;
+  GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, honourOutputDelay);
+}
+
+/**
+ * Copies the finished outputs in [m_iGot, iEnd) out of vOutputBuffer into
+ * vPacket and unmaps the input resources associated with each slot.
+ *
+ * @param vOutputBuffer Output handles, indexed by frame index modulo
+ *                      m_nEncoderBuffer.
+ * @param vPacket       Receives one byte vector per retrieved output.
+ *                      Existing entries are reused (cleared and refilled)
+ *                      and the vector grows when needed; entries beyond the
+ *                      number retrieved are not removed here.
+ * @param bOutputDelay  When true, retrieval stops m_nOutputDelay frames
+ *                      short of m_iToSend; when false everything sent so far
+ *                      is retrieved.
+ *
+ * Advances m_iGot once per retrieved output. Throws NVENCException if a
+ * wrapped NVENC call fails.
+ */
+void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR> &vOutputBuffer,
+                                 std::vector<std::vector<uint8_t>> &vPacket,
+                                 bool bOutputDelay) {
+  // i counts the packets written into vPacket during this call.
+  unsigned i = 0;
+  int iEnd = bOutputDelay ? m_iToSend - m_nOutputDelay : m_iToSend;
+  for (; m_iGot < iEnd; m_iGot++) {
+    WaitForCompletionEvent(m_iGot % m_nEncoderBuffer);
+    NV_ENC_LOCK_BITSTREAM lockBitstreamData = {NV_ENC_LOCK_BITSTREAM_VER};
+    lockBitstreamData.outputBitstream =
+        vOutputBuffer[m_iGot % m_nEncoderBuffer];
+    lockBitstreamData.doNotWait = false;
+    NVENC_API_CALL(m_nvenc.nvEncLockBitstream(m_hEncoder, &lockBitstreamData));
+
+    // Copy the locked bytes into the i-th packet, appending a new packet
+    // only when vPacket has no entry to reuse.
+    uint8_t *pData = (uint8_t *)lockBitstreamData.bitstreamBufferPtr;
+    if (vPacket.size() < i + 1) {
+      vPacket.push_back(std::vector<uint8_t>());
+    }
+    vPacket[i].clear();
+    vPacket[i].insert(vPacket[i].end(), &pData[0],
+                      &pData[lockBitstreamData.bitstreamSizeInBytes]);
+    i++;
+
+    NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream(
+        m_hEncoder, lockBitstreamData.outputBitstream));
+
+    // Unmap the input resource of this slot and mark the slot as unmapped.
+    if (m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]) {
+      NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource(
+          m_hEncoder, m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]));
+      m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer] = nullptr;
+    }
+
+    // In motion-estimation-only mode the slot's reference buffer is
+    // unmapped as well.
+    if (m_bMotionEstimationOnly &&
+        m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer]) {
+      NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource(
+          m_hEncoder, m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer]));
+      m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer] = nullptr;
+    }
+  }
+}
+
+/**
+ * Reconfigures the encoder session and refreshes the cached parameters.
+ *
+ * Calls nvEncReconfigureEncoder() and, on success, copies the new
+ * initialization parameters (and the encode configuration when one is
+ * supplied) into this object, then updates the cached encode and maximum
+ * dimensions.
+ *
+ * @param pReconfigureParams Reconfiguration request passed to the API.
+ * @return Always true; failures are reported by throwing NVENCException.
+ */
+bool NvEncoder::Reconfigure(
+    const NV_ENC_RECONFIGURE_PARAMS *pReconfigureParams) {
+  NVENC_API_CALL(m_nvenc.nvEncReconfigureEncoder(
+      m_hEncoder, const_cast<NV_ENC_RECONFIGURE_PARAMS *>(pReconfigureParams)));
+
+  memcpy(&m_initializeParams, &(pReconfigureParams->reInitEncodeParams),
+         sizeof(m_initializeParams));
+  if (pReconfigureParams->reInitEncodeParams.encodeConfig) {
+    memcpy(&m_encodeConfig, pReconfigureParams->reInitEncodeParams.encodeConfig,
+           sizeof(m_encodeConfig));
+    // The struct copy above also copied the caller's encodeConfig pointer.
+    // Repoint it at our own copy so the cached parameters never reference
+    // memory owned by the caller, which may not outlive this call.
+    m_initializeParams.encodeConfig = &m_encodeConfig;
+  }
+
+  m_nWidth = m_initializeParams.encodeWidth;
+  m_nHeight = m_initializeParams.encodeHeight;
+  m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth;
+  m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight;
+
+  return true;
+}
+
+/**
+ * Registers a single buffer with the encoder through
+ * nvEncRegisterResource().
+ *
+ * @param pBuffer       Resource to register.
+ * @param eResourceType Resource type passed to the API.
+ * @param width         Width passed to the API.
+ * @param height        Height passed to the API.
+ * @param pitch         Pitch passed to the API.
+ * @param bufferFormat  Buffer format passed to the API.
+ * @param bufferUsage   Buffer usage passed to the API.
+ * @return The registered-resource handle filled in by the API.
+ * Throws NVENCException if registration fails.
+ */
+NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(
+    void *pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
+    int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
+    NV_ENC_BUFFER_USAGE bufferUsage) {
+  NV_ENC_REGISTER_RESOURCE request = {NV_ENC_REGISTER_RESOURCE_VER};
+
+  // What is being registered.
+  request.resourceToRegister = pBuffer;
+  request.resourceType = eResourceType;
+  request.bufferFormat = bufferFormat;
+  request.bufferUsage = bufferUsage;
+
+  // Its geometry.
+  request.width = width;
+  request.height = height;
+  request.pitch = pitch;
+
+  NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, &request));
+  return request.registeredResource;
+}
+
+/**
+ * Registers every buffer in inputframes with the encoder and records a
+ * matching NvEncInputFrame description for each.
+ *
+ * @param inputframes     Buffers to register.
+ * @param eResourceType   Resource type of the buffers.
+ * @param width           Width passed to RegisterResource().
+ * @param height          Height used for registration and chroma offsets.
+ * @param pitch           Pitch used for registration and chroma layout.
+ * @param bufferFormat    Pixel format of the buffers.
+ * @param bReferenceFrame When true the results go to the reference-frame
+ *                        lists, otherwise to the input-frame lists.
+ * Throws NVENCException if registration fails or the format is invalid.
+ */
+void NvEncoder::RegisterInputResources(std::vector<void *> inputframes,
+                                       NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
+                                       int width, int height, int pitch,
+                                       NV_ENC_BUFFER_FORMAT bufferFormat,
+                                       bool bReferenceFrame) {
+  // Destination lists are fixed for the whole call.
+  std::vector<NV_ENC_REGISTERED_PTR> &handleList =
+      bReferenceFrame ? m_vRegisteredResourcesForReference
+                      : m_vRegisteredResources;
+  std::vector<NvEncInputFrame> &frameList =
+      bReferenceFrame ? m_vReferenceFrames : m_vInputFrames;
+
+  for (void *framePtr : inputframes) {
+    const NV_ENC_REGISTERED_PTR handle =
+        RegisterResource(framePtr, eResourceType, width, height, pitch,
+                         bufferFormat, NV_ENC_INPUT_IMAGE);
+
+    std::vector<uint32_t> planeOffsets;
+    NvEncoder::GetChromaSubPlaneOffsets(bufferFormat, pitch, height,
+                                        planeOffsets);
+
+    NvEncInputFrame frame = {};
+    frame.inputPtr = framePtr;
+    frame.bufferFormat = bufferFormat;
+    frame.resourceType = eResourceType;
+    frame.pitch = pitch;
+    // Offsets default to zero and are overwritten for each chroma plane the
+    // format defines.
+    frame.chromaOffsets[0] = 0;
+    frame.chromaOffsets[1] = 0;
+    for (uint32_t plane = 0; plane < planeOffsets.size(); ++plane) {
+      frame.chromaOffsets[plane] = planeOffsets[plane];
+    }
+    frame.numChromaPlanes = NvEncoder::GetNumChromaPlanes(bufferFormat);
+    frame.chromaPitch = NvEncoder::GetChromaPitch(bufferFormat, pitch);
+
+    handleList.push_back(handle);
+    frameList.push_back(frame);
+  }
+}
+
+/**
+ * Best-effort flush of the encoder queue.
+ * Does nothing in motion-estimation-only mode or when output is kept in
+ * video memory. Any exception raised while flushing is deliberately
+ * swallowed.
+ */
+void NvEncoder::FlushEncoder() {
+  if (m_bMotionEstimationOnly || m_bOutputInVideoMemory) {
+    return;
+  }
+
+  // After an error, buffers may still be mapped to the encoder. Draining the
+  // queue here lets the caller unmap whatever is left afterwards.
+  try {
+    std::vector<std::vector<uint8_t>> discardedPackets;
+    EndEncode(discardedPackets);
+  } catch (...) {
+  }
+}
+
+/**
+ * Flushes the encoder, then unmaps every still-mapped buffer and
+ * unregisters every registered resource, emptying the bookkeeping lists.
+ * The status codes of the unmap/unregister calls are not checked.
+ */
+void NvEncoder::UnregisterInputResources() {
+  FlushEncoder();
+
+  // Reference buffers are only unmapped in motion-estimation-only mode, but
+  // the list is cleared in every mode.
+  if (m_bMotionEstimationOnly) {
+    for (NV_ENC_INPUT_PTR mappedRef : m_vMappedRefBuffers) {
+      if (mappedRef) {
+        m_nvenc.nvEncUnmapInputResource(m_hEncoder, mappedRef);
+      }
+    }
+  }
+  m_vMappedRefBuffers.clear();
+
+  for (NV_ENC_INPUT_PTR mappedInput : m_vMappedInputBuffers) {
+    if (mappedInput) {
+      m_nvenc.nvEncUnmapInputResource(m_hEncoder, mappedInput);
+    }
+  }
+  m_vMappedInputBuffers.clear();
+
+  for (NV_ENC_REGISTERED_PTR registered : m_vRegisteredResources) {
+    if (registered) {
+      m_nvenc.nvEncUnregisterResource(m_hEncoder, registered);
+    }
+  }
+  m_vRegisteredResources.clear();
+
+  for (NV_ENC_REGISTERED_PTR registeredRef :
+       m_vRegisteredResourcesForReference) {
+    if (registeredRef) {
+      m_nvenc.nvEncUnregisterResource(m_hEncoder, registeredRef);
+    }
+  }
+  m_vRegisteredResourcesForReference.clear();
+}
+
+/**
+ * Waits for the completion event in slot iEvent.
+ *
+ * The whole body is compiled only when _WIN32 is defined; on other
+ * platforms this function does nothing. On Windows it returns immediately
+ * unless the cached initialization parameters have enableEncodeAsync set.
+ *
+ * @param iEvent Index into m_vpCompletionEvent.
+ */
+void NvEncoder::WaitForCompletionEvent(int iEvent) {
+#if defined(_WIN32)
+  // Check if we are in async mode. If not, don't wait for event;
+  NV_ENC_CONFIG sEncodeConfig = {0};
+  NV_ENC_INITIALIZE_PARAMS sInitializeParams = {0};
+  // GetInitializeParams() requires a non-null encodeConfig destination.
+  sInitializeParams.encodeConfig = &sEncodeConfig;
+  GetInitializeParams(&sInitializeParams);
+
+  if (0U == sInitializeParams.enableEncodeAsync) {
+    return;
+  }
+#ifdef DEBUG
+  // Debug builds wait without a timeout and ignore the result.
+  WaitForSingleObject(m_vpCompletionEvent[iEvent], INFINITE);
+#else
+  // wait for 20s which is infinite on terms of gpu time
+  // NOTE(review): only WAIT_FAILED is treated as an error; any other return
+  // value, presumably including a timeout, falls through silently - confirm
+  // this is intended.
+  if (WaitForSingleObject(m_vpCompletionEvent[iEvent], 20000) == WAIT_FAILED) {
+    NVENC_THROW_ERROR("Failed to encode frame", NV_ENC_ERR_GENERIC);
+  }
+#endif
+#endif
+}
+
+/**
+ * Returns the width in bytes of one row for the given buffer format:
+ * width, width * 2 or width * 4 depending on the format.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for any other format.
+ */
+uint32_t NvEncoder::GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                    const uint32_t width) {
+  switch (bufferFormat) {
+    // Four-byte formats.
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      return width * 4;
+
+    // Two-byte formats.
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      return width * 2;
+
+    // One-byte formats.
+    case NV_ENC_BUFFER_FORMAT_NV12:
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV:
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+      return width;
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+  return 0;
+}
+
+/**
+ * Returns the number of chroma planes for the given buffer format
+ * (0, 1 or 2).
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for any other format.
+ */
+uint32_t NvEncoder::GetNumChromaPlanes(
+    const NV_ENC_BUFFER_FORMAT bufferFormat) {
+  switch (bufferFormat) {
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      return 0;
+
+    case NV_ENC_BUFFER_FORMAT_NV12:
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+      return 1;
+
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV:
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      return 2;
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+  return -1;
+}
+
+/**
+ * Returns the chroma pitch for the given buffer format: lumaPitch, half of
+ * lumaPitch rounded up, or 0 depending on the format.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for any other format.
+ */
+uint32_t NvEncoder::GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                   const uint32_t lumaPitch) {
+  switch (bufferFormat) {
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      return 0;
+
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV:
+      // Half the luma pitch, rounded up.
+      return (lumaPitch + 1) / 2;
+
+    case NV_ENC_BUFFER_FORMAT_NV12:
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      return lumaPitch;
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+  return -1;
+}
+
+/**
+ * Fills chromaOffsets with the offset of each chroma plane for the given
+ * format, pitch and height. The vector is cleared first and receives zero,
+ * one or two entries depending on the format.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for any other format.
+ */
+void NvEncoder::GetChromaSubPlaneOffsets(
+    const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t pitch,
+    const uint32_t height, std::vector<uint32_t> &chromaOffsets) {
+  chromaOffsets.clear();
+
+  // The first chroma plane, when there is one, starts pitch * height bytes
+  // in.
+  const uint32_t firstPlaneOffset = pitch * height;
+
+  switch (bufferFormat) {
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      // No chroma planes: the vector stays empty.
+      return;
+
+    case NV_ENC_BUFFER_FORMAT_NV12:
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+      chromaOffsets.push_back(firstPlaneOffset);
+      return;
+
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV: {
+      // The second plane follows the first, whose size is chroma pitch
+      // times chroma height.
+      chromaOffsets.push_back(firstPlaneOffset);
+      const uint32_t firstPlaneSize =
+          NvEncoder::GetChromaPitch(bufferFormat, pitch) *
+          GetChromaHeight(bufferFormat, height);
+      chromaOffsets.push_back(chromaOffsets[0] + firstPlaneSize);
+      return;
+    }
+
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      // Both chroma planes have the same size as the first offset.
+      chromaOffsets.push_back(firstPlaneOffset);
+      chromaOffsets.push_back(chromaOffsets[0] + firstPlaneOffset);
+      return;
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+}
+
+/**
+ * Returns the chroma plane height for the given buffer format: half of
+ * lumaHeight rounded up, lumaHeight, or 0 depending on the format.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for any other format.
+ */
+uint32_t NvEncoder::GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                    const uint32_t lumaHeight) {
+  switch (bufferFormat) {
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      return 0;
+
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      return lumaHeight;
+
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV:
+    case NV_ENC_BUFFER_FORMAT_NV12:
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+      // Half the luma height, rounded up.
+      return (lumaHeight + 1) / 2;
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+  return 0;
+}
+
+/**
+ * Returns the chroma plane width in bytes for the given buffer format:
+ * half of lumaWidth rounded up, lumaWidth, 2 * lumaWidth, or 0 depending on
+ * the format.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for any other format.
+ */
+uint32_t NvEncoder::GetChromaWidthInBytes(
+    const NV_ENC_BUFFER_FORMAT bufferFormat, const uint32_t lumaWidth) {
+  switch (bufferFormat) {
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      return 0;
+
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV:
+      // Half the luma width, rounded up.
+      return (lumaWidth + 1) / 2;
+
+    case NV_ENC_BUFFER_FORMAT_NV12:
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+      return lumaWidth;
+
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      return 2 * lumaWidth;
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+  return 0;
+}
+
+/**
+ * Queries one encoder capability through nvEncGetEncodeCaps().
+ *
+ * @param guidCodec   Codec GUID the capability is queried for.
+ * @param capsToQuery Capability to query.
+ * @return The capability value, or 0 when there is no encoder session or
+ *         the query fails.
+ */
+int NvEncoder::GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery) {
+  if (!m_hEncoder) {
+    return 0;
+  }
+  NV_ENC_CAPS_PARAM capsParam = {NV_ENC_CAPS_PARAM_VER};
+  capsParam.capsToQuery = capsToQuery;
+  // Initialized so that a failed query can never hand back an indeterminate
+  // value.
+  int capsValue = 0;
+  const NVENCSTATUS queryStatus = m_nvenc.nvEncGetEncodeCaps(
+      m_hEncoder, guidCodec, &capsParam, &capsValue);
+  if (queryStatus != NV_ENC_SUCCESS) {
+    // Same fallback as the "no encoder session" case above.
+    return 0;
+  }
+  return capsValue;
+}
+
+/**
+ * Returns the frame size computed from the current encode width, encode
+ * height and pixel format.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PARAM) for an unknown format.
+ */
+int NvEncoder::GetFrameSize() const {
+  const int encodeWidth = GetEncodeWidth();
+  const int encodeHeight = GetEncodeHeight();
+
+  switch (GetPixelFormat()) {
+    case NV_ENC_BUFFER_FORMAT_ARGB:
+    case NV_ENC_BUFFER_FORMAT_ARGB10:
+    case NV_ENC_BUFFER_FORMAT_AYUV:
+    case NV_ENC_BUFFER_FORMAT_ABGR:
+    case NV_ENC_BUFFER_FORMAT_ABGR10:
+      return 4 * encodeWidth * encodeHeight;
+
+    case NV_ENC_BUFFER_FORMAT_YUV444:
+      return encodeWidth * encodeHeight * 3;
+
+    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:
+      return 2 * encodeWidth * encodeHeight * 3;
+
+    // Full-height rows plus half-height (rounded up) rows.
+    case NV_ENC_BUFFER_FORMAT_YV12:
+    case NV_ENC_BUFFER_FORMAT_IYUV:
+    case NV_ENC_BUFFER_FORMAT_NV12:
+      return encodeWidth * (encodeHeight + (encodeHeight + 1) / 2);
+
+    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+      return 2 * encodeWidth * (encodeHeight + (encodeHeight + 1) / 2);
+
+    default:
+      break;
+  }
+
+  NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM);
+  return 0;
+}
+
+/**
+ * Copies the cached initialization parameters and encode configuration
+ * into caller-provided storage.
+ *
+ * @param pInitializeParams Destination; it and its encodeConfig member must
+ *                          both be non-null. On return encodeConfig still
+ *                          points at the caller's NV_ENC_CONFIG, which now
+ *                          holds a copy of the cached configuration.
+ * Throws NVENCException (NV_ENC_ERR_INVALID_PTR) when either pointer is
+ * null.
+ */
+void NvEncoder::GetInitializeParams(
+    NV_ENC_INITIALIZE_PARAMS *pInitializeParams) {
+  const bool hasDestination = pInitializeParams != nullptr &&
+                              pInitializeParams->encodeConfig != nullptr;
+  if (!hasDestination) {
+    NVENC_THROW_ERROR(
+        "Both pInitializeParams and pInitializeParams->encodeConfig can't be "
+        "NULL",
+        NV_ENC_ERR_INVALID_PTR);
+  }
+
+  // Remember the caller's config storage: the struct copy below overwrites
+  // the encodeConfig member, so it has to be restored afterwards.
+  NV_ENC_CONFIG *const callerConfig = pInitializeParams->encodeConfig;
+  *callerConfig = m_encodeConfig;
+  *pInitializeParams = m_initializeParams;
+  pInitializeParams->encodeConfig = callerConfig;
+}
+
+/**
+ * Creates one bitstream buffer per encoder buffer slot and stores each
+ * handle in the matching entry of m_vBitstreamOutputBuffer.
+ * Throws NVENCException if a buffer cannot be created.
+ */
+void NvEncoder::InitializeBitstreamBuffer() {
+  // Entries are assigned by index, so m_vBitstreamOutputBuffer is assumed
+  // to already hold m_nEncoderBuffer elements - TODO confirm at call site.
+  for (int slot = 0; slot < m_nEncoderBuffer; ++slot) {
+    NV_ENC_CREATE_BITSTREAM_BUFFER request = {
+        NV_ENC_CREATE_BITSTREAM_BUFFER_VER};
+    NVENC_API_CALL(m_nvenc.nvEncCreateBitstreamBuffer(m_hEncoder, &request));
+    m_vBitstreamOutputBuffer[slot] = request.bitstreamBuffer;
+  }
+}
+
+/**
+ * Destroys every non-null bitstream buffer and empties the list.
+ * The status of nvEncDestroyBitstreamBuffer() is not checked.
+ */
+void NvEncoder::DestroyBitstreamBuffer() {
+  for (NV_ENC_OUTPUT_PTR bitstreamBuffer : m_vBitstreamOutputBuffer) {
+    if (!bitstreamBuffer) {
+      continue;
+    }
+    m_nvenc.nvEncDestroyBitstreamBuffer(m_hEncoder, bitstreamBuffer);
+  }
+
+  m_vBitstreamOutputBuffer.clear();
+}
+
+/**
+ * Creates m_nEncoderBuffer motion-vector buffers and appends each handle
+ * to m_vMVDataOutputBuffer.
+ * Throws NVENCException if a buffer cannot be created.
+ */
+void NvEncoder::InitializeMVOutputBuffer() {
+  int remaining = m_nEncoderBuffer;
+  while (remaining-- > 0) {
+    NV_ENC_CREATE_MV_BUFFER request = {NV_ENC_CREATE_MV_BUFFER_VER};
+    NVENC_API_CALL(m_nvenc.nvEncCreateMVBuffer(m_hEncoder, &request));
+    m_vMVDataOutputBuffer.push_back(request.mvBuffer);
+  }
+}
+
+/**
+ * Destroys every non-null motion-vector buffer and empties the list.
+ * The status of nvEncDestroyMVBuffer() is not checked.
+ */
+void NvEncoder::DestroyMVOutputBuffer() {
+  for (NV_ENC_OUTPUT_PTR mvBuffer : m_vMVDataOutputBuffer) {
+    if (!mvBuffer) {
+      continue;
+    }
+    m_nvenc.nvEncDestroyMVBuffer(m_hEncoder, mvBuffer);
+  }
+
+  m_vMVDataOutputBuffer.clear();
+}
+
+/**
+ * Submits one motion-estimation request to
+ * nvEncRunMotionEstimationOnly().
+ *
+ * @param inputBuffer             Input frame handle.
+ * @param inputBufferForReference Reference frame handle.
+ * @param outputBuffer            Buffer that receives the motion vectors.
+ * @return The status returned by the API; no exception is thrown here on
+ *         failure.
+ */
+NVENCSTATUS NvEncoder::DoMotionEstimation(
+    NV_ENC_INPUT_PTR inputBuffer, NV_ENC_INPUT_PTR inputBufferForReference,
+    NV_ENC_OUTPUT_PTR outputBuffer) {
+  const int eventSlot = m_iToSend % m_nEncoderBuffer;
+
+  NV_ENC_MEONLY_PARAMS request = {NV_ENC_MEONLY_PARAMS_VER};
+  request.inputWidth = GetEncodeWidth();
+  request.inputHeight = GetEncodeHeight();
+  request.inputBuffer = inputBuffer;
+  request.referenceFrame = inputBufferForReference;
+  request.mvBuffer = outputBuffer;
+  request.completionEvent = GetCompletionEvent(eventSlot);
+
+  return m_nvenc.nvEncRunMotionEstimationOnly(m_hEncoder, &request);
+}
--- a/src/media/video/encode/nvcodec/NvEncoder.h
+++ b/src/media/video/encode/nvcodec/NvEncoder.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <string.h>
+
+#include <iostream>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "nvEncodeAPI.h"
+
+/**
+ * @brief Exception class for error reporting from NvEncodeAPI calls.
+ *
+ * Carries a human-readable message together with the NVENCSTATUS code that
+ * caused the failure.
+ */
+class NVENCException : public std::exception {
+ public:
+  /**
+   *  @brief Builds an exception from a message and an NVENC status code.
+   */
+  NVENCException(const std::string& errorStr, const NVENCSTATUS errorCode)
+      : m_errorString(errorStr), m_errorCode(errorCode) {}
+
+  // `noexcept` replaces the deprecated dynamic exception specification
+  // `throw()`, which was removed from the language in C++20.
+  ~NVENCException() noexcept override {}
+
+  /**
+   *  @brief Returns the stored message; the pointer stays valid for the
+   *  lifetime of this exception object.
+   */
+  const char* what() const noexcept override { return m_errorString.c_str(); }
+
+  /**
+   *  @brief Returns the NVENC status code stored at construction.
+   */
+  NVENCSTATUS getErrorCode() const { return m_errorCode; }
+
+  /**
+   *  @brief Returns the stored message as a string.
+   */
+  const std::string& getErrorString() const { return m_errorString; }
+
+  /**
+   *  @brief Builds an exception whose message combines the function name,
+   *  the error text and the file/line of the failure.
+   */
+  static NVENCException makeNVENCException(const std::string& errorStr,
+                                           const NVENCSTATUS errorCode,
+                                           const std::string& functionName,
+                                           const std::string& fileName,
+                                           int lineNo);
+
+ private:
+  std::string m_errorString;
+  NVENCSTATUS m_errorCode;
+};
+
+/**
+ * Builds an NVENCException whose message has the form
+ * "<function> : <error> at <file>:<line>" followed by a newline.
+ */
+inline NVENCException NVENCException::makeNVENCException(
+    const std::string& errorStr, const NVENCSTATUS errorCode,
+    const std::string& functionName, const std::string& fileName, int lineNo) {
+  std::ostringstream message;
+  message << functionName << " : " << errorStr;
+  message << " at " << fileName << ":" << lineNo << std::endl;
+  return NVENCException(message.str(), errorCode);
+}
+
+// Throws an NVENCException built from errorStr and errorCode, tagged with
+// the function, file and line where the macro is expanded. Wrapped in
+// do/while(0) so it behaves as a single statement.
+#define NVENC_THROW_ERROR(errorStr, errorCode)                  \
+  do {                                                          \
+    throw NVENCException::makeNVENCException(                   \
+        errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
+  } while (0)
+
+// Evaluates the NVENC API expression once and throws an NVENCException when
+// the result is not NV_ENC_SUCCESS. The message contains the stringified
+// expression and the returned status code, tagged with the function, file
+// and line where the macro is expanded.
+#define NVENC_API_CALL(nvencAPI)                                        \
+  do {                                                                  \
+    NVENCSTATUS errorCode = nvencAPI;                                   \
+    if (errorCode != NV_ENC_SUCCESS) {                                  \
+      std::ostringstream errorLog;                                      \
+      errorLog << #nvencAPI << " returned error " << errorCode;         \
+      throw NVENCException::makeNVENCException(                         \
+          errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
+    }                                                                   \
+  } while (0)
+
+/**
+ * @brief Description of one input (or reference) frame buffer.
+ * Only inputPtr has a default initializer; the other members hold
+ * indeterminate values unless the object is value-initialized.
+ */
+struct NvEncInputFrame {
+  void* inputPtr = nullptr;    // the frame buffer itself
+  uint32_t chromaOffsets[2];   // offset of each chroma plane; unused entries
+                               // are set to 0 by RegisterInputResources()
+  uint32_t numChromaPlanes;    // number of chroma planes for bufferFormat
+  uint32_t pitch;              // pitch the buffer was registered with
+  uint32_t chromaPitch;        // pitch of the chroma plane(s)
+  NV_ENC_BUFFER_FORMAT bufferFormat;        // pixel format of the buffer
+  NV_ENC_INPUT_RESOURCE_TYPE resourceType;  // resource type of the buffer
+};
+
+/**
+ * @brief Shared base class for different encoder interfaces.
+ * Derived classes supply the input buffers by implementing
+ * AllocateInputBuffers() and ReleaseInputBuffers().
+ */
+class NvEncoder {
+ public:
+  /**
+   *  @brief This function is used to initialize the encoder session.
+   *  Application must call this function to initialize the encoder, before
+   *  starting to encode any frames.
+   */
+  void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
+
+  /**
+   *  @brief  This function is used to destroy the encoder session.
+   *  Application must call this function to destroy the encoder session and
+   *  clean up any allocated resources. The application must call EndEncode()
+   *  function to get any queued encoded frames before calling DestroyEncoder().
+   */
+  void DestroyEncoder();
+
+  /**
+   *  @brief  This function is used to reconfigure an existing encoder session.
+   *  Application can use this function to dynamically change the bitrate,
+   *  resolution and other QOS parameters. If the application changes the
+   *  resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR.
+   *  Returns true on success; failures are reported by throwing
+   *  NVENCException.
+   */
+  bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams);
+
+  /**
+   *  @brief  This function is used to get the next available input buffer.
+   *  Applications must call this function to obtain a pointer to the next
+   *  input buffer. The application must copy the uncompressed data to the
+   *  input buffer and then call EncodeFrame() function to encode it.
+   */
+  const NvEncInputFrame* GetNextInputFrame();
+
+  /**
+   *  @brief  This function is used to encode a frame.
+   *  Applications must call EncodeFrame() function to encode the uncompressed
+   *  data, which has been copied to an input buffer obtained from the
+   *  GetNextInputFrame() function.
+   */
+  void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket,
+                   NV_ENC_PIC_PARAMS* pPicParams = nullptr);
+
+  /**
+   *  @brief  This function is used to flush the encoder queue.
+   *  The encoder might be queuing frames for B picture encoding or lookahead;
+   *  the application must call EndEncode() to get all the queued encoded frames
+   *  from the encoder. The application must call this function before
+   *  destroying an encoder session.
+   */
+  void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
+
+  /**
+   *  @brief  This function is used to query hardware encoder capabilities.
+   *  Applications can call this function to query capabilities like maximum
+   *  encode dimensions, support for lookahead or the ME-only mode etc.
+   *  Returns 0 when there is no encoder session.
+   */
+  int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery);
+
+  /**
+   *  @brief  This function is used to get the current device on which encoder
+   *  is running.
+   */
+  void* GetDevice() const { return m_pDevice; }
+
+  /**
+   *  @brief  This function is used to get the current device type which encoder
+   *  is running.
+   */
+  NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; }
+
+  /**
+   *  @brief  This function is used to get the current encode width.
+   *  The encode width can be modified by Reconfigure() function.
+   */
+  int GetEncodeWidth() const { return m_nWidth; }
+
+  /**
+   *  @brief  This function is used to get the current encode height.
+   *  The encode height can be modified by Reconfigure() function.
+   */
+  int GetEncodeHeight() const { return m_nHeight; }
+
+  /**
+   *  @brief  This function is used to get the current frame size based on
+   *  pixel format.
+   */
+  int GetFrameSize() const;
+
+  /**
+   *  @brief  This function is used to initialize config parameters based on
+   *          given codec and preset guids.
+   *  The application can call this function to get the default configuration
+   *  for a certain preset. The application can either use these parameters
+   *  directly or override them with application-specific settings before
+   *  using them in CreateEncoder() function.
+   */
+  void CreateDefaultEncoderParams(
+      NV_ENC_INITIALIZE_PARAMS* pIntializeParams, GUID codecGuid,
+      GUID presetGuid,
+      NV_ENC_TUNING_INFO tuningInfo = NV_ENC_TUNING_INFO_UNDEFINED);
+
+  /**
+   *  @brief  This function is used to get the current initialization
+   *  parameters, which had been used to configure the encoder session. The
+   *  initialization parameters are modified if the application calls
+   *  Reconfigure() function.
+   *  Both pInitializeParams and pInitializeParams->encodeConfig must be
+   *  non-null; otherwise NVENCException is thrown.
+   */
+  void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams);
+
+  /**
+   *  @brief  This function is used to run motion estimation
+   *  This is used to run motion estimation on a pair of frames. The
+   *  application must copy the reference frame data to the buffer obtained
+   *  by calling GetNextReferenceFrame(), and copy the input frame data to
+   *  the buffer obtained by calling GetNextInputFrame() before calling the
+   *  RunMotionEstimation() function.
+   */
+  void RunMotionEstimation(std::vector<uint8_t>& mvData);
+
+  /**
+   *  @brief This function is used to get an available reference frame.
+   *  Application must call this function to get a pointer to reference buffer,
+   *  to be used in the subsequent RunMotionEstimation() function.
+   */
+  const NvEncInputFrame* GetNextReferenceFrame();
+
+  /**
+   *  @brief This function is used to get sequence and picture parameter
+   *  headers. Application can call this function after encoder is initialized
+   *  to get SPS and PPS nalus for the current encoder instance. The sequence
+   *  header data might change when application calls Reconfigure() function.
+   */
+  void GetSequenceParams(std::vector<uint8_t>& seqParams);
+
+  /**
+   *  @brief  NvEncoder class virtual destructor.
+   */
+  virtual ~NvEncoder();
+
+ public:
+  /**
+   *  @brief This is a static function to get chroma offsets for YUV planar
+   *  formats. chromaOffsets is cleared and then receives one entry per
+   *  chroma plane.
+   */
+  static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                       const uint32_t pitch,
+                                       const uint32_t height,
+                                       std::vector<uint32_t>& chromaOffsets);
+  /**
+   *  @brief This is a static function to get the chroma plane pitch for YUV
+   *  planar formats.
+   */
+  static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                 const uint32_t lumaPitch);
+
+  /**
+   *  @brief This is a static function to get the number of chroma planes for
+   *  YUV planar formats.
+   */
+  static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat);
+
+  /**
+   *  @brief This is a static function to get the chroma plane width in bytes
+   *  for YUV planar formats.
+   */
+  static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                        const uint32_t lumaWidth);
+
+  /**
+   *  @brief This is a static function to get the chroma plane height for YUV
+   *  planar formats, derived from the luma height.
+   */
+  static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                  const uint32_t lumaHeight);
+
+  /**
+   *  @brief This is a static function to get the width in bytes for the frame.
+   *  For YUV planar format this is the width in bytes of the luma plane.
+   */
+  static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat,
+                                  const uint32_t width);
+
+  /**
+   *  @brief This function returns the number of allocated buffers.
+   */
+  uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
+
+ protected:
+  /**
+   *  @brief  NvEncoder class constructor.
+   *  NvEncoder class constructor cannot be called directly by the application.
+   */
+  NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth,
+            uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
+            uint32_t nOutputDelay, bool bMotionEstimationOnly,
+            bool bOutputInVideoMemory = false);
+
+  /**
+   *  @brief This function is used to check if hardware encoder is properly
+   *  initialized, i.e. a session handle exists and the initialized flag is
+   *  set.
+   */
+  bool IsHWEncoderInitialized() const {
+    return m_hEncoder != NULL && m_bEncoderInitialized;
+  }
+
+  /**
+   *  @brief This function is used to register CUDA, D3D or OpenGL input buffers
+   *  with NvEncodeAPI. This is non public function and is called by derived
+   *  class for allocating and registering input buffers.
+   */
+  void RegisterInputResources(std::vector<void*> inputframes,
+                              NV_ENC_INPUT_RESOURCE_TYPE eResourceType,
+                              int width, int height, int pitch,
+                              NV_ENC_BUFFER_FORMAT bufferFormat,
+                              bool bReferenceFrame = false);
+
+  /**
+   *  @brief This function is used to unregister resources which had been
+   *  previously registered for encoding using RegisterInputResources()
+   *  function.
+   */
+  void UnregisterInputResources();
+
+  /**
+   *  @brief This function is used to register CUDA, D3D or OpenGL input or
+   *  output buffers with NvEncodeAPI.
+   */
+  NV_ENC_REGISTERED_PTR RegisterResource(
+      void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
+      int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
+      NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE);
+
+  /**
+   *  @brief This function returns maximum width used to open the encoder
+   *  session. All encode input buffers are allocated using maximum dimensions.
+   */
+  uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; }
+
+  /**
+   *  @brief This function returns maximum height used to open the encoder
+   *  session. All encode input buffers are allocated using maximum dimensions.
+   */
+  uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; }
+
+  /**
+   *  @brief This function returns the completion event at eventIdx, or
+   *  nullptr when the number of completion events does not equal the number
+   *  of encoder buffers.
+   */
+  void* GetCompletionEvent(uint32_t eventIdx) {
+    return (m_vpCompletionEvent.size() == m_nEncoderBuffer)
+               ? m_vpCompletionEvent[eventIdx]
+               : nullptr;
+  }
+
+  /**
+   *  @brief This function returns the current pixel format.
+   */
+  NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; }
+
+  /**
+   *  @brief This function is used to submit the encode commands to the
+   *         NVENC hardware. Returns the API status instead of throwing.
+   */
+  NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer,
+                       NV_ENC_OUTPUT_PTR outputBuffer,
+                       NV_ENC_PIC_PARAMS* pPicParams);
+
+  /**
+   *  @brief This function is used to submit the encode commands to the
+   *         NVENC hardware for ME only mode. Returns the API status instead
+   *         of throwing.
+   */
+  NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer,
+                                 NV_ENC_INPUT_PTR inputBufferForReference,
+                                 NV_ENC_OUTPUT_PTR outputBuffer);
+
+  /**
+   *  @brief This function is used to map the input buffers to NvEncodeAPI.
+   */
+  void MapResources(uint32_t bfrIdx);
+
+  /**
+   *  @brief This function is used to wait for completion of encode command.
+   */
+  void WaitForCompletionEvent(int iEvent);
+
+  /**
+   *  @brief This function is used to send EOS to HW encoder.
+   */
+  void SendEOS();
+
+ private:
+  /**
+   *  @brief This is a private function which is used to check if there is any
+   *  buffering done by encoder.
+   *  The encoder generally buffers data to encode B frames or for lookahead
+   *  or pipelining.
+   */
+  bool IsZeroDelay() { return m_nOutputDelay == 0; }
+
+  /**
+   *  @brief This is a private function which is used to load the encode api
+   *  shared library.
+   */
+  void LoadNvEncApi();
+
+  /**
+   *  @brief This is a private function which is used to get the output packets
+   *         from the encoder HW.
+   *  This is called after frames have been submitted (for example by
+   *  EndEncode()). If there is buffering enabled, this may return without
+   *  any output data.
+   */
+  void GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR>& vOutputBuffer,
+                        std::vector<std::vector<uint8_t>>& vPacket,
+                        bool bOutputDelay);
+
+  /**
+   *  @brief This is a private function which is used to initialize the
+   *  bitstream buffers. This is only used in the encoding mode.
+   */
+  void InitializeBitstreamBuffer();
+
+  /**
+   *  @brief This is a private function which is used to destroy the bitstream
+   *  buffers. This is only used in the encoding mode.
+   */
+  void DestroyBitstreamBuffer();
+
+  /**
+   *  @brief This is a private function which is used to initialize MV output
+   *  buffers. This is only used in ME-only Mode.
+   */
+  void InitializeMVOutputBuffer();
+
+  /**
+   *  @brief This is a private function which is used to destroy MV output
+   *  buffers. This is only used in ME-only Mode.
+   */
+  void DestroyMVOutputBuffer();
+
+  /**
+   *  @brief This is a private function which is used to destroy HW encoder.
+   */
+  void DestroyHWEncoder();
+
+  /**
+   *  @brief This function is used to flush the encoder queue.
+   *  Errors raised while flushing are swallowed.
+   */
+  void FlushEncoder();
+
+ private:
+  /**
+   *  @brief This is a pure virtual function which is used to allocate input
+   *  buffers. The derived classes must implement this function.
+   */
+  virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0;
+
+  /**
+   *  @brief This is a pure virtual function which is used to destroy input
+   *  buffers. The derived classes must implement this function.
+   */
+  virtual void ReleaseInputBuffers() = 0;
+
+ protected:
+  bool m_bMotionEstimationOnly = false;
+  bool m_bOutputInVideoMemory = false;
+  // Encoder session handle; null until a session exists.
+  void* m_hEncoder = nullptr;
+  // Table of NVENC API entry points used for every API call.
+  NV_ENCODE_API_FUNCTION_LIST m_nvenc;
+  std::vector<NvEncInputFrame> m_vInputFrames;
+  std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
+  std::vector<NvEncInputFrame> m_vReferenceFrames;
+  std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResourcesForReference;
+  // Mapped resources per buffer slot; an entry is null when nothing is
+  // mapped for that slot.
+  std::vector<NV_ENC_INPUT_PTR> m_vMappedInputBuffers;
+  std::vector<NV_ENC_INPUT_PTR> m_vMappedRefBuffers;
+  std::vector<void*> m_vpCompletionEvent;
+
+  // m_iToSend counts frames submitted; m_iGot counts outputs retrieved.
+  // Both are used modulo m_nEncoderBuffer to pick a buffer slot.
+  int32_t m_iToSend = 0;
+  int32_t m_iGot = 0;
+  int32_t m_nEncoderBuffer = 0;
+  int32_t m_nOutputDelay = 0;
+
+ private:
+  uint32_t m_nWidth;
+  uint32_t m_nHeight;
+  NV_ENC_BUFFER_FORMAT m_eBufferFormat;
+  void* m_pDevice;
+  NV_ENC_DEVICE_TYPE m_eDeviceType;
+  // Cached copies of the parameters the session was configured with;
+  // refreshed by Reconfigure().
+  NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
+  NV_ENC_CONFIG m_encodeConfig = {};
+  bool m_bEncoderInitialized = false;
+  uint32_t m_nExtraOutputDelay =
+      3;  // To ensure encode and graphics can work in parallel,
+          // m_nExtraOutputDelay should be set to at least 1
+  std::vector<NV_ENC_OUTPUT_PTR> m_vBitstreamOutputBuffer;
+  std::vector<NV_ENC_OUTPUT_PTR> m_vMVDataOutputBuffer;
+  uint32_t m_nMaxEncodeWidth = 0;
+  uint32_t m_nMaxEncodeHeight = 0;
+};
--- a/src/media/video/encode/nvcodec/NvEncoderCuda.cpp
+++ b/src/media/video/encode/nvcodec/NvEncoderCuda.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#include "NvEncoderCuda.h"
+
+/**
+ *  @brief Builds a CUDA-backed encoder on top of NvEncoder and remembers the
+ *  CUDA context. Throws (via NVENC_THROW_ERROR) when either the encoder
+ *  handle or the CUDA context is null after base-class construction.
+ */
+NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
+                             uint32_t nHeight,
+                             NV_ENC_BUFFER_FORMAT eBufferFormat,
+                             uint32_t nExtraOutputDelay,
+                             bool bMotionEstimationOnly,
+                             bool bOutputInVideoMemory)
+    : NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight,
+                eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly,
+                bOutputInVideoMemory),
+      m_cuContext(cuContext) {
+  // The encoder handle is checked first, so it wins when both are missing.
+  const bool hasEncoderHandle = (m_hEncoder != nullptr);
+  if (!hasEncoderHandle) {
+    NVENC_THROW_ERROR("Encoder Initialization failed",
+                      NV_ENC_ERR_INVALID_DEVICE);
+  }
+
+  const bool hasCudaContext = (m_cuContext != nullptr);
+  if (!hasCudaContext) {
+    NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE);
+  }
+}
+
+/**
+ *  @brief Releases the CUDA input/reference buffers owned by this encoder.
+ */
+NvEncoderCuda::~NvEncoderCuda() {
+  ReleaseCudaResources();
+}
+
+/**
+ *  @brief Allocates pitched CUDA device buffers for input frames and
+ *  registers them with the encoder.
+ *  @param numInputBuffers Number of device frames to allocate per buffer set.
+ *  Throws when the hardware encoder is not initialized or a CUDA call fails.
+ */
+void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
+  if (!IsHWEncoderInitialized()) {
+    NVENC_THROW_ERROR("Encoder intialization failed",
+                      NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
+  }
+
+  // Motion-estimation-only mode needs a second, separate set of buffers that
+  // is registered as reference frames.
+  const int bufferSetCount = m_bMotionEstimationOnly ? 2 : 1;
+
+  for (int setIndex = 0; setIndex < bufferSetCount; ++setIndex) {
+    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
+
+    std::vector<void *> deviceFrames;
+    for (int frameIndex = 0; frameIndex < numInputBuffers; ++frameIndex) {
+      // YV12 and IYUV use the height of a single chroma plane; every other
+      // format scales it by the number of chroma planes.
+      const bool singleChromaHeight =
+          GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
+          GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV;
+      uint32_t chromaRows =
+          GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
+      if (!singleChromaHeight) {
+        chromaRows = GetNumChromaPlanes(GetPixelFormat()) * chromaRows;
+      }
+
+      CUdeviceptr deviceFrame;
+      CUDA_DRVAPI_CALL(cuMemAllocPitch(
+          &deviceFrame, &m_cudaPitch,
+          GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
+          GetMaxEncodeHeight() + chromaRows, 16));
+      deviceFrames.push_back(reinterpret_cast<void *>(deviceFrame));
+    }
+
+    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+
+    // The second set (index 1) is the reference-frame set.
+    const bool isReferenceSet = (setIndex == 1);
+    RegisterInputResources(deviceFrames,
+                           NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
+                           GetMaxEncodeWidth(), GetMaxEncodeHeight(),
+                           static_cast<int>(m_cudaPitch), GetPixelFormat(),
+                           isReferenceSet);
+  }
+}
+
+/**
+ *  @brief Passes the given input and output CUDA streams to the encode
+ *  session through nvEncSetIOCudaStreams.
+ *  @param inputStream  Stream handle forwarded as the input stream.
+ *  @param outputStream Stream handle forwarded as the output stream.
+ *  The call is wrapped in NVENC_API_CALL (defined outside this file), which
+ *  presumably reports a failing status as an exception - confirm there.
+ */
+void NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream,
+                                     NV_ENC_CUSTREAM_PTR outputStream) {
+  NVENC_API_CALL(
+      m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream));
+}
+
+/**
+ *  @brief Override of NvEncoder::ReleaseInputBuffers(); delegates to
+ *  ReleaseCudaResources().
+ */
+void NvEncoderCuda::ReleaseInputBuffers() {
+  ReleaseCudaResources();
+}
+
+/**
+ *  @brief Unregisters the input resources and frees every CUDA device buffer
+ *  held in m_vInputFrames and m_vReferenceFrames. m_cuContext is cleared at
+ *  the end, so a second call returns immediately.
+ */
+void NvEncoderCuda::ReleaseCudaResources() {
+  // Nothing to do without both an encoder handle and a CUDA context.
+  if (!m_hEncoder || !m_cuContext) {
+    return;
+  }
+
+  UnregisterInputResources();
+
+  cuCtxPushCurrent(m_cuContext);
+
+  // Frees each non-null device pointer in the list, then empties the list.
+  auto freeFrames = [](std::vector<NvEncInputFrame> &frames) {
+    for (const NvEncInputFrame &frame : frames) {
+      if (frame.inputPtr) {
+        cuMemFree(reinterpret_cast<CUdeviceptr>(frame.inputPtr));
+      }
+    }
+    frames.clear();
+  };
+
+  freeFrames(m_vInputFrames);
+  freeFrames(m_vReferenceFrames);
+
+  cuCtxPopCurrent(NULL);
+  m_cuContext = nullptr;
+}
+
+/**
+ *  @brief Copies one frame (luma plane, then each chroma plane) from host or
+ *  device memory into a device frame whose chroma planes sit at byte offsets
+ *  from pDstFrame.
+ *  @param device               CUDA context made current for the copy.
+ *  @param pSrcFrame            Source frame; chroma planes are located with
+ *                              NvEncoder::GetChromaSubPlaneOffsets().
+ *  @param nSrcPitch            Source pitch in bytes; 0 means the row width in
+ *                              bytes for pixelFormat/width.
+ *  @param pDstFrame            Destination device pointer (luma plane).
+ *  @param dstPitch             Destination pitch in bytes.
+ *  @param width, height        Frame dimensions used for the luma copy.
+ *  @param srcMemoryType        CU_MEMORYTYPE_HOST or CU_MEMORYTYPE_DEVICE.
+ *  @param pixelFormat          Buffer format of the frame.
+ *  @param dstChromaOffsets     Byte offsets of each destination chroma plane.
+ *  @param numChromaPlanes      Number of chroma planes to copy.
+ *  @param bUnAlignedDeviceCopy Use cuMemcpy2DUnaligned for device sources.
+ *  @param stream               When non-null, copies are issued with
+ *                              cuMemcpy2DAsync on this stream.
+ *  Throws on an invalid source memory type or any failing CUDA call. The
+ *  pushed context is popped on the exception path as well, so a failed copy
+ *  does not leave the context on the calling thread's stack.
+ */
+void NvEncoderCuda::CopyToDeviceFrame(
+    CUcontext device, void *pSrcFrame, uint32_t nSrcPitch,
+    CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
+    CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
+    const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes,
+    bool bUnAlignedDeviceCopy, CUstream stream) {
+  if (srcMemoryType != CU_MEMORYTYPE_HOST &&
+      srcMemoryType != CU_MEMORYTYPE_DEVICE) {
+    NVENC_THROW_ERROR("Invalid source memory type for copy",
+                      NV_ENC_ERR_INVALID_PARAM);
+  }
+
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
+
+  try {
+    uint32_t srcPitch =
+        nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
+
+    // Luma plane.
+    CUDA_MEMCPY2D m = {0};
+    m.srcMemoryType = srcMemoryType;
+    if (srcMemoryType == CU_MEMORYTYPE_HOST) {
+      m.srcHost = pSrcFrame;
+    } else {
+      m.srcDevice = (CUdeviceptr)pSrcFrame;
+    }
+    m.srcPitch = srcPitch;
+    m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    m.dstDevice = pDstFrame;
+    m.dstPitch = dstPitch;
+    m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
+    m.Height = height;
+    if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
+      CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
+    } else {
+      CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
+                                      : cuMemcpy2DAsync(&m, stream));
+    }
+
+    // Chroma planes: the same descriptor is reused with per-plane addresses,
+    // pitches and sizes.
+    std::vector<uint32_t> srcChromaOffsets;
+    NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height,
+                                        srcChromaOffsets);
+    uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
+    uint32_t destChromaPitch =
+        NvEncoder::GetChromaPitch(pixelFormat, dstPitch);
+    uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
+    uint32_t chromaWidthInBytes =
+        NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
+
+    for (uint32_t i = 0; i < numChromaPlanes; ++i) {
+      if (chromaHeight) {
+        if (srcMemoryType == CU_MEMORYTYPE_HOST) {
+          m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
+        } else {
+          m.srcDevice =
+              (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
+        }
+        m.srcPitch = srcChromaPitch;
+
+        m.dstDevice =
+            (CUdeviceptr)((uint8_t *)pDstFrame + dstChromaOffsets[i]);
+        m.dstPitch = destChromaPitch;
+        m.WidthInBytes = chromaWidthInBytes;
+        m.Height = chromaHeight;
+        if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
+          CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
+        } else {
+          CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
+                                          : cuMemcpy2DAsync(&m, stream));
+        }
+      }
+    }
+  } catch (...) {
+    // Balance the push above before propagating; the pop result is ignored
+    // because an error is already in flight.
+    cuCtxPopCurrent(NULL);
+    throw;
+  }
+
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+}
+
+/**
+ *  @brief Copies one frame (luma plane, then each chroma plane) from host or
+ *  device memory into device memory where every destination chroma plane has
+ *  its own device pointer. All copies are synchronous.
+ *  @param device               CUDA context made current for the copy.
+ *  @param pSrcFrame            Source frame; chroma planes are located with
+ *                              NvEncoder::GetChromaSubPlaneOffsets().
+ *  @param nSrcPitch            Source pitch in bytes; 0 means the row width in
+ *                              bytes for pixelFormat/width.
+ *  @param pDstFrame            Destination device pointer (luma plane).
+ *  @param dstPitch             Destination luma pitch in bytes.
+ *  @param width, height        Frame dimensions used for the luma copy.
+ *  @param srcMemoryType        CU_MEMORYTYPE_HOST or CU_MEMORYTYPE_DEVICE.
+ *  @param pixelFormat          Buffer format of the frame.
+ *  @param dstChromaDevicePtrs  Destination device pointer per chroma plane.
+ *  @param dstChromaPitch       Destination chroma pitch in bytes.
+ *  @param numChromaPlanes      Number of chroma planes to copy.
+ *  @param bUnAlignedDeviceCopy Use cuMemcpy2DUnaligned for device sources.
+ *  Throws on an invalid source memory type or any failing CUDA call. The
+ *  pushed context is popped on the exception path as well, so a failed copy
+ *  does not leave the context on the calling thread's stack.
+ */
+void NvEncoderCuda::CopyToDeviceFrame(
+    CUcontext device, void *pSrcFrame, uint32_t nSrcPitch,
+    CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
+    CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
+    CUdeviceptr dstChromaDevicePtrs[], uint32_t dstChromaPitch,
+    uint32_t numChromaPlanes, bool bUnAlignedDeviceCopy) {
+  if (srcMemoryType != CU_MEMORYTYPE_HOST &&
+      srcMemoryType != CU_MEMORYTYPE_DEVICE) {
+    NVENC_THROW_ERROR("Invalid source memory type for copy",
+                      NV_ENC_ERR_INVALID_PARAM);
+  }
+
+  CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
+
+  try {
+    uint32_t srcPitch =
+        nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
+
+    // Luma plane.
+    CUDA_MEMCPY2D m = {0};
+    m.srcMemoryType = srcMemoryType;
+    if (srcMemoryType == CU_MEMORYTYPE_HOST) {
+      m.srcHost = pSrcFrame;
+    } else {
+      m.srcDevice = (CUdeviceptr)pSrcFrame;
+    }
+    m.srcPitch = srcPitch;
+    m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    m.dstDevice = pDstFrame;
+    m.dstPitch = dstPitch;
+    m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
+    m.Height = height;
+    if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
+      CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
+    } else {
+      CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
+    }
+
+    // Chroma planes: the same descriptor is reused with per-plane addresses,
+    // pitches and sizes.
+    std::vector<uint32_t> srcChromaOffsets;
+    NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height,
+                                        srcChromaOffsets);
+    uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height);
+    uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch);
+    uint32_t chromaWidthInBytes =
+        NvEncoder::GetChromaWidthInBytes(pixelFormat, width);
+
+    for (uint32_t i = 0; i < numChromaPlanes; ++i) {
+      if (chromaHeight) {
+        if (srcMemoryType == CU_MEMORYTYPE_HOST) {
+          m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
+        } else {
+          m.srcDevice =
+              (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]);
+        }
+        m.srcPitch = srcChromaPitch;
+
+        m.dstDevice = dstChromaDevicePtrs[i];
+        m.dstPitch = dstChromaPitch;
+        m.WidthInBytes = chromaWidthInBytes;
+        m.Height = chromaHeight;
+        if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
+          CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
+        } else {
+          CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
+        }
+      }
+    }
+  } catch (...) {
+    // Balance the push above before propagating; the pop result is ignored
+    // because an error is already in flight.
+    cuCtxPopCurrent(NULL);
+    throw;
+  }
+
+  CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
+}
--- a/src/media/video/encode/nvcodec/NvEncoderCuda.h
+++ b/src/media/video/encode/nvcodec/NvEncoderCuda.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2017-2020 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#pragma once
+
+#include <cuda.h>
+#include <stdint.h>
+
+#include <mutex>
+#include <vector>
+
+#include "NvEncoder.h"
+
+/**
+ *  @brief Evaluates a CUDA driver API expression once and throws an
+ *  NVENCException (NV_ENC_ERR_GENERIC) naming the CUDA error when the result
+ *  is not CUDA_SUCCESS. If cuGetErrorName cannot name the code, a fixed
+ *  placeholder is used so that a null pointer is never streamed.
+ */
+#define CUDA_DRVAPI_CALL(call)                                        \
+  do {                                                                \
+    CUresult err__ = (call);                                          \
+    if (err__ != CUDA_SUCCESS) {                                      \
+      const char* szErrName = NULL;                                   \
+      if (cuGetErrorName(err__, &szErrName) != CUDA_SUCCESS ||        \
+          szErrName == NULL) {                                        \
+        szErrName = "unknown error";                                  \
+      }                                                               \
+      std::ostringstream errorLog;                                    \
+      errorLog << "CUDA driver API error " << szErrName;              \
+      throw NVENCException::makeNVENCException(                       \
+          errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, \
+          __LINE__);                                                  \
+    }                                                                 \
+  } while (0)
+
+/**
+ *  @brief Encoder for CUDA device memory.
+ */
+class NvEncoderCuda : public NvEncoder {
+ public:
+  /**
+   *  @brief Creates an encoder bound to the given CUDA context. The
+   *  constructor throws when the encoder handle or the context is null.
+   */
+  NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight,
+                NV_ENC_BUFFER_FORMAT eBufferFormat,
+                uint32_t nExtraOutputDelay = 3,
+                bool bMotionEstimationOnly = false,
+                bool bOutputInVideoMemory = false);
+  virtual ~NvEncoderCuda();
+
+  /**
+   *  @brief This is a static function to copy input data from host or device
+   * memory (selected by srcMemoryType) to device memory. The destination
+   * chroma planes are addressed as byte offsets from pDstFrame. When stream
+   * is non-null the copies are issued asynchronously on that stream.
+   */
+  static void CopyToDeviceFrame(
+      CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
+      CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
+      CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
+      const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes,
+      bool bUnAlignedDeviceCopy = false, CUstream stream = NULL);
+
+  /**
+   *  @brief This is a static function to copy input data from host or device
+   * memory (selected by srcMemoryType) to device memory. The application must
+   * pass a separate device pointer for each destination chroma plane.
+   */
+  static void CopyToDeviceFrame(
+      CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
+      CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
+      CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
+      CUdeviceptr dstChromaDevicePtrs[], uint32_t dstChromaPitch,
+      uint32_t numChromaPlanes, bool bUnAlignedDeviceCopy = false);
+
+  /**
+   *  @brief This function sets input and output CUDA streams
+   */
+  void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream,
+                        NV_ENC_CUSTREAM_PTR outputStream);
+
+ protected:
+  /**
+   *  @brief This function is used to release the input buffers allocated for
+   * encoding. This function is an override of virtual function
+   * NvEncoder::ReleaseInputBuffers().
+   */
+  virtual void ReleaseInputBuffers() override;
+
+ private:
+  /**
+   *  @brief This function is used to allocate input buffers for encoding.
+   *  This function is an override of virtual function
+   * NvEncoder::AllocateInputBuffers().
+   */
+  virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
+
+ private:
+  /**
+   *  @brief This is a private function to release CUDA device memory used for
+   * encoding.
+   */
+  void ReleaseCudaResources();
+
+ protected:
+  // CUDA context used for buffer allocation and release; reset to null once
+  // the buffers have been released.
+  CUcontext m_cuContext;
+
+ private:
+  // Pitch reported by cuMemAllocPitch for the input buffers.
+  size_t m_cudaPitch = 0;
+};
--- a/src/media/video/encode/nvcodec/nv_encoder.cpp
+++ b/src/media/video/encode/nvcodec/nv_encoder.cpp
@@ -0,0 +1,145 @@
+#include "nv_encoder.h"
+
+#include <chrono>
+
+#include "log.h"
+
+#define SAVE_ENCODER_STREAM 0
+
+// Opens the bitstream dump file when SAVE_ENCODER_STREAM is enabled; a failed
+// open is only logged and leaves file_ null.
+VideoEncoder::VideoEncoder() {
+  if (!SAVE_ENCODER_STREAM) {
+    return;
+  }
+
+  file_ = fopen("saved/stream.h264", "w+b");
+  if (nullptr == file_) {
+    LOG_WARN("Fail to open saved/stream.h264");
+  }
+}
+// Closes the dump file and releases everything this object owns: the NV12
+// scratch buffer, the encoder created in Init() and the CUDA context created
+// in Init().
+VideoEncoder::~VideoEncoder() {
+  if (SAVE_ENCODER_STREAM && file_) {
+    fflush(file_);
+    fclose(file_);
+    file_ = nullptr;
+  }
+
+  if (nv12_data_) {
+    free(nv12_data_);
+    nv12_data_ = nullptr;
+  }
+
+  // The encoder must go first: NvEncoderCuda makes its CUDA context current
+  // while freeing its device buffers, so the context has to outlive it.
+  // NOTE(review): deleting through NvEncoder* relies on NvEncoder having a
+  // virtual destructor - confirm in NvEncoder.h.
+  delete encoder_;
+  encoder_ = nullptr;
+
+  if (cuda_context_) {
+    cuCtxDestroy(cuda_context_);
+    cuda_context_ = nullptr;
+  }
+}
+
+// Creates the CUDA context and the NVENC H.264 encoder session.
+// Returns 0 on success and -1 when no usable CUDA context could be created.
+// NvEncoderCuda and NvEncoder calls below may still throw on encoder errors.
+int VideoEncoder::Init() {
+  // Init cuda context
+  int num_of_GPUs = 0;
+  CUdevice cuda_device = 0;
+  bool cuda_ctx_succeed =
+      (index_of_GPU >= 0 && cuInit(0) == CUresult::CUDA_SUCCESS &&
+       cuDeviceGetCount(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
+       (num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
+       cuDeviceGet(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
+       cuCtxCreate(&cuda_context_, 0, cuda_device) == CUresult::CUDA_SUCCESS);
+  if (!cuda_ctx_succeed) {
+    // Without a context the encoder cannot be constructed; report the failure
+    // instead of passing a null context on.
+    LOG_ERROR("Fail to create cuda context");
+    return -1;
+  }
+
+  encoder_ = new NvEncoderCuda(cuda_context_, frame_width, frame_height,
+                               NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12);
+
+  // Init encoder_ session
+  NV_ENC_INITIALIZE_PARAMS init_params = {};
+  init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
+  NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
+  init_params.encodeConfig = &encode_config;
+
+  encoder_->CreateDefaultEncoderParams(&init_params, codec_guid, preset_guid,
+                                       tuning_info);
+
+  init_params.encodeWidth = frame_width;
+  init_params.encodeHeight = frame_height;
+  init_params.encodeConfig->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
+  init_params.encodeConfig->encodeCodecConfig.h264Config.level =
+      NV_ENC_LEVEL::NV_ENC_LEVEL_H264_31;
+  // TO TEST: not tested yet
+  // init_params.encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
+  init_params.encodeConfig->gopLength = keyFrameInterval_;
+  // Do not use B-frames for realtime applications
+  init_params.encodeConfig->frameIntervalP = 1;
+  init_params.encodeConfig->rcParams.rateControlMode =
+      NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CBR;
+  // maxBitrate_ is multiplied by 1000 here (presumably kbit/s -> bit/s).
+  // NOTE(review): only maxBitRate is set for CBR; confirm whether
+  // averageBitRate should be set as well.
+  init_params.encodeConfig->rcParams.maxBitRate = maxBitrate_ * 1000;
+  init_params.encodeConfig->encodeCodecConfig.h264Config.sliceMode = 1;
+  init_params.encodeConfig->encodeCodecConfig.h264Config.sliceModeData =
+      max_payload_size_;
+
+  encoder_->CreateEncoder(&init_params);
+  return 0;
+}
+
+// Uploads one host frame to the encoder, encodes it and passes every produced
+// packet to OnEncodedImage().
+// @param pData Host pointer to the raw input frame.
+// @param nSize Size of pData in bytes (currently unused).
+// @return 0 on success, -1 when the encoder or the input is missing or when
+//         the encoder produced no packets for this call.
+int VideoEncoder::Encode(const uint8_t *pData, int nSize) {
+  if (!encoder_) {
+    LOG_ERROR("Invalid encoder");
+    return -1;
+  }
+
+  if (!pData) {
+    LOG_ERROR("Invalid input frame");
+    return -1;
+  }
+
+  // NOTE(review): nSize is not validated; the copy below reads a full frame of
+  // the encoder's width/height from pData regardless of nSize.
+  (void)nSize;
+
+  // Request an IDR frame every 30 * 5 calls (the first call included).
+  if (0 == seq_++ % (30 * 5)) {
+    ForceIdr();
+  }
+
+#ifdef SHOW_SUBMODULE_TIME_COST
+  auto start = std::chrono::steady_clock::now();
+#endif
+
+  const NvEncInputFrame *encoder_inputframe = encoder_->GetNextInputFrame();
+
+  NvEncoderCuda::CopyToDeviceFrame(
+      cuda_context_,
+      (void *)pData,  // NOLINT
+      0, (CUdeviceptr)encoder_inputframe->inputPtr, encoder_inputframe->pitch,
+      encoder_->GetEncodeWidth(), encoder_->GetEncodeHeight(),
+      CU_MEMORYTYPE_HOST, encoder_inputframe->bufferFormat,
+      encoder_inputframe->chromaOffsets, encoder_inputframe->numChromaPlanes);
+
+  encoder_->EncodeFrame(encoded_packets_);
+
+  if (encoded_packets_.size() < 1) {
+    LOG_WARN("empty encoded_packets_");
+    return -1;
+  }
+
+  for (const auto &packet : encoded_packets_) {
+    OnEncodedImage((char *)packet.data(), packet.size());
+
+    // file_ stays null when the dump file could not be opened.
+    if (SAVE_ENCODER_STREAM && file_) {
+      fwrite((unsigned char *)packet.data(), 1, packet.size(), file_);
+    }
+  }
+
+#ifdef SHOW_SUBMODULE_TIME_COST
+  auto encode_time_cost = std::chrono::duration_cast<std::chrono::milliseconds>(
+                              std::chrono::steady_clock::now() - start)
+                              .count();
+  LOG_INFO("Encode time cost {}ms", encode_time_cost);
+#endif
+
+  return 0;
+}
+
+// Default sink for encoded packets: logs, and appends the packet to the dump
+// file when one is open. Derived classes override this to consume packets.
+// @param encoded_packets Pointer to the encoded bytes.
+// @param size            Number of bytes at encoded_packets.
+// @return Always 0.
+int VideoEncoder::OnEncodedImage(char *encoded_packets, size_t size) {
+  LOG_INFO("output encoded image");
+  // file_ is null unless SAVE_ENCODER_STREAM is enabled and the open
+  // succeeded; writing to a null FILE* is undefined behaviour.
+  if (file_) {
+    fwrite(encoded_packets, 1, size, file_);
+  }
+  return 0;
+}
+
+// Reconfigures the encoder with its current initialize parameters and the
+// forceIDR and resetEncoder flags set. Does nothing when no encoder exists.
+void VideoEncoder::ForceIdr() {
+  if (!encoder_) {
+    LOG_ERROR("Invalid encoder");
+    return;
+  }
+
+  // Zero the whole struct first so that no field the driver may read is left
+  // indeterminate.
+  NV_ENC_RECONFIGURE_PARAMS reconfig_params = {};
+  reconfig_params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
+
+  NV_ENC_INITIALIZE_PARAMS init_params = {};
+  NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
+  // encodeConfig is set before the call so GetInitializeParams has a config
+  // struct to work with.
+  init_params.encodeConfig = &encode_config;
+  encoder_->GetInitializeParams(&init_params);
+
+  reconfig_params.reInitEncodeParams = init_params;
+  reconfig_params.forceIDR = 1;
+  reconfig_params.resetEncoder = 1;
+
+  encoder_->Reconfigure(&reconfig_params);
+}
--- a/src/media/video/encode/nvcodec/nv_encoder.h
+++ b/src/media/video/encode/nvcodec/nv_encoder.h
@@ -0,0 +1,36 @@
+#ifndef _NV_ENCODER_H_
+#define _NV_ENCODER_H_
+
+#include "NvEncoderCuda.h"
+
+// H.264 encoder wrapper built on NvEncoderCuda. Subclasses receive encoded
+// packets by overriding OnEncodedImage().
+class VideoEncoder {
+ public:
+  VideoEncoder();
+  // Virtual because the class is used as a polymorphic base.
+  virtual ~VideoEncoder();
+
+  // Creates the CUDA context and the encoder session; returns 0 on success.
+  int Init();
+  // Encodes one raw frame; returns 0 on success, -1 on failure.
+  int Encode(const uint8_t* pData, int nSize);
+  // Called by Encode() once per encoded packet.
+  virtual int OnEncodedImage(char* encoded_packets, size_t size);
+  // Reconfigures the encoder with the forceIDR flag set.
+  void ForceIdr();
+
+ private:
+  int index_of_GPU = 0;
+  GUID codec_guid = NV_ENC_CODEC_H264_GUID;
+  GUID preset_guid = NV_ENC_PRESET_P2_GUID;
+  NV_ENC_TUNING_INFO tuning_info =
+      NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
+  int frame_width = 1280;
+  int frame_height = 720;
+  int keyFrameInterval_ = 3000;  // used as the encoder's gopLength
+  int maxBitrate_ = 2000;        // multiplied by 1000 for rcParams.maxBitRate
+  int max_payload_size_ = 3000;  // used as h264Config.sliceModeData
+  NvEncoder* encoder_ = nullptr;
+  CUcontext cuda_context_ = nullptr;
+  std::vector<std::vector<uint8_t>> encoded_packets_;
+  unsigned char* encoded_image_ = nullptr;
+  FILE* file_ = nullptr;  // dump file, open only with SAVE_ENCODER_STREAM
+  unsigned char* nv12_data_ = nullptr;
+  unsigned int seq_ = 0;  // incremented on every Encode() call past its checks
+};
+
+#endif
--- a/src/pc/peer_connection.cpp
+++ b/src/pc/peer_connection.cpp
@@ -50,6 +50,7 @@ int PeerConnection::Init(PeerConnectionParams params,

  do {
  } while (SignalStatus::Connected != GetSignalStatus());
+  VideoEncoder::Init();

  return 0;
 }
@@ -215,7 +216,35 @@ int PeerConnection::Destroy() {

 SignalStatus PeerConnection::GetSignalStatus() { return signal_status_; }

-int PeerConnection::SendData(const char *data, size_t size) {
+// Encodes one raw video frame. Nothing is transmitted here directly: Encode()
+// hands each encoded packet to OnEncodedImage(), which this class overrides.
+// Returns 0 on success and -1 when encoding fails.
+int PeerConnection::SendVideoData(const char *data, size_t size) {
+  const int encode_result = Encode((uint8_t *)data, size);
+  if (encode_result == 0) {
+    return 0;
+  }
+
+  LOG_ERROR("Encode failed");
+  return -1;
+}
+
+// Receives one encoded packet from the encoder and forwards it to every
+// transmission in ice_transmission_list_. Always returns 0; the individual
+// SendData results are not inspected.
+int PeerConnection::OnEncodedImage(char *encoded_packets, size_t size) {
+  // Bind by reference: this runs per encoded packet, so avoid copying each
+  // map entry.
+  for (const auto &ice_trans : ice_transmission_list_) {
+    ice_trans.second->SendData(encoded_packets, size);
+  }
+
+  return 0;
+}
+
+// Forwards an audio buffer unchanged to every transmission in
+// ice_transmission_list_. Always returns 0; the individual SendData results
+// are not inspected.
+int PeerConnection::SendAudioData(const char *data, size_t size) {
+  // Bind by reference to avoid copying each map entry per call.
+  for (const auto &ice_trans : ice_transmission_list_) {
+    ice_trans.second->SendData(data, size);
+  }
+  return 0;
+}
+
+int PeerConnection::SendUserData(const char *data, size_t size) {
  for (auto ice_trans : ice_transmission_list_) {
    ice_trans.second->SendData(data, size);
  }
--- a/src/pc/peer_connection.h
+++ b/src/pc/peer_connection.h
@@ -5,6 +5,8 @@
 #include <map>

 #include "ice_transmission.h"
+#include "nv_decoder.h"
+#include "nv_encoder.h"
 #include "ws_transmission.h"

 enum SignalStatus { Connecting = 0, Connected, Closed };
@@ -20,7 +22,7 @@ typedef struct {
  NetStatusReport net_status_report;
 } PeerConnectionParams;

-class PeerConnection {
+class PeerConnection : public VideoEncoder, VideoDecoder {
 public:
  PeerConnection(OnReceiveBuffer on_receive_buffer);
  ~PeerConnection();
@@ -37,7 +39,9 @@ class PeerConnection {

  SignalStatus GetSignalStatus();

-  int SendData(const char *data, size_t size);
+  int SendVideoData(const char *data, size_t size);
+  int SendAudioData(const char *data, size_t size);
+  int SendUserData(const char *data, size_t size);

 private:
  int Init(PeerConnectionParams params, const std::string &transmission_id,
@@ -47,6 +51,9 @@ class PeerConnection {

  int RequestTransmissionMemberList(const std::string &transmission_id);

+ private:
+  int OnEncodedImage(char *encoded_packets, size_t size) override;
+
 private:
  std::string uri_ = "";
  std::string cfg_signal_server_ip_;
@@ -68,6 +75,8 @@ class PeerConnection {
  SignalStatus signal_status_ = SignalStatus::Closed;

  OnReceiveBuffer on_receive_buffer_;
+
+ private:
 };

 #endif
--- a/src/qos/kcp/ikcp.c
+++ b/src/qos/kcp/ikcp.c
--- a/src/qos/kcp/ikcp.h
+++ b/src/qos/kcp/ikcp.h
@@ -0,0 +1,416 @@
+//=====================================================================
+//
+// KCP - A Better ARQ Protocol Implementation
+// skywind3000 (at) gmail.com, 2010-2011
+//  
+// Features:
+// + Average RTT reduce 30% - 40% vs traditional ARQ like tcp.
+// + Maximum RTT reduce three times vs tcp.
+// + Lightweight, distributed as a single source file.
+//
+//=====================================================================
+#ifndef __IKCP_H__
+#define __IKCP_H__
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <assert.h>
+
+
+//=====================================================================
+// 32BIT INTEGER DEFINITION 
+//=====================================================================
+#ifndef __INTEGER_32_BITS__
+#define __INTEGER_32_BITS__
+#if defined(_WIN64) || defined(WIN64) || defined(__amd64__) || \
+	defined(__x86_64) || defined(__x86_64__) || defined(_M_IA64) || \
+	defined(_M_AMD64)
+	typedef unsigned int ISTDUINT32;
+	typedef int ISTDINT32;
+#elif defined(_WIN32) || defined(WIN32) || defined(__i386__) || \
+	defined(__i386) || defined(_M_X86)
+	typedef unsigned long ISTDUINT32;
+	typedef long ISTDINT32;
+#elif defined(__MACOS__)
+	typedef UInt32 ISTDUINT32;
+	typedef SInt32 ISTDINT32;
+#elif defined(__APPLE__) && defined(__MACH__)
+	#include <sys/types.h>
+	typedef u_int32_t ISTDUINT32;
+	typedef int32_t ISTDINT32;
+#elif defined(__BEOS__)
+	#include <sys/inttypes.h>
+	typedef u_int32_t ISTDUINT32;
+	typedef int32_t ISTDINT32;
+#elif (defined(_MSC_VER) || defined(__BORLANDC__)) && (!defined(__MSDOS__))
+	typedef unsigned __int32 ISTDUINT32;
+	typedef __int32 ISTDINT32;
+#elif defined(__GNUC__)
+	#include <stdint.h>
+	typedef uint32_t ISTDUINT32;
+	typedef int32_t ISTDINT32;
+#else 
+	typedef unsigned long ISTDUINT32; 
+	typedef long ISTDINT32;
+#endif
+#endif
+
+
+//=====================================================================
+// Integer Definition
+//=====================================================================
+#ifndef __IINT8_DEFINED
+#define __IINT8_DEFINED
+typedef char IINT8;
+#endif
+
+#ifndef __IUINT8_DEFINED
+#define __IUINT8_DEFINED
+typedef unsigned char IUINT8;
+#endif
+
+#ifndef __IUINT16_DEFINED
+#define __IUINT16_DEFINED
+typedef unsigned short IUINT16;
+#endif
+
+#ifndef __IINT16_DEFINED
+#define __IINT16_DEFINED
+typedef short IINT16;
+#endif
+
+#ifndef __IINT32_DEFINED
+#define __IINT32_DEFINED
+typedef ISTDINT32 IINT32;
+#endif
+
+#ifndef __IUINT32_DEFINED
+#define __IUINT32_DEFINED
+typedef ISTDUINT32 IUINT32;
+#endif
+
+#ifndef __IINT64_DEFINED
+#define __IINT64_DEFINED
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 IINT64;
+#else
+typedef long long IINT64;
+#endif
+#endif
+
+#ifndef __IUINT64_DEFINED
+#define __IUINT64_DEFINED
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef unsigned __int64 IUINT64;
+#else
+typedef unsigned long long IUINT64;
+#endif
+#endif
+
+#ifndef INLINE
+#if defined(__GNUC__)
+
+#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
+#define INLINE         __inline__ __attribute__((always_inline))
+#else
+#define INLINE         __inline__
+#endif
+
+#elif (defined(_MSC_VER) || defined(__BORLANDC__) || defined(__WATCOMC__))
+#define INLINE __inline
+#else
+#define INLINE 
+#endif
+#endif
+
+#if (!defined(__cplusplus)) && (!defined(inline))
+#define inline INLINE
+#endif
+
+
+//=====================================================================
+// QUEUE DEFINITION                                                  
+//=====================================================================
+#ifndef __IQUEUE_DEF__
+#define __IQUEUE_DEF__
+
+// Node of an intrusive circular doubly-linked list. A list head initialised
+// with IQUEUE_INIT points to itself in both directions, which is also what
+// IQUEUE_IS_EMPTY tests for.
+struct IQUEUEHEAD {
+	struct IQUEUEHEAD *next, *prev;
+};
+
+typedef struct IQUEUEHEAD iqueue_head;
+
+
+//---------------------------------------------------------------------
+// queue init                                                         
+//---------------------------------------------------------------------
+#define IQUEUE_HEAD_INIT(name) { &(name), &(name) }
+#define IQUEUE_HEAD(name) \
+	struct IQUEUEHEAD name = IQUEUE_HEAD_INIT(name)
+
+#define IQUEUE_INIT(ptr) ( \
+	(ptr)->next = (ptr), (ptr)->prev = (ptr))
+
+#define IOFFSETOF(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+
+#define ICONTAINEROF(ptr, type, member) ( \
+		(type*)( ((char*)((type*)ptr)) - IOFFSETOF(type, member)) )
+
+#define IQUEUE_ENTRY(ptr, type, member) ICONTAINEROF(ptr, type, member)
+
+
+//---------------------------------------------------------------------
+// queue operation                     
+//---------------------------------------------------------------------
+#define IQUEUE_ADD(node, head) ( \
+	(node)->prev = (head), (node)->next = (head)->next, \
+	(head)->next->prev = (node), (head)->next = (node))
+
+#define IQUEUE_ADD_TAIL(node, head) ( \
+	(node)->prev = (head)->prev, (node)->next = (head), \
+	(head)->prev->next = (node), (head)->prev = (node))
+
+#define IQUEUE_DEL_BETWEEN(p, n) ((n)->prev = (p), (p)->next = (n))
+
+#define IQUEUE_DEL(entry) (\
+	(entry)->next->prev = (entry)->prev, \
+	(entry)->prev->next = (entry)->next, \
+	(entry)->next = 0, (entry)->prev = 0)
+
+#define IQUEUE_DEL_INIT(entry) do { \
+	IQUEUE_DEL(entry); IQUEUE_INIT(entry); } while (0)
+
+#define IQUEUE_IS_EMPTY(entry) ((entry) == (entry)->next)
+
+#define iqueue_init		IQUEUE_INIT
+#define iqueue_entry	IQUEUE_ENTRY
+#define iqueue_add		IQUEUE_ADD
+#define iqueue_add_tail	IQUEUE_ADD_TAIL
+#define iqueue_del		IQUEUE_DEL
+#define iqueue_del_init	IQUEUE_DEL_INIT
+#define iqueue_is_empty IQUEUE_IS_EMPTY
+
+#define IQUEUE_FOREACH(iterator, head, TYPE, MEMBER) \
+	for ((iterator) = iqueue_entry((head)->next, TYPE, MEMBER); \
+		&((iterator)->MEMBER) != (head); \
+		(iterator) = iqueue_entry((iterator)->MEMBER.next, TYPE, MEMBER))
+
+#define iqueue_foreach(iterator, head, TYPE, MEMBER) \
+	IQUEUE_FOREACH(iterator, head, TYPE, MEMBER)
+
+#define iqueue_foreach_entry(pos, head) \
+	for( (pos) = (head)->next; (pos) != (head) ; (pos) = (pos)->next )
+	
+
+#define __iqueue_splice(list, head) do {	\
+		iqueue_head *first = (list)->next, *last = (list)->prev; \
+		iqueue_head *at = (head)->next; \
+		(first)->prev = (head), (head)->next = (first);		\
+		(last)->next = (at), (at)->prev = (last); }	while (0)
+
+#define iqueue_splice(list, head) do { \
+	if (!iqueue_is_empty(list)) __iqueue_splice(list, head); } while (0)
+
+#define iqueue_splice_init(list, head) do {	\
+	iqueue_splice(list, head);	iqueue_init(list); } while (0)
+
+
+#ifdef _MSC_VER
+#pragma warning(disable:4311)
+#pragma warning(disable:4312)
+#pragma warning(disable:4996)
+#endif
+
+#endif
+
+
+//---------------------------------------------------------------------
+// BYTE ORDER & ALIGNMENT
+//---------------------------------------------------------------------
+#ifndef IWORDS_BIG_ENDIAN
+    #ifdef _BIG_ENDIAN_
+        #if _BIG_ENDIAN_
+            #define IWORDS_BIG_ENDIAN 1
+        #endif
+    #endif
+    #ifndef IWORDS_BIG_ENDIAN
+        #if defined(__hppa__) || \
+            defined(__m68k__) || defined(mc68000) || defined(_M_M68K) || \
+            (defined(__MIPS__) && defined(__MIPSEB__)) || \
+            defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC) || \
+            defined(__sparc__) || defined(__powerpc__) || \
+            defined(__mc68000__) || defined(__s390x__) || defined(__s390__)
+            #define IWORDS_BIG_ENDIAN 1
+        #endif
+    #endif
+    #ifndef IWORDS_BIG_ENDIAN
+        #define IWORDS_BIG_ENDIAN  0
+    #endif
+#endif
+
+#ifndef IWORDS_MUST_ALIGN
+	#if defined(__i386__) || defined(__i386) || defined(_i386_)
+		#define IWORDS_MUST_ALIGN 0
+	#elif defined(_M_IX86) || defined(_X86_) || defined(__x86_64__)
+		#define IWORDS_MUST_ALIGN 0
+	#elif defined(__amd64) || defined(__amd64__)
+		#define IWORDS_MUST_ALIGN 0
+	#else
+		#define IWORDS_MUST_ALIGN 1
+	#endif
+#endif
+
+
+//=====================================================================
+// SEGMENT
+//=====================================================================
+// One KCP segment. `node` links the segment into one of the IQUEUEHEAD
+// queues. The meaning of the remaining header fields is defined by ikcp.c
+// and is not documented in this header.
+// NOTE(review): `data[1]` looks like a trailing variable-length payload of
+// `len` bytes allocated past the end of the struct - confirm against ikcp.c.
+struct IKCPSEG
+{
+	struct IQUEUEHEAD node;
+	IUINT32 conv;
+	IUINT32 cmd;
+	IUINT32 frg;
+	IUINT32 wnd;
+	IUINT32 ts;
+	IUINT32 sn;
+	IUINT32 una;
+	IUINT32 len;
+	IUINT32 resendts;
+	IUINT32 rto;
+	IUINT32 fastack;
+	IUINT32 xmit;
+	char data[1];
+};
+
+
+//---------------------------------------------------------------------
+// IKCPCB
+//---------------------------------------------------------------------
+// KCP control block: the per-connection state created by ikcp_create() and
+// released by ikcp_release().
+//  - conv: must be equal on both endpoints of the same connection.
+//  - mtu: changed through ikcp_setmtu() (default 1400 per that function).
+//  - snd_wnd / rcv_wnd: presumably the window sizes set by ikcp_wndsize()
+//    (32 by default per that function) - confirm against ikcp.c.
+//  - snd_queue / rcv_queue / snd_buf / rcv_buf: heads of the segment queues.
+//  - user: opaque pointer that is passed to the output callback.
+//  - output: callback invoked by kcp, installed with ikcp_setoutput() or by
+//    assigning kcp->output directly.
+//  - writelog / logmask: log callback and, presumably, a mask built from the
+//    IKCP_LOG_* bits.
+struct IKCPCB
+{
+	IUINT32 conv, mtu, mss, state;
+	IUINT32 snd_una, snd_nxt, rcv_nxt;
+	IUINT32 ts_recent, ts_lastack, ssthresh;
+	IINT32 rx_rttval, rx_srtt, rx_rto, rx_minrto;
+	IUINT32 snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe;
+	IUINT32 current, interval, ts_flush, xmit;
+	IUINT32 nrcv_buf, nsnd_buf;
+	IUINT32 nrcv_que, nsnd_que;
+	IUINT32 nodelay, updated;
+	IUINT32 ts_probe, probe_wait;
+	IUINT32 dead_link, incr;
+	struct IQUEUEHEAD snd_queue;
+	struct IQUEUEHEAD rcv_queue;
+	struct IQUEUEHEAD snd_buf;
+	struct IQUEUEHEAD rcv_buf;
+	IUINT32 *acklist;
+	IUINT32 ackcount;
+	IUINT32 ackblock;
+	void *user;
+	char *buffer;
+	int fastresend;
+	int fastlimit;
+	int nocwnd, stream;
+	int logmask;
+	int (*output)(const char *buf, int len, struct IKCPCB *kcp, void *user);
+	void (*writelog)(const char *log, struct IKCPCB *kcp, void *user);
+};
+
+
+typedef struct IKCPCB ikcpcb;
+
+#define IKCP_LOG_OUTPUT			1
+#define IKCP_LOG_INPUT			2
+#define IKCP_LOG_SEND			4
+#define IKCP_LOG_RECV			8
+#define IKCP_LOG_IN_DATA		16
+#define IKCP_LOG_IN_ACK			32
+#define IKCP_LOG_IN_PROBE		64
+#define IKCP_LOG_IN_WINS		128
+#define IKCP_LOG_OUT_DATA		256
+#define IKCP_LOG_OUT_ACK		512
+#define IKCP_LOG_OUT_PROBE		1024
+#define IKCP_LOG_OUT_WINS		2048
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//---------------------------------------------------------------------
+// interface
+//---------------------------------------------------------------------
+
+// create a new kcp control object. 'conv' must be equal on both endpoints
+// of the same connection. 'user' will be passed to the output callback.
+// the output callback can be set up like this: 'kcp->output = my_udp_output'
+ikcpcb* ikcp_create(IUINT32 conv, void *user);
+
+// release kcp control object
+void ikcp_release(ikcpcb *kcp);
+
+// set output callback, which will be invoked by kcp
+void ikcp_setoutput(ikcpcb *kcp, int (*output)(const char *buf, int len, 
+	ikcpcb *kcp, void *user));
+
+// user/upper level recv: returns size, returns below zero for EAGAIN
+int ikcp_recv(ikcpcb *kcp, char *buffer, int len);
+
+// user/upper level send, returns below zero for error
+int ikcp_send(ikcpcb *kcp, const char *buffer, int len);
+
+// update state (call it repeatedly, every 10ms-100ms), or you can ask 
+// ikcp_check when to call it again (without ikcp_input/_send calling).
+// 'current' - current timestamp in millisec. 
+void ikcp_update(ikcpcb *kcp, IUINT32 current);
+
+// Determine when you should invoke ikcp_update:
+// returns the time (in millisec) at which you should invoke ikcp_update,
+// provided there are no ikcp_input/_send calls in between. You can call
+// ikcp_update at that time instead of calling update repeatedly.
+// Important for reducing unnecessary ikcp_update invocations. Use it to
+// schedule ikcp_update (e.g. implementing an epoll-like mechanism,
+// or optimizing ikcp_update when handling massive kcp connections)
+IUINT32 ikcp_check(const ikcpcb *kcp, IUINT32 current);
+
+// when you received a low level packet (eg. UDP packet), call it
+int ikcp_input(ikcpcb *kcp, const char *data, long size);
+
+// flush pending data
+void ikcp_flush(ikcpcb *kcp);
+
+// check the size of next message in the recv queue
+int ikcp_peeksize(const ikcpcb *kcp);
+
+// change MTU size, default is 1400
+int ikcp_setmtu(ikcpcb *kcp, int mtu);
+
+// set maximum window size: sndwnd=32, rcvwnd=32 by default
+int ikcp_wndsize(ikcpcb *kcp, int sndwnd, int rcvwnd);
+
+// get how many packet is waiting to be sent
+int ikcp_waitsnd(const ikcpcb *kcp);
+
+// fastest: ikcp_nodelay(kcp, 1, 20, 2, 1)
+// nodelay: 0:disable(default), 1:enable
+// interval: internal update timer interval in millisec, default is 100ms 
+// resend: 0:disable fast resend(default), 1:enable fast resend
+// nc: 0:normal congestion control(default), 1:disable congestion control
+int ikcp_nodelay(ikcpcb *kcp, int nodelay, int interval, int resend, int nc);
+
+
+void ikcp_log(ikcpcb *kcp, int mask, const char *fmt, ...);
+
+// setup allocator
+void ikcp_allocator(void* (*new_malloc)(size_t), void (*new_free)(void*));
+
+// read conv
+IUINT32 ikcp_getconv(const void *ptr);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+
--- a/src/rtc/x_inner.cpp
+++ b/src/rtc/x_inner.cpp
@@ -41,8 +41,15 @@ int JoinConnection(PeerPtr *peer_ptr, const char *transmission_id,
  return 0;
 }

-int SendData(PeerPtr *peer_ptr, const char *data, size_t size) {
-  peer_ptr->peer_connection->SendData(data, size);
+// Forwards `data` (`size` bytes) to the peer connection's sender that matches
+// `data_type`. Any value other than VIDEO, AUDIO or USER sends nothing.
+// Always returns 0.
+int SendData(PeerPtr *peer_ptr, DATA_TYPE data_type, const char *data,
+             size_t size) {
+  switch (data_type) {
+    case DATA_TYPE::VIDEO:
+      peer_ptr->peer_connection->SendVideoData(data, size);
+      break;
+    case DATA_TYPE::AUDIO:
+      peer_ptr->peer_connection->SendAudioData(data, size);
+      break;
+    case DATA_TYPE::USER:
+      peer_ptr->peer_connection->SendUserData(data, size);
+      break;
+    default:
+      // Unrecognised type: nothing is sent.
+      break;
+  }
  return 0;
 }

--- a/src/transmission/ice_transmission.cpp
+++ b/src/transmission/ice_transmission.cpp
@@ -1,5 +1,6 @@
 #include "ice_transmission.h"

+#include <chrono>
 #include <map>
 #include <nlohmann/json.hpp>
 #include <thread>
@@ -7,7 +8,57 @@
 #include "common.h"
 #include "log.h"

+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#include <windows.h>
+#elif !defined(__unix)
+#define __unix
+#endif
+
+#ifdef __unix
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+
 using nlohmann::json;
+static int count = 1;
+
+// Reports the current time as a seconds / microseconds pair.
+//   sec  - if non-null, receives the seconds part.
+//   usec - if non-null, receives the microseconds part.
+// When __unix is defined the values come from gettimeofday(); otherwise they
+// are derived from QueryPerformanceCounter().
+static inline void itimeofday(long *sec, long *usec) {
+#if defined(__unix)
+  struct timeval time;
+  gettimeofday(&time, NULL);
+  if (sec) *sec = time.tv_sec;
+  if (usec) *usec = time.tv_usec;
+#else
+  // `mode` records whether the one-time calibration below has run; `addsec`
+  // is the offset that maps counter seconds onto time(NULL) seconds.
+  static long mode = 0, addsec = 0;
+  BOOL retval;
+  static IINT64 freq = 1;
+  IINT64 qpc;
+  if (mode == 0) {
+    // First call only: read the counter frequency and anchor it to time(NULL).
+    retval = QueryPerformanceFrequency((LARGE_INTEGER *)&freq);
+    freq = (freq == 0) ? 1 : freq;  // keep the divisions below well-defined
+    retval = QueryPerformanceCounter((LARGE_INTEGER *)&qpc);
+    addsec = (long)time(NULL);
+    addsec = addsec - (long)((qpc / freq) & 0x7fffffff);
+    mode = 1;
+  }
+  retval = QueryPerformanceCounter((LARGE_INTEGER *)&qpc);
+  // NOTE(review): the result of this statement is never read; presumably it
+  // only exists to silence an unused-variable warning - confirm.
+  retval = retval * 2;
+  // Whole counter seconds plus the calibration offset, and the sub-second
+  // remainder of the counter scaled to microseconds.
+  if (sec) *sec = (long)(qpc / freq) + addsec;
+  if (usec) *usec = (long)((qpc % freq) * 1000000 / freq);
+#endif
+}
+
+// Returns the current time in milliseconds, built from the seconds and
+// microseconds reported by itimeofday().
+static inline IINT64 iclock64(void) {
+  long seconds, micros;
+  itimeofday(&seconds, &micros);
+  // Scale whole seconds up to ms and truncate the microsecond part down to ms.
+  return ((IINT64)seconds) * 1000 + (micros / 1000);
+}
+
+// Returns the low 32 bits of iclock64(), i.e. a millisecond timestamp that
+// wraps around.
+static inline IUINT32 iclock() {
+  const IINT64 now_ms = iclock64();
+  return (IUINT32)(now_ms & 0xfffffffful);
+}

 const std::vector<std::string> ice_status = {
    "JUICE_STATE_DISCONNECTED", "JUICE_STATE_GATHERING",
@@ -31,9 +82,45 @@ IceTransmission::~IceTransmission() {
    delete ice_agent_;
    ice_agent_ = nullptr;
  }
+  ikcp_release(kcp_);
 }

 int IceTransmission::InitIceTransmission(std::string &ip, int port) {
+  // Create the KCP control block. `this` is passed as the user pointer, which
+  // the output callback below casts back to IceTransmission.
+  kcp_ = ikcp_create(0x11223344, (void *)this);
+  // Output callback: forwards every buffer KCP emits to the ICE agent.
+  // ice_agent_ is only assigned further down in this function.
+  ikcp_setoutput(kcp_,
+                 [](const char *buf, int len, ikcpcb *kcp, void *user) -> int {
+                   IceTransmission *ice_transmission_obj =
+                       static_cast<IceTransmission *>(user);
+                   // NOTE(review): logged at error level although this looks
+                   // like a per-packet trace - confirm the intended level.
+                   LOG_ERROR("Real send size: {}", len);
+                   return ice_transmission_obj->ice_agent_->Send(buf, len);
+                 });
+  // ikcp_wndsize(kcp_, 1280, 1280);
+  // nodelay disabled, 40 ms internal update interval, fast resend disabled,
+  // normal congestion control.
+  ikcp_nodelay(kcp_, 0, 40, 0, 0);
+  // Raise the MTU from the documented default of 1400 to 4000.
+  ikcp_setmtu(kcp_, 4000);
+  // kcp_->rx_minrto = 10;
+  // kcp_->fastresend = 1;
+  // Background pump: every 2 ms it advances the KCP state machine and drains
+  // all messages that are ready into kcp_complete_buffer_, holding mtx_ while
+  // it touches kcp_.
+  // NOTE(review): the thread is detached, captures `this` and never exits,
+  // while the destructor calls ikcp_release(kcp_). It needs a stop flag and a
+  // join, which requires new class members.
+  // NOTE(review): the drained bytes are not handed to any consumer yet.
+  std::thread kcp_update_thread([this]() {
+    const int capacity = static_cast<int>(sizeof(kcp_complete_buffer_));
+    while (1) {
+      mtx_.lock();
+      ikcp_update(kcp_, iclock());
+
+      // Append each ready message behind the previous one. The size passed to
+      // ikcp_recv is the space left in the buffer: ikcp_recv refuses a message
+      // larger than the size it is given, so a fixed 1400-byte limit would
+      // leave bigger messages stuck in the receive queue.
+      int total_len = 0;
+      while (total_len < capacity) {
+        const int len = ikcp_recv(kcp_, kcp_complete_buffer_ + total_len,
+                                  capacity - total_len);
+        if (len <= 0) break;  // nothing more to read (or an error code)
+        total_len += len;
+      }
+
+      mtx_.unlock();
+      std::this_thread::sleep_for(std::chrono::milliseconds(2));
+    }
+  });
+  kcp_update_thread.detach();
+
  ice_agent_ = new IceAgent(ip, port);

  ice_agent_->CreateIceAgent(
@@ -43,6 +130,7 @@ int IceTransmission::InitIceTransmission(std::string &ip, int port) {
              static_cast<IceTransmission *>(user_ptr);
          LOG_INFO("[{}->{}] state_change: {}", ice_transmission_obj->user_id_,
                   ice_transmission_obj->remote_user_id_, ice_status[state]);
+          ice_transmission_obj->state_ = state;
        } else {
          LOG_INFO("state_change: {}", ice_status[state]);
        }
@@ -74,9 +162,28 @@ int IceTransmission::InitIceTransmission(std::string &ip, int port) {
          IceTransmission *ice_transmission_obj =
              static_cast<IceTransmission *>(user_ptr);
          if (ice_transmission_obj->on_receive_ice_msg_cb_) {
-            ice_transmission_obj->on_receive_ice_msg_cb_(
-                data, size, ice_transmission_obj->remote_user_id_.data(),
-                ice_transmission_obj->remote_user_id_.size());
+            LOG_ERROR("[{}] Receive size: {}", (void *)user_ptr, size);
+            ice_transmission_obj->mtx_.lock();
+            int ret = ikcp_input(ice_transmission_obj->kcp_, data, size);
+            // ikcp_update(ice_transmission_obj->kcp_, iclock());
+            LOG_ERROR("ikcp_input {}", ret);
+            // auto clock =
+            //     std::chrono::duration_cast<std::chrono::milliseconds>(
+            //         std::chrono::system_clock::now().time_since_epoch())
+            //         .count();
+
+            // ikcp_update(ice_transmission_obj->kcp_, clock);
+
+            ice_transmission_obj->mtx_.unlock();
+
+            // ice_transmission_obj->on_receive_ice_msg_cb_(
+            //     ice_transmission_obj->kcp_complete_buffer_, total_len,
+            //     ice_transmission_obj->remote_user_id_.data(),
+            //     ice_transmission_obj->remote_user_id_.size());
+
+            // ice_transmission_obj->on_receive_ice_msg_cb_(
+            //     data, size, ice_transmission_obj->remote_user_id_.data(),
+            //     ice_transmission_obj->remote_user_id_.size());
          }
        }
      },
@@ -167,6 +274,21 @@ int IceTransmission::SendAnswer() {
 }

 int IceTransmission::SendData(const char *data, size_t size) {
-  ice_agent_->Send(data, size);
+  if (JUICE_STATE_COMPLETED == state_) {
+    LOG_ERROR("[{}] Wanna send size: {}", (void *)this, size);
+    mtx_.lock();
+
+    if (ikcp_waitsnd(kcp_) > kcp_->snd_wnd) {
+      // LOG_ERROR("Skip frame");
+      // mtx_.unlock();
+      // return 0;
+      ikcp_flush(kcp_);
+    }
+    int ret = ikcp_send(kcp_, data, size / 100);
+    LOG_ERROR("ikcp_send {}, wnd [{} | {}]", ret, ikcp_waitsnd(kcp_),
+              kcp_->snd_wnd);
+    mtx_.unlock();
+    // ice_agent_->Send(data, size);
+  }
  return 0;
 }
--- a/src/transmission/ice_transmission.h
+++ b/src/transmission/ice_transmission.h
@@ -5,8 +5,8 @@

 #include "congestion_control.h"
 #include "ice_agent.h"
+#include "ikcp.h"
 #include "ws_transmission.h"
-
 class IceTransmission {
 public:
  IceTransmission(
@@ -61,6 +61,12 @@ class IceTransmission {
  std::string remote_user_id_ = "";
  bool offer_peer_ = true;
  std::string remote_ice_username_ = "";
+  juice_state_t state_ = JUICE_STATE_DISCONNECTED;
+
+ private:
+  ikcpcb *kcp_ = nullptr;
+  char kcp_complete_buffer_[2560 * 1440 * 4];
+  std::mutex mtx_;
 };

 #endif