[fix] fix NVIDIA encoder crash when reconfiguring the resolution

dijunkun
2024-09-10 17:32:43 +08:00
parent 2f16d22ab7
commit a8333c622b
40 changed files with 4507 additions and 25450 deletions

src/media/nvcodec/Logger.h (new file, 256 lines)
View File

@@ -0,0 +1,256 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <mutex>
#include <time.h>
#ifdef _WIN32
#include <winsock.h>
#include <windows.h>
#pragma comment(lib, "ws2_32.lib")
#undef ERROR
#else
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#define SOCKET int
#define INVALID_SOCKET -1
#endif
enum LogLevel {
TRACE,
INFO,
WARNING,
ERROR,
FATAL
};
namespace simplelogger{
class Logger {
public:
Logger(LogLevel level, bool bPrintTimeStamp) : level(level), bPrintTimeStamp(bPrintTimeStamp) {}
virtual ~Logger() {}
virtual std::ostream& GetStream() = 0;
virtual void FlushStream() {}
bool ShouldLogFor(LogLevel l) {
return l >= level;
}
char* GetLead(LogLevel l, const char *szFile, int nLine, const char *szFunc) {
if (l < TRACE || l > FATAL) {
sprintf(szLead, "[?????] ");
return szLead;
}
const char *szLevels[] = {"TRACE", "INFO", "WARN", "ERROR", "FATAL"};
if (bPrintTimeStamp) {
time_t t = time(NULL);
struct tm *ptm = localtime(&t);
sprintf(szLead, "[%-5s][%02d:%02d:%02d] ",
szLevels[l], ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
} else {
sprintf(szLead, "[%-5s] ", szLevels[l]);
}
return szLead;
}
void EnterCriticalSection() {
mtx.lock();
}
void LeaveCriticalSection() {
mtx.unlock();
}
private:
LogLevel level;
char szLead[80];
bool bPrintTimeStamp;
std::mutex mtx;
};
class LoggerFactory {
public:
static Logger* CreateFileLogger(std::string strFilePath,
LogLevel level = INFO, bool bPrintTimeStamp = true) {
return new FileLogger(strFilePath, level, bPrintTimeStamp);
}
static Logger* CreateConsoleLogger(LogLevel level = INFO,
bool bPrintTimeStamp = true) {
return new ConsoleLogger(level, bPrintTimeStamp);
}
static Logger* CreateUdpLogger(char *szHost, unsigned uPort, LogLevel level = INFO,
bool bPrintTimeStamp = true) {
return new UdpLogger(szHost, uPort, level, bPrintTimeStamp);
}
private:
LoggerFactory() {}
class FileLogger : public Logger {
public:
FileLogger(std::string strFilePath, LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp) {
pFileOut = new std::ofstream();
pFileOut->open(strFilePath.c_str());
}
~FileLogger() {
pFileOut->close();
}
std::ostream& GetStream() {
return *pFileOut;
}
private:
std::ofstream *pFileOut;
};
class ConsoleLogger : public Logger {
public:
ConsoleLogger(LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp) {}
std::ostream& GetStream() {
return std::cout;
}
};
class UdpLogger : public Logger {
private:
class UdpOstream : public std::ostream {
public:
UdpOstream(char *szHost, unsigned short uPort) : std::ostream(&sb), socket(INVALID_SOCKET){
#ifdef _WIN32
WSADATA w;
if (WSAStartup(0x0101, &w) != 0) {
fprintf(stderr, "WSAStartup() failed.\n");
return;
}
#endif
socket = ::socket(AF_INET, SOCK_DGRAM, 0);
if (socket == INVALID_SOCKET) {
#ifdef _WIN32
WSACleanup();
#endif
fprintf(stderr, "socket() failed.\n");
return;
}
#ifdef _WIN32
unsigned int b1, b2, b3, b4;
sscanf(szHost, "%u.%u.%u.%u", &b1, &b2, &b3, &b4);
struct in_addr addr = {(unsigned char)b1, (unsigned char)b2, (unsigned char)b3, (unsigned char)b4};
#else
struct in_addr addr = {inet_addr(szHost)};
#endif
struct sockaddr_in s = {AF_INET, htons(uPort), addr};
server = s;
}
~UdpOstream() throw() {
if (socket == INVALID_SOCKET) {
return;
}
#ifdef _WIN32
closesocket(socket);
WSACleanup();
#else
close(socket);
#endif
}
void Flush() {
if (sendto(socket, sb.str().c_str(), (int)sb.str().length() + 1,
0, (struct sockaddr *)&server, (int)sizeof(sockaddr_in)) == -1) {
fprintf(stderr, "sendto() failed.\n");
}
sb.str("");
}
private:
std::stringbuf sb;
SOCKET socket;
struct sockaddr_in server;
};
public:
UdpLogger(char *szHost, unsigned uPort, LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp), udpOut(szHost, (unsigned short)uPort) {}
UdpOstream& GetStream() {
return udpOut;
}
virtual void FlushStream() {
udpOut.Flush();
}
private:
UdpOstream udpOut;
};
};
class LogTransaction {
public:
LogTransaction(Logger *pLogger, LogLevel level, const char *szFile, const int nLine, const char *szFunc) : pLogger(pLogger), level(level) {
if (!pLogger) {
std::cout << "[-----] ";
return;
}
if (!pLogger->ShouldLogFor(level)) {
return;
}
pLogger->EnterCriticalSection();
pLogger->GetStream() << pLogger->GetLead(level, szFile, nLine, szFunc);
}
~LogTransaction() {
if (!pLogger) {
std::cout << std::endl;
return;
}
if (!pLogger->ShouldLogFor(level)) {
return;
}
pLogger->GetStream() << std::endl;
pLogger->FlushStream();
pLogger->LeaveCriticalSection();
if (level == FATAL) {
exit(1);
}
}
std::ostream& GetStream() {
if (!pLogger) {
return std::cout;
}
if (!pLogger->ShouldLogFor(level)) {
return ossNull;
}
return pLogger->GetStream();
}
private:
Logger *pLogger;
LogLevel level;
std::ostringstream ossNull;
};
}
extern simplelogger::Logger *logger;
#define LOG(level) simplelogger::LogTransaction(logger, level, __FILE__, __LINE__, __FUNCTION__).GetStream()
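// Hedged usage sketch (illustrative, not part of this header): define the
// global the LOG macro expects, then stream through it. Demo() and its
// messages are assumptions for illustration only.
simplelogger::Logger *logger =
    simplelogger::LoggerFactory::CreateConsoleLogger(INFO);

void Demo() {
  LOG(INFO) << "decoder session created";  // printed: INFO >= INFO
  LOG(TRACE) << "per-frame detail";        // filtered out: TRACE < INFO
  LOG(ERROR) << "reconfigure failed";      // printed with an [ERROR] lead
}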

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//---------------------------------------------------------------------------
@@ -26,19 +42,20 @@
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <thread>
#include <vector>
#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
std::cout << "CUDA driver API error " << szErrName << " at line " << iLine
<< " in file " << szFile;
return false;
}
return true;
@@ -48,6 +65,8 @@ inline bool check(CUresult e, int iLine, const char *szFile) {
#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
if (e != cudaSuccess) {
std::cout << "CUDA runtime API error " << cudaGetErrorName(e) << " at line "
<< iLine << " in file " << szFile;
return false;
}
return true;
@@ -85,6 +104,8 @@ inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
"NV_ENC_ERR_RESOURCE_NOT_MAPPED",
};
if (e != NV_ENC_SUCCESS) {
std::cout << "NVENC error " << aszErrName[e] << " at line " << iLine
<< " in file " << szFile;
return false;
}
return true;
@@ -96,7 +117,8 @@ inline bool check(HRESULT e, int iLine, const char *szFile) {
if (e != S_OK) {
std::stringstream stream;
stream << std::hex << std::uppercase << e;
std::cout << "HRESULT error 0x" << stream.str() << " at line " << iLine
<< " in file " << szFile;
return false;
}
return true;
@@ -106,6 +128,8 @@ inline bool check(HRESULT e, int iLine, const char *szFile) {
#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
if (e != 0) {
std::cout << "GLenum error " << e << " at line " << iLine << " in file "
<< szFile;
return false;
}
return true;
@@ -114,12 +138,17 @@ inline bool check(GLenum e, int iLine, const char *szFile) {
inline bool check(int e, int iLine, const char *szFile) {
if (e < 0) {
std::cout << "General error " << e << " at line " << iLine << " in file "
<< szFile;
return false;
}
return true;
}
#define ck(call) check(call, __LINE__, __FILE__)
#define MAKE_FOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | \
((uint32_t)(uint8_t)(ch2) << 16) | ((uint32_t)(uint8_t)(ch3) << 24))
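// Worked example (illustrative check, not in the original file): the AV1 IVF
// header further below tags streams with 'AV01'.
// MAKE_FOURCC('A','V','0','1') = 0x41 | 0x56<<8 | 0x30<<16 | 0x31<<24
//                              = 0x31305641 (bytes packed least-significant-first)
static_assert(MAKE_FOURCC('A', 'V', '0', '1') == 0x31305641,
              "fourcc packs its four bytes in little-endian order");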
/**
* @brief Wrapper class around std::thread
@@ -156,68 +185,72 @@ class NvThread {
#define _stat64 stat64
#endif
/**
* @brief Utility class to allocate buffer memory. Helps avoid I/O during the
* encode/decode loop in case of performance tests.
*/
class BufferedFileReader {
public:
/**
* @brief Constructor function to allocate appropriate memory and copy file
* contents into it
*/
BufferedFileReader(const char *szFileName, bool bPartial = false) {
struct _stat64 st;
// /**
// * @brief Utility class to allocate buffer memory. Helps avoid I/O during the
// * encode/decode loop in case of performance tests.
// */
// class BufferedFileReader {
// public:
// /**
// * @brief Constructor function to allocate appropriate memory and copy file
// * contents into it
// */
// BufferedFileReader(const char *szFileName, bool bPartial = false) {
// struct _stat64 st;
if (_stat64(szFileName, &st) != 0) {
return;
}
// if (_stat64(szFileName, &st) != 0) {
// return;
// }
nSize = st.st_size;
while (nSize) {
try {
pBuf = new uint8_t[(size_t)nSize];
if (nSize != st.st_size) {
}
break;
} catch (std::bad_alloc) {
if (!bPartial) {
return;
}
nSize = (uint32_t)(nSize * 0.9);
}
}
// nSize = st.st_size;
// while (nSize) {
// try {
// pBuf = new uint8_t[(size_t)nSize];
// if (nSize != st.st_size) {
// std::cout << "File is too large - only " << std::setprecision(4)
// << 100.0 * nSize / st.st_size << "% is loaded";
// }
// break;
// } catch (std::bad_alloc) {
// if (!bPartial) {
// std::cout << "Failed to allocate memory in BufferedReader";
// return;
// }
// nSize = (uint32_t)(nSize * 0.9);
// }
// }
std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
if (!fpIn) {
return;
}
// std::ifstream fpIn(szFileName, std::ifstream::in |
// std::ifstream::binary); if (!fpIn) {
// std::cout << "Unable to open input file: " << szFileName;
// return;
// }
std::streamsize nRead =
fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
fpIn.close();
// std::streamsize nRead =
// fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
// fpIn.close();
assert(nRead == nSize);
}
~BufferedFileReader() {
if (pBuf) {
delete[] pBuf;
}
}
bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
if (!pBuf) {
return false;
}
// assert(nRead == nSize);
// }
// ~BufferedFileReader() {
// if (pBuf) {
// delete[] pBuf;
// }
// }
// bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
// if (!pBuf) {
// return false;
// }
*ppBuf = pBuf;
*pnSize = nSize;
return true;
}
// *ppBuf = pBuf;
// *pnSize = nSize;
// return true;
// }
private:
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
};
// private:
// uint8_t *pBuf = NULL;
// uint64_t nSize = 0;
// };
/**
* @brief Template class to facilitate color space conversion
@@ -290,6 +323,60 @@ class YuvConverter {
int nWidth, nHeight;
};
/**
* @brief Class for writing IVF format header for AV1 codec
*/
class IVFUtils {
public:
void WriteFileHeader(std::vector<uint8_t> &vPacket, uint32_t nFourCC,
uint32_t nWidth, uint32_t nHeight,
uint32_t nFrameRateNum, uint32_t nFrameRateDen,
uint32_t nFrameCnt) {
char header[32];
header[0] = 'D';
header[1] = 'K';
header[2] = 'I';
header[3] = 'F';
mem_put_le16(header + 4, 0); // version
mem_put_le16(header + 6, 32); // header size
mem_put_le32(header + 8, nFourCC); // fourcc
mem_put_le16(header + 12, nWidth); // width
mem_put_le16(header + 14, nHeight); // height
mem_put_le32(header + 16, nFrameRateNum); // rate
mem_put_le32(header + 20, nFrameRateDen); // scale
mem_put_le32(header + 24, nFrameCnt); // length
mem_put_le32(header + 28, 0); // unused
vPacket.insert(vPacket.end(), &header[0], &header[32]);
}
void WriteFrameHeader(std::vector<uint8_t> &vPacket, size_t nFrameSize,
int64_t pts) {
char header[12];
mem_put_le32(header, (int)nFrameSize);
mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
mem_put_le32(header + 8, (int)(pts >> 32));
vPacket.insert(vPacket.end(), &header[0], &header[12]);
}
private:
static inline void mem_put_le32(void *vmem, int val) {
unsigned char *mem = (unsigned char *)vmem;
mem[0] = (unsigned char)((val >> 0) & 0xff);
mem[1] = (unsigned char)((val >> 8) & 0xff);
mem[2] = (unsigned char)((val >> 16) & 0xff);
mem[3] = (unsigned char)((val >> 24) & 0xff);
}
static inline void mem_put_le16(void *vmem, int val) {
unsigned char *mem = (unsigned char *)vmem;
mem[0] = (unsigned char)((val >> 0) & 0xff);
mem[1] = (unsigned char)((val >> 8) & 0xff);
}
};
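// Hedged usage sketch (illustrative; GetNextEncodedFrame is an assumed
// helper): write the 32-byte 'DKIF' file header once, then prefix each
// encoded AV1 frame with its 12-byte frame header, the same pattern
// NvEncoder::GetEncodedPacket follows in this commit.
IVFUtils ivf;
std::vector<uint8_t> stream;
ivf.WriteFileHeader(stream, MAKE_FOURCC('A', 'V', '0', '1'),
                    /*nWidth=*/1920, /*nHeight=*/1080,
                    /*nFrameRateNum=*/30, /*nFrameRateDen=*/1,
                    /*nFrameCnt=*/0xFFFF);
std::vector<uint8_t> frame = GetNextEncodedFrame();  // assumed helper
ivf.WriteFrameHeader(stream, frame.size(), /*pts=*/0);
stream.insert(stream.end(), frame.begin(), frame.end());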
/**
* @brief Utility class to measure elapsed time in seconds between the block of
* executed code
@@ -312,7 +399,7 @@ class StopWatch {
template <typename T>
class ConcurrentQueue {
public:
ConcurrentQueue() {}
ConcurrentQueue() : maxSize(0) {}
ConcurrentQueue(size_t size) : maxSize(size) {}
ConcurrentQueue(const ConcurrentQueue &) = delete;
ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;
@@ -382,7 +469,7 @@ class ConcurrentQueue {
private:
bool full() {
if (m_List.size() == maxSize) return true;
if (maxSize > 0 && m_List.size() == maxSize) return true;
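// (The previous default constructor left maxSize uninitialized, so full()
// compared against garbage; maxSize == 0 now explicitly means "unbounded".)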
return false;
}
@@ -393,14 +480,14 @@ class ConcurrentQueue {
size_t maxSize;
};
inline void CheckInputFile(const char *szInFilePath) {
std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
if (fpIn.fail()) {
std::ostringstream err;
err << "Unable to open input file: " << szInFilePath << std::endl;
throw std::invalid_argument(err.str());
}
}
// inline void CheckInputFile(const char *szInFilePath) {
// std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
// if (fpIn.fail()) {
// std::ostringstream err;
// err << "Unable to open input file: " << szInFilePath << std::endl;
// throw std::invalid_argument(err.str());
// }
// }
inline void ValidateResolution(int nWidth, int nHeight) {
if (nWidth <= 0 || nHeight <= 0) {

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "NvDecoder.h"
@@ -16,23 +32,22 @@
#include <cmath>
#include <iostream>
#include "nvcodec_api.h"
#include "nvcuvid.h"
#define START_TIMER auto start = std::chrono::steady_clock::now();
#define START_TIMER auto start = std::chrono::high_resolution_clock::now();
#define STOP_TIMER(print_message) \
std::cout << print_message \
<< std::chrono::duration_cast<std::chrono::milliseconds>( \
std::chrono::steady_clock::now() - start) \
.count() \
<< " ms " << std::endl;
int64_t elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>( \
std::chrono::high_resolution_clock::now() - start) \
.count(); \
std::cout << print_message << elapsedTime << " ms " << std::endl;
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char *szErrName = NULL; \
cuGetErrorName_ld(err__, &szErrName); \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVDECException::makeNVDECException( \
@@ -164,7 +179,7 @@ int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) {
* CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
*/
int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
START_TIMER
// START_TIMER
m_videoInfo.str("");
m_videoInfo.clear();
m_videoInfo << "Video Input Information" << std::endl
@@ -200,9 +215,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps_ld(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if (!decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU",
@@ -243,7 +258,7 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder_ld() has been called before, and now there's possible
// cuvidCreateDecoder() has been called before, and now there's possible
// config change
return ReconfigureDecoder(pVideoFormat);
}
@@ -379,10 +394,10 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
"Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder_ld(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Initialization Time: ");
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
// STOP_TIMER("Session Initialization Time: ");
return nDecodeSurface;
}
@@ -506,11 +521,11 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder_ld(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Reconfigure Time: ");
// START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
// STOP_TIMER("Session Reconfigure Time: ");
return nDecodeSurface;
}
@@ -539,9 +554,9 @@ int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
pFrame = m_vpFrame.back();
m_vpFrame.pop_back();
if (m_bUseDeviceFrame) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree_ld((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
} else {
delete pFrame;
}
@@ -559,9 +574,18 @@ int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
return false;
}
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture_ld(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
if (m_bForce_zero_latency &&
((!pPicParams->field_pic_flag) || (pPicParams->second_field))) {
CUVIDPARSERDISPINFO dispInfo;
memset(&dispInfo, 0, sizeof(dispInfo));
dispInfo.picture_index = pPicParams->CurrPicIdx;
dispInfo.progressive_frame = !pPicParams->field_pic_flag;
dispInfo.top_field_first = pPicParams->bottom_field_flag ^ 1;
HandlePictureDisplay(&dispInfo);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
return 1;
}
@@ -576,22 +600,87 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
videoProcessingParameters.output_stream = m_cuvidStream;
if (m_bExtractSEIMessage) {
if (m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData) {
// Write SEI Message
uint8_t *seiBuffer =
(uint8_t *)(m_SEIMessagesDisplayOrder[pDispInfo->picture_index]
.pSEIData);
uint32_t seiNumMessages =
m_SEIMessagesDisplayOrder[pDispInfo->picture_index].sei_message_count;
CUSEIMESSAGE *seiMessagesInfo =
m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage;
if (m_fpSEI) {
for (uint32_t i = 0; i < seiNumMessages; i++) {
if ((m_eCodec == cudaVideoCodec_H264) ||
(m_eCodec == cudaVideoCodec_H264_SVC) ||
(m_eCodec == cudaVideoCodec_H264_MVC) ||
(m_eCodec == cudaVideoCodec_HEVC) ||
(m_eCodec == cudaVideoCodec_MPEG2)) {
switch (seiMessagesInfo[i].sei_message_type) {
case SEI_TYPE_TIME_CODE:
case SEI_TYPE_TIME_CODE_H264: {
if (m_eCodec != cudaVideoCodec_MPEG2) {
TIMECODE *timecode = (TIMECODE *)seiBuffer;
fwrite(timecode, sizeof(TIMECODE), 1, m_fpSEI);
} else {
TIMECODEMPEG2 *timecode = (TIMECODEMPEG2 *)seiBuffer;
fwrite(timecode, sizeof(TIMECODEMPEG2), 1, m_fpSEI);
}
} break;
case SEI_TYPE_USER_DATA_REGISTERED:
case SEI_TYPE_USER_DATA_UNREGISTERED: {
fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1,
m_fpSEI);
} break;
case SEI_TYPE_MASTERING_DISPLAY_COLOR_VOLUME: {
SEIMASTERINGDISPLAYINFO *masteringDisplayVolume =
(SEIMASTERINGDISPLAYINFO *)seiBuffer;
fwrite(masteringDisplayVolume, sizeof(SEIMASTERINGDISPLAYINFO),
1, m_fpSEI);
} break;
case SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO: {
SEICONTENTLIGHTLEVELINFO *contentLightLevelInfo =
(SEICONTENTLIGHTLEVELINFO *)seiBuffer;
fwrite(contentLightLevelInfo, sizeof(SEICONTENTLIGHTLEVELINFO),
1, m_fpSEI);
} break;
case SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS: {
SEIALTERNATIVETRANSFERCHARACTERISTICS *transferCharacteristics =
(SEIALTERNATIVETRANSFERCHARACTERISTICS *)seiBuffer;
fwrite(transferCharacteristics,
sizeof(SEIALTERNATIVETRANSFERCHARACTERISTICS), 1,
m_fpSEI);
} break;
}
}
if (m_eCodec == cudaVideoCodec_AV1) {
fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1, m_fpSEI);
}
seiBuffer += seiMessagesInfo[i].sei_message_size;
}
}
free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData);
free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage);
}
}
CUdeviceptr dpSrcFrame = 0;
unsigned int nSrcPitch = 0;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame64_ld(m_hDecoder, pDispInfo->picture_index,
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
&dpSrcFrame, &nSrcPitch,
&videoProcessingParameters));
CUVIDGETDECODESTATUS DecodeStatus;
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
CUresult result = cuvidGetDecodeStatus_ld(
m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
CUresult result =
cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
if (result == CUDA_SUCCESS &&
(DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
// printf("Decode Error occurred for picture %d\n",
// m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
printf("Decode Error occurred for picture %d\n",
m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
}
uint8_t *pDecodedFrame = nullptr;
@@ -603,12 +692,11 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
uint8_t *pFrame = NULL;
if (m_bUseDeviceFrame) {
if (m_bDeviceFramePitched) {
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
CUDA_DRVAPI_CALL(cuMemAllocPitch(
(CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
} else {
CUDA_DRVAPI_CALL(
cuMemAlloc_ld((CUdeviceptr *)&pFrame, GetFrameSize()));
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
}
} else {
pFrame = new uint8_t[GetFrameSize()];
@@ -629,7 +717,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
m.WidthInBytes = GetWidth() * m_nBPP;
m.Height = m_nLumaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
// Copy chroma plane
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning
@@ -639,7 +727,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice =
(CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
if (m_nNumChromaPlanes == 2) {
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
@@ -647,36 +735,83 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
m.dstPitch * m_nLumaHeight * 2);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
}
CUDA_DRVAPI_CALL(cuStreamSynchronize_ld(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
m_vTimestamp.resize(m_vpFrame.size());
}
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
NVDEC_API_CALL(cuvidUnmapVideoFrame64_ld(m_hDecoder, dpSrcFrame));
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
return 1;
}
int NvDecoder::GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
uint32_t seiNumMessages = pSEIMessageInfo->sei_message_count;
CUSEIMESSAGE *seiMessagesInfo = pSEIMessageInfo->pSEIMessage;
size_t totalSEIBufferSize = 0;
if ((pSEIMessageInfo->picIdx < 0) ||
(pSEIMessageInfo->picIdx >= MAX_FRM_CNT)) {
printf("Invalid picture index (%d)\n", pSEIMessageInfo->picIdx);
return 0;
}
for (uint32_t i = 0; i < seiNumMessages; i++) {
totalSEIBufferSize += seiMessagesInfo[i].sei_message_size;
}
if (!m_pCurrSEIMessage) {
printf("Out of Memory, Allocation failed for m_pCurrSEIMessage\n");
return 0;
}
m_pCurrSEIMessage->pSEIData = malloc(totalSEIBufferSize);
if (!m_pCurrSEIMessage->pSEIData) {
printf("Out of Memory, Allocation failed for SEI Buffer\n");
return 0;
}
memcpy(m_pCurrSEIMessage->pSEIData, pSEIMessageInfo->pSEIData,
totalSEIBufferSize);
m_pCurrSEIMessage->pSEIMessage =
(CUSEIMESSAGE *)malloc(sizeof(CUSEIMESSAGE) * seiNumMessages);
if (!m_pCurrSEIMessage->pSEIMessage) {
free(m_pCurrSEIMessage->pSEIData);
m_pCurrSEIMessage->pSEIData = NULL;
return 0;
}
memcpy(m_pCurrSEIMessage->pSEIMessage, pSEIMessageInfo->pSEIMessage,
sizeof(CUSEIMESSAGE) * seiNumMessages);
m_pCurrSEIMessage->sei_message_count = pSEIMessageInfo->sei_message_count;
m_SEIMessagesDisplayOrder[pSEIMessageInfo->picIdx] = *m_pCurrSEIMessage;
return 1;
}
NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
cudaVideoCodec eCodec, bool bLowLatency,
bool bDeviceFramePitched, const Rect *pCropRect,
const Dim *pResizeDim, int maxWidth, int maxHeight,
unsigned int clkRate)
const Dim *pResizeDim, bool extract_user_SEI_Message,
int maxWidth, int maxHeight, unsigned int clkRate,
bool force_zero_latency)
: m_cuContext(cuContext),
m_bUseDeviceFrame(bUseDeviceFrame),
m_eCodec(eCodec),
m_bDeviceFramePitched(bDeviceFramePitched),
m_bExtractSEIMessage(extract_user_SEI_Message),
m_nMaxWidth(maxWidth),
m_nMaxHeight(maxHeight) {
m_nMaxHeight(maxHeight),
m_bForce_zero_latency(force_zero_latency) {
if (pCropRect) m_cropRect = *pCropRect;
if (pResizeDim) m_resizeDim = *pResizeDim;
NVDEC_API_CALL(cuvidCtxLockCreate_ld(&m_ctxLock, cuContext));
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
ck(cuStreamCreate(&m_cuvidStream, CU_STREAM_DEFAULT));
if (m_bExtractSEIMessage) {
m_fpSEI = fopen("sei_message.txt", "wb");
m_pCurrSEIMessage = new CUVIDSEIMESSAGEINFO;
memset(&m_SEIMessagesDisplayOrder, 0, sizeof(m_SEIMessagesDisplayOrder));
}
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
@@ -685,36 +820,49 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
videoParserParameters.pUserData = this;
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
videoParserParameters.pfnDisplayPicture =
m_bForce_zero_latency ? NULL : HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
NVDEC_API_CALL(cuvidCreateVideoParser_ld(&m_hParser, &videoParserParameters));
videoParserParameters.pfnGetSEIMsg =
m_bExtractSEIMessage ? HandleSEIMessagesProc : NULL;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
}
NvDecoder::~NvDecoder() {
START_TIMER
// START_TIMER
if (m_pCurrSEIMessage) {
delete m_pCurrSEIMessage;
m_pCurrSEIMessage = NULL;
}
if (m_fpSEI) {
fclose(m_fpSEI);
m_fpSEI = NULL;
}
if (m_hParser) {
cuvidDestroyVideoParser_ld(m_hParser);
cuvidDestroyVideoParser(m_hParser);
}
cuCtxPushCurrent_ld(m_cuContext);
cuCtxPushCurrent(m_cuContext);
if (m_hDecoder) {
cuvidDestroyDecoder_ld(m_hDecoder);
cuvidDestroyDecoder(m_hDecoder);
}
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
for (uint8_t *pFrame : m_vpFrame) {
if (m_bUseDeviceFrame) {
cuMemFree_ld((CUdeviceptr)pFrame);
cuMemFree((CUdeviceptr)pFrame);
} else {
delete[] pFrame;
}
}
cuCtxPopCurrent_ld(NULL);
cuCtxPopCurrent(NULL);
cuvidCtxLockDestroy_ld(m_ctxLock);
cuvidCtxLockDestroy(m_ctxLock);
STOP_TIMER("Session Deinitialization Time: ");
// STOP_TIMER("Session Deinitialization Time: ");
}
int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
@@ -729,11 +877,7 @@ int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
// NVDEC_API_CALL(cuvidParseVideoData_ld(m_hParser, &packet));
if (CUDA_SUCCESS != cuvidParseVideoData_ld(m_hParser, &packet)) {
return 0;
}
m_cuvidStream = 0;
NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
return m_nDecodedFrame;
}

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
@@ -16,14 +32,27 @@
#include <string.h>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#include "Utils/NvCodecUtils.h"
#include "NvCodecUtils.h"
#include "nvcuvid.h"
#define MAX_FRM_CNT 32
typedef enum {
SEI_TYPE_TIME_CODE_H264 = 1,
SEI_TYPE_USER_DATA_REGISTERED = 4,
SEI_TYPE_USER_DATA_UNREGISTERED = 5,
SEI_TYPE_TIME_CODE = 136,
SEI_TYPE_MASTERING_DISPLAY_COLOR_VOLUME = 137,
SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO = 144,
SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147
} SEI_H264_HEVC_MPEG2_PAYLOAD_TYPE;
/**
* @brief Exception class for error reporting from the decode API.
*/
@@ -95,7 +124,9 @@ class NvDecoder {
NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec,
bool bLowLatency = false, bool bDeviceFramePitched = false,
const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000);
bool extract_user_SEI_Message = false, int maxWidth = 0,
int maxHeight = 0, unsigned int clkRate = 1000,
bool force_zero_latency = false);
~NvDecoder();
/**
@@ -284,6 +315,13 @@ class NvDecoder {
// stop the timer
double stopTimer() { return m_stDecode_time.Stop(); }
protected:
/**
* @brief This function gets called when a sequence is ready to be decoded.
The function also gets called when there is format change
*/
virtual int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
private:
/**
* @brief Callback function to be registered for getting a callback when
@@ -322,10 +360,13 @@ class NvDecoder {
}
/**
* @brief This function gets called when a sequence is ready to be decoded.
The function also gets called when there is format change
* @brief Callback function to be registered for getting a callback when
* all the unregistered user SEI Messages are parsed for a frame.
*/
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
static int CUDAAPI
HandleSEIMessagesProc(void *pUserData, CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
return ((NvDecoder *)pUserData)->GetSEIMessage(pSEIMessageInfo);
}
/**
* @brief This function gets called when a picture is ready to be decoded.
@@ -344,6 +385,13 @@ class NvDecoder {
* one operating points
*/
int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
/**
* @brief This function gets called when all unregistered user SEI messages
* are parsed for a frame
*/
int GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo);
/**
* @brief This function reconfigure decoder if there is a change in
* sequence params.
@@ -374,7 +422,10 @@ class NvDecoder {
// timestamps of decoded frames
std::vector<int64_t> m_vTimestamp;
int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[MAX_FRM_CNT];
CUVIDSEIMESSAGEINFO *m_pCurrSEIMessage = NULL;
CUVIDSEIMESSAGEINFO m_SEIMessagesDisplayOrder[MAX_FRM_CNT];
FILE *m_fpSEI = NULL;
bool m_bEndDecodeDone = false;
std::mutex m_mtxVPFrame;
int m_nFrameAlloc = 0;
@@ -392,4 +443,11 @@ class NvDecoder {
unsigned int m_nOperatingPoint = 0;
bool m_bDispAllLayers = false;
// In H.264, there is an inherent display latency for video contents
// which do not have num_reorder_frames=0 in the VUI. This applies to
// All-Intra and IPPP sequences as well. If the user wants zero display
// latency for All-Intra and IPPP sequences, the below flag will enable
// the display callback immediately after the decode callback.
bool m_bForce_zero_latency = false;
bool m_bExtractSEIMessage = false;
};
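// Hedged construction sketch tying the new parameters together (context
// creation elided; the codec and flag values are illustrative): a low-latency
// H.264 decoder with zero display latency and SEI extraction enabled,
// matching the extended constructor above.
CUcontext cuContext = /* cuCtxCreate(...) elided */ nullptr;
NvDecoder dec(cuContext, /*bUseDeviceFrame=*/false, cudaVideoCodec_H264,
              /*bLowLatency=*/true, /*bDeviceFramePitched=*/false,
              /*pCropRect=*/NULL, /*pResizeDim=*/NULL,
              /*extract_user_SEI_Message=*/true,
              /*maxWidth=*/0, /*maxHeight=*/0, /*clkRate=*/1000,
              /*force_zero_latency=*/true);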

View File

@@ -1,18 +1,32 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "NvEncoder.h"
#include "nvcodec_api.h"
#ifndef _WIN32
#include <cstring>
static inline bool operator==(const GUID &guid1, const GUID &guid2) {
@@ -28,7 +42,8 @@ NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice,
uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay, bool bMotionEstimationOnly,
bool bOutputInVideoMemory)
bool bOutputInVideoMemory, bool bDX12Encode,
bool bUseIVFContainer)
: m_pDevice(pDevice),
m_eDeviceType(eDeviceType),
m_nWidth(nWidth),
@@ -38,6 +53,8 @@ NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice,
m_eBufferFormat(eBufferFormat),
m_bMotionEstimationOnly(bMotionEstimationOnly),
m_bOutputInVideoMemory(bOutputInVideoMemory),
m_bIsDX12Encode(bDX12Encode),
m_bUseIVFContainer(bUseIVFContainer),
m_nExtraOutputDelay(nExtraOutputDelay),
m_hEncoder(nullptr) {
LoadNvEncApi();
@@ -62,7 +79,7 @@ void NvEncoder::LoadNvEncApi() {
uint32_t version = 0;
uint32_t currentVersion =
(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion_ld(&version));
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
if (currentVersion > version) {
NVENC_THROW_ERROR(
"Current Driver Version does not support this NvEncodeAPI version, "
@@ -71,7 +88,7 @@ void NvEncoder::LoadNvEncApi() {
}
m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
NVENC_API_CALL(NvEncodeAPICreateInstance_ld(&m_nvenc));
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
}
NvEncoder::~NvEncoder() { DestroyHWEncoder(); }
@@ -122,27 +139,21 @@ void NvEncoder::CreateDefaultEncoderParams(
}
#endif
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
m_nvenc.nvEncGetEncodePresetConfig(m_hEncoder, codecGuid, presetGuid,
&presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
sizeof(NV_ENC_CONFIG));
pIntializeParams->encodeConfig->frameIntervalP = 1;
pIntializeParams->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
pIntializeParams->tuningInfo = tuningInfo;
pIntializeParams->encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_CONSTQP;
if (!m_bMotionEstimationOnly) {
pIntializeParams->tuningInfo = tuningInfo;
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
// There are changes in the structure layout, therefore users are recommended
// to be careful while moving their application to the new header. Following
// initialization has changed for the same reason.
NV_ENC_PRESET_CONFIG presetConfig = {
NV_ENC_PRESET_CONFIG_VER, 0, {NV_ENC_CONFIG_VER}};
m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, codecGuid, presetGuid,
tuningInfo, &presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
sizeof(NV_ENC_CONFIG));
} else {
if (m_bMotionEstimationOnly) {
m_encodeConfig.version = NV_ENC_CONFIG_VER;
m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
m_encodeConfig.rcParams.constQP = {28, 31, 25};
@@ -157,12 +168,13 @@ void NvEncoder::CreateDefaultEncoderParams(
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
} else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.inputBitDepth =
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
.pixelBitDepthMinus8 =
.outputBitDepth =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
? 2
: 0;
? NV_ENC_BIT_DEPTH_10
: NV_ENC_BIT_DEPTH_8;
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
@@ -170,6 +182,22 @@ void NvEncoder::CreateDefaultEncoderParams(
}
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
} else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
pIntializeParams->encodeConfig->encodeCodecConfig.av1Config.inputBitDepth =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
? NV_ENC_BIT_DEPTH_10
: NV_ENC_BIT_DEPTH_8;
pIntializeParams->encodeConfig->encodeCodecConfig.av1Config
.chromaFormatIDC = 1;
pIntializeParams->encodeConfig->encodeCodecConfig.av1Config.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
if (m_bOutputInVideoMemory) {
pIntializeParams->encodeConfig->frameIntervalP = 1;
}
}
if (m_bIsDX12Encode) {
pIntializeParams->bufferFormat = m_eBufferFormat;
}
return;
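// Hedged setup sketch (illustrative): enc is an assumed concrete encoder
// such as NvEncoderCuda, and the preset/tuning GUIDs assume the usual
// SDK >= 10 surface. Defaults are filled in, then the session is created.
NV_ENC_INITIALIZE_PARAMS initializeParams = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encodeConfig = {NV_ENC_CONFIG_VER};
initializeParams.encodeConfig = &encodeConfig;
enc.CreateDefaultEncoderParams(&initializeParams, NV_ENC_CODEC_AV1_GUID,
                               NV_ENC_PRESET_P3_GUID,
                               NV_ENC_TUNING_INFO_LOW_LATENCY);
enc.CreateEncoder(&initializeParams);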
@@ -192,7 +220,8 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
}
if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID &&
pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID) {
pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID &&
pEncoderParams->encodeGUID != NV_ENC_CODEC_AV1_GUID) {
NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM);
}
@@ -204,6 +233,14 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
}
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
NVENC_THROW_ERROR("YUV444 format isn't supported by AV1 encoder",
NV_ENC_ERR_INVALID_PARAM);
}
}
// set other necessary params if not set yet
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) &&
@@ -219,8 +256,9 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
? true
: false;
if (yuv10BitFormat && pEncoderParams->encodeConfig->encodeCodecConfig
.hevcConfig.pixelBitDepthMinus8 != 2) {
if (yuv10BitFormat &&
pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig
.inputBitDepth != NV_ENC_BIT_DEPTH_10) {
NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
}
@@ -232,6 +270,28 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
}
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
bool yuv10BitFormat =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) ? true : false;
if (yuv10BitFormat &&
pEncoderParams->encodeConfig->encodeCodecConfig.av1Config
.inputBitDepth != NV_ENC_BIT_DEPTH_10) {
NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
}
if (pEncoderParams->encodeConfig->encodeCodecConfig.av1Config
.chromaFormatIDC != 1) {
NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
}
if (m_bOutputInVideoMemory &&
pEncoderParams->encodeConfig->frameIntervalP > 1) {
NVENC_THROW_ERROR(
"Alt Ref frames not supported for AV1 in case of OutputInVideoMemory",
NV_ENC_ERR_INVALID_PARAM);
}
}
memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams));
m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
@@ -240,19 +300,31 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
sizeof(m_encodeConfig));
m_encodeConfig.version = NV_ENC_CONFIG_VER;
} else {
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
// There are changes in the structure layout, therefore users are
// recommended to be careful while moving their application to the new
// header. Following initialization has changed for the same reason.
NV_ENC_PRESET_CONFIG presetConfig = {
NV_ENC_PRESET_CONFIG_VER, 0, {NV_ENC_CONFIG_VER}};
if (!m_bMotionEstimationOnly) {
m_nvenc.nvEncGetEncodePresetConfigEx(
m_hEncoder, pEncoderParams->encodeGUID, pEncoderParams->presetGUID,
pEncoderParams->tuningInfo, &presetConfig);
memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
if (m_bOutputInVideoMemory &&
pEncoderParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
m_encodeConfig.frameIntervalP = 1;
}
} else {
m_encodeConfig.version = NV_ENC_CONFIG_VER;
m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
m_encodeConfig.rcParams.constQP = {28, 31, 25};
}
}
if (((uint32_t)m_encodeConfig.frameIntervalP) > m_encodeConfig.gopLength) {
m_encodeConfig.frameIntervalP = m_encodeConfig.gopLength;
}
m_initializeParams.encodeConfig = &m_encodeConfig;
NVENC_API_CALL(
@@ -268,7 +340,6 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
m_encodeConfig.rcParams.lookaheadDepth +
m_nExtraOutputDelay;
m_nOutputDelay = m_nEncoderBuffer - 1;
m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
if (!m_bOutputInVideoMemory) {
m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr);
@@ -277,12 +348,16 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
if (!m_bIsDX12Encode) {
NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams);
}
}
#endif
m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
if (m_bMotionEstimationOnly) {
m_vMappedRefBuffers.resize(m_nEncoderBuffer, nullptr);
@@ -290,7 +365,7 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
InitializeMVOutputBuffer();
}
} else {
if (!m_bOutputInVideoMemory) {
if (!m_bOutputInVideoMemory && !m_bIsDX12Encode) {
m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr);
InitializeBitstreamBuffer();
}
@@ -317,9 +392,11 @@ void NvEncoder::DestroyHWEncoder() {
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
if (m_vpCompletionEvent[i]) {
if (!m_bIsDX12Encode) {
NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams);
}
CloseHandle(m_vpCompletionEvent[i]);
}
}
@@ -329,7 +406,7 @@ void NvEncoder::DestroyHWEncoder() {
if (m_bMotionEstimationOnly) {
DestroyMVOutputBuffer();
} else {
DestroyBitstreamBuffer();
if (!m_bIsDX12Encode) DestroyBitstreamBuffer();
}
m_nvenc.nvEncDestroyEncoder(m_hEncoder);
@@ -444,6 +521,7 @@ NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer,
picParams.bufferFmt = GetPixelFormat();
picParams.inputWidth = GetEncodeWidth();
picParams.inputHeight = GetEncodeHeight();
picParams.frameIdx = m_iToSend;
picParams.outputBitstream = outputBuffer;
picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENCSTATUS nvStatus = m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams);
@@ -488,8 +566,25 @@ void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR> &vOutputBuffer,
vPacket.push_back(std::vector<uint8_t>());
}
vPacket[i].clear();
if ((m_initializeParams.encodeGUID == NV_ENC_CODEC_AV1_GUID) &&
(m_bUseIVFContainer)) {
if (m_bWriteIVFFileHeader) {
m_IVFUtils.WriteFileHeader(vPacket[i], MAKE_FOURCC('A', 'V', '0', '1'),
m_initializeParams.encodeWidth,
m_initializeParams.encodeHeight,
m_initializeParams.frameRateNum,
m_initializeParams.frameRateDen, 0xFFFF);
m_bWriteIVFFileHeader = false;
}
m_IVFUtils.WriteFrameHeader(vPacket[i],
lockBitstreamData.bitstreamSizeInBytes,
lockBitstreamData.outputTimeStamp);
}
vPacket[i].insert(vPacket[i].end(), &pData[0],
&pData[lockBitstreamData.bitstreamSizeInBytes]);
i++;
NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream(
@@ -533,7 +628,8 @@ bool NvEncoder::Reconfigure(
NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(
void *pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
NV_ENC_BUFFER_USAGE bufferUsage) {
NV_ENC_BUFFER_USAGE bufferUsage,
NV_ENC_FENCE_POINT_D3D12 *pInputFencePoint) {
NV_ENC_REGISTER_RESOURCE registerResource = {NV_ENC_REGISTER_RESOURCE_VER};
registerResource.resourceType = eResourceType;
registerResource.resourceToRegister = pBuffer;
@@ -542,6 +638,7 @@ NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(
registerResource.pitch = pitch;
registerResource.bufferFormat = bufferFormat;
registerResource.bufferUsage = bufferUsage;
registerResource.pInputFencePoint = pInputFencePoint;
NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, &registerResource));
return registerResource.registeredResource;
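// Since this commit targets crashes during resolution reconfigure, a hedged
// sketch of driving Reconfigure (enc, newWidth and newHeight are illustrative;
// resolution changes also assume maxEncodeWidth/maxEncodeHeight were set when
// the session was created): reuse the current session parameters, change only
// the resolution, and force an IDR so the stream stays decodable.
NV_ENC_RECONFIGURE_PARAMS reconfigureParams = {NV_ENC_RECONFIGURE_PARAMS_VER};
NV_ENC_CONFIG reInitCodecConfig = {NV_ENC_CONFIG_VER};
NV_ENC_INITIALIZE_PARAMS params = enc.GetinitializeParams();
memcpy(&reconfigureParams.reInitEncodeParams, &params, sizeof(params));
memcpy(&reInitCodecConfig, params.encodeConfig, sizeof(reInitCodecConfig));
reconfigureParams.reInitEncodeParams.encodeConfig = &reInitCodecConfig;
reconfigureParams.reInitEncodeParams.encodeWidth = newWidth;
reconfigureParams.reInitEncodeParams.encodeHeight = newHeight;
reconfigureParams.reInitEncodeParams.darWidth = newWidth;
reconfigureParams.reInitEncodeParams.darHeight = newHeight;
reconfigureParams.forceIDR = 1;
enc.Reconfigure(&reconfigureParams);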

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
@@ -20,6 +36,8 @@
#include <string>
#include <vector>
#include "NvCodecUtils.h"
#include "log.h"
#include "nvEncodeAPI.h"
/**
@@ -45,12 +63,74 @@ class NVENCException : public std::exception {
NVENCSTATUS m_errorCode;
};
inline const char* GetNvStatusString(NVENCSTATUS eStatus) {
switch (eStatus) {
case NV_ENC_SUCCESS:
return "NV_ENC_SUCCESS";
case NV_ENC_ERR_NO_ENCODE_DEVICE:
return "NV_ENC_ERR_NO_ENCODE_DEVICE";
case NV_ENC_ERR_UNSUPPORTED_DEVICE:
return "NV_ENC_ERR_UNSUPPORTED_DEVICE";
case NV_ENC_ERR_INVALID_ENCODERDEVICE:
return "NV_ENC_ERR_INVALID_ENCODERDEVICE";
case NV_ENC_ERR_INVALID_DEVICE:
return "NV_ENC_ERR_INVALID_DEVICE";
case NV_ENC_ERR_DEVICE_NOT_EXIST:
return "NV_ENC_ERR_DEVICE_NOT_EXIST";
case NV_ENC_ERR_INVALID_PTR:
return "NV_ENC_ERR_INVALID_PTR";
case NV_ENC_ERR_INVALID_EVENT:
return "NV_ENC_ERR_INVALID_EVENT";
case NV_ENC_ERR_INVALID_PARAM:
return "NV_ENC_ERR_INVALID_PARAM";
case NV_ENC_ERR_INVALID_CALL:
return "NV_ENC_ERR_INVALID_CALL";
case NV_ENC_ERR_OUT_OF_MEMORY:
return "NV_ENC_ERR_OUT_OF_MEMORY";
case NV_ENC_ERR_ENCODER_NOT_INITIALIZED:
return "NV_ENC_ERR_ENCODER_NOT_INITIALIZED";
case NV_ENC_ERR_UNSUPPORTED_PARAM:
return "NV_ENC_ERR_UNSUPPORTED_PARAM";
case NV_ENC_ERR_LOCK_BUSY:
return "NV_ENC_ERR_LOCK_BUSY";
case NV_ENC_ERR_NOT_ENOUGH_BUFFER:
return "NV_ENC_ERR_NOT_ENOUGH_BUFFER";
case NV_ENC_ERR_INVALID_VERSION:
return "NV_ENC_ERR_INVALID_VERSION";
case NV_ENC_ERR_MAP_FAILED:
return "NV_ENC_ERR_MAP_FAILED";
case NV_ENC_ERR_NEED_MORE_INPUT:
return "NV_ENC_ERR_NEED_MORE_INPUT";
case NV_ENC_ERR_ENCODER_BUSY:
return "NV_ENC_ERR_ENCODER_BUSY";
case NV_ENC_ERR_EVENT_NOT_REGISTERD:
return "NV_ENC_ERR_EVENT_NOT_REGISTERD";
case NV_ENC_ERR_GENERIC:
return "NV_ENC_ERR_GENERIC";
case NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY:
return "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY";
case NV_ENC_ERR_UNIMPLEMENTED:
return "NV_ENC_ERR_UNIMPLEMENTED";
case NV_ENC_ERR_RESOURCE_REGISTER_FAILED:
return "NV_ENC_ERR_RESOURCE_REGISTER_FAILED";
case NV_ENC_ERR_RESOURCE_NOT_REGISTERED:
return "NV_ENC_ERR_RESOURCE_NOT_REGISTERED";
case NV_ENC_ERR_RESOURCE_NOT_MAPPED:
return "NV_ENC_ERR_RESOURCE_NOT_MAPPED";
case NV_ENC_ERR_NEED_MORE_OUTPUT:
return "NV_ENC_ERR_NEED_MORE_OUTPUT";
default:
return "NVENC_UNKNOWN_ERROR";
}
}
inline NVENCException NVENCException::makeNVENCException(
const std::string& errorStr, const NVENCSTATUS errorCode,
const std::string& functionName, const std::string& fileName, int lineNo) {
std::ostringstream errorLog;
errorLog << functionName << " : " << errorStr << " at " << fileName << ":"
<< lineNo << std::endl;
LOG_ERROR("{} failed due to {}", functionName, GetNvStatusString(errorCode));
NVENCException exception(errorLog.str(), errorCode);
return exception;
}
@@ -92,7 +172,7 @@ class NvEncoder {
* Application must call this function to initialize the encoder, before
* starting to encode any frames.
*/
void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
virtual void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
/**
* @brief This function is used to destroy the encoder session.
@@ -100,7 +180,7 @@ class NvEncoder {
* clean up any allocated resources. The application must call EndEncode()
* function to get any queued encoded frames before calling DestroyEncoder().
*/
void DestroyEncoder();
virtual void DestroyEncoder();
/**
* @brief This function is used to reconfigure an existing encoder session.
@@ -124,7 +204,7 @@ class NvEncoder {
* data, which has been copied to an input buffer obtained from the
* GetNextInputFrame() function.
*/
void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket,
virtual void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket,
NV_ENC_PIC_PARAMS* pPicParams = nullptr);
/**
@@ -134,7 +214,7 @@ class NvEncoder {
* from the encoder. The application must call this function before
* destroying an encoder session.
*/
void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
virtual void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
/**
* @brief This function is used to query hardware encoder capabilities.
@@ -272,6 +352,13 @@ class NvEncoder {
*/
uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
/**
 * @brief This function returns the initialize params (width, height, fps,
 * etc.).
 */
NV_ENC_INITIALIZE_PARAMS GetinitializeParams() const {
return m_initializeParams;
}
protected:
/**
* @brief NvEncoder class constructor.
@@ -280,7 +367,8 @@ class NvEncoder {
NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth,
uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nOutputDelay, bool bMotionEstimationOnly,
bool bOutputInVideoMemory = false);
bool bOutputInVideoMemory = false, bool bDX12Encode = false,
bool bUseIVFContainer = true);
/**
* @brief This function is used to check if hardware encoder is properly
@@ -314,7 +402,8 @@ class NvEncoder {
NV_ENC_REGISTERED_PTR RegisterResource(
void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE);
NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE,
NV_ENC_FENCE_POINT_D3D12* pInputFencePoint = NULL);
/**
* @brief This function returns maximum width used to open the encoder
@@ -448,8 +537,10 @@ class NvEncoder {
protected:
bool m_bMotionEstimationOnly = false;
bool m_bOutputInVideoMemory = false;
bool m_bIsDX12Encode = false;
void* m_hEncoder = nullptr;
NV_ENCODE_API_FUNCTION_LIST m_nvenc;
NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
std::vector<NvEncInputFrame> m_vInputFrames;
std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
std::vector<NvEncInputFrame> m_vReferenceFrames;
@@ -462,6 +553,9 @@ class NvEncoder {
int32_t m_iGot = 0;
int32_t m_nEncoderBuffer = 0;
int32_t m_nOutputDelay = 0;
IVFUtils m_IVFUtils;
bool m_bWriteIVFFileHeader = true;
bool m_bUseIVFContainer = true;
private:
uint32_t m_nWidth;
@@ -469,7 +563,6 @@ class NvEncoder {
NV_ENC_BUFFER_FORMAT m_eBufferFormat;
void* m_pDevice;
NV_ENC_DEVICE_TYPE m_eDeviceType;
NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
NV_ENC_CONFIG m_encodeConfig = {};
bool m_bEncoderInitialized = false;
uint32_t m_nExtraOutputDelay =

View File

@@ -1,13 +1,29 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <vector>
@@ -18,7 +34,8 @@
#include <iterator>
#include <cstring>
#include <functional>
#include "../Utils/Logger.h"
#include "Logger.h"
#include "nvEncodeAPI.h"
extern simplelogger::Logger *logger;
@@ -81,6 +98,11 @@ public:
virtual bool IsCodecHEVC() {
return GetEncodeGUID() == NV_ENC_CODEC_HEVC_GUID;
}
virtual bool IsCodecAV1() {
return GetEncodeGUID() == NV_ENC_CODEC_AV1_GUID;
}
std::string GetHelpMessage(bool bMeOnly = false, bool bUnbuffered = false, bool bHide444 = false, bool bOutputInVidMem = false)
{
std::ostringstream oss;
@@ -103,7 +125,8 @@ public:
}
else
{
oss << "; HEVC: " << szHevcProfileNames << std::endl;
oss << "; HEVC: " << szHevcProfileNames;
oss << "; AV1: " << szAV1ProfileNames << std::endl;
}
if (!bMeOnly)
@@ -117,7 +140,7 @@ public:
if (!bHide444 && !bLowLatency)
{
oss << "-444 (Only for RGB input) YUV444 encode" << std::endl;
oss << "-444 (Only for RGB input) YUV444 encode. Not valid for AV1 Codec" << std::endl;
}
if (bMeOnly) return oss.str();
oss << "-fps Frame rate" << std::endl;
@@ -132,6 +155,8 @@ public:
oss << "-rc Rate control mode: " << szRcModeNames << std::endl
<< "-gop Length of GOP (Group of Pictures)" << std::endl
<< "-bitrate Average bit rate, can be in unit of 1, K, M" << std::endl
<< "Note: Fps or Average bit rate values for each session can be specified in the form of v1,v1,v3 (no space) for AppTransOneToN" << std::endl
<< " If the number of 'bitrate' or 'fps' values specified are less than the number of sessions, then the last specified value will be considered for the remaining sessions" << std::endl
<< "-maxbitrate Max bit rate, can be in unit of 1, K, M" << std::endl
<< "-vbvbufsize VBV buffer size in bits, can be in unit of 1, K, M" << std::endl
<< "-vbvinit VBV initial delay in bits, can be in unit of 1, K, M" << std::endl
@@ -174,8 +199,11 @@ public:
}
os
<< std::endl << "\tprofile : " << ConvertValueToString(vProfile, szProfileNames, pParams->encodeConfig->profileGUID)
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC : pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? 0 : pParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8) + 8
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC :
(pParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) ? pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC :
pParams->encodeConfig->encodeCodecConfig.av1Config.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.inputBitDepth : (pParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) ?
pParams->encodeConfig->encodeCodecConfig.hevcConfig.inputBitDepth : pParams->encodeConfig->encodeCodecConfig.av1Config.inputBitDepth)
<< std::endl << "\trc : " << ConvertValueToString(vRcMode, szRcModeNames, pParams->encodeConfig->rcParams.rateControlMode)
;
if (pParams->encodeConfig->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CONSTQP) {
@@ -213,9 +241,16 @@ public:
* provided to the application and sets the fields from NV_ENC_INITIALIZE_PARAMS
* based on the supplied values.
*/
virtual void setTransOneToN(bool isTransOneToN)
{
bTransOneToN = isTransOneToN;
}
virtual void SetInitParams(NV_ENC_INITIALIZE_PARAMS *pParams, NV_ENC_BUFFER_FORMAT eBufferFormat)
{
NV_ENC_CONFIG &config = *pParams->encodeConfig;
int nGOPOption = 0, nBFramesOption = 0;
for (unsigned i = 0; i < tokens.size(); i++)
{
if (
@@ -224,11 +259,12 @@ public:
tokens[i] == "-tuninginfo" && ++i ||
tokens[i] == "-multipass" && ++i != tokens.size() && ParseString("-multipass", tokens[i], vMultiPass, szMultipass, &config.rcParams.multiPass) ||
tokens[i] == "-profile" && ++i != tokens.size() && (IsCodecH264() ?
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID)) ||
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) : IsCodecHEVC() ?
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vAV1Profile, szAV1ProfileNames, &config.profileGUID)) ||
tokens[i] == "-rc" && ++i != tokens.size() && ParseString("-rc", tokens[i], vRcMode, szRcModeNames, &config.rcParams.rateControlMode) ||
tokens[i] == "-fps" && ++i != tokens.size() && ParseInt("-fps", tokens[i], &pParams->frameRateNum) ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP && ++nBFramesOption ||
tokens[i] == "-bitrate" && ++i != tokens.size() && ParseBitRate("-bitrate", tokens[i], &config.rcParams.averageBitRate) ||
tokens[i] == "-maxbitrate" && ++i != tokens.size() && ParseBitRate("-maxbitrate", tokens[i], &config.rcParams.maxBitRate) ||
tokens[i] == "-vbvbufsize" && ++i != tokens.size() && ParseBitRate("-vbvbufsize", tokens[i], &config.rcParams.vbvBufferSize) ||
@@ -257,14 +293,19 @@ public:
if (tokens[i] == "-gop" && ++i != tokens.size() && ParseInt("-gop", tokens[i], &config.gopLength))
{
nGOPOption = 1;
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.idrPeriod = config.gopLength;
}
else
else if (IsCodecHEVC())
{
config.encodeCodecConfig.hevcConfig.idrPeriod = config.gopLength;
}
else
{
config.encodeCodecConfig.av1Config.idrPeriod = config.gopLength;
}
continue;
}
@@ -273,10 +314,17 @@ public:
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
} else
}
else if (IsCodecHEVC())
{
config.encodeCodecConfig.hevcConfig.chromaFormatIDC = 3;
}
else
{
std::ostringstream errmessage;
errmessage << "Incorrect Parameter: YUV444 Input not supported with AV1 Codec" << std::endl;
throw std::invalid_argument(errmessage.str());
}
continue;
}
@@ -292,10 +340,27 @@ public:
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
config.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2;
config.encodeCodecConfig.hevcConfig.inputBitDepth = NV_ENC_BIT_DEPTH_10;
config.encodeCodecConfig.hevcConfig.outputBitDepth = NV_ENC_BIT_DEPTH_10;
}
}
if (IsCodecAV1())
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
{
config.encodeCodecConfig.av1Config.inputBitDepth = NV_ENC_BIT_DEPTH_10;
config.encodeCodecConfig.av1Config.outputBitDepth = NV_ENC_BIT_DEPTH_10;
}
}
if (nGOPOption && nBFramesOption && (config.gopLength < ((uint32_t)config.frameIntervalP)))
{
std::ostringstream errmessage;
errmessage << "gopLength (" << config.gopLength << ") must be greater or equal to frameIntervalP (number of B frames + 1) (" << config.frameIntervalP << ")\n";
throw std::invalid_argument(errmessage.str());
}
funcInit(pParams);
LOG(INFO) << NvEncoderInitParam().MainParamToString(pParams);
LOG(TRACE) << NvEncoderInitParam().FullParamToString(pParams);
@@ -327,6 +392,37 @@ private:
return split(strValueNames, ' ')[it - vValue.begin()];
}
bool ParseBitRate(const std::string &strName, const std::string &strValue, unsigned *pBitRate) {
if(bTransOneToN)
{
std::vector<std::string> oneToNBitrate = split(strValue, ',');
std::string currBitrate;
if ((bitrateCnt + 1) > oneToNBitrate.size())
{
currBitrate = oneToNBitrate[oneToNBitrate.size() - 1];
}
else
{
currBitrate = oneToNBitrate[bitrateCnt];
bitrateCnt++;
}
try {
size_t l;
double r = std::stod(currBitrate, &l);
char c = currBitrate[l];
if (c != 0 && c != 'k' && c != 'm') {
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
}
catch (std::invalid_argument) {
return false;
}
return true;
}
else
{
try {
size_t l;
double r = std::stod(strValue, &l);
@@ -335,21 +431,50 @@ private:
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
} catch (std::invalid_argument) {
}
catch (std::invalid_argument) {
return false;
}
return true;
}
}
template<typename T>
bool ParseInt(const std::string &strName, const std::string &strValue, T *pInt) {
if (bTransOneToN)
{
std::vector<std::string> oneToNFps = split(strValue, ',');
std::string currFps;
if ((fpsCnt + 1) > oneToNFps.size())
{
currFps = oneToNFps[oneToNFps.size() - 1];
}
else
{
currFps = oneToNFps[fpsCnt];
fpsCnt++;
}
try {
*pInt = std::stoi(strValue);
} catch (std::invalid_argument) {
*pInt = std::stoi(currFps);
}
catch (std::invalid_argument) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
else
{
try {
*pInt = std::stoi(strValue);
}
catch (std::invalid_argument) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
}
bool ParseQp(const std::string &strName, const std::string &strValue, NV_ENC_QP *pQp) {
std::vector<std::string> vQp = split(strValue, ',');
try {
@@ -385,11 +510,15 @@ private:
GUID guidPreset = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO m_TuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
bool bLowLatency = false;
uint32_t bitrateCnt = 0;
uint32_t fpsCnt = 0;
bool bTransOneToN = false;
const char *szCodecNames = "h264 hevc";
const char *szCodecNames = "h264 hevc av1";
std::vector<GUID> vCodec = std::vector<GUID> {
NV_ENC_CODEC_H264_GUID,
NV_ENC_CODEC_HEVC_GUID
NV_ENC_CODEC_HEVC_GUID,
NV_ENC_CODEC_AV1_GUID
};
const char *szChromaNames = "yuv420 yuv444";
@@ -422,9 +551,15 @@ private:
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szAV1ProfileNames = "main";
std::vector<GUID> vAV1Profile = std::vector<GUID>{
NV_ENC_AV1_PROFILE_MAIN_GUID,
};
const char *szProfileNames = "(default) auto baseline(h264) main(h264) high(h264) high444(h264)"
" stereo(h264) progressiv_high(h264) constrained_high(h264)"
" main(hevc) main10(hevc) frext(hevc)";
" main(hevc) main10(hevc) frext(hevc)"
" main(av1) high(av1)";
std::vector<GUID> vProfile = std::vector<GUID> {
GUID{},
NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID,
@@ -438,15 +573,17 @@ private:
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
NV_ENC_AV1_PROFILE_MAIN_GUID,
};
const char *szLowLatencyTuningInfoNames = "lowlatency ultralowlatency";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless uhq";
std::vector<NV_ENC_TUNING_INFO> vTuningInfo = std::vector<NV_ENC_TUNING_INFO>{
NV_ENC_TUNING_INFO_HIGH_QUALITY,
NV_ENC_TUNING_INFO_LOW_LATENCY,
NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
NV_ENC_TUNING_INFO_LOSSLESS
NV_ENC_TUNING_INFO_LOSSLESS,
NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY
};
const char *szRcModeNames = "constqp vbr cbr";
@@ -611,7 +748,8 @@ public:
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.repeatSPSPPS << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.hevcConfig.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC << std::endl
<< " pixelBitDepthMinus8: " << pConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 << std::endl
<< " inputBitDepth: " << pConfig->encodeCodecConfig.hevcConfig.inputBitDepth << std::endl
<< " outputBitDepth: " << pConfig->encodeCodecConfig.hevcConfig.outputBitDepth << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.hevcConfig.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshCnt << std::endl
@@ -638,7 +776,65 @@ public:
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.hevcConfig.ltrTrustMode << std::endl;
} else if (pInitializeParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (AV1):" << std::endl
<< " level: " << pConfig->encodeCodecConfig.av1Config.level << std::endl
<< " tier: " << pConfig->encodeCodecConfig.av1Config.tier << std::endl
<< " minPartSize: " << pConfig->encodeCodecConfig.av1Config.minPartSize << std::endl
<< " maxPartSize: " << pConfig->encodeCodecConfig.av1Config.maxPartSize << std::endl
<< " outputAnnexBFormat: " << pConfig->encodeCodecConfig.av1Config.outputAnnexBFormat << std::endl
<< " enableTimingInfo: " << pConfig->encodeCodecConfig.av1Config.enableTimingInfo << std::endl
<< " enableDecoderModelInfo: " << pConfig->encodeCodecConfig.av1Config.enableDecoderModelInfo << std::endl
<< " enableFrameIdNumbers: " << pConfig->encodeCodecConfig.av1Config.enableFrameIdNumbers << std::endl
<< " disableSeqHdr: " << pConfig->encodeCodecConfig.av1Config.disableSeqHdr << std::endl
<< " repeatSeqHdr: " << pConfig->encodeCodecConfig.av1Config.repeatSeqHdr << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.av1Config.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.av1Config.chromaFormatIDC << std::endl
<< " enableBitstreamPadding: " << pConfig->encodeCodecConfig.av1Config.enableBitstreamPadding << std::endl
<< " enableCustomTileConfig: " << pConfig->encodeCodecConfig.av1Config.enableCustomTileConfig << std::endl
<< " enableFilmGrainParams: " << pConfig->encodeCodecConfig.av1Config.enableFilmGrainParams << std::endl
<< " inputBitDepth: " << pConfig->encodeCodecConfig.av1Config.inputBitDepth << std::endl
<< " outputBitDepth: " << pConfig->encodeCodecConfig.av1Config.outputBitDepth << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.av1Config.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.av1Config.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.av1Config.intraRefreshCnt << std::endl
<< " maxNumRefFramesInDPB: " << pConfig->encodeCodecConfig.av1Config.maxNumRefFramesInDPB << std::endl
<< " numTileColumns: " << pConfig->encodeCodecConfig.av1Config.numTileColumns << std::endl
<< " numTileRows: " << pConfig->encodeCodecConfig.av1Config.numTileRows << std::endl
<< " maxTemporalLayersMinus1: " << pConfig->encodeCodecConfig.av1Config.maxTemporalLayersMinus1 << std::endl
<< " colorPrimaries: " << pConfig->encodeCodecConfig.av1Config.colorPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.av1Config.transferCharacteristics << std::endl
<< " matrixCoefficients: " << pConfig->encodeCodecConfig.av1Config.matrixCoefficients << std::endl
<< " colorRange: " << pConfig->encodeCodecConfig.av1Config.colorRange << std::endl
<< " chromaSamplePosition: " << pConfig->encodeCodecConfig.av1Config.chromaSamplePosition << std::endl
<< " useBFramesAsRef: " << pConfig->encodeCodecConfig.av1Config.useBFramesAsRef << std::endl
<< " numFwdRefs: " << pConfig->encodeCodecConfig.av1Config.numFwdRefs << std::endl
<< " numBwdRefs: " << pConfig->encodeCodecConfig.av1Config.numBwdRefs << std::endl;
if (pConfig->encodeCodecConfig.av1Config.filmGrainParams != NULL)
{
os
<< " NV_ENC_FILM_GRAIN_PARAMS_AV1:" << std::endl
<< " applyGrain: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->applyGrain << std::endl
<< " chromaScalingFromLuma: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->chromaScalingFromLuma << std::endl
<< " overlapFlag: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->overlapFlag << std::endl
<< " clipToRestrictedRange: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->clipToRestrictedRange << std::endl
<< " grainScalingMinus8: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->grainScalingMinus8 << std::endl
<< " arCoeffLag: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->arCoeffLag << std::endl
<< " numYPoints: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->numYPoints << std::endl
<< " numCbPoints: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->numCbPoints << std::endl
<< " numCrPoints: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->numCrPoints << std::endl
<< " arCoeffShiftMinus6: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->arCoeffShiftMinus6 << std::endl
<< " grainScaleShift: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->grainScaleShift << std::endl
<< " cbMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->cbMult << std::endl
<< " cbLumaMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->cbLumaMult << std::endl
<< " cbOffset: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->cbOffset << std::endl
<< " crMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->crMult << std::endl
<< " crLumaMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->crLumaMult << std::endl
<< " crOffset: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->crOffset << std::endl;
}
}
return os.str();
}
};

View File

@@ -1,27 +1,41 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "NvEncoderCuda.h"
#include "nvcodec_api.h"
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay,
bool bMotionEstimationOnly,
bool bOutputInVideoMemory)
bool bOutputInVideoMemory, bool bUseIVFContainer)
: NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight,
eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly,
bOutputInVideoMemory),
bOutputInVideoMemory, false, bUseIVFContainer),
m_cuContext(cuContext) {
if (!m_hEncoder) {
NVENC_THROW_ERROR("Encoder Initialization failed",
@@ -46,7 +60,7 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
int numCount = m_bMotionEstimationOnly ? 2 : 1;
for (int count = 0; count < numCount; count++) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
std::vector<void *> inputFrames;
for (int i = 0; i < numInputBuffers; i++) {
CUdeviceptr pDeviceFrame;
@@ -56,13 +70,13 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
CUDA_DRVAPI_CALL(cuMemAllocPitch(
(CUdeviceptr *)&pDeviceFrame, &m_cudaPitch,
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
GetMaxEncodeHeight() + chromaHeight, 16));
inputFrames.push_back((void *)pDeviceFrame);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
RegisterInputResources(
inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
@@ -90,24 +104,23 @@ void NvEncoderCuda::ReleaseCudaResources() {
UnregisterInputResources();
cuCtxPushCurrent_ld(m_cuContext);
cuCtxPushCurrent(m_cuContext);
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) {
if (m_vInputFrames[i].inputPtr) {
cuMemFree_ld(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
}
}
m_vInputFrames.clear();
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) {
if (m_vReferenceFrames[i].inputPtr) {
cuMemFree_ld(
reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
}
}
m_vReferenceFrames.clear();
cuCtxPopCurrent_ld(NULL);
cuCtxPopCurrent(NULL);
m_cuContext = nullptr;
}
@@ -123,7 +136,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -141,10 +154,10 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -170,14 +183,14 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}
void NvEncoderCuda::CopyToDeviceFrame(
@@ -192,7 +205,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -210,9 +223,9 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -237,11 +250,11 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}

View File

@@ -0,0 +1,127 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <vector>
#include <stdint.h>
#include <mutex>
#include <cuda.h>
#include "NvEncoder.h"
#define CUDA_DRVAPI_CALL( call ) \
do \
{ \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) \
{ \
const char *szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName ; \
throw NVENCException::makeNVENCException(errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, __LINE__); \
} \
} \
while (0)
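(A brief, hypothetical call-site sketch of this macro: any CUDA driver API
failure is converted into an NVENCException instead of a silently ignored
error code.)
// Hedged example; dptr and the 1080p NV12 size are illustrative only.
CUdeviceptr dptr = 0;
CUDA_DRVAPI_CALL(cuMemAlloc(&dptr, 1920 * 1080 * 3 / 2));  // one NV12 frame
CUDA_DRVAPI_CALL(cuMemFree(dptr));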
/**
* @brief Encoder for CUDA device memory.
*/
class NvEncoderCuda : public NvEncoder
{
public:
NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay = 3, bool bMotionEstimationOnly = false, bool bOPInVideoMemory = false, bool bUseIVFContainer = true);
virtual ~NvEncoderCuda();
/**
* @brief This is a static function to copy input data from host memory to device memory.
* This function assumes YUV plane is a single contiguous memory segment.
*/
static void CopyToDeviceFrame(CUcontext device,
void* pSrcFrame,
uint32_t nSrcPitch,
CUdeviceptr pDstFrame,
uint32_t dstPitch,
int width,
int height,
CUmemorytype srcMemoryType,
NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[],
uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false,
CUstream stream = NULL);
/**
* @brief This is a static function to copy input data from host memory to device memory.
* Application must pass a separate device pointer for each YUV plane.
*/
static void CopyToDeviceFrame(CUcontext device,
void* pSrcFrame,
uint32_t nSrcPitch,
CUdeviceptr pDstFrame,
uint32_t dstPitch,
int width,
int height,
CUmemorytype srcMemoryType,
NV_ENC_BUFFER_FORMAT pixelFormat,
CUdeviceptr dstChromaPtr[],
uint32_t dstChromaPitch,
uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false);
/**
* @brief This function sets input and output CUDA streams
*/
void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream);
protected:
/**
* @brief This function is used to release the input buffers allocated for encoding.
* This function is an override of virtual function NvEncoder::ReleaseInputBuffers().
*/
virtual void ReleaseInputBuffers() override;
private:
/**
* @brief This function is used to allocate input buffers for encoding.
* This function is an override of virtual function NvEncoder::AllocateInputBuffers().
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
private:
/**
* @brief This is a private function to release CUDA device memory used for encoding.
*/
void ReleaseCudaResources();
protected:
CUcontext m_cuContext;
private:
size_t m_cudaPitch = 0;
};
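(As a quick orientation, a minimal encode loop with this class might look like
the sketch below. It is illustrative only: cuCtx, width, height, and frame are
assumed to exist, and error handling is omitted.)
// Hedged sketch: encode one host NV12 frame and flush.
NvEncoderCuda enc(cuCtx, width, height, NV_ENC_BUFFER_FORMAT_NV12);
NV_ENC_INITIALIZE_PARAMS params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG cfg = {NV_ENC_CONFIG_VER};
params.encodeConfig = &cfg;
enc.CreateDefaultEncoderParams(&params, NV_ENC_CODEC_H264_GUID,
                               NV_ENC_PRESET_P3_GUID,
                               NV_ENC_TUNING_INFO_LOW_LATENCY);
enc.CreateEncoder(&params);
const NvEncInputFrame *in = enc.GetNextInputFrame();
NvEncoderCuda::CopyToDeviceFrame(cuCtx, frame, 0,
                                 (CUdeviceptr)in->inputPtr, in->pitch,
                                 width, height, CU_MEMORYTYPE_HOST,
                                 in->bufferFormat, in->chromaOffsets,
                                 in->numChromaPlanes);
std::vector<std::vector<uint8_t>> packets;
enc.EncodeFrame(packets);   // packets now holds zero or more bitstream units
enc.EndEncode(packets);     // flush queued frames
enc.DestroyEncoder();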

View File

@@ -0,0 +1,110 @@
#include "nvcodec_common.h"
void ShowHelpAndExit(const char *szBadOption) {
std::ostringstream oss;
bool bThrowError = false;
if (szBadOption) {
oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
bThrowError = true;
}
oss << "Options:" << std::endl
<< "-i Input file path" << std::endl
<< "-o Output file path" << std::endl
<< "-s Input resolution in this form: WxH" << std::endl
<< "-if Input format: iyuv nv12" << std::endl
<< "-gpu Ordinal of GPU to use" << std::endl
<< "-case 0: Encode frames with dynamic bitrate change"
<< std::endl
<< " 1: Encode frames with dynamic resolution change"
<< std::endl;
oss << NvEncoderInitParam("", nullptr, true).GetHelpMessage() << std::endl;
if (bThrowError) {
throw std::invalid_argument(oss.str());
} else {
std::cout << oss.str();
exit(0);
}
}
void ParseCommandLine(int argc, char *argv[], char *szInputFileName,
int &nWidth, int &nHeight, NV_ENC_BUFFER_FORMAT &eFormat,
char *szOutputFileName, NvEncoderInitParam &initParam,
int &iGpu, int &iCase, int &nFrame) {
std::ostringstream oss;
int i;
for (i = 1; i < argc; i++) {
if (!_stricmp(argv[i], "-h")) {
ShowHelpAndExit();
}
if (!_stricmp(argv[i], "-i")) {
if (++i == argc) {
ShowHelpAndExit("-i");
}
sprintf(szInputFileName, "%s", argv[i]);
continue;
}
if (!_stricmp(argv[i], "-o")) {
if (++i == argc) {
ShowHelpAndExit("-o");
}
sprintf(szOutputFileName, "%s", argv[i]);
continue;
}
if (!_stricmp(argv[i], "-s")) {
if (++i == argc || 2 != sscanf(argv[i], "%dx%d", &nWidth, &nHeight)) {
ShowHelpAndExit("-s");
}
continue;
}
std::vector<std::string> vszFileFormatName = {"iyuv", "nv12"};
NV_ENC_BUFFER_FORMAT aFormat[] = {
NV_ENC_BUFFER_FORMAT_IYUV,
NV_ENC_BUFFER_FORMAT_NV12,
};
if (!_stricmp(argv[i], "-if")) {
if (++i == argc) {
ShowHelpAndExit("-if");
}
auto it = std::find(vszFileFormatName.begin(), vszFileFormatName.end(),
argv[i]);
if (it == vszFileFormatName.end()) {
ShowHelpAndExit("-if");
}
eFormat = aFormat[it - vszFileFormatName.begin()];
continue;
}
if (!_stricmp(argv[i], "-gpu")) {
if (++i == argc) {
ShowHelpAndExit("-gpu");
}
iGpu = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-case")) {
if (++i == argc) {
ShowHelpAndExit("-case");
}
iCase = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-frame")) {
if (++i == argc) {
ShowHelpAndExit("-frame");
}
nFrame = atoi(argv[i]);
continue;
}
// Regard as encoder parameter
if (argv[i][0] != '-') {
ShowHelpAndExit(argv[i]);
}
oss << argv[i] << " ";
while (i + 1 < argc && argv[i + 1][0] != '-') {
oss << argv[++i] << " ";
}
}
initParam = NvEncoderInitParam(oss.str().c_str(), nullptr, true);
}
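(For context, a minimal caller of ParseCommandLine might look like the
hypothetical sketch below; file names, sizes, and defaults are illustrative.)
// Hedged sketch of a test-app entry point wiring up the parsed options.
int main(int argc, char *argv[]) {
  char szInFile[256] = "input.yuv";
  char szOutFile[256] = "out.h264";
  int nWidth = 1920, nHeight = 1080, iGpu = 0, iCase = 0, nFrame = 0;
  NV_ENC_BUFFER_FORMAT eFormat = NV_ENC_BUFFER_FORMAT_NV12;
  NvEncoderInitParam initParam;
  ParseCommandLine(argc, argv, szInFile, nWidth, nHeight, eFormat, szOutFile,
                   initParam, iGpu, iCase, nFrame);
  // ... create a CUDA context on iGpu, then run case 0 (dynamic bitrate)
  // or case 1 (dynamic resolution) for nFrame frames ...
  return 0;
}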

View File

@@ -0,0 +1,26 @@
/*
* @Author: DI JUNKUN
* @Date: 2024-09-10
* Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
*/
#ifndef _NVCODEC_COMMON_H_
#define _NVCODEC_COMMON_H_
#include <cuda.h>
#include <iostream>
#include <memory>
#include "NvCodecUtils.h"
#include "NvEncoderCLIOptions.h"
#include "NvEncoderCuda.h"
void ShowHelpAndExit(const char *szBadOption = NULL);
void ParseCommandLine(int argc, char *argv[], char *szInputFileName,
int &nWidth, int &nHeight, NV_ENC_BUFFER_FORMAT &eFormat,
char *szOutputFileName, NvEncoderInitParam &initParam,
int &iGpu, int &iCase, int &nFrame);
#endif

View File

@@ -362,7 +362,10 @@ int AomAv1Encoder::OnEncodedImage(char *encoded_packets, size_t size) {
return 0;
}
void AomAv1Encoder::ForceIdr() { force_i_frame_flags_ = AOM_EFLAG_FORCE_KF; }
int AomAv1Encoder::ForceIdr() {
force_i_frame_flags_ = AOM_EFLAG_FORCE_KF;
return 0;
}
int AomAv1Encoder::Release() {
if (frame_for_encode_ != nullptr) {

View File

@@ -50,7 +50,7 @@ class AomAv1Encoder : public VideoEncoder {
int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
int ForceIdr();
private:
template <typename P>

View File

@@ -1,106 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <cuda.h>
#include <stdint.h>
#include <mutex>
#include <vector>
#include "NvEncoder.h"
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char* szErrName = NULL; \
cuGetErrorName_ld(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVENCException::makeNVENCException( \
errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, \
__LINE__); \
} \
} while (0)
/**
* @brief Encoder for CUDA device memory.
*/
class NvEncoderCuda : public NvEncoder {
public:
NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay = 3,
bool bMotionEstimationOnly = false,
bool bOPInVideoMemory = false);
virtual ~NvEncoderCuda();
/**
* @brief This is a static function to copy input data from host memory to
* device memory. This function assumes YUV plane is a single contiguous
* memory segment.
*/
static void CopyToDeviceFrame(
CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false, CUstream stream = NULL);
/**
* @brief This is a static function to copy input data from host memory to
* device memory. Application must pass a seperate device pointer for each YUV
* plane.
*/
static void CopyToDeviceFrame(
CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
CUdeviceptr dstChromaPtr[], uint32_t dstChromaPitch,
uint32_t numChromaPlanes, bool bUnAlignedDeviceCopy = false);
/**
* @brief This function sets input and output CUDA streams
*/
void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream,
NV_ENC_CUSTREAM_PTR outputStream);
protected:
/**
* @brief This function is used to release the input buffers allocated for
* encoding. This function is an override of virtual function
* NvEncoder::ReleaseInputBuffers().
*/
virtual void ReleaseInputBuffers() override;
private:
/**
* @brief This function is used to allocate input buffers for encoding.
* This function is an override of virtual function
* NvEncoder::AllocateInputBuffers().
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
private:
/**
* @brief This is a private function to release CUDA device memory used for
* encoding.
*/
void ReleaseCudaResources();
protected:
CUcontext m_cuContext;
private:
size_t m_cudaPitch = 0;
};

View File

@@ -4,6 +4,7 @@
#include "log.h"
#include "nvcodec_api.h"
#include "nvcodec_common.h"
#define SAVE_RECEIVED_NV12_STREAM 0
#define SAVE_ENCODED_H264_STREAM 0
@@ -26,53 +27,82 @@ NvidiaVideoEncoder::~NvidiaVideoEncoder() {
free(nv12_data_);
nv12_data_ = nullptr;
}
if (encoder_) {
encoder_->DestroyEncoder();
}
}
int NvidiaVideoEncoder::Init() {
// Init cuda context
int num_of_GPUs = 0;
CUdevice cuda_device;
bool cuda_ctx_succeed =
(index_of_GPU >= 0 && cuInit_ld(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount_ld(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
cuDeviceGet_ld(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate_ld(&cuda_context_, 0, cuda_device) ==
CUresult::CUDA_SUCCESS);
if (!cuda_ctx_succeed) {
ck(cuInit(0));
int num_of_gpu = 0;
ck(cuDeviceGetCount(&num_of_gpu));
if (index_of_gpu_ < 0 || index_of_gpu_ >= num_of_gpu) {
LOG_ERROR("GPU ordinal out of range. Should be within [0-{}]");
return -1;
}
ck(cuDeviceGet(&cuda_device_, index_of_gpu_));
char device_name[80];
ck(cuDeviceGetName(device_name, sizeof(device_name), cuda_device_));
LOG_INFO("H.264 encoder using [{}]", device_name);
ck(cuCtxCreate(&cuda_context_, 0, cuda_device_));
encoder_ = new NvEncoderCuda(cuda_context_, frame_width_, frame_height_,
NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12);
buffer_format_, 0);
// Init encoder_ session
NV_ENC_INITIALIZE_PARAMS init_params;
init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encodeConfig = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encodeConfig;
encoder_->CreateDefaultEncoderParams(&init_params, codec_guid_, preset_guid_,
tuning_info_);
encoder_->CreateDefaultEncoderParams(&init_params, codec_guid, preset_guid,
tuning_info);
frame_width_max_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_WIDTH_MAX);
frame_height_max_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_HEIGHT_MAX);
// frame_width_min_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
// NV_ENC_CAPS_WIDTH_MIN);
// frame_height_min_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
// NV_ENC_CAPS_HEIGHT_MIN);
encode_level_max_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_LEVEL_MAX);
encode_level_min_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_LEVEL_MIN);
support_dynamic_resolution_ = encoder_->GetCapabilityValue(
NV_ENC_CODEC_H264_GUID, NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE);
support_dynamic_bitrate_ = encoder_->GetCapabilityValue(
NV_ENC_CODEC_H264_GUID, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
init_params.encodeWidth = frame_width_;
init_params.encodeHeight = frame_height_;
init_params.encodeConfig->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
init_params.encodeConfig->gopLength = keyFrameInterval_;
init_params.encodeConfig->frameIntervalP = 1;
init_params.encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_VBR;
init_params.encodeConfig->rcParams.maxBitRate = maxBitrate_ * 500;
// init_params.encodeConfig->rcParams.enableMinQP = 1;
// init_params.encodeConfig->rcParams.minQP.qpIntra = 10;
init_params.encodeConfig->rcParams.enableMaxQP = 1;
init_params.encodeConfig->rcParams.maxQP.qpIntra = 22;
init_params.encodeConfig->encodeCodecConfig.h264Config.level =
NV_ENC_LEVEL::NV_ENC_LEVEL_H264_31;
init_params.encodeConfig->encodeCodecConfig.h264Config.sliceMode = 1;
init_params.encodeConfig->encodeCodecConfig.h264Config.sliceModeData =
max_payload_size_;
// init_params.encodeConfig->encodeCodecConfig.h264Config.disableSPSPPS = 1;
// init_params.encodeConfig->encodeCodecConfig.h264Config.repeatSPSPPS = 1;
// Must set max encode width and height, otherwise the encoder crashes when
// trying to reconfigure the resolution
init_params.maxEncodeWidth = frame_width_max_;
init_params.maxEncodeHeight = frame_height_max_;
// init_params.darWidth = init_params.encodeWidth;
// init_params.darHeight = init_params.encodeHeight;
encodeConfig.gopLength = key_frame_interval_;
encodeConfig.frameIntervalP = 1;
encodeConfig.encodeCodecConfig.h264Config.idrPeriod = key_frame_interval_;
encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
// encodeConfig.rcParams.enableMaxQP = 1;
// encodeConfig.rcParams.enableMinQP = 1;
// encodeConfig.rcParams.maxQP.qpIntra = 22;
// encodeConfig.rcParams.minQP.qpIntra = 10;
encodeConfig.rcParams.averageBitRate = average_bitrate_;
// use the default VBV buffer size
encodeConfig.rcParams.vbvBufferSize = 0;
encodeConfig.rcParams.maxBitRate = max_bitrate_;
// use the default VBV initial delay
encodeConfig.rcParams.vbvInitialDelay = 0;
// enable adaptive quantization (Spatial)
encodeConfig.rcParams.enableAQ = false;
encodeConfig.encodeCodecConfig.h264Config.idrPeriod = encodeConfig.gopLength;
encodeConfig.encodeCodecConfig.h264Config.level = encode_level_max_;
// encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = 1;
// encodeConfig.encodeCodecConfig.h264Config.repeatSPSPPS = 1;
encoder_->CreateEncoder(&init_params);
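(The two maxEncode* assignments above are the heart of this fix. A condensed
sketch of the reconfigure-safe pattern, with illustrative numbers:)
// Hedged sketch: maxEncodeWidth/maxEncodeHeight must cover the largest
// resolution that will ever be requested, and must be set before
// CreateEncoder(); a later Reconfigure() cannot grow beyond these bounds.
NV_ENC_INITIALIZE_PARAMS p = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG c = {NV_ENC_CONFIG_VER};
p.encodeConfig = &c;
p.encodeWidth = 1280;           // initial resolution
p.encodeHeight = 720;
p.maxEncodeWidth = 4096;        // upper bound for any later reconfigure
p.maxEncodeHeight = 4096;
// encoder->CreateEncoder(&p); ... a later ResetEncodeResolution() may then
// go up to 4096x4096 without recreating the session.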
@@ -94,7 +124,7 @@ int NvidiaVideoEncoder::Init() {
}
int NvidiaVideoEncoder::Encode(
const uint8_t *pData, int nSize,
const XVideoFrame *video_frame,
std::function<int(char *encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image) {
@@ -104,7 +134,16 @@ int NvidiaVideoEncoder::Encode(
}
if (SAVE_RECEIVED_NV12_STREAM) {
fwrite(pData, 1, nSize, file_nv12_);
fwrite(video_frame->data, 1, video_frame->size, file_nv12_);
}
if (video_frame->width != frame_width_ ||
video_frame->height != frame_height_) {
if (support_dynamic_resolution_) {
if (0 != ResetEncodeResolution(video_frame->width, video_frame->height)) {
return -1;
}
}
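// Note (assumption): without dynamic-resolution support the frame is still
// copied at the old encode size below, so the caller is expected to deliver
// frames that match the configured resolution.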
}
VideoFrameType frame_type;
@@ -120,10 +159,11 @@ int NvidiaVideoEncoder::Encode(
#endif
const NvEncInputFrame *encoder_inputframe = encoder_->GetNextInputFrame();
// LOG_ERROR("w:{}, h:{}", encoder_->GetEncodeWidth(),
// encoder_->GetEncodeHeight());
NvEncoderCuda::CopyToDeviceFrame(
cuda_context_,
(void *)pData, // NOLINT
(void *)video_frame->data, // NOLINT
0, (CUdeviceptr)encoder_inputframe->inputPtr, encoder_inputframe->pitch,
encoder_->GetEncodeWidth(), encoder_->GetEncodeHeight(),
CU_MEMORYTYPE_HOST, encoder_inputframe->bufferFormat,
@@ -161,20 +201,64 @@ int NvidiaVideoEncoder::OnEncodedImage(char *encoded_packets, size_t size) {
return 0;
}
void NvidiaVideoEncoder::ForceIdr() {
NV_ENC_RECONFIGURE_PARAMS reconfig_params;
reconfig_params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
int NvidiaVideoEncoder::ForceIdr() {
if (!encoder_) {
return -1;
}
NV_ENC_INITIALIZE_PARAMS init_params;
NV_ENC_RECONFIGURE_PARAMS reconfig_params = {NV_ENC_RECONFIGURE_PARAMS_VER};
NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
encoder_->GetInitializeParams(&init_params);
reconfig_params.reInitEncodeParams = init_params;
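// forceIDR makes the next submitted frame an IDR; resetEncoder additionally
// flushes encoder state so encoding restarts cleanly from that frame.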
reconfig_params.forceIDR = 1;
reconfig_params.resetEncoder = 1;
if (!encoder_->Reconfigure(&reconfig_params)) {
LOG_ERROR("Failed to force I frame");
return -1;
}
return 0;
}
int NvidiaVideoEncoder::ResetEncodeResolution(unsigned int width,
unsigned int height) {
if (!encoder_) {
return -1;
}
if (width > frame_width_max_ || height > frame_height_max_) {
LOG_ERROR(
"Target resolution is too large for this hardware encoder, which "
"[{}x{}] and support max resolution is [{}x{}]",
width, height, frame_width_max_, frame_height_max_);
return -1;
}
frame_width_ = width;
frame_height_ = height;
NV_ENC_RECONFIGURE_PARAMS reconfig_params = {NV_ENC_RECONFIGURE_PARAMS_VER};
NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
encoder_->GetInitializeParams(&init_params);
reconfig_params.reInitEncodeParams = init_params;
reconfig_params.reInitEncodeParams.encodeWidth = frame_width_;
reconfig_params.reInitEncodeParams.encodeHeight = frame_height_;
// reconfig_params.reInitEncodeParams.darWidth =
// reconfig_params.reInitEncodeParams.encodeWidth;
// reconfig_params.reInitEncodeParams.darHeight =
// reconfig_params.reInitEncodeParams.encodeHeight;
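// Request an IDR with the first frame at the new resolution so the stream
// stays decodable across the switch.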
reconfig_params.forceIDR = 1;
if (!encoder_->Reconfigure(&reconfig_params)) {
LOG_ERROR("Failed to reset resolution");
return -1;
}
return 0;
}

View File

@@ -13,31 +13,49 @@ class NvidiaVideoEncoder : public VideoEncoder {
int Init();
int Encode(const uint8_t* pData, int nSize,
std::function<int(char* encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image);
int Encode(const XVideoFrame* video_frame,
std::function<int(char* encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image) {
return 0;
}
int Encode(const XVideoFrame* video_frame,
std::function<int(char* encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image);
virtual int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
int ForceIdr();
private:
int index_of_GPU = 0;
GUID codec_guid = NV_ENC_CODEC_H264_GUID;
GUID preset_guid = NV_ENC_PRESET_P2_GUID;
NV_ENC_TUNING_INFO tuning_info =
int ResetEncodeResolution(unsigned int width, unsigned int height);
private:
int index_of_gpu_ = 0;
CUdevice cuda_device_ = 0;
GUID codec_guid_ = NV_ENC_CODEC_H264_GUID;
GUID preset_guid_ = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO tuning_info_ =
NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
int frame_width_ = 1280;
int frame_height_ = 720;
int keyFrameInterval_ = 3000;
int maxBitrate_ = 1000;
NV_ENC_BUFFER_FORMAT buffer_format_ =
NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12;
uint32_t frame_width_max_ = 0;
uint32_t frame_height_max_ = 0;
uint32_t frame_width_min_ = 0;
uint32_t frame_height_min_ = 0;
uint32_t encode_level_max_ = 0;
uint32_t encode_level_min_ = 0;
bool support_dynamic_resolution_ = false;
bool support_dynamic_bitrate_ = false;
uint32_t frame_width_ = 1280;
uint32_t frame_height_ = 720;
uint32_t key_frame_interval_ = 3000;
uint32_t average_bitrate_ = 2000000;
uint32_t max_bitrate_ = 10000000;
int max_payload_size_ = 3000;
NvEncoder* encoder_ = nullptr;
CUcontext cuda_context_ = nullptr;

View File

@@ -358,10 +358,12 @@ int OpenH264Encoder::OnEncodedImage(char *encoded_packets, size_t size) {
return 0;
}
void OpenH264Encoder::ForceIdr() {
int OpenH264Encoder::ForceIdr() {
if (openh264_encoder_) {
openh264_encoder_->ForceIntraFrame(true);
return openh264_encoder_->ForceIntraFrame(true);
}
return 0;
}
int OpenH264Encoder::Release() {

View File

@@ -37,7 +37,7 @@ class OpenH264Encoder : public VideoEncoder {
int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
int ForceIdr();
private:
int InitEncoderParams(int width, int height);

View File

@@ -29,7 +29,7 @@ class VideoEncoder {
on_encoded_image) = 0;
virtual int OnEncodedImage(char* encoded_packets, size_t size) = 0;
virtual void ForceIdr() = 0;
virtual int ForceIdr() = 0;
VideoEncoder() = default;
virtual ~VideoEncoder() {}

View File

@@ -491,8 +491,8 @@ int PeerConnection::SendVideoData(const char *data, size_t size) {
}
if (b_force_i_frame_) {
video_encoder_->ForceIdr();
LOG_INFO("Force I frame");
video_encoder_->ForceIdr();
b_force_i_frame_ = false;
}

View File

@@ -49,14 +49,14 @@ int IceTransmission::InitIceTransmission(
uint32_t audio_inbound_bitrate, uint32_t audio_outbound_bitrate,
uint32_t data_inbound_bitrate, uint32_t data_outbound_bitrate,
uint32_t total_inbound_bitrate, uint32_t total_outbound_bitrate) {
LOG_ERROR(
"video in: [{}] kbps, video out: [{}] kbps, audio in: [{}] kbps, "
"audio out: [{}] kbps, data in: [{}] kbps, data out: [{}] kbps, "
"total in: [{}] kbps, total out: [{}] kbps",
video_inbound_bitrate / 1000, video_outbound_bitrate / 1000,
audio_inbound_bitrate / 1000, audio_outbound_bitrate / 1000,
data_inbound_bitrate / 1000, data_outbound_bitrate / 1000,
total_inbound_bitrate / 1000, total_outbound_bitrate / 1000);
// LOG_ERROR(
//     "video in: [{}] kbps, video out: [{}] kbps, audio in: [{}] kbps, "
//     "audio out: [{}] kbps, data in: [{}] kbps, data out: [{}] kbps, "
//     "total in: [{}] kbps, total out: [{}] kbps",
//     video_inbound_bitrate / 1000, video_outbound_bitrate / 1000,
//     audio_inbound_bitrate / 1000, audio_outbound_bitrate / 1000,
//     data_inbound_bitrate / 1000, data_outbound_bitrate / 1000,
//     total_inbound_bitrate / 1000, total_outbound_bitrate / 1000);
});
video_rtp_codec_ = std::make_unique<RtpCodec>(video_codec_payload_type);
audio_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::OPUS);

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -1106,7 +1106,6 @@ extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
#endif
#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
/****************************************************************************************************************************/
//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
//! unsigned int * pPitch, CUVIDPROCPARAMS *pVPP);
@@ -1126,7 +1125,6 @@ extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned
#define cuvidMapVideoFrame cuvidMapVideoFrame64
#define cuvidUnmapVideoFrame cuvidUnmapVideoFrame64
#endif
#endif

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -28,7 +28,7 @@
/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
//! \date 2015-2020
//! \date 2015-2024
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/
@@ -41,6 +41,7 @@
extern "C" {
#endif /* __cplusplus */
#define MAX_CLOCK_TS 3
/***********************************************/
//!
@@ -78,6 +79,106 @@ typedef enum {
cudaAudioCodec_AAC, /**< AAC Audio */
} cudaAudioCodec;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODESET
//! Used to store Time code set extracted from H264 and HEVC codecs
/************************************************************************/
typedef struct _TIMECODESET
{
unsigned int time_offset_value;
unsigned short n_frames;
unsigned char clock_timestamp_flag;
unsigned char units_field_based_flag;
unsigned char counting_type;
unsigned char full_timestamp_flag;
unsigned char discontinuity_flag;
unsigned char cnt_dropped_flag;
unsigned char seconds_value;
unsigned char minutes_value;
unsigned char hours_value;
unsigned char seconds_flag;
unsigned char minutes_flag;
unsigned char hours_flag;
unsigned char time_offset_length;
unsigned char reserved;
} TIMECODESET;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODE
//! Used to extract Time code in H264 and HEVC codecs
/************************************************************************/
typedef struct _TIMECODE
{
TIMECODESET time_code_set[MAX_CLOCK_TS];
unsigned char num_clock_ts;
} TIMECODE;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEIMASTERINGDISPLAYINFO
//! Used to extract mastering display color volume SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEIMASTERINGDISPLAYINFO
{
unsigned short display_primaries_x[3];
unsigned short display_primaries_y[3];
unsigned short white_point_x;
unsigned short white_point_y;
unsigned int max_display_mastering_luminance;
unsigned int min_display_mastering_luminance;
} SEIMASTERINGDISPLAYINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEICONTENTLIGHTLEVELINFO
//! Used to extract content light level info SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEICONTENTLIGHTLEVELINFO
{
unsigned short max_content_light_level;
unsigned short max_pic_average_light_level;
unsigned int reserved;
} SEICONTENTLIGHTLEVELINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODEMPEG2
//! Used to extract Time code in MPEG2 codec
/**********************************************************************************/
typedef struct _TIMECODEMPEG2
{
unsigned char drop_frame_flag;
unsigned char time_code_hours;
unsigned char time_code_minutes;
unsigned char marker_bit;
unsigned char time_code_seconds;
unsigned char time_code_pictures;
} TIMECODEMPEG2;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEIALTERNATIVETRANSFERCHARACTERISTICS
//! Used to extract alternative transfer characteristics SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEIALTERNATIVETRANSFERCHARACTERISTICS
{
unsigned char preferred_transfer_characteristics;
} SEIALTERNATIVETRANSFERCHARACTERISTICS;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUSEIMESSAGE
//! Used in CUVIDSEIMESSAGEINFO structure
/**********************************************************************************/
typedef struct _CUSEIMESSAGE
{
unsigned char sei_message_type; /**< OUT: SEI Message Type */
unsigned char reserved[3];
unsigned int sei_message_size; /**< OUT: SEI Message Size */
} CUSEIMESSAGE;
/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
@@ -168,6 +269,19 @@ typedef struct
};
} CUVIDOPERATINGPOINTINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSEIMESSAGEINFO
//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg
/**********************************************************************************/
typedef struct _CUVIDSEIMESSAGEINFO
{
void *pSEIData; /**< OUT: SEI Message Data */
CUSEIMESSAGE *pSEIMessage; /**< OUT: SEI Message Info */
unsigned int sei_message_count; /**< OUT: SEI Message Count */
unsigned int picIdx; /**< OUT: SEI Message Pic Index */
} CUVIDSEIMESSAGEINFO;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
@@ -366,11 +480,13 @@ typedef struct _CUVIDPARSERDISPINFO
//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
//! PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded
/***********************************************************************************************************************/
typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
typedef int (CUDAAPI *PFNVIDSEIMSGCALLBACK) (void *, CUVIDSEIMESSAGEINFO *);
/**************************************/
//! \ingroup STRUCTS
@@ -395,7 +511,8 @@ typedef struct _CUVIDPARSERPARAMS
PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1
scalable bitstream */
void *pvReserved2[6]; /**< Reserved for future use - set to NULL */
PFNVIDSEIMSGCALLBACK pfnGetSEIMsg; /**< IN: Called when all SEI messages are parsed for a particular frame */
void *pvReserved2[5]; /**< Reserved for future use - set to NULL */
CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;
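The new pfnGetSEIMsg hook registers like the other parser callbacks. A minimal sketch, assuming an H.264 stream and eliding the other mandatory callbacks; HandleSEI is an illustrative handler that walks the CUVIDSEIMESSAGEINFO layout shown above:

// Illustrative handler: iterate the SEI messages parsed for one frame.
static int CUDAAPI HandleSEI(void *pUserData, CUVIDSEIMESSAGEINFO *pInfo) {
    const unsigned char *pData = (const unsigned char *)pInfo->pSEIData;
    for (unsigned int i = 0; i < pInfo->sei_message_count; i++) {
        const CUSEIMESSAGE &msg = pInfo->pSEIMessage[i];
        // msg.sei_message_type and msg.sei_message_size describe pData[0..size)
        pData += msg.sei_message_size;
    }
    return 1;  // >= 1: succeeded
}

CUVIDPARSERPARAMS parserParams = {};
parserParams.CodecType = cudaVideoCodec_H264;
parserParams.ulMaxNumDecodeSurfaces = 1;
// pfnSequenceCallback / pfnDecodePicture / pfnDisplayPicture set as usual...
parserParams.pfnGetSEIMsg = HandleSEI;  // new field, taking one reserved slot
CUvideoparser hParser = NULL;
cuvidCreateVideoParser(&hParser, &parserParams);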


@@ -1 +0,0 @@
libcuda.so.1

Binary file not shown.


@@ -1 +0,0 @@
libnvcuvid.so.1

Binary file not shown.


@@ -1 +0,0 @@
libnvidia-encode.so.1

Binary file not shown.


@@ -1,111 +0,0 @@
# Copyright 2020 NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
# 3.7 is required for FindVulkan module support in CMake.
cmake_minimum_required(VERSION 3.7)
project(NvCodec)
# Set C++11 for all projects and disable non-standard extensions
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_INSTALL_PREFIX .)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Utils)
set(NV_CODEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec)
set(NV_ENC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvEncoder)
set(NV_DEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvDecoder)
set(NV_APPENC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppEncode/Common)
set(NV_APPDEC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppDecode/Common)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
else()
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
endif()
if(WIN32)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvencodeapi.lib)
else()
set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvencodeapi.lib)
endif()
else ()
find_library(CUVID_LIB nvcuvid)
find_library(NVENCODEAPI_LIB nvidia-encode)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()
if(WIN32)
add_subdirectory(AppEncode/AppEncD3D11)
add_subdirectory(AppEncode/AppEncD3D9)
add_subdirectory(AppDecode/AppDecD3D)
else ()
#Need only linux Makefile for this
add_subdirectory(AppEncode/AppEncGL)
endif()
add_subdirectory(AppEncode/AppEncCuda)
add_subdirectory(AppEncode/AppEncDec)
add_subdirectory(AppEncode/AppEncLowLatency)
add_subdirectory(AppEncode/AppEncME)
add_subdirectory(AppEncode/AppEncPerf)
add_subdirectory(AppEncode/AppEncQual)
add_subdirectory(AppEncode/AppMotionEstimationVkCuda)
add_subdirectory(AppTranscode/AppTrans)
add_subdirectory(AppTranscode/AppTransOneToN)
add_subdirectory(AppTranscode/AppTransPerf)
add_subdirectory(AppDecode/AppDec)
add_subdirectory(AppDecode/AppDecGL)
add_subdirectory(AppDecode/AppDecImageProvider)
add_subdirectory(AppDecode/AppDecLowLatency)
add_subdirectory(AppDecode/AppDecMem)
add_subdirectory(AppDecode/AppDecMultiFiles)
add_subdirectory(AppDecode/AppDecMultiInput)
add_subdirectory(AppDecode/AppDecPerf)


@@ -1,54 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int destStrideInPixels = nDestPitch / (sizeof(uint16_t));
*(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] };
}
static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t));
dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y;
}
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight);
}
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt16ToUInt8Kernel <<<gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight);
}
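Both wrappers take device pointers with pitches in bytes. A minimal usage sketch under the assumption of tightly packed planes (pitch equals row size); dimensions are illustrative:

int w = 1920, h = 1080;
uint8_t *dpSrc8 = NULL;
uint16_t *dpDst16 = NULL;
cudaMalloc(&dpSrc8, w * h);
cudaMalloc(&dpDst16, w * h * sizeof(uint16_t));
// Widens each 8-bit sample into the high byte of a 16-bit sample.
ConvertUInt8ToUInt16(dpSrc8, dpDst16, w, w * (int)sizeof(uint16_t), w, h);
cudaDeviceSynchronize();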


@@ -1,399 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "ColorSpace.h"
__constant__ float matYuv2Rgb[3][3];
__constant__ float matRgb2Yuv[3][3];
void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) {
black = 16; white = 235;
max = 255;
switch (iMatrix)
{
case ColorSpaceStandard_BT709:
default:
wr = 0.2126f; wb = 0.0722f;
break;
case ColorSpaceStandard_FCC:
wr = 0.30f; wb = 0.11f;
break;
case ColorSpaceStandard_BT470:
case ColorSpaceStandard_BT601:
wr = 0.2990f; wb = 0.1140f;
break;
case ColorSpaceStandard_SMPTE240M:
wr = 0.212f; wb = 0.087f;
break;
case ColorSpaceStandard_BT2020:
case ColorSpaceStandard_BT2020C:
wr = 0.2627f; wb = 0.0593f;
// 10-bit only
black = 64 << 6; white = 940 << 6;
max = (1 << 16) - 1;
break;
}
}
void SetMatYuv2Rgb(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
1.0f, 0.0f, (1.0f - wr) / 0.5f,
1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
1.0f, (1.0f - wb) / 0.5f, 0.0f,
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
}
}
cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat));
}
void SetMatRgb2Yuv(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
wr, 1.0f - wb - wr, wb,
-0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
}
}
cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat));
}
template<class T>
__device__ static T Clamp(T x, T lower, T upper) {
return x < lower ? lower : (x > upper ? upper : x);
}
template<class Rgb, class YuvUnit>
__device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
const int
low = 1 << (sizeof(YuvUnit) * 8 - 4),
mid = 1 << (sizeof(YuvUnit) * 8 - 1);
float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
YuvUnit
r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf),
g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf),
b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf);
Rgb rgb{};
const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
rgb.c.r = r >> nShift;
rgb.c.g = g >> nShift;
rgb.c.b = b >> nShift;
} else {
rgb.c.r = r << nShift;
rgb.c.g = g << nShift;
rgb.c.b = b << nShift;
}
return rgb;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
*(RgbIntx2 *)pDst = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
};
*(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
*(RgbIntx2 *)pDst = RgbIntx2{
YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2)>>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
// Explicit Instantiation
template void Nv12ToColor32<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor32<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<BGRA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<RGBA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<BGRA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<RGBA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
RgbIntx2 int2a = *(RgbIntx2 *)pSrc;
RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch);
Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
decltype(Rgb::c.r)
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
*(YuvUnitx2 *)pDst = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
};
*(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
};
*(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 {
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
};
}
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) {
SetMatRgb2Yuv(iMatrix);
RgbToYuvKernel<ushort2, BGRA64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight);
}
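A usage sketch for the conversion entry points above: one NV12 device frame to interleaved BGRA, assuming pitch-linear allocations (dimensions illustrative):

int w = 1280, h = 720;
uint8_t *dpNv12 = NULL, *dpBgra = NULL;
size_t nv12Pitch = 0, bgraPitch = 0;
cudaMallocPitch((void **)&dpNv12, &nv12Pitch, w, h * 3 / 2);  // Y plane + interleaved UV
cudaMallocPitch((void **)&dpBgra, &bgraPitch, (size_t)w * 4, h);
Nv12ToColor32<BGRA32>(dpNv12, (int)nv12Pitch, dpBgra, (int)bgraPitch, w, h,
                      ColorSpaceStandard_BT709);
cudaDeviceSynchronize();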


@@ -1,48 +0,0 @@
#pragma once
#include <stdint.h>
#include <cuda_runtime.h>
typedef enum ColorSpaceStandard {
ColorSpaceStandard_BT709 = 1,
ColorSpaceStandard_Unspecified = 2,
ColorSpaceStandard_Reserved = 3,
ColorSpaceStandard_FCC = 4,
ColorSpaceStandard_BT470 = 5,
ColorSpaceStandard_BT601 = 6,
ColorSpaceStandard_SMPTE240M = 7,
ColorSpaceStandard_YCgCo = 8,
ColorSpaceStandard_BT2020 = 9,
ColorSpaceStandard_BT2020C = 10
} ColorSpaceStandard;
union BGRA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t b, g, r, a;
} c;
};
union RGBA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t r, g, b, a;
} c;
};
union BGRA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t b, g, r, a;
} c;
};
union RGBA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t r, g, b, a;
} c;
};


@@ -1,357 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
}
#include "NvCodecUtils.h"
//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from them.
//---------------------------------------------------------------------------
/**
* @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
*/
class FFmpegDemuxer {
private:
AVFormatContext *fmtc = NULL;
AVIOContext *avioc = NULL;
AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
AVBSFContext *bsfc = NULL;
int iVideoStream;
bool bMp4H264, bMp4HEVC, bMp4MPEG4;
AVCodecID eVideoCodec;
AVPixelFormat eChromaFormat;
int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
double timeBase = 0.0;
int64_t userTimeScale = 0;
uint8_t *pDataWithHeader = NULL;
unsigned int frameCount = 0;
public:
class DataProvider {
public:
virtual ~DataProvider() {}
virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
};
private:
/**
* @brief Private constructor to initialize libavformat resources.
* @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
*/
FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
if (!fmtc) {
LOG(ERROR) << "No AVFormatContext provided.";
return;
}
LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
ck(avformat_find_stream_info(fmtc, NULL));
iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (iVideoStream < 0) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file";
return;
}
//fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
nWidth = fmtc->streams[iVideoStream]->codecpar->width;
nHeight = fmtc->streams[iVideoStream]->codecpar->height;
eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
timeBase = av_q2d(rTimeBase);
userTimeScale = timeScale;
// Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
switch (eChromaFormat)
{
case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 10;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV420P12LE:
nBitDepth = 12;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P10LE:
nBitDepth = 10;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P12LE:
nBitDepth = 12;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P:
nBitDepth = 8;
nChromaHeight = nHeight << 1;
nBPP = 1;
break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
break;
default:
LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
eChromaFormat = AV_PIX_FMT_YUV420P;
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
}
bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
//Initialize packet fields with default values
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
av_init_packet(&pktFiltered);
pktFiltered.data = NULL;
pktFiltered.size = 0;
// Initialize bitstream filter and its required resources
if (bMp4H264) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
if (bMp4HEVC) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
}
AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
AVFormatContext *ctx = NULL;
if (!(ctx = avformat_alloc_context())) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
uint8_t *avioc_buffer = NULL;
int avioc_buffer_size = 8 * 1024 * 1024;
avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
if (!avioc_buffer) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
0, pDataProvider, &ReadPacket, NULL, NULL);
if (!avioc) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
ctx->pb = avioc;
ck(avformat_open_input(&ctx, NULL, NULL, NULL));
return ctx;
}
/**
* @brief Allocate and return AVFormatContext*.
* @param szFilePath - Filepath pointing to input stream.
* @return Pointer to AVFormatContext
*/
AVFormatContext *CreateFormatContext(const char *szFilePath) {
avformat_network_init();
AVFormatContext *ctx = NULL;
ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
return ctx;
}
public:
FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;}
~FFmpegDemuxer() {
if (!fmtc) {
return;
}
if (pkt.data) {
av_packet_unref(&pkt);
}
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
if (bsfc) {
av_bsf_free(&bsfc);
}
avformat_close_input(&fmtc);
if (avioc) {
av_freep(&avioc->buffer);
av_freep(&avioc);
}
if (pDataWithHeader) {
av_free(pDataWithHeader);
}
}
AVCodecID GetVideoCodec() {
return eVideoCodec;
}
AVPixelFormat GetChromaFormat() {
return eChromaFormat;
}
int GetWidth() {
return nWidth;
}
int GetHeight() {
return nHeight;
}
int GetBitDepth() {
return nBitDepth;
}
int GetFrameSize() {
return nWidth * (nHeight + nChromaHeight) * nBPP;
}
bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
if (!fmtc) {
return false;
}
*pnVideoBytes = 0;
if (pkt.data) {
av_packet_unref(&pkt);
}
int e = 0;
while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) {
av_packet_unref(&pkt);
}
if (e < 0) {
return false;
}
if (bMp4H264 || bMp4HEVC) {
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
ck(av_bsf_send_packet(bsfc, &pkt));
ck(av_bsf_receive_packet(bsfc, &pktFiltered));
*ppVideo = pktFiltered.data;
*pnVideoBytes = pktFiltered.size;
if (pts)
*pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase);
} else {
if (bMp4MPEG4 && (frameCount == 0)) {
int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
if (extraDataSize > 0) {
// extradata contains start codes 00 00 01. Subtract its size
pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t));
if (!pDataWithHeader) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return false;
}
memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t));
*ppVideo = pDataWithHeader;
*pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t);
}
} else {
*ppVideo = pkt.data;
*pnVideoBytes = pkt.size;
}
if (pts)
*pts = (int64_t)(pkt.pts * userTimeScale * timeBase);
}
frameCount++;
return true;
}
static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
}
};
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4 : return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_WMV3 :
case AV_CODEC_ID_VC1 : return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264 : return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC : return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8 : return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9 : return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG : return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1 : return cudaVideoCodec_AV1;
default : return cudaVideoCodec_NumCodecs;
}
}
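Typical usage: demux one packet at a time and hand the elementary stream to a decoder. A minimal sketch assuming a local input file:

FFmpegDemuxer demuxer("input.mp4");
cudaVideoCodec codec = FFmpeg2NvCodecId(demuxer.GetVideoCodec());
uint8_t *pVideo = NULL;
int nVideoBytes = 0;
int64_t pts = 0;
while (demuxer.Demux(&pVideo, &nVideoBytes, &pts)) {
    // pVideo/nVideoBytes hold one packet (annex-B for MP4-contained H264/HEVC);
    // feed it to cuvidParseVideoData or another consumer together with pts.
}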


@@ -1,109 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <thread>
#include <mutex>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"
extern simplelogger::Logger *logger;
class FFmpegStreamer {
private:
AVFormatContext *oc = NULL;
AVStream *vs = NULL;
int nFps = 0;
public:
FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
avformat_network_init();
oc = avformat_alloc_context();
if (!oc) {
LOG(ERROR) << "FFMPEG: avformat_alloc_context error";
return;
}
// Set format on oc
AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL);
if (!fmt) {
LOG(ERROR) << "Invalid format";
return;
}
fmt->video_codec = eCodecId;
oc->oformat = fmt;
oc->url = av_strdup(szInFilePath);
LOG(INFO) << "Streaming destination: " << oc->url;
// Add video stream to oc
vs = avformat_new_stream(oc, NULL);
if (!vs) {
LOG(ERROR) << "FFMPEG: Could not alloc video stream";
return;
}
vs->id = 0;
// Set video parameters
AVCodecParameters *vpar = vs->codecpar;
vpar->codec_id = fmt->video_codec;
vpar->codec_type = AVMEDIA_TYPE_VIDEO;
vpar->width = nWidth;
vpar->height = nHeight;
// Everything is ready. Now open the output stream.
if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
return;
}
// Write the container header
if (avformat_write_header(oc, NULL)) {
LOG(ERROR) << "FFMPEG: avformat_write_header error!";
return;
}
}
~FFmpegStreamer() {
if (oc) {
av_write_trailer(oc);
avio_close(oc->pb);
avformat_free_context(oc);
}
}
bool Stream(uint8_t *pData, int nBytes, int nPts) {
AVPacket pkt = {0};
av_init_packet(&pkt);
pkt.pts = av_rescale_q(nPts++, AVRational {1, nFps}, vs->time_base);
// No B-frames
pkt.dts = pkt.pts;
pkt.stream_index = vs->index;
pkt.data = pData;
pkt.size = nBytes;
if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
// Write the compressed frame into the output
int ret = av_write_frame(oc, &pkt);
av_write_frame(oc, NULL);
if (ret < 0) {
LOG(ERROR) << "FFMPEG: Error while writing video frame";
}
return true;
}
};
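FFmpegStreamer muxes each encoded access unit into MPEG-TS and writes it to the URL passed at construction. A minimal sketch; GetEncodedPackets is a hypothetical stand-in for the encoder output, and the UDP destination is illustrative:

#include <vector>
std::vector<std::vector<uint8_t>> GetEncodedPackets();  // hypothetical encoder output

FFmpegStreamer streamer(AV_CODEC_ID_H264, 1920, 1080, 30, "udp://127.0.0.1:8080");
int pts = 0;
for (auto &pkt : GetEncodedPackets()) {
    streamer.Stream(pkt.data(), (int)pkt.size(), pts++);
}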


@@ -1,192 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
template<typename YuvUnitx2>
static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int ix = blockIdx.x * blockDim.x + threadIdx.x,
iy = blockIdx.y * blockDim.y + threadIdx.y;
if (ix >= nWidth / 2 || iy >= nHeight / 2) {
return;
}
int x = ix * 2, y = iy * 2;
typedef decltype(YuvUnitx2::x) YuvUnit;
const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
y++;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
*(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
}
template <typename YuvUnitx2>
static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
cudaTextureObject_t texY=0;
ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
resDesc.res.pitch2D.width = nSrcWidth / 2;
resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
cudaTextureObject_t texUv=0;
ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
Resize<YuvUnitx2> <<<dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32), dim3(16, 16)>>>(texY, texUv, dpDst, dpDstUV,
    nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
ck(cudaDestroyTextureObject(texY));
ck(cudaDestroyTextureObject(texUv));
}
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
{
unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
}
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
{
unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
}
static __global__ void Scale(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
*(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
}
static __global__ void Scale_uv(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
*(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
}
void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false)
{
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
cudaTextureObject_t texSrc = 0;
ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
if (bUVPlane)
{
Scale_uv <<<gridSize, blockSize>>>(texSrc, dpDst,
    nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
else
{
Scale <<<gridSize, blockSize>>>(texSrc, dpDst,
    nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
ck(cudaGetLastError());
ck(cudaDestroyTextureObject(texSrc));
}
void ScaleYUV420(unsigned char *dpDstY,
unsigned char* dpDstU,
unsigned char* dpDstV,
int nDstPitch,
int nDstChromaPitch,
int nDstWidth,
int nDstHeight,
unsigned char *dpSrcY,
unsigned char* dpSrcU,
unsigned char* dpSrcV,
int nSrcPitch,
int nSrcChromaPitch,
int nSrcWidth,
int nSrcHeight,
bool bSemiplanar)
{
int chromaWidthDst = (nDstWidth + 1) / 2;
int chromaHeightDst = (nDstHeight + 1) / 2;
int chromaWidthSrc = (nSrcWidth + 1) / 2;
int chromaHeightSrc = (nSrcHeight + 1) / 2;
ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
if (bSemiplanar)
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
}
else
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
}
}
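A usage sketch for ResizeNv12, downscaling one NV12 device frame; packed allocations are assumed (pitch equals width), and passing NULL for the last argument places the destination UV plane directly after the Y plane:

int sw = 1920, sh = 1080, dw = 1280, dh = 720;
unsigned char *dpSrc = NULL, *dpDst = NULL;
cudaMalloc(&dpSrc, sw * sh * 3 / 2);  // NV12: Y plane followed by interleaved UV
cudaMalloc(&dpDst, dw * dh * 3 / 2);
ResizeNv12(dpDst, dw, dw, dh, dpSrc, sw, sw, sh, NULL);
cudaDeviceSynchronize();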


@@ -1,126 +0,0 @@
/*
* Copyright 2018-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
/*
* CRC32 lookup table
* Generated by the following routine
* int i, j;
* U032 crc;
* for (i = 0; i < 256; i++)
* {
* crc = i;
* for (j = 0; j < 8; j++) { // 8 reduction
* crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320L : 0);
* }
* Crc32Table[i] = crc;
* }
*/
__device__ __constant__ uint32_t Crc32Table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
typedef struct _NV_ENC_ENCODE_OUT_PARAMS
{
uint32_t version; /**< [out]: Struct version. */
uint32_t bitstreamSizeInBytes; /**< [out]: Encoded bitstream size in bytes */
uint32_t cycleCount; /**< [out]: Cycle count */
uint32_t firstPassCycleCount; /**< [out]: First pass cycle count */
uint32_t reserved[60]; /**< [out]: Reserved and must be set to 0 */
} NV_ENC_ENCODE_OUT_PARAMS;
static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
{
NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
uint32_t crc=~0;
for(uint32_t i = 0; i < bitstreamSize; i++)
{
crc = (crc >> 8) ^ Crc32Table[((uint8_t)(crc)) ^ (*pEncStream++)];
}
*crcValue = ~crc;
}
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
{
dim3 blockSize(1, 1, 1);
dim3 gridSize(1, 1, 1);
ComputeCRCKernel <<<gridSize, blockSize, 0, outputCUStream >>>(pBuffer, crcValue);
}
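ComputeCRC expects a device buffer that begins with NV_ENC_ENCODE_OUT_PARAMS followed by the encoded bitstream, and writes a single CRC32 to device memory. A minimal sketch; dpEncOut stands in for the encoder output buffer and is assumed to exist elsewhere:

extern uint8_t *dpEncOut;  // assumed: NV_ENC_ENCODE_OUT_PARAMS + bitstream, on the device
uint32_t *dpCrc = NULL;
cudaMalloc(&dpCrc, sizeof(uint32_t));
cudaStream_t stream = 0;  // default stream, for the sketch
ComputeCRC(dpEncOut, dpCrc, stream);
uint32_t crc = 0;
cudaMemcpyAsync(&crc, dpCrc, sizeof(crc), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);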


@@ -24,7 +24,7 @@ if is_os("windows") then
add_defines("_WEBSOCKETPP_CPP11_INTERNAL_")
elseif is_os("linux") then
add_requires("glib", {system = true})
add_packages("glib", "cuda")
add_packages("glib")
add_cxflags("-fPIC")
add_syslinks("pthread")
elseif is_os("macosx") then
@@ -122,8 +122,8 @@ target("media")
"src/media/video/encode/aom",
"src/media/video/decode/dav1d",
"src/media/nvcodec",
"thirdparty/nvcodec/Interface",
"thirdparty/nvcodec/Samples", {public = true})
"thirdparty/nvcodec/interface", {public = true})
add_includedirs(path.join(os.getenv("CUDA_PATH"), "include"), {public = true})
elseif is_os(("linux")) then
add_files("src/media/video/encode/*.cpp",
"src/media/video/decode/*.cpp",
@@ -143,8 +143,8 @@ target("media")
"src/media/video/encode/aom",
"src/media/video/decode/dav1d",
"src/media/nvcodec",
"thirdparty/nvcodec/Interface",
"thirdparty/nvcodec/Samples", {public = true})
"thirdparty/nvcodec/interface", {public = true})
add_includedirs(path.join(os.getenv("CUDA_PATH"), "include"), {public = true})
elseif is_os("macosx") then
add_files("src/media/video/encode/*.cpp",
"src/media/video/decode/*.cpp",
@@ -195,15 +195,18 @@ target("projectx")
add_includedirs("src/rtc", "src/pc", "src/interface")
if is_os("windows") then
add_linkdirs("thirdparty/nvcodec/Lib/x64")
add_linkdirs("thirdparty/nvcodec/lib/x64")
add_linkdirs(path.join(os.getenv("CUDA_PATH"), "lib/x64"))
add_links("nice", "glib-2.0", "gio-2.0", "gmodule-2.0", "gobject-2.0",
"pcre2-8", "pcre2-16", "pcre2-32", "pcre2-posix",
"zlib", "ffi", "libcrypto", "libssl", "intl", "iconv",
"Shell32", "Advapi32", "Dnsapi", "Shlwapi", "Crypt32",
"ws2_32", "Bcrypt", "windowsapp", "User32", "Strmiids", "Mfuuid",
"Secur32", "Bcrypt")
add_links("cuda", "nvencodeapi", "nvcuvid")
elseif is_os(("linux")) then
add_linkdirs("thirdparty/nvcodec/Lib/x64")
add_linkdirs("thirdparty/nvcodec/lib/x64")
add_linkdirs(path.join(os.getenv("CUDA_PATH"), "lib/x64"))
add_links("cuda", "nvidia-encode", "nvcuvid")
elseif is_os("macosx") then