[fix] fix NVIDIA encoder crash when reconfiguring the resolution

dijunkun
2024-09-10 17:32:43 +08:00
parent 2f16d22ab7
commit a8333c622b
40 changed files with 4507 additions and 25450 deletions

src/media/nvcodec/Logger.h (new file, 256 lines)
View File

@@ -0,0 +1,256 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <mutex>
#include <time.h>
#ifdef _WIN32
#include <winsock.h>
#include <windows.h>
#pragma comment(lib, "ws2_32.lib")
#undef ERROR
#else
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#define SOCKET int
#define INVALID_SOCKET -1
#endif
enum LogLevel {
TRACE,
INFO,
WARNING,
ERROR,
FATAL
};
namespace simplelogger{
class Logger {
public:
Logger(LogLevel level, bool bPrintTimeStamp) : level(level), bPrintTimeStamp(bPrintTimeStamp) {}
virtual ~Logger() {}
virtual std::ostream& GetStream() = 0;
virtual void FlushStream() {}
bool ShouldLogFor(LogLevel l) {
return l >= level;
}
char* GetLead(LogLevel l, const char *szFile, int nLine, const char *szFunc) {
if (l < TRACE || l > FATAL) {
sprintf(szLead, "[?????] ");
return szLead;
}
const char *szLevels[] = {"TRACE", "INFO", "WARN", "ERROR", "FATAL"};
if (bPrintTimeStamp) {
time_t t = time(NULL);
struct tm *ptm = localtime(&t);
sprintf(szLead, "[%-5s][%02d:%02d:%02d] ",
szLevels[l], ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
} else {
sprintf(szLead, "[%-5s] ", szLevels[l]);
}
return szLead;
}
void EnterCriticalSection() {
mtx.lock();
}
void LeaveCriticalSection() {
mtx.unlock();
}
private:
LogLevel level;
char szLead[80];
bool bPrintTimeStamp;
std::mutex mtx;
};
class LoggerFactory {
public:
static Logger* CreateFileLogger(std::string strFilePath,
LogLevel level = INFO, bool bPrintTimeStamp = true) {
return new FileLogger(strFilePath, level, bPrintTimeStamp);
}
static Logger* CreateConsoleLogger(LogLevel level = INFO,
bool bPrintTimeStamp = true) {
return new ConsoleLogger(level, bPrintTimeStamp);
}
static Logger* CreateUdpLogger(char *szHost, unsigned uPort, LogLevel level = INFO,
bool bPrintTimeStamp = true) {
return new UdpLogger(szHost, uPort, level, bPrintTimeStamp);
}
private:
LoggerFactory() {}
class FileLogger : public Logger {
public:
FileLogger(std::string strFilePath, LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp) {
pFileOut = new std::ofstream();
pFileOut->open(strFilePath.c_str());
}
~FileLogger() {
pFileOut->close();
}
std::ostream& GetStream() {
return *pFileOut;
}
private:
std::ofstream *pFileOut;
};
class ConsoleLogger : public Logger {
public:
ConsoleLogger(LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp) {}
std::ostream& GetStream() {
return std::cout;
}
};
class UdpLogger : public Logger {
private:
class UdpOstream : public std::ostream {
public:
UdpOstream(char *szHost, unsigned short uPort) : std::ostream(&sb), socket(INVALID_SOCKET){
#ifdef _WIN32
WSADATA w;
if (WSAStartup(0x0101, &w) != 0) {
fprintf(stderr, "WSAStartup() failed.\n");
return;
}
#endif
socket = ::socket(AF_INET, SOCK_DGRAM, 0);
if (socket == INVALID_SOCKET) {
#ifdef _WIN32
WSACleanup();
#endif
fprintf(stderr, "socket() failed.\n");
return;
}
#ifdef _WIN32
unsigned int b1, b2, b3, b4;
sscanf(szHost, "%u.%u.%u.%u", &b1, &b2, &b3, &b4);
struct in_addr addr = {(unsigned char)b1, (unsigned char)b2, (unsigned char)b3, (unsigned char)b4};
#else
struct in_addr addr = {inet_addr(szHost)};
#endif
struct sockaddr_in s = {AF_INET, htons(uPort), addr};
server = s;
}
~UdpOstream() throw() {
if (socket == INVALID_SOCKET) {
return;
}
#ifdef _WIN32
closesocket(socket);
WSACleanup();
#else
close(socket);
#endif
}
void Flush() {
if (sendto(socket, sb.str().c_str(), (int)sb.str().length() + 1,
0, (struct sockaddr *)&server, (int)sizeof(sockaddr_in)) == -1) {
fprintf(stderr, "sendto() failed.\n");
}
sb.str("");
}
private:
std::stringbuf sb;
SOCKET socket;
struct sockaddr_in server;
};
public:
UdpLogger(char *szHost, unsigned uPort, LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp), udpOut(szHost, (unsigned short)uPort) {}
UdpOstream& GetStream() {
return udpOut;
}
virtual void FlushStream() {
udpOut.Flush();
}
private:
UdpOstream udpOut;
};
};
class LogTransaction {
public:
LogTransaction(Logger *pLogger, LogLevel level, const char *szFile, const int nLine, const char *szFunc) : pLogger(pLogger), level(level) {
if (!pLogger) {
std::cout << "[-----] ";
return;
}
if (!pLogger->ShouldLogFor(level)) {
return;
}
pLogger->EnterCriticalSection();
pLogger->GetStream() << pLogger->GetLead(level, szFile, nLine, szFunc);
}
~LogTransaction() {
if (!pLogger) {
std::cout << std::endl;
return;
}
if (!pLogger->ShouldLogFor(level)) {
return;
}
pLogger->GetStream() << std::endl;
pLogger->FlushStream();
pLogger->LeaveCriticalSection();
if (level == FATAL) {
exit(1);
}
}
std::ostream& GetStream() {
if (!pLogger) {
return std::cout;
}
if (!pLogger->ShouldLogFor(level)) {
return ossNull;
}
return pLogger->GetStream();
}
private:
Logger *pLogger;
LogLevel level;
std::ostringstream ossNull;
};
}
extern simplelogger::Logger *logger;
#define LOG(level) simplelogger::LogTransaction(logger, level, __FILE__, __LINE__, __FUNCTION__).GetStream()
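// Hedged usage sketch (illustrative, not part of this header): define the
// global the LOG macro expects, then stream through it. Demo() and its
// messages are assumptions for illustration only.
simplelogger::Logger *logger =
    simplelogger::LoggerFactory::CreateConsoleLogger(INFO);

void Demo() {
  LOG(INFO) << "decoder session created";  // printed: INFO >= INFO
  LOG(TRACE) << "per-frame detail";        // filtered out: TRACE < INFO
  LOG(ERROR) << "reconfigure failed";      // printed with an [ERROR] lead
}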

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//---------------------------------------------------------------------------
@@ -26,19 +42,20 @@
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <thread>
#include <vector>
#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
std::cout << "CUDA driver API error " << szErrName << " at line " << iLine
<< " in file " << szFile;
return false;
}
return true;
@@ -48,6 +65,8 @@ inline bool check(CUresult e, int iLine, const char *szFile) {
#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
if (e != cudaSuccess) {
std::cout << "CUDA runtime API error " << cudaGetErrorName(e) << " at line "
<< iLine << " in file " << szFile;
return false;
}
return true;
@@ -85,6 +104,8 @@ inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
"NV_ENC_ERR_RESOURCE_NOT_MAPPED",
};
if (e != NV_ENC_SUCCESS) {
std::cout << "NVENC error " << aszErrName[e] << " at line " << iLine
<< " in file " << szFile;
return false;
}
return true;
@@ -96,7 +117,8 @@ inline bool check(HRESULT e, int iLine, const char *szFile) {
if (e != S_OK) {
std::stringstream stream;
stream << std::hex << std::uppercase << e;
std::cout << "HRESULT error 0x" << stream.str() << " at line " << iLine
<< " in file " << szFile;
return false;
}
return true;
@@ -106,6 +128,8 @@ inline bool check(HRESULT e, int iLine, const char *szFile) {
#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
if (e != 0) {
std::cout << "GLenum error " << e << " at line " << iLine << " in file "
<< szFile;
return false;
}
return true;
@@ -114,12 +138,17 @@ inline bool check(GLenum e, int iLine, const char *szFile) {
inline bool check(int e, int iLine, const char *szFile) {
if (e < 0) {
std::cout << "General error " << e << " at line " << iLine << " in file "
<< szFile;
return false;
}
return true;
}
#define ck(call) check(call, __LINE__, __FILE__)
#define MAKE_FOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | \
((uint32_t)(uint8_t)(ch2) << 16) | ((uint32_t)(uint8_t)(ch3) << 24))
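// Worked example (illustrative check, not in the original file): the AV1 IVF
// header further below tags streams with 'AV01'.
// MAKE_FOURCC('A','V','0','1') = 0x41 | 0x56<<8 | 0x30<<16 | 0x31<<24
//                              = 0x31305641 (bytes packed least-significant-first)
static_assert(MAKE_FOURCC('A', 'V', '0', '1') == 0x31305641,
              "fourcc packs its four bytes in little-endian order");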
/**
* @brief Wrapper class around std::thread
@@ -156,68 +185,72 @@ class NvThread {
#define _stat64 stat64
#endif
/**
* @brief Utility class to allocate buffer memory. Helps avoid I/O during the
* encode/decode loop in case of performance tests.
*/
class BufferedFileReader {
public:
/**
* @brief Constructor function to allocate appropriate memory and copy file
* contents into it
*/
BufferedFileReader(const char *szFileName, bool bPartial = false) {
struct _stat64 st;
// /**
// * @brief Utility class to allocate buffer memory. Helps avoid I/O during the
// * encode/decode loop in case of performance tests.
// */
// class BufferedFileReader {
// public:
// /**
// * @brief Constructor function to allocate appropriate memory and copy file
// * contents into it
// */
// BufferedFileReader(const char *szFileName, bool bPartial = false) {
// struct _stat64 st;
if (_stat64(szFileName, &st) != 0) {
return;
}
// if (_stat64(szFileName, &st) != 0) {
// return;
// }
nSize = st.st_size;
while (nSize) {
try {
pBuf = new uint8_t[(size_t)nSize];
if (nSize != st.st_size) {
}
break;
} catch (std::bad_alloc) {
if (!bPartial) {
return;
}
nSize = (uint32_t)(nSize * 0.9);
}
}
// nSize = st.st_size;
// while (nSize) {
// try {
// pBuf = new uint8_t[(size_t)nSize];
// if (nSize != st.st_size) {
// std::cout << "File is too large - only " << std::setprecision(4)
// << 100.0 * nSize / st.st_size << "% is loaded";
// }
// break;
// } catch (std::bad_alloc) {
// if (!bPartial) {
// std::cout << "Failed to allocate memory in BufferedReader";
// return;
// }
// nSize = (uint32_t)(nSize * 0.9);
// }
// }
std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
if (!fpIn) {
return;
}
// std::ifstream fpIn(szFileName, std::ifstream::in |
// std::ifstream::binary); if (!fpIn) {
// std::cout << "Unable to open input file: " << szFileName;
// return;
// }
std::streamsize nRead =
fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
fpIn.close();
// std::streamsize nRead =
// fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
// fpIn.close();
assert(nRead == nSize);
}
~BufferedFileReader() {
if (pBuf) {
delete[] pBuf;
}
}
bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
if (!pBuf) {
return false;
}
// assert(nRead == nSize);
// }
// ~BufferedFileReader() {
// if (pBuf) {
// delete[] pBuf;
// }
// }
// bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
// if (!pBuf) {
// return false;
// }
*ppBuf = pBuf;
*pnSize = nSize;
return true;
}
// *ppBuf = pBuf;
// *pnSize = nSize;
// return true;
// }
private:
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
};
// private:
// uint8_t *pBuf = NULL;
// uint64_t nSize = 0;
// };
/**
* @brief Template class to facilitate color space conversion
@@ -290,6 +323,60 @@ class YuvConverter {
int nWidth, nHeight;
};
/**
* @brief Class for writing IVF format header for AV1 codec
*/
class IVFUtils {
public:
void WriteFileHeader(std::vector<uint8_t> &vPacket, uint32_t nFourCC,
uint32_t nWidth, uint32_t nHeight,
uint32_t nFrameRateNum, uint32_t nFrameRateDen,
uint32_t nFrameCnt) {
char header[32];
header[0] = 'D';
header[1] = 'K';
header[2] = 'I';
header[3] = 'F';
mem_put_le16(header + 4, 0); // version
mem_put_le16(header + 6, 32); // header size
mem_put_le32(header + 8, nFourCC); // fourcc
mem_put_le16(header + 12, nWidth); // width
mem_put_le16(header + 14, nHeight); // height
mem_put_le32(header + 16, nFrameRateNum); // rate
mem_put_le32(header + 20, nFrameRateDen); // scale
mem_put_le32(header + 24, nFrameCnt); // length
mem_put_le32(header + 28, 0); // unused
vPacket.insert(vPacket.end(), &header[0], &header[32]);
}
void WriteFrameHeader(std::vector<uint8_t> &vPacket, size_t nFrameSize,
int64_t pts) {
char header[12];
mem_put_le32(header, (int)nFrameSize);
mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
mem_put_le32(header + 8, (int)(pts >> 32));
vPacket.insert(vPacket.end(), &header[0], &header[12]);
}
private:
static inline void mem_put_le32(void *vmem, int val) {
unsigned char *mem = (unsigned char *)vmem;
mem[0] = (unsigned char)((val >> 0) & 0xff);
mem[1] = (unsigned char)((val >> 8) & 0xff);
mem[2] = (unsigned char)((val >> 16) & 0xff);
mem[3] = (unsigned char)((val >> 24) & 0xff);
}
static inline void mem_put_le16(void *vmem, int val) {
unsigned char *mem = (unsigned char *)vmem;
mem[0] = (unsigned char)((val >> 0) & 0xff);
mem[1] = (unsigned char)((val >> 8) & 0xff);
}
};
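// Hedged usage sketch (illustrative; GetNextEncodedFrame is an assumed
// helper): write the 32-byte 'DKIF' file header once, then prefix each
// encoded AV1 frame with its 12-byte frame header, the same pattern
// NvEncoder::GetEncodedPacket follows in this commit.
IVFUtils ivf;
std::vector<uint8_t> stream;
ivf.WriteFileHeader(stream, MAKE_FOURCC('A', 'V', '0', '1'),
                    /*nWidth=*/1920, /*nHeight=*/1080,
                    /*nFrameRateNum=*/30, /*nFrameRateDen=*/1,
                    /*nFrameCnt=*/0xFFFF);
std::vector<uint8_t> frame = GetNextEncodedFrame();  // assumed helper
ivf.WriteFrameHeader(stream, frame.size(), /*pts=*/0);
stream.insert(stream.end(), frame.begin(), frame.end());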
/**
* @brief Utility class to measure elapsed time in seconds between the block of
* executed code
@@ -312,7 +399,7 @@ class StopWatch {
template <typename T>
class ConcurrentQueue {
public:
ConcurrentQueue() {}
ConcurrentQueue() : maxSize(0) {}
ConcurrentQueue(size_t size) : maxSize(size) {}
ConcurrentQueue(const ConcurrentQueue &) = delete;
ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;
@@ -382,7 +469,7 @@ class ConcurrentQueue {
private:
bool full() {
if (m_List.size() == maxSize) return true;
if (maxSize > 0 && m_List.size() == maxSize) return true;
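// (The previous default constructor left maxSize uninitialized, so full()
// compared against garbage; maxSize == 0 now explicitly means "unbounded".)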
return false;
}
@@ -393,14 +480,14 @@ class ConcurrentQueue {
size_t maxSize;
};
inline void CheckInputFile(const char *szInFilePath) {
std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
if (fpIn.fail()) {
std::ostringstream err;
err << "Unable to open input file: " << szInFilePath << std::endl;
throw std::invalid_argument(err.str());
}
}
// inline void CheckInputFile(const char *szInFilePath) {
// std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
// if (fpIn.fail()) {
// std::ostringstream err;
// err << "Unable to open input file: " << szInFilePath << std::endl;
// throw std::invalid_argument(err.str());
// }
// }
inline void ValidateResolution(int nWidth, int nHeight) {
if (nWidth <= 0 || nHeight <= 0) {

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "NvDecoder.h"
@@ -16,23 +32,22 @@
#include <cmath>
#include <iostream>
#include "nvcodec_api.h"
#include "nvcuvid.h"
#define START_TIMER auto start = std::chrono::steady_clock::now();
#define START_TIMER auto start = std::chrono::high_resolution_clock::now();
#define STOP_TIMER(print_message) \
std::cout << print_message \
<< std::chrono::duration_cast<std::chrono::milliseconds>( \
std::chrono::steady_clock::now() - start) \
.count() \
<< " ms " << std::endl;
int64_t elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>( \
std::chrono::high_resolution_clock::now() - start) \
.count(); \
std::cout << print_message << elapsedTime << " ms " << std::endl;
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char *szErrName = NULL; \
cuGetErrorName_ld(err__, &szErrName); \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVDECException::makeNVDECException( \
@@ -164,7 +179,7 @@ int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) {
* CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
*/
int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
START_TIMER
// START_TIMER
m_videoInfo.str("");
m_videoInfo.clear();
m_videoInfo << "Video Input Information" << std::endl
@@ -200,9 +215,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps_ld(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if (!decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU",
@@ -243,7 +258,7 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder_ld() has been called before, and now there's possible
// cuvidCreateDecoder() has been called before, and now there's possible
// config change
return ReconfigureDecoder(pVideoFormat);
}
@@ -379,10 +394,10 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
"Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder_ld(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Initialization Time: ");
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
// STOP_TIMER("Session Initialization Time: ");
return nDecodeSurface;
}
@@ -506,11 +521,11 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder_ld(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Reconfigure Time: ");
// START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
// STOP_TIMER("Session Reconfigure Time: ");
return nDecodeSurface;
}
@@ -539,9 +554,9 @@ int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
pFrame = m_vpFrame.back();
m_vpFrame.pop_back();
if (m_bUseDeviceFrame) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree_ld((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
} else {
delete pFrame;
}
@@ -559,9 +574,18 @@ int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
return false;
}
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture_ld(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
if (m_bForce_zero_latency &&
((!pPicParams->field_pic_flag) || (pPicParams->second_field))) {
CUVIDPARSERDISPINFO dispInfo;
memset(&dispInfo, 0, sizeof(dispInfo));
dispInfo.picture_index = pPicParams->CurrPicIdx;
dispInfo.progressive_frame = !pPicParams->field_pic_flag;
dispInfo.top_field_first = pPicParams->bottom_field_flag ^ 1;
HandlePictureDisplay(&dispInfo);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
return 1;
}
@@ -576,22 +600,87 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
videoProcessingParameters.output_stream = m_cuvidStream;
if (m_bExtractSEIMessage) {
if (m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData) {
// Write SEI Message
uint8_t *seiBuffer =
(uint8_t *)(m_SEIMessagesDisplayOrder[pDispInfo->picture_index]
.pSEIData);
uint32_t seiNumMessages =
m_SEIMessagesDisplayOrder[pDispInfo->picture_index].sei_message_count;
CUSEIMESSAGE *seiMessagesInfo =
m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage;
if (m_fpSEI) {
for (uint32_t i = 0; i < seiNumMessages; i++) {
if ((m_eCodec == cudaVideoCodec_H264) ||
(m_eCodec == cudaVideoCodec_H264_SVC) ||
(m_eCodec == cudaVideoCodec_H264_MVC) ||
(m_eCodec == cudaVideoCodec_HEVC) ||
(m_eCodec == cudaVideoCodec_MPEG2)) {
switch (seiMessagesInfo[i].sei_message_type) {
case SEI_TYPE_TIME_CODE:
case SEI_TYPE_TIME_CODE_H264: {
if (m_eCodec != cudaVideoCodec_MPEG2) {
TIMECODE *timecode = (TIMECODE *)seiBuffer;
fwrite(timecode, sizeof(TIMECODE), 1, m_fpSEI);
} else {
TIMECODEMPEG2 *timecode = (TIMECODEMPEG2 *)seiBuffer;
fwrite(timecode, sizeof(TIMECODEMPEG2), 1, m_fpSEI);
}
} break;
case SEI_TYPE_USER_DATA_REGISTERED:
case SEI_TYPE_USER_DATA_UNREGISTERED: {
fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1,
m_fpSEI);
} break;
case SEI_TYPE_MASTERING_DISPLAY_COLOR_VOLUME: {
SEIMASTERINGDISPLAYINFO *masteringDisplayVolume =
(SEIMASTERINGDISPLAYINFO *)seiBuffer;
fwrite(masteringDisplayVolume, sizeof(SEIMASTERINGDISPLAYINFO),
1, m_fpSEI);
} break;
case SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO: {
SEICONTENTLIGHTLEVELINFO *contentLightLevelInfo =
(SEICONTENTLIGHTLEVELINFO *)seiBuffer;
fwrite(contentLightLevelInfo, sizeof(SEICONTENTLIGHTLEVELINFO),
1, m_fpSEI);
} break;
case SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS: {
SEIALTERNATIVETRANSFERCHARACTERISTICS *transferCharacteristics =
(SEIALTERNATIVETRANSFERCHARACTERISTICS *)seiBuffer;
fwrite(transferCharacteristics,
sizeof(SEIALTERNATIVETRANSFERCHARACTERISTICS), 1,
m_fpSEI);
} break;
}
}
if (m_eCodec == cudaVideoCodec_AV1) {
fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1, m_fpSEI);
}
seiBuffer += seiMessagesInfo[i].sei_message_size;
}
}
free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData);
free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage);
}
}
CUdeviceptr dpSrcFrame = 0;
unsigned int nSrcPitch = 0;
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame64_ld(m_hDecoder, pDispInfo->picture_index,
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
&dpSrcFrame, &nSrcPitch,
&videoProcessingParameters));
CUVIDGETDECODESTATUS DecodeStatus;
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
CUresult result = cuvidGetDecodeStatus_ld(
m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
CUresult result =
cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
if (result == CUDA_SUCCESS &&
(DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
// printf("Decode Error occurred for picture %d\n",
// m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
printf("Decode Error occurred for picture %d\n",
m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
}
uint8_t *pDecodedFrame = nullptr;
@@ -603,12 +692,11 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
uint8_t *pFrame = NULL;
if (m_bUseDeviceFrame) {
if (m_bDeviceFramePitched) {
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
CUDA_DRVAPI_CALL(cuMemAllocPitch(
(CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
} else {
CUDA_DRVAPI_CALL(
cuMemAlloc_ld((CUdeviceptr *)&pFrame, GetFrameSize()));
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
}
} else {
pFrame = new uint8_t[GetFrameSize()];
@@ -629,7 +717,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
m.WidthInBytes = GetWidth() * m_nBPP;
m.Height = m_nLumaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
// Copy chroma plane
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning
@@ -639,7 +727,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice =
(CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
if (m_nNumChromaPlanes == 2) {
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
@@ -647,36 +735,83 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
m.dstPitch * m_nLumaHeight * 2);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
}
CUDA_DRVAPI_CALL(cuStreamSynchronize_ld(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
m_vTimestamp.resize(m_vpFrame.size());
}
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
NVDEC_API_CALL(cuvidUnmapVideoFrame64_ld(m_hDecoder, dpSrcFrame));
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
return 1;
}
int NvDecoder::GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
uint32_t seiNumMessages = pSEIMessageInfo->sei_message_count;
CUSEIMESSAGE *seiMessagesInfo = pSEIMessageInfo->pSEIMessage;
size_t totalSEIBufferSize = 0;
if ((pSEIMessageInfo->picIdx < 0) ||
(pSEIMessageInfo->picIdx >= MAX_FRM_CNT)) {
printf("Invalid picture index (%d)\n", pSEIMessageInfo->picIdx);
return 0;
}
for (uint32_t i = 0; i < seiNumMessages; i++) {
totalSEIBufferSize += seiMessagesInfo[i].sei_message_size;
}
if (!m_pCurrSEIMessage) {
printf("Out of Memory, Allocation failed for m_pCurrSEIMessage\n");
return 0;
}
m_pCurrSEIMessage->pSEIData = malloc(totalSEIBufferSize);
if (!m_pCurrSEIMessage->pSEIData) {
printf("Out of Memory, Allocation failed for SEI Buffer\n");
return 0;
}
memcpy(m_pCurrSEIMessage->pSEIData, pSEIMessageInfo->pSEIData,
totalSEIBufferSize);
m_pCurrSEIMessage->pSEIMessage =
(CUSEIMESSAGE *)malloc(sizeof(CUSEIMESSAGE) * seiNumMessages);
if (!m_pCurrSEIMessage->pSEIMessage) {
free(m_pCurrSEIMessage->pSEIData);
m_pCurrSEIMessage->pSEIData = NULL;
return 0;
}
memcpy(m_pCurrSEIMessage->pSEIMessage, pSEIMessageInfo->pSEIMessage,
sizeof(CUSEIMESSAGE) * seiNumMessages);
m_pCurrSEIMessage->sei_message_count = pSEIMessageInfo->sei_message_count;
m_SEIMessagesDisplayOrder[pSEIMessageInfo->picIdx] = *m_pCurrSEIMessage;
return 1;
}
NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
cudaVideoCodec eCodec, bool bLowLatency,
bool bDeviceFramePitched, const Rect *pCropRect,
const Dim *pResizeDim, int maxWidth, int maxHeight,
unsigned int clkRate)
const Dim *pResizeDim, bool extract_user_SEI_Message,
int maxWidth, int maxHeight, unsigned int clkRate,
bool force_zero_latency)
: m_cuContext(cuContext),
m_bUseDeviceFrame(bUseDeviceFrame),
m_eCodec(eCodec),
m_bDeviceFramePitched(bDeviceFramePitched),
m_bExtractSEIMessage(extract_user_SEI_Message),
m_nMaxWidth(maxWidth),
m_nMaxHeight(maxHeight) {
m_nMaxHeight(maxHeight),
m_bForce_zero_latency(force_zero_latency) {
if (pCropRect) m_cropRect = *pCropRect;
if (pResizeDim) m_resizeDim = *pResizeDim;
NVDEC_API_CALL(cuvidCtxLockCreate_ld(&m_ctxLock, cuContext));
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
ck(cuStreamCreate(&m_cuvidStream, CU_STREAM_DEFAULT));
if (m_bExtractSEIMessage) {
m_fpSEI = fopen("sei_message.txt", "wb");
m_pCurrSEIMessage = new CUVIDSEIMESSAGEINFO;
memset(&m_SEIMessagesDisplayOrder, 0, sizeof(m_SEIMessagesDisplayOrder));
}
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
@@ -685,36 +820,49 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
videoParserParameters.pUserData = this;
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
videoParserParameters.pfnDisplayPicture =
m_bForce_zero_latency ? NULL : HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
NVDEC_API_CALL(cuvidCreateVideoParser_ld(&m_hParser, &videoParserParameters));
videoParserParameters.pfnGetSEIMsg =
m_bExtractSEIMessage ? HandleSEIMessagesProc : NULL;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
}
NvDecoder::~NvDecoder() {
START_TIMER
// START_TIMER
if (m_pCurrSEIMessage) {
delete m_pCurrSEIMessage;
m_pCurrSEIMessage = NULL;
}
if (m_fpSEI) {
fclose(m_fpSEI);
m_fpSEI = NULL;
}
if (m_hParser) {
cuvidDestroyVideoParser_ld(m_hParser);
cuvidDestroyVideoParser(m_hParser);
}
cuCtxPushCurrent_ld(m_cuContext);
cuCtxPushCurrent(m_cuContext);
if (m_hDecoder) {
cuvidDestroyDecoder_ld(m_hDecoder);
cuvidDestroyDecoder(m_hDecoder);
}
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
for (uint8_t *pFrame : m_vpFrame) {
if (m_bUseDeviceFrame) {
cuMemFree_ld((CUdeviceptr)pFrame);
cuMemFree((CUdeviceptr)pFrame);
} else {
delete[] pFrame;
}
}
cuCtxPopCurrent_ld(NULL);
cuCtxPopCurrent(NULL);
cuvidCtxLockDestroy_ld(m_ctxLock);
cuvidCtxLockDestroy(m_ctxLock);
STOP_TIMER("Session Deinitialization Time: ");
// STOP_TIMER("Session Deinitialization Time: ");
}
int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
@@ -729,11 +877,7 @@ int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
// NVDEC_API_CALL(cuvidParseVideoData_ld(m_hParser, &packet));
if (CUDA_SUCCESS != cuvidParseVideoData_ld(m_hParser, &packet)) {
return 0;
}
m_cuvidStream = 0;
NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
return m_nDecodedFrame;
}

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
@@ -16,14 +32,27 @@
#include <string.h>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#include "Utils/NvCodecUtils.h"
#include "NvCodecUtils.h"
#include "nvcuvid.h"
#define MAX_FRM_CNT 32
typedef enum {
SEI_TYPE_TIME_CODE_H264 = 1,
SEI_TYPE_USER_DATA_REGISTERED = 4,
SEI_TYPE_USER_DATA_UNREGISTERED = 5,
SEI_TYPE_TIME_CODE = 136,
SEI_TYPE_MASTERING_DISPLAY_COLOR_VOLUME = 137,
SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO = 144,
SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147
} SEI_H264_HEVC_MPEG2_PAYLOAD_TYPE;
/**
* @brief Exception class for error reporting from the decode API.
*/
@@ -95,7 +124,9 @@ class NvDecoder {
NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec,
bool bLowLatency = false, bool bDeviceFramePitched = false,
const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000);
bool extract_user_SEI_Message = false, int maxWidth = 0,
int maxHeight = 0, unsigned int clkRate = 1000,
bool force_zero_latency = false);
~NvDecoder();
/**
@@ -284,6 +315,13 @@ class NvDecoder {
// stop the timer
double stopTimer() { return m_stDecode_time.Stop(); }
protected:
/**
* @brief This function gets called when a sequence is ready to be decoded.
The function also gets called when there is format change
*/
virtual int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
private:
/**
* @brief Callback function to be registered for getting a callback when
@@ -322,10 +360,13 @@ class NvDecoder {
}
/**
* @brief This function gets called when a sequence is ready to be decoded.
The function also gets called when there is format change
* @brief Callback function to be registered for getting a callback when
* all the unregistered user SEI Messages are parsed for a frame.
*/
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
static int CUDAAPI
HandleSEIMessagesProc(void *pUserData, CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
return ((NvDecoder *)pUserData)->GetSEIMessage(pSEIMessageInfo);
}
/**
* @brief This function gets called when a picture is ready to be decoded.
@@ -344,6 +385,13 @@ class NvDecoder {
* one operating points
*/
int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
/**
* @brief This function gets called when all unregistered user SEI messages
* are parsed for a frame
*/
int GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo);
/**
* @brief This function reconfigure decoder if there is a change in
* sequence params.
@@ -374,7 +422,10 @@ class NvDecoder {
// timestamps of decoded frames
std::vector<int64_t> m_vTimestamp;
int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[MAX_FRM_CNT];
CUVIDSEIMESSAGEINFO *m_pCurrSEIMessage = NULL;
CUVIDSEIMESSAGEINFO m_SEIMessagesDisplayOrder[MAX_FRM_CNT];
FILE *m_fpSEI = NULL;
bool m_bEndDecodeDone = false;
std::mutex m_mtxVPFrame;
int m_nFrameAlloc = 0;
@@ -392,4 +443,11 @@ class NvDecoder {
unsigned int m_nOperatingPoint = 0;
bool m_bDispAllLayers = false;
// In H.264, there is an inherent display latency for video contents
// which do not have num_reorder_frames=0 in the VUI. This applies to
// All-Intra and IPPP sequences as well. If the user wants zero display
// latency for All-Intra and IPPP sequences, the below flag will enable
// the display callback immediately after the decode callback.
bool m_bForce_zero_latency = false;
bool m_bExtractSEIMessage = false;
};
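// Hedged construction sketch tying the new parameters together (context
// creation elided; the codec and flag values are illustrative): a low-latency
// H.264 decoder with zero display latency and SEI extraction enabled,
// matching the extended constructor above.
CUcontext cuContext = /* cuCtxCreate(...) elided */ nullptr;
NvDecoder dec(cuContext, /*bUseDeviceFrame=*/false, cudaVideoCodec_H264,
              /*bLowLatency=*/true, /*bDeviceFramePitched=*/false,
              /*pCropRect=*/NULL, /*pResizeDim=*/NULL,
              /*extract_user_SEI_Message=*/true,
              /*maxWidth=*/0, /*maxHeight=*/0, /*clkRate=*/1000,
              /*force_zero_latency=*/true);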

View File

@@ -1,18 +1,32 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "NvEncoder.h"
#include "nvcodec_api.h"
#ifndef _WIN32
#include <cstring>
static inline bool operator==(const GUID &guid1, const GUID &guid2) {
@@ -28,7 +42,8 @@ NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice,
uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay, bool bMotionEstimationOnly,
bool bOutputInVideoMemory)
bool bOutputInVideoMemory, bool bDX12Encode,
bool bUseIVFContainer)
: m_pDevice(pDevice),
m_eDeviceType(eDeviceType),
m_nWidth(nWidth),
@@ -38,6 +53,8 @@ NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void *pDevice,
m_eBufferFormat(eBufferFormat),
m_bMotionEstimationOnly(bMotionEstimationOnly),
m_bOutputInVideoMemory(bOutputInVideoMemory),
m_bIsDX12Encode(bDX12Encode),
m_bUseIVFContainer(bUseIVFContainer),
m_nExtraOutputDelay(nExtraOutputDelay),
m_hEncoder(nullptr) {
LoadNvEncApi();
@@ -62,7 +79,7 @@ void NvEncoder::LoadNvEncApi() {
uint32_t version = 0;
uint32_t currentVersion =
(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion_ld(&version));
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
if (currentVersion > version) {
NVENC_THROW_ERROR(
"Current Driver Version does not support this NvEncodeAPI version, "
@@ -71,7 +88,7 @@ void NvEncoder::LoadNvEncApi() {
}
m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
NVENC_API_CALL(NvEncodeAPICreateInstance_ld(&m_nvenc));
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
}
NvEncoder::~NvEncoder() { DestroyHWEncoder(); }
@@ -122,27 +139,21 @@ void NvEncoder::CreateDefaultEncoderParams(
}
#endif
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
m_nvenc.nvEncGetEncodePresetConfig(m_hEncoder, codecGuid, presetGuid,
&presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
sizeof(NV_ENC_CONFIG));
pIntializeParams->encodeConfig->frameIntervalP = 1;
pIntializeParams->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH;
pIntializeParams->tuningInfo = tuningInfo;
pIntializeParams->encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_CONSTQP;
if (!m_bMotionEstimationOnly) {
pIntializeParams->tuningInfo = tuningInfo;
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
// There are changes in the structure layout, therefore users are recommended
// to be careful while moving their application to the new header. Following
// initialization has changed for the same reason.
NV_ENC_PRESET_CONFIG presetConfig = {
NV_ENC_PRESET_CONFIG_VER, 0, {NV_ENC_CONFIG_VER}};
m_nvenc.nvEncGetEncodePresetConfigEx(m_hEncoder, codecGuid, presetGuid,
tuningInfo, &presetConfig);
memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg,
sizeof(NV_ENC_CONFIG));
} else {
if (m_bMotionEstimationOnly) {
m_encodeConfig.version = NV_ENC_CONFIG_VER;
m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
m_encodeConfig.rcParams.constQP = {28, 31, 25};
@@ -157,12 +168,13 @@ void NvEncoder::CreateDefaultEncoderParams(
pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
} else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.inputBitDepth =
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
.pixelBitDepthMinus8 =
.outputBitDepth =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
? 2
: 0;
? NV_ENC_BIT_DEPTH_10
: NV_ENC_BIT_DEPTH_8;
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig
@@ -170,6 +182,22 @@ void NvEncoder::CreateDefaultEncoderParams(
}
pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
} else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
pIntializeParams->encodeConfig->encodeCodecConfig.av1Config.inputBitDepth =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
? NV_ENC_BIT_DEPTH_10
: NV_ENC_BIT_DEPTH_8;
pIntializeParams->encodeConfig->encodeCodecConfig.av1Config
.chromaFormatIDC = 1;
pIntializeParams->encodeConfig->encodeCodecConfig.av1Config.idrPeriod =
pIntializeParams->encodeConfig->gopLength;
if (m_bOutputInVideoMemory) {
pIntializeParams->encodeConfig->frameIntervalP = 1;
}
}
if (m_bIsDX12Encode) {
pIntializeParams->bufferFormat = m_eBufferFormat;
}
return;
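// Hedged setup sketch (illustrative): enc is an assumed concrete encoder
// such as NvEncoderCuda, and the preset/tuning GUIDs assume the usual
// SDK >= 10 surface. Defaults are filled in, then the session is created.
NV_ENC_INITIALIZE_PARAMS initializeParams = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encodeConfig = {NV_ENC_CONFIG_VER};
initializeParams.encodeConfig = &encodeConfig;
enc.CreateDefaultEncoderParams(&initializeParams, NV_ENC_CODEC_AV1_GUID,
                               NV_ENC_PRESET_P3_GUID,
                               NV_ENC_TUNING_INFO_LOW_LATENCY);
enc.CreateEncoder(&initializeParams);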
@@ -192,7 +220,8 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
}
if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID &&
pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID) {
pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID &&
pEncoderParams->encodeGUID != NV_ENC_CODEC_AV1_GUID) {
NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM);
}
@@ -204,6 +233,14 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
}
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 ||
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) {
NVENC_THROW_ERROR("YUV444 format isn't supported by AV1 encoder",
NV_ENC_ERR_INVALID_PARAM);
}
}
// set other necessary params if not set yet
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) &&
@@ -219,8 +256,9 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
? true
: false;
if (yuv10BitFormat && pEncoderParams->encodeConfig->encodeCodecConfig
.hevcConfig.pixelBitDepthMinus8 != 2) {
if (yuv10BitFormat &&
pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig
.inputBitDepth != NV_ENC_BIT_DEPTH_10) {
NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
}
@@ -232,6 +270,28 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
}
}
if (pEncoderParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
bool yuv10BitFormat =
(m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT) ? true : false;
if (yuv10BitFormat &&
pEncoderParams->encodeConfig->encodeCodecConfig.av1Config
.inputBitDepth != NV_ENC_BIT_DEPTH_10) {
NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM);
}
if (pEncoderParams->encodeConfig->encodeCodecConfig.av1Config
.chromaFormatIDC != 1) {
NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM);
}
if (m_bOutputInVideoMemory &&
pEncoderParams->encodeConfig->frameIntervalP > 1) {
NVENC_THROW_ERROR(
"Alt Ref frames not supported for AV1 in case of OutputInVideoMemory",
NV_ENC_ERR_INVALID_PARAM);
}
}
memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams));
m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
@@ -240,19 +300,31 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
sizeof(m_encodeConfig));
m_encodeConfig.version = NV_ENC_CONFIG_VER;
} else {
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER,
{NV_ENC_CONFIG_VER}};
// There are changes in the structure layout, therefore users are
// recommended to be careful while moving their application to the new
// header. Following initialization has changed for the same reason.
NV_ENC_PRESET_CONFIG presetConfig = {
NV_ENC_PRESET_CONFIG_VER, 0, {NV_ENC_CONFIG_VER}};
if (!m_bMotionEstimationOnly) {
m_nvenc.nvEncGetEncodePresetConfigEx(
m_hEncoder, pEncoderParams->encodeGUID, pEncoderParams->presetGUID,
pEncoderParams->tuningInfo, &presetConfig);
memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
if (m_bOutputInVideoMemory &&
pEncoderParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
m_encodeConfig.frameIntervalP = 1;
}
} else {
m_encodeConfig.version = NV_ENC_CONFIG_VER;
m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
m_encodeConfig.rcParams.constQP = {28, 31, 25};
}
}
if (((uint32_t)m_encodeConfig.frameIntervalP) > m_encodeConfig.gopLength) {
m_encodeConfig.frameIntervalP = m_encodeConfig.gopLength;
}
m_initializeParams.encodeConfig = &m_encodeConfig;
NVENC_API_CALL(
@@ -268,7 +340,6 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
m_encodeConfig.rcParams.lookaheadDepth +
m_nExtraOutputDelay;
m_nOutputDelay = m_nEncoderBuffer - 1;
m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
if (!m_bOutputInVideoMemory) {
m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr);
@@ -277,12 +348,16 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL);
if (!m_bIsDX12Encode) {
NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams);
}
}
#endif
m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr);
if (m_bMotionEstimationOnly) {
m_vMappedRefBuffers.resize(m_nEncoderBuffer, nullptr);
@@ -290,7 +365,7 @@ void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS *pEncoderParams) {
InitializeMVOutputBuffer();
}
} else {
if (!m_bOutputInVideoMemory) {
if (!m_bOutputInVideoMemory && !m_bIsDX12Encode) {
m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr);
InitializeBitstreamBuffer();
}
@@ -317,9 +392,11 @@ void NvEncoder::DestroyHWEncoder() {
#if defined(_WIN32)
for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) {
if (m_vpCompletionEvent[i]) {
if (!m_bIsDX12Encode) {
NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER};
eventParams.completionEvent = m_vpCompletionEvent[i];
m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams);
}
CloseHandle(m_vpCompletionEvent[i]);
}
}
@@ -329,7 +406,7 @@ void NvEncoder::DestroyHWEncoder() {
if (m_bMotionEstimationOnly) {
DestroyMVOutputBuffer();
} else {
DestroyBitstreamBuffer();
if (!m_bIsDX12Encode) DestroyBitstreamBuffer();
}
m_nvenc.nvEncDestroyEncoder(m_hEncoder);
@@ -444,6 +521,7 @@ NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer,
picParams.bufferFmt = GetPixelFormat();
picParams.inputWidth = GetEncodeWidth();
picParams.inputHeight = GetEncodeHeight();
picParams.frameIdx = m_iToSend;
picParams.outputBitstream = outputBuffer;
picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer);
NVENCSTATUS nvStatus = m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams);
@@ -488,8 +566,25 @@ void NvEncoder::GetEncodedPacket(std::vector<NV_ENC_OUTPUT_PTR> &vOutputBuffer,
vPacket.push_back(std::vector<uint8_t>());
}
vPacket[i].clear();
if ((m_initializeParams.encodeGUID == NV_ENC_CODEC_AV1_GUID) &&
(m_bUseIVFContainer)) {
if (m_bWriteIVFFileHeader) {
m_IVFUtils.WriteFileHeader(vPacket[i], MAKE_FOURCC('A', 'V', '0', '1'),
m_initializeParams.encodeWidth,
m_initializeParams.encodeHeight,
m_initializeParams.frameRateNum,
m_initializeParams.frameRateDen, 0xFFFF);
m_bWriteIVFFileHeader = false;
}
m_IVFUtils.WriteFrameHeader(vPacket[i],
lockBitstreamData.bitstreamSizeInBytes,
lockBitstreamData.outputTimeStamp);
}
vPacket[i].insert(vPacket[i].end(), &pData[0],
&pData[lockBitstreamData.bitstreamSizeInBytes]);
i++;
NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream(
@@ -533,7 +628,8 @@ bool NvEncoder::Reconfigure(
NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(
void *pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
NV_ENC_BUFFER_USAGE bufferUsage) {
NV_ENC_BUFFER_USAGE bufferUsage,
NV_ENC_FENCE_POINT_D3D12 *pInputFencePoint) {
NV_ENC_REGISTER_RESOURCE registerResource = {NV_ENC_REGISTER_RESOURCE_VER};
registerResource.resourceType = eResourceType;
registerResource.resourceToRegister = pBuffer;
@@ -542,6 +638,7 @@ NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource(
registerResource.pitch = pitch;
registerResource.bufferFormat = bufferFormat;
registerResource.bufferUsage = bufferUsage;
registerResource.pInputFencePoint = pInputFencePoint;
NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, &registerResource));
return registerResource.registeredResource;
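// Since this commit targets crashes during resolution reconfigure, a hedged
// sketch of driving Reconfigure (enc, newWidth and newHeight are illustrative;
// resolution changes also assume maxEncodeWidth/maxEncodeHeight were set when
// the session was created): reuse the current session parameters, change only
// the resolution, and force an IDR so the stream stays decodable.
NV_ENC_RECONFIGURE_PARAMS reconfigureParams = {NV_ENC_RECONFIGURE_PARAMS_VER};
NV_ENC_CONFIG reInitCodecConfig = {NV_ENC_CONFIG_VER};
NV_ENC_INITIALIZE_PARAMS params = enc.GetinitializeParams();
memcpy(&reconfigureParams.reInitEncodeParams, &params, sizeof(params));
memcpy(&reInitCodecConfig, params.encodeConfig, sizeof(reInitCodecConfig));
reconfigureParams.reInitEncodeParams.encodeConfig = &reInitCodecConfig;
reconfigureParams.reInitEncodeParams.encodeWidth = newWidth;
reconfigureParams.reInitEncodeParams.encodeHeight = newHeight;
reconfigureParams.reInitEncodeParams.darWidth = newWidth;
reconfigureParams.reInitEncodeParams.darHeight = newHeight;
reconfigureParams.forceIDR = 1;
enc.Reconfigure(&reconfigureParams);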

View File

@@ -1,12 +1,28 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
@@ -20,6 +36,8 @@
#include <string>
#include <vector>
#include "NvCodecUtils.h"
#include "log.h"
#include "nvEncodeAPI.h"
/**
@@ -45,12 +63,74 @@ class NVENCException : public std::exception {
NVENCSTATUS m_errorCode;
};
inline const char* GetNvStatusString(NVENCSTATUS eStatus) {
switch (eStatus) {
case NV_ENC_SUCCESS:
return "NV_ENC_SUCCESS";
case NV_ENC_ERR_NO_ENCODE_DEVICE:
return "NV_ENC_ERR_NO_ENCODE_DEVICE";
case NV_ENC_ERR_UNSUPPORTED_DEVICE:
return "NV_ENC_ERR_UNSUPPORTED_DEVICE";
case NV_ENC_ERR_INVALID_ENCODERDEVICE:
return "NV_ENC_ERR_INVALID_ENCODERDEVICE";
case NV_ENC_ERR_INVALID_DEVICE:
return "NV_ENC_ERR_INVALID_DEVICE";
case NV_ENC_ERR_DEVICE_NOT_EXIST:
return "NV_ENC_ERR_DEVICE_NOT_EXIST";
case NV_ENC_ERR_INVALID_PTR:
return "NV_ENC_ERR_INVALID_PTR";
case NV_ENC_ERR_INVALID_EVENT:
return "NV_ENC_ERR_INVALID_EVENT";
case NV_ENC_ERR_INVALID_PARAM:
return "NV_ENC_ERR_INVALID_PARAM";
case NV_ENC_ERR_INVALID_CALL:
return "NV_ENC_ERR_INVALID_CALL";
case NV_ENC_ERR_OUT_OF_MEMORY:
return "NV_ENC_ERR_OUT_OF_MEMORY";
case NV_ENC_ERR_ENCODER_NOT_INITIALIZED:
return "NV_ENC_ERR_ENCODER_NOT_INITIALIZED";
case NV_ENC_ERR_UNSUPPORTED_PARAM:
return "NV_ENC_ERR_UNSUPPORTED_PARAM";
case NV_ENC_ERR_LOCK_BUSY:
return "NV_ENC_ERR_LOCK_BUSY";
case NV_ENC_ERR_NOT_ENOUGH_BUFFER:
return "NV_ENC_ERR_NOT_ENOUGH_BUFFER";
case NV_ENC_ERR_INVALID_VERSION:
return "NV_ENC_ERR_INVALID_VERSION";
case NV_ENC_ERR_MAP_FAILED:
return "NV_ENC_ERR_MAP_FAILED";
case NV_ENC_ERR_NEED_MORE_INPUT:
return "NV_ENC_ERR_NEED_MORE_INPUT";
case NV_ENC_ERR_ENCODER_BUSY:
return "NV_ENC_ERR_ENCODER_BUSY";
case NV_ENC_ERR_EVENT_NOT_REGISTERD:
return "NV_ENC_ERR_EVENT_NOT_REGISTERD";
case NV_ENC_ERR_GENERIC:
return "NV_ENC_ERR_GENERIC";
case NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY:
return "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY";
case NV_ENC_ERR_UNIMPLEMENTED:
return "NV_ENC_ERR_UNIMPLEMENTED";
case NV_ENC_ERR_RESOURCE_REGISTER_FAILED:
return "NV_ENC_ERR_RESOURCE_REGISTER_FAILED";
case NV_ENC_ERR_RESOURCE_NOT_REGISTERED:
return "NV_ENC_ERR_RESOURCE_NOT_REGISTERED";
case NV_ENC_ERR_RESOURCE_NOT_MAPPED:
return "NV_ENC_ERR_RESOURCE_NOT_MAPPED";
case NV_ENC_ERR_NEED_MORE_OUTPUT:
return "NV_ENC_ERR_NEED_MORE_OUTPUT";
default:
return "NVENC_UNKNOWN_ERROR";
}
}
inline NVENCException NVENCException::makeNVENCException(
const std::string& errorStr, const NVENCSTATUS errorCode,
const std::string& functionName, const std::string& fileName, int lineNo) {
std::ostringstream errorLog;
errorLog << functionName << " : " << errorStr << " at " << fileName << ":"
<< lineNo << std::endl;
LOG_ERROR("{} failed due to {}", functionName, GetNvStatusString(errorCode));
NVENCException exception(errorLog.str(), errorCode);
return exception;
}
@@ -92,7 +172,7 @@ class NvEncoder {
* Application must call this function to initialize the encoder, before
* starting to encode any frames.
*/
void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
virtual void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams);
/**
* @brief This function is used to destroy the encoder session.
@@ -100,7 +180,7 @@ class NvEncoder {
* clean up any allocated resources. The application must call EndEncode()
* function to get any queued encoded frames before calling DestroyEncoder().
*/
void DestroyEncoder();
virtual void DestroyEncoder();
/**
* @brief This function is used to reconfigure an existing encoder session.
@@ -124,7 +204,7 @@ class NvEncoder {
* data, which has been copied to an input buffer obtained from the
* GetNextInputFrame() function.
*/
void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket,
virtual void EncodeFrame(std::vector<std::vector<uint8_t>>& vPacket,
NV_ENC_PIC_PARAMS* pPicParams = nullptr);
/**
@@ -134,7 +214,7 @@ class NvEncoder {
* from the encoder. The application must call this function before
* destroying an encoder session.
*/
void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
virtual void EndEncode(std::vector<std::vector<uint8_t>>& vPacket);
/**
* @brief This function is used to query hardware encoder capabilities.
@@ -272,6 +352,13 @@ class NvEncoder {
*/
uint32_t GetEncoderBufferCount() const { return m_nEncoderBuffer; }
/**
 * @brief This function returns the initialize params (width, height, fps,
 * etc.).
 */
NV_ENC_INITIALIZE_PARAMS GetinitializeParams() const {
return m_initializeParams;
}
protected:
/**
* @brief NvEncoder class constructor.
@@ -280,7 +367,8 @@ class NvEncoder {
NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, void* pDevice, uint32_t nWidth,
uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nOutputDelay, bool bMotionEstimationOnly,
bool bOutputInVideoMemory = false);
bool bOutputInVideoMemory = false, bool bDX12Encode = false,
bool bUseIVFContainer = true);
/**
* @brief This function is used to check if hardware encoder is properly
@@ -314,7 +402,8 @@ class NvEncoder {
NV_ENC_REGISTERED_PTR RegisterResource(
void* pBuffer, NV_ENC_INPUT_RESOURCE_TYPE eResourceType, int width,
int height, int pitch, NV_ENC_BUFFER_FORMAT bufferFormat,
NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE);
NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE,
NV_ENC_FENCE_POINT_D3D12* pInputFencePoint = NULL);
/**
* @brief This function returns maximum width used to open the encoder
@@ -448,8 +537,10 @@ class NvEncoder {
protected:
bool m_bMotionEstimationOnly = false;
bool m_bOutputInVideoMemory = false;
bool m_bIsDX12Encode = false;
void* m_hEncoder = nullptr;
NV_ENCODE_API_FUNCTION_LIST m_nvenc;
NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
std::vector<NvEncInputFrame> m_vInputFrames;
std::vector<NV_ENC_REGISTERED_PTR> m_vRegisteredResources;
std::vector<NvEncInputFrame> m_vReferenceFrames;
@@ -462,6 +553,9 @@ class NvEncoder {
int32_t m_iGot = 0;
int32_t m_nEncoderBuffer = 0;
int32_t m_nOutputDelay = 0;
IVFUtils m_IVFUtils;
bool m_bWriteIVFFileHeader = true;
bool m_bUseIVFContainer = true;
private:
uint32_t m_nWidth;
@@ -469,7 +563,6 @@ class NvEncoder {
NV_ENC_BUFFER_FORMAT m_eBufferFormat;
void* m_pDevice;
NV_ENC_DEVICE_TYPE m_eDeviceType;
NV_ENC_INITIALIZE_PARAMS m_initializeParams = {};
NV_ENC_CONFIG m_encodeConfig = {};
bool m_bEncoderInitialized = false;
uint32_t m_nExtraOutputDelay =

View File

@@ -1,13 +1,29 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <vector>
@@ -18,7 +34,8 @@
#include <iterator>
#include <cstring>
#include <functional>
#include "../Utils/Logger.h"
#include "Logger.h"
#include "nvEncodeAPI.h"
extern simplelogger::Logger *logger;
@@ -81,6 +98,11 @@ public:
virtual bool IsCodecHEVC() {
return GetEncodeGUID() == NV_ENC_CODEC_HEVC_GUID;
}
virtual bool IsCodecAV1() {
return GetEncodeGUID() == NV_ENC_CODEC_AV1_GUID;
}
std::string GetHelpMessage(bool bMeOnly = false, bool bUnbuffered = false, bool bHide444 = false, bool bOutputInVidMem = false)
{
std::ostringstream oss;
@@ -103,7 +125,8 @@ public:
}
else
{
oss << "; HEVC: " << szHevcProfileNames << std::endl;
oss << "; HEVC: " << szHevcProfileNames;
oss << "; AV1: " << szAV1ProfileNames << std::endl;
}
if (!bMeOnly)
@@ -117,7 +140,7 @@ public:
if (!bHide444 && !bLowLatency)
{
oss << "-444 (Only for RGB input) YUV444 encode" << std::endl;
oss << "-444 (Only for RGB input) YUV444 encode. Not valid for AV1 Codec" << std::endl;
}
if (bMeOnly) return oss.str();
oss << "-fps Frame rate" << std::endl;
@@ -132,6 +155,8 @@ public:
oss << "-rc Rate control mode: " << szRcModeNames << std::endl
<< "-gop Length of GOP (Group of Pictures)" << std::endl
<< "-bitrate Average bit rate, can be in unit of 1, K, M" << std::endl
<< "Note: Fps or Average bit rate values for each session can be specified in the form of v1,v1,v3 (no space) for AppTransOneToN" << std::endl
<< " If the number of 'bitrate' or 'fps' values specified are less than the number of sessions, then the last specified value will be considered for the remaining sessions" << std::endl
<< "-maxbitrate Max bit rate, can be in unit of 1, K, M" << std::endl
<< "-vbvbufsize VBV buffer size in bits, can be in unit of 1, K, M" << std::endl
<< "-vbvinit VBV initial delay in bits, can be in unit of 1, K, M" << std::endl
@@ -174,8 +199,11 @@ public:
}
os
<< std::endl << "\tprofile : " << ConvertValueToString(vProfile, szProfileNames, pParams->encodeConfig->profileGUID)
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC : pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? 0 : pParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8) + 8
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC :
(pParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) ? pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC :
pParams->encodeConfig->encodeCodecConfig.av1Config.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.inputBitDepth : (pParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) ?
pParams->encodeConfig->encodeCodecConfig.hevcConfig.inputBitDepth : pParams->encodeConfig->encodeCodecConfig.av1Config.inputBitDepth)
<< std::endl << "\trc : " << ConvertValueToString(vRcMode, szRcModeNames, pParams->encodeConfig->rcParams.rateControlMode)
;
if (pParams->encodeConfig->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CONSTQP) {
@@ -213,9 +241,16 @@ public:
* provided to the application and sets the fields from NV_ENC_INITIALIZE_PARAMS
* based on the supplied values.
*/
virtual void setTransOneToN(bool isTransOneToN)
{
bTransOneToN = isTransOneToN;
}
virtual void SetInitParams(NV_ENC_INITIALIZE_PARAMS *pParams, NV_ENC_BUFFER_FORMAT eBufferFormat)
{
NV_ENC_CONFIG &config = *pParams->encodeConfig;
int nGOPOption = 0, nBFramesOption = 0;
for (unsigned i = 0; i < tokens.size(); i++)
{
if (
@@ -224,11 +259,12 @@ public:
tokens[i] == "-tuninginfo" && ++i ||
tokens[i] == "-multipass" && ++i != tokens.size() && ParseString("-multipass", tokens[i], vMultiPass, szMultipass, &config.rcParams.multiPass) ||
tokens[i] == "-profile" && ++i != tokens.size() && (IsCodecH264() ?
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID)) ||
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) : IsCodecHEVC() ?
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vAV1Profile, szAV1ProfileNames, &config.profileGUID)) ||
tokens[i] == "-rc" && ++i != tokens.size() && ParseString("-rc", tokens[i], vRcMode, szRcModeNames, &config.rcParams.rateControlMode) ||
tokens[i] == "-fps" && ++i != tokens.size() && ParseInt("-fps", tokens[i], &pParams->frameRateNum) ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP && ++nBFramesOption ||
tokens[i] == "-bitrate" && ++i != tokens.size() && ParseBitRate("-bitrate", tokens[i], &config.rcParams.averageBitRate) ||
tokens[i] == "-maxbitrate" && ++i != tokens.size() && ParseBitRate("-maxbitrate", tokens[i], &config.rcParams.maxBitRate) ||
tokens[i] == "-vbvbufsize" && ++i != tokens.size() && ParseBitRate("-vbvbufsize", tokens[i], &config.rcParams.vbvBufferSize) ||
@@ -257,14 +293,19 @@ public:
if (tokens[i] == "-gop" && ++i != tokens.size() && ParseInt("-gop", tokens[i], &config.gopLength))
{
nGOPOption = 1;
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.idrPeriod = config.gopLength;
}
else
else if (IsCodecHEVC())
{
config.encodeCodecConfig.hevcConfig.idrPeriod = config.gopLength;
}
else
{
config.encodeCodecConfig.av1Config.idrPeriod = config.gopLength;
}
continue;
}
@@ -273,10 +314,17 @@ public:
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
} else
}
else if (IsCodecHEVC())
{
config.encodeCodecConfig.hevcConfig.chromaFormatIDC = 3;
}
else
{
std::ostringstream errmessage;
errmessage << "Incorrect Parameter: YUV444 Input not supported with AV1 Codec" << std::endl;
throw std::invalid_argument(errmessage.str());
}
continue;
}
@@ -292,10 +340,27 @@ public:
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
config.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2;
config.encodeCodecConfig.hevcConfig.inputBitDepth = NV_ENC_BIT_DEPTH_10;
config.encodeCodecConfig.hevcConfig.outputBitDepth = NV_ENC_BIT_DEPTH_10;
}
}
if (IsCodecAV1())
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
{
config.encodeCodecConfig.av1Config.inputBitDepth = NV_ENC_BIT_DEPTH_10;
config.encodeCodecConfig.av1Config.outputBitDepth = NV_ENC_BIT_DEPTH_10;
}
}
if (nGOPOption && nBFramesOption && (config.gopLength < ((uint32_t)config.frameIntervalP)))
{
std::ostringstream errmessage;
errmessage << "gopLength (" << config.gopLength << ") must be greater or equal to frameIntervalP (number of B frames + 1) (" << config.frameIntervalP << ")\n";
throw std::invalid_argument(errmessage.str());
}
funcInit(pParams);
LOG(INFO) << NvEncoderInitParam().MainParamToString(pParams);
LOG(TRACE) << NvEncoderInitParam().FullParamToString(pParams);
@@ -327,6 +392,37 @@ private:
return split(strValueNames, ' ')[it - vValue.begin()];
}
bool ParseBitRate(const std::string &strName, const std::string &strValue, unsigned *pBitRate) {
if(bTransOneToN)
{
std::vector<std::string> oneToNBitrate = split(strValue, ',');
std::string currBitrate;
if ((bitrateCnt + 1) > oneToNBitrate.size())
{
currBitrate = oneToNBitrate[oneToNBitrate.size() - 1];
}
else
{
currBitrate = oneToNBitrate[bitrateCnt];
bitrateCnt++;
}
try {
size_t l;
double r = std::stod(currBitrate, &l);
char c = currBitrate[l];
if (c != 0 && c != 'k' && c != 'm') {
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
}
catch (std::invalid_argument) {
return false;
}
return true;
}
else
{
try {
size_t l;
double r = std::stod(strValue, &l);
@@ -335,21 +431,50 @@ private:
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
} catch (std::invalid_argument) {
}
catch (std::invalid_argument) {
return false;
}
return true;
}
}
template<typename T>
bool ParseInt(const std::string &strName, const std::string &strValue, T *pInt) {
if (bTransOneToN)
{
std::vector<std::string> oneToNFps = split(strValue, ',');
std::string currFps;
if ((fpsCnt + 1) > oneToNFps.size())
{
currFps = oneToNFps[oneToNFps.size() - 1];
}
else
{
currFps = oneToNFps[fpsCnt];
fpsCnt++;
}
try {
*pInt = std::stoi(strValue);
} catch (std::invalid_argument) {
*pInt = std::stoi(currFps);
}
catch (std::invalid_argument) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
else
{
try {
*pInt = std::stoi(strValue);
}
catch (std::invalid_argument) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
}
bool ParseQp(const std::string &strName, const std::string &strValue, NV_ENC_QP *pQp) {
std::vector<std::string> vQp = split(strValue, ',');
try {
@@ -385,11 +510,15 @@ private:
GUID guidPreset = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO m_TuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
bool bLowLatency = false;
uint32_t bitrateCnt = 0;
uint32_t fpsCnt = 0;
bool bTransOneToN = false;
const char *szCodecNames = "h264 hevc";
const char *szCodecNames = "h264 hevc av1";
std::vector<GUID> vCodec = std::vector<GUID> {
NV_ENC_CODEC_H264_GUID,
NV_ENC_CODEC_HEVC_GUID
NV_ENC_CODEC_HEVC_GUID,
NV_ENC_CODEC_AV1_GUID
};
const char *szChromaNames = "yuv420 yuv444";
@@ -422,9 +551,15 @@ private:
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szAV1ProfileNames = "main";
std::vector<GUID> vAV1Profile = std::vector<GUID>{
NV_ENC_AV1_PROFILE_MAIN_GUID,
};
const char *szProfileNames = "(default) auto baseline(h264) main(h264) high(h264) high444(h264)"
" stereo(h264) progressiv_high(h264) constrained_high(h264)"
" main(hevc) main10(hevc) frext(hevc)";
" main(hevc) main10(hevc) frext(hevc)"
" main(av1) high(av1)";
std::vector<GUID> vProfile = std::vector<GUID> {
GUID{},
NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID,
@@ -438,15 +573,17 @@ private:
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
NV_ENC_AV1_PROFILE_MAIN_GUID,
};
const char *szLowLatencyTuningInfoNames = "lowlatency ultralowlatency";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless uhq";
std::vector<NV_ENC_TUNING_INFO> vTuningInfo = std::vector<NV_ENC_TUNING_INFO>{
NV_ENC_TUNING_INFO_HIGH_QUALITY,
NV_ENC_TUNING_INFO_LOW_LATENCY,
NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
NV_ENC_TUNING_INFO_LOSSLESS
NV_ENC_TUNING_INFO_LOSSLESS,
NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY
};
const char *szRcModeNames = "constqp vbr cbr";
@@ -611,7 +748,8 @@ public:
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.repeatSPSPPS << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.hevcConfig.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC << std::endl
<< " pixelBitDepthMinus8: " << pConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 << std::endl
<< " inputBitDepth: " << pConfig->encodeCodecConfig.hevcConfig.inputBitDepth << std::endl
<< " outputBitDepth: " << pConfig->encodeCodecConfig.hevcConfig.outputBitDepth << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.hevcConfig.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshCnt << std::endl
@@ -638,7 +776,65 @@ public:
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.hevcConfig.ltrTrustMode << std::endl;
} else if (pInitializeParams->encodeGUID == NV_ENC_CODEC_AV1_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (AV1):" << std::endl
<< " level: " << pConfig->encodeCodecConfig.av1Config.level << std::endl
<< " tier: " << pConfig->encodeCodecConfig.av1Config.tier << std::endl
<< " minPartSize: " << pConfig->encodeCodecConfig.av1Config.minPartSize << std::endl
<< " maxPartSize: " << pConfig->encodeCodecConfig.av1Config.maxPartSize << std::endl
<< " outputAnnexBFormat: " << pConfig->encodeCodecConfig.av1Config.outputAnnexBFormat << std::endl
<< " enableTimingInfo: " << pConfig->encodeCodecConfig.av1Config.enableTimingInfo << std::endl
<< " enableDecoderModelInfo: " << pConfig->encodeCodecConfig.av1Config.enableDecoderModelInfo << std::endl
<< " enableFrameIdNumbers: " << pConfig->encodeCodecConfig.av1Config.enableFrameIdNumbers << std::endl
<< " disableSeqHdr: " << pConfig->encodeCodecConfig.av1Config.disableSeqHdr << std::endl
<< " repeatSeqHdr: " << pConfig->encodeCodecConfig.av1Config.repeatSeqHdr << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.av1Config.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.av1Config.chromaFormatIDC << std::endl
<< " enableBitstreamPadding: " << pConfig->encodeCodecConfig.av1Config.enableBitstreamPadding << std::endl
<< " enableCustomTileConfig: " << pConfig->encodeCodecConfig.av1Config.enableCustomTileConfig << std::endl
<< " enableFilmGrainParams: " << pConfig->encodeCodecConfig.av1Config.enableFilmGrainParams << std::endl
<< " inputBitDepth: " << pConfig->encodeCodecConfig.av1Config.inputBitDepth << std::endl
<< " outputBitDepth: " << pConfig->encodeCodecConfig.av1Config.outputBitDepth << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.av1Config.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.av1Config.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.av1Config.intraRefreshCnt << std::endl
<< " maxNumRefFramesInDPB: " << pConfig->encodeCodecConfig.av1Config.maxNumRefFramesInDPB << std::endl
<< " numTileColumns: " << pConfig->encodeCodecConfig.av1Config.numTileColumns << std::endl
<< " numTileRows: " << pConfig->encodeCodecConfig.av1Config.numTileRows << std::endl
<< " maxTemporalLayersMinus1: " << pConfig->encodeCodecConfig.av1Config.maxTemporalLayersMinus1 << std::endl
<< " colorPrimaries: " << pConfig->encodeCodecConfig.av1Config.colorPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.av1Config.transferCharacteristics << std::endl
<< " matrixCoefficients: " << pConfig->encodeCodecConfig.av1Config.matrixCoefficients << std::endl
<< " colorRange: " << pConfig->encodeCodecConfig.av1Config.colorRange << std::endl
<< " chromaSamplePosition: " << pConfig->encodeCodecConfig.av1Config.chromaSamplePosition << std::endl
<< " useBFramesAsRef: " << pConfig->encodeCodecConfig.av1Config.useBFramesAsRef << std::endl
<< " numFwdRefs: " << pConfig->encodeCodecConfig.av1Config.numFwdRefs << std::endl
<< " numBwdRefs: " << pConfig->encodeCodecConfig.av1Config.numBwdRefs << std::endl;
if (pConfig->encodeCodecConfig.av1Config.filmGrainParams != NULL)
{
os
<< " NV_ENC_FILM_GRAIN_PARAMS_AV1:" << std::endl
<< " applyGrain: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->applyGrain << std::endl
<< " chromaScalingFromLuma: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->chromaScalingFromLuma << std::endl
<< " overlapFlag: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->overlapFlag << std::endl
<< " clipToRestrictedRange: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->clipToRestrictedRange << std::endl
<< " grainScalingMinus8: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->grainScalingMinus8 << std::endl
<< " arCoeffLag: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->arCoeffLag << std::endl
<< " numYPoints: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->numYPoints << std::endl
<< " numCbPoints: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->numCbPoints << std::endl
<< " numCrPoints: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->numCrPoints << std::endl
<< " arCoeffShiftMinus6: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->arCoeffShiftMinus6 << std::endl
<< " grainScaleShift: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->grainScaleShift << std::endl
<< " cbMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->cbMult << std::endl
<< " cbLumaMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->cbLumaMult << std::endl
<< " cbOffset: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->cbOffset << std::endl
<< " crMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->crMult << std::endl
<< " crLumaMult: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->crLumaMult << std::endl
<< " crOffset: " << pConfig->encodeCodecConfig.av1Config.filmGrainParams->crOffset << std::endl;
}
}
return os.str();
}
};

View File

@@ -1,27 +1,41 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
* This copyright notice applies to this header file only:
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "NvEncoderCuda.h"
#include "nvcodec_api.h"
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay,
bool bMotionEstimationOnly,
bool bOutputInVideoMemory)
bool bOutputInVideoMemory, bool bUseIVFContainer)
: NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight,
eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly,
bOutputInVideoMemory),
bOutputInVideoMemory, false, bUseIVFContainer),
m_cuContext(cuContext) {
if (!m_hEncoder) {
NVENC_THROW_ERROR("Encoder Initialization failed",
@@ -46,7 +60,7 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
int numCount = m_bMotionEstimationOnly ? 2 : 1;
for (int count = 0; count < numCount; count++) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
std::vector<void *> inputFrames;
for (int i = 0; i < numInputBuffers; i++) {
CUdeviceptr pDeviceFrame;
@@ -56,13 +70,13 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
CUDA_DRVAPI_CALL(cuMemAllocPitch(
(CUdeviceptr *)&pDeviceFrame, &m_cudaPitch,
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
GetMaxEncodeHeight() + chromaHeight, 16));
inputFrames.push_back((void *)pDeviceFrame);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
RegisterInputResources(
inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
@@ -90,24 +104,23 @@ void NvEncoderCuda::ReleaseCudaResources() {
UnregisterInputResources();
cuCtxPushCurrent_ld(m_cuContext);
cuCtxPushCurrent(m_cuContext);
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) {
if (m_vInputFrames[i].inputPtr) {
cuMemFree_ld(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
}
}
m_vInputFrames.clear();
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) {
if (m_vReferenceFrames[i].inputPtr) {
cuMemFree_ld(
reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
}
}
m_vReferenceFrames.clear();
cuCtxPopCurrent_ld(NULL);
cuCtxPopCurrent(NULL);
m_cuContext = nullptr;
}
@@ -123,7 +136,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -141,10 +154,10 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -170,14 +183,14 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}
void NvEncoderCuda::CopyToDeviceFrame(
@@ -192,7 +205,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -210,9 +223,9 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -237,11 +250,11 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}

View File

@@ -0,0 +1,127 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <vector>
#include <stdint.h>
#include <mutex>
#include <cuda.h>
#include "NvEncoder.h"
#define CUDA_DRVAPI_CALL( call ) \
do \
{ \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) \
{ \
const char *szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName ; \
throw NVENCException::makeNVENCException(errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, __LINE__); \
} \
} \
while (0)
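(A brief, hypothetical call-site sketch of this macro: any CUDA driver API
failure is converted into an NVENCException instead of a silently ignored
error code.)
// Hedged example; dptr and the 1080p NV12 size are illustrative only.
CUdeviceptr dptr = 0;
CUDA_DRVAPI_CALL(cuMemAlloc(&dptr, 1920 * 1080 * 3 / 2));  // one NV12 frame
CUDA_DRVAPI_CALL(cuMemFree(dptr));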
/**
* @brief Encoder for CUDA device memory.
*/
class NvEncoderCuda : public NvEncoder
{
public:
NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay = 3, bool bMotionEstimationOnly = false, bool bOPInVideoMemory = false, bool bUseIVFContainer = true);
virtual ~NvEncoderCuda();
/**
* @brief This is a static function to copy input data from host memory to device memory.
* This function assumes YUV plane is a single contiguous memory segment.
*/
static void CopyToDeviceFrame(CUcontext device,
void* pSrcFrame,
uint32_t nSrcPitch,
CUdeviceptr pDstFrame,
uint32_t dstPitch,
int width,
int height,
CUmemorytype srcMemoryType,
NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[],
uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false,
CUstream stream = NULL);
/**
* @brief This is a static function to copy input data from host memory to device memory.
* Application must pass a separate device pointer for each YUV plane.
*/
static void CopyToDeviceFrame(CUcontext device,
void* pSrcFrame,
uint32_t nSrcPitch,
CUdeviceptr pDstFrame,
uint32_t dstPitch,
int width,
int height,
CUmemorytype srcMemoryType,
NV_ENC_BUFFER_FORMAT pixelFormat,
CUdeviceptr dstChromaPtr[],
uint32_t dstChromaPitch,
uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false);
/**
* @brief This function sets input and output CUDA streams
*/
void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream);
protected:
/**
* @brief This function is used to release the input buffers allocated for encoding.
* This function is an override of virtual function NvEncoder::ReleaseInputBuffers().
*/
virtual void ReleaseInputBuffers() override;
private:
/**
* @brief This function is used to allocate input buffers for encoding.
* This function is an override of virtual function NvEncoder::AllocateInputBuffers().
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
private:
/**
* @brief This is a private function to release CUDA device memory used for encoding.
*/
void ReleaseCudaResources();
protected:
CUcontext m_cuContext;
private:
size_t m_cudaPitch = 0;
};
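(As a quick orientation, a minimal encode loop with this class might look like
the sketch below. It is illustrative only: cuCtx, width, height, and frame are
assumed to exist, and error handling is omitted.)
// Hedged sketch: encode one host NV12 frame and flush.
NvEncoderCuda enc(cuCtx, width, height, NV_ENC_BUFFER_FORMAT_NV12);
NV_ENC_INITIALIZE_PARAMS params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG cfg = {NV_ENC_CONFIG_VER};
params.encodeConfig = &cfg;
enc.CreateDefaultEncoderParams(&params, NV_ENC_CODEC_H264_GUID,
                               NV_ENC_PRESET_P3_GUID,
                               NV_ENC_TUNING_INFO_LOW_LATENCY);
enc.CreateEncoder(&params);
const NvEncInputFrame *in = enc.GetNextInputFrame();
NvEncoderCuda::CopyToDeviceFrame(cuCtx, frame, 0,
                                 (CUdeviceptr)in->inputPtr, in->pitch,
                                 width, height, CU_MEMORYTYPE_HOST,
                                 in->bufferFormat, in->chromaOffsets,
                                 in->numChromaPlanes);
std::vector<std::vector<uint8_t>> packets;
enc.EncodeFrame(packets);   // packets now holds zero or more bitstream units
enc.EndEncode(packets);     // flush queued frames
enc.DestroyEncoder();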

View File

@@ -0,0 +1,110 @@
#include "nvcodec_common.h"
void ShowHelpAndExit(const char *szBadOption) {
std::ostringstream oss;
bool bThrowError = false;
if (szBadOption) {
oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
bThrowError = true;
}
oss << "Options:" << std::endl
<< "-i Input file path" << std::endl
<< "-o Output file path" << std::endl
<< "-s Input resolution in this form: WxH" << std::endl
<< "-if Input format: iyuv nv12" << std::endl
<< "-gpu Ordinal of GPU to use" << std::endl
<< "-case 0: Encode frames with dynamic bitrate change"
<< std::endl
<< " 1: Encode frames with dynamic resolution change"
<< std::endl;
oss << NvEncoderInitParam("", nullptr, true).GetHelpMessage() << std::endl;
if (bThrowError) {
throw std::invalid_argument(oss.str());
} else {
std::cout << oss.str();
exit(0);
}
}
void ParseCommandLine(int argc, char *argv[], char *szInputFileName,
int &nWidth, int &nHeight, NV_ENC_BUFFER_FORMAT &eFormat,
char *szOutputFileName, NvEncoderInitParam &initParam,
int &iGpu, int &iCase, int &nFrame) {
std::ostringstream oss;
int i;
for (i = 1; i < argc; i++) {
if (!_stricmp(argv[i], "-h")) {
ShowHelpAndExit();
}
if (!_stricmp(argv[i], "-i")) {
if (++i == argc) {
ShowHelpAndExit("-i");
}
sprintf(szInputFileName, "%s", argv[i]);
continue;
}
if (!_stricmp(argv[i], "-o")) {
if (++i == argc) {
ShowHelpAndExit("-o");
}
sprintf(szOutputFileName, "%s", argv[i]);
continue;
}
if (!_stricmp(argv[i], "-s")) {
if (++i == argc || 2 != sscanf(argv[i], "%dx%d", &nWidth, &nHeight)) {
ShowHelpAndExit("-s");
}
continue;
}
std::vector<std::string> vszFileFormatName = {"iyuv", "nv12"};
NV_ENC_BUFFER_FORMAT aFormat[] = {
NV_ENC_BUFFER_FORMAT_IYUV,
NV_ENC_BUFFER_FORMAT_NV12,
};
if (!_stricmp(argv[i], "-if")) {
if (++i == argc) {
ShowHelpAndExit("-if");
}
auto it = std::find(vszFileFormatName.begin(), vszFileFormatName.end(),
argv[i]);
if (it == vszFileFormatName.end()) {
ShowHelpAndExit("-if");
}
eFormat = aFormat[it - vszFileFormatName.begin()];
continue;
}
if (!_stricmp(argv[i], "-gpu")) {
if (++i == argc) {
ShowHelpAndExit("-gpu");
}
iGpu = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-case")) {
if (++i == argc) {
ShowHelpAndExit("-case");
}
iCase = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-frame")) {
if (++i == argc) {
ShowHelpAndExit("-frame");
}
nFrame = atoi(argv[i]);
continue;
}
// Regard as encoder parameter
if (argv[i][0] != '-') {
ShowHelpAndExit(argv[i]);
}
oss << argv[i] << " ";
while (i + 1 < argc && argv[i + 1][0] != '-') {
oss << argv[++i] << " ";
}
}
initParam = NvEncoderInitParam(oss.str().c_str(), nullptr, true);
}
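(For context, a minimal caller of ParseCommandLine might look like the
hypothetical sketch below; file names, sizes, and defaults are illustrative.)
// Hedged sketch of a test-app entry point wiring up the parsed options.
int main(int argc, char *argv[]) {
  char szInFile[256] = "input.yuv";
  char szOutFile[256] = "out.h264";
  int nWidth = 1920, nHeight = 1080, iGpu = 0, iCase = 0, nFrame = 0;
  NV_ENC_BUFFER_FORMAT eFormat = NV_ENC_BUFFER_FORMAT_NV12;
  NvEncoderInitParam initParam;
  ParseCommandLine(argc, argv, szInFile, nWidth, nHeight, eFormat, szOutFile,
                   initParam, iGpu, iCase, nFrame);
  // ... create a CUDA context on iGpu, then run case 0 (dynamic bitrate)
  // or case 1 (dynamic resolution) for nFrame frames ...
  return 0;
}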

View File

@@ -0,0 +1,26 @@
/*
* @Author: DI JUNKUN
* @Date: 2024-09-10
* Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
*/
#ifndef _NVCODEC_COMMON_H_
#define _NVCODEC_COMMON_H_
#include <cuda.h>
#include <iostream>
#include <memory>
#include "NvCodecUtils.h"
#include "NvEncoderCLIOptions.h"
#include "NvEncoderCuda.h"
void ShowHelpAndExit(const char *szBadOption = NULL);
void ParseCommandLine(int argc, char *argv[], char *szInputFileName,
int &nWidth, int &nHeight, NV_ENC_BUFFER_FORMAT &eFormat,
char *szOutputFileName, NvEncoderInitParam &initParam,
int &iGpu, int &iCase, int &nFrame);
#endif

View File

@@ -362,7 +362,10 @@ int AomAv1Encoder::OnEncodedImage(char *encoded_packets, size_t size) {
return 0;
}
void AomAv1Encoder::ForceIdr() { force_i_frame_flags_ = AOM_EFLAG_FORCE_KF; }
int AomAv1Encoder::ForceIdr() {
force_i_frame_flags_ = AOM_EFLAG_FORCE_KF;
return 0;
}
int AomAv1Encoder::Release() {
if (frame_for_encode_ != nullptr) {

View File

@@ -50,7 +50,7 @@ class AomAv1Encoder : public VideoEncoder {
int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
int ForceIdr();
private:
template <typename P>

View File

@@ -1,106 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <cuda.h>
#include <stdint.h>
#include <mutex>
#include <vector>
#include "NvEncoder.h"
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char* szErrName = NULL; \
cuGetErrorName_ld(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVENCException::makeNVENCException( \
errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, \
__LINE__); \
} \
} while (0)
/**
* @brief Encoder for CUDA device memory.
*/
class NvEncoderCuda : public NvEncoder {
public:
NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
uint32_t nExtraOutputDelay = 3,
bool bMotionEstimationOnly = false,
bool bOPInVideoMemory = false);
virtual ~NvEncoderCuda();
/**
* @brief This is a static function to copy input data from host memory to
* device memory. This function assumes YUV plane is a single contiguous
* memory segment.
*/
static void CopyToDeviceFrame(
CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
const uint32_t dstChromaOffsets[], uint32_t numChromaPlanes,
bool bUnAlignedDeviceCopy = false, CUstream stream = NULL);
/**
* @brief This is a static function to copy input data from host memory to
* device memory. Application must pass a seperate device pointer for each YUV
* plane.
*/
static void CopyToDeviceFrame(
CUcontext device, void* pSrcFrame, uint32_t nSrcPitch,
CUdeviceptr pDstFrame, uint32_t dstPitch, int width, int height,
CUmemorytype srcMemoryType, NV_ENC_BUFFER_FORMAT pixelFormat,
CUdeviceptr dstChromaPtr[], uint32_t dstChromaPitch,
uint32_t numChromaPlanes, bool bUnAlignedDeviceCopy = false);
/**
* @brief This function sets input and output CUDA streams
*/
void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream,
NV_ENC_CUSTREAM_PTR outputStream);
protected:
/**
* @brief This function is used to release the input buffers allocated for
* encoding. This function is an override of virtual function
* NvEncoder::ReleaseInputBuffers().
*/
virtual void ReleaseInputBuffers() override;
private:
/**
* @brief This function is used to allocate input buffers for encoding.
* This function is an override of virtual function
* NvEncoder::AllocateInputBuffers().
*/
virtual void AllocateInputBuffers(int32_t numInputBuffers) override;
private:
/**
* @brief This is a private function to release CUDA device memory used for
* encoding.
*/
void ReleaseCudaResources();
protected:
CUcontext m_cuContext;
private:
size_t m_cudaPitch = 0;
};

View File

@@ -4,6 +4,7 @@
#include "log.h"
#include "nvcodec_api.h"
#include "nvcodec_common.h"
#define SAVE_RECEIVED_NV12_STREAM 0
#define SAVE_ENCODED_H264_STREAM 0
@@ -26,53 +27,82 @@ NvidiaVideoEncoder::~NvidiaVideoEncoder() {
free(nv12_data_);
nv12_data_ = nullptr;
}
if (encoder_) {
encoder_->DestroyEncoder();
}
}
int NvidiaVideoEncoder::Init() {
// Init cuda context
int num_of_GPUs = 0;
CUdevice cuda_device;
bool cuda_ctx_succeed =
(index_of_GPU >= 0 && cuInit_ld(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount_ld(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
cuDeviceGet_ld(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate_ld(&cuda_context_, 0, cuda_device) ==
CUresult::CUDA_SUCCESS);
if (!cuda_ctx_succeed) {
ck(cuInit(0));
int num_of_gpu = 0;
ck(cuDeviceGetCount(&num_of_gpu));
if (index_of_gpu_ < 0 || index_of_gpu_ >= num_of_gpu) {
LOG_ERROR("GPU ordinal out of range. Should be within [0-{}]");
return -1;
}
ck(cuDeviceGet(&cuda_device_, index_of_gpu_));
char device_name[80];
ck(cuDeviceGetName(device_name, sizeof(device_name), cuda_device_));
LOG_INFO("H.264 encoder using [{}]", device_name);
ck(cuCtxCreate(&cuda_context_, 0, cuda_device_));
encoder_ = new NvEncoderCuda(cuda_context_, frame_width_, frame_height_,
NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12);
buffer_format_, 0);
// Init encoder_ session
NV_ENC_INITIALIZE_PARAMS init_params;
init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encodeConfig = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encodeConfig;
encoder_->CreateDefaultEncoderParams(&init_params, codec_guid_, preset_guid_,
tuning_info_);
encoder_->CreateDefaultEncoderParams(&init_params, codec_guid, preset_guid,
tuning_info);
frame_width_max_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_WIDTH_MAX);
frame_height_max_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_HEIGHT_MAX);
// frame_width_min_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
// NV_ENC_CAPS_WIDTH_MIN);
// frame_height_min_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
// NV_ENC_CAPS_HEIGHT_MIN);
encode_level_max_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_LEVEL_MAX);
encode_level_min_ = encoder_->GetCapabilityValue(NV_ENC_CODEC_H264_GUID,
NV_ENC_CAPS_LEVEL_MIN);
support_dynamic_resolution_ = encoder_->GetCapabilityValue(
NV_ENC_CODEC_H264_GUID, NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE);
support_dynamic_bitrate_ = encoder_->GetCapabilityValue(
NV_ENC_CODEC_H264_GUID, NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE);
init_params.encodeWidth = frame_width_;
init_params.encodeHeight = frame_height_;
init_params.encodeConfig->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
init_params.encodeConfig->gopLength = keyFrameInterval_;
init_params.encodeConfig->frameIntervalP = 1;
init_params.encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_VBR;
init_params.encodeConfig->rcParams.maxBitRate = maxBitrate_ * 500;
// init_params.encodeConfig->rcParams.enableMinQP = 1;
// init_params.encodeConfig->rcParams.minQP.qpIntra = 10;
init_params.encodeConfig->rcParams.enableMaxQP = 1;
init_params.encodeConfig->rcParams.maxQP.qpIntra = 22;
init_params.encodeConfig->encodeCodecConfig.h264Config.level =
NV_ENC_LEVEL::NV_ENC_LEVEL_H264_31;
init_params.encodeConfig->encodeCodecConfig.h264Config.sliceMode = 1;
init_params.encodeConfig->encodeCodecConfig.h264Config.sliceModeData =
max_payload_size_;
// init_params.encodeConfig->encodeCodecConfig.h264Config.disableSPSPPS = 1;
// init_params.encodeConfig->encodeCodecConfig.h264Config.repeatSPSPPS = 1;
// Must set max encode width and height, otherwise the encoder crashes when
// trying to reconfigure the resolution
init_params.maxEncodeWidth = frame_width_max_;
init_params.maxEncodeHeight = frame_height_max_;
// init_params.darWidth = init_params.encodeWidth;
// init_params.darHeight = init_params.encodeHeight;
encodeConfig.gopLength = key_frame_interval_;
encodeConfig.frameIntervalP = 1;
encodeConfig.encodeCodecConfig.h264Config.idrPeriod = key_frame_interval_;
encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
// encodeConfig.rcParams.enableMaxQP = 1;
// encodeConfig.rcParams.enableMinQP = 1;
// encodeConfig.rcParams.maxQP.qpIntra = 22;
// encodeConfig.rcParams.minQP.qpIntra = 10;
encodeConfig.rcParams.averageBitRate = average_bitrate_;
// use the default VBV buffer size
encodeConfig.rcParams.vbvBufferSize = 0;
encodeConfig.rcParams.maxBitRate = max_bitrate_;
// use the default VBV initial delay
encodeConfig.rcParams.vbvInitialDelay = 0;
// enable adaptive quantization (Spatial)
encodeConfig.rcParams.enableAQ = false;
encodeConfig.encodeCodecConfig.h264Config.idrPeriod = encodeConfig.gopLength;
encodeConfig.encodeCodecConfig.h264Config.level = encode_level_max_;
// encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = 1;
// encodeConfig.encodeCodecConfig.h264Config.repeatSPSPPS = 1;
encoder_->CreateEncoder(&init_params);
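(The two maxEncode* assignments above are the heart of this fix. A condensed
sketch of the reconfigure-safe pattern, with illustrative numbers:)
// Hedged sketch: maxEncodeWidth/maxEncodeHeight must cover the largest
// resolution that will ever be requested, and must be set before
// CreateEncoder(); a later Reconfigure() cannot grow beyond these bounds.
NV_ENC_INITIALIZE_PARAMS p = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG c = {NV_ENC_CONFIG_VER};
p.encodeConfig = &c;
p.encodeWidth = 1280;           // initial resolution
p.encodeHeight = 720;
p.maxEncodeWidth = 4096;        // upper bound for any later reconfigure
p.maxEncodeHeight = 4096;
// encoder->CreateEncoder(&p); ... a later ResetEncodeResolution() may then
// go up to 4096x4096 without recreating the session.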
@@ -94,7 +124,7 @@ int NvidiaVideoEncoder::Init() {
}
int NvidiaVideoEncoder::Encode(
const uint8_t *pData, int nSize,
const XVideoFrame *video_frame,
std::function<int(char *encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image) {
@@ -104,7 +134,16 @@ int NvidiaVideoEncoder::Encode(
}
if (SAVE_RECEIVED_NV12_STREAM) {
fwrite(pData, 1, nSize, file_nv12_);
fwrite(video_frame->data, 1, video_frame->size, file_nv12_);
}
if (video_frame->width != frame_width_ ||
video_frame->height != frame_height_) {
if (support_dynamic_resolution_) {
if (0 != ResetEncodeResolution(video_frame->width, video_frame->height)) {
return -1;
}
}
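// Note (assumption): without dynamic-resolution support the frame is still
// copied at the old encode size below, so the caller is expected to deliver
// frames that match the configured resolution.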
}
VideoFrameType frame_type;
@@ -120,10 +159,11 @@ int NvidiaVideoEncoder::Encode(
#endif
const NvEncInputFrame *encoder_inputframe = encoder_->GetNextInputFrame();
// LOG_ERROR("w:{}, h:{}", encoder_->GetEncodeWidth(),
// encoder_->GetEncodeHeight());
NvEncoderCuda::CopyToDeviceFrame(
cuda_context_,
(void *)pData, // NOLINT
(void *)video_frame->data, // NOLINT
0, (CUdeviceptr)encoder_inputframe->inputPtr, encoder_inputframe->pitch,
encoder_->GetEncodeWidth(), encoder_->GetEncodeHeight(),
CU_MEMORYTYPE_HOST, encoder_inputframe->bufferFormat,
@@ -161,20 +201,64 @@ int NvidiaVideoEncoder::OnEncodedImage(char *encoded_packets, size_t size) {
return 0;
}
void NvidiaVideoEncoder::ForceIdr() {
NV_ENC_RECONFIGURE_PARAMS reconfig_params;
reconfig_params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
int NvidiaVideoEncoder::ForceIdr() {
if (!encoder_) {
return -1;
}
NV_ENC_INITIALIZE_PARAMS init_params;
NV_ENC_RECONFIGURE_PARAMS reconfig_params = {NV_ENC_RECONFIGURE_PARAMS_VER};
NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
encoder_->GetInitializeParams(&init_params);
reconfig_params.reInitEncodeParams = init_params;
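// forceIDR makes the next submitted frame an IDR; resetEncoder additionally
// flushes encoder state so encoding restarts cleanly from that frame.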
reconfig_params.forceIDR = 1;
reconfig_params.resetEncoder = 1;
if (!encoder_->Reconfigure(&reconfig_params)) {
LOG_ERROR("Failed to force I frame");
return -1;
}
return 0;
}
int NvidiaVideoEncoder::ResetEncodeResolution(unsigned int width,
unsigned int height) {
if (!encoder_) {
return -1;
}
if (width > frame_width_max_ || height > frame_height_max_) {
LOG_ERROR(
"Target resolution is too large for this hardware encoder, which "
"[{}x{}] and support max resolution is [{}x{}]",
width, height, frame_width_max_, frame_height_max_);
return -1;
}
frame_width_ = width;
frame_height_ = height;
NV_ENC_RECONFIGURE_PARAMS reconfig_params = {NV_ENC_RECONFIGURE_PARAMS_VER};
NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER};
NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER};
init_params.encodeConfig = &encode_config;
encoder_->GetInitializeParams(&init_params);
reconfig_params.reInitEncodeParams = init_params;
reconfig_params.reInitEncodeParams.encodeWidth = frame_width_;
reconfig_params.reInitEncodeParams.encodeHeight = frame_height_;
// reconfig_params.reInitEncodeParams.darWidth =
// reconfig_params.reInitEncodeParams.encodeWidth;
// reconfig_params.reInitEncodeParams.darHeight =
// reconfig_params.reInitEncodeParams.encodeHeight;
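// Request an IDR with the first frame at the new resolution so the stream
// stays decodable across the switch.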
reconfig_params.forceIDR = 1;
if (!encoder_->Reconfigure(&reconfig_params)) {
LOG_ERROR("Failed to reset resolution");
return -1;
}
return 0;
}

View File

@@ -13,31 +13,49 @@ class NvidiaVideoEncoder : public VideoEncoder {
int Init();
int Encode(const uint8_t* pData, int nSize,
std::function<int(char* encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image);
int Encode(const XVideoFrame* video_frame,
std::function<int(char* encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image) {
return 0;
}
int Encode(const XVideoFrame* video_frame,
std::function<int(char* encoded_packets, size_t size,
VideoFrameType frame_type)>
on_encoded_image);
virtual int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
int ForceIdr();
private:
int index_of_GPU = 0;
GUID codec_guid = NV_ENC_CODEC_H264_GUID;
GUID preset_guid = NV_ENC_PRESET_P2_GUID;
NV_ENC_TUNING_INFO tuning_info =
int ResetEncodeResolution(unsigned int width, unsigned int height);
private:
int index_of_gpu_ = 0;
CUdevice cuda_device_ = 0;
GUID codec_guid_ = NV_ENC_CODEC_H264_GUID;
GUID preset_guid_ = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO tuning_info_ =
NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
int frame_width_ = 1280;
int frame_height_ = 720;
int keyFrameInterval_ = 3000;
int maxBitrate_ = 1000;
NV_ENC_BUFFER_FORMAT buffer_format_ =
NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12;
uint32_t frame_width_max_ = 0;
uint32_t frame_height_max_ = 0;
uint32_t frame_width_min_ = 0;
uint32_t frame_height_min_ = 0;
uint32_t encode_level_max_ = 0;
uint32_t encode_level_min_ = 0;
bool support_dynamic_resolution_ = false;
bool support_dynamic_bitrate_ = false;
uint32_t frame_width_ = 1280;
uint32_t frame_height_ = 720;
uint32_t key_frame_interval_ = 3000;
uint32_t average_bitrate_ = 2000000;
uint32_t max_bitrate_ = 10000000;
int max_payload_size_ = 3000;
NvEncoder* encoder_ = nullptr;
CUcontext cuda_context_ = nullptr;

View File

@@ -358,10 +358,12 @@ int OpenH264Encoder::OnEncodedImage(char *encoded_packets, size_t size) {
return 0;
}
void OpenH264Encoder::ForceIdr() {
int OpenH264Encoder::ForceIdr() {
if (openh264_encoder_) {
openh264_encoder_->ForceIntraFrame(true);
return openh264_encoder_->ForceIntraFrame(true);
}
return 0;
}
int OpenH264Encoder::Release() {

View File

@@ -37,7 +37,7 @@ class OpenH264Encoder : public VideoEncoder {
int OnEncodedImage(char* encoded_packets, size_t size);
void ForceIdr();
int ForceIdr();
private:
int InitEncoderParams(int width, int height);

View File

@@ -29,7 +29,7 @@ class VideoEncoder {
on_encoded_image) = 0;
virtual int OnEncodedImage(char* encoded_packets, size_t size) = 0;
virtual void ForceIdr() = 0;
virtual int ForceIdr() = 0;
VideoEncoder() = default;
virtual ~VideoEncoder() {}

View File

@@ -491,8 +491,8 @@ int PeerConnection::SendVideoData(const char *data, size_t size) {
}
if (b_force_i_frame_) {
video_encoder_->ForceIdr();
LOG_INFO("Force I frame");
video_encoder_->ForceIdr();
b_force_i_frame_ = false;
}

View File

@@ -49,14 +49,14 @@ int IceTransmission::InitIceTransmission(
uint32_t audio_inbound_bitrate, uint32_t audio_outbound_bitrate,
uint32_t data_inbound_bitrate, uint32_t data_outbound_bitrate,
uint32_t total_inbound_bitrate, uint32_t total_outbound_bitrate) {
LOG_ERROR(
"video in: [{}] kbps, video out: [{}] kbps, audio in: [{}] kbps, "
"audio out: [{}] kbps, data in: [{}] kbps, data out: [{}] kbps, "
"total in: [{}] kbps, total out: [{}] kbps",
video_inbound_bitrate / 1000, video_outbound_bitrate / 1000,
audio_inbound_bitrate / 1000, audio_outbound_bitrate / 1000,
data_inbound_bitrate / 1000, data_outbound_bitrate / 1000,
total_inbound_bitrate / 1000, total_outbound_bitrate / 1000);
// LOG_ERROR(
//     "video in: [{}] kbps, video out: [{}] kbps, audio in: [{}] kbps, "
//     "audio out: [{}] kbps, data in: [{}] kbps, data out: [{}] kbps, "
//     "total in: [{}] kbps, total out: [{}] kbps",
//     video_inbound_bitrate / 1000, video_outbound_bitrate / 1000,
//     audio_inbound_bitrate / 1000, audio_outbound_bitrate / 1000,
//     data_inbound_bitrate / 1000, data_outbound_bitrate / 1000,
//     total_inbound_bitrate / 1000, total_outbound_bitrate / 1000);
});
video_rtp_codec_ = std::make_unique<RtpCodec>(video_codec_payload_type);
audio_rtp_codec_ = std::make_unique<RtpCodec>(RtpPacket::PAYLOAD_TYPE::OPUS);

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -1106,7 +1106,6 @@ extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
#endif
#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
/****************************************************************************************************************************/
//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
//! unsigned int * pPitch, CUVIDPROCPARAMS *pVPP);
@@ -1126,7 +1125,6 @@ extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned
#define cuvidMapVideoFrame cuvidMapVideoFrame64
#define cuvidUnmapVideoFrame cuvidUnmapVideoFrame64
#endif
#endif

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -28,7 +28,7 @@
/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
//! \date 2015-2020
//! \date 2015-2024
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/
@@ -41,6 +41,7 @@
extern "C" {
#endif /* __cplusplus */
#define MAX_CLOCK_TS 3
/***********************************************/
//!
@@ -78,6 +79,106 @@ typedef enum {
cudaAudioCodec_AAC, /**< AAC Audio */
} cudaAudioCodec;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODESET
//! Used to store Time code set extracted from H264 and HEVC codecs
/************************************************************************/
typedef struct _TIMECODESET
{
unsigned int time_offset_value;
unsigned short n_frames;
unsigned char clock_timestamp_flag;
unsigned char units_field_based_flag;
unsigned char counting_type;
unsigned char full_timestamp_flag;
unsigned char discontinuity_flag;
unsigned char cnt_dropped_flag;
unsigned char seconds_value;
unsigned char minutes_value;
unsigned char hours_value;
unsigned char seconds_flag;
unsigned char minutes_flag;
unsigned char hours_flag;
unsigned char time_offset_length;
unsigned char reserved;
} TIMECODESET;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODE
//! Used to extract Time code in H264 and HEVC codecs
/************************************************************************/
typedef struct _TIMECODE
{
TIMECODESET time_code_set[MAX_CLOCK_TS];
unsigned char num_clock_ts;
} TIMECODE;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEIMASTERINGDISPLAYINFO
//! Used to extract mastering display color volume SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEIMASTERINGDISPLAYINFO
{
unsigned short display_primaries_x[3];
unsigned short display_primaries_y[3];
unsigned short white_point_x;
unsigned short white_point_y;
unsigned int max_display_mastering_luminance;
unsigned int min_display_mastering_luminance;
} SEIMASTERINGDISPLAYINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEICONTENTLIGHTLEVELINFO
//! Used to extract content light level info SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEICONTENTLIGHTLEVELINFO
{
unsigned short max_content_light_level;
unsigned short max_pic_average_light_level;
unsigned int reserved;
} SEICONTENTLIGHTLEVELINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODEMPEG2
//! Used to extract Time code in MPEG2 codec
/**********************************************************************************/
typedef struct _TIMECODEMPEG2
{
unsigned char drop_frame_flag;
unsigned char time_code_hours;
unsigned char time_code_minutes;
unsigned char marker_bit;
unsigned char time_code_seconds;
unsigned char time_code_pictures;
} TIMECODEMPEG2;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEIALTERNATIVETRANSFERCHARACTERISTICS
//! Used to extract alternative transfer characteristics SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEIALTERNATIVETRANSFERCHARACTERISTICS
{
unsigned char preferred_transfer_characteristics;
} SEIALTERNATIVETRANSFERCHARACTERISTICS;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUSEIMESSAGE
//! Used in CUVIDSEIMESSAGEINFO structure
/**********************************************************************************/
typedef struct _CUSEIMESSAGE
{
unsigned char sei_message_type; /**< OUT: SEI Message Type */
unsigned char reserved[3];
unsigned int sei_message_size; /**< OUT: SEI Message Size */
} CUSEIMESSAGE;
/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
@@ -168,6 +269,19 @@ typedef struct
};
} CUVIDOPERATINGPOINTINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSEIMESSAGEINFO
//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg
/**********************************************************************************/
typedef struct _CUVIDSEIMESSAGEINFO
{
void *pSEIData; /**< OUT: SEI Message Data */
CUSEIMESSAGE *pSEIMessage; /**< OUT: SEI Message Info */
unsigned int sei_message_count; /**< OUT: SEI Message Count */
unsigned int picIdx; /**< OUT: SEI Message Pic Index */
} CUVIDSEIMESSAGEINFO;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
@@ -366,11 +480,13 @@ typedef struct _CUVIDPARSERDISPINFO
//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
//! PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded
/***********************************************************************************************************************/
typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
typedef int (CUDAAPI *PFNVIDSEIMSGCALLBACK) (void *, CUVIDSEIMESSAGEINFO *);
/**************************************/
//! \ingroup STRUCTS
@@ -395,7 +511,8 @@ typedef struct _CUVIDPARSERPARAMS
PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1
scalable bitstream */
void *pvReserved2[6]; /**< Reserved for future use - set to NULL */
PFNVIDSEIMSGCALLBACK pfnGetSEIMsg; /**< IN: Called when all SEI messages are parsed for a particular frame */
void *pvReserved2[5]; /**< Reserved for future use - set to NULL */
CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;
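The new pfnGetSEIMsg hook registers like the other parser callbacks. A minimal sketch, assuming an H.264 stream and eliding the other mandatory callbacks; HandleSEI is an illustrative handler that walks the CUVIDSEIMESSAGEINFO layout shown above:

// Illustrative handler: iterate the SEI messages parsed for one frame.
static int CUDAAPI HandleSEI(void *pUserData, CUVIDSEIMESSAGEINFO *pInfo) {
    const unsigned char *pData = (const unsigned char *)pInfo->pSEIData;
    for (unsigned int i = 0; i < pInfo->sei_message_count; i++) {
        const CUSEIMESSAGE &msg = pInfo->pSEIMessage[i];
        // msg.sei_message_type and msg.sei_message_size describe pData[0..size)
        pData += msg.sei_message_size;
    }
    return 1;  // >= 1: succeeded
}

CUVIDPARSERPARAMS parserParams = {};
parserParams.CodecType = cudaVideoCodec_H264;
parserParams.ulMaxNumDecodeSurfaces = 1;
// pfnSequenceCallback / pfnDecodePicture / pfnDisplayPicture set as usual...
parserParams.pfnGetSEIMsg = HandleSEI;  // new field, taking one reserved slot
CUvideoparser hParser = NULL;
cuvidCreateVideoParser(&hParser, &parserParams);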


@@ -1 +0,0 @@
libcuda.so.1

Binary file not shown.


@@ -1 +0,0 @@
libnvcuvid.so.1

Binary file not shown.


@@ -1 +0,0 @@
libnvidia-encode.so.1

Binary file not shown.


@@ -1,111 +0,0 @@
# Copyright 2020 NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
# 3.7 is required for FindVulkan module support in CMake.
cmake_minimum_required(VERSION 3.7)
project(NvCodec)
# Set C++11 for all projects and disable non-standard extensions
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_INSTALL_PREFIX .)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Utils)
set(NV_CODEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec)
set(NV_ENC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvEncoder)
set(NV_DEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvDecoder)
set(NV_APPENC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppEncode/Common)
set(NV_APPDEC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppDecode/Common)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
else()
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
endif()
if(WIN32)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvencodeapi.lib)
else()
set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvencodeapi.lib)
endif()
else ()
find_library(CUVID_LIB nvcuvid)
find_library(NVENCODEAPI_LIB nvidia-encode)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()
if(WIN32)
add_subdirectory(AppEncode/AppEncD3D11)
add_subdirectory(AppEncode/AppEncD3D9)
add_subdirectory(AppDecode/AppDecD3D)
else ()
#Need only linux Makefile for this
add_subdirectory(AppEncode/AppEncGL)
endif()
add_subdirectory(AppEncode/AppEncCuda)
add_subdirectory(AppEncode/AppEncDec)
add_subdirectory(AppEncode/AppEncLowLatency)
add_subdirectory(AppEncode/AppEncME)
add_subdirectory(AppEncode/AppEncPerf)
add_subdirectory(AppEncode/AppEncQual)
add_subdirectory(AppEncode/AppMotionEstimationVkCuda)
add_subdirectory(AppTranscode/AppTrans)
add_subdirectory(AppTranscode/AppTransOneToN)
add_subdirectory(AppTranscode/AppTransPerf)
add_subdirectory(AppDecode/AppDec)
add_subdirectory(AppDecode/AppDecGL)
add_subdirectory(AppDecode/AppDecImageProvider)
add_subdirectory(AppDecode/AppDecLowLatency)
add_subdirectory(AppDecode/AppDecMem)
add_subdirectory(AppDecode/AppDecMultiFiles)
add_subdirectory(AppDecode/AppDecMultiInput)
add_subdirectory(AppDecode/AppDecPerf)


@@ -1,54 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int destStrideInPixels = nDestPitch / (sizeof(uint16_t));
*(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] };
}
static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t));
dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y;
}
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight);
}
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt16ToUInt8Kernel <<<gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight);
}
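Both wrappers take device pointers with pitches in bytes. A minimal usage sketch under the assumption of tightly packed planes (pitch equals row size); dimensions are illustrative:

int w = 1920, h = 1080;
uint8_t *dpSrc8 = NULL;
uint16_t *dpDst16 = NULL;
cudaMalloc(&dpSrc8, w * h);
cudaMalloc(&dpDst16, w * h * sizeof(uint16_t));
// Widens each 8-bit sample into the high byte of a 16-bit sample.
ConvertUInt8ToUInt16(dpSrc8, dpDst16, w, w * (int)sizeof(uint16_t), w, h);
cudaDeviceSynchronize();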


@@ -1,399 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "ColorSpace.h"
__constant__ float matYuv2Rgb[3][3];
__constant__ float matRgb2Yuv[3][3];
void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) {
black = 16; white = 235;
max = 255;
switch (iMatrix)
{
case ColorSpaceStandard_BT709:
default:
wr = 0.2126f; wb = 0.0722f;
break;
case ColorSpaceStandard_FCC:
wr = 0.30f; wb = 0.11f;
break;
case ColorSpaceStandard_BT470:
case ColorSpaceStandard_BT601:
wr = 0.2990f; wb = 0.1140f;
break;
case ColorSpaceStandard_SMPTE240M:
wr = 0.212f; wb = 0.087f;
break;
case ColorSpaceStandard_BT2020:
case ColorSpaceStandard_BT2020C:
wr = 0.2627f; wb = 0.0593f;
// 10-bit only
black = 64 << 6; white = 940 << 6;
max = (1 << 16) - 1;
break;
}
}
void SetMatYuv2Rgb(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
1.0f, 0.0f, (1.0f - wr) / 0.5f,
1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
1.0f, (1.0f - wb) / 0.5f, 0.0f,
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
}
}
cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat));
}
void SetMatRgb2Yuv(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
wr, 1.0f - wb - wr, wb,
-0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
}
}
cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat));
}
template<class T>
__device__ static T Clamp(T x, T lower, T upper) {
return x < lower ? lower : (x > upper ? upper : x);
}
template<class Rgb, class YuvUnit>
__device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
const int
low = 1 << (sizeof(YuvUnit) * 8 - 4),
mid = 1 << (sizeof(YuvUnit) * 8 - 1);
float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
YuvUnit
r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf),
g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf),
b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf);
Rgb rgb{};
const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
rgb.c.r = r >> nShift;
rgb.c.g = g >> nShift;
rgb.c.b = b >> nShift;
} else {
rgb.c.r = r << nShift;
rgb.c.g = g << nShift;
rgb.c.b = b << nShift;
}
return rgb;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
*(RgbIntx2 *)pDst = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
};
*(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
*(RgbIntx2 *)pDst = RgbIntx2{
YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2)>>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
// Explicit Instantiation
template void Nv12ToColor32<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor32<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<BGRA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<RGBA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<BGRA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<RGBA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
RgbIntx2 int2a = *(RgbIntx2 *)pSrc;
RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch);
Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
decltype(Rgb::c.r)
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
*(YuvUnitx2 *)pDst = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
};
*(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
};
*(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 {
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
};
}
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) {
SetMatRgb2Yuv(iMatrix);
RgbToYuvKernel<ushort2, BGRA64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight);
}
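A usage sketch for the conversion entry points above: one NV12 device frame to interleaved BGRA, assuming pitch-linear allocations (dimensions illustrative):

int w = 1280, h = 720;
uint8_t *dpNv12 = NULL, *dpBgra = NULL;
size_t nv12Pitch = 0, bgraPitch = 0;
cudaMallocPitch((void **)&dpNv12, &nv12Pitch, w, h * 3 / 2);  // Y plane + interleaved UV
cudaMallocPitch((void **)&dpBgra, &bgraPitch, (size_t)w * 4, h);
Nv12ToColor32<BGRA32>(dpNv12, (int)nv12Pitch, dpBgra, (int)bgraPitch, w, h,
                      ColorSpaceStandard_BT709);
cudaDeviceSynchronize();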


@@ -1,48 +0,0 @@
#pragma once
#include <stdint.h>
#include <cuda_runtime.h>
typedef enum ColorSpaceStandard {
ColorSpaceStandard_BT709 = 1,
ColorSpaceStandard_Unspecified = 2,
ColorSpaceStandard_Reserved = 3,
ColorSpaceStandard_FCC = 4,
ColorSpaceStandard_BT470 = 5,
ColorSpaceStandard_BT601 = 6,
ColorSpaceStandard_SMPTE240M = 7,
ColorSpaceStandard_YCgCo = 8,
ColorSpaceStandard_BT2020 = 9,
ColorSpaceStandard_BT2020C = 10
} ColorSpaceStandard;
union BGRA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t b, g, r, a;
} c;
};
union RGBA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t r, g, b, a;
} c;
};
union BGRA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t b, g, r, a;
} c;
};
union RGBA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t r, g, b, a;
} c;
};


@@ -1,357 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
}
#include "NvCodecUtils.h"
//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from them.
//---------------------------------------------------------------------------
/**
* @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
*/
class FFmpegDemuxer {
private:
AVFormatContext *fmtc = NULL;
AVIOContext *avioc = NULL;
AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
AVBSFContext *bsfc = NULL;
int iVideoStream;
bool bMp4H264, bMp4HEVC, bMp4MPEG4;
AVCodecID eVideoCodec;
AVPixelFormat eChromaFormat;
int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
double timeBase = 0.0;
int64_t userTimeScale = 0;
uint8_t *pDataWithHeader = NULL;
unsigned int frameCount = 0;
public:
class DataProvider {
public:
virtual ~DataProvider() {}
virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
};
private:
/**
* @brief Private constructor to initialize libavformat resources.
* @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
*/
FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
if (!fmtc) {
LOG(ERROR) << "No AVFormatContext provided.";
return;
}
LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
ck(avformat_find_stream_info(fmtc, NULL));
iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (iVideoStream < 0) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file";
return;
}
//fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
nWidth = fmtc->streams[iVideoStream]->codecpar->width;
nHeight = fmtc->streams[iVideoStream]->codecpar->height;
eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
timeBase = av_q2d(rTimeBase);
userTimeScale = timeScale;
// Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
switch (eChromaFormat)
{
case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 10;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV420P12LE:
nBitDepth = 12;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P10LE:
nBitDepth = 10;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P12LE:
nBitDepth = 12;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P:
nBitDepth = 8;
nChromaHeight = nHeight << 1;
nBPP = 1;
break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
break;
default:
LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
eChromaFormat = AV_PIX_FMT_YUV420P;
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
}
bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
//Initialize packet fields with default values
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
av_init_packet(&pktFiltered);
pktFiltered.data = NULL;
pktFiltered.size = 0;
// Initialize bitstream filter and its required resources
if (bMp4H264) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
if (bMp4HEVC) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
}
AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
AVFormatContext *ctx = NULL;
if (!(ctx = avformat_alloc_context())) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
uint8_t *avioc_buffer = NULL;
int avioc_buffer_size = 8 * 1024 * 1024;
avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
if (!avioc_buffer) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
0, pDataProvider, &ReadPacket, NULL, NULL);
if (!avioc) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
ctx->pb = avioc;
ck(avformat_open_input(&ctx, NULL, NULL, NULL));
return ctx;
}
/**
* @brief Allocate and return AVFormatContext*.
* @param szFilePath - Filepath pointing to input stream.
* @return Pointer to AVFormatContext
*/
AVFormatContext *CreateFormatContext(const char *szFilePath) {
avformat_network_init();
AVFormatContext *ctx = NULL;
ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
return ctx;
}
public:
FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;}
~FFmpegDemuxer() {
if (!fmtc) {
return;
}
if (pkt.data) {
av_packet_unref(&pkt);
}
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
if (bsfc) {
av_bsf_free(&bsfc);
}
avformat_close_input(&fmtc);
if (avioc) {
av_freep(&avioc->buffer);
av_freep(&avioc);
}
if (pDataWithHeader) {
av_free(pDataWithHeader);
}
}
AVCodecID GetVideoCodec() {
return eVideoCodec;
}
AVPixelFormat GetChromaFormat() {
return eChromaFormat;
}
int GetWidth() {
return nWidth;
}
int GetHeight() {
return nHeight;
}
int GetBitDepth() {
return nBitDepth;
}
int GetFrameSize() {
return nWidth * (nHeight + nChromaHeight) * nBPP;
}
bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
if (!fmtc) {
return false;
}
*pnVideoBytes = 0;
if (pkt.data) {
av_packet_unref(&pkt);
}
int e = 0;
while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) {
av_packet_unref(&pkt);
}
if (e < 0) {
return false;
}
if (bMp4H264 || bMp4HEVC) {
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
ck(av_bsf_send_packet(bsfc, &pkt));
ck(av_bsf_receive_packet(bsfc, &pktFiltered));
*ppVideo = pktFiltered.data;
*pnVideoBytes = pktFiltered.size;
if (pts)
*pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase);
} else {
if (bMp4MPEG4 && (frameCount == 0)) {
int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
if (extraDataSize > 0) {
// extradata contains start codes 00 00 01. Subtract its size
pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t));
if (!pDataWithHeader) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return false;
}
memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t));
*ppVideo = pDataWithHeader;
*pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t);
}
} else {
*ppVideo = pkt.data;
*pnVideoBytes = pkt.size;
}
if (pts)
*pts = (int64_t)(pkt.pts * userTimeScale * timeBase);
}
frameCount++;
return true;
}
static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
}
};
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4 : return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_WMV3 :
case AV_CODEC_ID_VC1 : return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264 : return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC : return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8 : return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9 : return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG : return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1 : return cudaVideoCodec_AV1;
default : return cudaVideoCodec_NumCodecs;
}
}
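Typical usage: demux one packet at a time and hand the elementary stream to a decoder. A minimal sketch assuming a local input file:

FFmpegDemuxer demuxer("input.mp4");
cudaVideoCodec codec = FFmpeg2NvCodecId(demuxer.GetVideoCodec());
uint8_t *pVideo = NULL;
int nVideoBytes = 0;
int64_t pts = 0;
while (demuxer.Demux(&pVideo, &nVideoBytes, &pts)) {
    // pVideo/nVideoBytes hold one packet (annex-B for MP4-contained H264/HEVC);
    // feed it to cuvidParseVideoData or another consumer together with pts.
}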


@@ -1,109 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <thread>
#include <mutex>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"
extern simplelogger::Logger *logger;
class FFmpegStreamer {
private:
AVFormatContext *oc = NULL;
AVStream *vs = NULL;
int nFps = 0;
public:
FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
avformat_network_init();
oc = avformat_alloc_context();
if (!oc) {
LOG(ERROR) << "FFMPEG: avformat_alloc_context error";
return;
}
// Set format on oc
AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL);
if (!fmt) {
LOG(ERROR) << "Invalid format";
return;
}
fmt->video_codec = eCodecId;
oc->oformat = fmt;
oc->url = av_strdup(szInFilePath);
LOG(INFO) << "Streaming destination: " << oc->url;
// Add video stream to oc
vs = avformat_new_stream(oc, NULL);
if (!vs) {
LOG(ERROR) << "FFMPEG: Could not alloc video stream";
return;
}
vs->id = 0;
// Set video parameters
AVCodecParameters *vpar = vs->codecpar;
vpar->codec_id = fmt->video_codec;
vpar->codec_type = AVMEDIA_TYPE_VIDEO;
vpar->width = nWidth;
vpar->height = nHeight;
// Everything is ready. Now open the output stream.
if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
return;
}
// Write the container header
if (avformat_write_header(oc, NULL)) {
LOG(ERROR) << "FFMPEG: avformat_write_header error!";
return;
}
}
~FFmpegStreamer() {
if (oc) {
av_write_trailer(oc);
avio_close(oc->pb);
avformat_free_context(oc);
}
}
bool Stream(uint8_t *pData, int nBytes, int nPts) {
AVPacket pkt = {0};
av_init_packet(&pkt);
pkt.pts = av_rescale_q(nPts++, AVRational {1, nFps}, vs->time_base);
// No B-frames
pkt.dts = pkt.pts;
pkt.stream_index = vs->index;
pkt.data = pData;
pkt.size = nBytes;
if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
// Write the compressed frame into the output
int ret = av_write_frame(oc, &pkt);
av_write_frame(oc, NULL);
if (ret < 0) {
LOG(ERROR) << "FFMPEG: Error while writing video frame";
}
return true;
}
};
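FFmpegStreamer muxes each encoded access unit into MPEG-TS and writes it to the URL passed at construction. A minimal sketch; GetEncodedPackets is a hypothetical stand-in for the encoder output, and the UDP destination is illustrative:

#include <vector>
std::vector<std::vector<uint8_t>> GetEncodedPackets();  // hypothetical encoder output

FFmpegStreamer streamer(AV_CODEC_ID_H264, 1920, 1080, 30, "udp://127.0.0.1:8080");
int pts = 0;
for (auto &pkt : GetEncodedPackets()) {
    streamer.Stream(pkt.data(), (int)pkt.size(), pts++);
}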


@@ -1,192 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
template<typename YuvUnitx2>
static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int ix = blockIdx.x * blockDim.x + threadIdx.x,
iy = blockIdx.y * blockDim.y + threadIdx.y;
if (ix >= nWidth / 2 || iy >= nHeight / 2) {
return;
}
int x = ix * 2, y = iy * 2;
typedef decltype(YuvUnitx2::x) YuvUnit;
const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
y++;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
*(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
}
template <typename YuvUnitx2>
static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
cudaTextureObject_t texY=0;
ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
resDesc.res.pitch2D.width = nSrcWidth / 2;
resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
cudaTextureObject_t texUv=0;
ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
Resize<YuvUnitx2> <<<dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32), dim3(16, 16)>>>(texY, texUv, dpDst, dpDstUV,
    nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
ck(cudaDestroyTextureObject(texY));
ck(cudaDestroyTextureObject(texUv));
}
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
{
unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
}
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
{
unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
}
static __global__ void Scale(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
*(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
}
static __global__ void Scale_uv(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
*(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
}
void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false)
{
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
cudaTextureObject_t texSrc = 0;
ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
if (bUVPlane)
{
Scale_uv <<<gridSize, blockSize>>>(texSrc, dpDst,
    nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
else
{
Scale <<<gridSize, blockSize>>>(texSrc, dpDst,
    nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
ck(cudaGetLastError());
ck(cudaDestroyTextureObject(texSrc));
}
void ScaleYUV420(unsigned char *dpDstY,
unsigned char* dpDstU,
unsigned char* dpDstV,
int nDstPitch,
int nDstChromaPitch,
int nDstWidth,
int nDstHeight,
unsigned char *dpSrcY,
unsigned char* dpSrcU,
unsigned char* dpSrcV,
int nSrcPitch,
int nSrcChromaPitch,
int nSrcWidth,
int nSrcHeight,
bool bSemiplanar)
{
int chromaWidthDst = (nDstWidth + 1) / 2;
int chromaHeightDst = (nDstHeight + 1) / 2;
int chromaWidthSrc = (nSrcWidth + 1) / 2;
int chromaHeightSrc = (nSrcHeight + 1) / 2;
ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
if (bSemiplanar)
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
}
else
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
}
}
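A usage sketch for ResizeNv12, downscaling one NV12 device frame; packed allocations are assumed (pitch equals width), and passing NULL for the last argument places the destination UV plane directly after the Y plane:

int sw = 1920, sh = 1080, dw = 1280, dh = 720;
unsigned char *dpSrc = NULL, *dpDst = NULL;
cudaMalloc(&dpSrc, sw * sh * 3 / 2);  // NV12: Y plane followed by interleaved UV
cudaMalloc(&dpDst, dw * dh * 3 / 2);
ResizeNv12(dpDst, dw, dw, dh, dpSrc, sw, sw, sh, NULL);
cudaDeviceSynchronize();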


@@ -1,126 +0,0 @@
/*
* Copyright 2018-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
/*
* CRC32 lookup table
* Generated by the following routine
* int i, j;
* U032 crc;
* for (i = 0; i < 256; i++)
* {
* crc = i;
* for (j = 0; j < 8; j++) { // 8 reduction
* crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320L : 0);
* }
* Crc32Table[i] = crc;
* }
*/
__device__ __constant__ uint32_t Crc32Table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
typedef struct _NV_ENC_ENCODE_OUT_PARAMS
{
uint32_t version; /**< [out]: Struct version. */
uint32_t bitstreamSizeInBytes; /**< [out]: Encoded bitstream size in bytes */
uint32_t cycleCount; /**< [out]: Cycle count */
uint32_t firstPassCycleCount; /**< [out]: First pass cycle count */
uint32_t reserved[60]; /**< [out]: Reserved and must be set to 0 */
} NV_ENC_ENCODE_OUT_PARAMS;
static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
{
NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
uint32_t crc=~0;
for(uint32_t i = 0; i < bitstreamSize; i++)
{
crc = (crc >> 8) ^ Crc32Table[((uint8_t)(crc)) ^ (*pEncStream++)];
}
*crcValue = ~crc;
}
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
{
dim3 blockSize(1, 1, 1);
dim3 gridSize(1, 1, 1);
ComputeCRCKernel <<<gridSize, blockSize, 0, outputCUStream >>>(pBuffer, crcValue);
}
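ComputeCRC expects a device buffer that begins with NV_ENC_ENCODE_OUT_PARAMS followed by the encoded bitstream, and writes a single CRC32 to device memory. A minimal sketch; dpEncOut stands in for the encoder output buffer and is assumed to exist elsewhere:

extern uint8_t *dpEncOut;  // assumed: NV_ENC_ENCODE_OUT_PARAMS + bitstream, on the device
uint32_t *dpCrc = NULL;
cudaMalloc(&dpCrc, sizeof(uint32_t));
cudaStream_t stream = 0;  // default stream, for the sketch
ComputeCRC(dpEncOut, dpCrc, stream);
uint32_t crc = 0;
cudaMemcpyAsync(&crc, dpCrc, sizeof(crc), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);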


@@ -24,7 +24,7 @@ if is_os("windows") then
add_defines("_WEBSOCKETPP_CPP11_INTERNAL_")
elseif is_os("linux") then
add_requires("glib", {system = true})
add_packages("glib", "cuda")
add_packages("glib")
add_cxflags("-fPIC")
add_syslinks("pthread")
elseif is_os("macosx") then
@@ -122,8 +122,8 @@ target("media")
"src/media/video/encode/aom",
"src/media/video/decode/dav1d",
"src/media/nvcodec",
"thirdparty/nvcodec/Interface",
"thirdparty/nvcodec/Samples", {public = true})
"thirdparty/nvcodec/interface", {public = true})
add_includedirs(path.join(os.getenv("CUDA_PATH"), "include"), {public = true})
elseif is_os(("linux")) then
add_files("src/media/video/encode/*.cpp",
"src/media/video/decode/*.cpp",
@@ -143,8 +143,8 @@ target("media")
"src/media/video/encode/aom",
"src/media/video/decode/dav1d",
"src/media/nvcodec",
"thirdparty/nvcodec/Interface",
"thirdparty/nvcodec/Samples", {public = true})
"thirdparty/nvcodec/interface", {public = true})
add_includedirs(path.join(os.getenv("CUDA_PATH"), "include"), {public = true})
elseif is_os("macosx") then
add_files("src/media/video/encode/*.cpp",
"src/media/video/decode/*.cpp",
@@ -195,15 +195,18 @@ target("projectx")
add_includedirs("src/rtc", "src/pc", "src/interface")
if is_os("windows") then
add_linkdirs("thirdparty/nvcodec/Lib/x64")
add_linkdirs("thirdparty/nvcodec/lib/x64")
add_linkdirs(path.join(os.getenv("CUDA_PATH"), "lib/x64"))
add_links("nice", "glib-2.0", "gio-2.0", "gmodule-2.0", "gobject-2.0",
"pcre2-8", "pcre2-16", "pcre2-32", "pcre2-posix",
"zlib", "ffi", "libcrypto", "libssl", "intl", "iconv",
"Shell32", "Advapi32", "Dnsapi", "Shlwapi", "Crypt32",
"ws2_32", "Bcrypt", "windowsapp", "User32", "Strmiids", "Mfuuid",
"Secur32", "Bcrypt")
add_links("cuda", "nvencodeapi", "nvcuvid")
elseif is_os(("linux")) then
add_linkdirs("thirdparty/nvcodec/Lib/x64")
add_linkdirs("thirdparty/nvcodec/lib/x64")
add_linkdirs(path.join(os.getenv("CUDA_PATH"), "lib/x64"))
add_links("cuda", "nvidia-encode", "nvcuvid")
elseif is_os("macosx") then