[fix] use dynamic DLL loading for cuda library

This commit is contained in:
dijunkun
2024-08-12 17:26:51 +08:00
parent 1626b482de
commit f02286365c
12 changed files with 21999 additions and 78 deletions

View File

@@ -16,6 +16,7 @@
#include <cmath>
#include <iostream>
#include "nvcodec_api.h"
#include "nvcuvid.h"
#define START_TIMER auto start = std::chrono::steady_clock::now();
@@ -31,7 +32,7 @@
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char *szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
cuGetErrorName_ld(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVDECException::makeNVDECException( \
@@ -199,9 +200,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps_ld(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
if (!decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU",
@@ -242,7 +243,7 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder() has been called before, and now there's possible
// cuvidCreateDecoder_ld() has been called before, and now there's possible
// config change
return ReconfigureDecoder(pVideoFormat);
}
@@ -378,9 +379,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
"Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder_ld(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Initialization Time: ");
return nDecodeSurface;
}
@@ -506,9 +507,9 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder_ld(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Reconfigure Time: ");
return nDecodeSurface;
@@ -538,9 +539,9 @@ int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
pFrame = m_vpFrame.back();
m_vpFrame.pop_back();
if (m_bUseDeviceFrame) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree_ld((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
} else {
delete pFrame;
}
@@ -558,9 +559,9 @@ int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
return false;
}
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture_ld(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
return 1;
}
@@ -577,15 +578,15 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
CUdeviceptr dpSrcFrame = 0;
unsigned int nSrcPitch = 0;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame64_ld(m_hDecoder, pDispInfo->picture_index,
&dpSrcFrame, &nSrcPitch,
&videoProcessingParameters));
CUVIDGETDECODESTATUS DecodeStatus;
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
CUresult result =
cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
CUresult result = cuvidGetDecodeStatus_ld(
m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
if (result == CUDA_SUCCESS &&
(DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
@@ -602,11 +603,12 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
uint8_t *pFrame = NULL;
if (m_bUseDeviceFrame) {
if (m_bDeviceFramePitched) {
CUDA_DRVAPI_CALL(cuMemAllocPitch(
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
(CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
} else {
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
CUDA_DRVAPI_CALL(
cuMemAlloc_ld((CUdeviceptr *)&pFrame, GetFrameSize()));
}
} else {
pFrame = new uint8_t[GetFrameSize()];
@@ -627,7 +629,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
m.WidthInBytes = GetWidth() * m_nBPP;
m.Height = m_nLumaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
// Copy chroma plane
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning
@@ -637,7 +639,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice =
(CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
if (m_nNumChromaPlanes == 2) {
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
@@ -645,17 +647,17 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
m.dstPitch * m_nLumaHeight * 2);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
}
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuStreamSynchronize_ld(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
m_vTimestamp.resize(m_vpFrame.size());
}
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
NVDEC_API_CALL(cuvidUnmapVideoFrame64_ld(m_hDecoder, dpSrcFrame));
return 1;
}
@@ -673,7 +675,7 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
if (pCropRect) m_cropRect = *pCropRect;
if (pResizeDim) m_resizeDim = *pResizeDim;
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
NVDEC_API_CALL(cuvidCtxLockCreate_ld(&m_ctxLock, cuContext));
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
@@ -685,32 +687,32 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
NVDEC_API_CALL(cuvidCreateVideoParser_ld(&m_hParser, &videoParserParameters));
}
NvDecoder::~NvDecoder() {
START_TIMER
if (m_hParser) {
cuvidDestroyVideoParser(m_hParser);
cuvidDestroyVideoParser_ld(m_hParser);
}
cuCtxPushCurrent(m_cuContext);
cuCtxPushCurrent_ld(m_cuContext);
if (m_hDecoder) {
cuvidDestroyDecoder(m_hDecoder);
cuvidDestroyDecoder_ld(m_hDecoder);
}
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
for (uint8_t *pFrame : m_vpFrame) {
if (m_bUseDeviceFrame) {
cuMemFree((CUdeviceptr)pFrame);
cuMemFree_ld((CUdeviceptr)pFrame);
} else {
delete[] pFrame;
}
}
cuCtxPopCurrent(NULL);
cuCtxPopCurrent_ld(NULL);
cuvidCtxLockDestroy(m_ctxLock);
cuvidCtxLockDestroy_ld(m_ctxLock);
STOP_TIMER("Session Deinitialization Time: ");
}
@@ -727,8 +729,8 @@ int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
// NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
if (CUDA_SUCCESS != cuvidParseVideoData(m_hParser, &packet)) {
// NVDEC_API_CALL(cuvidParseVideoData_ld(m_hParser, &packet));
if (CUDA_SUCCESS != cuvidParseVideoData_ld(m_hParser, &packet)) {
return 0;
}
m_cuvidStream = 0;

View File

@@ -1,6 +1,7 @@
#include "nvidia_video_decoder.h"
#include "log.h"
#include "nvcodec_api.h"
#define SAVE_RECEIVED_H264_STREAM 0
#define SAVE_DECODED_NV12_STREAM 0
@@ -21,20 +22,20 @@ NvidiaVideoDecoder::~NvidiaVideoDecoder() {
}
int NvidiaVideoDecoder::Init() {
ck(cuInit(0));
ck(cuInit_ld(0));
int nGpu = 0;
int iGpu = 0;
ck(cuDeviceGetCount(&nGpu));
ck(cuDeviceGetCount_ld(&nGpu));
if (nGpu < 1) {
return -1;
}
CUdevice cuDevice;
cuDeviceGet(&cuDevice, iGpu);
cuDeviceGet_ld(&cuDevice, iGpu);
CUcontext cuContext = NULL;
cuCtxCreate(&cuContext, 0, cuDevice);
cuCtxCreate_ld(&cuContext, 0, cuDevice);
if (!cuContext) {
return -1;
}

View File

@@ -10,6 +10,7 @@
#endif
#include "log.h"
#include "nvcodec_api.h"
VideoDecoderFactory::VideoDecoderFactory() {}
@@ -44,9 +45,8 @@ bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() {
return false;
#else
CUresult cuResult;
CUvideoctxlock cudaCtxLock;
cuResult = cuvidCtxLockCreate(&cudaCtxLock, 0);
cuResult = cuvidCtxLockCreate_ld(&cudaCtxLock, 0);
if (cuResult != CUDA_SUCCESS) {
LOG_WARN(
"System not support hardware accelerated decode, use default software "

View File

@@ -11,6 +11,8 @@
#include "NvEncoder.h"
#include "nvcodec_api.h"
#ifndef _WIN32
#include <cstring>
static inline bool operator==(const GUID &guid1, const GUID &guid2) {
@@ -60,7 +62,7 @@ void NvEncoder::LoadNvEncApi() {
uint32_t version = 0;
uint32_t currentVersion =
(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion_ld(&version));
if (currentVersion > version) {
NVENC_THROW_ERROR(
"Current Driver Version does not support this NvEncodeAPI version, "
@@ -69,7 +71,7 @@ void NvEncoder::LoadNvEncApi() {
}
m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
NVENC_API_CALL(NvEncodeAPICreateInstance_ld(&m_nvenc));
}
NvEncoder::~NvEncoder() { DestroyHWEncoder(); }

View File

@@ -11,6 +11,8 @@
#include "NvEncoderCuda.h"
#include "nvcodec_api.h"
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
@@ -44,7 +46,7 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
int numCount = m_bMotionEstimationOnly ? 2 : 1;
for (int count = 0; count < numCount; count++) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
std::vector<void *> inputFrames;
for (int i = 0; i < numInputBuffers; i++) {
CUdeviceptr pDeviceFrame;
@@ -54,13 +56,13 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
CUDA_DRVAPI_CALL(cuMemAllocPitch(
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
(CUdeviceptr *)&pDeviceFrame, &m_cudaPitch,
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
GetMaxEncodeHeight() + chromaHeight, 16));
inputFrames.push_back((void *)pDeviceFrame);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
RegisterInputResources(
inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
@@ -88,23 +90,24 @@ void NvEncoderCuda::ReleaseCudaResources() {
UnregisterInputResources();
cuCtxPushCurrent(m_cuContext);
cuCtxPushCurrent_ld(m_cuContext);
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) {
if (m_vInputFrames[i].inputPtr) {
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
cuMemFree_ld(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
}
}
m_vInputFrames.clear();
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) {
if (m_vReferenceFrames[i].inputPtr) {
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
cuMemFree_ld(
reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
}
}
m_vReferenceFrames.clear();
cuCtxPopCurrent(NULL);
cuCtxPopCurrent_ld(NULL);
m_cuContext = nullptr;
}
@@ -120,7 +123,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -138,10 +141,10 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -167,14 +170,14 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
}
void NvEncoderCuda::CopyToDeviceFrame(
@@ -189,7 +192,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -207,9 +210,9 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -234,11 +237,11 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
}

View File

@@ -24,7 +24,7 @@
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char* szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
cuGetErrorName_ld(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVENCException::makeNVENCException( \

View File

@@ -3,6 +3,7 @@
#include <chrono>
#include "log.h"
#include "nvcodec_api.h"
#define SAVE_RECEIVED_NV12_STREAM 0
#define SAVE_ENCODED_H264_STREAM 0
@@ -32,11 +33,12 @@ int NvidiaVideoEncoder::Init() {
int num_of_GPUs = 0;
CUdevice cuda_device;
bool cuda_ctx_succeed =
(index_of_GPU >= 0 && cuInit(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(index_of_GPU >= 0 && cuInit_ld(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount_ld(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
cuDeviceGet(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate(&cuda_context_, 0, cuda_device) == CUresult::CUDA_SUCCESS);
cuDeviceGet_ld(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate_ld(&cuda_context_, 0, cuda_device) ==
CUresult::CUDA_SUCCESS);
if (!cuda_ctx_succeed) {
}

View File

@@ -10,6 +10,7 @@
#endif
#include "log.h"
#include "nvcodec_api.h"
VideoEncoderFactory::VideoEncoderFactory() {}
@@ -46,7 +47,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
CUresult cuResult;
NV_ENCODE_API_FUNCTION_LIST functionList = {NV_ENCODE_API_FUNCTION_LIST_VER};
cuResult = cuInit(0);
cuResult = cuInit_ld(0);
if (cuResult != CUDA_SUCCESS) {
LOG_WARN(
"System not support hardware accelerated encode, use default software "
@@ -54,7 +55,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
return false;
}
NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance(&functionList);
NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance_ld(&functionList);
if (nvEncStatus != NV_ENC_SUCCESS) {
LOG_WARN(
"System not support hardware accelerated encode, use default software "

View File

@@ -6,6 +6,7 @@
#include "common.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include "nvcodec_api.h"
using nlohmann::json;
@@ -227,6 +228,7 @@ int PeerConnection::CreateVideoCodec(bool hardware_acceleration) {
"MacOS not support hardware acceleration, use default software codec");
}
#else
InitNvCodecApi();
#endif
if (av1_encoding_) {

21542
thirdparty/nvcodec/Interface/cuda.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,367 @@
/*
* @Author: DI JUNKUN
* @Date: 2024-08-12
* Copyright (c) 2024 by DI JUNKUN, All Rights Reserved.
*/
#ifndef _NVCODEC_API_H_
#define _NVCODEC_API_H_
#include <Windows.h>
#include <iostream>
#include "cuda.h"
#include "cuviddec.h"
#include "nvEncodeAPI.h"
#include "nvcuvid.h"
// ---------------------------------------------------------------------------
// CUDA driver API: function-pointer types for the entry points that
// InitNvCodecApi() resolves at run time with GetProcAddress().
// NOTE(review): these typedefs carry no explicit calling-convention macro;
// presumably fine for x64 builds -- confirm if 32-bit builds are required.
// ---------------------------------------------------------------------------
typedef CUresult (*TcuInit)(unsigned int Flags);
typedef CUresult (*TcuDeviceGet)(CUdevice *device, int ordinal);
typedef CUresult (*TcuDeviceGetCount)(int *count);
typedef CUresult (*TcuCtxCreate)(CUcontext *pctx, unsigned int flags,
CUdevice dev);
typedef CUresult (*TcuGetErrorName)(CUresult error, const char **pStr);
typedef CUresult (*TcuCtxPushCurrent)(CUcontext ctx);
typedef CUresult (*TcuCtxPopCurrent)(CUcontext *pctx);
typedef CUresult (*TcuMemAlloc)(CUdeviceptr *dptr, size_t bytesize);
typedef CUresult (*TcuMemAllocPitch)(CUdeviceptr *dptr, size_t *pPitch,
size_t WidthInBytes, size_t Height,
unsigned int ElementSizeBytes);
typedef CUresult (*TcuMemFree)(CUdeviceptr dptr);
typedef CUresult (*TcuMemcpy2DAsync)(const CUDA_MEMCPY2D *pCopy,
CUstream hStream);
typedef CUresult (*TcuStreamSynchronize)(CUstream hStream);
typedef CUresult (*TcuMemcpy2D)(const CUDA_MEMCPY2D *pCopy);
typedef CUresult (*TcuMemcpy2DUnaligned)(const CUDA_MEMCPY2D *pCopy);
// CUDA driver API entry points, bound by InitNvCodecApi(). The `_ld` suffix
// distinguishes them from the statically linked names they replace.
// NOTE(review): these are `static` variables in a header, so every
// translation unit that includes this file gets its own private, initially
// null copy. Only the copies in a translation unit that itself calls
// InitNvCodecApi() are ever assigned -- confirm that every file calling a
// *_ld pointer also runs InitNvCodecApi() first.
static TcuInit cuInit_ld;
static TcuDeviceGet cuDeviceGet_ld;
static TcuDeviceGetCount cuDeviceGetCount_ld;
static TcuCtxCreate cuCtxCreate_ld;
static TcuGetErrorName cuGetErrorName_ld;
static TcuCtxPushCurrent cuCtxPushCurrent_ld;
static TcuCtxPopCurrent cuCtxPopCurrent_ld;
static TcuMemAlloc cuMemAlloc_ld;
static TcuMemAllocPitch cuMemAllocPitch_ld;
static TcuMemFree cuMemFree_ld;
static TcuMemcpy2DAsync cuMemcpy2DAsync_ld;
static TcuStreamSynchronize cuStreamSynchronize_ld;
static TcuMemcpy2D cuMemcpy2D_ld;
static TcuMemcpy2DUnaligned cuMemcpy2DUnaligned_ld;
// ---------------------------------------------------------------------------
// Video decode (cuvid*) API: function-pointer types for the decoder, context
// lock and bitstream parser entry points.
// ---------------------------------------------------------------------------
typedef CUresult (*TcuvidCtxLockCreate)(CUvideoctxlock *pLock, CUcontext ctx);
typedef CUresult (*TcuvidGetDecoderCaps)(CUVIDDECODECAPS *pdc);
typedef CUresult (*TcuvidCreateDecoder)(CUvideodecoder *phDecoder,
CUVIDDECODECREATEINFO *pdci);
typedef CUresult (*TcuvidDestroyDecoder)(CUvideodecoder hDecoder);
typedef CUresult (*TcuvidDecodePicture)(CUvideodecoder hDecoder,
CUVIDPICPARAMS *pPicParams);
typedef CUresult (*TcuvidGetDecodeStatus)(CUvideodecoder hDecoder, int nPicIdx,
CUVIDGETDECODESTATUS *pDecodeStatus);
typedef CUresult (*TcuvidReconfigureDecoder)(
CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams);
typedef CUresult (*TcuvidMapVideoFrame64)(CUvideodecoder hDecoder, int nPicIdx,
unsigned long long *pDevPtr,
unsigned int *pPitch,
CUVIDPROCPARAMS *pVPP);
typedef CUresult (*TcuvidUnmapVideoFrame64)(CUvideodecoder hDecoder,
unsigned long long DevPtr);
typedef CUresult (*TcuvidCtxLockDestroy)(CUvideoctxlock lck);
typedef CUresult (*TcuvidCreateVideoParser)(CUvideoparser *pObj,
CUVIDPARSERPARAMS *pParams);
typedef CUresult (*TcuvidParseVideoData)(CUvideoparser obj,
CUVIDSOURCEDATAPACKET *pPacket);
typedef CUresult (*TcuvidDestroyVideoParser)(CUvideoparser obj);
// Video decode entry points, bound by InitNvCodecApi(). The same per-
// translation-unit caveat as for the CUDA driver pointers applies, because
// these are also `static`.
static TcuvidCtxLockCreate cuvidCtxLockCreate_ld;
static TcuvidGetDecoderCaps cuvidGetDecoderCaps_ld;
static TcuvidCreateDecoder cuvidCreateDecoder_ld;
static TcuvidDestroyDecoder cuvidDestroyDecoder_ld;
static TcuvidDecodePicture cuvidDecodePicture_ld;
static TcuvidGetDecodeStatus cuvidGetDecodeStatus_ld;
static TcuvidReconfigureDecoder cuvidReconfigureDecoder_ld;
static TcuvidMapVideoFrame64 cuvidMapVideoFrame64_ld;
static TcuvidUnmapVideoFrame64 cuvidUnmapVideoFrame64_ld;
static TcuvidCtxLockDestroy cuvidCtxLockDestroy_ld;
static TcuvidCreateVideoParser cuvidCreateVideoParser_ld;
static TcuvidParseVideoData cuvidParseVideoData_ld;
static TcuvidDestroyVideoParser cuvidDestroyVideoParser_ld;
// ---------------------------------------------------------------------------
// Video encode (NvEncodeAPI*) API: function-pointer types for the two
// bootstrap entry points.
// ---------------------------------------------------------------------------
typedef NVENCSTATUS (*TNvEncodeAPICreateInstance)(
NV_ENCODE_API_FUNCTION_LIST *functionList);
typedef NVENCSTATUS (*TNvEncodeAPIGetMaxSupportedVersion)(uint32_t *version);
// Video encode bootstrap entry points, bound by InitNvCodecApi(); also
// `static`, so also private to each including translation unit.
static TNvEncodeAPICreateInstance NvEncodeAPICreateInstance_ld;
static TNvEncodeAPIGetMaxSupportedVersion NvEncodeAPIGetMaxSupportedVersion_ld;
static int InitNvCodecApi() {
// Load library
HMODULE nvcuda_dll = LoadLibrary(TEXT("nvcuda.dll"));
if (nvcuda_dll == NULL) {
std::cerr << "Unable to load nvcuda.dll!" << std::endl;
return 1;
}
cuInit_ld = (TcuInit)GetProcAddress(nvcuda_dll, "cuInit");
if (cuInit_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuDeviceGet_ld = (TcuDeviceGet)GetProcAddress(nvcuda_dll, "cuDeviceGet");
if (cuDeviceGet_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuDeviceGetCount_ld =
(TcuDeviceGetCount)GetProcAddress(nvcuda_dll, "cuDeviceGetCount");
if (cuDeviceGetCount_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuCtxCreate_ld = (TcuCtxCreate)GetProcAddress(nvcuda_dll, "cuCtxCreate");
if (cuCtxCreate_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuGetErrorName_ld =
(TcuGetErrorName)GetProcAddress(nvcuda_dll, "cuGetErrorName");
if (cuGetErrorName_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuCtxPushCurrent_ld =
(TcuCtxPushCurrent)GetProcAddress(nvcuda_dll, "cuCtxPushCurrent");
if (cuCtxPushCurrent_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuCtxPopCurrent_ld =
(TcuCtxPopCurrent)GetProcAddress(nvcuda_dll, "cuCtxPopCurrent");
if (cuCtxPopCurrent_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuMemAlloc_ld = (TcuMemAlloc)GetProcAddress(nvcuda_dll, "cuMemAlloc");
if (cuMemAlloc_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuMemAllocPitch_ld =
(TcuMemAllocPitch)GetProcAddress(nvcuda_dll, "cuMemAllocPitch");
if (cuMemAllocPitch_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuMemFree_ld = (TcuMemFree)GetProcAddress(nvcuda_dll, "cuMemFree");
if (cuMemFree_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuMemcpy2DAsync_ld =
(TcuMemcpy2DAsync)GetProcAddress(nvcuda_dll, "cuMemcpy2DAsync");
if (cuMemcpy2DAsync_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuStreamSynchronize_ld =
(TcuStreamSynchronize)GetProcAddress(nvcuda_dll, "cuStreamSynchronize");
if (cuStreamSynchronize_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuMemcpy2D_ld = (TcuMemcpy2D)GetProcAddress(nvcuda_dll, "cuMemcpy2D");
if (cuMemcpy2D_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuMemcpy2DUnaligned_ld =
(TcuMemcpy2DUnaligned)GetProcAddress(nvcuda_dll, "cuMemcpy2DUnaligned");
if (cuMemcpy2DUnaligned_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
//
HMODULE nvcuvid_dll = LoadLibrary(TEXT("nvcuvid.dll"));
if (nvcuvid_dll == NULL) {
std::cerr << "Unable to load nvcuvid.dll!" << std::endl;
return 1;
}
cuvidCtxLockCreate_ld =
(TcuvidCtxLockCreate)GetProcAddress(nvcuda_dll, "cuvidCtxLockCreate");
if (cuvidCtxLockCreate_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidGetDecoderCaps_ld =
(TcuvidGetDecoderCaps)GetProcAddress(nvcuda_dll, "cuvidGetDecoderCaps");
if (cuvidGetDecoderCaps_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidCreateDecoder_ld =
(TcuvidCreateDecoder)GetProcAddress(nvcuda_dll, "cuvidCreateDecoder");
if (cuvidCreateDecoder_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidDestroyDecoder_ld =
(TcuvidDestroyDecoder)GetProcAddress(nvcuda_dll, "cuvidDestroyDecoder");
if (cuvidDestroyDecoder_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidDecodePicture_ld =
(TcuvidDecodePicture)GetProcAddress(nvcuda_dll, "cuvidDecodePicture");
if (cuvidDecodePicture_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidGetDecodeStatus_ld =
(TcuvidGetDecodeStatus)GetProcAddress(nvcuda_dll, "cuvidGetDecodeStatus");
if (cuvidGetDecodeStatus_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidReconfigureDecoder_ld = (TcuvidReconfigureDecoder)GetProcAddress(
nvcuda_dll, "cuvidReconfigureDecoder");
if (cuvidReconfigureDecoder_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidMapVideoFrame64_ld =
(TcuvidMapVideoFrame64)GetProcAddress(nvcuda_dll, "cuvidMapVideoFrame64");
if (cuvidMapVideoFrame64_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidUnmapVideoFrame64_ld = (TcuvidUnmapVideoFrame64)GetProcAddress(
nvcuda_dll, "cuvidUnmapVideoFrame64");
if (cuvidUnmapVideoFrame64_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidCtxLockDestroy_ld =
(TcuvidCtxLockDestroy)GetProcAddress(nvcuda_dll, "cuvidCtxLockDestroy");
if (cuvidCtxLockDestroy_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidCreateVideoParser_ld = (TcuvidCreateVideoParser)GetProcAddress(
nvcuda_dll, "cuvidCreateVideoParser");
if (cuvidCreateVideoParser_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidParseVideoData_ld =
(TcuvidParseVideoData)GetProcAddress(nvcuda_dll, "cuvidParseVideoData");
if (cuvidParseVideoData_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
cuvidDestroyVideoParser_ld = (TcuvidDestroyVideoParser)GetProcAddress(
nvcuda_dll, "cuvidDestroyVideoParser");
if (cuvidDestroyVideoParser_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
//
HMODULE nvEncodeAPI64_dll = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
if (nvEncodeAPI64_dll == NULL) {
std::cerr << "Unable to load nvEncodeAPI64.dll!" << std::endl;
return 1;
}
NvEncodeAPICreateInstance_ld = (TNvEncodeAPICreateInstance)GetProcAddress(
nvcuda_dll, "NvEncodeAPICreateInstance");
if (NvEncodeAPICreateInstance_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
NvEncodeAPIGetMaxSupportedVersion_ld =
(TNvEncodeAPIGetMaxSupportedVersion)GetProcAddress(
nvcuda_dll, "NvEncodeAPIGetMaxSupportedVersion");
if (NvEncodeAPIGetMaxSupportedVersion_ld == NULL) {
std::cerr << "Unable to find function!" << std::endl;
FreeLibrary(nvcuda_dll);
return 1;
}
return 0;
}
#endif

View File

@@ -192,7 +192,6 @@ target("projectx")
"pcre2-8", "pcre2-16", "pcre2-32", "pcre2-posix",
"zlib", "ffi", "libcrypto", "libssl", "intl", "iconv",
"Shell32", "Advapi32", "Dnsapi", "Shlwapi", "Crypt32",
"cuda", "nvencodeapi", "nvcuvid",
"ws2_32", "Bcrypt", "windowsapp", "User32", "Strmiids", "Mfuuid",
"Secur32", "Bcrypt")
elseif is_os(("linux")) then