[fix] use dynamic DLL loading for cuda library

This commit is contained in:
dijunkun
2024-08-12 17:26:51 +08:00
parent 1626b482de
commit f02286365c
12 changed files with 21999 additions and 78 deletions

View File

@@ -16,6 +16,7 @@
#include <cmath>
#include <iostream>
#include "nvcodec_api.h"
#include "nvcuvid.h"
#define START_TIMER auto start = std::chrono::steady_clock::now();
@@ -31,7 +32,7 @@
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char *szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
cuGetErrorName_ld(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVDECException::makeNVDECException( \
@@ -199,9 +200,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps_ld(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
if (!decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU",
@@ -242,7 +243,7 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder() has been called before, and now there's possible
// cuvidCreateDecoder_ld() has been called before, and now there's possible
// config change
return ReconfigureDecoder(pVideoFormat);
}
@@ -378,9 +379,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
"Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder_ld(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Initialization Time: ");
return nDecodeSurface;
}
@@ -506,9 +507,9 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder_ld(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
STOP_TIMER("Session Reconfigure Time: ");
return nDecodeSurface;
@@ -538,9 +539,9 @@ int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
pFrame = m_vpFrame.back();
m_vpFrame.pop_back();
if (m_bUseDeviceFrame) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree_ld((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
} else {
delete pFrame;
}
@@ -558,9 +559,9 @@ int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
return false;
}
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture_ld(m_hDecoder, pPicParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
return 1;
}
@@ -577,15 +578,15 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
CUdeviceptr dpSrcFrame = 0;
unsigned int nSrcPitch = 0;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
&dpSrcFrame, &nSrcPitch,
&videoProcessingParameters));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
NVDEC_API_CALL(cuvidMapVideoFrame64_ld(m_hDecoder, pDispInfo->picture_index,
&dpSrcFrame, &nSrcPitch,
&videoProcessingParameters));
CUVIDGETDECODESTATUS DecodeStatus;
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
CUresult result =
cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
CUresult result = cuvidGetDecodeStatus_ld(
m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
if (result == CUDA_SUCCESS &&
(DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
@@ -602,11 +603,12 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
uint8_t *pFrame = NULL;
if (m_bUseDeviceFrame) {
if (m_bDeviceFramePitched) {
CUDA_DRVAPI_CALL(cuMemAllocPitch(
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
(CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
} else {
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
CUDA_DRVAPI_CALL(
cuMemAlloc_ld((CUdeviceptr *)&pFrame, GetFrameSize()));
}
} else {
pFrame = new uint8_t[GetFrameSize()];
@@ -627,7 +629,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
m.WidthInBytes = GetWidth() * m_nBPP;
m.Height = m_nLumaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
// Copy chroma plane
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning
@@ -637,7 +639,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice =
(CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
if (m_nNumChromaPlanes == 2) {
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
@@ -645,17 +647,17 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
m.dstPitch * m_nLumaHeight * 2);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
}
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuStreamSynchronize_ld(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
m_vTimestamp.resize(m_vpFrame.size());
}
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
NVDEC_API_CALL(cuvidUnmapVideoFrame64_ld(m_hDecoder, dpSrcFrame));
return 1;
}
@@ -673,7 +675,7 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
if (pCropRect) m_cropRect = *pCropRect;
if (pResizeDim) m_resizeDim = *pResizeDim;
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
NVDEC_API_CALL(cuvidCtxLockCreate_ld(&m_ctxLock, cuContext));
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
@@ -685,32 +687,32 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
NVDEC_API_CALL(cuvidCreateVideoParser_ld(&m_hParser, &videoParserParameters));
}
NvDecoder::~NvDecoder() {
START_TIMER
if (m_hParser) {
cuvidDestroyVideoParser(m_hParser);
cuvidDestroyVideoParser_ld(m_hParser);
}
cuCtxPushCurrent(m_cuContext);
cuCtxPushCurrent_ld(m_cuContext);
if (m_hDecoder) {
cuvidDestroyDecoder(m_hDecoder);
cuvidDestroyDecoder_ld(m_hDecoder);
}
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
for (uint8_t *pFrame : m_vpFrame) {
if (m_bUseDeviceFrame) {
cuMemFree((CUdeviceptr)pFrame);
cuMemFree_ld((CUdeviceptr)pFrame);
} else {
delete[] pFrame;
}
}
cuCtxPopCurrent(NULL);
cuCtxPopCurrent_ld(NULL);
cuvidCtxLockDestroy(m_ctxLock);
cuvidCtxLockDestroy_ld(m_ctxLock);
STOP_TIMER("Session Deinitialization Time: ");
}
@@ -727,8 +729,8 @@ int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
// NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
if (CUDA_SUCCESS != cuvidParseVideoData(m_hParser, &packet)) {
// NVDEC_API_CALL(cuvidParseVideoData_ld(m_hParser, &packet));
if (CUDA_SUCCESS != cuvidParseVideoData_ld(m_hParser, &packet)) {
return 0;
}
m_cuvidStream = 0;

View File

@@ -1,6 +1,7 @@
#include "nvidia_video_decoder.h"
#include "log.h"
#include "nvcodec_api.h"
#define SAVE_RECEIVED_H264_STREAM 0
#define SAVE_DECODED_NV12_STREAM 0
@@ -21,20 +22,20 @@ NvidiaVideoDecoder::~NvidiaVideoDecoder() {
}
int NvidiaVideoDecoder::Init() {
ck(cuInit(0));
ck(cuInit_ld(0));
int nGpu = 0;
int iGpu = 0;
ck(cuDeviceGetCount(&nGpu));
ck(cuDeviceGetCount_ld(&nGpu));
if (nGpu < 1) {
return -1;
}
CUdevice cuDevice;
cuDeviceGet(&cuDevice, iGpu);
cuDeviceGet_ld(&cuDevice, iGpu);
CUcontext cuContext = NULL;
cuCtxCreate(&cuContext, 0, cuDevice);
cuCtxCreate_ld(&cuContext, 0, cuDevice);
if (!cuContext) {
return -1;
}

View File

@@ -10,6 +10,7 @@
#endif
#include "log.h"
#include "nvcodec_api.h"
VideoDecoderFactory::VideoDecoderFactory() {}
@@ -44,9 +45,8 @@ bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() {
return false;
#else
CUresult cuResult;
CUvideoctxlock cudaCtxLock;
cuResult = cuvidCtxLockCreate(&cudaCtxLock, 0);
cuResult = cuvidCtxLockCreate_ld(&cudaCtxLock, 0);
if (cuResult != CUDA_SUCCESS) {
LOG_WARN(
"System not support hardware accelerated decode, use default software "

View File

@@ -11,6 +11,8 @@
#include "NvEncoder.h"
#include "nvcodec_api.h"
#ifndef _WIN32
#include <cstring>
static inline bool operator==(const GUID &guid1, const GUID &guid2) {
@@ -60,7 +62,7 @@ void NvEncoder::LoadNvEncApi() {
uint32_t version = 0;
uint32_t currentVersion =
(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion_ld(&version));
if (currentVersion > version) {
NVENC_THROW_ERROR(
"Current Driver Version does not support this NvEncodeAPI version, "
@@ -69,7 +71,7 @@ void NvEncoder::LoadNvEncApi() {
}
m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
NVENC_API_CALL(NvEncodeAPICreateInstance_ld(&m_nvenc));
}
NvEncoder::~NvEncoder() { DestroyHWEncoder(); }

View File

@@ -11,6 +11,8 @@
#include "NvEncoderCuda.h"
#include "nvcodec_api.h"
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
uint32_t nHeight,
NV_ENC_BUFFER_FORMAT eBufferFormat,
@@ -44,7 +46,7 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
int numCount = m_bMotionEstimationOnly ? 2 : 1;
for (int count = 0; count < numCount; count++) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
std::vector<void *> inputFrames;
for (int i = 0; i < numInputBuffers; i++) {
CUdeviceptr pDeviceFrame;
@@ -54,13 +56,13 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
CUDA_DRVAPI_CALL(cuMemAllocPitch(
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
(CUdeviceptr *)&pDeviceFrame, &m_cudaPitch,
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
GetMaxEncodeHeight() + chromaHeight, 16));
inputFrames.push_back((void *)pDeviceFrame);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
RegisterInputResources(
inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
@@ -88,23 +90,24 @@ void NvEncoderCuda::ReleaseCudaResources() {
UnregisterInputResources();
cuCtxPushCurrent(m_cuContext);
cuCtxPushCurrent_ld(m_cuContext);
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) {
if (m_vInputFrames[i].inputPtr) {
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
cuMemFree_ld(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
}
}
m_vInputFrames.clear();
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) {
if (m_vReferenceFrames[i].inputPtr) {
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
cuMemFree_ld(
reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
}
}
m_vReferenceFrames.clear();
cuCtxPopCurrent(NULL);
cuCtxPopCurrent_ld(NULL);
m_cuContext = nullptr;
}
@@ -120,7 +123,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -138,10 +141,10 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -167,14 +170,14 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
: cuMemcpy2DAsync(&m, stream));
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
: cuMemcpy2DAsync_ld(&m, stream));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
}
void NvEncoderCuda::CopyToDeviceFrame(
@@ -189,7 +192,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
NV_ENC_ERR_INVALID_PARAM);
}
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
uint32_t srcPitch =
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
@@ -207,9 +210,9 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
m.Height = height;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
}
std::vector<uint32_t> srcChromaOffsets;
@@ -234,11 +237,11 @@ void NvEncoderCuda::CopyToDeviceFrame(
m.WidthInBytes = chromaWidthInBytes;
m.Height = chromaHeight;
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
} else {
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
}
}
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
}

View File

@@ -24,7 +24,7 @@
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char* szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
cuGetErrorName_ld(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVENCException::makeNVENCException( \

View File

@@ -3,6 +3,7 @@
#include <chrono>
#include "log.h"
#include "nvcodec_api.h"
#define SAVE_RECEIVED_NV12_STREAM 0
#define SAVE_ENCODED_H264_STREAM 0
@@ -32,11 +33,12 @@ int NvidiaVideoEncoder::Init() {
int num_of_GPUs = 0;
CUdevice cuda_device;
bool cuda_ctx_succeed =
(index_of_GPU >= 0 && cuInit(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(index_of_GPU >= 0 && cuInit_ld(0) == CUresult::CUDA_SUCCESS &&
cuDeviceGetCount_ld(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
(num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
cuDeviceGet(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate(&cuda_context_, 0, cuda_device) == CUresult::CUDA_SUCCESS);
cuDeviceGet_ld(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
cuCtxCreate_ld(&cuda_context_, 0, cuda_device) ==
CUresult::CUDA_SUCCESS);
if (!cuda_ctx_succeed) {
}

View File

@@ -10,6 +10,7 @@
#endif
#include "log.h"
#include "nvcodec_api.h"
VideoEncoderFactory::VideoEncoderFactory() {}
@@ -46,7 +47,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
CUresult cuResult;
NV_ENCODE_API_FUNCTION_LIST functionList = {NV_ENCODE_API_FUNCTION_LIST_VER};
cuResult = cuInit(0);
cuResult = cuInit_ld(0);
if (cuResult != CUDA_SUCCESS) {
LOG_WARN(
"System not support hardware accelerated encode, use default software "
@@ -54,7 +55,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
return false;
}
NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance(&functionList);
NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance_ld(&functionList);
if (nvEncStatus != NV_ENC_SUCCESS) {
LOG_WARN(
"System not support hardware accelerated encode, use default software "

View File

@@ -6,6 +6,7 @@
#include "common.h"
#include "log.h"
#include "nlohmann/json.hpp"
#include "nvcodec_api.h"
using nlohmann::json;
@@ -227,6 +228,7 @@ int PeerConnection::CreateVideoCodec(bool hardware_acceleration) {
"MacOS not support hardware acceleration, use default software codec");
}
#else
InitNvCodecApi();
#endif
if (av1_encoding_) {