mirror of
https://github.com/kunkundi/crossdesk.git
synced 2025-10-27 04:35:34 +08:00
[fix] use dynamic DLL loading for cuda library
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
|
||||
#include "nvcodec_api.h"
|
||||
#include "nvcuvid.h"
|
||||
|
||||
#define START_TIMER auto start = std::chrono::steady_clock::now();
|
||||
@@ -31,7 +32,7 @@
|
||||
CUresult err__ = call; \
|
||||
if (err__ != CUDA_SUCCESS) { \
|
||||
const char *szErrName = NULL; \
|
||||
cuGetErrorName(err__, &szErrName); \
|
||||
cuGetErrorName_ld(err__, &szErrName); \
|
||||
std::ostringstream errorLog; \
|
||||
errorLog << "CUDA driver API error " << szErrName; \
|
||||
throw NVDECException::makeNVDECException( \
|
||||
@@ -199,9 +200,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
|
||||
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
|
||||
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
|
||||
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidGetDecoderCaps_ld(&decodecaps));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
|
||||
if (!decodecaps.bIsSupported) {
|
||||
NVDEC_THROW_ERROR("Codec not supported on this GPU",
|
||||
@@ -242,7 +243,7 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
|
||||
}
|
||||
|
||||
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
|
||||
// cuvidCreateDecoder() has been called before, and now there's possible
|
||||
// cuvidCreateDecoder_ld() has been called before, and now there's possible
|
||||
// config change
|
||||
return ReconfigureDecoder(pVideoFormat);
|
||||
}
|
||||
@@ -378,9 +379,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
|
||||
"Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
|
||||
m_videoInfo << std::endl;
|
||||
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidCreateDecoder_ld(&m_hDecoder, &videoDecodeCreateInfo));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
STOP_TIMER("Session Initialization Time: ");
|
||||
return nDecodeSurface;
|
||||
}
|
||||
@@ -506,9 +507,9 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
|
||||
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
|
||||
|
||||
START_TIMER
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidReconfigureDecoder_ld(m_hDecoder, &reconfigParams));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
STOP_TIMER("Session Reconfigure Time: ");
|
||||
|
||||
return nDecodeSurface;
|
||||
@@ -538,9 +539,9 @@ int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
|
||||
pFrame = m_vpFrame.back();
|
||||
m_vpFrame.pop_back();
|
||||
if (m_bUseDeviceFrame) {
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
CUDA_DRVAPI_CALL(cuMemFree_ld((CUdeviceptr)pFrame));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
} else {
|
||||
delete pFrame;
|
||||
}
|
||||
@@ -558,9 +559,9 @@ int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
|
||||
return false;
|
||||
}
|
||||
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidDecodePicture_ld(m_hDecoder, pPicParams));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -577,15 +578,15 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
|
||||
|
||||
CUdeviceptr dpSrcFrame = 0;
|
||||
unsigned int nSrcPitch = 0;
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index,
|
||||
&dpSrcFrame, &nSrcPitch,
|
||||
&videoProcessingParameters));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
NVDEC_API_CALL(cuvidMapVideoFrame64_ld(m_hDecoder, pDispInfo->picture_index,
|
||||
&dpSrcFrame, &nSrcPitch,
|
||||
&videoProcessingParameters));
|
||||
|
||||
CUVIDGETDECODESTATUS DecodeStatus;
|
||||
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
|
||||
CUresult result =
|
||||
cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
|
||||
CUresult result = cuvidGetDecodeStatus_ld(
|
||||
m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
|
||||
if (result == CUDA_SUCCESS &&
|
||||
(DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
|
||||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
|
||||
@@ -602,11 +603,12 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
|
||||
uint8_t *pFrame = NULL;
|
||||
if (m_bUseDeviceFrame) {
|
||||
if (m_bDeviceFramePitched) {
|
||||
CUDA_DRVAPI_CALL(cuMemAllocPitch(
|
||||
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
|
||||
(CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
|
||||
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
|
||||
} else {
|
||||
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
|
||||
CUDA_DRVAPI_CALL(
|
||||
cuMemAlloc_ld((CUdeviceptr *)&pFrame, GetFrameSize()));
|
||||
}
|
||||
} else {
|
||||
pFrame = new uint8_t[GetFrameSize()];
|
||||
@@ -627,7 +629,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
|
||||
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
|
||||
m.WidthInBytes = GetWidth() * m_nBPP;
|
||||
m.Height = m_nLumaHeight;
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
|
||||
|
||||
// Copy chroma plane
|
||||
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning
|
||||
@@ -637,7 +639,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
|
||||
m.dstDevice =
|
||||
(CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
|
||||
m.Height = m_nChromaHeight;
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
|
||||
|
||||
if (m_nNumChromaPlanes == 2) {
|
||||
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame +
|
||||
@@ -645,17 +647,17 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
|
||||
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
|
||||
m.dstPitch * m_nLumaHeight * 2);
|
||||
m.Height = m_nChromaHeight;
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream));
|
||||
}
|
||||
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuStreamSynchronize_ld(m_cuvidStream));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
|
||||
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
|
||||
m_vTimestamp.resize(m_vpFrame.size());
|
||||
}
|
||||
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
|
||||
|
||||
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
|
||||
NVDEC_API_CALL(cuvidUnmapVideoFrame64_ld(m_hDecoder, dpSrcFrame));
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -673,7 +675,7 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
|
||||
if (pCropRect) m_cropRect = *pCropRect;
|
||||
if (pResizeDim) m_resizeDim = *pResizeDim;
|
||||
|
||||
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
|
||||
NVDEC_API_CALL(cuvidCtxLockCreate_ld(&m_ctxLock, cuContext));
|
||||
|
||||
CUVIDPARSERPARAMS videoParserParameters = {};
|
||||
videoParserParameters.CodecType = eCodec;
|
||||
@@ -685,32 +687,32 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame,
|
||||
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
|
||||
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
|
||||
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
|
||||
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
|
||||
NVDEC_API_CALL(cuvidCreateVideoParser_ld(&m_hParser, &videoParserParameters));
|
||||
}
|
||||
|
||||
NvDecoder::~NvDecoder() {
|
||||
START_TIMER
|
||||
|
||||
if (m_hParser) {
|
||||
cuvidDestroyVideoParser(m_hParser);
|
||||
cuvidDestroyVideoParser_ld(m_hParser);
|
||||
}
|
||||
cuCtxPushCurrent(m_cuContext);
|
||||
cuCtxPushCurrent_ld(m_cuContext);
|
||||
if (m_hDecoder) {
|
||||
cuvidDestroyDecoder(m_hDecoder);
|
||||
cuvidDestroyDecoder_ld(m_hDecoder);
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
|
||||
|
||||
for (uint8_t *pFrame : m_vpFrame) {
|
||||
if (m_bUseDeviceFrame) {
|
||||
cuMemFree((CUdeviceptr)pFrame);
|
||||
cuMemFree_ld((CUdeviceptr)pFrame);
|
||||
} else {
|
||||
delete[] pFrame;
|
||||
}
|
||||
}
|
||||
cuCtxPopCurrent(NULL);
|
||||
cuCtxPopCurrent_ld(NULL);
|
||||
|
||||
cuvidCtxLockDestroy(m_ctxLock);
|
||||
cuvidCtxLockDestroy_ld(m_ctxLock);
|
||||
|
||||
STOP_TIMER("Session Deinitialization Time: ");
|
||||
}
|
||||
@@ -727,8 +729,8 @@ int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags,
|
||||
if (!pData || nSize == 0) {
|
||||
packet.flags |= CUVID_PKT_ENDOFSTREAM;
|
||||
}
|
||||
// NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
|
||||
if (CUDA_SUCCESS != cuvidParseVideoData(m_hParser, &packet)) {
|
||||
// NVDEC_API_CALL(cuvidParseVideoData_ld(m_hParser, &packet));
|
||||
if (CUDA_SUCCESS != cuvidParseVideoData_ld(m_hParser, &packet)) {
|
||||
return 0;
|
||||
}
|
||||
m_cuvidStream = 0;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include "nvidia_video_decoder.h"
|
||||
|
||||
#include "log.h"
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
#define SAVE_RECEIVED_H264_STREAM 0
|
||||
#define SAVE_DECODED_NV12_STREAM 0
|
||||
@@ -21,20 +22,20 @@ NvidiaVideoDecoder::~NvidiaVideoDecoder() {
|
||||
}
|
||||
|
||||
int NvidiaVideoDecoder::Init() {
|
||||
ck(cuInit(0));
|
||||
ck(cuInit_ld(0));
|
||||
int nGpu = 0;
|
||||
int iGpu = 0;
|
||||
|
||||
ck(cuDeviceGetCount(&nGpu));
|
||||
ck(cuDeviceGetCount_ld(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
CUdevice cuDevice;
|
||||
cuDeviceGet(&cuDevice, iGpu);
|
||||
cuDeviceGet_ld(&cuDevice, iGpu);
|
||||
|
||||
CUcontext cuContext = NULL;
|
||||
cuCtxCreate(&cuContext, 0, cuDevice);
|
||||
cuCtxCreate_ld(&cuContext, 0, cuDevice);
|
||||
if (!cuContext) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#endif
|
||||
|
||||
#include "log.h"
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
VideoDecoderFactory::VideoDecoderFactory() {}
|
||||
|
||||
@@ -44,9 +45,8 @@ bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() {
|
||||
return false;
|
||||
#else
|
||||
CUresult cuResult;
|
||||
|
||||
CUvideoctxlock cudaCtxLock;
|
||||
cuResult = cuvidCtxLockCreate(&cudaCtxLock, 0);
|
||||
cuResult = cuvidCtxLockCreate_ld(&cudaCtxLock, 0);
|
||||
if (cuResult != CUDA_SUCCESS) {
|
||||
LOG_WARN(
|
||||
"System not support hardware accelerated decode, use default software "
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
#include "NvEncoder.h"
|
||||
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <cstring>
|
||||
static inline bool operator==(const GUID &guid1, const GUID &guid2) {
|
||||
@@ -60,7 +62,7 @@ void NvEncoder::LoadNvEncApi() {
|
||||
uint32_t version = 0;
|
||||
uint32_t currentVersion =
|
||||
(NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
|
||||
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version));
|
||||
NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion_ld(&version));
|
||||
if (currentVersion > version) {
|
||||
NVENC_THROW_ERROR(
|
||||
"Current Driver Version does not support this NvEncodeAPI version, "
|
||||
@@ -69,7 +71,7 @@ void NvEncoder::LoadNvEncApi() {
|
||||
}
|
||||
|
||||
m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER};
|
||||
NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc));
|
||||
NVENC_API_CALL(NvEncodeAPICreateInstance_ld(&m_nvenc));
|
||||
}
|
||||
|
||||
NvEncoder::~NvEncoder() { DestroyHWEncoder(); }
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
|
||||
#include "NvEncoderCuda.h"
|
||||
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth,
|
||||
uint32_t nHeight,
|
||||
NV_ENC_BUFFER_FORMAT eBufferFormat,
|
||||
@@ -44,7 +46,7 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
|
||||
int numCount = m_bMotionEstimationOnly ? 2 : 1;
|
||||
|
||||
for (int count = 0; count < numCount; count++) {
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext));
|
||||
std::vector<void *> inputFrames;
|
||||
for (int i = 0; i < numInputBuffers; i++) {
|
||||
CUdeviceptr pDeviceFrame;
|
||||
@@ -54,13 +56,13 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) {
|
||||
if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 ||
|
||||
GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV)
|
||||
chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight());
|
||||
CUDA_DRVAPI_CALL(cuMemAllocPitch(
|
||||
CUDA_DRVAPI_CALL(cuMemAllocPitch_ld(
|
||||
(CUdeviceptr *)&pDeviceFrame, &m_cudaPitch,
|
||||
GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()),
|
||||
GetMaxEncodeHeight() + chromaHeight, 16));
|
||||
inputFrames.push_back((void *)pDeviceFrame);
|
||||
}
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
|
||||
RegisterInputResources(
|
||||
inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
|
||||
@@ -88,23 +90,24 @@ void NvEncoderCuda::ReleaseCudaResources() {
|
||||
|
||||
UnregisterInputResources();
|
||||
|
||||
cuCtxPushCurrent(m_cuContext);
|
||||
cuCtxPushCurrent_ld(m_cuContext);
|
||||
|
||||
for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) {
|
||||
if (m_vInputFrames[i].inputPtr) {
|
||||
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
|
||||
cuMemFree_ld(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr));
|
||||
}
|
||||
}
|
||||
m_vInputFrames.clear();
|
||||
|
||||
for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) {
|
||||
if (m_vReferenceFrames[i].inputPtr) {
|
||||
cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
|
||||
cuMemFree_ld(
|
||||
reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr));
|
||||
}
|
||||
}
|
||||
m_vReferenceFrames.clear();
|
||||
|
||||
cuCtxPopCurrent(NULL);
|
||||
cuCtxPopCurrent_ld(NULL);
|
||||
m_cuContext = nullptr;
|
||||
}
|
||||
|
||||
@@ -120,7 +123,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
|
||||
NV_ENC_ERR_INVALID_PARAM);
|
||||
}
|
||||
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
|
||||
|
||||
uint32_t srcPitch =
|
||||
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
|
||||
@@ -138,10 +141,10 @@ void NvEncoderCuda::CopyToDeviceFrame(
|
||||
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
|
||||
m.Height = height;
|
||||
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
|
||||
} else {
|
||||
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
|
||||
: cuMemcpy2DAsync(&m, stream));
|
||||
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
|
||||
: cuMemcpy2DAsync_ld(&m, stream));
|
||||
}
|
||||
|
||||
std::vector<uint32_t> srcChromaOffsets;
|
||||
@@ -167,14 +170,14 @@ void NvEncoderCuda::CopyToDeviceFrame(
|
||||
m.WidthInBytes = chromaWidthInBytes;
|
||||
m.Height = chromaHeight;
|
||||
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
|
||||
} else {
|
||||
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m)
|
||||
: cuMemcpy2DAsync(&m, stream));
|
||||
CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m)
|
||||
: cuMemcpy2DAsync_ld(&m, stream));
|
||||
}
|
||||
}
|
||||
}
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
}
|
||||
|
||||
void NvEncoderCuda::CopyToDeviceFrame(
|
||||
@@ -189,7 +192,7 @@ void NvEncoderCuda::CopyToDeviceFrame(
|
||||
NV_ENC_ERR_INVALID_PARAM);
|
||||
}
|
||||
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent(device));
|
||||
CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device));
|
||||
|
||||
uint32_t srcPitch =
|
||||
nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width);
|
||||
@@ -207,9 +210,9 @@ void NvEncoderCuda::CopyToDeviceFrame(
|
||||
m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width);
|
||||
m.Height = height;
|
||||
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
|
||||
} else {
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
|
||||
}
|
||||
|
||||
std::vector<uint32_t> srcChromaOffsets;
|
||||
@@ -234,11 +237,11 @@ void NvEncoderCuda::CopyToDeviceFrame(
|
||||
m.WidthInBytes = chromaWidthInBytes;
|
||||
m.Height = chromaHeight;
|
||||
if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) {
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m));
|
||||
} else {
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2D(&m));
|
||||
CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m));
|
||||
}
|
||||
}
|
||||
}
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
|
||||
CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL));
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
CUresult err__ = call; \
|
||||
if (err__ != CUDA_SUCCESS) { \
|
||||
const char* szErrName = NULL; \
|
||||
cuGetErrorName(err__, &szErrName); \
|
||||
cuGetErrorName_ld(err__, &szErrName); \
|
||||
std::ostringstream errorLog; \
|
||||
errorLog << "CUDA driver API error " << szErrName; \
|
||||
throw NVENCException::makeNVENCException( \
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <chrono>
|
||||
|
||||
#include "log.h"
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
#define SAVE_RECEIVED_NV12_STREAM 0
|
||||
#define SAVE_ENCODED_H264_STREAM 0
|
||||
@@ -32,11 +33,12 @@ int NvidiaVideoEncoder::Init() {
|
||||
int num_of_GPUs = 0;
|
||||
CUdevice cuda_device;
|
||||
bool cuda_ctx_succeed =
|
||||
(index_of_GPU >= 0 && cuInit(0) == CUresult::CUDA_SUCCESS &&
|
||||
cuDeviceGetCount(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
|
||||
(index_of_GPU >= 0 && cuInit_ld(0) == CUresult::CUDA_SUCCESS &&
|
||||
cuDeviceGetCount_ld(&num_of_GPUs) == CUresult::CUDA_SUCCESS &&
|
||||
(num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) &&
|
||||
cuDeviceGet(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
|
||||
cuCtxCreate(&cuda_context_, 0, cuda_device) == CUresult::CUDA_SUCCESS);
|
||||
cuDeviceGet_ld(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS &&
|
||||
cuCtxCreate_ld(&cuda_context_, 0, cuda_device) ==
|
||||
CUresult::CUDA_SUCCESS);
|
||||
if (!cuda_ctx_succeed) {
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#endif
|
||||
|
||||
#include "log.h"
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
VideoEncoderFactory::VideoEncoderFactory() {}
|
||||
|
||||
@@ -46,7 +47,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
|
||||
CUresult cuResult;
|
||||
NV_ENCODE_API_FUNCTION_LIST functionList = {NV_ENCODE_API_FUNCTION_LIST_VER};
|
||||
|
||||
cuResult = cuInit(0);
|
||||
cuResult = cuInit_ld(0);
|
||||
if (cuResult != CUDA_SUCCESS) {
|
||||
LOG_WARN(
|
||||
"System not support hardware accelerated encode, use default software "
|
||||
@@ -54,7 +55,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() {
|
||||
return false;
|
||||
}
|
||||
|
||||
NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance(&functionList);
|
||||
NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance_ld(&functionList);
|
||||
if (nvEncStatus != NV_ENC_SUCCESS) {
|
||||
LOG_WARN(
|
||||
"System not support hardware accelerated encode, use default software "
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "common.h"
|
||||
#include "log.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "nvcodec_api.h"
|
||||
|
||||
using nlohmann::json;
|
||||
|
||||
@@ -227,6 +228,7 @@ int PeerConnection::CreateVideoCodec(bool hardware_acceleration) {
|
||||
"MacOS not support hardware acceleration, use default software codec");
|
||||
}
|
||||
#else
|
||||
InitNvCodecApi();
|
||||
#endif
|
||||
|
||||
if (av1_encoding_) {
|
||||
|
||||
Reference in New Issue
Block a user