mirror of
				https://github.com/kunkundi/crossdesk.git
				synced 2025-10-27 04:35:34 +08:00 
			
		
		
		
	[fix] use dynamic DLL loading for cuda library
This commit is contained in:
		| @@ -16,6 +16,7 @@ | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
|  | ||||
| #include "nvcodec_api.h" | ||||
| #include "nvcuvid.h" | ||||
|  | ||||
| #define START_TIMER auto start = std::chrono::steady_clock::now(); | ||||
| @@ -31,7 +32,7 @@ | ||||
|     CUresult err__ = call;                                          \ | ||||
|     if (err__ != CUDA_SUCCESS) {                                    \ | ||||
|       const char *szErrName = NULL;                                 \ | ||||
|       cuGetErrorName(err__, &szErrName);                            \ | ||||
|       cuGetErrorName_ld(err__, &szErrName);                         \ | ||||
|       std::ostringstream errorLog;                                  \ | ||||
|       errorLog << "CUDA driver API error " << szErrName;            \ | ||||
|       throw NVDECException::makeNVDECException(                     \ | ||||
| @@ -199,9 +200,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) { | ||||
|   decodecaps.eChromaFormat = pVideoFormat->chroma_format; | ||||
|   decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; | ||||
|  | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidGetDecoderCaps_ld(&decodecaps)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|  | ||||
|   if (!decodecaps.bIsSupported) { | ||||
|     NVDEC_THROW_ERROR("Codec not supported on this GPU", | ||||
| @@ -242,7 +243,7 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) { | ||||
|   } | ||||
|  | ||||
|   if (m_nWidth && m_nLumaHeight && m_nChromaHeight) { | ||||
|     // cuvidCreateDecoder() has been called before, and now there's possible | ||||
|     // cuvidCreateDecoder_ld() has been called before, and now there's possible | ||||
|     // config change | ||||
|     return ReconfigureDecoder(pVideoFormat); | ||||
|   } | ||||
| @@ -378,9 +379,9 @@ int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) { | ||||
|                      "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode]; | ||||
|   m_videoInfo << std::endl; | ||||
|  | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidCreateDecoder_ld(&m_hDecoder, &videoDecodeCreateInfo)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|   STOP_TIMER("Session Initialization Time: "); | ||||
|   return nDecodeSurface; | ||||
| } | ||||
| @@ -506,9 +507,9 @@ int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) { | ||||
|   reconfigParams.ulNumDecodeSurfaces = nDecodeSurface; | ||||
|  | ||||
|   START_TIMER | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidReconfigureDecoder_ld(m_hDecoder, &reconfigParams)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|   STOP_TIMER("Session Reconfigure Time: "); | ||||
|  | ||||
|   return nDecodeSurface; | ||||
| @@ -538,9 +539,9 @@ int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) { | ||||
|     pFrame = m_vpFrame.back(); | ||||
|     m_vpFrame.pop_back(); | ||||
|     if (m_bUseDeviceFrame) { | ||||
|       CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|       CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame)); | ||||
|       CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|       CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|       CUDA_DRVAPI_CALL(cuMemFree_ld((CUdeviceptr)pFrame)); | ||||
|       CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|     } else { | ||||
|       delete pFrame; | ||||
|     } | ||||
| @@ -558,9 +559,9 @@ int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) { | ||||
|     return false; | ||||
|   } | ||||
|   m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++; | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidDecodePicture_ld(m_hDecoder, pPicParams)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|   return 1; | ||||
| } | ||||
|  | ||||
| @@ -577,15 +578,15 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) { | ||||
|  | ||||
|   CUdeviceptr dpSrcFrame = 0; | ||||
|   unsigned int nSrcPitch = 0; | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, | ||||
|                                     &dpSrcFrame, &nSrcPitch, | ||||
|                                     &videoProcessingParameters)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|   NVDEC_API_CALL(cuvidMapVideoFrame64_ld(m_hDecoder, pDispInfo->picture_index, | ||||
|                                          &dpSrcFrame, &nSrcPitch, | ||||
|                                          &videoProcessingParameters)); | ||||
|  | ||||
|   CUVIDGETDECODESTATUS DecodeStatus; | ||||
|   memset(&DecodeStatus, 0, sizeof(DecodeStatus)); | ||||
|   CUresult result = | ||||
|       cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus); | ||||
|   CUresult result = cuvidGetDecodeStatus_ld( | ||||
|       m_hDecoder, pDispInfo->picture_index, &DecodeStatus); | ||||
|   if (result == CUDA_SUCCESS && | ||||
|       (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || | ||||
|        DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) { | ||||
| @@ -602,11 +603,12 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) { | ||||
|       uint8_t *pFrame = NULL; | ||||
|       if (m_bUseDeviceFrame) { | ||||
|         if (m_bDeviceFramePitched) { | ||||
|           CUDA_DRVAPI_CALL(cuMemAllocPitch( | ||||
|           CUDA_DRVAPI_CALL(cuMemAllocPitch_ld( | ||||
|               (CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP, | ||||
|               m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16)); | ||||
|         } else { | ||||
|           CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize())); | ||||
|           CUDA_DRVAPI_CALL( | ||||
|               cuMemAlloc_ld((CUdeviceptr *)&pFrame, GetFrameSize())); | ||||
|         } | ||||
|       } else { | ||||
|         pFrame = new uint8_t[GetFrameSize()]; | ||||
| @@ -627,7 +629,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) { | ||||
|   m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP; | ||||
|   m.WidthInBytes = GetWidth() * m_nBPP; | ||||
|   m.Height = m_nLumaHeight; | ||||
|   CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream)); | ||||
|   CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream)); | ||||
|  | ||||
|   // Copy chroma plane | ||||
|   // NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning | ||||
| @@ -637,7 +639,7 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) { | ||||
|   m.dstDevice = | ||||
|       (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight); | ||||
|   m.Height = m_nChromaHeight; | ||||
|   CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream)); | ||||
|   CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream)); | ||||
|  | ||||
|   if (m_nNumChromaPlanes == 2) { | ||||
|     m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + | ||||
| @@ -645,17 +647,17 @@ int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) { | ||||
|     m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + | ||||
|                                             m.dstPitch * m_nLumaHeight * 2); | ||||
|     m.Height = m_nChromaHeight; | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream)); | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2DAsync_ld(&m, m_cuvidStream)); | ||||
|   } | ||||
|   CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuStreamSynchronize_ld(m_cuvidStream)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|  | ||||
|   if ((int)m_vTimestamp.size() < m_nDecodedFrame) { | ||||
|     m_vTimestamp.resize(m_vpFrame.size()); | ||||
|   } | ||||
|   m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp; | ||||
|  | ||||
|   NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame)); | ||||
|   NVDEC_API_CALL(cuvidUnmapVideoFrame64_ld(m_hDecoder, dpSrcFrame)); | ||||
|   return 1; | ||||
| } | ||||
|  | ||||
| @@ -673,7 +675,7 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, | ||||
|   if (pCropRect) m_cropRect = *pCropRect; | ||||
|   if (pResizeDim) m_resizeDim = *pResizeDim; | ||||
|  | ||||
|   NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext)); | ||||
|   NVDEC_API_CALL(cuvidCtxLockCreate_ld(&m_ctxLock, cuContext)); | ||||
|  | ||||
|   CUVIDPARSERPARAMS videoParserParameters = {}; | ||||
|   videoParserParameters.CodecType = eCodec; | ||||
| @@ -685,32 +687,32 @@ NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, | ||||
|   videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc; | ||||
|   videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc; | ||||
|   videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc; | ||||
|   NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters)); | ||||
|   NVDEC_API_CALL(cuvidCreateVideoParser_ld(&m_hParser, &videoParserParameters)); | ||||
| } | ||||
|  | ||||
| NvDecoder::~NvDecoder() { | ||||
|   START_TIMER | ||||
|  | ||||
|   if (m_hParser) { | ||||
|     cuvidDestroyVideoParser(m_hParser); | ||||
|     cuvidDestroyVideoParser_ld(m_hParser); | ||||
|   } | ||||
|   cuCtxPushCurrent(m_cuContext); | ||||
|   cuCtxPushCurrent_ld(m_cuContext); | ||||
|   if (m_hDecoder) { | ||||
|     cuvidDestroyDecoder(m_hDecoder); | ||||
|     cuvidDestroyDecoder_ld(m_hDecoder); | ||||
|   } | ||||
|  | ||||
|   std::lock_guard<std::mutex> lock(m_mtxVPFrame); | ||||
|  | ||||
|   for (uint8_t *pFrame : m_vpFrame) { | ||||
|     if (m_bUseDeviceFrame) { | ||||
|       cuMemFree((CUdeviceptr)pFrame); | ||||
|       cuMemFree_ld((CUdeviceptr)pFrame); | ||||
|     } else { | ||||
|       delete[] pFrame; | ||||
|     } | ||||
|   } | ||||
|   cuCtxPopCurrent(NULL); | ||||
|   cuCtxPopCurrent_ld(NULL); | ||||
|  | ||||
|   cuvidCtxLockDestroy(m_ctxLock); | ||||
|   cuvidCtxLockDestroy_ld(m_ctxLock); | ||||
|  | ||||
|   STOP_TIMER("Session Deinitialization Time: "); | ||||
| } | ||||
| @@ -727,8 +729,8 @@ int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags, | ||||
|   if (!pData || nSize == 0) { | ||||
|     packet.flags |= CUVID_PKT_ENDOFSTREAM; | ||||
|   } | ||||
|   // NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet)); | ||||
|   if (CUDA_SUCCESS != cuvidParseVideoData(m_hParser, &packet)) { | ||||
|   // NVDEC_API_CALL(cuvidParseVideoData_ld(m_hParser, &packet)); | ||||
|   if (CUDA_SUCCESS != cuvidParseVideoData_ld(m_hParser, &packet)) { | ||||
|     return 0; | ||||
|   } | ||||
|   m_cuvidStream = 0; | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| #include "nvidia_video_decoder.h" | ||||
|  | ||||
| #include "log.h" | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| #define SAVE_RECEIVED_H264_STREAM 0 | ||||
| #define SAVE_DECODED_NV12_STREAM 0 | ||||
| @@ -21,20 +22,20 @@ NvidiaVideoDecoder::~NvidiaVideoDecoder() { | ||||
| } | ||||
|  | ||||
| int NvidiaVideoDecoder::Init() { | ||||
|   ck(cuInit(0)); | ||||
|   ck(cuInit_ld(0)); | ||||
|   int nGpu = 0; | ||||
|   int iGpu = 0; | ||||
|  | ||||
|   ck(cuDeviceGetCount(&nGpu)); | ||||
|   ck(cuDeviceGetCount_ld(&nGpu)); | ||||
|   if (nGpu < 1) { | ||||
|     return -1; | ||||
|   } | ||||
|  | ||||
|   CUdevice cuDevice; | ||||
|   cuDeviceGet(&cuDevice, iGpu); | ||||
|   cuDeviceGet_ld(&cuDevice, iGpu); | ||||
|  | ||||
|   CUcontext cuContext = NULL; | ||||
|   cuCtxCreate(&cuContext, 0, cuDevice); | ||||
|   cuCtxCreate_ld(&cuContext, 0, cuDevice); | ||||
|   if (!cuContext) { | ||||
|     return -1; | ||||
|   } | ||||
|   | ||||
| @@ -10,6 +10,7 @@ | ||||
| #endif | ||||
|  | ||||
| #include "log.h" | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| VideoDecoderFactory::VideoDecoderFactory() {} | ||||
|  | ||||
| @@ -44,9 +45,8 @@ bool VideoDecoderFactory::CheckIsHardwareAccerlerationSupported() { | ||||
|   return false; | ||||
| #else | ||||
|   CUresult cuResult; | ||||
|  | ||||
|   CUvideoctxlock cudaCtxLock; | ||||
|   cuResult = cuvidCtxLockCreate(&cudaCtxLock, 0); | ||||
|   cuResult = cuvidCtxLockCreate_ld(&cudaCtxLock, 0); | ||||
|   if (cuResult != CUDA_SUCCESS) { | ||||
|     LOG_WARN( | ||||
|         "System not support hardware accelerated decode, use default software " | ||||
|   | ||||
| @@ -11,6 +11,8 @@ | ||||
|  | ||||
| #include "NvEncoder.h" | ||||
|  | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| #ifndef _WIN32 | ||||
| #include <cstring> | ||||
| static inline bool operator==(const GUID &guid1, const GUID &guid2) { | ||||
| @@ -60,7 +62,7 @@ void NvEncoder::LoadNvEncApi() { | ||||
|   uint32_t version = 0; | ||||
|   uint32_t currentVersion = | ||||
|       (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION; | ||||
|   NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version)); | ||||
|   NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion_ld(&version)); | ||||
|   if (currentVersion > version) { | ||||
|     NVENC_THROW_ERROR( | ||||
|         "Current Driver Version does not support this NvEncodeAPI version, " | ||||
| @@ -69,7 +71,7 @@ void NvEncoder::LoadNvEncApi() { | ||||
|   } | ||||
|  | ||||
|   m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER}; | ||||
|   NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc)); | ||||
|   NVENC_API_CALL(NvEncodeAPICreateInstance_ld(&m_nvenc)); | ||||
| } | ||||
|  | ||||
| NvEncoder::~NvEncoder() { DestroyHWEncoder(); } | ||||
|   | ||||
| @@ -11,6 +11,8 @@ | ||||
|  | ||||
| #include "NvEncoderCuda.h" | ||||
|  | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, | ||||
|                              uint32_t nHeight, | ||||
|                              NV_ENC_BUFFER_FORMAT eBufferFormat, | ||||
| @@ -44,7 +46,7 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) { | ||||
|   int numCount = m_bMotionEstimationOnly ? 2 : 1; | ||||
|  | ||||
|   for (int count = 0; count < numCount; count++) { | ||||
|     CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); | ||||
|     CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(m_cuContext)); | ||||
|     std::vector<void *> inputFrames; | ||||
|     for (int i = 0; i < numInputBuffers; i++) { | ||||
|       CUdeviceptr pDeviceFrame; | ||||
| @@ -54,13 +56,13 @@ void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) { | ||||
|       if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || | ||||
|           GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV) | ||||
|         chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); | ||||
|       CUDA_DRVAPI_CALL(cuMemAllocPitch( | ||||
|       CUDA_DRVAPI_CALL(cuMemAllocPitch_ld( | ||||
|           (CUdeviceptr *)&pDeviceFrame, &m_cudaPitch, | ||||
|           GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()), | ||||
|           GetMaxEncodeHeight() + chromaHeight, 16)); | ||||
|       inputFrames.push_back((void *)pDeviceFrame); | ||||
|     } | ||||
|     CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|     CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
|  | ||||
|     RegisterInputResources( | ||||
|         inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR, | ||||
| @@ -88,23 +90,24 @@ void NvEncoderCuda::ReleaseCudaResources() { | ||||
|  | ||||
|   UnregisterInputResources(); | ||||
|  | ||||
|   cuCtxPushCurrent(m_cuContext); | ||||
|   cuCtxPushCurrent_ld(m_cuContext); | ||||
|  | ||||
|   for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) { | ||||
|     if (m_vInputFrames[i].inputPtr) { | ||||
|       cuMemFree(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr)); | ||||
|       cuMemFree_ld(reinterpret_cast<CUdeviceptr>(m_vInputFrames[i].inputPtr)); | ||||
|     } | ||||
|   } | ||||
|   m_vInputFrames.clear(); | ||||
|  | ||||
|   for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) { | ||||
|     if (m_vReferenceFrames[i].inputPtr) { | ||||
|       cuMemFree(reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr)); | ||||
|       cuMemFree_ld( | ||||
|           reinterpret_cast<CUdeviceptr>(m_vReferenceFrames[i].inputPtr)); | ||||
|     } | ||||
|   } | ||||
|   m_vReferenceFrames.clear(); | ||||
|  | ||||
|   cuCtxPopCurrent(NULL); | ||||
|   cuCtxPopCurrent_ld(NULL); | ||||
|   m_cuContext = nullptr; | ||||
| } | ||||
|  | ||||
| @@ -120,7 +123,7 @@ void NvEncoderCuda::CopyToDeviceFrame( | ||||
|                       NV_ENC_ERR_INVALID_PARAM); | ||||
|   } | ||||
|  | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(device)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device)); | ||||
|  | ||||
|   uint32_t srcPitch = | ||||
|       nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); | ||||
| @@ -138,10 +141,10 @@ void NvEncoderCuda::CopyToDeviceFrame( | ||||
|   m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); | ||||
|   m.Height = height; | ||||
|   if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) { | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m)); | ||||
|   } else { | ||||
|     CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m) | ||||
|                                     : cuMemcpy2DAsync(&m, stream)); | ||||
|     CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m) | ||||
|                                     : cuMemcpy2DAsync_ld(&m, stream)); | ||||
|   } | ||||
|  | ||||
|   std::vector<uint32_t> srcChromaOffsets; | ||||
| @@ -167,14 +170,14 @@ void NvEncoderCuda::CopyToDeviceFrame( | ||||
|       m.WidthInBytes = chromaWidthInBytes; | ||||
|       m.Height = chromaHeight; | ||||
|       if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) { | ||||
|         CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); | ||||
|         CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m)); | ||||
|       } else { | ||||
|         CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D(&m) | ||||
|                                         : cuMemcpy2DAsync(&m, stream)); | ||||
|         CUDA_DRVAPI_CALL(stream == NULL ? cuMemcpy2D_ld(&m) | ||||
|                                         : cuMemcpy2DAsync_ld(&m, stream)); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
| } | ||||
|  | ||||
| void NvEncoderCuda::CopyToDeviceFrame( | ||||
| @@ -189,7 +192,7 @@ void NvEncoderCuda::CopyToDeviceFrame( | ||||
|                       NV_ENC_ERR_INVALID_PARAM); | ||||
|   } | ||||
|  | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent(device)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPushCurrent_ld(device)); | ||||
|  | ||||
|   uint32_t srcPitch = | ||||
|       nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); | ||||
| @@ -207,9 +210,9 @@ void NvEncoderCuda::CopyToDeviceFrame( | ||||
|   m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); | ||||
|   m.Height = height; | ||||
|   if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) { | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m)); | ||||
|   } else { | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2D(&m)); | ||||
|     CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m)); | ||||
|   } | ||||
|  | ||||
|   std::vector<uint32_t> srcChromaOffsets; | ||||
| @@ -234,11 +237,11 @@ void NvEncoderCuda::CopyToDeviceFrame( | ||||
|       m.WidthInBytes = chromaWidthInBytes; | ||||
|       m.Height = chromaHeight; | ||||
|       if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) { | ||||
|         CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); | ||||
|         CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned_ld(&m)); | ||||
|       } else { | ||||
|         CUDA_DRVAPI_CALL(cuMemcpy2D(&m)); | ||||
|         CUDA_DRVAPI_CALL(cuMemcpy2D_ld(&m)); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); | ||||
|   CUDA_DRVAPI_CALL(cuCtxPopCurrent_ld(NULL)); | ||||
| } | ||||
|   | ||||
| @@ -24,7 +24,7 @@ | ||||
|     CUresult err__ = call;                                            \ | ||||
|     if (err__ != CUDA_SUCCESS) {                                      \ | ||||
|       const char* szErrName = NULL;                                   \ | ||||
|       cuGetErrorName(err__, &szErrName);                              \ | ||||
|       cuGetErrorName_ld(err__, &szErrName);                           \ | ||||
|       std::ostringstream errorLog;                                    \ | ||||
|       errorLog << "CUDA driver API error " << szErrName;              \ | ||||
|       throw NVENCException::makeNVENCException(                       \ | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| #include <chrono> | ||||
|  | ||||
| #include "log.h" | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| #define SAVE_RECEIVED_NV12_STREAM 0 | ||||
| #define SAVE_ENCODED_H264_STREAM 0 | ||||
| @@ -32,11 +33,12 @@ int NvidiaVideoEncoder::Init() { | ||||
|   int num_of_GPUs = 0; | ||||
|   CUdevice cuda_device; | ||||
|   bool cuda_ctx_succeed = | ||||
|       (index_of_GPU >= 0 && cuInit(0) == CUresult::CUDA_SUCCESS && | ||||
|        cuDeviceGetCount(&num_of_GPUs) == CUresult::CUDA_SUCCESS && | ||||
|       (index_of_GPU >= 0 && cuInit_ld(0) == CUresult::CUDA_SUCCESS && | ||||
|        cuDeviceGetCount_ld(&num_of_GPUs) == CUresult::CUDA_SUCCESS && | ||||
|        (num_of_GPUs > 0 && index_of_GPU < num_of_GPUs) && | ||||
|        cuDeviceGet(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS && | ||||
|        cuCtxCreate(&cuda_context_, 0, cuda_device) == CUresult::CUDA_SUCCESS); | ||||
|        cuDeviceGet_ld(&cuda_device, index_of_GPU) == CUresult::CUDA_SUCCESS && | ||||
|        cuCtxCreate_ld(&cuda_context_, 0, cuda_device) == | ||||
|            CUresult::CUDA_SUCCESS); | ||||
|   if (!cuda_ctx_succeed) { | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,7 @@ | ||||
| #endif | ||||
|  | ||||
| #include "log.h" | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| VideoEncoderFactory::VideoEncoderFactory() {} | ||||
|  | ||||
| @@ -46,7 +47,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() { | ||||
|   CUresult cuResult; | ||||
|   NV_ENCODE_API_FUNCTION_LIST functionList = {NV_ENCODE_API_FUNCTION_LIST_VER}; | ||||
|  | ||||
|   cuResult = cuInit(0); | ||||
|   cuResult = cuInit_ld(0); | ||||
|   if (cuResult != CUDA_SUCCESS) { | ||||
|     LOG_WARN( | ||||
|         "System not support hardware accelerated encode, use default software " | ||||
| @@ -54,7 +55,7 @@ bool VideoEncoderFactory::CheckIsHardwareAccerlerationSupported() { | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance(&functionList); | ||||
|   NVENCSTATUS nvEncStatus = NvEncodeAPICreateInstance_ld(&functionList); | ||||
|   if (nvEncStatus != NV_ENC_SUCCESS) { | ||||
|     LOG_WARN( | ||||
|         "System not support hardware accelerated encode, use default software " | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
| #include "common.h" | ||||
| #include "log.h" | ||||
| #include "nlohmann/json.hpp" | ||||
| #include "nvcodec_api.h" | ||||
|  | ||||
| using nlohmann::json; | ||||
|  | ||||
| @@ -227,6 +228,7 @@ int PeerConnection::CreateVideoCodec(bool hardware_acceleration) { | ||||
|         "MacOS not support hardware acceleration, use default software codec"); | ||||
|   } | ||||
| #else | ||||
|   InitNvCodecApi(); | ||||
| #endif | ||||
|  | ||||
|   if (av1_encoding_) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user