[fix] fix nvidia encoder crash during reconfigure the resolution

This commit is contained in:
dijunkun
2024-09-10 17:32:43 +08:00
parent 2f16d22ab7
commit a8333c622b
40 changed files with 4507 additions and 25450 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -1106,7 +1106,6 @@ extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
#endif
#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
/****************************************************************************************************************************/
//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
//! unsigned int * pPitch, CUVIDPROCPARAMS *pVPP);
@@ -1126,7 +1125,6 @@ extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned
#define cuvidMapVideoFrame cuvidMapVideoFrame64
#define cuvidUnmapVideoFrame cuvidUnmapVideoFrame64
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
* Copyright (c) 2010-2024 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -28,7 +28,7 @@
/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
//! \date 2015-2020
//! \date 2015-2024
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/
@@ -41,6 +41,7 @@
extern "C" {
#endif /* __cplusplus */
#define MAX_CLOCK_TS 3
/***********************************************/
//!
@@ -78,6 +79,106 @@ typedef enum {
cudaAudioCodec_AAC, /**< AAC Audio */
} cudaAudioCodec;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODESET
//! Used to store Time code set extracted from H264 and HEVC codecs
/************************************************************************/
typedef struct _TIMECODESET
{
unsigned int time_offset_value;
unsigned short n_frames;
unsigned char clock_timestamp_flag;
unsigned char units_field_based_flag;
unsigned char counting_type;
unsigned char full_timestamp_flag;
unsigned char discontinuity_flag;
unsigned char cnt_dropped_flag;
unsigned char seconds_value;
unsigned char minutes_value;
unsigned char hours_value;
unsigned char seconds_flag;
unsigned char minutes_flag;
unsigned char hours_flag;
unsigned char time_offset_length;
unsigned char reserved;
} TIMECODESET;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODE
//! Used to extract Time code in H264 and HEVC codecs
/************************************************************************/
typedef struct _TIMECODE
{
TIMECODESET time_code_set[MAX_CLOCK_TS];
unsigned char num_clock_ts;
} TIMECODE;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEIMASTERINGDISPLAYINFO
//! Used to extract mastering display color volume SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEIMASTERINGDISPLAYINFO
{
unsigned short display_primaries_x[3];
unsigned short display_primaries_y[3];
unsigned short white_point_x;
unsigned short white_point_y;
unsigned int max_display_mastering_luminance;
unsigned int min_display_mastering_luminance;
} SEIMASTERINGDISPLAYINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEICONTENTLIGHTLEVELINFO
//! Used to extract content light level info SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEICONTENTLIGHTLEVELINFO
{
unsigned short max_content_light_level;
unsigned short max_pic_average_light_level;
unsigned int reserved;
} SEICONTENTLIGHTLEVELINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct TIMECODEMPEG2
//! Used to extract Time code in MPEG2 codec
/**********************************************************************************/
typedef struct _TIMECODEMPEG2
{
unsigned char drop_frame_flag;
unsigned char time_code_hours;
unsigned char time_code_minutes;
unsigned char marker_bit;
unsigned char time_code_seconds;
unsigned char time_code_pictures;
} TIMECODEMPEG2;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct SEIALTERNATIVETRANSFERCHARACTERISTICS
//! Used to extract alternative transfer characteristics SEI in H264 and HEVC codecs
/**********************************************************************************/
typedef struct _SEIALTERNATIVETRANSFERCHARACTERISTICS
{
unsigned char preferred_transfer_characteristics;
} SEIALTERNATIVETRANSFERCHARACTERISTICS;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUSEIMESSAGE;
//! Used in CUVIDSEIMESSAGEINFO structure
/**********************************************************************************/
typedef struct _CUSEIMESSAGE
{
unsigned char sei_message_type; /**< OUT: SEI Message Type */
unsigned char reserved[3];
unsigned int sei_message_size; /**< OUT: SEI Message Size */
} CUSEIMESSAGE;
/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
@@ -168,6 +269,19 @@ typedef struct
};
} CUVIDOPERATINGPOINTINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSEIMESSAGEINFO
//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg
/**********************************************************************************/
typedef struct _CUVIDSEIMESSAGEINFO
{
void *pSEIData; /**< OUT: SEI Message Data */
CUSEIMESSAGE *pSEIMessage; /**< OUT: SEI Message Info */
unsigned int sei_message_count; /**< OUT: SEI Message Count */
unsigned int picIdx; /**< OUT: SEI Message Pic Index */
} CUVIDSEIMESSAGEINFO;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
@@ -366,11 +480,13 @@ typedef struct _CUVIDPARSERDISPINFO
//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
//! PFNVIDSEIMSGCALLBACK : 0: fail, >=1: succeeded
/***********************************************************************************************************************/
typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
typedef int (CUDAAPI *PFNVIDSEIMSGCALLBACK) (void *, CUVIDSEIMESSAGEINFO *);
/**************************************/
//! \ingroup STRUCTS
@@ -395,7 +511,8 @@ typedef struct _CUVIDPARSERPARAMS
PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1
scalable bitstream */
void *pvReserved2[6]; /**< Reserved for future use - set to NULL */
PFNVIDSEIMSGCALLBACK pfnGetSEIMsg; /**< IN: Called when all SEI messages are parsed for particular frame */
void *pvReserved2[5]; /**< Reserved for future use - set to NULL */
CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;

View File

@@ -1 +0,0 @@
libcuda.so.1

Binary file not shown.

View File

@@ -1 +0,0 @@
libnvcuvid.so.1

Binary file not shown.

View File

@@ -1 +0,0 @@
libnvidia-encode.so.1

Binary file not shown.

View File

@@ -1,111 +0,0 @@
# Copyright 2020 NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
# 3.7 is required for FindVulkan module support in CMake.
cmake_minimum_required(VERSION 3.7)
project(NvCodec)
# Set C++11 for all projects and disable non-standard extensions
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_INSTALL_PREFIX .)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Utils)
set(NV_CODEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec)
set(NV_ENC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvEncoder)
set(NV_DEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvDecoder)
set(NV_APPENC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppEncode/Common)
set(NV_APPDEC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppDecode/Common)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
else()
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
endif()
if(WIN32)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
Set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvencodeapi.lib)
else()
Set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvencodeapi.lib)
endif()
else ()
find_library(CUVID_LIB nvcuvid)
find_library(NVENCODEAPI_LIB nvidia-encode)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()
if(WIN32)
add_subdirectory(AppEncode/AppEncD3D11)
add_subdirectory(AppEncode/AppEncD3D9)
add_subdirectory(AppDecode/AppDecD3D)
else ()
#Need only linux Makefile for this
add_subdirectory(AppEncode/AppEncGL)
endif()
add_subdirectory(AppEncode/AppEncCuda)
add_subdirectory(AppEncode/AppEncDec)
add_subdirectory(AppEncode/AppEncLowLatency)
add_subdirectory(AppEncode/AppEncME)
add_subdirectory(AppEncode/AppEncPerf)
add_subdirectory(AppEncode/AppEncQual)
add_subdirectory(AppEncode/AppMotionEstimationVkCuda)
add_subdirectory(AppTranscode/AppTrans)
add_subdirectory(AppTranscode/AppTransOneToN)
add_subdirectory(AppTranscode/AppTransPerf)
add_subdirectory(AppDecode/AppDec)
add_subdirectory(AppDecode/AppDecGL)
add_subdirectory(AppDecode/AppDecImageProvider)
add_subdirectory(AppDecode/AppDecLowLatency)
add_subdirectory(AppDecode/AppDecMem)
add_subdirectory(AppDecode/AppDecMultiFiles)
add_subdirectory(AppDecode/AppDecMultiInput)
add_subdirectory(AppDecode/AppDecPerf)

View File

@@ -1,54 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int destStrideInPixels = nDestPitch / (sizeof(uint16_t));
*(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] };
}
static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t));
dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y;
}
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight);
}
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt16ToUInt8Kernel <<<gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight);
}

View File

@@ -1,399 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "ColorSpace.h"
__constant__ float matYuv2Rgb[3][3];
__constant__ float matRgb2Yuv[3][3];
void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) {
black = 16; white = 235;
max = 255;
switch (iMatrix)
{
case ColorSpaceStandard_BT709:
default:
wr = 0.2126f; wb = 0.0722f;
break;
case ColorSpaceStandard_FCC:
wr = 0.30f; wb = 0.11f;
break;
case ColorSpaceStandard_BT470:
case ColorSpaceStandard_BT601:
wr = 0.2990f; wb = 0.1140f;
break;
case ColorSpaceStandard_SMPTE240M:
wr = 0.212f; wb = 0.087f;
break;
case ColorSpaceStandard_BT2020:
case ColorSpaceStandard_BT2020C:
wr = 0.2627f; wb = 0.0593f;
// 10-bit only
black = 64 << 6; white = 940 << 6;
max = (1 << 16) - 1;
break;
}
}
void SetMatYuv2Rgb(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
1.0f, 0.0f, (1.0f - wr) / 0.5f,
1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
1.0f, (1.0f - wb) / 0.5f, 0.0f,
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
}
}
cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat));
}
void SetMatRgb2Yuv(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
wr, 1.0f - wb - wr, wb,
-0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
}
}
cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat));
}
template<class T>
__device__ static T Clamp(T x, T lower, T upper) {
return x < lower ? lower : (x > upper ? upper : x);
}
template<class Rgb, class YuvUnit>
__device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
const int
low = 1 << (sizeof(YuvUnit) * 8 - 4),
mid = 1 << (sizeof(YuvUnit) * 8 - 1);
float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
YuvUnit
r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf),
g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf),
b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf);
Rgb rgb{};
const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
rgb.c.r = r >> nShift;
rgb.c.g = g >> nShift;
rgb.c.b = b >> nShift;
} else {
rgb.c.r = r << nShift;
rgb.c.g = g << nShift;
rgb.c.b = b << nShift;
}
return rgb;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
*(RgbIntx2 *)pDst = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
};
*(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
*(RgbIntx2 *)pDst = RgbIntx2{
YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<< <dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >> >
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
// Explicit Instantiation
template void Nv12ToColor32<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor32<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<BGRA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<RGBA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<BGRA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<RGBA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
RgbIntx2 int2a = *(RgbIntx2 *)pSrc;
RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch);
Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
decltype(Rgb::c.r)
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
*(YuvUnitx2 *)pDst = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
};
*(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
};
*(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 {
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
};
}
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) {
SetMatRgb2Yuv(iMatrix);
RgbToYuvKernel<ushort2, BGRA64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight);
}

View File

@@ -1,48 +0,0 @@
#pragma once
#include <stdint.h>
#include <cuda_runtime.h>
typedef enum ColorSpaceStandard {
ColorSpaceStandard_BT709 = 1,
ColorSpaceStandard_Unspecified = 2,
ColorSpaceStandard_Reserved = 3,
ColorSpaceStandard_FCC = 4,
ColorSpaceStandard_BT470 = 5,
ColorSpaceStandard_BT601 = 6,
ColorSpaceStandard_SMPTE240M = 7,
ColorSpaceStandard_YCgCo = 8,
ColorSpaceStandard_BT2020 = 9,
ColorSpaceStandard_BT2020C = 10
} ColorSpaceStandard;
union BGRA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t b, g, r, a;
} c;
};
union RGBA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t r, g, b, a;
} c;
};
union BGRA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t b, g, r, a;
} c;
};
union RGBA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t r, g, b, a;
} c;
};

View File

@@ -1,357 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
}
#include "NvCodecUtils.h"
//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from it.
//---------------------------------------------------------------------------
/**
* @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
*/
class FFmpegDemuxer {
private:
AVFormatContext *fmtc = NULL;
AVIOContext *avioc = NULL;
AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
AVBSFContext *bsfc = NULL;
int iVideoStream;
bool bMp4H264, bMp4HEVC, bMp4MPEG4;
AVCodecID eVideoCodec;
AVPixelFormat eChromaFormat;
int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
double timeBase = 0.0;
int64_t userTimeScale = 0;
uint8_t *pDataWithHeader = NULL;
unsigned int frameCount = 0;
public:
class DataProvider {
public:
virtual ~DataProvider() {}
virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
};
private:
/**
* @brief Private constructor to initialize libavformat resources.
* @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
*/
FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
if (!fmtc) {
LOG(ERROR) << "No AVFormatContext provided.";
return;
}
LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
ck(avformat_find_stream_info(fmtc, NULL));
iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (iVideoStream < 0) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file";
return;
}
//fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
nWidth = fmtc->streams[iVideoStream]->codecpar->width;
nHeight = fmtc->streams[iVideoStream]->codecpar->height;
eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
timeBase = av_q2d(rTimeBase);
userTimeScale = timeScale;
// Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
switch (eChromaFormat)
{
case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 10;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV420P12LE:
nBitDepth = 12;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P10LE:
nBitDepth = 10;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P12LE:
nBitDepth = 12;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P:
nBitDepth = 8;
nChromaHeight = nHeight << 1;
nBPP = 1;
break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
break;
default:
LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
eChromaFormat = AV_PIX_FMT_YUV420P;
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
}
bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
//Initialize packet fields with default values
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
av_init_packet(&pktFiltered);
pktFiltered.data = NULL;
pktFiltered.size = 0;
// Initialize bitstream filter and its required resources
if (bMp4H264) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
if (bMp4HEVC) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
}
AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
AVFormatContext *ctx = NULL;
if (!(ctx = avformat_alloc_context())) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
uint8_t *avioc_buffer = NULL;
int avioc_buffer_size = 8 * 1024 * 1024;
avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
if (!avioc_buffer) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
0, pDataProvider, &ReadPacket, NULL, NULL);
if (!avioc) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
ctx->pb = avioc;
ck(avformat_open_input(&ctx, NULL, NULL, NULL));
return ctx;
}
/**
* @brief Allocate and return AVFormatContext*.
* @param szFilePath - Filepath pointing to input stream.
* @return Pointer to AVFormatContext
*/
AVFormatContext *CreateFormatContext(const char *szFilePath) {
avformat_network_init();
AVFormatContext *ctx = NULL;
ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
return ctx;
}
public:
FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;}
~FFmpegDemuxer() {
if (!fmtc) {
return;
}
if (pkt.data) {
av_packet_unref(&pkt);
}
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
if (bsfc) {
av_bsf_free(&bsfc);
}
avformat_close_input(&fmtc);
if (avioc) {
av_freep(&avioc->buffer);
av_freep(&avioc);
}
if (pDataWithHeader) {
av_free(pDataWithHeader);
}
}
AVCodecID GetVideoCodec() {
return eVideoCodec;
}
AVPixelFormat GetChromaFormat() {
return eChromaFormat;
}
int GetWidth() {
return nWidth;
}
int GetHeight() {
return nHeight;
}
int GetBitDepth() {
return nBitDepth;
}
int GetFrameSize() {
return nWidth * (nHeight + nChromaHeight) * nBPP;
}
bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
if (!fmtc) {
return false;
}
*pnVideoBytes = 0;
if (pkt.data) {
av_packet_unref(&pkt);
}
int e = 0;
while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) {
av_packet_unref(&pkt);
}
if (e < 0) {
return false;
}
if (bMp4H264 || bMp4HEVC) {
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
ck(av_bsf_send_packet(bsfc, &pkt));
ck(av_bsf_receive_packet(bsfc, &pktFiltered));
*ppVideo = pktFiltered.data;
*pnVideoBytes = pktFiltered.size;
if (pts)
*pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase);
} else {
if (bMp4MPEG4 && (frameCount == 0)) {
int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
if (extraDataSize > 0) {
// extradata contains start codes 00 00 01. Subtract its size
pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t));
if (!pDataWithHeader) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return false;
}
memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t));
*ppVideo = pDataWithHeader;
*pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t);
}
} else {
*ppVideo = pkt.data;
*pnVideoBytes = pkt.size;
}
if (pts)
*pts = (int64_t)(pkt.pts * userTimeScale * timeBase);
}
frameCount++;
return true;
}
static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
}
};
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4 : return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_WMV3 :
case AV_CODEC_ID_VC1 : return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264 : return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC : return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8 : return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9 : return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG : return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1 : return cudaVideoCodec_AV1;
default : return cudaVideoCodec_NumCodecs;
}
}

View File

@@ -1,109 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <thread>
#include <mutex>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"
extern simplelogger::Logger *logger;
class FFmpegStreamer {
private:
AVFormatContext *oc = NULL;
AVStream *vs = NULL;
int nFps = 0;
public:
FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
avformat_network_init();
oc = avformat_alloc_context();
if (!oc) {
LOG(ERROR) << "FFMPEG: avformat_alloc_context error";
return;
}
// Set format on oc
AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL);
if (!fmt) {
LOG(ERROR) << "Invalid format";
return;
}
fmt->video_codec = eCodecId;
oc->oformat = fmt;
oc->url = av_strdup(szInFilePath);
LOG(INFO) << "Streaming destination: " << oc->url;
// Add video stream to oc
vs = avformat_new_stream(oc, NULL);
if (!vs) {
LOG(ERROR) << "FFMPEG: Could not alloc video stream";
return;
}
vs->id = 0;
// Set video parameters
AVCodecParameters *vpar = vs->codecpar;
vpar->codec_id = fmt->video_codec;
vpar->codec_type = AVMEDIA_TYPE_VIDEO;
vpar->width = nWidth;
vpar->height = nHeight;
// Everything is ready. Now open the output stream.
if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
return ;
}
// Write the container header
if (avformat_write_header(oc, NULL)) {
LOG(ERROR) << "FFMPEG: avformat_write_header error!";
return;
}
}
~FFmpegStreamer() {
if (oc) {
av_write_trailer(oc);
avio_close(oc->pb);
avformat_free_context(oc);
}
}
bool Stream(uint8_t *pData, int nBytes, int nPts) {
AVPacket pkt = {0};
av_init_packet(&pkt);
pkt.pts = av_rescale_q(nPts++, AVRational {1, nFps}, vs->time_base);
// No B-frames
pkt.dts = pkt.pts;
pkt.stream_index = vs->index;
pkt.data = pData;
pkt.size = nBytes;
if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
// Write the compressed frame into the output
int ret = av_write_frame(oc, &pkt);
av_write_frame(oc, NULL);
if (ret < 0) {
LOG(ERROR) << "FFMPEG: Error while writing video frame";
}
return true;
}
};

View File

@@ -1,490 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
//---------------------------------------------------------------------------
//! \file NvCodecUtils.h
//! \brief Miscellaneous classes and error checking functions.
//!
//! Used by Transcode/Encode samples apps for reading input files,
//! mutithreading, performance measurement or colorspace conversion while
//! decoding.
//---------------------------------------------------------------------------
#pragma once
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <thread>
#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
return false;
}
return true;
}
#endif
#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
if (e != cudaSuccess) {
return false;
}
return true;
}
#endif
#ifdef _NV_ENCODEAPI_H_
inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
const char *aszErrName[] = {
"NV_ENC_SUCCESS",
"NV_ENC_ERR_NO_ENCODE_DEVICE",
"NV_ENC_ERR_UNSUPPORTED_DEVICE",
"NV_ENC_ERR_INVALID_ENCODERDEVICE",
"NV_ENC_ERR_INVALID_DEVICE",
"NV_ENC_ERR_DEVICE_NOT_EXIST",
"NV_ENC_ERR_INVALID_PTR",
"NV_ENC_ERR_INVALID_EVENT",
"NV_ENC_ERR_INVALID_PARAM",
"NV_ENC_ERR_INVALID_CALL",
"NV_ENC_ERR_OUT_OF_MEMORY",
"NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
"NV_ENC_ERR_UNSUPPORTED_PARAM",
"NV_ENC_ERR_LOCK_BUSY",
"NV_ENC_ERR_NOT_ENOUGH_BUFFER",
"NV_ENC_ERR_INVALID_VERSION",
"NV_ENC_ERR_MAP_FAILED",
"NV_ENC_ERR_NEED_MORE_INPUT",
"NV_ENC_ERR_ENCODER_BUSY",
"NV_ENC_ERR_EVENT_NOT_REGISTERD",
"NV_ENC_ERR_GENERIC",
"NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
"NV_ENC_ERR_UNIMPLEMENTED",
"NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
"NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
"NV_ENC_ERR_RESOURCE_NOT_MAPPED",
};
if (e != NV_ENC_SUCCESS) {
return false;
}
return true;
}
#endif
#ifdef _WINERROR_
inline bool check(HRESULT e, int iLine, const char *szFile) {
if (e != S_OK) {
std::stringstream stream;
stream << std::hex << std::uppercase << e;
return false;
}
return true;
}
#endif
#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
if (e != 0) {
return false;
}
return true;
}
#endif
inline bool check(int e, int iLine, const char *szFile) {
if (e < 0) {
return false;
}
return true;
}
#define ck(call) check(call, __LINE__, __FILE__)
/**
* @brief Wrapper class around std::thread
*/
class NvThread {
public:
NvThread() = default;
NvThread(const NvThread &) = delete;
NvThread &operator=(const NvThread &other) = delete;
NvThread(std::thread &&thread) : t(std::move(thread)) {}
NvThread(NvThread &&thread) : t(std::move(thread.t)) {}
NvThread &operator=(NvThread &&other) {
t = std::move(other.t);
return *this;
}
~NvThread() { join(); }
void join() {
if (t.joinable()) {
t.join();
}
}
private:
std::thread t;
};
#ifndef _WIN32
#define _stricmp strcasecmp
#define _stat64 stat64
#endif
/**
* @brief Utility class to allocate buffer memory. Helps avoid I/O during the
* encode/decode loop in case of performance tests.
*/
class BufferedFileReader {
public:
/**
* @brief Constructor function to allocate appropriate memory and copy file
* contents into it
*/
BufferedFileReader(const char *szFileName, bool bPartial = false) {
struct _stat64 st;
if (_stat64(szFileName, &st) != 0) {
return;
}
nSize = st.st_size;
while (nSize) {
try {
pBuf = new uint8_t[(size_t)nSize];
if (nSize != st.st_size) {
}
break;
} catch (std::bad_alloc) {
if (!bPartial) {
return;
}
nSize = (uint32_t)(nSize * 0.9);
}
}
std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
if (!fpIn) {
return;
}
std::streamsize nRead =
fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
fpIn.close();
assert(nRead == nSize);
}
~BufferedFileReader() {
if (pBuf) {
delete[] pBuf;
}
}
bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
if (!pBuf) {
return false;
}
*ppBuf = pBuf;
*pnSize = nSize;
return true;
}
private:
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
};
/**
* @brief Template class to facilitate color space conversion
*/
template <typename T>
class YuvConverter {
public:
YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
}
~YuvConverter() { delete[] pQuad; }
void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY;
if (nPitch == nWidth) {
memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i,
((nWidth + 1) / 2) * sizeof(T));
}
}
T *pv = puv + nSizePlaneU;
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
}
}
}
void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY, *pu = puv, *pv = puv + nSizePlaneU;
// split chroma from interleave to planar
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
}
}
if (nPitch == nWidth) {
memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i,
((nWidth + 1) / 2) * sizeof(T));
}
}
}
private:
T *pQuad;
int nWidth, nHeight;
};
/**
* @brief Utility class to measure elapsed time in seconds between the block of
* executed code
*/
class StopWatch {
public:
void Start() { t0 = std::chrono::high_resolution_clock::now(); }
double Stop() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now().time_since_epoch() -
t0.time_since_epoch())
.count() /
1.0e9;
}
private:
std::chrono::high_resolution_clock::time_point t0;
};
template <typename T>
class ConcurrentQueue {
public:
ConcurrentQueue() {}
ConcurrentQueue(size_t size) : maxSize(size) {}
ConcurrentQueue(const ConcurrentQueue &) = delete;
ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;
void setSize(size_t s) { maxSize = s; }
void push_back(const T &value) {
// Do not use a std::lock_guard here. We will need to explicitly
// unlock before notify_one as the other waiting thread will
// automatically try to acquire mutex once it wakes up
// (which will happen on notify_one)
std::unique_lock<std::mutex> lock(m_mutex);
auto wasEmpty = m_List.empty();
while (full()) {
m_cond.wait(lock);
}
m_List.push_back(value);
if (wasEmpty && !m_List.empty()) {
lock.unlock();
m_cond.notify_one();
}
}
T pop_front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
auto wasFull = full();
T data = std::move(m_List.front());
m_List.pop_front();
if (wasFull && !full()) {
lock.unlock();
m_cond.notify_one();
}
return data;
}
T front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
return m_List.front();
}
size_t size() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.size();
}
bool empty() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.empty();
}
void clear() {
std::unique_lock<std::mutex> lock(m_mutex);
m_List.clear();
}
private:
bool full() {
if (m_List.size() == maxSize) return true;
return false;
}
private:
std::list<T> m_List;
std::mutex m_mutex;
std::condition_variable m_cond;
size_t maxSize;
};
inline void CheckInputFile(const char *szInFilePath) {
std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
if (fpIn.fail()) {
std::ostringstream err;
err << "Unable to open input file: " << szInFilePath << std::endl;
throw std::invalid_argument(err.str());
}
}
inline void ValidateResolution(int nWidth, int nHeight) {
if (nWidth <= 0 || nHeight <= 0) {
std::ostringstream err;
err << "Please specify positive non zero resolution as -s WxH. Current "
"resolution is "
<< nWidth << "x" << nHeight << std::endl;
throw std::invalid_argument(err.str());
}
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016,
int nP016Pitch, int nWidth, int nHeight, int iMatrix = 4);
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch,
int nDestPitch, int nWidth, int nHeight);
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch,
int nDestPitch, int nWidth, int nHeight);
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth,
int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch,
int nSrcWidth, int nSrcHeight,
unsigned char *dpDstNv12UV = nullptr);
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth,
int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch,
int nSrcWidth, int nSrcHeight,
unsigned char *dpDstP016UV = nullptr);
void ScaleYUV420(unsigned char *dpDstY, unsigned char *dpDstU,
unsigned char *dpDstV, int nDstPitch, int nDstChromaPitch,
int nDstWidth, int nDstHeight, unsigned char *dpSrcY,
unsigned char *dpSrcU, unsigned char *dpSrcV, int nSrcPitch,
int nSrcChromaPitch, int nSrcWidth, int nSrcHeight,
bool bSemiplanar);
#ifdef __cuda_cuda_h__
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue,
CUstream_st *outputCUStream);
#endif

View File

@@ -1,644 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <vector>
#include <string>
#include <algorithm>
#include <stdexcept>
#include <sstream>
#include <iterator>
#include <cstring>
#include <functional>
#include "../Utils/Logger.h"
extern simplelogger::Logger *logger;
#ifndef _WIN32
inline bool operator==(const GUID &guid1, const GUID &guid2) {
return !memcmp(&guid1, &guid2, sizeof(GUID));
}
inline bool operator!=(const GUID &guid1, const GUID &guid2) {
return !(guid1 == guid2);
}
#endif
/*
* Helper class for parsing generic encoder options and preparing encoder
* initialization parameters. This class also provides some utility methods
* which generate verbose descriptions of the provided set of encoder
* initialization parameters.
*/
class NvEncoderInitParam {
public:
NvEncoderInitParam(const char *szParam = "",
std::function<void(NV_ENC_INITIALIZE_PARAMS *pParams)> *pfuncInit = NULL, bool _bLowLatency = false)
: strParam(szParam), bLowLatency(_bLowLatency)
{
if (pfuncInit) {
funcInit = *pfuncInit;
}
std::transform(strParam.begin(), strParam.end(), strParam.begin(), tolower);
std::istringstream ss(strParam);
tokens = std::vector<std::string> {
std::istream_iterator<std::string>(ss),
std::istream_iterator<std::string>()
};
for (unsigned i = 0; i < tokens.size(); i++)
{
if (tokens[i] == "-codec" && ++i != tokens.size())
{
ParseString("-codec", tokens[i], vCodec, szCodecNames, &guidCodec);
continue;
}
if (tokens[i] == "-preset" && ++i != tokens.size()) {
ParseString("-preset", tokens[i], vPreset, szPresetNames, &guidPreset);
continue;
}
if (tokens[i] == "-tuninginfo" && ++i != tokens.size())
{
ParseString("-tuninginfo", tokens[i], vTuningInfo, szTuningInfoNames, &m_TuningInfo);
continue;
}
}
}
virtual ~NvEncoderInitParam() {}
virtual bool IsCodecH264() {
return GetEncodeGUID() == NV_ENC_CODEC_H264_GUID;
}
virtual bool IsCodecHEVC() {
return GetEncodeGUID() == NV_ENC_CODEC_HEVC_GUID;
}
std::string GetHelpMessage(bool bMeOnly = false, bool bUnbuffered = false, bool bHide444 = false, bool bOutputInVidMem = false)
{
std::ostringstream oss;
if (bOutputInVidMem && bMeOnly)
{
oss << "-codec Codec: " << "h264" << std::endl;
}
else
{
oss << "-codec Codec: " << szCodecNames << std::endl;
}
oss << "-preset Preset: " << szPresetNames << std::endl
<< "-profile H264: " << szH264ProfileNames;
if (bOutputInVidMem && bMeOnly)
{
oss << std::endl;
}
else
{
oss << "; HEVC: " << szHevcProfileNames << std::endl;
}
if (!bMeOnly)
{
if (bLowLatency == false)
oss << "-tuninginfo TuningInfo: " << szTuningInfoNames << std::endl;
else
oss << "-tuninginfo TuningInfo: " << szLowLatencyTuningInfoNames << std::endl;
oss << "-multipass Multipass: " << szMultipass << std::endl;
}
if (!bHide444 && !bLowLatency)
{
oss << "-444 (Only for RGB input) YUV444 encode" << std::endl;
}
if (bMeOnly) return oss.str();
oss << "-fps Frame rate" << std::endl;
if (!bUnbuffered && !bLowLatency)
{
oss << "-bf Number of consecutive B-frames" << std::endl;
}
if (!bLowLatency)
{
oss << "-rc Rate control mode: " << szRcModeNames << std::endl
<< "-gop Length of GOP (Group of Pictures)" << std::endl
<< "-bitrate Average bit rate, can be in unit of 1, K, M" << std::endl
<< "-maxbitrate Max bit rate, can be in unit of 1, K, M" << std::endl
<< "-vbvbufsize VBV buffer size in bits, can be in unit of 1, K, M" << std::endl
<< "-vbvinit VBV initial delay in bits, can be in unit of 1, K, M" << std::endl
<< "-aq Enable spatial AQ and set its stength (range 1-15, 0-auto)" << std::endl
<< "-temporalaq (No value) Enable temporal AQ" << std::endl
<< "-cq Target constant quality level for VBR mode (range 1-51, 0-auto)" << std::endl;
}
if (!bUnbuffered && !bLowLatency)
{
oss << "-lookahead Maximum depth of lookahead (range 0-(31 - number of B frames))" << std::endl;
}
oss << "-qmin Min QP value" << std::endl
<< "-qmax Max QP value" << std::endl
<< "-initqp Initial QP value" << std::endl;
if (!bLowLatency)
{
oss << "-constqp QP value for constqp rate control mode" << std::endl
<< "Note: QP value can be in the form of qp_of_P_B_I or qp_P,qp_B,qp_I (no space)" << std::endl;
}
if (bUnbuffered && !bLowLatency)
{
oss << "Note: Options -bf and -lookahead are unavailable for this app" << std::endl;
}
return oss.str();
}
/**
* @brief Generate and return a string describing the values of the main/common
* encoder initialization parameters
*/
std::string MainParamToString(const NV_ENC_INITIALIZE_PARAMS *pParams) {
std::ostringstream os;
os
<< "Encoding Parameters:"
<< std::endl << "\tcodec : " << ConvertValueToString(vCodec, szCodecNames, pParams->encodeGUID)
<< std::endl << "\tpreset : " << ConvertValueToString(vPreset, szPresetNames, pParams->presetGUID);
if (pParams->tuningInfo)
{
os << std::endl << "\ttuningInfo : " << ConvertValueToString(vTuningInfo, szTuningInfoNames, pParams->tuningInfo);
}
os
<< std::endl << "\tprofile : " << ConvertValueToString(vProfile, szProfileNames, pParams->encodeConfig->profileGUID)
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC : pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? 0 : pParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8) + 8
<< std::endl << "\trc : " << ConvertValueToString(vRcMode, szRcModeNames, pParams->encodeConfig->rcParams.rateControlMode)
;
if (pParams->encodeConfig->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CONSTQP) {
os << " (P,B,I=" << pParams->encodeConfig->rcParams.constQP.qpInterP << "," << pParams->encodeConfig->rcParams.constQP.qpInterB << "," << pParams->encodeConfig->rcParams.constQP.qpIntra << ")";
}
os
<< std::endl << "\tfps : " << pParams->frameRateNum << "/" << pParams->frameRateDen
<< std::endl << "\tgop : " << (pParams->encodeConfig->gopLength == NVENC_INFINITE_GOPLENGTH ? "INF" : std::to_string(pParams->encodeConfig->gopLength))
<< std::endl << "\tbf : " << pParams->encodeConfig->frameIntervalP - 1
<< std::endl << "\tmultipass : " << pParams->encodeConfig->rcParams.multiPass
<< std::endl << "\tsize : " << pParams->encodeWidth << "x" << pParams->encodeHeight
<< std::endl << "\tbitrate : " << pParams->encodeConfig->rcParams.averageBitRate
<< std::endl << "\tmaxbitrate : " << pParams->encodeConfig->rcParams.maxBitRate
<< std::endl << "\tvbvbufsize : " << pParams->encodeConfig->rcParams.vbvBufferSize
<< std::endl << "\tvbvinit : " << pParams->encodeConfig->rcParams.vbvInitialDelay
<< std::endl << "\taq : " << (pParams->encodeConfig->rcParams.enableAQ ? (pParams->encodeConfig->rcParams.aqStrength ? std::to_string(pParams->encodeConfig->rcParams.aqStrength) : "auto") : "disabled")
<< std::endl << "\ttemporalaq : " << (pParams->encodeConfig->rcParams.enableTemporalAQ ? "enabled" : "disabled")
<< std::endl << "\tlookahead : " << (pParams->encodeConfig->rcParams.enableLookahead ? std::to_string(pParams->encodeConfig->rcParams.lookaheadDepth) : "disabled")
<< std::endl << "\tcq : " << (unsigned int)pParams->encodeConfig->rcParams.targetQuality
<< std::endl << "\tqmin : P,B,I=" << (int)pParams->encodeConfig->rcParams.minQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.minQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.minQP.qpIntra
<< std::endl << "\tqmax : P,B,I=" << (int)pParams->encodeConfig->rcParams.maxQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.maxQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.maxQP.qpIntra
<< std::endl << "\tinitqp : P,B,I=" << (int)pParams->encodeConfig->rcParams.initialRCQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.initialRCQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.initialRCQP.qpIntra
;
return os.str();
}
public:
virtual GUID GetEncodeGUID() { return guidCodec; }
virtual GUID GetPresetGUID() { return guidPreset; }
virtual NV_ENC_TUNING_INFO GetTuningInfo() { return m_TuningInfo; }
/*
* @brief Set encoder initialization parameters based on input options
* This method parses the tokens formed from the command line options
* provided to the application and sets the fields from NV_ENC_INITIALIZE_PARAMS
* based on the supplied values.
*/
virtual void SetInitParams(NV_ENC_INITIALIZE_PARAMS *pParams, NV_ENC_BUFFER_FORMAT eBufferFormat)
{
NV_ENC_CONFIG &config = *pParams->encodeConfig;
for (unsigned i = 0; i < tokens.size(); i++)
{
if (
tokens[i] == "-codec" && ++i ||
tokens[i] == "-preset" && ++i ||
tokens[i] == "-tuninginfo" && ++i ||
tokens[i] == "-multipass" && ++i != tokens.size() && ParseString("-multipass", tokens[i], vMultiPass, szMultipass, &config.rcParams.multiPass) ||
tokens[i] == "-profile" && ++i != tokens.size() && (IsCodecH264() ?
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID)) ||
tokens[i] == "-rc" && ++i != tokens.size() && ParseString("-rc", tokens[i], vRcMode, szRcModeNames, &config.rcParams.rateControlMode) ||
tokens[i] == "-fps" && ++i != tokens.size() && ParseInt("-fps", tokens[i], &pParams->frameRateNum) ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP ||
tokens[i] == "-bitrate" && ++i != tokens.size() && ParseBitRate("-bitrate", tokens[i], &config.rcParams.averageBitRate) ||
tokens[i] == "-maxbitrate" && ++i != tokens.size() && ParseBitRate("-maxbitrate", tokens[i], &config.rcParams.maxBitRate) ||
tokens[i] == "-vbvbufsize" && ++i != tokens.size() && ParseBitRate("-vbvbufsize", tokens[i], &config.rcParams.vbvBufferSize) ||
tokens[i] == "-vbvinit" && ++i != tokens.size() && ParseBitRate("-vbvinit", tokens[i], &config.rcParams.vbvInitialDelay) ||
tokens[i] == "-cq" && ++i != tokens.size() && ParseInt("-cq", tokens[i], &config.rcParams.targetQuality) ||
tokens[i] == "-initqp" && ++i != tokens.size() && ParseQp("-initqp", tokens[i], &config.rcParams.initialRCQP) && (config.rcParams.enableInitialRCQP = true) ||
tokens[i] == "-qmin" && ++i != tokens.size() && ParseQp("-qmin", tokens[i], &config.rcParams.minQP) && (config.rcParams.enableMinQP = true) ||
tokens[i] == "-qmax" && ++i != tokens.size() && ParseQp("-qmax", tokens[i], &config.rcParams.maxQP) && (config.rcParams.enableMaxQP = true) ||
tokens[i] == "-constqp" && ++i != tokens.size() && ParseQp("-constqp", tokens[i], &config.rcParams.constQP) ||
tokens[i] == "-temporalaq" && (config.rcParams.enableTemporalAQ = true)
)
{
continue;
}
if (tokens[i] == "-lookahead" && ++i != tokens.size() && ParseInt("-lookahead", tokens[i], &config.rcParams.lookaheadDepth))
{
config.rcParams.enableLookahead = config.rcParams.lookaheadDepth > 0;
continue;
}
int aqStrength;
if (tokens[i] == "-aq" && ++i != tokens.size() && ParseInt("-aq", tokens[i], &aqStrength)) {
config.rcParams.enableAQ = true;
config.rcParams.aqStrength = aqStrength;
continue;
}
if (tokens[i] == "-gop" && ++i != tokens.size() && ParseInt("-gop", tokens[i], &config.gopLength))
{
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.idrPeriod = config.gopLength;
}
else
{
config.encodeCodecConfig.hevcConfig.idrPeriod = config.gopLength;
}
continue;
}
if (tokens[i] == "-444")
{
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
} else
{
config.encodeCodecConfig.hevcConfig.chromaFormatIDC = 3;
}
continue;
}
std::ostringstream errmessage;
errmessage << "Incorrect parameter: " << tokens[i] << std::endl;
errmessage << "Re-run the application with the -h option to get a list of the supported options.";
errmessage << std::endl;
throw std::invalid_argument(errmessage.str());
}
if (IsCodecHEVC())
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
config.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2;
}
}
funcInit(pParams);
LOG(INFO) << NvEncoderInitParam().MainParamToString(pParams);
LOG(TRACE) << NvEncoderInitParam().FullParamToString(pParams);
}
private:
/*
* Helper methods for parsing tokens (generated by splitting the command line)
* and performing conversions to the appropriate target type/value.
*/
template<typename T>
bool ParseString(const std::string &strName, const std::string &strValue, const std::vector<T> &vValue, const std::string &strValueNames, T *pValue) {
std::vector<std::string> vstrValueName = split(strValueNames, ' ');
auto it = std::find(vstrValueName.begin(), vstrValueName.end(), strValue);
if (it == vstrValueName.end()) {
LOG(ERROR) << strName << " options: " << strValueNames;
return false;
}
*pValue = vValue[it - vstrValueName.begin()];
return true;
}
template<typename T>
std::string ConvertValueToString(const std::vector<T> &vValue, const std::string &strValueNames, T value) {
auto it = std::find(vValue.begin(), vValue.end(), value);
if (it == vValue.end()) {
LOG(ERROR) << "Invalid value. Can't convert to one of " << strValueNames;
return std::string();
}
return split(strValueNames, ' ')[it - vValue.begin()];
}
bool ParseBitRate(const std::string &strName, const std::string &strValue, unsigned *pBitRate) {
try {
size_t l;
double r = std::stod(strValue, &l);
char c = strValue[l];
if (c != 0 && c != 'k' && c != 'm') {
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
} catch (std::invalid_argument) {
return false;
}
return true;
}
template<typename T>
bool ParseInt(const std::string &strName, const std::string &strValue, T *pInt) {
try {
*pInt = std::stoi(strValue);
} catch (std::invalid_argument) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
bool ParseQp(const std::string &strName, const std::string &strValue, NV_ENC_QP *pQp) {
std::vector<std::string> vQp = split(strValue, ',');
try {
if (vQp.size() == 1) {
unsigned qp = (unsigned)std::stoi(vQp[0]);
*pQp = {qp, qp, qp};
} else if (vQp.size() == 3) {
*pQp = {(unsigned)std::stoi(vQp[0]), (unsigned)std::stoi(vQp[1]), (unsigned)std::stoi(vQp[2])};
} else {
LOG(ERROR) << strName << " qp_for_P_B_I or qp_P,qp_B,qp_I (no space is allowed)";
return false;
}
} catch (std::invalid_argument) {
return false;
}
return true;
}
std::vector<std::string> split(const std::string &s, char delim) {
std::stringstream ss(s);
std::string token;
std::vector<std::string> tokens;
while (getline(ss, token, delim)) {
tokens.push_back(token);
}
return tokens;
}
private:
std::string strParam;
std::function<void(NV_ENC_INITIALIZE_PARAMS *pParams)> funcInit = [](NV_ENC_INITIALIZE_PARAMS *pParams){};
std::vector<std::string> tokens;
GUID guidCodec = NV_ENC_CODEC_H264_GUID;
GUID guidPreset = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO m_TuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
bool bLowLatency = false;
const char *szCodecNames = "h264 hevc";
std::vector<GUID> vCodec = std::vector<GUID> {
NV_ENC_CODEC_H264_GUID,
NV_ENC_CODEC_HEVC_GUID
};
const char *szChromaNames = "yuv420 yuv444";
std::vector<uint32_t> vChroma = std::vector<uint32_t>
{
1, 3
};
const char *szPresetNames = "p1 p2 p3 p4 p5 p6 p7";
std::vector<GUID> vPreset = std::vector<GUID> {
NV_ENC_PRESET_P1_GUID,
NV_ENC_PRESET_P2_GUID,
NV_ENC_PRESET_P3_GUID,
NV_ENC_PRESET_P4_GUID,
NV_ENC_PRESET_P5_GUID,
NV_ENC_PRESET_P6_GUID,
NV_ENC_PRESET_P7_GUID,
};
const char *szH264ProfileNames = "baseline main high high444";
std::vector<GUID> vH264Profile = std::vector<GUID> {
NV_ENC_H264_PROFILE_BASELINE_GUID,
NV_ENC_H264_PROFILE_MAIN_GUID,
NV_ENC_H264_PROFILE_HIGH_GUID,
NV_ENC_H264_PROFILE_HIGH_444_GUID,
};
const char *szHevcProfileNames = "main main10 frext";
std::vector<GUID> vHevcProfile = std::vector<GUID> {
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szProfileNames = "(default) auto baseline(h264) main(h264) high(h264) high444(h264)"
" stereo(h264) progressiv_high(h264) constrained_high(h264)"
" main(hevc) main10(hevc) frext(hevc)";
std::vector<GUID> vProfile = std::vector<GUID> {
GUID{},
NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID,
NV_ENC_H264_PROFILE_BASELINE_GUID,
NV_ENC_H264_PROFILE_MAIN_GUID,
NV_ENC_H264_PROFILE_HIGH_GUID,
NV_ENC_H264_PROFILE_HIGH_444_GUID,
NV_ENC_H264_PROFILE_STEREO_GUID,
NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID,
NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID,
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szLowLatencyTuningInfoNames = "lowlatency ultralowlatency";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless";
std::vector<NV_ENC_TUNING_INFO> vTuningInfo = std::vector<NV_ENC_TUNING_INFO>{
NV_ENC_TUNING_INFO_HIGH_QUALITY,
NV_ENC_TUNING_INFO_LOW_LATENCY,
NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
NV_ENC_TUNING_INFO_LOSSLESS
};
const char *szRcModeNames = "constqp vbr cbr";
std::vector<NV_ENC_PARAMS_RC_MODE> vRcMode = std::vector<NV_ENC_PARAMS_RC_MODE> {
NV_ENC_PARAMS_RC_CONSTQP,
NV_ENC_PARAMS_RC_VBR,
NV_ENC_PARAMS_RC_CBR,
};
const char *szMultipass = "disabled qres fullres";
std::vector<NV_ENC_MULTI_PASS> vMultiPass = std::vector<NV_ENC_MULTI_PASS>{
NV_ENC_MULTI_PASS_DISABLED,
NV_ENC_TWO_PASS_QUARTER_RESOLUTION,
NV_ENC_TWO_PASS_FULL_RESOLUTION,
};
const char *szQpMapModeNames = "disabled emphasis_level_map delta_qp_map qp_map";
std::vector<NV_ENC_QP_MAP_MODE> vQpMapMode = std::vector<NV_ENC_QP_MAP_MODE> {
NV_ENC_QP_MAP_DISABLED,
NV_ENC_QP_MAP_EMPHASIS,
NV_ENC_QP_MAP_DELTA,
NV_ENC_QP_MAP,
};
public:
/*
* Generates and returns a string describing the values for each field in
* the NV_ENC_INITIALIZE_PARAMS structure (i.e. a description of the entire
* set of initialization parameters supplied to the API).
*/
std::string FullParamToString(const NV_ENC_INITIALIZE_PARAMS *pInitializeParams) {
std::ostringstream os;
os << "NV_ENC_INITIALIZE_PARAMS:" << std::endl
<< "encodeGUID: " << ConvertValueToString(vCodec, szCodecNames, pInitializeParams->encodeGUID) << std::endl
<< "presetGUID: " << ConvertValueToString(vPreset, szPresetNames, pInitializeParams->presetGUID) << std::endl;
if (pInitializeParams->tuningInfo)
{
os << "tuningInfo: " << ConvertValueToString(vTuningInfo, szTuningInfoNames, pInitializeParams->tuningInfo) << std::endl;
}
os
<< "encodeWidth: " << pInitializeParams->encodeWidth << std::endl
<< "encodeHeight: " << pInitializeParams->encodeHeight << std::endl
<< "darWidth: " << pInitializeParams->darWidth << std::endl
<< "darHeight: " << pInitializeParams->darHeight << std::endl
<< "frameRateNum: " << pInitializeParams->frameRateNum << std::endl
<< "frameRateDen: " << pInitializeParams->frameRateDen << std::endl
<< "enableEncodeAsync: " << pInitializeParams->enableEncodeAsync << std::endl
<< "reportSliceOffsets: " << pInitializeParams->reportSliceOffsets << std::endl
<< "enableSubFrameWrite: " << pInitializeParams->enableSubFrameWrite << std::endl
<< "enableExternalMEHints: " << pInitializeParams->enableExternalMEHints << std::endl
<< "enableMEOnlyMode: " << pInitializeParams->enableMEOnlyMode << std::endl
<< "enableWeightedPrediction: " << pInitializeParams->enableWeightedPrediction << std::endl
<< "maxEncodeWidth: " << pInitializeParams->maxEncodeWidth << std::endl
<< "maxEncodeHeight: " << pInitializeParams->maxEncodeHeight << std::endl
<< "maxMEHintCountsPerBlock: " << pInitializeParams->maxMEHintCountsPerBlock << std::endl
;
NV_ENC_CONFIG *pConfig = pInitializeParams->encodeConfig;
os << "NV_ENC_CONFIG:" << std::endl
<< "profile: " << ConvertValueToString(vProfile, szProfileNames, pConfig->profileGUID) << std::endl
<< "gopLength: " << pConfig->gopLength << std::endl
<< "frameIntervalP: " << pConfig->frameIntervalP << std::endl
<< "monoChromeEncoding: " << pConfig->monoChromeEncoding << std::endl
<< "frameFieldMode: " << pConfig->frameFieldMode << std::endl
<< "mvPrecision: " << pConfig->mvPrecision << std::endl
<< "NV_ENC_RC_PARAMS:" << std::endl
<< " rateControlMode: 0x" << std::hex << pConfig->rcParams.rateControlMode << std::dec << std::endl
<< " constQP: " << pConfig->rcParams.constQP.qpInterP << ", " << pConfig->rcParams.constQP.qpInterB << ", " << pConfig->rcParams.constQP.qpIntra << std::endl
<< " averageBitRate: " << pConfig->rcParams.averageBitRate << std::endl
<< " maxBitRate: " << pConfig->rcParams.maxBitRate << std::endl
<< " vbvBufferSize: " << pConfig->rcParams.vbvBufferSize << std::endl
<< " vbvInitialDelay: " << pConfig->rcParams.vbvInitialDelay << std::endl
<< " enableMinQP: " << pConfig->rcParams.enableMinQP << std::endl
<< " enableMaxQP: " << pConfig->rcParams.enableMaxQP << std::endl
<< " enableInitialRCQP: " << pConfig->rcParams.enableInitialRCQP << std::endl
<< " enableAQ: " << pConfig->rcParams.enableAQ << std::endl
<< " qpMapMode: " << ConvertValueToString(vQpMapMode, szQpMapModeNames, pConfig->rcParams.qpMapMode) << std::endl
<< " multipass: " << ConvertValueToString(vMultiPass, szMultipass, pConfig->rcParams.multiPass) << std::endl
<< " enableLookahead: " << pConfig->rcParams.enableLookahead << std::endl
<< " disableIadapt: " << pConfig->rcParams.disableIadapt << std::endl
<< " disableBadapt: " << pConfig->rcParams.disableBadapt << std::endl
<< " enableTemporalAQ: " << pConfig->rcParams.enableTemporalAQ << std::endl
<< " zeroReorderDelay: " << pConfig->rcParams.zeroReorderDelay << std::endl
<< " enableNonRefP: " << pConfig->rcParams.enableNonRefP << std::endl
<< " strictGOPTarget: " << pConfig->rcParams.strictGOPTarget << std::endl
<< " aqStrength: " << pConfig->rcParams.aqStrength << std::endl
<< " minQP: " << pConfig->rcParams.minQP.qpInterP << ", " << pConfig->rcParams.minQP.qpInterB << ", " << pConfig->rcParams.minQP.qpIntra << std::endl
<< " maxQP: " << pConfig->rcParams.maxQP.qpInterP << ", " << pConfig->rcParams.maxQP.qpInterB << ", " << pConfig->rcParams.maxQP.qpIntra << std::endl
<< " initialRCQP: " << pConfig->rcParams.initialRCQP.qpInterP << ", " << pConfig->rcParams.initialRCQP.qpInterB << ", " << pConfig->rcParams.initialRCQP.qpIntra << std::endl
<< " temporallayerIdxMask: " << pConfig->rcParams.temporallayerIdxMask << std::endl
<< " temporalLayerQP: " << (int)pConfig->rcParams.temporalLayerQP[0] << ", " << (int)pConfig->rcParams.temporalLayerQP[1] << ", " << (int)pConfig->rcParams.temporalLayerQP[2] << ", " << (int)pConfig->rcParams.temporalLayerQP[3] << ", " << (int)pConfig->rcParams.temporalLayerQP[4] << ", " << (int)pConfig->rcParams.temporalLayerQP[5] << ", " << (int)pConfig->rcParams.temporalLayerQP[6] << ", " << (int)pConfig->rcParams.temporalLayerQP[7] << std::endl
<< " targetQuality: " << pConfig->rcParams.targetQuality << std::endl
<< " lookaheadDepth: " << pConfig->rcParams.lookaheadDepth << std::endl;
if (pInitializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (H264):" << std::endl
<< " enableStereoMVC: " << pConfig->encodeCodecConfig.h264Config.enableStereoMVC << std::endl
<< " hierarchicalPFrames: " << pConfig->encodeCodecConfig.h264Config.hierarchicalPFrames << std::endl
<< " hierarchicalBFrames: " << pConfig->encodeCodecConfig.h264Config.hierarchicalBFrames << std::endl
<< " outputBufferingPeriodSEI: " << pConfig->encodeCodecConfig.h264Config.outputBufferingPeriodSEI << std::endl
<< " outputPictureTimingSEI: " << pConfig->encodeCodecConfig.h264Config.outputPictureTimingSEI << std::endl
<< " outputAUD: " << pConfig->encodeCodecConfig.h264Config.outputAUD << std::endl
<< " disableSPSPPS: " << pConfig->encodeCodecConfig.h264Config.disableSPSPPS << std::endl
<< " outputFramePackingSEI: " << pConfig->encodeCodecConfig.h264Config.outputFramePackingSEI << std::endl
<< " outputRecoveryPointSEI: " << pConfig->encodeCodecConfig.h264Config.outputRecoveryPointSEI << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.h264Config.enableIntraRefresh << std::endl
<< " enableConstrainedEncoding: " << pConfig->encodeCodecConfig.h264Config.enableConstrainedEncoding << std::endl
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.h264Config.repeatSPSPPS << std::endl
<< " enableVFR: " << pConfig->encodeCodecConfig.h264Config.enableVFR << std::endl
<< " enableLTR: " << pConfig->encodeCodecConfig.h264Config.enableLTR << std::endl
<< " qpPrimeYZeroTransformBypassFlag: " << pConfig->encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag << std::endl
<< " useConstrainedIntraPred: " << pConfig->encodeCodecConfig.h264Config.useConstrainedIntraPred << std::endl
<< " level: " << pConfig->encodeCodecConfig.h264Config.level << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.h264Config.idrPeriod << std::endl
<< " separateColourPlaneFlag: " << pConfig->encodeCodecConfig.h264Config.separateColourPlaneFlag << std::endl
<< " disableDeblockingFilterIDC: " << pConfig->encodeCodecConfig.h264Config.disableDeblockingFilterIDC << std::endl
<< " numTemporalLayers: " << pConfig->encodeCodecConfig.h264Config.numTemporalLayers << std::endl
<< " spsId: " << pConfig->encodeCodecConfig.h264Config.spsId << std::endl
<< " ppsId: " << pConfig->encodeCodecConfig.h264Config.ppsId << std::endl
<< " adaptiveTransformMode: " << pConfig->encodeCodecConfig.h264Config.adaptiveTransformMode << std::endl
<< " fmoMode: " << pConfig->encodeCodecConfig.h264Config.fmoMode << std::endl
<< " bdirectMode: " << pConfig->encodeCodecConfig.h264Config.bdirectMode << std::endl
<< " entropyCodingMode: " << pConfig->encodeCodecConfig.h264Config.entropyCodingMode << std::endl
<< " stereoMode: " << pConfig->encodeCodecConfig.h264Config.stereoMode << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.h264Config.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.h264Config.intraRefreshCnt << std::endl
<< " maxNumRefFrames: " << pConfig->encodeCodecConfig.h264Config.maxNumRefFrames << std::endl
<< " sliceMode: " << pConfig->encodeCodecConfig.h264Config.sliceMode << std::endl
<< " sliceModeData: " << pConfig->encodeCodecConfig.h264Config.sliceModeData << std::endl
<< " NV_ENC_CONFIG_H264_VUI_PARAMETERS:" << std::endl
<< " overscanInfoPresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.overscanInfoPresentFlag << std::endl
<< " overscanInfo: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.overscanInfo << std::endl
<< " videoSignalTypePresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag << std::endl
<< " videoFormat: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoFormat << std::endl
<< " videoFullRangeFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag << std::endl
<< " colourDescriptionPresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag << std::endl
<< " colourPrimaries: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics << std::endl
<< " colourMatrix: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix << std::endl
<< " chromaSampleLocationFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationFlag << std::endl
<< " chromaSampleLocationTop: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationTop << std::endl
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrNumFrames: " << pConfig->encodeCodecConfig.h264Config.ltrNumFrames << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.h264Config.ltrTrustMode << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.h264Config.chromaFormatIDC << std::endl
<< " maxTemporalLayers: " << pConfig->encodeCodecConfig.h264Config.maxTemporalLayers << std::endl;
} else if (pInitializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (HEVC):" << std::endl
<< " level: " << pConfig->encodeCodecConfig.hevcConfig.level << std::endl
<< " tier: " << pConfig->encodeCodecConfig.hevcConfig.tier << std::endl
<< " minCUSize: " << pConfig->encodeCodecConfig.hevcConfig.minCUSize << std::endl
<< " maxCUSize: " << pConfig->encodeCodecConfig.hevcConfig.maxCUSize << std::endl
<< " useConstrainedIntraPred: " << pConfig->encodeCodecConfig.hevcConfig.useConstrainedIntraPred << std::endl
<< " disableDeblockAcrossSliceBoundary: " << pConfig->encodeCodecConfig.hevcConfig.disableDeblockAcrossSliceBoundary << std::endl
<< " outputBufferingPeriodSEI: " << pConfig->encodeCodecConfig.hevcConfig.outputBufferingPeriodSEI << std::endl
<< " outputPictureTimingSEI: " << pConfig->encodeCodecConfig.hevcConfig.outputPictureTimingSEI << std::endl
<< " outputAUD: " << pConfig->encodeCodecConfig.hevcConfig.outputAUD << std::endl
<< " enableLTR: " << pConfig->encodeCodecConfig.hevcConfig.enableLTR << std::endl
<< " disableSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.disableSPSPPS << std::endl
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.repeatSPSPPS << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.hevcConfig.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC << std::endl
<< " pixelBitDepthMinus8: " << pConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.hevcConfig.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshCnt << std::endl
<< " maxNumRefFramesInDPB: " << pConfig->encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB << std::endl
<< " ltrNumFrames: " << pConfig->encodeCodecConfig.hevcConfig.ltrNumFrames << std::endl
<< " vpsId: " << pConfig->encodeCodecConfig.hevcConfig.vpsId << std::endl
<< " spsId: " << pConfig->encodeCodecConfig.hevcConfig.spsId << std::endl
<< " ppsId: " << pConfig->encodeCodecConfig.hevcConfig.ppsId << std::endl
<< " sliceMode: " << pConfig->encodeCodecConfig.hevcConfig.sliceMode << std::endl
<< " sliceModeData: " << pConfig->encodeCodecConfig.hevcConfig.sliceModeData << std::endl
<< " maxTemporalLayersMinus1: " << pConfig->encodeCodecConfig.hevcConfig.maxTemporalLayersMinus1 << std::endl
<< " NV_ENC_CONFIG_HEVC_VUI_PARAMETERS:" << std::endl
<< " overscanInfoPresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.overscanInfoPresentFlag << std::endl
<< " overscanInfo: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.overscanInfo << std::endl
<< " videoSignalTypePresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoSignalTypePresentFlag << std::endl
<< " videoFormat: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat << std::endl
<< " videoFullRangeFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag << std::endl
<< " colourDescriptionPresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag << std::endl
<< " colourPrimaries: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.transferCharacteristics << std::endl
<< " colourMatrix: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourMatrix << std::endl
<< " chromaSampleLocationFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationFlag << std::endl
<< " chromaSampleLocationTop: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationTop << std::endl
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.hevcConfig.ltrTrustMode << std::endl;
}
return os.str();
}
};

View File

@@ -1,192 +0,0 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
template<typename YuvUnitx2>
static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int ix = blockIdx.x * blockDim.x + threadIdx.x,
iy = blockIdx.y * blockDim.y + threadIdx.y;
if (ix >= nWidth / 2 || iy >= nHeight / 2) {
return;
}
int x = ix * 2, y = iy * 2;
typedef decltype(YuvUnitx2::x) YuvUnit;
const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
y++;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
*(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
}
template <typename YuvUnitx2>
static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
cudaTextureObject_t texY=0;
ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
resDesc.res.pitch2D.width = nSrcWidth / 2;
resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
cudaTextureObject_t texUv=0;
ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
Resize<YuvUnitx2> << <dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32), dim3(16, 16) >> >(texY, texUv, dpDst, dpDstUV,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
ck(cudaDestroyTextureObject(texY));
ck(cudaDestroyTextureObject(texUv));
}
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
{
unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
}
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
{
unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
}
static __global__ void Scale(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
*(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
}
static __global__ void Scale_uv(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
*(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
}
void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false)
{
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
cudaTextureObject_t texSrc = 0;
ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
if (bUVPlane)
{
Scale_uv << <gridSize, blockSize >> >(texSrc, dpDst,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
else
{
Scale << <gridSize, blockSize >> >(texSrc, dpDst,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
ck(cudaGetLastError());
ck(cudaDestroyTextureObject(texSrc));
}
void ScaleYUV420(unsigned char *dpDstY,
unsigned char* dpDstU,
unsigned char* dpDstV,
int nDstPitch,
int nDstChromaPitch,
int nDstWidth,
int nDstHeight,
unsigned char *dpSrcY,
unsigned char* dpSrcU,
unsigned char* dpSrcV,
int nSrcPitch,
int nSrcChromaPitch,
int nSrcWidth,
int nSrcHeight,
bool bSemiplanar)
{
int chromaWidthDst = (nDstWidth + 1) / 2;
int chromaHeightDst = (nDstHeight + 1) / 2;
int chromaWidthSrc = (nSrcWidth + 1) / 2;
int chromaHeightSrc = (nSrcHeight + 1) / 2;
ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
if (bSemiplanar)
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
}
else
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
}
}

View File

@@ -1,126 +0,0 @@
/*
* Copyright 2018-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
/*
* CRC32 lookup table
* Generated by the following routine
* int i, j;
* U032 crc;
* for (i = 0; i < 256; i++)
* {
* crc = i;
* for (j = 0; j < 8; j++) { // 8 reduction
* crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320L : 0);
* }
* Crc32Table[i] = crc;
* }
*/
__device__ __constant__ uint32_t Crc32Table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
typedef struct _NV_ENC_ENCODE_OUT_PARAMS
{
uint32_t version; /**< [out]: Struct version. */
uint32_t bitstreamSizeInBytes; /**< [out]: Encoded bitstream size in bytes */
uint32_t cycleCount; /**< [out]: Cycle count */
uint32_t firstPassCycleCount; /**< [out]: First pass cycle count */
uint32_t reserved[60]; /**< [out]: Reserved and must be set to 0 */
} NV_ENC_ENCODE_OUT_PARAMS;
static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
{
NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
uint32_t crc=~0;
for(uint32_t i = 0; i < bitstreamSize; i++)
{
crc = (crc >> 8) ^ Crc32Table[((uint8_t)(crc)) ^ (*pEncStream++)];
}
*crcValue = ~crc;
}
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
{
dim3 blockSize(1, 1, 1);
dim3 gridSize(1, 1, 1);
ComputeCRCKernel <<<gridSize, blockSize, 0, outputCUStream >>>(pBuffer, crcValue);
}