Use kcp as QoS module

dijunkun
2023-08-30 17:44:22 +08:00
parent a4cd77dcb0
commit 3c1f7973d0
79 changed files with 14442 additions and 3150 deletions


@@ -32,7 +32,7 @@ package("ffmpeg")
add_configs("hardcoded-tables", {description = "Enable hardcoded tables.", default = true, type = "boolean"})
end
add_links("avfilter", "avdevice", "avformat", "avcodec", "swscale", "swresample", "avutil")
-- add_links("avfilter", "avdevice", "avformat", "avcodec", "swscale", "swresample", "avutil")
if is_plat("macosx") then
add_frameworks("CoreFoundation", "Foundation", "CoreVideo", "CoreMedia", "AudioToolbox", "VideoToolbox", "Security")
elseif is_plat("linux") then

thirdparty/nvcodec/Interface/cuviddec.h vendored Normal file (1190 lines; diff suppressed because it is too large)

thirdparty/nvcodec/Interface/nvEncodeAPI.h vendored Normal file (5003 lines; diff suppressed because it is too large)

thirdparty/nvcodec/Interface/nvcuvid.h vendored Normal file (436 lines)

@@ -0,0 +1,436 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2020 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides a video decoding interface to NVIDIA GPU devices.
//! \date 2015-2020
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/
#if !defined(__NVCUVID_H__)
#define __NVCUVID_H__
#include "cuviddec.h"
#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */
/***********************************************/
//!
//! High-level helper APIs for video sources
//!
/***********************************************/
typedef void *CUvideosource;
typedef void *CUvideoparser;
typedef long long CUvideotimestamp;
/************************************************************************/
//! \enum cudaVideoState
//! Video source state enums
//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
/************************************************************************/
typedef enum {
cudaVideoState_Error = -1, /**< Error state (invalid source) */
cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */
cudaVideoState_Started = 1 /**< Source is running and delivering data */
} cudaVideoState;
/************************************************************************/
//! \enum cudaAudioCodec
//! Audio compression enums
//! Used in CUAUDIOFORMAT structure
/************************************************************************/
typedef enum {
cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */
cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */
cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */
cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */
cudaAudioCodec_LPCM, /**< PCM Audio */
cudaAudioCodec_AAC, /**< AAC Audio */
} cudaAudioCodec;
/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
//! Video format
//! Used in cuvidGetSourceVideoFormat API
/************************************************************************************************/
typedef struct
{
cudaVideoCodec codec; /**< OUT: Compression format */
/**
* OUT: frame rate = numerator / denominator (for example: 30000/1001)
*/
struct {
/**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */
unsigned int numerator;
/**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
unsigned int denominator;
} frame_rate;
unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */
unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma, e.g. 2 for 10-bit depth, 4 for 12-bit depth */
unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma, e.g. 2 for 10-bit depth, 4 for 12-bit depth */
unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct
decoding. The client can send this value in ulNumDecodeSurfaces
(in CUVIDDECODECREATEINFO structure).
This guarantees correct functionality and optimal video memory
usage but not necessarily the best performance, which depends on
the design of the overall application. The optimal number of
decode surfaces (in terms of performance and memory utilization)
should be decided by experimentation for each application, but it
cannot go below min_num_decode_surfaces.
If this value is used for ulNumDecodeSurfaces then it must be
returned to parser during sequence callback. */
unsigned int coded_width; /**< OUT: coded frame width in pixels */
unsigned int coded_height; /**< OUT: coded frame height in pixels */
/**
* area of the frame that should be displayed
* typical example:
* coded_width = 1920, coded_height = 1088
* display_area = { 0,0,1920,1080 }
*/
struct {
int left; /**< OUT: left position of display rect */
int top; /**< OUT: top position of display rect */
int right; /**< OUT: right position of display rect */
int bottom; /**< OUT: bottom position of display rect */
} display_area;
cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */
unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */
/**
* OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
*/
struct {
int x;
int y;
} display_aspect_ratio;
/**
* Video Signal Description
* Refer to section E.2.1 (VUI parameters semantics) of the H.264 specification
*/
struct {
unsigned char video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */
unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */
unsigned char reserved_zero_bits : 4; /**< Reserved bits */
unsigned char color_primaries; /**< OUT: chromaticity coordinates of source primaries */
unsigned char transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */
unsigned char matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */
} video_signal_description;
unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */
} CUVIDEOFORMAT;
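/* A minimal illustrative sketch (not part of the original header; names are
 * assumptions): a sequence callback that honors min_num_decode_surfaces.
 * Returning a value > 1 hands the surface count back to the parser,
 * overriding CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces. */
static int CUDAAPI HandleVideoSequence(void *pUserData, CUVIDEOFORMAT *pVideoFormat)
{
    /* Use at least the parser-reported minimum for correct decoding. */
    int nDecodeSurfaces = pVideoFormat->min_num_decode_surfaces;
    /* ... (re)create the decoder here with
       CUVIDDECODECREATEINFO::ulNumDecodeSurfaces = nDecodeSurfaces ... */
    return nDecodeSurfaces;
}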
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDOPERATINGPOINTINFO
//! Operating point information of scalable bitstream
/****************************************************************/
typedef struct
{
cudaVideoCodec codec;
union
{
struct
{
unsigned char operating_points_cnt;
unsigned char reserved24_bits[3];
unsigned short operating_points_idc[32];
} av1;
unsigned char CodecReserved[1024];
};
} CUVIDOPERATINGPOINTINFO;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
//! AV1 specific sequence header information
/****************************************************************/
typedef struct {
unsigned int max_width;
unsigned int max_height;
unsigned char reserved[1016];
} CUVIDAV1SEQHDR;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMATEX
//! Video format including raw sequence header information
//! Used in cuvidGetSourceVideoFormat API
/****************************************************************/
typedef struct
{
CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */
union {
CUVIDAV1SEQHDR av1;
unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */
};
} CUVIDEOFORMATEX;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUAUDIOFORMAT
//! Audio formats
//! Used in cuvidGetSourceAudioFormat API
/****************************************************************/
typedef struct
{
cudaAudioCodec codec; /**< OUT: Compression format */
unsigned int channels; /**< OUT: number of audio channels */
unsigned int samplespersec; /**< OUT: sampling frequency */
unsigned int bitrate; /**< OUT: For uncompressed, can also be used to determine bits per sample */
unsigned int reserved1; /**< Reserved for future use */
unsigned int reserved2; /**< Reserved for future use */
} CUAUDIOFORMAT;
/***************************************************************/
//! \enum CUvideopacketflags
//! Data packet flags
//! Used in CUVIDSOURCEDATAPACKET structure
/***************************************************************/
typedef enum {
CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */
CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */
CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */
CUVID_PKT_ENDOFPICTURE = 0x08, /**< Set when the packet contains exactly one frame or one field */
CUVID_PKT_NOTIFY_EOS = 0x10, /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
display callback will be invoked with null value of CUVIDPARSERDISPINFO which
should be interpreted as end of the stream. */
} CUvideopacketflags;
/*****************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEDATAPACKET
//! Data Packet
//! Used in cuvidParseVideoData API
//! IN for cuvidParseVideoData
/*****************************************************************************/
typedef struct _CUVIDSOURCEDATAPACKET
{
unsigned long flags; /**< IN: Combination of CUVID_PKT_XXX flags */
unsigned long payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
CUvideotimestamp timestamp; /**< IN: Presentation time stamp (10MHz clock), only valid if
CUVID_PKT_TIMESTAMP flag is set */
} CUVIDSOURCEDATAPACKET;
// Callback for packet delivery
typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
/**************************************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEPARAMS
//! Describes parameters needed in cuvidCreateVideoSource API
//! NVDECODE API is intended for HW-accelerated video decoding, so CUvideosource doesn't have an audio demuxer for all supported
//! containers. Clients are advised to use their own or a third-party demuxer if audio support is needed.
/**************************************************************************************************************************/
typedef struct _CUVIDSOURCEPARAMS
{
unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */
unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */
unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
unsigned int uReserved1[6]; /**< Reserved for future use - set to zero */
void *pUserData; /**< IN: User private data passed in to the data handlers */
PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */
PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */
void *pvReserved2[8]; /**< Reserved for future use - set to NULL */
} CUVIDSOURCEPARAMS;
/**********************************************/
//! \ingroup ENUMS
//! \enum CUvideosourceformat_flags
//! CUvideosourceformat_flags
//! Used in cuvidGetSourceVideoFormat API
/**********************************************/
typedef enum {
CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */
} CUvideosourceformat_flags;
#if !defined(__APPLE__)
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create a CUvideosource object. CUvideosource spawns a demultiplexer thread that provides two callbacks:
//! pfnVideoDataHandler() and pfnAudioDataHandler()
//! NVDECODE API is intended for HW-accelerated video decoding, so CUvideosource doesn't have an audio demuxer for all supported
//! containers. Clients are advised to use their own or a third-party demuxer if audio support is needed.
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create video source
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
/********************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
//! Destroy video source
/********************************************************************/
CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);
/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
//! Set video source state to:
//! cudaVideoState_Started - to signal the source to run and deliver data
//! cudaVideoState_Stopped - to stop the source from delivering the data
//! cudaVideoState_Error - invalid source
/******************************************************************************************/
CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
//! Get video source state
//! Returns:
//! cudaVideoState_Started - if Source is running and delivering data
//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
//! cudaVideoState_Error - if Source is in error state
/******************************************************************************************/
cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
/******************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
//! Gets the video source format in pvidfmt; flags is set to a combination of CUvideosourceformat_flags as required
/******************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
/**************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
//! Get audio source format
//! NVDECODE API is intended for HW-accelerated video decoding, so CUvideosource doesn't have an audio demuxer for all supported
//! containers. Clients are advised to use their own or a third-party demuxer if audio support is needed.
/**************************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
#endif
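/* A minimal illustrative sketch (assumed usage, not from this commit):
 * driving a CUvideosource. The data handler normally forwards packets to
 * cuvidParseVideoData(); its return semantics are assumed here
 * (nonzero = keep delivering). Error checks are elided. */
static int CUDAAPI HandleVideoData(void *pUserData, CUVIDSOURCEDATAPACKET *pPacket)
{
    /* Forward pPacket to the parser here. */
    return 1; /* assumed: nonzero continues delivery */
}
static void RunVideoSource(const char *szFilePath)
{
    CUVIDSOURCEPARAMS params = { 0 };
    params.pfnVideoDataHandler = HandleVideoData;
    CUvideosource hSource = NULL;
    cuvidCreateVideoSource(&hSource, szFilePath, &params);
    cuvidSetVideoSourceState(hSource, cudaVideoState_Started);
    while (cuvidGetVideoSourceState(hSource) == cudaVideoState_Started)
        ; /* the demuxer thread delivers packets until end-of-stream or error */
    cuvidDestroyVideoSource(hSource);
}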
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERDISPINFO
//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
/**********************************************************************************/
typedef struct _CUVIDPARSERDISPINFO
{
int picture_index; /**< OUT: Index of the current picture */
int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */
int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */
int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling,
-1=unpaired field) */
CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */
} CUVIDPARSERDISPINFO;
/***********************************************************************************************************************/
//! Parser callbacks
//! The parser calls these synchronously from within cuvidParseVideoData() whenever there is a sequence change or a picture
//! is ready to be decoded and/or displayed. The first argument to each callback is the "void *pUserData" member of CUVIDPARSERPARAMS.
//! Return values from these callbacks are interpreted as below; a failure return is propagated by
//! cuvidParseVideoData() to the application.
//! The parser picks a default operating point of 0 and an outputAllLayers flag of 0 if PFNVIDOPPOINTCALLBACK is not set,
//! or if its return value is -1 or an invalid operating point.
//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces
//! while creating parser)
//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved)
/***********************************************************************************************************************/
typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*);
/**************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERPARAMS
//! Used in cuvidCreateVideoParser API
/**************************************/
typedef struct _CUVIDPARSERPARAMS
{
cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */
unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */
unsigned int ulClockRate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */
unsigned int ulErrorThreshold; /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */
unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display)
0=no delay (recommended values: 2..4) */
unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */
unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
unsigned int uReserved1[4]; /**< IN: Reserved for future use - set to 0 */
void *pUserData; /**< IN: User data for callbacks */
PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */
PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */
PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1
scalable bitstream */
void *pvReserved2[6]; /**< Reserved for future use - set to NULL */
CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
//! Create video parser object and initialize
/************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
//! Parse the video data from the source data packet in pPacket.
//! Extracts parameter sets (SPS, PPS, etc.) and bitstream data from pPacket, and:
//! calls back pfnDecodePicture with CUVIDPICPARAMS data to kick off HW decoding;
//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for the initial sequence header or when
//! the decoder encounters a video format change;
//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
/************************************************************************************************/
CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
//! Destroy the video parser
/************************************************************************************************/
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
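/* A minimal illustrative sketch (assumed usage, not from this commit): the
 * parser create/feed/flush/destroy lifecycle. The decode/display handlers
 * are stubs; a real client decodes and maps frames there. Error checks
 * are elided. */
static int CUDAAPI HandlePictureDecode(void *pUserData, CUVIDPICPARAMS *pPicParams)
{
    return 1; /* >= 1 means success; a real client calls cuvidDecodePicture() */
}
static int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo)
{
    return 1; /* >= 1 means success; a real client maps/presents the frame */
}
static void ParseElementaryStream(const unsigned char *pData, unsigned long nBytes)
{
    CUVIDPARSERPARAMS parserParams = { cudaVideoCodec_H264 }; /* example codec; rest zeroed */
    parserParams.ulMaxNumDecodeSurfaces = 1;  /* raised later via the sequence callback */
    parserParams.ulMaxDisplayDelay = 2;       /* recommended range is 2..4 */
    parserParams.pfnSequenceCallback = HandleVideoSequence;  /* sketch above */
    parserParams.pfnDecodePicture = HandlePictureDecode;
    parserParams.pfnDisplayPicture = HandlePictureDisplay;
    CUvideoparser hParser = NULL;
    cuvidCreateVideoParser(&hParser, &parserParams);
    CUVIDSOURCEDATAPACKET packet = { 0 };
    packet.payload = pData;
    packet.payload_size = nBytes;
    cuvidParseVideoData(hParser, &packet);
    /* Flush with an empty payload and the end-of-stream flag set. */
    packet.payload = NULL;
    packet.payload_size = 0;
    packet.flags = CUVID_PKT_ENDOFSTREAM;
    cuvidParseVideoData(hParser, &packet);
    cuvidDestroyVideoParser(hParser);
}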
/**********************************************************************************************/
#if defined(__cplusplus)
}
#endif /* __cplusplus */
#endif // __NVCUVID_H__

BIN thirdparty/nvcodec/Lib/Win32/nvcuvid.lib vendored Normal file (binary file not shown; seven further unnamed binary files also not shown)

BIN thirdparty/nvcodec/Lib/x64/nvcuvid.lib vendored Normal file (binary file not shown; one further unnamed binary file also not shown)


@@ -0,0 +1,111 @@
# Copyright 2020 NVIDIA Corporation. All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
# 3.7 is required for FindVulkan module support in CMake.
cmake_minimum_required(VERSION 3.7)
project(NvCodec)
# Set C++11 for all projects and disable non-standard extensions
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_INSTALL_PREFIX .)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Utils)
set(NV_CODEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec)
set(NV_ENC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvEncoder)
set(NV_DEC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvDecoder)
set(NV_APPENC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppEncode/Common)
set(NV_APPDEC_COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/AppDecode/Common)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
else()
set(NVCODEC_SAMPLES_INSTALL_DIR ${CMAKE_BINARY_DIR})
endif()
if(WIN32)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/x64/nvencodeapi.lib)
else()
set(CUVID_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvcuvid.lib)
set(NVENCODEAPI_LIB ${CMAKE_CURRENT_SOURCE_DIR}/../Lib/Win32/nvencodeapi.lib)
endif()
else ()
find_library(CUVID_LIB nvcuvid)
find_library(NVENCODEAPI_LIB nvidia-encode)
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()
if(WIN32)
add_subdirectory(AppEncode/AppEncD3D11)
add_subdirectory(AppEncode/AppEncD3D9)
add_subdirectory(AppDecode/AppDecD3D)
else ()
# Needed only for the Linux build
add_subdirectory(AppEncode/AppEncGL)
endif()
add_subdirectory(AppEncode/AppEncCuda)
add_subdirectory(AppEncode/AppEncDec)
add_subdirectory(AppEncode/AppEncLowLatency)
add_subdirectory(AppEncode/AppEncME)
add_subdirectory(AppEncode/AppEncPerf)
add_subdirectory(AppEncode/AppEncQual)
add_subdirectory(AppEncode/AppMotionEstimationVkCuda)
add_subdirectory(AppTranscode/AppTrans)
add_subdirectory(AppTranscode/AppTransOneToN)
add_subdirectory(AppTranscode/AppTransPerf)
add_subdirectory(AppDecode/AppDec)
add_subdirectory(AppDecode/AppDecGL)
add_subdirectory(AppDecode/AppDecImageProvider)
add_subdirectory(AppDecode/AppDecLowLatency)
add_subdirectory(AppDecode/AppDecMem)
add_subdirectory(AppDecode/AppDecMultiFiles)
add_subdirectory(AppDecode/AppDecMultiInput)
add_subdirectory(AppDecode/AppDecPerf)


@@ -0,0 +1,54 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include <stdint.h>
#include <stdio.h>
static __global__ void ConvertUInt8ToUInt16Kernel(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int destStrideInPixels = nDestPitch / (sizeof(uint16_t));
*(uchar2 *)&dpUInt16[y * destStrideInPixels + x] = uchar2{ 0, dpUInt8[y * nSrcPitch + x] };
}
static __global__ void ConvertUInt16ToUInt8Kernel(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
int srcStrideInPixels = nSrcPitch / (sizeof(uint16_t));
dpUInt8[y * nDestPitch + x] = ((uchar2 *)&dpUInt16[y * srcStrideInPixels + x])->y;
}
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt8ToUInt16Kernel <<< gridSize, blockSize >>>(dpUInt8, dpUInt16, nSrcPitch, nDestPitch, nWidth, nHeight);
}
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight)
{
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nHeight + blockSize.y - 1) / blockSize.y, 1);
ConvertUInt16ToUInt8Kernel <<<gridSize, blockSize >>>(dpUInt16, dpUInt8, nSrcPitch, nDestPitch, nWidth, nHeight);
}
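/* A minimal usage sketch (illustrative, not part of the original file):
 * widening an 8-bit device plane to MSB-aligned 16 bits with the wrappers
 * above. Sizes are example values; real callers pass decoder pitches. */
void ExampleWiden(int nWidth, int nHeight)
{
    uint8_t *dpUInt8 = NULL;
    uint16_t *dpUInt16 = NULL;
    size_t srcPitch = 0, dstPitch = 0;
    cudaMallocPitch((void **)&dpUInt8, &srcPitch, nWidth, nHeight);
    cudaMallocPitch((void **)&dpUInt16, &dstPitch, nWidth * sizeof(uint16_t), nHeight);
    ConvertUInt8ToUInt16(dpUInt8, dpUInt16, (int)srcPitch, (int)dstPitch, nWidth, nHeight);
    cudaDeviceSynchronize(); /* the kernel launch above is asynchronous */
    cudaFree(dpUInt16);
    cudaFree(dpUInt8);
}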


@@ -0,0 +1,399 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include "ColorSpace.h"
__constant__ float matYuv2Rgb[3][3];
__constant__ float matRgb2Yuv[3][3];
void inline GetConstants(int iMatrix, float &wr, float &wb, int &black, int &white, int &max) {
black = 16; white = 235;
max = 255;
switch (iMatrix)
{
case ColorSpaceStandard_BT709:
default:
wr = 0.2126f; wb = 0.0722f;
break;
case ColorSpaceStandard_FCC:
wr = 0.30f; wb = 0.11f;
break;
case ColorSpaceStandard_BT470:
case ColorSpaceStandard_BT601:
wr = 0.2990f; wb = 0.1140f;
break;
case ColorSpaceStandard_SMPTE240M:
wr = 0.212f; wb = 0.087f;
break;
case ColorSpaceStandard_BT2020:
case ColorSpaceStandard_BT2020C:
wr = 0.2627f; wb = 0.0593f;
// 10-bit only
black = 64 << 6; white = 940 << 6;
max = (1 << 16) - 1;
break;
}
}
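/* Note on the math below (added commentary, assuming the usual BT-style
 * definitions): with luma weights wr and wb (wg = 1 - wr - wb), YUV is
 * defined by Y = wr*R + wg*G + wb*B, U = 0.5*(B - Y)/(1 - wb) and
 * V = 0.5*(R - Y)/(1 - wr). Inverting gives the rows of SetMatYuv2Rgb:
 * R = Y + (1 - wr)/0.5 * V, B = Y + (1 - wb)/0.5 * U, and a G row that
 * subtracts both chroma contributions scaled by 1/wg. The loop in each
 * setter then rescales between the limited [black, white] range and the
 * full [0, max] range. */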
void SetMatYuv2Rgb(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
1.0f, 0.0f, (1.0f - wr) / 0.5f,
1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
1.0f, (1.0f - wb) / 0.5f, 0.0f,
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
}
}
cudaMemcpyToSymbol(matYuv2Rgb, mat, sizeof(mat));
}
void SetMatRgb2Yuv(int iMatrix) {
float wr, wb;
int black, white, max;
GetConstants(iMatrix, wr, wb, black, white, max);
float mat[3][3] = {
wr, 1.0f - wb - wr, wb,
-0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
}
}
cudaMemcpyToSymbol(matRgb2Yuv, mat, sizeof(mat));
}
template<class T>
__device__ static T Clamp(T x, T lower, T upper) {
return x < lower ? lower : (x > upper ? upper : x);
}
template<class Rgb, class YuvUnit>
__device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
const int
low = 1 << (sizeof(YuvUnit) * 8 - 4),
mid = 1 << (sizeof(YuvUnit) * 8 - 1);
float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
YuvUnit
r = (YuvUnit)Clamp(matYuv2Rgb[0][0] * fy + matYuv2Rgb[0][1] * fu + matYuv2Rgb[0][2] * fv, 0.0f, maxf),
g = (YuvUnit)Clamp(matYuv2Rgb[1][0] * fy + matYuv2Rgb[1][1] * fu + matYuv2Rgb[1][2] * fv, 0.0f, maxf),
b = (YuvUnit)Clamp(matYuv2Rgb[2][0] * fy + matYuv2Rgb[2][1] * fu + matYuv2Rgb[2][2] * fv, 0.0f, maxf);
Rgb rgb{};
const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
rgb.c.r = r >> nShift;
rgb.c.g = g >> nShift;
rgb.c.b = b >> nShift;
} else {
rgb.c.r = r << nShift;
rgb.c.g = g << nShift;
rgb.c.b = b << nShift;
}
return rgb;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void YuvToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
*(RgbIntx2 *)pDst = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
};
*(RgbIntx2 *)(pDst + nRgbPitch) = RgbIntx2 {
YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d,
YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void Yuv444ToRgbKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgb, int nRgbPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
uint8_t *pDst = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
*(RgbIntx2 *)pDst = RgbIntx2{
YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void YuvToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 l1 = *(YuvUnitx2 *)(pSrc + nYuvPitch);
YuvUnitx2 ch = *(YuvUnitx2 *)(pSrc + (nHeight - y / 2) * nYuvPitch);
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
*(RgbUnitx2 *)(pDst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
}
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
__global__ static void Yuv444ToRgbPlanarKernel(uint8_t *pYuv, int nYuvPitch, uint8_t *pRgbp, int nRgbpPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y);
if (x + 1 >= nWidth || y >= nHeight) {
return;
}
uint8_t *pSrc = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
YuvUnitx2 l0 = *(YuvUnitx2 *)pSrc;
YuvUnitx2 ch1 = *(YuvUnitx2 *)(pSrc + (nHeight * nYuvPitch));
YuvUnitx2 ch2 = *(YuvUnitx2 *)(pSrc + (2 * nHeight * nYuvPitch));
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
uint8_t *pDst = pRgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
pDst += nRgbpPitch * nHeight;
*(RgbUnitx2 *)pDst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<uchar2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR32, uint2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbKernel<ushort2, COLOR64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgra, nBgraPitch, nWidth, nHeight);
}
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpNv12, nNv12Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpP016, nP016Pitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix) {
SetMatYuv2Rgb(iMatrix);
Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2), dim3(32, 2) >>>
(dpYUV444, nPitch, dpBgrp, nBgrpPitch, nWidth, nHeight);
}
// Explicit Instantiation
template void Nv12ToColor32<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor32<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<BGRA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColor64<RGBA64>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor32<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<BGRA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColor64<RGBA64>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor32<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<BGRA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColor64<RGBA64>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<BGRA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void Nv12ToColorPlanar<RGBA32>(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<BGRA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void P016ToColorPlanar<RGBA32>(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight, int iMatrix);
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
return matRgb2Yuv[0][0] * r + matRgb2Yuv[0][1] * g + matRgb2Yuv[0][2] * b + low;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[1][0] * r + matRgb2Yuv[1][1] * g + matRgb2Yuv[1][2] * b + mid;
}
template<class YuvUnit, class RgbUnit>
__device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
return matRgb2Yuv[2][0] * r + matRgb2Yuv[2][1] * g + matRgb2Yuv[2][2] * b + mid;
}
template<class YuvUnitx2, class Rgb, class RgbIntx2>
__global__ static void RgbToYuvKernel(uint8_t *pRgb, int nRgbPitch, uint8_t *pYuv, int nYuvPitch, int nWidth, int nHeight) {
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
if (x + 1 >= nWidth || y + 1 >= nHeight) {
return;
}
uint8_t *pSrc = pRgb + x * sizeof(Rgb) + y * nRgbPitch;
RgbIntx2 int2a = *(RgbIntx2 *)pSrc;
RgbIntx2 int2b = *(RgbIntx2 *)(pSrc + nRgbPitch);
Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
decltype(Rgb::c.r)
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
uint8_t *pDst = pYuv + x * sizeof(YuvUnitx2) / 2 + y * nYuvPitch;
*(YuvUnitx2 *)pDst = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
};
*(YuvUnitx2 *)(pDst + nYuvPitch) = YuvUnitx2 {
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
};
*(YuvUnitx2 *)(pDst + (nHeight - y / 2) * nYuvPitch) = YuvUnitx2 {
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
};
}
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight, int iMatrix) {
SetMatRgb2Yuv(iMatrix);
RgbToYuvKernel<ushort2, BGRA64, ulonglong2>
<<<dim3((nWidth + 63) / 32 / 2, (nHeight + 3) / 2 / 2), dim3(32, 2)>>>
(dpBgra, nBgraPitch, dpP016, nP016Pitch, nWidth, nHeight);
}
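/* A minimal usage sketch (illustrative, not part of the original file):
 * converting a decoded NV12 surface to interleaved BGRA on the device.
 * Buffers and pitches are assumed to be device allocations made by the
 * caller. */
void ExampleNv12ToBgra(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight)
{
    /* iMatrix selects the conversion standard from ColorSpace.h. */
    Nv12ToColor32<BGRA32>(dpNv12, nNv12Pitch, dpBgra, nBgraPitch, nWidth, nHeight, ColorSpaceStandard_BT709);
    cudaDeviceSynchronize(); /* the kernel launch is asynchronous */
}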


@@ -0,0 +1,48 @@
#pragma once
#include <stdint.h>
#include <cuda_runtime.h>
typedef enum ColorSpaceStandard {
ColorSpaceStandard_BT709 = 1,
ColorSpaceStandard_Unspecified = 2,
ColorSpaceStandard_Reserved = 3,
ColorSpaceStandard_FCC = 4,
ColorSpaceStandard_BT470 = 5,
ColorSpaceStandard_BT601 = 6,
ColorSpaceStandard_SMPTE240M = 7,
ColorSpaceStandard_YCgCo = 8,
ColorSpaceStandard_BT2020 = 9,
ColorSpaceStandard_BT2020C = 10
} ColorSpaceStandard;
union BGRA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t b, g, r, a;
} c;
};
union RGBA32 {
uint32_t d;
uchar4 v;
struct {
uint8_t r, g, b, a;
} c;
};
union BGRA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t b, g, r, a;
} c;
};
union RGBA64 {
uint64_t d;
ushort4 v;
struct {
uint16_t r, g, b, a;
} c;
};


@@ -0,0 +1,357 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavcodec/avcodec.h>
}
#include "NvCodecUtils.h"
//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from them.
//---------------------------------------------------------------------------
/**
* @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
*/
class FFmpegDemuxer {
private:
AVFormatContext *fmtc = NULL;
AVIOContext *avioc = NULL;
AVPacket pkt, pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
AVBSFContext *bsfc = NULL;
int iVideoStream;
bool bMp4H264, bMp4HEVC, bMp4MPEG4;
AVCodecID eVideoCodec;
AVPixelFormat eChromaFormat;
int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
double timeBase = 0.0;
int64_t userTimeScale = 0;
uint8_t *pDataWithHeader = NULL;
unsigned int frameCount = 0;
public:
class DataProvider {
public:
virtual ~DataProvider() {}
virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
};
private:
/**
* @brief Private constructor to initialize libavformat resources.
* @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
*/
FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
if (!fmtc) {
LOG(ERROR) << "No AVFormatContext provided.";
return;
}
LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
ck(avformat_find_stream_info(fmtc, NULL));
iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (iVideoStream < 0) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "Could not find stream in input file";
return;
}
//fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
nWidth = fmtc->streams[iVideoStream]->codecpar->width;
nHeight = fmtc->streams[iVideoStream]->codecpar->height;
eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
timeBase = av_q2d(rTimeBase);
userTimeScale = timeScale;
// Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
switch (eChromaFormat)
{
case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 10;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV420P12LE:
nBitDepth = 12;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P10LE:
nBitDepth = 10;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P12LE:
nBitDepth = 12;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P:
nBitDepth = 8;
nChromaHeight = nHeight << 1;
nBPP = 1;
break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
break;
default:
LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
eChromaFormat = AV_PIX_FMT_YUV420P;
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
}
bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (
!strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
|| !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
|| !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
);
//Initialize packet fields with default values
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
av_init_packet(&pktFiltered);
pktFiltered.data = NULL;
pktFiltered.size = 0;
// Initialize bitstream filter and its required resources
if (bMp4H264) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
if (bMp4HEVC) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " " << "av_bsf_get_by_name() failed";
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
}
AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
AVFormatContext *ctx = NULL;
if (!(ctx = avformat_alloc_context())) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
uint8_t *avioc_buffer = NULL;
int avioc_buffer_size = 8 * 1024 * 1024;
avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
if (!avioc_buffer) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
0, pDataProvider, &ReadPacket, NULL, NULL);
if (!avioc) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
ctx->pb = avioc;
ck(avformat_open_input(&ctx, NULL, NULL, NULL));
return ctx;
}
/**
* @brief Allocate and return AVFormatContext*.
* @param szFilePath - Filepath pointing to input stream.
* @return Pointer to AVFormatContext
*/
AVFormatContext *CreateFormatContext(const char *szFilePath) {
avformat_network_init();
AVFormatContext *ctx = NULL;
ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
return ctx;
}
public:
FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/) : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) {avioc = fmtc->pb;}
~FFmpegDemuxer() {
if (!fmtc) {
return;
}
if (pkt.data) {
av_packet_unref(&pkt);
}
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
if (bsfc) {
av_bsf_free(&bsfc);
}
avformat_close_input(&fmtc);
if (avioc) {
av_freep(&avioc->buffer);
av_freep(&avioc);
}
if (pDataWithHeader) {
av_free(pDataWithHeader);
}
}
AVCodecID GetVideoCodec() {
return eVideoCodec;
}
AVPixelFormat GetChromaFormat() {
return eChromaFormat;
}
int GetWidth() {
return nWidth;
}
int GetHeight() {
return nHeight;
}
int GetBitDepth() {
return nBitDepth;
}
int GetFrameSize() {
return nWidth * (nHeight + nChromaHeight) * nBPP;
}
bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
if (!fmtc) {
return false;
}
*pnVideoBytes = 0;
if (pkt.data) {
av_packet_unref(&pkt);
}
int e = 0;
while ((e = av_read_frame(fmtc, &pkt)) >= 0 && pkt.stream_index != iVideoStream) {
av_packet_unref(&pkt);
}
if (e < 0) {
return false;
}
if (bMp4H264 || bMp4HEVC) {
if (pktFiltered.data) {
av_packet_unref(&pktFiltered);
}
ck(av_bsf_send_packet(bsfc, &pkt));
ck(av_bsf_receive_packet(bsfc, &pktFiltered));
*ppVideo = pktFiltered.data;
*pnVideoBytes = pktFiltered.size;
if (pts)
*pts = (int64_t) (pktFiltered.pts * userTimeScale * timeBase);
} else {
if (bMp4MPEG4 && (frameCount == 0)) {
int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
if (extraDataSize > 0) {
// extradata contains start codes 00 00 01. Subtract its size
pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt.size - 3*sizeof(uint8_t));
if (!pDataWithHeader) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return false;
}
memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
memcpy(pDataWithHeader+extraDataSize, pkt.data+3, pkt.size - 3*sizeof(uint8_t));
*ppVideo = pDataWithHeader;
*pnVideoBytes = extraDataSize + pkt.size - 3*sizeof(uint8_t);
}
} else {
*ppVideo = pkt.data;
*pnVideoBytes = pkt.size;
}
if (pts)
*pts = (int64_t)(pkt.pts * userTimeScale * timeBase);
}
frameCount++;
return true;
}
static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
}
};
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO : return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO : return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4 : return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_WMV3 :
case AV_CODEC_ID_VC1 : return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264 : return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC : return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8 : return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9 : return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG : return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1 : return cudaVideoCodec_AV1;
default : return cudaVideoCodec_NumCodecs;
}
}
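/* A minimal usage sketch (illustrative, not part of this header): a typical
 * demux loop; "input.mp4" is an assumed path. Each Demux() call yields one
 * compressed frame (Annex-B converted for MP4 H.264/HEVC) plus its pts. */
inline void ExampleDemuxLoop()
{
    FFmpegDemuxer demuxer("input.mp4");
    cudaVideoCodec eCodec = FFmpeg2NvCodecId(demuxer.GetVideoCodec());
    uint8_t *pVideo = NULL;
    int nVideoBytes = 0;
    int64_t pts = 0;
    while (demuxer.Demux(&pVideo, &nVideoBytes, &pts)) {
        /* Hand pVideo/nVideoBytes to the parser/decoder selected by eCodec. */
    }
}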


@@ -0,0 +1,109 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <thread>
#include <mutex>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"
extern simplelogger::Logger *logger;
class FFmpegStreamer {
private:
AVFormatContext *oc = NULL;
AVStream *vs = NULL;
int nFps = 0;
public:
FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
avformat_network_init();
oc = avformat_alloc_context();
if (!oc) {
LOG(ERROR) << "FFMPEG: avformat_alloc_context error";
return;
}
// Use MPEG-TS as the container format. av_guess_format() returns a shared
// format descriptor (const in newer FFmpeg), so it must not be modified;
// the codec id is set on the stream parameters below instead.
AVOutputFormat *fmt = av_guess_format("mpegts", NULL, NULL);
if (!fmt) {
LOG(ERROR) << "FFMPEG: Could not guess mpegts output format";
return;
}
oc->oformat = fmt;
oc->url = av_strdup(szInFilePath);
LOG(INFO) << "Streaming destination: " << oc->url;
// Add video stream to oc
vs = avformat_new_stream(oc, NULL);
if (!vs) {
LOG(ERROR) << "FFMPEG: Could not alloc video stream";
return;
}
vs->id = 0;
// Set video parameters
AVCodecParameters *vpar = vs->codecpar;
vpar->codec_id = eCodecId;
vpar->codec_type = AVMEDIA_TYPE_VIDEO;
vpar->width = nWidth;
vpar->height = nHeight;
// Everything is ready. Now open the output stream.
if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
return;
}
// Write the container header
if (avformat_write_header(oc, NULL) < 0) {
LOG(ERROR) << "FFMPEG: avformat_write_header error!";
return;
}
}
~FFmpegStreamer() {
if (oc) {
av_write_trailer(oc);
avio_close(oc->pb);
avformat_free_context(oc);
}
}
bool Stream(uint8_t *pData, int nBytes, int nPts) {
AVPacket pkt = {0};
av_init_packet(&pkt);
// nPts is in frame units; rescale it to the stream time base
pkt.pts = av_rescale_q(nPts, AVRational {1, nFps}, vs->time_base);
// No B-frames
pkt.dts = pkt.pts;
pkt.stream_index = vs->index;
pkt.data = pData;
pkt.size = nBytes;
// Packets that begin with an SPS NAL unit (00 00 00 01 67, H.264) mark keyframes
if(!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
// Write the compressed frame into the output
int ret = av_write_frame(oc, &pkt);
av_write_frame(oc, NULL); // a NULL packet flushes the muxer's buffered data
if (ret < 0) {
LOG(ERROR) << "FFMPEG: Error while writing video frame";
}
return true;
}
};
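// --- Illustrative usage sketch (not part of the original sample) ---
// Push pre-encoded H.264 annex-B frames to an MPEG-TS destination. GetNextFrame
// is a hypothetical frame source; pts advances in frame units as Stream() expects.
inline void StreamLoopSketch(const char *szOutUrl, int nWidth, int nHeight, int nFps,
bool (*GetNextFrame)(uint8_t **ppData, int *pnBytes)) {
FFmpegStreamer streamer(AV_CODEC_ID_H264, nWidth, nHeight, nFps, szOutUrl);
uint8_t *pData = NULL;
int nBytes = 0;
for (int pts = 0; GetNextFrame(&pData, &nBytes); pts++) {
streamer.Stream(pData, nBytes, pts);
}
}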

View File

@@ -0,0 +1,490 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
//---------------------------------------------------------------------------
//! \file NvCodecUtils.h
//! \brief Miscellaneous classes and error checking functions.
//!
//! Used by Transcode/Encode samples apps for reading input files,
//! mutithreading, performance measurement or colorspace conversion while
//! decoding.
//---------------------------------------------------------------------------
#pragma once
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <thread>
#include "Logger.h"
extern simplelogger::Logger *logger;
#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
LOG(ERROR) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
if (e != cudaSuccess) {
LOG(ERROR) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef _NV_ENCODEAPI_H_
inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
const char *aszErrName[] = {
"NV_ENC_SUCCESS",
"NV_ENC_ERR_NO_ENCODE_DEVICE",
"NV_ENC_ERR_UNSUPPORTED_DEVICE",
"NV_ENC_ERR_INVALID_ENCODERDEVICE",
"NV_ENC_ERR_INVALID_DEVICE",
"NV_ENC_ERR_DEVICE_NOT_EXIST",
"NV_ENC_ERR_INVALID_PTR",
"NV_ENC_ERR_INVALID_EVENT",
"NV_ENC_ERR_INVALID_PARAM",
"NV_ENC_ERR_INVALID_CALL",
"NV_ENC_ERR_OUT_OF_MEMORY",
"NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
"NV_ENC_ERR_UNSUPPORTED_PARAM",
"NV_ENC_ERR_LOCK_BUSY",
"NV_ENC_ERR_NOT_ENOUGH_BUFFER",
"NV_ENC_ERR_INVALID_VERSION",
"NV_ENC_ERR_MAP_FAILED",
"NV_ENC_ERR_NEED_MORE_INPUT",
"NV_ENC_ERR_ENCODER_BUSY",
"NV_ENC_ERR_EVENT_NOT_REGISTERD",
"NV_ENC_ERR_GENERIC",
"NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
"NV_ENC_ERR_UNIMPLEMENTED",
"NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
"NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
"NV_ENC_ERR_RESOURCE_NOT_MAPPED",
};
if (e != NV_ENC_SUCCESS) {
LOG(ERROR) << "NVENC error " << aszErrName[e] << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef _WINERROR_
inline bool check(HRESULT e, int iLine, const char *szFile) {
if (e != S_OK) {
std::stringstream stream;
stream << std::hex << std::uppercase << e;
LOG(ERROR) << "HRESULT error 0x" << stream.str() << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
if (e != 0) {
LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
inline bool check(int e, int iLine, const char *szFile) {
if (e < 0) {
LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#define ck(call) check(call, __LINE__, __FILE__)
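#ifdef __cuda_cuda_h__
// Illustrative sketch (not part of the original sample): ck() forwards the
// return code plus call-site information to the matching check() overload,
// so failures can be tested inline.
inline bool CudaInitSketch() { return ck(cuInit(0)); }
#endif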
/**
* @brief Wrapper class around std::thread
*/
class NvThread {
public:
NvThread() = default;
NvThread(const NvThread &) = delete;
NvThread &operator=(const NvThread &other) = delete;
NvThread(std::thread &&thread) : t(std::move(thread)) {}
NvThread(NvThread &&thread) : t(std::move(thread.t)) {}
NvThread &operator=(NvThread &&other) {
t = std::move(other.t);
return *this;
}
~NvThread() { join(); }
void join() {
if (t.joinable()) {
t.join();
}
}
private:
std::thread t;
};
#ifndef _WIN32
#define _stricmp strcasecmp
#define _stat64 stat64
#endif
/**
* @brief Utility class to allocate buffer memory. Helps avoid I/O during the
* encode/decode loop in case of performance tests.
*/
class BufferedFileReader {
public:
/**
* @brief Constructor function to allocate appropriate memory and copy file
* contents into it
*/
BufferedFileReader(const char *szFileName, bool bPartial = false) {
struct _stat64 st;
if (_stat64(szFileName, &st) != 0) {
return;
}
nSize = st.st_size;
while (nSize) {
try {
pBuf = new uint8_t[(size_t)nSize];
if (nSize != st.st_size) {
LOG(WARNING) << "File is too large - only " << 100.0 * nSize / st.st_size << "% is loaded";
}
break;
} catch (const std::bad_alloc &) {
if (!bPartial) {
return;
}
nSize = (uint64_t)(nSize * 0.9);
}
}
std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
if (!fpIn) {
return;
}
std::streamsize nRead =
fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
fpIn.close();
assert(nRead == nSize);
}
~BufferedFileReader() {
if (pBuf) {
delete[] pBuf;
}
}
bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
if (!pBuf) {
return false;
}
*ppBuf = pBuf;
*pnSize = nSize;
return true;
}
private:
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
};
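// --- Illustrative usage sketch (not part of the original sample) ---
// Load a whole file up front so that later processing does no disk I/O.
inline uint64_t FileSizeSketch(const char *szFileName) {
BufferedFileReader reader(szFileName);
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
return reader.GetBuffer(&pBuf, &nSize) ? nSize : 0;
}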
/**
* @brief Template class to facilitate color space conversion
*/
template <typename T>
class YuvConverter {
public:
YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
}
~YuvConverter() { delete[] pQuad; }
void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY;
if (nPitch == nWidth) {
memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i,
((nWidth + 1) / 2) * sizeof(T));
}
}
T *pv = puv + nSizePlaneU;
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
}
}
}
void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY, *pu = puv, *pv = puv + nSizePlaneU;
// split chroma from interleave to planar
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
}
}
if (nPitch == nWidth) {
memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i,
((nWidth + 1) / 2) * sizeof(T));
}
}
}
private:
T *pQuad;
int nWidth, nHeight;
};
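// --- Illustrative usage sketch (not part of the original sample) ---
// Convert a tightly packed 8-bit I420 frame to NV12 in place: the Y plane is
// untouched and the U/V planes are interleaved into a single UV plane.
inline void I420ToNv12Sketch(uint8_t *pFrame, int nWidth, int nHeight) {
YuvConverter<uint8_t> converter(nWidth, nHeight);
converter.PlanarToUVInterleaved(pFrame);
}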
/**
* @brief Utility class to measure elapsed time in seconds between the block of
* executed code
*/
class StopWatch {
public:
void Start() { t0 = std::chrono::high_resolution_clock::now(); }
double Stop() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now().time_since_epoch() -
t0.time_since_epoch())
.count() /
1.0e9;
}
private:
std::chrono::high_resolution_clock::time_point t0;
};
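// --- Illustrative usage sketch (not part of the original sample) ---
// Time an arbitrary callable in seconds.
template <typename Fn>
inline double TimeItSketch(Fn &&fn) {
StopWatch watch;
watch.Start();
fn();
return watch.Stop();
}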
template <typename T>
class ConcurrentQueue {
public:
ConcurrentQueue() {}
ConcurrentQueue(size_t size) : maxSize(size) {}
ConcurrentQueue(const ConcurrentQueue &) = delete;
ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;
void setSize(size_t s) { maxSize = s; }
void push_back(const T &value) {
// Do not use a std::lock_guard here. We will need to explicitly
// unlock before notify_one as the other waiting thread will
// automatically try to acquire mutex once it wakes up
// (which will happen on notify_one)
std::unique_lock<std::mutex> lock(m_mutex);
auto wasEmpty = m_List.empty();
while (full()) {
m_cond.wait(lock);
}
m_List.push_back(value);
if (wasEmpty && !m_List.empty()) {
lock.unlock();
m_cond.notify_one();
}
}
T pop_front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
auto wasFull = full();
T data = std::move(m_List.front());
m_List.pop_front();
if (wasFull && !full()) {
lock.unlock();
m_cond.notify_one();
}
return data;
}
T front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
return m_List.front();
}
size_t size() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.size();
}
bool empty() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.empty();
}
void clear() {
std::unique_lock<std::mutex> lock(m_mutex);
m_List.clear();
}
private:
bool full() {
if (m_List.size() == maxSize) return true;
return false;
}
private:
std::list<T> m_List;
std::mutex m_mutex;
std::condition_variable m_cond;
size_t maxSize = SIZE_MAX; // effectively unbounded until setSize() or the sizing constructor is used
};
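// --- Illustrative usage sketch (not part of the original sample) ---
// Bounded single-producer/single-consumer hand-off between two NvThreads:
// push_back() blocks while the queue is full, pop_front() while it is empty.
inline void ProducerConsumerSketch() {
ConcurrentQueue<int> queue(8);
NvThread producer(std::thread([&queue]() {
for (int i = 0; i < 64; i++) {
queue.push_back(i);
}
}));
NvThread consumer(std::thread([&queue]() {
for (int i = 0; i < 64; i++) {
(void)queue.pop_front();
}
}));
}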
inline void CheckInputFile(const char *szInFilePath) {
std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
if (fpIn.fail()) {
std::ostringstream err;
err << "Unable to open input file: " << szInFilePath << std::endl;
throw std::invalid_argument(err.str());
}
}
inline void ValidateResolution(int nWidth, int nHeight) {
if (nWidth <= 0 || nHeight <= 0) {
std::ostringstream err;
err << "Please specify positive non zero resolution as -s WxH. Current "
"resolution is "
<< nWidth << "x" << nHeight << std::endl;
throw std::invalid_argument(err.str());
}
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 4);
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight, int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra,
int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp,
int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016,
int nP016Pitch, int nWidth, int nHeight, int iMatrix = 4);
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch,
int nDestPitch, int nWidth, int nHeight);
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch,
int nDestPitch, int nWidth, int nHeight);
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth,
int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch,
int nSrcWidth, int nSrcHeight,
unsigned char *dpDstNv12UV = nullptr);
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth,
int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch,
int nSrcWidth, int nSrcHeight,
unsigned char *dpDstP016UV = nullptr);
void ScaleYUV420(unsigned char *dpDstY, unsigned char *dpDstU,
unsigned char *dpDstV, int nDstPitch, int nDstChromaPitch,
int nDstWidth, int nDstHeight, unsigned char *dpSrcY,
unsigned char *dpSrcU, unsigned char *dpSrcV, int nSrcPitch,
int nSrcChromaPitch, int nSrcWidth, int nSrcHeight,
bool bSemiplanar);
#ifdef __cuda_cuda_h__
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue,
CUstream_st *outputCUStream);
#endif

View File

@@ -0,0 +1,644 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#pragma once
#include <vector>
#include <string>
#include <algorithm>
#include <stdexcept>
#include <sstream>
#include <iterator>
#include <cctype>
#include <cstring>
#include <functional>
#include "../Utils/Logger.h"
extern simplelogger::Logger *logger;
#ifndef _WIN32
inline bool operator==(const GUID &guid1, const GUID &guid2) {
return !memcmp(&guid1, &guid2, sizeof(GUID));
}
inline bool operator!=(const GUID &guid1, const GUID &guid2) {
return !(guid1 == guid2);
}
#endif
/*
* Helper class for parsing generic encoder options and preparing encoder
* initialization parameters. This class also provides some utility methods
* which generate verbose descriptions of the provided set of encoder
* initialization parameters.
*/
class NvEncoderInitParam {
public:
NvEncoderInitParam(const char *szParam = "",
std::function<void(NV_ENC_INITIALIZE_PARAMS *pParams)> *pfuncInit = NULL, bool _bLowLatency = false)
: strParam(szParam), bLowLatency(_bLowLatency)
{
if (pfuncInit) {
funcInit = *pfuncInit;
}
std::transform(strParam.begin(), strParam.end(), strParam.begin(),
[](unsigned char c) { return (char)std::tolower(c); });
std::istringstream ss(strParam);
tokens = std::vector<std::string> {
std::istream_iterator<std::string>(ss),
std::istream_iterator<std::string>()
};
for (unsigned i = 0; i < tokens.size(); i++)
{
if (tokens[i] == "-codec" && ++i != tokens.size())
{
ParseString("-codec", tokens[i], vCodec, szCodecNames, &guidCodec);
continue;
}
if (tokens[i] == "-preset" && ++i != tokens.size()) {
ParseString("-preset", tokens[i], vPreset, szPresetNames, &guidPreset);
continue;
}
if (tokens[i] == "-tuninginfo" && ++i != tokens.size())
{
ParseString("-tuninginfo", tokens[i], vTuningInfo, szTuningInfoNames, &m_TuningInfo);
continue;
}
}
}
virtual ~NvEncoderInitParam() {}
virtual bool IsCodecH264() {
return GetEncodeGUID() == NV_ENC_CODEC_H264_GUID;
}
virtual bool IsCodecHEVC() {
return GetEncodeGUID() == NV_ENC_CODEC_HEVC_GUID;
}
std::string GetHelpMessage(bool bMeOnly = false, bool bUnbuffered = false, bool bHide444 = false, bool bOutputInVidMem = false)
{
std::ostringstream oss;
if (bOutputInVidMem && bMeOnly)
{
oss << "-codec Codec: " << "h264" << std::endl;
}
else
{
oss << "-codec Codec: " << szCodecNames << std::endl;
}
oss << "-preset Preset: " << szPresetNames << std::endl
<< "-profile H264: " << szH264ProfileNames;
if (bOutputInVidMem && bMeOnly)
{
oss << std::endl;
}
else
{
oss << "; HEVC: " << szHevcProfileNames << std::endl;
}
if (!bMeOnly)
{
if (bLowLatency == false)
oss << "-tuninginfo TuningInfo: " << szTuningInfoNames << std::endl;
else
oss << "-tuninginfo TuningInfo: " << szLowLatencyTuningInfoNames << std::endl;
oss << "-multipass Multipass: " << szMultipass << std::endl;
}
if (!bHide444 && !bLowLatency)
{
oss << "-444 (Only for RGB input) YUV444 encode" << std::endl;
}
if (bMeOnly) return oss.str();
oss << "-fps Frame rate" << std::endl;
if (!bUnbuffered && !bLowLatency)
{
oss << "-bf Number of consecutive B-frames" << std::endl;
}
if (!bLowLatency)
{
oss << "-rc Rate control mode: " << szRcModeNames << std::endl
<< "-gop Length of GOP (Group of Pictures)" << std::endl
<< "-bitrate Average bit rate, can be in unit of 1, K, M" << std::endl
<< "-maxbitrate Max bit rate, can be in unit of 1, K, M" << std::endl
<< "-vbvbufsize VBV buffer size in bits, can be in unit of 1, K, M" << std::endl
<< "-vbvinit VBV initial delay in bits, can be in unit of 1, K, M" << std::endl
<< "-aq Enable spatial AQ and set its stength (range 1-15, 0-auto)" << std::endl
<< "-temporalaq (No value) Enable temporal AQ" << std::endl
<< "-cq Target constant quality level for VBR mode (range 1-51, 0-auto)" << std::endl;
}
if (!bUnbuffered && !bLowLatency)
{
oss << "-lookahead Maximum depth of lookahead (range 0-(31 - number of B frames))" << std::endl;
}
oss << "-qmin Min QP value" << std::endl
<< "-qmax Max QP value" << std::endl
<< "-initqp Initial QP value" << std::endl;
if (!bLowLatency)
{
oss << "-constqp QP value for constqp rate control mode" << std::endl
<< "Note: QP value can be in the form of qp_of_P_B_I or qp_P,qp_B,qp_I (no space)" << std::endl;
}
if (bUnbuffered && !bLowLatency)
{
oss << "Note: Options -bf and -lookahead are unavailable for this app" << std::endl;
}
return oss.str();
}
/**
* @brief Generate and return a string describing the values of the main/common
* encoder initialization parameters
*/
std::string MainParamToString(const NV_ENC_INITIALIZE_PARAMS *pParams) {
std::ostringstream os;
os
<< "Encoding Parameters:"
<< std::endl << "\tcodec : " << ConvertValueToString(vCodec, szCodecNames, pParams->encodeGUID)
<< std::endl << "\tpreset : " << ConvertValueToString(vPreset, szPresetNames, pParams->presetGUID);
if (pParams->tuningInfo)
{
os << std::endl << "\ttuningInfo : " << ConvertValueToString(vTuningInfo, szTuningInfoNames, pParams->tuningInfo);
}
os
<< std::endl << "\tprofile : " << ConvertValueToString(vProfile, szProfileNames, pParams->encodeConfig->profileGUID)
<< std::endl << "\tchroma : " << ConvertValueToString(vChroma, szChromaNames, (pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? pParams->encodeConfig->encodeCodecConfig.h264Config.chromaFormatIDC : pParams->encodeConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC)
<< std::endl << "\tbitdepth : " << ((pParams->encodeGUID == NV_ENC_CODEC_H264_GUID) ? 0 : pParams->encodeConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8) + 8
<< std::endl << "\trc : " << ConvertValueToString(vRcMode, szRcModeNames, pParams->encodeConfig->rcParams.rateControlMode)
;
if (pParams->encodeConfig->rcParams.rateControlMode == NV_ENC_PARAMS_RC_CONSTQP) {
os << " (P,B,I=" << pParams->encodeConfig->rcParams.constQP.qpInterP << "," << pParams->encodeConfig->rcParams.constQP.qpInterB << "," << pParams->encodeConfig->rcParams.constQP.qpIntra << ")";
}
os
<< std::endl << "\tfps : " << pParams->frameRateNum << "/" << pParams->frameRateDen
<< std::endl << "\tgop : " << (pParams->encodeConfig->gopLength == NVENC_INFINITE_GOPLENGTH ? "INF" : std::to_string(pParams->encodeConfig->gopLength))
<< std::endl << "\tbf : " << pParams->encodeConfig->frameIntervalP - 1
<< std::endl << "\tmultipass : " << pParams->encodeConfig->rcParams.multiPass
<< std::endl << "\tsize : " << pParams->encodeWidth << "x" << pParams->encodeHeight
<< std::endl << "\tbitrate : " << pParams->encodeConfig->rcParams.averageBitRate
<< std::endl << "\tmaxbitrate : " << pParams->encodeConfig->rcParams.maxBitRate
<< std::endl << "\tvbvbufsize : " << pParams->encodeConfig->rcParams.vbvBufferSize
<< std::endl << "\tvbvinit : " << pParams->encodeConfig->rcParams.vbvInitialDelay
<< std::endl << "\taq : " << (pParams->encodeConfig->rcParams.enableAQ ? (pParams->encodeConfig->rcParams.aqStrength ? std::to_string(pParams->encodeConfig->rcParams.aqStrength) : "auto") : "disabled")
<< std::endl << "\ttemporalaq : " << (pParams->encodeConfig->rcParams.enableTemporalAQ ? "enabled" : "disabled")
<< std::endl << "\tlookahead : " << (pParams->encodeConfig->rcParams.enableLookahead ? std::to_string(pParams->encodeConfig->rcParams.lookaheadDepth) : "disabled")
<< std::endl << "\tcq : " << (unsigned int)pParams->encodeConfig->rcParams.targetQuality
<< std::endl << "\tqmin : P,B,I=" << (int)pParams->encodeConfig->rcParams.minQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.minQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.minQP.qpIntra
<< std::endl << "\tqmax : P,B,I=" << (int)pParams->encodeConfig->rcParams.maxQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.maxQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.maxQP.qpIntra
<< std::endl << "\tinitqp : P,B,I=" << (int)pParams->encodeConfig->rcParams.initialRCQP.qpInterP << "," << (int)pParams->encodeConfig->rcParams.initialRCQP.qpInterB << "," << (int)pParams->encodeConfig->rcParams.initialRCQP.qpIntra
;
return os.str();
}
public:
virtual GUID GetEncodeGUID() { return guidCodec; }
virtual GUID GetPresetGUID() { return guidPreset; }
virtual NV_ENC_TUNING_INFO GetTuningInfo() { return m_TuningInfo; }
/*
* @brief Set encoder initialization parameters based on input options
* This method parses the tokens formed from the command line options
* provided to the application and sets the fields from NV_ENC_INITIALIZE_PARAMS
* based on the supplied values.
*/
virtual void SetInitParams(NV_ENC_INITIALIZE_PARAMS *pParams, NV_ENC_BUFFER_FORMAT eBufferFormat)
{
NV_ENC_CONFIG &config = *pParams->encodeConfig;
for (unsigned i = 0; i < tokens.size(); i++)
{
if (
tokens[i] == "-codec" && ++i ||
tokens[i] == "-preset" && ++i ||
tokens[i] == "-tuninginfo" && ++i ||
tokens[i] == "-multipass" && ++i != tokens.size() && ParseString("-multipass", tokens[i], vMultiPass, szMultipass, &config.rcParams.multiPass) ||
tokens[i] == "-profile" && ++i != tokens.size() && (IsCodecH264() ?
ParseString("-profile", tokens[i], vH264Profile, szH264ProfileNames, &config.profileGUID) :
ParseString("-profile", tokens[i], vHevcProfile, szHevcProfileNames, &config.profileGUID)) ||
tokens[i] == "-rc" && ++i != tokens.size() && ParseString("-rc", tokens[i], vRcMode, szRcModeNames, &config.rcParams.rateControlMode) ||
tokens[i] == "-fps" && ++i != tokens.size() && ParseInt("-fps", tokens[i], &pParams->frameRateNum) ||
tokens[i] == "-bf" && ++i != tokens.size() && ParseInt("-bf", tokens[i], &config.frameIntervalP) && ++config.frameIntervalP ||
tokens[i] == "-bitrate" && ++i != tokens.size() && ParseBitRate("-bitrate", tokens[i], &config.rcParams.averageBitRate) ||
tokens[i] == "-maxbitrate" && ++i != tokens.size() && ParseBitRate("-maxbitrate", tokens[i], &config.rcParams.maxBitRate) ||
tokens[i] == "-vbvbufsize" && ++i != tokens.size() && ParseBitRate("-vbvbufsize", tokens[i], &config.rcParams.vbvBufferSize) ||
tokens[i] == "-vbvinit" && ++i != tokens.size() && ParseBitRate("-vbvinit", tokens[i], &config.rcParams.vbvInitialDelay) ||
tokens[i] == "-cq" && ++i != tokens.size() && ParseInt("-cq", tokens[i], &config.rcParams.targetQuality) ||
tokens[i] == "-initqp" && ++i != tokens.size() && ParseQp("-initqp", tokens[i], &config.rcParams.initialRCQP) && (config.rcParams.enableInitialRCQP = true) ||
tokens[i] == "-qmin" && ++i != tokens.size() && ParseQp("-qmin", tokens[i], &config.rcParams.minQP) && (config.rcParams.enableMinQP = true) ||
tokens[i] == "-qmax" && ++i != tokens.size() && ParseQp("-qmax", tokens[i], &config.rcParams.maxQP) && (config.rcParams.enableMaxQP = true) ||
tokens[i] == "-constqp" && ++i != tokens.size() && ParseQp("-constqp", tokens[i], &config.rcParams.constQP) ||
tokens[i] == "-temporalaq" && (config.rcParams.enableTemporalAQ = true)
)
{
continue;
}
if (tokens[i] == "-lookahead" && ++i != tokens.size() && ParseInt("-lookahead", tokens[i], &config.rcParams.lookaheadDepth))
{
config.rcParams.enableLookahead = config.rcParams.lookaheadDepth > 0;
continue;
}
int aqStrength;
if (tokens[i] == "-aq" && ++i != tokens.size() && ParseInt("-aq", tokens[i], &aqStrength)) {
config.rcParams.enableAQ = true;
config.rcParams.aqStrength = aqStrength;
continue;
}
if (tokens[i] == "-gop" && ++i != tokens.size() && ParseInt("-gop", tokens[i], &config.gopLength))
{
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.idrPeriod = config.gopLength;
}
else
{
config.encodeCodecConfig.hevcConfig.idrPeriod = config.gopLength;
}
continue;
}
if (tokens[i] == "-444")
{
if (IsCodecH264())
{
config.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
} else
{
config.encodeCodecConfig.hevcConfig.chromaFormatIDC = 3;
}
continue;
}
std::ostringstream errmessage;
errmessage << "Incorrect parameter: " << tokens[i] << std::endl;
errmessage << "Re-run the application with the -h option to get a list of the supported options.";
errmessage << std::endl;
throw std::invalid_argument(errmessage.str());
}
if (IsCodecHEVC())
{
if (eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
{
config.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2;
}
}
funcInit(pParams);
LOG(INFO) << NvEncoderInitParam().MainParamToString(pParams);
LOG(TRACE) << NvEncoderInitParam().FullParamToString(pParams);
}
private:
/*
* Helper methods for parsing tokens (generated by splitting the command line)
* and performing conversions to the appropriate target type/value.
*/
template<typename T>
bool ParseString(const std::string &strName, const std::string &strValue, const std::vector<T> &vValue, const std::string &strValueNames, T *pValue) {
std::vector<std::string> vstrValueName = split(strValueNames, ' ');
auto it = std::find(vstrValueName.begin(), vstrValueName.end(), strValue);
if (it == vstrValueName.end()) {
LOG(ERROR) << strName << " options: " << strValueNames;
return false;
}
*pValue = vValue[it - vstrValueName.begin()];
return true;
}
template<typename T>
std::string ConvertValueToString(const std::vector<T> &vValue, const std::string &strValueNames, T value) {
auto it = std::find(vValue.begin(), vValue.end(), value);
if (it == vValue.end()) {
LOG(ERROR) << "Invalid value. Can't convert to one of " << strValueNames;
return std::string();
}
return split(strValueNames, ' ')[it - vValue.begin()];
}
bool ParseBitRate(const std::string &strName, const std::string &strValue, unsigned *pBitRate) {
try {
size_t l;
double r = std::stod(strValue, &l);
char c = strValue[l];
if (c != 0 && c != 'k' && c != 'm') {
LOG(ERROR) << strName << " units: 1, K, M (lower case also allowed)";
}
*pBitRate = (unsigned)((c == 'm' ? 1000000 : (c == 'k' ? 1000 : 1)) * r);
} catch (const std::invalid_argument &) {
return false;
}
return true;
}
template<typename T>
bool ParseInt(const std::string &strName, const std::string &strValue, T *pInt) {
try {
*pInt = std::stoi(strValue);
} catch (const std::invalid_argument &) {
LOG(ERROR) << strName << " need a value of positive number";
return false;
}
return true;
}
bool ParseQp(const std::string &strName, const std::string &strValue, NV_ENC_QP *pQp) {
std::vector<std::string> vQp = split(strValue, ',');
try {
if (vQp.size() == 1) {
unsigned qp = (unsigned)std::stoi(vQp[0]);
*pQp = {qp, qp, qp};
} else if (vQp.size() == 3) {
*pQp = {(unsigned)std::stoi(vQp[0]), (unsigned)std::stoi(vQp[1]), (unsigned)std::stoi(vQp[2])};
} else {
LOG(ERROR) << strName << " qp_for_P_B_I or qp_P,qp_B,qp_I (no space is allowed)";
return false;
}
} catch (const std::invalid_argument &) {
return false;
}
return true;
}
std::vector<std::string> split(const std::string &s, char delim) {
std::stringstream ss(s);
std::string token;
std::vector<std::string> tokens;
while (getline(ss, token, delim)) {
tokens.push_back(token);
}
return tokens;
}
private:
std::string strParam;
std::function<void(NV_ENC_INITIALIZE_PARAMS *pParams)> funcInit = [](NV_ENC_INITIALIZE_PARAMS *pParams){};
std::vector<std::string> tokens;
GUID guidCodec = NV_ENC_CODEC_H264_GUID;
GUID guidPreset = NV_ENC_PRESET_P3_GUID;
NV_ENC_TUNING_INFO m_TuningInfo = NV_ENC_TUNING_INFO_HIGH_QUALITY;
bool bLowLatency = false;
const char *szCodecNames = "h264 hevc";
std::vector<GUID> vCodec = std::vector<GUID> {
NV_ENC_CODEC_H264_GUID,
NV_ENC_CODEC_HEVC_GUID
};
const char *szChromaNames = "yuv420 yuv444";
std::vector<uint32_t> vChroma = std::vector<uint32_t>
{
1, 3
};
const char *szPresetNames = "p1 p2 p3 p4 p5 p6 p7";
std::vector<GUID> vPreset = std::vector<GUID> {
NV_ENC_PRESET_P1_GUID,
NV_ENC_PRESET_P2_GUID,
NV_ENC_PRESET_P3_GUID,
NV_ENC_PRESET_P4_GUID,
NV_ENC_PRESET_P5_GUID,
NV_ENC_PRESET_P6_GUID,
NV_ENC_PRESET_P7_GUID,
};
const char *szH264ProfileNames = "baseline main high high444";
std::vector<GUID> vH264Profile = std::vector<GUID> {
NV_ENC_H264_PROFILE_BASELINE_GUID,
NV_ENC_H264_PROFILE_MAIN_GUID,
NV_ENC_H264_PROFILE_HIGH_GUID,
NV_ENC_H264_PROFILE_HIGH_444_GUID,
};
const char *szHevcProfileNames = "main main10 frext";
std::vector<GUID> vHevcProfile = std::vector<GUID> {
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szProfileNames = "(default) auto baseline(h264) main(h264) high(h264) high444(h264)"
" stereo(h264) progressiv_high(h264) constrained_high(h264)"
" main(hevc) main10(hevc) frext(hevc)";
std::vector<GUID> vProfile = std::vector<GUID> {
GUID{},
NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID,
NV_ENC_H264_PROFILE_BASELINE_GUID,
NV_ENC_H264_PROFILE_MAIN_GUID,
NV_ENC_H264_PROFILE_HIGH_GUID,
NV_ENC_H264_PROFILE_HIGH_444_GUID,
NV_ENC_H264_PROFILE_STEREO_GUID,
NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID,
NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID,
NV_ENC_HEVC_PROFILE_MAIN_GUID,
NV_ENC_HEVC_PROFILE_MAIN10_GUID,
NV_ENC_HEVC_PROFILE_FREXT_GUID,
};
const char *szLowLatencyTuningInfoNames = "lowlatency ultralowlatency";
const char *szTuningInfoNames = "hq lowlatency ultralowlatency lossless";
std::vector<NV_ENC_TUNING_INFO> vTuningInfo = std::vector<NV_ENC_TUNING_INFO>{
NV_ENC_TUNING_INFO_HIGH_QUALITY,
NV_ENC_TUNING_INFO_LOW_LATENCY,
NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY,
NV_ENC_TUNING_INFO_LOSSLESS
};
const char *szRcModeNames = "constqp vbr cbr";
std::vector<NV_ENC_PARAMS_RC_MODE> vRcMode = std::vector<NV_ENC_PARAMS_RC_MODE> {
NV_ENC_PARAMS_RC_CONSTQP,
NV_ENC_PARAMS_RC_VBR,
NV_ENC_PARAMS_RC_CBR,
};
const char *szMultipass = "disabled qres fullres";
std::vector<NV_ENC_MULTI_PASS> vMultiPass = std::vector<NV_ENC_MULTI_PASS>{
NV_ENC_MULTI_PASS_DISABLED,
NV_ENC_TWO_PASS_QUARTER_RESOLUTION,
NV_ENC_TWO_PASS_FULL_RESOLUTION,
};
const char *szQpMapModeNames = "disabled emphasis_level_map delta_qp_map qp_map";
std::vector<NV_ENC_QP_MAP_MODE> vQpMapMode = std::vector<NV_ENC_QP_MAP_MODE> {
NV_ENC_QP_MAP_DISABLED,
NV_ENC_QP_MAP_EMPHASIS,
NV_ENC_QP_MAP_DELTA,
NV_ENC_QP_MAP,
};
public:
/*
* Generates and returns a string describing the values for each field in
* the NV_ENC_INITIALIZE_PARAMS structure (i.e. a description of the entire
* set of initialization parameters supplied to the API).
*/
std::string FullParamToString(const NV_ENC_INITIALIZE_PARAMS *pInitializeParams) {
std::ostringstream os;
os << "NV_ENC_INITIALIZE_PARAMS:" << std::endl
<< "encodeGUID: " << ConvertValueToString(vCodec, szCodecNames, pInitializeParams->encodeGUID) << std::endl
<< "presetGUID: " << ConvertValueToString(vPreset, szPresetNames, pInitializeParams->presetGUID) << std::endl;
if (pInitializeParams->tuningInfo)
{
os << "tuningInfo: " << ConvertValueToString(vTuningInfo, szTuningInfoNames, pInitializeParams->tuningInfo) << std::endl;
}
os
<< "encodeWidth: " << pInitializeParams->encodeWidth << std::endl
<< "encodeHeight: " << pInitializeParams->encodeHeight << std::endl
<< "darWidth: " << pInitializeParams->darWidth << std::endl
<< "darHeight: " << pInitializeParams->darHeight << std::endl
<< "frameRateNum: " << pInitializeParams->frameRateNum << std::endl
<< "frameRateDen: " << pInitializeParams->frameRateDen << std::endl
<< "enableEncodeAsync: " << pInitializeParams->enableEncodeAsync << std::endl
<< "reportSliceOffsets: " << pInitializeParams->reportSliceOffsets << std::endl
<< "enableSubFrameWrite: " << pInitializeParams->enableSubFrameWrite << std::endl
<< "enableExternalMEHints: " << pInitializeParams->enableExternalMEHints << std::endl
<< "enableMEOnlyMode: " << pInitializeParams->enableMEOnlyMode << std::endl
<< "enableWeightedPrediction: " << pInitializeParams->enableWeightedPrediction << std::endl
<< "maxEncodeWidth: " << pInitializeParams->maxEncodeWidth << std::endl
<< "maxEncodeHeight: " << pInitializeParams->maxEncodeHeight << std::endl
<< "maxMEHintCountsPerBlock: " << pInitializeParams->maxMEHintCountsPerBlock << std::endl
;
NV_ENC_CONFIG *pConfig = pInitializeParams->encodeConfig;
os << "NV_ENC_CONFIG:" << std::endl
<< "profile: " << ConvertValueToString(vProfile, szProfileNames, pConfig->profileGUID) << std::endl
<< "gopLength: " << pConfig->gopLength << std::endl
<< "frameIntervalP: " << pConfig->frameIntervalP << std::endl
<< "monoChromeEncoding: " << pConfig->monoChromeEncoding << std::endl
<< "frameFieldMode: " << pConfig->frameFieldMode << std::endl
<< "mvPrecision: " << pConfig->mvPrecision << std::endl
<< "NV_ENC_RC_PARAMS:" << std::endl
<< " rateControlMode: 0x" << std::hex << pConfig->rcParams.rateControlMode << std::dec << std::endl
<< " constQP: " << pConfig->rcParams.constQP.qpInterP << ", " << pConfig->rcParams.constQP.qpInterB << ", " << pConfig->rcParams.constQP.qpIntra << std::endl
<< " averageBitRate: " << pConfig->rcParams.averageBitRate << std::endl
<< " maxBitRate: " << pConfig->rcParams.maxBitRate << std::endl
<< " vbvBufferSize: " << pConfig->rcParams.vbvBufferSize << std::endl
<< " vbvInitialDelay: " << pConfig->rcParams.vbvInitialDelay << std::endl
<< " enableMinQP: " << pConfig->rcParams.enableMinQP << std::endl
<< " enableMaxQP: " << pConfig->rcParams.enableMaxQP << std::endl
<< " enableInitialRCQP: " << pConfig->rcParams.enableInitialRCQP << std::endl
<< " enableAQ: " << pConfig->rcParams.enableAQ << std::endl
<< " qpMapMode: " << ConvertValueToString(vQpMapMode, szQpMapModeNames, pConfig->rcParams.qpMapMode) << std::endl
<< " multipass: " << ConvertValueToString(vMultiPass, szMultipass, pConfig->rcParams.multiPass) << std::endl
<< " enableLookahead: " << pConfig->rcParams.enableLookahead << std::endl
<< " disableIadapt: " << pConfig->rcParams.disableIadapt << std::endl
<< " disableBadapt: " << pConfig->rcParams.disableBadapt << std::endl
<< " enableTemporalAQ: " << pConfig->rcParams.enableTemporalAQ << std::endl
<< " zeroReorderDelay: " << pConfig->rcParams.zeroReorderDelay << std::endl
<< " enableNonRefP: " << pConfig->rcParams.enableNonRefP << std::endl
<< " strictGOPTarget: " << pConfig->rcParams.strictGOPTarget << std::endl
<< " aqStrength: " << pConfig->rcParams.aqStrength << std::endl
<< " minQP: " << pConfig->rcParams.minQP.qpInterP << ", " << pConfig->rcParams.minQP.qpInterB << ", " << pConfig->rcParams.minQP.qpIntra << std::endl
<< " maxQP: " << pConfig->rcParams.maxQP.qpInterP << ", " << pConfig->rcParams.maxQP.qpInterB << ", " << pConfig->rcParams.maxQP.qpIntra << std::endl
<< " initialRCQP: " << pConfig->rcParams.initialRCQP.qpInterP << ", " << pConfig->rcParams.initialRCQP.qpInterB << ", " << pConfig->rcParams.initialRCQP.qpIntra << std::endl
<< " temporallayerIdxMask: " << pConfig->rcParams.temporallayerIdxMask << std::endl
<< " temporalLayerQP: " << (int)pConfig->rcParams.temporalLayerQP[0] << ", " << (int)pConfig->rcParams.temporalLayerQP[1] << ", " << (int)pConfig->rcParams.temporalLayerQP[2] << ", " << (int)pConfig->rcParams.temporalLayerQP[3] << ", " << (int)pConfig->rcParams.temporalLayerQP[4] << ", " << (int)pConfig->rcParams.temporalLayerQP[5] << ", " << (int)pConfig->rcParams.temporalLayerQP[6] << ", " << (int)pConfig->rcParams.temporalLayerQP[7] << std::endl
<< " targetQuality: " << pConfig->rcParams.targetQuality << std::endl
<< " lookaheadDepth: " << pConfig->rcParams.lookaheadDepth << std::endl;
if (pInitializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (H264):" << std::endl
<< " enableStereoMVC: " << pConfig->encodeCodecConfig.h264Config.enableStereoMVC << std::endl
<< " hierarchicalPFrames: " << pConfig->encodeCodecConfig.h264Config.hierarchicalPFrames << std::endl
<< " hierarchicalBFrames: " << pConfig->encodeCodecConfig.h264Config.hierarchicalBFrames << std::endl
<< " outputBufferingPeriodSEI: " << pConfig->encodeCodecConfig.h264Config.outputBufferingPeriodSEI << std::endl
<< " outputPictureTimingSEI: " << pConfig->encodeCodecConfig.h264Config.outputPictureTimingSEI << std::endl
<< " outputAUD: " << pConfig->encodeCodecConfig.h264Config.outputAUD << std::endl
<< " disableSPSPPS: " << pConfig->encodeCodecConfig.h264Config.disableSPSPPS << std::endl
<< " outputFramePackingSEI: " << pConfig->encodeCodecConfig.h264Config.outputFramePackingSEI << std::endl
<< " outputRecoveryPointSEI: " << pConfig->encodeCodecConfig.h264Config.outputRecoveryPointSEI << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.h264Config.enableIntraRefresh << std::endl
<< " enableConstrainedEncoding: " << pConfig->encodeCodecConfig.h264Config.enableConstrainedEncoding << std::endl
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.h264Config.repeatSPSPPS << std::endl
<< " enableVFR: " << pConfig->encodeCodecConfig.h264Config.enableVFR << std::endl
<< " enableLTR: " << pConfig->encodeCodecConfig.h264Config.enableLTR << std::endl
<< " qpPrimeYZeroTransformBypassFlag: " << pConfig->encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag << std::endl
<< " useConstrainedIntraPred: " << pConfig->encodeCodecConfig.h264Config.useConstrainedIntraPred << std::endl
<< " level: " << pConfig->encodeCodecConfig.h264Config.level << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.h264Config.idrPeriod << std::endl
<< " separateColourPlaneFlag: " << pConfig->encodeCodecConfig.h264Config.separateColourPlaneFlag << std::endl
<< " disableDeblockingFilterIDC: " << pConfig->encodeCodecConfig.h264Config.disableDeblockingFilterIDC << std::endl
<< " numTemporalLayers: " << pConfig->encodeCodecConfig.h264Config.numTemporalLayers << std::endl
<< " spsId: " << pConfig->encodeCodecConfig.h264Config.spsId << std::endl
<< " ppsId: " << pConfig->encodeCodecConfig.h264Config.ppsId << std::endl
<< " adaptiveTransformMode: " << pConfig->encodeCodecConfig.h264Config.adaptiveTransformMode << std::endl
<< " fmoMode: " << pConfig->encodeCodecConfig.h264Config.fmoMode << std::endl
<< " bdirectMode: " << pConfig->encodeCodecConfig.h264Config.bdirectMode << std::endl
<< " entropyCodingMode: " << pConfig->encodeCodecConfig.h264Config.entropyCodingMode << std::endl
<< " stereoMode: " << pConfig->encodeCodecConfig.h264Config.stereoMode << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.h264Config.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.h264Config.intraRefreshCnt << std::endl
<< " maxNumRefFrames: " << pConfig->encodeCodecConfig.h264Config.maxNumRefFrames << std::endl
<< " sliceMode: " << pConfig->encodeCodecConfig.h264Config.sliceMode << std::endl
<< " sliceModeData: " << pConfig->encodeCodecConfig.h264Config.sliceModeData << std::endl
<< " NV_ENC_CONFIG_H264_VUI_PARAMETERS:" << std::endl
<< " overscanInfoPresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.overscanInfoPresentFlag << std::endl
<< " overscanInfo: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.overscanInfo << std::endl
<< " videoSignalTypePresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag << std::endl
<< " videoFormat: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoFormat << std::endl
<< " videoFullRangeFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag << std::endl
<< " colourDescriptionPresentFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag << std::endl
<< " colourPrimaries: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics << std::endl
<< " colourMatrix: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix << std::endl
<< " chromaSampleLocationFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationFlag << std::endl
<< " chromaSampleLocationTop: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationTop << std::endl
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.h264Config.h264VUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrNumFrames: " << pConfig->encodeCodecConfig.h264Config.ltrNumFrames << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.h264Config.ltrTrustMode << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.h264Config.chromaFormatIDC << std::endl
<< " maxTemporalLayers: " << pConfig->encodeCodecConfig.h264Config.maxTemporalLayers << std::endl;
} else if (pInitializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) {
os
<< "NV_ENC_CODEC_CONFIG (HEVC):" << std::endl
<< " level: " << pConfig->encodeCodecConfig.hevcConfig.level << std::endl
<< " tier: " << pConfig->encodeCodecConfig.hevcConfig.tier << std::endl
<< " minCUSize: " << pConfig->encodeCodecConfig.hevcConfig.minCUSize << std::endl
<< " maxCUSize: " << pConfig->encodeCodecConfig.hevcConfig.maxCUSize << std::endl
<< " useConstrainedIntraPred: " << pConfig->encodeCodecConfig.hevcConfig.useConstrainedIntraPred << std::endl
<< " disableDeblockAcrossSliceBoundary: " << pConfig->encodeCodecConfig.hevcConfig.disableDeblockAcrossSliceBoundary << std::endl
<< " outputBufferingPeriodSEI: " << pConfig->encodeCodecConfig.hevcConfig.outputBufferingPeriodSEI << std::endl
<< " outputPictureTimingSEI: " << pConfig->encodeCodecConfig.hevcConfig.outputPictureTimingSEI << std::endl
<< " outputAUD: " << pConfig->encodeCodecConfig.hevcConfig.outputAUD << std::endl
<< " enableLTR: " << pConfig->encodeCodecConfig.hevcConfig.enableLTR << std::endl
<< " disableSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.disableSPSPPS << std::endl
<< " repeatSPSPPS: " << pConfig->encodeCodecConfig.hevcConfig.repeatSPSPPS << std::endl
<< " enableIntraRefresh: " << pConfig->encodeCodecConfig.hevcConfig.enableIntraRefresh << std::endl
<< " chromaFormatIDC: " << pConfig->encodeCodecConfig.hevcConfig.chromaFormatIDC << std::endl
<< " pixelBitDepthMinus8: " << pConfig->encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 << std::endl
<< " idrPeriod: " << pConfig->encodeCodecConfig.hevcConfig.idrPeriod << std::endl
<< " intraRefreshPeriod: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshPeriod << std::endl
<< " intraRefreshCnt: " << pConfig->encodeCodecConfig.hevcConfig.intraRefreshCnt << std::endl
<< " maxNumRefFramesInDPB: " << pConfig->encodeCodecConfig.hevcConfig.maxNumRefFramesInDPB << std::endl
<< " ltrNumFrames: " << pConfig->encodeCodecConfig.hevcConfig.ltrNumFrames << std::endl
<< " vpsId: " << pConfig->encodeCodecConfig.hevcConfig.vpsId << std::endl
<< " spsId: " << pConfig->encodeCodecConfig.hevcConfig.spsId << std::endl
<< " ppsId: " << pConfig->encodeCodecConfig.hevcConfig.ppsId << std::endl
<< " sliceMode: " << pConfig->encodeCodecConfig.hevcConfig.sliceMode << std::endl
<< " sliceModeData: " << pConfig->encodeCodecConfig.hevcConfig.sliceModeData << std::endl
<< " maxTemporalLayersMinus1: " << pConfig->encodeCodecConfig.hevcConfig.maxTemporalLayersMinus1 << std::endl
<< " NV_ENC_CONFIG_HEVC_VUI_PARAMETERS:" << std::endl
<< " overscanInfoPresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.overscanInfoPresentFlag << std::endl
<< " overscanInfo: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.overscanInfo << std::endl
<< " videoSignalTypePresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoSignalTypePresentFlag << std::endl
<< " videoFormat: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFormat << std::endl
<< " videoFullRangeFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.videoFullRangeFlag << std::endl
<< " colourDescriptionPresentFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourDescriptionPresentFlag << std::endl
<< " colourPrimaries: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourPrimaries << std::endl
<< " transferCharacteristics: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.transferCharacteristics << std::endl
<< " colourMatrix: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.colourMatrix << std::endl
<< " chromaSampleLocationFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationFlag << std::endl
<< " chromaSampleLocationTop: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationTop << std::endl
<< " chromaSampleLocationBot: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.chromaSampleLocationBot << std::endl
<< " bitstreamRestrictionFlag: " << pConfig->encodeCodecConfig.hevcConfig.hevcVUIParameters.bitstreamRestrictionFlag << std::endl
<< " ltrTrustMode: " << pConfig->encodeCodecConfig.hevcConfig.ltrTrustMode << std::endl;
}
return os.str();
}
};
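// --- Illustrative usage sketch (not part of the original sample) ---
// Parse a command-line style option string and apply it to an encoder
// configuration. In a real application pParams (and its encodeConfig) would
// come from NvEncoder::CreateDefaultEncoderParams().
inline void InitParamSketch(NV_ENC_INITIALIZE_PARAMS *pParams) {
NvEncoderInitParam initParam("-codec hevc -preset p5 -bitrate 4M -fps 60");
initParam.SetInitParams(pParams, NV_ENC_BUFFER_FORMAT_NV12);
}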

View File

@@ -0,0 +1,192 @@
/*
* Copyright 2017-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
template<typename YuvUnitx2>
static __global__ void Resize(cudaTextureObject_t texY, cudaTextureObject_t texUv,
uint8_t *pDst, uint8_t *pDstUV, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int ix = blockIdx.x * blockDim.x + threadIdx.x,
iy = blockIdx.y * blockDim.y + threadIdx.y;
if (ix >= nWidth / 2 || iy >= nHeight / 2) {
return;
}
int x = ix * 2, y = iy * 2;
typedef decltype(YuvUnitx2::x) YuvUnit;
const int MAX = (1 << (sizeof(YuvUnit) * 8)) - 1;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
y++;
*(YuvUnitx2 *)(pDst + y * nPitch + x * sizeof(YuvUnit)) = YuvUnitx2 {
(YuvUnit)(tex2D<float>(texY, x / fxScale, y / fyScale) * MAX),
(YuvUnit)(tex2D<float>(texY, (x + 1) / fxScale, y / fyScale) * MAX)
};
float2 uv = tex2D<float2>(texUv, ix / fxScale, (nHeight + iy) / fyScale + 0.5f);
*(YuvUnitx2 *)(pDstUV + iy * nPitch + ix * 2 * sizeof(YuvUnit)) = YuvUnitx2{ (YuvUnit)(uv.x * MAX), (YuvUnit)(uv.y * MAX) };
}
template <typename YuvUnitx2>
static void Resize(unsigned char *dpDst, unsigned char* dpDstUV, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight) {
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<decltype(YuvUnitx2::x)>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
cudaTextureObject_t texY=0;
ck(cudaCreateTextureObject(&texY, &resDesc, &texDesc, NULL));
resDesc.res.pitch2D.desc = cudaCreateChannelDesc<YuvUnitx2>();
resDesc.res.pitch2D.width = nSrcWidth / 2;
resDesc.res.pitch2D.height = nSrcHeight * 3 / 2;
cudaTextureObject_t texUv=0;
ck(cudaCreateTextureObject(&texUv, &resDesc, &texDesc, NULL));
Resize<YuvUnitx2> << <dim3((nDstWidth + 31) / 32, (nDstHeight + 31) / 32), dim3(16, 16) >> >(texY, texUv, dpDst, dpDstUV,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nDstWidth / nSrcWidth, 1.0f * nDstHeight / nSrcHeight);
ck(cudaDestroyTextureObject(texY));
ck(cudaDestroyTextureObject(texUv));
}
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstNv12UV)
{
unsigned char* dpDstUV = dpDstNv12UV ? dpDstNv12UV : dpDstNv12 + (nDstPitch*nDstHeight);
return Resize<uchar2>(dpDstNv12, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcNv12, nSrcPitch, nSrcWidth, nSrcHeight);
}
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016, int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char* dpDstP016UV)
{
unsigned char* dpDstUV = dpDstP016UV ? dpDstP016UV : dpDstP016 + (nDstPitch*nDstHeight);
return Resize<ushort2>(dpDstP016, dpDstUV, nDstPitch, nDstWidth, nDstHeight, dpSrcP016, nSrcPitch, nSrcWidth, nSrcHeight);
}
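// --- Illustrative usage sketch (not part of the original sample) ---
// Downscale an NV12 frame already resident in device memory; pitches are
// assumed equal to the widths for simplicity.
void ResizeNv12Sketch(unsigned char *dpSrc, int nSrcWidth, int nSrcHeight,
unsigned char *dpDst, int nDstWidth, int nDstHeight)
{
ResizeNv12(dpDst, nDstWidth, nDstWidth, nDstHeight, dpSrc, nSrcWidth, nSrcWidth, nSrcHeight);
}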
static __global__ void Scale(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
*(unsigned char*)(pDst + (y * nPitch) + x) = (unsigned char)(fminf((tex2D<float>(texSrc, x * fxScale, y * fyScale)) * 255.0f, 255.0f));
}
static __global__ void Scale_uv(cudaTextureObject_t texSrc,
uint8_t *pDst, int nPitch, int nWidth, int nHeight,
float fxScale, float fyScale)
{
int x = blockIdx.x * blockDim.x + threadIdx.x,
y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= nWidth || y >= nHeight)
{
return;
}
float2 uv = tex2D<float2>(texSrc, x * fxScale, y * fyScale);
uchar2 uvOut = uchar2{ (unsigned char)(fminf(uv.x * 255.0f, 255.0f)), (unsigned char)(fminf(uv.y * 255.0f, 255.0f)) };
*(uchar2*)(pDst + (y * nPitch) + 2 * x) = uvOut;
}
void ScaleKernelLaunch(unsigned char *dpDst, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrc, int nSrcPitch, int nSrcWidth, int nSrcHeight, bool bUVPlane = false)
{
cudaResourceDesc resDesc = {};
resDesc.resType = cudaResourceTypePitch2D;
resDesc.res.pitch2D.devPtr = dpSrc;
resDesc.res.pitch2D.desc = bUVPlane ? cudaCreateChannelDesc<uchar2>() : cudaCreateChannelDesc<unsigned char>();
resDesc.res.pitch2D.width = nSrcWidth;
resDesc.res.pitch2D.height = nSrcHeight;
resDesc.res.pitch2D.pitchInBytes = nSrcPitch;
cudaTextureDesc texDesc = {};
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeNormalizedFloat;
texDesc.addressMode[0] = cudaAddressModeClamp;
texDesc.addressMode[1] = cudaAddressModeClamp;
texDesc.addressMode[2] = cudaAddressModeClamp;
cudaTextureObject_t texSrc = 0;
ck(cudaCreateTextureObject(&texSrc, &resDesc, &texDesc, NULL));
dim3 blockSize(16, 16, 1);
dim3 gridSize(((uint32_t)nDstWidth + blockSize.x - 1) / blockSize.x, ((uint32_t)nDstHeight + blockSize.y - 1) / blockSize.y, 1);
if (bUVPlane)
{
Scale_uv << <gridSize, blockSize >> >(texSrc, dpDst,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
else
{
Scale << <gridSize, blockSize >> >(texSrc, dpDst,
nDstPitch, nDstWidth, nDstHeight, 1.0f * nSrcWidth / nDstWidth, 1.0f * nSrcHeight / nDstHeight);
}
ck(cudaGetLastError());
ck(cudaDestroyTextureObject(texSrc));
}
void ScaleYUV420(unsigned char *dpDstY,
unsigned char* dpDstU,
unsigned char* dpDstV,
int nDstPitch,
int nDstChromaPitch,
int nDstWidth,
int nDstHeight,
unsigned char *dpSrcY,
unsigned char* dpSrcU,
unsigned char* dpSrcV,
int nSrcPitch,
int nSrcChromaPitch,
int nSrcWidth,
int nSrcHeight,
bool bSemiplanar)
{
int chromaWidthDst = (nDstWidth + 1) / 2;
int chromaHeightDst = (nDstHeight + 1) / 2;
int chromaWidthSrc = (nSrcWidth + 1) / 2;
int chromaHeightSrc = (nSrcHeight + 1) / 2;
ScaleKernelLaunch(dpDstY, nDstPitch, nDstWidth, nDstHeight, dpSrcY, nSrcPitch, nSrcWidth, nSrcHeight);
if (bSemiplanar)
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc, true);
}
else
{
ScaleKernelLaunch(dpDstU, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcU, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
ScaleKernelLaunch(dpDstV, nDstChromaPitch, chromaWidthDst, chromaHeightDst, dpSrcV, nSrcChromaPitch, chromaWidthSrc, chromaHeightSrc);
}
}
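// --- Illustrative usage sketch (not part of the original sample) ---
// Scale a planar I420 frame with tightly packed planes (luma pitch = width,
// chroma pitch = (width + 1) / 2); all three planes live in one allocation.
void ScaleI420Sketch(unsigned char *dpSrc, int nSrcWidth, int nSrcHeight,
unsigned char *dpDst, int nDstWidth, int nDstHeight)
{
unsigned char *dpSrcU = dpSrc + nSrcWidth * nSrcHeight;
unsigned char *dpSrcV = dpSrcU + ((nSrcWidth + 1) / 2) * ((nSrcHeight + 1) / 2);
unsigned char *dpDstU = dpDst + nDstWidth * nDstHeight;
unsigned char *dpDstV = dpDstU + ((nDstWidth + 1) / 2) * ((nDstHeight + 1) / 2);
ScaleYUV420(dpDst, dpDstU, dpDstV, nDstWidth, (nDstWidth + 1) / 2,
nDstWidth, nDstHeight, dpSrc, dpSrcU, dpSrcV, nSrcWidth,
(nSrcWidth + 1) / 2, nSrcWidth, nSrcHeight, false);
}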

126
thirdparty/nvcodec/Samples/Utils/crc.cu vendored Normal file
View File

@@ -0,0 +1,126 @@
/*
* Copyright 2018-2020 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#include <cuda_runtime.h>
#include "NvCodecUtils.h"
/*
* CRC32 lookup table
* Generated by the following routine
* int i, j;
* U032 crc;
* for (i = 0; i < 256; i++)
* {
* crc = i;
* for (j = 0; j < 8; j++) { // 8 reduction
* crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320L : 0);
* }
* Crc32Table[i] = crc;
* }
*/
__device__ __constant__ uint32_t Crc32Table[256] = {
0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};
typedef struct _NV_ENC_ENCODE_OUT_PARAMS
{
uint32_t version; /**< [out]: Struct version. */
uint32_t bitstreamSizeInBytes; /**< [out]: Encoded bitstream size in bytes */
uint32_t cycleCount; /**< [out]: Cycle count */
uint32_t firstPassCycleCount; /**< [out]: First pass cycle count */
uint32_t reserved[60]; /**< [out]: Reserved and must be set to 0 */
} NV_ENC_ENCODE_OUT_PARAMS;
static __global__ void ComputeCRCKernel(uint8_t *pBuffer, uint32_t *crcValue)
{
NV_ENC_ENCODE_OUT_PARAMS *outParams = (NV_ENC_ENCODE_OUT_PARAMS *)pBuffer;
uint32_t bitstreamSize = outParams->bitstreamSizeInBytes;
uint8_t *pEncStream = pBuffer + sizeof(NV_ENC_ENCODE_OUT_PARAMS);
uint32_t crc=~0;
for(uint32_t i = 0; i < bitstreamSize; i++)
{
crc = (crc >> 8) ^ Crc32Table[((uint8_t)(crc)) ^ (*pEncStream++)];
}
*crcValue = ~crc;
}
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, cudaStream_t outputCUStream)
{
dim3 blockSize(1, 1, 1);
dim3 gridSize(1, 1, 1);
ComputeCRCKernel <<<gridSize, blockSize, 0, outputCUStream >>>(pBuffer, crcValue);
}
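// --- Illustrative usage sketch (not part of the original sample) ---
// Compute the CRC of an NVENC output buffer in device memory (headed by
// NV_ENC_ENCODE_OUT_PARAMS) and copy the 32-bit result back to the host.
void ComputeCRCSketch(uint8_t *dpEncOutput, cudaStream_t stream, uint32_t *pHostCrc)
{
uint32_t *dpCrc = NULL;
ck(cudaMalloc(&dpCrc, sizeof(uint32_t)));
ComputeCRC(dpEncOutput, dpCrc, stream);
ck(cudaMemcpyAsync(pHostCrc, dpCrc, sizeof(uint32_t), cudaMemcpyDeviceToHost, stream));
ck(cudaStreamSynchronize(stream));
ck(cudaFree(dpCrc));
}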