mirror of
https://git.videolan.org/git/ffmpeg.git
synced 2024-08-28 04:06:12 +02:00
avcodec/nvdec: Add support for decoding HEVC 4:4:4 content
The latest-generation video decoder on the Turing chips supports decoding HEVC 4:4:4. Supporting this is relatively straightforward: we need to account for the different chroma format and pick the right output and sw formats at the right times. There was one bug, namely the hard-coded assumption that the first chroma plane would be half-height; I fixed this to use the actual chroma shift value for the plane. We also need to pass the SPS and PPS range extension flags.
This commit is contained in:
parent
f4ea930a11
commit
e06ccfbe1d
@ -409,6 +409,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
case AV_PIX_FMT_YUV420P12:
|
case AV_PIX_FMT_YUV420P12:
|
||||||
|
case AV_PIX_FMT_YUV444P:
|
||||||
|
case AV_PIX_FMT_YUV444P10:
|
||||||
|
case AV_PIX_FMT_YUV444P12:
|
||||||
#if CONFIG_HEVC_NVDEC_HWACCEL
|
#if CONFIG_HEVC_NVDEC_HWACCEL
|
||||||
*fmt++ = AV_PIX_FMT_CUDA;
|
*fmt++ = AV_PIX_FMT_CUDA;
|
||||||
#endif
|
#endif
|
||||||
|
@ -35,6 +35,11 @@
|
|||||||
#include "nvdec.h"
|
#include "nvdec.h"
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
|
#if !NVDECAPI_CHECK_VERSION(9, 0)
|
||||||
|
#define cudaVideoSurfaceFormat_YUV444 2
|
||||||
|
#define cudaVideoSurfaceFormat_YUV444_16Bit 3
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef struct NVDECDecoder {
|
typedef struct NVDECDecoder {
|
||||||
CUvideodecoder decoder;
|
CUvideodecoder decoder;
|
||||||
|
|
||||||
@ -274,7 +279,8 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
|
|||||||
|
|
||||||
CUVIDDECODECREATEINFO params = { 0 };
|
CUVIDDECODECREATEINFO params = { 0 };
|
||||||
|
|
||||||
int cuvid_codec_type, cuvid_chroma_format;
|
cudaVideoSurfaceFormat output_format;
|
||||||
|
int cuvid_codec_type, cuvid_chroma_format, chroma_444;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
||||||
@ -292,6 +298,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
|
|||||||
av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
|
av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
|
||||||
return AVERROR(ENOSYS);
|
return AVERROR(ENOSYS);
|
||||||
}
|
}
|
||||||
|
chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
|
||||||
|
|
||||||
if (!avctx->hw_frames_ctx) {
|
if (!avctx->hw_frames_ctx) {
|
||||||
ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
|
ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_CUDA);
|
||||||
@ -299,6 +306,21 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch (sw_desc->comp[0].depth) {
|
||||||
|
case 8:
|
||||||
|
output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
|
||||||
|
cudaVideoSurfaceFormat_NV12;
|
||||||
|
break;
|
||||||
|
case 10:
|
||||||
|
case 12:
|
||||||
|
output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
|
||||||
|
cudaVideoSurfaceFormat_P016;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
|
||||||
|
return AVERROR(ENOSYS);
|
||||||
|
}
|
||||||
|
|
||||||
frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
|
frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
|
||||||
|
|
||||||
params.ulWidth = avctx->coded_width;
|
params.ulWidth = avctx->coded_width;
|
||||||
@ -306,8 +328,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
|
|||||||
params.ulTargetWidth = avctx->coded_width;
|
params.ulTargetWidth = avctx->coded_width;
|
||||||
params.ulTargetHeight = avctx->coded_height;
|
params.ulTargetHeight = avctx->coded_height;
|
||||||
params.bitDepthMinus8 = sw_desc->comp[0].depth - 8;
|
params.bitDepthMinus8 = sw_desc->comp[0].depth - 8;
|
||||||
params.OutputFormat = params.bitDepthMinus8 ?
|
params.OutputFormat = output_format;
|
||||||
cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
|
|
||||||
params.CodecType = cuvid_codec_type;
|
params.CodecType = cuvid_codec_type;
|
||||||
params.ChromaFormat = cuvid_chroma_format;
|
params.ChromaFormat = cuvid_chroma_format;
|
||||||
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
|
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
|
||||||
@ -386,6 +407,8 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
|||||||
NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
|
NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
|
||||||
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
|
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
|
||||||
|
|
||||||
|
AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
|
||||||
|
|
||||||
CUVIDPROCPARAMS vpp = { 0 };
|
CUVIDPROCPARAMS vpp = { 0 };
|
||||||
NVDECFrame *unmap_data = NULL;
|
NVDECFrame *unmap_data = NULL;
|
||||||
|
|
||||||
@ -394,6 +417,7 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
|||||||
|
|
||||||
unsigned int pitch, i;
|
unsigned int pitch, i;
|
||||||
unsigned int offset = 0;
|
unsigned int offset = 0;
|
||||||
|
int shift_h = 0, shift_v = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
vpp.progressive_frame = 1;
|
vpp.progressive_frame = 1;
|
||||||
@ -427,10 +451,11 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
|
|||||||
unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
|
unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
|
||||||
unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
|
unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
|
||||||
|
|
||||||
|
av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
|
||||||
for (i = 0; frame->linesize[i]; i++) {
|
for (i = 0; frame->linesize[i]; i++) {
|
||||||
frame->data[i] = (uint8_t*)(devptr + offset);
|
frame->data[i] = (uint8_t*)(devptr + offset);
|
||||||
frame->linesize[i] = pitch;
|
frame->linesize[i] = pitch;
|
||||||
offset += pitch * (frame->height >> (i ? 1 : 0));
|
offset += pitch * (frame->height >> (i ? shift_v : 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
goto finish;
|
goto finish;
|
||||||
@ -566,7 +591,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
|
|||||||
{
|
{
|
||||||
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
|
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
|
||||||
const AVPixFmtDescriptor *sw_desc;
|
const AVPixFmtDescriptor *sw_desc;
|
||||||
int cuvid_codec_type, cuvid_chroma_format;
|
int cuvid_codec_type, cuvid_chroma_format, chroma_444;
|
||||||
|
|
||||||
sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
|
||||||
if (!sw_desc)
|
if (!sw_desc)
|
||||||
@ -583,6 +608,7 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
|
|||||||
av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
|
av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
|
||||||
return AVERROR(EINVAL);
|
return AVERROR(EINVAL);
|
||||||
}
|
}
|
||||||
|
chroma_444 = cuvid_chroma_format == cudaVideoChromaFormat_444;
|
||||||
|
|
||||||
frames_ctx->format = AV_PIX_FMT_CUDA;
|
frames_ctx->format = AV_PIX_FMT_CUDA;
|
||||||
frames_ctx->width = (avctx->coded_width + 1) & ~1;
|
frames_ctx->width = (avctx->coded_width + 1) & ~1;
|
||||||
@ -601,13 +627,13 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
|
|||||||
|
|
||||||
switch (sw_desc->comp[0].depth) {
|
switch (sw_desc->comp[0].depth) {
|
||||||
case 8:
|
case 8:
|
||||||
frames_ctx->sw_format = AV_PIX_FMT_NV12;
|
frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
|
||||||
break;
|
break;
|
||||||
case 10:
|
case 10:
|
||||||
frames_ctx->sw_format = AV_PIX_FMT_P010;
|
frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
|
||||||
break;
|
break;
|
||||||
case 12:
|
case 12:
|
||||||
frames_ctx->sw_format = AV_PIX_FMT_P016;
|
frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return AVERROR(EINVAL);
|
return AVERROR(EINVAL);
|
||||||
|
@ -131,6 +131,17 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
|
|||||||
.IdrPicFlag = IS_IDR(s),
|
.IdrPicFlag = IS_IDR(s),
|
||||||
.bit_depth_luma_minus8 = sps->bit_depth - 8,
|
.bit_depth_luma_minus8 = sps->bit_depth - 8,
|
||||||
.bit_depth_chroma_minus8 = sps->bit_depth - 8,
|
.bit_depth_chroma_minus8 = sps->bit_depth - 8,
|
||||||
|
#if NVDECAPI_CHECK_VERSION(9, 0)
|
||||||
|
.sps_range_extension_flag = sps->sps_range_extension_flag,
|
||||||
|
.transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag,
|
||||||
|
.transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag,
|
||||||
|
.implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag,
|
||||||
|
.explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag,
|
||||||
|
.extended_precision_processing_flag = sps->extended_precision_processing_flag,
|
||||||
|
.intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag,
|
||||||
|
.persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag,
|
||||||
|
.cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag,
|
||||||
|
#endif
|
||||||
|
|
||||||
.dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag,
|
.dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag,
|
||||||
.slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag,
|
.slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag,
|
||||||
@ -164,6 +175,13 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
|
|||||||
.uniform_spacing_flag = pps->uniform_spacing_flag,
|
.uniform_spacing_flag = pps->uniform_spacing_flag,
|
||||||
.num_tile_columns_minus1 = pps->num_tile_columns - 1,
|
.num_tile_columns_minus1 = pps->num_tile_columns - 1,
|
||||||
.num_tile_rows_minus1 = pps->num_tile_rows - 1,
|
.num_tile_rows_minus1 = pps->num_tile_rows - 1,
|
||||||
|
#if NVDECAPI_CHECK_VERSION(9, 0)
|
||||||
|
.pps_range_extension_flag = pps->pps_range_extensions_flag,
|
||||||
|
.cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag,
|
||||||
|
.chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag,
|
||||||
|
.diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth,
|
||||||
|
.chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1,
|
||||||
|
#endif
|
||||||
|
|
||||||
.NumBitsForShortTermRPSInSlice = s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0,
|
.NumBitsForShortTermRPSInSlice = s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0,
|
||||||
.NumDeltaPocsOfRefRpsIdx = s->sh.short_term_rps ? s->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
|
.NumDeltaPocsOfRefRpsIdx = s->sh.short_term_rps ? s->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
|
||||||
@ -185,6 +203,18 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
|
|||||||
for (i = 0; i < pps->num_tile_rows; i++)
|
for (i = 0; i < pps->num_tile_rows; i++)
|
||||||
ppc->row_height_minus1[i] = pps->row_height[i] - 1;
|
ppc->row_height_minus1[i] = pps->row_height[i] - 1;
|
||||||
|
|
||||||
|
#if NVDECAPI_CHECK_VERSION(9, 0)
|
||||||
|
if (pps->chroma_qp_offset_list_len_minus1 > FF_ARRAY_ELEMS(ppc->cb_qp_offset_list) ||
|
||||||
|
pps->chroma_qp_offset_list_len_minus1 > FF_ARRAY_ELEMS(ppc->cr_qp_offset_list)) {
|
||||||
|
av_log(avctx, AV_LOG_ERROR, "Too many chroma_qp_offsets\n");
|
||||||
|
return AVERROR(ENOSYS);
|
||||||
|
}
|
||||||
|
for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
|
||||||
|
ppc->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i];
|
||||||
|
ppc->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (s->rps[LT_CURR].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetLtCurr) ||
|
if (s->rps[LT_CURR].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetLtCurr) ||
|
||||||
s->rps[ST_CURR_BEF].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrBefore) ||
|
s->rps[ST_CURR_BEF].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrBefore) ||
|
||||||
s->rps[ST_CURR_AFT].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrAfter)) {
|
s->rps[ST_CURR_AFT].nb_refs > FF_ARRAY_ELEMS(ppc->RefPicSetStCurrAfter)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user