/*
* H.264/HEVC hardware encoding using nvidia nvenc
* Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/hwcontext_cuda.h"
#include "libavutil/hwcontext.h"
#include "libavutil/imgutils.h"
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/pixdesc.h"
/* Minimum CUDA compute capability accepted for NVENC; device checks compare
 * it against the value (major << 4) | minor. */
#define NVENC_CAP 0x30
/* True when rate-control mode rc is one of the modes this file treats as
 * constant bitrate. The argument is parenthesized so that expressions such
 * as IS_CBR(mode & mask) are evaluated as intended. */
#define IS_CBR(rc) ((rc) == NV_ENC_PARAMS_RC_CBR ||             \
                    (rc) == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||  \
                    (rc) == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP)
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
AV_PIX_FMT_YUV420P,
AV_PIX_FMT_NV12,
AV_PIX_FMT_YUV444P,
AV_PIX_FMT_YUV444P16,
AV_PIX_FMT_CUDA,
AV_PIX_FMT_NONE
};
/* True for the pixel formats this file encodes with 10-bit depth.
 * The argument is parenthesized so compound expressions expand safely. */
#define IS_10BIT(pix_fmt)  ((pix_fmt) == AV_PIX_FMT_P010 ||    \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16)

/* True for the 4:4:4 pixel formats handled by this file. */
#define IS_YUV444(pix_fmt) ((pix_fmt) == AV_PIX_FMT_YUV444P || \
                            (pix_fmt) == AV_PIX_FMT_YUV444P16)
static const struct {
NVENCSTATUS nverr;
int averr;
const char *desc;
} nvenc_errors[] = {
{ NV_ENC_SUCCESS, 0, "success" },
{ NV_ENC_ERR_NO_ENCODE_DEVICE, AVERROR(ENOENT), "no encode device" },
{ NV_ENC_ERR_UNSUPPORTED_DEVICE, AVERROR(ENOSYS), "unsupported device" },
{ NV_ENC_ERR_INVALID_ENCODERDEVICE, AVERROR(EINVAL), "invalid encoder device" },
{ NV_ENC_ERR_INVALID_DEVICE, AVERROR(EINVAL), "invalid device" },
{ NV_ENC_ERR_DEVICE_NOT_EXIST, AVERROR(EIO), "device does not exist" },
{ NV_ENC_ERR_INVALID_PTR, AVERROR(EFAULT), "invalid ptr" },
{ NV_ENC_ERR_INVALID_EVENT, AVERROR(EINVAL), "invalid event" },
{ NV_ENC_ERR_INVALID_PARAM, AVERROR(EINVAL), "invalid param" },
{ NV_ENC_ERR_INVALID_CALL, AVERROR(EINVAL), "invalid call" },
{ NV_ENC_ERR_OUT_OF_MEMORY, AVERROR(ENOMEM), "out of memory" },
{ NV_ENC_ERR_ENCODER_NOT_INITIALIZED, AVERROR(EINVAL), "encoder not initialized" },
{ NV_ENC_ERR_UNSUPPORTED_PARAM, AVERROR(ENOSYS), "unsupported param" },
{ NV_ENC_ERR_LOCK_BUSY, AVERROR(EAGAIN), "lock busy" },
{ NV_ENC_ERR_NOT_ENOUGH_BUFFER, AVERROR_BUFFER_TOO_SMALL, "not enough buffer"},
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
{ NV_ENC_ERR_INVALID_VERSION, AVERROR(EINVAL), "invalid version" },
{ NV_ENC_ERR_MAP_FAILED, AVERROR(EIO), "map failed" },
{ NV_ENC_ERR_NEED_MORE_INPUT, AVERROR(EAGAIN), "need more input" },
{ NV_ENC_ERR_ENCODER_BUSY, AVERROR(EAGAIN), "encoder busy" },
{ NV_ENC_ERR_EVENT_NOT_REGISTERD, AVERROR(EBADF), "event not registered" },
{ NV_ENC_ERR_GENERIC, AVERROR_UNKNOWN, "generic error" },
{ NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY, AVERROR(EINVAL), "incompatible client key" },
{ NV_ENC_ERR_UNIMPLEMENTED, AVERROR(ENOSYS), "unimplemented" },
{ NV_ENC_ERR_RESOURCE_REGISTER_FAILED, AVERROR(EIO), "resource register failed" },
{ NV_ENC_ERR_RESOURCE_NOT_REGISTERED, AVERROR(EBADF), "resource not registered" },
{ NV_ENC_ERR_RESOURCE_NOT_MAPPED, AVERROR(EBADF), "resource not mapped" },
};
/**
 * Translate an NVENC status code into a libav error code.
 *
 * @param err  NVENC status to look up in nvenc_errors[]
 * @param desc if non-NULL, receives a pointer to a static description
 *             string ("unknown error" when err is not in the table)
 * @return the mapped error code, or AVERROR_UNKNOWN for unlisted codes
 */
static int nvenc_map_error(NVENCSTATUS err, const char **desc)
{
    const char *text = "unknown error";
    int averr = AVERROR_UNKNOWN;
    unsigned int idx;

    for (idx = 0; idx < FF_ARRAY_ELEMS(nvenc_errors); idx++) {
        if (nvenc_errors[idx].nverr != err)
            continue;

        /* first matching entry wins */
        text  = nvenc_errors[idx].desc;
        averr = nvenc_errors[idx].averr;
        break;
    }

    if (desc)
        *desc = text;

    return averr;
}
/**
 * Log an NVENC failure at AV_LOG_ERROR and return the mapped error code.
 *
 * @param log_ctx      logging context handed to av_log()
 * @param err          NVENC status that was returned
 * @param error_string caller-supplied prefix describing what failed
 * @return the value nvenc_map_error() yields for err
 */
static int nvenc_print_error(void *log_ctx, NVENCSTATUS err,
                             const char *error_string)
{
    const char *description;
    const int averr = nvenc_map_error(err, &description);

    av_log(log_ctx, AV_LOG_ERROR, "%s: %s (%d)\n", error_string, description, err);

    return averr;
}
/**
 * Load the CUDA and NVENC dynamic libraries, verify that the driver's NVENC
 * API version is at least the one this file was built against, and fill in
 * the NVENC function list.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return 0 on success; a negative error code if a library fails to load,
 *         an NVENC call fails, or the driver API is too old (AVERROR(ENOSYS))
 */
static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NVENCSTATUS err;
    uint32_t nvenc_max_ver;
    int ret;

    ret = cuda_load_functions(&dl_fn->cuda_dl);
    if (ret < 0)
        return ret;

    ret = nvenc_load_functions(&dl_fn->nvenc_dl);
    if (ret < 0)
        return ret;

    err = dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
    if (err != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, err, "Failed to query nvenc max version");

    /* the version is handled as (major << 4) | minor throughout */
    av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n", nvenc_max_ver >> 4, nvenc_max_ver & 0xf);

    if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) {
        av_log(avctx, AV_LOG_ERROR, "Driver does not support the required nvenc API version. "
               "Required: %d.%d Found: %d.%d\n",
               NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
               nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
        return AVERROR(ENOSYS);
    }

    dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;

    err = dl_fn->nvenc_dl->NvEncodeAPICreateInstance(&dl_fn->nvenc_funcs);
    if (err != NV_ENC_SUCCESS)
        return nvenc_print_error(avctx, err, "Failed to create nvenc instance");

    av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");

    return 0;
}
/**
 * Open an NVENC encode session on the CUDA context stored in
 * ctx->cu_context and store the session handle in ctx->nvencoder.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return 0 on success; on failure ctx->nvencoder is reset to NULL and the
 *         mapped (negative) error code is returned after logging
 */
static av_cold int nvenc_open_session(AVCodecContext *avctx)
{
    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0 };
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NVENCSTATUS ret;

    params.version    = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
    params.apiVersion = NVENCAPI_VERSION;
    params.device     = ctx->cu_context;
    params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;

    ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
    if (ret != NV_ENC_SUCCESS) {
        /* do not leave a stale handle behind for later cleanup code */
        ctx->nvencoder = NULL;
        return nvenc_print_error(avctx, ret, "OpenEncodeSessionEx failed");
    }

    return 0;
}
/**
 * Check whether the open encode session lists the codec GUID stored in
 * ctx->init_encode_params.encodeGUID among its supported encoders.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return 0 if the GUID is supported, AVERROR(ENOMEM) if the temporary GUID
 *         list cannot be allocated, AVERROR(ENOSYS) otherwise
 */
static int nvenc_check_codec_support(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    int i, ret, count = 0;
    GUID *guids = NULL;

    ret = p_nvenc->nvEncGetEncodeGUIDCount(ctx->nvencoder, &count);

    if (ret != NV_ENC_SUCCESS || !count)
        return AVERROR(ENOSYS);

    guids = av_malloc(count * sizeof(GUID));
    if (!guids)
        return AVERROR(ENOMEM);

    ret = p_nvenc->nvEncGetEncodeGUIDs(ctx->nvencoder, guids, count, &count);
    if (ret != NV_ENC_SUCCESS) {
        ret = AVERROR(ENOSYS);
        goto fail;
    }

    /* assume "unsupported" until a matching GUID is found */
    ret = AVERROR(ENOSYS);
    for (i = 0; i < count; i++) {
        if (!memcmp(&guids[i], &ctx->init_encode_params.encodeGUID, sizeof(*guids))) {
            ret = 0;
            break;
        }
    }

fail:
    /* shared exit: the GUID list is released on success and failure alike */
    av_free(guids);

    return ret;
}
/**
 * Query a single encoder capability for the codec GUID stored in
 * ctx->init_encode_params.encodeGUID.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @param cap   capability to query
 * @return the value reported by the driver, or 0 if the query fails
 */
static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
    NV_ENC_CAPS_PARAM params        = { 0 };
    int ret, val = 0;

    params.version     = NV_ENC_CAPS_PARAM_VER;
    params.capsToQuery = cap;

    ret = p_nvenc->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID, &params, &val);

    if (ret == NV_ENC_SUCCESS)
        return val;

    /* a failed query is treated the same as "capability absent" */
    return 0;
}
/**
 * Verify that the open encode session supports everything the current
 * configuration asks for: the codec itself, 4:4:4 input, lossless presets,
 * frame dimensions, B-frame count, interlaced coding, 10-bit input,
 * rate-control look-ahead and temporal AQ.
 *
 * Each mismatch is logged at verbose level.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return 0 if all requirements are met; the codec-support error or
 *         AVERROR(ENOSYS) for the first unmet requirement
 */
static int nvenc_check_capabilities(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    int ret;

    ret = nvenc_check_codec_support(avctx);
    if (ret < 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n");
        return ret;
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
    if (IS_YUV444(ctx->data_pix_fmt) && ret <= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE);
    if (ctx->preset >= PRESET_LOSSLESS_DEFAULT && ret <= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Lossless encoding not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_WIDTH_MAX);
    if (ret < avctx->width) {
        av_log(avctx, AV_LOG_VERBOSE, "Width %d exceeds %d\n",
               avctx->width, ret);
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_HEIGHT_MAX);
    if (ret < avctx->height) {
        av_log(avctx, AV_LOG_VERBOSE, "Height %d exceeds %d\n",
               avctx->height, ret);
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_NUM_MAX_BFRAMES);
    if (ret < avctx->max_b_frames) {
        av_log(avctx, AV_LOG_VERBOSE, "Max B-frames %d exceed %d\n",
               avctx->max_b_frames, ret);

        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_FIELD_ENCODING);
    if (ret < 1 && avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
        av_log(avctx, AV_LOG_VERBOSE,
               "Interlaced encoding is not supported. Supported level: %d\n",
               ret);
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
    if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "10 bit encode not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
    if (ctx->rc_lookahead > 0 && ret <= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "RC lookahead not supported\n");
        return AVERROR(ENOSYS);
    }

    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ);
    if (ctx->temporal_aq > 0 && ret <= 0) {
        av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ not supported\n");
        return AVERROR(ENOSYS);
    }

    return 0;
}
/**
 * Probe CUDA device idx for NVENC suitability and, if it is the requested
 * device (or any device is acceptable), keep a CUDA context and encode
 * session open on it.
 *
 * Device information is logged at info level when ctx->device is
 * LIST_DEVICES and at verbose level otherwise.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @param idx   CUDA device ordinal to probe
 * @return 0 if the device was selected and its session left open;
 *         -1 if the device cannot be queried or is not the requested one;
 *         AVERROR(ENOSYS) if the device is unusable or was only probed
 *         (in which case the session and context are torn down again)
 */
static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
    char name[128] = { 0};
    int major, minor, ret;
    CUresult cu_res;
    CUdevice cu_device;
    CUcontext dummy;
    int loglevel = AV_LOG_VERBOSE;

    if (ctx->device == LIST_DEVICES)
        loglevel = AV_LOG_INFO;

    cu_res = dl_fn->cuda_dl->cuDeviceGet(&cu_device, idx);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR,
               "Cannot access the CUDA device %d\n",
               idx);
        return -1;
    }

    cu_res = dl_fn->cuda_dl->cuDeviceGetName(name, sizeof(name), cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "cuDeviceGetName failed on device %d\n", idx);
        return -1;
    }

    cu_res = dl_fn->cuda_dl->cuDeviceComputeCapability(&major, &minor, cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "cuDeviceComputeCapability failed on device %d\n", idx);
        return -1;
    }

    av_log(avctx, loglevel, "[ GPU #%d - < %s > has Compute SM %d.%d ]\n", idx, name, major, minor);
    if (((major << 4) | minor) < NVENC_CAP) {
        av_log(avctx, loglevel, "does not support NVENC\n");
        goto fail;
    }

    /* a specific device was requested and this is not it */
    if (ctx->device != idx && ctx->device != ANY_DEVICE)
        return -1;

    cu_res = dl_fn->cuda_dl->cuCtxCreate(&ctx->cu_context_internal, 0, cu_device);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
        goto fail;
    }

    ctx->cu_context = ctx->cu_context_internal;

    cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
    if (cu_res != CUDA_SUCCESS) {
        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
        goto fail2;
    }

    if ((ret = nvenc_open_session(avctx)) < 0)
        goto fail2;

    if ((ret = nvenc_check_capabilities(avctx)) < 0)
        goto fail3;

    av_log(avctx, loglevel, "supports NVENC\n");

    dl_fn->nvenc_device_count++;

    /* keep the session and context open only for the device in use */
    if (ctx->device == idx || ctx->device == ANY_DEVICE)
        return 0;

fail3:
    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
    ctx->nvencoder = NULL;

fail2:
    dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
    ctx->cu_context_internal = NULL;

fail:
    return AVERROR(ENOSYS);
}
/**
 * Select the codec GUID for the configured codec and bind the encoder to a
 * CUDA device.
 *
 * With AV_PIX_FMT_CUDA input the CUDA context comes from the caller's
 * hw_frames_ctx; otherwise CUDA is initialised and every device is probed
 * with nvenc_check_device() until one is accepted.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return 0 on success; AVERROR_BUG for an unexpected codec id;
 *         AVERROR(EINVAL) if CUDA frames are used without hw_frames_ctx or
 *         the requested GPU index was not accepted; AVERROR_EXIT after
 *         listing devices; AVERROR_UNKNOWN / AVERROR_EXTERNAL for CUDA
 *         initialisation or enumeration problems; or the error from session
 *         setup or the capability check
 */
static av_cold int nvenc_setup_device(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_H264:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
        break;
    case AV_CODEC_ID_HEVC:
        ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID;
        break;
    default:
        return AVERROR_BUG;
    }

    if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
        AVHWFramesContext   *frames_ctx;
        AVCUDADeviceContext *device_hwctx;
        int ret;

        /* CUDA frames cannot be used without the frames context that
         * carries the device; bail out instead of dereferencing NULL */
        if (!avctx->hw_frames_ctx)
            return AVERROR(EINVAL);

        frames_ctx   = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
        device_hwctx = frames_ctx->device_ctx->hwctx;

        ctx->cu_context = device_hwctx->cuda_ctx;

        ret = nvenc_open_session(avctx);
        if (ret < 0)
            return ret;

        ret = nvenc_check_capabilities(avctx);
        if (ret < 0) {
            av_log(avctx, AV_LOG_FATAL, "Provided device doesn't support required NVENC features\n");
            return ret;
        }
    } else {
        int i, nb_devices = 0;

        if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR,
                   "Cannot init CUDA\n");
            return AVERROR_UNKNOWN;
        }

        if ((dl_fn->cuda_dl->cuDeviceGetCount(&nb_devices)) != CUDA_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR,
                   "Cannot enumerate the CUDA devices\n");
            return AVERROR_UNKNOWN;
        }

        if (!nb_devices) {
            av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", nb_devices);

        dl_fn->nvenc_device_count = 0;
        for (i = 0; i < nb_devices; ++i) {
            /* when only listing devices, keep probing instead of stopping
             * at the first usable one */
            if ((nvenc_check_device(avctx, i)) >= 0 && ctx->device != LIST_DEVICES)
                return 0;
        }

        if (ctx->device == LIST_DEVICES)
            return AVERROR_EXIT;

        if (!dl_fn->nvenc_device_count) {
            av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
            return AVERROR_EXTERNAL;
        }

        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->device, nb_devices);
        return AVERROR(EINVAL);
    }

    return 0;
}
/* One preset-table entry: a GUID together with the flag bits stored for it. */
struct GUIDTuple {
    const GUID guid;  /* GUID copied into init_encode_params.presetGUID */
    int flags;        /* flag bits copied into NvencContext.flags */
};

typedef struct GUIDTuple GUIDTuple;
/* Build the table entry at index PRESET_<alias> from the GUID
 * NV_ENC_PRESET_<name>_GUID plus optional flag bits. */
#define PRESET_ALIAS(alias, name, ...) \
    [PRESET_ ## alias] = { NV_ENC_PRESET_ ## name ## _GUID, __VA_ARGS__ }

/* Shorthand for an entry whose index and GUID share the same name. */
#define PRESET(name, ...) PRESET_ALIAS(name, name, __VA_ARGS__)

/**
 * Resolve ctx->preset into an NVENC preset GUID and the flag bits that go
 * with it, storing them in ctx->init_encode_params.presetGUID and
 * ctx->flags.
 *
 * @param ctx encoder context; ctx->preset is used directly as an index
 *            into the preset table
 */
static void nvenc_map_preset(NvencContext *ctx)
{
    /* entries listed without flags get flags == 0 from zero-initialization */
    GUIDTuple table[] = {
        PRESET(DEFAULT),
        PRESET(HP),
        PRESET(HQ),
        PRESET(BD),
        PRESET_ALIAS(SLOW,   HQ, NVENC_TWO_PASSES),
        PRESET_ALIAS(MEDIUM, HQ, NVENC_ONE_PASS),
        PRESET_ALIAS(FAST,   HP, NVENC_ONE_PASS),
        PRESET(LOW_LATENCY_DEFAULT, NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HP,      NVENC_LOWLATENCY),
        PRESET(LOW_LATENCY_HQ,      NVENC_LOWLATENCY),
        PRESET(LOSSLESS_DEFAULT,    NVENC_LOSSLESS),
        PRESET(LOSSLESS_HP,         NVENC_LOSSLESS),
    };
    const GUIDTuple *selected = table + ctx->preset;

    ctx->flags = selected->flags;
    ctx->init_encode_params.presetGUID = selected->guid;
}

#undef PRESET
#undef PRESET_ALIAS
/**
 * Configure constant-QP rate control, using avctx->global_quality as the
 * QP for intra, P and B frames alike, and reset avctx->qmin / qmax to -1.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 */
static av_cold void set_constqp(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc_params = &ctx->encode_config.rcParams;

    rc_params->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;

    /* one quality value drives all three frame types */
    rc_params->constQP.qpIntra  = avctx->global_quality;
    rc_params->constQP.qpInterP = avctx->global_quality;
    rc_params->constQP.qpInterB = avctx->global_quality;

    avctx->qmax = -1;
    avctx->qmin = -1;
}
/**
 * Fill in the QP bounds and initial QPs used by the VBR rate-control modes.
 *
 * - qmin and qmax both >= 0: both bounds are enabled and the initial
 *   P-frame QP is a weighted mean biased towards qmin.
 * - only qmin >= 0: the lower bound is enabled and also used as the initial
 *   P-frame QP.
 * - otherwise: no bounds, initial P-frame QP defaults to 26.
 *
 * The intra and B-frame initial QPs are derived from the P-frame QP via the
 * quantizer factors and offsets when both factors are non-zero, and
 * otherwise equal the P-frame QP.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 */
static av_cold void set_vbr(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc_params = &ctx->encode_config.rcParams;
    const int has_qmin = avctx->qmin >= 0;
    const int has_qmax = avctx->qmax >= 0;
    int base_qp = 26; // default to 26

    if (has_qmin) {
        rc_params->enableMinQP = 1;

        rc_params->minQP.qpInterB = avctx->qmin;
        rc_params->minQP.qpInterP = avctx->qmin;
        rc_params->minQP.qpIntra  = avctx->qmin;

        base_qp = avctx->qmin;

        /* an upper bound is only honoured together with a lower bound */
        if (has_qmax) {
            rc_params->enableMaxQP = 1;

            rc_params->maxQP.qpInterB = avctx->qmax;
            rc_params->maxQP.qpInterP = avctx->qmax;
            rc_params->maxQP.qpIntra  = avctx->qmax;

            base_qp = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
        }
    }

    rc_params->enableInitialRCQP = 1;
    rc_params->initialRCQP.qpInterP = base_qp;

    if (avctx->i_quant_factor == 0.0 || avctx->b_quant_factor == 0.0) {
        rc_params->initialRCQP.qpIntra  = base_qp;
        rc_params->initialRCQP.qpInterB = base_qp;
        return;
    }

    /* + 0.5 rounds to nearest before the implicit truncation to int */
    rc_params->initialRCQP.qpIntra = av_clip(
        base_qp * fabs(avctx->i_quant_factor) + avctx->i_quant_offset + 0.5, 0, 51);
    rc_params->initialRCQP.qpInterB = av_clip(
        base_qp * fabs(avctx->b_quant_factor) + avctx->b_quant_offset + 0.5, 0, 51);
}
/**
 * Configure rate control for lossless encoding: constant-QP mode with a QP
 * of 0 for every frame type, and avctx->qmin / qmax reset to -1.
 * (Stray line-number residue inside the body was removed.)
 *
 * @param avctx codec context whose priv_data is an NvencContext
 */
static av_cold void set_lossless(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    rc->rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
    rc->constQP.qpInterB = 0;
    rc->constQP.qpInterP = 0;
    rc->constQP.qpIntra  = 0;

    avctx->qmin = -1;
    avctx->qmax = -1;
}
/**
 * Apply the explicitly selected rate-control mode in ctx->rc.
 *
 * For constant-QP and for the VBR paths that log a warning about missing
 * options, the function returns early and rcParams.rateControlMode is set
 * (if at all) by the helper that was called; in every other case
 * rateControlMode is set to ctx->rc at the end.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 */
static void nvenc_override_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx    = avctx->priv_data;
    NV_ENC_RC_PARAMS *rc = &ctx->encode_config.rcParams;

    switch (ctx->rc) {
    case NV_ENC_PARAMS_RC_CONSTQP:
        if (avctx->global_quality <= 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The constant quality rate-control requires "
                   "the 'global_quality' option set.\n");
            return;
        }
        set_constqp(avctx);
        return;
    case NV_ENC_PARAMS_RC_2_PASS_VBR:
    case NV_ENC_PARAMS_RC_VBR:
        if (avctx->qmin < 0 && avctx->qmax < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The variable bitrate rate-control requires "
                   "the 'qmin' and/or 'qmax' option set.\n");
            set_vbr(avctx);
            return;
        }
        /* fall through: the VBR modes share the qmin handling below */
    case NV_ENC_PARAMS_RC_VBR_MINQP:
        if (avctx->qmin < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "The variable bitrate rate-control requires "
                   "the 'qmin' option set.\n");
            set_vbr(avctx);
            return;
        }
        set_vbr(avctx);
        break;
    case NV_ENC_PARAMS_RC_CBR:
    case NV_ENC_PARAMS_RC_2_PASS_QUALITY:
    case NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP:
        /* nothing mode-specific to prepare */
        break;
    }

    rc->rateControlMode = ctx->rc;
}
/**
 * Adjust ctx->nb_surfaces and ctx->async_depth to the active settings.
 *
 * With look-ahead enabled the surface count is raised (with a warning) to
 * at least rc_lookahead + max(frameIntervalP, 0) + 1 + 4. The count is then
 * clamped to [1, MAX_REGISTERED_FRAMES] and async_depth is limited to
 * nb_surfaces - 1.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return always 0
 */
static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;

    if (ctx->rc_lookahead > 0) {
        int required = ctx->rc_lookahead + 1 + 4;

        /* a non-positive P-frame interval contributes nothing */
        if (ctx->encode_config.frameIntervalP > 0)
            required += ctx->encode_config.frameIntervalP;

        if (required > ctx->nb_surfaces) {
            av_log(avctx, AV_LOG_WARNING,
                   "Defined rc_lookahead requires more surfaces, "
                   "increasing used surfaces %d -> %d\n", ctx->nb_surfaces, required);
            ctx->nb_surfaces = required;
        }
    }

    /* clamp to [1, MAX_REGISTERED_FRAMES] */
    if (ctx->nb_surfaces > MAX_REGISTERED_FRAMES)
        ctx->nb_surfaces = MAX_REGISTERED_FRAMES;
    if (ctx->nb_surfaces < 1)
        ctx->nb_surfaces = 1;

    if (ctx->async_depth > ctx->nb_surfaces - 1)
        ctx->async_depth = ctx->nb_surfaces - 1;

    return 0;
}
/**
 * Translate the generic and NVENC-specific rate-control options into
 * ctx->encode_config.rcParams: bitrates, rate-control mode, VBV buffer
 * size, adaptive quantization, look-ahead, strict GOP, non-reference
 * P-frames, zero reorder delay and target quality.
 *
 * When no mode was requested (ctx->rc < 0) one is derived from the preset
 * flags and the quality/bitrate options.
 *
 * This block was restored from a damaged copy: line-number residue was
 * removed, missing braces re-added and two dropped conditions put back.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 */
static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
{
    NvencContext *ctx = avctx->priv_data;

    if (avctx->bit_rate > 0) {
        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
    }

    if (avctx->rc_max_rate > 0)
        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;

    if (ctx->rc < 0) {
        /* preset flags decide first; otherwise low-latency presets imply
         * two-pass */
        if (ctx->flags & NVENC_ONE_PASS)
            ctx->twopass = 0;
        if (ctx->flags & NVENC_TWO_PASSES)
            ctx->twopass = 1;

        if (ctx->twopass < 0)
            ctx->twopass = (ctx->flags & NVENC_LOWLATENCY) != 0;

        /* NOTE(review): the outer "cbr" condition and its single-pass branch
         * were missing from the damaged copy, which left the later
         * "else if (ctx->twopass)" branch unreachable. Restored on the
         * assumption that NvencContext has a 'cbr' option - confirm. */
        if (ctx->cbr) {
            if (ctx->twopass) {
                ctx->rc = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
            } else {
                ctx->rc = NV_ENC_PARAMS_RC_CBR;
            }
        } else if (avctx->global_quality > 0) {
            ctx->rc = NV_ENC_PARAMS_RC_CONSTQP;
        } else if (ctx->twopass) {
            ctx->rc = NV_ENC_PARAMS_RC_2_PASS_VBR;
        } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
            ctx->rc = NV_ENC_PARAMS_RC_VBR_MINQP;
        }
    }

    if (ctx->flags & NVENC_LOSSLESS) {
        set_lossless(avctx);
    } else if (ctx->rc >= 0) {
        nvenc_override_rate_control(avctx);
    } else {
        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
        set_vbr(avctx);
    }

    if (avctx->rc_buffer_size > 0) {
        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
    }

    if (ctx->aq) {
        ctx->encode_config.rcParams.enableAQ   = 1;
        ctx->encode_config.rcParams.aqStrength = ctx->aq_strength;
        av_log(avctx, AV_LOG_VERBOSE, "AQ enabled.\n");
    }

    if (ctx->temporal_aq) {
        ctx->encode_config.rcParams.enableTemporalAQ = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Temporal AQ enabled.\n");
    }

    /* Guard restored: the same rc_lookahead > 0 test gates the surface
     * computation in nvenc_recalc_surfaces(). */
    if (ctx->rc_lookahead > 0) {
        int lkd_bound = FFMIN(ctx->nb_surfaces, ctx->async_depth) -
                        ctx->encode_config.frameIntervalP - 4;

        if (lkd_bound < 0) {
            av_log(avctx, AV_LOG_WARNING,
                   "Lookahead not enabled. Increase buffer delay (-delay).\n");
        } else {
            ctx->encode_config.rcParams.enableLookahead = 1;
            ctx->encode_config.rcParams.lookaheadDepth  = av_clip(ctx->rc_lookahead, 0, lkd_bound);
            ctx->encode_config.rcParams.disableIadapt   = ctx->no_scenecut;
            ctx->encode_config.rcParams.disableBadapt   = !ctx->b_adapt;
            av_log(avctx, AV_LOG_VERBOSE,
                   "Lookahead enabled: depth %d, scenecut %s, B-adapt %s.\n",
                   ctx->encode_config.rcParams.lookaheadDepth,
                   ctx->encode_config.rcParams.disableIadapt ? "disabled" : "enabled",
                   ctx->encode_config.rcParams.disableBadapt ? "disabled" : "enabled");
        }
    }

    if (ctx->strict_gop) {
        ctx->encode_config.rcParams.strictGOPTarget = 1;
        av_log(avctx, AV_LOG_VERBOSE, "Strict GOP target enabled.\n");
    }

    if (ctx->nonref_p)
        ctx->encode_config.rcParams.enableNonRefP = 1;

    if (ctx->zerolatency)
        ctx->encode_config.rcParams.zeroReorderDelay = 1;

    if (ctx->quality)
        ctx->encode_config.rcParams.targetQuality = ctx->quality;
}
/**
 * Fill in the H.264-specific part of the encoder configuration: VUI colour
 * description, slice mode, parameter-set and AUD output, reference frame
 * count, IDR period, SEI output, two-pass tools, profile and chroma format.
 *
 * @param avctx codec context whose priv_data is an NvencContext;
 *              avctx->profile is updated to the profile chosen here
 * @return always 0
 */
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
{
    NvencContext *ctx                      = avctx->priv_data;
    NV_ENC_CONFIG *cc                      = &ctx->encode_config;
    NV_ENC_CONFIG_H264 *h264               = &cc->encodeCodecConfig.h264Config;
    NV_ENC_CONFIG_H264_VUI_PARAMETERS *vui = &h264->h264VUIParameters;

    vui->colourMatrix = avctx->colorspace;
    vui->colourPrimaries = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    vui->videoFullRangeFlag = (avctx->color_range == AVCOL_RANGE_JPEG
        || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);

    /* signal a colour description only if at least one property differs
     * from the value 2 */
    vui->colourDescriptionPresentFlag =
        (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);

    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    h264->sliceMode = 3;
    h264->sliceModeData = 1;

    /* with global headers the parameter sets are not emitted in-band */
    h264->disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
    h264->repeatSPSPPS  = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
    h264->outputAUD     = ctx->aud;

    if (avctx->refs >= 0) {
        /* 0 means "let the hardware decide" */
        h264->maxNumRefFrames = avctx->refs;
    }
    if (avctx->gop_size >= 0) {
        h264->idrPeriod = cc->gopLength;
    }

    if (IS_CBR(cc->rcParams.rateControlMode)) {
        h264->outputBufferingPeriodSEI = 1;
        h264->outputPictureTimingSEI   = 1;
    }

    if (cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_QUALITY ||
        cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP ||
        cc->rcParams.rateControlMode == NV_ENC_PARAMS_RC_2_PASS_VBR) {
        h264->adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
        h264->fmoMode = NV_ENC_H264_FMO_DISABLE;
    }

    if (ctx->flags & NVENC_LOSSLESS) {
        h264->qpPrimeYZeroTransformBypassFlag = 1;
    } else {
        /* every case ends in break so that the requested profile is kept
         * instead of falling through to the High profile */
        switch(ctx->profile) {
        case NV_ENC_H264_PROFILE_BASELINE:
            cc->profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
            avctx->profile = FF_PROFILE_H264_BASELINE;
            break;
        case NV_ENC_H264_PROFILE_MAIN:
            cc->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
            avctx->profile = FF_PROFILE_H264_MAIN;
            break;
        case NV_ENC_H264_PROFILE_HIGH:
            cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
            avctx->profile = FF_PROFILE_H264_HIGH;
            break;
        case NV_ENC_H264_PROFILE_HIGH_444P:
            cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
            avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
            break;
        }
    }

    // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
    if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P) {
        cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
        avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
    }

    h264->chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;

    return 0;
}
/**
 * Fill in the HEVC-specific part of the encoder configuration: VUI colour
 * description, slice mode, parameter-set and AUD output, DPB reference
 * count, IDR period, SEI output, profile, chroma format and bit depth.
 *
 * The profile requested via ctx->profile is overridden for 10-bit input
 * (Main 10) and for 4:4:4 input (range extensions); 4:4:4 takes precedence
 * when both apply.
 *
 * @param avctx codec context whose priv_data is an NvencContext;
 *              avctx->profile is updated to the profile chosen here
 * @return always 0
 */
static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
{
    NvencContext *ctx        = avctx->priv_data;
    NV_ENC_CONFIG *config    = &ctx->encode_config;
    NV_ENC_CONFIG_HEVC *hevc = &config->encodeCodecConfig.hevcConfig;
    NV_ENC_CONFIG_HEVC_VUI_PARAMETERS *vui = &hevc->hevcVUIParameters;
    const int global_header = !!(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
    const int input_is_10bit = IS_10BIT(ctx->data_pix_fmt);
    const int input_is_444   = IS_YUV444(ctx->data_pix_fmt);

    vui->colourMatrix            = avctx->colorspace;
    vui->colourPrimaries         = avctx->color_primaries;
    vui->transferCharacteristics = avctx->color_trc;
    vui->videoFullRangeFlag      = (avctx->color_range == AVCOL_RANGE_JPEG
        || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ420P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ422P || ctx->data_pix_fmt == AV_PIX_FMT_YUVJ444P);

    /* a colour description is signalled as soon as one property is not 2 */
    vui->colourDescriptionPresentFlag =
        (avctx->colorspace != 2 || avctx->color_primaries != 2 || avctx->color_trc != 2);

    vui->videoSignalTypePresentFlag =
        (vui->colourDescriptionPresentFlag
        || vui->videoFormat != 5
        || vui->videoFullRangeFlag != 0);

    hevc->sliceMode     = 3;
    hevc->sliceModeData = 1;

    /* with global headers the parameter sets are not emitted in-band */
    hevc->disableSPSPPS = global_header;
    hevc->repeatSPSPPS  = !global_header;
    hevc->outputAUD     = ctx->aud;

    /* 0 means "let the hardware decide" */
    if (avctx->refs >= 0)
        hevc->maxNumRefFramesInDPB = avctx->refs;

    if (avctx->gop_size >= 0)
        hevc->idrPeriod = config->gopLength;

    if (IS_CBR(config->rcParams.rateControlMode)) {
        hevc->outputBufferingPeriodSEI = 1;
        hevc->outputPictureTimingSEI   = 1;
    }

    switch (ctx->profile) {
    case NV_ENC_HEVC_PROFILE_MAIN:
        config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
        avctx->profile      = FF_PROFILE_HEVC_MAIN;
        break;
    case NV_ENC_HEVC_PROFILE_MAIN_10:
        config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
        avctx->profile      = FF_PROFILE_HEVC_MAIN_10;
        break;
    case NV_ENC_HEVC_PROFILE_REXT:
        config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
        avctx->profile      = FF_PROFILE_HEVC_REXT;
        break;
    }

    if (input_is_444) {
        // force setting profile as rext if input is yuv444
        config->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
        avctx->profile      = FF_PROFILE_HEVC_REXT;
    } else if (input_is_10bit) {
        // force setting profile as main10 if input is 10 bit
        config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
        avctx->profile      = FF_PROFILE_HEVC_MAIN_10;
    }

    hevc->chromaFormatIDC     = input_is_444   ? 3 : 1;
    hevc->pixelBitDepthMinus8 = input_is_10bit ? 2 : 0;

    return 0;
}
/**
 * Dispatch to the codec-specific configuration routine for avctx->codec->id.
 *
 * @param avctx codec context whose priv_data is an NvencContext
 * @return the result of the H.264 or HEVC setup routine, or 0 for any
 *         other codec id
 */
static av_cold int nvenc_setup_codec_config(AVCodecContext *avctx)
{
    if (avctx->codec->id == AV_CODEC_ID_H264)
        return nvenc_setup_h264_config(avctx);

    if (avctx->codec->id == AV_CODEC_ID_HEVC)
        return nvenc_setup_hevc_config(avctx);

    /* Earlier switch/case will return if unknown codec is passed. */
    return 0;
}
static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props;
int res = 0;
int dw, dh;
ctx->encode_config.version = NV_ENC_CONFIG_VER;
ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
ctx->init_encode_params.encodeHeight = avctx->height;
ctx->init_encode_params.encodeWidth = avctx->width;
ctx->init_encode_params.encodeConfig = &ctx->encode_config;
nvenc_map_preset(ctx);
preset_config.version = NV_ENC_PRESET_CONFIG_VER;
preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder,
ctx->init_encode_params.encodeGUID,
ctx->init_encode_params.presetGUID,
&preset_config);
if (nv_status != NV_ENC_SUCCESS)
return nvenc_print_error(avctx, nv_status, "Cannot get the preset configuration");
memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
ctx->encode_config.version = NV_ENC_CONFIG_VER;
dw = avctx->width;
dh = avctx->height;
if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
dw*= avctx->sample_aspect_ratio.num;
dh*= avctx->sample_aspect_ratio.den;
}
av_reduce(&dw, &dh, dw, dh, 1024 * 1024);
ctx->init_encode_params.darHeight = dh;
ctx->init_encode_params.darWidth = dw;
ctx->init_encode_params.frameRateNum = avctx->time_base.den;
ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
ctx->init_encode_params.enableEncodeAsync = 0;
ctx->init_encode_params.enablePTD = 1;
if (ctx->bluray_compat) {
ctx->aud = 1;
avctx->refs = FFMIN(FFMAX(avctx->refs, 0), 6);
avctx->max_b_frames = FFMIN(avctx->max_b_frames, 3);
switch (avctx->codec->id) {
case AV_CODEC_ID_H264:
/* maximum level depends on used resolution */
break;
case AV_CODEC_ID_HEVC:
ctx->level = NV_ENC_LEVEL_HEVC_51;
ctx->tier = NV_ENC_TIER_HEVC_HIGH;
break;
}
}
if (avctx->gop_size > 0) {
if (avctx->max_b_frames >= 0) {
/* 0 is intra-only, 1 is I/P only, 2 is one B-Frame, 3 two B-frames, and so on. */
ctx->encode_config.frameIntervalP = avctx->max_b_frames + 1;
}
ctx->encode_config.gopLength = avctx->gop_size;
} else if (avctx->gop_size == 0) {
ctx->encode_config.frameIntervalP = 0;
ctx->encode_config.gopLength = 1;
ctx->initial_pts[0] = AV_NOPTS_VALUE;
ctx->initial_pts[1] = AV_NOPTS_VALUE;
nvenc_recalc_surfaces(avctx);
nvenc_setup_rate_control(avctx);