From 358c887a9fa0fb2e7ce089eaea71ab924a3e47a7 Mon Sep 17 00:00:00 2001
From: Yogender Gupta <ygupta@nvidia.com>
Date: Sat, 24 Sep 2016 17:54:59 +0200
Subject: [PATCH] nvenc: Add support for high bitdepth

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
Signed-off-by: Diego Biurrun <diego@biurrun.de>
---
 libavcodec/nvenc.c      | 87 +++++++++++++++++++++++++++++++++++++++--
 libavcodec/nvenc.h      |  6 +++
 libavcodec/nvenc_hevc.c |  8 +++-
 3 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 7f8737e0bf3..14651d41c7c 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -89,12 +89,22 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
     AV_PIX_FMT_NV12,
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_YUV444P,
+#if NVENCAPI_MAJOR_VERSION >= 7
+    AV_PIX_FMT_P010,
+    AV_PIX_FMT_YUV444P16,
+#endif
 #if CONFIG_CUDA
     AV_PIX_FMT_CUDA,
 #endif
     AV_PIX_FMT_NONE
 };
 
+#define IS_10BIT(pix_fmt)  (pix_fmt == AV_PIX_FMT_P010    || \
+                            pix_fmt == AV_PIX_FMT_YUV444P16)
+
+#define IS_YUV444(pix_fmt) (pix_fmt == AV_PIX_FMT_YUV444P || \
+                            pix_fmt == AV_PIX_FMT_YUV444P16)
+
 static const struct {
     NVENCSTATUS nverr;
     int         averr;
@@ -703,9 +713,47 @@ static int nvenc_setup_hevc_config(AVCodecContext *avctx)
         hevc->outputPictureTimingSEI   = 1;
     }
 
-    /* No other profile is supported in the current SDK version 5 */
-    cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
-    avctx->profile  = FF_PROFILE_HEVC_MAIN;
+    switch (ctx->profile) {
+    case NV_ENC_HEVC_PROFILE_MAIN:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
+        avctx->profile  = FF_PROFILE_HEVC_MAIN;
+        break;
+#if NVENCAPI_MAJOR_VERSION >= 7
+    case NV_ENC_HEVC_PROFILE_MAIN_10:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+        avctx->profile  = FF_PROFILE_HEVC_MAIN_10;
+        break;
+    case NV_ENC_HEVC_PROFILE_REXT:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
+        avctx->profile  = FF_PROFILE_HEVC_REXT;
+        break;
+#endif /* NVENCAPI_MAJOR_VERSION >= 7 */
+    }
+
+    // force setting profile for various input formats
+    switch (ctx->data_pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_NV12:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
+        avctx->profile  = FF_PROFILE_HEVC_MAIN;
+        break;
+#if NVENCAPI_MAJOR_VERSION >= 7
+    case AV_PIX_FMT_P010:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+        avctx->profile  = FF_PROFILE_HEVC_MAIN_10;
+        break;
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV444P16:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID;
+        avctx->profile  = FF_PROFILE_HEVC_REXT;
+        break;
+#endif /* NVENCAPI_MAJOR_VERSION >= 7 */
+    }
+
+#if NVENCAPI_MAJOR_VERSION >= 7
+    hevc->chromaFormatIDC     = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
+    hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt)  ? 2 : 0;
+#endif /* NVENCAPI_MAJOR_VERSION >= 7 */
 
     hevc->sliceMode     = 3;
     hevc->sliceModeData = FFMAX(avctx->slices, 1);
@@ -858,6 +906,14 @@ static int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
     case AV_PIX_FMT_YUV444P:
         ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
         break;
+#if NVENCAPI_MAJOR_VERSION >= 7
+    case AV_PIX_FMT_P010:
+        ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+        break;
+    case AV_PIX_FMT_YUV444P16:
+        ctx->frames[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
+        break;
+#endif /* NVENCAPI_MAJOR_VERSION >= 7 */
     default:
         return AVERROR_BUG;
     }
@@ -1096,6 +1152,16 @@ static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame)
                             frame->data[1], frame->linesize[1],
                             frame->width, frame->height >> 1);
         break;
+    case AV_PIX_FMT_P010:
+        av_image_copy_plane(buf, in->pitch,
+                            frame->data[0], frame->linesize[0],
+                            frame->width << 1, frame->height);
+        buf += off;
+
+        av_image_copy_plane(buf, in->pitch,
+                            frame->data[1], frame->linesize[1],
+                            frame->width << 1, frame->height >> 1);
+        break;
     case AV_PIX_FMT_YUV444P:
         av_image_copy_plane(buf, in->pitch,
                             frame->data[0], frame->linesize[0],
@@ -1111,6 +1177,21 @@ static int nvenc_copy_frame(NV_ENC_LOCK_INPUT_BUFFER *in, const AVFrame *frame)
                             frame->data[2], frame->linesize[2],
                             frame->width, frame->height);
         break;
+    case AV_PIX_FMT_YUV444P16:
+        av_image_copy_plane(buf, in->pitch,
+                            frame->data[0], frame->linesize[0],
+                            frame->width << 1, frame->height);
+        buf += off;
+
+        av_image_copy_plane(buf, in->pitch,
+                            frame->data[1], frame->linesize[1],
+                            frame->width << 1, frame->height);
+        buf += off;
+
+        av_image_copy_plane(buf, in->pitch,
+                            frame->data[2], frame->linesize[2],
+                            frame->width << 1, frame->height);
+        break;
     default:
         return AVERROR_BUG;
     }
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 26c6515c573..fddf433d5ec 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -112,6 +112,12 @@ enum {
     NV_ENC_H264_PROFILE_CONSTRAINED_HIGH,
 };
 
+enum {
+    NV_ENC_HEVC_PROFILE_MAIN,
+    NV_ENC_HEVC_PROFILE_MAIN_10,
+    NV_ENC_HEVC_PROFILE_REXT,
+};
+
 enum {
     NVENC_LOWLATENCY = 1,
     NVENC_LOSSLESS   = 2,
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index f129826834c..d564f07a048 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -40,8 +40,12 @@ static const AVOption options[] = {
     { "llhp",       "low latency hp",                     0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOW_LATENCY_HP }, 0, 0, VE, "preset" },
     { "lossless",   "lossless",                           0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOSSLESS_DEFAULT }, 0, 0, VE, "preset" },
     { "losslesshp", "lossless hp",                        0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOSSLESS_HP }, 0, 0, VE, "preset" },
-    { "profile", "Set the encoding profile",             OFFSET(profile),      AV_OPT_TYPE_INT,    { .i64 = FF_PROFILE_HEVC_MAIN }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN, VE, "profile" },
-    { "high",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = FF_PROFILE_HEVC_MAIN }, 0, 0, VE, "profile" },
+    { "profile", "Set the encoding profile",             OFFSET(profile),      AV_OPT_TYPE_INT,    { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, NV_ENC_HEVC_PROFILE_MAIN, FF_PROFILE_HEVC_REXT, VE, "profile" },
+    { "main",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" },
+#if NVENCAPI_MAJOR_VERSION >= 7
+    { "main10",  "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_MAIN_10 }, 0, 0, VE, "profile" },
+    { "rext",   "",                                      0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_REXT }, 0, 0, VE, "profile" },
+#endif /* NVENCAPI_MAJOR_VERSION >= 7 */
     { "level",   "Set the encoding level restriction",   OFFSET(level),        AV_OPT_TYPE_INT,    { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_HEVC_62, VE, "level" },
     { "1.0",     "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_LEVEL_HEVC_1 },  0, 0, VE,  "level" },
     { "2.0",     "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_LEVEL_HEVC_2 },  0, 0, VE,  "level" },
-- 
GitLab