/*
* This file is part of FFmpeg.
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "buffer.h"
#include "common.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_cuda_internal.h"
#include "mem.h"
#include "pixdesc.h"
#include "pixfmt.h"
/* Granularity to which frame row widths are rounded up (via FFALIGN) when
 * sizing pool buffers and computing plane line sizes in this file. */
#define CUDA_FRAME_ALIGNMENT 256
/**
 * Per-frames-context private state.
 *
 * Both fields are filled by av_pix_fmt_get_chroma_sub_sample() during
 * frames initialisation; shift_height is used by the transfer functions to
 * derive the height of every plane after the first.
 */
typedef struct CUDAFramesContext {
    int shift_width;   /* horizontal chroma shift of the software format */
    int shift_height;  /* vertical chroma shift of the software format */
} CUDAFramesContext;
/* Software pixel formats that CUDA frames in this file can carry.
 * Every entry must also be handled by the size calculation in
 * cuda_frames_init() and the plane layout in cuda_get_buffer(). */
static const enum AVPixelFormat supported_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_P016,
};
/**
 * Fill in the pixel-format constraints for CUDA frames.
 *
 * valid_sw_formats receives a copy of supported_formats terminated by
 * AV_PIX_FMT_NONE; valid_hw_formats receives { AV_PIX_FMT_CUDA,
 * AV_PIX_FMT_NONE }.
 *
 * @param ctx         device context (not used here)
 * @param hwconfig    hardware configuration (not used here)
 * @param constraints structure whose format lists are allocated and filled
 * @return 0 on success, AVERROR(ENOMEM) if either list cannot be allocated
 */
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
                                       const void *hwconfig,
                                       AVHWFramesConstraints *constraints)
{
    const int nb_sw = FF_ARRAY_ELEMS(supported_formats);
    int idx = 0;

    /* Software formats: one slot per supported format plus the terminator. */
    constraints->valid_sw_formats =
        av_malloc_array(nb_sw + 1, sizeof(*constraints->valid_sw_formats));
    if (constraints->valid_sw_formats == NULL)
        return AVERROR(ENOMEM);

    while (idx < nb_sw) {
        constraints->valid_sw_formats[idx] = supported_formats[idx];
        idx++;
    }
    constraints->valid_sw_formats[nb_sw] = AV_PIX_FMT_NONE;

    /* Hardware formats: only AV_PIX_FMT_CUDA, followed by the terminator. */
    constraints->valid_hw_formats =
        av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
    if (constraints->valid_hw_formats == NULL)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}
static void cuda_buffer_free(void *opaque, uint8_t *data)
{
AVHWFramesContext *ctx = opaque;
AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
CudaFunctions *cu = hwctx->internal->cuda_dl;
cu->cuCtxPushCurrent(hwctx->cuda_ctx);
cu->cuMemFree((CUdeviceptr)data);
cu->cuCtxPopCurrent(&dummy);
}
/**
 * Buffer-pool allocation callback: allocate one frame-sized block of device
 * memory and wrap it in an AVBufferRef released through cuda_buffer_free().
 *
 * @param opaque the AVHWFramesContext owning the pool
 * @param size   number of bytes to allocate
 * @return the new buffer reference, or NULL if the context could not be made
 *         current, the device allocation failed, or the wrapper could not
 *         be created
 */
static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
{
    AVHWFramesContext *ctx = opaque;
    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
    CudaFunctions *cu = hwctx->internal->cuda_dl;

    AVBufferRef *ret = NULL;
    CUcontext dummy = NULL;
    CUdeviceptr data;
    CUresult err;

    err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
        return NULL;
    }

    err = cu->cuMemAlloc(&data, size);
    if (err != CUDA_SUCCESS)
        goto fail;

    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
    if (!ret) {
        /* No wrapper owns the memory, so release it here. */
        cu->cuMemFree(data);
        goto fail;
    }

fail:
    /* Shared exit: the context pushed above is popped on every path. */
    cu->cuCtxPopCurrent(&dummy);
    return ret;
}
/**
 * Validate the software format of a frames context, record its chroma
 * shifts, and create an internal buffer pool if the caller supplied none.
 *
 * @param ctx frames context being initialised
 * @return 0 on success; AVERROR(ENOSYS) if sw_format is not in
 *         supported_formats; AVERROR_BUG if a supported format has no size
 *         rule here; AVERROR(ENOMEM) if the pool cannot be created
 */
static int cuda_frames_init(AVHWFramesContext *ctx)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
        if (ctx->sw_format == supported_formats[i])
            break;
    }
    if (i == FF_ARRAY_ELEMS(supported_formats)) {
        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
               av_get_pix_fmt_name(ctx->sw_format));
        return AVERROR(ENOSYS);
    }

    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);

    /* Only build our own pool when the user did not provide one. */
    if (!ctx->pool) {
        int size;

        switch (ctx->sw_format) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_YUV420P:
            size = aligned_width * ctx->height * 3 / 2;
            break;
        case AV_PIX_FMT_YUV444P:
        case AV_PIX_FMT_P010:
        case AV_PIX_FMT_P016:
            size = aligned_width * ctx->height * 3;
            /* Without this break the formats above would fall into the
             * default branch and be rejected as a bug. */
            break;
        default:
            av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
            return AVERROR_BUG;
        }

        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
        if (!ctx->internal->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}
/**
 * Take one buffer from the frames pool and set up the plane pointers and
 * line sizes of @p frame for the context's software format.
 *
 * @param ctx   frames context providing the pool, dimensions and sw_format
 * @param frame frame to populate; format is set to AV_PIX_FMT_CUDA
 * @return 0 on success; AVERROR(ENOMEM) if the pool yields no buffer;
 *         AVERROR_BUG (after unreferencing the frame) for a format with no
 *         layout rule here
 */
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
{
    int aligned_width;
    int width_in_bytes = ctx->width;

    /* P010/P016 rows are counted at two bytes per sample. */
    if (ctx->sw_format == AV_PIX_FMT_P010 ||
        ctx->sw_format == AV_PIX_FMT_P016) {
        width_in_bytes *= 2;
    }
    aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);

    frame->buf[0] = av_buffer_pool_get(ctx->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    switch (ctx->sw_format) {
    case AV_PIX_FMT_NV12:
    case AV_PIX_FMT_P010:
    case AV_PIX_FMT_P016:
        /* Two planes sharing the same line size, stored back to back. */
        frame->data[0]     = frame->buf[0]->data;
        frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
        frame->linesize[0] = aligned_width;
        frame->linesize[1] = aligned_width;
        break;
    case AV_PIX_FMT_YUV420P:
        /* Three planes; note that plane 2 precedes plane 1 in the buffer. */
        frame->data[0]     = frame->buf[0]->data;
        frame->data[2]     = frame->data[0] + aligned_width * ctx->height;
        frame->data[1]     = frame->data[2] + aligned_width * ctx->height / 4;
        frame->linesize[0] = aligned_width;
        frame->linesize[1] = aligned_width / 2;
        frame->linesize[2] = aligned_width / 2;
        break;
    case AV_PIX_FMT_YUV444P:
        /* Three equally sized planes stored consecutively. */
        frame->data[0]     = frame->buf[0]->data;
        frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
        frame->data[2]     = frame->data[1] + aligned_width * ctx->height;
        frame->linesize[0] = aligned_width;
        frame->linesize[1] = aligned_width;
        frame->linesize[2] = aligned_width;
        break;
    default:
        /* Drop the buffer acquired above before reporting the error. */
        av_frame_unref(frame);
        return AVERROR_BUG;
    }

    frame->format = AV_PIX_FMT_CUDA;
    frame->width  = ctx->width;
    frame->height = ctx->height;

    return 0;
}
/**
 * Report the formats usable for transfers to or from these frames.
 *
 * The list always holds exactly the context's sw_format followed by
 * AV_PIX_FMT_NONE; the transfer direction does not influence it.
 *
 * @param ctx     frames context whose sw_format is reported
 * @param dir     transfer direction (not used here)
 * @param formats receives the newly allocated list on success
 * @return 0 on success, AVERROR(ENOMEM) if the list cannot be allocated
 */
static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
                                     enum AVHWFrameTransferDirection dir,
                                     enum AVPixelFormat **formats)
{
    enum AVPixelFormat *list = av_malloc_array(2, sizeof(*list));

    if (list) {
        list[0] = ctx->sw_format;
        list[1] = AV_PIX_FMT_NONE;

        *formats = list;
        return 0;
    }

    return AVERROR(ENOMEM);
}
/**
 * Download a CUDA frame into host memory, one plane at a time.
 *
 * Planes are copied with cuMemcpy2D() until the first NULL src->data entry.
 * Each row copies the smaller of the two line sizes; planes after the first
 * use the source height shifted right by the vertical chroma shift.
 *
 * @param ctx frames context the source frame belongs to
 * @param dst destination frame whose data pointers are used as host memory
 * @param src source frame whose data pointers are used as device memory
 * @return 0 on success, AVERROR_UNKNOWN if the context cannot be made
 *         current or a copy fails
 */
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    CudaFunctions *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
    CUresult err;
    int ret = 0;
    int i;

    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstMemoryType = CU_MEMORYTYPE_HOST,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .dstHost       = dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cu->cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
            /* Leave the loop instead of returning so the context pushed
             * above is still popped. */
            ret = AVERROR_UNKNOWN;
            break;
        }
    }

    cu->cuCtxPopCurrent(&dummy);

    return ret;
}
/**
 * Upload a host frame into a CUDA frame, one plane at a time.
 *
 * Planes are copied with cuMemcpy2D() until the first NULL src->data entry.
 * Each row copies the smaller of the two line sizes; planes after the first
 * use the source height shifted right by the vertical chroma shift.
 *
 * @param ctx frames context the destination frame belongs to
 * @param dst destination frame whose data pointers are used as device memory
 * @param src source frame whose data pointers are used as host memory
 * @return 0 on success, AVERROR_UNKNOWN if the context cannot be made
 *         current or a copy fails
 */
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                 const AVFrame *src)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
    CudaFunctions *cu = device_hwctx->internal->cuda_dl;

    CUcontext dummy;
    CUresult err;
    int ret = 0;
    int i;

    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
    if (err != CUDA_SUCCESS)
        return AVERROR_UNKNOWN;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_HOST,
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcHost       = src->data[i],
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        err = cu->cuMemcpy2D(&cpy);
        if (err != CUDA_SUCCESS) {
            /* This is the upload direction, so the message says "to". */
            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
            /* Leave the loop instead of returning so the context pushed
             * above is still popped. */
            ret = AVERROR_UNKNOWN;
            break;
        }
    }

    cu->cuCtxPopCurrent(&dummy);

    return ret;
}
/**
 * Release everything the device context holds internally.
 *
 * The CUDA context is destroyed only when the internal is_allocated flag is
 * set and a context is present. The dynamically loaded function table and
 * the internal structure itself are then freed.
 *
 * @param ctx device context to tear down
 */
static void cuda_device_uninit(AVHWDeviceContext *ctx)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;

    if (hwctx->internal) {
        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
            hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
            hwctx->cuda_ctx = NULL;
        }
        cuda_free_functions(&hwctx->internal->cuda_dl);
    }

    av_freep(&hwctx->internal);
}
/**
 * Prepare the device context for use: allocate the internal state if it is
 * missing and load the CUDA function table if it has not been loaded yet.
 *
 * @param ctx device context to initialise
 * @return 0 on success; AVERROR(ENOMEM) if the internal state cannot be
 *         allocated; otherwise the error from cuda_load_functions(), after
 *         the context has been torn down with cuda_device_uninit()
 */
static int cuda_device_init(AVHWDeviceContext *ctx)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;
    int ret;

    if (hwctx->internal == NULL) {
        hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
        if (hwctx->internal == NULL)
            return AVERROR(ENOMEM);
    }

    /* Function table already present: nothing left to do. */
    if (hwctx->internal->cuda_dl)
        return 0;

    ret = cuda_load_functions(&hwctx->internal->cuda_dl);
    if (ret >= 0)
        return 0;

    av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
    cuda_device_uninit(ctx);
    return ret;
}
/**
 * Create a CUDA device context for the requested device.
 *
 * @param ctx    device context to set up
 * @param device optional device index as a string, parsed with strtol()
 *               using automatic base detection; index 0 is used when NULL
 * @param opts   options dictionary (not used here)
 * @param flags  creation flags (not used here)
 * @return 0 on success; AVERROR_UNKNOWN on any failure, after the context
 *         has been torn down with cuda_device_uninit()
 */
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
                              AVDictionary *opts, int flags)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;
    CudaFunctions *cu;
    CUdevice cu_device;
    CUcontext dummy;
    CUresult err;
    int device_idx = 0;

    if (device)
        device_idx = strtol(device, NULL, 0);

    if (cuda_device_init(ctx) < 0)
        goto error;

    cu = hwctx->internal->cuda_dl;

    err = cu->cuInit(0);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
        goto error;
    }

    err = cu->cuDeviceGet(&cu_device, device_idx);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
        goto error;
    }

    err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
    if (err != CUDA_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
        goto error;
    }

    /* Pop the context right after creating it; the other functions in this
     * file push it explicitly around their CUDA calls. */
    cu->cuCtxPopCurrent(&dummy);

    /* Tells cuda_device_uninit() that the context is ours to destroy. */
    hwctx->internal->is_allocated = 1;

    return 0;

error:
    cuda_device_uninit(ctx);
    return AVERROR_UNKNOWN;
}
const HWContextType ff_hwcontext_type_cuda = {
.type = AV_HWDEVICE_TYPE_CUDA,
.name = "CUDA",
.device_hwctx_size = sizeof(AVCUDADeviceContext),
.frames_priv_size = sizeof(CUDAFramesContext),
.device_create = cuda_device_create,
.device_init = cuda_device_init,
.device_uninit = cuda_device_uninit,
.frames_get_constraints = cuda_frames_get_constraints,
.frames_init = cuda_frames_init,
.frames_get_buffer = cuda_get_buffer,
.transfer_get_formats = cuda_transfer_get_formats,
.transfer_data_to = cuda_transfer_data_to,
.transfer_data_from = cuda_transfer_data_from,
.pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
};