Newer
Older
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#define _SVID_SOURCE // needed for MAP_ANONYMOUS
#define _DARWIN_C_SOURCE // needed for MAP_ANON
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#if HAVE_VIRTUALALLOC
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
unsigned swscale_version(void)
{
av_assert0(LIBSWSCALE_VERSION_MICRO >= 100);
return LIBSWSCALE_VERSION_INT;
}
const char *swscale_configuration(void)
{
return FFMPEG_CONFIGURATION;
}
const char *swscale_license(void)
{
#define LICENSE_PREFIX "libswscale license: "
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
#define RET 0xC3 // near return opcode for x86
typedef struct FormatEntry {
} FormatEntry;
static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_YUV420P] = { 1, 1 },
[AV_PIX_FMT_YUYV422] = { 1, 1 },
[AV_PIX_FMT_RGB24] = { 1, 1 },
[AV_PIX_FMT_BGR24] = { 1, 1 },
[AV_PIX_FMT_YUV422P] = { 1, 1 },
[AV_PIX_FMT_YUV444P] = { 1, 1 },
[AV_PIX_FMT_YUV410P] = { 1, 1 },
[AV_PIX_FMT_YUV411P] = { 1, 1 },
[AV_PIX_FMT_GRAY8] = { 1, 1 },
[AV_PIX_FMT_MONOWHITE] = { 1, 1 },
[AV_PIX_FMT_MONOBLACK] = { 1, 1 },
[AV_PIX_FMT_PAL8] = { 1, 0 },
[AV_PIX_FMT_YUVJ420P] = { 1, 1 },
[AV_PIX_FMT_YUVJ422P] = { 1, 1 },
[AV_PIX_FMT_YUVJ444P] = { 1, 1 },
[AV_PIX_FMT_UYVY422] = { 1, 1 },
[AV_PIX_FMT_UYYVYY411] = { 0, 0 },
[AV_PIX_FMT_BGR8] = { 1, 1 },
[AV_PIX_FMT_BGR4] = { 0, 1 },
[AV_PIX_FMT_BGR4_BYTE] = { 1, 1 },
[AV_PIX_FMT_RGB8] = { 1, 1 },
[AV_PIX_FMT_RGB4] = { 0, 1 },
[AV_PIX_FMT_RGB4_BYTE] = { 1, 1 },
[AV_PIX_FMT_NV12] = { 1, 1 },
[AV_PIX_FMT_NV21] = { 1, 1 },
[AV_PIX_FMT_ARGB] = { 1, 1 },
[AV_PIX_FMT_RGBA] = { 1, 1 },
[AV_PIX_FMT_ABGR] = { 1, 1 },
[AV_PIX_FMT_BGRA] = { 1, 1 },
[AV_PIX_FMT_0RGB] = { 1, 1 },
[AV_PIX_FMT_RGB0] = { 1, 1 },
[AV_PIX_FMT_0BGR] = { 1, 1 },
[AV_PIX_FMT_BGR0] = { 1, 1 },
[AV_PIX_FMT_GRAY16BE] = { 1, 1 },
[AV_PIX_FMT_GRAY16LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P] = { 1, 1 },
[AV_PIX_FMT_YUVJ440P] = { 1, 1 },
[AV_PIX_FMT_YUVA420P] = { 1, 1 },
[AV_PIX_FMT_YUVA422P] = { 1, 1 },
[AV_PIX_FMT_YUVA444P] = { 1, 1 },
[AV_PIX_FMT_YUVA420P9BE] = { 1, 1 },
[AV_PIX_FMT_YUVA420P9LE] = { 1, 1 },
[AV_PIX_FMT_YUVA422P9BE] = { 1, 1 },
[AV_PIX_FMT_YUVA422P9LE] = { 1, 1 },
[AV_PIX_FMT_YUVA444P9BE] = { 1, 1 },
[AV_PIX_FMT_YUVA444P9LE] = { 1, 1 },
[AV_PIX_FMT_YUVA420P10BE]= { 1, 1 },
[AV_PIX_FMT_YUVA420P10LE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P10BE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P10LE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P10BE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P10LE]= { 1, 1 },
[AV_PIX_FMT_YUVA420P16BE]= { 1, 1 },
[AV_PIX_FMT_YUVA420P16LE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P16BE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P16LE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P16BE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P16LE]= { 1, 1 },
[AV_PIX_FMT_RGB48BE] = { 1, 1 },
[AV_PIX_FMT_RGB48LE] = { 1, 1 },
[AV_PIX_FMT_RGBA64BE] = { 1, 0 },
[AV_PIX_FMT_RGBA64LE] = { 1, 0 },
[AV_PIX_FMT_RGB565BE] = { 1, 1 },
[AV_PIX_FMT_RGB565LE] = { 1, 1 },
[AV_PIX_FMT_RGB555BE] = { 1, 1 },
[AV_PIX_FMT_RGB555LE] = { 1, 1 },
[AV_PIX_FMT_BGR565BE] = { 1, 1 },
[AV_PIX_FMT_BGR565LE] = { 1, 1 },
[AV_PIX_FMT_BGR555BE] = { 1, 1 },
[AV_PIX_FMT_BGR555LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P16LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P16BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P16LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P16BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P16LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P16BE] = { 1, 1 },
[AV_PIX_FMT_RGB444LE] = { 1, 1 },
[AV_PIX_FMT_RGB444BE] = { 1, 1 },
[AV_PIX_FMT_BGR444LE] = { 1, 1 },
[AV_PIX_FMT_BGR444BE] = { 1, 1 },
[AV_PIX_FMT_Y400A] = { 1, 0 },
[AV_PIX_FMT_BGR48BE] = { 1, 1 },
[AV_PIX_FMT_BGR48LE] = { 1, 1 },
[AV_PIX_FMT_BGRA64BE] = { 0, 0 },
[AV_PIX_FMT_BGRA64LE] = { 0, 0 },
[AV_PIX_FMT_YUV420P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P12BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P14BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P14LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P12BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P14BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P14LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P12BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P14BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P14LE] = { 1, 1 },
[AV_PIX_FMT_GBRP9LE] = { 1, 1 },
[AV_PIX_FMT_GBRP9BE] = { 1, 1 },
[AV_PIX_FMT_GBRP10LE] = { 1, 1 },
[AV_PIX_FMT_GBRP10BE] = { 1, 1 },
[AV_PIX_FMT_GBRP12LE] = { 1, 1 },
[AV_PIX_FMT_GBRP12BE] = { 1, 1 },
[AV_PIX_FMT_GBRP14LE] = { 1, 1 },
[AV_PIX_FMT_GBRP14BE] = { 1, 1 },
[AV_PIX_FMT_GBRP16LE] = { 1, 0 },
[AV_PIX_FMT_GBRP16BE] = { 1, 0 },
int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
{
return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
format_entries[pix_fmt].is_supported_in : 0;
}
int sws_isSupportedOutput(enum AVPixelFormat pix_fmt)
{
return (unsigned)pix_fmt < AV_PIX_FMT_NB ?
format_entries[pix_fmt].is_supported_out : 0;
}
extern const int32_t ff_yuv2rgb_coeffs[8][4];
#if FF_API_SWS_FORMAT_NAME
const char *sws_format_name(enum AVPixelFormat format)
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
if (desc)
return desc->name;
else
return "Unknown format";
static double getSplineCoeff(double a, double b, double c, double d,
double dist)
{
if (dist <= 1.0)
return ((d * dist + c) * dist + b) * dist + a;
else
return getSplineCoeff(0.0,
b + 2.0 * c + 3.0 * d,
c + 3.0 * d,
-b - 3.0 * c - 6.0 * d,
dist - 1.0);
}
static int initFilter(int16_t **outFilter, int32_t **filterPos,
int *outFilterSize, int xInc, int srcW, int dstW,
int filterAlign, int one, int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter,
{
int i;
int filterSize;
int filter2Size;
int minFilterSize;
int64_t *filter = NULL;
int64_t *filter2 = NULL;
const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8));
emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
// NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail);
if (FFABS(xInc - 0x10000) < 10) { // unscaled
int i;
filterSize = 1;
FF_ALLOCZ_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
for (i = 0; i < dstW; i++) {
filter[i * filterSize] = fone;
(*filterPos)[i] = i;
}
} else if (flags & SWS_POINT) { // lame looking point sampling mode
int i;
int64_t xDstInSrc;
filterSize = 1;
FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
xDstInSrc = xInc / 2 - 0x8000;
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
(*filterPos)[i] = xx;
filter[i] = fone;
xDstInSrc += xInc;
}
} else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
(flags & SWS_FAST_BILINEAR)) { // bilinear upscale
int i;
int64_t xDstInSrc;
filterSize = 2;
FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
xDstInSrc = xInc / 2 - 0x8000;
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
int j;
(*filterPos)[i] = xx;
// bilinear upscale / linear interpolate / area averaging
for (j = 0; j < filterSize; j++) {
int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
if (coeff < 0)
coeff = 0;
filter[i * filterSize + j] = coeff;
xx++;
}
}
} else {
int64_t xDstInSrc;
int sizeFactor;
if (flags & SWS_BICUBIC)
sizeFactor = 4;
else if (flags & SWS_X)
sizeFactor = 8;
else if (flags & SWS_AREA)
sizeFactor = 1; // downscale only, for upscale it is bilinear
else if (flags & SWS_GAUSS)
sizeFactor = 8; // infinite ;)
else if (flags & SWS_LANCZOS)
sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
else if (flags & SWS_SINC)
sizeFactor = 20; // infinite ;)
else if (flags & SWS_SPLINE)
sizeFactor = 20; // infinite ;)
else if (flags & SWS_BILINEAR)
sizeFactor = 2;
else {
}
if (xInc <= 1 << 16)
filterSize = 1 + sizeFactor; // upscale
else
filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
filterSize = FFMIN(filterSize, srcW - 2);
filterSize = FFMAX(filterSize, 1);
FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
xDstInSrc = xInc - 0x10000;
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
int j;
(*filterPos)[i] = xx;
for (j = 0; j < filterSize; j++) {
int64_t d = (FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
double floatd;
int64_t coeff;
if (xInc > 1 << 16)
d = d * dstW / srcW;
floatd = d * (1.0 / (1 << 30));
if (flags & SWS_BICUBIC) {
int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
coeff = 0.0;
} else {
int64_t dd = (d * d) >> 30;
int64_t ddd = (dd * d) >> 30;
if (d < 1LL << 30)
coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
(-18 * (1 << 24) + 12 * B + 6 * C) * dd +
(6 * (1 << 24) - 2 * B) * (1 << 30);
coeff = (-B - 6 * C) * ddd +
(6 * B + 30 * C) * dd +
(-12 * B - 48 * C) * d +
(8 * B + 24 * C) * (1 << 30);
}
#if 0
else if (flags & SWS_X) {
double p = param ? param * 0.01 : 0.3;
coeff = d ? sin(d * M_PI) / (d * M_PI) : 1.0;
coeff *= pow(2.0, -p * d * d);
}
else if (flags & SWS_X) {
double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
double c;
if (floatd < 1.0)
c = cos(floatd * M_PI);
else
c = -1.0;
if (c < 0.0)
c = -pow(-c, A);
else
c = pow(c, A);
coeff = (c * 0.5 + 0.5) * fone;
} else if (flags & SWS_AREA) {
int64_t d2 = d - (1 << 29);
if (d2 * xInc < -(1LL << (29 + 16)))
coeff = 1.0 * (1LL << (30 + 16));
else if (d2 * xInc < (1LL << (29 + 16)))
coeff = -d2 * xInc + (1LL << (29 + 16));
else
coeff = 0.0;
coeff *= fone >> (30 + 16);
} else if (flags & SWS_GAUSS) {
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (pow(2.0, -p * floatd * floatd)) * fone;
} else if (flags & SWS_SINC) {
coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
} else if (flags & SWS_LANCZOS) {
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
(floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
if (floatd > p)
coeff = 0;
} else if (flags & SWS_BILINEAR) {
coeff = (1 << 30) - d;
if (coeff < 0)
coeff = 0;
coeff *= fone >> 30;
} else if (flags & SWS_SPLINE) {
double p = -2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
} else {
}
filter[i * filterSize + j] = coeff;
xx++;
}
}
}
/* apply src & dst Filter to filter -> filter2
filter2Size = filterSize;
if (srcFilter)
filter2Size += srcFilter->length - 1;
if (dstFilter)
filter2Size += dstFilter->length - 1;
FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
for (i = 0; i < dstW; i++) {
int j, k;
if (srcFilter) {
for (k = 0; k < srcFilter->length; k++) {
for (j = 0; j < filterSize; j++)
filter2[i * filter2Size + k + j] +=
srcFilter->coeff[k] * filter[i * filterSize + j];
}
} else {
for (j = 0; j < filterSize; j++)
filter2[i * filter2Size + j] = filter[i * filterSize + j];
}
(*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
}
av_freep(&filter);
/* try to reduce the filter-size (step1 find size and shift left) */
// Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
minFilterSize = 0;
for (i = dstW - 1; i >= 0; i--) {
int min = filter2Size;
int j;
/* get rid of near zero elements on the left by shifting left */
for (j = 0; j < filter2Size; j++) {
int k;
cutOff += FFABS(filter2[i * filter2Size]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
break;
/* preserve monotonicity because the core can't handle the
* filter otherwise */
if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
break;
// move filter coefficients left
for (k = 1; k < filter2Size; k++)
filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
filter2[i * filter2Size + k - 1] = 0;
(*filterPos)[i]++;
}
/* count near zeros on the right */
for (j = filter2Size - 1; j > 0; j--) {
cutOff += FFABS(filter2[i * filter2Size + j]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
break;
min--;
}
if (min > minFilterSize)
minFilterSize = min;
}
if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) {
// we can handle the special case 4, so we don't want to go the full 8
if (minFilterSize < 5)
filterAlign = 4;
/* We really don't want to waste our time doing useless computation, so
* fall back on the scalar C code for very small filters.
* Vectorizing is worth it only if you have a decent-sized vector. */
if (minFilterSize < 3)
filterAlign = 1;
}
if (INLINE_MMX(cpu_flags)) {
// special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2)
}
filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
filter = av_malloc(filterSize * dstW * sizeof(*filter));
if (filterSize >= MAX_FILTER_SIZE * 16 /
((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter) {
av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreem scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize);
goto fail;
*outFilterSize = filterSize;
if (flags & SWS_PRINT_INFO)
av_log(NULL, AV_LOG_VERBOSE,
"SwScaler: reducing / aligning filtersize %d -> %d\n",
filter2Size, filterSize);
/* try to reduce the filter-size (step2 reduce it) */
for (i = 0; i < dstW; i++) {
int j;
for (j = 0; j < filterSize; j++) {
if (j >= filter2Size)
filter[i * filterSize + j] = 0;
else
filter[i * filterSize + j] = filter2[i * filter2Size + j];
if ((flags & SWS_BITEXACT) && j >= minFilterSize)
filter[i * filterSize + j] = 0;
}
}
// FIXME try to align filterPos if possible
for (i = 0; i < dstW; i++) {
int j;
if ((*filterPos)[i] < 0) {
// move filter coefficients left to compensate for filterPos
for (j = 1; j < filterSize; j++) {
int left = FFMAX(j + (*filterPos)[i], 0);
filter[i * filterSize + left] += filter[i * filterSize + j];
filter[i * filterSize + j] = 0;
}
(*filterPos)[i]= 0;
}
if ((*filterPos)[i] + filterSize > srcW) {
int shift = (*filterPos)[i] + filterSize - srcW;
// move filter coefficients right to compensate for filterPos
for (j = filterSize - 2; j >= 0; j--) {
int right = FFMIN(j + shift, filterSize - 1);
filter[i * filterSize + right] += filter[i * filterSize + j];
filter[i * filterSize + j] = 0;
}
(*filterPos)[i]= srcW - filterSize;
}
}
// Note the +1 is for the MMX scaler which reads over the end
/* align at 16 for AltiVec (needed by hScale_altivec_real) */
FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
*outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
/* normalize & store in outFilter */
for (i = 0; i < dstW; i++) {
int j;
int64_t error = 0;
int64_t sum = 0;
for (j = 0; j < filterSize; j++) {
sum += filter[i * filterSize + j];
}
sum = (sum + one / 2) / one;
for (j = 0; j < *outFilterSize; j++) {
int64_t v = filter[i * filterSize + j] + error;
int intV = ROUNDED_DIV(v, sum);
(*outFilter)[i * (*outFilterSize) + j] = intV;
error = v - intV * sum;
}
}
(*filterPos)[dstW + 0] =
(*filterPos)[dstW + 1] =
(*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will
* read over the end */
for (i = 0; i < *outFilterSize; i++) {
int k = (dstW - 1) * (*outFilterSize) + i;
(*outFilter)[k + 1 * (*outFilterSize)] =
(*outFilter)[k + 2 * (*outFilterSize)] =
(*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
}
fail:
if(ret < 0)
av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n");
av_free(filter);
av_free(filter2);
return ret;
}
static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
int16_t *filter, int32_t *filterPos,
int numSplits)
{
uint8_t *fragmentA;
x86_reg imm8OfPShufW1A;
x86_reg imm8OfPShufW2A;
x86_reg fragmentLengthA;
uint8_t *fragmentB;
x86_reg imm8OfPShufW1B;
x86_reg imm8OfPShufW2B;
x86_reg fragmentLengthB;
int fragmentPos;
int xpos, i;
// create an optimized horizontal scaling routine
/* This scaler is made of runtime-generated MMXEXT code using specially tuned
* pshufw instructions. For every four output pixels, if four input pixels
* are enough for the fast bilinear scaling, then a chunk of fragmentB is
* used. If five input pixels are needed, then a chunk of fragmentA is used.
*/
"jmp 9f \n\t"
"0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
"movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm1, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
"9: \n\t"
// "int $3 \n\t"
"lea " LOCAL_MANGLE(0b) ", %0 \n\t"
"lea " LOCAL_MANGLE(1b) ", %1 \n\t"
"lea " LOCAL_MANGLE(2b) ", %2 \n\t"
"dec %1 \n\t"
"dec %2 \n\t"
"sub %0, %1 \n\t"
"sub %0, %2 \n\t"
"lea " LOCAL_MANGLE(9b) ", %3 \n\t"
"sub %0, %3 \n\t"
: "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
"=r" (fragmentLengthA)
);
"jmp 9f \n\t"
"0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm0, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
"9: \n\t"
// "int $3 \n\t"
"lea " LOCAL_MANGLE(0b) ", %0 \n\t"
"lea " LOCAL_MANGLE(1b) ", %1 \n\t"
"lea " LOCAL_MANGLE(2b) ", %2 \n\t"
"dec %1 \n\t"
"dec %2 \n\t"
"sub %0, %1 \n\t"
"sub %0, %2 \n\t"
"lea " LOCAL_MANGLE(9b) ", %3 \n\t"
"sub %0, %3 \n\t"
: "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
"=r" (fragmentLengthB)
);
xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos = 0;
for (i = 0; i < dstW / numSplits; i++) {
int xx = xpos >> 16;
if ((i & 3) == 0) {
int a = 0;
int b = ((xpos + xInc) >> 16) - xx;
int c = ((xpos + xInc * 2) >> 16) - xx;
int d = ((xpos + xInc * 3) >> 16) - xx;
int inc = (d + 1 < 4);
uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA;
x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A;
x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A;
x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA;
int maxShift = 3 - (d + inc);
int shift = 0;
if (filterCode) {
filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
filterPos[i / 2] = xx;
memcpy(filterCode + fragmentPos, fragment, fragmentLength);
filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
((b + inc) << 2) |
((c + inc) << 4) |
((d + inc) << 6);
filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
(c << 4) |
(d << 6);
if (i + 4 - inc >= dstW)
shift = maxShift; // avoid overread
else if ((filterPos[i / 2] & 3) <= maxShift)
shift = filterPos[i / 2] & 3; // align
if (shift && i >= shift) {
filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
filterPos[i / 2] -= shift;
}
}
fragmentPos += fragmentLength;
if (filterCode)
filterCode[fragmentPos] = RET;
}
}
if (filterCode)
filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part
return fragmentPos + 1;
}
#endif /* HAVE_MMXEXT_INLINE */
static void getSubSampleFactors(int *h, int *v, enum AVPixelFormat format)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
*h = desc->log2_chroma_w;
*v = desc->log2_chroma_h;
}
static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange)
{
int64_t W,V,Z;
int64_t vr = table[0];
int64_t ub = table[1];
int64_t ug = -table[2];
int64_t vg = -table[3];
int64_t ONE = 65536;
int64_t cy = ONE;
uint8_t *p = (uint8_t*)c->input_rgb2yuv_table;
int i;
static const int8_t map[] = {
BY_IDX, GY_IDX, -1 , BY_IDX, BY_IDX, GY_IDX, -1 , BY_IDX,
RY_IDX, -1 , GY_IDX, RY_IDX, RY_IDX, -1 , GY_IDX, RY_IDX,
RY_IDX, GY_IDX, -1 , RY_IDX, RY_IDX, GY_IDX, -1 , RY_IDX,
BY_IDX, -1 , GY_IDX, BY_IDX, BY_IDX, -1 , GY_IDX, BY_IDX,
BU_IDX, GU_IDX, -1 , BU_IDX, BU_IDX, GU_IDX, -1 , BU_IDX,
RU_IDX, -1 , GU_IDX, RU_IDX, RU_IDX, -1 , GU_IDX, RU_IDX,
RU_IDX, GU_IDX, -1 , RU_IDX, RU_IDX, GU_IDX, -1 , RU_IDX,
BU_IDX, -1 , GU_IDX, BU_IDX, BU_IDX, -1 , GU_IDX, BU_IDX,
BV_IDX, GV_IDX, -1 , BV_IDX, BV_IDX, GV_IDX, -1 , BV_IDX,
RV_IDX, -1 , GV_IDX, RV_IDX, RV_IDX, -1 , GV_IDX, RV_IDX,
RV_IDX, GV_IDX, -1 , RV_IDX, RV_IDX, GV_IDX, -1 , RV_IDX,
BV_IDX, -1 , GV_IDX, BV_IDX, BV_IDX, -1 , GV_IDX, BV_IDX,
RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX,
BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX,
GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 ,
-1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX,
RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX,
BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX,
GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 ,
-1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX,
RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX,
BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX,
GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 ,
-1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX,
};
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
dstRange = 0; //FIXME range = 1 is handled elsewhere
if (!dstRange) {
cy = cy * 255 / 219;
} else {
vr = vr * 224 / 255;
ub = ub * 224 / 255;
ug = ug * 224 / 255;
vg = vg * 224 / 255;
}
W = ONE*ug/ub;
V = ONE*vg/vr;
Z = ONE-W-V;
c->input_rgb2yuv_table[RY_IDX] = -(1 << RGB2YUV_SHIFT)*ONE*V/(Z*cy);
c->input_rgb2yuv_table[GY_IDX] = (1 << RGB2YUV_SHIFT)*ONE*ONE/(Z*cy);
c->input_rgb2yuv_table[BY_IDX] = -(1 << RGB2YUV_SHIFT)*ONE*W/(Z*cy);
c->input_rgb2yuv_table[RU_IDX] = (1 << RGB2YUV_SHIFT)*ONE*V/(Z*ub);
c->input_rgb2yuv_table[GU_IDX] = -(1 << RGB2YUV_SHIFT)*ONE*ONE/(Z*ub);
c->input_rgb2yuv_table[BU_IDX] = (1 << RGB2YUV_SHIFT)*(ONE + ONE*W/Z)/ub;
c->input_rgb2yuv_table[RV_IDX] = (1 << RGB2YUV_SHIFT)*(ONE + ONE*V/Z)/vr;
c->input_rgb2yuv_table[GV_IDX] = -(1 << RGB2YUV_SHIFT)*ONE*ONE/(Z*vr);
c->input_rgb2yuv_table[BV_IDX] = (1 << RGB2YUV_SHIFT)*ONE*W/(Z*vr);
if(/*!dstRange && */table == ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]) {
c->input_rgb2yuv_table[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
}
for(i=0; i<FF_ARRAY_ELEMS(map); i++)
AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0);
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
int srcRange, const int table[4], int dstRange,
int brightness, int contrast, int saturation)
{
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat);
memcpy(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
memcpy(c->dstColorspaceTable, table, sizeof(int) * 4);
Michael Niedermayer
committed
if(!isYUV(c->dstFormat) && !isGray(c->dstFormat))
dstRange = 0;
if(!isYUV(c->srcFormat) && !isGray(c->srcFormat))
srcRange = 0;
c->brightness = brightness;
c->contrast = contrast;
c->saturation = saturation;
c->srcRange = srcRange;
c->dstRange = dstRange;
Michael Niedermayer
committed
if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat)))
c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
Michael Niedermayer
committed
ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
contrast, saturation);
// FIXME factorize
if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)
ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness,
contrast, saturation);
fill_rgb2yuv_table(c, table, dstRange);
return 0;
}
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
int *srcRange, int **table, int *dstRange,
int *brightness, int *contrast, int *saturation)
{
if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat))
*inv_table = c->srcColorspaceTable;
*table = c->dstColorspaceTable;
*srcRange = c->srcRange;
*dstRange = c->dstRange;
*brightness = c->brightness;
*contrast = c->contrast;
*saturation = c->saturation;
return 0;
}
static int handle_jpeg(enum AVPixelFormat *format)
{
switch (*format) {
case AV_PIX_FMT_YUVJ420P:
*format = AV_PIX_FMT_YUV420P;
case AV_PIX_FMT_YUVJ422P:
*format = AV_PIX_FMT_YUV422P;
case AV_PIX_FMT_YUVJ444P:
*format = AV_PIX_FMT_YUV444P;
case AV_PIX_FMT_YUVJ440P:
*format = AV_PIX_FMT_YUV440P;
return 1;
default:
return 0;
static int handle_0alpha(enum AVPixelFormat *format)
case AV_PIX_FMT_0BGR : *format = AV_PIX_FMT_ABGR ; return 1;
case AV_PIX_FMT_BGR0 : *format = AV_PIX_FMT_BGRA ; return 4;
case AV_PIX_FMT_0RGB : *format = AV_PIX_FMT_ARGB ; return 1;
case AV_PIX_FMT_RGB0 : *format = AV_PIX_FMT_RGBA ; return 4;
Stefano Sabatini
committed
default: return 0;
}
}
SwsContext *sws_alloc_context(void)
{
SwsContext *c = av_mallocz(sizeof(SwsContext));
Michael Niedermayer
committed
if (c) {
c->av_class = &sws_context_class;
av_opt_set_defaults(c);
}
Michael Niedermayer
committed
return c;
}
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
int usesVFilter, usesHFilter;
int unscaled;
SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
int srcW = c->srcW;
int srcH = c->srcH;
int dstW = c->dstW;
int dstH = c->dstH;
int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
int flags, cpu_flags;
enum AVPixelFormat srcFormat = c->srcFormat;
enum AVPixelFormat dstFormat = c->dstFormat;
const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat);
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat);
Michael Niedermayer
committed
cpu_flags = av_get_cpu_flags();