/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define _DEFAULT_SOURCE
#define _SVID_SOURCE // needed for MAP_ANONYMOUS
#define _DARWIN_C_SOURCE // needed for MAP_ANON
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#if HAVE_VIRTUALALLOC
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/imgutils.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/aarch64/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
// We have to implement deprecated functions until they are removed, this is the
// simplest way to prevent warnings
#undef attribute_deprecated
#define attribute_deprecated
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
#if !FF_API_SWS_VECTOR
static SwsVector *sws_getIdentityVec(void);
static void sws_addVec(SwsVector *a, SwsVector *b);
static void sws_shiftVec(SwsVector *a, int shift);
static void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level);
#endif
static void handle_formats(SwsContext *c);
/**
 * Report the library version as the packed LIBSWSCALE_VERSION_INT value.
 *
 * Asserts that the micro version component is at least 100 before
 * returning.
 */
unsigned swscale_version(void)
{
    const unsigned packed_version = LIBSWSCALE_VERSION_INT;

    av_assert0(LIBSWSCALE_VERSION_MICRO >= 100);

    return packed_version;
}
/**
 * Return the build-time configuration string (FFMPEG_CONFIGURATION).
 *
 * The returned pointer refers to constant storage; callers must not free
 * or modify it.
 */
const char *swscale_configuration(void)
{
    const char *const configuration = FFMPEG_CONFIGURATION;

    return configuration;
}
const char *swscale_license(void)
{
#define LICENSE_PREFIX "libswscale license: "
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
/**
 * Capability flags for one pixel format, stored in format_entries[] and
 * indexed by enum AVPixelFormat. Each flag is a single bit; the table
 * initializes them positionally, so the field order matters.
 */
typedef struct FormatEntry {
uint8_t is_supported_in :1; ///< value reported by sws_isSupportedInput()
uint8_t is_supported_out :1; ///< value reported by sws_isSupportedOutput()
uint8_t is_supported_endianness :1; ///< value reported by sws_isSupportedEndiannessConversion()
} FormatEntry;
static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_YUV420P] = { 1, 1 },
[AV_PIX_FMT_YUYV422] = { 1, 1 },
[AV_PIX_FMT_RGB24] = { 1, 1 },
[AV_PIX_FMT_BGR24] = { 1, 1 },
[AV_PIX_FMT_YUV422P] = { 1, 1 },
[AV_PIX_FMT_YUV444P] = { 1, 1 },
[AV_PIX_FMT_YUV410P] = { 1, 1 },
[AV_PIX_FMT_YUV411P] = { 1, 1 },
[AV_PIX_FMT_GRAY8] = { 1, 1 },
[AV_PIX_FMT_MONOWHITE] = { 1, 1 },
[AV_PIX_FMT_MONOBLACK] = { 1, 1 },
[AV_PIX_FMT_PAL8] = { 1, 0 },
[AV_PIX_FMT_YUVJ420P] = { 1, 1 },
[AV_PIX_FMT_YUVJ422P] = { 1, 1 },
[AV_PIX_FMT_YUVJ444P] = { 1, 1 },
[AV_PIX_FMT_YVYU422] = { 1, 1 },
[AV_PIX_FMT_UYVY422] = { 1, 1 },
[AV_PIX_FMT_UYYVYY411] = { 0, 0 },
[AV_PIX_FMT_BGR8] = { 1, 1 },
[AV_PIX_FMT_BGR4] = { 0, 1 },
[AV_PIX_FMT_BGR4_BYTE] = { 1, 1 },
[AV_PIX_FMT_RGB8] = { 1, 1 },
[AV_PIX_FMT_RGB4] = { 0, 1 },
[AV_PIX_FMT_RGB4_BYTE] = { 1, 1 },
[AV_PIX_FMT_NV12] = { 1, 1 },
[AV_PIX_FMT_NV21] = { 1, 1 },
[AV_PIX_FMT_ARGB] = { 1, 1 },
[AV_PIX_FMT_RGBA] = { 1, 1 },
[AV_PIX_FMT_ABGR] = { 1, 1 },
[AV_PIX_FMT_BGRA] = { 1, 1 },
[AV_PIX_FMT_0RGB] = { 1, 1 },
[AV_PIX_FMT_RGB0] = { 1, 1 },
[AV_PIX_FMT_0BGR] = { 1, 1 },
[AV_PIX_FMT_BGR0] = { 1, 1 },
[AV_PIX_FMT_GRAY9BE] = { 1, 1 },
[AV_PIX_FMT_GRAY9LE] = { 1, 1 },
[AV_PIX_FMT_GRAY10BE] = { 1, 1 },
[AV_PIX_FMT_GRAY10LE] = { 1, 1 },
[AV_PIX_FMT_GRAY12BE] = { 1, 1 },
[AV_PIX_FMT_GRAY12LE] = { 1, 1 },
[AV_PIX_FMT_GRAY14BE] = { 1, 1 },
[AV_PIX_FMT_GRAY14LE] = { 1, 1 },
[AV_PIX_FMT_GRAY16BE] = { 1, 1 },
[AV_PIX_FMT_GRAY16LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P] = { 1, 1 },
[AV_PIX_FMT_YUVJ440P] = { 1, 1 },
[AV_PIX_FMT_YUV440P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV440P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P12BE] = { 1, 1 },
[AV_PIX_FMT_YUVA420P] = { 1, 1 },
[AV_PIX_FMT_YUVA422P] = { 1, 1 },
[AV_PIX_FMT_YUVA444P] = { 1, 1 },
[AV_PIX_FMT_YUVA420P9BE] = { 1, 1 },
[AV_PIX_FMT_YUVA420P9LE] = { 1, 1 },
[AV_PIX_FMT_YUVA422P9BE] = { 1, 1 },
[AV_PIX_FMT_YUVA422P9LE] = { 1, 1 },
[AV_PIX_FMT_YUVA444P9BE] = { 1, 1 },
[AV_PIX_FMT_YUVA444P9LE] = { 1, 1 },
[AV_PIX_FMT_YUVA420P10BE]= { 1, 1 },
[AV_PIX_FMT_YUVA420P10LE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P10BE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P10LE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P10BE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P10LE]= { 1, 1 },
[AV_PIX_FMT_YUVA420P16BE]= { 1, 1 },
[AV_PIX_FMT_YUVA420P16LE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P16BE]= { 1, 1 },
[AV_PIX_FMT_YUVA422P16LE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P16BE]= { 1, 1 },
[AV_PIX_FMT_YUVA444P16LE]= { 1, 1 },
[AV_PIX_FMT_RGB48BE] = { 1, 1 },
[AV_PIX_FMT_RGB48LE] = { 1, 1 },
[AV_PIX_FMT_RGBA64BE] = { 1, 1, 1 },
[AV_PIX_FMT_RGBA64LE] = { 1, 1, 1 },
[AV_PIX_FMT_RGB565BE] = { 1, 1 },
[AV_PIX_FMT_RGB565LE] = { 1, 1 },
[AV_PIX_FMT_RGB555BE] = { 1, 1 },
[AV_PIX_FMT_RGB555LE] = { 1, 1 },
[AV_PIX_FMT_BGR565BE] = { 1, 1 },
[AV_PIX_FMT_BGR565LE] = { 1, 1 },
[AV_PIX_FMT_BGR555BE] = { 1, 1 },
[AV_PIX_FMT_BGR555LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P16LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P16BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P16LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P16BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P16LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P16BE] = { 1, 1 },
[AV_PIX_FMT_RGB444LE] = { 1, 1 },
[AV_PIX_FMT_RGB444BE] = { 1, 1 },
[AV_PIX_FMT_BGR444LE] = { 1, 1 },
[AV_PIX_FMT_BGR444BE] = { 1, 1 },
[AV_PIX_FMT_YA16BE] = { 1, 0 },
[AV_PIX_FMT_YA16LE] = { 1, 0 },
[AV_PIX_FMT_BGR48BE] = { 1, 1 },
[AV_PIX_FMT_BGR48LE] = { 1, 1 },
[AV_PIX_FMT_BGRA64BE] = { 1, 1, 1 },
[AV_PIX_FMT_BGRA64LE] = { 1, 1, 1 },
[AV_PIX_FMT_YUV420P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P12BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P14BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P14LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P12BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P14BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P14LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P10LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P12BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P12LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P14BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P14LE] = { 1, 1 },
[AV_PIX_FMT_GBRP9LE] = { 1, 1 },
[AV_PIX_FMT_GBRP9BE] = { 1, 1 },
[AV_PIX_FMT_GBRP10LE] = { 1, 1 },
[AV_PIX_FMT_GBRP10BE] = { 1, 1 },
[AV_PIX_FMT_GBRAP10LE] = { 1, 1 },
[AV_PIX_FMT_GBRAP10BE] = { 1, 1 },
[AV_PIX_FMT_GBRP12LE] = { 1, 1 },
[AV_PIX_FMT_GBRP12BE] = { 1, 1 },
[AV_PIX_FMT_GBRAP12LE] = { 1, 1 },
[AV_PIX_FMT_GBRAP12BE] = { 1, 1 },
[AV_PIX_FMT_GBRP14LE] = { 1, 1 },
[AV_PIX_FMT_GBRP14BE] = { 1, 1 },
[AV_PIX_FMT_GBRP16LE] = { 1, 1 },
[AV_PIX_FMT_GBRP16BE] = { 1, 1 },
[AV_PIX_FMT_GBRAP] = { 1, 1 },
[AV_PIX_FMT_GBRAP16LE] = { 1, 1 },
[AV_PIX_FMT_GBRAP16BE] = { 1, 1 },
[AV_PIX_FMT_BAYER_BGGR8] = { 1, 0 },
[AV_PIX_FMT_BAYER_RGGB8] = { 1, 0 },
[AV_PIX_FMT_BAYER_GBRG8] = { 1, 0 },
[AV_PIX_FMT_BAYER_GRBG8] = { 1, 0 },
[AV_PIX_FMT_BAYER_BGGR16LE] = { 1, 0 },
[AV_PIX_FMT_BAYER_BGGR16BE] = { 1, 0 },
[AV_PIX_FMT_BAYER_RGGB16LE] = { 1, 0 },
[AV_PIX_FMT_BAYER_RGGB16BE] = { 1, 0 },
[AV_PIX_FMT_BAYER_GBRG16LE] = { 1, 0 },
[AV_PIX_FMT_BAYER_GBRG16BE] = { 1, 0 },
[AV_PIX_FMT_BAYER_GRBG16LE] = { 1, 0 },
[AV_PIX_FMT_BAYER_GRBG16BE] = { 1, 0 },
[AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 },
[AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 },
[AV_PIX_FMT_P010LE] = { 1, 1 },
[AV_PIX_FMT_P010BE] = { 1, 1 },
[AV_PIX_FMT_P016LE] = { 1, 1 },
[AV_PIX_FMT_P016BE] = { 1, 1 },
[AV_PIX_FMT_GRAYF32LE] = { 1, 1 },
[AV_PIX_FMT_GRAYF32BE] = { 1, 1 },
/**
 * Tell whether pix_fmt is accepted as a source format.
 *
 * @param pix_fmt pixel format to query
 * @return the is_supported_in flag of the matching format_entries[] row,
 *         or 0 when pix_fmt lies outside [0, AV_PIX_FMT_NB)
 */
int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
{
    /* the unsigned cast also rejects negative enum values */
    if ((unsigned)pix_fmt >= AV_PIX_FMT_NB)
        return 0;

    return format_entries[pix_fmt].is_supported_in;
}
/**
 * Tell whether pix_fmt is accepted as a destination format.
 *
 * @param pix_fmt pixel format to query
 * @return the is_supported_out flag of the matching format_entries[] row,
 *         or 0 when pix_fmt lies outside [0, AV_PIX_FMT_NB)
 */
int sws_isSupportedOutput(enum AVPixelFormat pix_fmt)
{
    /* the unsigned cast also rejects negative enum values */
    if ((unsigned)pix_fmt >= AV_PIX_FMT_NB)
        return 0;

    return format_entries[pix_fmt].is_supported_out;
}
/**
 * Tell whether endianness conversion is flagged as supported for pix_fmt.
 *
 * @param pix_fmt pixel format to query
 * @return the is_supported_endianness flag of the matching
 *         format_entries[] row, or 0 when pix_fmt lies outside
 *         [0, AV_PIX_FMT_NB)
 */
int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt)
{
    /* the unsigned cast also rejects negative enum values */
    if ((unsigned)pix_fmt >= AV_PIX_FMT_NB)
        return 0;

    return format_entries[pix_fmt].is_supported_endianness;
}
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1 the cubic a + b*dist + c*dist^2 + d*dist^3 is evaluated
 * directly. For larger distances the coefficients are transformed into
 * those of the next segment and the function recurses with dist reduced
 * by one.
 *
 * @param a    constant coefficient of the current segment
 * @param b    linear coefficient of the current segment
 * @param c    quadratic coefficient of the current segment
 * @param d    cubic coefficient of the current segment
 * @param dist distance at which to evaluate
 * @return the value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d,
                             double dist)
{
    double next_b, next_c, next_d;

    if (dist <= 1.0) {
        /* Horner evaluation of the current segment */
        return ((d * dist + c) * dist + b) * dist + a;
    }

    /* coefficients of the following segment; its constant term is zero */
    next_b = b + 2.0 * c + 3.0 * d;
    next_c = c + 3.0 * d;
    next_d = -b - 3.0 * c - 6.0 * d;

    return getSplineCoeff(0.0, next_b, next_c, next_d, dist - 1.0);
}
/**
 * Convert a chroma sample position into the subsampled coordinate system.
 *
 * A position of -1, or anything at or below -513, is treated as "not
 * specified" and replaced by (128 << chr_subsample) - 128 before the
 * conversion.
 *
 * @param s             scaler context (not read by this function)
 * @param chr_subsample log2 of the chroma subsampling factor
 * @param pos           requested position, or -1 / <= -513 for the default
 * @param dir           direction selector (not read by this function)
 * @return (pos + 128) >> chr_subsample
 */
static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir)
{
    if (pos == -1 || pos <= -513) {
        pos = (128 << chr_subsample) - 128;
    }
    pos += 128; // relative to ideal left edge
    return pos >> chr_subsample;
}
/**
 * Description of one scaling algorithm: the flag that selects it, a label,
 * and the size factor initFilter() reads when sizing the filter
 * (entries with a non-positive size_factor are skipped by that lookup).
 */
typedef struct {
int flag; ///< flag associated to the algorithm
const char *description; ///< human-readable description
int size_factor; ///< size factor used when initing the filters
} ScaleAlgorithm;
static const ScaleAlgorithm scale_algorithms[] = {
{ SWS_AREA, "area averaging", 1 /* downscale only, for upscale it is bilinear */ },
{ SWS_BICUBIC, "bicubic", 4 },
{ SWS_BICUBLIN, "luma bicubic / chroma bilinear", -1 },
{ SWS_BILINEAR, "bilinear", 2 },
{ SWS_FAST_BILINEAR, "fast bilinear", -1 },
{ SWS_GAUSS, "Gaussian", 8 /* infinite ;) */ },
{ SWS_LANCZOS, "Lanczos", -1 /* custom */ },
{ SWS_POINT, "nearest neighbor / point", -1 },
{ SWS_SINC, "sinc", 20 /* infinite ;) */ },
{ SWS_SPLINE, "bicubic spline", 20 /* infinite :)*/ },
{ SWS_X, "experimental", 8 },
};
/**
 * Build the coefficient table and source start positions of a 1-D scaling
 * filter that produces dstW output samples.
 *
 * NOTE(review): the text of this function is damaged. Attribution lines
 * ("Michael Niedermayer" / "committed") are interleaved with the code and
 * several statements the remaining code depends on are not present (see the
 * individual NOTE(review) markers below). The code is deliberately left
 * untouched; restore the body from a pristine copy before relying on it.
 *
 * @param outFilter     receives a newly allocated int16_t array sized for
 *                      dstW + 3 rows of *outFilterSize coefficients
 * @param filterPos     receives a newly allocated array of dstW + 3 source
 *                      start indices (one per output sample plus padding)
 * @param outFilterSize receives the final, reduced and aligned, tap count
 * @param xInc          source step per output sample; compared against
 *                      1 << 16 to tell upscaling from downscaling
 * @param srcW          source length in samples
 * @param dstW          destination length in samples
 * @param filterAlign   the tap count is rounded up to a multiple of this
 *                      value (the rounding mask assumes a power of two)
 * @param one           divisor used when normalizing each coefficient row
 * @param flags         SWS_* flags selecting the algorithm and options
 * @param cpu_flags     CPU capability flags used to adjust filterAlign
 * @param srcFilter     optional extra filter convolved into every row
 * @param dstFilter     optional; only its length is referenced in this text
 * @param param         algorithm tuning values; SWS_PARAM_DEFAULT selects
 *                      the built-in default of the chosen algorithm
 * @param srcPos        source position offset
 * @param dstPos        destination position offset
 * @return the value of ret; RETCODE_USE_CASCADE when the aligned tap count
 *         reaches the MAX_FILTER_SIZE based limit
 */
static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
int *outFilterSize, int xInc, int srcW,
int dstW, int filterAlign, int one,
int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter,
double param[2], int srcPos, int dstPos)
{
int i;
int filterSize;
int filter2Size;
int minFilterSize;
int64_t *filter = NULL;
int64_t *filter2 = NULL;
const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8));
// NOTE(review): `ret` is assigned and returned further down, but no
// declaration of it appears in this text.
emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
// NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
Michael Niedermayer
committed
FF_ALLOC_ARRAY_OR_GOTO(NULL, *filterPos, (dstW + 3), sizeof(**filterPos), fail);
if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled
int i;
// NOTE(review): filterSize is read below but no assignment to it is
// visible in this branch.
Michael Niedermayer
committed
FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter,
dstW, sizeof(*filter) * filterSize, fail);
for (i = 0; i < dstW; i++) {
filter[i * filterSize] = fone;
(*filterPos)[i] = i;
}
} else if (flags & SWS_POINT) { // lame looking point sampling mode
int i;
int64_t xDstInSrc;
// NOTE(review): filterSize is read below but no assignment to it is
// visible in this branch.
Michael Niedermayer
committed
FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
dstW, sizeof(*filter) * filterSize, fail);
Michael Niedermayer
committed
xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
(*filterPos)[i] = xx;
filter[i] = fone;
xDstInSrc += xInc;
}
} else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
(flags & SWS_FAST_BILINEAR)) { // bilinear upscale
int i;
int64_t xDstInSrc;
// NOTE(review): filterSize is read below but no assignment to it is
// visible in this branch; xDstInSrc is also never advanced inside the
// loop here, unlike in the point-sampling branch.
Michael Niedermayer
committed
FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
dstW, sizeof(*filter) * filterSize, fail);
Michael Niedermayer
committed
xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
int j;
(*filterPos)[i] = xx;
// bilinear upscale / linear interpolate / area averaging
for (j = 0; j < filterSize; j++) {
int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
if (coeff < 0)
coeff = 0;
filter[i * filterSize + j] = coeff;
xx++;
}
}
} else {
int64_t xDstInSrc;
int sizeFactor = -1;
// pick the size factor of the first table entry whose flag is set and
// whose size_factor is positive
for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) {
if (flags & scale_algorithms[i].flag && scale_algorithms[i].size_factor > 0) {
sizeFactor = scale_algorithms[i].size_factor;
break;
}
}
// Lanczos derives its size factor from param[0] (default 6)
if (flags & SWS_LANCZOS)
sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
av_assert0(sizeFactor > 0);
if (xInc <= 1 << 16)
filterSize = 1 + sizeFactor; // upscale
else
filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
// clamp the tap count to [1, srcW - 2] (the lower bound wins)
filterSize = FFMIN(filterSize, srcW - 2);
filterSize = FFMAX(filterSize, 1);
Michael Niedermayer
committed
FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
dstW, sizeof(*filter) * filterSize, fail);
Michael Niedermayer
committed
xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
// NOTE(review): xDstInSrc is not advanced anywhere inside this loop in
// the text shown here.
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - (filterSize - 2) * (1LL<<16)) / (1 << 17);
int j;
(*filterPos)[i] = xx;
for (j = 0; j < filterSize; j++) {
int64_t d = (FFABS(((int64_t)xx * (1 << 17)) - xDstInSrc)) << 13;
double floatd;
int64_t coeff;
if (xInc > 1 << 16)
d = d * dstW / srcW;
// floatd is d rescaled by 2^-30
floatd = d * (1.0 / (1 << 30));
// NOTE(review): the bicubic branch below is structurally inconsistent in
// this text: B and C are used in a scope where they are not declared, two
// assignments to coeff follow each other without a condition between
// them, and an `else if` follows an `else`.
if (flags & SWS_BICUBIC) {
int64_t B = (param[0] != SWS_PARAM_DEFAULT ? param[0] : 0) * (1 << 24);
int64_t C = (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1 << 24);
coeff = 0.0;
} else {
int64_t dd = (d * d) >> 30;
int64_t ddd = (dd * d) >> 30;
if (d < 1LL << 30)
coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
(-18 * (1 << 24) + 12 * B + 6 * C) * dd +
(6 * (1 << 24) - 2 * B) * (1 << 30);
coeff = (-B - 6 * C) * ddd +
(6 * B + 30 * C) * dd +
(-12 * B - 48 * C) * d +
(8 * B + 24 * C) * (1 << 30);
} else if (flags & SWS_X) {
double A = param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
double c;
if (floatd < 1.0)
c = cos(floatd * M_PI);
else
c = -1.0;
if (c < 0.0)
c = -pow(-c, A);
else
c = pow(c, A);
coeff = (c * 0.5 + 0.5) * fone;
} else if (flags & SWS_AREA) {
int64_t d2 = d - (1 << 29);
if (d2 * xInc < -(1LL << (29 + 16)))
coeff = 1.0 * (1LL << (30 + 16));
else if (d2 * xInc < (1LL << (29 + 16)))
coeff = -d2 * xInc + (1LL << (29 + 16));
else
coeff = 0.0;
coeff *= fone >> (30 + 16);
} else if (flags & SWS_GAUSS) {
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = exp2(-p * floatd * floatd) * fone;
} else if (flags & SWS_SINC) {
coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
} else if (flags & SWS_LANCZOS) {
double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = (d ? sin(floatd * M_PI) * sin(floatd * M_PI / p) /
(floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
if (floatd > p)
coeff = 0;
} else if (flags & SWS_BILINEAR) {
coeff = (1 << 30) - d;
if (coeff < 0)
coeff = 0;
coeff *= fone >> 30;
} else if (flags & SWS_SPLINE) {
double p = -2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
} else {
}
filter[i * filterSize + j] = coeff;
xx++;
}
}
}
// NOTE(review): the block comment opened on the next line is not closed on
// that line in this text, so every statement up to the next comment
// terminator (the filter2 sizing, allocation and convolution) is currently
// inside a comment.
/* apply src & dst Filter to filter -> filter2
filter2Size = filterSize;
if (srcFilter)
filter2Size += srcFilter->length - 1;
if (dstFilter)
filter2Size += dstFilter->length - 1;
Michael Niedermayer
committed
FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter2, dstW, filter2Size * sizeof(*filter2), fail);
for (i = 0; i < dstW; i++) {
int j, k;
if (srcFilter) {
for (k = 0; k < srcFilter->length; k++) {
for (j = 0; j < filterSize; j++)
filter2[i * filter2Size + k + j] +=
srcFilter->coeff[k] * filter[i * filterSize + j];
}
} else {
for (j = 0; j < filterSize; j++)
filter2[i * filter2Size + j] = filter[i * filterSize + j];
}
(*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
}
av_freep(&filter);
/* try to reduce the filter-size (step1 find size and shift left) */
// Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
minFilterSize = 0;
for (i = dstW - 1; i >= 0; i--) {
int min = filter2Size;
int j;
// NOTE(review): `cutOff` is accumulated below but neither its declaration
// nor any reset between the two passes is present in this text.
/* get rid of near zero elements on the left by shifting left */
for (j = 0; j < filter2Size; j++) {
int k;
cutOff += FFABS(filter2[i * filter2Size]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
break;
/* preserve monotonicity because the core can't handle the
* filter otherwise */
if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
break;
// move filter coefficients left
for (k = 1; k < filter2Size; k++)
filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
filter2[i * filter2Size + k - 1] = 0;
(*filterPos)[i]++;
}
/* count near zeros on the right */
for (j = filter2Size - 1; j > 0; j--) {
cutOff += FFABS(filter2[i * filter2Size + j]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF * fone)
break;
min--;
}
// the largest remaining row length determines the common tap count
if (min > minFilterSize)
minFilterSize = min;
}
if (PPC_ALTIVEC(cpu_flags)) {
// we can handle the special case 4, so we don't want to go the full 8
if (minFilterSize < 5)
filterAlign = 4;
/* We really don't want to waste our time doing useless computation, so
* fall back on the scalar C code for very small filters.
* Vectorizing is worth it only if you have a decent-sized vector. */
if (minFilterSize < 3)
filterAlign = 1;
}
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
// special case for unscaled vertical filtering
// NOTE(review): the `if` below has no controlled statement in this text.
if (minFilterSize == 1 && filterAlign == 2)
}
// round minFilterSize up to a multiple of filterAlign
filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
Michael Niedermayer
committed
filter = av_malloc_array(dstW, filterSize * sizeof(*filter));
Michael Niedermayer
committed
if (!filter)
goto fail;
// NOTE(review): the block opened by the `if` below is never closed in this
// text, so everything after `goto fail;` appears to be inside it.
if (filterSize >= MAX_FILTER_SIZE * 16 /
Michael Niedermayer
committed
((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16)) {
ret = RETCODE_USE_CASCADE;
goto fail;
*outFilterSize = filterSize;
if (flags & SWS_PRINT_INFO)
av_log(NULL, AV_LOG_VERBOSE,
"SwScaler: reducing / aligning filtersize %d -> %d\n",
filter2Size, filterSize);
/* try to reduce the filter-size (step2 reduce it) */
for (i = 0; i < dstW; i++) {
int j;
for (j = 0; j < filterSize; j++) {
if (j >= filter2Size)
filter[i * filterSize + j] = 0;
else
filter[i * filterSize + j] = filter2[i * filter2Size + j];
if ((flags & SWS_BITEXACT) && j >= minFilterSize)
filter[i * filterSize + j] = 0;
}
}
// FIXME try to align filterPos if possible
// fix borders: fold taps that fall outside [0, srcW) onto the edge samples
for (i = 0; i < dstW; i++) {
int j;
if ((*filterPos)[i] < 0) {
// move filter coefficients left to compensate for filterPos
for (j = 1; j < filterSize; j++) {
int left = FFMAX(j + (*filterPos)[i], 0);
filter[i * filterSize + left] += filter[i * filterSize + j];
filter[i * filterSize + j] = 0;
}
(*filterPos)[i]= 0;
}
if ((*filterPos)[i] + filterSize > srcW) {
Michael Niedermayer
committed
int shift = (*filterPos)[i] + FFMIN(filterSize - srcW, 0);
Michael Niedermayer
committed
int64_t acc = 0;
Michael Niedermayer
committed
Michael Niedermayer
committed
// collect the weight of every tap that reads past the last source sample
for (j = filterSize - 1; j >= 0; j--) {
if ((*filterPos)[i] + j >= srcW) {
acc += filter[i * filterSize + j];
filter[i * filterSize + j] = 0;
}
}
Michael Niedermayer
committed
// shift the row right by `shift` taps, zero-filling on the left
for (j = filterSize - 1; j >= 0; j--) {
if (j < shift) {
filter[i * filterSize + j] = 0;
} else {
filter[i * filterSize + j] = filter[i * filterSize + j - shift];
}
}
Michael Niedermayer
committed
(*filterPos)[i]-= shift;
Michael Niedermayer
committed
// put the collected weight on the tap that maps to the last source sample
filter[i * filterSize + srcW - 1 - (*filterPos)[i]] += acc;
}
av_assert0((*filterPos)[i] >= 0);
av_assert0((*filterPos)[i] < srcW);
if ((*filterPos)[i] + filterSize > srcW) {
for (j = 0; j < filterSize; j++) {
av_assert0((*filterPos)[i] + j < srcW || !filter[i * filterSize + j]);
}
}
}
// Note the +1 is for the MMX scaler which reads over the end
/* align at 16 for AltiVec (needed by hScale_altivec_real) */
Michael Niedermayer
committed
FF_ALLOCZ_ARRAY_OR_GOTO(NULL, *outFilter,
(dstW + 3), *outFilterSize * sizeof(int16_t), fail);
/* normalize & store in outFilter */
for (i = 0; i < dstW; i++) {
int j;
int64_t error = 0;
int64_t sum = 0;
for (j = 0; j < filterSize; j++) {
sum += filter[i * filterSize + j];
}
sum = (sum + one / 2) / one;
if (!sum) {
av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n");
sum = 1;
}
// carry the rounding remainder of each tap into the next one
for (j = 0; j < *outFilterSize; j++) {
int64_t v = filter[i * filterSize + j] + error;
int intV = ROUNDED_DIV(v, sum);
(*outFilter)[i * (*outFilterSize) + j] = intV;
error = v - intV * sum;
}
}
(*filterPos)[dstW + 0] =
(*filterPos)[dstW + 1] =
(*filterPos)[dstW + 2] = (*filterPos)[dstW - 1]; /* the MMX/SSE scaler will
* read over the end */
// replicate the last coefficient row into the three padding rows
for (i = 0; i < *outFilterSize; i++) {
int k = (dstW - 1) * (*outFilterSize) + i;
(*outFilter)[k + 1 * (*outFilterSize)] =
(*outFilter)[k + 2 * (*outFilterSize)] =
(*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
}
// NOTE(review): in this text nothing sets ret to a success value before the
// label, and the failure message below is logged on every path that reaches
// it, including normal fall-through.
fail:
av_log(NULL, ret == RETCODE_USE_CASCADE ? AV_LOG_DEBUG : AV_LOG_ERROR, "sws: initFilter failed\n");
av_free(filter);
av_free(filter2);
return ret;
}
/**
 * Derive the RGB -> YUV input coefficients from a four-entry colorspace
 * coefficient table and store them in c->input_rgb2yuv_table.
 *
 * The nine coefficients (R/G/B contribution to Y, U and V) are written to
 * the *_IDX slots of the table. When `table` is byte-identical to
 * ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] they are replaced by hard-coded
 * constants. Finally the coefficients are replicated as 16-bit values,
 * in the order given by map[], starting at byte offset 16*4 of the same
 * table; map entries of -1 produce a zero.
 *
 * @param c        scaler context whose input_rgb2yuv_table is filled
 * @param table    colorspace coefficients (four ints)
 * @param dstRange currently ignored: it is forced to 0 on entry
 */
static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange)
{
    int64_t W, V, Z, Cy, Cu, Cv;
    int64_t vr =  table[0];
    int64_t ub =  table[1];
    int64_t ug = -table[2];
    int64_t vg = -table[3];
    int64_t ONE = 65536;
    int64_t cy = ONE;
    uint8_t *p = (uint8_t*)c->input_rgb2yuv_table;
    int i;
    /* Coefficient layout of the 16-bit copy, eight entries per row. */
    static const int8_t map[] = {
    BY_IDX, GY_IDX, -1    , BY_IDX, BY_IDX, GY_IDX, -1    , BY_IDX,
    RY_IDX, -1    , GY_IDX, RY_IDX, RY_IDX, -1    , GY_IDX, RY_IDX,
    RY_IDX, GY_IDX, -1    , RY_IDX, RY_IDX, GY_IDX, -1    , RY_IDX,
    BY_IDX, -1    , GY_IDX, BY_IDX, BY_IDX, -1    , GY_IDX, BY_IDX,
    BU_IDX, GU_IDX, -1    , BU_IDX, BU_IDX, GU_IDX, -1    , BU_IDX,
    RU_IDX, -1    , GU_IDX, RU_IDX, RU_IDX, -1    , GU_IDX, RU_IDX,
    RU_IDX, GU_IDX, -1    , RU_IDX, RU_IDX, GU_IDX, -1    , RU_IDX,
    BU_IDX, -1    , GU_IDX, BU_IDX, BU_IDX, -1    , GU_IDX, BU_IDX,
    BV_IDX, GV_IDX, -1    , BV_IDX, BV_IDX, GV_IDX, -1    , BV_IDX,
    RV_IDX, -1    , GV_IDX, RV_IDX, RV_IDX, -1    , GV_IDX, RV_IDX,
    RV_IDX, GV_IDX, -1    , RV_IDX, RV_IDX, GV_IDX, -1    , RV_IDX,
    BV_IDX, -1    , GV_IDX, BV_IDX, BV_IDX, -1    , GV_IDX, BV_IDX,
    RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX,
    BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX,
    GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    ,
    -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX, -1    , GY_IDX,
    RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX,
    BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX,
    GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    ,
    -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX, -1    , GU_IDX,
    RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX,
    BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX,
    GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    ,
    -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, -1    , GV_IDX, //23
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //24
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //25
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //26
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //27
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //28
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //29
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //30
    -1    , -1    , -1    , -1    , -1    , -1    , -1    , -1    , //31
    BY_IDX, GY_IDX, RY_IDX, -1    , -1    , -1    , -1    , -1    , //32
    BU_IDX, GU_IDX, RU_IDX, -1    , -1    , -1    , -1    , -1    , //33
    BV_IDX, GV_IDX, RV_IDX, -1    , -1    , -1    , -1    , -1    , //34
    };

    dstRange = 0; //FIXME range = 1 is handled elsewhere

    if (!dstRange) {
        cy = cy * 255 / 219;
    } else {
        vr = vr * 224 / 255;
        ub = ub * 224 / 255;
        ug = ug * 224 / 255;
        vg = vg * 224 / 255;
    }

    /* intermediate terms, all scaled by ONE*ONE */
    W = ROUNDED_DIV(ONE*ONE*ug, ub);
    V = ROUNDED_DIV(ONE*ONE*vg, vr);
    Z = ONE*ONE-W-V;

    Cy = ROUNDED_DIV(cy*Z, ONE);
    Cu = ROUNDED_DIV(ub*Z, ONE);
    Cv = ROUNDED_DIV(vr*Z, ONE);

    c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V        , Cy);
    c->input_rgb2yuv_table[GY_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE  , Cy);
    c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W        , Cy);

    c->input_rgb2yuv_table[RU_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V        , Cu);
    c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE  , Cu);
    c->input_rgb2yuv_table[BU_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W)    , Cu);

    c->input_rgb2yuv_table[RV_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z)    , Cv);
    c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE  , Cv);
    c->input_rgb2yuv_table[BV_IDX] =  ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W        , Cv);

    /* the default table gets fixed constants instead of the derived values */
    if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) {
        c->input_rgb2yuv_table[BY_IDX] =  ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[BU_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GY_IDX] =  ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RY_IDX] =  ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RV_IDX] =  ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
        c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
    }

    /* 16-bit copy of the coefficients, laid out as described by map[] */
    for(i=0; i<FF_ARRAY_ELEMS(map); i++)
        AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0);
}
/**
 * Install the XYZ <-> RGB conversion matrices and gamma lookup tables in
 * the context.
 *
 * The matrices are copied into the context. The four 4096-entry gamma
 * tables live in function-local static storage shared by all contexts;
 * the context only receives pointers to them. The tables are computed on
 * the first call and reused afterwards (a non-zero last entry of
 * rgbgamma_tab marks them as already filled).
 *
 * NOTE(review): the first-call fill of the shared static tables is not
 * guarded by any synchronization in this function -- confirm callers
 * serialize the first call.
 *
 * @param c scaler context to set up
 */
static void fill_xyztables(struct SwsContext *c)
{
    int i;
    double xyzgamma = XYZ_GAMMA;
    double rgbgamma = 1.0 / RGB_GAMMA;
    double xyzgammainv = 1.0 / XYZ_GAMMA;
    double rgbgammainv = RGB_GAMMA;
    /* only three columns are given; the fourth is zero-initialized */
    static const int16_t xyz2rgb_matrix[3][4] = {
        {13270, -6295, -2041},
        {-3969,  7682,   170},
        {  228,  -835,  4329} };
    static const int16_t rgb2xyz_matrix[3][4] = {
        {1689, 1464,  739},
        { 871, 2929,  296},
        {  79,  488, 3891} };
    static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];

    memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
    memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));
    c->xyzgamma = xyzgamma_tab;
    c->rgbgamma = rgbgamma_tab;
    c->xyzgammainv = xyzgammainv_tab;
    c->rgbgammainv = rgbgammainv_tab;

    /* after a fill the last entry is 4095, so non-zero means "already done" */
    if (rgbgamma_tab[4095])
        return;

    /* set gamma vectors */
    for (i = 0; i < 4096; i++) {
        xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
        rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
        xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0);
        rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0);
    }
}
/**
 * Set the colorspace tables, ranges and picture adjustments of a context.
 *
 * The new values are always stored in the context. Derived state is only
 * rebuilt when at least one of them differs from what the context held
 * before. If the context delegates to a cascaded main context, the call is
 * forwarded to it. For a YUV/gray -> YUV/gray conversion whose source and
 * destination tables differ, a two-stage cascade through an intermediate
 * BGR format is created.
 *
 * @param c          scaler context to update
 * @param inv_table  source colorspace coefficients (four ints)
 * @param srcRange   source range flag; forced to 0 unless the source
 *                   format is YUV or gray
 * @param table      destination colorspace coefficients (four ints)
 * @param dstRange   destination range flag; forced to 0 unless the
 *                   destination format is YUV or gray
 * @param brightness brightness adjustment
 * @param contrast   contrast adjustment
 * @param saturation saturation adjustment
 * @return 0 on success or when nothing had to be rebuilt, -1 if a cascaded
 *         context could not be created, or the negative value returned by
 *         av_image_alloc() / sws_init_context() on their failure
 */
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
                             int srcRange, const int table[4], int dstRange,
                             int brightness, int contrast, int saturation)
{
    const AVPixFmtDescriptor *desc_dst;
    const AVPixFmtDescriptor *desc_src;
    int need_reinit = 0;

    handle_formats(c);
    desc_dst = av_pix_fmt_desc_get(c->dstFormat);
    desc_src = av_pix_fmt_desc_get(c->srcFormat);

    /* the range flags are only kept for YUV and gray formats */
    if(!isYUV(c->dstFormat) && !isGray(c->dstFormat))
        dstRange = 0;
    if(!isYUV(c->srcFormat) && !isGray(c->srcFormat))
        srcRange = 0;

    if (c->srcRange != srcRange ||
        c->dstRange != dstRange ||
        c->brightness != brightness ||
        c->contrast   != contrast ||
        c->saturation != saturation ||
        memcmp(c->srcColorspaceTable, inv_table, sizeof(int) * 4) ||
        memcmp(c->dstColorspaceTable,     table, sizeof(int) * 4)
    )
        need_reinit = 1;

    /* memmove: the caller may pass the context's own tables back in */
    memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
    memmove(c->dstColorspaceTable, table, sizeof(int) * 4);

    c->brightness = brightness;
    c->contrast   = contrast;
    c->saturation = saturation;
    c->srcRange   = srcRange;
    c->dstRange   = dstRange;

    //The srcBpc check is possibly wrong but we seem to lack a definitive reference to test this
    //and what we have in ticket 2939 looks better with this check
    if (need_reinit && (c->srcBpc == 8 || !isYUV(c->srcFormat)))
        ff_sws_init_range_convert(c);

    c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
    c->srcFormatBpp = av_get_bits_per_pixel(desc_src);

    /* a cascaded main context handles the request on our behalf */
    if (c->cascaded_context[c->cascaded_mainindex])
        return sws_setColorspaceDetails(c->cascaded_context[c->cascaded_mainindex],inv_table, srcRange,table, dstRange, brightness, contrast, saturation);

    if (!need_reinit)
        return 0;

    if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) {
        if (!c->cascaded_context[0] &&
            memcmp(c->dstColorspaceTable, c->srcColorspaceTable, sizeof(int) * 4) &&
            c->srcW && c->srcH && c->dstW && c->dstH) {
            enum AVPixelFormat tmp_format;
            int tmp_width, tmp_height;
            int srcW = c->srcW;
            int srcH = c->srcH;
            int dstW = c->dstW;
            int dstH = c->dstH;
            int ret;
            av_log(c, AV_LOG_VERBOSE, "YUV color matrix differs for YUV->YUV, using intermediate RGB to convert\n");

            /* intermediate format: 16 bits per component for high bit depth
             * destinations, with alpha only if both ends carry alpha */
            if (isNBPS(c->dstFormat) || is16BPS(c->dstFormat)) {
                if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) {
                    tmp_format = AV_PIX_FMT_BGRA64;
                } else {
                    tmp_format = AV_PIX_FMT_BGR48;
                }
            } else {
                if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) {
                    tmp_format = AV_PIX_FMT_BGRA;
                } else {
                    tmp_format = AV_PIX_FMT_BGR24;
                }
            }

            /* run the intermediate stage at the smaller of the two sizes */
            if (srcW*srcH > dstW*dstH) {
                tmp_width  = dstW;
                tmp_height = dstH;
            } else {
                tmp_width  = srcW;
                tmp_height = srcH;
            }

            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
                                 tmp_width, tmp_height, tmp_format, 64);
            if (ret < 0)
                return ret;

            c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, c->srcFormat,
                                                        tmp_width, tmp_height, tmp_format,
                                                        c->flags, c->param);
            if (!c->cascaded_context[0])
                return -1;

            c->cascaded_context[0]->alphablend = c->alphablend;
            ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
            if (ret < 0)
                return ret;
            //we set both src and dst depending on that the RGB side will be ignored
            sws_setColorspaceDetails(c->cascaded_context[0], inv_table,
                                     srcRange, table, dstRange,
                                     brightness, contrast, saturation);

            c->cascaded_context[1] = sws_getContext(tmp_width, tmp_height, tmp_format,
                                                    dstW, dstH, c->dstFormat,
                                                    c->flags, NULL, NULL, c->param);
            if (!c->cascaded_context[1])
                return -1;
            sws_setColorspaceDetails(c->cascaded_context[1], inv_table,
                                     srcRange, table, dstRange,
                                     0, 1 << 16, 1 << 16);
            return 0;
        }
        /* NOTE(review): when no cascade is set up above, control falls
         * through to the common tail and the call returns 0 -- confirm that
         * no statement is missing here. */
    }

    if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
        ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
                                 contrast, saturation);
        // FIXME factorize

        if (ARCH_PPC)
            ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness,
                                       contrast, saturation);
    }

    fill_rgb2yuv_table(c, table, dstRange);

    return 0;
}
int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
int *srcRange, int **table, int *dstRange,
int *brightness, int *contrast, int *saturation)
{
*inv_table = c->srcColorspaceTable;
*table = c->dstColorspaceTable;
*srcRange = c->srcRange;
*dstRange = c->dstRange;
*brightness = c->brightness;
*contrast = c->contrast;
*saturation = c->saturation;