/*
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* the C code (not assembly, mmx, ...) of this file can be used
 * under the LGPL license too
 */

/*
supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
{BGR,RGB}{1,4,8,15,16} support dithering
unscaled special converters (YV12=I420=IYUV, Y800=Y8)
YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
x -> x
YUV9 -> YV12
YUV9/YV12 -> Y800
Y800 -> YUV9/YV12
BGR24 -> BGR32 & RGB24 -> RGB32
BGR32 -> BGR24 & RGB32 -> RGB24
tested special converters (most are tested actually, but I did not write it down ...)
YV12 -> BGR16
YVU9 -> YV12
YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
YV12/I420 -> YV12/I420
YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
BGR24 -> BGR32 & RGB24 -> RGB32
BGR32 -> BGR24 & RGB32 -> RGB24
*/
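/*
 Illustrative only, not part of this file: a minimal sketch of how a caller
 typically requests one of the conversions listed above through the public
 swscale API. The width/height/stride/plane variables are placeholders.

     struct SwsContext *ctx = sws_getContext(srcW, srcH, PIX_FMT_YUV420P,
                                              dstW, dstH, PIX_FMT_BGR24,
                                              SWS_BILINEAR, NULL, NULL, NULL);
     if (ctx) {
         sws_scale(ctx, src, srcStride, 0, srcH, dst, dstStride);
         sws_freeContext(ctx);
     }
*/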
#define _SVID_SOURCE //needed for MAP_ANONYMOUS
#include <inttypes.h>
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/bswap.h"
unsigned swscale_version(void)
{
return LIBSWSCALE_VERSION_INT;
}
//#undef HAVE_MMX2
//#undef HAVE_MMX
//#undef ARCH_X86
#define DITHER1XBPP
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
#define RET 0xC3 //near return opcode for X86
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
#define isSupportedIn(x) ( \
(x)==PIX_FMT_YUV420P \
|| (x)==PIX_FMT_YUVA420P \
|| (x)==PIX_FMT_YUYV422 \
|| (x)==PIX_FMT_UYVY422 \
|| (x)==PIX_FMT_RGB32 \
|| (x)==PIX_FMT_RGB32_1 \
|| (x)==PIX_FMT_BGR24 \
|| (x)==PIX_FMT_BGR565 \
|| (x)==PIX_FMT_BGR555 \
|| (x)==PIX_FMT_BGR32 \
|| (x)==PIX_FMT_BGR32_1 \
|| (x)==PIX_FMT_RGB24 \
|| (x)==PIX_FMT_RGB565 \
|| (x)==PIX_FMT_RGB555 \
|| (x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_YUV410P \
|| (x)==PIX_FMT_GRAY16BE \
|| (x)==PIX_FMT_GRAY16LE \
|| (x)==PIX_FMT_YUV444P \
|| (x)==PIX_FMT_YUV422P \
|| (x)==PIX_FMT_YUV411P \
|| (x)==PIX_FMT_PAL8 \
|| (x)==PIX_FMT_BGR8 \
|| (x)==PIX_FMT_RGB8 \
|| (x)==PIX_FMT_BGR4_BYTE \
|| (x)==PIX_FMT_RGB4_BYTE \
|| (x)==PIX_FMT_MONOWHITE \
|| (x)==PIX_FMT_MONOBLACK \
)
#define isSupportedOut(x) ( \
(x)==PIX_FMT_YUV420P \
|| (x)==PIX_FMT_YUYV422 \
|| (x)==PIX_FMT_UYVY422 \
|| (x)==PIX_FMT_YUV444P \
|| (x)==PIX_FMT_YUV422P \
|| (x)==PIX_FMT_YUV411P \
|| isRGB(x) \
|| isBGR(x) \
|| (x)==PIX_FMT_NV12 \
|| (x)==PIX_FMT_NV21 \
|| (x)==PIX_FMT_GRAY16BE \
|| (x)==PIX_FMT_GRAY16LE \
|| (x)==PIX_FMT_GRAY8 \
|| (x)==PIX_FMT_YUV410P \
)
#define isPacked(x) ( \
(x)==PIX_FMT_PAL8 \
|| (x)==PIX_FMT_YUYV422 \
|| (x)==PIX_FMT_UYVY422 \
|| isRGB(x) \
|| isBGR(x) \
)
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
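/*
 The macros above are BT.601 coefficients converted to 15-bit fixed point and
 scaled for limited-range output, e.g. RY = round(0.299*219/255 * 2^15) = 8414,
 GY = 16519, BY = 3208. Illustrative sketch only: a limited-range luma sample
 is then obtained roughly as

     int Y = ((RY*r + GY*g + BY*b) >> RGB2YUV_SHIFT) + 16;

 the real RGB->YUV converters fold the +16 offset and a rounding term into the
 sum before the shift.
*/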
extern const int32_t Inverse_Table_6_9[8][4];
static const double rgb2yuv_table[8][9]={
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
{0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
{0.59 , 0.11 , 0.30 , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
{0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
{0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
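/*
 The rows of rgb2yuv_table follow the same colorspace indexing as
 Inverse_Table_6_9 (the SWS_CS_* constants from swscale.h). Illustrative only:
 a caller that wants BT.709 coefficients on an existing context (here called
 ctx) would typically do

     sws_setColorspaceDetails(ctx, Inverse_Table_6_9[SWS_CS_ITU709], 0,
                              Inverse_Table_6_9[SWS_CS_DEFAULT], 0,
                              0, 1<<16, 1<<16);

 with brightness 0 and contrast/saturation at 1<<16 (i.e. 1.0 in 16.16 fixed
 point).
*/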
/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
TODO
write special vertical cubic upscale version
Optimize C code (yv12 / minmax)
add support for packed pixel yuv input & output
add support for Y8 output
optimize bgr24 & bgr32
add BGR4 output support
write special BGR->BGR scaler
*/
#if defined(ARCH_X86) && defined (CONFIG_GPL)
DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
0x0103010301030103LL,
0x0200020002000200LL,};
const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
0x0602060206020602LL,
0x0004000400040004LL,};
DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
#ifdef FAST_BGR2YV12
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUV[2][4]) = {
{0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
{0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */

// clipping helper table for C implementations:
static unsigned char clip_table[768];
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
extern const uint8_t dither_2x2_4[2][8];
extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];
const char *sws_format_name(enum PixelFormat format)
{
switch (format) {
case PIX_FMT_YUV420P:
return "yuv420p";
case PIX_FMT_YUVA420P:
return "yuva420p";
case PIX_FMT_YUYV422:
return "yuyv422";
case PIX_FMT_RGB24:
return "rgb24";
case PIX_FMT_BGR24:
return "bgr24";
case PIX_FMT_YUV422P:
return "yuv422p";
case PIX_FMT_YUV444P:
return "yuv444p";
case PIX_FMT_RGB32:
return "rgb32";
case PIX_FMT_YUV410P:
return "yuv410p";
case PIX_FMT_YUV411P:
return "yuv411p";
case PIX_FMT_RGB565:
return "rgb565";
case PIX_FMT_RGB555:
return "rgb555";
case PIX_FMT_GRAY16BE:
return "gray16be";
case PIX_FMT_GRAY16LE:
return "gray16le";
case PIX_FMT_GRAY8:
return "gray8";
case PIX_FMT_MONOWHITE:
return "mono white";
case PIX_FMT_MONOBLACK:
return "mono black";
case PIX_FMT_PAL8:
return "Palette";
case PIX_FMT_YUVJ420P:
return "yuvj420p";
case PIX_FMT_YUVJ422P:
return "yuvj422p";
case PIX_FMT_YUVJ444P:
return "yuvj444p";
case PIX_FMT_XVMC_MPEG2_MC:
return "xvmc_mpeg2_mc";
case PIX_FMT_XVMC_MPEG2_IDCT:
return "xvmc_mpeg2_idct";
case PIX_FMT_UYVY422:
return "uyvy422";
case PIX_FMT_UYYVYY411:
return "uyyvyy411";
case PIX_FMT_RGB32_1:
return "rgb32x";
case PIX_FMT_BGR32_1:
return "bgr32x";
case PIX_FMT_BGR32:
return "bgr32";
case PIX_FMT_BGR565:
return "bgr565";
case PIX_FMT_BGR555:
return "bgr555";
case PIX_FMT_BGR8:
return "bgr8";
case PIX_FMT_BGR4:
return "bgr4";
case PIX_FMT_BGR4_BYTE:
return "bgr4 byte";
case PIX_FMT_RGB8:
return "rgb8";
case PIX_FMT_RGB4:
return "rgb4";
case PIX_FMT_RGB4_BYTE:
return "rgb4 byte";
case PIX_FMT_NV12:
return "nv12";
case PIX_FMT_NV21:
return "nv21";
default:
return "Unknown format";
}
}
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
//FIXME Optimize (just quickly written, not opti..)
int i;
for (i=0; i<dstW; i++)
{
int val=1<<18;
int j;
for (j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
dest[i]= av_clip_uint8(val>>19);
}
if (uDest)
for (i=0; i<chrDstW; i++)
{
int u=1<<18;
int v=1<<18;
int j;
for (j=0; j<chrFilterSize; j++)
{
u += chrSrc[j][i] * chrFilter[j];
v += chrSrc[j][i + VOFW] * chrFilter[j];
}
uDest[i]= av_clip_uint8(u>>19);
vDest[i]= av_clip_uint8(v>>19);
}
}
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
//FIXME Optimize (just quickly written, not opti..)
int i;
for (i=0; i<dstW; i++)
{
int val=1<<18;
int j;
for (j=0; j<lumFilterSize; j++)
val += lumSrc[j][i] * lumFilter[j];
dest[i]= av_clip_uint8(val>>19);
}
if (!uDest)
return;
if (dstFormat == PIX_FMT_NV12)
for (i=0; i<chrDstW; i++)
{
int u=1<<18;
int v=1<<18;
int j;
for (j=0; j<chrFilterSize; j++)
{
u += chrSrc[j][i] * chrFilter[j];
v += chrSrc[j][i + VOFW] * chrFilter[j];
}
uDest[2*i]= av_clip_uint8(u>>19);
uDest[2*i+1]= av_clip_uint8(v>>19);
}
else
for (i=0; i<chrDstW; i++)
{
int u=1<<18;
int v=1<<18;
int j;
for (j=0; j<chrFilterSize; j++)
{
u += chrSrc[j][i] * chrFilter[j];
v += chrSrc[j][i + VOFW] * chrFilter[j];
}
uDest[2*i]= av_clip_uint8(v>>19);
uDest[2*i+1]= av_clip_uint8(u>>19);
}
}
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
for (i=0; i<(dstW>>1); i++){\
int j;\
int Y1 = 1<<18;\
int Y2 = 1<<18;\
int U = 1<<18;\
int V = 1<<18;\
type av_unused *r, *b, *g;\
const int i2= 2*i;\
\
for (j=0; j<lumFilterSize; j++)\
{\
Y1 += lumSrc[j][i2] * lumFilter[j];\
Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\
for (j=0; j<chrFilterSize; j++)\
{\
U += chrSrc[j][i] * chrFilter[j];\
V += chrSrc[j][i+VOFW] * chrFilter[j];\
}\
Y1>>=19;\
Y2>>=19;\
U >>=19;\
V >>=19;\

#define YSCALE_YUV_2_PACKEDX_C(type) \
YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
if ((Y1|Y2|U|V)&256)\
{\
if (Y1>255) Y1=255; \
else if (Y1<0)Y1=0; \
if (Y2>255) Y2=255; \
else if (Y2<0)Y2=0; \
if (U>255) U=255; \
else if (U<0) U=0; \
if (V>255) V=255; \
else if (V<0) V=0; \
}
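/* The (Y1|Y2|U|V)&256 test above is a cheap range check: after the >>19 the
   values are expected to lie in 0..255, and a negative result or a small
   overshoot sets bit 8 (or the sign bits), so the per-component clamping is
   only taken for the rare out-of-range pixels. */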
#define YSCALE_YUV_2_PACKEDX_FULL_C \
for (i=0; i<dstW; i++){\
int j;\
int Y = 0;\
int U = -128<<19;\
int V = -128<<19;\
int R,G,B;\
\
for (j=0; j<lumFilterSize; j++){\
Y += lumSrc[j][i ] * lumFilter[j];\
}\
for (j=0; j<chrFilterSize; j++){\
U += chrSrc[j][i ] * chrFilter[j];\
V += chrSrc[j][i+VOFW] * chrFilter[j];\
}\
Y >>=10;\
U >>=10;\
V >>=10;\

#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
YSCALE_YUV_2_PACKEDX_FULL_C\
Y-= c->oy;\
Y*= c->cy;\
Y+= rnd;\
R= Y + V*c->cvr;\
G= Y + V*c->cvg + U*c->cug;\
B= Y + U*c->cub;\
if ((R|G|B)&(0xC0000000)){\
if (R>=(256<<22)) R=(256<<22)-1; \
else if (R<0)R=0; \
if (G>=(256<<22)) G=(256<<22)-1; \
else if (G<0)G=0; \
if (B>=(256<<22)) B=(256<<22)-1; \
else if (B<0)B=0; \
}\

#define YSCALE_YUV_2_GRAY16_C \
for (i=0; i<(dstW>>1); i++){\
int j;\
int Y1 = 1<<18;\
int Y2 = 1<<18;\
int U = 1<<18;\
int V = 1<<18;\
const int i2= 2*i;\
\
for (j=0; j<lumFilterSize; j++)\
{\
Y1 += lumSrc[j][i2] * lumFilter[j];\
Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\
Y1>>=11;\
Y2>>=11;\
if ((Y1|Y2|U|V)&65536)\
{\
if (Y1>65535) Y1=65535; \
else if (Y1<0)Y1=0; \
if (Y2>65535) Y2=65535; \
else if (Y2<0)Y2=0; \
}
#define YSCALE_YUV_2_RGBX_C(type) \
YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
r = (type *)c->table_rV[V]; \
g = (type *)(c->table_gU[U] + c->table_gV[V]); \
b = (type *)c->table_bU[U]; \

#define YSCALE_YUV_2_PACKED2_C \
for (i=0; i<(dstW>>1); i++){ \
const int i2= 2*i; \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \
int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \

#define YSCALE_YUV_2_GRAY16_2_C \
for (i=0; i<(dstW>>1); i++){ \
const int i2= 2*i; \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>11; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11; \

#define YSCALE_YUV_2_RGB2_C(type) \
YSCALE_YUV_2_PACKED2_C\
type *r, *b, *g;\
r = (type *)c->table_rV[V];\
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
b = (type *)c->table_bU[U];\

#define YSCALE_YUV_2_PACKED1_C \
for (i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (uvbuf1[i ])>>7;\
int V= (uvbuf1[i+VOFW])>>7;\

#define YSCALE_YUV_2_GRAY16_1_C \
for (i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]<<1;\
int Y2= buf0[i2+1]<<1;\

#define YSCALE_YUV_2_RGB1_C(type) \
YSCALE_YUV_2_PACKED1_C\
type *r, *b, *g;\
r = (type *)c->table_rV[V];\
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
b = (type *)c->table_bU[U];\

#define YSCALE_YUV_2_PACKED1B_C \
for (i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\
int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\

#define YSCALE_YUV_2_RGB1B_C(type) \
YSCALE_YUV_2_PACKED1B_C\
type *r, *b, *g;\
r = (type *)c->table_rV[V];\
g = (type *)(c->table_gU[U] + c->table_gV[V]);\
b = (type *)c->table_bU[U];\

#define YSCALE_YUV_2_MONO2_C \
const uint8_t * const d128=dither_8x8_220[y&7];\
uint8_t *g= c->table_gU[128] + c->table_gV[128];\
for (i=0; i<dstW-7; i+=8){\
int acc;\
acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
dest++;\
}
#define YSCALE_YUV_2_MONOX_C \
const uint8_t * const d128=dither_8x8_220[y&7];\
uint8_t *g= c->table_gU[128] + c->table_gV[128];\
int acc=0;\
for (i=0; i<dstW-1; i+=2){\
int j;\
int Y1=1<<18;\
int Y2=1<<18;\
\
for (j=0; j<lumFilterSize; j++)\
{\
Y1 += lumSrc[j][i] * lumFilter[j];\
Y2 += lumSrc[j][i+1] * lumFilter[j];\
}\
Y1>>=19;\
Y2>>=19;\
if ((Y1|Y2)&256)\
{\
if (Y1>255) Y1=255;\
else if (Y1<0)Y1=0;\
if (Y2>255) Y2=255;\
else if (Y2<0)Y2=0;\
}\
acc+= acc + g[Y1+d128[(i+0)&7]];\
acc+= acc + g[Y2+d128[(i+1)&7]];\
if ((i&7)==6){\
((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
dest++;\
}\
}
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
switch(c->dstFormat)\
{\
case PIX_FMT_RGB32:\
case PIX_FMT_BGR32:\
case PIX_FMT_RGB32_1:\
case PIX_FMT_BGR32_1:\
func(uint32_t)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
} \
break;\
case PIX_FMT_RGB24:\
func(uint8_t)\
((uint8_t*)dest)[0]= r[Y1];\
((uint8_t*)dest)[1]= g[Y1];\
((uint8_t*)dest)[2]= b[Y1];\
((uint8_t*)dest)[3]= r[Y2];\
((uint8_t*)dest)[4]= g[Y2];\
((uint8_t*)dest)[5]= b[Y2];\
dest+=6;\
}\
break;\
case PIX_FMT_BGR24:\
func(uint8_t)\
((uint8_t*)dest)[0]= b[Y1];\
((uint8_t*)dest)[1]= g[Y1];\
((uint8_t*)dest)[2]= r[Y1];\
((uint8_t*)dest)[3]= b[Y2];\
((uint8_t*)dest)[4]= g[Y2];\
((uint8_t*)dest)[5]= r[Y2];\
dest+=6;\
}\
break;\
case PIX_FMT_RGB565:\
case PIX_FMT_BGR565:\
{\
const int dr1= dither_2x2_8[y&1 ][0];\
const int dg1= dither_2x2_4[y&1 ][0];\
const int db1= dither_2x2_8[(y&1)^1][0];\
const int dr2= dither_2x2_8[y&1 ][1];\
const int dg2= dither_2x2_4[y&1 ][1];\
const int db2= dither_2x2_8[(y&1)^1][1];\
func(uint16_t)\
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
}\
}\
break;\
case PIX_FMT_RGB555:\
case PIX_FMT_BGR555:\
{\
const int dr1= dither_2x2_8[y&1 ][0];\
const int dg1= dither_2x2_8[y&1 ][1];\
const int db1= dither_2x2_8[(y&1)^1][0];\
const int dr2= dither_2x2_8[y&1 ][1];\
const int dg2= dither_2x2_8[y&1 ][0];\
const int db2= dither_2x2_8[(y&1)^1][1];\
func(uint16_t)\
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
}\
}\
break;\
case PIX_FMT_RGB8:\
case PIX_FMT_BGR8:\
{\
const uint8_t * const d64= dither_8x8_73[y&7];\
const uint8_t * const d32= dither_8x8_32[y&7];\
func(uint8_t)\
((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
}\
}\
break;\
case PIX_FMT_RGB4:\
case PIX_FMT_BGR4:\
{\
const uint8_t * const d64= dither_8x8_73 [y&7];\
const uint8_t * const d128=dither_8x8_220[y&7];\
func(uint8_t)\
((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
+ ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
}\
}\
break;\
case PIX_FMT_RGB4_BYTE:\
case PIX_FMT_BGR4_BYTE:\
{\
const uint8_t * const d64= dither_8x8_73 [y&7];\
const uint8_t * const d128=dither_8x8_220[y&7];\
func(uint8_t)\
((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
}\
}\
break;\
case PIX_FMT_MONOBLACK:\
case PIX_FMT_MONOWHITE:\
{\
func_monoblack\
}\
break;\
case PIX_FMT_YUYV422:\
func2\
((uint8_t*)dest)[2*i2+0]= Y1;\
((uint8_t*)dest)[2*i2+1]= U;\
((uint8_t*)dest)[2*i2+2]= Y2;\
((uint8_t*)dest)[2*i2+3]= V;\
} \
break;\
case PIX_FMT_UYVY422:\
func2\
((uint8_t*)dest)[2*i2+0]= U;\
((uint8_t*)dest)[2*i2+1]= Y1;\
((uint8_t*)dest)[2*i2+2]= V;\
((uint8_t*)dest)[2*i2+3]= Y2;\
} \
break;\
case PIX_FMT_GRAY16BE:\
func_g16\
((uint8_t*)dest)[2*i2+0]= Y1>>8;\
((uint8_t*)dest)[2*i2+1]= Y1;\
((uint8_t*)dest)[2*i2+2]= Y2>>8;\
((uint8_t*)dest)[2*i2+3]= Y2;\
} \
break;\
case PIX_FMT_GRAY16LE:\
func_g16\
((uint8_t*)dest)[2*i2+0]= Y1;\
((uint8_t*)dest)[2*i2+1]= Y1>>8;\
((uint8_t*)dest)[2*i2+2]= Y2;\
((uint8_t*)dest)[2*i2+3]= Y2>>8;\
} \
break;\
}
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, int dstW, int y)
{
int i;
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
}
static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, int dstW, int y)
{
int i;
int step= fmt_depth(c->dstFormat)/8;
switch(c->dstFormat){
case PIX_FMT_ARGB:
dest++;
case PIX_FMT_RGB24:
case PIX_FMT_RGBA:
YSCALE_YUV_2_RGBX_FULL_C(1<<21)
dest[0]= R>>22;
dest[1]= G>>22;
dest[2]= B>>22;
dest[3]= 0;
dest+= step;
}
break;
case PIX_FMT_ABGR:
dest++;
case PIX_FMT_BGR24:
case PIX_FMT_BGRA:
YSCALE_YUV_2_RGBX_FULL_C(1<<21)
dest[0]= B>>22;
dest[1]= G>>22;
dest[2]= R>>22;
dest[3]= 0;
dest+= step;
}
break;
default:
assert(0);
}
}
//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
#define COMPILE_C
#endif
#ifdef ARCH_POWERPC
#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC
#if defined(ARCH_X86)
#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_MMX
#endif
#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_MMX2
#endif
#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86 || ARCH_X86_64
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
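/* Each #include of "swscale_template.c" below compiles the whole scaler once
   more under a different combination of HAVE_* macros, with RENAME() giving
   every function a per-variant suffix (_C, _altivec, _MMX, _MMX2, _3DNow), so
   all variants can coexist in one binary and be selected at run time. */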
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
#if defined(ARCH_X86)

//X86 versions
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
*/

#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif
#endif //ARCH_X86 || ARCH_X86_64
// minor note: the HAVE_xyz is messed up after that line so don't use it
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
// printf("%f %f %f %f %f\n", a,b,c,d,dist);
if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;
else return getSplineCoeff( 0.0,
b+ 2.0*c + 3.0*d,
c + 3.0*d,
-b- 3.0*c - 6.0*d,
dist-1.0);
}
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
int srcW, int dstW, int filterAlign, int one, int flags,
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
int i;
int filterSize;
int filter2Size;
int minFilterSize;
double *filter=NULL;
double *filter2=NULL;
if (flags & SWS_CPU_CAPS_MMX)
asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
// Note the +1 is for the MMX scaler which reads over the end
*filterPos = av_malloc((dstW+1)*sizeof(int16_t));
if (FFABS(xInc - 0x10000) <10) // unscaled
{
int i;
filterSize= 1;
filter= av_malloc(dstW*sizeof(double)*filterSize);
for (i=0; i<dstW*filterSize; i++) filter[i]=0;
for (i=0; i<dstW; i++)
{
filter[i*filterSize]=1;
(*filterPos)[i]=i;
}
}
else if (flags&SWS_POINT) // lame looking point sampling mode
{
int i;
int xDstInSrc;
filterSize= 1;
filter= av_malloc(dstW*sizeof(double)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
for (i=0; i<dstW; i++)
{
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
(*filterPos)[i]= xx;
filter[i]= 1.0;
xDstInSrc+= xInc;
}
}
else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
{
int i;
int xDstInSrc;
if (flags&SWS_BICUBIC) filterSize= 4;
else if (flags&SWS_X ) filterSize= 4;
else filterSize= 2; // SWS_BILINEAR / SWS_AREA
filter= av_malloc(dstW*sizeof(double)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
for (i=0; i<dstW; i++)