/*
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
  x -> x
  YUV9 -> YV12
  YUV9/YV12 -> Y800
  Y800 -> YUV9/YV12
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/

/*
  tested special converters (most are tested actually, but I didn't write it down ...)
  YV12 -> BGR16
  YVU9 -> YV12
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
*/
#include <inttypes.h>
#include "../config.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
Björn Sandell
committed
#else
#include <stdlib.h>
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include "swscale.h"
Michael Niedermayer
committed
#include "swscale_internal.h"
#include "../libvo/img_format.h"
#include "rgb2rgb.h"
//#undef HAVE_MMX2
//#undef HAVE_MMX
//#undef ARCH_X86
#define DITHER1XBPP
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
#define RET 0xC3 //near return opcode for X86
#ifdef MP_DEBUG
#endif
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
//FIXME replace this with something faster
// Image-format predicates. Each macro expands to a non-zero value when the
// format code x belongs to the named group. x is expanded several times, so
// do not pass an expression with side effects.

// True for the planar YUV formats YV12, YVU9, 444P, 422P and 411P.
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
// True for UYVY, YUY2 and everything isPlanarYUV() accepts.
#define isYUV(x) ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
// True when x, masked with IMGFMT_RGB_MASK, equals IMGFMT_RGB.
#define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
// True when x, masked with IMGFMT_BGR_MASK, equals IMGFMT_BGR.
#define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
// True for the source formats listed explicitly below (a fixed list; RGB/BGR
// are limited to the depths named here).
#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
|| (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
// True when x is a destination format the scaler can write: the YUV formats
// named below, any RGB or BGR format (the supported-format list at the top of
// this file names every {BGR,RGB}{1,4,8,15,16,24,32} depth as an output),
// and Y800 / YVU9. x is expanded several times.
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
			|| isRGB(x) || isBGR(x)\
			|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
// True for YUY2, UYVY and any format accepted by isRGB() or isBGR().
#define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))
// Fixed-point conversion constants: each real factor is multiplied by
// 2^RGB2YUV_SHIFT (RGB2YUV_SHIFT is defined outside this section), 0.5 is
// added and the result is truncated to int. Because the (int) cast truncates
// toward zero, the +0.5 rounds to nearest only for the positive factors; the
// negative ones end up one step closer to zero than round-to-nearest would give.
// NOTE(review): the names suggest <source channel B/G/R><target Y/U/V>
// weights for an RGB->YUV conversion - confirm against the code that uses them.
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
Michael Niedermayer
committed
extern const int32_t Inverse_Table_6_9[8][4];
/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
TODO
more intelligent missalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
Optimize C code (yv12 / minmax)
add support for packed pixel yuv input & output
add support for Y8 output
optimize bgr24 & bgr32
add BGR4 output support
write special BGR->BGR scaler
*/
/* Small arithmetic helpers. They are macros, so every argument may be
   evaluated more than once: never pass an expression with side effects. */
#define ABS(a) (0 < (a) ? (a) : (-(a)))
#define MIN(a,b) ((b) < (a) ? (b) : (a))
#define MAX(a,b) ((b) > (a) ? (b) : (a))
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/* 8-byte aligned 64-bit constants and scratch words for the x86 code paths.
   NOTE(review): presumably these are referenced from inline assembly in
   swscale_template.c, which is why most carry attribute_used - confirm. */

/* byte masks and word constants */
static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02= 0x0002000200020002LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* per-line dither words; not initialised here, written at run time */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

/* two-entry dither patterns */
static uint64_t __attribute__((aligned(8))) dither4[2]={
	0x0103010301030103LL,
	0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
	0x0602060206020602LL,
	0x0004000400040004LL,};

/* channel bit masks, replicated in each 16-bit lane (16 bpp: 5/6/5 bits, 15 bpp: 5/5/5 bits) */
static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;

/* masks selecting every third byte, at three different phases */
static uint64_t attribute_used __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;

/* BGR->YUV coefficients packed as four 16-bit words. Exactly one set may be
   defined: FAST_BGR2YV12 selects the 7-bit set (factors scaled by 128),
   otherwise the 15-bit set (factors scaled by 32768) is used. */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
/* 16 and 128 replicated in every byte; 1 replicated in every 16-bit word */
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111 attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif
// clipping helper table for C implementations:
// 768 entries; it is not initialised at its definition, so it has to be
// filled in elsewhere before use.
static unsigned char clip_table[768];
// Forward declaration; used by initFilter() to combine the source filter with
// the per-pixel scale filter.
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
// Dither matrices defined in another translation unit. The packed-RGB output
// code selects a row with y&1 (2x2 tables) or y&7 (8x8 tables).
extern const uint8_t dither_2x2_4[2][8];
extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];
Michael Niedermayer
committed
// NOTE(review): the two lines after the #if are the tail of an expression that
// adds up the x86 constants declared earlier in this file. The beginning of the
// statement, the definition that contains it and the matching #endif are not
// present here - restore them from version control before building.
#if defined(ARCH_X86) || defined(ARCH_X86_64)
bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
/**
 * Vertical filter producing one line of planar YUV output (plain C version).
 *
 * Every output sample is the sum over all filter taps of
 * source-line sample * coefficient, started at 1<<18 (rounding term for the
 * following shift), shifted right by 19 and clamped to 0..255.
 *
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize pointers to luma source lines (>= dstW samples each)
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize pointers to chroma source lines; each line
 *                      holds U at index i and V at index i + 2048
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes
 * @param uDest         U output, chrDstW bytes, or NULL to skip chroma entirely
 * @param vDest         V output, chrDstW bytes; only written when uDest is non-NULL
 * @param dstW          number of luma samples to produce
 * @param chrDstW       number of chroma samples to produce
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    //FIXME optimize (written quickly, not tuned)
    int i;
    for(i=0; i<dstW; i++)
    {
        int val= 1<<18;
        int j;
        for(j=0; j<lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];

        val >>= 19;
        dest[i]= val < 0 ? 0 : (val > 255 ? 255 : val);
    }

    if(uDest != NULL)
    {
        for(i=0; i<chrDstW; i++)
        {
            int u= 1<<18;
            int v= 1<<18;
            int j;
            for(j=0; j<chrFilterSize; j++)
            {
                u += chrSrc[j][i] * chrFilter[j];
                v += chrSrc[j][i + 2048] * chrFilter[j]; // V is stored 2048 samples after U
            }

            u >>= 19;
            v >>= 19;
            uDest[i]= u < 0 ? 0 : (u > 255 ? 255 : u);
            vDest[i]= v < 0 ? 0 : (v > 255 ? 255 : v);
        }
    }
}
int Y1=1<<18;\
int Y2=1<<18;\
int U=1<<18;\
int V=1<<18;\
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
type *r, *b, *g;\
const int i2= 2*i;\
\
for(j=0; j<lumFilterSize; j++)\
{\
Y1 += lumSrc[j][i2] * lumFilter[j];\
Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\
for(j=0; j<chrFilterSize; j++)\
{\
U += chrSrc[j][i] * chrFilter[j];\
V += chrSrc[j][i+2048] * chrFilter[j];\
}\
Y1>>=19;\
Y2>>=19;\
U >>=19;\
V >>=19;\
if((Y1|Y2|U|V)&256)\
{\
if(Y1>255) Y1=255;\
else if(Y1<0)Y1=0;\
if(Y2>255) Y2=255;\
else if(Y2<0)Y2=0;\
if(U>255) U=255;\
else if(U<0) U=0;\
if(V>255) V=255;\
else if(V<0) V=0;\
r = c->table_rV[V];\
g = c->table_gU[U] + c->table_gV[V];\
b = c->table_bU[U];\
for(i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19;\
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19;\
int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;\
type *r, *b, *g;\
r = c->table_rV[V];\
g = c->table_gU[U] + c->table_gV[V];\
b = c->table_bU[U];\
for(i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (uvbuf1[i ])>>7;\
int V= (uvbuf1[i+2048])>>7;\
type *r, *b, *g;\
r = c->table_rV[V];\
g = c->table_gU[U] + c->table_gV[V];\
b = c->table_bU[U];\
for(i=0; i<(dstW>>1); i++){\
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\
int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\
type *r, *b, *g;\
r = c->table_rV[V];\
g = c->table_gU[U] + c->table_gV[V];\
b = c->table_bU[U];\
switch(c->dstFormat)\
{\
case IMGFMT_BGR32:\
case IMGFMT_RGB32:\
func(uint32_t)\
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
} \
break;\
case IMGFMT_RGB24:\
func(uint8_t)\
((uint8_t*)dest)[0]= r[Y1];\
((uint8_t*)dest)[1]= g[Y1];\
((uint8_t*)dest)[2]= b[Y1];\
((uint8_t*)dest)[3]= r[Y2];\
((uint8_t*)dest)[4]= g[Y2];\
((uint8_t*)dest)[5]= b[Y2];\
D Richard Felker III
committed
dest+=6;\
}\
break;\
case IMGFMT_BGR24:\
func(uint8_t)\
((uint8_t*)dest)[0]= b[Y1];\
((uint8_t*)dest)[1]= g[Y1];\
((uint8_t*)dest)[2]= r[Y1];\
((uint8_t*)dest)[3]= b[Y2];\
((uint8_t*)dest)[4]= g[Y2];\
((uint8_t*)dest)[5]= r[Y2];\
D Richard Felker III
committed
dest+=6;\
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
}\
break;\
case IMGFMT_RGB16:\
case IMGFMT_BGR16:\
{\
const int dr1= dither_2x2_8[y&1 ][0];\
const int dg1= dither_2x2_4[y&1 ][0];\
const int db1= dither_2x2_8[(y&1)^1][0];\
const int dr2= dither_2x2_8[y&1 ][1];\
const int dg2= dither_2x2_4[y&1 ][1];\
const int db2= dither_2x2_8[(y&1)^1][1];\
func(uint16_t)\
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
}\
}\
break;\
case IMGFMT_RGB15:\
case IMGFMT_BGR15:\
{\
const int dr1= dither_2x2_8[y&1 ][0];\
const int dg1= dither_2x2_8[y&1 ][1];\
const int db1= dither_2x2_8[(y&1)^1][0];\
const int dr2= dither_2x2_8[y&1 ][1];\
const int dg2= dither_2x2_8[y&1 ][0];\
const int db2= dither_2x2_8[(y&1)^1][1];\
func(uint16_t)\
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
}\
}\
break;\
case IMGFMT_RGB8:\
case IMGFMT_BGR8:\
{\
const uint8_t * const d64= dither_8x8_73[y&7];\
const uint8_t * const d32= dither_8x8_32[y&7];\
func(uint8_t)\
((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
}\
}\
break;\
case IMGFMT_RGB4:\
case IMGFMT_BGR4:\
{\
const uint8_t * const d64= dither_8x8_73 [y&7];\
const uint8_t * const d128=dither_8x8_220[y&7];\
func(uint8_t)\
Michael Niedermayer
committed
((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
+ ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
}\
}\
break;\
case IMGFMT_RG4B:\
case IMGFMT_BG4B:\
{\
const uint8_t * const d64= dither_8x8_73 [y&7];\
const uint8_t * const d128=dither_8x8_220[y&7];\
func(uint8_t)\
((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
}\
}\
break;\
case IMGFMT_RGB1:\
case IMGFMT_BGR1:\
{\
const uint8_t * const d128=dither_8x8_220[y&7];\
uint8_t *g= c->table_gU[128] + c->table_gV[128];\
for(i=0; i<dstW-7; i+=8){\
int acc;\
acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
((uint8_t*)dest)[0]= acc;\
D Richard Felker III
committed
dest++;\
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
}\
\
/*\
((uint8_t*)dest)-= dstW>>4;\
{\
int acc=0;\
int left=0;\
static int top[1024];\
static int last_new[1024][1024];\
static int last_in3[1024][1024];\
static int drift[1024][1024];\
int topLeft=0;\
int shift=0;\
int count=0;\
const uint8_t * const d128=dither_8x8_220[y&7];\
int error_new=0;\
int error_in3=0;\
int f=0;\
\
for(i=dstW>>1; i<dstW; i++){\
int in= ((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19);\
int in2 = (76309 * (in - 16) + 32768) >> 16;\
int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\
int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\
+ (last_new[y][i] - in3)*f/256;\
int new= old> 128 ? 255 : 0;\
\
error_new+= ABS(last_new[y][i] - new);\
error_in3+= ABS(last_in3[y][i] - in3);\
f= error_new - error_in3*4;\
if(f<0) f=0;\
if(f>256) f=256;\
\
topLeft= top[i];\
left= top[i]= old - new;\
last_new[y][i]= new;\
last_in3[y][i]= in3;\
\
acc+= acc + (new&1);\
if((i&7)==6){\
((uint8_t*)dest)[0]= acc;\
((uint8_t*)dest)++;\
}\
}\
}\
*/\
}\
break;\
case IMGFMT_YUY2:\
func2\
((uint8_t*)dest)[2*i2+0]= Y1;\
((uint8_t*)dest)[2*i2+1]= U;\
((uint8_t*)dest)[2*i2+2]= Y2;\
((uint8_t*)dest)[2*i2+3]= V;\
} \
break;\
case IMGFMT_UYVY:\
func2\
((uint8_t*)dest)[2*i2+0]= U;\
((uint8_t*)dest)[2*i2+1]= Y1;\
((uint8_t*)dest)[2*i2+2]= V;\
((uint8_t*)dest)[2*i2+3]= Y2;\
} \
break;\
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
case IMGFMT_RGB32:
case IMGFMT_BGR32:
YSCALE_YUV_2_RGBX_C(uint32_t)
((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
break;
case IMGFMT_RGB24:
YSCALE_YUV_2_RGBX_C(uint8_t)
((uint8_t*)dest)[0]= r[Y1];
((uint8_t*)dest)[1]= g[Y1];
((uint8_t*)dest)[2]= b[Y1];
((uint8_t*)dest)[3]= r[Y2];
((uint8_t*)dest)[4]= g[Y2];
((uint8_t*)dest)[5]= b[Y2];
D Richard Felker III
committed
dest+=6;
}
break;
case IMGFMT_BGR24:
YSCALE_YUV_2_RGBX_C(uint8_t)
((uint8_t*)dest)[0]= b[Y1];
((uint8_t*)dest)[1]= g[Y1];
((uint8_t*)dest)[2]= r[Y1];
((uint8_t*)dest)[3]= b[Y2];
((uint8_t*)dest)[4]= g[Y2];
((uint8_t*)dest)[5]= r[Y2];
D Richard Felker III
committed
dest+=6;
}
break;
case IMGFMT_RGB16:
case IMGFMT_BGR16:
{
const int dr1= dither_2x2_8[y&1 ][0];
const int dg1= dither_2x2_4[y&1 ][0];
const int db1= dither_2x2_8[(y&1)^1][0];
const int dr2= dither_2x2_8[y&1 ][1];
const int dg2= dither_2x2_4[y&1 ][1];
const int db2= dither_2x2_8[(y&1)^1][1];
YSCALE_YUV_2_RGBX_C(uint16_t)
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
break;
case IMGFMT_RGB15:
case IMGFMT_BGR15:
{
const int dr1= dither_2x2_8[y&1 ][0];
const int dg1= dither_2x2_8[y&1 ][1];
const int db1= dither_2x2_8[(y&1)^1][0];
const int dr2= dither_2x2_8[y&1 ][1];
const int dg2= dither_2x2_8[y&1 ][0];
const int db2= dither_2x2_8[(y&1)^1][1];
YSCALE_YUV_2_RGBX_C(uint16_t)
((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
}
break;
case IMGFMT_RGB8:
case IMGFMT_BGR8:
{
const uint8_t * const d64= dither_8x8_73[y&7];
const uint8_t * const d32= dither_8x8_32[y&7];
YSCALE_YUV_2_RGBX_C(uint8_t)
((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
break;
case IMGFMT_RGB4:
case IMGFMT_BGR4:
{
const uint8_t * const d64= dither_8x8_73 [y&7];
const uint8_t * const d128=dither_8x8_220[y&7];
YSCALE_YUV_2_RGBX_C(uint8_t)
Michael Niedermayer
committed
((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
+((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
}
}
break;
case IMGFMT_RG4B:
case IMGFMT_BG4B:
{
const uint8_t * const d64= dither_8x8_73 [y&7];
const uint8_t * const d128=dither_8x8_220[y&7];
YSCALE_YUV_2_RGBX_C(uint8_t)
((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
}
break;
case IMGFMT_RGB1:
case IMGFMT_BGR1:
{
const uint8_t * const d128=dither_8x8_220[y&7];
uint8_t *g= c->table_gU[128] + c->table_gV[128];
int acc=0;
for(i=0; i<dstW-1; i+=2){
int j;
for(j=0; j<lumFilterSize; j++)
{
Y1 += lumSrc[j][i] * lumFilter[j];
Y2 += lumSrc[j][i+1] * lumFilter[j];
}
Y1>>=19;
Y2>>=19;
if((Y1|Y2)&256)
{
if(Y1>255) Y1=255;
else if(Y1<0)Y1=0;
if(Y2>255) Y2=255;
else if(Y2<0)Y2=0;
}
acc+= acc + g[Y1+d128[(i+0)&7]];
acc+= acc + g[Y2+d128[(i+1)&7]];
if((i&7)==6){
((uint8_t*)dest)[0]= acc;
D Richard Felker III
committed
dest++;
((uint8_t*)dest)[2*i2+0]= Y1;
((uint8_t*)dest)[2*i2+1]= U;
((uint8_t*)dest)[2*i2+2]= Y2;
((uint8_t*)dest)[2*i2+3]= V;
}
break;
case IMGFMT_UYVY:
YSCALE_YUV_2_PACKEDX_C(void)
((uint8_t*)dest)[2*i2+0]= U;
((uint8_t*)dest)[2*i2+1]= Y1;
((uint8_t*)dest)[2*i2+2]= V;
((uint8_t*)dest)[2*i2+3]= Y2;
}
break;
//Note: we have C, X86, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one

// Decide which variants of swscale_template.c get compiled. With
// RUNTIME_CPUDETECT every variant is built; otherwise only the one matching
// the configured HAVE_* macros.

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86 || ARCH_X86_64

// From here on the HAVE_* macros no longer describe the build configuration:
// they are redefined before each inclusion to select the code path inside
// the template, and RENAME() gives every variant its own symbol suffix.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

//X86 versions (disabled)
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
*/

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86 || ARCH_X86_64

// minor note: the HAVE_xyz is messed up after that line so don't use it
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * While dist is not <= 1.0, the coefficients are replaced by
 *   a' = 0,  b' = b + 2c + 3d,  c' = c + 3d,  d' = -b - 3c - 6d
 * and dist is reduced by 1.0. The result is then the cubic
 * a + b*dist + c*dist^2 + d*dist^3, evaluated in Horner form.
 *
 * @param a,b,c,d  coefficients of the cubic on the first unit interval
 * @param dist     distance at which to evaluate
 * @return the value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    /* The condition is written as a negated "<=" so that it takes exactly the
       same branch as the recursive formulation for every input value. */
    while(!(dist <= 1.0))
    {
        const double nextB = b + 2.0*c + 3.0*d;
        const double nextC = c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a = 0.0;
        b = nextB;
        c = nextC;
        d = nextD;
        dist = dist - 1.0;
    }

    return ((d*dist + c)*dist + b)*dist + a;
}
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
int srcW, int dstW, int filterAlign, int one, int flags,
Michael Niedermayer
committed
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
int i;
int filterSize;
int filter2Size;
int minFilterSize;
double *filter=NULL;
double *filter2=NULL;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
if(flags & SWS_CPU_CAPS_MMX)
asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
Michael Niedermayer
committed
// Note the +1 is for the MMXscaler which reads over the end
*filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));
if(ABS(xInc - 0x10000) <10) // unscaled
{
int i;
filterSize= 1;
filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
for(i=0; i<dstW*filterSize; i++) filter[i]=0;
for(i=0; i<dstW; i++)
{
filter[i*filterSize]=1;
(*filterPos)[i]=i;
}
}
else if(flags&SWS_POINT) // lame looking point sampling mode
{
int i;
int xDstInSrc;
filterSize= 1;
filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
for(i=0; i<dstW; i++)
{
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
(*filterPos)[i]= xx;
filter[i]= 1.0;
xDstInSrc+= xInc;
}
}
else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
{
int i;
int xDstInSrc;
if (flags&SWS_BICUBIC) filterSize= 4;
else if(flags&SWS_X ) filterSize= 4;
Michael Niedermayer
committed
else filterSize= 2; // SWS_BILINEAR / SWS_AREA
filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
xDstInSrc= xInc/2 - 0x8000;
for(i=0; i<dstW; i++)
{
int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
int j;
(*filterPos)[i]= xx;
Michael Niedermayer
committed
//Bilinear upscale / linear interpolate / Area averaging
for(j=0; j<filterSize; j++)
{
double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
double coeff= 1.0 - d;
if(coeff<0) coeff=0;
filter[i*filterSize + j]= coeff;
xx++;
}
xDstInSrc+= xInc;
}
}
double xDstInSrc;
double sizeFactor, filterSizeInSrc;
const double xInc1= (double)xInc / (double)(1<<16);
if (flags&SWS_BICUBIC) sizeFactor= 4.0;
else if(flags&SWS_X) sizeFactor= 8.0;
else if(flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear
else if(flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;)
Michael Niedermayer
committed
else if(flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
Michael Niedermayer
committed
else if(flags&SWS_SINC) sizeFactor= 20.0; // infinite ;)
else if(flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;)
else if(flags&SWS_BILINEAR) sizeFactor= 2.0;
Michael Niedermayer
committed
else {
sizeFactor= 0.0; //GCC warning killer
ASSERT(0)
}
if(xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale
else filterSizeInSrc= sizeFactor*srcW / (double)dstW;
filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
if(filterSize > srcW-2) filterSize=srcW-2;
filter= (double*)memalign(16, dstW*sizeof(double)*filterSize);
xDstInSrc= xInc1 / 2.0 - 0.5;
for(i=0; i<dstW; i++)
{
int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
(*filterPos)[i]= xx;
for(j=0; j<filterSize; j++)
double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
double coeff;
Michael Niedermayer
committed
double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
if(d<1.0)
coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
else if(d<2.0)
Michael Niedermayer
committed
coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
else
coeff=0.0;
}
/* else if(flags & SWS_X)
{
double p= param ? param*0.01 : 0.3;
coeff = d ? sin(d*PI)/(d*PI) : 1.0;
coeff*= pow(2.0, - p*d*d);
}*/
else if(flags & SWS_X)
{
Michael Niedermayer
committed
double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
if(d<1.0)
coeff = cos(d*PI);
else
coeff=-1.0;
if(coeff<0.0) coeff= -pow(-coeff, A);
else coeff= pow( coeff, A);
coeff= coeff*0.5 + 0.5;
}
Michael Niedermayer
committed
else if(flags & SWS_AREA)
Michael Niedermayer
committed
if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
else coeff=0.0;
}
Michael Niedermayer
committed
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = pow(2.0, - p*d*d);
}
else if(flags & SWS_SINC)
{
coeff = d ? sin(d*PI)/(d*PI) : 1.0;
}
else if(flags & SWS_LANCZOS)
{
Michael Niedermayer
committed
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
if(d>p) coeff=0;
}
else if(flags & SWS_BILINEAR)
{
coeff= 1.0 - d;
if(coeff<0) coeff=0;
}
else if(flags & SWS_SPLINE)
{
double p=-2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
}
Michael Niedermayer
committed
else {
coeff= 0.0; //GCC warning killer
ASSERT(0)
}
filter[i*filterSize + j]= coeff;
xx++;
}
}
}
/* apply src & dst Filter to filter -> filter2
free(filter);
*/
filter2Size= filterSize;
if(srcFilter) filter2Size+= srcFilter->length - 1;
if(dstFilter) filter2Size+= dstFilter->length - 1;
filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));
for(i=0; i<dstW; i++)
{
int j;
SwsVector scaleFilter;
SwsVector *outVec;
scaleFilter.coeff= filter + i*filterSize;
scaleFilter.length= filterSize;
if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
else outVec= &scaleFilter;
ASSERT(outVec->length == filter2Size)
//FIXME dstFilter
for(j=0; j<outVec->length; j++)
{
filter2[i*filter2Size + j]= outVec->coeff[j];
}
(*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
if(outVec != &scaleFilter) sws_freeVec(outVec);