Skip to content
Snippets Groups Projects
swscale.c 100 KiB
Newer Older
  • Learn to ignore specific revisions
  •  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
     *
     * This file is part of FFmpeg.
     *
     * FFmpeg is free software; you can redistribute it and/or modify
     * it under the terms of the GNU General Public License as published by
     * the Free Software Foundation; either version 2 of the License, or
     * (at your option) any later version.
     *
     * FFmpeg is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with FFmpeg; if not, write to the Free Software
    
     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    
     * the C code (not assembly, mmx, ...) of this file can be used
     * under the LGPL license too
    
      supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
    
      supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
    
      unscaled special converters (YV12=I420=IYUV, Y800=Y8)
      YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
      x -> x
      YUV9 -> YV12
      YUV9/YV12 -> Y800
      Y800 -> YUV9/YV12
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
      BGR24 -> BGR32 & RGB24 -> RGB32
      BGR32 -> BGR24 & RGB32 -> RGB24
    
      BGR15 -> BGR16
    
    Diego Biurrun's avatar
    Diego Biurrun committed
    tested special converters (most are tested actually, but I did not write it down ...)
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
     YV12 -> YV12
    
     BGR15 -> BGR16
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    
    untested special converters
    
    Diego Biurrun's avatar
    Diego Biurrun committed
      YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
    
      YV12/I420 -> YV12/I420
      YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
      BGR24 -> BGR32 & RGB24 -> RGB32
      BGR32 -> BGR24 & RGB32 -> RGB24
    
      BGR24 -> YV12
    
    #define _SVID_SOURCE //needed for MAP_ANONYMOUS
    
    #include <string.h>
    
    #include <math.h>
    
    #include <stdio.h>
    
    Bohdan Horst's avatar
    Bohdan Horst committed
    #include <unistd.h>
    
    #include "config.h"
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #include <assert.h>
    
    #ifdef HAVE_SYS_MMAN_H
    #include <sys/mman.h>
    
    #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
    #define MAP_ANONYMOUS MAP_ANON
    #endif
    
    #include "libavutil/x86_cpu.h"
    #include "libavutil/bswap.h"
    
    Arpi's avatar
    Arpi committed
    
    
    /**
     * Report the library version as a single packed integer.
     *
     * @return the value of LIBSWSCALE_VERSION_INT this file was compiled with
     */
    unsigned swscale_version(void)
    {
        const unsigned version = LIBSWSCALE_VERSION_INT;

        return version;
    }
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #undef MOVNTQ
    
    #undef PAVGB
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    //#define HAVE_3DNOW
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    //#define WORDS_BIGENDIAN
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
    
    
    #define RET 0xC3 //near return opcode for X86
    
    #ifdef M_PI
    #define PI M_PI
    #else
    #define PI 3.14159265358979323846
    #endif
    
    /*
     * Nonzero when pixel format x is one of the formats listed below, i.e. the
     * formats this file treats as supported input.
     *
     * x is expanded once per comparison, so it must not have side effects.
     * Every line except the last must end in a backslash with nothing (not even
     * a blank line) in between, otherwise the macro is silently truncated.
     */
    #define isSupportedIn(x)    (       \
               (x)==PIX_FMT_YUV420P     \
            || (x)==PIX_FMT_YUVA420P    \
            || (x)==PIX_FMT_YUYV422     \
            || (x)==PIX_FMT_UYVY422     \
            || (x)==PIX_FMT_RGB32       \
            || (x)==PIX_FMT_RGB32_1     \
            || (x)==PIX_FMT_BGR24       \
            || (x)==PIX_FMT_BGR565      \
            || (x)==PIX_FMT_BGR555      \
            || (x)==PIX_FMT_BGR32       \
            || (x)==PIX_FMT_BGR32_1     \
            || (x)==PIX_FMT_RGB24       \
            || (x)==PIX_FMT_RGB565      \
            || (x)==PIX_FMT_RGB555      \
            || (x)==PIX_FMT_GRAY8       \
            || (x)==PIX_FMT_YUV410P     \
            || (x)==PIX_FMT_GRAY16BE    \
            || (x)==PIX_FMT_GRAY16LE    \
            || (x)==PIX_FMT_YUV444P     \
            || (x)==PIX_FMT_YUV422P     \
            || (x)==PIX_FMT_YUV411P     \
            || (x)==PIX_FMT_PAL8        \
            || (x)==PIX_FMT_BGR8        \
            || (x)==PIX_FMT_RGB8        \
            || (x)==PIX_FMT_BGR4_BYTE   \
            || (x)==PIX_FMT_RGB4_BYTE   \
            || (x)==PIX_FMT_YUV440P     \
            || (x)==PIX_FMT_MONOWHITE   \
            || (x)==PIX_FMT_MONOBLACK   \
        )
    /*
     * Nonzero when pixel format x is one of the formats listed below, or when
     * isRGB(x) / isBGR(x) accepts it. x is expanded several times, so it must
     * not have side effects.
     */
    #define isSupportedOut(x) (             \
        (x) == PIX_FMT_YUV420P  ||          \
        (x) == PIX_FMT_YUYV422  ||          \
        (x) == PIX_FMT_UYVY422  ||          \
        (x) == PIX_FMT_YUV444P  ||          \
        (x) == PIX_FMT_YUV422P  ||          \
        (x) == PIX_FMT_YUV411P  ||          \
        isRGB(x)                ||          \
        isBGR(x)                ||          \
        (x) == PIX_FMT_NV12     ||          \
        (x) == PIX_FMT_NV21     ||          \
        (x) == PIX_FMT_GRAY16BE ||          \
        (x) == PIX_FMT_GRAY16LE ||          \
        (x) == PIX_FMT_GRAY8    ||          \
        (x) == PIX_FMT_YUV410P              \
    )
    /*
     * Nonzero when pixel format x is PAL8, YUYV422, UYVY422, or is accepted by
     * isRGB(x) / isBGR(x). x is expanded several times, so it must not have
     * side effects.
     */
    #define isPacked(x) (                   \
        (x) == PIX_FMT_PAL8    ||           \
        (x) == PIX_FMT_YUYV422 ||           \
        (x) == PIX_FMT_UYVY422 ||           \
        isRGB(x)               ||           \
        isBGR(x)                            \
    )
    
    /*
     * Integer RGB->YUV coefficients in fixed point with RGB2YUV_SHIFT fractional bits.
     * Each one is a real-valued weight scaled by 219/255 (the *Y macros) or
     * 224/255 (the *U and *V macros), multiplied by 2^RGB2YUV_SHIFT and rounded
     * to nearest by adding 0.5 before the truncating cast to int.
     * For negative weights the magnitude is rounded first and then negated.
     * Naming: presumably <colour><component>, e.g. BY = weight of blue in Y.
     */
    #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
    #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
    
    /*
     * Real-valued RGB->YUV weights, one row of nine coefficients per colourspace.
     * Column order is GY, BY, RY, GU, BU, RU, GV, BV, RV (compare the values of
     * the BY..RU macros with the 0.587/0.114/0.299 rows).
     * NOTE(review): the row index is presumably the colourspace id used elsewhere
     * in libswscale; only the FCC / SMPTE 170M / SMPTE 240M rows are labelled here.
     *
     * Every row must contain exactly nine comma-separated values: a missing comma
     * turns two coefficients into one arithmetic expression and silently shifts
     * the rest of the row.
     */
    static const double rgb2yuv_table[8][9]={
        {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
        {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
        {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
        {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
        {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
        {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
        {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
        {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
    };
    
    
    Special versions: fast Y 1:1 scaling (no interpolation in y direction)
    
    Diego Biurrun's avatar
    Diego Biurrun committed
    more intelligent misalignment avoidance for the horizontal scaler
    
    write special vertical cubic upscale version
    Optimize C code (yv12 / minmax)
    
    add support for packed pixel yuv input & output
    
    add support for Y8 output
    optimize bgr24 & bgr32
    
    write special BGR->BGR scaler
    
    #if defined(ARCH_X86) && defined (CONFIG_GPL)
    
    DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
    DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
    DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
    DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
    DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
    DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
    DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
    DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
    
    static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
    static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
    static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
    static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
    
    const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
    
            0x0103010301030103LL,
            0x0200020002000200LL,};
    
    const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
    
            0x0602060206020602LL,
            0x0004000400040004LL,};
    
    /*
     * Bit-field masks, each 16-bit pattern replicated across the four 16-bit
     * lanes of a 64-bit word.
     *   *16Mask: bits 0-4 (0x001F), bits 5-10 (0x07E0), bits 11-15 (0xF800) -- a 5/6/5 split
     *   *15Mask: bits 0-4 (0x001F), bits 5-9  (0x03E0), bits 10-14 (0x7C00) -- a 5/5/5 split
     * The b/g/r prefix presumably names the colour channel stored in that field.
     */
    DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
    DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
    DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
    DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
    DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
    DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
    
    DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
    DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
    DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #ifdef FAST_BGR2YV12
    
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #else
    
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
    
    #endif /* FAST_BGR2YV12 */
    
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
    
    
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
    
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUV[2][4]) = {
        {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
        {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
    };
    
    DECLARE_ALIGNED(8, const uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
    
    
    #endif /* defined(ARCH_X86) */
    
    
    // clipping helper table for C implementations:
    static unsigned char clip_table[768];
    
    
    static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
    
    extern const uint8_t dither_2x2_4[2][8];
    extern const uint8_t dither_2x2_8[2][8];
    extern const uint8_t dither_8x8_32[8][8];
    extern const uint8_t dither_8x8_73[8][8];
    extern const uint8_t dither_8x8_220[8][8];
    
    const char *sws_format_name(enum PixelFormat format)
    
        switch (format) {
            case PIX_FMT_YUV420P:
                return "yuv420p";
    
            case PIX_FMT_YUVA420P:
                return "yuva420p";
    
            case PIX_FMT_YUYV422:
                return "yuyv422";
            case PIX_FMT_RGB24:
                return "rgb24";
            case PIX_FMT_BGR24:
                return "bgr24";
            case PIX_FMT_YUV422P:
                return "yuv422p";
            case PIX_FMT_YUV444P:
                return "yuv444p";
            case PIX_FMT_RGB32:
                return "rgb32";
            case PIX_FMT_YUV410P:
                return "yuv410p";
            case PIX_FMT_YUV411P:
                return "yuv411p";
            case PIX_FMT_RGB565:
                return "rgb565";
            case PIX_FMT_RGB555:
                return "rgb555";
    
            case PIX_FMT_GRAY16BE:
                return "gray16be";
            case PIX_FMT_GRAY16LE:
                return "gray16le";
    
            case PIX_FMT_GRAY8:
                return "gray8";
            case PIX_FMT_MONOWHITE:
                return "mono white";
            case PIX_FMT_MONOBLACK:
                return "mono black";
            case PIX_FMT_PAL8:
                return "Palette";
            case PIX_FMT_YUVJ420P:
                return "yuvj420p";
            case PIX_FMT_YUVJ422P:
                return "yuvj422p";
            case PIX_FMT_YUVJ444P:
                return "yuvj444p";
            case PIX_FMT_XVMC_MPEG2_MC:
                return "xvmc_mpeg2_mc";
            case PIX_FMT_XVMC_MPEG2_IDCT:
                return "xvmc_mpeg2_idct";
            case PIX_FMT_UYVY422:
                return "uyvy422";
            case PIX_FMT_UYYVYY411:
                return "uyyvyy411";
            case PIX_FMT_RGB32_1:
                return "rgb32x";
            case PIX_FMT_BGR32_1:
                return "bgr32x";
            case PIX_FMT_BGR32:
                return "bgr32";
            case PIX_FMT_BGR565:
                return "bgr565";
            case PIX_FMT_BGR555:
                return "bgr555";
            case PIX_FMT_BGR8:
                return "bgr8";
            case PIX_FMT_BGR4:
                return "bgr4";
            case PIX_FMT_BGR4_BYTE:
                return "bgr4 byte";
            case PIX_FMT_RGB8:
                return "rgb8";
            case PIX_FMT_RGB4:
                return "rgb4";
            case PIX_FMT_RGB4_BYTE:
                return "rgb4 byte";
            case PIX_FMT_NV12:
                return "nv12";
            case PIX_FMT_NV21:
                return "nv21";
    
    Andreas Öman's avatar
    Andreas Öman committed
            case PIX_FMT_YUV440P:
                return "yuv440p";
    
            default:
                return "Unknown format";
        }
    
    static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
    
                                   int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                   uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
    
        //FIXME Optimize (just quickly writen not opti..)
        int i;
        for (i=0; i<dstW; i++)
        {
            int val=1<<18;
            int j;
            for (j=0; j<lumFilterSize; j++)
                val += lumSrc[j][i] * lumFilter[j];
    
            dest[i]= av_clip_uint8(val>>19);
        }
    
    
            for (i=0; i<chrDstW; i++)
            {
                int u=1<<18;
                int v=1<<18;
                int j;
                for (j=0; j<chrFilterSize; j++)
                {
                    u += chrSrc[j][i] * chrFilter[j];
    
                    v += chrSrc[j][i + VOFW] * chrFilter[j];
    
                }
    
                uDest[i]= av_clip_uint8(u>>19);
                vDest[i]= av_clip_uint8(v>>19);
            }
    
    static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
    
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                    uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
    
        //FIXME Optimize (just quickly writen not opti..)
        int i;
        for (i=0; i<dstW; i++)
        {
            int val=1<<18;
            int j;
            for (j=0; j<lumFilterSize; j++)
                val += lumSrc[j][i] * lumFilter[j];
    
            dest[i]= av_clip_uint8(val>>19);
        }
    
    
            return;
    
        if (dstFormat == PIX_FMT_NV12)
            for (i=0; i<chrDstW; i++)
            {
                int u=1<<18;
                int v=1<<18;
                int j;
                for (j=0; j<chrFilterSize; j++)
                {
                    u += chrSrc[j][i] * chrFilter[j];
    
                    v += chrSrc[j][i + VOFW] * chrFilter[j];
    
                }
    
                uDest[2*i]= av_clip_uint8(u>>19);
                uDest[2*i+1]= av_clip_uint8(v>>19);
            }
        else
            for (i=0; i<chrDstW; i++)
            {
                int u=1<<18;
                int v=1<<18;
                int j;
                for (j=0; j<chrFilterSize; j++)
                {
                    u += chrSrc[j][i] * chrFilter[j];
    
                    v += chrSrc[j][i + VOFW] * chrFilter[j];
    
                }
    
                uDest[2*i]= av_clip_uint8(v>>19);
                uDest[2*i+1]= av_clip_uint8(u>>19);
            }
    
    #define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
    
        for (i=0; i<(dstW>>1); i++){\
            int j;\
            int Y1 = 1<<18;\
            int Y2 = 1<<18;\
            int U  = 1<<18;\
            int V  = 1<<18;\
    
            const int i2= 2*i;\
            \
            for (j=0; j<lumFilterSize; j++)\
            {\
                Y1 += lumSrc[j][i2] * lumFilter[j];\
                Y2 += lumSrc[j][i2+1] * lumFilter[j];\
            }\
            for (j=0; j<chrFilterSize; j++)\
            {\
                U += chrSrc[j][i] * chrFilter[j];\
    
                V += chrSrc[j][i+VOFW] * chrFilter[j];\
    
    
    #define YSCALE_YUV_2_PACKEDX_C(type) \
            YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
    
            if ((Y1|Y2|U|V)&256)\
            {\
                if (Y1>255)   Y1=255; \
                else if (Y1<0)Y1=0;   \
                if (Y2>255)   Y2=255; \
                else if (Y2<0)Y2=0;   \
                if (U>255)    U=255;  \
                else if (U<0) U=0;    \
                if (V>255)    V=255;  \
                else if (V<0) V=0;    \
            }
    
    /*
     * Loop header for full-horizontal-resolution vertical filtering.
     *
     * Expands to the START of a `for (i=0; i<dstW; i++){` loop: the user of the
     * macro supplies the rest of the loop body and the closing brace.
     * Expects i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc, chrFilter,
     * chrFilterSize and VOFW to be in scope; declares j, Y, U, V, R, G, B.
     *
     * Y accumulates the filtered luma; U and V accumulate the filtered chroma on
     * top of a bias of -(128<<19). All three are then shifted right by 10.
     * The bias is written as -(128<<19) rather than -128<<19 because left-shifting
     * a negative value is undefined behaviour in C.
     */
    #define YSCALE_YUV_2_PACKEDX_FULL_C \
        for (i=0; i<dstW; i++){\
            int j;\
            int Y = 0;\
            int U = -(128<<19);\
            int V = -(128<<19);\
            int R,G,B;\
            \
            for (j=0; j<lumFilterSize; j++){\
                Y += lumSrc[j][i     ] * lumFilter[j];\
            }\
            for (j=0; j<chrFilterSize; j++){\
                U += chrSrc[j][i     ] * chrFilter[j];\
                V += chrSrc[j][i+VOFW] * chrFilter[j];\
            }\
            Y >>=10;\
            U >>=10;\
            V >>=10;
    
    /*
     * Per-pixel YUV->RGB conversion on top of YSCALE_YUV_2_PACKEDX_FULL_C.
     *
     * Like that macro, this expands to an OPEN loop body: the user appends the
     * store statements and the closing brace.
     * Y is offset by c->oy, scaled by c->cy and has rnd added; R, G and B are then
     * formed from Y plus V and U weighted by c->cvr, c->cvg, c->cug and c->cub.
     * If bit 30 or bit 31 is set in any of R, G, B (i.e. a value >= 256<<22 or a
     * negative value), each channel is clamped to the range 0 .. (256<<22)-1.
     * The callers visible in this file store the result as R>>22, G>>22, B>>22.
     */
    #define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
        YSCALE_YUV_2_PACKEDX_FULL_C\
            Y-= c->oy;\
            Y*= c->cy;\
            Y+= rnd;\
            R= Y + V*c->cvr;\
            G= Y + V*c->cvg + U*c->cug;\
            B= Y +            U*c->cub;\
            if ((R|G|B)&(0xC0000000)){\
                if (R>=(256<<22))   R=(256<<22)-1; \
                else if (R<0)R=0;   \
                if (G>=(256<<22))   G=(256<<22)-1; \
                else if (G<0)G=0;   \
                if (B>=(256<<22))   B=(256<<22)-1; \
                else if (B<0)B=0;   \
            }\
    
    
    
    #define YSCALE_YUV_2_GRAY16_C \
    
        for (i=0; i<(dstW>>1); i++){\
            int j;\
            int Y1 = 1<<18;\
            int Y2 = 1<<18;\
            int U  = 1<<18;\
            int V  = 1<<18;\
    
            const int i2= 2*i;\
            \
            for (j=0; j<lumFilterSize; j++)\
            {\
                Y1 += lumSrc[j][i2] * lumFilter[j];\
                Y2 += lumSrc[j][i2+1] * lumFilter[j];\
            }\
            Y1>>=11;\
            Y2>>=11;\
            if ((Y1|Y2|U|V)&65536)\
            {\
                if (Y1>65535)   Y1=65535; \
                else if (Y1<0)Y1=0;   \
                if (Y2>65535)   Y2=65535; \
                else if (Y2<0)Y2=0;   \
            }
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define YSCALE_YUV_2_RGBX_C(type) \
    
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)  \
    
        r = (type *)c->table_rV[V];   \
        g = (type *)(c->table_gU[U] + c->table_gV[V]); \
        b = (type *)c->table_bU[U];   \
    
    #define YSCALE_YUV_2_PACKED2_C   \
        for (i=0; i<(dstW>>1); i++){ \
            const int i2= 2*i;       \
            int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
            int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
            int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
    
            int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
    
    #define YSCALE_YUV_2_GRAY16_2_C   \
        for (i=0; i<(dstW>>1); i++){ \
            const int i2= 2*i;       \
            int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
            int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define YSCALE_YUV_2_RGB2_C(type) \
    
        YSCALE_YUV_2_PACKED2_C\
        type *r, *b, *g;\
        r = (type *)c->table_rV[V];\
        g = (type *)(c->table_gU[U] + c->table_gV[V]);\
        b = (type *)c->table_bU[U];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define YSCALE_YUV_2_PACKED1_C \
    
        for (i=0; i<(dstW>>1); i++){\
            const int i2= 2*i;\
            int Y1= buf0[i2  ]>>7;\
            int Y2= buf0[i2+1]>>7;\
            int U= (uvbuf1[i     ])>>7;\
    
    #define YSCALE_YUV_2_GRAY16_1_C \
        for (i=0; i<(dstW>>1); i++){\
            const int i2= 2*i;\
            int Y1= buf0[i2  ]<<1;\
            int Y2= buf0[i2+1]<<1;\
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define YSCALE_YUV_2_RGB1_C(type) \
    
        YSCALE_YUV_2_PACKED1_C\
        type *r, *b, *g;\
        r = (type *)c->table_rV[V];\
        g = (type *)(c->table_gU[U] + c->table_gV[V]);\
        b = (type *)c->table_bU[U];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define YSCALE_YUV_2_PACKED1B_C \
    
        for (i=0; i<(dstW>>1); i++){\
            const int i2= 2*i;\
            int Y1= buf0[i2  ]>>7;\
            int Y2= buf0[i2+1]>>7;\
            int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
    
            int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    
    #define YSCALE_YUV_2_RGB1B_C(type) \
    
        YSCALE_YUV_2_PACKED1B_C\
        type *r, *b, *g;\
        r = (type *)c->table_rV[V];\
        g = (type *)(c->table_gU[U] + c->table_gV[V]);\
        b = (type *)c->table_bU[U];\
    
    #define YSCALE_YUV_2_MONO2_C \
    
        const uint8_t * const d128=dither_8x8_220[y&7];\
        uint8_t *g= c->table_gU[128] + c->table_gV[128];\
        for (i=0; i<dstW-7; i+=8){\
            int acc;\
            acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
            acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
            acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
            acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
            acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
            acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
            acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
            acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
    
            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
    
    #define YSCALE_YUV_2_MONOX_C \
    
        const uint8_t * const d128=dither_8x8_220[y&7];\
        uint8_t *g= c->table_gU[128] + c->table_gV[128];\
        int acc=0;\
        for (i=0; i<dstW-1; i+=2){\
            int j;\
            int Y1=1<<18;\
            int Y2=1<<18;\
    \
            for (j=0; j<lumFilterSize; j++)\
            {\
                Y1 += lumSrc[j][i] * lumFilter[j];\
                Y2 += lumSrc[j][i+1] * lumFilter[j];\
            }\
            Y1>>=19;\
            Y2>>=19;\
            if ((Y1|Y2)&256)\
            {\
                if (Y1>255)   Y1=255;\
                else if (Y1<0)Y1=0;\
                if (Y2>255)   Y2=255;\
                else if (Y2<0)Y2=0;\
            }\
            acc+= acc + g[Y1+d128[(i+0)&7]];\
            acc+= acc + g[Y2+d128[(i+1)&7]];\
            if ((i&7)==6){\
    
                ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
    
                dest++;\
            }\
        }
    
    
    #define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    
        switch(c->dstFormat)\
        {\
        case PIX_FMT_RGB32:\
        case PIX_FMT_BGR32:\
    
        case PIX_FMT_RGB32_1:\
        case PIX_FMT_BGR32_1:\
    
            func(uint32_t)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
            }                \
            break;\
        case PIX_FMT_RGB24:\
            func(uint8_t)\
                ((uint8_t*)dest)[0]= r[Y1];\
                ((uint8_t*)dest)[1]= g[Y1];\
                ((uint8_t*)dest)[2]= b[Y1];\
                ((uint8_t*)dest)[3]= r[Y2];\
                ((uint8_t*)dest)[4]= g[Y2];\
                ((uint8_t*)dest)[5]= b[Y2];\
                dest+=6;\
            }\
            break;\
        case PIX_FMT_BGR24:\
            func(uint8_t)\
                ((uint8_t*)dest)[0]= b[Y1];\
                ((uint8_t*)dest)[1]= g[Y1];\
                ((uint8_t*)dest)[2]= r[Y1];\
                ((uint8_t*)dest)[3]= b[Y2];\
                ((uint8_t*)dest)[4]= g[Y2];\
                ((uint8_t*)dest)[5]= r[Y2];\
                dest+=6;\
            }\
            break;\
        case PIX_FMT_RGB565:\
        case PIX_FMT_BGR565:\
            {\
                const int dr1= dither_2x2_8[y&1    ][0];\
                const int dg1= dither_2x2_4[y&1    ][0];\
                const int db1= dither_2x2_8[(y&1)^1][0];\
                const int dr2= dither_2x2_8[y&1    ][1];\
                const int dg2= dither_2x2_4[y&1    ][1];\
                const int db2= dither_2x2_8[(y&1)^1][1];\
                func(uint16_t)\
                    ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                    ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                }\
            }\
            break;\
        case PIX_FMT_RGB555:\
        case PIX_FMT_BGR555:\
            {\
                const int dr1= dither_2x2_8[y&1    ][0];\
                const int dg1= dither_2x2_8[y&1    ][1];\
                const int db1= dither_2x2_8[(y&1)^1][0];\
                const int dr2= dither_2x2_8[y&1    ][1];\
                const int dg2= dither_2x2_8[y&1    ][0];\
                const int db2= dither_2x2_8[(y&1)^1][1];\
                func(uint16_t)\
                    ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                    ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                }\
            }\
            break;\
        case PIX_FMT_RGB8:\
        case PIX_FMT_BGR8:\
            {\
                const uint8_t * const d64= dither_8x8_73[y&7];\
                const uint8_t * const d32= dither_8x8_32[y&7];\
                func(uint8_t)\
                    ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                    ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
                }\
            }\
            break;\
        case PIX_FMT_RGB4:\
        case PIX_FMT_BGR4:\
            {\
                const uint8_t * const d64= dither_8x8_73 [y&7];\
                const uint8_t * const d128=dither_8x8_220[y&7];\
                func(uint8_t)\
                    ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                     + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
                }\
            }\
            break;\
        case PIX_FMT_RGB4_BYTE:\
        case PIX_FMT_BGR4_BYTE:\
            {\
                const uint8_t * const d64= dither_8x8_73 [y&7];\
                const uint8_t * const d128=dither_8x8_220[y&7];\
                func(uint8_t)\
                    ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                    ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
                }\
            }\
            break;\
        case PIX_FMT_MONOBLACK:\
    
                func_monoblack\
    
            }\
            break;\
        case PIX_FMT_YUYV422:\
            func2\
                ((uint8_t*)dest)[2*i2+0]= Y1;\
                ((uint8_t*)dest)[2*i2+1]= U;\
                ((uint8_t*)dest)[2*i2+2]= Y2;\
                ((uint8_t*)dest)[2*i2+3]= V;\
            }                \
            break;\
        case PIX_FMT_UYVY422:\
            func2\
                ((uint8_t*)dest)[2*i2+0]= U;\
                ((uint8_t*)dest)[2*i2+1]= Y1;\
                ((uint8_t*)dest)[2*i2+2]= V;\
                ((uint8_t*)dest)[2*i2+3]= Y2;\
            }                \
            break;\
    
        case PIX_FMT_GRAY16BE:\
            func_g16\
                ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
                ((uint8_t*)dest)[2*i2+1]= Y1;\
                ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
                ((uint8_t*)dest)[2*i2+3]= Y2;\
            }                \
            break;\
        case PIX_FMT_GRAY16LE:\
            func_g16\
                ((uint8_t*)dest)[2*i2+0]= Y1;\
                ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
                ((uint8_t*)dest)[2*i2+2]= Y2;\
                ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
            }                \
            break;\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
    
                                      int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                      uint8_t *dest, int dstW, int y)
    
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
    
    /**
     * Vertical filter plus YUV->RGB conversion at full chroma resolution (plain C),
     * for 24- and 32-bit packed RGB/BGR destinations.
     *
     * The per-pixel arithmetic comes from YSCALE_YUV_2_RGBX_FULL_C, which opens the
     * pixel loop; the statements after each macro use are the loop body and the
     * closing brace ends that loop.
     *
     * For 4-byte formats the non-colour byte of every pixel is set to 0. It is
     * addressed relative to the first colour byte: +3 for RGBA/BGRA, -1 for
     * ARGB/ABGR (where dest is advanced by one first). For 3-byte formats no
     * fourth byte is written, so nothing is stored past the end of the row.
     *
     * @param c             scaler context; dstFormat and the conversion
     *                      coefficients used by the macro are read from it
     * @param lumFilter     luma filter taps, lumFilterSize entries
     * @param lumSrc        lumFilterSize pointers to luma source lines
     * @param chrFilter     chroma filter taps, chrFilterSize entries
     * @param chrSrc        chrFilterSize pointers to chroma source lines
     * @param dest          output row, dstW pixels
     * @param dstW          number of pixels to produce
     * @param y             not used by this function
     */
    static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                        int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                        uint8_t *dest, int dstW, int y)
    {
        int i;
        int step= fmt_depth(c->dstFormat)/8;   /* bytes per output pixel */
        int aidx= 3;                           /* offset of the non-colour byte */

        switch(c->dstFormat){
        case PIX_FMT_ARGB:
            dest++;
            aidx= -1;
            /* fallthrough */
        case PIX_FMT_RGB24:
        case PIX_FMT_RGBA:
            YSCALE_YUV_2_RGBX_FULL_C(1<<21)
                dest[0]= R>>22;
                dest[1]= G>>22;
                dest[2]= B>>22;
                if (step == 4)
                    dest[aidx]= 0;
                dest+= step;
            }
            break;
        case PIX_FMT_ABGR:
            dest++;
            aidx= -1;
            /* fallthrough */
        case PIX_FMT_BGR24:
        case PIX_FMT_BGRA:
            YSCALE_YUV_2_RGBX_FULL_C(1<<21)
                dest[0]= B>>22;
                dest[1]= G>>22;
                dest[2]= R>>22;
                if (step == 4)
                    dest[aidx]= 0;
                dest+= step;
            }
            break;
        default:
            assert(0);
        }
    }
    
    //Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
    //Plain C versions
    
    /*
     * Decide which implementation variants of the scaler get compiled.
     * Every condition combines the build-time CPU feature macros with
     * RUNTIME_CPUDETECT; the AltiVec/MMX/MMX2/3DNow conditions additionally
     * require CONFIG_GPL, while the first condition is also true when
     * CONFIG_GPL is not defined.
     *
     * NOTE(review): several directives (#define / #ifdef / #endif lines) appear
     * to be missing from this view - e.g. only COMPILE_ALTIVEC and COMPILE_3DNOW
     * are visibly defined and the #if/#endif nesting does not balance. Confirm
     * against the complete file before changing anything here.
     */
    #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
    
    #if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
    
    #define COMPILE_ALTIVEC
    #endif //HAVE_ALTIVEC
    #endif //ARCH_POWERPC
    
    
    #if defined(ARCH_X86)
    
    // MMX without 3DNow/MMX2, or runtime detection
    #if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
    
    // MMX2, or runtime detection
    #if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
    
    // 3DNow without MMX2, or runtime detection
    #if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
    
    #define COMPILE_3DNOW
    #endif
    
    
    // Clear the feature macros; they are re-defined per variant before each
    // inclusion of the template below.
    #undef HAVE_MMX
    #undef HAVE_MMX2
    #undef HAVE_3DNOW
    
    /*
     * Instantiate swscale_template.c once per variant.
     * Before each inclusion RENAME() is (re)defined to paste a variant suffix
     * (_C, _altivec, _MMX, _MMX2, _3DNow) onto its argument and the HAVE_* macros
     * are set to describe that variant. Presumably the template wraps its
     * function names in RENAME() so every inclusion yields distinctly named
     * functions - the template itself is not visible here.
     *
     * NOTE(review): only the COMPILE_C and COMPILE_ALTIVEC guards are visible in
     * this view, and the #ifdef/#endif nesting does not balance; some guard lines
     * appear to be missing. Confirm against the complete file.
     */
    #ifdef COMPILE_C
    
    // Plain C variant: no SIMD feature macros defined.
    #undef HAVE_MMX
    #undef HAVE_MMX2
    #undef HAVE_3DNOW
    
    #define RENAME(a) a ## _C
    #include "swscale_template.c"
    
    // AltiVec variant
    #ifdef COMPILE_ALTIVEC
    #undef RENAME
    #define HAVE_ALTIVEC
    #define RENAME(a) a ## _altivec
    #include "swscale_template.c"
    #endif
    
    
    #if defined(ARCH_X86)
    
    //X86 versions
    /*
    #undef RENAME
    #undef HAVE_MMX
    #undef HAVE_MMX2
    #undef HAVE_3DNOW
    #define ARCH_X86
    #define RENAME(a) a ## _X86
    #include "swscale_template.c"
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    */
    
    //MMX versions
    
    #undef RENAME
    #define HAVE_MMX
    #undef HAVE_MMX2
    #undef HAVE_3DNOW
    #define RENAME(a) a ## _MMX
    #include "swscale_template.c"
    
    
    //MMX2 versions
    
    #undef RENAME
    #define HAVE_MMX
    #define HAVE_MMX2
    #undef HAVE_3DNOW
    #define RENAME(a) a ## _MMX2
    #include "swscale_template.c"
    
    
    //3DNOW versions
    
    #undef RENAME
    #define HAVE_MMX
    #undef HAVE_MMX2
    #define HAVE_3DNOW
    #define RENAME(a) a ## _3DNow
    #include "swscale_template.c"
    
    // Minor note: HAVE_MMX/HAVE_MMX2/HAVE_3DNOW are left in whatever state the last
    // inclusion above set them to, so do not rely on them below this line.
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /*
     * Evaluate a piecewise cubic at distance dist from the start of its first piece.
     *
     * For dist <= 1.0 the result is a + b*dist + c*dist^2 + d*dist^3 (Horner form).
     * For larger dist the coefficients are transformed once for every whole unit
     * that is stepped over (a becomes 0, b/c/d are recombined as below) and dist is
     * reduced by 1.0 each time, until it falls into the first case.
     *
     * This is the iterative form of the former tail recursion: it yields the same
     * values, uses constant stack regardless of dist, and returns (NaN) instead of
     * recursing forever when dist is NaN.
     */
    static double getSplineCoeff(double a, double b, double c, double d, double dist)
    {
        while (dist > 1.0)
        {
            // All three new coefficients are derived from the OLD b, c, d.
            const double nextB =  b + 2.0*c + 3.0*d;
            const double nextC =      c + 3.0*d;
            const double nextD = -b - 3.0*c - 6.0*d;
    
            a     = 0.0;
            b     = nextB;
            c     = nextC;
            d     = nextD;
            dist -= 1.0;
        }
    
        return ((d*dist + c)*dist + b)*dist + a;
    }
    
    static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
    
                                 int srcW, int dstW, int filterAlign, int one, int flags,
                                 SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
    
        int i;
        int filterSize;
        int filter2Size;
        int minFilterSize;
        double *filter=NULL;
        double *filter2=NULL;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        int ret= -1;
    
    #if defined(ARCH_X86)
    
    Diego Biurrun's avatar
    Diego Biurrun committed
            asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
    
        // Note the +1 is for the MMXscaler which reads over the end
        *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
    
        if (FFABS(xInc - 0x10000) <10) // unscaled
        {
            int i;
            filterSize= 1;
            filter= av_malloc(dstW*sizeof(double)*filterSize);
            for (i=0; i<dstW*filterSize; i++) filter[i]=0;
    
            for (i=0; i<dstW; i++)
            {
                filter[i*filterSize]=1;
                (*filterPos)[i]=i;
            }
    
        }
        else if (flags&SWS_POINT) // lame looking point sampling mode
        {
            int i;
            int xDstInSrc;
            filterSize= 1;
            filter= av_malloc(dstW*sizeof(double)*filterSize);
    
            xDstInSrc= xInc/2 - 0x8000;
            for (i=0; i<dstW; i++)
            {
                int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
    
                (*filterPos)[i]= xx;
                filter[i]= 1.0;
                xDstInSrc+= xInc;
            }
        }
        else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
        {
            int i;
            int xDstInSrc;
            if      (flags&SWS_BICUBIC) filterSize= 4;
            else if (flags&SWS_X      ) filterSize= 4;
            else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
            filter= av_malloc(dstW*sizeof(double)*filterSize);
    
            xDstInSrc= xInc/2 - 0x8000;
            for (i=0; i<dstW; i++)