Skip to content
Snippets Groups Projects
dsputil.c 79.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • Fabrice Bellard's avatar
    Fabrice Bellard committed
    /*
     * DSP utils
    
     * Copyright (c) 2000, 2001 Fabrice Bellard.
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
     *
    
     * This library is free software; you can redistribute it and/or
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either
     * version 2 of the License, or (at your option) any later version.
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
     *
    
     * This library is distributed in the hope that it will be useful,
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
    
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
     *
    
     * You should have received a copy of the GNU Lesser General Public
     * License along with this library; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
     *
    
     * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
     */
    #include "avcodec.h"
    #include "dsputil.h"
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    /* Replaceable entry point taking a coefficient block, a source pixel pointer
     * and a line size.  get_pixels_c in this file has the matching signature;
     * where the pointer is assigned is not visible in this part of the file. */
    void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
    
    /* Replaceable entry point taking a coefficient block, two source pointers and
     * a shared stride.  diff_pixels_c in this file has the matching signature. */
    void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    /* Replaceable entry points for writing a coefficient block into a pixel
     * buffer; put_pixels_clamped_c / add_pixels_clamped_c in this file have the
     * matching signatures. */
    void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
    void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
    
    /* Replaceable "gmc" entry points (presumably global motion compensation, as
     * mentioned in the file header -- confirm against the implementations, which
     * are not visible here). */
    void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
    void (*ff_gmc )(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, 
                      int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
    
    /* Replaceable entry point operating on a run of coefficient blocks
     * (presumably zeroing them -- implementation not visible here). */
    void (*clear_blocks)(DCTELEM *blocks);
    
    /* Replaceable entry points whose C versions in this file (pix_sum_c and
     * pix_norm1_c) reduce a 16x16 pixel block to a single integer. */
    int (*pix_sum)(UINT8 * pix, int line_size);
    int (*pix_norm1)(UINT8 * pix, int line_size);
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    
    /* Function pointers of type op_pixels_abs_func (declared outside this part of
     * the file) for 16x16 blocks.  NOTE(review): the _x2 / _y2 / _xy2 suffixes
     * presumably select horizontal / vertical / diagonal half-sample variants --
     * confirm against the implementations. */
    op_pixels_abs_func pix_abs16x16;
    op_pixels_abs_func pix_abs16x16_x2;
    op_pixels_abs_func pix_abs16x16_y2;
    op_pixels_abs_func pix_abs16x16_xy2;
    
    
    /* Same set of function pointers for 8x8 blocks. */
    op_pixels_abs_func pix_abs8x8;
    op_pixels_abs_func pix_abs8x8_x2;
    op_pixels_abs_func pix_abs8x8_y2;
    op_pixels_abs_func pix_abs8x8_xy2;
    
    /* Global flag, 0 by default.  Its consumers are not visible in this part of
     * the file. */
    int ff_bit_exact=0;
    
    
    /* Lookup table with MAX_NEG_CROP extra entries on each side of 256 central
     * ones.  The clamped pixel writers in this file index it through
     * (cropTbl + MAX_NEG_CROP), so indices from -MAX_NEG_CROP up to
     * 255 + MAX_NEG_CROP are in bounds.  The contents are filled in elsewhere
     * (presumably a clamp to 0..255 -- confirm against the initialisation). */
    UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    /* 512-entry lookup table.  pix_norm1_c reads it through (squareTbl + 256),
     * i.e. with the origin in the middle so that indices -256..255 are in bounds.
     * The contents are filled in elsewhere (presumably the square of the
     * index -- confirm against the initialisation). */
    UINT32 squareTbl[512];
    
    
    /* Zigzag scan order for an 8x8 block.  Entry i is the raster index
     * (row * 8 + column) of the i-th position in the scan; the values walk the
     * anti-diagonals of the block (0, 1, 8, 16, 9, 2, ...).  Each index 0..63
     * appears exactly once. */
    const UINT8 ff_zigzag_direct[64] = {
        0,   1,  8, 16,  9,  2,  3, 10,
        17, 24, 32, 25, 18, 11,  4,  5,
    
        12, 19, 26, 33, 40, 48, 41, 34,
    
        35, 42, 49, 56, 57, 50, 43, 36,
        29, 22, 15, 23, 30, 37, 44, 51,
        58, 59, 52, 45, 38, 31, 39, 46,
        53, 60, 61, 54, 47, 55, 62, 63
    };
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /* Inverse of zigzag_direct with 1 added to each entry, not permuted; used by
     * the MMX quantizer.  The table is filled in elsewhere. */
    UINT16 __align8 inv_zigzag_direct16[64];
    
    
    /* Alternate scan order for an 8x8 block that favours horizontal runs.
     * Entry i is the raster index (row * 8 + column) of the i-th position in the
     * scan; the scan starts with the first four samples of the top row.  Each
     * index 0..63 appears exactly once. */
    const UINT8 ff_alternate_horizontal_scan[64] = {
        0,  1,   2,  3,  8,  9, 16, 17, 
    
        10, 11,  4,  5,  6,  7, 15, 14,
        13, 12, 19, 18, 24, 25, 32, 33, 
        26, 27, 20, 21, 22, 23, 28, 29,
        30, 31, 34, 35, 40, 41, 48, 49, 
        42, 43, 36, 37, 38, 39, 44, 45,
        46, 47, 50, 51, 56, 57, 58, 59, 
        52, 53, 54, 55, 60, 61, 62, 63,
    };
    
    
    /* Alternate scan order for an 8x8 block that favours vertical runs.
     * Entry i is the raster index (row * 8 + column) of the i-th position in the
     * scan; the scan starts with the first four samples of the left column
     * (0, 8, 16, 24).  Each index 0..63 appears exactly once. */
    const UINT8 ff_alternate_vertical_scan[64] = {
        0,  8,  16, 24,  1,  9,  2, 10, 
    
        17, 25, 32, 40, 48, 56, 57, 49,
        41, 33, 26, 18,  3, 11,  4, 12, 
        19, 27, 34, 42, 50, 58, 35, 43,
        51, 59, 20, 28,  5, 13,  6, 14, 
        21, 29, 36, 44, 52, 60, 37, 45,
        53, 61, 22, 30,  7, 15, 23, 31, 
        38, 46, 54, 62, 39, 47, 55, 63,
    };
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
             0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
     536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
     268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
     178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
     134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
     107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
      89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
      76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
      67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
      59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
      53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
      48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
      44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
      41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
      38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
      35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
      33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
      31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
      29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
      28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
      26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
      25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
      24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
      23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
      22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
      21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
      20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
      19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
      19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
      18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
      17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
      17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
    };
    
    
    /*
     * Add up all 256 samples of a 16x16 block.
     *
     * pix       : top-left sample of the block
     * line_size : distance between the starts of consecutive rows, counted in
     *             samples
     *
     * Returns the plain sum of the samples.
     */
    static int pix_sum_c(UINT8 * pix, int line_size)
    {
        int total = 0;
        int row, col;

        for (row = 0; row < 16; row++) {
            const UINT8 *line = pix + row * line_size;

            for (col = 0; col < 16; col++)
                total += line[col];
        }

        return total;
    }
    
    
    /*
     * Accumulate squareTbl[256 + v] over every sample v of a 16x16 block.
     * The table is filled in elsewhere; presumably it holds squares, which would
     * make the result the sum of squared samples -- confirm against the table's
     * initialisation.
     *
     * pix       : top-left sample of the block
     * line_size : distance between the starts of consecutive rows, counted in
     *             samples
     */
    static int pix_norm1_c(UINT8 * pix, int line_size)
    {
        UINT32 *squares = squareTbl + 256;   /* table origin sits in the middle */
        int acc = 0;
        int y, x;

        for (y = 0; y < 16; y++, pix += line_size) {
            for (x = 0; x < 16; x++)
                acc += squares[pix[x]];
        }

        return acc;
    }
    
    
    
    /*
     * Copy an 8x8 block of samples into 64 consecutive coefficients.
     *
     * block     : destination; 64 entries are written, one row of 8 after the
     *             other
     * pixels    : top-left source sample
     * line_size : distance between the starts of consecutive source rows,
     *             counted in samples
     */
    static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
    {
        int i;

        /* one source row (8 samples) per iteration */
        for(i=0;i<8;i++) {
            block[0] = pixels[0];
            block[1] = pixels[1];
            block[2] = pixels[2];
            block[3] = pixels[3];
            block[4] = pixels[4];
            block[5] = pixels[5];
            block[6] = pixels[6];
            block[7] = pixels[7];
            pixels += line_size;
            block += 8;
        }
    }
    
    /*
     * Store the sample-wise difference s1 - s2 of two 8x8 blocks into 64
     * consecutive coefficients.
     *
     * block  : destination; 64 entries are written, one row of 8 after the other
     * s1, s2 : top-left samples of the minuend and subtrahend blocks
     * stride : distance between the starts of consecutive rows, counted in
     *          samples; the same value is applied to both sources
     */
    static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
                              const UINT8 *s2, int stride){
        int i;

        /* one row (8 differences) per iteration */
        for(i=0;i<8;i++) {
            block[0] = s1[0] - s2[0];
            block[1] = s1[1] - s2[1];
            block[2] = s1[2] - s2[2];
            block[3] = s1[3] - s2[3];
            block[4] = s1[4] - s2[4];
            block[5] = s1[5] - s2[5];
            block[6] = s1[6] - s2[6];
            block[7] = s1[7] - s2[7];
            s1 += stride;
            s2 += stride;
            block += 8;
        }
    }
    
    /*
     * Write an 8x8 block of coefficients to a pixel buffer, passing every value
     * through the cropTbl lookup first.
     *
     * block     : 64 source coefficients, one row of 8 after the other
     * pixels    : top-left destination sample
     * line_size : distance between the starts of consecutive destination rows,
     *             counted in samples
     *
     * Each coefficient is used directly as an index relative to
     * cropTbl + MAX_NEG_CROP, so it must lie within
     * [-MAX_NEG_CROP, 255 + MAX_NEG_CROP] to stay inside the table.
     */
    static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                                     int line_size)
    {
        int i;
        const UINT8 *cm = cropTbl + MAX_NEG_CROP;

        /* one destination row (8 samples) per iteration */
        for(i=0;i<8;i++) {
            pixels[0] = cm[block[0]];
            pixels[1] = cm[block[1]];
            pixels[2] = cm[block[2]];
            pixels[3] = cm[block[3]];
            pixels[4] = cm[block[4]];
            pixels[5] = cm[block[5]];
            pixels[6] = cm[block[6]];
            pixels[7] = cm[block[7]];
            pixels += line_size;
            block += 8;
        }
    }
    
    /*
     * Add an 8x8 block of coefficients to the samples already present in a pixel
     * buffer, passing every sum through the cropTbl lookup before storing it.
     *
     * block     : 64 source coefficients, one row of 8 after the other
     * pixels    : top-left sample of the buffer that is read and overwritten
     * line_size : distance between the starts of consecutive buffer rows,
     *             counted in samples
     *
     * Each sum is used directly as an index relative to cropTbl + MAX_NEG_CROP,
     * so it must lie within [-MAX_NEG_CROP, 255 + MAX_NEG_CROP] to stay inside
     * the table.
     */
    static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                                     int line_size)
    {
        int i;
        const UINT8 *cm = cropTbl + MAX_NEG_CROP;

        /* one buffer row (8 samples) per iteration */
        for(i=0;i<8;i++) {
            pixels[0] = cm[pixels[0] + block[0]];
            pixels[1] = cm[pixels[1] + block[1]];
            pixels[2] = cm[pixels[2] + block[2]];
            pixels[3] = cm[pixels[3] + block[3]];
            pixels[4] = cm[pixels[4] + block[4]];
            pixels[5] = cm[pixels[5] + block[5]];
            pixels[6] = cm[pixels[6] + block[6]];
            pixels[7] = cm[pixels[7] + block[7]];
            pixels += line_size;
            block += 8;
        }
    }
    
    #if 0
    
    #define PIXOP2(OPNAME, OP) \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
            OP(*((uint64_t*)block), LD64(pixels));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
            const uint64_t a= LD64(pixels  );\
            const uint64_t b= LD64(pixels+1);\
            OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
            const uint64_t a= LD64(pixels  );\
            const uint64_t b= LD64(pixels+1);\
            OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
            const uint64_t a= LD64(pixels          );\
            const uint64_t b= LD64(pixels+line_size);\
            OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
            const uint64_t a= LD64(pixels          );\
            const uint64_t b= LD64(pixels+line_size);\
            OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
            int i;\
            const uint64_t a= LD64(pixels  );\
            const uint64_t b= LD64(pixels+1);\
            uint64_t l0=  (a&0x0303030303030303ULL)\
                        + (b&0x0303030303030303ULL)\
                        + 0x0202020202020202ULL;\
            uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                       + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            uint64_t l1,h1;\
    \
            pixels+=line_size;\
            for(i=0; i<h; i+=2){\
                uint64_t a= LD64(pixels  );\
                uint64_t b= LD64(pixels+1);\
                l1=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL);\
                h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
                a= LD64(pixels  );\
                b= LD64(pixels+1);\
                l0=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL)\
                   + 0x0202020202020202ULL;\
                h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
            }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
            int i;\
            const uint64_t a= LD64(pixels  );\
            const uint64_t b= LD64(pixels+1);\
            uint64_t l0=  (a&0x0303030303030303ULL)\
                        + (b&0x0303030303030303ULL)\
                        + 0x0101010101010101ULL;\
            uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                       + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            uint64_t l1,h1;\
    \
            pixels+=line_size;\
            for(i=0; i<h; i+=2){\
                uint64_t a= LD64(pixels  );\
                uint64_t b= LD64(pixels+1);\
                l1=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL);\
                h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
                a= LD64(pixels  );\
                b= LD64(pixels+1);\
                l0=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL)\
                   + 0x0101010101010101ULL;\
                h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
            }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels    , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels_x2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels_y2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels_xy2, 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels_x2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels_y2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels_xy2, 8)\
    \
    void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
        {\
            OPNAME ## _pixels,\
            OPNAME ## _pixels_x2,\
            OPNAME ## _pixels_y2,\
            OPNAME ## _pixels_xy2},\
        {\
            OPNAME ## _pixels16,\
            OPNAME ## _pixels16_x2,\
            OPNAME ## _pixels16_y2,\
            OPNAME ## _pixels16_xy2}\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
        {\
            OPNAME ## _pixels,\
            OPNAME ## _no_rnd_pixels_x2,\
            OPNAME ## _no_rnd_pixels_y2,\
            OPNAME ## _no_rnd_pixels_xy2},\
        {\
            OPNAME ## _pixels16,\
            OPNAME ## _no_rnd_pixels16_x2,\
            OPNAME ## _no_rnd_pixels16_y2,\
            OPNAME ## _no_rnd_pixels16_xy2}\
    
    };
    
    #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
    #else // 64 bit variant
    
    #define PIXOP2(OPNAME, OP) \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
            OP(*((uint32_t*)(block  )), LD32(pixels  ));\
            OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _no_rnd_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _pixels8(block, pixels, line_size, h);\
    }\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            uint32_t a,b;\
            a= LD32(&src1[i*src_stride1  ]);\
            b= LD32(&src2[i*src_stride2  ]);\
            OP(*((uint32_t*)&dst[i*dst_stride  ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
            a= LD32(&src1[i*src_stride1+4]);\
            b= LD32(&src2[i*src_stride2+4]);\
            OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            uint32_t a,b;\
            a= LD32(&src1[i*src_stride1  ]);\
            b= LD32(&src2[i*src_stride2  ]);\
            OP(*((uint32_t*)&dst[i*dst_stride  ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
            a= LD32(&src1[i*src_stride1+4]);\
            b= LD32(&src2[i*src_stride2+4]);\
            OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
        OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
        OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
    }\
    \
    static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
        OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
        OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
    }\
    \
    static inline void OPNAME ## _no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
    }\
    \
    static inline void OPNAME ## _pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
    }\
    \
    static inline void OPNAME ## _no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
    }\
    \
    static inline void OPNAME ## _pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
    }\
    \
    static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                     int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            uint32_t a, b, c, d, l0, l1, h0, h1;\
            a= LD32(&src1[i*src_stride1]);\
            b= LD32(&src2[i*src_stride2]);\
            c= LD32(&src3[i*src_stride3]);\
            d= LD32(&src4[i*src_stride4]);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            l1=  (c&0x03030303UL)\
               + (d&0x03030303UL);\
            h1= ((c&0xFCFCFCFCUL)>>2)\
              + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            a= LD32(&src1[i*src_stride1+4]);\
            b= LD32(&src2[i*src_stride2+4]);\
            c= LD32(&src3[i*src_stride3+4]);\
            d= LD32(&src4[i*src_stride4+4]);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            l1=  (c&0x03030303UL)\
               + (d&0x03030303UL);\
            h1= ((c&0xFCFCFCFCUL)>>2)\
              + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                     int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            uint32_t a, b, c, d, l0, l1, h0, h1;\
            a= LD32(&src1[i*src_stride1]);\
            b= LD32(&src2[i*src_stride2]);\
            c= LD32(&src3[i*src_stride3]);\
            d= LD32(&src4[i*src_stride4]);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            l1=  (c&0x03030303UL)\
               + (d&0x03030303UL);\
            h1= ((c&0xFCFCFCFCUL)>>2)\
              + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            a= LD32(&src1[i*src_stride1+4]);\
            b= LD32(&src2[i*src_stride2+4]);\
            c= LD32(&src3[i*src_stride3+4]);\
            d= LD32(&src4[i*src_stride4+4]);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            l1=  (c&0x03030303UL)\
               + (d&0x03030303UL);\
            h1= ((c&0xFCFCFCFCUL)>>2)\
              + ((d&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                     int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
        OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
        OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    }\
    static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                     int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
        OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
        OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    }\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int j;\
        for(j=0; j<2; j++){\
            int i;\
            const uint32_t a= LD32(pixels  );\
            const uint32_t b= LD32(pixels+1);\
            uint32_t l0=  (a&0x03030303UL)\
                        + (b&0x03030303UL)\
                        + 0x02020202UL;\
            uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                       + ((b&0xFCFCFCFCUL)>>2);\
            uint32_t l1,h1;\
    \
            pixels+=line_size;\
            for(i=0; i<h; i+=2){\
                uint32_t a= LD32(pixels  );\
                uint32_t b= LD32(pixels+1);\
                l1=  (a&0x03030303UL)\
                   + (b&0x03030303UL);\
                h1= ((a&0xFCFCFCFCUL)>>2)\
                  + ((b&0xFCFCFCFCUL)>>2);\
                OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
                pixels+=line_size;\
                block +=line_size;\
                a= LD32(pixels  );\
                b= LD32(pixels+1);\
                l0=  (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x02020202UL;\
                h0= ((a&0xFCFCFCFCUL)>>2)\
                  + ((b&0xFCFCFCFCUL)>>2);\
                OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
                pixels+=line_size;\
                block +=line_size;\
            }\
            pixels+=4-line_size*(h+1);\
            block +=4-line_size*h;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _no_rnd_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int j;\
        for(j=0; j<2; j++){\
            int i;\
            const uint32_t a= LD32(pixels  );\
            const uint32_t b= LD32(pixels+1);\
            uint32_t l0=  (a&0x03030303UL)\
                        + (b&0x03030303UL)\
                        + 0x01010101UL;\
            uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                       + ((b&0xFCFCFCFCUL)>>2);\
            uint32_t l1,h1;\
    \
            pixels+=line_size;\
            for(i=0; i<h; i+=2){\
                uint32_t a= LD32(pixels  );\
                uint32_t b= LD32(pixels+1);\
                l1=  (a&0x03030303UL)\
                   + (b&0x03030303UL);\
                h1= ((a&0xFCFCFCFCUL)>>2)\
                  + ((b&0xFCFCFCFCUL)>>2);\
                OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
                pixels+=line_size;\
                block +=line_size;\
                a= LD32(pixels  );\
                b= LD32(pixels+1);\
                l0=  (a&0x03030303UL)\
                   + (b&0x03030303UL)\
                   + 0x01010101UL;\
                h0= ((a&0xFCFCFCFCUL)>>2)\
                  + ((b&0xFCFCFCFCUL)>>2);\
                OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
                pixels+=line_size;\
                block +=line_size;\
            }\
            pixels+=4-line_size*(h+1);\
            block +=4-line_size*h;\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels8    , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels8_x2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels8_y2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels8_xy2, 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16    , OPNAME ## _pixels8    , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels8_x2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels8_y2 , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels8_xy2, 8)\
    \
    void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
        {\
            OPNAME ## _pixels16,\
            OPNAME ## _pixels16_x2,\
            OPNAME ## _pixels16_y2,\
            OPNAME ## _pixels16_xy2},\
        {\
            OPNAME ## _pixels8,\
            OPNAME ## _pixels8_x2,\
            OPNAME ## _pixels8_y2,\
            OPNAME ## _pixels8_xy2},\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
        {\
            OPNAME ## _pixels16,\
            OPNAME ## _no_rnd_pixels16_x2,\
            OPNAME ## _no_rnd_pixels16_y2,\
            OPNAME ## _no_rnd_pixels16_xy2},\
        {\
            OPNAME ## _pixels8,\
            OPNAME ## _no_rnd_pixels8_x2,\
            OPNAME ## _no_rnd_pixels8_y2,\
            OPNAME ## _no_rnd_pixels8_xy2},\
    
    /* Byte-wise average of the packed words a and b, rounded up, stored into a:
     * (a|b) - ((a^b)>>1) equals ceil((a+b)/2); masking with 0xFEFEFEFE before the
     * shift stops the low bit of each byte from sliding into the byte below it. */
    #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
    #endif /* NOTE(review): closes a conditional opened above this excerpt - confirm */
    /* Plain store: a receives b unchanged. */
    #define op_put(a, b) a = b
    
    /* Instantiate the PIXOP2 function families for the "avg" and "put" operations. */
    PIXOP2(avg, op_avg)
    PIXOP2(put, op_put)
    /* Drop the helpers so that they can be redefined further down in the file. */
    #undef op_avg
    #undef op_put
    
    
    /* FIXME: this stuff could be removed as it's not really used anymore */
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    /*
     * PIXOP(BTYPE, OPNAME, OP, INCR)
     *
     * Generates four static kernels that each process rows of 8 samples, plus a
     * table pointing at them:
     *   OPNAME_pixels      OP(dst, src)
     *   OPNAME_pixels_x2   OP(dst, avg2(src, right neighbour))
     *   OPNAME_pixels_y2   OP(dst, avg2(src, sample one line below))
     *   OPNAME_pixels_xy2  OP(dst, avg4 of the 2x2 neighbourhood)
     *   OPNAME_pixels_tab  { plain, x2, y2, xy2 }
     *
     * BTYPE is the destination element type and INCR the destination advance per
     * row; INCR is expanded inside the generated function, so it may name
     * line_size.  avg2/avg4 are whichever macros are defined where PIXOP is
     * instantiated.  The row counter h is tested after each row, so every kernel
     * processes at least one row.
     */
    #define PIXOP(BTYPE, OPNAME, OP, INCR)                                                   \
                                                                                             \
    static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
    {                                                                                        \
        BTYPE *out = block;                                                                  \
        const UINT8 *in = pixels;                                                            \
        int x;                                                                               \
                                                                                             \
        do {                                                                                 \
            for (x = 0; x < 8; x++) {                                                        \
                OP(out[x], in[x]);                                                           \
            }                                                                                \
            in  += line_size;                                                                \
            out += INCR;                                                                     \
        } while (--h);                                                                       \
    }                                                                                        \
                                                                                             \
    static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
    {                                                                                        \
        BTYPE *out = block;                                                                  \
        const UINT8 *in = pixels;                                                            \
        int x;                                                                               \
                                                                                             \
        do {                                                                                 \
            /* reads in[0..8]: each output needs its right-hand neighbour */                 \
            for (x = 0; x < 8; x++) {                                                        \
                OP(out[x], avg2(in[x], in[x + 1]));                                          \
            }                                                                                \
            in  += line_size;                                                                \
            out += INCR;                                                                     \
        } while (--h);                                                                       \
    }                                                                                        \
                                                                                             \
    static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
    {                                                                                        \
        BTYPE *out = block;                                                                  \
        const UINT8 *upper = pixels;                                                         \
        const UINT8 *lower = pixels + line_size;                                             \
        int x;                                                                               \
                                                                                             \
        do {                                                                                 \
            for (x = 0; x < 8; x++) {                                                        \
                OP(out[x], avg2(upper[x], lower[x]));                                        \
            }                                                                                \
            upper += line_size;                                                              \
            lower += line_size;                                                              \
            out   += INCR;                                                                   \
        } while (--h);                                                                       \
    }                                                                                        \
                                                                                             \
    static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
    {                                                                                        \
        BTYPE *out = block;                                                                  \
        const UINT8 *upper = pixels;                                                         \
        const UINT8 *lower = pixels + line_size;                                             \
        int x;                                                                               \
                                                                                             \
        do {                                                                                 \
            /* reads columns 0..8 of both the current line and the one below */              \
            for (x = 0; x < 8; x++) {                                                        \
                OP(out[x], avg4(upper[x], upper[x + 1], lower[x], lower[x + 1]));            \
            }                                                                                \
            upper += line_size;                                                              \
            lower += line_size;                                                              \
            out   += INCR;                                                                   \
        } while (--h);                                                                       \
    }                                                                                        \
                                                                                             \
    void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
        OPNAME ## _pixels,                                                                   \
        OPNAME ## _pixels_x2,                                                                \
        OPNAME ## _pixels_y2,                                                                \
        OPNAME ## _pixels_xy2,                                                               \
    };
    
    /* rounding primitives: mean of 2 / 4 samples, rounded to nearest with ties
     * going upward.  Every parameter is parenthesized so that an expression
     * argument (e.g. x|y) is not regrouped by operator precedence once it is
     * pasted into the sum. */
    #define avg2(a,b) (((a)+(b)+1)>>1)
    #define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
    
    /* per-sample operations passed to PIXOP as its OP argument; each writes into a */
    #define op_avg(a, b) (a) = avg2(a, b)   /* a <- avg2(a, b), using the avg2 in effect at the point of use */
    #define op_sub(a, b) (a) -= (b)         /* a <- a - b */
    
    #define op_put(a, b) (a) = (b)          /* a <- b */
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    
    /* DCTELEM destination whose rows are 8 elements apart: generates
     * sub_pixels, sub_pixels_x2, sub_pixels_y2, sub_pixels_xy2 and sub_pixels_tab */
    PIXOP(DCTELEM, sub, op_sub, 8)
    
    /* uint8_t destination advancing by line_size per row: generates the
     * avg_pixels* and put_pixels* kernels together with their tables */
    PIXOP(uint8_t, avg, op_avg, line_size)
    PIXOP(uint8_t, put, op_put, line_size)
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    
    /* not rounding primitives: avg2 truncates the half, avg4 biases the sum by 1
     * instead of 2.  Parameters are parenthesized so that expression arguments
     * keep their own grouping inside the expansion. */
    #undef avg2
    #undef avg4
    #define avg2(a,b) (((a)+(b))>>1)
    #define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+1)>>2)
    
    
    /* the same kernels built while the non-rounding avg2/avg4 are in effect:
     * generates the avg_no_rnd_pixels* and put_no_rnd_pixels* families and tables */
    PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
    PIXOP(uint8_t, put_no_rnd, op_put, line_size)
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    /* motion estimation */
    
    /* Replace the non-rounding avg2/avg4 defined earlier with the rounding
     * versions again (nearest, ties upward).  Parameters are parenthesized so
     * that expression arguments keep their own grouping inside the expansion. */
    #undef avg2
    #undef avg4
    
    #define avg2(a,b) (((a)+(b)+1)>>1)
    #define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * Bilinear interpolation of an 8-sample-wide block at a fractional offset.
     *
     * Every output sample is the weighted sum of the source sample, its right
     * neighbour and the two samples directly below them.  The weights A..D are
     * products of (16-x16, x16) and (16-y16, y16) and always add up to 256,
     * which is why the sum is shifted right by 8.
     *
     * @param dst     destination; 8 samples are written per row
     * @param src     source; columns 0..8 of the current row and of the row
     *                `stride` samples further on are read
     * @param stride  distance between consecutive rows, used for both dst and src
     * @param h       number of rows to produce
     * @param x16     horizontal weight in sixteenths (presumably 0..16 - confirm with callers)
     * @param y16     vertical weight in sixteenths (presumably 0..16 - confirm with callers)
     * @param rounder value added to each weighted sum before the shift
     */
    static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder)
    {
        const int A=(16-x16)*(16-y16);  /* weight of the top-left sample */
        const int B=(   x16)*(16-y16);  /* weight of the top-right sample */
        const int C=(16-x16)*(   y16);  /* weight of the bottom-left sample */
        const int D=(   x16)*(   y16);  /* weight of the bottom-right sample */
        int i;
    
        for(i=0; i<h; i++)
        {
            dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
            dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
            dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
            dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
            dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
            dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
            dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
            dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
            dst+= stride;
            src+= stride;
        }
    }
    
    /**
     * Affine ("global") motion compensation of an 8-sample-wide block, plain C.
     *
     * A source position is tracked for every destination sample.  Its upper bits
     * (position >> 16) hold the sample coordinate with `shift` sub-sample bits;
     * the low `shift` bits of that value are the interpolation fraction and the
     * rest is the integer coordinate.  The position advances by (dxx, dyx) per
     * column and the row start advances by (dxy, dyy) per row.
     *
     * Samples whose integer coordinate lies in [0, width-1) x [0, height-1) are
     * interpolated bilinearly.  When one coordinate is outside that range it is
     * passed through clip() with the bounds 0 and width-1 / height-1 and the
     * interpolation is done along the other axis only; when both are outside, the
     * single sample at the clipped position is copied.
     *
     * @param dst     destination block, rows `stride` apart
     * @param src     source plane, rows `stride` apart
     * @param h       number of destination rows
     * @param ox, oy  source position of the first destination sample
     * @param shift   number of sub-sample bits
     * @param r       value added before the final shift by 2*shift
     * @param width, height  extent used for the range checks described above
     */
    static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
                      int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
    {
        const int unit      = 1 << shift;   /* weight corresponding to 1.0 */
        const int frac_mask = unit - 1;
        const int norm      = shift * 2;    /* two weights of `shift` bits are multiplied */
        const int max_x     = width  - 1;
        const int max_y     = height - 1;
        int row;
    
        for (row = 0; row < h; row++, ox += dxy, oy += dyy) {
            UINT8 *out = dst + row * stride;
            int px = ox;
            int py = oy;
            int col;
    
            for (col = 0; col < 8; col++, px += dxx, py += dyx) { //XXX FIXME optimize
                const int fx = (px >> 16) & frac_mask;
                const int fy = (py >> 16) & frac_mask;
                const int sx = (px >> 16) >> shift;
                const int sy = (py >> 16) >> shift;
                /* the unsigned compare rejects negative coordinates as well */
                const int x_inside = (unsigned)sx < max_x;
                const int y_inside = (unsigned)sy < max_y;
                const UINT8 *p;
    
                if (x_inside && y_inside) {
                    /* full bilinear interpolation over the 2x2 neighbourhood */
                    p = src + (sx + sy * stride);
                    out[col] = (  (p[0]      * (unit - fx) + p[1]          * fx) * (unit - fy)
                                + (p[stride] * (unit - fx) + p[stride + 1] * fx) * fy
                                + r) >> norm;
                } else if (x_inside) {
                    /* row out of range: interpolate horizontally on the clipped row */
                    p = src + (sx + clip(sy, 0, max_y) * stride);
                    out[col] = ((p[0] * (unit - fx) + p[1] * fx) * unit + r) >> norm;
                } else if (y_inside) {
                    /* column out of range: interpolate vertically on the clipped column */
                    p = src + (clip(sx, 0, max_x) + sy * stride);
                    out[col] = ((p[0] * (unit - fy) + p[stride] * fy) * unit + r) >> norm;
                } else {
                    /* both out of range: copy the sample at the clipped position */
                    p = src + (clip(sx, 0, max_x) + clip(sy, 0, max_y) * stride);
                    out[col] = p[0];
                }
            }
        }
    }
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * Copy a block that is 17 bytes wide and h rows tall.
     *
     * Each row is moved as four 4-byte groups through LD32/ST32 (defined
     * elsewhere; presumably 32-bit load/store helpers) followed by the single
     * byte at offset 16.
     *
     * @param dst        destination of the first row
     * @param src        source of the first row
     * @param dstStride  distance between destination rows
     * @param srcStride  distance between source rows
     * @param h          number of rows to copy
     */
    static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
    {
        int i;
        for(i=0; i<h; i++)
        {
            ST32(dst   , LD32(src   ));
            ST32(dst+4 , LD32(src+4 ));
            ST32(dst+8 , LD32(src+8 ));
            ST32(dst+12, LD32(src+12));
            dst[16]= src[16];
            dst+=dstStride;
            src+=srcStride;
        }
    }
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * Copy a block that is 9 bytes wide and h rows tall.
     *
     * Each row is moved as two 4-byte groups through LD32/ST32 (defined
     * elsewhere; presumably 32-bit load/store helpers) followed by the single
     * byte at offset 8.
     *
     * @param dst        destination of the first row
     * @param src        source of the first row
     * @param dstStride  distance between destination rows
     * @param srcStride  distance between source rows
     * @param h          number of rows to copy
     */
    static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
    {
        int i;
        for(i=0; i<h; i++)
        {
            ST32(dst   , LD32(src   ));
            ST32(dst+4 , LD32(src+4 ));
            dst[8]= src[8];
            dst+=dstStride;
            src+=srcStride;
        }
    }
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    #define QPEL_MC(r, OPNAME, RND, OP) \
    static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
        UINT8 *cm = cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<h; i++)\
        {\
            OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
            OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
            OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
            OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
            OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
            OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
            OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
            OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
            dst+=dstStride;\
            src+=srcStride;\
        }\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
        UINT8 *cm = cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<w; i++)\
        {\
            const int src0= src[0*srcStride];\
            const int src1= src[1*srcStride];\
            const int src2= src[2*srcStride];\
            const int src3= src[3*srcStride];\
            const int src4= src[4*srcStride];\
            const int src5= src[5*srcStride];\
            const int src6= src[6*srcStride];\
            const int src7= src[7*srcStride];\
            const int src8= src[8*srcStride];\
            OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
            OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
            OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
            OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
            OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
            OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
            OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
            OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
            dst++;\
            src++;\
        }\
    }\
    \
    static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
        UINT8 *cm = cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<h; i++)\
        {\
            OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
            OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
            OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
            OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
            OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
            OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
            OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
            OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
            OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
            OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
            OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
            OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
            OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\