Skip to content
Snippets Groups Projects
dsputil.c 167 KiB
Newer Older
  • Learn to ignore specific revisions
  • Fabrice Bellard's avatar
    Fabrice Bellard committed
    /*
     * DSP utils
     * Copyright (c) 2000, 2001 Fabrice Bellard
     * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
     *
     * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
     *
     * This file is part of FFmpeg.
     *
     * FFmpeg is free software; you can redistribute it and/or
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either
     * version 2.1 of the License, or (at your option) any later version.
     *
     * FFmpeg is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
     *
     * You should have received a copy of the GNU Lesser General Public
     * License along with FFmpeg; if not, write to the Free Software
     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     */
    
    /**
     * @file libavcodec/dsputil.c
     * DSP utils
     */
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    #include "avcodec.h"
    #include "dsputil.h"
    
    #include "simple_idct.h"
    
    #include "faandct.h"
    
    #include "faanidct.h"
    
    /* snow.c */
    void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
    
    
    /* vorbis.c */
    void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
    
    
    Loren Merritt's avatar
    Loren Merritt committed
    /* ac3dec.c */
    void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
    
    
    /* flacenc.c */
    void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
    
    
    /* pngdec.c */
    void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
    
    
    /* eaidct.c */
    void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
    
    
    /*
     * Lookup table used by the *_pixels_clamped*_c functions in this file, which
     * index it through (ff_cropTbl + MAX_NEG_CROP) so that negative indices are
     * valid. It is zero-filled here; presumably it is populated during
     * initialisation elsewhere (not visible in this part of the file).
     */
    uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0};
    
    /*
     * Lookup table read by the sse*_c and pix_norm1_c functions through
     * (ff_squareTbl + 256), i.e. indexed by values in -256..255. It is
     * zero-filled here; presumably it is populated during initialisation
     * elsewhere (not visible in this part of the file).
     */
    uint32_t ff_squareTbl[512] = {0};
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    
    
    /*
     * Byte-replicated constants as wide as unsigned long: ~0UL / 255 is
     * 0x0101...01, so multiplying by 0x7f / 0x80 gives 0x7f7f...7f / 0x8080...80
     * (4 bytes where unsigned long is 32 bits, 8 bytes where it is 64 bits).
     */
    #define pb_7f ((~0UL / 255) * 0x7f)
    #define pb_80 ((~0UL / 255) * 0x80)
    
    /*
     * Zigzag scan order of an 8x8 block: entry k is the raster position
     * (8 * row + column) of the k-th coefficient along the zigzag path.
     */
    const uint8_t ff_zigzag_direct[64] = {
         0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
        17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
        12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
        27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
        35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
        29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
        58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
        53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
    };
    
    
    /*
     * Zigzag scan used with the 2-4-8 IDCT. Unlike the layout given in the
     * specification, the two fields are stored interleaved here.
     * The table is a permutation of 0..63.
     */
    const uint8_t ff_zigzag248_direct[64] = {
         0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
        17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
        18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
        34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
        20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
        36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
        22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
        53, 61, 54, 62, 39, 47, 55, 63    /* scan positions 56..63 */
    };
    
    
    /* Inverse of zigzag_direct (not permuted), each entry + 1; used by the MMX quantizer */
    /* 64 x 16-bit table declared through DECLARE_ALIGNED_8; zero-filled at definition. */
    DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0};
    
    /*
     * Alternate (horizontal) scan order for an 8x8 block.
     * The table is a permutation of 0..63.
     */
    const uint8_t ff_alternate_horizontal_scan[64] = {
         0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
        10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
        13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
        26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
        30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
        42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
        46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
        52, 53, 54, 55, 60, 61, 62, 63    /* scan positions 56..63 */
    };
    
    
    /*
     * Alternate (vertical) scan order for an 8x8 block.
     * The table is a permutation of 0..63.
     */
    const uint8_t ff_alternate_vertical_scan[64] = {
         0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
        17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
        41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
        19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
        51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
        21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
        53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
        38, 46, 54, 62, 39, 47, 55, 63    /* scan positions 56..63 */
    };
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /*
     * Fixed-point reciprocals: for 2 <= b <= 255 the entry is 2^32 / b rounded
     * up, so that
     *     a * ff_inverse[b] >> 32 == a / b   for all 0 <= a <= 65536
     * (the product must be formed in 64 bits). Entry 0 is 0 and entry 1 is
     * 2^32 - 1; neither is covered by the guarantee above.
     */
    const uint32_t ff_inverse[256]={
             0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
     536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
     268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
     178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
     134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
     107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
      89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
      76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
      67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
      59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
      53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
      48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
      44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
      41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
      38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
      35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
      33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
      31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
      29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
      28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
      26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
      25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
      24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
      23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
      22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
      21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
      20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
      19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
      19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
      18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
      17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
      17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
    };
    
    
    /*
     * Input coefficient permutation for simple_idct_mmx.
     * The table is a permutation of 0..63 (0x00..0x3F).
     */
    static const uint8_t simple_mmx_permutation[64]={
            0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
            0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
            0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
            0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
            0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
            0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
            0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
            0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
    };
    
    /* Eight-entry index permutation (0..7); by its name, a row permutation for the SSE2 IDCT. */
    static const uint8_t idct_sse2_row_perm[8] = {
        0, 4, 1, 5,
        2, 6, 3, 7
    };
    
    
    /**
     * Initialise a ScanTable from a scan order and a coefficient permutation.
     *
     * @param permutation    64-entry table applied to every scan position
     * @param st             table to fill; scantable, permutated[] and
     *                       raster_end[] are written
     * @param src_scantable  64-entry scan order; the pointer is stored in
     *                       st->scantable, so it must outlive st
     */
    void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
        int i;
        int end;

        st->scantable= src_scantable;

        for(i=0; i<64; i++){
            int j;
            j = src_scantable[i];
            st->permutated[i] = permutation[j];
            /* NOTE(review): the opening guard of this conditional was missing;
             * restored as ARCH_PPC -- confirm against the ScanTable definition
             * in dsputil.h (which must declare inverse[] under the same guard). */
    #if ARCH_PPC
            st->inverse[j] = i;
    #endif
        }

        /* raster_end[i] = largest permuted index among scan positions 0..i */
        end=-1;
        for(i=0; i<64; i++){
            int j;
            j = st->permutated[i];
            if(j>end) end=j;
            st->raster_end[i]= end;
        }
    }
    
    
    /**
     * Sum of all samples of a 16x16 block.
     *
     * @param pix        top-left sample of the block
     * @param line_size  distance in bytes between vertically adjacent samples
     * @return sum of the 256 sample values
     */
    static int pix_sum_c(uint8_t * pix, int line_size)
    {
        int s, i, j;

        s = 0;
        for (i = 0; i < 16; i++) {
            for (j = 0; j < 16; j++)
                s += pix[j];
            pix += line_size;   /* next row */
        }
        return s;
    }
    
    /**
     * Sum of ff_squareTbl[256 + sample] over a 16x16 block -- the sum of
     * squared samples, assuming ff_squareTbl[256 + d] holds d*d (the table is
     * filled elsewhere; TODO confirm).
     *
     * Samples are read one byte at a time, so no alignment or aliasing
     * assumptions are made about pix.
     *
     * @param pix        top-left sample of the block
     * @param line_size  distance in bytes between vertically adjacent samples
     * @return accumulated table values for the 256 samples
     */
    static int pix_norm1_c(uint8_t * pix, int line_size)
    {
        int s, i, j;
        uint32_t *sq = ff_squareTbl + 256;

        s = 0;
        for (i = 0; i < 16; i++) {
            for (j = 0; j < 16; j++)
                s += sq[pix[j]];
            pix += line_size;   /* next row */
        }
        return s;
    }
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * Apply bswap_32() to w 32-bit words, reading from src and writing to dst.
     *
     * @param dst  destination, at least w words
     * @param src  source, at least w words
     * @param w    number of 32-bit words to convert
     */
    static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
        int i;

        /* bulk of the buffer, eight words per iteration */
        for(i=0; i+8<=w; i+=8){
            dst[i+0]= bswap_32(src[i+0]);
            dst[i+1]= bswap_32(src[i+1]);
            dst[i+2]= bswap_32(src[i+2]);
            dst[i+3]= bswap_32(src[i+3]);
            dst[i+4]= bswap_32(src[i+4]);
            dst[i+5]= bswap_32(src[i+5]);
            dst[i+6]= bswap_32(src[i+6]);
            dst[i+7]= bswap_32(src[i+7]);
        }
        /* remaining 0..7 words */
        for(;i<w; i++){
            dst[i+0]= bswap_32(src[i+0]);
        }
    }
    
    /**
     * Accumulate ff_squareTbl[256 + (pix1 - pix2)] over a block 4 samples wide
     * and h rows high.
     *
     * @param v          unused
     * @param pix1       first block
     * @param pix2       second block
     * @param line_size  row stride in bytes, shared by both blocks
     * @param h          number of rows
     * @return accumulated table values
     */
    static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
    {
        uint32_t *squares = ff_squareTbl + 256;   /* centred so negative differences index backwards */
        int total = 0;
        int row, col;

        for (row = 0; row < h; row++) {
            for (col = 0; col < 4; col++)
                total += squares[pix1[col] - pix2[col]];
            pix1 += line_size;
            pix2 += line_size;
        }
        return total;
    }
    
    
    static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    {
        int s, i;
    
        uint32_t *sq = ff_squareTbl + 256;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    
        s = 0;
    
        for (i = 0; i < h; i++) {
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            s += sq[pix1[0] - pix2[0]];
            s += sq[pix1[1] - pix2[1]];
            s += sq[pix1[2] - pix2[2]];
            s += sq[pix1[3] - pix2[3]];
            s += sq[pix1[4] - pix2[4]];
            s += sq[pix1[5] - pix2[5]];
            s += sq[pix1[6] - pix2[6]];
            s += sq[pix1[7] - pix2[7]];
            pix1 += line_size;
            pix2 += line_size;
        }
        return s;
    }
    
    
    /**
     * Accumulate ff_squareTbl[256 + (pix1 - pix2)] over a block 16 samples wide
     * and h rows high.
     *
     * @param v          unused
     * @param pix1       first block
     * @param pix2       second block
     * @param line_size  row stride in bytes, shared by both blocks
     * @param h          number of rows
     * @return accumulated table values
     */
    static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
    {
        int s, i, j;
        uint32_t *sq = ff_squareTbl + 256;   /* centred so negative differences index backwards */

        s = 0;
        for (i = 0; i < h; i++) {
            for (j = 0; j < 16; j++)
                s += sq[pix1[j] - pix2[j]];
            pix1 += line_size;
            pix2 += line_size;
        }
        return s;
    }
    
    #if CONFIG_SNOW_ENCODER //dwt is in snow.c
    
    /*
     * Weighted comparison of two w x h blocks.
     *
     * Visible steps: the per-sample difference (pix1 - pix2) << 4 is stored in
     * tmp[] with a row pitch of 32; then, for every level/orientation pair,
     * |tmp[...] * scale[type][dec_count-3][level][ori]| is accumulated into s.
     * dec_count is 3 when w == 8 and 4 otherwise.
     *
     * NOTE(review): this definition is incomplete as it stands -- tmp is never
     * declared, s is never initialised, the scale initializer and the loop
     * nests have unbalanced braces, nothing transforms tmp between the two
     * loop nests, and there is no return statement. Restore the missing lines
     * from the upstream file before building; they cannot be reconstructed
     * from what is visible here.
     */
    static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){

        int s, i, j;
        const int dec_count= w==8 ? 3 : 4;

        int level, ori;

        /* weights indexed [type][dec_count-3][level][ori] */
        static const int scale[2][2][4][4]={

                {268, 239, 239, 213},
                {  0, 224, 224, 152},
                {  0, 135, 135, 110},
            },{

                {344, 310, 310, 280},
                {  0, 320, 320, 228},
                {  0, 175, 175, 136},
                {  0, 129, 129, 102},
            }
          },{

                {275, 245, 245, 218},
                {  0, 230, 230, 156},
                {  0, 138, 138, 113},
            },{

                {352, 317, 317, 286},
                {  0, 328, 328, 233},
                {  0, 180, 180, 140},
                {  0, 132, 132, 105},
            }
          }
        };

        /* difference block, scaled by 16, four samples per iteration */
        for (i = 0; i < h; i++) {
            for (j = 0; j < w; j+=4) {

                tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
                tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
                tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
                tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;

            }
            pix1 += line_size;
            pix2 += line_size;
        }

        /* orientation 0 is only visited at level 0 */
        for(level=0; level<dec_count; level++){
            for(ori= level ? 1 : 0; ori<4; ori++){

                int size= w>>(dec_count-level);
                int sx= (ori&1) ? size : 0;
                int stride= 32<<(dec_count-level);

                int sy= (ori&2) ? stride>>1 : 0;

                for(i=0; i<size; i++){
                    for(j=0; j<size; j++){
                        int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];

                        s += FFABS(v);

    }
    
    /* w_c() comparison for blocks 8 samples wide, with type selector 1. */
    static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
        const int width = 8;
        const int type  = 1;

        return w_c(v, pix1, pix2, line_size, width, h, type);
    }
    
    /* w_c() comparison for blocks 8 samples wide, with type selector 0. */
    static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
        const int width = 8;
        const int type  = 0;

        return w_c(v, pix1, pix2, line_size, width, h, type);
    }
    
    /* w_c() comparison for blocks 16 samples wide, with type selector 1. */
    static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
        const int width = 16;
        const int type  = 1;

        return w_c(v, pix1, pix2, line_size, width, h, type);
    }
    
    /* w_c() comparison for blocks 16 samples wide, with type selector 0. */
    static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
        const int width = 16;
        const int type  = 0;

        return w_c(v, pix1, pix2, line_size, width, h, type);
    }
    
    
    Måns Rullgård's avatar
    Måns Rullgård committed
    /*
     * w_c() comparison for blocks 32 samples wide, with type selector 1.
     * NOTE(review): the body was missing; it is restored by analogy with
     * w53_8_c and w53_16_c (same call, width 32) -- confirm against upstream.
     */
    int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
        return w_c(v, pix1, pix2, line_size, 32, h, 1);
    }
    
    Måns Rullgård's avatar
    Måns Rullgård committed
    /*
     * w_c() comparison for blocks 32 samples wide, with type selector 0.
     * NOTE(review): the body was missing; it is restored by analogy with
     * w97_8_c and w97_16_c (same call, width 32) -- confirm against upstream.
     * NOTE(review): the "#if CONFIG_SNOW_ENCODER" opened before w_c has no
     * matching #endif in the visible part of the file; it most likely belongs
     * directly after this function.
     */
    int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
        return w_c(v, pix1, pix2, line_size, 32, h, 0);
    }
    
    /**
     * Extend an image by w samples on every side by replicating its border
     * samples. The caller must provide w writable rows above and below the
     * image and w writable bytes left and right of every row.
     *
     * @param buf     top-left sample of the image
     * @param wrap    distance in bytes between vertically adjacent samples
     * @param width   image width in samples
     * @param height  image height in rows
     * @param w       edge width to draw
     */
    //FIXME check that this is ok for mpeg4 interlaced
    static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
    {
        uint8_t *const bottom_row = buf + (height - 1) * wrap;
        uint8_t *row;
        int n;

        /* rows above the first line and below the last line */
        for (n = 1; n <= w; n++) {
            memcpy(buf        - n * wrap, buf,        width);
            memcpy(bottom_row + n * wrap, bottom_row, width);
        }

        /* columns left and right of every image row */
        for (n = 0, row = buf; n < height; n++, row += wrap) {
            memset(row - w,     row[0],         w);
            memset(row + width, row[width - 1], w);
        }

        /* the four w x w corner squares */
        for (n = 1; n <= w; n++) {
            memset(buf        - n * wrap - w,     buf[0],                w); /* top left */
            memset(buf        - n * wrap + width, buf[width - 1],        w); /* top right */
            memset(bottom_row + n * wrap - w,     bottom_row[0],         w); /* bottom left */
            memset(bottom_row + n * wrap + width, bottom_row[width - 1], w); /* bottom right */
        }
    }
    
    
    /**
     * Copy a rectangular block of samples into a temporary buffer, replicating
     * the border samples of the source wherever the block lies outside it.
     * @param buf destination buffer
     * @param src source buffer, pointing at the block's top-left sample
     *            (which may lie outside the source picture)
     * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
     * @param block_w width of block
     * @param block_h height of block
     * @param src_x x coordinate of the top left sample of the block in the source buffer
     * @param src_y y coordinate of the top left sample of the block in the source buffer
     * @param w width of the source buffer
     * @param h height of the source buffer
     */
    void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                        int src_x, int src_y, int w, int h){
        int col, row;
        int top, left, bottom, right;   /* part of the block that overlaps the source: [top,bottom) x [left,right) */

        /* a block entirely outside the picture is moved so that it overlaps by one row/column */
        if (src_y >= h) {
            src  += (h - 1 - src_y) * linesize;
            src_y = h - 1;
        } else if (src_y <= -block_h) {
            src  += (1 - block_h - src_y) * linesize;
            src_y = 1 - block_h;
        }
        if (src_x >= w) {
            src  += (w - 1 - src_x);
            src_x = w - 1;
        } else if (src_x <= -block_w) {
            src  += (1 - block_w - src_x);
            src_x = 1 - block_w;
        }

        top    = FFMAX(0, -src_y);
        left   = FFMAX(0, -src_x);
        bottom = FFMIN(block_h, h - src_y);
        right  = FFMIN(block_w, w - src_x);

        /* samples that really exist in the source */
        for (row = top; row < bottom; row++) {
            const int off = row * linesize;
            for (col = left; col < right; col++)
                buf[off + col] = src[off + col];
        }

        /* rows above the overlap repeat its first row */
        for (row = 0; row < top; row++) {
            for (col = left; col < right; col++)
                buf[col + row * linesize] = buf[col + top * linesize];
        }

        /* rows below the overlap repeat its last row */
        for (row = bottom; row < block_h; row++) {
            for (col = left; col < right; col++)
                buf[col + row * linesize] = buf[col + (bottom - 1) * linesize];
        }

        /* finally widen every row to the left and to the right */
        for (row = 0; row < block_h; row++) {
            const int off = row * linesize;

            for (col = 0; col < left; col++)
                buf[off + col] = buf[off + left];

            for (col = right; col < block_w; col++)
                buf[off + col] = buf[off + right - 1];
        }
    }
    
    
    /**
     * Load an 8x8 block of samples into a coefficient block.
     *
     * @param block      destination, 64 entries stored row by row
     * @param pixels     top-left source sample
     * @param line_size  distance in bytes between vertically adjacent samples
     */
    static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
    {
        int i, j;

        for(i=0;i<8;i++) {
            for(j=0;j<8;j++)
                block[j] = pixels[j];
            pixels += line_size;
            block += 8;
        }
    }
    
    /**
     * Store the sample-wise difference s1 - s2 of two 8x8 blocks.
     *
     * @param block   destination, 64 entries stored row by row
     * @param s1      top-left sample of the first block
     * @param s2      top-left sample of the second block
     * @param stride  distance in bytes between vertically adjacent samples,
     *                shared by s1 and s2
     */
    static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                              const uint8_t *s2, int stride){
        int i, j;

        for(i=0;i<8;i++) {
            for(j=0;j<8;j++)
                block[j] = s1[j] - s2[j];
            s1 += stride;
            s2 += stride;
            block += 8;
        }
    }
    
    static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    {
        int i;
    
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
        /* read the pixels */
        for(i=0;i<8;i++) {
    
            pixels[0] = cm[block[0]];
            pixels[1] = cm[block[1]];
            pixels[2] = cm[block[2]];
            pixels[3] = cm[block[3]];
            pixels[4] = cm[block[4]];
            pixels[5] = cm[block[5]];
            pixels[6] = cm[block[6]];
            pixels[7] = cm[block[7]];
    
            pixels += line_size;
            block += 8;
    
    static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
    
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    
        /* read the pixels */
        for(i=0;i<4;i++) {
            pixels[0] = cm[block[0]];
            pixels[1] = cm[block[1]];
            pixels[2] = cm[block[2]];
            pixels[3] = cm[block[3]];
    
            pixels += line_size;
            block += 8;
        }
    }
    
    
    static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
    
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    
        /* read the pixels */
        for(i=0;i<2;i++) {
            pixels[0] = cm[block[0]];
            pixels[1] = cm[block[1]];
    
            pixels += line_size;
            block += 8;
        }
    }
    
    
    /**
     * Write an 8x8 block of signed values to pixels as value + 128, saturated
     * to 0..255 (inputs below -128 give 0, inputs above 127 give 255).
     *
     * @param block      source, 64 entries stored row by row
     * @param pixels     top-left destination sample
     * @param line_size  distance in bytes between vertically adjacent samples
     */
    static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                            uint8_t *restrict pixels,
                                            int line_size)
    {
        int row, col;

        for (row = 0; row < 8; row++) {
            for (col = 0; col < 8; col++) {
                const int level = block[col];
                uint8_t out;

                if (level < -128)
                    out = 0;
                else if (level > 127)
                    out = 255;
                else
                    out = (uint8_t)(level + 128);
                pixels[col] = out;
            }
            block  += 8;
            pixels += line_size;
        }
    }
    
    
    static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
    
                              int line_size)
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
    {
        int i;
    
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
        /* read the pixels */
        for(i=0;i<8;i++) {
    
            pixels[0] = cm[pixels[0] + block[0]];
            pixels[1] = cm[pixels[1] + block[1]];
            pixels[2] = cm[pixels[2] + block[2]];
            pixels[3] = cm[pixels[3] + block[3]];
            pixels[4] = cm[pixels[4] + block[4]];
            pixels[5] = cm[pixels[5] + block[5]];
            pixels[6] = cm[pixels[6] + block[6]];
            pixels[7] = cm[pixels[7] + block[7]];
            pixels += line_size;
            block += 8;
    
    Fabrice Bellard's avatar
    Fabrice Bellard committed
        }
    }
    
    
    /**
     * Add the top-left 4x4 part of a coefficient block to pixels in place,
     * mapping each sum through ff_cropTbl (offset by MAX_NEG_CROP). Rows of
     * block are 8 entries apart.
     *
     * @param block      values to add, row pitch of 8 entries
     * @param pixels     top-left sample of the block to update
     * @param line_size  distance in bytes between vertically adjacent samples
     */
    static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                              int line_size)
    {
        uint8_t *clamp = ff_cropTbl + MAX_NEG_CROP;
        int row, col;

        for (row = 0; row < 4; row++) {
            for (col = 0; col < 4; col++)
                pixels[col] = clamp[pixels[col] + block[col]];
            pixels += line_size;
            block  += 8;
        }
    }
    
    
    /**
     * Add the top-left 2x2 part of a coefficient block to pixels in place,
     * mapping each sum through ff_cropTbl (offset by MAX_NEG_CROP). Rows of
     * block are 8 entries apart.
     *
     * @param block      values to add, row pitch of 8 entries
     * @param pixels     top-left sample of the block to update
     * @param line_size  distance in bytes between vertically adjacent samples
     */
    static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                              int line_size)
    {
        uint8_t *clamp = ff_cropTbl + MAX_NEG_CROP;
        int row, col;

        for (row = 0; row < 2; row++) {
            for (col = 0; col < 2; col++)
                pixels[col] = clamp[pixels[col] + block[col]];
            pixels += line_size;
            block  += 8;
        }
    }
    
    Loren Merritt's avatar
    Loren Merritt committed
    
    /**
     * Add an 8x8 block of values to pixels in place. No clamping is applied:
     * each result is truncated to 8 bits by the store.
     *
     * @param pixels     top-left sample of the block to update
     * @param block      values to add, 64 entries stored row by row
     * @param line_size  distance in bytes between vertically adjacent samples
     */
    static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
    {
        int row, col;

        for (row = 0; row < 8; row++) {
            for (col = 0; col < 8; col++)
                pixels[col] += block[col];
            pixels += line_size;
            block  += 8;
        }
    }
    
    /**
     * Add a 4x4 block of values to pixels in place. No clamping is applied:
     * each result is truncated to 8 bits by the store. Unlike the 8-wide
     * variants, rows of block are only 4 entries apart.
     *
     * @param pixels     top-left sample of the block to update
     * @param block      values to add, 16 entries stored row by row
     * @param line_size  distance in bytes between vertically adjacent samples
     */
    static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
    {
        int row, col;

        for (row = 0; row < 4; row++) {
            for (col = 0; col < 4; col++)
                pixels[col] += block[col];
            pixels += line_size;
            block  += 4;
        }
    }
    
    
    /**
     * Sum of the absolute values of the 64 entries of a coefficient block.
     *
     * @param block  64 coefficients
     * @return sum of FFABS() over all entries
     */
    static int sum_abs_dctelem_c(DCTELEM *block)
    {
        int total = 0;
        int k = 0;

        while (k < 64) {
            total += FFABS(block[k]);
            k++;
        }
        return total;
    }
    
    
    #if 0
    
    #define PIXOP2(OPNAME, OP) \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
    
            OP(*((uint64_t*)block), AV_RN64(pixels));\
    
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
    
            const uint64_t a= AV_RN64(pixels  );\
            const uint64_t b= AV_RN64(pixels+1);\
    
            OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
    
            const uint64_t a= AV_RN64(pixels  );\
            const uint64_t b= AV_RN64(pixels+1);\
    
            OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
    
            const uint64_t a= AV_RN64(pixels          );\
            const uint64_t b= AV_RN64(pixels+line_size);\
    
            OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
        int i;\
        for(i=0; i<h; i++){\
    
            const uint64_t a= AV_RN64(pixels          );\
            const uint64_t b= AV_RN64(pixels+line_size);\
    
            OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    \
    
    static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
            int i;\
    
            const uint64_t a= AV_RN64(pixels  );\
            const uint64_t b= AV_RN64(pixels+1);\
    
            uint64_t l0=  (a&0x0303030303030303ULL)\
                        + (b&0x0303030303030303ULL)\
                        + 0x0202020202020202ULL;\
            uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                       + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            uint64_t l1,h1;\
    \
            pixels+=line_size;\
            for(i=0; i<h; i+=2){\
    
                uint64_t a= AV_RN64(pixels  );\
                uint64_t b= AV_RN64(pixels+1);\
    
                l1=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL);\
                h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
    
                a= AV_RN64(pixels  );\
                b= AV_RN64(pixels+1);\
    
                l0=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL)\
                   + 0x0202020202020202ULL;\
                h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
            }\
    }\
    \
    
    static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
    
    {\
            int i;\
    
            const uint64_t a= AV_RN64(pixels  );\
            const uint64_t b= AV_RN64(pixels+1);\
    
            uint64_t l0=  (a&0x0303030303030303ULL)\
                        + (b&0x0303030303030303ULL)\
                        + 0x0101010101010101ULL;\
            uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                       + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            uint64_t l1,h1;\
    \
            pixels+=line_size;\
            for(i=0; i<h; i+=2){\
    
                uint64_t a= AV_RN64(pixels  );\
                uint64_t b= AV_RN64(pixels+1);\
    
                l1=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL);\
                h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
    
                a= AV_RN64(pixels  );\
                b= AV_RN64(pixels+1);\
    
                l0=  (a&0x0303030303030303ULL)\
                   + (b&0x0303030303030303ULL)\
                   + 0x0101010101010101ULL;\
                h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                  + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
                OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
                pixels+=line_size;\
                block +=line_size;\
            }\
    }\
    \
    
    /* Instantiate the *16* entry points from the 8-byte-wide routines of this variant.   */\
    /* CALL_2X_PIXELS is defined outside this excerpt; presumably it calls the narrow     */\
    /* routine twice, the second time offset by the last argument (8) -- TODO confirm.    */\
    /* The final invocation has no trailing backslash: it ends the PIXOP2 macro body.     */\
    CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
    CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
    CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
    
    
    /* Store into a the per-byte average of a and b, rounded up, for byte lanes packed in a
     * 64-bit word: (a|b) - ((a^b)>>1) equals ceil((a+b)/2), and the 0xFE mask clears each
     * lane's low bit first so the shift cannot leak into the neighbouring lane.
     * Expands to an assignment; both arguments are evaluated more than once. */
    #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
    #else // 64 bit variant
    
    #define PIXOP2(OPNAME, OP) \
    
    static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        int i;\
        for(i=0; i<h; i++){\
    
            OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
    
    static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        int i;\
        for(i=0; i<h; i++){\
    
            OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
    
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    
    static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
            OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
            OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
    
            pixels+=line_size;\
            block +=line_size;\
        }\
    }\
    
    static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _pixels8_c(block, pixels, line_size, h);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    }\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            uint32_t a,b;\
    
            a= AV_RN32(&src1[i*src_stride1  ]);\
            b= AV_RN32(&src2[i*src_stride2  ]);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
    
            a= AV_RN32(&src1[i*src_stride1+4]);\
            b= AV_RN32(&src2[i*src_stride2+4]);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
    
        int i;\
        for(i=0; i<h; i++){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            uint32_t a,b;\
    
            a= AV_RN32(&src1[i*src_stride1  ]);\
            b= AV_RN32(&src2[i*src_stride2  ]);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    
            a= AV_RN32(&src1[i*src_stride1+4]);\
            b= AV_RN32(&src2[i*src_stride2+4]);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    
    static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
        int i;\
        for(i=0; i<h; i++){\
            uint32_t a,b;\
    
            a= AV_RN32(&src1[i*src_stride1  ]);\
            b= AV_RN32(&src2[i*src_stride2  ]);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    
    static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
        int i;\
        for(i=0; i<h; i++){\
            uint32_t a,b;\
    
            a= AV_RN16(&src1[i*src_stride1  ]);\
            b= AV_RN16(&src2[i*src_stride2  ]);\
    
            OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        }\
    }\
    \
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /* 16-byte-wide two-source combine: runs _pixels8_l2 on the left 8-byte column  */\
    /* (offset 0) and then on the right one (offset 8), with unchanged strides and h. */\
    static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
        int col;\
        for(col=0; col<16; col+=8){\
            OPNAME ## _pixels8_l2(dst+col, src1+col, src2+col, dst_stride, src_stride1, src_stride2, h);\
        }\
    }\
    \
    /* 16-byte-wide two-source combine: runs _no_rnd_pixels8_l2 on the left 8-byte  */\
    /* column (offset 0) and then on the right one (offset 8), with unchanged strides and h. */\
    static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                    int src_stride1, int src_stride2, int h){\
        int col;\
        for(col=0; col<16; col+=8){\
            OPNAME ## _no_rnd_pixels8_l2(dst+col, src1+col, src2+col, dst_stride, src_stride1, src_stride2, h);\
        }\
    }\
    \
    
    static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
    }\
    \
    
    static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
    }\
    \
    
    static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
    }\
    \
    
    /* Vertical pair: combines each source row with the row line_size bytes further */\
    /* on (pixels and pixels+line_size) through _pixels8_l2; all strides are line_size. */\
    static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
        OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
    }\
    \
    static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                     int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\