Skip to content
Snippets Groups Projects
dsputil.c 167 KiB
Newer Older
  • Learn to ignore specific revisions
  • static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t halfH[72];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    }\
    
    static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    
        OPNAME ## pixels16_c(dst, src, stride, 16);\
    
    static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t half[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
        OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
    }\
    \
    
    static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
    
    static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t half[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
        OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
    }\
    \
    
    static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t half[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
    }\
    \
    
    static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
    
    static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t half[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
    }\
    
    void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfV[256];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
        put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
        OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
    }\
    
    void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfV[256];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
        put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
        OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
    }\
    
    void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfV[256];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
        put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
        OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
    }\
    
    void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfV[256];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
        put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
        OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t halfH[272];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t halfH[272];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
    }\
    
    void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfV[256];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
        put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
        OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
    }\
    
    void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
        uint8_t halfV[256];\
        uint8_t halfHV[256];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
        put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
    }\
    
    static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t full[24*17];\
        uint8_t halfH[272];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        copy_block17(full, src, 24, stride, 17);\
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
        put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
        OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
    }\
    
    static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t halfH[272];\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /*
     * Store operators passed to QPEL_MC as its last argument.
     * Each takes a destination lvalue `a` and an integer sum `b`, and expects a
     * lookup table named `cm` to be in scope wherever the macro is expanded.
     *   op_put          a = cm[(b + 16) >> 5]
     *   op_put_no_rnd   same, but with a bias of 15 instead of 16
     *   op_avg          (a + cm[(b + 16) >> 5] + 1) >> 1, i.e. mixes the new
     *                   value with what is already stored in `a`
     *   op_avg_no_rnd   (a + cm[(b + 15) >> 5]) >> 1, without the +1
     * NOTE(review): `b` is presumably a filter sum scaled by 32; the filters that
     * produce it are defined outside the visible part of the file.
     */
    #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
    #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
    #define op_put(a, b) a = cm[((b) + 16)>>5]
    #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
    
    /*
     * Instantiate the qpel function families. The second argument is the name
     * prefix of the generated functions and the third is the infix pasted into
     * the put...mpeg4_qpel helper names; the meaning of the leading 0/1 is not
     * visible from here (TODO confirm against the QPEL_MC parameter list).
     * The avg_no_rnd family is left disabled.
     */
    QPEL_MC(0, put_       , _       , op_put)
    QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
    QPEL_MC(0, avg_       , _       , op_avg)
    //QPEL_MC(1, avg_no_rnd , _       , op_avg)
    /* The operators are only meaningful for the instantiations above. */
    #undef op_avg
    #undef op_avg_no_rnd
    #undef op_put
    #undef op_put_no_rnd
    
    #if 1
    /**
     * Generate the 6-tap lowpass helpers for 2-, 4-, 8- and 16-pixel blocks.
     *
     * Every output sample is built from the unnormalised sum
     *     (x[0]+x[1])*20 - (x[-1]+x[2])*5 + (x[-2]+x[3])
     * taken along a row (h_lowpass), along a column (v_lowpass), or along rows
     * first and then along the columns of a 16-bit intermediate (hv_lowpass).
     * Normalisation and clipping are left to OP / OP2.
     *
     * @param OPNAME prefix pasted onto every generated function name
     * @param OP     statement macro OP(dst, sum) storing a single-pass sum
     * @param OP2    statement macro OP2(dst, sum) storing a two-pass (hv) sum
     *
     * Requirements on the expansion site: ff_cropTbl, MAX_NEG_CROP and av_unused
     * must be defined. Each generated function declares a local pointer `cm`
     * that OP / OP2 may reference.
     *
     * The filters read two samples before and three samples after the block in
     * the filtered direction, so src must have that much valid margin.
     *
     * Every line of the macro body must end in a backslash: a blank line would
     * terminate the definition.
     */
    #define H264_LOWPASS(OPNAME, OP, OP2) \
    /* 2x2 block, filtered along each row. */\
    static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        const int h=2;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<h; i++)\
        {\
            OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
            OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
            dst+=dstStride;\
            src+=srcStride;\
        }\
    }\
    \
    /* 2x2 block, filtered along each column; one column per iteration. */\
    static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        const int w=2;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<w; i++)\
        {\
            const int srcB= src[-2*srcStride];\
            const int srcA= src[-1*srcStride];\
            const int src0= src[0 *srcStride];\
            const int src1= src[1 *srcStride];\
            const int src2= src[2 *srcStride];\
            const int src3= src[3 *srcStride];\
            const int src4= src[4 *srcStride];\
            OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
            OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
            dst++;\
            src++;\
        }\
    }\
    \
    /* 2x2 block, rows then columns. tmp receives h+5 rows of row-filtered sums. */\
    static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
        const int h=2;\
        const int w=2;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        /* Start two rows above the block so the column filter has its margin. */\
        src -= 2*srcStride;\
        for(i=0; i<h+5; i++)\
        {\
            tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
            tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
            tmp+=tmpStride;\
            src+=srcStride;\
        }\
        /* Rewind to the intermediate row that lines up with the first output row. */\
        tmp -= tmpStride*(h+5-2);\
        for(i=0; i<w; i++)\
        {\
            const int tmpB= tmp[-2*tmpStride];\
            const int tmpA= tmp[-1*tmpStride];\
            const int tmp0= tmp[0 *tmpStride];\
            const int tmp1= tmp[1 *tmpStride];\
            const int tmp2= tmp[2 *tmpStride];\
            const int tmp3= tmp[3 *tmpStride];\
            const int tmp4= tmp[4 *tmpStride];\
            OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
            OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
            dst++;\
            tmp++;\
        }\
    }\
    /* 4x4 block, filtered along each row. */\
    static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        const int h=4;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<h; i++)\
        {\
            OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
            OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
            OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
            OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
            dst+=dstStride;\
            src+=srcStride;\
        }\
    }\
    \
    /* 4x4 block, filtered along each column. */\
    static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        const int w=4;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<w; i++)\
        {\
            const int srcB= src[-2*srcStride];\
            const int srcA= src[-1*srcStride];\
            const int src0= src[0 *srcStride];\
            const int src1= src[1 *srcStride];\
            const int src2= src[2 *srcStride];\
            const int src3= src[3 *srcStride];\
            const int src4= src[4 *srcStride];\
            const int src5= src[5 *srcStride];\
            const int src6= src[6 *srcStride];\
            OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
            OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
            OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
            OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
            dst++;\
            src++;\
        }\
    }\
    \
    /* 4x4 block, rows then columns through the 16-bit tmp buffer. */\
    static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
        const int h=4;\
        const int w=4;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        src -= 2*srcStride;\
        for(i=0; i<h+5; i++)\
        {\
            tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
            tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
            tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
            tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
            tmp+=tmpStride;\
            src+=srcStride;\
        }\
        tmp -= tmpStride*(h+5-2);\
        for(i=0; i<w; i++)\
        {\
            const int tmpB= tmp[-2*tmpStride];\
            const int tmpA= tmp[-1*tmpStride];\
            const int tmp0= tmp[0 *tmpStride];\
            const int tmp1= tmp[1 *tmpStride];\
            const int tmp2= tmp[2 *tmpStride];\
            const int tmp3= tmp[3 *tmpStride];\
            const int tmp4= tmp[4 *tmpStride];\
            const int tmp5= tmp[5 *tmpStride];\
            const int tmp6= tmp[6 *tmpStride];\
            OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
            OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
            OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
            OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
            dst++;\
            tmp++;\
        }\
    }\
    \
    /* 8x8 block, filtered along each row. */\
    static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        const int h=8;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<h; i++)\
        {\
            OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
            OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
            OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
            OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
            OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
            OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
            OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
            OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
            dst+=dstStride;\
            src+=srcStride;\
        }\
    }\
    \
    /* 8x8 block, filtered along each column. */\
    static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        const int w=8;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        for(i=0; i<w; i++)\
        {\
            const int srcB= src[-2*srcStride];\
            const int srcA= src[-1*srcStride];\
            const int src0= src[0 *srcStride];\
            const int src1= src[1 *srcStride];\
            const int src2= src[2 *srcStride];\
            const int src3= src[3 *srcStride];\
            const int src4= src[4 *srcStride];\
            const int src5= src[5 *srcStride];\
            const int src6= src[6 *srcStride];\
            const int src7= src[7 *srcStride];\
            const int src8= src[8 *srcStride];\
            const int src9= src[9 *srcStride];\
            const int src10=src[10*srcStride];\
            OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
            OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
            OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
            OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
            OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
            OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
            OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
            OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
            dst++;\
            src++;\
        }\
    }\
    \
    /* 8x8 block, rows then columns through the 16-bit tmp buffer. */\
    static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
        const int h=8;\
        const int w=8;\
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
        int i;\
        src -= 2*srcStride;\
        for(i=0; i<h+5; i++)\
        {\
            tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
            tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
            tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
            tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
            tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
            tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
            tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
            tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
            tmp+=tmpStride;\
            src+=srcStride;\
        }\
        tmp -= tmpStride*(h+5-2);\
        for(i=0; i<w; i++)\
        {\
            const int tmpB= tmp[-2*tmpStride];\
            const int tmpA= tmp[-1*tmpStride];\
            const int tmp0= tmp[0 *tmpStride];\
            const int tmp1= tmp[1 *tmpStride];\
            const int tmp2= tmp[2 *tmpStride];\
            const int tmp3= tmp[3 *tmpStride];\
            const int tmp4= tmp[4 *tmpStride];\
            const int tmp5= tmp[5 *tmpStride];\
            const int tmp6= tmp[6 *tmpStride];\
            const int tmp7= tmp[7 *tmpStride];\
            const int tmp8= tmp[8 *tmpStride];\
            const int tmp9= tmp[9 *tmpStride];\
            const int tmp10=tmp[10*tmpStride];\
            OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
            OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
            OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
            OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
            OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
            OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
            OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
            OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
            dst++;\
            tmp++;\
        }\
    }\
    \
    /* 16x16 variants: four 8x8 calls, left/right halves then the lower 8 rows. */\
    static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
        OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
        src += 8*srcStride;\
        dst += 8*dstStride;\
        OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
        OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    }\
    \
    static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
        OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
        OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
        src += 8*srcStride;\
        dst += 8*dstStride;\
        OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
        OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    }\
    \
    /* tmp is not advanced for the lower half: the same scratch rows are reused. */\
    static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
        OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
        OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
        src += 8*srcStride;\
        dst += 8*dstStride;\
        OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
        OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    }\
    
    /**
     * Generate the sixteen SIZE x SIZE entry points
     * OPNAME h264_qpel SIZE _mcXY_c(dst, src, stride).
     *
     * Reading the bodies, X selects the horizontal and Y the vertical
     * sub-position: X or Y equal to 2 uses the lowpass output directly, 1 and 3
     * combine a lowpass output with a neighbouring plane through the _l2 helper.
     *
     * @param OPNAME prefix of the generated functions and of the final store
     *               helpers (pixels SIZE _c, pixels SIZE _l2, *_lowpass)
     * @param SIZE   block edge length; must be a single token since it is pasted
     *               into identifiers
     *
     * Intermediate planes are always produced with the put_ lowpass helpers;
     * only the last step uses OPNAME.
     * NOTE(review): the _l2 and copy_block helpers live outside this macro;
     * _l2 is assumed to average its two sources - confirm against its definition.
     * Column buffers hold SIZE+5 rows (two above, three below the block), so
     * their row 2 lines up with the first row of src.
     */
    #define H264_MC(OPNAME, SIZE) \
    /* (0,0): plain block transfer. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
        OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
    }\
    \
    /* (1,0): row-filtered plane combined with the source block. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t hpel[SIZE*SIZE];\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, src, hpel, stride, stride, SIZE, SIZE);\
    }\
    \
    /* (2,0): row-filtered plane stored directly. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
        OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
    }\
    \
    /* (3,0): row-filtered plane combined with the source shifted one pixel right. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t hpel[SIZE*SIZE];\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, src + 1, hpel, stride, stride, SIZE, SIZE);\
    }\
    \
    /* (0,1): column-filtered plane combined with the source rows. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        OPNAME ## pixels ## SIZE ## _l2(dst, mid, vpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (0,2): column-filtered plane stored directly. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
        OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, mid, stride, SIZE);\
    }\
    \
    /* (0,3): column-filtered plane combined with the source one row further down. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        OPNAME ## pixels ## SIZE ## _l2(dst, mid + SIZE, vpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (1,1): row-filtered plane combined with column-filtered plane. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t hpel[SIZE*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (3,1): as (1,1), with the column filter taken one pixel to the right. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t hpel[SIZE*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (1,3): as (1,1), with the row filter taken one row further down. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t hpel[SIZE*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (3,3): row filter one row down, column filter one pixel right. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t hpel[SIZE*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        copy_block ## SIZE (rows, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, hpel, vpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (2,2): two-pass (rows then columns) plane stored directly. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
        int16_t inter[(SIZE+5)*SIZE];\
        OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, inter, src, stride, SIZE, stride);\
    }\
    \
    /* (2,1): row-filtered plane combined with the two-pass plane. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
        int16_t inter[(SIZE+5)*SIZE];\
        uint8_t hvpel[SIZE*SIZE];\
        uint8_t hpel[SIZE*SIZE];\
        put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, inter, src, SIZE, SIZE, stride);\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, hpel, hvpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (2,3): as (2,1), with the row filter taken one row further down. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
        int16_t inter[(SIZE+5)*SIZE];\
        uint8_t hvpel[SIZE*SIZE];\
        uint8_t hpel[SIZE*SIZE];\
        put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, inter, src, SIZE, SIZE, stride);\
        put_h264_qpel ## SIZE ## _h_lowpass(hpel, src + stride, SIZE, stride);\
        OPNAME ## pixels ## SIZE ## _l2(dst, hpel, hvpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (1,2): column-filtered plane combined with the two-pass plane. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
        int16_t inter[(SIZE+5)*SIZE];\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t hvpel[SIZE*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, inter, src, SIZE, SIZE, stride);\
        copy_block ## SIZE (rows, src - 2*stride, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        OPNAME ## pixels ## SIZE ## _l2(dst, vpel, hvpel, stride, SIZE, SIZE, SIZE);\
    }\
    \
    /* (3,2): as (1,2), with the column filter taken one pixel to the right. */\
    static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
        int16_t inter[(SIZE+5)*SIZE];\
        uint8_t rows[(SIZE+5)*SIZE];\
        uint8_t hvpel[SIZE*SIZE];\
        uint8_t vpel[SIZE*SIZE];\
        uint8_t * const mid = &rows[2*SIZE];\
        put_h264_qpel ## SIZE ## _hv_lowpass(hvpel, inter, src, SIZE, SIZE, stride);\
        copy_block ## SIZE (rows, src - 2*stride + 1, SIZE, stride, SIZE + 5);\
        put_h264_qpel ## SIZE ## _v_lowpass(vpel, mid, SIZE, SIZE);\
        OPNAME ## pixels ## SIZE ## _l2(dst, vpel, hvpel, stride, SIZE, SIZE, SIZE);\
    }\
    
    /*
     * Store operators handed to H264_LOWPASS below.  Each one takes an lvalue `a`
     * and a filter sum `b`.  All of them index `cm`, which is not defined here and
     * must be in scope wherever the macro is expanded.
     *
     *   op_put : a = cm[(b + 16) >> 5]            (round, divide by 32)
     *   op_avg : rounded-up mean of the old a and the op_put value
     *   op2_put: a = cm[(b + 512) >> 10]          (round, divide by 1024)
     *   op2_avg: rounded-up mean of the old a and the op2_put value
     */
    #define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
    //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
    #define op_put(a, b)  a = cm[((b) + 16)>>5]
    #define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
    #define op2_put(a, b)  a = cm[((b) + 512)>>10]
    
    /* Instantiate the low-pass helpers once per store flavour (put_ / avg_). */
    H264_LOWPASS(put_       , op_put, op2_put)
    H264_LOWPASS(avg_       , op_avg, op2_avg)
    
    /* Instantiate the quarter-pel motion compensation functions for block sizes 4, 8 and 16. */
    H264_MC(put_, 4)
    H264_MC(put_, 8)
    H264_MC(put_, 16)
    H264_MC(avg_, 4)
    H264_MC(avg_, 8)
    H264_MC(avg_, 16)
    
    /* The store operators are only needed for the instantiations above. */
    #undef op_avg
    #undef op_put
    #undef op2_avg
    #undef op2_put
    #endif
    
    
    /*
     * Per-sample operators used by H264_WEIGHT.
     *   op_scale1: in-place weighting of block[x] by `weight`, plus the pre-scaled
     *              `offset`, shifted down by log2_denom.
     *   op_scale2: weighted sum of src[x] (weights) and dst[x] (weightd), plus the
     *              pre-scaled `offset`, shifted down by log2_denom+1.
     * Both pass the result through av_clip_uint8 (defined elsewhere; presumably a
     * clamp to 0..255).
     */
    #define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
    #define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
    
    /*
     * Generates weight_h264_pixels<W>x<H>_c and biweight_h264_pixels<W>x<H>_c,
     * which apply op_scale1 / op_scale2 to every sample of a W x H block whose
     * rows are `stride` bytes apart.
     *
     * The offset is scaled by multiplication instead of a left shift: it is a
     * signed int and left-shifting a negative value is undefined behaviour in C.
     */
    #define H264_WEIGHT(W,H) \
    static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
        int x, y; \
        offset *= 1 << log2_denom; \
        /* add half of the divisor so the final shift rounds to nearest */ \
        if(log2_denom) offset += 1<<(log2_denom-1); \
        for(y=0; y<H; y++, block += stride){ \
            for(x=0; x<W; x++){ \
                op_scale1(x); \
            } \
        } \
    } \
    static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
        int x, y; \
        /* ((offset + 1) | 1) is odd, so after scaling the sum carries a half-step for the (log2_denom+1) shift */ \
        offset = ((offset + 1) | 1) * (1 << log2_denom); \
        for(y=0; y<H; y++, dst += stride, src += stride){ \
            for(x=0; x<W; x++){ \
                op_scale2(x); \
            } \
        } \
    }
    
    H264_WEIGHT(16,16)
    H264_WEIGHT(16,8)
    H264_WEIGHT(8,16)
    H264_WEIGHT(8,8)
    H264_WEIGHT(8,4)
    H264_WEIGHT(4,8)
    H264_WEIGHT(4,4)
    H264_WEIGHT(4,2)
    H264_WEIGHT(2,4)
    H264_WEIGHT(2,2)
    
    #undef op_scale1
    #undef op_scale2
    #undef H264_WEIGHT
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * Horizontal 4-tap (-1, 9, 9, -1)/16 filter over rows of 8 samples.
     *
     * Output x of a row is cm[(9*(src[x] + src[x+1]) - (src[x-1] + src[x+2]) + 8) >> 4],
     * so each row reads src[-1] .. src[9].  cm points into ff_cropTbl (defined
     * elsewhere; presumably a clamp-to-byte lookup table).
     *
     * @param dst       destination, 8 samples written per row
     * @param src       source; one sample to the left and two to the right of the
     *                  8-sample row must be readable
     * @param dstStride distance in bytes between destination rows
     * @param srcStride distance in bytes between source rows
     * @param h         number of rows to filter
     */
    static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
        int i, x;
    
        for(i=0; i<h; i++){
            for(x=0; x<8; x++)
                dst[x]= cm[(9*(src[x] + src[x+1]) - (src[x-1] + src[x+2]) + 8)>>4];
            dst+=dstStride;
            src+=srcStride;
        }
    }
    
    #if CONFIG_CAVS_DECODER
    
    /* AVS specific */
    void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
    
    /** CAVS 8x8 full-pel case: forwards to put_pixels8_c (defined elsewhere) for 8 rows. */
    void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
        put_pixels8_c(dst, src, stride, 8);
    }
    /** CAVS 8x8 full-pel case, averaging flavour: forwards to avg_pixels8_c (defined elsewhere) for 8 rows. */
    void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
        avg_pixels8_c(dst, src, stride, 8);
    }
    /** CAVS 16x16 full-pel case: forwards to put_pixels16_c (defined elsewhere) for 16 rows. */
    void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
        put_pixels16_c(dst, src, stride, 16);
    }
    /** CAVS 16x16 full-pel case, averaging flavour: forwards to avg_pixels16_c (defined elsewhere) for 16 rows. */
    void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
        avg_pixels16_c(dst, src, stride, 16);
    }
    
    #endif /* CONFIG_CAVS_DECODER */
    
    #if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER
    
    /* VC-1 specific */
    void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
    
    /**
     * VC-1 full-pel case: forwards to put_pixels8_c (defined elsewhere) for 8 rows.
     * The rnd argument is accepted but not used by this function.
     */
    void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    
        put_pixels8_c(dst, src, stride, 8);
    }
    
    #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
    
    
    void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
    
    
    Panagiotis Issaris's avatar
    Panagiotis Issaris committed
    /* H264 specific */
    
    void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
    
    #if CONFIG_RV30_DECODER
    
    Kostya Shishkov's avatar
    Kostya Shishkov committed
    void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
    #endif /* CONFIG_RV30_DECODER */
    
    
    #if CONFIG_RV40_DECODER
    
    Kostya Shishkov's avatar
    Kostya Shishkov committed
    /** RV40 16x16 (3,3) position: delegates to put_pixels16_xy2_c (defined elsewhere) for 16 rows. */
    static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
        put_pixels16_xy2_c(dst, src, stride, 16);
    }
    /** RV40 16x16 (3,3) position, averaging flavour: delegates to avg_pixels16_xy2_c (defined elsewhere) for 16 rows. */
    static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
        avg_pixels16_xy2_c(dst, src, stride, 16);
    }
    /** RV40 8x8 (3,3) position: delegates to put_pixels8_xy2_c (defined elsewhere) for 8 rows. */
    static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
        put_pixels8_xy2_c(dst, src, stride, 8);
    }
    /** RV40 8x8 (3,3) position, averaging flavour: delegates to avg_pixels8_xy2_c (defined elsewhere) for 8 rows. */
    static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
        avg_pixels8_xy2_c(dst, src, stride, 8);
    }
    
    void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
    #endif /* CONFIG_RV40_DECODER */
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * Vertical 4-tap (-1, 9, 9, -1)/16 filter over columns of 8 samples.
     *
     * For every column the 11 source samples from row -1 to row 9 are loaded
     * first; output row k is then
     * cm[(9*(s[k] + s[k+1]) - (s[k-1] + s[k+2]) + 8) >> 4].
     * cm points into ff_cropTbl (defined elsewhere; presumably a clamp-to-byte
     * lookup table).
     *
     * @param dst       destination, 8 rows written per column
     * @param src       source; one row above and two rows below the 8-row block
     *                  must be readable
     * @param dstStride distance in bytes between destination rows
     * @param srcStride distance in bytes between source rows
     * @param w         number of columns to filter
     */
    static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
        int i, k;
    
        for(i=0; i<w; i++){
            int tap[11];    /* tap[k] holds source row k-1 of this column */
    
            /* read the whole column before any store */
            for(k=0; k<11; k++)
                tap[k]= src[(k-1)*srcStride];
    
            for(k=0; k<8; k++)
                dst[k*dstStride]= cm[(9*(tap[k+1] + tap[k+2]) - (tap[k] + tap[k+3]) + 8)>>4];
    
            src++;
            dst++;
        }
    }
    
    /** mspel 8x8, position (0,0): no filtering, forwards to put_pixels8_c (defined elsewhere) for 8 rows. */
    static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
        put_pixels8_c(dst, src, stride, 8);
    }
    
    /**
     * mspel 8x8, position (1,0).
     * Runs the horizontal low-pass into a packed 8x8 scratch block, then hands
     * src and that scratch block to put_pixels8_l2 (defined elsewhere;
     * presumably stores their average) to produce dst.
     */
    static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
    {
        uint8_t hfilt[8 * 8];   /* packed scratch, row pitch 8 */
    
        wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
        put_pixels8_l2(dst, src, hfilt, stride, stride, 8, 8);
    }
    
    /** mspel 8x8, position (2,0): horizontal low-pass of 8 rows written straight to dst. */
    static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
        wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
    }
    
    /**
     * mspel 8x8, position (3,0).
     * Same as the (1,0) case except that the horizontally filtered scratch block
     * is combined with the source shifted one sample to the right (src+1).
     */
    static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
    {
        uint8_t hfilt[8 * 8];   /* packed scratch, row pitch 8 */
    
        wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
        put_pixels8_l2(dst, src + 1, hfilt, stride, stride, 8, 8);
    }
    
    /** mspel 8x8, position (0,2): vertical low-pass of 8 columns written straight to dst. */
    static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
        wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
    }
    
    /**
     * mspel 8x8, position (1,2).
     * Builds two packed 8x8 intermediates -- the vertical low-pass of src, and
     * the vertical low-pass of the horizontally filtered rows -- and passes both
     * to put_pixels8_l2 (defined elsewhere; presumably stores their average).
     */
    static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
    {
        uint8_t rows_h[11 * 8]; /* 11 horizontally filtered rows, starting one row above src */
        uint8_t col_v[8 * 8];
        uint8_t both[8 * 8];
    
        wmv2_mspel8_v_lowpass(col_v, src, 8, stride, 8);
    
        wmv2_mspel8_h_lowpass(rows_h, src - stride, 8, stride, 11);
        /* skip the extra top row so the vertical filter's row -1 lands on rows_h[0..7] */
        wmv2_mspel8_v_lowpass(both, rows_h + 8, 8, 8, 8);
    
        put_pixels8_l2(dst, col_v, both, stride, 8, 8, 8);
    }
    /**
     * mspel 8x8, position (3,2).
     * Same as the (1,2) case except that the purely vertical intermediate is
     * taken from the source shifted one sample to the right (src+1).
     */
    static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
    {
        uint8_t rows_h[11 * 8]; /* 11 horizontally filtered rows, starting one row above src */
        uint8_t col_v[8 * 8];
        uint8_t both[8 * 8];
    
        wmv2_mspel8_v_lowpass(col_v, src + 1, 8, stride, 8);
    
        wmv2_mspel8_h_lowpass(rows_h, src - stride, 8, stride, 11);
        /* skip the extra top row so the vertical filter's row -1 lands on rows_h[0..7] */
        wmv2_mspel8_v_lowpass(both, rows_h + 8, 8, 8, 8);
    
        put_pixels8_l2(dst, col_v, both, stride, 8, 8, 8);
    }
    /**
     * mspel 8x8, position (2,2).
     * Horizontal low-pass of 11 rows (from one row above src) into a packed
     * scratch buffer, followed by a vertical low-pass of that buffer into dst.
     */
    static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
    {
        uint8_t rows_h[11 * 8];             /* packed scratch, row pitch 8 */
        uint8_t *const first_row = rows_h + 8;  /* row that corresponds to src itself */
    
        wmv2_mspel8_h_lowpass(rows_h, src - stride, 8, stride, 11);
        wmv2_mspel8_v_lowpass(dst, first_row, stride, 8, 8);
    }
    
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
    /**
     * H.263 loop filter across a horizontal block edge, 8 columns wide.
     *
     * For every column x the four samples straddling the edge are read from rows
     * -2, -1, 0 and +1 relative to src (p0..p3) and all four are rewritten.
     *
     * @param src    first sample of the row just below the edge
     * @param stride distance in bytes between rows
     * @param qscale index into ff_h263_loop_filter_strength (defined elsewhere)
     */
    static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];
    
        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;
    
            /* d1 follows d up to +-strength, then ramps back to 0 at +-2*strength */
            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;
    
            p1 += d1;
            p2 -= d1;
            /* bit 8 set means the value left 0..255: a negative value becomes 0,
             * a positive one becomes ~0, which is stored as 255 in the uint8_t */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);
    
            src[x-1*stride] = p1;
            src[x+0*stride] = p2;
    
            ad1= FFABS(d1)>>1;
    
            /* the outer samples move by at most half of what the inner ones moved */
            d2= av_clip((p0-p3)/4, -ad1, ad1);
    
            src[x-2*stride] = p0 - d2;
            src[x+  stride] = p3 + d2;
        }
    }
    
    /**
     * H.263 loop filter across a vertical block edge, 8 rows high.
     *
     * For every row y the four samples straddling the edge are read from columns
     * -2, -1, 0 and +1 relative to src (p0..p3) and all four are rewritten.
     *
     * @param src    first sample of the column just right of the edge
     * @param stride distance in bytes between rows
     * @param qscale index into ff_h263_loop_filter_strength (defined elsewhere)
     */
    static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];
    
        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;
    
            /* d1 follows d up to +-strength, then ramps back to 0 at +-2*strength */
            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;
    
            p1 += d1;
            p2 -= d1;
            /* bit 8 set means the value left 0..255: a negative value becomes 0,
             * a positive one becomes ~0, which is stored as 255 in the uint8_t */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);
    
            src[y*stride-1] = p1;
            src[y*stride+0] = p2;
    
            ad1= FFABS(d1)>>1;
    
            /* the outer samples move by at most half of what the inner ones moved */
            d2= av_clip((p0-p3)/4, -ad1, ad1);
    
            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
    
    /**
     * H.261 in-loop filter: separable (1, 2, 1) smoothing of one 8x8 block, in place.
     *
     * Pass 1 (vertical) fills temp[] with values scaled by 4: rows 0 and 7 are
     * copied as 4*src, rows 1..6 get above + 2*centre + below.
     * Pass 2 (horizontal) writes back to src: columns 0 and 7 only undo the
     * scale of 4 with rounding, columns 1..6 apply (1, 2, 1) to temp and divide
     * the combined scale of 16 with rounding.
     *
     * @param src    top-left sample of the 8x8 block
     * @param stride distance in bytes between rows
     */
    static void h261_loop_filter_c(uint8_t *src, int stride){
        int x,y,xy,yz;
        int temp[64];
    
        /* top and bottom rows are not filtered vertically */
        for(x=0; x<8; x++){
            temp[x      ] = 4*src[x           ];
            temp[x + 7*8] = 4*src[x + 7*stride];
        }
        for(y=1; y<7; y++){
            for(x=0; x<8; x++){
                xy = y * stride + x;
                yz = y * 8 + x;
                temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
            }
        }
    
        for(y=0; y<8; y++){
            /* left and right columns are not filtered horizontally */
            src[  y*stride] = (temp[  y*8] + 2)>>2;
            src[7+y*stride] = (temp[7+y*8] + 2)>>2;
            for(x=1; x<7; x++){
                xy = y * stride + x;
                yz = y * 8 + x;
                src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
            }
        }
    }
    
    /**
     * Normal-strength H.264 luma deblocking of one edge of 16 positions.
     *
     * The edge is processed as four groups of four positions.  Group i uses the
     * clipping bound tc0[i]; a negative tc0[i] skips the whole group.
     *
     * @param pix     first sample on the q side of the edge (q0); the p samples
     *                sit at negative multiples of xstride
     * @param xstride distance between neighbouring samples across the edge
     * @param ystride distance between neighbouring positions along the edge
     * @param alpha   threshold for |p0 - q0|
     * @param beta    threshold for |p1 - p0|, |q1 - q0|, |p2 - p0| and |q2 - q0|
     * @param tc0     four clipping bounds, one per group of four positions
     */
    static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
    {
        int i, d;
        for( i = 0; i < 4; i++ ) {
            if( tc0[i] < 0 ) {
                pix += 4*ystride;
                continue;
            }
            for( d = 0; d < 4; d++ ) {
                const int p0 = pix[-1*xstride];
                const int p1 = pix[-2*xstride];
                const int p2 = pix[-3*xstride];
                const int q0 = pix[0];
                const int q1 = pix[1*xstride];
                const int q2 = pix[2*xstride];
    
                if( FFABS( p0 - q0 ) < alpha &&
                    FFABS( p1 - p0 ) < beta &&
                    FFABS( q1 - q0 ) < beta ) {
    
                    /* the bound for p0/q0 grows by one for each side whose
                     * second sample (p1 or q1) is filtered as well */
                    int tc = tc0[i];
                    int i_delta;
    
                    if( FFABS( p2 - p0 ) < beta ) {
                        pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                        tc++;
                    }
                    if( FFABS( q2 - q0 ) < beta ) {
                        pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                        tc++;
                    }
    
                    i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                    pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                    pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
                }
                /* move on to the next position along the edge */
                pix += ystride;
            }
        }
    }
    
    /**
     * Luma deblocking with samples across the edge `stride` bytes apart and
     * successive edge positions 1 byte apart, i.e. filtering vertically across
     * a horizontal edge.  Thin wrapper around h264_loop_filter_luma_c.
     */
    static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
    
    {
        h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
    }
    
    /**
     * Luma deblocking with samples across the edge 1 byte apart and successive
     * edge positions `stride` bytes apart, i.e. filtering horizontally across
     * a vertical edge.  Thin wrapper around h264_loop_filter_luma_c.
     */
    static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
    
    {
        h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
    }
    
    
    static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
    {
        int d;
        for( d = 0; d < 16; d++ ) {
            const int p2 = pix[-3*xstride];
            const int p1 = pix[-2*xstride];