/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

    /**
     * @file h264.c
     * H.264 / AVC / MPEG4 part10 codec.
     * @author Michael Niedermayer <michaelni@gmx.at>
     */
    
    #include "dsputil.h"
    #include "avcodec.h"
    #include "mpegvideo.h"
    
    #include "h264.h"
    
    #include "h264data.h"
    
    #include "h264_parser.h"
    
    #include "golomb.h"
    
    
    #include <assert.h>
    
    static VLC coeff_token_vlc[4];
    static VLC chroma_dc_coeff_token_vlc;
    
    static VLC total_zeros_vlc[15];
    static VLC chroma_dc_total_zeros_vlc[3];
    
    static VLC run_vlc[6];
    static VLC run7_vlc;
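
/* These VLC objects hold the CAVLC code tables (coeff_token, chroma DC
 * coeff_token, total_zeros and run_before); they are filled in once at
 * decoder initialisation from the static tables in h264data.h. */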
    
    
    static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
    static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
    
    static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
    
    static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
    
    static av_always_inline uint32_t pack16to32(int a, int b){
    
    #ifdef WORDS_BIGENDIAN
       return (b&0xFFFF) + (a<<16);
    #else
       return (a&0xFFFF) + (b<<16);
    #endif
    }
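
/* pack16to32() packs two 16-bit values so that a single 32-bit store writes
 * 'a' at the lower address and 'b' right after it on both little- and
 * big-endian hosts; see e.g. pack16to32(ref[0], ref[1]) in fill_caches()
 * below, which fills two ref_cache entries with one write. */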
    
    
    const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
    };
    
    const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
    };
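
/* By construction ff_rem6[qp] == qp%6 and ff_div6[qp] == qp/6 for qp in
 * 0..51; the lookup tables spare the dequantisation code integer divisions. */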
    
    
    
    /**
     * fill a rectangle.
    
     * @param h height of the rectangle, should be a constant
     * @param w width of the rectangle, should be a constant
    
     * @param size the size of val (1 or 4), should be a constant
     */
    
    static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
    
        uint8_t *p= (uint8_t*)vp;
    
        assert(size==1 || size==4);
    
        assert(w<=4);
    
        w      *= size;
        stride *= size;
    
        assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
    
        assert((stride&(w-1))==0);
    
        if(w==2){
            const uint16_t v= size==4 ? val : val*0x0101;
            *(uint16_t*)(p + 0*stride)= v;
            if(h==1) return;
            *(uint16_t*)(p + 1*stride)= v;
            if(h==2) return;
    
            *(uint16_t*)(p + 2*stride)=
    
            *(uint16_t*)(p + 3*stride)= v;
        }else if(w==4){
            const uint32_t v= size==4 ? val : val*0x01010101;
            *(uint32_t*)(p + 0*stride)= v;
            if(h==1) return;
            *(uint32_t*)(p + 1*stride)= v;
            if(h==2) return;
    
            *(uint32_t*)(p + 2*stride)=
    
            *(uint32_t*)(p + 3*stride)= v;
        }else if(w==8){
        //gcc can't optimize 64bit math on x86_32
    #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
            const uint64_t v= val*0x0100000001ULL;
            *(uint64_t*)(p + 0*stride)= v;
            if(h==1) return;
            *(uint64_t*)(p + 1*stride)= v;
            if(h==2) return;
    
            *(uint64_t*)(p + 2*stride)=
    
            *(uint64_t*)(p + 3*stride)= v;
        }else if(w==16){
            const uint64_t v= val*0x0100000001ULL;
    
            *(uint64_t*)(p + 0+0*stride)=
            *(uint64_t*)(p + 8+0*stride)=
            *(uint64_t*)(p + 0+1*stride)=
    
            *(uint64_t*)(p + 8+1*stride)= v;
            if(h==2) return;
    
            *(uint64_t*)(p + 0+2*stride)=
            *(uint64_t*)(p + 8+2*stride)=
            *(uint64_t*)(p + 0+3*stride)=
    
            *(uint64_t*)(p + 8+3*stride)= v;
    #else
            *(uint32_t*)(p + 0+0*stride)=
            *(uint32_t*)(p + 4+0*stride)= val;
            if(h==1) return;
            *(uint32_t*)(p + 0+1*stride)=
            *(uint32_t*)(p + 4+1*stride)= val;
            if(h==2) return;
            *(uint32_t*)(p + 0+2*stride)=
            *(uint32_t*)(p + 4+2*stride)=
            *(uint32_t*)(p + 0+3*stride)=
            *(uint32_t*)(p + 4+3*stride)= val;
        }else if(w==16){
            *(uint32_t*)(p + 0+0*stride)=
            *(uint32_t*)(p + 4+0*stride)=
            *(uint32_t*)(p + 8+0*stride)=
            *(uint32_t*)(p +12+0*stride)=
            *(uint32_t*)(p + 0+1*stride)=
            *(uint32_t*)(p + 4+1*stride)=
            *(uint32_t*)(p + 8+1*stride)=
            *(uint32_t*)(p +12+1*stride)= val;
            if(h==2) return;
            *(uint32_t*)(p + 0+2*stride)=
            *(uint32_t*)(p + 4+2*stride)=
            *(uint32_t*)(p + 8+2*stride)=
            *(uint32_t*)(p +12+2*stride)=
            *(uint32_t*)(p + 0+3*stride)=
            *(uint32_t*)(p + 4+3*stride)=
            *(uint32_t*)(p + 8+3*stride)=
            *(uint32_t*)(p +12+3*stride)= val;
    #endif
    
        }else
            assert(0);
    
    assert(h==4);
}

    static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    
        MpegEncContext * const s = &h->s;
    
        const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    
        int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[8];
    int i;
    
        //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
        return;

    //wow what a mess, why didn't they simplify the interlacing & intra stuff, i can't imagine that these complex rules are worth it
    
    
        top_xy     = mb_xy  - s->mb_stride;
        topleft_xy = top_xy - 1;
        topright_xy= top_xy + 1;
        left_xy[1] = left_xy[0] = mb_xy-1;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
        left_block[4]= 7;
        left_block[5]= 10;
        left_block[6]= 8;
        left_block[7]= 11;
    
        if(FRAME_MBAFF){
    
            const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
            const int top_pair_xy      = pair_xy     - s->mb_stride;
            const int topleft_pair_xy  = top_pair_xy - 1;
            const int topright_pair_xy = top_pair_xy + 1;
            const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
            const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
            const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
            const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
            const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
            const int bottom = (s->mb_y & 1);
    
            tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
    
            if (bottom
                    ? !curr_mb_frame_flag // bottom macroblock
                    : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
                    ) {
                top_xy -= s->mb_stride;
            }
            if (bottom
                    ? !curr_mb_frame_flag // bottom macroblock
                    : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
                    ) {
                topleft_xy -= s->mb_stride;
            }
            if (bottom
                    ? !curr_mb_frame_flag // bottom macroblock
                    : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
                    ) {
                topright_xy -= s->mb_stride;
            }
            if (left_mb_frame_flag != curr_mb_frame_flag) {
                left_xy[1] = left_xy[0] = pair_xy - 1;
                if (curr_mb_frame_flag) {
                    if (bottom) {
                        left_block[0]= 2;
                        left_block[1]= 2;
                        left_block[2]= 3;
                        left_block[3]= 3;
                        left_block[4]= 8;
                        left_block[5]= 11;
                        left_block[6]= 8;
                        left_block[7]= 11;
                    } else {
                        left_block[0]= 0;
                        left_block[1]= 0;
                        left_block[2]= 1;
                        left_block[3]= 1;
                        left_block[4]= 7;
                        left_block[5]= 10;
                        left_block[6]= 7;
                        left_block[7]= 10;
                    }
                } else {
                    left_xy[1] += s->mb_stride;
                    //left_block[0]= 0;
                    left_block[1]= 2;
                    left_block[2]= 0;
                    left_block[3]= 2;
                    //left_block[4]= 7;
                    left_block[5]= 10;
                    left_block[6]= 7;
                    left_block[7]= 10;
                }
        }
    }

        h->top_mb_xy = top_xy;
        h->left_mb_xy[0] = left_xy[0];
        h->left_mb_xy[1] = left_xy[1];
    
        if(for_deblock){
    
            topleft_type = 0;
            topright_type = 0;
    
            top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
            left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
            left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
    
    
            if(FRAME_MBAFF && !IS_INTRA(mb_type)){
                int list;
                int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
                for(i=0; i<16; i++)
                    h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
    
                for(list=0; list<h->list_count; list++){
    
                    if(USES_LIST(mb_type,list)){
                        uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                        uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
    
                        int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
    
                        for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                            dst[0] = src[0];
                            dst[1] = src[1];
                            dst[2] = src[2];
                            dst[3] = src[3];
                        }
                        *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                        *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                        ref += h->b8_stride;
                        *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                        *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                    }else{
                        fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                        fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
                    }
                }
            }
    
        }else{
            topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
            top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
            topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
            left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
            left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
        }
    
    
        if(IS_INTRA(mb_type)){
    
            h->topleft_samples_available=
            h->top_samples_available=
    
            h->left_samples_available= 0xFFFF;
            h->topright_samples_available= 0xEEEA;
    
            if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available= 0xB3FF;
                h->top_samples_available= 0x33FF;
                h->topright_samples_available= 0x26EA;
            }
            for(i=0; i<2; i++){
                if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }
    
            if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
                h->topleft_samples_available&= 0x7FFF;
    
            if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
                h->topright_samples_available&= 0xFBFF;
    
            if(IS_INTRA4x4(mb_type)){
                if(IS_INTRA4x4(top_type)){
                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                }else{
                    int pred;
    
                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                    pred= -1;
                else{
                    pred= 2;
                }
                    h->intra4x4_pred_mode_cache[4+8*0]=
                    h->intra4x4_pred_mode_cache[5+8*0]=
                    h->intra4x4_pred_mode_cache[6+8*0]=
                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
                }
                for(i=0; i<2; i++){
                    if(IS_INTRA4x4(left_type[i])){
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    }else{
                        int pred;
    
                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                        pred= -1;
                    else{
                        pred= 2;
                    }
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                    }
                }
            }
        }
    
/*
0 . T T. T T T T
    1 L . .L . . . .
    2 L . .L . . . .
    3 . T TL . . . .
    4 L . .L . . . .
    5 L . .. . . . .
    
    */
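/* In the diagram above, each row is one line of the 8-wide caches used below:
 * entries marked T are filled from the top neighbour macroblock and entries
 * marked L from the left neighbour(s); the remaining entries belong to the
 * current macroblock. */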
    //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
        if(top_type){
    
            h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
            h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
            h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    
            h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    
            h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    
            h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    
            h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    
            h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    
        }else{
    
            h->non_zero_count_cache[4+8*0]=
    
            h->non_zero_count_cache[5+8*0]=
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]=
    
            h->non_zero_count_cache[1+8*0]=
            h->non_zero_count_cache[2+8*0]=
    
            h->non_zero_count_cache[1+8*3]=
    
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    }

        for (i=0; i<2; i++) {
            if(left_type[i]){
                h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
                h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
                h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
                h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            }else{
    
                h->non_zero_count_cache[3+8*1 + 2*8*i]=
                h->non_zero_count_cache[3+8*2 + 2*8*i]=
                h->non_zero_count_cache[0+8*1 +   8*i]=
    
                h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    
            }
        }
    
        if( h->pps.cabac ) {
            // top_cbp
            if(top_type) {
                h->top_cbp = h->cbp_table[top_xy];
            } else if(IS_INTRA(mb_type)) {
                h->top_cbp = 0x1C0;
            } else {
                h->top_cbp = 0;
            }
            // left_cbp
            if (left_type[0]) {
                h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
            } else if(IS_INTRA(mb_type)) {
                h->left_cbp = 0x1C0;
            } else {
                h->left_cbp = 0;
            }
            if (left_type[0]) {
                h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            }
            if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

#if 1
    
        if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
    
            int list;
    
            for(list=0; list<h->list_count; list++){
    
                if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
    
                    /*if(!h->mv_cache_clean[list]){
                        memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                        memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                        h->mv_cache_clean[list]= 1;
                    }*/
    
                continue;
            }
                h->mv_cache_clean[list]= 0;
    
                if(USES_LIST(top_type, list)){
    
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                }else{
    
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
    
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                    *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
                }
    
    
                for(i=0; i<2; i++){
                    int cache_idx = scan8[0] - 1 + i*2*8;
                    if(USES_LIST(left_type[i], list)){
                        const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                        const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                        *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                        *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                        h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                        h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    }else{
                        *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                        *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                        h->ref_cache[list][cache_idx  ]=
                        h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

                if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
    
                    continue;
    
    
                if(USES_LIST(topleft_type, list)){
    
                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                }else{
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
                if(USES_LIST(topright_type, list)){
    
                    const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                    h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                }else{
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                    h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
    
                if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
    
                    continue;
    
    
                h->ref_cache[list][scan8[5 ]+1] =
                h->ref_cache[list][scan8[7 ]+1] =
    
                h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
    
                h->ref_cache[list][scan8[4 ]] =
    
                h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
                *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
    
                *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    
                *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                    if(USES_LIST(top_type, list)){
    
                        const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    }else{
    
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
    
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                    }
    
                    if(USES_LIST(left_type[0], list)){
    
                        const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    }else{
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                    }
    
                    if(USES_LIST(left_type[1], list)){
    
                        const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    }else{
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                    }
                    *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                    *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
    
                    *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    
                    *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                    *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
    
    
                    if(h->slice_type == B_TYPE){
                        fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
    
                        if(IS_DIRECT(top_type)){
                            *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                        }else if(IS_8X8(top_type)){
                            int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                            h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                            h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        }else{
                            *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                        }
    
                        if(IS_DIRECT(left_type[0]))
                            h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                        else if(IS_8X8(left_type[0]))
                            h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                        else
                            h->direct_cache[scan8[0] - 1 + 0*8]= 0;
    
                        if(IS_DIRECT(left_type[1]))
    
                            h->direct_cache[scan8[0] - 1 + 2*8]= 1;
    
                        else if(IS_8X8(left_type[1]))
                            h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                        else
    
                            h->direct_cache[scan8[0] - 1 + 2*8]= 0;
    
                    }
                }
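
            /* MBAFF: a neighbour coded in the other frame/field mode keeps its
             * motion data in that mode's units, so the MAP_F2F mapping below
             * doubles or halves the vertical MV/mvd components and scales the
             * reference indices to match the current macroblock. */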
    
                if(FRAME_MBAFF){
    #define MAP_MVS\
                        MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                        MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                        MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                        MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                        MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                        MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                    if(MB_FIELD){
    #define MAP_F2F(idx, mb_type)\
                        if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                            h->ref_cache[list][idx] <<= 1;\
                            h->mv_cache[list][idx][1] /= 2;\
                            h->mvd_cache[list][idx][1] /= 2;\
                        }
                        MAP_MVS
    #undef MAP_F2F
                    }else{
    #define MAP_F2F(idx, mb_type)\
                        if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                            h->ref_cache[list][idx] >>= 1;\
                            h->mv_cache[list][idx][1] <<= 1;\
                        h->mvd_cache[list][idx][1] <<= 1;\
                    }
                        MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }
#endif

        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
    
    }
    
    static inline void write_back_intra_pred_mode(H264Context *h){
        MpegEncContext * const s = &h->s;
    
        const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    
    
        h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
        h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
        h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
        h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
        h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
        h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
        h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
    }
    
    /**
 * Checks whether the top and left blocks are available (if needed) and changes the DC mode so that it only uses the available blocks.
     */
    static inline int check_intra4x4_pred_mode(H264Context *h){
        MpegEncContext * const s = &h->s;
        static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
        static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
        int i;
    
        if(!(h->top_samples_available&0x8000)){
            for(i=0; i<4; i++){
                int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
                if(status<0){
    
                    av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
    
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
                }
            }
        }
    
        if(!(h->left_samples_available&0x8000)){
            for(i=0; i<4; i++){
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
    
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
    
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    
        return 0;
    } //FIXME cleanup like next
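
/* Both here and in check_intra_pred_mode() below, the static top[]/left[]
 * tables remap a DC prediction to its LEFT_DC / TOP_DC / DC_128 variant when
 * the corresponding samples are missing, and yield -1 for directional modes
 * that would need unavailable samples. */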
    
    /**
 * Checks whether the top and left blocks are available (if needed) and changes the DC mode so that it only uses the available blocks.
     */
    static inline int check_intra_pred_mode(H264Context *h, int mode){
        MpegEncContext * const s = &h->s;
        static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
        static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
    
        if(mode > 6U) {
    
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

        if(!(h->top_samples_available&0x8000)){
            mode= top[ mode ];
            if(mode<0){
    
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
    
                return -1;
            }
        }
    
        if(!(h->left_samples_available&0x8000)){
            mode= left[ mode ];
            if(mode<0){
    
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
    
                return -1;
    
        }
    }
    
        return mode;
    }
    
    /**
     * gets the predicted intra4x4 prediction mode.
     */
    static inline int pred_intra_mode(H264Context *h, int n){
        const int index8= scan8[n];
        const int left= h->intra4x4_pred_mode_cache[index8 - 1];
        const int top = h->intra4x4_pred_mode_cache[index8 - 8];
        const int min= FFMIN(left, top);
    
    
        tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
    
    
        if(min<0) return DC_PRED;
        else      return min;
    }
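
/* pred_intra_mode() implements the spec's most-probable-mode rule: the
 * predicted intra4x4 mode is the smaller of the left and top neighbours'
 * modes, with DC_PRED used when either neighbour is unavailable (negative
 * cache entry). */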
    
    static inline void write_back_non_zero_count(H264Context *h){
        MpegEncContext * const s = &h->s;
    
        const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    
        h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
        h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
        h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
    
        h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    
        h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
        h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
        h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
    
        h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
    
        h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    
        h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
    
        h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
    
        h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    
        h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
    
    
        if(FRAME_MBAFF){
            // store all luma nnzs, for deblocking
            int v = 0, i;
            for(i=0; i<16; i++)
                v += (!!h->non_zero_count_cache[scan8[i]]) << i;
            *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
        }
    
    }
    
    /**
     * gets the predicted number of non zero coefficients.
     * @param n block index
     */
    static inline int pred_non_zero_count(H264Context *h, int n){
        const int index8= scan8[n];
        const int left= h->non_zero_count_cache[index8 - 1];
        const int top = h->non_zero_count_cache[index8 - 8];
        int i= left + top;
    
        if(i<64) i= (i+1)>>1;
    
    
    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);

    return i&31;
}

    static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
        const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    
        MpegEncContext *s = &h->s;
    
        /* there is no consistent mapping of mvs to neighboring locations that will
         * make mbaff happy, so we can't move all this logic to fill_caches */
        if(FRAME_MBAFF){
    
            const uint32_t *mb_types = s->current_picture_ptr->mb_type;
    
            const int16_t *mv;
            *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
            *C = h->mv_cache[list][scan8[0]-2];
    
            if(!MB_FIELD
               && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
                int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
                if(IS_INTERLACED(mb_types[topright_xy])){
    #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                    const int x4 = X4, y4 = Y4;\
                    const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                    if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
                        return LIST_NOT_USED;\
                    mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                    h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                    h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                    return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
    
                    SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
                }
            }
            if(topright_ref == PART_NOT_AVAILABLE
               && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
               && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
                if(!MB_FIELD
                   && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                    SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
                }
                if(MB_FIELD
                   && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                   && i >= scan8[0]+8){
                    // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
                    SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
                }
            }
    #undef SET_DIAG_MV
        }
    
    
        if(topright_ref != PART_NOT_AVAILABLE){
            *C= h->mv_cache[list][ i - 8 + part_width ];
            return topright_ref;
        }else{
    
            tprintf(s->avctx, "topright MV not available\n");
    
            *C= h->mv_cache[list][ i - 8 - 1 ];
            return h->ref_cache[list][ i - 8 - 1 ];
        }
    }
    
    
    /**
     * gets the predicted MV.
     * @param n the block index
     * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
     * @param mx the x component of the predicted motion vector
     * @param my the y component of the predicted motion vector
     */
    static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
        const int index8= scan8[n];
        const int top_ref=      h->ref_cache[list][ index8 - 8 ];
        const int left_ref=     h->ref_cache[list][ index8 - 1 ];
        const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
        const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
    
        const int16_t * C;
        int diagonal_ref, match_count;
    
    
        assert(part_width==1 || part_width==2 || part_width==4);
    
    /* mv_cache
    
      B . . A T T T T
    
      U . . L . . , .
      U . . L . . . .
      U . . L . . , .
      . . . L . . . .
    */
    
    
        diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
        match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
    
        tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
    
        if(match_count > 1){ //most common
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);
    }else if(match_count==1){
        if(left_ref==ref){
            *mx= A[0];
            *my= A[1];
        }else if(top_ref==ref){
            *mx= B[0];
            *my= B[1];
        }else{
            *mx= C[0];
            *my= C[1];
        }
    }else{
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
            *mx= A[0];
            *my= A[1];
        }else{
            *mx= mid_pred(A[0], B[0], C[0]);
            *my= mid_pred(A[1], B[1], C[1]);
        }
    }

        tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
    
    }
    
    /**
     * gets the directionally predicted 16x8 MV.
     * @param n the block index
     * @param mx the x component of the predicted motion vector
     * @param my the y component of the predicted motion vector
     */
    static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        if(n==0){
            const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
            const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
    
    
            tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
    
            if(top_ref == ref){
                *mx= B[0];
                *my= B[1];
                return;
            }
        }else{
            const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
            const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
    
            tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
    
    
            if(left_ref == ref){
                *mx= A[0];
                *my= A[1];
                return;
            }
        }
    
        //RARE
        pred_motion(h, n, 4, list, ref, mx, my);
    }
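
/* pred_16x8_motion() above and pred_8x16_motion() below implement the
 * directional shortcut of the spec: a partition may take the MV of the
 * neighbour on its open side (top/left for the first partition, left/diagonal
 * for the second) when that neighbour uses the same reference picture;
 * otherwise the median predictor pred_motion() is used. */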
    
    /**
     * gets the directionally predicted 8x16 MV.
     * @param n the block index
     * @param mx the x component of the predicted motion vector
     * @param my the y component of the predicted motion vector
     */
    static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
        if(n==0){
            const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
            const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
    
            tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
    
    
            if(left_ref == ref){
                *mx= A[0];
                *my= A[1];
                return;
            }
        }else{
    
            const int16_t * C;
            int diagonal_ref;
    
            diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
    
            tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
    
            if(diagonal_ref == ref){
    
                *mx= C[0];
                *my= C[1];
                return;
            }
        }
    
        //RARE
        pred_motion(h, n, 2, list, ref, mx, my);
    }
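
/**
 * P-Skip motion vector prediction: the MV is forced to zero when the top or
 * left neighbour is unavailable, or when either of them references picture 0
 * with a zero MV; otherwise the normal median prediction is used.
 */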
    
    static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
        const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
        const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
    
    
        tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
    
    
        if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
           || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
           || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
    
            *mx = *my = 0;
            return;
        }
    
        pred_motion(h, 0, 4, 0, 0, mx, my);
    
        return;
    }
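
/**
 * Computes the temporal direct-mode distance scale factor for each list-0
 * reference: with tb = clip(poc_cur - poc0, -128, 127) and
 * td = clip(poc1 - poc0, -128, 127),
 * dist_scale_factor = clip((tb * ((16384 + |td|/2) / td) + 32) >> 6, -1024, 1023),
 * matching the DistScaleFactor derivation in the H.264 spec.
 */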
    
    
    static inline void direct_dist_scale_factor(H264Context * const h){
        const int poc = h->s.current_picture_ptr->poc;
        const int poc1 = h->ref_list[1][0].poc;
        int i;
        for(i=0; i<h->ref_count[0]; i++){
            int poc0 = h->ref_list[0][i].poc;
    
            int td = av_clip(poc1 - poc0, -128, 127);
    
            if(td == 0 /* FIXME || pic0 is a long-term ref */){
                h->dist_scale_factor[i] = 256;
            }else{
    
                int tb = av_clip(poc - poc0, -128, 127);
    
                int tx = (16384 + (FFABS(td) >> 1)) / td;
    
            h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
        }
    }

        if(FRAME_MBAFF){
            for(i=0; i<h->ref_count[0]; i++){
                h->dist_scale_factor_field[2*i] =
                h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
            }
    }
}
    
    static inline void direct_ref_list_init(H264Context * const h){
        MpegEncContext * const s = &h->s;
        Picture * const ref1 = &h->ref_list[1][0];
        Picture * const cur = s->current_picture_ptr;
        int list, i, j;
        if(cur->pict_type == I_TYPE)
            cur->ref_count[0] = 0;
        if(cur->pict_type != B_TYPE)
            cur->ref_count[1] = 0;