    /*
     * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
     * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
     *
    
     * This file is part of FFmpeg.
     *
     * FFmpeg is free software; you can redistribute it and/or
    
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either
    
     * version 2.1 of the License, or (at your option) any later version.
    
     * FFmpeg is distributed in the hope that it will be useful,
    
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
     *
     * You should have received a copy of the GNU Lesser General Public
    
     * License along with FFmpeg; if not, write to the Free Software
    
     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    
     * @file libavcodec/h264.c
    
     * H.264 / AVC / MPEG4 part10 codec.
     * @author Michael Niedermayer <michaelni@gmx.at>
     */
    
    
    #include "dsputil.h"
    #include "avcodec.h"
    #include "mpegvideo.h"
    
    #include "h264.h"
    
    #include "h264data.h"
    
    #include "h264_mvpred.h"
    
    #include "h264_parser.h"
    
    #include "golomb.h"
    
    #include "rectangle.h"
    
    #include "x86/h264_i386.h"
    
    #include <assert.h>
    
    static VLC coeff_token_vlc[4];
    
    Art Clarke's avatar
    Art Clarke committed
    static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
    static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
    
    
    static VLC chroma_dc_coeff_token_vlc;
    
    Art Clarke's avatar
    Art Clarke committed
    static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
    static const int chroma_dc_coeff_token_vlc_table_size = 256;
    
    
    static VLC total_zeros_vlc[15];
    
    Art Clarke's avatar
    Art Clarke committed
    static VLC_TYPE total_zeros_vlc_tables[15][512][2];
    static const int total_zeros_vlc_tables_size = 512;
    
    
    static VLC chroma_dc_total_zeros_vlc[3];
    
    Art Clarke's avatar
    Art Clarke committed
    static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
    static const int chroma_dc_total_zeros_vlc_tables_size = 8;
    
    
    static VLC run_vlc[6];
    
    Art Clarke's avatar
    Art Clarke committed
    static VLC_TYPE run_vlc_tables[6][8][2];
    static const int run_vlc_tables_size = 8;
    
    
    static VLC run7_vlc;
    
    Art Clarke's avatar
    Art Clarke committed
    static VLC_TYPE run7_vlc_table[96][2];
    static const int run7_vlc_table_size = 96;
    
    /* Forward declarations: the definitions of these two svq3_* routines are
     * not in this part of the file (presumably the SVQ3 decoder -- confirm). */
    static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
    static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
    
    
    /* rem6[q] == q % 6 for every q in 0..51 (one row per group of six). */
    static const uint8_t rem6[52]={
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3, 4, 5,
        0, 1, 2, 3,
    };
    
    
    /* div6[q] == q / 6 for every q in 0..51 (one row per quotient). */
    static const uint8_t div6[52]={
        0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6,
        7, 7, 7, 7, 7, 7,
        8, 8, 8, 8,
    };
    
    
    /*
     * Index sets used by fill_caches() to address data of the left neighbour.
     * Row 0 is the default; row 3 or row (2 - bottom) is selected there when the
     * field flag of the left macroblock pair differs from the current one.
     * Entries 0..3 are used as indices into intra4x4_pred_mode / non_zero_count
     * (and as motion row offsets); entries 4..7 only index non_zero_count.
     */
    static const uint8_t left_block_options[4][8]={
        { 0, 1, 2, 3,   7, 10,  8, 11 },
        { 2, 2, 3, 3,   8, 11,  8, 11 },
        { 0, 0, 1, 1,   7, 10,  7, 10 },
        { 0, 2, 0, 2,   7, 10,  7, 10 },
    };
    
    /* LEVEL_TAB_BITS is log2 of the middle dimension of cavlc_level_tab:
     * each of its 7 rows holds 1<<LEVEL_TAB_BITS entries of two int8_t values. */
    #define LEVEL_TAB_BITS 8
    static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
    
    
    static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    
        MpegEncContext * const s = &h->s;
    
        int topleft_xy, top_xy, topright_xy, left_xy[2];
        int topleft_type, top_type, topright_type, left_type[2];
    
        int topleft_partition= -1;
    
        top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
    
    
        //FIXME deblocking could skip the intra and nnz parts.
    
        if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
    
        /* Wow, what a mess, why didn't they simplify the interlacing & intra
         * stuff, I can't imagine that these complex rules are worth it. */
    
        topleft_xy = top_xy - 1;
        topright_xy= top_xy + 1;
        left_xy[1] = left_xy[0] = mb_xy-1;
    
        left_block = left_block_options[0];
    
        if(FRAME_MBAFF){
    
            const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
            const int top_pair_xy      = pair_xy     - s->mb_stride;
            const int topleft_pair_xy  = top_pair_xy - 1;
            const int topright_pair_xy = top_pair_xy + 1;
    
            const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
            const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
            const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
            const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
            const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
    
            const int bottom = (s->mb_y & 1);
    
            tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
    
            if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
    
            if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
    
            } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
    
                topleft_xy += s->mb_stride;
    
                // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
    
                topleft_partition = 0;
    
            if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
    
            if (left_mb_field_flag != curr_mb_field_flag) {
    
                left_xy[1] = left_xy[0] = pair_xy - 1;
    
                if (curr_mb_field_flag) {
                    left_xy[1] += s->mb_stride;
                    left_block = left_block_options[3];
                } else {
    
                    left_block= left_block_options[2 - bottom];
    
        h->top_mb_xy = top_xy;
        h->left_mb_xy[0] = left_xy[0];
        h->left_mb_xy[1] = left_xy[1];
    
        if(for_deblock){
    
            topleft_type = 0;
            topright_type = 0;
    
            top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
            left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
            left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
    
                int list;
    
                for(list=0; list<h->list_count; list++){
    
                //These values were changed for ease of performing MC, we need to change them back
                    //FIXME maybe we can make MC and loop filter use the same values or prevent
                    //the MC code from changing ref_cache and rather use a temporary array.
    
                    if(USES_LIST(mb_type,list)){
    
                        int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
    
                        *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
    
                        *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
    
                        ref += h->b8_stride;
                        *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
    
                        *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
    
        }else{
            topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
            top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
            topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
            left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
            left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
    
    
        if(IS_INTRA(mb_type)){
    
            int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
    
            h->topleft_samples_available=
            h->top_samples_available=
    
            h->left_samples_available= 0xFFFF;
            h->topright_samples_available= 0xEEEA;
    
    
            if(!(top_type & type_mask)){
    
                h->topleft_samples_available= 0xB3FF;
                h->top_samples_available= 0x33FF;
                h->topright_samples_available= 0x26EA;
            }
    
            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
                if(IS_INTERLACED(mb_type)){
    
                    if(!(left_type[0] & type_mask)){
    
                        h->topleft_samples_available&= 0xDFFF;
                        h->left_samples_available&= 0x5FFF;
                    }
    
                    if(!(left_type[1] & type_mask)){
    
                        h->topleft_samples_available&= 0xFF5F;
                        h->left_samples_available&= 0xFF5F;
                    }
                }else{
                    int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                    ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                    assert(left_xy[0] == left_xy[1]);
    
                    if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
    
                        h->topleft_samples_available&= 0xDF5F;
                        h->left_samples_available&= 0x5F5F;
                    }
                }
            }else{
    
                if(!(left_type[0] & type_mask)){
    
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }
    
            if(!(topleft_type & type_mask))
    
                h->topleft_samples_available&= 0x7FFF;
    
            if(!(topright_type & type_mask))
    
                h->topright_samples_available&= 0xFBFF;
    
            if(IS_INTRA4x4(mb_type)){
                if(IS_INTRA4x4(top_type)){
                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                }else{
                    int pred;
    
                    if(!(top_type & type_mask))
    
                        pred= -1;
    
                    }
                    h->intra4x4_pred_mode_cache[4+8*0]=
                    h->intra4x4_pred_mode_cache[5+8*0]=
                    h->intra4x4_pred_mode_cache[6+8*0]=
                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
                }
                for(i=0; i<2; i++){
                    if(IS_INTRA4x4(left_type[i])){
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    }else{
                        int pred;
    
                        if(!(left_type[i] & type_mask))
    
                            pred= -1;
    
                        }
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                    }
                }
            }
        }
    
    /*
    0 . T T. T T T T
    1 L . .L . . . .
    2 L . .L . . . .
    3 . T TL . . . .
    4 L . .L . . . .
    5 L . .. . . . .
    */
    
    //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    
        if(top_type){
    
            h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
            h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
            h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    
            h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    
            h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    
            h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    
            h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    
            h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    
        }else{
    
            h->non_zero_count_cache[4+8*0]=
    
            h->non_zero_count_cache[5+8*0]=
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]=
    
            h->non_zero_count_cache[1+8*0]=
            h->non_zero_count_cache[2+8*0]=
    
            h->non_zero_count_cache[1+8*3]=
    
            h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    
        for (i=0; i<2; i++) {
            if(left_type[i]){
                h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
                h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
                h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
                h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            }else{
    
                h->non_zero_count_cache[3+8*1 + 2*8*i]=
                h->non_zero_count_cache[3+8*2 + 2*8*i]=
                h->non_zero_count_cache[0+8*1 +   8*i]=
    
                h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    
            }
        }
    
        if( h->pps.cabac ) {
            // top_cbp
            if(top_type) {
                h->top_cbp = h->cbp_table[top_xy];
            } else if(IS_INTRA(mb_type)) {
                h->top_cbp = 0x1C0;
            } else {
                h->top_cbp = 0;
            }
            // left_cbp
            if (left_type[0]) {
                h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
            } else if(IS_INTRA(mb_type)) {
                h->left_cbp = 0x1C0;
            } else {
                h->left_cbp = 0;
            }
            if (left_type[0]) {
                h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            }
            if (left_type[1]) {
                h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
    
    #if 1
    
        if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
    
            int list;
    
            for(list=0; list<h->list_count; list++){
    
                if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
    
                    /*if(!h->mv_cache_clean[list]){
                        memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                        memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                        h->mv_cache_clean[list]= 1;
                    }*/
    
                }
                h->mv_cache_clean[list]= 0;
    
                if(USES_LIST(top_type, list)){
    
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                }else{
    
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
    
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                    *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
                }
    
    
                for(i=0; i<2; i++){
                    int cache_idx = scan8[0] - 1 + i*2*8;
                    if(USES_LIST(left_type[i], list)){
                        const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                        const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                        *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                        *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                        h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                        h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                    }else{
                        *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                        *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                        h->ref_cache[list][cache_idx  ]=
                        h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                    }
    
                if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
    
                    continue;
    
    
                if(USES_LIST(topleft_type, list)){
    
                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                    const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
    
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                }else{
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
                if(USES_LIST(topright_type, list)){
    
                    const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                    h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                }else{
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                    h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
    
                if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
    
                    continue;
    
    
                h->ref_cache[list][scan8[5 ]+1] =
                h->ref_cache[list][scan8[7 ]+1] =
    
                h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
    
                h->ref_cache[list][scan8[4 ]] =
    
                h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
                *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
    
                *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    
                *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
    
                    if(USES_LIST(top_type, list)){
    
                        const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    }else{
    
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
    
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                    }
    
                    if(USES_LIST(left_type[0], list)){
    
                        const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    }else{
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                    }
    
                    if(USES_LIST(left_type[1], list)){
    
                        const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    }else{
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                    }
                    *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                    *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
    
                    *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    
                    *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                    *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
    
                        fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
    
                        if(IS_DIRECT(top_type)){
                            *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                        }else if(IS_8X8(top_type)){
                            int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                            h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                            h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        }else{
                            *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                        }
    
                        if(IS_DIRECT(left_type[0]))
                            h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                        else if(IS_8X8(left_type[0]))
                            h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                        else
                            h->direct_cache[scan8[0] - 1 + 0*8]= 0;
    
                        if(IS_DIRECT(left_type[1]))
    
                            h->direct_cache[scan8[0] - 1 + 2*8]= 1;
    
                        else if(IS_8X8(left_type[1]))
                            h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                        else
    
                            h->direct_cache[scan8[0] - 1 + 2*8]= 0;
    
                    }
                }
    
                if(FRAME_MBAFF){
    #define MAP_MVS\
                        MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                        MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                        MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                        MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                        MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                        MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                    if(MB_FIELD){
    #define MAP_F2F(idx, mb_type)\
                        if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                            h->ref_cache[list][idx] <<= 1;\
                            h->mv_cache[list][idx][1] /= 2;\
                            h->mvd_cache[list][idx][1] /= 2;\
                        }
                        MAP_MVS
    #undef MAP_F2F
                    }else{
    #define MAP_F2F(idx, mb_type)\
                        if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                            h->ref_cache[list][idx] >>= 1;\
                            h->mv_cache[list][idx][1] <<= 1;\
                            h->mvd_cache[list][idx][1] <<= 1;\
    
                        MAP_MVS
    #undef MAP_F2F
    
    
        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
    
    /**
     * Copies seven intra4x4 prediction modes of the current macroblock from
     * intra4x4_pred_mode_cache into the per-macroblock intra4x4_pred_mode array.
     *
     * With cache indices written as x + 8*y, entries 0..3 come from x == 7,
     * y == 1..4 and entries 4..6 come from y == 4, x == 4..6. These are the
     * same slots that fill_caches() reads back for the top and left neighbours.
     *
     * @param h decoder context; h->mb_xy selects the destination macroblock
     */
    void ff_h264_write_back_intra_pred_mode(H264Context *h){
        /* Index of the current macroblock; every store below depends on it. */
        const int mb_xy= h->mb_xy;

        h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
        h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
        h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
        h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
        h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
        h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
        h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
    }
    
    
    /**
     * Checks the cached intra4x4 prediction modes against the availability of
     * the top and left samples and rewrites modes that have a usable substitute.
     *
     * Both lookup tables are indexed by the cached mode: a negative entry means
     * the mode cannot be used (error), zero means the mode is kept unchanged and
     * a positive entry is the replacement mode stored back into the cache.
     *
     * @return 0 on success, -1 if a mode needs samples that are unavailable
     */
    int ff_h264_check_intra4x4_pred_mode(H264Context *h){
        static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
        static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
        /* one left_samples_available bit per checked cache row */
        static const int left_bit[4]={0x8000,0x2000,0x80,0x20};
        MpegEncContext * const s = &h->s;
        int n;

        /* Top samples missing: check the four cache entries scan8[0]+0..3. */
        if(!(h->top_samples_available&0x8000)){
            for(n=0; n<4; n++){
                const int pos= scan8[0] + n;
                const int fixed= top[ h->intra4x4_pred_mode_cache[pos] ];

                if(fixed<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", fixed, s->mb_x, s->mb_y);
                    return -1;
                }
                if(fixed)
                    h->intra4x4_pred_mode_cache[pos]= fixed;
            }
        }

        /* Some left samples missing: check the entries scan8[0]+8*n whose
         * availability bit is cleared. */
        if((h->left_samples_available&0x8888)!=0x8888){
            for(n=0; n<4; n++){
                const int pos= scan8[0] + 8*n;
                int fixed;

                if(h->left_samples_available&left_bit[n])
                    continue;

                fixed= left[ h->intra4x4_pred_mode_cache[pos] ];
                if(fixed<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", fixed, s->mb_x, s->mb_y);
                    return -1;
                }
                if(fixed)
                    h->intra4x4_pred_mode_cache[pos]= fixed;
            }
        }

        return 0;
    } //FIXME cleanup like ff_h264_check_intra_pred_mode
    
    
    /**
     * Checks whether the samples required by an intra prediction mode are
     * available and substitutes a DC variant that only uses available samples.
     *
     * @param h    decoder context (top_samples_available and
     *             left_samples_available are consulted)
     * @param mode requested prediction mode, valid range 0..6
     * @return the mode to use (possibly replaced), or -1 if the mode is out of
     *         range or needs samples that are unavailable
     */
    int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
        MpegEncContext * const s = &h->s;
        static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
        static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

        /* The unsigned comparison also rejects negative values, so mode is a
         * safe index into the 7-entry tables below. */
        if(mode > 6U) {
            av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }

        if(!(h->top_samples_available&0x8000)){
            mode= top[ mode ];
            if(mode<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
                return -1;
            }
        }

        if((h->left_samples_available&0x8080) != 0x8080){
            mode= left[ mode ];
            /* Exactly one of the two tested left bits is set
             * (original note: MBAFF + constrained_intra_pred).
             * NOTE(review): this overwrites mode before the mode<0 check below,
             * so a negative table result is not reported on this path unless
             * the computed value is itself negative -- confirm this is intended. */
            if(h->left_samples_available&0x8080){
                mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
            }
            if(mode<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
                return -1;
            }
        }

        return mode;
    }
    
    /**
     * Predicts the intra4x4 prediction mode of block n from its neighbours.
     *
     * The prediction is the smaller of the cached modes directly to the left
     * (cache index - 1) and directly above (cache index - 8); if that value is
     * negative, DC_PRED is returned instead.
     *
     * @param n block index, used to look up the cache position in scan8
     */
    static inline int pred_intra_mode(H264Context *h, int n){
        const int pos        = scan8[n];
        const int mode_left  = h->intra4x4_pred_mode_cache[pos - 1];
        const int mode_above = h->intra4x4_pred_mode_cache[pos - 8];
        const int smaller    = FFMIN(mode_left, mode_above);

        tprintf(h->s.avctx, "mode:%d %d min:%d\n", mode_left, mode_above, smaller);

        return smaller < 0 ? DC_PRED : smaller;
    }
    
    /**
     * Copies 13 entries of non_zero_count_cache into the per-macroblock
     * non_zero_count array of the current macroblock.
     *
     * Destination indices 0..6 take the same cache slots (x == 7, y == 1..4 and
     * y == 4, x == 4..6, with the index written as x + 8*y) that fill_caches()
     * later reads for neighbouring macroblocks; indices 7..12 take entries from
     * cache columns 1 and 2.
     *
     * @param h decoder context; h->mb_xy selects the destination macroblock
     */
    static inline void write_back_non_zero_count(H264Context *h){
        /* Index of the current macroblock; every store below depends on it. */
        const int mb_xy= h->mb_xy;

        h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
        h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
        h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
        h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
        h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
        h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
        h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];

        h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
        h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
        h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];

        h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
        h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
        h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
    }
    
    /**
     * gets the predicted number of non-zero coefficients.
     * @param n block index
     */
    static inline int pred_non_zero_count(H264Context *h, int n){
        const int cache_pos = scan8[n];
        const int nnz_left  = h->non_zero_count_cache[cache_pos - 1];
        const int nnz_top   = h->non_zero_count_cache[cache_pos - 8];
        const int sum       = nnz_left + nnz_top;
        /* sums below 64 are averaged (rounding up); larger sums are only masked.
         * NOTE(review): presumably 64 marks an unavailable neighbour - confirm. */
        const int predicted = (sum < 64 ? (sum + 1) >> 1 : sum) & 31;

        tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", nnz_left, nnz_top, n, scan8[n], predicted);

        return predicted;
    }
    
    /**
     * Writes the motion data of the current macroblock back from the
     * per-macroblock caches (mv_cache, mvd_cache, ref_cache) into the
     * picture-wide tables (current_picture.motion_val, mvd_table,
     * current_picture.ref_index) and updates direct_table.
     *
     * NOTE(review): this copy of the function looks incomplete. Several
     * statements and closing braces that the remaining code implies are not
     * visible (see the notes below); compare against the upstream file before
     * relying on it.
     *
     * @param mb_type macroblock type flags, tested with USES_LIST(), IS_SKIP()
     *                and IS_8X8()
     */
    static inline void write_back_motion(H264Context *h, int mb_type){
        MpegEncContext * const s = &h->s;
        // offsets of this macroblock in the b_stride / b8_stride strided tables
        // (4 and 2 entries per macroblock in each direction respectively)
        const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
        const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
        int list;
    
    
        // list 0 unused: fill its 2x2 ref_index area with LIST_NOT_USED
        if(!USES_LIST(mb_type, 0))
            fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
    
    
        for(list=0; list<h->list_count; list++){
    
            int y;
    
            // NOTE(review): the statement controlled by this if is not visible
            // here; as written it guards the copy loop below. Confirm upstream.
            if(!USES_LIST(mb_type, list))
    
            // copy 4 rows of cached motion vectors; the two 64-bit stores per row
            // cover entries +0..+3 (presumably two 32-bit entries each - confirm)
            for(y=0; y<4; y++){
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
            }
    
                // NOTE(review): the indentation and the extra closing brace below
                // suggest an enclosing condition that is not visible here.
                // Skipped macroblocks get value 0 filled over their 4x4 mvd area,
                // all others get the mvd_cache rows copied.
                if(IS_SKIP(mb_type))
                    fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
                else
    
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
                }
            }
    
                // store the ref_cache entries at scan8[0], [4], [8] and [12] into
                // the 2x2 ref_index area of this macroblock
                int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
    
                ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
                ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
                ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
                ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
    
        // direct flags are only written for B slices when pps.cabac is set
        if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
    
            if(IS_8X8(mb_type)){
    
                // only sub-blocks 1..3 are written here; entry [0] is not touched
                uint8_t *direct_table = &h->direct_table[b8_xy];
                direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
                direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
                direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
    
    /**
     * Reads the NAL unit header byte, then finds and removes 00 00 03 escape
     * sequences from the payload that follows it.
     *
     * The header byte fills h->nal_ref_idc and h->nal_unit_type. The payload is
     * scanned for the first 00 00 0x sequence (x <= 3); a sequence with x != 3
     * is treated as a start code and cuts the payload there. When an escape is
     * found, the payload is copied into h->rbsp_buffer[] with the 03 bytes
     * dropped and FF_INPUT_BUFFER_PADDING_SIZE zero bytes appended.
     *
     * NOTE(review): this copy of the function looks incomplete. The declaration
     * of bufidx, the #if matching the first #endif, the #endif matching
     * "#if HAVE_FAST_UNALIGNED", the return statements and several closing
     * braces are not visible; compare against the upstream file.
     *
     * @param src        NAL unit data, starting at the header byte
     * @param dst_length receives the payload length (after unescaping when a
     *                   copy is made)
     * @param consumed   receives the number of input bytes used, header byte
     *                   included
     * @param length     number of bytes available at src
     */
    const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    
        int i, si, di;
        uint8_t *dst;
    
    //    src[0]&0x80;                //forbidden bit
    
        // header byte: low five bits are the unit type, the bits above them
        // are stored as nal_ref_idc
        h->nal_ref_idc= src[0]>>5;
        h->nal_unit_type= src[0]&0x1F;
    
        // skip the header byte
        src++; length--;
    
        for(i=0; i<length; i++)
            printf("%2X ", src[i]);
    #endif
    
        // Locate the first 00 00 0x sequence. With HAVE_FAST_UNALIGNED a whole
        // 64 or 32 bit word is tested per step and rejected words are skipped
        // with continue; otherwise the bytes are probed directly, two per step.
    #if HAVE_FAST_UNALIGNED
    # if HAVE_FAST_64BIT
    
    #   define RS 7
        for(i=0; i+1<length; i+=9){
    
    Ivan Schreter's avatar
    Ivan Schreter committed
            if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
    
    # else
    #   define RS 3
        for(i=0; i+1<length; i+=5){
    
    Ivan Schreter's avatar
    Ivan Schreter committed
            if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
    
    # endif
                continue;
            // step back when src[i] itself is zero, then advance to the next zero byte
            if(i>0 && !src[i]) i--;
            while(src[i]) i++;
    #else
    #   define RS 0
    
        for(i=0; i+1<length; i+=2){
            if(src[i]) continue;
            // the zero run may have started one byte earlier
            if(i>0 && src[i-1]==0) i--;
    
            // 00 00 followed by a byte <= 3: an escape (03) or a start code
            if(i+2<length && src[i+1]==0 && src[i+2]<=3){
                if(src[i+2]!=3){
                    /* startcode, so we must be past the end */
                    length=i;
                }
                break;
            }
    
        }
    
        if(i>=length-1){ //no escaped 0
            *dst_length= length;
            *consumed= length+1; //+1 for the header
    
        bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    
        // NOTE(review): no check of the buffer pointer after av_fast_malloc is
        // visible before dst is written to; confirm against upstream
        av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    
        dst= h->rbsp_buffer[bufidx];
    
        // the i bytes before the first candidate sequence are copied verbatim
        memcpy(dst, src, i);
        si=di=i;
        while(si+2<length){
    
            //remove escapes (very rare 1:2^22)
    
            // a byte > 3 at si+2 rules out a 00 00 0x sequence starting at si or
            // si+1, so two bytes are copied at once
            if(src[si+2]>3){
                dst[di++]= src[si++];
                dst[di++]= src[si++];
            }else if(src[si]==0 && src[si+1]==0){
    
                // emit 00 00 and skip the 03 byte.
                // NOTE(review): no continue is visible after si+=3, so as written
                // control falls through to the one byte copy below; confirm
                if(src[si+2]==3){ //escape
                    dst[di++]= 0;
                    dst[di++]= 0;
                    si+=3;
    
                }else //next start code
    
                    goto nsc;
    
            }
    
            dst[di++]= src[si++];
        }
    
        // copy the trailing bytes that are too few to hold an escape sequence
        while(si<length)
            dst[di++]= src[si++];
    nsc:
    
        // zero the padding behind the unescaped data
        memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
    
    
        *dst_length= di;
        *consumed= si + 1;//+1 for the header
    
    Diego Biurrun's avatar
    Diego Biurrun committed
    //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    
    /**
     * Finds the lowest set bit of the byte at src.
     *
     * @param src pointer to the byte to inspect
     * @return 1-based position of the lowest set bit counted from the least
     *         significant bit (1..8), or 0 if the byte is zero
     */
    int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
        const int last_byte = *src;
        int bit;

        tprintf(h->s.avctx, "rbsp trailing %X\n", last_byte);

        for(bit = 0; bit < 8; bit++){
            if((last_byte >> bit) & 1)
                return bit + 1;
        }
        return 0;
    }
    
    /**
     * IDCT transforms the 16 dc values and dequantizes them.
     *
     * The 16 values are read from and written back to fixed positions inside
     * block (see x_offset / y_offset). Every result is scaled by qmul and
     * rounded as (v*qmul + 128) >> 8.
     *
     * This function defines the macro stride (16); it stays defined for the
     * code that follows until it is undefined again further down in the file.
     *
     * @param block coefficient buffer holding the dc values, modified in place
     * @param qp quantization parameter (not referenced in the body)
     * @param qmul dequantization multiplier applied to every output value
     */
    static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    #define stride 16
        int i;
        int temp[16]; //FIXME check if this is a good idea
        static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
        static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

        /* first pass: 4-point butterflies over the values selected by y_offset */
        for(i=0; i<4; i++){
            const int offset= y_offset[i];
            const int z0= block[offset+stride*0] + block[offset+stride*4];
            const int z1= block[offset+stride*0] - block[offset+stride*4];
            const int z2= block[offset+stride*1] - block[offset+stride*5];
            const int z3= block[offset+stride*1] + block[offset+stride*5];

            temp[4*i+0]= z0+z3;
            temp[4*i+1]= z1+z2;
            temp[4*i+2]= z1-z2;
            temp[4*i+3]= z0-z3;
        }

        /* second pass over the other dimension, with scaling and rounding */
        for(i=0; i<4; i++){
            const int offset= x_offset[i];
            const int z0= temp[4*0+i] + temp[4*2+i];
            const int z1= temp[4*0+i] - temp[4*2+i];
            const int z2= temp[4*1+i] - temp[4*3+i];
            const int z3= temp[4*1+i] + temp[4*3+i];

            block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
            block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
            block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
            block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
        }
    }

    /**
     * DCT transforms the 16 dc values.
     * @param qp quantization parameter ??? FIXME (currently commented out of the signature)
     */
    static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
        /* base positions of the dc values for the first and the second pass;
         * stride is the macro defined earlier in this file */
        static const int pass1_base[4]={0, 2*stride, 8* stride, 10*stride};
        static const int pass2_base[4]={0, 1*stride, 4* stride,  5*stride};
        int tmp[16];
        int k;

        /* first pass: butterflies on the four values of each group into tmp */
        for(k=0; k<4; k++){
            const DCTELEM *in = block + pass1_base[k];
            const int s04 = in[stride*0] + in[stride*4];
            const int d04 = in[stride*0] - in[stride*4];
            const int d15 = in[stride*1] - in[stride*5];
            const int s15 = in[stride*1] + in[stride*5];
            int *row = tmp + 4*k;

            row[0]= s04+s15;
            row[1]= d04+d15;
            row[2]= d04-d15;
            row[3]= s04-s15;
        }

        /* second pass: butterflies across tmp, results halved and stored back */
        for(k=0; k<4; k++){
            DCTELEM *out = block + pass2_base[k];
            const int s02 = tmp[4*0+k] + tmp[4*2+k];
            const int d02 = tmp[4*0+k] - tmp[4*2+k];
            const int d13 = tmp[4*1+k] - tmp[4*3+k];
            const int s13 = tmp[4*1+k] + tmp[4*3+k];

            out[stride*0 ]= (s02 + s13)>>1;
            out[stride*2 ]= (d02 + d13)>>1;
            out[stride*8 ]= (d02 - d13)>>1;
            out[stride*10]= (s02 - s13)>>1;
        }
    }
    
    #undef xStride
    #undef stride
    
    
    /**
     * Transforms the 2x2 chroma dc values in place and dequantizes them.
     *
     * The four values sit at block[0], block[16], block[32] and block[48].
     * Each output is a sum/difference combination of the four inputs,
     * multiplied by qmul and shifted right by 7.
     *
     * @param block coefficient buffer holding the dc values, modified in place
     * @param qp quantization parameter (not referenced in the body)
     * @param qmul dequantization multiplier applied to every output value
     */
    static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
        const int stride= 16*2;
        const int xStride= 16;
        int a,b,c,d,e;

        a= block[stride*0 + xStride*0];
        b= block[stride*0 + xStride*1];
        c= block[stride*1 + xStride*0];
        d= block[stride*1 + xStride*1];

        /* horizontal butterflies: a/e = sum/difference of the first pair,
         * c/b = sum/difference of the second pair */
        e= a-b;
        a= a+b;
        b= c-d;
        c= c+d;

        /* vertical butterflies plus dequantization */
        block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
        block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
        block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
        block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
    }

    /**
     * Transforms the 2x2 chroma dc values in place (no scaling).
     *
     * The four values sit at block[0], block[16], block[32] and block[48].
     *
     * @param block coefficient buffer holding the dc values, modified in place
     */
    static void chroma_dc_dct_c(DCTELEM *block){
        enum { ROW_STEP = 16*2, COL_STEP = 16 };
        DCTELEM *const first  = block;
        DCTELEM *const second = block + ROW_STEP;

        /* sums and differences within each pair */
        const int first_sum   = first [0] + first [COL_STEP];
        const int first_diff  = first [0] - first [COL_STEP];
        const int second_sum  = second[0] + second[COL_STEP];
        const int second_diff = second[0] - second[COL_STEP];

        /* sums and differences across the two pairs */
        first [0]        = first_sum  + second_sum;
        first [COL_STEP] = first_diff + second_diff;
        second[0]        = first_sum  - second_sum;
        second[COL_STEP] = first_diff - second_diff;
    }
    
    
    static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int src_x_offset, int src_y_offset,
                               qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
        MpegEncContext * const s = &h->s;
        const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    
        int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    
        const int luma_xy= (mx&3) + ((my&3)<<2);
    
        uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
        uint8_t * src_cb, * src_cr;
        int extra_width= h->emu_edge_width;
        int extra_height= h->emu_edge_height;
    
        int emu=0;
        const int full_mx= mx>>2;
        const int full_my= my>>2;
    
        const int pic_width  = 16*s->mb_width;
    
        const int pic_height = 16*s->mb_height >> MB_FIELD;
    
        if(mx&7) extra_width -= 3;
        if(my&7) extra_height -= 3;
    
    
        if(   full_mx < 0-extra_width
           || full_my < 0-extra_height
           || full_mx + 16/*FIXME*/ > pic_width + extra_width
    
           || full_my + 16/*FIXME*/ > pic_height + extra_height){
    
            ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
                src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
    
        qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    
        if(!square){
    
            qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    
        if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
    
        if(MB_FIELD){
    
            // chroma offset when predicting from a field of opposite parity
    
            my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
    
            emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
        }
        src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
        src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    
    
        if(emu){
    
            ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
    
                src_cb= s->edge_emu_buffer;
        }
    
        chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
    
            ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);