    /*
     * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
     * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
     *
     * This library is free software; you can redistribute it and/or
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either
     * version 2 of the License, or (at your option) any later version.
     *
     * This library is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
     *
     * You should have received a copy of the GNU Lesser General Public
     * License along with this library; if not, write to the Free Software
     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     *
     */

    /**
     * @file h264.c
     * H.264 / AVC / MPEG4 part10 codec.
     * @author Michael Niedermayer <michaelni@gmx.at>
     */
    
    #include "common.h"
    #include "dsputil.h"
    #include "avcodec.h"
    #include "mpegvideo.h"
    #include "h264data.h"
    #include "golomb.h"
    
    
    #include <assert.h>
    
    /*
     * Redirect these two identifiers to deliberately awkward names.
     * NOTE(review): presumably so that any accidental use of the like-named
     * MpegEncContext members in this file fails to compile -- confirm.
     */
    #define interlaced_dct interlaced_dct_is_a_bad_name
    #define mb_intra mb_intra_isnt_initalized_see_mb_type
    
    /* NOTE(review): presumably the block indices used for the luma / chroma DC
     * coefficient blocks -- confirm against the residual decoding code. */
    #define LUMA_DC_BLOCK_INDEX   25
    #define CHROMA_DC_BLOCK_INDEX 26
    
    /* NOTE(review): presumably the lookup widths (in bits) used when building
     * and reading the static VLC tables declared in this file -- confirm. */
    #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
    #define COEFF_TOKEN_VLC_BITS           8
    #define TOTAL_ZEROS_VLC_BITS           9
    #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
    #define RUN_VLC_BITS                   3
    #define RUN7_VLC_BITS                  6
    
    /* Capacities of H264Context.sps_buffer[] and H264Context.pps_buffer[]. */
    #define MAX_SPS_COUNT 32
    #define MAX_PPS_COUNT 256
    
    /* Capacity of H264Context.mmco[]. */
    #define MAX_MMCO_COUNT 66
    
    
    /* Compiling in interlaced support reduces the speed
     * of progressive decoding by about 2%. */
    #define ALLOW_INTERLACE
    
    /*
     * With ALLOW_INTERLACE the three helpers below read the decoder context
     * through a variable that must be named h at the point of use.
     * Without it they collapse to the constant 0 and IS_INTERLACED() is
     * overridden to always yield 0.
     */
    #ifdef ALLOW_INTERLACE
    #define MB_MBAFF h->mb_mbaff
    #define MB_FIELD h->mb_field_decoding_flag
    #define FRAME_MBAFF h->mb_aff_frame
    #else
    #define MB_MBAFF 0
    #define MB_FIELD 0
    #define FRAME_MBAFF 0
    #undef  IS_INTERLACED
    #define IS_INTERLACED(mb_type) 0
    #endif
    
    
    /**
     * Sequence parameter set
     */
    typedef struct SPS{
    
        int profile_idc;
        int level_idc;
    
    Loren Merritt's avatar
    Loren Merritt committed
        int transform_bypass;              ///< qpprime_y_zero_transform_bypass_flag
    
        int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
        int poc_type;                      ///< pic_order_cnt_type
        int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
        int delta_pic_order_always_zero_flag;
        int offset_for_non_ref_pic;
        int offset_for_top_to_bottom_field;
        int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
        int ref_frame_count;               ///< num_ref_frames
    
        int gaps_in_frame_num_allowed_flag;
    
        int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
        int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
        int frame_mbs_only_flag;
        int mb_aff;                        ///<mb_adaptive_frame_field_flag
        int direct_8x8_inference_flag;
    
        int crop;                   ///< frame_cropping_flag
        int crop_left;              ///< frame_cropping_rect_left_offset
        int crop_right;             ///< frame_cropping_rect_right_offset
        int crop_top;               ///< frame_cropping_rect_top_offset
        int crop_bottom;            ///< frame_cropping_rect_bottom_offset
    
        int vui_parameters_present_flag;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        AVRational sar;
    
        int timing_info_present_flag;
        uint32_t num_units_in_tick;
        uint32_t time_scale;
        int fixed_frame_rate_flag;
    
        short offset_for_ref_frame[256]; //FIXME dyn aloc?
    
        int bitstream_restriction_flag;
        int num_reorder_frames;
    
        int scaling_matrix_present;
        uint8_t scaling_matrix4[6][16];
        uint8_t scaling_matrix8[2][64];
    
    }SPS;
    
    /**
     * Picture parameter set.
     * The trailing comments name the H.264 syntax element a member is taken
     * from; "x_minusN + N" means the stored value has the bias added back.
     */
    typedef struct PPS{
        int sps_id;                 // NOTE(review): presumably selects the SPS this PPS refers to -- confirm
        int cabac;                  ///< entropy_coding_mode_flag
        int pic_order_present;      ///< pic_order_present_flag
        int slice_group_count;      ///< num_slice_groups_minus1 + 1
        int mb_slice_group_map_type;
        int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
        int weighted_pred;          ///< weighted_pred_flag
        int weighted_bipred_idc;
        int init_qp;                ///< pic_init_qp_minus26 + 26
        int init_qs;                ///< pic_init_qs_minus26 + 26
        int chroma_qp_index_offset;
        int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
        int constrained_intra_pred; ///< constrained_intra_pred_flag
        int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    
        int transform_8x8_mode;     ///< transform_8x8_mode_flag
    
        /* Same shapes as the like-named members of SPS: six 16-entry lists
         * and two 64-entry lists. */
        uint8_t scaling_matrix4[6][16];
        uint8_t scaling_matrix8[2][64];
    
    }PPS;
    
    /**
     * Memory management control operation opcode.
     * The values are spelled out because they must equal the
     * memory_management_control_operation codes of the H.264 bitstream
     * (0..6); code 5 (MMCO_RESET) must be present so that MMCO_LONG is 6.
     */
    typedef enum MMCOOpcode{
        MMCO_END          = 0,
        MMCO_SHORT2UNUSED = 1,
        MMCO_LONG2UNUSED  = 2,
        MMCO_SHORT2LONG   = 3,
        MMCO_SET_MAX_LONG = 4,
        MMCO_RESET        = 5,
        MMCO_LONG         = 6,
    } MMCOOpcode;
    
    /**
     * Memory management control operation.
     * Element type of H264Context.mmco[].
     */
    typedef struct MMCO{
        MMCOOpcode opcode;   // which operation this entry describes
        int short_frame_num; // NOTE(review): presumably identifies the short term picture operated on -- confirm
        int long_index;      // NOTE(review): presumably the long term index operated on or assigned -- confirm
    } MMCO;
    
    /**
     * H264Context
     * Decoder context: embeds the generic MpegEncContext and adds the H.264
     * specific NAL, parameter set, prediction, reference picture and entropy
     * coding state declared below.
     */
    typedef struct H264Context{
        MpegEncContext s;

        int nal_ref_idc;
        int nal_unit_type;

    #define NAL_SLICE                1
    #define NAL_DPA                  2
    #define NAL_DPB                  3
    #define NAL_DPC                  4
    #define NAL_IDR_SLICE            5
    #define NAL_SEI                  6
    #define NAL_SPS                  7
    #define NAL_PPS                  8
    #define NAL_AUD                  9
    #define NAL_END_SEQUENCE        10
    #define NAL_END_STREAM          11
    #define NAL_FILLER_DATA         12
    #define NAL_SPS_EXT             13
    #define NAL_AUXILIARY_SLICE     19

        uint8_t *rbsp_buffer;
        unsigned int rbsp_buffer_size;

        /**
          * Used to parse AVC variant of h264
          */
        int is_avc; ///< this flag is != 0 if codec is avc1
        int got_avcC; ///< flag used to parse avcC data only once
        int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)

        int chroma_qp; //QPc

        int prev_mb_skipped;
        int next_mb_skipped;

        //prediction stuff
        int chroma_pred_mode;
        int intra16x16_pred_mode;

        /* Neighbour macroblock indices; written by fill_caches(). */
        int top_mb_xy;
        int left_mb_xy[2];

        int8_t intra4x4_pred_mode_cache[5*8];
        int8_t (*intra4x4_pred_mode)[8];
        void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
        void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
        void (*pred8x8  [4+3])(uint8_t *src, int stride);
        void (*pred16x16[4+3])(uint8_t *src, int stride);
        unsigned int topleft_samples_available;
        unsigned int top_samples_available;
        unsigned int topright_samples_available;
        unsigned int left_samples_available;

        uint8_t (*top_borders[2])[16+2*8];
        uint8_t left_border[2*(17+2*9)];

        /**
         * non zero coeff count cache.
         * is 64 if not available.
         */
        DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);

        /**
         * Per macroblock non zero coeff counts, indexed [mb_xy][block];
         * fill_caches() copies neighbour entries from here into
         * non_zero_count_cache.
         * NOTE(review): the row width of 16 is inferred from fill_caches()
         * reading bytes 14..15 of a row -- confirm against the allocation.
         */
        uint8_t (*non_zero_count)[16];

        /**
         * Motion vector cache.
         */
        DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
        DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);

    #define LIST_NOT_USED -1 //FIXME rename?
    #define PART_NOT_AVAILABLE -2

        /**
         * is 1 if the specific list MV&references are set to 0,0,-2.
         */
        int mv_cache_clean[2];

        /**
         * number of neighbors (top and/or left) that used 8x8 dct
         */
        int neighbor_transform_size;

        /**
         * block_offset[ 0..23] for frame macroblocks
         * block_offset[24..47] for field macroblocks
         */
        int block_offset[2*(16+8)];

        uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
        uint32_t *mb2b8_xy;
        int b_stride; //FIXME use s->b4_stride
        int b8_stride;

        int mb_linesize;   ///< may be equal to s->linesize or s->linesize*2, for mbaff
        int mb_uvlinesize;

        int emu_edge_width;
        int emu_edge_height;

        int halfpel_flag;
        int thirdpel_flag;

        int unknown_svq3_flag;
        int next_slice_index;

        SPS sps_buffer[MAX_SPS_COUNT];
        SPS sps; ///< current sps

        PPS pps_buffer[MAX_PPS_COUNT];
        /**
         * current pps
         */
        PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?

        uint32_t dequant4_buffer[6][52][16];
        uint32_t dequant8_buffer[2][52][64];
        uint32_t (*dequant4_coeff[6])[16];
        uint32_t (*dequant8_coeff[2])[64];
        int dequant_coeff_pps;     ///< reinit tables when pps changes

        int slice_num;
        uint8_t *slice_table_base;
        uint8_t *slice_table;      ///< slice_table_base + 2*mb_stride + 1
        int slice_type;
        int slice_type_fixed;

        //interlacing specific flags
        int mb_aff_frame;          ///< read through the FRAME_MBAFF macro
        int mb_field_decoding_flag;
        int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag

        int sub_mb_type[4];

        //POC stuff
        int poc_lsb;
        int poc_msb;
        int delta_poc_bottom;
        int delta_poc[2];
        int frame_num;
        int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
        int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
        int frame_num_offset;         ///< for POC type 2
        int prev_frame_num_offset;    ///< for POC type 2
        int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2

        /**
         * frame_num for frames or 2*frame_num for field pics.
         */
        int curr_pic_num;

        /**
         * max_frame_num or 2*max_frame_num for field pics.
         */
        int max_pic_num;

        //Weighted pred stuff
        int use_weight;
        int use_weight_chroma;
        int luma_log2_weight_denom;
        int chroma_log2_weight_denom;
        int luma_weight[2][48];
        int luma_offset[2][48];
        int chroma_weight[2][48][2];
        int chroma_offset[2][48][2];
        int implicit_weight[48][48];

        //deblock
        int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0

        int redundant_pic_count;

        int direct_spatial_mv_pred;
        int dist_scale_factor[16];
        int dist_scale_factor_field[32];
        int map_col_to_list0_field[2][32];

        /**
         * num_ref_idx_l0/1_active_minus1 + 1
         */
        int ref_count[2];            ///< counts frames or fields, depending on current mb mode

        Picture *short_ref[32];
        Picture *long_ref[32];
        Picture default_ref_list[2][32];
        Picture ref_list[2][48];     ///< 0..15: frame refs, 16..47: mbaff field refs
        Picture *delayed_pic[16]; //FIXME size?

        /**
         * memory management control operations buffer.
         */
        MMCO mmco[MAX_MMCO_COUNT];
        int mmco_index;

        int long_ref_count;  ///< number of actual long term references
        int short_ref_count; ///< number of actual short term references

        //data partitioning
        GetBitContext intra_gb;
        GetBitContext inter_gb;
        GetBitContext *intra_gb_ptr;
        GetBitContext *inter_gb_ptr;

        DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);

        uint8_t      cabac_state[460];
        int          cabac_init_idc;

        /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
        uint16_t     *cbp_table;
        int cbp;
        int top_cbp;
        int left_cbp;

        DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);

        uint8_t     *direct_table;
        uint8_t     direct_cache[5*8];

        uint8_t zigzag_scan[16];
        uint8_t zigzag_scan8x8[64];
        uint8_t zigzag_scan8x8_cavlc[64];
        uint8_t field_scan[16];
        uint8_t field_scan8x8[64];
        uint8_t field_scan8x8_cavlc[64];

        const uint8_t *zigzag_scan_q0;
        const uint8_t *zigzag_scan8x8_q0;
        const uint8_t *zigzag_scan8x8_cavlc_q0;
        const uint8_t *field_scan_q0;
        const uint8_t *field_scan8x8_q0;
        const uint8_t *field_scan8x8_cavlc_q0;

    }H264Context;
    
    /*
     * File-local VLC tables.
     * NOTE(review): presumably the CAVLC code tables (coefficient tokens,
     * total_zeros, run_before), sized by the *_VLC_BITS constants above; their
     * initialisation is not visible in this part of the file -- confirm.
     */
    static VLC coeff_token_vlc[4];
    static VLC chroma_dc_coeff_token_vlc;
    
    static VLC total_zeros_vlc[15];
    static VLC chroma_dc_total_zeros_vlc[3];
    
    static VLC run_vlc[6];
    static VLC run7_vlc;
    
    
    /* Forward declarations; the definitions are not visible in this part of
     * the file.
     * NOTE(review): the svq3_* pair is presumably supplied by the SVQ3
     * decoder source -- confirm where they are defined. */
    static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
    static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
    
    /* Per-macroblock loop filter entry points taking the macroblock position,
     * the three plane pointers and the luma / chroma line sizes. */
    static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
    
    static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
    
    /**
     * Pack two 16 bit values into one 32 bit word.
     *
     * Only the low 16 bits of the operand placed in the low half are kept;
     * the other operand is shifted into the high half. The halves are chosen
     * by endianness so that, once the result is stored to memory as a
     * uint32_t, a occupies the two bytes at the lower address and b the two
     * bytes after it.
     *
     * The shift is done on uint32_t: callers pass negative values (e.g. a
     * reference index of -1), and left shifting a negative int is undefined.
     *
     * @param a value for the first (lower address) 16 bits
     * @param b value for the second (higher address) 16 bits
     * @return the packed word
     */
    static always_inline uint32_t pack16to32(int a, int b){
    #ifdef WORDS_BIGENDIAN
       return (uint32_t)(b&0xFFFF) + ((uint32_t)a<<16);
    #else
       return (uint32_t)(a&0xFFFF) + ((uint32_t)b<<16);
    #endif
    }
    
    
    /**
     * fill a rectangle.
     * Writes val into a w x h rectangle of 1 or 4 byte elements using whole
     * 16/32/64 bit stores. The supported shapes are those where w*size is 2,
     * 4, 8 or 16 bytes; any other width ends in assert(0).
     *
     * @param vp top left element of the rectangle; must be aligned to
     *           FFMIN(w*size, STRIDE_ALIGN) bytes
     * @param w width of the rectangle in elements (at most 4), should be a constant
     * @param h height of the rectangle, should be a constant; 1, 2 or 4
     *          (only 2 or 4 when w*size is 16)
     * @param stride distance between two rows in elements; in bytes it must
     *               be a multiple of w*size
     * @param val the value to write; when size is 1 it is replicated by
     *            multiplication and therefore has to fit into one byte
     * @param size the size of val (1 or 4), should be a constant
     */
    static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
        uint8_t *p= (uint8_t*)vp;

        assert(size==1 || size==4);
        assert(w<=4);

        /* from here on w and stride are counted in bytes */
        w      *= size;
        stride *= size;

        /* uintptr_t instead of long: long is narrower than a pointer on LLP64 */
        assert((((uintptr_t)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
        assert((stride&(w-1))==0);

        if(w==2){
            const uint16_t v= size==4 ? val : val*0x0101;
            *(uint16_t*)(p + 0*stride)= v;
            if(h==1) return;
            *(uint16_t*)(p + 1*stride)= v;
            if(h==2) return;
            *(uint16_t*)(p + 2*stride)=
            *(uint16_t*)(p + 3*stride)= v;
        }else if(w==4){
            const uint32_t v= size==4 ? val : val*0x01010101;
            *(uint32_t*)(p + 0*stride)= v;
            if(h==1) return;
            *(uint32_t*)(p + 1*stride)= v;
            if(h==2) return;
            *(uint32_t*)(p + 2*stride)=
            *(uint32_t*)(p + 3*stride)= v;
        }else if(w==8){
        //gcc can't optimize 64bit math on x86_32
    #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
            const uint64_t v= val*0x0100000001ULL;
            *(uint64_t*)(p + 0*stride)= v;
            if(h==1) return;
            *(uint64_t*)(p + 1*stride)= v;
            if(h==2) return;
            *(uint64_t*)(p + 2*stride)=
            *(uint64_t*)(p + 3*stride)= v;
        }else if(w==16){
            const uint64_t v= val*0x0100000001ULL;
            *(uint64_t*)(p + 0+0*stride)=
            *(uint64_t*)(p + 8+0*stride)=
            *(uint64_t*)(p + 0+1*stride)=
            *(uint64_t*)(p + 8+1*stride)= v;
            if(h==2) return;
            *(uint64_t*)(p + 0+2*stride)=
            *(uint64_t*)(p + 8+2*stride)=
            *(uint64_t*)(p + 0+3*stride)=
            *(uint64_t*)(p + 8+3*stride)= v;
    #else
            *(uint32_t*)(p + 0+0*stride)=
            *(uint32_t*)(p + 4+0*stride)= val;
            if(h==1) return;
            *(uint32_t*)(p + 0+1*stride)=
            *(uint32_t*)(p + 4+1*stride)= val;
            if(h==2) return;
            *(uint32_t*)(p + 0+2*stride)=
            *(uint32_t*)(p + 4+2*stride)=
            *(uint32_t*)(p + 0+3*stride)=
            *(uint32_t*)(p + 4+3*stride)= val;
        }else if(w==16){
            *(uint32_t*)(p + 0+0*stride)=
            *(uint32_t*)(p + 4+0*stride)=
            *(uint32_t*)(p + 8+0*stride)=
            *(uint32_t*)(p +12+0*stride)=
            *(uint32_t*)(p + 0+1*stride)=
            *(uint32_t*)(p + 4+1*stride)=
            *(uint32_t*)(p + 8+1*stride)=
            *(uint32_t*)(p +12+1*stride)= val;
            if(h==2) return;
            *(uint32_t*)(p + 0+2*stride)=
            *(uint32_t*)(p + 4+2*stride)=
            *(uint32_t*)(p + 8+2*stride)=
            *(uint32_t*)(p +12+2*stride)=
            *(uint32_t*)(p + 0+3*stride)=
            *(uint32_t*)(p + 4+3*stride)=
            *(uint32_t*)(p + 8+3*stride)=
            *(uint32_t*)(p +12+3*stride)= val;
    #endif
        }else
            assert(0);

        /* every shorter height returned early above */
        assert(h==4);
    }

    static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    
        MpegEncContext * const s = &h->s;
    
        const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    
        int topleft_xy, top_xy, topright_xy, left_xy[2];
        int topleft_type, top_type, topright_type, left_type[2];
    
        //FIXME deblocking could skip the intra and nnz parts.
        if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
    
        //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
    
    
        top_xy     = mb_xy  - s->mb_stride;
        topleft_xy = top_xy - 1;
        topright_xy= top_xy + 1;
        left_xy[1] = left_xy[0] = mb_xy-1;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
        left_block[4]= 7;
        left_block[5]= 10;
        left_block[6]= 8;
        left_block[7]= 11;
    
        if(FRAME_MBAFF){
    
            const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
            const int top_pair_xy      = pair_xy     - s->mb_stride;
            const int topleft_pair_xy  = top_pair_xy - 1;
            const int topright_pair_xy = top_pair_xy + 1;
            const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
            const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
            const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
            const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
            const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
            const int bottom = (s->mb_y & 1);
            tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
            if (bottom
                    ? !curr_mb_frame_flag // bottom macroblock
                    : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
                    ) {
                top_xy -= s->mb_stride;
            }
            if (bottom
                    ? !curr_mb_frame_flag // bottom macroblock
                    : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
                    ) {
                topleft_xy -= s->mb_stride;
            }
            if (bottom
                    ? !curr_mb_frame_flag // bottom macroblock
                    : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
                    ) {
                topright_xy -= s->mb_stride;
            }
            if (left_mb_frame_flag != curr_mb_frame_flag) {
                left_xy[1] = left_xy[0] = pair_xy - 1;
                if (curr_mb_frame_flag) {
                    if (bottom) {
                        left_block[0]= 2;
                        left_block[1]= 2;
                        left_block[2]= 3;
                        left_block[3]= 3;
                        left_block[4]= 8;
                        left_block[5]= 11;
                        left_block[6]= 8;
                        left_block[7]= 11;
                    } else {
                        left_block[0]= 0;
                        left_block[1]= 0;
                        left_block[2]= 1;
                        left_block[3]= 1;
                        left_block[4]= 7;
                        left_block[5]= 10;
                        left_block[6]= 7;
                        left_block[7]= 10;
                    }
                } else {
                    left_xy[1] += s->mb_stride;
                    //left_block[0]= 0;
                    left_block[1]= 2;
                    left_block[2]= 0;
                    left_block[3]= 2;
                    //left_block[4]= 7;
                    left_block[5]= 10;
                    left_block[6]= 7;
                    left_block[7]= 10;
                }
            }
    
        h->top_mb_xy = top_xy;
        h->left_mb_xy[0] = left_xy[0];
        h->left_mb_xy[1] = left_xy[1];
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        if(for_deblock){
    
            topleft_type = 0;
            topright_type = 0;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
            left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
            left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
    
    
            if(FRAME_MBAFF && !IS_INTRA(mb_type)){
                int list;
                int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
                for(i=0; i<16; i++)
                    h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
                for(list=0; list<1+(h->slice_type==B_TYPE); list++){
                    if(USES_LIST(mb_type,list)){
                        uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
                        uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
    
                        int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
    
                        for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
                            dst[0] = src[0];
                            dst[1] = src[1];
                            dst[2] = src[2];
                            dst[3] = src[3];
                        }
                        *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                        *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
                        ref += h->b8_stride;
                        *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                        *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
                    }else{
                        fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
                        fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
                    }
                }
            }
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
        }else{
            topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
            top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
            topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
            left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
            left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
        }
    
    
        if(IS_INTRA(mb_type)){
    
            h->topleft_samples_available=
            h->top_samples_available=
    
            h->left_samples_available= 0xFFFF;
            h->topright_samples_available= 0xEEEA;
    
            if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available= 0xB3FF;
                h->top_samples_available= 0x33FF;
                h->topright_samples_available= 0x26EA;
            }
            for(i=0; i<2; i++){
                if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }
    
            if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
                h->topleft_samples_available&= 0x7FFF;
    
            if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
                h->topright_samples_available&= 0xFBFF;
    
            if(IS_INTRA4x4(mb_type)){
                if(IS_INTRA4x4(top_type)){
                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                }else{
                    int pred;
    
                    if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
    
                        pred= -1;
    
                    }
                    h->intra4x4_pred_mode_cache[4+8*0]=
                    h->intra4x4_pred_mode_cache[5+8*0]=
                    h->intra4x4_pred_mode_cache[6+8*0]=
                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
                }
                for(i=0; i<2; i++){
                    if(IS_INTRA4x4(left_type[i])){
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    }else{
                        int pred;
    
                        if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
    
                            pred= -1;
    
                        }
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                    }
                }
            }
        }
    
    0 . T T. T T T T
    1 L . .L . . . .
    2 L . .L . . . .
    3 . T TL . . . .
    4 L . .L . . . .
    5 L . .. . . . .
    
    */
    //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
        if(top_type){
    
            h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
            h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
            h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
    
            h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
    
            h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
    
            h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
    
            h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
    
            h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    
        }else{
    
            h->non_zero_count_cache[4+8*0]=
    
            h->non_zero_count_cache[5+8*0]=
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]=
    
            h->non_zero_count_cache[1+8*0]=
            h->non_zero_count_cache[2+8*0]=
    
            h->non_zero_count_cache[1+8*3]=
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
            h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    
        for (i=0; i<2; i++) {
            if(left_type[i]){
                h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
                h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
                h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
                h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
            }else{
    
                h->non_zero_count_cache[3+8*1 + 2*8*i]=
                h->non_zero_count_cache[3+8*2 + 2*8*i]=
                h->non_zero_count_cache[0+8*1 +   8*i]=
    
                h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
    
            }
        }
    
        if( h->pps.cabac ) {
            // top_cbp
            if(top_type) {
                h->top_cbp = h->cbp_table[top_xy];
            } else if(IS_INTRA(mb_type)) {
                h->top_cbp = 0x1C0;
            } else {
                h->top_cbp = 0;
            }
            // left_cbp
            if (left_type[0]) {
                h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
            } else if(IS_INTRA(mb_type)) {
                h->left_cbp = 0x1C0;
            } else {
                h->left_cbp = 0;
            }
            if (left_type[0]) {
                h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
            }
            if (left_type[1]) {
                h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
    
    #if 1
    
        if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
    
            int list;
    
            for(list=0; list<1+(h->slice_type==B_TYPE); list++){
                if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
    
                    /*if(!h->mv_cache_clean[list]){
                        memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                        memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                        h->mv_cache_clean[list]= 1;
                    }*/
    
                }
                h->mv_cache_clean[list]= 0;
    
                if(USES_LIST(top_type, list)){
    
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
                    h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
                    h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
                }else{
    
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
    
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                    *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
                }
    
                //FIXME unify cleanup or sth
    
                if(USES_LIST(left_type[0], list)){
    
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
    
                    h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
                    h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
    
                }else{
                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
                    h->ref_cache[list][scan8[0] - 1 + 0*8]=
                    h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
                if(USES_LIST(left_type[1], list)){
    
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
    
                    h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
                    h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
    
                }else{
                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
                    h->ref_cache[list][scan8[0] - 1 + 2*8]=
                    h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
                    assert((!left_type[0]) == (!left_type[1]));
    
                if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
    
    Michael Niedermayer's avatar
    Michael Niedermayer committed
                    continue;
    
    
                if(USES_LIST(topleft_type, list)){
    
                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                    h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                }else{
                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                    h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
                if(USES_LIST(topright_type, list)){
    
                    const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                    const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                    *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                    h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
                }else{
                    *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                    h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
    
    
                if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
    
                    continue;
    
    
                h->ref_cache[list][scan8[5 ]+1] =
                h->ref_cache[list][scan8[7 ]+1] =
    
                h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
    
                h->ref_cache[list][scan8[4 ]] =
    
                h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
                *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
    
                *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    
                *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
    
                    if(USES_LIST(top_type, list)){
    
                        const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                    }else{
    
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
    
                        *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                    }
    
                    if(USES_LIST(left_type[0], list)){
    
                        const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                    }else{
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                    }
    
                    if(USES_LIST(left_type[1], list)){
    
                        const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                        *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                    }else{
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                        *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                    }
                    *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                    *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
    
                    *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
    
                    *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                    *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
    
    
                    if(h->slice_type == B_TYPE){
                        fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
    
                        if(IS_DIRECT(top_type)){
                            *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                        }else if(IS_8X8(top_type)){
                            int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                            h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                            h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                        }else{
                            *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                        }
    
                        if(IS_DIRECT(left_type[0]))
                            h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                        else if(IS_8X8(left_type[0]))
                            h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                        else
                            h->direct_cache[scan8[0] - 1 + 0*8]= 0;
    
                        if(IS_DIRECT(left_type[1]))
    
                            h->direct_cache[scan8[0] - 1 + 2*8]= 1;
    
                        else if(IS_8X8(left_type[1]))
                            h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                        else
    
                            h->direct_cache[scan8[0] - 1 + 2*8]= 0;
    
                    }
                }
    
                if(FRAME_MBAFF){
    #define MAP_MVS\
                        MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                        MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                        MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                        MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                        MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                        MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                        MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                    if(MB_FIELD){
    #define MAP_F2F(idx, mb_type)\
                        if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                            h->ref_cache[list][idx] <<= 1;\
                            h->mv_cache[list][idx][1] /= 2;\
                            h->mvd_cache[list][idx][1] /= 2;\
                        }
                        MAP_MVS
    #undef MAP_F2F
                    }else{
    #define MAP_F2F(idx, mb_type)\
                        if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                            h->ref_cache[list][idx] >>= 1;\
                            h->mv_cache[list][idx][1] <<= 1;\
                            h->mvd_cache[list][idx][1] <<= 1;\
    
                        MAP_MVS
    #undef MAP_F2F
    
    
        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
    
    }
    
    /**
     * Stores seven entries of h->intra4x4_pred_mode_cache into the
     * per-macroblock table h->intra4x4_pred_mode for the current macroblock.
     *
     * The macroblock index is derived from s->mb_x, s->mb_y and s->mb_stride.
     * Table entries 0..3 receive cache entries 7+8*1 .. 7+8*4, and table
     * entries 4..6 receive cache entries 4+8*4 .. 6+8*4.
     *
     * NOTE(review): with the cache addressed as 8 entries per row, these look
     * like the right-most column and the bottom row of the current macroblock,
     * presumably kept for prediction in later neighbouring macroblocks --
     * confirm against the code that fills the cache.
     *
     * @param h decoder context; its intra4x4_pred_mode_cache is read and its
     *          intra4x4_pred_mode table is written.
     */
    static inline void write_back_intra_pred_mode(H264Context *h){
        MpegEncContext * const ctx = &h->s;
        const int mb_index= ctx->mb_y*ctx->mb_stride + ctx->mb_x;
        int row, col;

        /* entries 0..3: cache index 7 of rows 1..4 (row stride 8) */
        for(row=0; row<4; row++)
            h->intra4x4_pred_mode[mb_index][row]= h->intra4x4_pred_mode_cache[7 + 8*(row+1)];

        /* entries 4..6: cache indices 4..6 of row 4 */
        for(col=0; col<3; col++)
            h->intra4x4_pred_mode[mb_index][4+col]= h->intra4x4_pred_mode_cache[4+col + 8*4];
    }
    
    /**