diff --git a/common.mak b/common.mak
index 66bbed4a408574938954d7f171a3ab70e676eeb8..0f8392a0351b4c1df76fcede0afe1fcec7100dbf 100644
--- a/common.mak
+++ b/common.mak
@@ -27,20 +27,26 @@ $(eval INSTALL = @$(call ECHO,INSTALL,$$(^:$(SRC_DIR)/%=%)); $(INSTALL))
 endif
 
 # NASM requires -I path terminated with /
-IFLAGS   := -I. -I$(SRC_PATH)/
-CPPFLAGS := $(IFLAGS) $(CPPFLAGS)
-CFLAGS   += $(ECFLAGS)
-YASMFLAGS += $(IFLAGS) -Pconfig.asm
-
+IFLAGS     := -I. -I$(SRC_PATH)/
+CPPFLAGS   := $(IFLAGS) $(CPPFLAGS)
+CFLAGS     += $(ECFLAGS)
+CCFLAGS     = $(CFLAGS)
+YASMFLAGS  += $(IFLAGS) -Pconfig.asm
 HOSTCFLAGS += $(IFLAGS)
 
+define COMPILE
+       $($(1)DEP)
+       $($(1)) $(CPPFLAGS) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $<
+endef
+
+COMPILE_C = $(call COMPILE,CC)
+COMPILE_S = $(call COMPILE,AS)
+
 %.o: %.c
-	$(CCDEP)
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(CC_DEPFLAGS) -c $(CC_O) $<
+	$(COMPILE_C)
 
 %.o: %.S
-	$(ASDEP)
-	$(AS) $(CPPFLAGS) $(ASFLAGS) $(AS_DEPFLAGS) -c -o $@ $<
+	$(COMPILE_S)
 
 %.ho: %.h
 	$(CC) $(CPPFLAGS) $(CFLAGS) -Wno-unused -c -o $@ -x c $<
diff --git a/configure b/configure
index af3c09f7c5d3353ca86fbde54d150b73ce8f8bd6..fb197aae551a49e78cc241edb750253dfaea33e8 100755
--- a/configure
+++ b/configure
@@ -1704,6 +1704,7 @@ SLIBNAME_WITH_VERSION='$(SLIBNAME).$(LIBVERSION)'
 SLIBNAME_WITH_MAJOR='$(SLIBNAME).$(LIBMAJOR)'
 LIB_INSTALL_EXTRA_CMD='$$(RANLIB) "$(LIBDIR)/$(LIBNAME)"'
 
+AS_O='-o $@'
 CC_O='-o $@'
 
 host_cflags='-D_ISOC99_SOURCE -O3 -g'
@@ -3346,6 +3347,7 @@ STRIP=$strip
 CPPFLAGS=$CPPFLAGS
 CFLAGS=$CFLAGS
 ASFLAGS=$ASFLAGS
+AS_O=$AS_O
 CC_O=$CC_O
 LDFLAGS=$LDFLAGS
 FFSERVERLDFLAGS=$FFSERVERLDFLAGS
diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c
index a676627de262188ef64171eaa978b39317522a84..be105fe834e0e5c06798bca853e86cafd8a866c2 100644
--- a/libavcodec/fft-test.c
+++ b/libavcodec/fft-test.c
@@ -252,8 +252,9 @@ int main(int argc, char **argv)
 #if CONFIG_FFT_FLOAT
     RDFTContext r1, *r = &r1;
     DCTContext d1, *d = &d1;
+    int fft_size_2;
 #endif
-    int fft_nbits, fft_size, fft_size_2;
+    int fft_nbits, fft_size;
     double scale = 1.0;
     AVLFG prng;
     av_lfg_init(&prng, 1);
@@ -292,7 +293,6 @@ int main(int argc, char **argv)
     }
 
     fft_size = 1 << fft_nbits;
-    fft_size_2 = fft_size >> 1;
     tab = av_malloc(fft_size * sizeof(FFTComplex));
     tab1 = av_malloc(fft_size * sizeof(FFTComplex));
     tab_ref = av_malloc(fft_size * sizeof(FFTComplex));
@@ -372,6 +372,7 @@ int main(int argc, char **argv)
         break;
 #if CONFIG_FFT_FLOAT
     case TRANSFORM_RDFT:
+        fft_size_2 = fft_size >> 1;
         if (do_inverse) {
             tab1[         0].im = 0;
             tab1[fft_size_2].im = 0;
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index dd9f425c3b303f0c16a644fe85c188f130d33019..8ecf9b4dbd0f8d1a53a937660e5d37eb77ed1f05 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2488,7 +2488,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
 
     s->dropable= h->nal_ref_idc == 0;
 
-    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
+    /* FIXME: 2tap qpel isn't implemented for high bit depth. */
+    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
     }else{
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 46abc54c49452a5f9f07c5f93cc94da1af799edf..226c2aef288ccec2528f8364cfe595e268ee20d7 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -218,10 +218,11 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     int mb_type, left_type;
     int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
     int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
+    int chroma444 = CHROMA444;
 
     mb_xy = h->mb_xy;
 
-    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff || CHROMA444) {
+    if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
     }
@@ -264,16 +265,46 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
         }
         if(chroma){
-            if(left_type){
-                filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
-                filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+            if(chroma444){
+                if(left_type){
+                    filter_mb_edgev( &img_cb[4*0], linesize, bS4, qpc0, h);
+                    filter_mb_edgev( &img_cr[4*0], linesize, bS4, qpc0, h);
+                }
+                if( IS_8x8DCT(mb_type) ) {
+                    filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
+                    filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
+                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+                    filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
+                } else {
+                    filter_mb_edgev( &img_cb[4*1], linesize, bS3, qpc, h);
+                    filter_mb_edgev( &img_cr[4*1], linesize, bS3, qpc, h);
+                    filter_mb_edgev( &img_cb[4*2], linesize, bS3, qpc, h);
+                    filter_mb_edgev( &img_cr[4*2], linesize, bS3, qpc, h);
+                    filter_mb_edgev( &img_cb[4*3], linesize, bS3, qpc, h);
+                    filter_mb_edgev( &img_cr[4*3], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cb[4*0*linesize], linesize, bSH, qpc1, h);
+                    filter_mb_edgeh( &img_cr[4*0*linesize], linesize, bSH, qpc1, h);
+                    filter_mb_edgeh( &img_cb[4*1*linesize], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cr[4*1*linesize], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cb[4*2*linesize], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cr[4*2*linesize], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, h);
+                    filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, h);
+                }
+            }else{
+                if(left_type){
+                    filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
+                    filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
+                }
+                filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
+                filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
+                filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+                filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
+                filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
+                filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
             }
-            filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
-            filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
-            filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-            filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
-            filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
-            filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
         }
         return;
     } else {
@@ -301,9 +332,14 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
 #define FILTER(hv,dir,edge)\
         if(AV_RN64A(bS[dir][edge])) {                                   \
             filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\
-            if(chroma && !(edge&1)) {\
-                filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
-                filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
+            if(chroma){\
+                if(chroma444){\
+                    filter_mb_edge##hv( &img_cb[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
+                    filter_mb_edge##hv( &img_cr[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
+                } else if(!(edge&1)) {\
+                    filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
+                    filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
+                }\
             }\
         }
         if(left_type)
diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c
index bdff535a99bc63b329fb7b2b3f402c91b1da8961..2760ca064fea42779cb7392111359bb3c620a257 100644
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c
@@ -296,10 +296,6 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
                                uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
 {
     int i, last_non_zero;
-#if 0 //variables for the outcommented version
-    int code, sign, last;
-#endif
-    const RLTable *rl;
     uint32_t *bits_tab;
     uint8_t *len_tab;
     const int last_index = s->block_last_index[n];
@@ -309,20 +305,17 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
         mpeg4_encode_dc(dc_pb, intra_dc, n);
         if(last_index<1) return;
         i = 1;
-        rl = &ff_mpeg4_rl_intra;
         bits_tab= uni_mpeg4_intra_rl_bits;
         len_tab = uni_mpeg4_intra_rl_len;
     } else {
         if(last_index<0) return;
         i = 0;
-        rl = &ff_h263_rl_inter;
         bits_tab= uni_mpeg4_inter_rl_bits;
         len_tab = uni_mpeg4_inter_rl_len;
     }
 
     /* AC coefs */
     last_non_zero = i - 1;
-#if 1
     for (; i < last_index; i++) {
         int level = block[ scan_table[i] ];
         if (level) {
@@ -348,64 +341,6 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
             put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
         }
     }
-#else
-    for (; i <= last_index; i++) {
-        const int slevel = block[ scan_table[i] ];
-        if (slevel) {
-            int level;
-            int run = i - last_non_zero - 1;
-            last = (i == last_index);
-            sign = 0;
-            level = slevel;
-            if (level < 0) {
-                sign = 1;
-                level = -level;
-            }
-            code = get_rl_index(rl, last, run, level);
-            put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
-            if (code == rl->n) {
-                int level1, run1;
-                level1 = level - rl->max_level[last][run];
-                if (level1 < 1)
-                    goto esc2;
-                code = get_rl_index(rl, last, run, level1);
-                if (code == rl->n) {
-                esc2:
-                    put_bits(ac_pb, 1, 1);
-                    if (level > MAX_LEVEL)
-                        goto esc3;
-                    run1 = run - rl->max_run[last][level] - 1;
-                    if (run1 < 0)
-                        goto esc3;
-                    code = get_rl_index(rl, last, run1, level);
-                    if (code == rl->n) {
-                    esc3:
-                        /* third escape */
-                        put_bits(ac_pb, 1, 1);
-                        put_bits(ac_pb, 1, last);
-                        put_bits(ac_pb, 6, run);
-                        put_bits(ac_pb, 1, 1);
-                        put_sbits(ac_pb, 12, slevel);
-                        put_bits(ac_pb, 1, 1);
-                    } else {
-                        /* second escape */
-                        put_bits(ac_pb, 1, 0);
-                        put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
-                        put_bits(ac_pb, 1, sign);
-                    }
-                } else {
-                    /* first escape */
-                    put_bits(ac_pb, 1, 0);
-                    put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
-                    put_bits(ac_pb, 1, sign);
-                }
-            } else {
-                put_bits(ac_pb, 1, sign);
-            }
-            last_non_zero = i;
-        }
-    }
-#endif
 }
 
 static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 197000beb978e7ac93ef3b550691eadf9c77d8c6..08f10d2c18a7c86ffbb6e18a6789d057ecede87f 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -222,8 +222,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
 }
 
 static void hScale_altivec_real(int16_t *dst, int dstW,
-                                const uint8_t *src, int srcW,
-                                int xInc, const int16_t *filter,
+                                const uint8_t *src, const int16_t *filter,
                                 const int16_t *filterPos, int filterSize)
 {
     register int i;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index abbe37568529b47d0d4a35d9dbde093189408a97..1f736558dfd346c9041ff42975543ba2330cbeb4 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1920,10 +1920,8 @@ static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
     }
 }
 
-
 // bilinear / bicubic scaling
 static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
-                     int srcW, int xInc,
                      const int16_t *filter, const int16_t *filterPos,
                      int filterSize)
 {
@@ -2036,7 +2034,7 @@ static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
         c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
     } else if (!c->hyscale_fast) {
-        c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
+        c->hScale(dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
     } else { // fast bilinear upscale / crap downscale
         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
     }
@@ -2082,8 +2080,8 @@ static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *ds
         c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
         c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
     } else if (!c->hcscale_fast) {
-        c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
-        c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
+        c->hScale(dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
     } else { // fast bilinear upscale / crap downscale
         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
     }
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index c0f8e64d7065cdeca2ef763afa8b22b935132d16..27de6b24d02932d234c17f49f7e2c0a702d61ce7 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -312,8 +312,8 @@ typedef struct SwsContext {
                          const uint8_t *src1, const uint8_t *src2,
                          int srcW, int xInc);
 
-    void (*hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW,
-                   int xInc, const int16_t *filter, const int16_t *filterPos,
+    void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
+                   const int16_t *filter, const int16_t *filterPos,
                    int filterSize);
 
     void (*hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW,
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 25399fadef23fb654be2a9d670e88d7b12590774..fdf82b2d067f56d11c7c240ce05e37745f9db9a4 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1915,8 +1915,7 @@ static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
 #if !COMPILE_TEMPLATE_MMX2
 // bilinear / bicubic scaling
 static void RENAME(hScale)(int16_t *dst, int dstW,
-                           const uint8_t *src, int srcW,
-                           int xInc, const int16_t *filter,
+                           const uint8_t *src, const int16_t *filter,
                            const int16_t *filterPos, int filterSize)
 {
     assert(filterSize % 4 == 0 && filterSize>0);
diff --git a/subdir.mak b/subdir.mak
index 8b3807378fbad942efda94725a5491884065b1d1..0cb603050189302fd7c3b0c76632d05b7ec1126e 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -11,16 +11,17 @@ all-$(CONFIG_STATIC): $(SUBDIR)$(LIBNAME)
 all-$(CONFIG_SHARED): $(SUBDIR)$(SLIBNAME)
 
 $(SUBDIR)%-test.o: $(SUBDIR)%-test.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) -DTEST -c $(CC_O) $^
+	$(COMPILE_C)
 
 $(SUBDIR)%-test.o: $(SUBDIR)%.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) -DTEST -c $(CC_O) $^
+	$(COMPILE_C)
 
 $(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm
 	$(YASMDEP) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
 	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
 
-$(OBJS) $(SUBDIR)%.ho $(SUBDIR)%-test.o $(TESTOBJS): CPPFLAGS += -DHAVE_AV_CONFIG_H
+$(OBJS) $(SUBDIR)%.ho $(TESTOBJS): CPPFLAGS += -DHAVE_AV_CONFIG_H
+$(TESTOBJS): CPPFLAGS += -DTEST
 
 $(SUBDIR)$(LIBNAME): $(OBJS)
 	$(RM) $@