diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index c49ff76beb4969bffcc482ded65cde309285efe6..7b84a9ed1d77d779cda2eea6a6f42db8743ba317 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -874,6 +874,13 @@ PIXOP2(put, op_put)
 #define avg2(a,b) ((a+b+1)>>1)
 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
 
+static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ /* single-stride wrapper; installed as c->put_no_rnd_pixels_l2[0] in dsputil_init */
+    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h); /* same stride passed three times -- presumably dst/a/b line sizes; confirm against the put_no_rnd_pixels16_l2 definition */
+}
+
+static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ /* single-stride wrapper; installed as c->put_no_rnd_pixels_l2[1] in dsputil_init */
+    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); /* same stride passed three times -- presumably dst/a/b line sizes; confirm against the put_no_rnd_pixels8_l2 definition */
+}
 
 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
 {
@@ -3158,6 +3165,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     dspfunc(avg, 3, 2);
 #undef dspfunc
 
+    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
+    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
+
     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 0a77c7f4fc1aeea7610af8f93fb774e96f727f05..99cac9fa6b8fea645af17d7a77f4e62618beb868 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -204,6 +204,8 @@ typedef struct DSPContext {
      */
     op_pixels_func avg_no_rnd_pixels_tab[2][4];
     
+    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
+    
     /**
      * Thirdpel motion compensation with rounding (a+b+1)>>1.
      * this is an array[12] of motion compensation funcions for the 9 thirdpel positions<br>
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 636d2dfd803772d976c52da249e02db0d0fe7584..7dd1bd723bc7137e1c80f96f945b3ec47469e743 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -2451,15 +2451,24 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
                         motion_source= temp;
                     }
                 }
+                
 
                 /* first, take care of copying a block from either the
                  * previous or the golden frame */
                 if (s->all_fragments[i].coding_method != MODE_INTRA) {
-
-                    s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
-                        output_plane + s->all_fragments[i].first_pixel,
-                        motion_source,
-                        stride, 8);
+                    //Note, it is possible to implement all MC cases with put_no_rnd_pixels_l2, which would look more like the VP3 source, but this would be slower as put_no_rnd_pixels_tab is better optimized
+                    if(motion_halfpel_index != 3){
+                        s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
+                            output_plane + s->all_fragments[i].first_pixel,
+                            motion_source, stride, 8);
+                    }else{
+                        int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
+                        s->dsp.put_no_rnd_pixels_l2[1](
+                            output_plane + s->all_fragments[i].first_pixel,
+                            motion_source - d, 
+                            motion_source + stride + 1 + d, 
+                            stride, 8);
+                    }
                 }
 
                 /* dequantize the DCT coefficients */