diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 118334ad7c6277c597d9a83f13a8fcfb23c7f6ed..958c859c7218dab4cf1901096a7e1177db2e8990 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -174,20 +174,26 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_l
         i-= RS;
     }
 
-    if(i>=length-1){ //no escaped 0
-        *dst_length= length;
-        *consumed= length+1; //+1 for the header
-        return src;
-    }
-
     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
-    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
+    si=h->rbsp_buffer_size[bufidx]; // size before the call, compared below to detect a change
+    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
     dst= h->rbsp_buffer[bufidx];
 
     if (dst == NULL){
         return NULL;
     }
 
+    // must stay after the NULL check: clear the tail past the payload whenever the recorded buffer size changed
+    if(si != h->rbsp_buffer_size[bufidx])
+        memset(dst + length, 0, FF_INPUT_BUFFER_PADDING_SIZE+MAX_MBPAIR_SIZE);
+
+    if(i>=length-1){ //no escaped 0
+        *dst_length= length;
+        *consumed= length+1; //+1 for the header
+        memcpy(dst, src, length); // hand back a copy in the oversized buffer instead of src itself
+        return dst;
+    }
+
 //printf("decoding esc\n");
     memcpy(dst, src, i);
     si=di=i;
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 2809e3253ed5c66c7d940759412b587f253416ae..e956d3a339df0cf935408c0ca6a6b64f3b725920 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -46,6 +46,8 @@
 
 #define MAX_DELAYED_PIC_COUNT 16
 
+#define MAX_MBPAIR_SIZE (256*1024) // extra bytes added to the rbsp buffer allocation in ff_h264_decode_nal(); presumably an upper bound on one macroblock pair (TODO confirm); a tighter bound could be calculated if someone cares about a few bytes
+
 /* Compiling in interlaced support reduces the speed
  * of progressive decoding by about 2%. */
 #define ALLOW_INTERLACE