diff --git a/RELEASE b/RELEASE
index 4402ec2c055d0d25e7e6dd75f7bd360488890870..014ccf1a013d7cec276cd51bf0a6a99104592155 100644
--- a/RELEASE
+++ b/RELEASE
@@ -1 +1 @@
-1.0.git
+1.0.git
\ No newline at end of file
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index ad09a0cd433ebccc36f63ccf4acc9316a7399a1a..ecd425964d76ac2c04d082d057fb30f83238f250 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -674,7 +674,7 @@ static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN16(dst   , AV_RN16(src   ));
+        AV_COPY16U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -685,7 +685,7 @@ static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
+        AV_COPY32U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -696,8 +696,7 @@ static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_COPY64U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -708,8 +707,7 @@ static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_COPY64U(dst, src);
         dst[8]= src[8];
         dst+=dstStride;
         src+=srcStride;
@@ -721,10 +719,7 @@ static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
-        AV_WN32(dst+8 , AV_RN32(src+8 ));
-        AV_WN32(dst+12, AV_RN32(src+12));
+        AV_COPY128U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -735,10 +730,7 @@ static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
-        AV_WN32(dst+8 , AV_RN32(src+8 ));
-        AV_WN32(dst+12, AV_RN32(src+12));
+        AV_COPY128U(dst, src);
         dst[16]= src[16];
         dst+=dstStride;
         src+=srcStride;
diff --git a/libavcodec/dxtory.c b/libavcodec/dxtory.c
index f7410780924cfcd11eaf970a6af2a6a65056e06d..da43c4700364adb83b2cd5ee92a8f7f7293ece49 100644
--- a/libavcodec/dxtory.c
+++ b/libavcodec/dxtory.c
@@ -70,8 +70,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     V  = pic->data[2];
     for (h = 0; h < avctx->height; h += 2) {
         for (w = 0; w < avctx->width; w += 2) {
-            AV_WN16A(Y1 + w, AV_RN16A(src));
-            AV_WN16A(Y2 + w, AV_RN16A(src + 2));
+            AV_COPY16(Y1 + w, src);
+            AV_COPY16(Y2 + w, src + 2);
             U[w >> 1] = src[4] + 0x80;
             V[w >> 1] = src[5] + 0x80;
             src += 6;
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index b010af6b3e72711e174538b509778ec7b6e9a3a3..f0256f31c18fd47501d3073e0d9e65dde759fb74 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -1654,7 +1654,7 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
     int buf_size        = avpkt->size;
     MPADecodeContext *s = avctx->priv_data;
     uint32_t header;
-    int out_size;
+    int ret;
 
     while(buf_size && !*buf){
         buf++;
@@ -1693,21 +1693,22 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
         buf_size= s->frame_size;
     }
 
-    out_size = mp_decode_frame(s, NULL, buf, buf_size);
-    if (out_size >= 0) {
+    ret = mp_decode_frame(s, NULL, buf, buf_size);
+    if (ret >= 0) {
         *got_frame_ptr   = 1;
         *(AVFrame *)data = s->frame;
         avctx->sample_rate = s->sample_rate;
         //FIXME maybe move the other codec info stuff from above here too
     } else {
         av_log(avctx, AV_LOG_ERROR, "Error while decoding MPEG audio frame.\n");
-        /* Only return an error if the bad frame makes up the whole packet.
-           If there is more data in the packet, just consume the bad frame
-           instead of returning an error, which would discard the whole
-           packet. */
+        /* Only return an error if the bad frame makes up the whole packet or
+         * the error is related to buffer management.
+         * If there is more data in the packet, just consume the bad frame
+         * instead of returning an error, which would discard the whole
+         * packet. */
         *got_frame_ptr = 0;
-        if (buf_size == avpkt->size)
-            return out_size;
+        if (buf_size == avpkt->size || ret != AVERROR_INVALIDDATA)
+            return ret;
     }
     s->frame_size = 0;
     return buf_size;
@@ -1732,7 +1733,7 @@ static int decode_frame_adu(AVCodecContext *avctx, void *data,
     int buf_size        = avpkt->size;
     MPADecodeContext *s = avctx->priv_data;
     uint32_t header;
-    int len;
+    int len, ret;
     int av_unused out_size;
 
     len = buf_size;
@@ -1764,10 +1765,10 @@ static int decode_frame_adu(AVCodecContext *avctx, void *data,
 
     s->frame_size = len;
 
-    out_size = mp_decode_frame(s, NULL, buf, buf_size);
-    if (out_size < 0) {
+    ret = mp_decode_frame(s, NULL, buf, buf_size);
+    if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error while decoding MPEG audio frame.\n");
-        return AVERROR_INVALIDDATA;
+        return ret;
     }
 
     *got_frame_ptr   = 1;
@@ -1972,7 +1973,10 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
         }
         ch += m->nb_channels;
 
-        out_size += mp_decode_frame(m, outptr, buf, fsize);
+        if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0)
+            return ret;
+
+        out_size += ret;
         buf      += fsize;
         len      -= fsize;
 
diff --git a/libavutil/intreadwrite.h b/libavutil/intreadwrite.h
index 7c68ead92d6392194583bb6bdc541517c01a23bf..34e21d42fa4b4d3bf108767b9e542b172130ce18 100644
--- a/libavutil/intreadwrite.h
+++ b/libavutil/intreadwrite.h
@@ -468,6 +468,33 @@ union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
 #   define AV_WN64A(p, v) AV_WNA(64, p, v)
 #endif
 
+/*
+ * The AV_COPYxxU macros copy xx bits from s to d; both may be unaligned.
+ * They expand without a trailing ';', so invoke them as AV_COPYxxU(d, s);
+ */
+
+#define AV_COPYU(n, d, s) AV_WN##n(d, AV_RN##n(s))
+
+#ifndef AV_COPY16U
+#   define AV_COPY16U(d, s) AV_COPYU(16, d, s)
+#endif
+
+#ifndef AV_COPY32U
+#   define AV_COPY32U(d, s) AV_COPYU(32, d, s)
+#endif
+
+#ifndef AV_COPY64U
+#   define AV_COPY64U(d, s) AV_COPYU(64, d, s)
+#endif
+
+#ifndef AV_COPY128U /* used only when AV_COPY128U is not already defined */
+#   define AV_COPY128U(d, s)                                    \
+    do {                                                        \
+        AV_COPY64U(d, s);                                       \
+        AV_COPY64U((char *)(d) + 8, (const char *)(s) + 8);     \
+    } while(0) /* two 64-bit copies; d and s are each expanded twice */
+#endif /* AV_COPY128U */
+
 /* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
  * naturally aligned. They may be implemented using MMX,
  * so emms_c() must be called before using any float code
diff --git a/libavutil/lzo.c b/libavutil/lzo.c
index 79b1ce23ce20ec704d52fecbd2a906cb79b6791b..c76d9a86d32adefde204788cc839cc10662fb55a 100644
--- a/libavutil/lzo.c
+++ b/libavutil/lzo.c
@@ -23,6 +23,7 @@
 
 #include "avutil.h"
 #include "common.h"
+#include "intreadwrite.h"
 #include "lzo.h"
 
 /// Define if we may write up to 12 bytes beyond the output buffer.
@@ -71,19 +72,6 @@ static inline int get_len(LZOContext *c, int x, int mask)
     return cnt;
 }
 
-//#define UNALIGNED_LOADSTORE
-#define BUILTIN_MEMCPY
-#ifdef UNALIGNED_LOADSTORE
-#define COPY2(d, s) *(uint16_t *)(d) = *(uint16_t *)(s);
-#define COPY4(d, s) *(uint32_t *)(d) = *(uint32_t *)(s);
-#elif defined(BUILTIN_MEMCPY)
-#define COPY2(d, s) memcpy(d, s, 2);
-#define COPY4(d, s) memcpy(d, s, 4);
-#else
-#define COPY2(d, s) (d)[0] = (s)[0]; (d)[1] = (s)[1];
-#define COPY4(d, s) (d)[0] = (s)[0]; (d)[1] = (s)[1]; (d)[2] = (s)[2]; (d)[3] = (s)[3];
-#endif
-
 /**
  * @brief Copies bytes from input to output buffer with checking.
  * @param cnt number of bytes to copy, must be >= 0
@@ -101,7 +89,7 @@ static inline void copy(LZOContext *c, int cnt)
         c->error |= AV_LZO_OUTPUT_FULL;
     }
 #if defined(INBUF_PADDED) && defined(OUTBUF_PADDED)
-    COPY4(dst, src);
+    AV_COPY32U(dst, src);
     src += 4;
     dst += 4;
     cnt -= 4;
@@ -145,16 +133,16 @@ static inline void memcpy_backptr(uint8_t *dst, int back, int cnt)
         memset(dst, *src, cnt);
     } else {
 #ifdef OUTBUF_PADDED
-        COPY2(dst, src);
-        COPY2(dst + 2, src + 2);
+        AV_COPY16U(dst,     src);
+        AV_COPY16U(dst + 2, src + 2);
         src += 4;
         dst += 4;
         cnt -= 4;
         if (cnt > 0) {
-            COPY2(dst, src);
-            COPY2(dst + 2, src + 2);
-            COPY2(dst + 4, src + 4);
-            COPY2(dst + 6, src + 6);
+            AV_COPY16U(dst,     src);
+            AV_COPY16U(dst + 2, src + 2);
+            AV_COPY16U(dst + 4, src + 4);
+            AV_COPY16U(dst + 6, src + 6);
             src += 8;
             dst += 8;
             cnt -= 8;