diff --git a/configure b/configure
index 04a2ea3404e97777d5fcc52e09b5af038319e838..2131b218d771e08ba654b3cccdfa1141e05936a7 100755
--- a/configure
+++ b/configure
@@ -129,6 +129,7 @@ Component options:
   --disable-dct            disable DCT code
   --disable-dwt            disable DWT code
   --disable-lsp            disable LSP code
+  --disable-lzo            disable LZO decoder code
   --disable-mdct           disable MDCT code
   --disable-rdft           disable RDFT code
   --disable-fft            disable FFT code
@@ -1170,6 +1171,7 @@ CONFIG_LIST="
     libxavs
     libxvid
     lsp
+    lzo
     mdct
     memalign_hack
     memory_poisoning
@@ -1620,6 +1622,7 @@ binkaudio_dct_decoder_select="mdct rdft dct sinewin"
 binkaudio_rdft_decoder_select="mdct rdft sinewin"
 cavs_decoder_select="golomb mpegvideo"
 cook_decoder_select="mdct sinewin"
+cscd_decoder_select="lzo"
 cscd_decoder_suggest="zlib"
 dca_decoder_select="mdct"
 dirac_decoder_select="dwt golomb"
@@ -1710,6 +1713,7 @@ msmpeg4v3_encoder_select="h263_encoder"
 mss2_decoder_select="vc1_decoder"
 nellymoser_decoder_select="mdct sinewin"
 nellymoser_encoder_select="mdct sinewin"
+nuv_decoder_select="lzo"
 png_decoder_select="zlib"
 png_encoder_select="zlib"
 qcelp_decoder_select="lsp"
@@ -1842,7 +1846,7 @@ ipod_muxer_select="mov_muxer"
 libnut_demuxer_deps="libnut"
 libnut_muxer_deps="libnut"
 matroska_audio_muxer_select="matroska_muxer"
-matroska_demuxer_suggest="zlib bzlib"
+matroska_demuxer_suggest="bzlib lzo zlib"
 mov_demuxer_suggest="zlib"
 mp3_demuxer_select="mpegaudio_parser"
 mp4_muxer_select="mov_muxer"
@@ -2740,21 +2744,13 @@ case "$arch" in
     arm*|iPad*)
         arch="arm"
     ;;
-    mips|mipsel|IP*)
+    mips*|IP*)
         arch="mips"
     ;;
-    mips64*)
-        arch="mips"
-        subarch="mips64"
-    ;;
-    parisc|hppa)
-        arch="parisc"
-    ;;
-    parisc64|hppa64)
+    parisc*|hppa*)
         arch="parisc"
-        subarch="parisc64"
     ;;
-    "Power Macintosh"|ppc|powerpc|ppc64|powerpc64)
+    "Power Macintosh"|ppc*|powerpc*)
         arch="ppc"
     ;;
     s390|s390x)
@@ -2974,13 +2970,31 @@ EOF
 check_host_cflags -std=c99
 check_host_cflags -Wall
 
+check_64bit(){ # set $subarch to $2 if the C expression $3 holds for the target compiler, else to $1
+    arch32=$1    # subarch chosen when the probe below fails
+    arch64=$2    # subarch chosen when the probe below succeeds
+    expr=$3      # C expression; callers pass comparisons such as 'sizeof(void *) > 4'
+    check_code cc "" "int test[2*($expr) - 1]" &&
+        subarch=$arch64 || subarch=$arch32    # array length is 1 when expr is true, -1 (invalid) when false; presumably check_code fails if the snippet does not compile
+}
+
 case "$arch" in
-    alpha|ia64|mips|parisc|sparc)
+    alpha|ia64|sparc)
         spic=$shared
     ;;
+    mips)
+        check_64bit mips mips64 '_MIPS_SIM > 1'
+        spic=$shared
+    ;;
+    parisc)
+        check_64bit parisc parisc64 'sizeof(void *) > 4'
+        spic=$shared
+    ;;
+    ppc)
+        check_64bit ppc ppc64 'sizeof(void *) > 4'
+    ;;
     x86)
-        subarch="x86_32"
-        check_code cc "" "int test[(int)sizeof(char*) - 7]" && subarch="x86_64"
+        check_64bit x86_32 x86_64 'sizeof(void *) > 4'
         if test "$subarch" = "x86_64"; then
             spic=$shared
         fi
diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c
index 42c37c831f3907a88066322b61d6f2b677b93f96..548a649fa11beaf47b7254d59f241c1a74c9da4f 100644
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c
@@ -25,7 +25,7 @@
 #include "bytestream.h"
 
 #include "libavutil/imgutils.h"
-#include "libavutil/lzo.h" // for av_memcpy_backptr
+#include "libavutil/mem.h"
 
 typedef struct DfaContext {
     AVFrame pic;
diff --git a/libavcodec/eatgv.c b/libavcodec/eatgv.c
index c986c14e64d7e01fff76a900bb2cd290ba514067..caee311cda959b6a4ec534985c6718f4946601ea 100644
--- a/libavcodec/eatgv.c
+++ b/libavcodec/eatgv.c
@@ -31,8 +31,8 @@
 #include "avcodec.h"
 #define BITSTREAM_READER_LE
 #include "get_bits.h"
-#include "libavutil/lzo.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
 
 #define EA_PREAMBLE_SIZE    8
 #define kVGT_TAG MKTAG('k', 'V', 'G', 'T')
diff --git a/libavcodec/g723_1.c b/libavcodec/g723_1.c
index ae1707d155c1f5953862679a8171061295665cd6..a74c74da616b5d4115ca843547740aa17d087c2d 100644
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -27,7 +27,7 @@
 
 #define BITSTREAM_READER_LE
 #include "libavutil/audioconvert.h"
-#include "libavutil/lzo.h"
+#include "libavutil/mem.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "internal.h"
diff --git a/libavcodec/lcldec.c b/libavcodec/lcldec.c
index 475ffdd55af275b9ab471f3af9068c242bce2168..b3b7b523f67a1478b7f4128419aed86e540865aa 100644
--- a/libavcodec/lcldec.c
+++ b/libavcodec/lcldec.c
@@ -41,10 +41,10 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include "libavutil/mem.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "lcl.h"
-#include "libavutil/lzo.h"
 
 #if CONFIG_ZLIB_DECODER
 #include <zlib.h>
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 480c9edd502a343de3dfedf4cdfc910c9ffd6e39..9504dab3f1cf958e279fd4329cd226a4d12199b5 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -29,6 +29,7 @@
 
 #include <math.h>
 
+#include "libavutil/mem.h"
 #include "dsputil.h"
 #include "avcodec.h"
 #include "get_bits.h"
@@ -38,7 +39,6 @@
 #include "acelp_vectors.h"
 #include "acelp_filters.h"
 #include "lsp.h"
-#include "libavutil/lzo.h"
 #include "dct.h"
 #include "rdft.h"
 #include "sinewin.h"
diff --git a/libavcodec/xan.c b/libavcodec/xan.c
index f11f1bf0625d50029b27d3c2100728ddb9570e98..25e62e66a255467c11cffbfba33ddf13f46dd98d 100644
--- a/libavcodec/xan.c
+++ b/libavcodec/xan.c
@@ -33,12 +33,11 @@
 #include <string.h>
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #define BITSTREAM_READER_LE
 #include "get_bits.h"
-// for av_memcpy_backptr
-#include "libavutil/lzo.h"
 
 #define RUNTIME_GAMMA 0
 
diff --git a/libavcodec/xxan.c b/libavcodec/xxan.c
index 7a7b5ba277d620f9ff2c74439229816b6af32361..de1ea7f0e4b8e2824b0e304de614971505b1044c 100644
--- a/libavcodec/xxan.c
+++ b/libavcodec/xxan.c
@@ -22,11 +22,10 @@
 
 #include "avcodec.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
 #include "bytestream.h"
 #define BITSTREAM_READER_LE
 #include "get_bits.h"
-// for av_memcpy_backptr
-#include "libavutil/lzo.h"
 
 typedef struct XanContext {
     AVCodecContext *avctx;
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index e23d00c4f9da07515033de01e50ca5d0b0ca1d8a..c4a590b27ffff762b7407adcec01ea86e9e8cb70 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1078,6 +1078,7 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
         memcpy(pkt_data + header_size, data, isize);
         break;
     }
+#if CONFIG_LZO
     case MATROSKA_TRACK_ENCODING_COMP_LZO:
         do {
             olen = pkt_size *= 3;
@@ -1095,6 +1096,7 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
         }
         pkt_size -= olen;
         break;
+#endif
 #if CONFIG_ZLIB
     case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
         z_stream zstream = {0};
@@ -1548,14 +1550,17 @@ static int matroska_read_header(AVFormatContext *s)
                    "Multiple combined encodings not supported");
         } else if (encodings_list->nb_elem == 1) {
             if (encodings[0].type ||
-                (encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP &&
+                (
 #if CONFIG_ZLIB
                  encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
 #endif
 #if CONFIG_BZLIB
                  encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB &&
 #endif
-                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO)) {
+#if CONFIG_LZO
+                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO &&
+#endif
+                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP)) {
                 encodings[0].scope = 0;
                 av_log(matroska->ctx, AV_LOG_ERROR,
                        "Unsupported encoding type");
diff --git a/libavformat/smoothstreamingenc.c b/libavformat/smoothstreamingenc.c
index df7522ee1d5a93d2836146aceeaf6caf7d1ed725..1104d5853f07c3748515f560510449bdf239890d 100644
--- a/libavformat/smoothstreamingenc.c
+++ b/libavformat/smoothstreamingenc.c
@@ -559,12 +559,15 @@ static int ism_write_packet(AVFormatContext *s, AVPacket *pkt)
     SmoothStreamingContext *c = s->priv_data;
     AVStream *st = s->streams[pkt->stream_index];
     OutputStream *os = &c->streams[pkt->stream_index];
-    int64_t end_pts = (c->nb_fragments + 1) * c->min_frag_duration;
+    int64_t end_dts = (c->nb_fragments + 1) * c->min_frag_duration;
     int ret;
 
+    if (st->first_dts == AV_NOPTS_VALUE)
+        st->first_dts = pkt->dts;
+
     if ((!c->has_video || st->codec->codec_type == AVMEDIA_TYPE_VIDEO) &&
-        av_compare_ts(pkt->pts, st->time_base,
-                      end_pts, AV_TIME_BASE_Q) >= 0 &&
+        av_compare_ts(pkt->dts - st->first_dts, st->time_base,
+                      end_dts, AV_TIME_BASE_Q) >= 0 &&
         pkt->flags & AV_PKT_FLAG_KEY && os->packets_written) {
 
         if ((ret = ism_flush(s, 0)) < 0)
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 3a6f7313ed5f8d21bc6f0d8726dcf53fada5e224..88a978f96facfe0f010240e63ec37936d244b846 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -27,7 +27,6 @@ HEADERS = adler32.h                                                     \
           intreadwrite.h                                                \
           lfg.h                                                         \
           log.h                                                         \
-          lzo.h                                                         \
           mathematics.h                                                 \
           md5.h                                                         \
           mem.h                                                         \
@@ -47,6 +46,8 @@ HEADERS = adler32.h                                                     \
           version.h                                                     \
           xtea.h                                                        \
 
+HEADERS-$(CONFIG_LZO)                   += lzo.h
+
 ARCH_HEADERS = bswap.h                                                  \
                intmath.h                                                \
                intreadwrite.h                                           \
@@ -77,7 +78,6 @@ OBJS = adler32.o                                                        \
        log.o                                                            \
        log2.o                                                           \
        log2_tab.o                                                       \
-       lzo.o                                                            \
        mathematics.o                                                    \
        md5.o                                                            \
        mem.o                                                            \
@@ -97,6 +97,7 @@ OBJS = adler32.o                                                        \
        xga_font_data.o                                                  \
        xtea.o                                                           \
 
+OBJS-$(CONFIG_LZO)                      += lzo.o
 
 OBJS += $(COMPAT_OBJS:%=../compat/%)
 
diff --git a/libavutil/lzo.c b/libavutil/lzo.c
index 47fc767a0640ed0e4618e50d37bbb459070c27bb..c7232572122bd386b9185d648152f8bcf16c5a07 100644
--- a/libavutil/lzo.c
+++ b/libavutil/lzo.c
@@ -100,8 +100,6 @@ static inline void copy(LZOContext *c, int cnt)
     c->out = dst + cnt;
 }
 
-static inline void memcpy_backptr(uint8_t *dst, int back, int cnt);
-
 /**
  * @brief Copies previously decoded bytes to current position.
  * @param back how many bytes back we start, must be > 0
@@ -122,50 +120,10 @@ static inline void copy_backptr(LZOContext *c, int back, int cnt)
         cnt       = FFMAX(c->out_end - dst, 0);
         c->error |= AV_LZO_OUTPUT_FULL;
     }
-    memcpy_backptr(dst, back, cnt);
+    av_memcpy_backptr(dst, back, cnt);
     c->out = dst + cnt;
 }
 
-static inline void memcpy_backptr(uint8_t *dst, int back, int cnt)
-{
-    const uint8_t *src = &dst[-back];
-    if (back <= 1) {
-        memset(dst, *src, cnt);
-    } else {
-        if (cnt >= 4) {
-            AV_COPY16U(dst,     src);
-            AV_COPY16U(dst + 2, src + 2);
-            src += 4;
-            dst += 4;
-            cnt -= 4;
-        }
-        if (cnt >= 8) {
-            AV_COPY16U(dst,     src);
-            AV_COPY16U(dst + 2, src + 2);
-            AV_COPY16U(dst + 4, src + 4);
-            AV_COPY16U(dst + 6, src + 6);
-            src += 8;
-            dst += 8;
-            cnt -= 8;
-        }
-        if (cnt > 0) {
-            int blocklen = back;
-            while (cnt > blocklen) {
-                memcpy(dst, src, blocklen);
-                dst       += blocklen;
-                cnt       -= blocklen;
-                blocklen <<= 1;
-            }
-            memcpy(dst, src, cnt);
-        }
-    }
-}
-
-void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
-{
-    memcpy_backptr(dst, back, cnt);
-}
-
 int av_lzo1x_decode(void *out, int *outlen, const void *in, int *inlen)
 {
     int state = 0;
diff --git a/libavutil/lzo.h b/libavutil/lzo.h
index 60b7065b158b53e1720300ecd45e5f1bb8d024cc..c03403992d5f68e7fa8e0e1240522366bb99eee4 100644
--- a/libavutil/lzo.h
+++ b/libavutil/lzo.h
@@ -59,17 +59,6 @@
  */
 int av_lzo1x_decode(void *out, int *outlen, const void *in, int *inlen);
 
-/**
- * @brief deliberately overlapping memcpy implementation
- * @param dst destination buffer
- * @param back how many bytes back we start (the initial size of the overlapping window), must be > 0
- * @param cnt number of bytes to copy, must be >= 0
- *
- * cnt > back is valid, this will copy the bytes we just copied,
- * thus creating a repeating pattern with a period length of back.
- */
-void av_memcpy_backptr(uint8_t *dst, int back, int cnt);
-
 /**
  * @}
  */
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 0c1c4b25a0d5420e46eda3d59246420e5ee44476..f3853b08270862dca203268ab6cd9b3c2c23de06 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -29,6 +29,7 @@
 #include "config.h"
 
 #include <limits.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #if HAVE_MALLOC_H
@@ -36,6 +37,7 @@
 #endif
 
 #include "avutil.h"
+#include "intreadwrite.h"
 #include "mem.h"
 
 /* here we can use OS-dependent allocation functions */
@@ -244,3 +246,38 @@ void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem)
     *nb_ptr = nb;
 }
 
+void av_memcpy_backptr(uint8_t *dst, int back, int cnt) /* copy cnt bytes from dst - back to dst; the two ranges may overlap */
+{
+    const uint8_t *src = &dst[-back]; /* source starts back bytes before the destination */
+    if (back <= 1) {
+        memset(dst, *src, cnt); /* one-byte period: the result is a run of the byte at *src */
+    } else {
+        if (cnt >= 4) { /* head: 4 bytes moved in two AV_COPY16U steps (presumably 2 bytes each), possible since back >= 2 here */
+            AV_COPY16U(dst,     src);
+            AV_COPY16U(dst + 2, src + 2);
+            src += 4;
+            dst += 4;
+            cnt -= 4;
+        }
+        if (cnt >= 8) { /* next 8 bytes, same scheme in four steps */
+            AV_COPY16U(dst,     src);
+            AV_COPY16U(dst + 2, src + 2);
+            AV_COPY16U(dst + 4, src + 4);
+            AV_COPY16U(dst + 6, src + 6);
+            src += 8;
+            dst += 8;
+            cnt -= 8;
+        }
+        if (cnt > 0) {
+            int blocklen = back; /* equals dst - src, i.e. the largest chunk memcpy can move without overlap */
+            while (cnt > blocklen) {
+                memcpy(dst, src, blocklen);
+                dst       += blocklen;
+                cnt       -= blocklen;
+                blocklen <<= 1; /* src is not advanced, so the gap dst - src has just doubled */
+            }
+            memcpy(dst, src, cnt); /* tail: cnt <= blocklen here, so source and destination do not overlap */
+        }
+    }
+}
+
diff --git a/libavutil/mem.h b/libavutil/mem.h
index c2f011552db23bdeaaa33763407e54e0f64a0543..ced9453869d401e5b3e1c50fdc7690686cf53667 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -27,6 +27,7 @@
 #define AVUTIL_MEM_H
 
 #include <limits.h>
+#include <stdint.h>
 
 #include "attributes.h"
 #include "error.h"
@@ -216,6 +217,17 @@ static inline int av_size_mult(size_t a, size_t b, size_t *r)
  */
 void av_max_alloc(size_t max);
 
+/**
+ * @brief deliberately overlapping memcpy implementation
+ * @param dst destination buffer
+ * @param back how many bytes back we start (the initial size of the overlapping window), must be > 0
+ * @param cnt number of bytes to copy, must be >= 0
+ *
+ * cnt > back is valid; in that case the copy reads bytes it has just written,
+ * thus creating a repeating pattern with a period length of back.
+ */
+void av_memcpy_backptr(uint8_t *dst, int back, int cnt);
+
 /**
  * @}
  */