diff --git a/Changelog b/Changelog
index b9b9d0e3d6446ee30018e5ca104e12dce27a7db7..4f97ab851e505a6cacce296553dc2663f32231ce 100644
--- a/Changelog
+++ b/Changelog
@@ -53,6 +53,7 @@ version next:
   -pass and -passlogfile are now per-output stream
 - volume measurement filter
 - Ut Video encoder
+- Microsoft Screen 2 decoder
 - Matroska demuxer now identifies SRT subtitles as AV_CODEC_ID_SUBRIP
   instead of AV_CODEC_ID_TEXT
 - smartblur filter ported from MPlayer
diff --git a/configure b/configure
index b14586b4c3fc2fbcf9a9d6b65880a6ef24ef898b..b2fc4bc14124fc2080adb91efa56a2f74b136891 100755
--- a/configure
+++ b/configure
@@ -419,6 +419,12 @@ map(){
     for v; do eval $m; done
 }
 
+add_suffix(){
+    suffix=$1
+    shift
+    for v; do echo ${v}${suffix}; done
+}
+
 set_all(){
     value=$1
     shift
@@ -1246,6 +1252,8 @@ HAVE_LIST_PUB='
 
 HAVE_LIST="
     $ARCH_EXT_LIST
+    $(add_suffix _external $ARCH_EXT_LIST)
+    $(add_suffix _inline   $ARCH_EXT_LIST)
     $HAVE_LIST_PUB
     $THREADS_LIST
     aligned_malloc
@@ -1524,6 +1532,17 @@ sse42_deps="sse4"
 avx_deps="sse42"
 fma4_deps="avx"
 
+mmx_external_deps="yasm"
+mmx_inline_deps="inline_asm"
+mmx_suggest="mmx_external mmx_inline"
+
+for ext in $(filter_out mmx $ARCH_EXT_LIST_X86); do
+    eval dep=\$${ext}_deps
+    eval ${ext}_external_deps='"${dep}_external"'
+    eval ${ext}_inline_deps='"${dep}_inline"'
+    eval ${ext}_suggest='"${ext}_external ${ext}_inline"'
+done
+
 aligned_stack_if_any="ppc x86"
 fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
 fast_clz_if_any="alpha armv5te avr32 mips ppc x86"
@@ -1645,6 +1664,7 @@ msmpeg4v2_decoder_select="h263_decoder"
 msmpeg4v2_encoder_select="h263_encoder"
 msmpeg4v3_decoder_select="h263_decoder"
 msmpeg4v3_encoder_select="h263_encoder"
+mss2_decoder_select="vc1_decoder"
 nellymoser_decoder_select="mdct sinewin"
 nellymoser_encoder_select="mdct sinewin"
 png_decoder_select="zlib"
@@ -3293,8 +3313,8 @@ EOF
     check_inline_asm xmm_clobbers '"":::"%xmm0"'
 
     # check whether binutils is new enough to compile SSSE3/MMXEXT
-    enabled ssse3  && check_inline_asm ssse3  '"pabsw %xmm0, %xmm0"'
-    enabled mmxext && check_inline_asm mmxext '"pmaxub %mm0, %mm1"'
+    enabled ssse3  && check_inline_asm ssse3_inline  '"pabsw %xmm0, %xmm0"'
+    enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"'
 
     if ! disabled_any asm mmx yasm; then
         if check_cmd $yasmexe --version; then
@@ -3315,8 +3335,8 @@ EOF
 
         check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
             die "yasm not found, use --disable-yasm for a crippled build"
-        check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
-        check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
+        check_yasm "vextractf128 xmm0, ymm0, 0"      || disable avx_external
+        check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4_external
         check_yasm "CPU amdnop" && enable cpunop
     fi
 
@@ -3808,13 +3828,14 @@ enabled broken_strtod && force_include compat/strtod.h
 
 enabled_any $THREADS_LIST      && enable threads
 
+enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
+
 check_deps $CONFIG_LIST       \
            $CONFIG_EXTRA      \
            $HAVE_LIST         \
            $ALL_COMPONENTS    \
            $ALL_TESTS         \
 
-enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
 
 if test $target_os = "haiku"; then
     disable memalign
diff --git a/doc/general.texi b/doc/general.texi
index 301c98cc882bf2c6a2578ac8c2cede47aebac694..ef9cf729c7448b8d9f6d569a9d5e023461c6874b 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -592,6 +592,8 @@ following image formats are supported:
 @item Microsoft RLE          @tab     @tab  X
 @item Microsoft Screen 1     @tab     @tab  X
     @tab Also known as Windows Media Video V7 Screen.
+@item Microsoft Screen 2     @tab     @tab  X
+    @tab Also known as Windows Media Video V9 Screen.
 @item Microsoft Video 1      @tab     @tab  X
 @item Mimic                  @tab     @tab  X
     @tab Used in MSN Messenger Webcam streams.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index e9b4806ed1531e33aff215b69c7524ee1e4bd566..1fadb3f23588e314ca6a4b41c2ff561235a725b6 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -309,6 +309,7 @@ OBJS-$(CONFIG_MSMPEG4V3_ENCODER)       += msmpeg4.o msmpeg4enc.o msmpeg4data.o \
 OBJS-$(CONFIG_MSRLE_DECODER)           += msrle.o msrledec.o
 OBJS-$(CONFIG_MSA1_DECODER)            += mss3.o mss34dsp.o
 OBJS-$(CONFIG_MSS1_DECODER)            += mss1.o mss12.o
+OBJS-$(CONFIG_MSS2_DECODER)            += mss2.o mss12.o mss2dsp.o
 OBJS-$(CONFIG_MSVIDEO1_DECODER)        += msvideo1.o
 OBJS-$(CONFIG_MSVIDEO1_ENCODER)        += msvideo1enc.o elbg.o
 OBJS-$(CONFIG_MSZH_DECODER)            += lcldec.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index d0cec39979215362c2c076f5945a2a017ce12f13..8806c6a5989ab381f43a72bf901492e891c653bc 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -177,6 +177,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC  (MSMPEG4V3, msmpeg4v3);
     REGISTER_DECODER (MSRLE, msrle);
     REGISTER_DECODER (MSS1, mss1);
+    REGISTER_DECODER (MSS2, mss2);
     REGISTER_ENCDEC  (MSVIDEO1, msvideo1);
     REGISTER_DECODER (MSZH, mszh);
     REGISTER_DECODER (MTS2, mts2);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index cea123ddd5897556234a737f4be9edbc11dbbc77..7ae6717e5eb1a498b048ae4d4706c7324c6e3602 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -266,6 +266,7 @@ enum AVCodecID {
     AV_CODEC_ID_TSCC2,
     AV_CODEC_ID_MTS2,
     AV_CODEC_ID_CLLC,
+    AV_CODEC_ID_MSS2,
     AV_CODEC_ID_Y41P       = MKBETAG('Y','4','1','P'),
     AV_CODEC_ID_ESCAPE130  = MKBETAG('E','1','3','0'),
     AV_CODEC_ID_EXR        = MKBETAG('0','E','X','R'),
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 15701744b28904945fb8e559c3000c6247435a56..b4e72a1fe93ba6342b32cdda6664141b57d196cb 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1205,6 +1205,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Canopus Lossless Codec"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_MSS2,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "mss2",
+        .long_name = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"),
+        .props     = AV_CODEC_PROP_LOSSY, /* not intra-only: has inter frames */
+    },
     {
         .id        = AV_CODEC_ID_Y41P,
         .type      = AVMEDIA_TYPE_VIDEO,
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index e37b23c59e04c7620d56eace98cf5a8aed36e700..b44b68bfe9b6aaf0232fc4383fefa486fc852e4f 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -85,7 +85,7 @@ static const struct algo fdct_tab[] = {
     { "IJG-AAN-INT",    ff_fdct_ifast,         SCALE_PERM },
     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
 
-#if HAVE_MMX && HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
     { "MMXEXT",         ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMXEXT  },
     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
@@ -125,7 +125,7 @@ static const struct algo idct_tab[] = {
     { "INT",            ff_j_rev_dct,          MMX_PERM },
     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
 
-#if HAVE_MMX && HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
 #if CONFIG_GPL
     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 3566c11414bb783b0e305766546518366896d4da..3acb6c2b905142bf835959f2978e8cef39412b69 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -60,7 +60,10 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
     s->quant_precision=5;
     s->decode_mb= ff_h263_decode_mb;
     s->low_delay= 1;
-    avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
+    if (avctx->codec->id == AV_CODEC_ID_MSS2)
+        avctx->pix_fmt = PIX_FMT_YUV420P;
+    else
+        avctx->pix_fmt = avctx->get_format(avctx, avctx->codec->pix_fmts);
     s->unrestricted_mv= 1;
 
     /* select sub codec */
@@ -96,6 +99,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
     case AV_CODEC_ID_WMV3:
     case AV_CODEC_ID_VC1IMAGE:
     case AV_CODEC_ID_WMV3IMAGE:
+    case AV_CODEC_ID_MSS2:
         s->h263_pred = 1;
         s->msmpeg4_version=6;
         avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 39f0fd56e193f56f61a1a08b5f403fff226e00a6..9d867354b7613b8e17f2702d35326bc8eeceb1df 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -39,7 +39,7 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/imgutils.h"
 
-#if HAVE_MMX && HAVE_YASM
+#if HAVE_MMX_EXTERNAL
 #include "x86/dsputil_mmx.h"
 #endif
 
@@ -48,7 +48,7 @@
 #define FF_COLOR_YUV      2 /**< YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */
 #define FF_COLOR_YUV_JPEG 3 /**< YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */
 
-#if HAVE_MMX && HAVE_YASM
+#if HAVE_MMX_EXTERNAL
 #define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
 #define deinterlace_line         ff_deinterlace_line_mmx
 #else
@@ -815,7 +815,7 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
     return 0;
 }
 
-#if !(HAVE_MMX && HAVE_YASM)
+#if !HAVE_MMX_EXTERNAL
 /* filter parameters: [-1 4 2 4 -1] // 8 */
 static void deinterlace_line_c(uint8_t *dst,
                              const uint8_t *lum_m4, const uint8_t *lum_m3,
@@ -864,7 +864,7 @@ static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
         lum++;
     }
 }
-#endif
+#endif /* !HAVE_MMX_EXTERNAL */
 
 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
    top field is copied as is, but the bottom field is deinterlaced
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 77108cf8343fa5d729d3b3ebe5562f747b0d56b5..6fe2303c03c1c1fefb688a620cdaeb078fd81dd8 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -226,10 +226,11 @@ void ff_copy_picture(Picture *dst, Picture *src)
  */
 static void free_frame_buffer(MpegEncContext *s, Picture *pic)
 {
-    /* Windows Media Image codecs allocate internal buffers with different
-     * dimensions; ignore user defined callbacks for these
-     */
-    if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && s->codec_id != AV_CODEC_ID_VC1IMAGE)
+    /* WM Image / Screen codecs allocate internal buffers with different
+     * dimensions / colorspaces; ignore user-defined callbacks for these. */
+    if (s->codec_id != AV_CODEC_ID_WMV3IMAGE &&
+        s->codec_id != AV_CODEC_ID_VC1IMAGE  &&
+        s->codec_id != AV_CODEC_ID_MSS2)
         ff_thread_release_buffer(s->avctx, &pic->f);
     else
         avcodec_default_release_buffer(s->avctx, &pic->f);
@@ -254,7 +255,9 @@ static int alloc_frame_buffer(MpegEncContext *s, Picture *pic)
         }
     }
 
-    if (s->codec_id != AV_CODEC_ID_WMV3IMAGE && s->codec_id != AV_CODEC_ID_VC1IMAGE)
+    if (s->codec_id != AV_CODEC_ID_WMV3IMAGE &&
+        s->codec_id != AV_CODEC_ID_VC1IMAGE  &&
+        s->codec_id != AV_CODEC_ID_MSS2)
         r = ff_thread_get_buffer(s->avctx, &pic->f);
     else
         r = avcodec_default_get_buffer(s->avctx, &pic->f);
diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c
index 7b4dbbbaa963fd410e27b5d0ca17b1bfebc7a64e..d622e2ca61eab4d0f451e0ff80f596acbfedf56a 100644
--- a/libavcodec/mss1.c
+++ b/libavcodec/mss1.c
@@ -24,14 +24,13 @@
  * Microsoft Screen 1 (aka Windows Media Video V7 Screen) decoder
  */
 
-#include "libavutil/intfloat.h"
-#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "mss12.h"
 
 typedef struct MSS1Context {
     MSS12Context   ctx;
     AVFrame        pic;
+    SliceContext   sc[2];
 } MSS1Context;
 
 static void arith_normalise(ArithCoder *c)
@@ -56,24 +55,11 @@ static void arith_normalise(ArithCoder *c)
         c->low   <<= 1;
         c->high  <<= 1;
         c->high   |= 1;
-        c->value  |= get_bits1(c->gb);
+        c->value  |= get_bits1(c->gbc.gb);
     }
 }
 
-static int arith_get_bit(ArithCoder *c)
-{
-    int range = c->high - c->low + 1;
-    int bit   = (((c->value - c->low) << 1) + 1) / range;
-
-    if (bit)
-        c->low += range >> 1;
-    else
-        c->high = c->low + (range >> 1) - 1;
-
-    arith_normalise(c);
-
-    return bit;
-}
+ARITH_GET_BIT()
 
 static int arith_get_bits(ArithCoder *c, int bits)
 {
@@ -118,40 +104,27 @@ static int arith_get_prob(ArithCoder *c, int *probs)
     return sym;
 }
 
-static int arith_get_model_sym(ArithCoder *c, Model *m)
-{
-    int idx, val;
-
-    idx = arith_get_prob(c, m->cum_prob);
-
-    val = m->idx2sym[idx];
-    ff_mss12_model_update(m, idx);
-
-    arith_normalise(c);
-
-    return val;
-}
+ARITH_GET_MODEL_SYM()
 
 static void arith_init(ArithCoder *c, GetBitContext *gb)
 {
-    c->low   = 0;
-    c->high  = 0xFFFF;
-    c->value = get_bits(gb, 16);
-    c->gb    = gb;
-
+    c->low           = 0;
+    c->high          = 0xFFFF;
+    c->value         = get_bits(gb, 16);
+    c->gbc.gb        = gb;
     c->get_model_sym = arith_get_model_sym;
     c->get_number    = arith_get_number;
 }
 
-static int decode_pal(MSS1Context *ctx, ArithCoder *acoder)
+static int decode_pal(MSS12Context *ctx, ArithCoder *acoder)
 {
     int i, ncol, r, g, b;
-    uint32_t *pal = ctx->ctx.pal + 256 - ctx->ctx.free_colours;
+    uint32_t *pal = ctx->pal + 256 - ctx->free_colours;
 
-    if (!ctx->ctx.free_colours)
+    if (!ctx->free_colours)
         return 0;
 
-    ncol = arith_get_number(acoder, ctx->ctx.free_colours + 1);
+    ncol = arith_get_number(acoder, ctx->free_colours + 1);
     for (i = 0; i < ncol; i++) {
         r = arith_get_bits(acoder, 8);
         g = arith_get_bits(acoder, 8);
@@ -167,7 +140,8 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 {
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
-    MSS1Context *c = avctx->priv_data;
+    MSS1Context *ctx = avctx->priv_data;
+    MSS12Context *c = &ctx->ctx;
     GetBitContext gb;
     ArithCoder acoder;
     int pal_changed = 0;
@@ -176,37 +150,37 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     init_get_bits(&gb, buf, buf_size * 8);
     arith_init(&acoder, &gb);
 
-    c->pic.reference    = 3;
-    c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE |
-                          FF_BUFFER_HINTS_REUSABLE;
-    if ((ret = avctx->reget_buffer(avctx, &c->pic)) < 0) {
+    ctx->pic.reference    = 3;
+    ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_READABLE |
+                            FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
+    if ((ret = avctx->reget_buffer(avctx, &ctx->pic)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
         return ret;
     }
 
-    c->ctx.pic_start  = c->pic.data[0] + c->pic.linesize[0] * (avctx->height - 1);
-    c->ctx.pic_stride = -c->pic.linesize[0];
-    c->ctx.keyframe   = !arith_get_bit(&acoder);
-    if (c->ctx.keyframe) {
-        ff_mss12_codec_reset(&c->ctx);
-        pal_changed      = decode_pal(c, &acoder);
-        c->pic.key_frame = 1;
-        c->pic.pict_type = AV_PICTURE_TYPE_I;
+    c->pal_pic    =  ctx->pic.data[0] + ctx->pic.linesize[0] * (avctx->height - 1);
+    c->pal_stride = -ctx->pic.linesize[0];
+    c->keyframe   = !arith_get_bit(&acoder);
+    if (c->keyframe) {
+        ff_mss12_codec_reset(c);
+        pal_changed        = decode_pal(c, &acoder);
+        ctx->pic.key_frame = 1;
+        ctx->pic.pict_type = AV_PICTURE_TYPE_I;
     } else {
-        if (c->ctx.corrupted)
+        if (c->corrupted)
             return AVERROR_INVALIDDATA;
-        c->pic.key_frame = 0;
-        c->pic.pict_type = AV_PICTURE_TYPE_P;
+        ctx->pic.key_frame = 0;
+        ctx->pic.pict_type = AV_PICTURE_TYPE_P;
     }
-    c->ctx.corrupted = ff_mss12_decode_rect(&c->ctx, &acoder, 0, 0,
-                                            avctx->width, avctx->height);
-    if (c->ctx.corrupted)
+    c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0,
+                                        avctx->width, avctx->height);
+    if (c->corrupted)
         return AVERROR_INVALIDDATA;
-    memcpy(c->pic.data[1], c->ctx.pal, AVPALETTE_SIZE);
-    c->pic.palette_has_changed = pal_changed;
+    memcpy(ctx->pic.data[1], c->pal, AVPALETTE_SIZE);
+    ctx->pic.palette_has_changed = pal_changed;
 
     *data_size = sizeof(AVFrame);
-    *(AVFrame*)data = c->pic;
+    *(AVFrame*)data = ctx->pic;
 
     /* always report that the buffer was completely consumed */
     return buf_size;
@@ -215,20 +189,25 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 static av_cold int mss1_decode_init(AVCodecContext *avctx)
 {
     MSS1Context * const c = avctx->priv_data;
+    int ret;
 
     c->ctx.avctx       = avctx;
     avctx->coded_frame = &c->pic;
 
-    return ff_mss12_decode_init(avctx, 0);
+    ret = ff_mss12_decode_init(&c->ctx, 0);
+
+    avctx->pix_fmt = PIX_FMT_PAL8;
+
+    return ret;
 }
 
 static av_cold int mss1_decode_end(AVCodecContext *avctx)
 {
-    MSS1Context * const c = avctx->priv_data;
+    MSS1Context * const ctx = avctx->priv_data;
 
-    if (c->pic.data[0])
-        avctx->release_buffer(avctx, &c->pic);
-    ff_mss12_decode_end(avctx);
+    if (ctx->pic.data[0])
+        avctx->release_buffer(avctx, &ctx->pic);
+    ff_mss12_decode_end(&ctx->ctx);
 
     return 0;
 }
diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c
index 38291d910cfcaf407e448e3953308961865b5c09..92834708844f7d593a92f3c1a018ca1af574aefd 100644
--- a/libavcodec/mss12.c
+++ b/libavcodec/mss12.c
@@ -47,12 +47,8 @@ static int model_calc_threshold(Model *m)
 {
     int thr;
 
-    if (m->thr_weight == -1) {
-        thr = 2 * m->weights[m->num_syms] - 1;
-        thr = ((thr >> 1) + 4 * m->cum_prob[0]) / thr;
-    } else {
-        thr = m->num_syms * m->thr_weight;
-    }
+    thr = 2 * m->weights[m->num_syms] - 1;
+    thr = ((thr >> 1) + 4 * m->cum_prob[0]) / thr;
 
     return FFMIN(thr, 0x3FFF);
 }
@@ -78,7 +74,7 @@ static av_cold void model_init(Model *m, int num_syms, int thr_weight)
 {
     m->num_syms   = num_syms;
     m->thr_weight = thr_weight;
-    m->threshold  = model_calc_threshold(m);
+    m->threshold  = num_syms * thr_weight;
     model_reset(m);
 }
 
@@ -87,7 +83,7 @@ static void model_rescale_weights(Model *m)
     int i;
     int cum_prob;
 
-    if (m->thr_weight == -1)
+    if (m->thr_weight == THRESH_ADAPTIVE)
         m->threshold = model_calc_threshold(m);
     while (m->cum_prob[0] > m->threshold) {
         cum_prob = 0;
@@ -129,8 +125,14 @@ static void pixctx_reset(PixContext *ctx)
 {
     int i, j, k;
 
-    for (i = 0; i < ctx->cache_size; i++)
-        ctx->cache[i] = i;
+    if (!ctx->special_initial_cache)
+        for (i = 0; i < ctx->cache_size; i++)
+            ctx->cache[i] = i;
+    else {
+        ctx->cache[0] = 1;
+        ctx->cache[1] = 2;
+        ctx->cache[2] = 4;
+    }
 
     model_reset(&ctx->cache_model);
     model_reset(&ctx->full_model);
@@ -141,27 +143,23 @@ static void pixctx_reset(PixContext *ctx)
                 model_reset(&ctx->sec_models[i][j][k]);
 }
 
-static av_cold void pixctx_init(PixContext *ctx, int cache_size)
+static av_cold void pixctx_init(PixContext *ctx, int cache_size,
+                                int full_model_syms, int special_initial_cache)
 {
     int i, j, k;
 
-    ctx->cache_size = cache_size + 4;
-    ctx->num_syms   = cache_size;
-
-    for (i = 0; i < ctx->cache_size; i++)
-        ctx->cache[i] = i;
+    ctx->cache_size            = cache_size + 4;
+    ctx->num_syms              = cache_size;
+    ctx->special_initial_cache = special_initial_cache;
 
     model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW);
-    model_init(&ctx->full_model, 256, THRESH_HIGH);
+    model_init(&ctx->full_model, full_model_syms, THRESH_HIGH);
 
-    for (i = 0; i < 4; i++) {
-        for (j = 0; j < sec_order_sizes[i]; j++) {
-            for (k = 0; k < 4; k++) {
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < sec_order_sizes[i]; j++)
+            for (k = 0; k < 4; k++)
                 model_init(&ctx->sec_models[i][j][k], 2 + i,
                            i ? THRESH_LOW : THRESH_ADAPTIVE);
-            }
-        }
-    }
 }
 
 static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx)
@@ -196,7 +194,6 @@ static int decode_pixel(ArithCoder *acoder, PixContext *pctx,
     if (val < pctx->num_syms) {
         int idx, j;
 
-
         idx = 0;
         for (i = 0; i < pctx->cache_size; i++) {
             for (j = 0; j < num_ngb; j++)
@@ -309,195 +306,288 @@ static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx,
         break;
     }
 
-    pix = acoder->get_model_sym(acoder, &pctx->sec_models[nlen - 1][layer][sub]);
+    pix = acoder->get_model_sym(acoder,
+                                &pctx->sec_models[nlen - 1][layer][sub]);
     if (pix < nlen)
         return ref_pix[pix];
     else
         return decode_pixel(acoder, pctx, ref_pix, nlen);
 }
 
-static int decode_region(MSS12Context *ctx, ArithCoder *acoder, uint8_t *dst,
+static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic,
                          int x, int y, int width, int height, int stride,
-                         PixContext *pctx)
+                         int rgb_stride, PixContext *pctx, const uint32_t *pal)
 {
-    int i, j;
+    int i, j, p;
+    uint8_t *rgb_dst = rgb_pic + x * 3 + y * rgb_stride;
 
     dst += x + y * stride;
 
-    dst[0] = decode_top_left_pixel(acoder, pctx);
     for (j = 0; j < height; j++) {
         for (i = 0; i < width; i++) {
             if (!i && !j)
-                continue;
+                p = decode_top_left_pixel(acoder, pctx);
+            else
+                p = decode_pixel_in_context(acoder, pctx, dst + i, stride,
+                                            i, j, width - i - 1);
+            dst[i] = p;
 
-            dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride,
-                                             i, j, width - i - 1);
+            if (rgb_pic)
+                AV_WB24(rgb_dst + i * 3, pal[p]);
         }
-        dst += stride;
+        dst     += stride;
+        rgb_dst += rgb_stride;
     }
 
     return 0;
 }
 
-static int decode_region_masked(MSS12Context *ctx, ArithCoder *acoder,
+static void copy_rectangles(MSS12Context const *c,
+                            int x, int y, int width, int height)
+{
+    int j;
+
+    if (c->last_rgb_pic)
+        for (j = y; j < y + height; j++) {
+            memcpy(c->rgb_pic      + j * c->rgb_stride + x * 3,
+                   c->last_rgb_pic + j * c->rgb_stride + x * 3,
+                   width * 3);
+            memcpy(c->pal_pic      + j * c->pal_stride + x,
+                   c->last_pal_pic + j * c->pal_stride + x,
+                   width);
+        }
+}
+
+static int motion_compensation(MSS12Context const *c,
+                               int x, int y, int width, int height)
+{
+    if (x + c->mvX < 0 || x + c->mvX + width  > c->avctx->width  ||
+        y + c->mvY < 0 || y + c->mvY + height > c->avctx->height ||
+        !c->rgb_pic)
+        return -1;
+    else {
+        uint8_t *dst     = c->pal_pic + x     + y * c->pal_stride;
+        uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * c->rgb_stride;
+        uint8_t *src;
+        uint8_t *rgb_src;
+        int j;
+        x += c->mvX;
+        y += c->mvY;
+        if (c->last_rgb_pic) {
+            src     = c->last_pal_pic + x +     y * c->pal_stride;
+            rgb_src = c->last_rgb_pic + x * 3 + y * c->rgb_stride;
+        } else {
+            src     = c->pal_pic + x     + y * c->pal_stride;
+            rgb_src = c->rgb_pic + x * 3 + y * c->rgb_stride;
+        }
+        for (j = 0; j < height; j++) {
+            memmove(dst, src, width);
+            memmove(rgb_dst, rgb_src, width * 3);
+            dst     += c->pal_stride;
+            src     += c->pal_stride;
+            rgb_dst += c->rgb_stride;
+            rgb_src += c->rgb_stride;
+        }
+    }
+    return 0;
+}
+
+static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder,
                                 uint8_t *dst, int stride, uint8_t *mask,
                                 int mask_stride, int x, int y,
                                 int width, int height,
                                 PixContext *pctx)
 {
-    int i, j;
+    int i, j, p;
+    uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * c->rgb_stride;
 
     dst  += x + y * stride;
     mask += x + y * mask_stride;
 
-    if (mask[0] == 0xFF)
-        dst[0] = decode_top_left_pixel(acoder, pctx);
     for (j = 0; j < height; j++) {
         for (i = 0; i < width; i++) {
-            if (!i && !j || mask[i] != 0xFF)
-                continue;
-
-            dst[i] = decode_pixel_in_context(acoder, pctx, dst + i, stride,
-                                             i, j, width - i - 1);
+            if (c->avctx->err_recognition & AV_EF_EXPLODE &&
+                ( c->rgb_pic && mask[i] != 0x01 && mask[i] != 0x02 && mask[i] != 0x04 ||
+                 !c->rgb_pic && mask[i] != 0x80 && mask[i] != 0xFF))
+                return -1;
+
+            if (mask[i] == 0x02) {
+                copy_rectangles(c, x + i, y + j, 1, 1);
+            } else if (mask[i] == 0x04) {
+                if (motion_compensation(c, x + i, y + j, 1, 1))
+                    return -1;
+            } else if (mask[i] != 0x80) {
+                if (!i && !j)
+                    p = decode_top_left_pixel(acoder, pctx);
+                else
+                    p = decode_pixel_in_context(acoder, pctx, dst + i, stride,
+                                                i, j, width - i - 1);
+                dst[i] = p;
+                if (c->rgb_pic)
+                    AV_WB24(rgb_dst + i * 3, c->pal[p]);
+            }
         }
-        dst  += stride;
-        mask += mask_stride;
+        dst     += stride;
+        mask    += mask_stride;
+        rgb_dst += c->rgb_stride;
     }
 
     return 0;
 }
 
-static av_cold void codec_init(MSS12Context *ctx)
+static av_cold void codec_init(MSS12Context *c, int version)
 {
-    model_init(&ctx->intra_region, 2, THRESH_ADAPTIVE);
-    model_init(&ctx->inter_region, 2, THRESH_ADAPTIVE);
-    model_init(&ctx->split_mode,   3, THRESH_HIGH);
-    model_init(&ctx->edge_mode,    2, THRESH_HIGH);
-    model_init(&ctx->pivot,        3, THRESH_LOW);
-    pixctx_init(&ctx->intra_pix_ctx, 8);
-    pixctx_init(&ctx->inter_pix_ctx, 2);
-    ctx->corrupted = 1;
+    int i;
+    for (i = 0; i < (c->slice_split ? 2 : 1); i++) {
+        c->sc[i].c = c;
+        model_init(&c->sc[i].intra_region, 2, THRESH_ADAPTIVE);
+        model_init(&c->sc[i].inter_region, 2, THRESH_ADAPTIVE);
+        model_init(&c->sc[i].split_mode,   3, THRESH_HIGH);
+        model_init(&c->sc[i].edge_mode,    2, THRESH_HIGH);
+        model_init(&c->sc[i].pivot,        3, THRESH_LOW);
+
+        pixctx_init(&c->sc[i].intra_pix_ctx, 8, c->full_model_syms, 0);
+
+        pixctx_init(&c->sc[i].inter_pix_ctx, version ? 3 : 2,
+                    c->full_model_syms, version ? 1 : 0);
+    }
+    c->corrupted = 1;
 }
 
-void ff_mss12_codec_reset(MSS12Context *ctx)
+void ff_mss12_codec_reset(MSS12Context *c)
 {
-    model_reset(&ctx->intra_region);
-    model_reset(&ctx->inter_region);
-    model_reset(&ctx->split_mode);
-    model_reset(&ctx->edge_mode);
-    model_reset(&ctx->pivot);
-    pixctx_reset(&ctx->intra_pix_ctx);
-    pixctx_reset(&ctx->inter_pix_ctx);
-
-    ctx->corrupted = 0;
+    int i;
+    for (i = 0; i < (c->slice_split ? 2 : 1); i++) {
+        model_reset(&c->sc[i].intra_region);
+        model_reset(&c->sc[i].inter_region);
+        model_reset(&c->sc[i].split_mode);
+        model_reset(&c->sc[i].edge_mode);
+        model_reset(&c->sc[i].pivot);
+        pixctx_reset(&c->sc[i].intra_pix_ctx);
+        pixctx_reset(&c->sc[i].inter_pix_ctx);
+    }
+
+    c->corrupted = 0;
 }
 
-static int decode_pivot(MSS12Context *ctx, ArithCoder *acoder, int base)
+static int decode_pivot(SliceContext *sc, ArithCoder *acoder, int base)
 {
     int val, inv;
 
-    inv = acoder->get_model_sym(acoder, &ctx->edge_mode);
-    val = acoder->get_model_sym(acoder, &ctx->pivot) + 1;
+    inv = acoder->get_model_sym(acoder, &sc->edge_mode);
+    val = acoder->get_model_sym(acoder, &sc->pivot) + 1;
 
     if (val > 2) {
-        if ((base + 1) / 2 - 2 <= 0) {
-            ctx->corrupted = 1;
-            return 0;
-        }
+        if ((base + 1) / 2 - 2 <= 0)
+            return -1;
+
         val = acoder->get_number(acoder, (base + 1) / 2 - 2) + 3;
     }
 
-    if ((unsigned)val >= base) {
-        ctx->corrupted = 1;
-        return 0;
-    }
+    if ((unsigned)val >= base)
+        return -1;
 
     return inv ? base - val : val;
 }
 
-static int decode_region_intra(MSS12Context *ctx, ArithCoder *acoder,
+static int decode_region_intra(SliceContext *sc, ArithCoder *acoder,
                                int x, int y, int width, int height)
 {
+    MSS12Context const *c = sc->c;
     int mode;
 
-    mode = acoder->get_model_sym(acoder, &ctx->intra_region);
+    mode = acoder->get_model_sym(acoder, &sc->intra_region);
 
     if (!mode) {
-        int i, pix;
-        int stride = ctx->pic_stride;
-        uint8_t *dst = ctx->pic_start + x + y * stride;
-
-        pix = decode_top_left_pixel(acoder, &ctx->intra_pix_ctx);
-        for (i = 0; i < height; i++, dst += stride)
+        int i, j, pix, rgb_pix;
+        int stride       = c->pal_stride;
+        int rgb_stride   = c->rgb_stride;
+        uint8_t *dst     = c->pal_pic + x     + y * stride;
+        uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * rgb_stride;
+
+        pix     = decode_top_left_pixel(acoder, &sc->intra_pix_ctx);
+        rgb_pix = c->pal[pix];
+        for (i = 0; i < height; i++, dst += stride, rgb_dst += rgb_stride) {
             memset(dst, pix, width);
+            if (c->rgb_pic)
+                for (j = 0; j < width * 3; j += 3)
+                    AV_WB24(rgb_dst + j, rgb_pix);
+        }
     } else {
-        return decode_region(ctx, acoder, ctx->pic_start,
-                             x, y, width, height, ctx->pic_stride,
-                             &ctx->intra_pix_ctx);
+        return decode_region(acoder, c->pal_pic, c->rgb_pic,
+                             x, y, width, height, c->pal_stride, c->rgb_stride,
+                             &sc->intra_pix_ctx, &c->pal[0]);
     }
 
     return 0;
 }
 
-static int decode_region_inter(MSS12Context *ctx, ArithCoder *acoder,
+static int decode_region_inter(SliceContext *sc, ArithCoder *acoder,
                                int x, int y, int width, int height)
 {
+    MSS12Context const *c = sc->c;
     int mode;
 
-    mode = acoder->get_model_sym(acoder, &ctx->inter_region);
+    mode = acoder->get_model_sym(acoder, &sc->inter_region);
 
     if (!mode) {
-        mode = decode_top_left_pixel(acoder, &ctx->inter_pix_ctx);
-        if (mode != 0xFF) {
-            return 0;
-        } else {
-            return decode_region_intra(ctx, acoder, x, y, width, height);
-        }
+        mode = decode_top_left_pixel(acoder, &sc->inter_pix_ctx);
+
+        if (c->avctx->err_recognition & AV_EF_EXPLODE &&
+            ( c->rgb_pic && mode != 0x01 && mode != 0x02 && mode != 0x04 ||
+             !c->rgb_pic && mode != 0x80 && mode != 0xFF))
+            return -1;
+
+        if (mode == 0x02)
+            copy_rectangles(c, x, y, width, height);
+        else if (mode == 0x04)
+            return motion_compensation(c, x, y, width, height);
+        else if (mode != 0x80)
+            return decode_region_intra(sc, acoder, x, y, width, height);
     } else {
-        if (decode_region(ctx, acoder, ctx->mask,
-                          x, y, width, height, ctx->mask_linesize,
-                          &ctx->inter_pix_ctx) < 0)
+        if (decode_region(acoder, c->mask, NULL,
+                          x, y, width, height, c->mask_stride, 0,
+                          &sc->inter_pix_ctx, &c->pal[0]) < 0)
             return -1;
-        return decode_region_masked(ctx, acoder, ctx->pic_start,
-                                    ctx->pic_stride, ctx->mask,
-                                    ctx->mask_linesize,
+        return decode_region_masked(c, acoder, c->pal_pic,
+                                    c->pal_stride, c->mask,
+                                    c->mask_stride,
                                     x, y, width, height,
-                                    &ctx->intra_pix_ctx);
+                                    &sc->intra_pix_ctx);
     }
 
     return 0;
 }
 
-int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder,
+int ff_mss12_decode_rect(SliceContext *sc, ArithCoder *acoder,
                          int x, int y, int width, int height)
 {
     int mode, pivot;
 
-    if (ctx->corrupted)
-        return -1;
-
-    mode = acoder->get_model_sym(acoder, &ctx->split_mode);
+    mode = acoder->get_model_sym(acoder, &sc->split_mode);
 
     switch (mode) {
     case SPLIT_VERT:
-        pivot = decode_pivot(ctx, acoder, height);
-        if (ff_mss12_decode_rect(ctx, acoder, x, y, width, pivot))
+        if ((pivot = decode_pivot(sc, acoder, height)) < 1)
+            return -1;
+        if (ff_mss12_decode_rect(sc, acoder, x, y, width, pivot))
             return -1;
-        if (ff_mss12_decode_rect(ctx, acoder, x, y + pivot, width, height - pivot))
+        if (ff_mss12_decode_rect(sc, acoder, x, y + pivot, width, height - pivot))
             return -1;
         break;
     case SPLIT_HOR:
-        pivot = decode_pivot(ctx, acoder, width);
-        if (ff_mss12_decode_rect(ctx, acoder, x, y, pivot, height))
+        if ((pivot = decode_pivot(sc, acoder, width)) < 1)
             return -1;
-        if (ff_mss12_decode_rect(ctx, acoder, x + pivot, y, width - pivot, height))
+        if (ff_mss12_decode_rect(sc, acoder, x, y, pivot, height))
+            return -1;
+        if (ff_mss12_decode_rect(sc, acoder, x + pivot, y, width - pivot, height))
             return -1;
         break;
     case SPLIT_NONE:
-        if (ctx->keyframe)
-            return decode_region_intra(ctx, acoder, x, y, width, height);
+        if (sc->c->keyframe)
+            return decode_region_intra(sc, acoder, x, y, width, height);
         else
-            return decode_region_inter(ctx, acoder, x, y, width, height);
+            return decode_region_inter(sc, acoder, x, y, width, height);
     default:
         return -1;
     }
@@ -505,13 +595,11 @@ int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder,
     return 0;
 }
 
-av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version)
+av_cold int ff_mss12_decode_init(MSS12Context *c, int version)
 {
-    MSS12Context * const c = avctx->priv_data;
+    AVCodecContext *avctx = c->avctx;
     int i;
 
-    c->avctx = avctx;
-
     if (avctx->extradata_size < 52 + 256 * 3) {
         av_log(avctx, AV_LOG_ERROR, "Insufficient extradata size %d\n",
                avctx->extradata_size);
@@ -526,9 +614,23 @@ av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version)
         return AVERROR_INVALIDDATA;
     }
 
+    avctx->coded_width  = AV_RB32(avctx->extradata + 20);
+    avctx->coded_height = AV_RB32(avctx->extradata + 24);
+    if (avctx->coded_width > 4096 || avctx->coded_height > 4096) {
+        av_log(avctx, AV_LOG_ERROR, "Frame dimensions %dx%d too large\n",
+               avctx->coded_width, avctx->coded_height);
+        return AVERROR_INVALIDDATA;
+    }
+
     av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d\n",
            AV_RB32(avctx->extradata + 4), AV_RB32(avctx->extradata + 8));
-    c->free_colours     = AV_RB32(avctx->extradata + 48);
+    if (version != AV_RB32(avctx->extradata + 4) > 1) { /* != (major > 1) */
+        av_log(avctx, AV_LOG_ERROR,
+               "Header version doesn't match codec tag\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    c->free_colours = AV_RB32(avctx->extradata + 48);
     if ((unsigned)c->free_colours > 256) {
         av_log(avctx, AV_LOG_ERROR,
                "Incorrect number of changeable palette entries: %d\n",
@@ -536,8 +638,6 @@ av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version)
         return AVERROR_INVALIDDATA;
     }
     av_log(avctx, AV_LOG_DEBUG, "%d free colour(s)\n", c->free_colours);
-    avctx->coded_width  = AV_RB32(avctx->extradata + 20);
-    avctx->coded_height = AV_RB32(avctx->extradata + 24);
 
     av_log(avctx, AV_LOG_DEBUG, "Display dimensions %dx%d\n",
            AV_RB32(avctx->extradata + 12), AV_RB32(avctx->extradata + 16));
@@ -554,27 +654,49 @@ av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version)
     av_log(avctx, AV_LOG_DEBUG, "Max. seek time %g ms\n",
            av_int2float(AV_RB32(avctx->extradata + 44)));
 
-    for (i = 0; i < 256; i++)
-        c->pal[i] = 0xFF << 24 | AV_RB24(avctx->extradata + 52 + i * 3);
+    if (version) {
+        if (avctx->extradata_size < 60 + 256 * 3) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Insufficient extradata size %d for v2\n",
+                   avctx->extradata_size);
+            return AVERROR_INVALIDDATA;
+        }
 
-    avctx->pix_fmt = PIX_FMT_PAL8;
+        c->slice_split = AV_RB32(avctx->extradata + 52);
+        av_log(avctx, AV_LOG_DEBUG, "Slice split %d\n", c->slice_split);
 
-    c->mask_linesize = FFALIGN(avctx->width, 16);
-    c->mask          = av_malloc(c->mask_linesize * avctx->height);
+        c->full_model_syms = AV_RB32(avctx->extradata + 56);
+        if (c->full_model_syms < 2 || c->full_model_syms > 256) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Incorrect number of used colours %d\n",
+                   c->full_model_syms);
+            return AVERROR_INVALIDDATA;
+        }
+        av_log(avctx, AV_LOG_DEBUG, "Used colours %d\n",
+               c->full_model_syms);
+    } else {
+        c->slice_split     = 0;
+        c->full_model_syms = 256;
+    }
+
+    for (i = 0; i < 256; i++)
+        c->pal[i] = 0xFF << 24 | AV_RB24(avctx->extradata + 52 +
+                            (version ? 8 : 0) + i * 3);
+
+    c->mask_stride = FFALIGN(avctx->width, 16);
+    c->mask        = av_malloc(c->mask_stride * avctx->height);
     if (!c->mask) {
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate mask plane\n");
         return AVERROR(ENOMEM);
     }
 
-    codec_init(c);
+    codec_init(c, version);
 
     return 0;
 }
 
-av_cold int ff_mss12_decode_end(AVCodecContext *avctx)
+av_cold int ff_mss12_decode_end(MSS12Context *c)
 {
-    MSS12Context * const c = avctx->priv_data;
-
     av_freep(&c->mask);
 
     return 0;
diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h
index c1c316044d87063d817ea74ec79db4554648a511..9068651e06ecfbe67e7fbf7280bbafc04f528d83 100644
--- a/libavcodec/mss12.h
+++ b/libavcodec/mss12.h
@@ -26,8 +26,10 @@
 #ifndef AVCODEC_MSS12_H
 #define AVCODEC_MSS12_H
 
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "get_bits.h"
+#include "bytestream.h"
 
 #define MODEL_MIN_SYMS    2
 #define MODEL_MAX_SYMS  256
@@ -46,7 +48,10 @@ typedef struct Model {
 
 typedef struct ArithCoder {
     int low, high, value;
-    GetBitContext *gb;
+    union {
+        GetBitContext *gb;
+        GetByteContext *gB;
+    } gbc;
     int (*get_model_sym)(struct ArithCoder *c, Model *m);
     int (*get_number)   (struct ArithCoder *c, int n);
 } ArithCoder;
@@ -56,28 +61,77 @@ typedef struct PixContext {
     uint8_t cache[12];
     Model cache_model, full_model;
     Model sec_models[4][8][4];
+    int special_initial_cache;
 } PixContext;
 
+struct MSS12Context;
+
+typedef struct SliceContext {
+    struct MSS12Context *c;
+    Model      intra_region, inter_region;
+    Model      pivot, edge_mode, split_mode;
+    PixContext intra_pix_ctx, inter_pix_ctx;
+} SliceContext;
+
 typedef struct MSS12Context {
     AVCodecContext *avctx;
-    uint8_t        *pic_start;
-    int            pic_stride;
-    uint8_t        *mask;
-    int            mask_linesize;
     uint32_t       pal[256];
+    uint8_t        *pal_pic;
+    uint8_t        *last_pal_pic;
+    int            pal_stride;
+    uint8_t        *mask;
+    int            mask_stride;
+    uint8_t        *rgb_pic;
+    uint8_t        *last_rgb_pic;
+    int            rgb_stride;
     int            free_colours;
     int            keyframe;
     Model          intra_region, inter_region;
     Model          pivot, edge_mode, split_mode;
     PixContext     intra_pix_ctx, inter_pix_ctx;
+    int            mvX, mvY;
     int            corrupted;
+    int            slice_split;
+    int            full_model_syms;
+    SliceContext   sc[2];
 } MSS12Context;
 
-int ff_mss12_decode_rect(MSS12Context *ctx, ArithCoder *acoder,
+int ff_mss12_decode_rect(SliceContext *ctx, ArithCoder *acoder,
                          int x, int y, int width, int height);
 void ff_mss12_model_update(Model *m, int val);
 void ff_mss12_codec_reset(MSS12Context *ctx);
-av_cold int ff_mss12_decode_init(AVCodecContext *avctx, int version);
-av_cold int ff_mss12_decode_end(AVCodecContext *avctx);
+av_cold int ff_mss12_decode_init(MSS12Context *ctx, int version);
+av_cold int ff_mss12_decode_end(MSS12Context *ctx);
+
+#define ARITH_GET_BIT(VERSION)                                          \
+static int arith ## VERSION ## _get_bit(ArithCoder *c)                  \
+{                                                                       \
+    int range = c->high - c->low + 1;                                   \
+    int bit   = (((c->value - c->low) << 1) + 1) / range;               \
+                                                                        \
+    if (bit)                                                            \
+        c->low += range >> 1;                                           \
+    else                                                                \
+        c->high = c->low + (range >> 1) - 1;                            \
+                                                                        \
+    arith ## VERSION ## _normalise(c);                                  \
+                                                                        \
+    return bit;                                                         \
+}
+
+#define ARITH_GET_MODEL_SYM(VERSION)                                    \
+static int arith ## VERSION ## _get_model_sym(ArithCoder *c, Model *m)  \
+{                                                                       \
+    int idx, val;                                                       \
+                                                                        \
+    idx = arith ## VERSION ## _get_prob(c, m->cum_prob);                \
+                                                                        \
+    val = m->idx2sym[idx];                                              \
+    ff_mss12_model_update(m, idx);                                      \
+                                                                        \
+    arith ## VERSION ## _normalise(c);                                  \
+                                                                        \
+    return val;                                                         \
+}
 
 #endif /* AVCODEC_MSS12_H */
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
new file mode 100644
index 0000000000000000000000000000000000000000..886da055405109b98625c1948724449fac86f055
--- /dev/null
+++ b/libavcodec/mss2.c
@@ -0,0 +1,864 @@
+/*
+ * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
+ */
+
+#include "libavutil/avassert.h"
+#include "msmpeg4data.h"
+#include "vc1.h"
+#include "mss12.h"
+#include "mss2dsp.h"
+
+typedef struct MSS2Context {
+    VC1Context     v;
+    int            split_position;
+    AVFrame        pic;
+    AVFrame        last_pic;
+    MSS12Context   c;
+    MSS2DSPContext dsp;
+    SliceContext   sc[2];
+} MSS2Context;
+
+static void arith2_normalise(ArithCoder *c)
+{
+    while ((c->high >> 15) - (c->low >> 15) < 2) {
+        if ((c->low ^ c->high) & 0x10000) {
+            c->high  ^= 0x8000;
+            c->value ^= 0x8000;
+            c->low   ^= 0x8000;
+        }
+        c->high  = c->high  << 8 & 0xFFFFFF | 0xFF;
+        c->value = c->value << 8 & 0xFFFFFF | bytestream2_get_byte(c->gbc.gB);
+        c->low   = c->low   << 8 & 0xFFFFFF;
+    }
+}
+
+ARITH_GET_BIT(2)
+
+/* L. Stuiver and A. Moffat: "Piecewise Integer Mapping for Arithmetic Coding."
+ * In Proc. 8th Data Compression Conference (DCC '98), pp. 3-12, Mar. 1998 */
+
+static int arith2_get_scaled_value(int value, int n, int range)
+{
+    int split = (n << 1) - range;
+
+    if (value > split)
+        return split + (value - split >> 1);
+    else
+        return value;
+}
+
+static void arith2_rescale_interval(ArithCoder *c, int range,
+                                    int low, int high, int n)
+{
+    int split = (n << 1) - range;
+
+    if (high > split)
+        c->high = split + (high - split << 1);
+    else
+        c->high = high;
+
+    c->high += c->low - 1;
+
+    if (low > split)
+        c->low += split + (low - split << 1);
+    else
+        c->low += low;
+}
+
+static int arith2_get_number(ArithCoder *c, int n)
+{
+    int range = c->high - c->low + 1;
+    int scale = av_log2(range) - av_log2(n);
+    int val;
+
+    if (n << scale > range)
+        scale--;
+
+    n <<= scale;
+
+    val = arith2_get_scaled_value(c->value - c->low, n, range) >> scale;
+
+    arith2_rescale_interval(c, range, val << scale, (val + 1) << scale, n);
+
+    arith2_normalise(c);
+
+    return val;
+}
+
+static int arith2_get_prob(ArithCoder *c, int *probs)
+{
+    int range = c->high - c->low + 1, n = *probs;
+    int scale = av_log2(range) - av_log2(n);
+    int i     = 0, val;
+
+    if (n << scale > range)
+        scale--;
+
+    n <<= scale;
+
+    val = arith2_get_scaled_value(c->value - c->low, n, range) >> scale;
+    while (probs[++i] > val) ;
+
+    arith2_rescale_interval(c, range,
+                            probs[i] << scale, probs[i - 1] << scale, n);
+
+    return i;
+}
+
+ARITH_GET_MODEL_SYM(2)
+
+static int arith2_get_consumed_bytes(ArithCoder *c)
+{
+    int diff = (c->high >> 16) - (c->low >> 16);
+    int bp   = bytestream2_tell(c->gbc.gB) - 3 << 3;
+    int bits = 1;
+
+    while (!(diff & 0x80)) {
+        bits++;
+        diff <<= 1;
+    }
+
+    return (bits + bp + 7 >> 3) + ((c->low >> 16) + 1 == c->high >> 16);
+}
+
+static void arith2_init(ArithCoder *c, GetByteContext *gB)
+{
+    c->low           = 0;
+    c->high          = 0xFFFFFF;
+    c->value         = bytestream2_get_be24(gB);
+    c->gbc.gB        = gB;
+    c->get_model_sym = arith2_get_model_sym;
+    c->get_number    = arith2_get_number;
+}
+
+static int decode_pal_v2(MSS12Context *ctx, const uint8_t *buf, int buf_size)
+{
+    int i, ncol;
+    uint32_t *pal = ctx->pal + 256 - ctx->free_colours;
+    /* Returns the number of bytes consumed, or -1 on invalid data. */
+    if (!ctx->free_colours)
+        return 0;
+    /* More than free_colours entries would write past ctx->pal[255]. */
+    ncol = *buf++;
+    if (ncol > ctx->free_colours || buf_size < 2 + ncol * 3)
+        return -1;
+    for (i = 0; i < ncol; i++)
+        *pal++ = AV_RB24(buf + 3 * i);
+
+    return 1 + ncol * 3;
+}
+
+static int decode_555(GetByteContext *gB, uint16_t *dst, int stride,
+                      int keyframe, int w, int h)
+{
+    int last_symbol = 0, repeat = 0, prev_avail = 0;
+    /* Returns 0 on success, -1 on a bad update rectangle or run length. */
+    if (!keyframe) {
+        int x, y, endx, endy, t;
+#define READ_PAIR(a, b)                 \
+    a  = bytestream2_get_byte(gB) << 4; \
+    t  = bytestream2_get_byte(gB);      \
+    a |= t >> 4;                        \
+    b  = (t & 0xF) << 8;                \
+    b |= bytestream2_get_byte(gB);      \
+
+        READ_PAIR(x, endx) /* two 12-bit values packed into three bytes */
+        READ_PAIR(y, endy)
+        if (endx >= w || endy >= h || x > endx || y > endy)
+            return -1;
+        dst += x + stride * y;
+        w    = endx - x + 1;
+        h    = endy - y + 1;
+        if (y)
+            prev_avail = 1;
+    }
+
+    do {
+        uint16_t *p = dst;
+        do {
+            if (repeat-- < 1) {
+                int b = bytestream2_get_byte(gB);
+                if (b < 128)
+                    last_symbol = b << 8 | bytestream2_get_byte(gB);
+                else if (b > 129) {
+                    repeat = 0;
+                    while (b-- > 130) {
+                        if (repeat >= 0x7FFFFF) /* shift would overflow */
+                            return -1;
+                        repeat = (repeat << 8) + bytestream2_get_byte(gB) + 1;
+                    }
+                    if (last_symbol == -2) {
+                        int skip = FFMIN((unsigned)repeat, dst + w - p);
+                        repeat -= skip;
+                        p      += skip;
+                    }
+                } else
+                    last_symbol = 127 - b;
+            }
+            if (last_symbol >= 0)
+                *p = last_symbol;
+            else if (last_symbol == -1 && prev_avail)
+                *p = *(p - stride);
+        } while (++p < dst + w);
+        dst       += stride;
+        prev_avail = 1;
+    } while (--h);
+    return 0;
+}
+
+static int decode_rle(GetBitContext *gb, uint8_t *pal_dst, int pal_stride,
+                      uint8_t *rgb_dst, int rgb_stride, uint32_t *pal,
+                      int keyframe, int kf_slipt, int slice, int w, int h)
+{
+    uint8_t bits[270] = { 0 };
+    uint32_t codes[270];
+    VLC vlc;
+
+    int current_length = 0, read_codes = 0, next_code = 0, current_codes = 0;
+    int remaining_codes, surplus_codes, i;
+
+    const int alphabet_size = 270 - keyframe;
+
+    int last_symbol = 0, repeat = 0, prev_avail = 0;
+
+    if (!keyframe) {
+        int x, y, clipw, cliph;
+
+        x     = get_bits(gb, 12);
+        y     = get_bits(gb, 12);
+        clipw = get_bits(gb, 12) + 1;
+        cliph = get_bits(gb, 12) + 1;
+
+        if (x + clipw > w || y + cliph > h)
+            return AVERROR_INVALIDDATA;
+        pal_dst += pal_stride * y + x;
+        rgb_dst += rgb_stride * y + x * 3;
+        w        = clipw;
+        h        = cliph;
+        if (y)
+            prev_avail = 1;
+    } else {
+        if (slice > 0) {
+            pal_dst   += pal_stride * kf_slipt;
+            rgb_dst   += rgb_stride * kf_slipt;
+            prev_avail = 1;
+            h         -= kf_slipt;
+        } else
+            h = kf_slipt;
+    }
+
+    /* read explicit codes */
+    do {
+        while (current_codes--) {
+            int symbol = get_bits(gb, 8);
+            if (symbol >= 204 - keyframe)
+                symbol += 14 - keyframe;
+            else if (symbol > 189)
+                symbol = get_bits1(gb) + (symbol << 1) - 190;
+            if (bits[symbol])
+                return AVERROR_INVALIDDATA;
+            bits[symbol]  = current_length;
+            codes[symbol] = next_code++;
+            read_codes++;
+        }
+        current_length++;
+        next_code     <<= 1;
+        remaining_codes = (1 << current_length) - next_code;
+        current_codes   = get_bits(gb, av_ceil_log2(remaining_codes + 1));
+        if (current_length > 22 || current_codes > remaining_codes)
+            return AVERROR_INVALIDDATA;
+    } while (current_codes != remaining_codes);
+
+    remaining_codes = alphabet_size - read_codes;
+
+    /* determine the minimum length to fit the rest of the alphabet */
+    while ((surplus_codes = (2 << current_length) -
+                            (next_code << 1) - remaining_codes) < 0) {
+        current_length++;
+        next_code <<= 1;
+    }
+
+    /* add the rest of the symbols lexicographically */
+    for (i = 0; i < alphabet_size; i++)
+        if (!bits[i]) {
+            if (surplus_codes-- == 0) {
+                current_length++;
+                next_code <<= 1;
+            }
+            bits[i]  = current_length;
+            codes[i] = next_code++;
+        }
+
+    if (next_code != 1 << current_length)
+        return AVERROR_INVALIDDATA;
+
+    if (i = init_vlc(&vlc, 9, alphabet_size, bits, 1, 1, codes, 4, 4, 0))
+        return i;
+
+    /* frame decode */
+    do {
+        uint8_t *pp = pal_dst;
+        uint8_t *rp = rgb_dst;
+        do {
+            if (repeat-- < 1) {
+                int b = get_vlc2(gb, vlc.table, 9, 3);
+                if (b < 256)
+                    last_symbol = b;
+                else if (b < 268) {
+                    b -= 256;
+                    if (b == 11)
+                        b = get_bits(gb, 4) + 10;
+
+                    if (!b)
+                        repeat = 0;
+                    else
+                        repeat = get_bits(gb, b);
+
+                    while (b--)
+                        repeat += 1 << b;
+
+                    if (last_symbol == -2) {
+                        int skip = FFMIN(repeat, pal_dst + w - pp);
+                        repeat -= skip;
+                        pp     += skip;
+                        rp     += skip * 3;
+                    }
+                } else
+                    last_symbol = 267 - b;
+            }
+            if (last_symbol >= 0) {
+                *pp = last_symbol;
+                AV_WB24(rp, pal[last_symbol]);
+            } else if (last_symbol == -1 && prev_avail) {
+                *pp = *(pp - pal_stride);
+                memcpy(rp, rp - rgb_stride, 3);
+            }
+            rp += 3;
+        } while (++pp < pal_dst + w);
+        pal_dst   += pal_stride;
+        rgb_dst   += rgb_stride;
+        prev_avail = 1;
+    } while (--h);
+
+    ff_free_vlc(&vlc);
+    return 0;
+}
+
+static int decode_wmv9(AVCodecContext *avctx, const uint8_t *buf, int buf_size,
+                       int x, int y, int w, int h, int wmv9_mask)
+{
+    MSS2Context *ctx  = avctx->priv_data;
+    MSS12Context *c   = &ctx->c;
+    VC1Context *v     = avctx->priv_data;
+    MpegEncContext *s = &v->s;
+    AVFrame *f;
+
+    ff_mpeg_flush(avctx);
+
+    if (s->current_picture_ptr == NULL || s->current_picture_ptr->f.data[0]) {
+        int i = ff_find_unused_picture(s, 0);
+        if (i < 0)
+            return -1;
+        s->current_picture_ptr = &s->picture[i];
+    }
+
+    init_get_bits(&s->gb, buf, buf_size * 8);
+
+    s->loop_filter = avctx->skip_loop_filter < AVDISCARD_ALL;
+
+    if (ff_vc1_parse_frame_header(v, &s->gb) == -1) {
+        av_log(v->s.avctx, AV_LOG_ERROR, "header error\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (s->pict_type != AV_PICTURE_TYPE_I) {
+        av_log(v->s.avctx, AV_LOG_ERROR, "expected I-frame\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    avctx->pix_fmt = PIX_FMT_YUV420P;
+
+    if (ff_MPV_frame_start(s, avctx) < 0) {
+        av_log(v->s.avctx, AV_LOG_ERROR, "ff_MPV_frame_start error\n");
+        avctx->pix_fmt = PIX_FMT_RGB24;
+        return -1;
+    }
+
+    ff_er_frame_start(s);
+
+    v->bits = buf_size * 8;
+
+    v->end_mb_x = (w + 15) >> 4;
+    s->end_mb_y = (h + 15) >> 4;
+    if (v->respic & 1)
+        v->end_mb_x = v->end_mb_x + 1 >> 1;
+    if (v->respic & 2)
+        s->end_mb_y = s->end_mb_y + 1 >> 1;
+
+    ff_vc1_decode_blocks(v);
+
+    ff_er_frame_end(s);
+
+    ff_MPV_frame_end(s);
+
+    f = &s->current_picture.f;
+
+    if (v->respic == 3) {
+        ctx->dsp.upsample_plane(f->data[0], f->linesize[0], w,      h);
+        ctx->dsp.upsample_plane(f->data[1], f->linesize[1], w >> 1, h >> 1);
+        ctx->dsp.upsample_plane(f->data[2], f->linesize[2], w >> 1, h >> 1);
+    } else if (v->respic)
+        av_log_ask_for_sample(v->s.avctx,
+                              "Asymmetric WMV9 rectangle subsampling\n");
+
+    av_assert0(f->linesize[1] == f->linesize[2]);
+
+    if (wmv9_mask != -1)
+        ctx->dsp.mss2_blit_wmv9_masked(c->rgb_pic + y * c->rgb_stride + x * 3,
+                                       c->rgb_stride, wmv9_mask,
+                                       c->pal_pic + y * c->pal_stride + x,
+                                       c->pal_stride,
+                                       f->data[0], f->linesize[0],
+                                       f->data[1], f->data[2], f->linesize[1],
+                                       w, h);
+    else
+        ctx->dsp.mss2_blit_wmv9(c->rgb_pic + y * c->rgb_stride + x * 3,
+                                c->rgb_stride,
+                                f->data[0], f->linesize[0],
+                                f->data[1], f->data[2], f->linesize[1],
+                                w, h);
+
+    avctx->pix_fmt = PIX_FMT_RGB24;
+
+    return 0;
+}
+
+typedef struct Rectangle {
+    int coded, x, y, w, h; /* coded: decoded as WMV9, otherwise gray-filled */
+} Rectangle;
+
+#define MAX_WMV9_RECTANGLES 20 /* capacity of the per-frame rectangle list */
+#define ARITH2_PADDING 2 /* bytes past the packet the byte reader may touch */
+
+static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                             AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    MSS2Context *ctx = avctx->priv_data;
+    MSS12Context *c  = &ctx->c;
+    GetBitContext gb;
+    GetByteContext gB;
+    ArithCoder acoder;
+
+    int keyframe, has_wmv9, has_mv, is_rle, is_555, ret;
+
+    Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
+    int used_rects = 0, i, implicit_rect, av_uninit(wmv9_mask);
+
+    av_assert0(FF_INPUT_BUFFER_PADDING_SIZE >=
+               ARITH2_PADDING + (MIN_CACHE_BITS + 7) / 8);
+
+    init_get_bits(&gb, buf, buf_size * 8);
+
+    if (keyframe = get_bits1(&gb))
+        skip_bits(&gb, 7);
+    has_wmv9 = get_bits1(&gb);
+    has_mv   = keyframe ? 0 : get_bits1(&gb);
+    is_rle   = get_bits1(&gb);
+    is_555   = is_rle && get_bits1(&gb);
+    if (c->slice_split > 0)
+        ctx->split_position = c->slice_split;
+    else if (c->slice_split < 0) {
+        if (get_bits1(&gb)) {
+            if (get_bits1(&gb)) {
+                if (get_bits1(&gb))
+                    ctx->split_position = get_bits(&gb, 16);
+                else
+                    ctx->split_position = get_bits(&gb, 12);
+            } else
+                ctx->split_position = get_bits(&gb, 8) << 4;
+        } else {
+            if (keyframe)
+                ctx->split_position = avctx->height / 2;
+        }
+    } else
+        ctx->split_position = avctx->height;
+
+    if (c->slice_split && (ctx->split_position < 1 - is_555 ||
+                           ctx->split_position > avctx->height - 1))
+        return AVERROR_INVALIDDATA;
+
+    align_get_bits(&gb);
+    buf      += get_bits_count(&gb) >> 3;
+    buf_size -= get_bits_count(&gb) >> 3;
+
+    if (buf_size < 1)
+        return AVERROR_INVALIDDATA;
+
+    if (is_555 && (has_wmv9 || has_mv || c->slice_split && ctx->split_position))
+        return AVERROR_INVALIDDATA;
+
+    avctx->pix_fmt = is_555 ? PIX_FMT_RGB555 : PIX_FMT_RGB24;
+    if (ctx->pic.data[0] && ctx->pic.format != avctx->pix_fmt)
+        avctx->release_buffer(avctx, &ctx->pic);
+
+    if (has_wmv9) {
+        bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING);
+        arith2_init(&acoder, &gB);
+
+        implicit_rect = !arith2_get_bit(&acoder);
+
+        while (arith2_get_bit(&acoder)) {
+            if (used_rects == MAX_WMV9_RECTANGLES)
+                return AVERROR_INVALIDDATA;
+            r = &wmv9rects[used_rects];
+            if (!used_rects)
+                r->x = arith2_get_number(&acoder, avctx->width);
+            else
+                r->x = arith2_get_number(&acoder, avctx->width -
+                                         wmv9rects[used_rects - 1].x) +
+                       wmv9rects[used_rects - 1].x;
+            r->y = arith2_get_number(&acoder, avctx->height);
+            r->w = arith2_get_number(&acoder, avctx->width  - r->x) + 1;
+            r->h = arith2_get_number(&acoder, avctx->height - r->y) + 1;
+            used_rects++;
+        }
+
+        if (implicit_rect && used_rects) {
+            av_log(avctx, AV_LOG_ERROR, "implicit_rect && used_rects > 0\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (implicit_rect) {
+            wmv9rects[0].x = 0;
+            wmv9rects[0].y = 0;
+            wmv9rects[0].w = avctx->width;
+            wmv9rects[0].h = avctx->height;
+
+            used_rects = 1;
+        }
+        for (i = 0; i < used_rects; i++) {
+            if (!implicit_rect && arith2_get_bit(&acoder)) {
+                av_log(avctx, AV_LOG_ERROR, "Unexpected grandchildren\n");
+                return AVERROR_INVALIDDATA;
+            }
+            if (!i) {
+                wmv9_mask = arith2_get_bit(&acoder) - 1;
+                if (!wmv9_mask)
+                    wmv9_mask = arith2_get_number(&acoder, 256);
+            }
+            wmv9rects[i].coded = arith2_get_number(&acoder, 2);
+        }
+
+        buf      += arith2_get_consumed_bytes(&acoder);
+        buf_size -= arith2_get_consumed_bytes(&acoder);
+        if (buf_size < 1)
+            return AVERROR_INVALIDDATA;
+    }
+
+    c->mvX = c->mvY = 0;
+    if (keyframe && !is_555) {
+        if ((i = decode_pal_v2(c, buf, buf_size)) < 0)
+            return AVERROR_INVALIDDATA;
+        buf      += i;
+        buf_size -= i;
+    } else if (has_mv) {
+        buf      += 4;
+        buf_size -= 4;
+        if (buf_size < 1)
+            return AVERROR_INVALIDDATA;
+        c->mvX = AV_RB16(buf - 4) - avctx->width;
+        c->mvY = AV_RB16(buf - 2) - avctx->height;
+    }
+
+    if (c->mvX < 0 || c->mvY < 0) {
+        FFSWAP(AVFrame, ctx->pic, ctx->last_pic);
+        FFSWAP(uint8_t *, c->pal_pic, c->last_pal_pic);
+
+        if (ctx->pic.data[0])
+            avctx->release_buffer(avctx, &ctx->pic);
+
+        ctx->pic.reference    = 3;
+        ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID    |
+                                FF_BUFFER_HINTS_READABLE |
+                                FF_BUFFER_HINTS_PRESERVE |
+                                FF_BUFFER_HINTS_REUSABLE;
+
+        if ((ret = avctx->get_buffer(avctx, &ctx->pic)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            return ret;
+        }
+
+        if (ctx->last_pic.data[0]) {
+            av_assert0(ctx->pic.linesize[0] == ctx->last_pic.linesize[0]);
+            c->last_rgb_pic = ctx->last_pic.data[0] +
+                              ctx->last_pic.linesize[0] * (avctx->height - 1);
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Missing keyframe\n");
+            return -1;
+        }
+    } else {
+        if (ctx->last_pic.data[0])
+            avctx->release_buffer(avctx, &ctx->last_pic);
+
+        ctx->pic.reference    = 3;
+        ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID    |
+                                FF_BUFFER_HINTS_READABLE |
+                                FF_BUFFER_HINTS_PRESERVE |
+                                FF_BUFFER_HINTS_REUSABLE;
+
+        if ((ret = avctx->reget_buffer(avctx, &ctx->pic)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+            return ret;
+        }
+
+        c->last_rgb_pic = NULL;
+    }
+    c->rgb_pic    = ctx->pic.data[0] +
+                    ctx->pic.linesize[0] * (avctx->height - 1);
+    c->rgb_stride = -ctx->pic.linesize[0];
+
+    ctx->pic.key_frame = keyframe;
+    ctx->pic.pict_type = keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+
+    if (is_555) {
+        bytestream2_init(&gB, buf, buf_size);
+
+        if (decode_555(&gB, (uint16_t *)c->rgb_pic, c->rgb_stride >> 1,
+                       keyframe, avctx->width, avctx->height))
+            return AVERROR_INVALIDDATA;
+
+        buf_size -= bytestream2_tell(&gB);
+    } else if (is_rle) {
+        init_get_bits(&gb, buf, buf_size * 8);
+        if (ret = decode_rle(&gb, c->pal_pic, c->pal_stride,
+                             c->rgb_pic, c->rgb_stride, c->pal, keyframe,
+                             ctx->split_position, 0,
+                             avctx->width, avctx->height))
+            return ret;
+        align_get_bits(&gb);
+
+        if (c->slice_split)
+            if (ret = decode_rle(&gb, c->pal_pic, c->pal_stride,
+                                 c->rgb_pic, c->rgb_stride, c->pal, keyframe,
+                                 ctx->split_position, 1,
+                                 avctx->width, avctx->height))
+                return ret;
+
+        align_get_bits(&gb);
+        buf      += get_bits_count(&gb) >> 3;
+        buf_size -= get_bits_count(&gb) >> 3;
+    } else {
+        if (keyframe)
+            ff_mss12_codec_reset(c);
+        else if (c->corrupted)
+            return AVERROR_INVALIDDATA;
+        bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING);
+        arith2_init(&acoder, &gB);
+        c->keyframe = keyframe;
+        if (c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0,
+                                                avctx->width,
+                                                ctx->split_position))
+            return AVERROR_INVALIDDATA;
+
+        buf      += arith2_get_consumed_bytes(&acoder);
+        buf_size -= arith2_get_consumed_bytes(&acoder);
+        if (c->slice_split) {
+            if (buf_size < 1)
+                return AVERROR_INVALIDDATA;
+            bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING);
+            arith2_init(&acoder, &gB);
+            if (c->corrupted = ff_mss12_decode_rect(&c->sc[1], &acoder, 0,
+                                                    ctx->split_position,
+                                                    avctx->width,
+                                                    avctx->height - ctx->split_position))
+                return AVERROR_INVALIDDATA;
+
+            buf      += arith2_get_consumed_bytes(&acoder);
+            buf_size -= arith2_get_consumed_bytes(&acoder);
+        }
+    }
+
+    if (has_wmv9) {
+        for (i = 0; i < used_rects; i++) {
+            int x = wmv9rects[i].x;
+            int y = wmv9rects[i].y;
+            int w = wmv9rects[i].w;
+            int h = wmv9rects[i].h;
+            if (wmv9rects[i].coded) {
+                int WMV9codedFrameSize;
+                if (buf_size < 4 || !(WMV9codedFrameSize = AV_RL24(buf)))
+                    return AVERROR_INVALIDDATA;
+                if (ret = decode_wmv9(avctx, buf + 3, buf_size - 3,
+                                      x, y, w, h, wmv9_mask))
+                    return ret;
+                buf      += WMV9codedFrameSize + 3;
+                buf_size -= WMV9codedFrameSize + 3;
+            } else {
+                uint8_t *dst = c->rgb_pic + y * c->rgb_stride + x * 3;
+                if (wmv9_mask != -1) {
+                    ctx->dsp.mss2_gray_fill_masked(dst, c->rgb_stride,
+                                                   wmv9_mask,
+                                                   c->pal_pic + y * c->pal_stride + x,
+                                                   c->pal_stride,
+                                                   w, h);
+                } else {
+                    do {
+                        memset(dst, 0x80, w * 3);
+                        dst += c->rgb_stride;
+                    } while (--h);
+                }
+            }
+        }
+    }
+
+    if (buf_size)
+        av_log(avctx, AV_LOG_WARNING, "buffer not fully consumed\n");
+
+    *data_size       = sizeof(AVFrame);
+    *(AVFrame *)data = ctx->pic;
+
+    return avpkt->size;
+}
+
+static av_cold int wmv9_init(AVCodecContext *avctx)
+{   /* Fill the VC1Context found in avctx->priv_data with fixed settings; returns 0, or -1 on failure. */
+    VC1Context *v = avctx->priv_data; /* NOTE(review): the same priv_data is read as MSS2Context elsewhere, so the VC1Context is presumably its first member - confirm */
+
+    v->s.avctx    = avctx;
+    avctx->flags |= CODEC_FLAG_EMU_EDGE; /* the flag is set on avctx and mirrored into v->s below */
+    v->s.flags   |= CODEC_FLAG_EMU_EDGE;
+
+    if (avctx->idct_algo == FF_IDCT_AUTO) /* only override the IDCT when it was left on auto */
+        avctx->idct_algo = FF_IDCT_WMV2;
+
+    if (ff_vc1_init_common(v) < 0)
+        return -1;
+    ff_vc1dsp_init(&v->vc1dsp);
+
+    v->profile = PROFILE_MAIN; /* from here on every field is assigned a constant, nothing is read from a bitstream */
+
+    v->zz_8x4     = ff_wmv2_scantableA;
+    v->zz_4x8     = ff_wmv2_scantableB;
+    v->res_y411   = 0;
+    v->res_sprite = 0;
+
+    v->frmrtq_postproc = 7;
+    v->bitrtq_postproc = 31;
+
+    v->res_x8          = 0;
+    v->multires        = 0;
+    v->res_fasttx      = 1;
+
+    v->fastuvmc        = 0;
+
+    v->extended_mv     = 0;
+
+    v->dquant          = 1;
+    v->vstransform     = 1;
+
+    v->res_transtab    = 0;
+
+    v->overlap         = 0;
+
+    v->s.resync_marker = 0;
+    v->rangered        = 0;
+
+    v->s.max_b_frames = avctx->max_b_frames = 0; /* same value written to both contexts */
+    v->quantizer_mode = 0;
+
+    v->finterpflag = 0;
+
+    v->res_rtm_flag = 1;
+
+    ff_vc1_init_transposed_scantables(v);
+
+    if (ff_msmpeg4_decode_init(avctx) < 0 || /* either call failing is reported as a plain -1 */
+        ff_vc1_decode_init_alloc_tables(v) < 0)
+        return -1;
+
+    /* error concealment */
+    v->s.me.qpel_put = v->s.dsp.put_qpel_pixels_tab;
+    v->s.me.qpel_avg = v->s.dsp.avg_qpel_pixels_tab;
+
+    return 0;
+}
+
+static av_cold int mss2_decode_end(AVCodecContext *avctx)
+{   /* Release the held frame buffers and free decoder state; always returns 0. Also used as the error path of mss2_decode_init(). */
+    MSS2Context *const ctx = avctx->priv_data;
+
+    if (ctx->pic.data[0]) /* a buffer is released only when its data[0] is set */
+        avctx->release_buffer(avctx, &ctx->pic);
+    if (ctx->last_pic.data[0])
+        avctx->release_buffer(avctx, &ctx->last_pic);
+
+    ff_mss12_decode_end(&ctx->c);
+    av_freep(&ctx->c.pal_pic); /* the two palette planes allocated in mss2_decode_init() */
+    av_freep(&ctx->c.last_pal_pic);
+    ff_vc1_decode_end(avctx); /* presumably tears down what wmv9_init() set up - confirm */
+
+    return 0;
+}
+
+static av_cold int mss2_decode_init(AVCodecContext *avctx)
+{   /* Codec init: MSS12 state, two palette planes, VC-1 context, DSP table, pixel format; returns 0 or a non-zero error. */
+    MSS2Context * const ctx = avctx->priv_data;
+    MSS12Context *c = &ctx->c;
+    int ret;
+    c->avctx = avctx;
+    avctx->coded_frame = &ctx->pic;
+    if (ret = ff_mss12_decode_init(c, 1)) /* any non-zero result is propagated unchanged */
+        return ret;
+    c->pal_stride   = c->mask_stride; /* palette planes reuse the mask stride */
+    c->pal_pic      = av_malloc(c->pal_stride * avctx->height);
+    c->last_pal_pic = av_malloc(c->pal_stride * avctx->height);
+    if (!c->pal_pic || !c->last_pal_pic) { /* mss2_decode_end() frees whichever plane did get allocated */
+        mss2_decode_end(avctx);
+        return AVERROR(ENOMEM);
+    }
+    if (ret = wmv9_init(avctx)) { /* wmv9_init() returns 0 or -1, so ret is -1 here on failure */
+        mss2_decode_end(avctx);
+        return ret;
+    }
+    ff_mss2dsp_init(&ctx->dsp);
+
+    avctx->pix_fmt = c->free_colours == 127 ? PIX_FMT_RGB555 /* 127 selects RGB555, any other value RGB24 */
+                                            : PIX_FMT_RGB24;
+
+    return 0;
+}
+
+AVCodec ff_mss2_decoder = { /* codec descriptor tying AV_CODEC_ID_MSS2 to the init/close/decode functions of this file */
+    .name           = "mss2",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MSS2,
+    .priv_data_size = sizeof(MSS2Context), /* priv_data is accessed as MSS2Context and, in wmv9_init(), as VC1Context */
+    .init           = mss2_decode_init,
+    .close          = mss2_decode_end,
+    .decode         = mss2_decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+    .long_name      = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"),
+};
diff --git a/libavcodec/mss2dsp.c b/libavcodec/mss2dsp.c
new file mode 100644
index 0000000000000000000000000000000000000000..b18bf1f0a96634961296ce8f316dfb9b0a7d192c
--- /dev/null
+++ b/libavcodec/mss2dsp.c
@@ -0,0 +1,153 @@
+/*
+ * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder DSP routines
+ */
+
+#include "mss2dsp.h"
+#include "libavutil/common.h"
+
+static av_always_inline void mss2_blit_wmv9_template(uint8_t *dst,
+                                                     int dst_stride,
+                                                     int gray,
+                                                     int use_mask,
+                                                     int maskcolor,
+                                                     const uint8_t *mask,
+                                                     int mask_stride,
+                                                     const uint8_t *srcy,
+                                                     int srcy_stride,
+                                                     const uint8_t *srcu,
+                                                     const uint8_t *srcv,
+                                                     int srcuv_stride,
+                                                     int w, int h)
+{   /* Shared body of the blit/fill variants: visits w x h pixels and writes 3 bytes per selected pixel into dst. */
+    int i, j, k, r = -1; /* r is the row counter; pre-incremented in the loop test, so it starts at 0 */
+    while (++r < h) {
+        for (i = 0, j = 0, k = 0; i < w; j += (i & 1), i++, k += 3) { /* i: pixel, j: chroma sample (advances after each odd pixel), k: byte offset in dst */
+            if (!use_mask || mask[i] == maskcolor) { /* with use_mask set, only pixels whose mask byte equals maskcolor are written */
+                if (gray) {
+                    dst[k] = dst[k + 1] = dst[k + 2] = 0x80; /* gray mode reads no source plane */
+                } else {
+                    int y = srcy[i];
+                    int u = srcu[j] - 128; /* chroma is re-centred around zero */
+                    int v = srcv[j] - 128;
+                    dst[k]     = av_clip_uint8(y + (             91881 * v + 32768 >> 16)); /* 16-bit fixed point, +32768 rounds; 91881/65536 ~= 1.402 */
+                    dst[k + 1] = av_clip_uint8(y + (-22554 * u - 46802 * v + 32768 >> 16)); /* ~= -0.344 * u - 0.714 * v */
+                    dst[k + 2] = av_clip_uint8(y + (116130 * u             + 32768 >> 16)); /* ~= 1.772 * u; factors look like full-range BT.601 - confirm */
+                }
+            }
+        }
+        mask +=  mask_stride;
+        dst  +=   dst_stride;
+        srcy +=  srcy_stride;
+        srcu += srcuv_stride * (r & 1); /* chroma rows advance only after odd rows: one chroma row per two output rows */
+        srcv += srcuv_stride * (r & 1);
+    }
+}
+
+static void mss2_blit_wmv9_c(uint8_t *dst, int dst_stride,
+                             const uint8_t *srcy, int srcy_stride,
+                             const uint8_t *srcu, const uint8_t *srcv,
+                             int srcuv_stride, int w, int h)
+{   /* Unmasked colour blit: every pixel of the w x h area is converted and written. */
+    mss2_blit_wmv9_template(dst, dst_stride, 0, 0, /* gray = 0, use_mask = 0 */
+                            0, NULL, 0, /* no mask: the NULL pointer is never dereferenced because use_mask is 0 */
+                            srcy, srcy_stride,
+                            srcu, srcv, srcuv_stride,
+                            w, h);
+}
+
+static void mss2_blit_wmv9_masked_c(uint8_t *dst, int dst_stride,
+                                    int maskcolor, const uint8_t *mask,
+                                    int mask_stride,
+                                    const uint8_t *srcy, int srcy_stride,
+                                    const uint8_t *srcu, const uint8_t *srcv,
+                                    int srcuv_stride, int w, int h)
+{   /* Colour blit limited to pixels whose mask byte equals maskcolor; other dst pixels are left untouched. */
+    mss2_blit_wmv9_template(dst, dst_stride, 0, 1, /* gray = 0, use_mask = 1 */
+                            maskcolor, mask, mask_stride,
+                            srcy, srcy_stride,
+                            srcu, srcv, srcuv_stride,
+                            w, h);
+}
+
+static void mss2_gray_fill_masked_c(uint8_t *dst, int dst_stride,
+                                    int maskcolor, const uint8_t *mask,
+                                    int mask_stride, int w, int h)
+{   /* Writes 0x80 into all three bytes of every pixel whose mask byte equals maskcolor. */
+    mss2_blit_wmv9_template(dst, dst_stride, 1, 1, /* gray = 1, use_mask = 1 */
+                            maskcolor, mask, mask_stride,
+                            NULL, 0, /* source planes are not read in gray mode, so NULL with zero strides is passed */
+                            NULL, NULL, 0,
+                            w, h);
+}
+
+static void upsample_plane_c(uint8_t *plane, int plane_stride, int w, int h)
+{   /* In-place 2x enlargement: rows first, then columns; w and h (rounded up to even) are the enlarged size. */
+    uint8_t *src1, *src2, *dst1, *dst2, *p, a, b;
+    int i, j;
+
+    w += (w & 1); /* round both dimensions up to the next even value */
+    h += (h & 1);
+
+    j = h - 1;
+
+    memcpy(plane + plane_stride *  j, /* bottom output row is a straight copy of source row j >> 1 */
+           plane + plane_stride * (j >> 1),
+           w);
+
+    while ((j -= 2) > 0) { /* bottom-up in row pairs: the rows read are never below row j, so they are still unmodified */
+        dst1 = plane + plane_stride *  (j + 1);
+        dst2 = plane + plane_stride *   j;
+        src1 = plane + plane_stride * ((j + 1) >> 1);
+        src2 = plane + plane_stride * ( j      >> 1);
+
+        for (i = (w - 1) >> 1; i >= 0; i--) { /* only the left half-width holds source samples at this stage */
+            a = src1[i]; /* both inputs are read before either output is stored (dst2 can alias src1 when j == 1) */
+            b = src2[i];
+            dst1[i] = (3 * a + b + 2) >> 2; /* 3:1 blend weighted towards the nearer source row */
+            dst2[i] = (a + 3 * b + 2) >> 2;
+        }
+    }
+
+    for (j = h - 1; j >= 0; j--) { /* second pass: widen every row */
+        p = plane + plane_stride * j;
+        i = w - 1;
+
+        p[i] = p[i >> 1]; /* rightmost output sample is a straight copy */
+
+        while ((i -= 2) > 0) { /* right-to-left so inputs at i >> 1 and (i + 1) >> 1 are read before being overwritten */
+            a = p[ i      >> 1];
+            b = p[(i + 1) >> 1];
+            p[i]     = (3 * a + b + 1) >> 2; /* NOTE(review): rounds with +1 here but +2 in the row pass - confirm this is intended */
+            p[i + 1] = (a + 3 * b + 1) >> 2;
+        }
+    }
+}
+
+av_cold void ff_mss2dsp_init(MSS2DSPContext* dsp)
+{   /* Points all four function pointers of dsp at the C implementations in this file. */
+    dsp->mss2_blit_wmv9        = mss2_blit_wmv9_c;
+    dsp->mss2_blit_wmv9_masked = mss2_blit_wmv9_masked_c;
+    dsp->mss2_gray_fill_masked = mss2_gray_fill_masked_c;
+    dsp->upsample_plane        = upsample_plane_c;
+}
diff --git a/libavcodec/mss2dsp.h b/libavcodec/mss2dsp.h
new file mode 100644
index 0000000000000000000000000000000000000000..e04aab04d595d18a88e3c2f197c8914fe8b526d4
--- /dev/null
+++ b/libavcodec/mss2dsp.h
@@ -0,0 +1,50 @@
+/*
+ * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder DSP routines
+ */
+
+#ifndef AVCODEC_MSS2DSP_H
+#define AVCODEC_MSS2DSP_H
+
+#include "dsputil.h"
+
+typedef struct MSS2DSPContext { /* function-pointer table for the MSS2 pixel routines, filled by ff_mss2dsp_init() */
+    void (*mss2_blit_wmv9)(uint8_t *dst, int dst_stride,
+                           const uint8_t *srcy, int srcy_stride,
+                           const uint8_t *srcu, const uint8_t *srcv,
+                           int srcuv_stride, int w, int h); /* convert a w x h area from Y/U/V planes into 3-byte pixels at dst */
+    void (*mss2_blit_wmv9_masked)(uint8_t *dst, int dst_stride,
+                                  int maskcolor, const uint8_t *mask,
+                                  int mask_stride,
+                                  const uint8_t *srcy, int srcy_stride,
+                                  const uint8_t *srcu, const uint8_t *srcv,
+                                  int srcuv_stride, int w, int h); /* same, but only where the mask byte equals maskcolor */
+    void (*mss2_gray_fill_masked)(uint8_t *dst, int dst_stride,
+                                  int maskcolor, const uint8_t *mask,
+                                  int mask_stride, int w, int h); /* fill masked pixels with a constant; takes no source planes */
+    void (*upsample_plane)(uint8_t *plane, int plane_stride, int w, int h); /* in-place enlargement of a single plane */
+} MSS2DSPContext;
+
+av_cold void ff_mss2dsp_init(MSS2DSPContext* dsp);
+
+#endif /* AVCODEC_MSS2DSP_H */
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 955dfa71dc3646579f0152c71713a1106cf8f3e6..e8dc5ed30c51a210035d0f63c34079e07c3c852e 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -584,7 +584,14 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
 
     if (v->finterpflag)
         v->interpfrm = get_bits1(gb);
-    skip_bits(gb, 2); //framecnt unused
+    if (!v->s.avctx->codec)
+        return -1;
+    if (v->s.avctx->codec->id == AV_CODEC_ID_MSS2)
+        v->respic   =
+        v->rangered =
+        v->multires = get_bits(gb, 2) == 1;
+    else
+        skip_bits(gb, 2); //framecnt unused
     v->rangeredfrm = 0;
     if (v->rangered)
         v->rangeredfrm = get_bits1(gb);
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index a5269f43a05e91459144f0ef2f95fe58923b33ab..b28b705fa01624fa82943b449938e31ddf319bbb 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -395,6 +395,8 @@ typedef struct VC1Context{
     uint8_t broken_link;         ///< Broken link flag (BROKEN_LINK syntax element)
     uint8_t closed_entry;        ///< Closed entry point flag (CLOSED_ENTRY syntax element)
 
+    int end_mb_x;                ///< Horizontal macroblock limit (used only by mss2)
+
     int parse_only;              ///< Context is used within parser
 
     int warn_interlaced;
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 0362c1a04f10d72fbdbde0775d567198eac97e70..3f62201979db5b568ef4c78280479bb7890589fd 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -4350,10 +4350,10 @@ static void vc1_decode_i_blocks(VC1Context *v)
     s->mb_x = s->mb_y = 0;
     s->mb_intra         = 1;
     s->first_slice_line = 1;
-    for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+    for (s->mb_y = 0; s->mb_y < s->end_mb_y; s->mb_y++) {
         s->mb_x = 0;
         ff_init_block_index(s);
-        for (; s->mb_x < s->mb_width; s->mb_x++) {
+        for (; s->mb_x < v->end_mb_x; s->mb_x++) {
             uint8_t *dst[6];
             ff_update_block_index(s);
             dst[0] = s->dest[0];
@@ -4440,7 +4440,10 @@ static void vc1_decode_i_blocks(VC1Context *v)
         s->first_slice_line = 0;
     }
     if (v->s.loop_filter)
-        ff_draw_horiz_band(s, (s->mb_height - 1) * 16, 16);
+        ff_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
+
+    /* This is intentionally mb_height and not end_mb_y - unlike in advanced
+     * profile, these only differ when decoding MSS2 rectangles. */
     ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, ER_MB_END);
 }
 
@@ -5560,6 +5563,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         ff_er_frame_start(s);
 
         v->bits = buf_size * 8;
+        v->end_mb_x = s->mb_width;
         if (v->field_mode) {
             uint8_t *tmp[2];
             s->current_picture.f.linesize[0] <<= 1;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 32a61f55ed488cbd0e54967a18ae0dd08e6676df..a2e231bf7fe2bfc921b0292df27062046a7ca0db 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -27,8 +27,8 @@
  */
 
 #define LIBAVCODEC_VERSION_MAJOR 54
-#define LIBAVCODEC_VERSION_MINOR 54
-#define LIBAVCODEC_VERSION_MICRO 101
+#define LIBAVCODEC_VERSION_MINOR 55
+#define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index acbab35caeff35dfebe40f0055dcd997ccbd9db4..9eaf743b4b19cc55bb1132248d98691782978b51 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -73,7 +73,7 @@ AC3_EXPONENT_MIN mmx
 %define LOOP_ALIGN ALIGN 16
 AC3_EXPONENT_MIN mmxext
 %endif
-%if HAVE_SSE
+%if HAVE_SSE2_EXTERNAL
 INIT_XMM
 AC3_EXPONENT_MIN sse2
 %endif
@@ -385,7 +385,7 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
     pabsd    %1, %1
 %endmacro
 
-%if HAVE_AMD3DNOW
+%if HAVE_AMD3DNOW_EXTERNAL
 INIT_MMX
 cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
     add      expq, lenq
@@ -453,11 +453,11 @@ cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
     REP_RET
 %endmacro
 
-%if HAVE_SSE
+%if HAVE_SSE2_EXTERNAL
 INIT_XMM
 %define PABSD PABSD_MMX
 AC3_EXTRACT_EXPONENTS sse2
-%if HAVE_SSSE3
+%if HAVE_SSSE3_EXTERNAL
 %define PABSD PABSD_SSSE3
 AC3_EXTRACT_EXPONENTS ssse3
 %endif
diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm
index 02b5f3fc8923aa7f745a69f5435df4a9d9e237e5..91ea493dc746d33ee1d8fcffcb7a1854f4aa8d9f 100644
--- a/libavcodec/x86/dct32.asm
+++ b/libavcodec/x86/dct32.asm
@@ -193,7 +193,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 
 INIT_YMM avx
 SECTION_TEXT
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
 cglobal dct32_float, 2,3,8, out, in, tmp
     ; pass 1
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 19884a36a81160cd79d47d863d1fa70af3d9ae48..af7669ac5bfba9192cbad24aef2d08e27b0b4753 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -1169,7 +1169,7 @@ ALIGN 16
 
 INIT_XMM sse
 VECTOR_FMUL_REVERSE
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 VECTOR_FMUL_REVERSE
 %endif
@@ -1199,7 +1199,7 @@ ALIGN 16
 
 INIT_XMM sse
 VECTOR_FMUL_ADD
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 VECTOR_FMUL_ADD
 %endif
@@ -1245,7 +1245,7 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
 
 INIT_XMM sse
 BUTTERFLIES_FLOAT_INTERLEAVE
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 BUTTERFLIES_FLOAT_INTERLEAVE
 %endif
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 5997adcb3a844d0ede0c6a7320b7ebce5efb4602..d293e19b59825fbd8df18aed6ff5855bb98663c8 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2942,7 +2942,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
 static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx,
                                   int mm_flags)
 {
-#if HAVE_6REGS && HAVE_INLINE_ASM
+#if HAVE_AMD3DNOWEXT_INLINE && HAVE_6REGS
     c->vector_fmul_window  = vector_fmul_window_3dnowext;
 #endif
 }
@@ -3056,11 +3056,10 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
 static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
                                int mm_flags)
 {
-#if HAVE_SSSE3
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
     const int bit_depth      = avctx->bits_per_raw_sample;
 
-#if HAVE_INLINE_ASM
+#if HAVE_SSSE3_INLINE
     if (!high_bit_depth && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS(1, 0, ssse3);
         H264_QPEL_FUNCS(1, 1, ssse3);
@@ -3075,8 +3074,9 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS(3, 2, ssse3);
         H264_QPEL_FUNCS(3, 3, ssse3);
     }
-#endif /* HAVE_INLINE_ASM */
-#if HAVE_YASM
+#endif /* HAVE_SSSE3_INLINE */
+
+#if HAVE_SSSE3_EXTERNAL
     if (bit_depth == 10 && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
@@ -3099,21 +3099,20 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
     if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) // cachesplit
         c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
     c->bswap_buf = ff_bswap32_buf_ssse3;
-#endif
-#endif
+#endif /* HAVE_SSSE3_EXTERNAL */
 }
 
 static void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
                               int mm_flags)
 {
-#if HAVE_YASM
+#if HAVE_SSE4_EXTERNAL
     c->vector_clip_int32 = ff_vector_clip_int32_sse4;
-#endif
+#endif /* HAVE_SSE4_EXTERNAL */
 }
 
 static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 {
-#if HAVE_AVX && HAVE_YASM
+#if HAVE_AVX_EXTERNAL
     const int bit_depth = avctx->bits_per_raw_sample;
 
     if (bit_depth == 10) {
@@ -3133,7 +3132,7 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
     c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
     c->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     c->vector_fmul_add = ff_vector_fmul_add_avx;
-#endif
+#endif /* HAVE_AVX_EXTERNAL */
 }
 
 void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 65c38fa89d9f1a54037782b4529bbf82afea5384..9f9680b5967aa3dc70b45ad9f10c52fcc475cd59 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -983,7 +983,7 @@ DCT_SAD_FUNC(mmx2)
 DCT_SAD_FUNC(sse2)
 #undef MMABS
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 #define MMABS(a,z)    MMABS_SSSE3(a,z)
 DCT_SAD_FUNC(ssse3)
 #undef MMABS
@@ -1063,7 +1063,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
 #undef SCALE_OFFSET
 #undef PMULHRW
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 #undef PHADDD
 #define DEF(x) x ## _ssse3
 #define SET_RND(x)
@@ -1082,7 +1082,7 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
 #undef SCALE_OFFSET
 #undef PMULHRW
 #undef PHADDD
-#endif //HAVE_SSSE3
+#endif /* HAVE_SSSE3_INLINE */
 
 #endif /* HAVE_INLINE_ASM */
 
@@ -1162,7 +1162,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
         }
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
         if(mm_flags & AV_CPU_FLAG_SSSE3){
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
                 c->try_8x8basis= try_8x8basis_ssse3;
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index e8a99251055b9fc3f22d30338559666e580381e9..c2b9cbb12f420f2d5f29e2fea74f929f7d9f4db2 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -305,7 +305,7 @@ IF%1 mova  Z(1), m5
 
 INIT_YMM avx
 
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 align 16
 fft8_avx:
     mova      m0, Z(0)
@@ -553,7 +553,7 @@ DEFINE_ARGS zc, w, n, o1, o3
 
 INIT_YMM avx
 
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 %macro INTERL_AVX 5
     vunpckhps      %3, %2, %1
     vunpcklps      %2, %2, %1
@@ -794,7 +794,7 @@ align 8
 dispatch_tab %+ fullsuffix: pointer list_of_fft
 %endmacro ; DECL_FFT
 
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 DECL_FFT 6
 DECL_FFT 6, _interleave
@@ -1101,6 +1101,6 @@ DECL_IMDCT POSROTATESHUF_3DNOW
 
 INIT_YMM avx
 
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 DECL_IMDCT POSROTATESHUF_AVX
 %endif
diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm
index f8a2cff68ff7c10dd9a9a8ffd3854ba671db261f..bcdb27c176a8002dc9f570d4fb6a8273b25ced33 100644
--- a/libavcodec/x86/h264_chromamc_10bit.asm
+++ b/libavcodec/x86/h264_chromamc_10bit.asm
@@ -252,7 +252,7 @@ cglobal %1_h264_chroma_mc2_10, 6,7
 %define CHROMAMC_AVG  NOTHING
 INIT_XMM sse2
 CHROMA_MC8 put
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CHROMA_MC8 put
 %endif
@@ -264,7 +264,7 @@ CHROMA_MC2 put
 %define PAVG          pavgw
 INIT_XMM sse2
 CHROMA_MC8 avg
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CHROMA_MC8 avg
 %endif
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 3d4ac5fb7191a1c8c09c4596c5cf807098b0748f..525ce39e4586bec8d9f1e4baa1759c83dfa7f885 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -80,7 +80,7 @@ cglobal h264_idct_add_10, 3,3
 
 INIT_XMM sse2
 IDCT_ADD_10
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD_10
 %endif
@@ -110,7 +110,7 @@ add4x4_idct %+ SUFFIX:
 INIT_XMM sse2
 ALIGN 16
 ADD4x4IDCT
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 ALIGN 16
 ADD4x4IDCT
@@ -150,7 +150,7 @@ cglobal h264_idct_add16_10, 5,6
 
 INIT_XMM sse2
 IDCT_ADD16_10
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD16_10
 %endif
@@ -216,7 +216,7 @@ cglobal h264_idct8_dc_add_10,3,3,7
 
 INIT_XMM sse2
 IDCT8_DC_ADD
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT8_DC_ADD
 %endif
@@ -287,7 +287,7 @@ cglobal h264_idct_add16intra_10,5,7,8
 
 INIT_XMM sse2
 IDCT_ADD16INTRA_10
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD16INTRA_10
 %endif
@@ -324,7 +324,7 @@ cglobal h264_idct_add8_10,5,8,7
 
 INIT_XMM sse2
 IDCT_ADD8
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD8
 %endif
@@ -501,7 +501,7 @@ h264_idct8_add1_10 %+ SUFFIX:
 
 INIT_XMM sse2
 IDCT8_ADD
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT8_ADD
 %endif
@@ -541,7 +541,7 @@ cglobal h264_idct8_add4_10, 0,7,16
 
 INIT_XMM sse2
 IDCT8_ADD4
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT8_ADD4
 %endif
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 79fa23e71d45e20394a1286c28a75575a0f3a8f3..4eeb0a43032f84353965208628539da88c4ff395 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -84,7 +84,7 @@ INIT_XMM
 PRED4x4_DR sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED4x4_DR ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED4x4_DR avx
 %endif
@@ -124,7 +124,7 @@ INIT_XMM
 PRED4x4_VR sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED4x4_VR ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED4x4_VR avx
 %endif
@@ -167,7 +167,7 @@ INIT_XMM
 PRED4x4_HD sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED4x4_HD ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED4x4_HD avx
 %endif
@@ -238,7 +238,7 @@ cglobal pred4x4_down_left_10_%1, 3,3
 
 INIT_XMM
 PRED4x4_DL sse2
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED4x4_DL avx
 %endif
@@ -267,7 +267,7 @@ cglobal pred4x4_vertical_left_10_%1, 3,3
 
 INIT_XMM
 PRED4x4_VL sse2
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED4x4_VL avx
 %endif
@@ -577,7 +577,7 @@ cglobal pred8x8l_top_dc_10_%1, 4,4,6
 
 INIT_XMM
 PRED8x8L_TOP_DC sse2
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_TOP_DC avx
 %endif
@@ -636,7 +636,7 @@ cglobal pred8x8l_dc_10_%1, 4,6,6
 
 INIT_XMM
 PRED8x8L_DC sse2
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_DC avx
 %endif
@@ -671,7 +671,7 @@ cglobal pred8x8l_vertical_10_%1, 4,4,6
 
 INIT_XMM
 PRED8x8L_VERTICAL sse2
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_VERTICAL avx
 %endif
@@ -728,7 +728,7 @@ INIT_XMM
 PRED8x8L_HORIZONTAL sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED8x8L_HORIZONTAL ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_HORIZONTAL avx
 %endif
@@ -797,7 +797,7 @@ INIT_XMM
 PRED8x8L_DOWN_LEFT sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED8x8L_DOWN_LEFT ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_DOWN_LEFT avx
 %endif
@@ -872,7 +872,7 @@ INIT_XMM
 PRED8x8L_DOWN_RIGHT sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED8x8L_DOWN_RIGHT ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_DOWN_RIGHT avx
 %endif
@@ -943,7 +943,7 @@ INIT_XMM
 PRED8x8L_VERTICAL_RIGHT sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED8x8L_VERTICAL_RIGHT ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_VERTICAL_RIGHT avx
 %endif
@@ -1005,7 +1005,7 @@ INIT_XMM
 PRED8x8L_HORIZONTAL_UP sse2
 %define PALIGNR PALIGNR_SSSE3
 PRED8x8L_HORIZONTAL_UP ssse3
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_AVX
 PRED8x8L_HORIZONTAL_UP avx
 %endif
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 71a1fbeed9303b9a69ed7238f773d62d03290006..faf8a76b76d9b461476450d55da34c5453e1ed2c 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -1174,7 +1174,7 @@ QPEL_H264_V_XMM(put_,       PUT_OP, sse2)
 QPEL_H264_V_XMM(avg_,  AVG_MMX2_OP, sse2)
 QPEL_H264_HV_XMM(put_,       PUT_OP, sse2)
 QPEL_H264_HV_XMM(avg_,  AVG_MMX2_OP, sse2)
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 QPEL_H264_H_XMM(put_,       PUT_OP, ssse3)
 QPEL_H264_H_XMM(avg_,  AVG_MMX2_OP, ssse3)
 QPEL_H264_HV2_XMM(put_,       PUT_OP, ssse3)
@@ -1188,7 +1188,7 @@ H264_MC_4816(3dnow)
 H264_MC_4816(mmx2)
 H264_MC_816(H264_MC_V, sse2)
 H264_MC_816(H264_MC_HV, sse2)
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 H264_MC_816(H264_MC_H, ssse3)
 H264_MC_816(H264_MC_HV, ssse3)
 #endif
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index ea71515079b8d75bd999c014f97fbc82cd837cbd..6fd77be9f44dfa523974587db24024da35bffe91 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -39,7 +39,7 @@ IDCT_ADD_FUNC(8_dc, 10, sse2)
 IDCT_ADD_FUNC(8, 8, mmx)
 IDCT_ADD_FUNC(8, 8, sse2)
 IDCT_ADD_FUNC(8, 10, sse2)
-#if HAVE_AVX
+#if HAVE_AVX_EXTERNAL
 IDCT_ADD_FUNC(, 10, avx)
 IDCT_ADD_FUNC(8_dc, 10, avx)
 IDCT_ADD_FUNC(8, 10, avx)
@@ -64,7 +64,7 @@ IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
 IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
 IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
 IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
-#if HAVE_AVX
+#if HAVE_AVX_EXTERNAL
 IDCT_ADD_REP_FUNC(, 16, 10, avx)
 IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
 #endif
@@ -79,7 +79,7 @@ IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
 IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
 IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
-#if HAVE_AVX
+#if HAVE_AVX_EXTERNAL
 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
 #endif
 
@@ -353,7 +353,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                     c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                     c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
                 }
-#if HAVE_AVX
+#if HAVE_AVX_EXTERNAL
                 if (mm_flags & AV_CPU_FLAG_AVX) {
                     c->h264_idct_dc_add  =
                     c->h264_idct_add     = ff_h264_idct_add_10_avx;
@@ -377,7 +377,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                     c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
 #endif /* HAVE_ALIGNED_STACK */
                 }
-#endif /* HAVE_AVX */
+#endif /* HAVE_AVX_EXTERNAL */
             }
         }
     }
diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c
index 93ca54aff93f4d82b23947899ad10607dca147d5..850467f663292d1cee514642ec2dfec3f4f407c2 100644
--- a/libavcodec/x86/mpegvideoenc.c
+++ b/libavcodec/x86/mpegvideoenc.c
@@ -26,20 +26,18 @@
 #include "libavcodec/mpegvideo.h"
 #include "dsputil_mmx.h"
 
-#if HAVE_INLINE_ASM
-
 extern uint16_t ff_inv_zigzag_direct16[64];
 
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2   0
 #define COMPILE_TEMPLATE_SSSE3  0
 #define RENAME(a) a ## _MMX
 #define RENAMEl(a) a ## _mmx
 #include "mpegvideoenc_template.c"
-#endif /* HAVE_MMX */
+#endif /* HAVE_MMX_INLINE */
 
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 #undef COMPILE_TEMPLATE_SSSE3
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_MMXEXT
@@ -51,9 +49,9 @@ extern uint16_t ff_inv_zigzag_direct16[64];
 #define RENAME(a) a ## _MMX2
 #define RENAMEl(a) a ## _mmx2
 #include "mpegvideoenc_template.c"
-#endif /* HAVE_MMXEXT */
+#endif /* HAVE_MMXEXT_INLINE */
 
-#if HAVE_SSE2
+#if HAVE_SSE2_INLINE
 #undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_SSSE3
@@ -65,9 +63,9 @@ extern uint16_t ff_inv_zigzag_direct16[64];
 #define RENAME(a) a ## _SSE2
 #define RENAMEl(a) a ## _sse2
 #include "mpegvideoenc_template.c"
-#endif /* HAVE_SSE2 */
+#endif /* HAVE_SSE2_INLINE */
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 #undef COMPILE_TEMPLATE_MMXEXT
 #undef COMPILE_TEMPLATE_SSE2
 #undef COMPILE_TEMPLATE_SSSE3
@@ -79,33 +77,29 @@ extern uint16_t ff_inv_zigzag_direct16[64];
 #define RENAME(a) a ## _SSSE3
 #define RENAMEl(a) a ## _sse2
 #include "mpegvideoenc_template.c"
-#endif /* HAVE_SSSE3 */
-
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_SSSE3_INLINE */
 
 void ff_MPV_encode_init_x86(MpegEncContext *s)
 {
-#if HAVE_INLINE_ASM
     int mm_flags = av_get_cpu_flags();
     const int dct_algo = s->avctx->dct_algo;
 
     if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) {
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
         if (mm_flags & AV_CPU_FLAG_MMX && HAVE_MMX)
             s->dct_quantize = dct_quantize_MMX;
 #endif
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
         if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT)
             s->dct_quantize = dct_quantize_MMX2;
 #endif
-#if HAVE_SSE2
+#if HAVE_SSE2_INLINE
         if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE2)
             s->dct_quantize = dct_quantize_SSE2;
 #endif
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
         if (mm_flags & AV_CPU_FLAG_SSSE3)
             s->dct_quantize = dct_quantize_SSSE3;
 #endif
     }
-#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c
index a7bde0275538636ea581f33c8d7dc7bf0b04fb0e..6faf7d1230c14dcadeded3f2b025202f4f0d6a98 100644
--- a/libavfilter/x86/gradfun.c
+++ b/libavfilter/x86/gradfun.c
@@ -29,7 +29,7 @@
 DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
 DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
 
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
     intptr_t x;
@@ -77,7 +77,7 @@ static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uin
 }
 #endif
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 static void gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
 {
     intptr_t x;
@@ -122,9 +122,9 @@ static void gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const ui
         :"memory"
     );
 }
-#endif // HAVE_SSSE3
+#endif /* HAVE_SSSE3_INLINE */
 
-#if HAVE_SSE
+#if HAVE_SSE2_INLINE
 static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width)
 {
 #define BLURV(load)\
@@ -165,7 +165,7 @@ static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *
         BLURV("movdqa");
     }
 }
-#endif // HAVE_SSE
+#endif /* HAVE_SSE2_INLINE */
 
 #endif /* HAVE_INLINE_ASM */
 
@@ -173,18 +173,16 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
 {
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_INLINE_ASM
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         gf->filter_line = gradfun_filter_line_mmx2;
 #endif
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
     if (cpu_flags & AV_CPU_FLAG_SSSE3)
         gf->filter_line = gradfun_filter_line_ssse3;
 #endif
-#if HAVE_SSE
+#if HAVE_SSE2_INLINE
     if (cpu_flags & AV_CPU_FLAG_SSE2)
         gf->blur_line = gradfun_blur_line_sse2;
 #endif
-#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavfilter/x86/yadif.c b/libavfilter/x86/yadif.c
index 07790d9c3e29047ce6b2214dc2226da96241a058..881be5a9347d3e7dee9f31b08450aae37f7d0c12 100644
--- a/libavfilter/x86/yadif.c
+++ b/libavfilter/x86/yadif.c
@@ -30,7 +30,7 @@
 DECLARE_ASM_CONST(16, const xmm_reg, pb_1) = {0x0101010101010101ULL, 0x0101010101010101ULL};
 DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x0001000100010001ULL};
 
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
 #define COMPILE_TEMPLATE_SSE2 1
 #define COMPILE_TEMPLATE_SSSE3 1
 #undef RENAME
@@ -39,14 +39,14 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
 #undef COMPILE_TEMPLATE_SSSE3
 #endif
 
-#if HAVE_SSE
+#if HAVE_SSE2_INLINE
 #undef RENAME
 #define RENAME(a) a ## _sse2
 #include "yadif_template.c"
 #undef COMPILE_TEMPLATE_SSE2
 #endif
 
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 #undef RENAME
 #define RENAME(a) a ## _mmx2
 #include "yadif_template.c"
@@ -58,18 +58,16 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
 {
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_INLINE_ASM
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         yadif->filter_line = yadif_filter_line_mmx2;
 #endif
-#if HAVE_SSE
+#if HAVE_SSE2_INLINE
     if (cpu_flags & AV_CPU_FLAG_SSE2)
         yadif->filter_line = yadif_filter_line_sse2;
 #endif
-#if HAVE_SSSE3
+#if HAVE_SSSE3_INLINE
     if (cpu_flags & AV_CPU_FLAG_SSSE3)
         yadif->filter_line = yadif_filter_line_ssse3;
 #endif
-#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 1fa89289cd0eee08cc2c1771dce421bd9300d42e..9ff2148375002b901c6c9802de4a2949c93b0b64 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -316,6 +316,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_TSCC2,        MKTAG('T', 'S', 'C', '2') },
     { AV_CODEC_ID_MTS2,         MKTAG('M', 'T', 'S', '2') },
     { AV_CODEC_ID_CLLC,         MKTAG('C', 'L', 'L', 'C') },
+    { AV_CODEC_ID_MSS2,         MKTAG('M', 'S', 'S', '2') },
     { AV_CODEC_ID_NONE,         0 }
 };
 
diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 3db64d2f9b45ef66690d290ee57c2942bd89774b..2ebdbc1ec0f71535ee6477b33d2700fa6c0b8489 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -155,7 +155,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len
 
 INIT_XMM sse2
 CONV_S32_TO_FLT
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 CONV_S32_TO_FLT
 %endif
@@ -223,7 +223,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len
 
 INIT_XMM sse2
 CONV_FLT_TO_S32
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 CONV_FLT_TO_S32
 %endif
@@ -260,7 +260,7 @@ cglobal conv_s16p_to_s16_2ch, 3,4,5, dst, src0, len, src1
 
 INIT_XMM sse2
 CONV_S16P_TO_S16_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16P_TO_S16_2CH
 %endif
@@ -383,7 +383,7 @@ INIT_XMM sse2
 CONV_S16P_TO_S16_6CH
 INIT_XMM sse2slow
 CONV_S16P_TO_S16_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16P_TO_S16_6CH
 %endif
@@ -432,7 +432,7 @@ cglobal conv_s16p_to_flt_2ch, 3,4,6, dst, src0, len, src1
 
 INIT_XMM sse2
 CONV_S16P_TO_FLT_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16P_TO_FLT_2CH
 %endif
@@ -536,7 +536,7 @@ INIT_XMM sse2
 CONV_S16P_TO_FLT_6CH
 INIT_XMM ssse3
 CONV_S16P_TO_FLT_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16P_TO_FLT_6CH
 %endif
@@ -692,7 +692,7 @@ INIT_MMX sse
 CONV_FLTP_TO_S16_6CH
 INIT_XMM sse2
 CONV_FLTP_TO_S16_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLTP_TO_S16_6CH
 %endif
@@ -729,7 +729,7 @@ cglobal conv_fltp_to_flt_2ch, 3,4,5, dst, src0, len, src1
 
 INIT_XMM sse
 CONV_FLTP_TO_FLT_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLTP_TO_FLT_2CH
 %endif
@@ -810,7 +810,7 @@ INIT_MMX mmx
 CONV_FLTP_TO_FLT_6CH
 INIT_XMM sse4
 CONV_FLTP_TO_FLT_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLTP_TO_FLT_6CH
 %endif
@@ -859,7 +859,7 @@ INIT_XMM sse2
 CONV_S16_TO_S16P_2CH
 INIT_XMM ssse3
 CONV_S16_TO_S16P_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16_TO_S16P_2CH
 %endif
@@ -920,7 +920,7 @@ CONV_S16_TO_S16P_6CH
 %define PALIGNR PALIGNR_SSSE3
 INIT_XMM ssse3
 CONV_S16_TO_S16P_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16_TO_S16P_6CH
 %endif
@@ -958,7 +958,7 @@ cglobal conv_s16_to_fltp_2ch, 3,4,5, dst0, src, len, dst1
 
 INIT_XMM sse2
 CONV_S16_TO_FLTP_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16_TO_FLTP_2CH
 %endif
@@ -1041,7 +1041,7 @@ INIT_XMM ssse3
 CONV_S16_TO_FLTP_6CH
 INIT_XMM sse4
 CONV_S16_TO_FLTP_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_S16_TO_FLTP_6CH
 %endif
@@ -1087,7 +1087,7 @@ cglobal conv_flt_to_s16p_2ch, 3,4,6, dst0, src, len, dst1
 
 INIT_XMM sse2
 CONV_FLT_TO_S16P_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLT_TO_S16P_2CH
 %endif
@@ -1161,7 +1161,7 @@ CONV_FLT_TO_S16P_6CH
 %define PALIGNR PALIGNR_SSSE3
 INIT_XMM ssse3
 CONV_FLT_TO_S16P_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLT_TO_S16P_6CH
 %endif
@@ -1193,7 +1193,7 @@ cglobal conv_flt_to_fltp_2ch, 3,4,3, dst0, src, len, dst1
 
 INIT_XMM sse
 CONV_FLT_TO_FLTP_2CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLT_TO_FLTP_2CH
 %endif
@@ -1256,7 +1256,7 @@ cglobal conv_flt_to_fltp_6ch, 2,7,7, dst, src, dst1, dst2, dst3, dst4, dst5
 
 INIT_XMM sse2
 CONV_FLT_TO_FLTP_6CH
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CONV_FLT_TO_FLTP_6CH
 %endif
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index bab4292e1331d64baf17dd499d86710bcc5fd6af..0c4a9bd3ad7b018c0089e9ea6ad878a4f444de18 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -56,7 +56,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1
 
 INIT_XMM sse
 MIX_2_TO_1_FLTP_FLT
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 MIX_2_TO_1_FLTP_FLT
 %endif
@@ -175,7 +175,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
 
 INIT_XMM sse
 MIX_1_TO_2_FLTP_FLT
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 MIX_1_TO_2_FLTP_FLT
 %endif
@@ -222,7 +222,7 @@ INIT_XMM sse2
 MIX_1_TO_2_S16P_FLT
 INIT_XMM sse4
 MIX_1_TO_2_S16P_FLT
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 MIX_1_TO_2_S16P_FLT
 %endif
@@ -490,7 +490,7 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s
     MIX_3_8_TO_1_2_FLT %%i, 1, s16p
     MIX_3_8_TO_1_2_FLT %%i, 2, s16p
     ; do not use ymm AVX or FMA4 in x86-32 for 6 or more channels due to stack alignment issues
-    %if HAVE_AVX
+    %if HAVE_AVX_EXTERNAL
     %if ARCH_X86_64 || %%i < 6
     INIT_YMM avx
     %else
@@ -502,7 +502,7 @@ cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, s
     MIX_3_8_TO_1_2_FLT %%i, 1, s16p
     MIX_3_8_TO_1_2_FLT %%i, 2, s16p
     %endif
-    %if HAVE_FMA4
+    %if HAVE_FMA4_EXTERNAL
     %if ARCH_X86_64 || %%i < 6
     INIT_YMM fma4
     %else
diff --git a/libavutil/internal.h b/libavutil/internal.h
index a966e18dc37acebbabf06e91e5db23adbb45bf32..2f92db6853113e44c6909b6209a1024a16b74b6d 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -158,7 +158,7 @@
 #   define ONLY_IF_THREADS_ENABLED(x) NULL
 #endif
 
-#if HAVE_MMX && HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
 /**
  * Empty mmx state.
  * this must be called between any dsp function and float/double code.
@@ -172,8 +172,8 @@ static av_always_inline void emms_c(void)
 #elif HAVE_MMX && HAVE_MM_EMPTY
 #   include <mmintrin.h>
 #   define emms_c _mm_empty
-#else /* HAVE_MMX */
+#else
 #   define emms_c()
-#endif /* HAVE_MMX */
+#endif /* HAVE_MMX_INLINE */
 
 #endif /* AVUTIL_INTERNAL_H */
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index ce823302f6f5801cf4ba6ef7ee920f7637294773..f1fd01b9660fea3f731580cbcaf0b9e143d99e7a 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -47,7 +47,7 @@ ALIGN 16
 
 INIT_XMM sse
 VECTOR_FMUL
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 VECTOR_FMUL
 %endif
@@ -88,7 +88,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
 
 INIT_XMM sse
 VECTOR_FMAC_SCALAR
-%if HAVE_AVX
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 VECTOR_FMAC_SCALAR
 %endif
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 1cc1ee621aec877381fd9219861e656c00f18a35..7cc630e4f1444e4ce6f2d4ebb4576d35c47b62c3 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -542,7 +542,7 @@ static int swScale(SwsContext *c, const uint8_t *src[],
         if (!enough_lines)
             break;  // we can't output a dstY line so let's try with the next slice
 
-#if HAVE_MMX && HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
                               lastInLumBuf, lastInChrBuf);
 #endif
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 1e3b71823258e17f393c2b5ad3d31af4bef8c030..f2a007f936c9351acb164ff09939b6e64b249c31 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -599,7 +599,7 @@ fail:
     return ret;
 }
 
-#if HAVE_MMXEXT && HAVE_INLINE_ASM
+#if HAVE_MMXEXT_INLINE
 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
                            int16_t *filter, int32_t *filterPos, int numSplits)
 {
@@ -762,7 +762,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
 
     return fragmentPos + 1;
 }
-#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
+#endif /* HAVE_MMXEXT_INLINE */
 
 static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
 {
@@ -1063,7 +1063,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 
     /* precalculate horizontal scaler filter coefficients */
     {
-#if HAVE_MMXEXT && HAVE_INLINE_ASM
+#if HAVE_MMXEXT_INLINE
 // can't downscale !!!
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
             c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
@@ -1107,7 +1107,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
 #endif
         } else
-#endif /* HAVE_MMXEXT && HAVE_INLINE_ASM */
+#endif /* HAVE_MMXEXT_INLINE */
         {
             const int filterAlign =
                 (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
@@ -1688,7 +1688,7 @@ void sws_freeContext(SwsContext *c)
     av_freep(&c->hLumFilterPos);
     av_freep(&c->hChrFilterPos);
 
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 #ifdef MAP_ANONYMOUS
     if (c->lumMmx2FilterCode)
         munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize);
@@ -1705,7 +1705,7 @@ void sws_freeContext(SwsContext *c)
 #endif
     c->lumMmx2FilterCode = NULL;
     c->chrMmx2FilterCode = NULL;
-#endif /* HAVE_MMX */
+#endif /* HAVE_MMX_INLINE */
 
     av_freep(&c->yuvTable);
     av_freep(&c->formatConvBuffer);
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 3d17c328c73396651f190d9714e35a0bbc755542..3217adf9e2bd0bfd670805dcb86ba7994dfbb91b 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -72,7 +72,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
 
 
 //MMX versions
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 #undef RENAME
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _MMX
@@ -80,7 +80,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
 #endif
 
 //MMX2 versions
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
@@ -375,7 +375,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
 #if HAVE_INLINE_ASM
     if (cpu_flags & AV_CPU_FLAG_MMX)
         sws_init_swScale_MMX(c);
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT)
         sws_init_swScale_MMX2(c);
     if (cpu_flags & AV_CPU_FLAG_SSE3){
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 9445d08e84309ebf3845175df919fabf49a0f5ce..9a9220ddba6abd74069a4e5a0a985adf292a7bb0 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -50,22 +50,22 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
 DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
 
 //MMX versions
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define RENAME(a) a ## _MMX
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMX */
+#endif /* HAVE_MMX_INLINE */
 
 //MMX2 versions
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 #undef RENAME
 #undef COMPILE_TEMPLATE_MMXEXT
 #define COMPILE_TEMPLATE_MMXEXT 1
 #define RENAME(a) a ## _MMX2
 #include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT */
+#endif /* HAVE_MMXEXT_INLINE */
 
 #endif /* HAVE_INLINE_ASM */
 
@@ -74,7 +74,7 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
 #if HAVE_INLINE_ASM
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
     if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
         switch (c->dstFormat) {
         case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2;
diff --git a/tools/pktdumper.c b/tools/pktdumper.c
index 3fb94f6e9a58df0bf0a18683d47f4e9bd3646c68..2a80b25d9853ff3f2e9f114bdbb57a32837792e4 100644
--- a/tools/pktdumper.c
+++ b/tools/pktdumper.c
@@ -31,6 +31,9 @@
 #include <io.h>
 #endif
 
+#define FILENAME_BUF_SIZE 4096
+
+#include "libavutil/avstring.h"
 #include "libavutil/time.h"
 #include "libavformat/avformat.h"
 
@@ -48,8 +51,8 @@ static int usage(int ret)
 
 int main(int argc, char **argv)
 {
-    char fntemplate[PATH_MAX];
-    char pktfilename[PATH_MAX];
+    char fntemplate[FILENAME_BUF_SIZE];
+    char pktfilename[FILENAME_BUF_SIZE];
     AVFormatContext *fctx = NULL;
     AVPacket pkt;
     int64_t pktnum  = 0;
@@ -70,16 +73,16 @@ int main(int argc, char **argv)
         return usage(1);
     if (argc > 2)
         maxpkts = atoi(argv[2]);
-    strncpy(fntemplate, argv[1], PATH_MAX - 1);
+    av_strlcpy(fntemplate, argv[1], sizeof(fntemplate));
     if (strrchr(argv[1], '/'))
-        strncpy(fntemplate, strrchr(argv[1], '/') + 1, PATH_MAX - 1);
+        av_strlcpy(fntemplate, strrchr(argv[1], '/') + 1, sizeof(fntemplate));
     if (strrchr(fntemplate, '.'))
         *strrchr(fntemplate, '.') = '\0';
     if (strchr(fntemplate, '%')) {
         fprintf(stderr, "can't use filenames containing '%%'\n");
         return usage(1);
     }
-    if (strlen(fntemplate) + sizeof(PKTFILESUFF) >= PATH_MAX - 1) {
+    if (strlen(fntemplate) + sizeof(PKTFILESUFF) >= sizeof(fntemplate) - 1) {
         fprintf(stderr, "filename too long\n");
         return usage(1);
     }
@@ -105,7 +108,7 @@ int main(int argc, char **argv)
 
     while ((err = av_read_frame(fctx, &pkt)) >= 0) {
         int fd;
-        snprintf(pktfilename, PATH_MAX - 1, fntemplate, pktnum,
+        snprintf(pktfilename, sizeof(pktfilename), fntemplate, pktnum,
                  pkt.stream_index, pkt.pts, pkt.size,
                  (pkt.flags & AV_PKT_FLAG_KEY) ? 'K' : '_');
         printf(PKTFILESUFF "\n", pktnum, pkt.stream_index, pkt.pts, pkt.size,