diff --git a/configure b/configure
index eead2d66be955d58643449ec381917287c6afd19..33dd3b30f1a5ecf5fc19d33fdf8697ad487312e0 100755
--- a/configure
+++ b/configure
@@ -1344,6 +1344,7 @@ HAVE_LIST="
     mkstemp
     mm_empty
     mmap
+    mprotect
     msvcrt
     nanosleep
     PeekNamedPipe
@@ -3540,6 +3541,7 @@ check_func  localtime_r
 check_func  ${malloc_prefix}memalign            && enable memalign
 check_func  mkstemp
 check_func  mmap
+check_func  mprotect
 check_func  ${malloc_prefix}posix_memalign      && enable posix_memalign
 check_func_headers malloc.h _aligned_malloc     && enable aligned_malloc
 check_func  setrlimit
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 84a1d0ba676a34de35da9f7aac394faa8ea99bda..0b3a19af1936ef718e123b958562502cf3cdfd38 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1066,7 +1066,7 @@ typedef struct AVFrame {
      * extended_data must be used by the decoder in order to access all
      * channels.
      *
-     * encoding: unused
+     * encoding: set by user
      * decoding: set by AVCodecContext.get_buffer()
      */
     uint8_t **extended_data;
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index d04011e4687bd3c9bd823ebe9bc6a81b16f6fada..c36bb9ee2b4d39fed1835ee99680db937f0659a8 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -195,7 +195,7 @@ static int init_offset(ShortenContext *s)
             break;
         case TYPE_S16HL:
         case TYPE_S16LH:
-            s->avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+            s->avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
             break;
         default:
             av_log(s->avctx, AV_LOG_ERROR, "unknown audio type\n");
@@ -587,11 +587,11 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
                     av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
                     return ret;
                 }
-                samples_u8  = (uint8_t *)s->frame.data[0];
-                samples_s16 = (int16_t *)s->frame.data[0];
-                /* interleave output */
-                for (i = 0; i < s->blocksize; i++) {
-                    for (chan = 0; chan < s->channels; chan++) {
+
+                for (chan = 0; chan < s->channels; chan++) {
+                    samples_u8  = ((uint8_t **)s->frame.extended_data)[chan];
+                    samples_s16 = ((int16_t **)s->frame.extended_data)[chan];
+                    for (i = 0; i < s->blocksize; i++) {
                         switch (s->internal_ftype) {
                         case TYPE_U8:
                             *samples_u8++ = av_clip_uint8(s->decoded[chan][i]);
@@ -604,6 +604,7 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
                     }
                 }
 
+
                 *got_frame_ptr   = 1;
                 *(AVFrame *)data = s->frame;
             }
@@ -655,4 +656,6 @@ AVCodec ff_shorten_decoder = {
     .decode         = shorten_decode_frame,
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("Shorten"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index 1bcf0da453d1ae8d7d9c5a085bcc14aa4407fb1b..ca12d24031f9a1876a9ed11dd05e4f5f9175605a 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -48,20 +48,6 @@
 static void wma_lsp_to_curve_init(WMACodecContext *s, int frame_len);
 
 #ifdef TRACE
-static void dump_shorts(WMACodecContext *s, const char *name, const short *tab, int n)
-{
-    int i;
-
-    tprintf(s->avctx, "%s[%d]:\n", name, n);
-    for(i=0;i<n;i++) {
-        if ((i & 7) == 0)
-            tprintf(s->avctx, "%4d: ", i);
-        tprintf(s->avctx, " %5d.0", tab[i]);
-        if ((i & 7) == 7)
-            tprintf(s->avctx, "\n");
-    }
-}
-
 static void dump_floats(WMACodecContext *s, const char *name, int prec, const float *tab, int n)
 {
     int i;
@@ -112,7 +98,7 @@ static int wma_decode_init(AVCodecContext * avctx)
 
     /* init MDCT */
     for(i = 0; i < s->nb_block_sizes; i++)
-        ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1, 1.0);
+        ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1, 1.0 / 32768.0);
 
     if (s->use_noise_coding) {
         init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(ff_wma_hgain_huffbits),
@@ -128,7 +114,7 @@ static int wma_decode_init(AVCodecContext * avctx)
         wma_lsp_to_curve_init(s, s->frame_len);
     }
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
     avcodec_get_frame_defaults(&s->frame);
     avctx->coded_frame = &s->frame;
@@ -774,10 +760,10 @@ next:
 }
 
 /* decode a frame of frame_len samples */
-static int wma_decode_frame(WMACodecContext *s, int16_t *samples)
+static int wma_decode_frame(WMACodecContext *s, float **samples,
+                            int samples_offset)
 {
-    int ret, n, ch, incr;
-    const float *output[MAX_CHANNELS];
+    int ret, ch;
 
 #ifdef TRACE
     tprintf(s->avctx, "***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len);
@@ -794,20 +780,19 @@ static int wma_decode_frame(WMACodecContext *s, int16_t *samples)
             break;
     }
 
-    /* convert frame to integer */
-    n = s->frame_len;
-    incr = s->nb_channels;
-    for (ch = 0; ch < MAX_CHANNELS; ch++)
-        output[ch] = s->frame_out[ch];
-    s->fmt_conv.float_to_int16_interleave(samples, output, n, incr);
-    for (ch = 0; ch < incr; ch++) {
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        /* copy current block to output */
+        memcpy(samples[ch] + samples_offset, s->frame_out[ch],
+               s->frame_len * sizeof(*s->frame_out[ch]));
         /* prepare for next block */
-        memmove(&s->frame_out[ch][0], &s->frame_out[ch][n], n * sizeof(float));
-    }
+        memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
+                s->frame_len * sizeof(*s->frame_out[ch]));
 
 #ifdef TRACE
-    dump_shorts(s, "samples", samples, n * s->nb_channels);
+        dump_floats(s, "samples", 6, samples[ch] + samples_offset, s->frame_len);
 #endif
+    }
+
     return 0;
 }
 
@@ -819,7 +804,8 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
     WMACodecContext *s = avctx->priv_data;
     int nb_frames, bit_offset, i, pos, len, ret;
     uint8_t *q;
-    int16_t *samples;
+    float **samples;
+    int samples_offset;
 
     tprintf(avctx, "***decode_superframe:\n");
 
@@ -852,7 +838,8 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return ret;
     }
-    samples = (int16_t *)s->frame.data[0];
+    samples = (float **)s->frame.extended_data;
+    samples_offset = 0;
 
     if (s->use_bit_reservoir) {
         bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3);
@@ -886,9 +873,9 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
                 skip_bits(&s->gb, s->last_bitoffset);
             /* this frame is stored in the last superframe and in the
                current one */
-            if (wma_decode_frame(s, samples) < 0)
+            if (wma_decode_frame(s, samples, samples_offset) < 0)
                 goto fail;
-            samples += s->nb_channels * s->frame_len;
+            samples_offset += s->frame_len;
             nb_frames--;
         }
 
@@ -903,9 +890,9 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
 
         s->reset_block_lengths = 1;
         for(i=0;i<nb_frames;i++) {
-            if (wma_decode_frame(s, samples) < 0)
+            if (wma_decode_frame(s, samples, samples_offset) < 0)
                 goto fail;
-            samples += s->nb_channels * s->frame_len;
+            samples_offset += s->frame_len;
         }
 
         /* we copy the end of the frame in the last frame buffer */
@@ -921,9 +908,9 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
         memcpy(s->last_superframe, buf + pos, len);
     } else {
         /* single frame decode */
-        if (wma_decode_frame(s, samples) < 0)
+        if (wma_decode_frame(s, samples, samples_offset) < 0)
             goto fail;
-        samples += s->nb_channels * s->frame_len;
+        samples_offset += s->frame_len;
     }
 
     av_dlog(s->avctx, "%d %d %d %d outbytes:%td eaten:%d\n",
@@ -960,6 +947,8 @@ AVCodec ff_wmav1_decoder = {
     .flush          = flush,
     .capabilities   = CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
 #if CONFIG_WMAV2_DECODER
@@ -974,5 +963,7 @@ AVCodec ff_wmav2_decoder = {
     .flush          = flush,
     .capabilities   = CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_NONE },
 };
 #endif
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 7e09fd0689040c0defb8f3fb9ef25eb95bf396ed..6ec1fb43800786804437d08d5ae20704c7ee8892 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -186,9 +186,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
         channel_mask       = AV_RL32(edata_ptr +  2);
         s->bits_per_sample = AV_RL16(edata_ptr);
         if (s->bits_per_sample == 16)
-            avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+            avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
         else if (s->bits_per_sample == 24) {
-            avctx->sample_fmt = AV_SAMPLE_FMT_S32;
+            avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
             av_log_missing_feature(avctx, "bit-depth higher than 16", 0);
             return AVERROR_PATCHWELCOME;
         } else {
@@ -984,11 +984,9 @@ static int decode_subframe(WmallDecodeCtx *s)
 
         for (j = 0; j < subframe_len; j++) {
             if (s->bits_per_sample == 16) {
-                *s->samples_16[c] = (int16_t) s->channel_residues[c][j] << padding_zeroes;
-                s->samples_16[c] += s->num_channels;
+                *s->samples_16[c]++ = (int16_t) s->channel_residues[c][j] << padding_zeroes;
             } else {
-                *s->samples_32[c] = s->channel_residues[c][j] << padding_zeroes;
-                s->samples_32[c] += s->num_channels;
+                *s->samples_32[c]++ = s->channel_residues[c][j] << padding_zeroes;
             }
         }
     }
@@ -1025,8 +1023,8 @@ static int decode_frame(WmallDecodeCtx *s)
         return ret;
     }
     for (i = 0; i < s->num_channels; i++) {
-        s->samples_16[i] = (int16_t *)s->frame.data[0] + i;
-        s->samples_32[i] = (int32_t *)s->frame.data[0] + i;
+        s->samples_16[i] = (int16_t *)s->frame.extended_data[i];
+        s->samples_32[i] = (int32_t *)s->frame.extended_data[i];
     }
 
     /* get frame length */
@@ -1296,4 +1294,7 @@ AVCodec ff_wmalossless_decoder = {
     .flush          = flush,
     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1 | CODEC_CAP_DELAY,
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio Lossless"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_S32P,
+                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index 9de8c3861b53686caaea66bb0ab42187f142cbc6..4d15e458753e1a0a275a6f9aff1042a1782fafc0 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -94,7 +94,6 @@
 #include "put_bits.h"
 #include "wmaprodata.h"
 #include "dsputil.h"
-#include "fmtconvert.h"
 #include "sinewin.h"
 #include "wma.h"
 #include "wma_common.h"
@@ -171,7 +170,6 @@ typedef struct WMAProDecodeCtx {
     AVCodecContext*  avctx;                         ///< codec context for av_log
     AVFrame          frame;                         ///< AVFrame for decoded output
     DSPContext       dsp;                           ///< accelerated DSP functions
-    FmtConvertContext fmt_conv;
     uint8_t          frame_data[MAX_FRAMESIZE +
                       FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
     PutBitContext    pb;                            ///< context for filling the frame_data buffer
@@ -283,10 +281,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     s->avctx = avctx;
     ff_dsputil_init(&s->dsp, avctx);
-    ff_fmt_convert_init(&s->fmt_conv, avctx);
     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
     if (avctx->extradata_size >= 18) {
         s->decode_flags    = AV_RL16(edata_ptr+14);
@@ -1310,8 +1307,6 @@ static int decode_frame(WMAProDecodeCtx *s, int *got_frame_ptr)
     int more_frames = 0;
     int len = 0;
     int i, ret;
-    const float *out_ptr[WMAPRO_MAX_CHANNELS];
-    float *samples;
 
     /** get frame length */
     if (s->len_prefix)
@@ -1384,13 +1379,11 @@ static int decode_frame(WMAProDecodeCtx *s, int *got_frame_ptr)
         s->packet_loss = 1;
         return 0;
     }
-    samples = (float *)s->frame.data[0];
 
-    /** interleave samples and write them to the output buffer */
+    /** copy samples to the output buffer */
     for (i = 0; i < s->num_channels; i++)
-        out_ptr[i] = s->channel[i].out;
-    s->fmt_conv.float_interleave(samples, out_ptr, s->samples_per_frame,
-                                 s->num_channels);
+        memcpy(s->frame.extended_data[i], s->channel[i].out,
+               s->samples_per_frame * sizeof(*s->channel[i].out));
 
     for (i = 0; i < s->num_channels; i++) {
         /** reuse second half of the IMDCT output for the next frame */
@@ -1643,4 +1636,6 @@ AVCodec ff_wmapro_decoder = {
     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
     .flush          = flush,
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 3310d78dc7481240a094dc42876e7ce3c94e3a7b..24058c3b0cbc5d73c27cfe85a114903ed26efc1b 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1070,6 +1070,16 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         }
     }
 
+/* Select the mmap()/mprotect() path for the MMX2 filter code buffers only
+ * when every piece is available. The test is done in a real #if/#else
+ * because a macro whose expansion contains "defined" has undefined
+ * behavior when it is evaluated in an #if directive (C99 6.10.1p4). */
+#if HAVE_MMAP && HAVE_MPROTECT && defined(MAP_ANONYMOUS)
+#define USE_MMAP 1
+#else
+#define USE_MMAP 0
+#endif
+
     /* precalculate horizontal scaler filter coefficients */
     {
 #if HAVE_MMXEXT_INLINE
@@ -1080,7 +1082,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
             c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc,
                                                        NULL, NULL, NULL, 4);
 
-#ifdef MAP_ANONYMOUS
+#if USE_MMAP
             c->lumMmx2FilterCode = mmap(NULL, c->lumMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
             c->chrMmx2FilterCode = mmap(NULL, c->chrMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 #elif HAVE_VIRTUALALLOC
@@ -1111,7 +1113,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
             initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode,
                             c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
 
-#ifdef MAP_ANONYMOUS
+#if USE_MMAP
             mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
 #endif
@@ -1698,7 +1700,7 @@ void sws_freeContext(SwsContext *c)
     av_freep(&c->hChrFilterPos);
 
 #if HAVE_MMX_INLINE
-#ifdef MAP_ANONYMOUS
+#if USE_MMAP
     if (c->lumMmx2FilterCode)
         munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize);
     if (c->chrMmx2FilterCode)