diff --git a/configure b/configure
index 00f2ebe83a713243b25f5fb11b9a539f1c63dc85..3ae7a55195d94eac19ec53b8f2b9caa8eca14161 100755
--- a/configure
+++ b/configure
@@ -205,7 +205,6 @@ External library support:
   --enable-libxvid         enable Xvid encoding via xvidcore,
                            native MPEG-4/Xvid encoder exists [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
-  --enable-mlib            enable Sun medialib [no]
   --enable-openssl         enable openssl [no]
   --enable-zlib            enable zlib [autodetect]
 
@@ -1062,7 +1061,6 @@ CONFIG_LIST="
     lsp
     mdct
     memalign_hack
-    mlib
     mpegaudiodsp
     network
     nonfree
@@ -3179,7 +3177,6 @@ enabled openal     && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
                         die "ERROR: openal not found"; } &&
                       { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
                         die "ERROR: openal version must be 1.1 or compatible"; }
-enabled mlib       && require  mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib
 enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
                         check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
                         check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
@@ -3478,7 +3475,6 @@ echo "network support           ${network-no}"
 echo "threading support         ${thread_type-no}"
 echo "safe bitstream reader     ${safe_bitstream_reader-no}"
 echo "SDL support               ${sdl-no}"
-echo "Sun medialib support      ${mlib-no}"
 echo "libdxva2 enabled          ${dxva2-no}"
 echo "libva enabled             ${vaapi-no}"
 echo "libvdpau enabled          ${vdpau-no}"
diff --git a/doc/APIchanges b/doc/APIchanges
index 9a1d2ebab088e49b2440e8b330a10c628ca60ec3..7a3117e58c7ed91d0c0579995a74e3b5dd753681 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -25,6 +25,12 @@ API changes, most recent first:
 2012-01-24 - xxxxxxx - lavfi 2.60.100
   Add avfilter_graph_dump.
 
+2012-02-xx - xxxxxxx - lavu 51.22.1 - pixdesc.h
+  Add PIX_FMT_PSEUDOPAL flag.
+
+2012-02-01 - xxxxxxx - lavc 54.01.0
+  Add avcodec_encode_video2() and deprecate avcodec_encode_video().
+
 2012-02-01 - 316fc74 - lavc 54.01.0
   Add av_fast_padded_malloc() as alternative for av_realloc() when aligned
   memory is required. The buffer will always have FF_INPUT_BUFFER_PADDING_SIZE
diff --git a/ffmpeg.c b/ffmpeg.c
index f34d3dc15d1554e6cd30557c7610a242cfe9723c..7365cfbdf3f2e0d8cad6a488a37449ec5f586c5e 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1385,9 +1385,6 @@ static void do_subtitle_out(AVFormatContext *s,
     }
 }
 
-static int bit_buffer_size = 1024 * 256;
-static uint8_t *bit_buffer = NULL;
-
 static void do_video_resample(OutputStream *ost,
                               InputStream *ist,
                               AVFrame *in_picture,
@@ -1513,6 +1510,8 @@ static void do_video_out(AVFormatContext *s,
     for (i = 0; i < nb_frames; i++) {
         AVPacket pkt;
         av_init_packet(&pkt);
+        pkt.data = NULL;
+        pkt.size = 0;
 
         if (s->oformat->flags & AVFMT_RAWPICTURE &&
             enc->codec->id == CODEC_ID_RAWVIDEO) {
@@ -1528,6 +1527,7 @@ static void do_video_out(AVFormatContext *s,
 
             write_frame(s, &pkt, ost);
         } else {
+            int got_packet;
             AVFrame big_picture;
 
             big_picture = *final_picture;
@@ -1552,29 +1552,27 @@ static void do_video_out(AVFormatContext *s,
                 big_picture.pict_type = AV_PICTURE_TYPE_I;
                 ost->forced_kf_index++;
             }
-            ret = avcodec_encode_video(enc,
-                                       bit_buffer, bit_buffer_size,
-                                       &big_picture);
+            ret = avcodec_encode_video2(enc, &pkt, &big_picture, &got_packet);
             if (ret < 0) {
                 av_log(NULL, AV_LOG_FATAL, "Video encoding failed\n");
                 exit_program(1);
             }
 
-            if (ret > 0) {
-                pkt.data = bit_buffer;
-                pkt.size = ret;
-                if (!(enc->codec->capabilities & CODEC_CAP_DELAY))
-                    pkt.pts = av_rescale_q(ost->sync_opts, enc->time_base, ost->st->time_base);
-                if (enc->coded_frame->pts != AV_NOPTS_VALUE)
-                    pkt.pts = av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base);
+            if (got_packet) {
+                if (pkt.pts == AV_NOPTS_VALUE && !(enc->codec->capabilities & CODEC_CAP_DELAY))
+                    pkt.pts = ost->sync_opts;
+
+                if (pkt.pts != AV_NOPTS_VALUE)
+                    pkt.pts = av_rescale_q(pkt.pts, enc->time_base, ost->st->time_base);
+                if (pkt.dts != AV_NOPTS_VALUE)
+                    pkt.dts = av_rescale_q(pkt.dts, enc->time_base, ost->st->time_base);
 
-                if (enc->coded_frame->key_frame)
-                    pkt.flags |= AV_PKT_FLAG_KEY;
                 if (format_video_sync == VSYNC_DROP)
                     pkt.pts = pkt.dts = AV_NOPTS_VALUE;
+
                 write_frame(s, &pkt, ost);
-                *frame_size = ret;
-                video_size += ret;
+                *frame_size = pkt.size;
+                video_size += pkt.size;
 
                 /* if two pass, output log */
                 if (ost->logfile && enc->stats_out) {
@@ -1789,7 +1787,7 @@ static void flush_encoders(OutputStream *ost_table, int nb_ostreams)
 
         for (;;) {
             AVPacket pkt;
-            int fifo_bytes;
+            int fifo_bytes, got_packet;
             av_init_packet(&pkt);
             pkt.data = NULL;
             pkt.size = 0;
@@ -1822,25 +1820,23 @@ static void flush_encoders(OutputStream *ost_table, int nb_ostreams)
                 }
                 break;
             case AVMEDIA_TYPE_VIDEO:
-                ret = avcodec_encode_video(enc, bit_buffer, bit_buffer_size, NULL);
+                ret = avcodec_encode_video2(enc, &pkt, NULL, &got_packet);
                 if (ret < 0) {
                     av_log(NULL, AV_LOG_FATAL, "Video encoding failed\n");
                     exit_program(1);
                 }
-                video_size += ret;
-                if (enc->coded_frame && enc->coded_frame->key_frame)
-                    pkt.flags |= AV_PKT_FLAG_KEY;
+                video_size += pkt.size;
                 if (ost->logfile && enc->stats_out) {
                     fprintf(ost->logfile, "%s", enc->stats_out);
                 }
-                if (ret <= 0) {
+                if (!got_packet) {
                     stop_encoding = 1;
                     break;
                 }
-                pkt.data = bit_buffer;
-                pkt.size = ret;
-                if (enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE)
-                    pkt.pts = av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base);
+                if (pkt.pts != AV_NOPTS_VALUE)
+                    pkt.pts = av_rescale_q(pkt.pts, enc->time_base, ost->st->time_base);
+                if (pkt.dts != AV_NOPTS_VALUE)
+                    pkt.dts = av_rescale_q(pkt.dts, enc->time_base, ost->st->time_base);
                 write_frame(os, &pkt, ost);
                 break;
             default:
@@ -2668,19 +2664,6 @@ static int transcode_init(OutputFile *output_files, int nb_output_files,
                 }
             }
         }
-        if (codec->codec_type == AVMEDIA_TYPE_VIDEO) {
-            /* maximum video buffer size is 8-bytes per pixel, plus DPX header size (1664)*/
-            int size        = codec->width * codec->height;
-            bit_buffer_size = FFMAX(bit_buffer_size, 9*size + 10000);
-        }
-    }
-
-    if (!bit_buffer)
-        bit_buffer = av_malloc(bit_buffer_size);
-    if (!bit_buffer) {
-        av_log(NULL, AV_LOG_ERROR, "Cannot allocate %d bytes output buffer\n",
-               bit_buffer_size);
-        return AVERROR(ENOMEM);
     }
 
     /* open each encoder */
@@ -3118,7 +3101,6 @@ static int transcode(OutputFile *output_files, int nb_output_files,
     ret = 0;
 
  fail:
-    av_freep(&bit_buffer);
     av_freep(&no_packet);
 
     if (output_streams) {
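
Since avcodec_encode_video2() returns timestamps in the encoder's time base, do_video_out() and flush_encoders() above now rescale pkt.pts/pkt.dts into the output stream's time base with av_rescale_q(). As a standalone illustration of that arithmetic (not part of the patch), a pts of 50 in a 1/25 encoder time base lands at 180000 in a 90 kHz stream time base:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <libavutil/mathematics.h>

    int main(void)
    {
        AVRational enc_tb = { 1, 25 };     /* encoder time base, 25 fps */
        AVRational st_tb  = { 1, 90000 };  /* stream time base, 90 kHz  */
        /* 50 * (1/25) s = 2 s = 180000 ticks of 1/90000 */
        int64_t pts = av_rescale_q(50, enc_tb, st_tb);
        printf("%"PRId64"\n", pts);        /* prints 180000 */
        return 0;
    }
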
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7a8ceb6114f45066644d6784bebfd6a84f6f44f8..27372f3b9ab05e23e8a29c77abff2ff0333790eb 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -718,8 +718,6 @@ OBJS-$(HAVE_PTHREADS)                  += pthread.o
 OBJS-$(HAVE_W32THREADS)                += pthread.o
 OBJS-$(HAVE_OS2THREADS)                += pthread.o
 
-OBJS-$(CONFIG_MLIB)                    += mlib/dsputil_mlib.o           \
-
 # inverse.o contains the ff_inverse table definition, which is used by
 # the FASTDIV macro (from libavutil); since referencing the external
 # table has a negative effect on performance, copy it in libavcodec as
@@ -749,7 +747,7 @@ HOSTPROGS = aac_tablegen aacps_tablegen cbrt_tablegen cos_tablegen      \
             dv_tablegen motionpixels_tablegen mpegaudio_tablegen        \
             pcm_tablegen qdm2_tablegen sinewin_tablegen
 
-DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
+DIRS = alpha arm bfin ppc ps2 sh4 sparc x86
 
 CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
 
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 77ee59f0abc783cdf2b2ac08ef396d50dce7ae41..4e591222fcdc32684c1236962c1cdc397510a464 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -189,10 +189,10 @@ static int count_channels(enum ChannelPosition che_pos[4][MAX_ELEM_ID])
  * @return  Returns error status. 0 - OK, !0 - error
  */
 static av_cold int che_configure(AACContext *ac,
-                                 enum ChannelPosition che_pos[4][MAX_ELEM_ID],
+                                 enum ChannelPosition che_pos,
                                  int type, int id, int *channels)
 {
-    if (che_pos[type][id]) {
+    if (che_pos) {
         if (!ac->che[type][id]) {
             if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
                 return AVERROR(ENOMEM);
@@ -222,22 +222,21 @@ static av_cold int che_configure(AACContext *ac,
  * @return  Returns error status. 0 - OK, !0 - error
  */
 static av_cold int output_configure(AACContext *ac,
-                                    enum ChannelPosition che_pos[4][MAX_ELEM_ID],
                                     enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
                                     int channel_config, enum OCStatus oc_type)
 {
     AVCodecContext *avctx = ac->avctx;
     int i, type, channels = 0, ret;
 
-    if (new_che_pos != che_pos)
-    memcpy(che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
+    if (new_che_pos)
+        memcpy(ac->che_pos, new_che_pos, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
 
     if (channel_config) {
         for (i = 0; i < tags_per_config[channel_config]; i++) {
-            if ((ret = che_configure(ac, che_pos,
-                                     aac_channel_layout_map[channel_config - 1][i][0],
-                                     aac_channel_layout_map[channel_config - 1][i][1],
-                                     &channels)))
+            int id = aac_channel_layout_map[channel_config - 1][i][1];
+            type = aac_channel_layout_map[channel_config - 1][i][0];
+            if ((ret = che_configure(ac, ac->che_pos[type][id],
+                                     type, id, &channels)))
                 return ret;
         }
 
@@ -249,14 +248,12 @@ static av_cold int output_configure(AACContext *ac,
          * current program configuration.
          *
          * Set up default 1:1 output mapping.
-         *
-         * For a 5.1 stream the output order will be:
-         *    [ Center ] [ Front Left ] [ Front Right ] [ LFE ] [ Surround Left ] [ Surround Right ]
          */
 
         for (i = 0; i < MAX_ELEM_ID; i++) {
             for (type = 0; type < 4; type++) {
-                if ((ret = che_configure(ac, che_pos, type, i, &channels)))
+                if ((ret = che_configure(ac, ac->che_pos[type][i],
+                                         type, i, &channels)))
                     return ret;
             }
         }
@@ -456,7 +453,7 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
     } else if (m4ac->sbr == 1 && m4ac->ps == -1)
         m4ac->ps = 1;
 
-    if (ac && (ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR)))
+    if (ac && (ret = output_configure(ac, new_che_pos, channel_config, OC_GLOBAL_HDR)))
         return ret;
 
     if (extension_flag) {
@@ -629,7 +626,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
         if (ac->m4ac.chan_config) {
             int ret = set_default_channel_config(avctx, new_che_pos, ac->m4ac.chan_config);
             if (!ret)
-                output_configure(ac, ac->che_pos, new_che_pos, ac->m4ac.chan_config, OC_GLOBAL_HDR);
+                output_configure(ac, new_che_pos, ac->m4ac.chan_config, OC_GLOBAL_HDR);
             else if (avctx->err_recognition & AV_EF_EXPLODE)
                 return AVERROR_INVALIDDATA;
         }
@@ -1733,7 +1730,7 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
         } else if (ac->m4ac.ps == -1 && ac->output_configured < OC_LOCKED && ac->avctx->channels == 1) {
             ac->m4ac.sbr = 1;
             ac->m4ac.ps = 1;
-            output_configure(ac, ac->che_pos, ac->che_pos, ac->m4ac.chan_config, ac->output_configured);
+            output_configure(ac, NULL, ac->m4ac.chan_config, ac->output_configured);
         } else {
             ac->m4ac.sbr = 1;
         }
@@ -2116,7 +2113,7 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
             ac->m4ac.chan_config = hdr_info.chan_config;
             if (set_default_channel_config(ac->avctx, new_che_pos, hdr_info.chan_config))
                 return -7;
-            if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config,
+            if (output_configure(ac, new_che_pos, hdr_info.chan_config,
                                  FFMAX(ac->output_configured, OC_TRIAL_FRAME)))
                 return -7;
         } else if (ac->output_configured != OC_LOCKED) {
@@ -2176,7 +2173,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
 
                 if (set_default_channel_config(ac->avctx, new_che_pos, 2)<0)
                     return -1;
-                if (output_configure(ac, ac->che_pos, new_che_pos, 2, OC_TRIAL_FRAME)<0)
+                if (output_configure(ac, new_che_pos, 2, OC_TRIAL_FRAME)<0)
                     return -1;
             }
             if (!(che=get_che(ac, elem_type, elem_id))) {
@@ -2220,7 +2217,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
             if (ac->output_configured > OC_TRIAL_PCE)
                 av_log(avctx, AV_LOG_INFO,
                        "Evaluating a further program_config_element.\n");
-            err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE);
+            err = output_configure(ac, new_che_pos, 0, OC_TRIAL_PCE);
             if (!err)
                 ac->m4ac.chan_config = 0;
             break;
diff --git a/libavcodec/aacdectab.h b/libavcodec/aacdectab.h
index 442ef9165487540774fce7589e4a31965dc2c965..5f95dad8463b573aa09e6efa3ad30e2a3331de49 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aacdectab.h
@@ -87,7 +87,7 @@ static const uint8_t aac_channel_layout_map[7][5][2] = {
     { { TYPE_CPE, 0 }, { TYPE_SCE, 0 }, { TYPE_SCE, 1 }, },
     { { TYPE_CPE, 0 }, { TYPE_SCE, 0 }, { TYPE_CPE, 1 }, },
     { { TYPE_CPE, 0 }, { TYPE_SCE, 0 }, { TYPE_LFE, 0 }, { TYPE_CPE, 1 }, },
-    { { TYPE_CPE, 0 }, { TYPE_SCE, 0 }, { TYPE_LFE, 0 }, { TYPE_CPE, 2 }, { TYPE_CPE, 1 }, },
+    { { TYPE_CPE, 1 }, { TYPE_SCE, 0 }, { TYPE_LFE, 0 }, { TYPE_CPE, 2 }, { TYPE_CPE, 0 }, },
 };
 
 static const uint64_t aac_channel_layout[8] = {
@@ -97,7 +97,7 @@ static const uint64_t aac_channel_layout[8] = {
     AV_CH_LAYOUT_4POINT0,
     AV_CH_LAYOUT_5POINT0_BACK,
     AV_CH_LAYOUT_5POINT1_BACK,
-    AV_CH_LAYOUT_7POINT1_WIDE,
+    AV_CH_LAYOUT_7POINT1_WIDE_BACK,
     0,
 };
 
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 8513cebde10eb1878ab1610477f551df07d837ef..b496d729489c2953eda30d60ba840099c3a261e4 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1735,7 +1735,6 @@ typedef struct AVCodecContext {
 #define FF_DCT_FASTINT 1
 #define FF_DCT_INT     2
 #define FF_DCT_MMX     3
-#define FF_DCT_MLIB    4
 #define FF_DCT_ALTIVEC 5
 #define FF_DCT_FAAN    6
 
@@ -1786,7 +1785,6 @@ typedef struct AVCodecContext {
 #define FF_IDCT_SIMPLEMMX     3
 #define FF_IDCT_LIBMPEG2MMX   4
 #define FF_IDCT_PS2           5
-#define FF_IDCT_MLIB          6
 #define FF_IDCT_ARM           7
 #define FF_IDCT_ALTIVEC       8
 #define FF_IDCT_SH4           9
@@ -3960,7 +3958,10 @@ int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
                              enum AVSampleFormat sample_fmt, const uint8_t *buf,
                              int buf_size, int align);
 
+#if FF_API_OLD_ENCODE_VIDEO
 /**
+ * @deprecated use avcodec_encode_video2() instead.
+ *
  * Encode a video frame from pict into buf.
  * The input picture should be
  * stored using a specific format, namely avctx.pix_fmt.
@@ -3972,8 +3973,44 @@ int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
  * @return On error a negative value is returned, on success zero or the number
  * of bytes used from the output buffer.
  */
+attribute_deprecated
 int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                          const AVFrame *pict);
+#endif
+
+/**
+ * Encode a frame of video.
+ *
+ * Takes input raw video data from frame and writes the next output packet, if
+ * available, to avpkt. The output packet does not necessarily contain data for
+ * the most recent frame, as encoders can delay and reorder input frames
+ * internally as needed.
+ *
+ * @param avctx     codec context
+ * @param avpkt     output AVPacket.
+ *                  The user can supply an output buffer by setting
+ *                  avpkt->data and avpkt->size prior to calling the
+ *                  function, but if the size of the user-provided data is not
+ *                  large enough, encoding will fail. All other AVPacket fields
+ *                  will be reset by the encoder using av_init_packet(). If
+ *                  avpkt->data is NULL, the encoder will allocate it.
+ *                  The encoder will set avpkt->size to the size of the
+ *                  output packet. The returned data (if any) belongs to the
+ *                  caller, who is responsible for freeing it.
+ * @param[in] frame AVFrame containing the raw video data to be encoded.
+ *                  May be NULL when flushing an encoder that has the
+ *                  CODEC_CAP_DELAY capability set.
+ * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
+ *                            output packet is non-empty, and to 0 if it is
+ *                            empty. If the function returns an error, the
+ *                            packet can be assumed to be invalid, and the
+ *                            value of got_packet_ptr is undefined and should
+ *                            not be used.
+ * @return          0 on success, negative error code on failure
+ */
+int avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt,
+                          const AVFrame *frame, int *got_packet_ptr);
+
 int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                             const AVSubtitle *sub);
 
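For illustration, a minimal caller-side sketch of the new API, mirroring what ffmpeg.c now does (not part of the patch; consume_packet() is a hypothetical stand-in for muxing or otherwise writing the data):

    #include <libavcodec/avcodec.h>

    /* Encode one raw frame with avcodec_encode_video2(); enc is an opened
     * video encoder context, frame may be NULL only when flushing a
     * CODEC_CAP_DELAY encoder. */
    static int encode_one_frame(AVCodecContext *enc, const AVFrame *frame,
                                void (*consume_packet)(AVPacket *))
    {
        AVPacket pkt;
        int got_packet = 0, ret;

        av_init_packet(&pkt);
        pkt.data = NULL;    /* let the encoder allocate the output buffer */
        pkt.size = 0;

        ret = avcodec_encode_video2(enc, &pkt, frame, &got_packet);
        if (ret < 0)
            return ret;     /* encoding failed */

        if (got_packet) {   /* may be 0 if the encoder delayed the frame */
            consume_packet(&pkt);  /* pkt.data now belongs to the caller */
            av_free_packet(&pkt);  /* release it once it has been written */
        }
        return 0;
    }
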
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index e80d48de4e83ab1a2694d711135839e852bad9e2..dfa526a5cbf7cb3ddf98327e8d9bd9b66716b6b5 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -3184,7 +3184,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 
     if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
     if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
-    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
     if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
     if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
     if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index a44c146af9ad73f2862b5b131e1be8013110dead..3584a49a04e205eff017dc272cbfba7d4fe3ceb0 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -644,7 +644,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
@@ -656,25 +655,9 @@ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
 void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
 
-
-#if ARCH_ARM
-
-
-#if HAVE_NEON
+#if (ARCH_ARM && HAVE_NEON) || ARCH_PPC || HAVE_MMI || HAVE_MMX
 #   define STRIDE_ALIGN 16
-#endif
-
-#elif ARCH_PPC
-
-#define STRIDE_ALIGN 16
-
-#elif HAVE_MMI
-
-#define STRIDE_ALIGN 16
-
-#endif
-
-#ifndef STRIDE_ALIGN
+#else
 #   define STRIDE_ALIGN 8
 #endif
 
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 51f6b0ed935a75f768d65c86ba2bd9b9c86ff47a..058a0d5c48609c166b52afd401460204b887a251 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -361,15 +361,9 @@ int avpicture_get_size(enum PixelFormat pix_fmt, int width, int height)
     AVPicture dummy_pict;
     if(av_image_check_size(width, height, 0, NULL))
         return -1;
-    switch (pix_fmt) {
-    case PIX_FMT_RGB8:
-    case PIX_FMT_BGR8:
-    case PIX_FMT_RGB4_BYTE:
-    case PIX_FMT_BGR4_BYTE:
-    case PIX_FMT_GRAY8:
+    if (av_pix_fmt_descriptors[pix_fmt].flags & PIX_FMT_PSEUDOPAL)
         // do not include palette for these pseudo-paletted formats
         return width * height;
-    }
     return avpicture_fill(&dummy_pict, NULL, pix_fmt, width, height);
 }
 
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 1380e0a438b191e02d5a890cadb319db285a01a2..6279612c82c1b1633bc3b4d2d43fbdb4152352ac 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -88,12 +88,23 @@ static void X264_log(void *p, int level, const char *fmt, va_list args)
 }
 
 
-static int encode_nals(AVCodecContext *ctx, uint8_t *buf, int size,
-                       x264_nal_t *nals, int nnal, int skip_sei)
+static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
+                       x264_nal_t *nals, int nnal)
 {
     X264Context *x4 = ctx->priv_data;
-    uint8_t *p = buf;
-    int i;
+    uint8_t *p;
+    int i, size = x4->sei_size, ret;
+
+    if (!nnal)
+        return 0;
+
+    for (i = 0; i < nnal; i++)
+        size += nals[i].i_payload;
+
+    if ((ret = ff_alloc_packet(pkt, size)) < 0)
+        return ret;
+
+    p = pkt->data;
 
     /* Write the SEI as part of the first frame. */
     if (x4->sei_size > 0 && nnal > 0) {
@@ -108,23 +119,11 @@ static int encode_nals(AVCodecContext *ctx, uint8_t *buf, int size,
     }
 
     for (i = 0; i < nnal; i++){
-        /* Don't put the SEI in extradata. */
-        if (skip_sei && nals[i].i_type == NAL_SEI) {
-            x4->sei_size = nals[i].i_payload;
-            x4->sei      = av_malloc(x4->sei_size);
-            memcpy(x4->sei, nals[i].p_payload, nals[i].i_payload);
-            continue;
-        }
-        if (nals[i].i_payload > (size - (p - buf))) {
-            // return only complete nals which fit in buf
-            av_log(ctx, AV_LOG_ERROR, "Error: nal buffer is too small\n");
-            break;
-        }
         memcpy(p, nals[i].p_payload, nals[i].i_payload);
         p += nals[i].i_payload;
     }
 
-    return p - buf;
+    return 1;
 }
 
 static int avfmt2_num_planes(int avfmt)
@@ -146,15 +145,13 @@ static int avfmt2_num_planes(int avfmt)
     }
 }
 
-static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
-                      int orig_bufsize, void *data)
+static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
+                      int *got_packet)
 {
     X264Context *x4 = ctx->priv_data;
-    AVFrame *frame = data;
     x264_nal_t *nal;
-    int nnal, i;
+    int nnal, i, ret;
     x264_picture_t pic_out;
-    int bufsize;
 
     x264_picture_init( &x4->pic );
     x4->pic.img.i_csp   = x4->params.i_csp;
@@ -187,17 +184,16 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
     }
 
     do {
-        bufsize = orig_bufsize;
         if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
             return -1;
 
-        bufsize = encode_nals(ctx, buf, bufsize, nal, nnal, 0);
-        if (bufsize < 0)
+        ret = encode_nals(ctx, pkt, nal, nnal);
+        if (ret < 0)
             return -1;
-    } while (!bufsize && !frame && x264_encoder_delayed_frames(x4->enc));
+    } while (!ret && !frame && x264_encoder_delayed_frames(x4->enc));
 
-    /* FIXME: libx264 now provides DTS, but AVFrame doesn't have a field for it. */
-    x4->out_pic.pts = pic_out.i_pts;
+    pkt->pts = pic_out.i_pts;
+    pkt->dts = pic_out.i_dts;
 
     switch (pic_out.i_type) {
     case X264_TYPE_IDR:
@@ -213,11 +209,12 @@ static int X264_frame(AVCodecContext *ctx, uint8_t *buf,
         break;
     }
 
-    x4->out_pic.key_frame = pic_out.b_keyframe;
-    if (bufsize)
+    pkt->flags |= AV_PKT_FLAG_KEY*pic_out.b_keyframe;
+    if (ret)
         x4->out_pic.quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
 
-    return bufsize;
+    *got_packet = ret;
+    return 0;
 }
 
 static av_cold int X264_close(AVCodecContext *avctx)
@@ -485,16 +482,25 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
         x264_nal_t *nal;
+        uint8_t *p;
         int nnal, s, i;
 
         s = x264_encoder_headers(x4->enc, &nal, &nnal);
+        avctx->extradata = p = av_malloc(s);
 
-        for (i = 0; i < nnal; i++)
-            if (nal[i].i_type == NAL_SEI)
+        for (i = 0; i < nnal; i++) {
+            /* Don't put the SEI in extradata. */
+            if (nal[i].i_type == NAL_SEI) {
                 av_log(avctx, AV_LOG_INFO, "%s\n", nal[i].p_payload+25);
-
-        avctx->extradata      = av_malloc(s);
-        avctx->extradata_size = encode_nals(avctx, avctx->extradata, s, nal, nnal, 1);
+                x4->sei_size = nal[i].i_payload;
+                x4->sei      = av_malloc(x4->sei_size);
+                memcpy(x4->sei, nal[i].p_payload, nal[i].i_payload);
+                continue;
+            }
+            memcpy(p, nal[i].p_payload, nal[i].i_payload);
+            p += nal[i].i_payload;
+        }
+        avctx->extradata_size = p - avctx->extradata;
     }
 
     return 0;
@@ -634,7 +640,7 @@ AVCodec ff_libx264_encoder = {
     .id             = CODEC_ID_H264,
     .priv_data_size = sizeof(X264Context),
     .init           = X264_init,
-    .encode         = X264_frame,
+    .encode2        = X264_frame,
     .close          = X264_close,
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
     .long_name      = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
deleted file mode 100644
index b5594a9a0306e21442a1f07e2215cac4ab51f704..0000000000000000000000000000000000000000
--- a/libavcodec/mlib/dsputil_mlib.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Sun mediaLib optimized DSP utils
- * Copyright (c) 2001 Fabrice Bellard
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/dsputil.h"
-#include "libavcodec/mpegvideo.h"
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_algebra.h>
-#include <mlib_video.h>
-
-/* misc */
-
-static void get_pixels_mlib(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
-{
-  int i;
-
-  for (i=0;i<8;i++) {
-    mlib_VectorConvert_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)pixels, 8);
-
-    pixels += line_size;
-    block += 8;
-  }
-}
-
-static void diff_pixels_mlib(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int line_size)
-{
-  int i;
-
-  for (i=0;i<8;i++) {
-    mlib_VectorSub_S16_U8_Mod((mlib_s16 *)block, (mlib_u8 *)s1, (mlib_u8 *)s2, 8);
-
-    s1 += line_size;
-    s2 += line_size;
-    block += 8;
-  }
-}
-
-static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int line_size)
-{
-    mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size);
-}
-
-/* put block, width 16 pixel, height 8/16 */
-
-static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
-                               int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoCopyRef_U8_U8_16x8(dest, (uint8_t *)ref, stride);
-    break;
-
-    case 16:
-      mlib_VideoCopyRef_U8_U8_16x16(dest, (uint8_t *)ref, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoInterpX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoInterpY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoInterpXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-/* put block, width 8 pixel, height 4/8/16 */
-
-static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
-                               int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoCopyRef_U8_U8_8x4(dest, (uint8_t *)ref, stride);
-    break;
-
-    case 8:
-      mlib_VideoCopyRef_U8_U8_8x8(dest, (uint8_t *)ref, stride);
-    break;
-
-    case 16:
-      mlib_VideoCopyRef_U8_U8_8x16(dest, (uint8_t *)ref, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoInterpX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 8:
-      mlib_VideoInterpX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoInterpY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 8:
-      mlib_VideoInterpY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoInterpXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 8:
-      mlib_VideoInterpXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-/* average block, width 16 pixel, height 8/16 */
-
-static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
-                               int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoCopyRefAve_U8_U8_16x8(dest, (uint8_t *)ref, stride);
-    break;
-
-    case 16:
-      mlib_VideoCopyRefAve_U8_U8_16x16(dest, (uint8_t *)ref, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoInterpAveX_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpAveX_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoInterpAveY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpAveY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 8:
-      mlib_VideoInterpAveXY_U8_U8_16x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpAveXY_U8_U8_16x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-/* average block, width 8 pixel, height 4/8/16 */
-
-static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
-                               int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoCopyRefAve_U8_U8_8x4(dest, (uint8_t *)ref, stride);
-    break;
-
-    case 8:
-      mlib_VideoCopyRefAve_U8_U8_8x8(dest, (uint8_t *)ref, stride);
-    break;
-
-    case 16:
-      mlib_VideoCopyRefAve_U8_U8_8x16(dest, (uint8_t *)ref, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoInterpAveX_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 8:
-      mlib_VideoInterpAveX_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpAveX_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoInterpAveY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 8:
-      mlib_VideoInterpAveY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpAveY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-                                  int stride, int height)
-{
-  switch (height) {
-    case 4:
-      mlib_VideoInterpAveXY_U8_U8_8x4(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 8:
-      mlib_VideoInterpAveXY_U8_U8_8x8(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    case 16:
-      mlib_VideoInterpAveXY_U8_U8_8x16(dest, (uint8_t *)ref, stride, stride);
-    break;
-
-    default:
-      assert(0);
-  }
-}
-
-/* swap byte order of a buffer */
-
-static void bswap_buf_mlib(uint32_t *dst, const uint32_t *src, int w)
-{
-  mlib_VectorReverseByteOrder_U32_U32(dst, src, w);
-}
-
-/* transformations */
-
-static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data)
-{
-    int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    mlib_VideoIDCT8x8_S16_S16 (data, data);
-
-    for(i=0;i<8;i++) {
-        dest[0] = cm[data[0]];
-        dest[1] = cm[data[1]];
-        dest[2] = cm[data[2]];
-        dest[3] = cm[data[3]];
-        dest[4] = cm[data[4]];
-        dest[5] = cm[data[5]];
-        dest[6] = cm[data[6]];
-        dest[7] = cm[data[7]];
-
-        dest += line_size;
-        data += 8;
-    }
-}
-
-static void ff_idct_add_mlib(uint8_t *dest, int line_size, DCTELEM *data)
-{
-    mlib_VideoIDCT8x8_S16_S16 (data, data);
-    mlib_VideoAddBlock_U8_S16(dest, (mlib_s16 *)data, line_size);
-}
-
-static void ff_idct_mlib(DCTELEM *data)
-{
-    mlib_VideoIDCT8x8_S16_S16 (data, data);
-}
-
-static void ff_fdct_mlib(DCTELEM *data)
-{
-    mlib_VideoDCT8x8_S16_S16 (data, data);
-}
-
-void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
-{
-    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
-    c->diff_pixels = diff_pixels_mlib;
-    c->add_pixels_clamped = add_pixels_clamped_mlib;
-
-    if (!high_bit_depth) {
-    c->get_pixels  = get_pixels_mlib;
-
-    c->put_pixels_tab[0][0] = put_pixels16_mlib;
-    c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
-    c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
-    c->put_pixels_tab[0][3] = put_pixels16_xy2_mlib;
-    c->put_pixels_tab[1][0] = put_pixels8_mlib;
-    c->put_pixels_tab[1][1] = put_pixels8_x2_mlib;
-    c->put_pixels_tab[1][2] = put_pixels8_y2_mlib;
-    c->put_pixels_tab[1][3] = put_pixels8_xy2_mlib;
-
-    c->avg_pixels_tab[0][0] = avg_pixels16_mlib;
-    c->avg_pixels_tab[0][1] = avg_pixels16_x2_mlib;
-    c->avg_pixels_tab[0][2] = avg_pixels16_y2_mlib;
-    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mlib;
-    c->avg_pixels_tab[1][0] = avg_pixels8_mlib;
-    c->avg_pixels_tab[1][1] = avg_pixels8_x2_mlib;
-    c->avg_pixels_tab[1][2] = avg_pixels8_y2_mlib;
-    c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mlib;
-
-    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
-    c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
-    }
-
-    c->bswap_buf = bswap_buf_mlib;
-}
-
-void MPV_common_init_mlib(MpegEncContext *s)
-{
-    if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
-        s->dsp.fdct = ff_fdct_mlib;
-    }
-
-    if(s->avctx->idct_algo==FF_IDCT_MLIB){
-        s->dsp.idct_put= ff_idct_put_mlib;
-        s->dsp.idct_add= ff_idct_add_mlib;
-        s->dsp.idct    = ff_idct_mlib;
-        s->dsp.idct_permutation_type= FF_NO_IDCT_PERM;
-    }
-}
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index f197bd78a26cbd959024166fa4b4c4c054c61bc7..1befe7f4927571e4262365e9409091619d9feffe 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -191,8 +191,6 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
     MPV_common_init_mmx(s);
 #elif ARCH_ALPHA
     MPV_common_init_axp(s);
-#elif CONFIG_MLIB
-    MPV_common_init_mlib(s);
 #elif HAVE_MMI
     MPV_common_init_mmi(s);
 #elif ARCH_ARM
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 568fb86d82f795d90d62adb734c93f894302b7c0..eb785cd84f03cc1e4a2d1e216ffaa5b2e6f8980c 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -705,7 +705,6 @@ int MPV_encode_end(AVCodecContext *avctx);
 int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data);
 void MPV_common_init_mmx(MpegEncContext *s);
 void MPV_common_init_axp(MpegEncContext *s);
-void MPV_common_init_mlib(MpegEncContext *s);
 void MPV_common_init_mmi(MpegEncContext *s);
 void MPV_common_init_arm(MpegEncContext *s);
 void MPV_common_init_altivec(MpegEncContext *s);
diff --git a/libavcodec/options.c b/libavcodec/options.c
index a5b36976c351aba688a7c49c375b8c118d610472..b11d6abe6b44c4b3045abe4caf554df93415f012 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -206,7 +206,6 @@ static const AVOption options[]={
 {"fastint", "fast integer", 0, AV_OPT_TYPE_CONST, {.dbl = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
 {"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.dbl = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
 {"mmx", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
-{"mlib", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_DCT_MLIB }, INT_MIN, INT_MAX, V|E, "dct"},
 {"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
 {"faan", "floating point AAN DCT", 0, AV_OPT_TYPE_CONST, {.dbl = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
 {"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
@@ -221,7 +220,6 @@ static const AVOption options[]={
 {"simplemmx", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"libmpeg2mmx", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_LIBMPEG2MMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"ps2", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_PS2 }, INT_MIN, INT_MAX, V|E|D, "idct"},
-{"mlib", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_MLIB }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"arm", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"sh4", NULL, 0, AV_OPT_TYPE_CONST, {.dbl = FF_IDCT_SH4 }, INT_MIN, INT_MAX, V|E|D, "idct"},
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index a9156dedab68a50efccfbbca046ebfcaf4d1060c..68b461d2f181f79e78fb19834e048efc5153f89f 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -187,8 +187,7 @@ static int raw_decode(AVCodecContext *avctx,
 
     avpicture_fill(picture, buf, avctx->pix_fmt, avctx->width, avctx->height);
     if((avctx->pix_fmt==PIX_FMT_PAL8 && buf_size < context->length) ||
-       (avctx->pix_fmt!=PIX_FMT_PAL8 &&
-        (av_pix_fmt_descriptors[avctx->pix_fmt].flags & PIX_FMT_PAL))){
+       (av_pix_fmt_descriptors[avctx->pix_fmt].flags & PIX_FMT_PSEUDOPAL)) {
         frame->data[1]= context->palette;
     }
     if (avctx->pix_fmt == PIX_FMT_PAL8) {
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 74206720ca607ac0d8317e20d45b04de908ac2e4..9e31d9ede0be0dc8b5636c81d9e0e9af54caccdf 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -244,18 +244,6 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
 
     for (i = 0; i < 4; i++)
         linesize_align[i] = STRIDE_ALIGN;
-//STRIDE_ALIGN is 8 for SSE* but this does not work for SVQ1 chroma planes
-//we could change STRIDE_ALIGN to 16 for x86/sse but it would increase the
-//picture size unneccessarily in some cases. The solution here is not
-//pretty and better ideas are welcome!
-#if HAVE_MMX
-    if(s->codec_id == CODEC_ID_SVQ1 || s->codec_id == CODEC_ID_VP5 ||
-       s->codec_id == CODEC_ID_VP6 || s->codec_id == CODEC_ID_VP6F ||
-       s->codec_id == CODEC_ID_VP6A || s->codec_id == CODEC_ID_DIRAC) {
-        for (i = 0; i < 4; i++)
-            linesize_align[i] = 16;
-    }
-#endif
 }
 
 void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
@@ -1125,23 +1113,107 @@ int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx,
 }
 #endif
 
+#if FF_API_OLD_ENCODE_VIDEO
 int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                          const AVFrame *pict)
 {
+    AVPacket pkt;
+    int ret, got_packet = 0;
+
     if(buf_size < FF_MIN_BUFFER_SIZE){
         av_log(avctx, AV_LOG_ERROR, "buffer smaller than minimum size\n");
         return -1;
     }
-    if(av_image_check_size(avctx->width, avctx->height, 0, avctx))
-        return -1;
-    if((avctx->codec->capabilities & CODEC_CAP_DELAY) || pict){
-        int ret = avctx->codec->encode(avctx, buf, buf_size, pict);
-        avctx->frame_number++;
-        emms_c(); //needed to avoid an emms_c() call before every return;
 
-        return ret;
-    }else
+    av_init_packet(&pkt);
+    pkt.data = buf;
+    pkt.size = buf_size;
+
+    ret = avcodec_encode_video2(avctx, &pkt, pict, &got_packet);
+    if (!ret && got_packet && avctx->coded_frame) {
+        avctx->coded_frame->pts       = pkt.pts;
+        avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY);
+    }
+
+    /* free any side data since we cannot return it */
+    if (pkt.side_data_elems > 0) {
+        int i;
+        for (i = 0; i < pkt.side_data_elems; i++)
+            av_free(pkt.side_data[i].data);
+        av_freep(&pkt.side_data);
+        pkt.side_data_elems = 0;
+    }
+
+    return ret ? ret : pkt.size;
+}
+#endif
+
+#define MAX_CODED_FRAME_SIZE(width, height)\
+    (9*(width)*(height) + FF_MIN_BUFFER_SIZE)
+
+int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
+                                              AVPacket *avpkt,
+                                              const AVFrame *frame,
+                                              int *got_packet_ptr)
+{
+    int ret;
+    int user_packet = !!avpkt->data;
+
+    if (!(avctx->codec->capabilities & CODEC_CAP_DELAY) && !frame) {
+        av_init_packet(avpkt);
+        avpkt->size     = 0;
+        *got_packet_ptr = 0;
         return 0;
+    }
+
+    if (av_image_check_size(avctx->width, avctx->height, 0, avctx))
+        return AVERROR(EINVAL);
+
+    if (avctx->codec->encode2) {
+        *got_packet_ptr = 0;
+        ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+        if (!ret) {
+            if (!*got_packet_ptr)
+                avpkt->size = 0;
+            else if (!(avctx->codec->capabilities & CODEC_CAP_DELAY))
+                avpkt->pts = avpkt->dts = frame->pts;
+        }
+    } else {
+        /* for compatibility with encoders not supporting encode2(), we need to
+           allocate a packet buffer if the user has not provided one, or check
+           its size otherwise */
+        int buf_size = avpkt->size;
+
+        if (!user_packet)
+            buf_size = MAX_CODED_FRAME_SIZE(avctx->width, avctx->height);
+
+        if ((ret = ff_alloc_packet(avpkt, buf_size)))
+            return ret;
+
+        /* encode the frame */
+        ret = avctx->codec->encode(avctx, avpkt->data, avpkt->size, frame);
+        if (ret >= 0) {
+            if (!ret) {
+                /* no output. if the packet data was allocated by libavcodec,
+                   free it */
+                if (!user_packet)
+                    av_freep(&avpkt->data);
+            } else if (avctx->coded_frame) {
+                avpkt->pts    = avctx->coded_frame->pts;
+                avpkt->flags |= AV_PKT_FLAG_KEY*!!avctx->coded_frame->key_frame;
+            }
+
+            avpkt->size     = ret;
+            *got_packet_ptr = (ret > 0);
+            ret             = 0;
+        }
+    }
+
+    if (!ret)
+        avctx->frame_number++;
+
+    emms_c();
+    return ret;
 }
 
 int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
diff --git a/libavcodec/version.h b/libavcodec/version.h
index db72466927872ac51662f53e46607aedac41aa5b..6d82bdb49d22f871b8d1279400720aee7a26c230 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -57,5 +57,8 @@
 #ifndef FF_API_OLD_ENCODE_AUDIO
 #define FF_API_OLD_ENCODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 55)
 #endif
+#ifndef FF_API_OLD_ENCODE_VIDEO
+#define FF_API_OLD_ENCODE_VIDEO (LIBAVCODEC_VERSION_MAJOR < 55)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 15ba297ee9478360e3838eb9852f3d75f8ed7d52..25f4755fa19ff36a47a3a40bfee052ac40e7d5eb 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -968,6 +968,9 @@ cglobal h264_idct_add8_8_sse2, 5, 7, 8
 
 %macro IDCT_DC_DEQUANT 2
 cglobal h264_luma_dc_dequant_idct_%1, 3,4,%2
+    ; manually spill XMM registers for Win64 because
+    ; the code here is initialized with INIT_MMX
+    WIN64_SPILL_XMM %2
     movq        m3, [r1+24]
     movq        m2, [r1+16]
     movq        m1, [r1+ 8]
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 9b99f5287bd54e749cbb6ba9ba41c884a9afd535..f97d0518656f53e9770376947783cd815a45eeca 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -1931,6 +1931,9 @@ cglobal pred8x8l_vertical_right_mmxext, 4,5
 
 %macro PRED8x8L_VERTICAL_RIGHT 1
 cglobal pred8x8l_vertical_right_%1, 4,5,7
+    ; manually spill XMM registers for Win64 because
+    ; the code here is initialized with INIT_MMX
+    WIN64_SPILL_XMM 7
     sub          r0, r3
     lea          r4, [r0+r3*2]
     movq        mm0, [r0+r3*1-8]
diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
index b3a82aae800ac6a8496a5251d14de930ba3e3da8..f9039b59430e993b0265fb4823904c8cc62d957e 100644
--- a/libavfilter/vf_crop.c
+++ b/libavfilter/vf_crop.c
@@ -278,7 +278,8 @@ static void start_frame(AVFilterLink *link, AVFilterBufferRef *picref)
     ref2->data[0] += crop->y * ref2->linesize[0];
     ref2->data[0] += crop->x * crop->max_step[0];
 
-    if (!(av_pix_fmt_descriptors[link->format].flags & PIX_FMT_PAL)) {
+    if (!(av_pix_fmt_descriptors[link->format].flags & PIX_FMT_PAL ||
+          av_pix_fmt_descriptors[link->format].flags & PIX_FMT_PSEUDOPAL)) {
         for (i = 1; i < 3; i ++) {
             if (ref2->data[i]) {
                 ref2->data[i] += (crop->y >> crop->vsub) * ref2->linesize[i];
diff --git a/libavfilter/vf_pixdesctest.c b/libavfilter/vf_pixdesctest.c
index 901f3540b7eba76fa9660d15022f178bc91a803d..97e0f2f53e4ccb530ff62e19934cc99f3f1bb3a3 100644
--- a/libavfilter/vf_pixdesctest.c
+++ b/libavfilter/vf_pixdesctest.c
@@ -72,7 +72,8 @@ static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
     }
 
     /* copy palette */
-    if (priv->pix_desc->flags & PIX_FMT_PAL)
+    if (priv->pix_desc->flags & PIX_FMT_PAL ||
+        priv->pix_desc->flags & PIX_FMT_PSEUDOPAL)
         memcpy(outpicref->data[1], outpicref->data[1], 256*4);
 
     avfilter_start_frame(outlink, avfilter_ref_buffer(outpicref, ~0));
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index 5bca9fc1844a572485b45f2c62a39eb36c3196bc..0437751667b8f7658fb5b36cd987d502bb25ed1b 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -212,7 +212,8 @@ static int config_props(AVFilterLink *outlink)
            outlink->w, outlink->h, av_pix_fmt_descriptors[outlink->format].name,
            scale->flags);
 
-    scale->input_is_pal = av_pix_fmt_descriptors[inlink->format].flags & PIX_FMT_PAL;
+    scale->input_is_pal = av_pix_fmt_descriptors[inlink->format].flags & PIX_FMT_PAL ||
+                          av_pix_fmt_descriptors[inlink->format].flags & PIX_FMT_PSEUDOPAL;
     if (outfmt == PIX_FMT_PAL8) outfmt = PIX_FMT_BGR8;
 
     if (scale->sws)
diff --git a/libavutil/audioconvert.h b/libavutil/audioconvert.h
index 29ec1cbc0a016f6d194eb1dce3d53c3cb05d96e4..6414d3c3a0a96a58b20854f0235913ffaf717ce5 100644
--- a/libavutil/audioconvert.h
+++ b/libavutil/audioconvert.h
@@ -97,6 +97,7 @@
 #define AV_CH_LAYOUT_7POINT0_FRONT     (AV_CH_LAYOUT_5POINT0|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
 #define AV_CH_LAYOUT_7POINT1           (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)
 #define AV_CH_LAYOUT_7POINT1_WIDE      (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
+#define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
 #define AV_CH_LAYOUT_OCTAGONAL         (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
 #define AV_CH_LAYOUT_STEREO_DOWNMIX    (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
 
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index c8427fd0bfbfc88ac98ba6c3fe0ff1d2838407f8..0725c7f2e2431b72e437b1eaff1651b2a6f55279 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -155,7 +155,7 @@
 
 #define LIBAVUTIL_VERSION_MAJOR 51
 #define LIBAVUTIL_VERSION_MINOR 38
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MICRO 101
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index 3847c53aec89d4cf0462ace5e2ff7d05f0e825c2..f45c373eae2e6afe03bc3e1ae8182c5204dcb9ab 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -116,7 +116,8 @@ int av_image_fill_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, int heigh
         return AVERROR(EINVAL);
     size[0] = linesizes[0] * height;
 
-    if (desc->flags & PIX_FMT_PAL) {
+    if (desc->flags & PIX_FMT_PAL ||
+        desc->flags & PIX_FMT_PSEUDOPAL) {
         size[0] = (size[0] + 3) & ~3;
         data[1] = ptr + size[0]; /* palette is stored here as 256 32 bits words */
         return size[0] + 256 * 4;
@@ -204,7 +205,8 @@ int av_image_alloc(uint8_t *pointers[4], int linesizes[4],
         av_free(buf);
         return ret;
     }
-    if (av_pix_fmt_descriptors[pix_fmt].flags & PIX_FMT_PAL)
+    if (av_pix_fmt_descriptors[pix_fmt].flags & PIX_FMT_PAL ||
+        av_pix_fmt_descriptors[pix_fmt].flags & PIX_FMT_PSEUDOPAL)
         ff_set_systematic_pal2((uint32_t*)pointers[1], pix_fmt);
 
     return ret;
@@ -251,7 +253,8 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
     if (desc->flags & PIX_FMT_HWACCEL)
         return;
 
-    if (desc->flags & PIX_FMT_PAL) {
+    if (desc->flags & PIX_FMT_PAL ||
+        desc->flags & PIX_FMT_PSEUDOPAL) {
         av_image_copy_plane(dst_data[0], dst_linesizes[0],
                             src_data[0], src_linesizes[0],
                             width, height);
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index c04ec3139e05e232f6c48fbf128ee2790567fe8d..8d4a27b8f9221cc8a90b3c016f84b6da562a27cd 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -327,7 +327,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
             { 0, 0, 1, 3, 2 },        /* G */
             { 0, 0, 1, 0, 2 },        /* R */
         },
-        .flags = PIX_FMT_PAL | PIX_FMT_RGB,
+        .flags = PIX_FMT_RGB | PIX_FMT_PSEUDOPAL,
     },
     [PIX_FMT_BGR4] = {
         .name = "bgr4",
@@ -351,7 +351,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
             { 0, 0, 1, 1, 1 },        /* G */
             { 0, 0, 1, 0, 0 },        /* R */
         },
-        .flags = PIX_FMT_PAL | PIX_FMT_RGB,
+        .flags = PIX_FMT_RGB | PIX_FMT_PSEUDOPAL,
     },
     [PIX_FMT_RGB8] = {
         .name = "rgb8",
@@ -363,7 +363,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
             { 0, 0, 1, 3, 2 },        /* G */
             { 0, 0, 1, 0, 2 },        /* B */
         },
-        .flags = PIX_FMT_PAL | PIX_FMT_RGB,
+        .flags = PIX_FMT_RGB | PIX_FMT_PSEUDOPAL,
     },
     [PIX_FMT_RGB4] = {
         .name = "rgb4",
@@ -387,7 +387,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[PIX_FMT_NB] = {
             { 0, 0, 1, 1, 1 },        /* G */
             { 0, 0, 1, 0, 0 },        /* B */
         },
-        .flags = PIX_FMT_PAL | PIX_FMT_RGB,
+        .flags = PIX_FMT_RGB | PIX_FMT_PSEUDOPAL,
     },
     [PIX_FMT_NV12] = {
         .name = "nv12",
diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index 217524678593c5732f3f461377c79619571d4964..0730f5bb217f0e71d686106dddf03c97993586ef 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -89,6 +89,12 @@ typedef struct AVPixFmtDescriptor{
 #define PIX_FMT_HWACCEL   8 ///< Pixel format is an HW accelerated format.
 #define PIX_FMT_PLANAR   16 ///< At least one pixel component is not in the first data plane
 #define PIX_FMT_RGB      32 ///< The pixel format contains RGB-like data (as opposed to YUV/grayscale)
+/**
+ * The pixel format is "pseudo-paletted". This means that FFmpeg treats it as
+ * paletted internally, but the palette is generated by the decoder and is not
+ * stored in the file.
+ */
+#define PIX_FMT_PSEUDOPAL 64
 
 /**
  * The array of all the pixel format descriptors.
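
The new flag means a caller that previously special-cased PIX_FMT_PAL will
usually want to accept pseudo-paletted formats as well (the imgutils.c and
swscale hunks in this patch do exactly that). A minimal sketch, not part of
the patch, with a hypothetical helper name:

    #include "libavutil/pixdesc.h"

    static int uses_palette_plane(enum PixelFormat pix_fmt)
    {
        const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
        /* PIX_FMT_PAL: the palette comes from the bitstream;
         * PIX_FMT_PSEUDOPAL: the palette is synthesized by the decoder. */
        return !!(desc->flags & (PIX_FMT_PAL | PIX_FMT_PSEUDOPAL));
    }
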
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 2921ddb62a53ea166388bd6307489056479b4754..908e1dacc90f045c8cbbde0567efd0a8ff1f5da1 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -40,6 +40,8 @@
 %if ARCH_X86_64
     %ifidn __OUTPUT_FORMAT__,win32
         %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,win64
+        %define WIN64  1
     %else
         %define UNIX64 1
     %endif
@@ -290,7 +292,11 @@ DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 56]
         push r5
         %assign stack_offset stack_offset+16
     %endif
-    WIN64_SPILL_XMM %3
+    %if mmsize == 8
+        %assign xmm_regs_used 0
+    %else
+        WIN64_SPILL_XMM %3
+    %endif
     LOAD_IF_USED 4, %1
     LOAD_IF_USED 5, %1
     LOAD_IF_USED 6, %1
@@ -299,9 +305,6 @@ DECLARE_REG 6, rax, eax, ax,  al,  [rsp + stack_offset + 56]
 
 %macro WIN64_SPILL_XMM 1
     %assign xmm_regs_used %1
-    %if mmsize == 8
-        %assign xmm_regs_used 0
-    %endif
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 6
         sub rsp, (xmm_regs_used-6)*16+16
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 50e7f7fcaa636b44ae4e3367d79a8638903f4cd8..e78ec42975dbed43d3f53aa2a163d1674d225b6c 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -11,7 +11,6 @@ OBJS = input.o options.o output.o rgb2rgb.o swscale.o \
 OBJS-$(ARCH_BFIN)          +=  bfin/internal_bfin.o     \
                                bfin/swscale_bfin.o      \
                                bfin/yuv2rgb_bfin.o
-OBJS-$(CONFIG_MLIB)        +=  mlib/yuv2rgb_mlib.o
 OBJS-$(HAVE_ALTIVEC)       +=  ppc/swscale_altivec.o    \
                                ppc/yuv2rgb_altivec.o    \
                                ppc/yuv2yuv_altivec.o
@@ -29,4 +28,4 @@ OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
 
 TESTPROGS = colorspace swscale
 
-DIRS = bfin mlib ppc sparc x86
+DIRS = bfin ppc sparc x86
diff --git a/libswscale/mlib/yuv2rgb_mlib.c b/libswscale/mlib/yuv2rgb_mlib.c
deleted file mode 100644
index e9f11494ee1069b775ca0c5f79987f9196f4958e..0000000000000000000000000000000000000000
--- a/libswscale/mlib/yuv2rgb_mlib.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * software YUV to RGB converter using mediaLib
- *
- * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <mlib_types.h>
-#include <mlib_status.h>
-#include <mlib_sys.h>
-#include <mlib_video.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "libswscale/swscale.h"
-#include "libswscale/swscale_internal.h"
-
-static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    if(c->srcFormat == PIX_FMT_YUV422P) {
-        srcStride[1] *= 2;
-        srcStride[2] *= 2;
-    }
-
-    assert(srcStride[1] == srcStride[2]);
-
-    mlib_VideoColorYUV2ARGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
-                               srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
-    return srcSliceH;
-}
-
-static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                               int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    if(c->srcFormat == PIX_FMT_YUV422P) {
-        srcStride[1] *= 2;
-        srcStride[2] *= 2;
-    }
-
-    assert(srcStride[1] == srcStride[2]);
-
-    mlib_VideoColorYUV2ABGR420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
-                               srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
-    return srcSliceH;
-}
-
-static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-                              int srcSliceH, uint8_t* dst[], int dstStride[])
-{
-    if(c->srcFormat == PIX_FMT_YUV422P) {
-        srcStride[1] *= 2;
-        srcStride[2] *= 2;
-    }
-
-    assert(srcStride[1] == srcStride[2]);
-
-    mlib_VideoColorYUV2RGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
-                              srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
-    return srcSliceH;
-}
-
-
-SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c)
-{
-    switch(c->dstFormat) {
-    case PIX_FMT_RGB24: return mlib_YUV2RGB420_24;
-    case PIX_FMT_BGR32: return mlib_YUV2ARGB420_32;
-    case PIX_FMT_RGB32: return mlib_YUV2ABGR420_32;
-    default: return NULL;
-    }
-}
-
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 80abf621f7164ae7ce126d5cb8ace4b717b4f81d..930435608b8f2045768f867dabe48de5bf76cc9a 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -542,7 +542,6 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
 
 SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_vis(SwsContext *c);
-SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c);
 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
 SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
 void ff_bfin_get_unscaled_swscale(SwsContext *c);
@@ -682,7 +681,9 @@ const char *sws_format_name(enum PixelFormat format);
     (av_pix_fmt_descriptors[x].nb_components >= 2          &&  \
      (av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR))
 
-#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A)
+#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) ||       \
+                   (av_pix_fmt_descriptors[x].flags & PIX_FMT_PSEUDOPAL) || \
+                   (x) == PIX_FMT_Y400A)
 
 extern const uint64_t ff_dither4[2];
 extern const uint64_t ff_dither8[2];
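
Since usePal() now also matches PIX_FMT_PSEUDOPAL, swscale's palette
conversion path picks up pseudo-paletted sources, and it expects the
generated palette in the second source plane. A hedged sketch of a caller,
assuming the palette was filled in as in the imgutils.c example above:

    #include "libswscale/swscale.h"

    static int rgb8_to_rgb24(const uint8_t *const src[4], const int src_stride[4],
                             uint8_t *const dst[4], const int dst_stride[4],
                             int w, int h)
    {
        struct SwsContext *sws = sws_getContext(w, h, PIX_FMT_RGB8,
                                                w, h, PIX_FMT_RGB24,
                                                SWS_POINT, NULL, NULL, NULL);
        if (!sws)
            return -1;
        /* src[1] must hold the 256-entry palette generated for the format. */
        sws_scale(sws, src, src_stride, 0, h, dst, dst_stride);
        sws_freeContext(sws);
        return 0;
    }
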
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 4b2f5c89eb33ed76f3f9739634a6c82ba0dc9b1e..8368b247d6a9075d693a1bde7e6ebaa947107ca4 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -319,6 +319,7 @@ yuv2planeX_fn 10,  7, 5
 
 %macro yuv2plane1_fn 3
 cglobal yuv2plane1_%1, %3, %3, %2, src, dst, dstw, dither, offset
+    movsxdifnidn dstwq, dstwd
     add          dstwq, mmsize - 1
     and          dstwq, ~(mmsize - 1)
 %if %1 == 8
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index c553bc6011806e55b766483e2c87a8f4984e392c..84e3a17b452ed7e344fb7b5fd2ac0162d22a8885 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -511,8 +511,6 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
         t = ff_yuv2rgb_init_mmx(c);
     } else if (HAVE_VIS) {
         t = ff_yuv2rgb_init_vis(c);
-    } else if (CONFIG_MLIB) {
-        t = ff_yuv2rgb_init_mlib(c);
     } else if (HAVE_ALTIVEC) {
         t = ff_yuv2rgb_init_altivec(c);
     } else if (ARCH_BFIN) {