diff --git a/configure b/configure
index d7919fd86797e24626241deafdee0f311ccaa587..ebf1ab235c9a7909bfb1118bdd9e140edb2b08f8 100755
--- a/configure
+++ b/configure
@@ -1377,6 +1377,7 @@ CONFIG_EXTRA="
     lpc
     mpegaudiodsp
     mpegvideo
+    mpegvideoenc
     nettle
     rtpdec
     sinewin
@@ -1500,6 +1501,7 @@ dct_select="rdft"
 mdct_select="fft"
 rdft_select="fft"
 mpegaudiodsp_select="dct"
+mpegvideoenc_select="mpegvideo"
 
 # decoders / encoders / hardware accelerators
 aac_decoder_select="mdct sinewin"
@@ -1521,7 +1523,7 @@ cook_decoder_select="mdct sinewin"
 cscd_decoder_suggest="zlib"
 dca_decoder_select="mdct"
 dirac_decoder_select="dwt golomb"
-dnxhd_encoder_select="aandcttables mpegvideo"
+dnxhd_encoder_select="aandcttables mpegvideoenc"
 dxa_decoder_select="zlib"
 eac3_decoder_select="ac3_decoder"
 eac3_encoder_select="mdct ac3dsp"
@@ -1540,9 +1542,9 @@ flv_decoder_select="h263_decoder"
 flv_encoder_select="h263_encoder"
 fraps_decoder_select="huffman"
 h261_decoder_select="mpegvideo"
-h261_encoder_select="aandcttables mpegvideo"
+h261_encoder_select="aandcttables mpegvideoenc"
 h263_decoder_select="h263_parser mpegvideo"
-h263_encoder_select="aandcttables mpegvideo"
+h263_encoder_select="aandcttables mpegvideoenc"
 h263_vaapi_hwaccel_select="vaapi h263_decoder"
 h263i_decoder_select="h263_decoder"
 h263p_encoder_select="h263_encoder"
@@ -1558,10 +1560,10 @@ iac_decoder_select="fft mdct sinewin"
 imc_decoder_select="fft mdct sinewin"
 jpegls_decoder_select="golomb"
 jpegls_encoder_select="golomb"
-ljpeg_encoder_select="aandcttables mpegvideo"
+ljpeg_encoder_select="aandcttables mpegvideoenc"
 loco_decoder_select="golomb"
 mdec_decoder_select="mpegvideo"
-mjpeg_encoder_select="aandcttables mpegvideo"
+mjpeg_encoder_select="aandcttables mpegvideoenc"
 mlp_decoder_select="mlp_parser"
 mp1_decoder_select="mpegaudiodsp"
 mp1float_decoder_select="mpegaudiodsp"
@@ -1581,14 +1583,14 @@ mpeg_xvmc_decoder_select="mpegvideo_decoder"
 mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder"
 mpeg1_vdpau_hwaccel_select="vdpau mpeg1video_decoder"
 mpeg1video_decoder_select="mpegvideo"
-mpeg1video_encoder_select="aandcttables mpegvideo"
+mpeg1video_encoder_select="aandcttables mpegvideoenc"
 mpeg2_crystalhd_decoder_select="crystalhd"
 mpeg2_dxva2_hwaccel_deps="dxva2api_h"
 mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder"
 mpeg2_vdpau_hwaccel_select="vdpau mpeg2video_decoder"
 mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder"
-mpeg2video_encoder_select="mpegvideo"
-mpeg2video_encoder_select="aandcttables mpegvideo"
+mpeg2video_decoder_select="mpegvideo"
+mpeg2video_encoder_select="aandcttables mpegvideoenc"
 mpeg4_crystalhd_decoder_select="crystalhd"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
 mpeg4_encoder_select="h263_encoder"
@@ -1618,12 +1620,12 @@ rv40_decoder_select="golomb h264chroma h264pred h264qpel mpegvideo"
 shorten_decoder_select="golomb"
 sipr_decoder_select="lsp"
 snow_decoder_select="dwt"
-snow_encoder_select="aandcttables dwt mpegvideo"
+snow_encoder_select="aandcttables dwt mpegvideoenc"
 sonic_decoder_select="golomb"
 sonic_encoder_select="golomb"
 sonic_ls_encoder_select="golomb"
-svq1_encoder_select="mpegvideo"
-svq1_encoder_select="aandcttables mpegvideo"
+svq1_decoder_select="mpegvideo"
+svq1_encoder_select="aandcttables mpegvideoenc"
 svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel mpegvideo"
 svq3_decoder_suggest="zlib"
 theora_decoder_select="vp3_decoder"
diff --git a/ffmpeg.c b/ffmpeg.c
index 043f68cb52ec568cd2fd116e54bd1d2df647e047..b97ad7bdbfe9ddbef36b6aff0c9d5f50656cf56f 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -1120,7 +1120,7 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
                 if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram))
                     qp_histogram[qp]++;
                 for (j = 0; j < 32; j++)
-                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2)));
+                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log2(qp_histogram[j] + 1)));
             }
             if (enc->flags&CODEC_FLAG_PSNR) {
                 int j;
diff --git a/ffprobe.c b/ffprobe.c
index 6de9eb333127a44b03aeffa3104139697fec1f58..a40f756d8291831f9eab58d6c6e31055169d23b3 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -33,6 +33,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/dict.h"
+#include "libavutil/libm.h"
 #include "libavutil/timecode.h"
 #include "libavdevice/avdevice.h"
 #include "libswscale/swscale.h"
@@ -121,7 +122,7 @@ static char *value_string(char *buf, int buf_size, struct unit_value uv)
             long long int index;
 
             if (uv.unit == unit_byte_str && use_byte_value_binary_prefix) {
-                index = (long long int) (log(vald)/log(2)) / 10;
+                index = (long long int) (log2(vald)) / 10;
                 index = av_clip(index, 0, FF_ARRAY_ELEMS(binary_unit_prefixes) - 1);
                 vald /= pow(2, index * 10);
                 prefix_string = binary_unit_prefixes[index];
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index aded4f4112ab043edcd775b9312d4aa338a0c405..a8e4e88749716f3a0a63840e98aa914adfa28aab 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -56,6 +56,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
                                           mpegaudiodsp_fixed.o          \
                                           mpegaudiodsp_float.o
 OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideo_motion.o
+OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
+                                          motion_est.o ratecontrol.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
@@ -144,9 +146,7 @@ OBJS-$(CONFIG_DIRAC_DECODER)           += diracdec.o dirac.o diracdsp.o \
                                           dirac_arith.o mpeg12data.o dwt.o
 OBJS-$(CONFIG_DFA_DECODER)             += dfa.o
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
-OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o       \
-                                          mpegvideo_enc.o motion_est.o \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o
 OBJS-$(CONFIG_DPX_DECODER)             += dpx.o
 OBJS-$(CONFIG_DPX_ENCODER)             += dpxenc.o
 OBJS-$(CONFIG_DSICINAUDIO_DECODER)     += dsicinav.o
@@ -199,17 +199,13 @@ OBJS-$(CONFIG_GIF_ENCODER)             += gif.o lzwenc.o
 OBJS-$(CONFIG_GSM_DECODER)             += gsmdec.o gsmdec_data.o msgsmdec.o
 OBJS-$(CONFIG_GSM_MS_DECODER)          += gsmdec.o gsmdec_data.o msgsmdec.o
 OBJS-$(CONFIG_H261_DECODER)            += h261dec.o h261.o h261data.o error_resilience.o
-OBJS-$(CONFIG_H261_ENCODER)            += h261enc.o h261.o h261data.o  \
-                                          mpegvideo_enc.o motion_est.o \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_H261_ENCODER)            += h261enc.o h261.o h261data.o
 OBJS-$(CONFIG_H263_DECODER)            += h263dec.o h263.o ituh263dec.o        \
                                           mpeg4video.o mpeg4videodec.o flvdec.o\
                                           intelh263dec.o  error_resilience.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL)      += vaapi_mpeg4.o
-OBJS-$(CONFIG_H263_ENCODER)            += mpegvideo_enc.o mpeg4video.o      \
-                                          mpeg4videoenc.o motion_est.o      \
-                                          ratecontrol.o h263.o ituh263enc.o \
-                                          flvenc.o mpeg12data.o             \
+OBJS-$(CONFIG_H263_ENCODER)            += mpeg4videoenc.o mpeg4video.o  \
+                                          h263.o ituh263enc.o flvenc.o  \
                                           error_resilience.o
 OBJS-$(CONFIG_H264_DECODER)            += h264.o                               \
                                           h264_loopfilter.o h264_direct.o      \
@@ -243,9 +239,7 @@ OBJS-$(CONFIG_JV_DECODER)              += jvdec.o
 OBJS-$(CONFIG_KGV1_DECODER)            += kgv1dec.o
 OBJS-$(CONFIG_KMVC_DECODER)            += kmvc.o
 OBJS-$(CONFIG_LAGARITH_DECODER)        += lagarith.o lagarithrac.o
-OBJS-$(CONFIG_LJPEG_ENCODER)           += ljpegenc.o mjpegenc.o mjpeg.o \
-                                          mpegvideo_enc.o motion_est.o  \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_LJPEG_ENCODER)           += ljpegenc.o mjpegenc.o mjpeg.o
 OBJS-$(CONFIG_LOCO_DECODER)            += loco.o
 OBJS-$(CONFIG_MACE3_DECODER)           += mace.o
 OBJS-$(CONFIG_MACE6_DECODER)           += mace.o
@@ -254,9 +248,7 @@ OBJS-$(CONFIG_MDEC_DECODER)            += mdec.o mpeg12.o mpeg12data.o \
 OBJS-$(CONFIG_MICRODVD_DECODER)        += microdvddec.o ass.o
 OBJS-$(CONFIG_MIMIC_DECODER)           += mimic.o
 OBJS-$(CONFIG_MJPEG_DECODER)           += mjpegdec.o mjpeg.o
-OBJS-$(CONFIG_MJPEG_ENCODER)           += mjpegenc.o mjpeg.o           \
-                                          mpegvideo_enc.o motion_est.o \
-                                          ratecontrol.o mpeg12data.o
+OBJS-$(CONFIG_MJPEG_ENCODER)           += mjpegenc.o mjpeg.o
 OBJS-$(CONFIG_MJPEGB_DECODER)          += mjpegbdec.o mjpegdec.o mjpeg.o
 OBJS-$(CONFIG_MLP_DECODER)             += mlpdec.o mlpdsp.o
 OBJS-$(CONFIG_MMVIDEO_DECODER)         += mmvideo.o
@@ -298,19 +290,15 @@ OBJS-$(CONFIG_MPEGVIDEO_DECODER)       += mpeg12.o mpeg12data.o \
 OBJS-$(CONFIG_MPEG_XVMC_DECODER)       += mpegvideo_xvmc.o
 OBJS-$(CONFIG_MPEG1VIDEO_DECODER)      += mpeg12.o mpeg12data.o \
                                           error_resilience.o
-OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpegvideo_enc.o \
+OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o          \
                                           timecode.o                  \
-                                          motion_est.o ratecontrol.o  \
-                                          mpeg12.o mpeg12data.o       \
                                           error_resilience.o
 OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL)     += dxva2_mpeg2.o
 OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)     += vaapi_mpeg2.o
 OBJS-$(CONFIG_MPEG2VIDEO_DECODER)      += mpeg12.o mpeg12data.o \
                                           error_resilience.o
-OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpegvideo_enc.o \
+OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o          \
                                           timecode.o                  \
-                                          motion_est.o ratecontrol.o  \
-                                          mpeg12.o mpeg12data.o       \
                                           error_resilience.o
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)     += vaapi_mpeg4.o
 OBJS-$(CONFIG_MSMPEG4V1_DECODER)       += msmpeg4.o msmpeg4data.o
@@ -411,10 +399,8 @@ OBJS-$(CONFIG_SMACKER_DECODER)         += smacker.o
 OBJS-$(CONFIG_SMC_DECODER)             += smc.o
 OBJS-$(CONFIG_SNOW_DECODER)            += snowdec.o snow.o rangecoder.o
 OBJS-$(CONFIG_SNOW_ENCODER)            += snowenc.o snow.o rangecoder.o    \
-                                          motion_est.o ratecontrol.o       \
-                                          h263.o                           \
-                                          error_resilience.o ituh263enc.o  \
-                                          mpegvideo_enc.o mpeg12data.o
+                                          h263.o ituh263enc.o          \
+                                          error_resilience.o
 OBJS-$(CONFIG_SOL_DPCM_DECODER)        += dpcm.o
 OBJS-$(CONFIG_SONIC_DECODER)           += sonic.o
 OBJS-$(CONFIG_SONIC_ENCODER)           += sonic.o
@@ -428,10 +414,8 @@ OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o h263.o \
                                           error_resilience.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o    \
-                                          motion_est.o h263.o \
-                                          error_resilience.o \
-                                          ituh263enc.o mpegvideo_enc.o   \
-                                          ratecontrol.o mpeg12data.o
+                                          h263.o ituh263enc.o \
+                                          error_resilience.o
 OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o svq3.o                       \
                                           h264_loopfilter.o h264_direct.o     \
                                           h264_sei.o h264_ps.o h264_refs.o    \
diff --git a/libavcodec/g723_1.c b/libavcodec/g723_1.c
index 72151b43d95bde0d2e4c2b1dbbf948c1f3f7c7ba..70c00d5f66ffd1839fa4772f4c200c40e7b3d0bd 100644
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -230,17 +230,7 @@ static int16_t square_root(int val)
  */
 static int normalize_bits(int num, int width)
 {
-    int i = 0;
-
-    if (num) {
-        if (num == -1)
-            return width;
-        if (num < 0)
-            num = ~num;
-        i= width - av_log2(num) - 1;
-        i= FFMAX(i, 0);
-    }
-    return i;
+    return width - av_log2(num) - 1;
 }
 
 #define normalize_bits_int16(num) normalize_bits(num, 15)
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 76e23865d4c105c57bcb3f40e0c7cd2042cc1a86..ba48df74607ca6beb8bcc7eb87f4a731780340a6 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -342,7 +342,7 @@ static void imc_decode_level_coefficients(IMCContext *q, int *levlCoeffBuf,
     // maybe some frequency division thingy
 
     flcoeffs1[0] = 20000.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125
-    flcoeffs2[0] = log(flcoeffs1[0]) / log(2);
+    flcoeffs2[0] = log2f(flcoeffs1[0]);
     tmp  = flcoeffs1[0];
     tmp2 = flcoeffs2[0];
 
@@ -414,7 +414,7 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx,
         highest = FFMAX(highest, chctx->flcoeffs1[i]);
 
     for (i = 0; i < BANDS - 1; i++)
-        chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log(chctx->flcoeffs5[i]) / log(2);
+        chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]);
     chctx->flcoeffs4[BANDS - 1] = limit;
 
     highest = highest * 0.25;
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 0944a700b10410d1f24c2e758e18cd0d5c3cb6f5..82f6e426691770b589f12626d4b34873abc66d57 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -1538,7 +1538,7 @@ static void update_last_header_values(SnowContext *s){
 }
 
 static int qscale2qlog(int qscale){
-    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
+    return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
            + 61*QROOT/8; ///< 64 > 60
 }
 
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 8c5ed663068e126374a1b51c89a1c2b119c4883b..ccbf3752cf6a56e75d88d42d324e7badc73c4a8c 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -34,6 +34,7 @@
 #if FF_API_VDA_ASYNC
 #include <pthread.h>
 #endif
+
 #include <stdint.h>
 
 // emmintrin.h is unable to compile with -std=c99 -Werror=missing-prototypes
@@ -53,34 +54,33 @@
 
 #if FF_API_VDA_ASYNC
 /**
- * This structure is used to store a decoded frame information and data.
+ * This structure is used to store decoded frame information and data.
  *
  * @deprecated Use synchronous decoding mode.
- *
  */
 typedef struct {
     /**
-    * The PTS of the frame.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * The PTS of the frame.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     int64_t             pts;
 
     /**
-    * The CoreVideo buffer that contains the decoded data.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * The CoreVideo buffer that contains the decoded data.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     CVPixelBufferRef    cv_buffer;
 
     /**
-    * A pointer to the next frame.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * A pointer to the next frame.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     struct vda_frame    *next_frame;
 } vda_frame;
 #endif
@@ -93,106 +93,106 @@ typedef struct {
  */
 struct vda_context {
     /**
-    * VDA decoder object.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * VDA decoder object.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     VDADecoder          decoder;
 
     /**
-    * The Core Video pixel buffer that contains the current image data.
-    *
-    * encoding: unused
-    * decoding: Set by libavcodec. Unset by user.
-    */
+     * The Core Video pixel buffer that contains the current image data.
+     *
+     * - encoding: unused
+     * - decoding: Set by libavcodec. Unset by user.
+     */
     CVPixelBufferRef    cv_buffer;
 
     /**
-    * An integer value that indicates whether use the hardware decoder in synchronous mode.
-    *
-    * encoding: unused
-    * decoding: Set by user.
-    */
+     * Nonzero to use the hardware decoder in synchronous mode.
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
     int                 use_sync_decoding;
 
 #if FF_API_VDA_ASYNC
     /**
-    * VDA frames queue ordered by presentation timestamp.
-    *
-    * @deprecated Use synchronous decoding mode.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * VDA frames queue ordered by presentation timestamp.
+     *
+     * @deprecated Use synchronous decoding mode.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     vda_frame           *queue;
 
     /**
-    * Mutex for locking queue operations.
-    *
-    * @deprecated Use synchronous decoding mode.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
+     * Mutex for locking queue operations.
+     *
+     * @deprecated Use synchronous decoding mode.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
     pthread_mutex_t     queue_mutex;
 #endif
 
     /**
-    * The frame width.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The frame width.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     int                 width;
 
     /**
-    * The frame height.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The frame height.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     int                 height;
 
     /**
-    * The frame format.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The frame format.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     int                 format;
 
     /**
-    * The pixel format for output image buffers.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by user.
-    */
+     * The pixel format for output image buffers.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by user.
+     */
     OSType              cv_pix_fmt_type;
 
     /**
-    * The current bitstream buffer.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    uint8_t             *bitstream;
+     * The current bitstream buffer.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
+    uint8_t             *priv_bitstream;
 
     /**
-    * The current size of the bitstream.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    int                 bitstream_size;
+     * The current size of the bitstream.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
+    int                 priv_bitstream_size;
 
     /**
-    * The reference size used for fast reallocation.
-    *
-    * - encoding: unused
-    * - decoding: Set/Unset by libavcodec.
-    */
-    int                 ref_size;
+     * The allocated size of the bitstream buffer, used for fast reallocation.
+     *
+     * - encoding: unused
+     * - decoding: Set/Unset by libavcodec.
+     */
+    int                 priv_allocated_size;
 };
 
 /** Create the video decoder. */
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index 86437017351909616dd9e4904a8d5fa300c35dba..78a32245da61d0289a4392a9fbabf3962261b9a4 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -130,7 +130,7 @@ static void vda_decoder_callback (void *vda_hw_ctx,
                                   uint32_t infoFlags,
                                   CVImageBufferRef image_buffer)
 {
-    struct vda_context *vda_ctx = (struct vda_context*)vda_hw_ctx;
+    struct vda_context *vda_ctx = vda_hw_ctx;
 
     if (!image_buffer)
         return;
@@ -140,8 +140,7 @@ static void vda_decoder_callback (void *vda_hw_ctx,
 
     if (vda_ctx->use_sync_decoding) {
         vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
-    }
-    else {
+    } else {
         vda_frame *new_frame;
         vda_frame *queue_walker;
 
@@ -188,8 +187,8 @@ static int vda_sync_decode(struct vda_context *vda_ctx)
     uint32_t flush_flags = 1 << 0; ///< kVDADecoderFlush_emitFrames
 
     coded_frame = CFDataCreate(kCFAllocatorDefault,
-                               vda_ctx->bitstream,
-                               vda_ctx->bitstream_size);
+                               vda_ctx->priv_bitstream,
+                               vda_ctx->priv_bitstream_size);
 
     status = VDADecoderDecode(vda_ctx->decoder, 0, coded_frame, NULL);
 
@@ -210,7 +209,7 @@ static int start_frame(AVCodecContext *avctx,
     if (!vda_ctx->decoder)
         return -1;
 
-    vda_ctx->bitstream_size = 0;
+    vda_ctx->priv_bitstream_size = 0;
 
     return 0;
 }
@@ -225,38 +224,38 @@ static int decode_slice(AVCodecContext *avctx,
     if (!vda_ctx->decoder)
         return -1;
 
-    tmp = av_fast_realloc(vda_ctx->bitstream,
-                          &vda_ctx->ref_size,
-                          vda_ctx->bitstream_size+size+4);
+    tmp = av_fast_realloc(vda_ctx->priv_bitstream,
+                          &vda_ctx->priv_allocated_size,
+                          vda_ctx->priv_bitstream_size + size + 4);
     if (!tmp)
         return AVERROR(ENOMEM);
 
-    vda_ctx->bitstream = tmp;
+    vda_ctx->priv_bitstream = tmp;
 
-    AV_WB32(vda_ctx->bitstream+vda_ctx->bitstream_size, size);
-    memcpy(vda_ctx->bitstream+vda_ctx->bitstream_size+4, buffer, size);
+    AV_WB32(vda_ctx->priv_bitstream + vda_ctx->priv_bitstream_size, size);
+    memcpy(vda_ctx->priv_bitstream + vda_ctx->priv_bitstream_size + 4, buffer, size);
 
-    vda_ctx->bitstream_size += size + 4;
+    vda_ctx->priv_bitstream_size += size + 4;
 
     return 0;
 }
 
 static int end_frame(AVCodecContext *avctx)
 {
-    H264Context *h = avctx->priv_data;
-    struct vda_context *vda_ctx = avctx->hwaccel_context;
-    AVFrame *frame = &h->s.current_picture_ptr->f;
+    H264Context *h                      = avctx->priv_data;
+    struct vda_context *vda_ctx         = avctx->hwaccel_context;
+    AVFrame *frame                      = &h->s.current_picture_ptr->f;
     int status;
 
-    if (!vda_ctx->decoder || !vda_ctx->bitstream)
+    if (!vda_ctx->decoder || !vda_ctx->priv_bitstream)
         return -1;
 
     if (vda_ctx->use_sync_decoding) {
         status = vda_sync_decode(vda_ctx);
         frame->data[3] = (void*)vda_ctx->cv_buffer;
     } else {
-        status = vda_decoder_decode(vda_ctx, vda_ctx->bitstream,
-                                    vda_ctx->bitstream_size,
+        status = vda_decoder_decode(vda_ctx, vda_ctx->priv_bitstream,
+                                    vda_ctx->priv_bitstream_size,
                                     frame->reordered_opaque);
     }
 
@@ -280,8 +279,8 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
     CFMutableDictionaryRef io_surface_properties;
     CFNumberRef cv_pix_fmt;
 
-    vda_ctx->bitstream = NULL;
-    vda_ctx->ref_size = 0;
+    vda_ctx->priv_bitstream = NULL;
+    vda_ctx->priv_allocated_size = 0;
 
 #if FF_API_VDA_ASYNC
     pthread_mutex_init(&vda_ctx->queue_mutex, NULL);
@@ -341,7 +340,7 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
 
     status = VDADecoderCreate(config_info,
                               buffer_attributes,
-                              (VDADecoderOutputCallback *)vda_decoder_callback,
+                              vda_decoder_callback,
                               vda_ctx,
                               &vda_ctx->decoder);
 
@@ -368,8 +367,7 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
     vda_clear_queue(vda_ctx);
     pthread_mutex_destroy(&vda_ctx->queue_mutex);
 #endif
-    if (vda_ctx->bitstream)
-        av_freep(&vda_ctx->bitstream);
+    av_freep(&vda_ctx->priv_bitstream);
 
     return status;
 }
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 8fd2164fac9e6170abc7656d5bce1b3ddea2ca18..ab109733b1c7b1e26032b9d5803280f2e65b774b 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -89,6 +89,9 @@
 #ifndef FF_API_CODEC_ID
 #define FF_API_CODEC_ID          (LIBAVCODEC_VERSION_MAJOR < 55)
 #endif
+#ifndef FF_API_VDA_ASYNC
+#define FF_API_VDA_ASYNC         (LIBAVCODEC_VERSION_MAJOR < 55)
+#endif
 
 #ifndef FF_API_VDA_ASYNC
 #define FF_API_VDA_ASYNC         (LIBAVCODEC_VERSION_MAJOR < 55)
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index dc61a5bce222d66644da32d0be07231f7afa0f74..dbac83f85893dc2e2c3ab10f4c10847e563c0512 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -169,14 +169,16 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
     __asm__ volatile(
         BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
                              "%2", "%q2", "%3", "%b3",
-                             "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10", "%11")
+                             "%c6(%5)", "%c7(%5)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%8")
         : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
         : "r"(state), "r"(c),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end)),
-          "i"(H264_NORM_SHIFT_OFFSET),
-          "i"(H264_LPS_RANGE_OFFSET),
-          "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG
+          "i"(offsetof(CABACContext, bytestream_end))
+          TABLES_ARG
         : "%"REG_c, "memory"
     );
     return bit & 1;
@@ -188,8 +190,8 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 {
     x86_reg tmp;
     __asm__ volatile(
-        "movl        %a6(%2), %k1       \n\t"
-        "movl        %a3(%2), %%eax     \n\t"
+        "movl        %c6(%2), %k1       \n\t"
+        "movl        %c3(%2), %%eax     \n\t"
         "shl             $17, %k1       \n\t"
         "add           %%eax, %%eax     \n\t"
         "sub             %k1, %%eax     \n\t"
@@ -200,16 +202,16 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
         "sub           %%edx, %%ecx     \n\t"
         "test           %%ax, %%ax      \n\t"
         "jnz              1f            \n\t"
-        "mov         %a4(%2), %1        \n\t"
+        "mov         %c4(%2), %1        \n\t"
         "subl        $0xFFFF, %%eax     \n\t"
         "movzwl         (%1), %%edx     \n\t"
         "bswap         %%edx            \n\t"
         "shrl            $15, %%edx     \n\t"
         "add              $2, %1        \n\t"
         "addl          %%edx, %%eax     \n\t"
-        "mov              %1, %a4(%2)   \n\t"
+        "mov              %1, %c4(%2)   \n\t"
         "1:                             \n\t"
-        "movl          %%eax, %a3(%2)   \n\t"
+        "movl          %%eax, %c3(%2)   \n\t"
 
         : "+c"(val), "=&r"(tmp)
         : "r"(c),
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index b059cf94235927bb2992de3b104595d868fb5525..0dc0a7cb0f39bf809032041177aef9da1be50747 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -63,7 +63,11 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16")
+                             "%c11(%6)", "%c12(%6)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%13")
 
         "test $1, %4                            \n\t"
         " jz 4f                                 \n\t"
@@ -71,7 +75,11 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15", "%16")
+                             "%c11(%6)", "%c12(%6)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%13")
 
         "sub  %10, %1                           \n\t"
         "mov  %2, %0                            \n\t"
@@ -99,10 +107,8 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
           "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
         : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
           "i"(offsetof(CABACContext, bytestream)),
-          "i"(offsetof(CABACContext, bytestream_end)),
-          "i"(H264_NORM_SHIFT_OFFSET),
-          "i"(H264_LPS_RANGE_OFFSET),
-          "i"(H264_MLPS_STATE_OFFSET) TABLES_ARG
+          "i"(offsetof(CABACContext, bytestream_end))
+          TABLES_ARG
         : "%"REG_c, "memory"
     );
     return coeff_count;
@@ -137,22 +143,30 @@ static int decode_significance_8x8_x86(CABACContext *c,
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18")
+                             "%c12(%7)", "%c13(%7)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%15")
 
         "mov %1, %k6                            \n\t"
         "test $1, %4                            \n\t"
         " jz 4f                                 \n\t"
 
 #ifdef BROKEN_RELOCATIONS
-        "movzbl %a17(%18, %q6), %k6\n\t"
+        "movzbl %c14(%15, %q6), %k6\n\t"
 #else
-        "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t"
+        "movzbl "MANGLE(ff_h264_cabac_tables)"+%c14(%k6), %k6\n\t"
 #endif
         "add %11, %6                            \n\t"
 
         BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
                              "%5", "%q5", "%k0", "%b0",
-                             "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16", "%18")
+                             "%c12(%7)", "%c13(%7)",
+                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+                             "%15")
 
         "mov %2, %0                             \n\t"
         "mov %1, %k6                            \n\t"
@@ -179,9 +193,6 @@ static int decode_significance_8x8_x86(CABACContext *c,
           "m"(sig_off), "m"(last_coeff_ctx_base),
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end)),
-          "i"(H264_NORM_SHIFT_OFFSET),
-          "i"(H264_LPS_RANGE_OFFSET),
-          "i"(H264_MLPS_STATE_OFFSET),
           "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
         : "%"REG_c, "memory"
     );
diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c
index 6b9f9efd8321e5267e26f4ec01551981362503d5..0cd0e4c2fb3fdaa350b4339cbb17cd82035ed609 100644
--- a/libavcodec/x86/mlpdsp.c
+++ b/libavcodec/x86/mlpdsp.c
@@ -25,21 +25,21 @@
 
 #if HAVE_7REGS
 
-extern void ff_mlp_firorder_8;
-extern void ff_mlp_firorder_7;
-extern void ff_mlp_firorder_6;
-extern void ff_mlp_firorder_5;
-extern void ff_mlp_firorder_4;
-extern void ff_mlp_firorder_3;
-extern void ff_mlp_firorder_2;
-extern void ff_mlp_firorder_1;
-extern void ff_mlp_firorder_0;
-
-extern void ff_mlp_iirorder_4;
-extern void ff_mlp_iirorder_3;
-extern void ff_mlp_iirorder_2;
-extern void ff_mlp_iirorder_1;
-extern void ff_mlp_iirorder_0;
+extern char ff_mlp_firorder_8; /* FIR symbols: firtable stores their addresses; char is used because void is not an object type */
+extern char ff_mlp_firorder_7;
+extern char ff_mlp_firorder_6;
+extern char ff_mlp_firorder_5;
+extern char ff_mlp_firorder_4;
+extern char ff_mlp_firorder_3;
+extern char ff_mlp_firorder_2;
+extern char ff_mlp_firorder_1;
+extern char ff_mlp_firorder_0;
+
+extern char ff_mlp_iirorder_4; /* NOTE(review): presumably used by address only, like the FIR symbols -- confirm */
+extern char ff_mlp_iirorder_3;
+extern char ff_mlp_iirorder_2;
+extern char ff_mlp_iirorder_1;
+extern char ff_mlp_iirorder_0;
 
 static const void *firtable[9] = { &ff_mlp_firorder_0, &ff_mlp_firorder_1,
                                    &ff_mlp_firorder_2, &ff_mlp_firorder_3,
diff --git a/libavfilter/x86/yadif_template.c b/libavfilter/x86/yadif_template.c
index 06c8797e28b479d11499838bb026be720ab48360..e2d450e2889422eba0f3ab13bb353bba88f3438b 100644
--- a/libavfilter/x86/yadif_template.c
+++ b/libavfilter/x86/yadif_template.c
@@ -107,8 +107,8 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
                                       uint8_t *next, int w, int prefs,
                                       int mrefs, int parity, int mode)
 {
-    uint8_t tmp[5*16];
-    uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
+    uint8_t tmpU[5*16];
+    uint8_t *tmp= (uint8_t*)(((uint64_t)(tmpU+15)) & ~15);
     int x;
 
 #define FILTER\
@@ -122,9 +122,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             MOVQ"      "MM"3, "MM"4 \n\t"\
             "paddw     "MM"2, "MM"3 \n\t"\
             "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
-            MOVQ"      "MM"0, (%[tmpA]) \n\t" /* c */\
-            MOVQ"      "MM"3, 16(%[tmpA]) \n\t" /* d */\
-            MOVQ"      "MM"1, 32(%[tmpA]) \n\t" /* e */\
+            MOVQ"      "MM"0,   (%[tmp]) \n\t" /* c */\
+            MOVQ"      "MM"3, 16(%[tmp]) \n\t" /* d */\
+            MOVQ"      "MM"1, 32(%[tmp]) \n\t" /* e */\
             "psubw     "MM"4, "MM"2 \n\t"\
             PABS(      MM"4", MM"2") /* temporal_diff0 */\
             LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
@@ -146,7 +146,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
             "psrlw     $1,    "MM"3 \n\t"\
             "pmaxsw    "MM"3, "MM"2 \n\t"\
-            MOVQ"      "MM"2, 48(%[tmpA]) \n\t" /* diff */\
+            MOVQ"      "MM"2, 48(%[tmp]) \n\t" /* diff */\
 \
             "paddw     "MM"0, "MM"1 \n\t"\
             "paddw     "MM"0, "MM"0 \n\t"\
@@ -177,7 +177,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             CHECK2\
 \
             /* if(p->mode<2) ... */\
-            MOVQ"    48(%[tmpA]), "MM"6 \n\t" /* diff */\
+            MOVQ" 48(%[tmp]), "MM"6 \n\t" /* diff */\
             "cmpl      $2, %[mode] \n\t"\
             "jge       1f \n\t"\
             LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
@@ -188,9 +188,9 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "paddw     "MM"5, "MM"3 \n\t"\
             "psrlw     $1,    "MM"2 \n\t" /* b */\
             "psrlw     $1,    "MM"3 \n\t" /* f */\
-            MOVQ"    (%[tmpA]), "MM"4 \n\t" /* c */\
-            MOVQ"    16(%[tmpA]), "MM"5 \n\t" /* d */\
-            MOVQ"    32(%[tmpA]), "MM"7 \n\t" /* e */\
+            MOVQ"   (%[tmp]), "MM"4 \n\t" /* c */\
+            MOVQ" 16(%[tmp]), "MM"5 \n\t" /* d */\
+            MOVQ" 32(%[tmp]), "MM"7 \n\t" /* e */\
             "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
             "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
             MOVQ"      "MM"5, "MM"0 \n\t"\
@@ -209,7 +209,7 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
             "1: \n\t"\
 \
-            MOVQ"    16(%[tmpA]), "MM"2 \n\t" /* d */\
+            MOVQ" 16(%[tmp]), "MM"2 \n\t" /* d */\
             MOVQ"      "MM"2, "MM"3 \n\t"\
             "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
             "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
@@ -217,14 +217,13 @@ static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
             "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
             "packuswb  "MM"1, "MM"1 \n\t"\
 \
-            :\
-            :[tmpA] "r"(tmpA),\
-             [prev] "r"(prev),\
+            ::[prev] "r"(prev),\
              [cur]  "r"(cur),\
              [next] "r"(next),\
              [prefs]"r"((x86_reg)prefs),\
              [mrefs]"r"((x86_reg)mrefs),\
-             [mode] "g"(mode)\
+             [mode] "g"(mode),\
+             [tmp]  "r"(tmp)\
         );\
         __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
         dst += STEP;\
diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 55cb67243eb0bca475f7f97df33f35671d0f99d8..4f57cb8a2194f4d548c81f7b1dbdba65f008c242 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -1200,6 +1200,9 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
 #endif
 
     switch (pkt->type) {
+    case RTMP_PT_BYTES_READ:
+        av_dlog(s, "received bytes read report\n");
+        break;
     case RTMP_PT_CHUNK_SIZE:
         if ((ret = handle_chunk_size(s, pkt)) < 0)
             return ret;
diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index cf6293525fc2cf245ff9f4dd0c024c1fb1771106..0ab0d4c6f817b9af2dc2a659c7d28176e50520f8 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -44,7 +44,7 @@ static av_always_inline av_const int FASTDIV(int a, int b)
 }
 
 #define av_clip_uint8 av_clip_uint8_arm
-static av_always_inline av_const uint8_t av_clip_uint8_arm(int a)
+static av_always_inline av_const unsigned av_clip_uint8_arm(int a)
 {
     unsigned x;
     __asm__ ("usat %0, #8,  %1" : "=r"(x) : "r"(a));
@@ -52,15 +52,15 @@ static av_always_inline av_const uint8_t av_clip_uint8_arm(int a)
 }
 
 #define av_clip_int8 av_clip_int8_arm
-static av_always_inline av_const uint8_t av_clip_int8_arm(int a)
+static av_always_inline av_const int av_clip_int8_arm(int a) /* returns int: a uint8_t return would turn negative saturated values into 128..255 */
 {
-    unsigned x;
+    int x; /* signed, to match the signed saturation done by ssat below */
     __asm__ ("ssat %0, #8,  %1" : "=r"(x) : "r"(a));
     return x;
 }
 
 #define av_clip_uint16 av_clip_uint16_arm
-static av_always_inline av_const uint16_t av_clip_uint16_arm(int a)
+static av_always_inline av_const unsigned av_clip_uint16_arm(int a)
 {
     unsigned x;
     __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a));
@@ -68,7 +68,7 @@ static av_always_inline av_const uint16_t av_clip_uint16_arm(int a)
 }
 
 #define av_clip_int16 av_clip_int16_arm
-static av_always_inline av_const int16_t av_clip_int16_arm(int a)
+static av_always_inline av_const int av_clip_int16_arm(int a)
 {
     int x;
     __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 594524d9edb48132e77f638e25bbe31a7a05a813..7e5ffdf8d11779f55e4de3dd4dac4b906720de38 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
     __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "punpckldq    3%1, %%mm0    \n\t"
-            "movd         6%1, %%mm1    \n\t"
-            "punpckldq    9%1, %%mm1    \n\t"
-            "movd        12%1, %%mm2    \n\t"
-            "punpckldq   15%1, %%mm2    \n\t"
-            "movd        18%1, %%mm3    \n\t"
-            "punpckldq   21%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "punpckldq  3(%1), %%mm0    \n\t"
+            "movd       6(%1), %%mm1    \n\t"
+            "punpckldq  9(%1), %%mm1    \n\t"
+            "movd      12(%1), %%mm2    \n\t"
+            "punpckldq 15(%1), %%mm2    \n\t"
+            "movd      18(%1), %%mm3    \n\t"
+            "punpckldq 21(%1), %%mm3    \n\t"
             "por        %%mm7, %%mm0    \n\t"
             "por        %%mm7, %%mm1    \n\t"
             "por        %%mm7, %%mm2    \n\t"
             "por        %%mm7, %%mm3    \n\t"
-            MOVNTQ"     %%mm0,   %0     \n\t"
-            MOVNTQ"     %%mm1,  8%0     \n\t"
-            MOVNTQ"     %%mm2, 16%0     \n\t"
-            MOVNTQ"     %%mm3, 24%0"
-            :"=m"(*dest)
-            :"m"(*s)
+            MOVNTQ"     %%mm0,   (%0)   \n\t"
+            MOVNTQ"     %%mm1,  8(%0)   \n\t"
+            MOVNTQ"     %%mm2, 16(%0)   \n\t"
+            MOVNTQ"     %%mm3, 24(%0)"
+            :: "r"(dest), "r"(s)
             :"memory");
         dest += 32;
         s += 24;
@@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
             "pand  "MANGLE(mask24hhhh)", %%mm5\n\t" \
             "por        %%mm5, %%mm4    \n\t" \
  \
-            MOVNTQ"     %%mm0,   %0     \n\t" \
-            MOVNTQ"     %%mm1,  8%0     \n\t" \
-            MOVNTQ"     %%mm4, 16%0"
+            MOVNTQ"     %%mm0,   (%0)    \n\t" \
+            MOVNTQ"     %%mm1,  8(%0)    \n\t" \
+            MOVNTQ"     %%mm4, 16(%0)"
 
 
 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
@@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 31;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq        16%1, %%mm4    \n\t"
-            "movq        24%1, %%mm5    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq       8(%1), %%mm1    \n\t"
+            "movq      16(%1), %%mm4    \n\t"
+            "movq      24(%1), %%mm5    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm1, %%mm3    \n\t"
             "movq       %%mm4, %%mm6    \n\t"
             "movq       %%mm5, %%mm7    \n\t"
             STORE_BGR24_MMX
-            :"=m"(*dest)
-            :"m"(*s)
+            :: "r"(dest), "r"(s)
             :"memory");
         dest += 24;
         s += 32;
@@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s<mm_end) {
         __asm__ volatile(
-            PREFETCH"  32%1         \n\t"
-            "movq        %1, %%mm0  \n\t"
-            "movq       8%1, %%mm2  \n\t"
+            PREFETCH" 32(%1)        \n\t"
+            "movq      (%1), %%mm0  \n\t"
+            "movq     8(%1), %%mm2  \n\t"
             "movq     %%mm0, %%mm1  \n\t"
             "movq     %%mm2, %%mm3  \n\t"
             "pand     %%mm4, %%mm0  \n\t"
             "pand     %%mm4, %%mm2  \n\t"
             "paddw    %%mm1, %%mm0  \n\t"
             "paddw    %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  %0    \n\t"
-            MOVNTQ"   %%mm2, 8%0"
-            :"=m"(*d)
-            :"m"(*s)
+            MOVNTQ"   %%mm0,  (%0)  \n\t"
+            MOVNTQ"   %%mm2, 8(%0)"
+            :: "r"(d), "r"(s)
         );
         d+=16;
         s+=16;
@@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s<mm_end) {
         __asm__ volatile(
-            PREFETCH"  32%1         \n\t"
-            "movq        %1, %%mm0  \n\t"
-            "movq       8%1, %%mm2  \n\t"
+            PREFETCH" 32(%1)        \n\t"
+            "movq      (%1), %%mm0  \n\t"
+            "movq     8(%1), %%mm2  \n\t"
             "movq     %%mm0, %%mm1  \n\t"
             "movq     %%mm2, %%mm3  \n\t"
             "psrlq       $1, %%mm0  \n\t"
@@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
             "pand     %%mm6, %%mm3  \n\t"
             "por      %%mm1, %%mm0  \n\t"
             "por      %%mm3, %%mm2  \n\t"
-            MOVNTQ"   %%mm0,  %0    \n\t"
-            MOVNTQ"   %%mm2, 8%0"
-            :"=m"(*d)
-            :"m"(*s)
+            MOVNTQ"   %%mm0,  (%0)  \n\t"
+            MOVNTQ"   %%mm2, 8(%0)"
+            :: "r"(d), "r"(s)
         );
         d+=16;
         s+=16;
@@ -287,7 +283,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
     mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
     __asm__ volatile(
         "movq           %3, %%mm5   \n\t"
         "movq           %4, %%mm6   \n\t"
@@ -322,47 +317,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s
         : "+r" (d), "+r"(s)
         : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
     );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq    %0, %%mm7    \n\t"
-        "movq    %1, %%mm6    \n\t"
-        ::"m"(red_16mask),"m"(green_16mask));
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $5, %%mm1    \n\t"
-            "psrlq         $5, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $8, %%mm2    \n\t"
-            "psrlq         $8, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
     __asm__ volatile(SFENCE:::"memory");
     __asm__ volatile(EMMS:::"memory");
     while (s < end) {
@@ -386,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       4(%1), %%mm3    \n\t"
+            "punpckldq  8(%1), %%mm0    \n\t"
+            "punpckldq 12(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -413,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            :: "r"(d),"r"(s),"m"(blue_16mask):"memory");
         d += 4;
         s += 16;
     }
@@ -434,7 +388,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
     mm_end = end - 15;
-#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
     __asm__ volatile(
         "movq           %3, %%mm5   \n\t"
         "movq           %4, %%mm6   \n\t"
@@ -469,47 +422,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s
         : "+r" (d), "+r"(s)
         : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
     );
-#else
-    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
-    __asm__ volatile(
-        "movq          %0, %%mm7    \n\t"
-        "movq          %1, %%mm6    \n\t"
-        ::"m"(red_15mask),"m"(green_15mask));
-    while (s < mm_end) {
-        __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
-            "movq       %%mm0, %%mm1    \n\t"
-            "movq       %%mm0, %%mm2    \n\t"
-            "movq       %%mm3, %%mm4    \n\t"
-            "movq       %%mm3, %%mm5    \n\t"
-            "psrlq         $3, %%mm0    \n\t"
-            "psrlq         $3, %%mm3    \n\t"
-            "pand          %2, %%mm0    \n\t"
-            "pand          %2, %%mm3    \n\t"
-            "psrlq         $6, %%mm1    \n\t"
-            "psrlq         $6, %%mm4    \n\t"
-            "pand       %%mm6, %%mm1    \n\t"
-            "pand       %%mm6, %%mm4    \n\t"
-            "psrlq         $9, %%mm2    \n\t"
-            "psrlq         $9, %%mm5    \n\t"
-            "pand       %%mm7, %%mm2    \n\t"
-            "pand       %%mm7, %%mm5    \n\t"
-            "por        %%mm1, %%mm0    \n\t"
-            "por        %%mm4, %%mm3    \n\t"
-            "por        %%mm2, %%mm0    \n\t"
-            "por        %%mm5, %%mm3    \n\t"
-            "psllq        $16, %%mm3    \n\t"
-            "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
-        d += 4;
-        s += 16;
-    }
-#endif
     __asm__ volatile(SFENCE:::"memory");
     __asm__ volatile(EMMS:::"memory");
     while (s < end) {
@@ -533,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         4%1, %%mm3    \n\t"
-            "punpckldq    8%1, %%mm0    \n\t"
-            "punpckldq   12%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       4(%1), %%mm3    \n\t"
+            "punpckldq  8(%1), %%mm0    \n\t"
+            "punpckldq 12(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -560,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
         d += 4;
         s += 16;
     }
@@ -588,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 11;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       3(%1), %%mm3    \n\t"
+            "punpckldq  6(%1), %%mm0    \n\t"
+            "punpckldq  9(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -615,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
         d += 4;
         s += 12;
     }
@@ -645,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       3(%1), %%mm3    \n\t"
+            "punpckldq  6(%1), %%mm0    \n\t"
+            "punpckldq  9(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -672,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_16mask):"memory");
         d += 4;
         s += 12;
     }
@@ -702,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 11;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movd          %1, %%mm0    \n\t"
-            "movd         3%1, %%mm3    \n\t"
-            "punpckldq    6%1, %%mm0    \n\t"
-            "punpckldq    9%1, %%mm3    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movd        (%1), %%mm0    \n\t"
+            "movd       3(%1), %%mm3    \n\t"
+            "punpckldq  6(%1), %%mm0    \n\t"
+            "punpckldq  9(%1), %%mm3    \n\t"
             "movq       %%mm0, %%mm1    \n\t"
             "movq       %%mm0, %%mm2    \n\t"
             "movq       %%mm3, %%mm4    \n\t"
@@ -729,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
             "psllq        $16, %%mm3    \n\t"
             "por        %%mm3, %%mm0    \n\t"
-            MOVNTQ"     %%mm0, %0       \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+            MOVNTQ"     %%mm0, (%0)     \n\t"
+            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
         d += 4;
         s += 12;
     }
@@ -759,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 15;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"   32%1            \n\t"
-            "movd         %1, %%mm0     \n\t"
-            "movd        3%1, %%mm3     \n\t"
-            "punpckldq   6%1, %%mm0     \n\t"
-            "punpckldq   9%1, %%mm3     \n\t"
+            PREFETCH" 32(%1)            \n\t"
+            "movd       (%1), %%mm0     \n\t"
+            "movd      3(%1), %%mm3     \n\t"
+            "punpckldq 6(%1), %%mm0     \n\t"
+            "punpckldq 9(%1), %%mm3     \n\t"
             "movq      %%mm0, %%mm1     \n\t"
             "movq      %%mm0, %%mm2     \n\t"
             "movq      %%mm3, %%mm4     \n\t"
@@ -786,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
             "por       %%mm5, %%mm3     \n\t"
             "psllq       $16, %%mm3     \n\t"
             "por       %%mm3, %%mm0     \n\t"
-            MOVNTQ"    %%mm0, %0        \n\t"
-            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
+            MOVNTQ"    %%mm0, (%0)      \n\t"
+            ::"r"(d),"r"(s),"m"(blue_15mask):"memory");
         d += 4;
         s += 12;
     }
@@ -812,10 +724,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 7;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -844,9 +756,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "movq       %%mm0, %%mm6    \n\t"
             "movq       %%mm3, %%mm7    \n\t"
 
-            "movq         8%1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq         8%1, %%mm2    \n\t"
+            "movq       8(%1), %%mm0    \n\t"
+            "movq       8(%1), %%mm1    \n\t"
+            "movq       8(%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -873,7 +785,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm5, %%mm3    \n\t"
 
             :"=m"(*d)
-            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null)
+            :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
             :"memory");
         /* borrowed 32 to 24 */
         __asm__ volatile(
@@ -889,8 +801,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
 
             STORE_BGR24_MMX
 
-            :"=m"(*d)
-            :"m"(*s)
+            :: "r"(d), "m"(*s)
             :"memory");
         d += 24;
         s += 8;
@@ -917,10 +828,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     mm_end = end - 7;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -950,9 +861,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "movq       %%mm0, %%mm6    \n\t"
             "movq       %%mm3, %%mm7    \n\t"
 
-            "movq         8%1, %%mm0    \n\t"
-            "movq         8%1, %%mm1    \n\t"
-            "movq         8%1, %%mm2    \n\t"
+            "movq       8(%1), %%mm0    \n\t"
+            "movq       8(%1), %%mm1    \n\t"
+            "movq       8(%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -979,7 +890,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
             "por        %%mm4, %%mm3    \n\t"
             "por        %%mm5, %%mm3    \n\t"
             :"=m"(*d)
-            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+            :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
             :"memory");
         /* borrowed 32 to 24 */
         __asm__ volatile(
@@ -995,8 +906,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
 
             STORE_BGR24_MMX
 
-            :"=m"(*d)
-            :"m"(*s)
+            :: "r"(d), "m"(*s)
             :"memory");
         d += 24;
         s += 8;
@@ -1028,8 +938,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
     "movq       %%mm0, %%mm3    \n\t"                               \
     "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
     "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
-    MOVNTQ"     %%mm0,  %0      \n\t"                               \
-    MOVNTQ"     %%mm3, 8%0      \n\t"                               \
+    MOVNTQ"     %%mm0,  (%0)    \n\t"                               \
+    MOVNTQ"     %%mm3, 8(%0)    \n\t"                               \
 
 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size)
 {
@@ -1044,10 +954,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 3;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -1056,8 +966,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
             "pmulhw        %5, %%mm1    \n\t"
             "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
             PACK_RGB32
-            :"=m"(*d)
-            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid)
+            ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
             :"memory");
         d += 16;
         s += 4;
@@ -1087,10 +996,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
     mm_end = end - 3;
     while (s < mm_end) {
         __asm__ volatile(
-            PREFETCH"    32%1           \n\t"
-            "movq          %1, %%mm0    \n\t"
-            "movq          %1, %%mm1    \n\t"
-            "movq          %1, %%mm2    \n\t"
+            PREFETCH"  32(%1)           \n\t"
+            "movq        (%1), %%mm0    \n\t"
+            "movq        (%1), %%mm1    \n\t"
+            "movq        (%1), %%mm2    \n\t"
             "pand          %2, %%mm0    \n\t"
             "pand          %3, %%mm1    \n\t"
             "pand          %4, %%mm2    \n\t"
@@ -1100,8 +1009,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
             "pmulhw        "MANGLE(mul16_mid)", %%mm1    \n\t"
             "pmulhw        "MANGLE(mul15_hi)", %%mm2    \n\t"
             PACK_RGB32
-            :"=m"(*d)
-            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
+            ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
             :"memory");
         d += 16;
         s += 4;
@@ -2029,8 +1937,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                        int srcStride1, int srcStride2,
                                        int dstStride1, int dstStride2)
 {
-    x86_reg y;
-    int x,w,h;
+    x86_reg x, y;
+    int w,h;
     w=width/2; h=height/2;
     __asm__ volatile(
         PREFETCH" %0    \n\t"
@@ -2042,11 +1950,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
         x=0;
         for (;x<w-31;x+=32) {
             __asm__ volatile(
-                PREFETCH"   32%1        \n\t"
-                "movq         %1, %%mm0 \n\t"
-                "movq        8%1, %%mm2 \n\t"
-                "movq       16%1, %%mm4 \n\t"
-                "movq       24%1, %%mm6 \n\t"
+                PREFETCH"   32(%1,%2)        \n\t"
+                "movq         (%1,%2), %%mm0 \n\t"
+                "movq        8(%1,%2), %%mm2 \n\t"
+                "movq       16(%1,%2), %%mm4 \n\t"
+                "movq       24(%1,%2), %%mm6 \n\t"
                 "movq      %%mm0, %%mm1 \n\t"
                 "movq      %%mm2, %%mm3 \n\t"
                 "movq      %%mm4, %%mm5 \n\t"
@@ -2059,16 +1967,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                 "punpckhbw %%mm5, %%mm5 \n\t"
                 "punpcklbw %%mm6, %%mm6 \n\t"
                 "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   %0  \n\t"
-                MOVNTQ"    %%mm1,  8%0  \n\t"
-                MOVNTQ"    %%mm2, 16%0  \n\t"
-                MOVNTQ"    %%mm3, 24%0  \n\t"
-                MOVNTQ"    %%mm4, 32%0  \n\t"
-                MOVNTQ"    %%mm5, 40%0  \n\t"
-                MOVNTQ"    %%mm6, 48%0  \n\t"
-                MOVNTQ"    %%mm7, 56%0"
-                :"=m"(d[2*x])
-                :"m"(s1[x])
+                MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm7, 56(%0,%2,2)"
+                :: "r"(d), "r"(s1), "r"(x)
                 :"memory");
         }
         for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
@@ -2079,11 +1986,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
         x=0;
         for (;x<w-31;x+=32) {
             __asm__ volatile(
-                PREFETCH"   32%1        \n\t"
-                "movq         %1, %%mm0 \n\t"
-                "movq        8%1, %%mm2 \n\t"
-                "movq       16%1, %%mm4 \n\t"
-                "movq       24%1, %%mm6 \n\t"
+                PREFETCH"   32(%1,%2)        \n\t"
+                "movq         (%1,%2), %%mm0 \n\t"
+                "movq        8(%1,%2), %%mm2 \n\t"
+                "movq       16(%1,%2), %%mm4 \n\t"
+                "movq       24(%1,%2), %%mm6 \n\t"
                 "movq      %%mm0, %%mm1 \n\t"
                 "movq      %%mm2, %%mm3 \n\t"
                 "movq      %%mm4, %%mm5 \n\t"
@@ -2096,16 +2003,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                 "punpckhbw %%mm5, %%mm5 \n\t"
                 "punpcklbw %%mm6, %%mm6 \n\t"
                 "punpckhbw %%mm7, %%mm7 \n\t"
-                MOVNTQ"    %%mm0,   %0  \n\t"
-                MOVNTQ"    %%mm1,  8%0  \n\t"
-                MOVNTQ"    %%mm2, 16%0  \n\t"
-                MOVNTQ"    %%mm3, 24%0  \n\t"
-                MOVNTQ"    %%mm4, 32%0  \n\t"
-                MOVNTQ"    %%mm5, 40%0  \n\t"
-                MOVNTQ"    %%mm6, 48%0  \n\t"
-                MOVNTQ"    %%mm7, 56%0"
-                :"=m"(d[2*x])
-                :"m"(s2[x])
+                MOVNTQ"    %%mm0,   (%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm1,  8(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm2, 16(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm3, 24(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm4, 32(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm5, 40(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm6, 48(%0,%2,2)  \n\t"
+                MOVNTQ"    %%mm7, 56(%0,%2,2)"
+                :: "r"(d), "r"(s2), "r"(x)
                 :"memory");
         }
         for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x];