diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 03833220a0c975790f50a881c549e2478f4e641c..2a3e4a5f74aed284f1294b860d257742e06d7b8d 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -61,7 +61,14 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
     mova  [dstq+lenq+16], m2
     add     lenq, 32
     jl .loop
-    REP_RET
+%if notcpuflag(sse2)
+    ;; cvtpi2ps switches to MMX even if the source is a memory location
+    ;; possible an error in documentation since every tested CPU disagrees with
+    ;; that. Use emms anyway since the vast majority of machines will use the
+    ;; SSE2 variant
+    emms
+%endif
+    RET
 %endmacro
 
 INIT_XMM sse