From 51f0ac65df62c8da804f2e995c38ab9adaf32b9b Mon Sep 17 00:00:00 2001
From: Loren Merritt <lorenm@u.washington.edu>
Date: Sun, 3 Feb 2008 03:21:47 +0000
Subject: [PATCH] remove some movq in ff_h264_idct8_add_mmx. 225->217 cycles on
 core2.

Originally committed as revision 11825 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/i386/h264dsp_mmx.c | 52 ++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c
index 1ecb0cbef11..8d96ba7f1fc 100644
--- a/libavcodec/i386/h264dsp_mmx.c
+++ b/libavcodec/i386/h264dsp_mmx.c
@@ -94,44 +94,38 @@ static inline void h264_idct8_1d(int16_t *block)
 {
     asm volatile(
         "movq 112(%0), %%mm7  \n\t"
-        "movq  80(%0), %%mm5  \n\t"
+        "movq  80(%0), %%mm0  \n\t"
         "movq  48(%0), %%mm3  \n\t"
-        "movq  16(%0), %%mm1  \n\t"
+        "movq  16(%0), %%mm5  \n\t"
 
-        "movq   %%mm7, %%mm4  \n\t"
-        "movq   %%mm3, %%mm6  \n\t"
-        "movq   %%mm5, %%mm0  \n\t"
-        "movq   %%mm7, %%mm2  \n\t"
+        "movq   %%mm0, %%mm4  \n\t"
+        "movq   %%mm5, %%mm1  \n\t"
         "psraw  $1,    %%mm4  \n\t"
-        "psraw  $1,    %%mm6  \n\t"
-        "psubw  %%mm7, %%mm0  \n\t"
-        "psubw  %%mm6, %%mm2  \n\t"
-        "psubw  %%mm4, %%mm0  \n\t"
-        "psubw  %%mm3, %%mm2  \n\t"
-        "psubw  %%mm3, %%mm0  \n\t"
-        "paddw  %%mm1, %%mm2  \n\t"
-
-        "movq   %%mm5, %%mm4  \n\t"
-        "movq   %%mm1, %%mm6  \n\t"
-        "psraw  $1,    %%mm4  \n\t"
-        "psraw  $1,    %%mm6  \n\t"
-        "paddw  %%mm5, %%mm4  \n\t"
-        "paddw  %%mm1, %%mm6  \n\t"
+        "psraw  $1,    %%mm1  \n\t"
+        "paddw  %%mm0, %%mm4  \n\t"
+        "paddw  %%mm5, %%mm1  \n\t"
         "paddw  %%mm7, %%mm4  \n\t"
-        "paddw  %%mm5, %%mm6  \n\t"
-        "psubw  %%mm1, %%mm4  \n\t"
-        "paddw  %%mm3, %%mm6  \n\t"
+        "paddw  %%mm0, %%mm1  \n\t"
+        "psubw  %%mm5, %%mm4  \n\t"
+        "paddw  %%mm3, %%mm1  \n\t"
+
+        "psubw  %%mm3, %%mm5  \n\t"
+        "psubw  %%mm3, %%mm0  \n\t"
+        "paddw  %%mm7, %%mm5  \n\t"
+        "psubw  %%mm7, %%mm0  \n\t"
+        "psraw  $1,    %%mm3  \n\t"
+        "psraw  $1,    %%mm7  \n\t"
+        "psubw  %%mm3, %%mm5  \n\t"
+        "psubw  %%mm7, %%mm0  \n\t"
 
-        "movq   %%mm0, %%mm1  \n\t"
         "movq   %%mm4, %%mm3  \n\t"
-        "movq   %%mm2, %%mm5  \n\t"
-        "movq   %%mm6, %%mm7  \n\t"
-        "psraw  $2,    %%mm6  \n\t"
+        "movq   %%mm1, %%mm7  \n\t"
+        "psraw  $2,    %%mm1  \n\t"
         "psraw  $2,    %%mm3  \n\t"
+        "paddw  %%mm5, %%mm3  \n\t"
         "psraw  $2,    %%mm5  \n\t"
+        "paddw  %%mm0, %%mm1  \n\t"
         "psraw  $2,    %%mm0  \n\t"
-        "paddw  %%mm6, %%mm1  \n\t"
-        "paddw  %%mm2, %%mm3  \n\t"
         "psubw  %%mm4, %%mm5  \n\t"
         "psubw  %%mm0, %%mm7  \n\t"
 
-- 
GitLab