From 084c726ba306eb19077ac525764acec6c110f1d4 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Fri, 20 Sep 2002 10:03:01 +0000
Subject: [PATCH] pix_sum16_mmx()

Originally committed as revision 961 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/i386/dsputil_mmx.c | 39 +++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 4336e4bde02..eaec8fe45ab 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -420,6 +420,44 @@ static void clear_blocks_mmx(DCTELEM *blocks)
         );
 }
 
+static int pix_sum16_mmx(UINT8 * pix, int line_size){ /* returns the sum of the 16 bytes of each of 16 rows starting at pix; consecutive rows are line_size bytes apart */
+    const int h=16;           /* number of rows to sum */
+    int sum;
+    int index= -line_size*h;  /* byte offset relative to pix + line_size*h; stepped by line_size until no longer negative (NOTE(review): assumes line_size > 0) */
+    /* asm operands: %0 = sum, %1 = index, %2 = pix - index (i.e. pix + line_size*h), %3 = line_size */
+    __asm __volatile(
+                "pxor %%mm7, %%mm7		\n\t" /* mm7 = 0, the zero source for byte->word unpacking */
+                "pxor %%mm6, %%mm6		\n\t" /* mm6 = 0, accumulator of four 16-bit lane sums */
+                "1:				\n\t" /* loop head: one 16-byte row per iteration */
+                "movq (%2, %1), %%mm0		\n\t" /* bytes 0..7 of the row (copy for the low unpack) */
+                "movq (%2, %1), %%mm1		\n\t" /* bytes 0..7 again (copy for the high unpack) */
+                "movq 8(%2, %1), %%mm2		\n\t" /* bytes 8..15 of the row (copy for the low unpack) */
+                "movq 8(%2, %1), %%mm3		\n\t" /* bytes 8..15 again (copy for the high unpack) */
+                "punpcklbw %%mm7, %%mm0		\n\t" /* bytes 0..3 zero-extended to four words */
+                "punpckhbw %%mm7, %%mm1		\n\t" /* bytes 4..7 zero-extended to four words */
+                "punpcklbw %%mm7, %%mm2		\n\t" /* bytes 8..11 zero-extended to four words */
+                "punpckhbw %%mm7, %%mm3		\n\t" /* bytes 12..15 zero-extended to four words */
+                "paddw %%mm0, %%mm1		\n\t" /* mm1 = lane-wise sum of bytes 0..7 */
+                "paddw %%mm2, %%mm3		\n\t" /* mm3 = lane-wise sum of bytes 8..15 */
+                "paddw %%mm1, %%mm3		\n\t" /* mm3 = four lane sums covering the whole row */
+                "paddw %%mm3, %%mm6		\n\t" /* add this row's lane sums to the accumulator */
+                "addl %3, %1			\n\t" /* index += line_size; sets the sign flag tested by js */
+                " js 1b				\n\t" /* next row while index is still negative */
+                "movq %%mm6, %%mm5		\n\t" /* horizontal add, step 1: keep a copy of all four lanes */
+                "psrlq $32, %%mm6		\n\t" /* move lanes 2,3 down to lanes 0,1 */
+                "paddw %%mm5, %%mm6		\n\t" /* lane 0 = lanes 0+2, lane 1 = lanes 1+3 */
+                "movq %%mm6, %%mm5		\n\t" /* horizontal add, step 2: copy the folded lanes */
+                "psrlq $16, %%mm6		\n\t" /* move lane 1 down to lane 0 */
+                "paddw %%mm5, %%mm6		\n\t" /* lane 0 = sum of all four lanes (256 bytes, at most 65280, fits in 16 bits) */
+                "movd %%mm6, %0			\n\t" /* copy the low 32 bits of mm6 into sum */
+                "andl $0xFFFF, %0		\n\t" /* keep only lane 0; bits 16..31 hold a leftover partial sum */
+                : "=&r" (sum), "+r" (index) /* outputs: sum is early-clobber, index is read and written */
+                : "r" (pix - index), "r" (line_size) /* inputs: base chosen so that (%2, %1) addresses pix on the first iteration */
+        );
+    /* NOTE(review): the asm has no clobber list although it writes mm0-mm3, mm5-mm7 and reads memory through %2, and it issues no emms -- confirm callers handle this */
+        return sum;
+}
+
 #if 0
 static void just_return() { return; }
 #endif
@@ -448,6 +486,7 @@ void dsputil_init_mmx(void)
         put_pixels_clamped = put_pixels_clamped_mmx;
         add_pixels_clamped = add_pixels_clamped_mmx;
         clear_blocks= clear_blocks_mmx;
+        pix_sum= pix_sum16_mmx;
 
         pix_abs16x16     = pix_abs16x16_mmx;
         pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
-- 
GitLab