From 9ac4548ff7e57bee6f47d1826208cdd626ca45d7 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Tue, 19 Jan 2010 16:40:36 +0000
Subject: [PATCH] Fix h264_loop_filter_strength_mmx2() so it works with b
 frames.

Originally committed as revision 21327 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/x86/h264dsp_mmx.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 878ebed500f..e26881beaf9 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -850,6 +850,40 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
                           "m"(mv[l][b_idx+d_idx+2][0])
                     );
                 }
+                if(bidir==1){
+                    __asm__ volatile("pxor %%mm3, %%mm3 \n\t":);
+                    for( l = bidir; l >= 0; l-- ) {
+                    __asm__ volatile(
+                        "movd %0, %%mm1 \n\t"
+                        "punpckldq %1, %%mm1 \n\t"
+                        "punpckldq %%mm1, %%mm2 \n\t"
+                        "pcmpeqb %%mm2, %%mm1 \n\t"
+                        "paddb %%mm6, %%mm1 \n\t"
+                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
+                        "por %%mm1, %%mm3 \n\t"
+
+                        "movq %2, %%mm1 \n\t"
+                        "movq %3, %%mm2 \n\t"
+                        "psubw %4, %%mm1 \n\t"
+                        "psubw %5, %%mm2 \n\t"
+                        "packsswb %%mm2, %%mm1 \n\t"
+                        "paddb %%mm5, %%mm1 \n\t"
+                        "pminub %%mm4, %%mm1 \n\t"
+                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
+                        "por %%mm1, %%mm3 \n\t"
+                        ::"m"(ref[l][b_idx]),
+                          "m"(ref[1-l][b_idx+d_idx]),
+                          "m"(mv[l][b_idx][0]),
+                          "m"(mv[l][b_idx+2][0]),
+                          "m"(mv[1-l][b_idx+d_idx][0]),
+                          "m"(mv[1-l][b_idx+d_idx+2][0])
+                    );
+                    }
+                    __asm__ volatile(
+                        "pcmpeqw %%mm7, %%mm3 \n\t"
+                        "psubusw %%mm3, %%mm0 \n\t"
+                    :);
+                }
             }
             __asm__ volatile(
                 "movd %0, %%mm1 \n\t"
-- 
GitLab