From 00eebe3d6a5d8ecd05d76d57eeb4b2b3e91f1d02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
Date: Sun, 22 Jun 2008 07:05:40 +0000
Subject: [PATCH] Fix add_bytes_mmx and add_bytes_l2_mmx for w < 16

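Both loops executed their 16-byte body once before testing the bound,
so 16 bytes were read and written even when w < 16. The bound w-15 is
also negative for w < 15, and the unsigned "jb" comparison treated it
as a huge value, so the loop kept running instead of stopping.

Jump to the loop condition before the first iteration and test the
sign of the comparison ("js") rather than the unsigned carry ("jb"),
so the body is skipped entirely when w < 16.

Originally committed as revision 13877 to svn://svn.ffmpeg.org/ffmpeg/trunk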
---
 libavcodec/i386/dsputil_mmx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 53cfe9d848f..60511a3babd 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -482,6 +482,7 @@ static void clear_blocks_mmx(DCTELEM *blocks)
 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
     x86_reg i=0;
     asm volatile(
+        "jmp 2f                         \n\t"
         "1:                             \n\t"
         "movq  (%1, %0), %%mm0          \n\t"
         "movq  (%2, %0), %%mm1          \n\t"
@@ -492,8 +493,9 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
         "paddb %%mm0, %%mm1             \n\t"
         "movq %%mm1, 8(%2, %0)          \n\t"
         "add $16, %0                    \n\t"
+        "2:                             \n\t"
         "cmp %3, %0                     \n\t"
-        " jb 1b                         \n\t"
+        " js 1b                         \n\t"
         : "+r" (i)
         : "r"(src), "r"(dst), "r"((x86_reg)w-15)
     );
@@ -504,6 +506,7 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
 static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     x86_reg i=0;
     asm volatile(
+        "jmp 2f                         \n\t"
         "1:                             \n\t"
         "movq   (%2, %0), %%mm0         \n\t"
         "movq  8(%2, %0), %%mm1         \n\t"
@@ -512,8 +515,9 @@ static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
         "movq %%mm0,  (%1, %0)          \n\t"
         "movq %%mm1, 8(%1, %0)          \n\t"
         "add $16, %0                    \n\t"
+        "2:                             \n\t"
         "cmp %4, %0                     \n\t"
-        " jb 1b                         \n\t"
+        " js 1b                         \n\t"
         : "+r" (i)
         : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
     );
-- 
GitLab