From fa5daaca0d6ffcfaa9e9d19089910ee7ebf9a8b7 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sat, 28 Apr 2012 13:01:50 +0200
Subject: [PATCH] swr: separate functions for aligned & unaligned

If someone has an idea on how to do this more cleanly, it's welcome.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswresample/x86/audio_convert.asm | 22 ++++++++++++++++------
 libswresample/x86/swresample_x86.c  |  4 ++--
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index c7ce8c6c53c..d98c35cf5dd 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -23,18 +23,26 @@
 
 SECTION .text
 
-%macro INT16_TO_INT32 0
+%macro INT16_TO_INT32 1
 cglobal int16_to_int32_%1, 3, 3, 0, dst, src, len
     mov srcq, [srcq]
     mov dstq, [dstq]
+%ifidn %1, a
+    test dstq, mmsize-1
+        jne int16_to_int32_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne int16_to_int32_u_int %+ SUFFIX
+%else
+int16_to_int32_u_int %+ SUFFIX
+%endif
 .next
-    movu m4, [srcq]
+    mov%1 m4, [srcq]
     pxor m0, m0
     pxor m1, m1
     punpcklwd m0, m4
     punpckhwd m1, m4
-    movu [         dstq], m0
-    movu [mmsize + dstq], m1
+    mov%1 [         dstq], m0
+    mov%1 [mmsize + dstq], m1
     add srcq, mmsize
     add dstq, 2*mmsize
     sub lenq, 2*mmsize
@@ -46,7 +54,9 @@ cglobal int16_to_int32_%1, 3, 3, 0, dst, src, len
 %endmacro
 
 INIT_MMX mmx
-INT16_TO_INT32
+INT16_TO_INT32 u
+INT16_TO_INT32 a
 
 INIT_XMM sse
-INT16_TO_INT32
+INT16_TO_INT32 u
+INT16_TO_INT32 a
diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c
index 5c8d8290cd8..f2a14c1faf1 100644
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -22,7 +22,7 @@
 #include "libswresample/audioconvert.h"
 
 #define MULTI_CAPS_FUNC_DECL(cap) \
-    void ff_int16_to_int32_ ## cap(uint8_t **dst, const uint8_t **src, int len);
+    void ff_int16_to_int32_a_ ## cap(uint8_t **dst, const uint8_t **src, int len);
 MULTI_CAPS_FUNC_DECL(mmx)
 MULTI_CAPS_FUNC_DECL(sse)
 
@@ -39,7 +39,7 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
 #define MULTI_CAPS_FUNC(flag, cap) \
     if (mm_flags & flag) {\
         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
-            ac->simd_f =  ff_int16_to_int32_ ## cap;\
+            ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
     }
 
 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
-- 
GitLab