From 5932938c9a2300963ac4f4270d89b58d6fd2b401 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Sun, 29 Apr 2012 11:27:22 +0200
Subject: [PATCH] swr: float_to_int32_sse2()

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
 libswresample/x86/audio_convert.asm | 37 +++++++++++++++++++++++++++++
 libswresample/x86/swresample_x86.c  |  3 +++
 2 files changed, 40 insertions(+)

diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index 761fbb752d2..99bfdcaafd2 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -24,6 +24,7 @@
 SECTION_RODATA
 
 flt2pm31: times 8 dd 4.6566129e-10
+flt2p31 : times 8 dd 2147483648.0 ; 2^31 as a float, repeated 8 times
 
 SECTION .text
 
@@ -128,6 +129,40 @@ int16_to_float_u_int %+ SUFFIX
     REP_RET
 %endmacro
 
+%macro FLOAT_TO_INT32 1 ; float -> int32 conversion with positive-overflow clipping; param is u or a (pastes into mov%1)
+cglobal float_to_int32_%1, 3, 3, 5, dst, src, len ; 3 named args; m0-m4 are used below
+    mov srcq, [srcq] ; src/dst arrive as pointer arrays; load the first pointer of each
+    mov dstq, [dstq]
+%ifidn %1, a
+    test dstq, mmsize-1 ; a variant only: any low bit set means dst is not mmsize-aligned
+        jne float_to_int32_u_int %+ SUFFIX
+    test srcq, mmsize-1 ; same check for src; either failure jumps into the u variant's body
+        jne float_to_int32_u_int %+ SUFFIX
+%else
+float_to_int32_u_int %+ SUFFIX ; entry label inside the u variant, target of the jne above
+%endif
+    add     srcq, lenq ; NOTE(review): len is used as a byte offset with no scaling here - confirm the caller's units
+    add     dstq, lenq ; both pointers now point len bytes past their start
+    neg     lenq ; negative index that counts up towards zero
+    mova      m2, [flt2p31] ; m2 = 2^31 in every lane
+.next:
+    mov%1     m0, [         srcq+lenq] ; load two vectors of floats
+    mov%1     m1, [mmsize + srcq+lenq]
+    mulps m0, m2 ; scale by 2^31
+    mulps m1, m2
+    cvtps2dq  m3, m0 ; convert to int32; out-of-range lanes become 0x80000000
+    cvtps2dq  m4, m1
+    cmpnltps m0, m2 ; m0 = all-ones (-1) in lanes where scaled value is not < 2^31, else 0
+    cmpnltps m1, m2
+    paddd m0, m3 ; add the mask: 0x80000000 + (-1) = 0x7FFFFFFF in overflowed lanes
+    paddd m1, m4
+    mov%1 [         dstq+lenq], m0 ; store two vectors of int32
+    mov%1 [mmsize + dstq+lenq], m1
+    add lenq, 2*mmsize ; each pass handles 2*mmsize bytes; body runs at least once, no tail handling
+        jl .next ; loop while the index is still negative
+    REP_RET
+%endmacro
+
 
 INIT_MMX mmx
 INT16_TO_INT32 u
@@ -142,3 +177,5 @@ INT32_TO_FLOAT u
 INT32_TO_FLOAT a
 INT16_TO_FLOAT u
 INT16_TO_FLOAT a
+FLOAT_TO_INT32 u ; u variant: defines the float_to_int32_u_int entry label
+FLOAT_TO_INT32 a ; a variant: checks pointer alignment and jumps to the u body on mismatch
diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c
index fb973ee0201..9b5f3cd024e 100644
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -28,6 +28,7 @@ MULTI_CAPS_FUNC_DECL(sse)
 
 void ff_int32_to_float_a_sse2(uint8_t **dst, const uint8_t **src, int len);
 void ff_int16_to_float_a_sse2(uint8_t **dst, const uint8_t **src, int len);
+void ff_float_to_int32_a_sse2(uint8_t **dst, const uint8_t **src, int len);
 
 void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                  enum AVSampleFormat out_fmt,
@@ -53,5 +54,7 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
             ac->simd_f =  ff_int32_to_float_a_sse2;
         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
             ac->simd_f =  ff_int16_to_float_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_int32_a_sse2;
     }
 }
-- 
GitLab