diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 6c91f459d00b6dedf4519f315ae785245ae4b45a..caa210d5add10030dd8ab7d23e4bec064648c1cd 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
 static void clip_coefficients(AudioDSPContext *adsp, float *coef,
                               unsigned int len)
 {
-    adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
+    adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX);
 }
 
 
diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c
index f7bd162482f1dfd0d2f0b84e7e0cb0791fa6e715..6902db86b4ce997edf4da6fa7d8ecfd1e413dfd2 100644
--- a/libavcodec/arm/audiodsp_init_neon.c
+++ b/libavcodec/arm/audiodsp_init_neon.c
@@ -25,8 +25,7 @@
 #include "libavcodec/audiodsp.h"
 #include "audiodsp_arm.h"
 
-void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
-                          int len);
+void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max);
 void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len);
 
diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S
index ab32cef7ab5acf9e4662c502387a304267bd8cf3..cea700c84de83a664a39e9937d734a5e7ba84151 100644
--- a/libavcodec/arm/audiodsp_neon.S
+++ b/libavcodec/arm/audiodsp_neon.S
@@ -24,9 +24,8 @@
 function ff_vector_clipf_neon, export=1
 VFP     vdup.32         q1,  d0[1]
 VFP     vdup.32         q0,  d0[0]
-NOVFP   vdup.32         q0,  r2
-NOVFP   vdup.32         q1,  r3
-NOVFP   ldr             r2,  [sp]
+NOVFP   vdup.32         q0,  r3
+NOVFP   vld1.32         {d2[],d3[]}, [sp]
         vld1.f32        {q2},[r1,:128]!
         vmin.f32        q10, q2,  q1
         vld1.f32        {q3},[r1,:128]!
diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c
index 85b5a7494773ed6061b2bbb9e2d50a147de02ca0..3c7a3a7583a4a3fc1feed0eaba6c35612596d193 100644
--- a/libavcodec/audiodsp.c
+++ b/libavcodec/audiodsp.c
@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src,
     }
 }
 
-static void vector_clipf_c(float *dst, const float *src,
-                           float min, float max, int len)
+static void vector_clipf_c(float *dst, const float *src, int len,
+                           float min, float max)
 {
     int i;
 
diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h
index 003a1d0ea9cadfc250e30cc89ad7197a943cbf15..aa6fa7898b5133dc02972e2409d7ea9d2855cee7 100644
--- a/libavcodec/audiodsp.h
+++ b/libavcodec/audiodsp.h
@@ -48,7 +48,8 @@ typedef struct AudioDSPContext {
     /* assume len is a multiple of 16, and arrays are 16-byte aligned */
     void (*vector_clipf)(float *dst /* align 16 */,
                          const float *src /* align 16 */,
-                         float min, float max, int len /* align 16 */);
+                         int len /* align 16 */,
+                         float min, float max);
 } AudioDSPContext;
 
 void ff_audiodsp_init(AudioDSPContext *c);
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 4488f8eaf7118f007657a71dadc62274c4203eb4..53cb83852e3e23b93267add4471b252deaab9e63 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
 static void saturate_output_float(COOKContext *q, float *out)
 {
     q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
-                         -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
+                         FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f);
 }
 
 
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3eeb6fd67f13e16baec81de0bc8bbcb661046dff..8ef2a8c68034377981cfc49e001491e32c7ac5c7 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
 
-;-----------------------------------------------------
-;void ff_vector_clipf(float *dst, const float *src,
-;                     float min, float max, int len)
-;-----------------------------------------------------
+; void ff_vector_clipf_sse(float *dst, const float *src,
+;                          int len, float min, float max)
 INIT_XMM sse
-%if UNIX64
-cglobal vector_clipf, 3,3,6, dst, src, len
-%else
-cglobal vector_clipf, 5,5,6, dst, src, min, max, len
-%endif
-%if WIN64
-    SWAP 0, 2
-    SWAP 1, 3
-%elif ARCH_X86_32
-    movss   m0, minm
-    movss   m1, maxm
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+%if ARCH_X86_32
+    VBROADCASTSS m0, minm
+    VBROADCASTSS m1, maxm
+%elif WIN64
+    VBROADCASTSS m0, m3
+    VBROADCASTSS m1, maxm
+%else ; 64bit sysv
+    VBROADCASTSS m0, m0
+    VBROADCASTSS m1, m1
 %endif
-    SPLATD  m0
-    SPLATD  m1
-        shl lend, 2
-        add srcq, lenq
-        add dstq, lenq
-        neg lenq
-.loop:
-    mova    m2,  [srcq+lenq+mmsize*0]
-    mova    m3,  [srcq+lenq+mmsize*1]
-    mova    m4,  [srcq+lenq+mmsize*2]
-    mova    m5,  [srcq+lenq+mmsize*3]
-    maxps   m2, m0
-    maxps   m3, m0
-    maxps   m4, m0
-    maxps   m5, m0
-    minps   m2, m1
-    minps   m3, m1
-    minps   m4, m1
-    minps   m5, m1
-    mova    [dstq+lenq+mmsize*0], m2
-    mova    [dstq+lenq+mmsize*1], m3
-    mova    [dstq+lenq+mmsize*2], m4
-    mova    [dstq+lenq+mmsize*3], m5
-    add     lenq, mmsize*4
-    jl .loop
-    REP_RET
+
+    movsxdifnidn lenq, lend
+
+.loop: ; lenq = floats still to clip; both buffers are walked from their end
+    mova     m2, [srcq + 4 * lenq - 4 * mmsize]
+    mova     m3, [srcq + 4 * lenq - 3 * mmsize]
+    mova     m4, [srcq + 4 * lenq - 2 * mmsize]
+    mova     m5, [srcq + 4 * lenq - 1 * mmsize]
+
+    maxps    m2, m0
+    maxps    m3, m0
+    maxps    m4, m0
+    maxps    m5, m0
+
+    minps    m2, m1
+    minps    m3, m1
+    minps    m4, m1
+    minps    m5, m1
+
+    mova     [dstq + 4 * lenq - 4 * mmsize], m2
+    mova     [dstq + 4 * lenq - 3 * mmsize], m3
+    mova     [dstq + 4 * lenq - 2 * mmsize], m4
+    mova     [dstq + 4 * lenq - 1 * mmsize], m5
+
+    sub      lenq, mmsize ; one pass = 4 vectors of mmsize bytes = mmsize floats
+    jg      .loop
+
+    RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 8f9e604bb24e913294f0dc65d5ced2e2f409b359..98e296c264ee57f42f7a6e81ecaab8a25aab1459 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
 void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
                                int32_t min, int32_t max, unsigned int len);
 void ff_vector_clipf_sse(float *dst, const float *src,
-                         float min, float max, int len);
+                         int len, float min, float max);
 
 av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 {
diff --git a/tests/checkasm/audiodsp.c b/tests/checkasm/audiodsp.c
index 1da1d1ea09882ea248b715973d9d321d405192e3..7c4e16e40d5d4bc3a6058bd95437318e0f6714bd 100644
--- a/tests/checkasm/audiodsp.c
+++ b/tests/checkasm/audiodsp.c
@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void)
         int i, len;
 
         declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
-                          float min, float max, unsigned int len);
+                          int len, float min, float max);
 
         val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
         val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void)
         len = rnd() % 128;
         len = 16 * FFMAX(len, 1);
 
-        call_ref(dst0, src, min, max, len);
-        call_new(dst1, src, min, max, len);
+        call_ref(dst0, src, len, min, max);
+        call_new(dst1, src, len, min, max);
         for (i = 0; i < len; i++) {
             if (!float_near_ulp_array(dst0, dst1, 3, len))
                 fail();
         }
-        bench_new(dst1, src, min, max, MAX_SIZE);
+        bench_new(dst1, src, MAX_SIZE, min, max);
     }
 
     report("audiodsp");