diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index 3e0c42fb1e5f5769561d5ec8a0ca81af81f339d8..5495821e5b28c5882db7610d9b776f36ed937416 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -25,30 +25,31 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
 {
 #if HAVE_YASM
     int has_vectors = av_get_cpu_flags();
-    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
-        /* AVX for SB */
-        s->imdct_calc = ff_imdct_calc_sse;
-        s->imdct_half = ff_imdct_half_avx;
-        s->fft_permute = ff_fft_permute_sse;
-        s->fft_calc = ff_fft_calc_avx;
-        s->fft_permutation = FF_FFT_PERM_AVX;
-    } else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
+    if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
+        /* 3DNow! for K6-2/3 */
+        s->imdct_calc = ff_imdct_calc_3dn;
+        s->imdct_half = ff_imdct_half_3dn;
+        s->fft_calc = ff_fft_calc_3dn;
+    }
+    if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
+        /* 3DNowEx for K7 */
+        s->imdct_calc = ff_imdct_calc_3dn2;
+        s->imdct_half = ff_imdct_half_3dn2;
+        s->fft_calc = ff_fft_calc_3dn2;
+    }
+    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
         /* SSE for P3/P4/K8 */
         s->imdct_calc = ff_imdct_calc_sse;
         s->imdct_half = ff_imdct_half_sse;
         s->fft_permute = ff_fft_permute_sse;
         s->fft_calc = ff_fft_calc_sse;
         s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
-    } else if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
-        /* 3DNowEx for K7 */
-        s->imdct_calc = ff_imdct_calc_3dn2;
-        s->imdct_half = ff_imdct_half_3dn2;
-        s->fft_calc = ff_fft_calc_3dn2;
-    } else if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
-        /* 3DNow! for K6-2/3 */
-        s->imdct_calc = ff_imdct_calc_3dn;
-        s->imdct_half = ff_imdct_half_3dn;
-        s->fft_calc = ff_fft_calc_3dn;
+    }
+    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
+        /* AVX for SB */
+        s->imdct_half = ff_imdct_half_avx;
+        s->fft_calc = ff_fft_calc_avx;
+        s->fft_permutation = FF_FFT_PERM_AVX;
     }
 #endif
 }
@@ -58,12 +59,12 @@ av_cold void ff_dct_init_mmx(DCTContext *s)
 {
 #if HAVE_YASM
     int has_vectors = av_get_cpu_flags();
+    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
+        s->dct32 = ff_dct32_float_sse;
+    if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE)
+        s->dct32 = ff_dct32_float_sse2;
     if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
         s->dct32 = ff_dct32_float_avx;
-    else if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE)
-        s->dct32 = ff_dct32_float_sse2;
-    else if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
-        s->dct32 = ff_dct32_float_sse;
 #endif
 }
 #endif