diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 0de01637ad12d9bd1b47f5b0990f610dee873bd6..09eb06d0c9d1cc08ddeb377bb9f8ea867fbdaffa 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -753,6 +753,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->transform_add[3] = ff_hevc_transform_add32_8_avx; } if (EXTERNAL_AVX2(cpu_flags)) { + c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; + c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2; + } + if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2; c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2; if (ARCH_X86_64) { @@ -897,7 +901,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) SAO_BAND_INIT(10, avx); } if (EXTERNAL_AVX2(cpu_flags)) { - + c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2; + } + if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2; c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2; if (ARCH_X86_64) { @@ -1095,6 +1101,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) SAO_BAND_INIT(12, avx); } if (EXTERNAL_AVX2(cpu_flags)) { + c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2; + } + if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2; c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2; diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c index 0ba435816592e5feaedebea4d2a6a7e193560598..9767b212da42ebea3dfe500e36da72b9e829d159 100644 --- a/libavcodec/x86/huffyuvencdsp_mmx.c +++ b/libavcodec/x86/huffyuvencdsp_mmx.c @@ -98,7 +98,7 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c) c->diff_bytes = ff_diff_bytes_sse2; } - if (EXTERNAL_AVX2(cpu_flags)) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->diff_bytes = ff_diff_bytes_avx2; } } diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c index 0dbd2db7f5639d09cd6aa18502da729d8f85c15d..baa81383eac20b580dd2cd7a739fe05ccce8875c 100644 --- a/libavcodec/x86/jpeg2000dsp_init.c +++ b/libavcodec/x86/jpeg2000dsp_init.c @@ -44,7 +44,7 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c) c->mct_decode[FF_DWT97] = ff_ict_float_avx; } - if (EXTERNAL_AVX2(cpu_flags)) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->mct_decode[FF_DWT53] = ff_rct_int_avx2; } } diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c index e9d9b1bf184b3f16cf16e9f6faea7f5f86571def..7f5e6b11d5ca53b3cb490d964350b71735f6d4ef 100644 --- a/libavcodec/x86/mlpdsp_init.c +++ b/libavcodec/x86/mlpdsp_init.c @@ -199,6 +199,6 @@ av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c) #endif if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4; - if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) + if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2) c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2; } diff --git a/libavcodec/x86/synth_filter_init.c b/libavcodec/x86/synth_filter_init.c index 0649ea20a6864727d17c65efe7ca17d9ad1faa94..9ef00cdb0a36b58c999d414b600e3ba09ca7465a 100644 --- a/libavcodec/x86/synth_filter_init.c +++ b/libavcodec/x86/synth_filter_init.c @@ -67,7 +67,7 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_AVX_FAST(cpu_flags)) { s->synth_filter_float = synth_filter_avx; } - if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) { + if (EXTERNAL_FMA3_FAST(cpu_flags)) { s->synth_filter_float = synth_filter_fma3; } #endif /* HAVE_YASM */ diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c index ee48e80538e503515373afdb36a6577f001577c3..8abb152c4f7573be0ac29456d9765811aa433e04 100644 --- a/libavcodec/x86/v210enc_init.c +++ b/libavcodec/x86/v210enc_init.c @@ -45,7 +45,7 @@ av_cold void ff_v210enc_init_x86(V210EncContext *s) if (EXTERNAL_AVX(cpu_flags)) s->pack_line_8 = ff_v210_planar_pack_8_avx; - if (EXTERNAL_AVX2(cpu_flags)) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) { s->pack_line_8 = ff_v210_planar_pack_8_avx2; s->pack_line_10 = ff_v210_planar_pack_10_avx2; s->sample_factor = 2; diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 8efb18c89f268514373cdcd1766df352f048505d..469a66171745ccfc5b7209221bb6014ec7fae7f2 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -377,7 +377,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) init_ipred(32, avx, v, VERT); } - if (EXTERNAL_AVX2(cpu_flags)) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) { init_fpel_func(1, 1, 32, avg, _8, avx2); init_fpel_func(0, 1, 64, avg, _8, avx2); if (ARCH_X86_64) { diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c index 4ceb4d4b49dbfb27095db75aeb88cb8213c20f24..eb67499c96241442cb5501877fac2f79b5451049 100644 --- a/libavcodec/x86/vp9dsp_init_16bpp.c +++ b/libavcodec/x86/vp9dsp_init_16bpp.c @@ -129,7 +129,7 @@ av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp) init_ipred_funcs(hd, HOR_DOWN, 16, avx); } - if (EXTERNAL_AVX2(cpu_flags)) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) { init_fpel_func(2, 1, 32, avg, _16, avx2); init_fpel_func(1, 1, 64, avg, _16, avx2); init_fpel_func(0, 1, 128, avg, _16, avx2); diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c index 90cdcc9ace759f9d3d5d23b810ea8aba3f63d895..4840b2844e3a351b139b6fa31e8405f35ccb680a 100644 --- a/libavcodec/x86/vp9dsp_init_16bpp_template.c +++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c @@ -225,7 +225,7 @@ av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact) init_lpf_funcs(BPC, avx); } - if (EXTERNAL_AVX2(cpu_flags)) { + if (EXTERNAL_AVX2_FAST(cpu_flags)) { #if HAVE_AVX2_EXTERNAL init_subpel3_32_64(0, put, BPC, avx2); init_subpel3_32_64(1, avg, BPC, avx2); diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index f211f2396b298ccd0994ad399d1690fa5a22d668..c836a78e1b9937f194289baa7c9bc8a6bbb737ad 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -92,7 +92,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_fmul_add = ff_vector_fmul_add_avx; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx; } - if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) { + if (EXTERNAL_FMA3_FAST(cpu_flags)) { fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3; fdsp->vector_fmul_add = ff_vector_fmul_add_fma3; } diff --git a/libavutil/x86/lls_init.c b/libavutil/x86/lls_init.c index 9f0d862b0e1c0ebfcb6d18e9ead318d6c3171d52..1c5dca42dc08bdb6c238b85a43854e05b86cf351 100644 --- a/libavutil/x86/lls_init.c +++ b/libavutil/x86/lls_init.c @@ -39,7 +39,7 @@ av_cold void ff_init_lls_x86(LLSModel *m) if (EXTERNAL_AVX_FAST(cpu_flags)) { m->update_lls = ff_update_lls_avx; } - if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) { + if (EXTERNAL_FMA3_FAST(cpu_flags)) { m->update_lls = ff_update_lls_fma3; } } diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c index 5e5e91d1422a36a0aed63906592884d29f7118bd..bb89cf604bffddbda905cf4466bc2387722f3789 100644 --- a/libswresample/x86/audio_convert_init.c +++ b/libswresample/x86/audio_convert_init.c @@ -174,7 +174,7 @@ MULTI_CAPS_FUNC(SSE2, sse2) ac->simd_f = ff_pack_8ch_float_to_int32_a_avx; } } - if(EXTERNAL_AVX2(mm_flags)) { + if(EXTERNAL_AVX2_FAST(mm_flags)) { if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP) ac->simd_f = ff_float_to_int32_a_avx2; } diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c index bc444cfb8b5efaafd208b85278f1b3928e142b55..9d7d5cf89e23bcc8a99c9a139bf6566a98cfb232 100644 --- a/libswresample/x86/resample_init.c +++ b/libswresample/x86/resample_init.c @@ -71,7 +71,7 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c) c->dsp.resample = c->linear ? ff_resample_linear_float_avx : ff_resample_common_float_avx; } - if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) { + if (EXTERNAL_FMA3_FAST(mm_flags)) { c->dsp.resample = c->linear ? ff_resample_linear_float_fma3 : ff_resample_common_float_fma3; }