Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
f3df42e8
Commit
f3df42e8
authored
7 years ago
by
Martin Vignali
Browse files
Options
Downloads
Patches
Plain Diff
avfilter/x86/vf_blend : add SIMD for 16 bit version of
grainextract grainmerge average extremity negation
parent
8eb0bb11
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libavfilter/x86/vf_blend.asm
+108
-60
108 additions, 60 deletions
libavfilter/x86/vf_blend.asm
libavfilter/x86/vf_blend_init.c
+20
-0
20 additions, 0 deletions
libavfilter/x86/vf_blend_init.c
with
128 additions
and
60 deletions
libavfilter/x86/vf_blend.asm
+
108
−
60
View file @
f3df42e8
...
...
@@ -27,6 +27,8 @@
SECTION
_RODATA
ps_255:
times
4
dd
255.0
pd_32768:
times
4
dd
32768
pd_65535:
times
4
dd
65535
pw_1:
times
8
dw
1
pw_128:
times
8
dw
128
pw_255:
times
8
dw
255
...
...
@@ -79,26 +81,33 @@ BLEND_INIT %1, 2, %3
BLEND_END
%endmacro
%macro GRAINEXTRACT 0
BLEND_INIT
grainextract
,
6
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro GRAINEXTRACT 3-4
BLEND_INIT
%
1
,
6
,
%
4
pxor
m4
,
m4
%if %0 == 4
; 16 bit
VBROADCASTI128
m5
,
[
pd_32768
]
%else
VBROADCASTI128
m5
,
[
pw_128
]
%endif
.nextrow:
mov
xq
,
widthq
.loop:
movu
m1
,
[
topq
+
xq
]
movu
m3
,
[
bottomq
+
xq
]
punpcklbw
m0
,
m1
,
m4
punpckhbw
m1
,
m4
punpcklbw
m2
,
m3
,
m4
punpckhbw
m3
,
m4
paddw
m0
,
m5
paddw
m1
,
m5
psubw
m0
,
m2
psubw
m1
,
m3
punpckl
%
2
%
3
m0
,
m1
,
m4
punpckh
%
2
%
3
m1
,
m4
punpckl
%
2
%
3
m2
,
m3
,
m4
punpckh
%
2
%
3
m3
,
m4
padd
%
3
m0
,
m5
padd
%
3
m1
,
m5
psub
%
3
m0
,
m2
psub
%
3
m1
,
m3
packus
%
3
%
2
m0
,
m1
packuswb
m0
,
m1
mova
[
dstq
+
xq
],
m0
add
xq
,
mmsize
jl
.loop
...
...
@@ -172,8 +181,9 @@ BLEND_INIT screen, 7
BLEND_END
%endmacro
%macro AVERAGE 0
BLEND_INIT
average
,
3
;%1 name, %2 (b or w), %3 (set if 16 bit)
%macro AVERAGE 2-3
BLEND_INIT
%
1
,
3
,
%
3
pcmpeqb
m2
,
m2
.nextrow:
...
...
@@ -184,7 +194,7 @@ BLEND_INIT average, 3
movu
m1
,
[
bottomq
+
xq
]
pxor
m0
,
m2
pxor
m1
,
m2
pavgb
m0
,
m1
pavg
%
2
m0
,
m1
pxor
m0
,
m2
mova
[
dstq
+
xq
],
m0
add
xq
,
mmsize
...
...
@@ -192,29 +202,34 @@ BLEND_INIT average, 3
BLEND_END
%endmacro
%macro GRAINMERGE
0
BLEND_INIT
grainmerge
,
6
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro GRAINMERGE
3-4
BLEND_INIT
%
1
,
6
,
%
4
pxor
m4
,
m4
%if %0 == 4
; 16 bit
VBROADCASTI128
m5
,
[
pd_32768
]
%else
VBROADCASTI128
m5
,
[
pw_128
]
%endif
.nextrow:
mov
xq
,
widthq
.loop:
movu
m1
,
[
topq
+
xq
]
movu
m3
,
[
bottomq
+
xq
]
punpcklbw
m0
,
m1
,
m4
punpckhbw
m1
,
m4
punpcklbw
m2
,
m3
,
m4
punpckhbw
m3
,
m4
paddw
m0
,
m2
paddw
m1
,
m3
psubw
m0
,
m5
psubw
m1
,
m5
punpckl
%
2
%
3
m0
,
m1
,
m4
punpckh
%
2
%
3
m1
,
m4
punpckl
%
2
%
3
m2
,
m3
,
m4
punpckh
%
2
%
3
m3
,
m4
padd
%
3
m0
,
m2
padd
%
3
m1
,
m3
psub
%
3
m0
,
m5
psub
%
3
m1
,
m5
packus
%
3
%
2
m0
,
m1
packuswb
m0
,
m1
mova
[
dstq
+
xq
],
m0
add
xq
,
mmsize
jl
.loop
...
...
@@ -324,52 +339,73 @@ BLEND_INIT %1, 5, %4
BLEND_END
%endmacro
%macro BLEND_ABS 0
BLEND_INIT
extremity
,
8
; %1 name , %2 src (b or w), %3 inter (w or d), %4 (1 if 16bit, not set if 8 bit)
%macro EXTREMITY 3-4
BLEND_INIT
%
1
,
8
,
%
4
pxor
m2
,
m2
%if %0 == 4
; 16 bit
VBROADCASTI128
m4
,
[
pd_65535
]
%else
VBROADCASTI128
m4
,
[
pw_255
]
%endif
.nextrow:
mov
xq
,
widthq
.loop:
movu
m0
,
[
topq
+
xq
]
movu
m1
,
[
bottomq
+
xq
]
punpckhbw
m5
,
m0
,
m2
punpcklbw
m0
,
m2
punpckhbw
m6
,
m1
,
m2
punpcklbw
m1
,
m2
psubw
m3
,
m4
,
m0
psubw
m7
,
m4
,
m5
psubw
m3
,
m1
psubw
m7
,
m6
punpckh
%
2
%
3
m5
,
m0
,
m2
punpckl
%
2
%
3
m0
,
m2
punpckh
%
2
%
3
m6
,
m1
,
m2
punpckl
%
2
%
3
m1
,
m2
psub
%
3
m3
,
m4
,
m0
psub
%
3
m7
,
m4
,
m5
psub
%
3
m3
,
m1
psub
%
3
m7
,
m6
%if %0 == 4
; 16 bit
pabsd
m3
,
m3
pabsd
m7
,
m7
%else
ABS2
m3
,
m7
,
m1
,
m6
packuswb
m3
,
m7
%endif
packus
%
3
%
2
m3
,
m7
mova
[
dstq
+
xq
],
m3
add
xq
,
mmsize
jl
.loop
BLEND_END
%endmacro
BLEND_INIT
negation
,
8
%macro NEGATION 3-4
BLEND_INIT
%
1
,
8
,
%
4
pxor
m2
,
m2
%if %0 == 4
; 16 bit
VBROADCASTI128
m4
,
[
pd_65535
]
%else
VBROADCASTI128
m4
,
[
pw_255
]
%endif
.nextrow:
mov
xq
,
widthq
.loop:
movu
m0
,
[
topq
+
xq
]
movu
m1
,
[
bottomq
+
xq
]
punpckhbw
m5
,
m0
,
m2
punpcklbw
m0
,
m2
punpckhbw
m6
,
m1
,
m2
punpcklbw
m1
,
m2
psubw
m3
,
m4
,
m0
psubw
m7
,
m4
,
m5
psubw
m3
,
m1
psubw
m7
,
m6
punpckh
%
2
%
3
m5
,
m0
,
m2
punpckl
%
2
%
3
m0
,
m2
punpckh
%
2
%
3
m6
,
m1
,
m2
punpckl
%
2
%
3
m1
,
m2
psub
%
3
m3
,
m4
,
m0
psub
%
3
m7
,
m4
,
m5
psub
%
3
m3
,
m1
psub
%
3
m7
,
m6
%if %0 == 4
; 16 bit
pabsd
m3
,
m3
pabsd
m7
,
m7
%else
ABS2
m3
,
m7
,
m1
,
m6
psubw
m0
,
m4
,
m3
psubw
m1
,
m4
,
m7
packuswb
m0
,
m1
%endif
psub
%
3
m0
,
m4
,
m3
psub
%
3
m1
,
m4
,
m7
packus
%
3
%
2
m0
,
m1
mova
[
dstq
+
xq
],
m0
add
xq
,
mmsize
jl
.loop
...
...
@@ -384,17 +420,17 @@ BLEND_SIMPLE addition, addusb
BLEND_SIMPLE
subtract
,
subusb
BLEND_SIMPLE
darken
,
minub
BLEND_SIMPLE
lighten
,
maxub
GRAINEXTRACT
GRAINEXTRACT
grainextract
,
b
,
w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE
GRAINMERGE
AVERAGE
average
,
b
GRAINMERGE
grainmerge
,
b
,
w
HARDMIX
PHOENIX
phoenix
,
b
DIFFERENCE
difference
,
b
,
w
DIVIDE
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
%if ARCH_X86_64
BLEND_SIMPLE
addition_16
,
addusw
,
1
...
...
@@ -402,18 +438,24 @@ BLEND_SIMPLE and_16, and, 1
BLEND_SIMPLE
or_16
,
or
,
1
BLEND_SIMPLE
subtract_16
,
subusw
,
1
BLEND_SIMPLE
xor_16
,
xor
,
1
AVERAGE
average_16
,
w
,
1
%endif
INIT_XMM
ssse3
DIFFERENCE
difference
,
b
,
w
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
INIT_XMM
sse4
%if ARCH_X86_64
BLEND_SIMPLE
darken_16
,
minuw
,
1
BLEND_SIMPLE
lighten_16
,
maxuw
,
1
GRAINEXTRACT
grainextract_16
,
w
,
d
,
1
GRAINMERGE
grainmerge_16
,
w
,
d
,
1
PHOENIX
phoenix_16
,
w
,
1
DIFFERENCE
difference_16
,
w
,
d
,
1
EXTREMITY
extremity_16
,
w
,
d
,
1
NEGATION
negation_16
,
w
,
d
,
1
%endif
%if HAVE_AVX2_EXTERNAL
...
...
@@ -425,16 +467,17 @@ BLEND_SIMPLE addition, addusb
BLEND_SIMPLE
subtract
,
subusb
BLEND_SIMPLE
darken
,
minub
BLEND_SIMPLE
lighten
,
maxub
GRAINEXTRACT
GRAINEXTRACT
grainextract
,
b
,
w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE
GRAINMERGE
AVERAGE
average
,
b
GRAINMERGE
grainmerge
,
b
,
w
HARDMIX
PHOENIX
phoenix
,
b
DIFFERENCE
difference
,
b
,
w
BLEND_ABS
EXTREMITY
extremity
,
b
,
w
NEGATION
negation
,
b
,
w
%if ARCH_X86_64
BLEND_SIMPLE
addition_16
,
addusw
,
1
...
...
@@ -444,7 +487,12 @@ BLEND_SIMPLE lighten_16, maxuw, 1
BLEND_SIMPLE
or_16
,
or
,
1
BLEND_SIMPLE
subtract_16
,
subusw
,
1
BLEND_SIMPLE
xor_16
,
xor
,
1
GRAINEXTRACT
grainextract_16
,
w
,
d
,
1
AVERAGE
average_16
,
w
,
1
GRAINMERGE
grainmerge_16
,
w
,
d
,
1
PHOENIX
phoenix_16
,
w
,
1
DIFFERENCE
difference_16
,
w
,
d
,
1
EXTREMITY
extremity_16
,
w
,
d
,
1
NEGATION
negation_16
,
w
,
d
,
1
%endif
%endif
This diff is collapsed.
Click to expand it.
libavfilter/x86/vf_blend_init.c
+
20
−
0
View file @
f3df42e8
...
...
@@ -72,12 +72,22 @@ BLEND_FUNC(negation, avx2)
#if ARCH_X86_64
BLEND_FUNC
(
addition_16
,
sse2
)
BLEND_FUNC
(
addition_16
,
avx2
)
BLEND_FUNC
(
grainmerge_16
,
sse4
)
BLEND_FUNC
(
grainmerge_16
,
avx2
)
BLEND_FUNC
(
average_16
,
sse2
)
BLEND_FUNC
(
average_16
,
avx2
)
BLEND_FUNC
(
and_16
,
sse2
)
BLEND_FUNC
(
and_16
,
avx2
)
BLEND_FUNC
(
darken_16
,
sse4
)
BLEND_FUNC
(
darken_16
,
avx2
)
BLEND_FUNC
(
grainextract_16
,
sse4
)
BLEND_FUNC
(
grainextract_16
,
avx2
)
BLEND_FUNC
(
difference_16
,
sse4
)
BLEND_FUNC
(
difference_16
,
avx2
)
BLEND_FUNC
(
extremity_16
,
sse4
)
BLEND_FUNC
(
extremity_16
,
avx2
)
BLEND_FUNC
(
negation_16
,
sse4
)
BLEND_FUNC
(
negation_16
,
avx2
)
BLEND_FUNC
(
lighten_16
,
sse4
)
BLEND_FUNC
(
lighten_16
,
avx2
)
BLEND_FUNC
(
or_16
,
sse2
)
...
...
@@ -152,6 +162,7 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
switch
(
param
->
mode
)
{
case
BLEND_ADDITION
:
param
->
blend
=
ff_blend_addition_16_sse2
;
break
;
case
BLEND_AND
:
param
->
blend
=
ff_blend_and_16_sse2
;
break
;
case
BLEND_AVERAGE
:
param
->
blend
=
ff_blend_average_16_sse2
;
break
;
case
BLEND_OR
:
param
->
blend
=
ff_blend_or_16_sse2
;
break
;
case
BLEND_SUBTRACT
:
param
->
blend
=
ff_blend_subtract_16_sse2
;
break
;
case
BLEND_XOR
:
param
->
blend
=
ff_blend_xor_16_sse2
;
break
;
...
...
@@ -159,8 +170,12 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
}
if
(
EXTERNAL_SSE4
(
cpu_flags
)
&&
param
->
opacity
==
1
)
{
switch
(
param
->
mode
)
{
case
BLEND_GRAINMERGE
:
param
->
blend
=
ff_blend_grainmerge_16_sse4
;
break
;
case
BLEND_DARKEN
:
param
->
blend
=
ff_blend_darken_16_sse4
;
break
;
case
BLEND_GRAINEXTRACT
:
param
->
blend
=
ff_blend_grainextract_16_sse4
;
break
;
case
BLEND_DIFFERENCE
:
param
->
blend
=
ff_blend_difference_16_sse4
;
break
;
case
BLEND_EXTREMITY
:
param
->
blend
=
ff_blend_extremity_16_sse4
;
break
;
case
BLEND_NEGATION
:
param
->
blend
=
ff_blend_negation_16_sse4
;
break
;
case
BLEND_LIGHTEN
:
param
->
blend
=
ff_blend_lighten_16_sse4
;
break
;
case
BLEND_PHOENIX
:
param
->
blend
=
ff_blend_phoenix_16_sse4
;
break
;
}
...
...
@@ -168,9 +183,14 @@ av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
)
&&
param
->
opacity
==
1
)
{
switch
(
param
->
mode
)
{
case
BLEND_ADDITION
:
param
->
blend
=
ff_blend_addition_16_avx2
;
break
;
case
BLEND_GRAINMERGE
:
param
->
blend
=
ff_blend_grainmerge_16_avx2
;
break
;
case
BLEND_AND
:
param
->
blend
=
ff_blend_and_16_avx2
;
break
;
case
BLEND_AVERAGE
:
param
->
blend
=
ff_blend_average_16_avx2
;
break
;
case
BLEND_DARKEN
:
param
->
blend
=
ff_blend_darken_16_avx2
;
break
;
case
BLEND_GRAINEXTRACT
:
param
->
blend
=
ff_blend_grainextract_16_avx2
;
break
;
case
BLEND_DIFFERENCE
:
param
->
blend
=
ff_blend_difference_16_avx2
;
break
;
case
BLEND_EXTREMITY
:
param
->
blend
=
ff_blend_extremity_16_avx2
;
break
;
case
BLEND_NEGATION
:
param
->
blend
=
ff_blend_negation_16_avx2
;
break
;
case
BLEND_LIGHTEN
:
param
->
blend
=
ff_blend_lighten_16_avx2
;
break
;
case
BLEND_OR
:
param
->
blend
=
ff_blend_or_16_avx2
;
break
;
case
BLEND_PHOENIX
:
param
->
blend
=
ff_blend_phoenix_16_avx2
;
break
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment