Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
f1816481
Commit
f1816481
authored
7 years ago
by
Martin Vignali
Browse files
Options
Downloads
Patches
Plain Diff
avfilter/x86/vf_hflip : add avx2 version for hflip_byte and hflip_short
parent
a4a4179e
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libavfilter/x86/vf_hflip.asm
+11
-1
11 additions, 1 deletion
libavfilter/x86/vf_hflip.asm
libavfilter/x86/vf_hflip_init.c
+16
-4
16 additions, 4 deletions
libavfilter/x86/vf_hflip_init.c
with
27 additions
and
5 deletions
libavfilter/x86/vf_hflip.asm
+
11
−
1
View file @
f1816481
...
@@ -32,7 +32,7 @@ SECTION .text
...
@@ -32,7 +32,7 @@ SECTION .text
;%1 byte or short, %2 b or w, %3 size in byte (1 for byte, 2 for short)
;%1 byte or short, %2 b or w, %3 size in byte (1 for byte, 2 for short)
%macro HFLIP 3
%macro HFLIP 3
cglobal
hflip_
%
1
,
3
,
5
,
3
,
src
,
ds
t
,
w
,
r
,
x
cglobal
hflip_
%
1
,
3
,
5
,
3
,
src
,
ds
t
,
w
,
r
,
x
mova
m0
,
[
pb_flip_
%
1
]
VBROADCASTI128
m0
,
[
pb_flip_
%
1
]
xor
xq
,
xq
xor
xq
,
xq
%if %3 == 1
%if %3 == 1
movsxdifnidn
wq
,
wd
movsxdifnidn
wq
,
wd
...
@@ -47,8 +47,13 @@ cglobal hflip_%1, 3, 5, 3, src, dst, w, r, x
...
@@ -47,8 +47,13 @@ cglobal hflip_%1, 3, 5, 3, src, dst, w, r, x
.loop0:
.loop0:
neg
xq
neg
xq
%if mmsize == 32
vpermq
m1
,
[
srcq
+
xq
-
mmsize
+
%
3
],
0x4e
; flip each lane at load
vpermq
m2
,
[
srcq
+
xq
-
2
*
mmsize
+
%
3
],
0x4e
; flip each lane at load
%else
movu
m1
,
[
srcq
+
xq
-
mmsize
+
%
3
]
movu
m1
,
[
srcq
+
xq
-
mmsize
+
%
3
]
movu
m2
,
[
srcq
+
xq
-
2
*
mmsize
+
%
3
]
movu
m2
,
[
srcq
+
xq
-
2
*
mmsize
+
%
3
]
%endif
pshufb
m1
,
m0
pshufb
m1
,
m0
pshufb
m2
,
m0
pshufb
m2
,
m0
neg
xq
neg
xq
...
@@ -78,3 +83,8 @@ INIT_XMM ssse3
...
@@ -78,3 +83,8 @@ INIT_XMM ssse3
HFLIP
byte
,
b
,
1
HFLIP
byte
,
b
,
1
HFLIP
short
,
w
,
2
HFLIP
short
,
w
,
2
%if HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
HFLIP
byte
,
b
,
1
HFLIP
short
,
w
,
2
%endif
This diff is collapsed.
Click to expand it.
libavfilter/x86/vf_hflip_init.c
+
16
−
4
View file @
f1816481
...
@@ -24,7 +24,9 @@
...
@@ -24,7 +24,9 @@
#include
"libavfilter/hflip.h"
#include
"libavfilter/hflip.h"
void
ff_hflip_byte_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
w
);
void
ff_hflip_byte_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
w
);
void
ff_hflip_byte_avx2
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
w
);
void
ff_hflip_short_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
w
);
void
ff_hflip_short_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
w
);
void
ff_hflip_short_avx2
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
w
);
av_cold
void
ff_hflip_init_x86
(
FlipContext
*
s
,
int
step
[
4
],
int
nb_planes
)
av_cold
void
ff_hflip_init_x86
(
FlipContext
*
s
,
int
step
[
4
],
int
nb_planes
)
{
{
...
@@ -32,10 +34,20 @@ av_cold void ff_hflip_init_x86(FlipContext *s, int step[4], int nb_planes)
...
@@ -32,10 +34,20 @@ av_cold void ff_hflip_init_x86(FlipContext *s, int step[4], int nb_planes)
int
i
;
int
i
;
for
(
i
=
0
;
i
<
nb_planes
;
i
++
)
{
for
(
i
=
0
;
i
<
nb_planes
;
i
++
)
{
if
(
EXTERNAL_SSSE3
(
cpu_flags
)
&&
step
[
i
]
==
1
)
{
if
(
step
[
i
]
==
1
)
{
s
->
flip_line
[
i
]
=
ff_hflip_byte_ssse3
;
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
}
else
if
(
EXTERNAL_SSSE3
(
cpu_flags
)
&&
step
[
i
]
==
2
)
{
s
->
flip_line
[
i
]
=
ff_hflip_byte_ssse3
;
s
->
flip_line
[
i
]
=
ff_hflip_short_ssse3
;
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
s
->
flip_line
[
i
]
=
ff_hflip_byte_avx2
;
}
}
else
if
(
step
[
i
]
==
2
)
{
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
s
->
flip_line
[
i
]
=
ff_hflip_short_ssse3
;
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
s
->
flip_line
[
i
]
=
ff_hflip_short_avx2
;
}
}
}
}
}
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment