Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
4353c350
Commit
4353c350
authored
7 years ago
by
Martin Vignali
Browse files
Options
Downloads
Patches
Plain Diff
avcodec/x86/lossless_videodsp : add avx2 version for add_left_pred
parent
cfbcea1c
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libavcodec/x86/lossless_videodsp.asm
+41
-22
41 additions, 22 deletions
libavcodec/x86/lossless_videodsp.asm
libavcodec/x86/lossless_videodsp_init.c
+3
-0
3 additions, 0 deletions
libavcodec/x86/lossless_videodsp_init.c
with
44 additions
and
22 deletions
libavcodec/x86/lossless_videodsp.asm
+
41
−
22
View file @
4353c350
...
...
@@ -114,40 +114,54 @@ MEDIAN_PRED
add
ds
tq
,
wq
neg
wq
%%.loop:
pshufb
xm0
,
xm5
%if %2
mova
m1
,
[
srcq
+
wq
]
%else
movu
m1
,
[
srcq
+
wq
]
%endif
mova
m2
,
m1
psllw
m1
,
8
psllw
m2
,
m1
,
8
paddb
m1
,
m2
mova
m2
,
m1
pshufb
m1
,
m3
pshufb
m2
,
m1
,
m3
paddb
m1
,
m2
pshufb
m0
,
m5
mova
m2
,
m1
pshufb
m1
,
m4
pshufb
m2
,
m1
,
m4
paddb
m1
,
m2
%if mmsize == 16
mova
m2
,
m1
pshufb
m1
,
m6
%if mmsize >= 16
pshufb
m2
,
m1
,
m6
paddb
m1
,
m2
%endif
paddb
m0
,
m1
paddb
x
m0
,
x
m1
%if %1
mova
[
ds
tq
+
wq
],
m0
mova
[
ds
tq
+
wq
],
x
m0
%else
movq
[
ds
tq
+
wq
],
m0
movhps
[
ds
tq
+
wq
+
8
],
m0
movq
[
ds
tq
+
wq
],
xm0
movhps
[
ds
tq
+
wq
+
8
],
xm0
%endif
%if mmsize == 32
vextracti128
xm2
,
m1
,
1
; get second lane of the ymm
pshufb
xm0
,
xm5
; set alls val to last val of the first lane
paddb
xm0
,
xm2
;store val
%if %1
mova
[
ds
tq
+
wq
+
16
],
xm0
%else
;
movq
[
ds
tq
+
wq
+
16
],
xm0
movhps
[
ds
tq
+
wq
+
16
+
8
],
xm0
%endif
%endif
add
wq
,
mmsize
jl
%%
.loop
%if mmsize == 32
mov
eax
,
[
ds
tq
-
1
]
and
eax
,
0xff
%else
;
mov
eax
,
mmsize
-
1
sub
eax
,
wd
movd
m1
,
eax
pshufb
m0
,
m1
movd
eax
,
m0
%endif
RET
%endmacro
...
...
@@ -166,15 +180,15 @@ cglobal add_left_pred, 3,3,7, dst, src, w, left
%macro ADD_LEFT_PRED_UNALIGNED 0
cglobal
add_left_pred_unaligned
,
3
,
3
,
7
,
ds
t
,
src
,
w
,
left
mova
m5
,
[
pb_15
]
mova
m6
,
[
pb_zzzzzzzz77777777
]
mova
m4
,
[
pb_zzzz3333zzzzbbbb
]
mova
m3
,
[
pb_zz11zz55zz99zzdd
]
movd
m0
,
leftm
pslldq
m0
,
15
test
srcq
,
1
5
mova
x
m5
,
[
pb_15
]
VBROADCASTI128
m6
,
[
pb_zzzzzzzz77777777
]
VBROADCASTI128
m4
,
[
pb_zzzz3333zzzzbbbb
]
VBROADCASTI128
m3
,
[
pb_zz11zz55zz99zzdd
]
movd
x
m0
,
leftm
pslldq
x
m0
,
15
test
srcq
,
mmsize
-
1
jnz
.src_unaligned
test
ds
tq
,
1
5
test
ds
tq
,
mmsize
-
1
jnz
.dst_unaligned
ADD_LEFT_LOOP
1
,
1
.dst_unaligned:
...
...
@@ -186,6 +200,11 @@ cglobal add_left_pred_unaligned, 3,3,7, dst, src, w, left
INIT_XMM
ss
se3
ADD_LEFT_PRED_UNALIGNED
%if HAVE_AVX2_EXTERNAL
INIT_YMM
avx2
ADD_LEFT_PRED_UNALIGNED
%endif
;------------------------------------------------------------------------------
; void ff_add_bytes(uint8_t *dst, uint8_t *src, ptrdiff_t w);
;------------------------------------------------------------------------------
...
...
This diff is collapsed.
Click to expand it.
libavcodec/x86/lossless_videodsp_init.c
+
3
−
0
View file @
4353c350
...
...
@@ -38,6 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_unaligned_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_unaligned_avx2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
ptrdiff_t
w
,
int
left
);
int
ff_add_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
int
ff_add_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
ptrdiff_t
w
,
unsigned
acc
);
...
...
@@ -118,5 +120,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
}
if
(
EXTERNAL_AVX2_FAST
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_avx2
;
c
->
add_left_pred
=
ff_add_left_pred_unaligned_avx2
;
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment