Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
2dd2f716
Commit
2dd2f716
authored
14 years ago
by
Ronald S. Bultje
Browse files
Options
Downloads
Patches
Plain Diff
MMX idct_add for VP8.
Originally committed as revision 23886 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
29e71937
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
libavcodec/x86/vp8dsp-init.c
+2
-0
2 additions, 0 deletions
libavcodec/x86/vp8dsp-init.c
libavcodec/x86/vp8dsp.asm
+89
-0
89 additions, 0 deletions
libavcodec/x86/vp8dsp.asm
libavcodec/x86/x86util.asm
+15
-0
15 additions, 0 deletions
libavcodec/x86/x86util.asm
with
106 additions
and
0 deletions
libavcodec/x86/vp8dsp-init.c
+
2
−
0
View file @
2dd2f716
...
...
@@ -196,6 +196,7 @@ HVBILIN(ssse3, 8, 16, 16)
/*
 * Prototypes for VP8 DSP routines that are implemented outside this C file.
 * ff_vp8_idct_add_mmx is the entry this file installs into c->vp8_idct_add
 * when the MMX flag is set.
 */
extern void ff_vp8_idct_dc_add_mmx   (uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add_sse4  (uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_idct_add_mmx      (uint8_t *dst, DCTELEM block[16], int stride);
#endif
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
...
...
@@ -229,6 +230,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
#if HAVE_YASM
if
(
mm_flags
&
FF_MM_MMX
)
{
c
->
vp8_idct_dc_add
=
ff_vp8_idct_dc_add_mmx
;
c
->
vp8_idct_add
=
ff_vp8_idct_add_mmx
;
c
->
put_vp8_epel_pixels_tab
[
0
][
0
][
0
]
=
c
->
put_vp8_bilinear_pixels_tab
[
0
][
0
][
0
]
=
ff_put_vp8_pixels16_mmx
;
c
->
put_vp8_epel_pixels_tab
[
1
][
0
][
0
]
=
...
...
This diff is collapsed.
Click to expand it.
libavcodec/x86/vp8dsp.asm
+
89
−
0
View file @
2dd2f716
...
...
@@ -142,6 +142,9 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
; Byte index tables (16 entries each). Their consumers are not visible in this
; hunk; by name they presumably belong to the 6-tap horizontal filter - confirm.
filter_h6_shuf2: db  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  9
filter_h6_shuf3: db  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,  9,  9, 10, 10, 11

; Packed-word multipliers for the VP8 inverse DCT (four copies each, i.e. one
; MMX register's worth). VP8_MULTIPLY_SUMSUB expects them in m6 and m7.
;   20091: used as pmulhw(x, 20091) + x
;   17734: half of 35468; the macro doubles x (psllw 1) before the pmulhw
pw_20091: times 4 dw 20091
pw_17734: times 4 dw 17734

; Word constants whose storage is declared elsewhere.
cextern pw_3
cextern pw_4
cextern pw_64
...
...
@@ -923,6 +926,92 @@ cglobal vp8_idct_dc_add_sse4, 3, 3, 6
pextrd
[
r1
+
r2
],
xmm2
,
3
RET
;-----------------------------------------------------------------------------
; void vp8_idct_add_<opt>(uint8_t *dst, DCTELEM block[16], int stride);
;-----------------------------------------------------------------------------
; SUMSUB a, b, tmp
;   a   <- a + b
;   b   <- b - a        (a as it was before the addition)
;   tmp <- copy of the original a (scratch, clobbered)
; Packed 16-bit add/subtract (paddw/psubw), so results wrap on overflow.
; NOTE(review): the IDCT code visible in this change calls SUMSUB_BA rather
; than this macro - confirm whether SUMSUB still has a user.
%macro SUMSUB 3
    mova    %3, %1              ; keep the pre-add value of a
    paddw   %1, %2              ; a = a + b
    psubw   %2, %3              ; b = b - original a
%endmacro
; VP8_MULTIPLY_SUMSUB a, b, tmp1, tmp2
;   a <- mul_35468(a) - mul_20091(b)
;   b <- mul_20091(a) + mul_35468(b)      (a, b on the right are the inputs)
;
; where, per 16-bit lane,
;   mul_20091(x) = pmulhw(x, 20091) + x
;   mul_35468(x) = pmulhw(x << 1, 17734)
;
; Preconditions: m6 holds packed words 20091 and m7 holds packed words 17734.
; Clobbers:      tmp1 (ends as mul_20091(a)), tmp2 (ends as mul_20091(b)).
; The two independent chains are issued pairwise interleaved; that ordering is
; kept as written.
%macro VP8_MULTIPLY_SUMSUB 4
    mova        %3, %1          ; tmp1 = a
    mova        %4, %2          ; tmp2 = b
    pmulhw      %3, m6          ; high word of a * 20091
    pmulhw      %4, m6          ; high word of b * 20091
    paddw       %3, %1          ; tmp1 = mul_20091(a)
    paddw       %4, %2          ; tmp2 = mul_20091(b)
    psllw       %1, 1           ; double a so the halved multiplier can be used
    psllw       %2, 1           ; double b likewise
    pmulhw      %1, m7          ; a = mul_35468(a)
    pmulhw      %2, m7          ; b = mul_35468(b)
    psubw       %1, %4          ; a = mul_35468(a) - mul_20091(b)
    paddw       %2, %3          ; b = mul_35468(b) + mul_20091(a)
%endmacro
; VP8_IDCT_TRANSFORM4x4_1D in0, in1, in2, in3, tmpA, tmpB
; All six arguments are register NUMBERS (the macro prepends "m" itself and
; passes the bare numbers to SWAP).
;
; One 1-D pass of the VP8 4x4 inverse transform:
;   x0 = %1 + %3
;   x1 = %1 - %3
;   x2 = mul_35468(%2) - mul_20091(%4)
;   x3 = mul_20091(%2) + mul_35468(%4)
; Results, after the register renaming done by the trailing SWAPs:
;   %1 = x0 + x3 (tmp0)     %2 = x1 + x2 (tmp1)
;   %3 = x1 - x2 (tmp2)     %4 = x0 - x3 (tmp3)
;
; %5 and %6 are scratch. m6/m7 must already contain the packed words
; 20091/17734 required by VP8_MULTIPLY_SUMSUB.
%macro VP8_IDCT_TRANSFORM4x4_1D 6
    SUMSUB_BA           m%3, m%1, m%5           ; x0, x1
    VP8_MULTIPLY_SUMSUB m%2, m%4, m%5, m%6      ; x2, x3
    SUMSUB_BA           m%4, m%3, m%5           ; tmp0, tmp3
    SUMSUB_BA           m%2, m%1, m%5           ; tmp1, tmp2
    ; no data moves: rename registers so the outputs sit in argument order
    SWAP                %4, %1
    SWAP                %4, %3
%endmacro
; TRANSPOSE4x4 r0, r1, r2, r3, tmp
; Transposes a 4x4 matrix of 16-bit words held one row per register.
; Arguments are register NUMBERS. On exit %1..%4 hold columns 0..3 of the
; input (the SWAPs rename registers to achieve that order) and %5 holds
; leftover intermediate data.
;
; Lane notation below: rows are a, b, c, d; "a0 b0 a1 b1" lists words from
; lowest to highest.
; NOTE(review): vp8_idct_add_mmx as shown in this change invokes TRANSPOSE4x4W,
; not this macro - confirm whether this definition is still used.
%macro TRANSPOSE4x4 5
    mova        m%5, m%1
    punpcklwd   m%1, m%2        ; a0 b0 a1 b1
    punpckhwd   m%5, m%2        ; a2 b2 a3 b3
    mova        m%2, m%3
    punpcklwd   m%3, m%4        ; c0 d0 c1 d1
    punpckhwd   m%2, m%4        ; c2 d2 c3 d3
    mova        m%4, m%1
    punpckldq   m%1, m%3        ; col0 = a0 b0 c0 d0
    punpckhdq   m%4, m%3        ; col1 = a1 b1 c1 d1
    mova        m%3, m%5
    punpckldq   m%5, m%2        ; col2 = a2 b2 c2 d2
    punpckhdq   m%3, m%2        ; col3 = a3 b3 c3 d3
    ; columns now live in 1/4/5/3; rename so they end up in 1/2/3/4
    SWAP        %4, %2
    SWAP        %4, %5
    SWAP        %4, %3
%endmacro
; void vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride)
;
; Inverse-transforms one 4x4 coefficient block and adds the result to the
; 4x4 pixel area at dst (rows dst, dst+stride, dst+2*stride, dst+3*stride).
;
; Registers (cglobal: 3 arguments, 3 general-purpose registers):
;   r0 = dst
;   r1 = block on entry; reused as dst + 2*stride once the block is loaded
;   r2 = stride
;   m0-m3 = the four rows of 16-bit values, m4/m5 = scratch (m4 = zero for
;           the store), m6/m7 = IDCT multipliers, then scratch for the store
;
; Flow: 1-D transform, transpose, add rounding term, 1-D transform, transpose,
; then STORE_DIFFx2 shifts each row right by 3 and adds it to the pixels.
; NOTE(review): the C prototype declares stride as int while r2 is used as a
; full-width address register - confirm the upper bits are valid on x86-64.
INIT_MMX
cglobal vp8_idct_add_mmx, 3, 3
    ; multipliers that VP8_MULTIPLY_SUMSUB expects in m6/m7
    movq        m6, [pw_20091]
    movq        m7, [pw_17734]

    ; 32 bytes of block data: four rows of four words
    movq        m0, [r1]
    movq        m1, [r1+8]
    movq        m2, [r1+16]
    movq        m3, [r1+24]

    ; first 1-D pass, then transpose so the second pass works on the other axis
    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
    TRANSPOSE4x4W            0, 1, 2, 3, 4

    ; bias before the final >>3; m0 feeds every output of the second pass
    paddw       m0, [pw_4]
    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
    TRANSPOSE4x4W            0, 1, 2, 3, 4

    ; add to destination, two rows per STORE_DIFFx2
    lea         r1, [r0+2*r2]   ; r1 = address of row 2
    pxor        m4, m4          ; zero register for byte<->word (un)packing
    STORE_DIFFx2 m0, m1, m6, m7, m4, 3, r0, r2
    STORE_DIFFx2 m2, m3, m6, m7, m4, 3, r1, r2

    RET
;-----------------------------------------------------------------------------
; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
;-----------------------------------------------------------------------------
...
...
This diff is collapsed.
Click to expand it.
libavcodec/x86/x86util.asm
+
15
−
0
View file @
2dd2f716
...
...
@@ -365,3 +365,18 @@
packuswb
%
1
,
%
1
movh
%
4
,
%
1
%endmacro
; STORE_DIFFx2 add1, add2, reg1, reg2, zero, shift, source, stride
;
; Adds two rows of 16-bit residuals to two rows of 8-bit pixels in memory:
;   row at [source]        += add1 >> shift
;   row at [source+stride] += add2 >> shift
; The shift is arithmetic (psraw) and the sums are clamped to 0..255 by
; packuswb before being written back.
;
;   add1, add2 : residual registers; modified (left shifted right by "shift")
;   reg1, reg2 : scratch registers; clobbered
;   zero       : register that must contain all zeros
;   shift      : shift count for psraw
;   source     : address register of the first pixel row
;   stride     : register holding the distance to the second row
; movh transfers the low half of a register; how many pixels that is depends
; on the active register width (defined by the x86inc setup, not shown here).
%macro STORE_DIFFx2 8
    movh        %3, [%7]        ; pixels, first row
    movh        %4, [%7+%8]     ; pixels, second row
    punpcklbw   %3, %5          ; widen bytes to words
    punpcklbw   %4, %5
    psraw       %1, %6          ; scale residuals
    psraw       %2, %6
    paddw       %3, %1          ; pixel + residual
    paddw       %4, %2
    packuswb    %3, %5          ; back to bytes with unsigned saturation
    packuswb    %4, %5
    movh        [%7], %3
    movh        [%7+%8], %4
%endmacro
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment