Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
15ce1601
Commit
15ce1601
authored
10 years ago
by
Christophe Gisquet
Committed by
Michael Niedermayer
10 years ago
Browse files
Options
Downloads
Patches
Plain Diff
x86: xvid_idct: SSE2 merged add version
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
decd5193
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libavcodec/x86/xvididct.asm
+89
-3
89 additions, 3 deletions
libavcodec/x86/xvididct.asm
libavcodec/x86/xvididct_init.c
+2
-7
2 additions, 7 deletions
libavcodec/x86/xvididct_init.c
with
91 additions
and
10 deletions
libavcodec/x86/xvididct.asm
+
89
−
3
View file @
15ce1601
...
@@ -384,6 +384,12 @@ SECTION .text
...
@@ -384,6 +384,12 @@ SECTION .text
; Must now load args as gprs are no longer used for masks
; Must now load args as gprs are no longer used for masks
; DEST is set to where address of dest was loaded
; DEST is set to where address of dest was loaded
%if ARCH_X86_32
%if ARCH_X86_32
%if %2 == 2
; Not enough xmms, store
movdqa
[
%
1
+
1
*
16
],
TAN3
movdqa
[
%
1
+
2
*
16
],
xmm3
movdqa
[
%
1
+
5
*
16
],
REG0
movdqa
[
%
1
+
6
*
16
],
xmm5
%endif
%xdefine DEST r2q
; BLOCK is r0, stride r1
%xdefine DEST r2q
; BLOCK is r0, stride r1
movifnidn
DEST
,
destm
movifnidn
DEST
,
destm
movifnidn
strideq
,
stridem
movifnidn
strideq
,
stridem
...
@@ -397,8 +403,6 @@ SECTION .text
...
@@ -397,8 +403,6 @@ SECTION .text
movq
[
DEST
+
strideq
],
TAN3
movq
[
DEST
+
strideq
],
TAN3
movhps
[
DEST
+
2
*
strideq
],
TAN3
movhps
[
DEST
+
2
*
strideq
],
TAN3
; REG0 and TAN3 are now available (and likely used in second half)
; REG0 and TAN3 are now available (and likely used in second half)
%else
%warning Unimplemented
%endif
%endif
%endif
%endif
%endmacro
%endmacro
...
@@ -427,7 +431,88 @@ SECTION .text
...
@@ -427,7 +431,88 @@ SECTION .text
movq
[
DEST
+
2
*
strideq
],
xmm5
movq
[
DEST
+
2
*
strideq
],
xmm5
movhps
[
DEST
+
strideq
],
xmm5
movhps
[
DEST
+
strideq
],
xmm5
%elif %2 == 2
%elif %2 == 2
%warning Unimplemented
pxor
xmm0
,
xmm0
%if ARCH_X86_32
; free: m3 REG0=m4 m5
; input: m1, m7, m2, m6
movq
xmm3
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
punpcklbw
xmm3
,
xmm0
punpcklbw
xmm4
,
xmm0
paddsw
xmm3
,
%
3
paddsw
xmm4
,
[
%
1
+
1
*
16
]
movq
%
3
,
[
DEST
+
2
*
strideq
]
movq
xmm5
,
[
DEST
+
r3q
]
punpcklbw
%
3
,
xmm0
punpcklbw
xmm5
,
xmm0
paddsw
%
3
,
[
%
1
+
2
*
16
]
paddsw
xmm5
,
%
5
packuswb
xmm3
,
xmm4
packuswb
%
3
,
xmm5
movq
[
DEST
+
0
*
strideq
],
xmm3
movhps
[
DEST
+
1
*
strideq
],
xmm3
movq
[
DEST
+
2
*
strideq
],
%
3
movhps
[
DEST
+
r3q
],
%
3
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
xmm3
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
%
3
,
[
DEST
+
2
*
strideq
]
movq
xmm5
,
[
DEST
+
r3q
]
punpcklbw
xmm3
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
%
3
,
xmm0
punpcklbw
xmm5
,
xmm0
paddsw
xmm3
,
%
6
paddsw
xmm4
,
[
%
1
+
5
*
16
]
paddsw
%
3
,
[
%
1
+
6
*
16
]
paddsw
xmm5
,
%
4
packuswb
xmm3
,
xmm4
packuswb
%
3
,
xmm5
movq
[
DEST
+
0
*
strideq
],
xmm3
movhps
[
DEST
+
1
*
strideq
],
xmm3
movq
[
DEST
+
2
*
strideq
],
%
3
movhps
[
DEST
+
r3q
],
%
3
%else
; l1:TAN3=m13 l2:m3 l5:REG0=m8 l6=m5
; input: m1, m7/SREG2=m9, TAN1=m14, REG4=m10
movq
xmm2
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
xmm12
,
[
DEST
+
2
*
strideq
]
movq
xmm11
,
[
DEST
+
r3q
]
punpcklbw
xmm2
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
xmm12
,
xmm0
punpcklbw
xmm11
,
xmm0
paddsw
xmm2
,
%
3
paddsw
xmm4
,
TAN3
paddsw
xmm12
,
xmm3
paddsw
xmm11
,
%
5
packuswb
xmm2
,
xmm4
packuswb
xmm12
,
xmm11
movq
[
DEST
+
0
*
strideq
],
xmm2
movhps
[
DEST
+
1
*
strideq
],
xmm2
movq
[
DEST
+
2
*
strideq
],
xmm12
movhps
[
DEST
+
r3q
],
xmm12
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
xmm2
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
xmm12
,
[
DEST
+
2
*
strideq
]
movq
xmm11
,
[
DEST
+
r3q
]
punpcklbw
xmm2
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
xmm12
,
xmm0
punpcklbw
xmm11
,
xmm0
paddsw
xmm2
,
%
6
paddsw
xmm4
,
REG0
paddsw
xmm12
,
xmm5
paddsw
xmm11
,
%
4
packuswb
xmm2
,
xmm4
packuswb
xmm12
,
xmm11
movq
[
DEST
+
0
*
strideq
],
xmm2
movhps
[
DEST
+
1
*
strideq
],
xmm2
movq
[
DEST
+
2
*
strideq
],
xmm12
movhps
[
DEST
+
r3q
],
xmm12
%endif
%endif
%endif
%endmacro
%endmacro
...
@@ -623,6 +708,7 @@ cglobal xvid_idct_add, 0, NUM_GPRS, 8+7*ARCH_X86_64, dest, stride, block
...
@@ -623,6 +708,7 @@ cglobal xvid_idct_add, 0, NUM_GPRS, 8+7*ARCH_X86_64, dest, stride, block
INIT_XMM
ss
e2
INIT_XMM
ss
e2
IDCT_SSE2
0
IDCT_SSE2
0
IDCT_SSE2
1
IDCT_SSE2
1
IDCT_SSE2
2
%if ARCH_X86_32
%if ARCH_X86_32
...
...
This diff is collapsed.
Click to expand it.
libavcodec/x86/xvididct_init.c
+
2
−
7
View file @
15ce1601
...
@@ -27,12 +27,7 @@
...
@@ -27,12 +27,7 @@
#include
"xvididct.h"
#include
"xvididct.h"
void
ff_xvid_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
void
ff_xvid_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
void
ff_xvid_idct_add_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
static
void
xvid_idct_sse2_add
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
{
ff_xvid_idct_sse2
(
block
);
ff_add_pixels_clamped
(
block
,
dest
,
line_size
);
}
#if ARCH_X86_32
#if ARCH_X86_32
static
void
xvid_idct_mmx_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
static
void
xvid_idct_mmx_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
...
@@ -88,7 +83,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
...
@@ -88,7 +83,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
idct_put
=
ff_xvid_idct_put_sse2
;
c
->
idct_put
=
ff_xvid_idct_put_sse2
;
c
->
idct_add
=
xvid_idct_sse2
_add
;
c
->
idct_add
=
ff_
xvid_idct_
add_
sse2
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment