Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
88307b3e
Commit
88307b3e
authored
8 years ago
by
James Darnley
Browse files
Options
Downloads
Patches
Plain Diff
avcodec/h264: add avx 8-bit 4:2:2 chroma h deblock/loop filter
~1.21x faster (68 vs. 56 cycles) compared with mmxext function
parent
ac096fc8
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libavcodec/x86/h264_deblock.asm
+27
-0
27 additions, 0 deletions
libavcodec/x86/h264_deblock.asm
libavcodec/x86/h264dsp_init.c
+2
-0
2 additions, 0 deletions
libavcodec/x86/h264dsp_init.c
with
29 additions
and
0 deletions
libavcodec/x86/h264_deblock.asm
+
27
−
0
View file @
88307b3e
...
@@ -1163,6 +1163,33 @@ cglobal deblock_h_chroma_8, 5, 7, 8, 0-16, pix_, stride_, alpha_, beta_, tc0_
...
@@ -1163,6 +1163,33 @@ cglobal deblock_h_chroma_8, 5, 7, 8, 0-16, pix_, stride_, alpha_, beta_, tc0_
STORE_8_ROWS
PASS8ROWS
(
pix_q
-
2
,
r5
-
2
,
stride_q
,
r6
)
STORE_8_ROWS
PASS8ROWS
(
pix_q
-
2
,
r5
-
2
,
stride_q
,
r6
)
RET
RET
;-----------------------------------------------------------------------------
; deblock_h_chroma422_8(pix_, stride_, alpha_, beta_, tc0_)
;
; 8-bit 4:2:2 chroma deblock/loop filter for the "h" direction, emitted once
; per instantiation of the enclosing DEBLOCK_CHROMA_XMM macro.
;
; The work is done as two identical passes of 8 rows each.  Every pass:
;   1. loads 8 rows starting 2 bytes before pix_ and transposes them,
;   2. spills m0 and m3 to the 16 bytes of stack scratch at [rsp]/[rsp + 8],
;   3. runs CHROMA_INTER_BODY_XMM,
;   4. reloads m0/m3, transposes back and stores the 8 rows.
; Between the passes pix_q and r5 advance by 8 rows and tc0_q by 2 bytes.
;
; NOTE(review): r5/r6 are set up by CHROMA_H_START_XMM, which is defined
; outside this block -- presumably r5 is a second row pointer and r6 a stride
; multiple, as PASS8ROWS consumes them; confirm against that macro.
; NOTE(review): m0/m3 are presumably spilled because CHROMA_INTER_BODY_XMM
; overwrites them while the store needs their original contents -- confirm.
;-----------------------------------------------------------------------------
; No trailing comma after the last argument name: it would hand an empty extra
; parameter to cglobal.
cglobal deblock_h_chroma422_8, 5, 7, 8, 0-16, pix_, stride_, alpha_, beta_, tc0_
    CHROMA_H_START_XMM r5, r6

    ; ---- pass 1: first 8 rows ----
    LOAD_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
    TRANSPOSE_8x4B_XMM
    movq [rsp], m0                      ; keep m0/m3 across the filter body
    movq [rsp + 8], m3

    CHROMA_INTER_BODY_XMM 2

    movq m0, [rsp]                      ; restore the spilled registers
    movq m3, [rsp + 8]
    TRANSPOSE_4x8B_XMM
    STORE_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)

    ; ---- step to the next 8 rows ----
    lea pix_q, [pix_q + 8 * stride_q]
    lea r5,    [r5    + 8 * stride_q]
    add tc0_q, 2                        ; presumably 2 tc0 entries per pass -- confirm

    ; ---- pass 2: next 8 rows (same sequence as pass 1) ----
    LOAD_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
    TRANSPOSE_8x4B_XMM
    movq [rsp], m0
    movq [rsp + 8], m3

    CHROMA_INTER_BODY_XMM 2

    movq m0, [rsp]
    movq m3, [rsp + 8]
    TRANSPOSE_4x8B_XMM
    STORE_8_ROWS PASS8ROWS(pix_q - 2, r5 - 2, stride_q, r6)
RET
%endmacro
; DEBLOCK_CHROMA_XMM
%endmacro
; DEBLOCK_CHROMA_XMM
DEBLOCK_CHROMA_XMM
avx
DEBLOCK_CHROMA_XMM
avx
...
...
This diff is collapsed.
Click to expand it.
libavcodec/x86/h264dsp_init.c
+
2
−
0
View file @
88307b3e
...
@@ -321,6 +321,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
...
@@ -321,6 +321,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_8_avx
;
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_8_avx
;
if
(
chroma_format_idc
<=
1
)
{
if
(
chroma_format_idc
<=
1
)
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_avx
;
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_avx
;
}
else
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma422_8_avx
;
}
}
}
}
}
else
if
(
bit_depth
==
10
)
{
}
else
if
(
bit_depth
==
10
)
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment