Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
edbf0fff
Commit
edbf0fff
authored
7 years ago
by
Alexandra Hájková
Committed by
Martin Storsjö
7 years ago
Browse files
Options
Downloads
Patches
Plain Diff
hevc: Add NEON add_residual for bitdepth 10
Signed-off-by:
Martin Storsjö
<
martin@martin.st
>
parent
81a4cb8e
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libavcodec/arm/hevc_idct.S
+89
-0
89 additions, 0 deletions
libavcodec/arm/hevc_idct.S
libavcodec/arm/hevcdsp_init_arm.c
+13
-0
13 additions, 0 deletions
libavcodec/arm/hevcdsp_init_arm.c
with
102 additions
and
0 deletions
libavcodec/arm/hevc_idct.S
+
89
−
0
View file @
edbf0fff
...
...
@@ -30,6 +30,13 @@ const trans, align=4
.
short
57
,
43
,
25
,
9
endconst
@ clip10: clamp every signed 16-bit lane of two vector registers, in place.
@   in1, in2 - vector registers to clamp (read and overwritten)
@   c1       - vector of per-lane lower bounds (applied with vmax)
@   c2       - vector of per-lane upper bounds (applied with vmin)
@ The add_residual_*_10 functions in this file invoke it with c1 = all 0 and
@ c2 = all 0x3FF, i.e. the 10-bit sample range.
.macro clip10 in1, in2, c1, c2
        vmax.s16        \in1, \in1, \c1             @ raise lanes below c1 up to c1
        vmax.s16        \in2, \in2, \c1
        vmin.s16        \in1, \in1, \c2             @ lower lanes above c2 down to c2
        vmin.s16        \in2, \in2, \c2
.endm
function
ff_hevc_add_residual_4x4_8_neon
,
export
=
1
vld1.16
{
q0
-
q1
},
[
r1
,
:
128
]
vld1.32
d4
[
0
],
[
r0
,
:
32
],
r2
...
...
@@ -50,6 +57,25 @@ function ff_hevc_add_residual_4x4_8_neon, export=1
bx
lr
endfunc
@ void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                       ptrdiff_t stride)
@
@ Adds a 4x4 block of signed 16-bit residuals to the destination samples
@ (read and written as 16-bit values) and clamps each sum to [0, 0x3FF].
@
@   r0  = _dst    store pointer, advanced by stride after every row
@   r1  = coeffs  16 consecutive 16-bit residuals
@   r2  = stride  byte step between destination rows
@   r12 = scratch copy of _dst used as the load pointer
@
@ Only q0-q3, q12, q13 and r12 are modified besides r0.
@ NOTE(review): the :64 / :128 qualifiers assert that alignment of the
@ addresses - confirm the callers guarantee it.
function ff_hevc_add_residual_4x4_10_neon, export=1
        mov             r12, r0                     @ read cursor; r0 is kept for the stores
        vld1.16         {q0, q1}, [r1, :128]        @ q0 = residual rows 0-1, q1 = rows 2-3
        vld1.16         {d4}, [r12, :64], r2        @ dst row 0
        vld1.16         {d5}, [r12, :64], r2        @ dst row 1
        vld1.16         {d6}, [r12, :64], r2        @ dst row 2
        vqadd.s16       q0, q0, q2                  @ rows 0-1: residual + dst (saturating)
        vld1.16         {d7}, [r12, :64], r2        @ dst row 3
        vmov.s16        q12, #0                     @ lower clamp bound
        vqadd.s16       q1, q1, q3                  @ rows 2-3: residual + dst (saturating)
        vmvn.s16        q13, #0xFC00                @ upper clamp bound: ~0xFC00 = 0x03FF
        clip10          q0, q1, q12, q13
        vst1.16         {d0}, [r0, :64], r2         @ write back rows 0..3
        vst1.16         {d1}, [r0, :64], r2
        vst1.16         {d2}, [r0, :64], r2
        vst1.16         {d3}, [r0, :64], r2
        bx              lr
endfunc
function
ff_hevc_add_residual_8x8_8_neon
,
export
=
1
add
r12
,
r0
,
r2
add
r2
,
r2
,
r2
...
...
@@ -70,6 +96,25 @@ function ff_hevc_add_residual_8x8_8_neon, export=1
bx
lr
endfunc
@ void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                       ptrdiff_t stride)
@
@ Adds an 8x8 block of signed 16-bit residuals to the destination samples
@ (read and written as 16-bit values) and clamps each sum to [0, 0x3FF].
@ Two rows are handled per loop iteration.
@
@   r0  = _dst    pointer to the even row of the current pair
@   r1  = coeffs  residuals, consumed 32 bytes per iteration
@   r2  = stride  byte step between rows on entry; doubled below
@   r3  = rows left to process
@   r12 = pointer to the odd row of the current pair
@
@ Modifies r0-r3, r12, q0, q1, q8, q9, q12, q13 and the flags.
@ NOTE(review): the :128 qualifiers assert 16-byte aligned addresses -
@ confirm the callers guarantee it.
function ff_hevc_add_residual_8x8_10_neon, export=1
        vmov.s16        q12, #0                     @ lower clamp bound
        vmvn.s16        q13, #0xFC00                @ upper clamp bound: ~0xFC00 = 0x03FF
        add             r12, r0, r2                 @ r12 -> row 1
        add             r2,  r2, r2                 @ step over two rows at a time
        mov             r3,  #8                     @ row counter
1:
        subs            r3,  r3, #2                 @ flags consumed by the bne below
        vld1.16         {q0, q1}, [r1, :128]!       @ residuals for both rows
        vld1.16         {q8}, [r0, :128]            @ dst, even row
        vqadd.s16       q0, q0, q8
        vld1.16         {q9}, [r12, :128]           @ dst, odd row
        vqadd.s16       q1, q1, q9
        clip10          q0, q1, q12, q13
        vst1.16         {q0}, [r0, :128], r2        @ store and move to the next even row
        vst1.16         {q1}, [r12, :128], r2       @ store and move to the next odd row
        bne             1b
        bx              lr
endfunc
function
ff_hevc_add_residual_16x16_8_neon
,
export
=
1
mov
r3
,
#
16
add
r12
,
r0
,
r2
...
...
@@ -97,6 +142,29 @@ function ff_hevc_add_residual_16x16_8_neon, export=1
bx
lr
endfunc
@ void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                         ptrdiff_t stride)
@
@ Adds a 16x16 block of signed 16-bit residuals to the destination samples
@ (read and written as 16-bit values) and clamps each sum to [0, 0x3FF].
@ Two 32-byte rows are handled per loop iteration.
@
@   r0  = _dst    pointer to the even row of the current pair
@   r1  = coeffs  residuals, consumed 64 bytes per iteration
@   r2  = stride  byte step between rows on entry; doubled below
@   r3  = rows left to process
@   r12 = pointer to the odd row of the current pair
@
@ Modifies r0-r3, r12, q0-q3, q8-q13 and the flags.
@ NOTE(review): the :128 qualifiers assert 16-byte aligned addresses -
@ confirm the callers guarantee it.
function ff_hevc_add_residual_16x16_10_neon, export=1
        add             r12, r0, r2                 @ r12 -> row 1
        add             r2,  r2, r2                 @ step over two rows at a time
        mov             r3,  #16                    @ row counter
        vmov.s16        q12, #0                     @ lower clamp bound
        vmvn.s16        q13, #0xFC00                @ upper clamp bound: ~0xFC00 = 0x03FF
1:
        subs            r3,  r3, #2                 @ flags consumed by the bne below
        vld1.16         {q8, q9}, [r0, :128]        @ dst, even row
        vld1.16         {q0, q1}, [r1, :128]!       @ residuals, even row
        vqadd.s16       q0, q0, q8
        vld1.16         {q10, q11}, [r12, :128]     @ dst, odd row
        vqadd.s16       q1, q1, q9
        vld1.16         {q2, q3}, [r1, :128]!       @ residuals, odd row
        vqadd.s16       q2, q2, q10
        vqadd.s16       q3, q3, q11
        clip10          q0, q1, q12, q13
        clip10          q2, q3, q12, q13
        vst1.16         {q0, q1}, [r0, :128], r2    @ store and move to the next even row
        vst1.16         {q2, q3}, [r12, :128], r2   @ store and move to the next odd row
        bne             1b
        bx              lr
endfunc
function
ff_hevc_add_residual_32x32_8_neon
,
export
=
1
vpush
{
q4
-
q7
}
add
r12
,
r0
,
r2
...
...
@@ -137,6 +205,27 @@ function ff_hevc_add_residual_32x32_8_neon, export=1
bx
lr
endfunc
@ void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, int16_t *coeffs,
@                                         ptrdiff_t stride)
@
@ Adds a 32x32 block of signed 16-bit residuals to the destination samples
@ (read and written as 16-bit values) and clamps each sum to [0, 0x3FF].
@ One 64-byte row is handled per loop iteration, as two 32-byte halves.
@
@   r0  = _dst    pointer to the first 32 bytes of the current row
@   r1  = coeffs  residuals, consumed 64 bytes per iteration
@   r2  = stride  byte step between rows (used unscaled)
@   r3  = rows left to process
@   r12 = pointer to the second 32 bytes of the current row
@
@ Modifies r0, r1, r3, r12, q0-q3, q8-q13 and the flags.
@ NOTE(review): the :128 qualifiers assert 16-byte aligned addresses -
@ confirm the callers guarantee it.
function ff_hevc_add_residual_32x32_10_neon, export=1
        mov             r3,  #32                    @ row counter
        add             r12, r0, #32                @ r12 -> right half of row 0
        vmov.s16        q12, #0                     @ lower clamp bound
        vmvn.s16        q13, #0xFC00                @ upper clamp bound: ~0xFC00 = 0x03FF
1:
        subs            r3,  r3, #1                 @ flags consumed by the bne below
        vldm            r1!, {q0-q3}                @ residuals for the whole row
        vld1.16         {q8, q9}, [r0, :128]        @ dst, left half
        vld1.16         {q10, q11}, [r12, :128]     @ dst, right half
        vqadd.s16       q0, q0, q8
        vqadd.s16       q1, q1, q9
        vqadd.s16       q2, q2, q10
        vqadd.s16       q3, q3, q11
        clip10          q0, q1, q12, q13
        clip10          q2, q3, q12, q13
        vst1.16         {q0, q1}, [r0, :128], r2    @ store left half, advance one row
        vst1.16         {q2, q3}, [r12, :128], r2   @ store right half, advance one row
        bne             1b
        bx              lr
endfunc
.
macro
idct_4x4_dc
bitdepth
function
ff_hevc_idct_4x4_dc_
\
bitdepth
\()
_neon
,
export
=
1
ldrsh
r1
,
[
r0
]
...
...
This diff is collapsed.
Click to expand it.
libavcodec/arm/hevcdsp_init_arm.c
+
13
−
0
View file @
edbf0fff
...
...
@@ -28,12 +28,20 @@
void
ff_hevc_add_residual_4x4_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_4x4_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_8x8_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_8x8_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_16x16_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_16x16_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_32x32_8_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_add_residual_32x32_10_neon
(
uint8_t
*
_dst
,
int16_t
*
coeffs
,
ptrdiff_t
stride
);
void
ff_hevc_idct_4x4_dc_8_neon
(
int16_t
*
coeffs
);
void
ff_hevc_idct_8x8_dc_8_neon
(
int16_t
*
coeffs
);
...
...
@@ -72,6 +80,11 @@ av_cold void ff_hevc_dsp_init_arm(HEVCDSPContext *c, int bit_depth)
c
->
idct
[
2
]
=
ff_hevc_idct_16x16_8_neon
;
}
if
(
bit_depth
==
10
)
{
c
->
add_residual
[
0
]
=
ff_hevc_add_residual_4x4_10_neon
;
c
->
add_residual
[
1
]
=
ff_hevc_add_residual_8x8_10_neon
;
c
->
add_residual
[
2
]
=
ff_hevc_add_residual_16x16_10_neon
;
c
->
add_residual
[
3
]
=
ff_hevc_add_residual_32x32_10_neon
;
c
->
idct_dc
[
0
]
=
ff_hevc_idct_4x4_dc_10_neon
;
c
->
idct_dc
[
1
]
=
ff_hevc_idct_8x8_dc_10_neon
;
c
->
idct_dc
[
2
]
=
ff_hevc_idct_16x16_dc_10_neon
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment