Skip to content
Snippets Groups Projects
Commit 2da4e5e3 authored by Måns Rullgård's avatar Måns Rullgård
Browse files

ARM: slightly faster NEON H264 horizontal loop filter

Originally committed as revision 19216 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent f4ca612f
No related branches found
No related tags found
No related merge requests found
...@@ -37,6 +37,13 @@ ...@@ -37,6 +37,13 @@
vtrn.8 \r6, \r7 vtrn.8 \r6, \r7
.endm .endm
/*
 * transpose_4x4 r0 r1 r2 r3
 *
 * In-place byte transpose across four NEON registers.  Within every
 * 32-bit lane, the four bytes held by r0..r3 are treated as the rows of
 * a 4x4 byte matrix; on exit r0..r3 hold the columns of that matrix
 * instead.  All four registers are overwritten; nothing else is touched.
 */
.macro transpose_4x4 r0 r1 r2 r3
/* Step 1: exchange 16-bit halves between rows two apart (0<->2, 1<->3). */
vtrn.16 \r0, \r2
vtrn.16 \r1, \r3
/* Step 2: exchange single bytes between adjacent rows (0<->1, 2<->3). */
vtrn.8 \r0, \r1
vtrn.8 \r2, \r3
.endm
.macro swap4 r0 r1 r2 r3 r4 r5 r6 r7 .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
vswp \r0, \r4 vswp \r0, \r4
vswp \r1, \r5 vswp \r1, \r5
...@@ -469,35 +476,29 @@ function ff_h264_h_loop_filter_luma_neon, export=1 ...@@ -469,35 +476,29 @@ function ff_h264_h_loop_filter_luma_neon, export=1
transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13 transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13
align_push_regs align_push_regs
sub sp, sp, #16
vst1.64 {d4, d5}, [sp,:128]
sub sp, sp, #16
vst1.64 {d20,d21}, [sp,:128]
h264_loop_filter_luma h264_loop_filter_luma
vld1.64 {d20,d21}, [sp,:128]! transpose_4x4 q4, q8, q0, q5
vld1.64 {d4, d5}, [sp,:128]!
transpose_8x8 q3, q10, q4, q8, q0, q5, q2, q13
sub r0, r0, r1, lsl #4 sub r0, r0, r1, lsl #4
vst1.64 {d6}, [r0], r1 add r0, r0, #2
vst1.64 {d20}, [r0], r1 vst1.32 {d8[0]}, [r0], r1
vst1.64 {d8}, [r0], r1 vst1.32 {d16[0]}, [r0], r1
vst1.64 {d16}, [r0], r1 vst1.32 {d0[0]}, [r0], r1
vst1.64 {d0}, [r0], r1 vst1.32 {d10[0]}, [r0], r1
vst1.64 {d10}, [r0], r1 vst1.32 {d8[1]}, [r0], r1
vst1.64 {d4}, [r0], r1 vst1.32 {d16[1]}, [r0], r1
vst1.64 {d26}, [r0], r1 vst1.32 {d0[1]}, [r0], r1
vst1.64 {d7}, [r0], r1 vst1.32 {d10[1]}, [r0], r1
vst1.64 {d21}, [r0], r1 vst1.32 {d9[0]}, [r0], r1
vst1.64 {d9}, [r0], r1 vst1.32 {d17[0]}, [r0], r1
vst1.64 {d17}, [r0], r1 vst1.32 {d1[0]}, [r0], r1
vst1.64 {d1}, [r0], r1 vst1.32 {d11[0]}, [r0], r1
vst1.64 {d11}, [r0], r1 vst1.32 {d9[1]}, [r0], r1
vst1.64 {d5}, [r0], r1 vst1.32 {d17[1]}, [r0], r1
vst1.64 {d27}, [r0], r1 vst1.32 {d1[1]}, [r0], r1
vst1.32 {d11[1]}, [r0], r1
align_pop_regs align_pop_regs
bx lr bx lr
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment