Commit 3b92075e authored by Michael Niedermayer

Revert "arm/h264: fix overreads in h264_chroma_mc8-and-h264_chroma_mc4"

This reverts commit d25f87f5.

This breaks decoding of some h264 files.
I tested the original patch with FATE, but by mistake forgot to specify
the FATE samples, so testing was limited to the internal regression tests.
parent 55a6f705
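
For context, running FATE against the external sample set (rather than only the internal regression tests) requires pointing the test harness at a local copy of the samples. A minimal sketch of that workflow, assuming the samples are mirrored into a local fate-suite/ directory:

    make fate-rsync SAMPLES=fate-suite/   # fetch/update the external sample files
    make fate SAMPLES=fate-suite/         # run FATE including the sample-based h264 tests

Without SAMPLES (or a --samples= path given to configure), only the sample-free internal tests run, which is how the regression slipped through.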
@@ -58,15 +58,14 @@ T cmp r7, #0
         vdup.8          d1, r12
         vld1.8          {d4, d5}, [r1], r4
         vdup.8          d2, r6
+        vld1.8          {d6, d7}, [r5], r4
         vdup.8          d3, r7
         vext.8          d5, d4, d5, #1
+        vext.8          d7, d6, d7, #1
-1:
-        vld1.64         {d6, d7}, [r5], r4
-        pld             [r5]
+1:      pld             [r5]
         vmull.u8        q8, d4, d0
-        vext.8          d7, d6, d7, #1
         vmlal.u8        q8, d5, d1
         vld1.8          {d4, d5}, [r1], r4
         vmlal.u8        q8, d6, d2
@@ -77,6 +76,7 @@ T cmp r7, #0
         vmlal.u8        q9, d7, d1
         vmlal.u8        q9, d4, d2
         vmlal.u8        q9, d5, d3
+        vld1.8          {d6, d7}, [r5], r4
         pld             [r1]
   .ifc \codec,h264
         vrshrn.u16      d16, q8, #6
@@ -92,6 +92,7 @@ T cmp r7, #0
         vld1.8          {d21}, [lr,:64], r2
         vrhadd.u8       q8, q8, q10
   .endif
+        vext.8          d7, d6, d7, #1
         vst1.8          {d16}, [r0,:64], r2
         vst1.8          {d17}, [r0,:64], r2
         bgt             1b
@@ -107,16 +108,16 @@ T cmp r7, #0
         add             r5, r1, r2
         lsl             r4, r2, #1
-3:
         vld1.8          {d4}, [r1], r4
         vld1.8          {d6}, [r5], r4
-        pld             [r5]
+3:      pld             [r5]
         vmull.u8        q8, d4, d0
         vmlal.u8        q8, d6, d1
+        vld1.8          {d4}, [r1], r4
         vmull.u8        q9, d6, d0
         vmlal.u8        q9, d4, d1
+        vld1.8          {d6}, [r5], r4
   .ifc \codec,h264
         vrshrn.u16      d16, q8, #6
         vrshrn.u16      d17, q9, #6
@@ -144,13 +145,15 @@ T cmp r7, #0
         vext.8          d5, d4, d5, #1
         vext.8          d7, d6, d7, #1
-        pld             [r1]
+5:      pld             [r1]
         subs            r3, r3, #2
         vmull.u8        q8, d4, d0
         vmlal.u8        q8, d5, d1
+        vld1.8          {d4, d5}, [r1], r2
         vmull.u8        q9, d6, d0
         vmlal.u8        q9, d7, d1
         pld             [r1]
+        vext.8          d5, d4, d5, #1
   .ifc \codec,h264
         vrshrn.u16      d16, q8, #6
         vrshrn.u16      d17, q9, #6
@@ -165,9 +168,11 @@ T cmp r7, #0
         vld1.8          {d21}, [lr,:64], r2
         vrhadd.u8       q8, q8, q10
   .endif
+        vld1.8          {d6, d7}, [r1], r2
+        vext.8          d7, d6, d7, #1
         vst1.8          {d16}, [r0,:64], r2
         vst1.8          {d17}, [r0,:64], r2
-        bgt             4b
+        bgt             5b
         pop             {r4-r7, pc}
 endfunc
@@ -177,7 +182,7 @@ endfunc
 .macro  h264_chroma_mc4 type, codec=h264
 function ff_\type\()_\codec\()_chroma_mc4_neon, export=1
         push            {r4-r7, lr}
-        ldrd            r4, [sp, #20]
+        ldrd            r4, r5, [sp, #20]
   .ifc \type,avg
         mov             lr, r0
   .endif
@@ -211,27 +216,26 @@ T cmp r7, #0
         vdup.8          d1, r12
         vld1.8          {d4}, [r1], r4
         vdup.8          d2, r6
+        vld1.8          {d6}, [r5], r4
         vdup.8          d3, r7
         vext.8          d5, d4, d5, #1
+        vext.8          d7, d6, d7, #1
+        vtrn.32         d4, d5
+        vtrn.32         d6, d7
         vtrn.32         d0, d1
         vtrn.32         d2, d3
-        vtrn.32         d4, d5
-1:
-        vld1.8          {d6}, [r5], r4
-        pld             [r5]
-        vext.8          d7, d6, d7, #1
+1:      pld             [r5]
         vmull.u8        q8, d4, d0
-        vtrn.32         d6, d7
-        vld1.8          {d4}, [r1], r4
         vmlal.u8        q8, d6, d2
+        vld1.8          {d4}, [r1], r4
         vext.8          d5, d4, d5, #1
-        vmull.u8        q9, d6, d0
         vtrn.32         d4, d5
+        vmull.u8        q9, d6, d0
         vmlal.u8        q9, d4, d2
+        vld1.8          {d6}, [r5], r4
         vadd.i16        d16, d16, d17
         vadd.i16        d17, d18, d19
   .ifc \codec,h264
@@ -247,6 +251,8 @@ T cmp r7, #0
         vld1.32         {d20[1]}, [lr,:32], r2
         vrhadd.u8       d16, d16, d20
   .endif
+        vext.8          d7, d6, d7, #1
+        vtrn.32         d6, d7
         vst1.32         {d16[0]}, [r0,:32], r2
         vst1.32         {d16[1]}, [r0,:32], r2
         bgt             1b
@@ -265,14 +271,13 @@ T cmp r7, #0
         add             r5, r1, r2
         lsl             r4, r2, #1
         vld1.32         {d4[0]}, [r1], r4
-3:
         vld1.32         {d4[1]}, [r5], r4
-        pld             [r5]
+3:      pld             [r5]
         vmull.u8        q8, d4, d0
         vld1.32         {d4[0]}, [r1], r4
         vmull.u8        q9, d4, d1
+        vld1.32         {d4[1]}, [r5], r4
         vadd.i16        d16, d16, d17
         vadd.i16        d17, d18, d19
   .ifc \codec,h264
@@ -301,10 +306,12 @@ T cmp r7, #0
         vtrn.32         d4, d5
         vtrn.32         d6, d7
-        vmull.u8        q8, d4, d0
+5:      vmull.u8        q8, d4, d0
         vmull.u8        q9, d6, d0
         subs            r3, r3, #2
+        vld1.8          {d4}, [r1], r2
+        vext.8          d5, d4, d5, #1
+        vtrn.32         d4, d5
         vadd.i16        d16, d16, d17
         vadd.i16        d17, d18, d19
         pld             [r1]
@@ -319,10 +326,13 @@ T cmp r7, #0
         vld1.32         {d20[1]}, [lr,:32], r2
         vrhadd.u8       d16, d16, d20
   .endif
+        vld1.8          {d6}, [r1], r2
+        vext.8          d7, d6, d7, #1
+        vtrn.32         d6, d7
         pld             [r1]
         vst1.32         {d16[0]}, [r0,:32], r2
         vst1.32         {d16[1]}, [r0,:32], r2
-        bgt             4b
+        bgt             5b
         pop             {r4-r7, pc}
 endfunc