Skip to content
Snippets Groups Projects
Commit 3a9f44d5 authored by Michael Niedermayer
Browse files

And of course the unneeded double subtractions were blindly put in the MMX code.
This change also makes the affected code 4% faster.

Originally committed as revision 10156 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent f29bd6fa
No related branches found
No related tags found
No related merge requests found
......@@ -294,9 +294,10 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
DWTELEM * const ref = b+w2 - 1;
i = 1;
b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS);
b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
asm volatile(
"pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */
"pcmpeqd %%mm7, %%mm7 \n\t"
"psrld $29, %%mm7 \n\t"
::);
for(; i<w_l-3; i+=4){
asm volatile(
......@@ -304,22 +305,18 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
"movq 8(%1), %%mm4 \n\t"
"paddd 4(%1), %%mm0 \n\t"
"paddd 12(%1), %%mm4 \n\t"
"movq %%mm7, %%mm1 \n\t"
"movq %%mm7, %%mm5 \n\t"
"psubd %%mm0, %%mm1 \n\t"
"psubd %%mm4, %%mm5 \n\t"
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm4 \n\t"
"pslld $2, %%mm0 \n\t"
"pslld $2, %%mm4 \n\t"
"psubd %%mm0, %%mm1 \n\t"
"psubd %%mm4, %%mm5 \n\t"
"psrad $4, %%mm1 \n\t"
"psrad $4, %%mm5 \n\t"
"movq (%0), %%mm0 \n\t"
"movq 8(%0), %%mm4 \n\t"
"psubd %%mm1, %%mm0 \n\t"
"psubd %%mm5, %%mm4 \n\t"
"paddd %%mm7, %%mm0 \n\t"
"paddd %%mm7, %%mm4 \n\t"
"psrad $2, %%mm0 \n\t"
"psrad $2, %%mm4 \n\t"
"movq (%0), %%mm1 \n\t"
"movq 8(%0), %%mm5 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm5, %%mm4 \n\t"
"psrad $2, %%mm0 \n\t"
"psrad $2, %%mm4 \n\t"
"paddd %%mm1, %%mm0 \n\t"
"paddd %%mm5, %%mm4 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm4, 8(%0) \n\t"
:: "r"(&b[i]), "r"(&ref[i])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment