Skip to content
Snippets Groups Projects
Commit ef0090a9 authored by Michael Niedermayer's avatar Michael Niedermayer
Browse files

x86 branchless cabac decoder

slightly faster on P3

Originally committed as revision 6608 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 2e1aee80
No related branches found
No related tags found
No related merge requests found
......@@ -31,6 +31,7 @@
#define CABAC_BITS 16
#define CABAC_MASK ((1<<CABAC_BITS)-1)
#define BRANCHLESS_CABAD 1
typedef struct CABACContext{
int low;
......@@ -374,7 +375,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
#define BYTESTART "12+2*66*4+4*65"
#define BYTE "16+2*66*4+4*65"
#define BYTEEND "20+2*66*4+4*65"
#ifndef BRANCHLESS_CABAD
asm volatile(
"movzbl (%1), %%eax \n\t"
"movl "RANGE "(%2), %%ebx \n\t"
......@@ -446,6 +447,72 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
:"r"(state), "r"(c)
: "%ecx", "%ebx", "%edx", "%esi"
);
#else
asm volatile(
"movzbl (%1), %%eax \n\t"
"movl "RANGE "(%2), %%ebx \n\t"
"movl "RANGE "(%2), %%edx \n\t"
"shrl $23, %%ebx \n\t"
"leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t"
"movzbl (%%ebx, %%esi), %%esi \n\t"
"shll $17, %%esi \n\t"
"movl "LOW "(%2), %%ebx \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
"subl %%esi, %%edx \n\t"
"movl %%edx, %%ecx \n\t"
"subl %%ebx, %%edx \n\t"
"sarl $31, %%edx \n\t" //lps_mask
"subl %%ecx, %%esi \n\t" //RangeLPS - range
"andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
"addl %%ecx, %%esi \n\t" //new range
"andl %%edx, %%ecx \n\t"
"subl %%ecx, %%ebx \n\t"
//eax:state ebx:low edx:mask esi:range
"movl $-130, %%ecx \n\t"
"andl %%edx, %%ecx \n\t"
"addl %%eax, %%ecx \n\t"
"xorl %%edx, %%eax \n\t"
"movb "MPS_STATE"(%2, %%eax), %%cl \n\t"
"movb %%cl, (%1) \n\t"
"movl %%esi, %%edx \n\t"
//eax:bit ebx:low edx:range esi:range
"shr $19, %%esi \n\t"
"movb " MANGLE(ff_h264_norm_shift) "(%%esi), %%cl \n\t"
"shll %%cl, %%ebx \n\t"
"shll %%cl, %%edx \n\t"
"test %%bx, %%bx \n\t"
" jnz 1f \n\t"
"movl "BYTE "(%2), %%ecx \n\t"
"movzwl (%%ecx), %%esi \n\t"
"bswap %%esi \n\t"
"shrl $15, %%esi \n\t"
"subl $0xFFFF, %%esi \n\t"
"addl $2, %%ecx \n\t"
"movl %%ecx, "BYTE "(%2) \n\t"
"leal -1(%%ebx), %%ecx \n\t"
"xorl %%ebx, %%ecx \n\t"
"shrl $17, %%ecx \n\t"
"movb " MANGLE(ff_h264_norm_shift) "(%%ecx), %%cl \n\t"
"neg %%cl \n\t"
"add $7, %%cl \n\t"
"shll %%cl , %%esi \n\t"
"addl %%esi, %%ebx \n\t"
"1: \n\t"
"movl %%edx, "RANGE "(%2) \n\t"
"movl %%ebx, "LOW "(%2) \n\t"
"andl $1, %%eax \n\t"
:"=&a"(bit)
:"r"(state), "r"(c)
: "%ecx", "%ebx", "%edx", "%esi"
);
#endif
#else
int s = *state;
int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment