Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
FFmpeg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
libremedia
Tethys
FFmpeg
Commits
2143d69b
Commit
2143d69b
authored
13 years ago
by
Mans Rullgard
Browse files
Options
Downloads
Patches
Plain Diff
cabac: move x86 asm to libavcodec/x86/cabac.h
Signed-off-by:
Mans Rullgard
<
mans@mansr.com
>
parent
d075e7d5
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
libavcodec/cabac.h
+24
-132
24 additions, 132 deletions
libavcodec/cabac.h
libavcodec/x86/cabac.h
+148
-0
148 additions, 0 deletions
libavcodec/x86/cabac.h
libavcodec/x86/h264_i386.h
+1
-0
1 addition, 0 deletions
libavcodec/x86/h264_i386.h
with
173 additions
and
132 deletions
libavcodec/cabac.h
+
24
−
132
View file @
2143d69b
...
...
@@ -33,7 +33,6 @@
//#undef NDEBUG
#include
<assert.h>
#include
"libavutil/x86_cpu.h"
#define CABAC_BITS 16
#define CABAC_MASK ((1<<CABAC_BITS)-1)
...
...
@@ -57,6 +56,9 @@ extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS
extern
uint8_t
ff_h264_lps_state
[
2
*
64
];
///< transIdxLPS
extern
const
uint8_t
ff_h264_norm_shift
[
512
];
#if ARCH_X86
# include "x86/cabac.h"
#endif
void
ff_init_cabac_encoder
(
CABACContext
*
c
,
uint8_t
*
buf
,
int
buf_size
);
void
ff_init_cabac_decoder
(
CABACContext
*
c
,
const
uint8_t
*
buf
,
int
buf_size
);
...
...
@@ -270,7 +272,24 @@ static void refill(CABACContext *c){
c
->
bytestream
+=
CABAC_BITS
/
8
;
}
#if ! ( ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) )
/**
 * Renormalize the decoder state.
 *
 * As long as c->range is below 0x100, both c->range and c->low are doubled;
 * after each doubling, refill() is called if the bits of c->low selected by
 * CABAC_MASK are all zero.
 *
 * @param c decoder context whose range/low fields are updated in place
 */
static inline void renorm_cabac_decoder(CABACContext *c)
{
    for (;;) {
        if (c->range >= 0x100)
            break;

        c->range *= 2;
        c->low   *= 2;

        if ((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
/**
 * Perform at most one renormalization step without branching on range.
 *
 * The shift amount is bit 31 of the 32-bit value (c->range - 0x100), so it is
 * either 0 or 1. c->range and c->low are shifted left by that amount, then
 * refill() is called if the bits of c->low selected by CABAC_MASK are all zero.
 *
 * @param c decoder context whose range/low fields are updated in place
 */
static inline void renorm_cabac_decoder_once(CABACContext *c)
{
    const int step = (uint32_t)(c->range - 0x100) >> 31;

    c->range <<= step;
    c->low   <<= step;

    if ((c->low & CABAC_MASK) == 0)
        refill(c);
}
#ifndef get_cabac_inline
static
void
refill2
(
CABACContext
*
c
){
int
i
,
x
;
...
...
@@ -288,102 +307,8 @@ static void refill2(CABACContext *c){
c
->
low
+=
x
<<
i
;
c
->
bytestream
+=
CABAC_BITS
/
8
;
}
#endif
/**
 * Renormalize the decoder state.
 *
 * As long as c->range is below 0x100, both c->range and c->low are doubled;
 * after each doubling, refill() is called if the bits of c->low selected by
 * CABAC_MASK are all zero.
 *
 * @param c decoder context whose range/low fields are updated in place
 */
static inline void renorm_cabac_decoder(CABACContext *c)
{
    for (;;) {
        if (c->range >= 0x100)
            break;

        c->range *= 2;
        c->low   *= 2;

        if ((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
/**
 * Perform at most one renormalization step without branching on range.
 *
 * The shift amount is bit 31 of the 32-bit value (c->range - 0x100), so it is
 * either 0 or 1. c->range and c->low are shifted left by that amount, then
 * refill() is called if the bits of c->low selected by CABAC_MASK are all zero.
 *
 * @param c decoder context whose range/low fields are updated in place
 */
static inline void renorm_cabac_decoder_once(CABACContext *c)
{
    const int step = (uint32_t)(c->range - 0x100) >> 31;

    c->range <<= step;
    c->low   <<= step;

    if ((c->low & CABAC_MASK) == 0)
        refill(c);
}
static
av_always_inline
int
get_cabac_inline
(
CABACContext
*
c
,
uint8_t
*
const
state
){
//FIXME gcc generates duplicate load/stores for c->low and c->range
#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
int
bit
,
low
,
range
,
tmp
;
#if HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"cmp "low" , "tmp" \n\t"\
"cmova %%ecx , "range" \n\t"\
"sbb %%ecx , %%ecx \n\t"\
"and %%ecx , "tmp" \n\t"\
"sub "tmp" , "low" \n\t"\
"xor %%ecx , "ret" \n\t"
#else
/* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"sub "low" , "tmp" \n\t"\
"sar $31 , "tmp" \n\t"
/*lps_mask*/
\
"sub %%ecx , "range" \n\t"
/*RangeLPS - range*/
\
"and "tmp" , "range" \n\t"
/*(RangeLPS - range)&lps_mask*/
\
"add %%ecx , "range" \n\t"
/*new range*/
\
"shl $17 , %%ecx \n\t"\
"and "tmp" , %%ecx \n\t"\
"sub %%ecx , "low" \n\t"\
"xor "tmp" , "ret" \n\t"
#endif
/* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \
"movzbl "statep" , "ret" \n\t"\
"mov "range" , "tmp" \n\t"\
"and $0xC0 , "range" \n\t"\
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
"sub "range" , "tmp" \n\t"\
BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
"shl %%cl , "range" \n\t"\
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
"mov "tmpbyte" , "statep" \n\t"\
"shl %%cl , "low" \n\t"\
"test "lowword" , "lowword" \n\t"\
" jnz 1f \n\t"\
"mov "byte"("cabac"), %%"REG_c" \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\
"bswap "tmp" \n\t"\
"shr $15 , "tmp" \n\t"\
"sub $0xFFFF , "tmp" \n\t"\
"add $2 , %%"REG_c" \n\t"\
"mov %%"REG_c" , "byte "("cabac") \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
"neg %%ecx \n\t"\
"add $7 , %%ecx \n\t"\
"shl %%cl , "tmp" \n\t"\
"add "tmp" , "low" \n\t"\
"1: \n\t"
__asm__
volatile
(
"movl %a6(%5), %2
\n\t
"
"movl %a7(%5), %1
\n\t
"
BRANCHLESS_GET_CABAC
(
"%0"
,
"%5"
,
"(%4)"
,
"%1"
,
"%w1"
,
"%2"
,
"%3"
,
"%b3"
,
"%a8"
)
"movl %2, %a6(%5)
\n\t
"
"movl %1, %a7(%5)
\n\t
"
:
"=&r"
(
bit
),
"=&r"
(
low
),
"=&r"
(
range
),
"=&r"
(
tmp
)
:
"r"
(
state
),
"r"
(
c
),
"i"
(
offsetof
(
CABACContext
,
range
)),
"i"
(
offsetof
(
CABACContext
,
low
)),
"i"
(
offsetof
(
CABACContext
,
bytestream
))
:
"%"
REG_c
,
"memory"
);
bit
&=
1
;
#else
/* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
int
s
=
*
state
;
int
RangeLPS
=
ff_h264_lps_range
[
2
*
(
c
->
range
&
0xC0
)
+
s
];
int
bit
,
lps_mask
;
...
...
@@ -403,9 +328,9 @@ static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const st
c
->
low
<<=
lps_mask
;
if
(
!
(
c
->
low
&
CABAC_MASK
))
refill2
(
c
);
#endif
/* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
return
bit
;
}
#endif
static
int
av_noinline
av_unused
get_cabac_noinline
(
CABACContext
*
c
,
uint8_t
*
const
state
){
return
get_cabac_inline
(
c
,
state
);
...
...
@@ -432,41 +357,8 @@ static int av_unused get_cabac_bypass(CABACContext *c){
}
#ifndef get_cabac_bypass_sign
static
av_always_inline
int
get_cabac_bypass_sign
(
CABACContext
*
c
,
int
val
){
#if ARCH_X86
x86_reg
tmp
;
__asm__
volatile
(
"movl %a3(%2), %k1
\n\t
"
"movl %a4(%2), %%eax
\n\t
"
"shl $17, %k1
\n\t
"
"add %%eax, %%eax
\n\t
"
"sub %k1, %%eax
\n\t
"
"cltd
\n\t
"
"and %%edx, %k1
\n\t
"
"add %k1, %%eax
\n\t
"
"xor %%edx, %%ecx
\n\t
"
"sub %%edx, %%ecx
\n\t
"
"test %%ax, %%ax
\n\t
"
" jnz 1f
\n\t
"
"mov %a5(%2), %1
\n\t
"
"subl $0xFFFF, %%eax
\n\t
"
"movzwl (%1), %%edx
\n\t
"
"bswap %%edx
\n\t
"
"shrl $15, %%edx
\n\t
"
"add $2, %1
\n\t
"
"addl %%edx, %%eax
\n\t
"
"mov %1, %a5(%2)
\n\t
"
"1:
\n\t
"
"movl %%eax, %a4(%2)
\n\t
"
:
"+c"
(
val
),
"=&r"
(
tmp
)
:
"r"
(
c
),
"i"
(
offsetof
(
CABACContext
,
range
)),
"i"
(
offsetof
(
CABACContext
,
low
)),
"i"
(
offsetof
(
CABACContext
,
bytestream
))
:
"%eax"
,
"%edx"
,
"memory"
);
return
val
;
#else
int
range
,
mask
;
c
->
low
+=
c
->
low
;
...
...
@@ -479,8 +371,8 @@ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
range
&=
mask
;
c
->
low
+=
range
;
return
(
val
^
mask
)
-
mask
;
#endif
}
#endif
/**
*
...
...
This diff is collapsed.
Click to expand it.
libavcodec/x86/cabac.h
0 → 100644
+
148
−
0
View file @
2143d69b
/*
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_CABAC_H
#define AVCODEC_X86_CABAC_H
#include
"libavcodec/cabac.h"
#include
"libavutil/attributes.h"
#include
"libavutil/x86_cpu.h"
#include
"config.h"
/*
 * BRANCHLESS_GET_CABAC_UPDATE: inline-asm fragment (a run of adjacent string
 * literals) that BRANCHLESS_GET_CABAC pastes into its own instruction
 * sequence. Every argument is a string naming an asm operand.
 *
 * Both variants start by copying "tmp" into %ecx and shifting "tmp" left by
 * 17, use %ecx as scratch, and then modify "range", "low" and "ret" without
 * any jump. Only tmp, low, range and ret appear in the expansions; cabac,
 * statep and lowword are accepted but not referenced.
 */
#if HAVE_FAST_CMOV
/* Variant built on cmp / cmova / sbb. */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"cmp "low" , "tmp" \n\t"\
"cmova %%ecx , "range" \n\t"\
"sbb %%ecx , %%ecx \n\t"\
"and %%ecx , "tmp" \n\t"\
"sub "tmp" , "low" \n\t"\
"xor %%ecx , "ret" \n\t"
#else
/* HAVE_FAST_CMOV */
/* Variant without cmov: "sar $31" turns the sign of (tmp - low) into a mask
 * that then selects the new range and the amount subtracted from low. */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"sub "low" , "tmp" \n\t"\
"sar $31 , "tmp" \n\t"
/*lps_mask*/
\
"sub %%ecx , "range" \n\t"
/*RangeLPS - range*/
\
"and "tmp" , "range" \n\t"
/*(RangeLPS - range)&lps_mask*/
\
"add %%ecx , "range" \n\t"
/*new range*/
\
"shl $17 , %%ecx \n\t"\
"and "tmp" , %%ecx \n\t"\
"sub %%ecx , "low" \n\t"\
"xor "tmp" , "ret" \n\t"
#endif
/* HAVE_FAST_CMOV */
/*
 * BRANCHLESS_GET_CABAC: inline-asm fragment (adjacent string literals) that
 * decodes one bin. Every argument is a string naming an asm operand:
 *
 *   ret      register that first receives the byte at statep; it is used as
 *            a table index and is modified by BRANCHLESS_GET_CABAC_UPDATE
 *   cabac    base register used together with "byte" to address the
 *            bytestream pointer
 *   statep   memory operand of the state byte; read first, later rewritten
 *            from tmpbyte
 *   low      register holding low; lowword is its 16-bit form
 *   range    register holding range
 *   tmp      scratch register; tmpbyte is its 8-bit form
 *   byte     displacement applied to cabac to load/store the bytestream
 *            pointer
 *
 * The tables ff_h264_lps_range, ff_h264_norm_shift and ff_h264_mlps_state are
 * addressed through MANGLE(). %ecx / REG_c is used as scratch, and the local
 * label "1" marks the end of the fragment.
 *
 * When the 16-bit lowword is non-zero after the shift, the "jnz 1f" skips the
 * tail; otherwise the tail reads 16 bits through the bytestream pointer,
 * byte-swaps them, advances the pointer by 2 and adds the shifted value to
 * low.
 */
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \
"movzbl "statep" , "ret" \n\t"\
"mov "range" , "tmp" \n\t"\
"and $0xC0 , "range" \n\t"\
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
"sub "range" , "tmp" \n\t"\
BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, \
range, tmp) \
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
"shl %%cl , "range" \n\t"\
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
"mov "tmpbyte" , "statep" \n\t"\
"shl %%cl , "low" \n\t"\
"test "lowword" , "lowword" \n\t"\
" jnz 1f \n\t"\
"mov "byte"("cabac"), %%"REG_c" \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\
"bswap "tmp" \n\t"\
"shr $15 , "tmp" \n\t"\
"sub $0xFFFF , "tmp" \n\t"\
"add $2 , %%"REG_c" \n\t"\
"mov %%"REG_c" , "byte "("cabac") \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
"neg %%ecx \n\t"\
"add $7 , %%ecx \n\t"\
"shl %%cl , "tmp" \n\t"\
"add "tmp" , "low" \n\t"\
"1: \n\t"
/*
 * x86 inline-asm implementation of get_cabac_inline().
 *
 * Only compiled when ARCH_X86 and HAVE_7REGS are set and BROKEN_RELOCATIONS
 * is not defined. The #define below maps the generic name get_cabac_inline
 * onto this function.
 *
 * Asm operand map:
 *   %0 bit, %1 low, %2 range, %3 tmp        (early-clobber outputs)
 *   %4 state, %5 c                          (register inputs)
 *   %6 / %7 / %8  offsetof(CABACContext, range / low / bytestream)
 *
 * The asm loads range and low from the context, runs BRANCHLESS_GET_CABAC,
 * and stores range and low back. REG_c and "memory" are declared clobbered.
 *
 * @param c     decoder context, addressed only through the offsets above
 * @param state pointer to the state byte; read and rewritten by the asm
 * @return bit 0 of the value left in "bit" by the asm
 */
#if ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
#define get_cabac_inline get_cabac_inline_x86
static
av_always_inline
int
get_cabac_inline_x86
(
CABACContext
*
c
,
uint8_t
*
const
state
)
{
int
bit
,
low
,
range
,
tmp
;
__asm__
volatile
(
/* load c->range into %2 and c->low into %1 */
"movl %a6(%5), %2
\n\t
"
"movl %a7(%5), %1
\n\t
"
BRANCHLESS_GET_CABAC
(
"%0"
,
"%5"
,
"(%4)"
,
"%1"
,
"%w1"
,
"%2"
,
"%3"
,
"%b3"
,
"%a8"
)
/* write the updated range and low back into the context */
"movl %2, %a6(%5)
\n\t
"
"movl %1, %a7(%5)
\n\t
"
:
"=&r"
(
bit
),
"=&r"
(
low
),
"=&r"
(
range
),
"=&r"
(
tmp
)
:
"r"
(
state
),
"r"
(
c
),
"i"
(
offsetof
(
CABACContext
,
range
)),
"i"
(
offsetof
(
CABACContext
,
low
)),
"i"
(
offsetof
(
CABACContext
,
bytestream
))
:
"%"
REG_c
,
"memory"
);
/* only the lowest bit of the asm result is returned */
return
bit
&
1
;
}
#endif
/* ARCH_X86 && HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
/*
 * x86 inline-asm implementation of get_cabac_bypass_sign(); the #define below
 * maps the generic name onto this function.
 *
 * Asm operand map:
 *   %0 val  (read/write, pinned to ecx by the "+c" constraint)
 *   %1 tmp  (early-clobber scratch; %k1 is its 32-bit form)
 *   %2 c
 *   %3 / %4 / %5  offsetof(CABACContext, range / low / bytestream)
 *
 * eax, edx and "memory" are declared clobbered.
 *
 * @param c   decoder context, addressed only through the offsets above
 * @param val value whose sign is conditionally flipped
 * @return val after the asm's "xor edx / sub edx" step, where edx is the
 *         sign extension of eax produced by cltd (0 or all ones)
 */
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
static
av_always_inline
int
get_cabac_bypass_sign_x86
(
CABACContext
*
c
,
int
val
)
{
x86_reg
tmp
;
__asm__
volatile
(
/* tmp = c->range << 17, eax = 2 * c->low - tmp */
"movl %a3(%2), %k1
\n\t
"
"movl %a4(%2), %%eax
\n\t
"
"shl $17, %k1
\n\t
"
"add %%eax, %%eax
\n\t
"
"sub %k1, %%eax
\n\t
"
/* edx = sign mask of eax; add tmp back to eax only when the mask is set */
"cltd
\n\t
"
"and %%edx, %k1
\n\t
"
"add %k1, %%eax
\n\t
"
/* val = (val ^ mask) - mask */
"xor %%edx, %%ecx
\n\t
"
"sub %%edx, %%ecx
\n\t
"
/* skip the refill below unless the low 16 bits of eax are zero */
"test %%ax, %%ax
\n\t
"
" jnz 1f
\n\t
"
/* refill: read 16 bits through the bytestream pointer and advance it by 2 */
"mov %a5(%2), %1
\n\t
"
"subl $0xFFFF, %%eax
\n\t
"
"movzwl (%1), %%edx
\n\t
"
"bswap %%edx
\n\t
"
"shrl $15, %%edx
\n\t
"
"add $2, %1
\n\t
"
"addl %%edx, %%eax
\n\t
"
"mov %1, %a5(%2)
\n\t
"
"1:
\n\t
"
/* store the updated low back into the context */
"movl %%eax, %a4(%2)
\n\t
"
:
"+c"
(
val
),
"=&r"
(
tmp
)
:
"r"
(
c
),
"i"
(
offsetof
(
CABACContext
,
range
)),
"i"
(
offsetof
(
CABACContext
,
low
)),
"i"
(
offsetof
(
CABACContext
,
bytestream
))
:
"%eax"
,
"%edx"
,
"memory"
);
return
val
;
}
#endif
/* AVCODEC_X86_CABAC_H */
This diff is collapsed.
Click to expand it.
libavcodec/x86/h264_i386.h
+
1
−
0
View file @
2143d69b
...
...
@@ -32,6 +32,7 @@
#include
<stddef.h>
#include
"libavcodec/cabac.h"
#include
"cabac.h"
//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
//as that would make optimization work hard)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment