Newer
Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
const int p0 = pix[-1*xstride];
const int q0 = pix[ 0*xstride];
const int q1 = pix[ 1*xstride];
const int q2 = pix[ 2*xstride];
if( FFABS( p0 - q0 ) < alpha &&
FFABS( p1 - p0 ) < beta &&
FFABS( q1 - q0 ) < beta ) {
if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
if( FFABS( p2 - p0 ) < beta)
{
const int p3 = pix[-4*xstride];
/* p0', p1', p2' */
pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
} else {
/* p0' */
pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
}
if( FFABS( q2 - q0 ) < beta)
{
const int q3 = pix[3*xstride];
/* q0', q1', q2' */
pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
} else {
/* q0' */
pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
}
}else{
/* p0', q0' */
pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
}
}
pix += ystride;
}
}
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
}
Loren Merritt
committed
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
int i, d;
for( i = 0; i < 4; i++ ) {
const int tc = tc0[i];
if( tc <= 0 ) {
pix += 2*ystride;
continue;
}
for( d = 0; d < 2; d++ ) {
const int p0 = pix[-1*xstride];
const int p1 = pix[-2*xstride];
const int q0 = pix[0];
const int q1 = pix[1*xstride];
if( FFABS( p0 - q0 ) < alpha &&
FFABS( p1 - p0 ) < beta &&
FFABS( q1 - q0 ) < beta ) {
int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
pix[-xstride] = av_clip_uint8( p0 + delta ); /* p0' */
pix[0] = av_clip_uint8( q0 - delta ); /* q0' */
}
pix += ystride;
}
}
}
Loren Merritt
committed
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
Loren Merritt
committed
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
Loren Merritt
committed
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
int d;
for( d = 0; d < 8; d++ ) {
const int p0 = pix[-1*xstride];
const int p1 = pix[-2*xstride];
const int q0 = pix[0];
const int q1 = pix[1*xstride];
if( FFABS( p0 - q0 ) < alpha &&
FFABS( p1 - p0 ) < beta &&
FFABS( q1 - q0 ) < beta ) {
Loren Merritt
committed
pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
}
pix += ystride;
}
}
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
s += abs(pix1[0] - pix2[0]);
s += abs(pix1[1] - pix2[1]);
s += abs(pix1[2] - pix2[2]);
s += abs(pix1[3] - pix2[3]);
s += abs(pix1[4] - pix2[4]);
s += abs(pix1[5] - pix2[5]);
s += abs(pix1[6] - pix2[6]);
s += abs(pix1[7] - pix2[7]);
s += abs(pix1[8] - pix2[8]);
s += abs(pix1[9] - pix2[9]);
s += abs(pix1[10] - pix2[10]);
s += abs(pix1[11] - pix2[11]);
s += abs(pix1[12] - pix2[12]);
s += abs(pix1[13] - pix2[13]);
s += abs(pix1[14] - pix2[14]);
s += abs(pix1[15] - pix2[15]);
pix1 += line_size;
pix2 += line_size;
}
return s;
}
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
pix1 += line_size;
pix2 += line_size;
}
return s;
}
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
pix1 += line_size;
pix2 += line_size;
pix3 += line_size;
}
return s;
}
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
pix1 += line_size;
pix2 += line_size;
pix3 += line_size;
}
return s;
}
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
s = 0;
s += abs(pix1[0] - pix2[0]);
s += abs(pix1[1] - pix2[1]);
s += abs(pix1[2] - pix2[2]);
s += abs(pix1[3] - pix2[3]);
s += abs(pix1[4] - pix2[4]);
s += abs(pix1[5] - pix2[5]);
s += abs(pix1[6] - pix2[6]);
s += abs(pix1[7] - pix2[7]);
pix1 += line_size;
pix2 += line_size;
}
return s;
}
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
int s, i;
s = 0;
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
pix1 += line_size;
pix2 += line_size;
}
return s;
}
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
pix1 += line_size;
pix2 += line_size;
pix3 += line_size;
}
return s;
}
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
pix1 += line_size;
pix2 += line_size;
pix3 += line_size;
}
return s;
}
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
MpegEncContext *c = v;
int score1=0;
int score2=0;
int x,y;
for(y=0; y<h; y++){
for(x=0; x<16; x++){
score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
}
if(y+1<h){
for(x=0; x<15; x++){
- s1[x+1] + s1[x+1+stride])
- s2[x+1] + s2[x+1+stride]);
}
}
s1+= stride;
s2+= stride;
}
if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
else return score1 + FFABS(score2)*8;
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
MpegEncContext *c = v;
int score1=0;
int score2=0;
int x,y;
for(y=0; y<h; y++){
for(x=0; x<8; x++){
score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
}
if(y+1<h){
for(x=0; x<7; x++){
- s1[x+1] + s1[x+1+stride])
- s2[x+1] + s2[x+1+stride]);
}
}
s1+= stride;
s2+= stride;
}
if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
else return score1 + FFABS(score2)*8;
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
int i;
unsigned int sum=0;
for(i=0; i<8*8; i++){
int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
int w= weight[i];
b>>= RECON_SHIFT;
assert(-512<b && b<512);
sum += (w*b)*(w*b)>>4;
}
return sum>>2;
}
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
int i;
for(i=0; i<8*8; i++){
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
* @param block the block which will be permuted according to the given permutation vector
* @param permutation the permutation vector
* @param last the last non zero coefficient in scantable order, used to speed the permutation up
* @param scantable the used scantable, this is only used to speed the permutation up, the block is not
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
Arpi
committed
{
//if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
Arpi
committed
for(i=0; i<=last; i++){
const int j= scantable[i];
temp[j]= block[j];
block[j]=0;
}
for(i=0; i<=last; i++){
const int j= scantable[i];
const int perm_j= permutation[j];
block[perm_j]= temp[j];
}
Arpi
committed
}
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
return 0;
}
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
int i;
memset(cmp, 0, sizeof(void*)*6);
for(i=0; i<6; i++){
switch(type&0xFF){
case FF_CMP_SAD:
cmp[i]= c->sad[i];
break;
case FF_CMP_SATD:
cmp[i]= c->hadamard8_diff[i];
break;
case FF_CMP_SSE:
cmp[i]= c->sse[i];
break;
case FF_CMP_DCT:
cmp[i]= c->dct_sad[i];
break;
case FF_CMP_DCT264:
cmp[i]= c->dct264_sad[i];
break;
case FF_CMP_DCTMAX:
cmp[i]= c->dct_max[i];
break;
case FF_CMP_PSNR:
cmp[i]= c->quant_psnr[i];
break;
case FF_CMP_BIT:
cmp[i]= c->bit[i];
break;
case FF_CMP_RD:
cmp[i]= c->rd[i];
break;
case FF_CMP_VSAD:
cmp[i]= c->vsad[i];
break;
case FF_CMP_VSSE:
cmp[i]= c->vsse[i];
break;
case FF_CMP_ZERO:
cmp[i]= zero_cmp;
break;
case FF_CMP_NSSE:
cmp[i]= c->nsse[i];
break;
#if CONFIG_SNOW_ENCODER
case FF_CMP_W53:
cmp[i]= c->w53[i];
break;
case FF_CMP_W97:
cmp[i]= c->w97[i];
break;
default:
av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
}
}
}
static void clear_block_c(DCTELEM *block)
{
memset(block, 0, sizeof(DCTELEM)*64);
}
/**
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
*/
Zdenek Kabelac
committed
static void clear_blocks_c(DCTELEM *blocks)
{
memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
long i;
for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
long a = *(long*)(src+i);
long b = *(long*)(dst+i);
*(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
}
for(; i<w; i++)
dst[i+0] += src[i+0];
}
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
long a = *(long*)(src1+i);
long b = *(long*)(src2+i);
*(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
}
for(; i<w; i++)
dst[i] = src1[i]+src2[i];
}
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
#if !HAVE_FAST_UNALIGNED
if((long)src2 & (sizeof(long)-1)){
for(i=0; i+7<w; i+=8){
dst[i+0] = src1[i+0]-src2[i+0];
dst[i+1] = src1[i+1]-src2[i+1];
dst[i+2] = src1[i+2]-src2[i+2];
dst[i+3] = src1[i+3]-src2[i+3];
dst[i+4] = src1[i+4]-src2[i+4];
dst[i+5] = src1[i+5]-src2[i+5];
dst[i+6] = src1[i+6]-src2[i+6];
dst[i+7] = src1[i+7]-src2[i+7];
}
}else
#endif
for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
long a = *(long*)(src1+i);
long b = *(long*)(src2+i);
*(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
}
for(; i<w; i++)
dst[i+0] = src1[i+0]-src2[i+0];
}
static void add_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
int i;
uint8_t l, lt;
l= *left;
lt= *left_top;
for(i=0; i<w; i++){
l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
lt= src1[i];
dst[i]= l;
}
*left= l;
*left_top= lt;
}
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
int i;
uint8_t l, lt;
l= *left;
lt= *left_top;
for(i=0; i<w; i++){
const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
lt= src1[i];
l= src2[i];
dst[i]= l - pred;
*left= l;
*left_top= lt;
}
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);
#define BUTTERFLY1(x,y) \
{\
int a,b;\
a= x;\
b= y;\
x= a+b;\
y= a-b;\
}
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
for(i=0; i<8; i++){
//FIXME try pointer walks
BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
}
for(i=0; i<8; i++){
BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
}
#if 0
static int maxi=0;
if(sum>maxi){
maxi=sum;
printf("MAX:%d\n", maxi);
}
#endif
return sum;
}
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
}
for(i=0; i<8; i++){
BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
}
sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
s->dsp.fdct(temp);
return s->dsp.sum_abs_dctelem(temp);
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
#define DCT8_1D {\
const int s07 = SRC(0) + SRC(7);\
const int s16 = SRC(1) + SRC(6);\
const int s25 = SRC(2) + SRC(5);\
const int s34 = SRC(3) + SRC(4);\
const int a0 = s07 + s34;\
const int a1 = s16 + s25;\
const int a2 = s07 - s34;\
const int a3 = s16 - s25;\
const int d07 = SRC(0) - SRC(7);\
const int d16 = SRC(1) - SRC(6);\
const int d25 = SRC(2) - SRC(5);\
const int d34 = SRC(3) - SRC(4);\
const int a4 = d16 + d25 + (d07 + (d07>>1));\
const int a5 = d07 - d34 - (d25 + (d25>>1));\
const int a6 = d07 + d34 - (d16 + (d16>>1));\
const int a7 = d16 - d25 + (d34 + (d34>>1));\
DST(0, a0 + a1 ) ;\
DST(1, a4 + (a7>>2)) ;\
DST(2, a2 + (a3>>1)) ;\
DST(3, a5 + (a6>>2)) ;\
DST(4, a0 - a1 ) ;\
DST(5, a6 - (a5>>2)) ;\
DST(6, (a2>>1) - a3 ) ;\
DST(7, (a4>>2) - a7 ) ;\
}
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
int i;
int sum=0;
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
for( i = 0; i < 8; i++ )
DCT8_1D
#undef SRC
#undef DST
#define SRC(x) dct[x][i]
for( i = 0; i < 8; i++ )
DCT8_1D
#undef SRC
#undef DST
return sum;
}
#endif
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
DCTELEM * const temp= (DCTELEM*)aligned_temp;
int sum=0, i;
assert(h==8);
s->dsp.diff_pixels(temp, src1, src2, stride);
s->dsp.fdct(temp);
for(i=0; i<64; i++)
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
DCTELEM * const temp= (DCTELEM*)aligned_temp;
DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
s->dct_unquantize_inter(s, temp, 0, s->qscale);
ff_simple_idct(temp); //FIXME
for(i=0; i<64; i++)
sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
const uint8_t *scantable= s->intra_scantable.permutated;
DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
DCTELEM * const temp= (DCTELEM*)aligned_temp;
uint8_t * const bak= (uint8_t*)aligned_bak;
int i, last, run, bits, level, distortion, start_i;
const int esc_length= s->ac_esc_length;
uint8_t * length;
uint8_t * last_length;
for(i=0; i<8; i++){
((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
}
s->dsp.diff_pixels(temp, src1, src2, stride);
s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
bits=0;
length = s->intra_ac_vlc_length;
last_length= s->intra_ac_vlc_last_length;
bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
} else {
start_i = 0;
length = s->inter_ac_vlc_length;
last_length= s->inter_ac_vlc_last_length;
}
run=0;
for(i=start_i; i<last; i++){
int j= scantable[i];
level= temp[j];
if(level){
level+=64;
if((level&(~127)) == 0){
bits+= length[UNI_AC_ENC_INDEX(run, level)];
}else
bits+= esc_length;
run=0;
}else
run++;
}
i= scantable[last];
if((level&(~127)) == 0){
bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
}else
bits+= esc_length;
if(s->mb_intra)
s->dct_unquantize_intra(s, temp, 0, s->qscale);
else
s->dct_unquantize_inter(s, temp, 0, s->qscale);
s->dsp.idct_add(bak, stride, temp);
distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);
return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
const uint8_t *scantable= s->intra_scantable.permutated;
DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
int i, last, run, bits, level, start_i;
const int esc_length= s->ac_esc_length;
uint8_t * length;
uint8_t * last_length;
s->dsp.diff_pixels(temp, src1, src2, stride);
s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
bits=0;
length = s->intra_ac_vlc_length;
last_length= s->intra_ac_vlc_last_length;
bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
} else {
start_i = 0;
length = s->inter_ac_vlc_length;
last_length= s->inter_ac_vlc_last_length;
}
run=0;
for(i=start_i; i<last; i++){
int j= scantable[i];
level= temp[j];
if(level){
level+=64;
if((level&(~127)) == 0){
bits+= length[UNI_AC_ENC_INDEX(run, level)];
}else
bits+= esc_length;
run=0;
}else
run++;
}
i= scantable[last];
if((level&(~127)) == 0){
bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
}else
bits+= esc_length;
}
return bits;
}
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
int score=0; \
int x,y; \
\
for(y=1; y<h; y++){ \
for(x=0; x<size; x+=4){ \
score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
+FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
} \
s+= stride; \
} \
\
return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
int score=0;
int x,y;
for(y=1; y<h; y++){
for(x=0; x<16; x++){
score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
}
s1+= stride;
s2+= stride;
}
return score;
}
#define SQ(a) ((a)*(a))
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
int score=0; \
int x,y; \
\
for(y=1; y<h; y++){ \
for(x=0; x<size; x+=4){ \
score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
+SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
} \
s+= stride; \
} \
\
return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
int score=0;
int x,y;
for(y=1; y<h; y++){
for(x=0; x<16; x++){
score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
}
s1+= stride;