Newer
Older
Michael Niedermayer
committed
* Copyright (c) 2002-2004 Michael Niedermayer
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Michael Niedermayer
committed
*
* new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
/**
* @file motion_est.c
* Motion estimation.
*/
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
Michael Niedermayer
committed
//#undef NDEBUG
//#include <assert.h>
/*
 * Named slots of the motion-vector predictor array `P`, which the search
 * functions in this file declare locally as `int P[10][2]`
 * ([0] = x component, [1] = y component).
 *   P_LEFT     - vector of the left neighbour
 *   P_TOP      - vector of the neighbour above
 *   P_TOPRIGHT - vector of the neighbour above and to the right
 *   P_MEDIAN   - component-wise mid_pred() of the three vectors above
 *   P_MV1      - vector handed in by the caller (mx, my)
 * These macros only work where a variable called `P` is in scope.
 */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
/*
 * Forward declaration. The function is defined further down in this file;
 * it is installed as s->me.sub_motion_search when me_cmp, me_sub_cmp and
 * mb_cmp are all FF_CMP_SAD and quarter-pel is not enabled.
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int pred_x, int pred_y, uint8_t *src_data[3],
uint8_t *ref_data[6], int stride, int uvstride,
int size, int h, uint8_t * const mv_penalty);
/**
 * Advance s->me.map_generation by one generation step and return the new value.
 *
 * One step is 1<<(ME_MAP_MV_BITS*2). If the counter is zero after the
 * increment, s->me.map is cleared (ME_MAP_SIZE uint32_t entries) and the
 * counter restarts at one step, so the returned generation is never zero.
 */
static inline int update_map_generation(MpegEncContext * s)
{
    const int generation_step = 1<<(ME_MAP_MV_BITS*2);

    s->me.map_generation += generation_step;
    if (s->me.map_generation != 0)
        return s->me.map_generation;

    /* Counter reached zero: wipe the map and start again from the first generation. */
    memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
    s->me.map_generation = generation_step;
    return s->me.map_generation;
}
/* shape adaptive search stuff */
/**
 * One candidate minimum.
 * `height` is the sort key used by minima_cmp(). The remaining fields are
 * not used in the visible part of this file (presumably x/y are the
 * candidate position and `checked` a visited flag - TODO confirm).
 */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima by ascending `height`.
 *
 * @return negative, zero or positive when a sorts before, equal to or after b.
 *
 * The result is built from two comparisons instead of a subtraction:
 * `da->height - db->height` overflows (undefined behaviour, and in practice
 * the wrong sign) when the two heights are far apart, e.g. INT_MAX vs. -1.
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = (const Minima *) a;
    const Minima *db = (const Minima *) b;

    return (da->height > db->height) - (da->height < db->height);
}
/* SIMPLE */
/*
 * Luma-only comparison macros for the "simple_" instantiation of
 * motion_est_template.c. They expand inside the template's functions and
 * rely on names available there (s, src_y, ref_y, stride, h, cmp, cmp_sub,
 * hpel_put, qpel_put). The `size` argument is accepted but not used.
 */

/* Prefix applied to every identifier the template wraps in RENAME(). */
#define RENAME(a) simple_ ## a

/* Full-pel: compare the source block with the reference at offset (x, y). */
#define CMP(d, x, y, size)\
d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);

/*
 * Half-pel: build the block at full-pel (x, y) with half-pel fraction
 * (dx, dy) in s->me.scratchpad, then compare it with the source.
 * The closing brace was missing here, which also spliced the following
 * #define CMP_QPEL line into this macro's body.
 */
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 2*(dy);\
    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
}

/* Quarter-pel: as CMP_HPEL, with a quarter-pel fraction (dx, dy) and qpel_put. */
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 4*(dy);\
    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
}
#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
/* SIMPLE CHROMA */
#define RENAME(a) simple_chroma_ ## a
/*
 * Full-pel comparison including chroma, for the "simple_chroma_" variant.
 * d receives the luma score at offset (x, y). When chroma_cmp is non-NULL,
 * the U and V scores are added: each chroma block is read at
 * ((x)>>1, (y)>>1) with the low bits of x and y selecting the
 * chroma_hpel_put variant, written to s->me.scratchpad and compared over
 * h>>1 rows. Relies on names available at the expansion site (s, src_y/u/v,
 * ref_y/u/v, stride, uvstride, h, cmp, chroma_cmp, chroma_hpel_put).
 * `size` is not used.
 */
#define CMP(d, x, y, size)\
d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\
if(chroma_cmp){\
int dxy= ((x)&1) + 2*((y)&1);\
int c= ((x)>>1) + ((y)>>1)*uvstride;\
\
chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\
chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\
}
/*
 * Half-pel comparison including chroma, for the "simple_chroma_" variant.
 * Luma: the block at full-pel (x, y) with half-pel fraction (dx, dy) is
 * built in s->me.scratchpad and scored with cmp_sub.
 * Chroma (only when chroma_cmp_sub is non-NULL): the chroma_hpel_put index
 * is the luma dxy OR-ed with the low bits of x and y, the block is read at
 * ((x)>>1, (y)>>1), and the U and V scores over h>>1 rows are added to d.
 * `size` is not used.
 */
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
const int dxy= (dx) + 2*(dy);\
hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
if(chroma_cmp_sub){\
int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
int c= ((x)>>1) + ((y)>>1)*uvstride;\
chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
}\
}
/*
 * Quarter-pel comparison including chroma, for the "simple_chroma_" variant.
 * Luma: the block at full-pel (x, y) with quarter-pel fraction (dx, dy) is
 * built in s->me.scratchpad with qpel_put and scored with cmp_sub.
 * Chroma (only when chroma_cmp_sub is non-NULL): the quarter-pel luma
 * position 4*(x)+(dx) is halved, then reduced once more with its lowest bit
 * OR-ed back in (cx>>1)|(cx&1), giving a half-pel chroma position; its low
 * bit selects the chroma_hpel_put variant and the rest is the chroma offset.
 * U and V scores over h>>1 rows are added to d. `size` is not used.
 *
 * Stray numeric lines that had ended up inside the macro body (and broke
 * the backslash continuation) have been removed.
 */
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 4*(dy);\
    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
    if(chroma_cmp_sub){\
        int cxy, c;\
        int cx= (4*(x) + (dx))/2;\
        int cy= (4*(y) + (dy))/2;\
        cx= (cx>>1)|(cx&1);\
        cy= (cy>>1)|(cy&1);\
        cxy= (cx&1) + 2*(cy&1);\
        c= ((cx)>>1) + ((cy)>>1)*uvstride;\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
    }\
}
#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
/* SIMPLE DIRECT HPEL */
#define RENAME(a) simple_direct_hpel_ ## a
//FIXME precalc divisions stuff
/*
 * Direct-mode comparison at half-pel precision.
 *
 * (x, y) is the full-pel candidate and (dx, dy) its half-pel fraction. If
 * the candidate lies inside [xmin, xmax] x [ymin, ymax], a prediction is
 * built in s->me.scratchpad and scored with cmp_func against src_y over a
 * 16-row block; otherwise d is set to the sentinel 256*256*256*32.
 *
 * Building the prediction:
 *  - the forward vector is direct_basis_mv + candidate (in half-pel units);
 *  - the backward vector is forward - co_located_mv when the candidate
 *    component is non-zero, otherwise co_located_mv scaled by
 *    (time_pb - time_pp)/time_pp (plus the 8x8 sub-block offset in 4MV mode);
 *  - the forward block is written with hpel_put from ref_y, then the
 *    backward block from ref_data[3] is combined into the same destination
 *    with hpel_avg.
 * In MV_TYPE_8X8 mode this is done per 8x8 sub-block, otherwise once for the
 * whole 16x16 block.
 *
 * Relies on names available at the expansion site (s, xmin..ymax, time_pb,
 * time_pp, ref_y, ref_data, src_y, stride, hpel_put, hpel_avg).
 * `size` is not used.
 *
 * NOTE(review): the text this was restored from had stray numeric lines
 * inside the macro and one closing brace too few. The brace is placed before
 * the cmp_func call so that both the 8x8 and the 16x16 branch produce a
 * score - confirm against upstream.
 */
#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
    const int hx= 2*(x) + (dx);\
    const int hy= 2*(y) + (dy);\
    if(s->mv_type==MV_TYPE_8X8){\
        int i;\
        for(i=0; i<4; i++){\
            int fx = s->me.direct_basis_mv[i][0] + hx;\
            int fy = s->me.direct_basis_mv[i][1] + hy;\
            int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
            int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
            int fxy= (fx&1) + 2*(fy&1);\
            int bxy= (bx&1) + 2*(by&1);\
\
            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
            hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
            hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\
        }\
    }else{\
        int fx = s->me.direct_basis_mv[0][0] + hx;\
        int fy = s->me.direct_basis_mv[0][1] + hy;\
        int bx = hx ? fx - s->me.co_located_mv[0][0] : (s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp);\
        int by = hy ? fy - s->me.co_located_mv[0][1] : (s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp);\
        int fxy= (fx&1) + 2*(fy&1);\
        int bxy= (bx&1) + 2*(by&1);\
\
        assert((fx>>1) + 16*s->mb_x >= -16);\
        assert((fy>>1) + 16*s->mb_y >= -16);\
        assert((fx>>1) + 16*s->mb_x <= s->width);\
        assert((fy>>1) + 16*s->mb_y <= s->height);\
        assert((bx>>1) + 16*s->mb_x >= -16);\
        assert((by>>1) + 16*s->mb_y >= -16);\
        assert((bx>>1) + 16*s->mb_x <= s->width);\
        assert((by>>1) + 16*s->mb_y <= s->height);\
\
        hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
        hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\
    }\
    d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
}else\
    d= 256*256*256*32;

/* Half-pel refinement uses the sub-pel comparison function. */
#define CMP_HPEL(d, dx, dy, x, y, size)\
    CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)

/* Full-pel search is the same computation with a zero fraction. */
#define CMP(d, x, y, size)\
    CMP_DIRECT(d, 0, 0, x, y, size, cmp)
#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
#undef CMP_DIRECT
/* SIMPLE DIRECT QPEL */
#define RENAME(a) simple_direct_qpel_ ## a
/*
 * Direct-mode comparison at quarter-pel precision.
 *
 * Same scheme as the half-pel variant, with positions in quarter-pel units:
 * if the candidate (x, y) + fraction (dx, dy) lies inside
 * [xmin, xmax] x [ymin, ymax], the forward block (qpel_put from ref_y) and
 * the backward block (qpel_avg from ref_data[3]) are combined in
 * s->me.scratchpad and scored with cmp_func against src_y over 16 rows;
 * otherwise d is set to the sentinel 256*256*256*32.
 * Only the [1] (8x8) qpel function tables are used, so the 16x16 case is
 * assembled from four 8x8 quadrants. `size` is not used.
 *
 * Relies on names available at the expansion site (s, xmin..ymax, time_pb,
 * time_pp, ref_y, ref_data, src_y, stride, qpel_put, qpel_avg).
 *
 * NOTE(review): the text this was restored from had one closing brace too
 * few. It is placed before the cmp_func call so that both the 8x8 and the
 * 16x16 branch produce a score - confirm against upstream.
 */
#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
    const int qx= 4*(x) + (dx);\
    const int qy= 4*(y) + (dy);\
    if(s->mv_type==MV_TYPE_8X8){\
        int i;\
        for(i=0; i<4; i++){\
            int fx = s->me.direct_basis_mv[i][0] + qx;\
            int fy = s->me.direct_basis_mv[i][1] + qy;\
            int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
            int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
            int fxy= (fx&3) + 4*(fy&3);\
            int bxy= (bx&3) + 4*(by&3);\
\
            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
            qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
            qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\
        }\
    }else{\
        int fx = s->me.direct_basis_mv[0][0] + qx;\
        int fy = s->me.direct_basis_mv[0][1] + qy;\
        int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
        int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
        int fxy= (fx&3) + 4*(fy&3);\
        int bxy= (bx&3) + 4*(by&3);\
\
        qpel_put[1][fxy](s->me.scratchpad               , (ref_y ) + (fx>>2) + (fy>>2)*(stride)               , stride);\
        qpel_put[1][fxy](s->me.scratchpad + 8           , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8           , stride);\
        qpel_put[1][fxy](s->me.scratchpad     + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride)     + 8*stride, stride);\
        qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\
        qpel_avg[1][bxy](s->me.scratchpad               , (ref_data[3]) + (bx>>2) + (by>>2)*(stride)               , stride);\
        qpel_avg[1][bxy](s->me.scratchpad + 8           , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8           , stride);\
        qpel_avg[1][bxy](s->me.scratchpad     + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride)     + 8*stride, stride);\
        qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
    }\
    d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
}else\
    d= 256*256*256*32;

/* Quarter-pel refinement uses the sub-pel comparison function. */
#define CMP_QPEL(d, dx, dy, x, y, size)\
    CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)

/* Full-pel search is the same computation with a zero fraction. */
#define CMP(d, x, y, size)\
    CMP_DIRECT(d, 0, 0, x, y, size, cmp)
#include "motion_est_template.c"
/*
 * Remove the per-variant macros of the direct-qpel instantiation.
 * The last line used to read CMP__DIRECT (double underscore), which names
 * no macro, so CMP_DIRECT stayed defined for the rest of the file.
 */
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
#undef CMP_DIRECT
static inline int get_penalty_factor(MpegEncContext *s, int type){
case FF_CMP_BIT:
return 1;
case FF_CMP_RD:
case FF_CMP_PSNR:
return (s->qscale*s->qscale*185 + 64)>>7;
ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp);
if(s->flags&CODEC_FLAG_QPEL){
if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
s->me.sub_motion_search= simple_chroma_qpel_motion_search;
else
s->me.sub_motion_search= simple_qpel_motion_search;
}else{
if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
s->me.sub_motion_search= simple_chroma_hpel_motion_search;
else if( s->avctx->me_sub_cmp == FF_CMP_SAD
&& s->avctx-> me_cmp == FF_CMP_SAD
&& s->avctx-> mb_cmp == FF_CMP_SAD)
s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
else
s->me.sub_motion_search= simple_hpel_motion_search;
}
if(s->avctx->me_cmp&FF_CMP_CHROMA){
s->me.motion_search[0]= simple_chroma_epzs_motion_search;
s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
s->me.motion_search[4]= simple_chroma_epzs_motion_search2;
}else{
s->me.motion_search[0]= simple_epzs_motion_search;
s->me.motion_search[1]= simple_epzs_motion_search4;
s->me.motion_search[4]= simple_epzs_motion_search2;
if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){
s->me.pre_motion_search= simple_chroma_epzs_motion_search;
}else{
s->me.pre_motion_search= simple_epzs_motion_search;
}
if(s->flags&CODEC_FLAG_QPEL){
if(s->avctx->mb_cmp&FF_CMP_CHROMA)
s->me.get_mb_score= simple_chroma_qpel_get_mb_score;
else
s->me.get_mb_score= simple_qpel_get_mb_score;
}else{
if(s->avctx->mb_cmp&FF_CMP_CHROMA)
s->me.get_mb_score= simple_chroma_hpel_get_mb_score;
else
s->me.get_mb_score= simple_hpel_get_mb_score;
}
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @param mean      value subtracted from every pixel
 * @return sum over all 256 pixels of ABS(pixel - mean)
 */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += ABS(pix[col] - mean);
        pix += line_size;
    }
    return total;
}
Zdenek Kabelac
committed
/**
 * "Search" that performs no search at all: it reports the pixel position
 * of the current macroblock, i.e. 16 times its macroblock coordinates.
 *
 * @param s      encoder context; mb_x and mb_y are read
 * @param mx_ptr receives 16 * s->mb_x
 * @param my_ptr receives 16 * s->mb_y
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_size = 16;

    *mx_ptr = s->mb_x * mb_size;
    *my_ptr = s->mb_y * mb_size;
}
/**
 * Exhaustive full-pel search around the current macroblock.
 *
 * Every position within (range - 1) pixels of the macroblock's own position,
 * clipped to [xmin, xmax] x [ymin, ymax], is scored with
 * s->dsp.pix_abs[0][0] over 16 rows. On equal scores the position closer
 * (in |dx| + |dy|) to the macroblock position wins.
 *
 * @param s           encoder context (mb_x, mb_y, linesize, new_picture, dsp are read)
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       search radius in pixels
 * @param xmin,ymin,xmax,ymax  inclusive limits for candidate positions
 * @param ref_picture reference plane the candidates are read from
 * @return score of the best position, or 0x7fffffff (with position 0,0)
 *         if the search window is empty
 *
 * NOTE(review): `pix` was used here without any declaration or assignment.
 * It is set to the current macroblock in s->new_picture.data[0], the plane
 * the other comparison code in this file uses as the source - confirm
 * against upstream.
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    uint8_t *pix;

    xx = 16 * s->mb_x;
    yy = 16 * s->mb_y;

    /* we lose one pixel to avoid boundary problems with half pixel prediction */
    x1 = xx - range + 1;
    if (x1 < xmin)
        x1 = xmin;
    x2 = xx + range - 1;
    if (x2 > xmax)
        x2 = xmax;
    y1 = yy - range + 1;
    if (y1 < ymin)
        y1 = ymin;
    y2 = yy + range - 1;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;
    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
                                     s->linesize, 16);
            /* strictly better, or equally good but closer to the macroblock */
            if (d < dmin ||
                (d == dmin &&
                 (abs(x - xx) + abs(y - yy)) <
                 (abs(mx - xx) + abs(my - yy)))) {
                dmin = d;
                mx = x;
                my = y;
            }
        }
    }

    *mx_ptr = mx;
    *my_ptr = my;

    return dmin;
}
/**
 * Coarse-to-fine ("logarithmic") full-pel search.
 *
 * Starting with a window of +-range around the macroblock position, the
 * window is sampled on a grid with spacing `range`; the window is then
 * re-centred on the best position so far, `range` is halved, and the
 * process repeats until `range` drops below 1. All windows are clipped to
 * [xmin, xmax] x [ymin, ymax]. On equal scores the position closer
 * (in |dx| + |dy|) to the macroblock position wins.
 *
 * @param s           encoder context (mb_x, mb_y, linesize, new_picture, dsp are read)
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       initial search radius / grid spacing in pixels
 * @param xmin,ymin,xmax,ymax  inclusive limits for candidate positions
 * @param ref_picture reference plane the candidates are read from
 * @return score of the best position found (0x7fffffff if none was scored)
 *
 * NOTE(review): `pix` was used here without any declaration or assignment.
 * It is set to the current macroblock in s->new_picture.data[0], the plane
 * the other comparison code in this file uses as the source - confirm
 * against upstream.
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    uint8_t *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;

    do {
        /* A spacing of 0 (range == 0 on entry) would never advance the
         * loops below, so always step by at least one pixel. */
        const int step = range > 0 ? range : 1;

        for (y = y1; y <= y2; y += step) {
            for (x = x1; x <= x2; x += step) {
                d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        /* Halve the spacing and re-centre the window on the best position. */
        range = range >> 1;

        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "log - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}
/**
 * Coarse-to-fine full-pel search that treats the two axes separately.
 *
 * In each round the row through the current centre is sampled with spacing
 * `range` to pick the best x, and the column through the current centre is
 * sampled the same way to pick the best y. The centre then moves to
 * (best x, best y), `range` is halved, and the process repeats until
 * `range` drops below 1. All windows are clipped to
 * [xmin, xmax] x [ymin, ymax].
 *
 * @param s           encoder context (mb_x, mb_y, linesize, new_picture, dsp are read)
 * @param mx_ptr      receives the x coordinate chosen by the search
 * @param my_ptr      receives the y coordinate chosen by the search
 * @param range       initial search radius / sample spacing in pixels
 * @param xmin,ymin,xmax,ymax  inclusive limits for candidate positions
 * @param ref_picture reference plane the candidates are read from
 * @return best score of the last column scan (dminy); note that this is the
 *         score at (previous centre x, my), not necessarily at (mx, my)
 *
 * NOTE(review): `pix` was used here without any declaration or assignment.
 * It is set to the current macroblock in s->new_picture.data[0], the plane
 * the other comparison code in this file uses as the source - confirm
 * against upstream.
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
    int mx, my, dminx, dminy;
    uint8_t *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    mx = 0;
    my = 0;

    x = xx;
    y = yy;
    do {
        /* A spacing of 0 (range == 0 on entry) would never advance the
         * loops below, so always step by at least one pixel. */
        const int step = range > 0 ? range : 1;

        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* Scan the current row; remember x so the column scan uses the
         * same centre rather than the loop's final x. */
        lastx = x;
        for (x = x1; x <= x2; x += step) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = x;
            }
        }

        /* Scan the current column. */
        x = lastx;
        for (y = y1; y <= y2; y += step) {
            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = y;
            }
        }

        /* Halve the spacing and re-centre on the per-axis winners. */
        range = range >> 1;
        x = mx;
        y = my;

        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "phods - MX: %d\tMY: %d\n", mx, my);
#endif

    /* half pixel search */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
Michael Niedermayer
committed
#define Z_THRESHOLD 256
d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
Michael Niedermayer
committed
static inline int sad_hpel_motion_search(MpegEncContext * s,
int pred_x, int pred_y, uint8_t *src_data[3],
uint8_t *ref_data[6], int stride, int uvstride,
int size, int h, uint8_t * const mv_penalty)
uint32_t *score_map= s->me.score_map;
const int penalty_factor= s->me.sub_penalty_factor;
const int xmin= s->me.xmin;
const int ymin= s->me.ymin;
const int xmax= s->me.xmax;
const int ymax= s->me.ymax;
// printf("S");
*mx_ptr = 0;
*my_ptr = 0;
return dmin;
}
// printf("N");
mx = *mx_ptr;
my = *my_ptr;
ptr = ref_data[0] + (my * stride) + mx;
dminh = dmin;
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
int dx=0, dy=0;
int d, pen_x, pen_y;
const int index= (my<<ME_MAP_SHIFT) + mx;
const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
mx<<=1;
my<<=1;
pen_x= pred_x + mx;
pen_y= pred_y + my;
}
}else{
if(l<=r){
if(t+l<=b+r){
CHECK_SAD_HALF_MV(x2 , -1, 0)
CHECK_SAD_HALF_MV(xy2, -1, +1)
CHECK_SAD_HALF_MV(x2 , +1, 0)
CHECK_SAD_HALF_MV(xy2, +1, +1)
}
*mx_ptr = mx;
*my_ptr = my;
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
Michael Niedermayer
committed
const int xy= s->mb_x + s->mb_y*s->mb_stride;
s->p_mv_table[xy][0] = mx;
s->p_mv_table[xy][1] = my;
/* has allready been set to the 4 MV if 4MV is done */
int mot_xy= s->block_index[0];
Wolfgang Hesseler
committed
s->current_picture.motion_val[0][mot_xy ][0]= mx;
s->current_picture.motion_val[0][mot_xy ][1]= my;
s->current_picture.motion_val[0][mot_xy+1][0]= mx;
s->current_picture.motion_val[0][mot_xy+1][1]= my;
mot_xy += s->block_wrap[0];
Wolfgang Hesseler
committed
s->current_picture.motion_val[0][mot_xy ][0]= mx;
s->current_picture.motion_val[0][mot_xy ][1]= my;
s->current_picture.motion_val[0][mot_xy+1][0]= mx;
s->current_picture.motion_val[0][mot_xy+1][1]= my;
/**
* get fullpel ME search limits.
*/
static inline void get_limits(MpegEncContext *s, int x, int y)
/*
if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1;
else s->me.range= 16;
*/
s->me.xmin = - x - 16;
s->me.ymin = - y - 16;
s->me.xmax = - x + s->mb_width *16;
s->me.ymax = - y + s->mb_height*16;
s->me.xmin = - x;
s->me.ymin = - y;
s->me.xmax = - x + s->mb_width *16 - 16;
s->me.ymax = - y + s->mb_height*16 - 16;
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
const int size= 1;
const int h=8;
int dmin_sum=0, mx4_sum=0, my4_sum=0;
uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
Michael Niedermayer
committed
int same=1;
const int stride= s->linesize;
const int uvstride= s->uvlinesize;
const int xmin= s->me.xmin;
const int ymin= s->me.ymin;
const int xmax= s->me.xmax;
const int ymax= s->me.ymax;
for(block=0; block<4; block++){
int mx4, my4;
int pred_x4, pred_y4;
int dmin4;
static const int off[4]= {2, 1, 1, -1};
const int mot_stride = s->block_wrap[0];
const int mot_xy = s->block_index[block];
const int block_x= (block&1);
const int block_y= (block>>1);
uint8_t *src_data[3]= {
s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
};
uint8_t *ref_data[3]= {
s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
};
Wolfgang Hesseler
committed
P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
if (s->first_slice_line && block<2) {
pred_x4= P_LEFT[0];
pred_y4= P_LEFT[1];
} else {
Wolfgang Hesseler
committed
P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift);
if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
pred_x4 = P_MEDIAN[0];
pred_y4 = P_MEDIAN[1];
}else { /* mpeg1 at least */
pred_x4= P_LEFT[0];
pred_y4= P_LEFT[1];
}
}
P_MV1[0]= mx;
P_MV1[1]= my;
dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4,
src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4,
pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]
&& s->avctx->mb_decision == FF_MB_DECISION_SIMPLE){
const int offset= ((block&1) + (block>>1)*stride)*8;
uint8_t *dest_y = s->me.scratchpad + offset;
if(s->quarter_sample){
Michael Niedermayer
committed
uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride;
dxy = ((my4 & 3) << 2) | (mx4 & 3);
if(s->no_rounding)
s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , stride);
s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride);
Michael Niedermayer
committed
uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride;
dxy = ((my4 & 1) << 1) | (mx4 & 1);
if(s->no_rounding)
s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h);
s->dsp.put_pixels_tab [1][dxy](dest_y , ref , stride, h);
}
dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor;
}else
dmin_sum+= dmin4;
if(s->quarter_sample){
mx4_sum+= mx4/2;
my4_sum+= my4/2;
}else{
mx4_sum+= mx4;
my4_sum+= my4;
}
Wolfgang Hesseler
committed
s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;
Michael Niedermayer
committed
if(mx4 != mx || my4 != my) same=0;
Michael Niedermayer
committed
if(same)
return INT_MAX;
if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, s->me.scratchpad, stride, 16);
}
if(s->avctx->mb_cmp&FF_CMP_CHROMA){
int dxy;
int mx, my;
int offset;
mx= ff_h263_round_chroma(mx4_sum);
my= ff_h263_round_chroma(my4_sum);
dxy = ((my & 1) << 1) | (mx & 1);
offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize;
if(s->no_rounding){
s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8);
s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8);
}else{
s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8);
s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8);
}
dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8);
dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8);
}
switch(s->avctx->mb_cmp&0xFF){
/*case FF_CMP_SSE:
return dmin_sum+ 32*s->qscale*s->qscale;*/
case FF_CMP_RD:
return dmin_sum;
default:
return dmin_sum+ 11*s->me.mb_penalty_factor;
}
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3],
int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my)
{
const int size=0;
const int h=8;
int block;
int P[10][2];
uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
int same=1;
const int stride= 2*s->linesize;
const int uvstride= 2*s->uvlinesize;
int dmin_sum= 0;
const int mot_stride= s->mb_stride;
const int xy= s->mb_x + s->mb_y*mot_stride;
s->me.ymin>>=1;
s->me.ymax>>=1;
for(block=0; block<2; block++){
int field_select;
int best_dmin= INT_MAX;
int best_field= -1;
uint8_t *src_data[3]= {
frame_src_data[0] + s-> linesize*block,
frame_src_data[1] + s->uvlinesize*block,
frame_src_data[2] + s->uvlinesize*block
};
for(field_select=0; field_select<2; field_select++){
int dmin, mx_i, my_i, pred_x, pred_y;
uint8_t *ref_data[3]= {
frame_ref_data[0] + s-> linesize*field_select,
frame_ref_data[1] + s->uvlinesize*field_select,
frame_ref_data[2] + s->uvlinesize*field_select
};
int16_t (*mv_table)[2]= mv_tables[block][field_select];
P_LEFT[0] = mv_table[xy - 1][0];
P_LEFT[1] = mv_table[xy - 1][1];
if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1);
pred_x= P_LEFT[0];
pred_y= P_LEFT[1];
P_TOP[0] = mv_table[xy - mot_stride][0];
P_TOP[1] = mv_table[xy - mot_stride][1];
P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1);
if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1);
if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1);
if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1);
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
}
P_MV1[0]= mx; //FIXME not correct if block != field_select
P_MV1[1]= my / 2;
dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y,
src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty);
dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin,
pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
mv_table[xy][0]= mx_i;
mv_table[xy][1]= my_i;
if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]
&& s->avctx->mb_decision == FF_MB_DECISION_SIMPLE){
int dxy;
//FIXME chroma ME
uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride;
dxy = ((my_i & 1) << 1) | (mx_i & 1);