/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
#include "internal.h"
#include "cabac.h"
#include "cabac_functions.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264_mvpred.h"
Aurelien Jacobs
committed
#include "mathops.h"
#include "rectangle.h"
#include "vdpau_internal.h"
Michael Niedermayer
committed
#include "libavutil/avassert.h"
/* Lookup table: rem6[qp] == qp % 6 for the 64 listed entries (qp 0..63). */
static const uint8_t rem6[QP_MAX_NUM + 1] = {
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
/* Lookup table: div6[qp] == qp / 6 for the 64 listed entries (qp 0..63). */
static const uint8_t div6[QP_MAX_NUM + 1] = {
     0,  0,  0,  0,  0,  0,
     1,  1,  1,  1,  1,  1,
     2,  2,  2,  2,  2,  2,
     3,  3,  3,  3,  3,  3,
     4,  4,  4,  4,  4,  4,
     5,  5,  5,  5,  5,  5,
     6,  6,  6,  6,  6,  6,
     7,  7,  7,  7,  7,  7,
     8,  8,  8,  8,  8,  8,
     9,  9,  9,  9,  9,  9,
    10, 10, 10, 10,
};
Baptiste Coudurier
committed
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
PIX_FMT_DXVA2_VLD,
PIX_FMT_VAAPI_VLD,
PIX_FMT_VDA_VLD,
Baptiste Coudurier
committed
PIX_FMT_YUVJ420P,
PIX_FMT_NONE
};
/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 */
/*
 * Validate the cached intra 4x4 prediction modes against the neighbour
 * availability masks of the current macroblock.
 *
 * The cache entries at scan8[0] + i (i = 0..3) are checked when bit 0x8000
 * of h->top_samples_available is clear; the entries at scan8[0] + 8 * i are
 * checked individually against h->left_samples_available.
 *
 * top[] and left[] are indexed by prediction mode and yield:
 *   < 0   the mode cannot be used without the missing neighbour (error)
 *   == 0  the mode is kept unchanged
 *   > 0   replacement mode, written back into the cache
 *
 * @param h decoder context; h->intra4x4_pred_mode_cache may be modified
 * @return 0 on success, -1 if a mode requires an unavailable neighbour
 */
int ff_h264_check_intra4x4_pred_mode(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    static const int8_t top[12] = {
        -1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
    };
    static const int8_t left[12] = {
        0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
    };
    int i;

    if (!(h->top_samples_available & 0x8000)) {
        for (i = 0; i < 4; i++) {
            const int mode   = h->intra4x4_pred_mode_cache[scan8[0] + i];
            const int status = top[mode];

            if (status < 0) {
                /* Report the offending mode, not the table sentinel. */
                av_log(h->s.avctx, AV_LOG_ERROR,
                       "top block unavailable for requested intra4x4 mode %d at %d %d\n",
                       mode, s->mb_x, s->mb_y);
                return -1;
            } else if (status) {
                h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
            }
        }
    }

    if ((h->left_samples_available & 0x8888) != 0x8888) {
        /* One availability bit per checked left-side cache entry. */
        static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };

        for (i = 0; i < 4; i++) {
            if (!(h->left_samples_available & mask[i])) {
                const int mode   = h->intra4x4_pred_mode_cache[scan8[0] + 8 * i];
                const int status = left[mode];

                if (status < 0) {
                    /* Report the offending mode, not the table sentinel. */
                    av_log(h->s.avctx, AV_LOG_ERROR,
                           "left block unavailable for requested intra4x4 mode %d at %d %d\n",
                           mode, s->mb_x, s->mb_y);
                    return -1;
                } else if (status) {
                    h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
                }
            }
        }
    }

    return 0;
} // FIXME cleanup like ff_h264_check_intra_pred_mode
/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 */
/*
 * Validate a single intra prediction mode against the top/left neighbour
 * availability masks and substitute a replacement mode where the tables
 * below provide one.
 *
 * NOTE(review): this listing is incomplete - the return statements and the
 * closing braces of every block are not present. Restore them from the
 * upstream file before building.
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
{
MpegEncContext *const s = &h->s;
/* Replacement tables indexed by mode; a negative entry marks a mode that
 * cannot be used when that neighbour is missing (see the "< 0" checks). */
static const int8_t top[7] = { LEFT_DC_PRED8x8, 1, -1, -1 };
static const int8_t left[7] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };
/* The comparison is unsigned, so negative modes are rejected here too. */
if (mode > 6U) {
av_log(h->s.avctx, AV_LOG_ERROR,
"out of range intra chroma pred mode at %d %d\n",
s->mb_x, s->mb_y);
/* Top neighbour missing: remap the mode through top[]. */
if (!(h->top_samples_available & 0x8000)) {
mode = top[mode];
if (mode < 0) {
av_log(h->s.avctx, AV_LOG_ERROR,
"top block unavailable for requested intra mode at %d %d\n",
s->mb_x, s->mb_y);
/* At least one of the two left availability bits is clear. */
if ((h->left_samples_available & 0x8080) != 0x8080) {
mode = left[mode];
/* Chroma with exactly one of the two left bits set. */
if (is_chroma && (h->left_samples_available & 0x8080)) {
// mad cow disease mode, aka MBAFF + constrained_intra_pred
mode = ALZHEIMER_DC_L0T_PRED8x8 +
(!(h->left_samples_available & 0x8000)) +
2 * (mode == DC_128_PRED8x8);
if (mode < 0) {
av_log(h->s.avctx, AV_LOG_ERROR,
"left block unavailable for requested intra mode at %d %d\n",
s->mb_x, s->mb_y);
/*
 * Read the NAL header byte at src[0], then look for 00 00 0x sequences in
 * the payload and copy it into h->rbsp_buffer[] with the 00 00 03 escapes
 * removed.
 *
 * Outputs visible here: *dst_length (payload length), *consumed (bytes
 * consumed including the header byte); NULL is returned on one path.
 *
 * NOTE(review): this listing is incomplete - the declarations of i, si, di,
 * dst and bufidx, several closing braces, return statements and at least one
 * preprocessor #else/#endif are not present. Stray non-code lines (author
 * names / "committed") are interleaved as well.
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
int *dst_length, int *consumed, int length)
{
// src[0]&0x80; // forbidden bit
/* Header byte: everything above the low five bits, then the low five bits. */
h->nal_ref_idc = src[0] >> 5;
h->nal_unit_type = src[0] & 0x1F;
/* Word-at-a-time scan: the bit trick below is non-zero only when one of
 * the masked bytes of the loaded word is zero, so words without a
 * candidate zero byte are skipped quickly. */
#if HAVE_FAST_UNALIGNED
#if HAVE_FAST_64BIT
#define RS 7
for (i = 0; i + 1 < length; i += 9) {
if (!((~AV_RN64A(src + i) &
(AV_RN64A(src + i) - 0x0100010001000101ULL)) &
0x8000800080008080ULL))
#else
#define RS 3
for (i = 0; i + 1 < length; i += 5) {
if (!((~AV_RN32A(src + i) &
(AV_RN32A(src + i) - 0x01000101U)) &
0x80008080U))
#endif
/* Step to the first zero byte inside the word that matched. */
if (i > 0 && !src[i])
i--;
while (src[i])
i++;
/* Byte-wise scan, looking at every second byte. */
#define RS 0
for (i = 0; i + 1 < length; i += 2) {
if (src[i])
continue;
if (i > 0 && src[i - 1] == 0)
i--;
/* 00 00 0x with x <= 3: either an escape (x == 3) or a start code. */
if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {
if (src[i + 2] != 3) {
/* startcode, so we must be past the end */
if (i >= length - 1) { // no escaped 0
*dst_length = length;
*consumed = length + 1; // +1 for the header
// use second escape buffer for inter data
bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
/* Buffer is sized for the whole payload plus input padding. */
av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx],
length + FF_INPUT_BUFFER_PADDING_SIZE);
dst = h->rbsp_buffer[bufidx];
Francois Oligny-Lemieux
committed
return NULL;
/* Copying starts at si = di = i. */
si = di = i;
while (si + 2 < length) {
// remove escapes (very rare 1:2^22)
/* A third byte > 3 cannot end an escape: copy two bytes at once. */
if (src[si + 2] > 3) {
dst[di++] = src[si++];
dst[di++] = src[si++];
} else if (src[si] == 0 && src[si + 1] == 0) {
/* 00 00 03 -> 00 00: the 03 byte is dropped. */
if (src[si + 2] == 3) { // escape
dst[di++] = 0;
dst[di++] = 0;
si += 3;
Michael Niedermayer
committed
continue;
/* Copy the remaining tail bytes unchanged. */
while (si < length)
dst[di++] = src[si++];
/* Zero the padding that follows the unescaped payload. */
memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
Alexander Strange
committed
*dst_length = di;
*consumed = si + 1; // +1 for the header
/* FIXME store exact number of bits in the getbitcontext
* (it is needed for decoding) */
/**
* Identify the exact end of the bitstream
* @return the length of the trailing, or 0 if damaged
*/
Diego Biurrun
committed
/* NOTE(review): this listing is incomplete - the opening brace, the
 * declarations/initialisation of v and r, the closing braces and the final
 * return are not present. */
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
tprintf(h->s.avctx, "rbsp trailing %X\n", v);
/* Scan v from its least significant bit; return the 1-based position
 * (1..8) of the first set bit. */
for (r = 1; r < 9; r++) {
if (v & 1)
return r;
v >>= 1;
/*
 * Compute the largest row magnitude touched when predicting block n from
 * the given list.
 *
 * The vertical motion vector is read from h->mv_cache[list][scan8[n]][1]
 * in quarter-sample units. When it has a fractional part, two extra rows
 * are added above and below the block.
 *
 * @return max(|first row|, last row) relative to y_offset
 */
static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
                                         int height, int y_offset, int list)
{
    const int mv_y      = h->mv_cache[list][scan8[n]][1];
    const int margin    = (mv_y & 3) ? 2 : 0;
    const int base_row  = (mv_y >> 2) + y_offset;
    const int first_row = base_row - margin;
    const int last_row  = base_row + margin + height;

    return FFMAX(abs(first_row), last_row);
}
/*
 * Record, for each reference list used by block n, the lowest row the
 * block needs from its reference picture.
 *
 * @param refs     per-list, per-reference maximum row; an entry < 0 means
 *                 the reference has not been seen yet
 * @param n        block index (looked up through scan8[])
 * @param height   block height passed on to get_lowest_part_list_y()
 * @param y_offset vertical offset of the block inside the macroblock
 * @param list0    non-zero if the block is predicted from list 0
 * @param list1    non-zero if the block is predicted from list 1
 * @param nrefs    per-list count of distinct references recorded in refs
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
                                     int height, int y_offset, int list0,
                                     int list1, int *nrefs)
{
    MpegEncContext *const s = &h->s;
    const int list_used[2]  = { list0, list1 };
    int list;

    for (list = 0; list < 2; list++) {
        int ref_n;
        Picture *ref;

        if (!list_used[list])
            continue;

        ref_n = h->ref_cache[list][scan8[n]];
        ref   = &h->ref_list[list][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
            (ref->f.reference & 3) != s->picture_structure) {
            const int my = get_lowest_part_list_y(h, ref, n, height,
                                                  y_offset, list);

            /* First time this reference is seen for this list. */
            if (refs[list][ref_n] < 0)
                nrefs[list] += 1;
            refs[list][ref_n] = FFMAX(refs[list][ref_n], my);
        }
    }
}
/**
* Wait until all reference frames are available for MC operations.
*
* @param h the H264 context
*/
/*
 * Collect, per reference list and reference index, the lowest row needed
 * by the partitions of the current macroblock (via get_lowest_part_y()),
 * then call ff_thread_await_progress() on each referenced picture.
 *
 * NOTE(review): this listing is incomplete - the declarations of refs,
 * nrefs and row, the first "if" of the partition chain, the assignment of
 * row and most closing braces are not present. A run of bare numbers
 * (365..393) is interleaved and is not code.
 */
static void await_references(H264Context *h)
{
MpegEncContext *const s = &h->s;
const int mb_xy = h->mb_xy;
const int mb_type = s->current_picture.f.mb_type[mb_xy];
int ref, list;
/* Fill refs with -1; get_lowest_part_y() treats negative as "not seen". */
memset(refs, -1, sizeof(refs));
/* One partition of height 16 starting at block 0. */
get_lowest_part_y(h, refs, 0, 16, 0,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
} else if (IS_16X8(mb_type)) {
/* Two partitions of height 8: block 0 at y 0 and block 8 at y 8. */
get_lowest_part_y(h, refs, 0, 8, 0,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
get_lowest_part_y(h, refs, 8, 8, 8,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
} else if (IS_8X16(mb_type)) {
/* Two partitions of height 16: blocks 0 and 4, both at y 0. */
get_lowest_part_y(h, refs, 0, 16, 0,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
get_lowest_part_y(h, refs, 4, 16, 0,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
} else {
int i;
assert(IS_8X8(mb_type));
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
/* Four sub-macroblocks; n is the first block of each, y_offset is 0 or 8. */
for (i = 0; i < 4; i++) {
const int sub_mb_type = h->sub_mb_type[i];
const int n = 4 * i;
int y_offset = (i & 2) << 2;
if (IS_SUB_8X8(sub_mb_type)) {
get_lowest_part_y(h, refs, n, 8, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
} else if (IS_SUB_8X4(sub_mb_type)) {
/* Upper and lower halves: blocks n and n + 2. */
get_lowest_part_y(h, refs, n, 4, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
} else if (IS_SUB_4X8(sub_mb_type)) {
/* Blocks n and n + 1, same vertical offset. */
get_lowest_part_y(h, refs, n, 8, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
get_lowest_part_y(h, refs, n + 1, 8, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
} else {
int j;
assert(IS_SUB_4X4(sub_mb_type));
/* Four blocks of height 4; j & 2 selects the lower pair. */
for (j = 0; j < 4; j++) {
int sub_y_offset = y_offset + 2 * (j & 2);
get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
/* Walk the lists from last to first; the inner loop stops once
 * nrefs[list] has been counted down to zero. */
for (list = h->list_count - 1; list >= 0; list--)
for (ref = 0; ref < 48 && nrefs[list]; ref++) {
if (row >= 0) {
Picture *ref_pic = &h->ref_list[list][ref];
int ref_field = ref_pic->f.reference - 1;
int ref_field_picture = ref_pic->field_picture;
int pic_height = 16 * s->mb_height >> ref_field_picture;
row <<= MB_MBAFF;
nrefs[list]--;
/* The row passed to ff_thread_await_progress() is clamped to
 * pic_height - 1 in every case below. */
if (!FIELD_PICTURE && ref_field_picture) { // frame referencing two fields
ff_thread_await_progress(&ref_pic->f,
FFMIN((row >> 1) - !(row & 1),
pic_height - 1),
1);
ff_thread_await_progress(&ref_pic->f,
FFMIN((row >> 1), pic_height - 1),
0);
} else if (FIELD_PICTURE && !ref_field_picture) { // field referencing one field of a frame
ff_thread_await_progress(&ref_pic->f,
FFMIN(row * 2 + ref_field,
pic_height - 1),
0);
} else if (FIELD_PICTURE) {
ff_thread_await_progress(&ref_pic->f,
FFMIN(row, pic_height - 1),
ref_field);
} else {
ff_thread_await_progress(&ref_pic->f,
FFMIN(row, pic_height - 1),
0);
}
/*
 * Motion-compensate one partition from a single reference picture: luma
 * through qpix_op[], chroma through chroma_op (or through qpix_op[] again
 * on the path that returns before ysh is computed).
 *
 * NOTE(review): this listing is incomplete - the opening brace, the
 * declaration of ysh, the assignments of src_cb/src_cr on the first chroma
 * path, several "if" headers and closing braces are not present.
 */
static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
int n, int square, int height,
int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op,
h264_chroma_mc_func chroma_op,
int pixel_shift, int chroma_idc)
MpegEncContext *const s = &h->s;
/* Motion vector plus block position scaled by 8. */
const int mx = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
int my = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
/* The low two bits of mx/my select one of the 16 qpix_op entries. */
const int luma_xy = (mx & 3) + ((my & 3) << 2);
int offset = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
uint8_t *src_y = pic->f.data[0] + offset;
uint8_t *src_cb, *src_cr;
int extra_width = h->emu_edge_width;
int extra_height = h->emu_edge_height;
int emu = 0;
const int full_mx = mx >> 2;
const int full_my = my >> 2;
const int pic_width = 16 * s->mb_width;
const int pic_height = 16 * s->mb_height >> MB_FIELD;
/* Shrink the allowed margin when the vector has a fractional part. */
if (mx & 7)
extra_width -= 3;
if (my & 7)
extra_height -= 3;
/* Block reaches outside picture plus margin: build an edge-extended copy
 * in s->edge_emu_buffer and read from there instead. */
if (full_mx < 0 - extra_width ||
full_my < 0 - extra_height ||
full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
full_my + 16 /*FIXME*/ > pic_height + extra_height) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer,
src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
full_my - 2, pic_width, pic_height);
src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
emu = 1;
}
qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
/* Non-square partition: a second call at offset delta covers the rest. */
if (!square)
qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* Gray-only decoding skips chroma. */
if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY)
return;
/* Chroma handled with the luma functions and luma line size. */
if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer,
src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2,
pic_width, pic_height);
src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
if (!square)
qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
if (emu) {
s->dsp.emulated_edge_mc(s->edge_emu_buffer,
src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2,
pic_width, pic_height);
src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
if (!square)
qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
return;
}
/* Vertical chroma shift: 3 normally, 2 when chroma_idc == 2. */
ysh = 3 - (chroma_idc == 2 /* yuv422 */);
if (chroma_idc == 1 /* yuv420 */ && MB_FIELD) {
// chroma offset when predicting from a field of opposite parity
my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
(my >> ysh) * h->mb_uvlinesize;
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
(my >> ysh) * h->mb_uvlinesize;
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
/* Chroma height is halved only when chroma_idc == 1. */
chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
height >> (chroma_idc == 1 /* yuv420 */),
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
/*
 * Unweighted prediction of one partition: offset the destination pointers
 * to the partition, then run mc_dir_part() for list 0 and/or list 1.
 *
 * NOTE(review): this listing is incomplete - the opening brace and the
 * closing braces of the chroma_idc chain, both list blocks and the
 * function are not present.
 */
static av_always_inline void mc_part_std(H264Context *h, int n, int square,
int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
int x_offset, int y_offset,
qpel_mc_func *qpix_put,
h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg,
h264_chroma_mc_func chroma_avg,
int list0, int list1,
int pixel_shift, int chroma_idc)
MpegEncContext *const s = &h->s;
/* The first prediction written uses the "put" functions. */
qpel_mc_func *qpix_op = qpix_put;
h264_chroma_mc_func chroma_op = chroma_put;
dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
/* Chroma destination offset depends on chroma_idc. */
if (chroma_idc == 3 /* yuv444 */) {
dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
} else if (chroma_idc == 2 /* yuv422 */) {
dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
} else { /* yuv420 */
dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
/* Add the macroblock position to the partition offsets. */
x_offset += 8 * s->mb_x;
y_offset += 8 * (s->mb_y >> MB_FIELD);
if (list0) {
Picture *ref = &h->ref_list[0][h->ref_cache[0][scan8[n]]];
mc_dir_part(h, ref, n, square, height, delta, 0,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma_idc);
/* Switch to the "avg" functions for whatever is written next. */
qpix_op = qpix_avg;
chroma_op = chroma_avg;
if (list1) {
Picture *ref = &h->ref_list[1][h->ref_cache[1][scan8[n]]];
mc_dir_part(h, ref, n, square, height, delta, 1,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_op, chroma_op, pixel_shift, chroma_idc);
/*
 * Weighted prediction of one partition. With two references the second
 * prediction is held in s->obmc_scratchpad and combined through the
 * biweight functions; with one reference the single prediction is scaled
 * in place through the weight functions.
 *
 * NOTE(review): this listing is incomplete - the "if (list0 && list1)"
 * header, the destination arguments of the two bidirectional mc_dir_part()
 * calls, the declaration of weight1, the chroma_height assignment on the
 * yuv444 path and several closing braces are not present.
 */
static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
int x_offset, int y_offset,
qpel_mc_func *qpix_put,
h264_chroma_mc_func chroma_put,
h264_weight_func luma_weight_op,
h264_weight_func chroma_weight_op,
h264_biweight_func luma_weight_avg,
h264_biweight_func chroma_weight_avg,
int list0, int list1,
int pixel_shift, int chroma_idc)
{
MpegEncContext *const s = &h->s;
int chroma_height;
dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
if (chroma_idc == 3 /* yuv444 */) {
/* The luma weight functions are reused for chroma in this case. */
chroma_weight_avg = luma_weight_avg;
chroma_weight_op = luma_weight_op;
dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * h->mb_linesize;
} else if (chroma_idc == 2 /* yuv422 */) {
chroma_height = height;
dest_cb += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
dest_cr += (x_offset << pixel_shift) + 2 * y_offset * h->mb_uvlinesize;
} else { /* yuv420 */
chroma_height = height >> 1;
dest_cb += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
dest_cr += (x_offset << pixel_shift) + y_offset * h->mb_uvlinesize;
/* Add the macroblock position to the partition offsets. */
x_offset += 8 * s->mb_x;
y_offset += 8 * (s->mb_y >> MB_FIELD);
/* don't optimize for luma-only case, since B-frames usually
* use implicit weights => chroma too. */
/* Temporary planes carved out of the scratchpad. */
uint8_t *tmp_cb = s->obmc_scratchpad;
uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
uint8_t *tmp_y = s->obmc_scratchpad + 16 * h->mb_uvlinesize;
int refn0 = h->ref_cache[0][scan8[n]];
int refn1 = h->ref_cache[1][scan8[n]];
mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
x_offset, y_offset, qpix_put, chroma_put,
pixel_shift, chroma_idc);
mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
x_offset, y_offset, qpix_put, chroma_put,
pixel_shift, chroma_idc);
/* use_weight == 2: weights come from h->implicit_weight, denominator 5. */
if (h->use_weight == 2) {
int weight0 = h->implicit_weight[refn0][refn1][s->mb_y & 1];
luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
chroma_height, 5, weight0, weight1, 0);
} else {
/* Otherwise weights come from the luma/chroma weight tables; the last
 * argument is the sum of the two entries at index [1]. */
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
h->luma_log2_weight_denom,
h->luma_weight[refn0][0][0],
h->luma_weight[refn1][1][0],
h->luma_weight[refn0][0][1] +
h->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][0][0],
h->chroma_weight[refn1][1][0][0],
h->chroma_weight[refn0][0][0][1] +
h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][1][0],
h->chroma_weight[refn1][1][1][0],
h->chroma_weight[refn0][0][1][1] +
h->chroma_weight[refn1][1][1][1]);
} else {
/* Single reference: list 1 if list1 is set, otherwise list 0. */
int list = list1 ? 1 : 0;
int refn = h->ref_cache[list][scan8[n]];
Picture *ref = &h->ref_list[list][refn];
mc_dir_part(h, ref, n, square, height, delta, list,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put, chroma_put, pixel_shift, chroma_idc);
luma_weight_op(dest_y, h->mb_linesize, height,
h->luma_log2_weight_denom,
h->luma_weight[refn][list][0],
h->luma_weight[refn][list][1]);
/* Chroma is only weighted when h->use_weight_chroma is set. */
if (h->use_weight_chroma) {
chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][0][0],
h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][1][0],
h->chroma_weight[refn][list][1][1]);
/*
 * Dispatch one partition to mc_part_weighted() or mc_part_std().
 *
 * NOTE(review): this listing is incomplete - the opening brace, part of
 * the mc_part_weighted() argument list (offsets and put functions), the
 * "else" before mc_part_std() and the closing brace are not present.
 */
static av_always_inline void mc_part(H264Context *h, int n, int square,
int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
int x_offset, int y_offset,
qpel_mc_func *qpix_put,
h264_chroma_mc_func chroma_put,
qpel_mc_func *qpix_avg,
h264_chroma_mc_func chroma_avg,
h264_weight_func *weight_op,
h264_biweight_func *weight_avg,
int list0, int list1,
int pixel_shift, int chroma_idc)
/* Weighted path: use_weight == 1, or use_weight == 2 with both lists in
 * use and an implicit weight other than 32 for this reference pair. */
if ((h->use_weight == 2 && list0 && list1 &&
(h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->s.mb_y & 1] != 32)) ||
h->use_weight == 1)
mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
weight_op[0], weight_op[1], weight_avg[0],
weight_avg[1], list0, list1, pixel_shift, chroma_idc);
mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
chroma_avg, list0, list1, pixel_shift, chroma_idc);
/*
 * Issue s->dsp.prefetch() calls on the reference picture addressed by the
 * motion vector cached at scan8[0] for the given list.
 *
 * NOTE(review): this listing is incomplete - the declaration of s and the
 * function's closing brace are not present.
 */
static av_always_inline void prefetch_motion(H264Context *h, int list,
int pixel_shift, int chroma_idc)
{
/* fetch pixels for estimated mv 4 macroblocks ahead
* optimized for 64byte cache lines */
const int refn = h->ref_cache[list][scan8[0]];
/* Nothing is prefetched when the cached reference index is negative. */
if (refn >= 0) {
const int mx = (h->mv_cache[list][scan8[0]][0] >> 2) + 16 * s->mb_x + 8;
const int my = (h->mv_cache[list][scan8[0]][1] >> 2) + 16 * s->mb_y;
uint8_t **src = h->ref_list[list][refn].f.data;
/* The row offset varies with mb_x & 3. */
int off = (mx << pixel_shift) +
(my + (s->mb_x & 3) * 4) * h->mb_linesize +
(64 << pixel_shift);
s->dsp.prefetch(src[0] + off, s->linesize, 4);
if (chroma_idc == 3 /* yuv444 */) {
/* Chroma planes use the same offset and stride as luma here. */
s->dsp.prefetch(src[1] + off, s->linesize, 4);
s->dsp.prefetch(src[2] + off, s->linesize, 4);
} else {
off = ((mx >> 1) << pixel_shift) +
((my >> 1) + (s->mb_x & 7)) * s->uvlinesize +
(64 << pixel_shift);
/* Stride src[2] - src[1] with count 2: one call touches both planes. */
s->dsp.prefetch(src[1] + off, src[2] - src[1], 2);
}
}
/*
 * Inter prediction of the current macroblock: select the partition layout
 * from mb_type / sub_mb_type and call mc_part() once per partition, with
 * prefetch_motion() for list 0 before and list 1 after.
 *
 * NOTE(review): this listing is incomplete - the opening brace, the
 * statement guarded by the HAVE_THREADS test, the "if"/"else if" headers
 * for the first two layouts, two arguments of the first mc_part() call,
 * the declaration of i and several closing braces are not present. A run
 * of bare numbers (822..850) is interleaved and is not code.
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y,
uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func(*qpix_put)[16],
h264_chroma_mc_func(*chroma_put),
qpel_mc_func(*qpix_avg)[16],
h264_chroma_mc_func(*chroma_avg),
h264_weight_func *weight_op,
h264_biweight_func *weight_avg,
int pixel_shift, int chroma_idc)
MpegEncContext *const s = &h->s;
const int mb_xy = h->mb_xy;
const int mb_type = s->current_picture.f.mb_type[mb_xy];
if (HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
prefetch_motion(h, 0, pixel_shift, chroma_idc);
/* Whole macroblock as one square partition of height 16. */
mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma_idc);
/* Two partitions of height 8: block 0 at y_offset 0, block 8 at 4. */
mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma_idc);
mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma_idc);
} else if (IS_8X16(mb_type)) {
/* Two partitions of height 16: block 0 at x_offset 0, block 4 at 4. */
mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
pixel_shift, chroma_idc);
mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
pixel_shift, chroma_idc);
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
/* Four sub-macroblocks; x_offset/y_offset are 0 or 4 each. */
for (i = 0; i < 4; i++) {
const int sub_mb_type = h->sub_mb_type[i];
const int n = 4 * i;
int x_offset = (i & 1) << 2;
int y_offset = (i & 2) << 1;
if (IS_SUB_8X8(sub_mb_type)) {
mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma_idc);
} else if (IS_SUB_8X4(sub_mb_type)) {
/* Upper and lower halves: blocks n and n + 2. */
mc_part(h, n, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr,
x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma_idc);
mc_part(h, n + 2, 0, 4, 4 << pixel_shift,
dest_y, dest_cb, dest_cr, x_offset, y_offset + 2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma_idc);
} else if (IS_SUB_4X8(sub_mb_type)) {
/* Left and right halves: blocks n and n + 1. */
mc_part(h, n, 0, 8, 4 * h->mb_linesize,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma_idc);
mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize,
dest_y, dest_cb, dest_cr, x_offset + 2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma_idc);
} else {
int j;
assert(IS_SUB_4X4(sub_mb_type));
/* Four square blocks of height 4, offsets stepping by 2. */
for (j = 0; j < 4; j++) {
int sub_x_offset = x_offset + 2 * (j & 1);
int sub_y_offset = y_offset + (j & 2);
mc_part(h, n + j, 1, 4, 0,
dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
pixel_shift, chroma_idc);
prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
/*
 * 4:2:0 entry point: forwards every argument to hl_motion() with
 * chroma_idc fixed to 1.
 */
static av_always_inline void hl_motion_420(H264Context *h, uint8_t *dest_y,
                                           uint8_t *dest_cb, uint8_t *dest_cr,
                                           qpel_mc_func(*qpix_put)[16],
                                           h264_chroma_mc_func(*chroma_put),
                                           qpel_mc_func(*qpix_avg)[16],
                                           h264_chroma_mc_func(*chroma_avg),
                                           h264_weight_func *weight_op,
                                           h264_biweight_func *weight_avg,
                                           int pixel_shift)
{
    enum { CHROMA_IDC_YUV420 = 1 };

    hl_motion(h,
              dest_y, dest_cb, dest_cr,
              qpix_put, chroma_put,
              qpix_avg, chroma_avg,
              weight_op, weight_avg,
              pixel_shift, CHROMA_IDC_YUV420);
}
/*
 * 4:2:2 entry point: forwards every argument to hl_motion() with
 * chroma_idc fixed to 2.
 */
static av_always_inline void hl_motion_422(H264Context *h, uint8_t *dest_y,
                                           uint8_t *dest_cb, uint8_t *dest_cr,
                                           qpel_mc_func(*qpix_put)[16],
                                           h264_chroma_mc_func(*chroma_put),
                                           qpel_mc_func(*qpix_avg)[16],
                                           h264_chroma_mc_func(*chroma_avg),
                                           h264_weight_func *weight_op,
                                           h264_biweight_func *weight_avg,
                                           int pixel_shift)
{
    hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
              qpix_avg, chroma_avg, weight_op, weight_avg,
              pixel_shift, 2 /* yuv422 */);
}
/*
 * Release the tables owned by the context with av_freep(), then the
 * buffers reached through hx.
 *
 * NOTE(review): this listing is incomplete - the declarations of i and hx,
 * the loop over thread contexts, any use of free_rbsp and the closing
 * braces are not present. Stray non-code lines (author names /
 * "committed") are interleaved.
 */
static void free_tables(H264Context *h, int free_rbsp)
{
Laurent Aimar
committed
av_freep(&h->chroma_pred_mode_table);
av_freep(&h->cbp_table);
Laurent Aimar
committed
av_freep(&h->mvd_table[0]);
av_freep(&h->mvd_table[1]);
av_freep(&h->direct_table);
av_freep(&h->non_zero_count);
av_freep(&h->slice_table_base);
av_freep(&h->list_counts);
Laurent Aimar
committed
/* Buffers reached through hx. */
av_freep(&hx->top_borders[1]);
av_freep(&hx->top_borders[0]);
av_freep(&hx->s.obmc_scratchpad);
av_freep(&hx->rbsp_buffer[1]);
av_freep(&hx->rbsp_buffer[0]);
/* Keep the recorded sizes in step with the freed rbsp buffers. */
hx->rbsp_buffer_size[0] = 0;
hx->rbsp_buffer_size[1] = 0;
/* thread_context[0] is not freed here (only non-zero i). */
if (i)
av_freep(&h->thread_context[i]);
/*
 * Build the 8x8 dequantisation tables h->dequant8_coeff[i][q][] from
 * dequant8_coeff_init and the PPS 8x8 scaling matrices.
 *
 * NOTE(review): this listing is incomplete - the outer loop over i, the
 * loop over q (presumably bounded by max_qp - confirm) and the closing
 * braces are not present.
 */
static void init_dequant8_coeff_table(H264Context *h)
{
int i, j, q, x;
const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
h->dequant8_coeff[i] = h->dequant8_buffer[i];
/* If an earlier scaling matrix is identical, point at its buffer instead. */
for (j = 0; j < i; j++)
if (!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i],
64 * sizeof(uint8_t))) {
h->dequant8_coeff[i] = h->dequant8_buffer[j];
break;
}
/* q is split into q / 6 (left-shift amount) and q % 6 (init row). */
int shift = div6[q];
int idx = rem6[q];
/* The destination index swaps the row and column of x (transposed). */
for (x = 0; x < 64; x++)
h->dequant8_coeff[i][q][(x >> 3) | ((x & 7) << 3)] =
((uint32_t)dequant8_coeff_init[idx][dequant8_coeff_init_scan[((x >> 1) & 12) | (x & 3)]] *
h->pps.scaling_matrix8[i][x]) << shift;
/*
 * Build the 4x4 dequantisation tables h->dequant4_coeff[i][q][] for the
 * six PPS 4x4 scaling matrices.
 *
 * NOTE(review): this listing is incomplete - the loop over q (presumably
 * bounded by max_qp - confirm) and the closing braces are not present.
 */
static void init_dequant4_coeff_table(H264Context *h)
{
int i, j, q, x;
const int max_qp = 51 + 6 * (h->sps.bit_depth_luma - 8);
for (i = 0; i < 6; i++) {
h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* If an earlier scaling matrix is identical, point at its buffer instead. */
for (j = 0; j < i; j++)
if (!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i],
16 * sizeof(uint8_t))) {
h->dequant4_coeff[i] = h->dequant4_buffer[j];
break;
}
/* q is split into q / 6 (left-shift amount, plus 2) and q % 6 (init row). */
int shift = div6[q] + 2;
int idx = rem6[q];
/* The destination index swaps the row and column of x (transposed). */
for (x = 0; x < 16; x++)
h->dequant4_coeff[i][q][(x >> 2) | ((x << 2) & 0xF)] =
((uint32_t)dequant4_coeff_init[idx][(x & 1) + ((x >> 2) & 1)] *
h->pps.scaling_matrix4[i][x]) << shift;
static void init_dequant_tables(H264Context *h)
{