/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
#define UNCHECKED_BITSTREAM_READER 1
Michael Niedermayer
committed
#include "libavutil/opt.h"
#include "internal.h"
#include "cabac.h"
#include "cabac_functions.h"
#include "error_resilience.h"
#include "avcodec.h"
#include "mpegvideo.h"
#include "h264_mvpred.h"
Aurelien Jacobs
committed
#include "mathops.h"
#include "rectangle.h"
#include "svq3.h"
#include "thread.h"
#include "vdpau_internal.h"
static void flush_change(H264Context *h);
/* Four-entry size table.
 * NOTE(review): presumably the number of samples in one macroblock, indexed
 * by chroma format (256 luma + 0/128/256/512 chroma) -- confirm at the users. */
const uint16_t ff_h264_mb_sizes[4] = {
    256,
    384,
    512,
    768,
};
/* Lookup table: rem6[q] holds q % 6 for each listed index (88 entries,
 * one period of six per row). */
static const uint8_t rem6[QP_MAX_NUM + 1] = {
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
/* Lookup table: div6[q] holds q / 6 for each listed index (88 entries,
 * one quotient value per row). */
static const uint8_t div6[QP_MAX_NUM + 1] = {
     0,  0,  0,  0,  0,  0,
     1,  1,  1,  1,  1,  1,
     2,  2,  2,  2,  2,  2,
     3,  3,  3,  3,  3,  3,
     4,  4,  4,  4,  4,  4,
     5,  5,  5,  5,  5,  5,
     6,  6,  6,  6,  6,  6,
     7,  7,  7,  7,  7,  7,
     8,  8,  8,  8,  8,  8,
     9,  9,  9,  9,  9,  9,
    10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13,
    14, 14, 14, 14,
};
/* 4x4 field scan order. Each entry is written as x + y * 4, i.e. a raster
 * index into a 4x4 block. The array is declared one element larger than the
 * 16 listed entries; the extra element is zero-initialized. */
static const uint8_t field_scan[16+1] = {
    0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4, 0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
    2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
};
/* 8x8 field scan order. Each entry is written as x + y * 8, i.e. a raster
 * index into an 8x8 block; the 64 entries form a permutation of 0..63.
 * The array is declared one element larger than the listed entries; the
 * extra element is zero-initialized. */
static const uint8_t field_scan8x8[64+1] = {
    0+0*8, 0+1*8, 0+2*8, 1+0*8, 1+1*8, 0+3*8, 0+4*8, 1+2*8,
    2+0*8, 1+3*8, 0+5*8, 0+6*8, 0+7*8, 1+4*8, 2+1*8, 3+0*8,
    2+2*8, 1+5*8, 1+6*8, 1+7*8, 2+3*8, 3+1*8, 4+0*8, 3+2*8,
    2+4*8, 2+5*8, 2+6*8, 2+7*8, 3+3*8, 4+1*8, 5+0*8, 4+2*8,
    3+4*8, 3+5*8, 3+6*8, 3+7*8, 4+3*8, 5+1*8, 6+0*8, 5+2*8,
    4+4*8, 4+5*8, 4+6*8, 4+7*8, 5+3*8, 6+1*8, 6+2*8, 5+4*8,
    5+5*8, 5+6*8, 5+7*8, 6+3*8, 7+0*8, 7+1*8, 6+4*8, 6+5*8,
    6+6*8, 6+7*8, 7+2*8, 7+3*8, 7+4*8, 7+5*8, 7+6*8, 7+7*8,
};
/* 8x8 field scan order regrouped into four runs of 16 entries (two rows
 * per run below). Each entry is written as x + y * 8; the 64 entries form
 * a permutation of 0..63. The array is declared one element larger than the
 * listed entries; the extra element is zero-initialized. */
static const uint8_t field_scan8x8_cavlc[64+1] = {
    0+0*8, 1+1*8, 2+0*8, 0+7*8, 2+2*8, 2+3*8, 2+4*8, 3+3*8,
    3+4*8, 4+3*8, 4+4*8, 5+3*8, 5+5*8, 7+0*8, 6+6*8, 7+4*8,
    0+1*8, 0+3*8, 1+3*8, 1+4*8, 1+5*8, 3+1*8, 2+5*8, 4+1*8,
    3+5*8, 5+1*8, 4+5*8, 6+1*8, 5+6*8, 7+1*8, 6+7*8, 7+5*8,
    0+2*8, 0+4*8, 0+5*8, 2+1*8, 1+6*8, 4+0*8, 2+6*8, 5+0*8,
    3+6*8, 6+0*8, 4+6*8, 6+2*8, 5+7*8, 6+4*8, 7+2*8, 7+6*8,
    1+0*8, 1+2*8, 0+6*8, 3+0*8, 1+7*8, 3+2*8, 2+7*8, 4+2*8,
    3+7*8, 5+2*8, 4+7*8, 5+4*8, 6+3*8, 6+5*8, 7+3*8, 7+7*8,
};
/* 8x8 zigzag scan regrouped into four runs of 16 entries (four rows per run
 * below). With zigzag_scan8x8 being the conventional 8x8 zigzag order:
 *
 *     zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[4 * (i % 16) + i / 16]
 *
 * i.e. run k (k = 0..3) holds every fourth zigzag position starting at k.
 * Each entry is written as x + y * 8. The array is declared one element
 * larger than the 64 listed entries; the extra element is zero-initialized. */
static const uint8_t zigzag_scan8x8_cavlc[64+1] = {
    0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
    4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
    3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
    2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
    1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
    3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
    2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
    3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
    0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
    2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
    1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
    4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
    0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
    1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
    0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
    5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
};
/* Seed values for the 4x4 dequantization tables: six rows of three values.
 * NOTE(review): presumably the row is selected by qp % 6 and the column by
 * coefficient position class -- confirm at the point of use. */
static const uint8_t dequant4_coeff_init[6][3] = {
    [0] = { 10, 13, 16 },
    [1] = { 11, 14, 18 },
    [2] = { 13, 16, 20 },
    [3] = { 14, 18, 23 },
    [4] = { 16, 20, 25 },
    [5] = { 18, 23, 29 },
};
/* Sixteen indices in the range 0..5, laid out here as a 4x4 grid.
 * NOTE(review): presumably each value selects a column of
 * dequant8_coeff_init for one position of a repeating 4x4 pattern -- confirm
 * at the point of use. */
static const uint8_t dequant8_coeff_init_scan[16] = {
    0, 3, 4, 3,
    3, 1, 5, 1,
    4, 5, 2, 5,
    3, 1, 5, 1,
};
/* Seed values for the 8x8 dequantization tables: six rows of six values.
 * NOTE(review): presumably the row is selected by qp % 6 and the column via
 * dequant8_coeff_init_scan -- confirm at the point of use. */
static const uint8_t dequant8_coeff_init[6][6] = {
    [0] = { 20, 18, 32, 19, 25, 24 },
    [1] = { 22, 19, 35, 21, 28, 26 },
    [2] = { 26, 23, 42, 24, 33, 31 },
    [3] = { 28, 25, 45, 26, 35, 33 },
    [4] = { 32, 28, 51, 30, 40, 38 },
    [5] = { 36, 32, 58, 34, 46, 43 },
};
/* Pixel format list for 4:2:0 content: each hardware-accelerated format is
 * present only when its CONFIG_H264_*_HWACCEL switch is non-zero, followed by
 * AV_PIX_FMT_YUV420P; AV_PIX_FMT_NONE terminates the list.
 * NOTE(review): presumably the entry order is the preference order used for
 * format negotiation -- confirm against the user of this list, which is not
 * visible here. */
static const enum AVPixelFormat h264_hwaccel_pixfmt_list_420[] = {
#if CONFIG_H264_DXVA2_HWACCEL
AV_PIX_FMT_DXVA2_VLD,
#endif
#if CONFIG_H264_VAAPI_HWACCEL
AV_PIX_FMT_VAAPI_VLD,
#endif
#if CONFIG_H264_VDA_HWACCEL
AV_PIX_FMT_VDA_VLD,
#endif
#if CONFIG_H264_VDPAU_HWACCEL
AV_PIX_FMT_VDPAU,
#endif
AV_PIX_FMT_YUV420P,
AV_PIX_FMT_NONE
};
static const enum AVPixelFormat h264_hwaccel_pixfmt_list_jpeg_420[] = {
#if CONFIG_H264_DXVA2_HWACCEL
AV_PIX_FMT_DXVA2_VLD,
#endif
#if CONFIG_H264_VAAPI_HWACCEL
AV_PIX_FMT_VAAPI_VLD,
#endif
#if CONFIG_H264_VDA_HWACCEL
AV_PIX_FMT_VDA_VLD,
#endif
#if CONFIG_H264_VDPAU_HWACCEL
AV_PIX_FMT_YUVJ420P,
AV_PIX_FMT_NONE
Baptiste Coudurier
committed
};
/**
 * Report the num_reorder_frames value of the decoder's current SPS.
 *
 * @param avctx codec context whose priv_data is read as an H264Context
 * @return h->sps.num_reorder_frames, or 0 when priv_data is NULL
 */
int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx)
{
    H264Context *ctx = avctx->priv_data;

    if (!ctx)
        return 0;

    return ctx->sps.num_reorder_frames;
}
static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
int (*mv)[2][4][2],
int mb_x, int mb_y, int mb_intra, int mb_skipped)
{
h->mb_x = mb_x;
h->mb_y = mb_y;
h->mb_xy = mb_x + mb_y * h->mb_stride;
memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
av_assert1(ref >= 0);
/* FIXME: It is possible albeit uncommon that slice references
* differ between slices. We take the easy approach and ignore
* it for now. If this turns out to have any relevance in
* practice then correct remapping should be added. */
if (ref >= h->ref_count[0])
ref = 0;
if (!h->ref_list[0][ref].f.data[0]) {
av_log(h->avctx, AV_LOG_DEBUG, "Reference not available for error concealing\n");
ref = 0;
}
if ((h->ref_list[0][ref].reference&3) != 3) {
av_log(h->avctx, AV_LOG_DEBUG, "Reference invalid\n");
return;
}
fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy],
2, 2, 2, ref, 1);
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
h->mb_mbaff =
h->mb_field_decoding_flag = 0;
ff_h264_hl_decode_mb(h);
}
/**
 * Pass a horizontal band of a picture to the user's draw_horiz_band callback,
 * when one is installed in the codec context.
 *
 * @param h      the H264 context
 * @param y      first row of the band
 * @param height number of rows in the band; doubled for field pictures and
 *               clamped to the rows remaining below y
 *
 * NOTE(review): this copy of the function looks truncated -- the opening
 * brace of the body, the end of the "if (field_pic) {" block and all closing
 * braces at the end are absent. Restore them from the upstream file rather
 * than by guesswork.
 */
void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
AVCodecContext *avctx = h->avctx;
Picture *cur = &h->cur_pic;
/* first list-0 reference, or NULL when it has no picture data */
Picture *last = h->ref_list[0][0].f.data[0] ? &h->ref_list[0][0] : NULL;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
/* vertical chroma subsampling shift, used for the chroma plane offsets */
int vshift = desc->log2_chroma_h;
const int field_pic = h->picture_structure != PICT_FRAME;
if (field_pic) {
height <<= 1;
height = FFMIN(height, avctx->height - y);
/* for the first field, do nothing unless the user allows field slices */
if (field_pic && h->first_field && !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD))
return;
if (avctx->draw_horiz_band) {
AVFrame *src;
int offset[AV_NUM_DATA_POINTERS];
int i;
/* choose the frame to hand out: the current picture for B pictures,
 * low delay or coded-order output, otherwise the first list-0 reference */
if (cur->f.pict_type == AV_PICTURE_TYPE_B || h->low_delay ||
(avctx->slice_flags & SLICE_FLAG_CODED_ORDER))
src = &cur->f;
else if (last)
src = &last->f;
else
return;
/* byte offsets of row y in the luma plane and of the matching row in the
 * two chroma planes; remaining planes get offset 0 */
offset[0] = y * src->linesize[0];
offset[1] =
offset[2] = (y >> vshift) * src->linesize[1];
for (i = 3; i < AV_NUM_DATA_POINTERS; i++)
offset[i] = 0;
emms_c();
avctx->draw_horiz_band(avctx, src, offset,
y, h->picture_structure, height);
/**
 * Release every buffer held by a Picture and reset its trailing fields.
 *
 * Does nothing when the picture has no frame data. Otherwise the frame is
 * released through ff_thread_release_buffer(), all side-table buffer
 * references are dropped, and every byte of the structure located after the
 * tf member is cleared.
 *
 * @param h   the H264 context (supplies the codec context for the release)
 * @param pic picture to release
 */
static void unref_picture(H264Context *h, Picture *pic)
{
    /* offset of the first byte following pic->tf */
    int off = offsetof(Picture, tf) + sizeof(pic->tf);
    int i;

    if (!pic->f.data[0])
        return;

    ff_thread_release_buffer(h->avctx, &pic->tf);
    av_buffer_unref(&pic->hwaccel_priv_buf);

    av_buffer_unref(&pic->qscale_table_buf);
    av_buffer_unref(&pic->mb_type_buf);
    /* motion vector and reference index buffers exist once per list */
    for (i = 0; i < 2; i++) {
        av_buffer_unref(&pic->motion_val_buf[i]);
        av_buffer_unref(&pic->ref_index_buf[i]);
    }

    memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
}
/**
 * Release every picture in the DPB that holds data but is not marked as a
 * reference.
 *
 * @param h              the H264 context
 * @param remove_current when zero, the picture h->cur_pic_ptr points to is
 *                       kept even if it is not a reference
 */
static void release_unused_pictures(H264Context *h, int remove_current)
{
    int i;

    /* release non reference frames */
    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
        if (h->DPB[i].f.data[0] && !h->DPB[i].reference &&
            (remove_current || &h->DPB[i] != h->cur_pic_ptr)) {
            unref_picture(h, &h->DPB[i]);
        }
    }
}
/**
 * Make dst a new reference to the picture held by src.
 *
 * The frame is referenced through ff_thread_ref_frame(), each side-table
 * buffer gets a new reference with av_buffer_ref(), and the plain metadata
 * fields are copied. dst must not hold a frame and src must hold one (both
 * are asserted).
 *
 * @param h   the H264 context
 * @param dst destination picture, must be empty
 * @param src source picture, must hold a frame
 * @return 0 on success; on failure dst is released again and a negative
 *         error code is returned: the code from ff_thread_ref_frame(), or
 *         AVERROR(ENOMEM) when creating a buffer reference failed
 */
static int ref_picture(H264Context *h, Picture *dst, Picture *src)
{
    int ret, i;

    av_assert0(!dst->f.buf[0]);
    av_assert0(src->f.buf[0]);

    src->tf.f = &src->f;
    dst->tf.f = &dst->f;
    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        goto fail;

    dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
    dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
    if (!dst->qscale_table_buf || !dst->mb_type_buf)
        goto fail;
    dst->qscale_table = src->qscale_table;
    dst->mb_type      = src->mb_type;

    /* motion vector and reference index tables exist once per list */
    for (i = 0; i < 2; i++) {
        dst->motion_val_buf[i] = av_buffer_ref(src->motion_val_buf[i]);
        dst->ref_index_buf[i]  = av_buffer_ref(src->ref_index_buf[i]);
        if (!dst->motion_val_buf[i] || !dst->ref_index_buf[i])
            goto fail;
        dst->motion_val[i] = src->motion_val[i];
        dst->ref_index[i]  = src->ref_index[i];
    }

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    for (i = 0; i < 2; i++)
        dst->field_poc[i] = src->field_poc[i];

    memcpy(dst->ref_poc,   src->ref_poc,   sizeof(src->ref_poc));
    memcpy(dst->ref_count, src->ref_count, sizeof(src->ref_count));

    dst->poc           = src->poc;
    dst->frame_num     = src->frame_num;
    dst->mmco_reset    = src->mmco_reset;
    dst->pic_id        = src->pic_id;
    dst->long_ref      = src->long_ref;
    dst->mbaff         = src->mbaff;
    dst->field_picture = src->field_picture;
    dst->needs_realloc = src->needs_realloc;
    dst->reference     = src->reference;
    dst->crop          = src->crop;
    dst->crop_left     = src->crop_left;
    dst->crop_top      = src->crop_top;
    dst->recovered     = src->recovered;

    return 0;

fail:
    unref_picture(h, dst);
    /* ret is still 0 when only an av_buffer_ref() call failed, so map that
     * case to an allocation error instead of reporting success */
    return (ret < 0) ? ret : AVERROR(ENOMEM);
}
/**
 * Allocate the scratch buffers used during decoding (bi-prediction
 * scratchpad, edge emulation buffer and the me scratchpad).
 *
 * Nothing is done when the bi-prediction scratchpad already exists.
 *
 * @param h        the H264 context
 * @param linesize line size the buffers are dimensioned for; its absolute
 *                 value plus 32 is rounded up to a multiple of 32
 * @return 0 on success (or when already allocated), AVERROR(ENOMEM) when any
 *         allocation failed, in which case all three buffers are freed
 */
static int alloc_scratch_buffers(H264Context *h, int linesize)
{
    int padded_stride;

    if (h->bipred_scratchpad)
        return 0;

    padded_stride = FFALIGN(FFABS(linesize) + 32, 32);

    h->bipred_scratchpad = av_malloc(16 * 6 * padded_stride);
    /* edge emulation needs blocksize + filter length - 1,
     * which is 21x21 for h264 */
    h->edge_emu_buffer   = av_mallocz(padded_stride * 2 * 21);
    h->me.scratchpad     = av_mallocz(padded_stride * 2 * 16 * 2);

    if (h->bipred_scratchpad && h->edge_emu_buffer && h->me.scratchpad) {
        h->me.temp = h->me.scratchpad;
        return 0;
    }

    av_freep(&h->bipred_scratchpad);
    av_freep(&h->edge_emu_buffer);
    av_freep(&h->me.scratchpad);
    return AVERROR(ENOMEM);
}
/**
 * Create the buffer pools that back the per-picture side tables
 * (qscale table, macroblock types, motion vectors, reference indices).
 * All pools hand out zero-initialized buffers (av_buffer_allocz).
 *
 * @param h the H264 context; pool sizes are derived from mb_stride,
 *          mb_width and mb_height
 * @return 0 on success, AVERROR(ENOMEM) when any pool could not be created,
 *         in which case all four pools are uninitialized again
 */
static int init_table_pools(H264Context *h)
{
    const int padded_mb_count = h->mb_stride * (h->mb_height + 1) + 1;
    const int mb_count        = h->mb_stride * h->mb_height;
    const int blk4_stride     = h->mb_width * 4 + 1;
    const int blk4_count      = blk4_stride * h->mb_height * 4;

    h->qscale_table_pool = av_buffer_pool_init(padded_mb_count + h->mb_stride,
                                               av_buffer_allocz);
    h->mb_type_pool      = av_buffer_pool_init((padded_mb_count + h->mb_stride) *
                                               sizeof(uint32_t), av_buffer_allocz);
    h->motion_val_pool   = av_buffer_pool_init(2 * (blk4_count + 4) *
                                               sizeof(int16_t), av_buffer_allocz);
    h->ref_index_pool    = av_buffer_pool_init(4 * mb_count, av_buffer_allocz);

    if (h->qscale_table_pool && h->mb_type_pool &&
        h->motion_val_pool && h->ref_index_pool)
        return 0;

    av_buffer_pool_uninit(&h->qscale_table_pool);
    av_buffer_pool_uninit(&h->mb_type_pool);
    av_buffer_pool_uninit(&h->motion_val_pool);
    av_buffer_pool_uninit(&h->ref_index_pool);
    return AVERROR(ENOMEM);
}
/**
 * Obtain a frame buffer and all side tables for a picture.
 *
 * The frame comes from ff_thread_get_buffer(); the qscale, macroblock type,
 * motion vector and reference index tables come from the context's buffer
 * pools, which are created on first use. The context's linesize/uvlinesize
 * and the picture's cropping fields are updated from the new frame and the
 * current SPS.
 *
 * @param h   the H264 context
 * @param pic picture to fill; must not hold frame data (asserted)
 * @return 0 on success; on failure everything acquired so far is released
 *         through unref_picture() and a negative error code is returned:
 *         the failing call's code when it supplied one, AVERROR(ENOMEM)
 *         otherwise
 */
static int alloc_picture(H264Context *h, Picture *pic)
{
    int i, ret = 0;

    av_assert0(!pic->f.data[0]);

    pic->tf.f = &pic->f;
    ret = ff_thread_get_buffer(h->avctx, &pic->tf, pic->reference ?
                                                   AV_GET_BUFFER_FLAG_REF : 0);
    if (ret < 0)
        goto fail;

    h->linesize   = pic->f.linesize[0];
    h->uvlinesize = pic->f.linesize[1];
    pic->crop      = h->sps.crop;
    pic->crop_top  = h->sps.crop_top;
    pic->crop_left = h->sps.crop_left;

    if (h->avctx->hwaccel) {
        const AVHWAccel *hwaccel = h->avctx->hwaccel;
        av_assert0(!pic->hwaccel_picture_private);
        pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->priv_data_size);
        /* only dereference the buffer once the allocation is known to have
         * succeeded; ret is 0 here, so the fail path reports ENOMEM */
        if (!pic->hwaccel_priv_buf)
            goto fail;
        pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
    }

    if (!h->qscale_table_pool) {
        ret = init_table_pools(h);
        if (ret < 0)
            goto fail;
    }

    pic->qscale_table_buf = av_buffer_pool_get(h->qscale_table_pool);
    pic->mb_type_buf      = av_buffer_pool_get(h->mb_type_pool);
    if (!pic->qscale_table_buf || !pic->mb_type_buf)
        goto fail;

    /* both tables are addressed starting two rows plus one entry into
     * their buffers */
    pic->mb_type      = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
    pic->qscale_table = pic->qscale_table_buf->data + 2 * h->mb_stride + 1;

    for (i = 0; i < 2; i++) {
        pic->motion_val_buf[i] = av_buffer_pool_get(h->motion_val_pool);
        pic->ref_index_buf[i]  = av_buffer_pool_get(h->ref_index_pool);
        if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i])
            goto fail;

        pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
        pic->ref_index[i]  = pic->ref_index_buf[i]->data;
    }

    return 0;

fail:
    unref_picture(h, pic);
    return (ret < 0) ? ret : AVERROR(ENOMEM);
}
/**
 * Tell whether a picture slot can be reused.
 *
 * @param h   the H264 context (not used)
 * @param pic picture slot to test
 * @return 1 when the slot holds no frame data, or when it is flagged
 *         needs_realloc and does not have the DELAYED_PIC_REF bit set in
 *         its reference field; 0 otherwise
 */
static inline int pic_is_unused(H264Context *h, Picture *pic)
{
    return pic->f.data[0] == NULL ||
           (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF));
}
/**
 * Find a reusable slot in the DPB.
 *
 * The first slot for which pic_is_unused() holds is chosen. If that slot is
 * flagged needs_realloc, the flag is cleared and the old picture is released
 * so the slot is empty when handed out.
 *
 * @param h the H264 context
 * @return index of the slot in h->DPB, or AVERROR_INVALIDDATA when all
 *         MAX_PICTURE_COUNT slots are in use
 */
static int find_unused_picture(H264Context *h)
{
    int i;

    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
        if (pic_is_unused(h, &h->DPB[i]))
            break;
    }
    /* the loop ran to completion: no free slot */
    if (i == MAX_PICTURE_COUNT)
        return AVERROR_INVALIDDATA;

    if (h->DPB[i].needs_realloc) {
        h->DPB[i].needs_realloc = 0;
        unref_picture(h, &h->DPB[i]);
    }

    return i;
}
/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 */
/* Adjust the cached intra 4x4 prediction modes of the blocks along the top
 * row and left column of the current macroblock when the neighbouring
 * samples they would read are unavailable.
 *
 * Returns 0 on the path visible here.
 *
 * NOTE(review): this copy is damaged -- inside both "status < 0" branches
 * only the format string of a logging call survives (the call itself, its
 * arguments and whatever error return followed are missing), and several
 * "Michael Niedermayer" / "committed" lines are version-control residue, not
 * code. Restore from the upstream file. */
int ff_h264_check_intra4x4_pred_mode(H264Context *h)
{
/* Replacement tables indexed by the cached prediction mode:
 * negative = the mode cannot be used, 0 = keep the mode,
 * positive = mode to substitute. */
static const int8_t top[12] = {
-1, 0, LEFT_DC_PRED, -1, -1, -1, -1, -1, 0
};
static const int8_t left[12] = {
0, -1, TOP_DC_PRED, 0, -1, -1, -1, 0, -1, DC_128_PRED
};
Michael Niedermayer
committed
int i;
/* bit 0x8000 clear: fix up the four blocks of the top row */
if (!(h->top_samples_available & 0x8000)) {
for (i = 0; i < 4; i++) {
int status = top[h->intra4x4_pred_mode_cache[scan8[0] + i]];
if (status < 0) {
"top block unavailable for requested intra4x4 mode %d at %d %d\n",
} else if (status) {
h->intra4x4_pred_mode_cache[scan8[0] + i] = status;
Michael Niedermayer
committed
}
}
}
/* any of the four left-availability bits clear: fix up the affected
 * blocks of the left column, one mask bit per block row */
if ((h->left_samples_available & 0x8888) != 0x8888) {
static const int mask[4] = { 0x8000, 0x2000, 0x80, 0x20 };
for (i = 0; i < 4; i++)
if (!(h->left_samples_available & mask[i])) {
int status = left[h->intra4x4_pred_mode_cache[scan8[0] + 8 * i]];
if (status < 0) {
"left block unavailable for requested intra4x4 mode %d at %d %d\n",
} else if (status) {
h->intra4x4_pred_mode_cache[scan8[0] + 8 * i] = status;
Michael Niedermayer
committed
}
}
}
return 0;
} // FIXME cleanup like ff_h264_check_intra_pred_mode
/**
 * Check if the top & left blocks are available if needed and
 * change the dc mode so it only uses the available blocks.
 */
/* Map an intra prediction mode to one that only reads available neighbouring
 * samples.
 *
 * h         the H264 context
 * mode      requested prediction mode; values above 3 are rejected
 * is_chroma non-zero enables the special chroma handling in the left branch
 *
 * NOTE(review): this copy is heavily truncated -- the logging calls around
 * the three format strings, the error returns, the closing braces and the
 * final return are all missing. Restore from the upstream file. */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma)
{
/* replacement tables indexed by mode; negative entries mean "not usable" */
static const int8_t top[4] = { LEFT_DC_PRED8x8, 1, -1, -1 };
static const int8_t left[5] = { TOP_DC_PRED8x8, -1, 2, -1, DC_128_PRED8x8 };
/* the unsigned comparison also catches negative modes */
if (mode > 3U) {
"out of range intra chroma pred mode at %d %d\n",
if (!(h->top_samples_available & 0x8000)) {
mode = top[mode];
if (mode < 0) {
"top block unavailable for requested intra mode at %d %d\n",
if ((h->left_samples_available & 0x8080) != 0x8080) {
mode = left[mode];
/* chroma with only one of the two left-availability bits set */
if (is_chroma && (h->left_samples_available & 0x8080)) {
// mad cow disease mode, aka MBAFF + constrained_intra_pred
mode = ALZHEIMER_DC_L0T_PRED8x8 +
(!(h->left_samples_available & 0x8000)) +
2 * (mode == DC_128_PRED8x8);
"left block unavailable for requested intra mode at %d %d\n",
/* Parse the one-byte NAL unit header and produce the payload with escape
 * bytes removed.
 *
 * h          the H264 context; nal_ref_idc and nal_unit_type are set from
 *            the header byte
 * src        input bytes, src[0] being the header
 * dst_length receives the length of the returned payload
 * consumed   receives the number of input bytes used, counting the header
 * length     number of bytes available
 *
 * Returns a pointer to the payload, or NULL on failure.
 *
 * NOTE(review): this copy is heavily truncated -- the local declarations,
 * the #define lines of the STARTCODE_TEST / FIND_FIRST_ZERO macros, the
 * #else/#endif belonging to HAVE_FAST_UNALIGNED, several conditions and most
 * closing braces are missing, and some "<name>" / "committed" lines are
 * version-control residue. Restore from the upstream file. */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
int *dst_length, int *consumed, int length)
{
// src[0]&0x80; // forbidden bit
/* upper three bits and lower five bits of the header byte */
h->nal_ref_idc = src[0] >> 5;
h->nal_unit_type = src[0] & 0x1F;
/* remains of a macro body: two zero bytes followed by a byte <= 3 end the
 * search; anything but 3 there is treated as a start code and cuts length */
if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
if (src[i + 2] != 3) { \
/* startcode, so we must be past the end */ \
length = i; \
} \
break; \
}
#if HAVE_FAST_UNALIGNED
if (i > 0 && !src[i]) \
i--; \
while (src[i]) \
i++
#if HAVE_FAST_64BIT
/* word-at-a-time scan: skip words whose bit pattern rules out a zero byte
 * at the tested positions */
for (i = 0; i + 1 < length; i += 9) {
if (!((~AV_RN64A(src + i) &
(AV_RN64A(src + i) - 0x0100010001000101ULL)) &
0x8000800080008080ULL))
continue;
FIND_FIRST_ZERO;
STARTCODE_TEST;
i -= 7;
}
#else
for (i = 0; i + 1 < length; i += 5) {
if (!((~AV_RN32A(src + i) &
(AV_RN32A(src + i) - 0x01000101U)) &
0x80008080U))
FIND_FIRST_ZERO;
STARTCODE_TEST;
i -= 3;
}
#endif
/* byte-wise scan stepping two bytes at a time */
for (i = 0; i + 1 < length; i += 2) {
if (src[i])
continue;
if (i > 0 && src[i - 1] == 0)
i--;
// use second escape buffer for inter data
bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
si = h->rbsp_buffer_size[bufidx];
av_fast_padded_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+MAX_MBPAIR_SIZE);
Francois Oligny-Lemieux
committed
return NULL;
if(i>=length-1){ //no escaped 0
*dst_length= length;
*consumed= length+1; //+1 for the header
/* with CODEC_FLAG2_FAST the input is returned directly, otherwise it is
 * copied into dst first */
if(h->avctx->flags2 & CODEC_FLAG2_FAST){
Michael Niedermayer
committed
return src;
}else{
memcpy(dst, src, length);
return dst;
}
/* copy from the first candidate position on, dropping the 0x03 of every
 * 00 00 03 sequence */
si = di = i;
while (si + 2 < length) {
// remove escapes (very rare 1:2^22)
if (src[si + 2] > 3) {
dst[di++] = src[si++];
dst[di++] = src[si++];
} else if (src[si] == 0 && src[si + 1] == 0) {
if (src[si + 2] == 3) { // escape
dst[di++] = 0;
dst[di++] = 0;
si += 3;
Michael Niedermayer
committed
continue;
/* copy the remaining tail bytes unchanged */
while (si < length)
dst[di++] = src[si++];
/* zero FF_INPUT_BUFFER_PADDING_SIZE bytes after the payload */
memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
Alexander Strange
committed
*dst_length = di;
*consumed = si + 1; // +1 for the header
/* FIXME store exact number of bits in the getbitcontext
* (it is needed for decoding) */
/**
 * Identify the exact end of the bitstream.
 *
 * The visible loop examines up to eight bits of v, starting with the least
 * significant one, and returns the 1-based position of the first set bit.
 *
 * @return the length of the trailing bits, or 0 if damaged
 *
 * NOTE(review): this copy is truncated -- the opening brace, the
 * declarations and initial value of v and r, the closing braces and the
 * final return are missing, and the two lines between this comment and the
 * signature are version-control residue. Restore from the upstream file.
 */
Diego Biurrun
committed
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
for (r = 1; r < 9; r++) {
if (v & 1)
return r;
v >>= 1;
/**
 * Compute the lowest reference row a partition reads from one list.
 *
 * @param h        the H264 context; the cached motion vector of block n is read
 * @param pic      reference picture (not used)
 * @param n        block index into the motion vector cache (through scan8)
 * @param height   partition height in rows, must be >= 0
 * @param y_offset row of the partition's top edge
 * @param list     reference list (0 or 1)
 * @return the row just below the partition after applying the integer part
 *         of the vertical motion vector, plus 3 extra rows when the vector
 *         has a fractional (quarter-sample) part; never less than 0
 */
static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
                                         int height, int y_offset, int list)
{
    int raw_my             = h->mv_cache[list][scan8[n]][1];
    /* sub-sample positions need rows below the block for interpolation */
    int filter_height_down = (raw_my & 3) ? 3 : 0;
    int full_my            = (raw_my >> 2) + y_offset;
    int bottom             = full_my + filter_height_down + height;

    av_assert2(height >= 0);

    return FFMAX(0, bottom);
}
/**
 * Record, per reference picture, the lowest row one partition needs.
 *
 * For each enabled list the reference used by block n is looked up. Unless
 * that reference is the current picture with the same picture structure,
 * refs[list][ref] is raised to the lowest row the partition reads.
 *
 * @param h        the H264 context
 * @param refs     per list and reference index: lowest row needed so far,
 *                 negative while the reference has not been seen
 * @param n        block index into the reference/motion caches
 * @param height   partition height in rows
 * @param y_offset row offset of the partition inside the macroblock
 * @param list0    non-zero when the partition predicts from list 0
 * @param list1    non-zero when the partition predicts from list 1
 * @param nrefs    per list: number of distinct references recorded in refs;
 *                 incremented the first time a reference is seen
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
                                     int height, int y_offset, int list0,
                                     int list1, int *nrefs)
{
    int my;

    /* turn the in-macroblock offset into a picture row */
    y_offset += 16 * (h->mb_y >> MB_FIELD(h));

    if (list0) {
        int ref_n    = h->ref_cache[0][scan8[n]];
        Picture *ref = &h->ref_list[0][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
            (ref->reference & 3) != h->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
            if (refs[0][ref_n] < 0)
                nrefs[0] += 1;
            refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
        }
    }

    if (list1) {
        int ref_n    = h->ref_cache[1][scan8[n]];
        Picture *ref = &h->ref_list[1][ref_n];

        if (ref->tf.progress->data != h->cur_pic.tf.progress->data ||
            (ref->reference & 3) != h->picture_structure) {
            my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
            if (refs[1][ref_n] < 0)
                nrefs[1] += 1;
            refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
        }
    }
}
/**
 * Wait until all reference frames are available for MC operations.
 *
 * The lowest row needed from each reference is gathered with
 * get_lowest_part_y() for every partition of the current macroblock, then
 * ff_thread_await_progress() is called for each reference that has a
 * non-negative row recorded.
 *
 * @param h the H264 context
 *
 * NOTE(review): this copy is heavily truncated -- the declarations of refs,
 * nrefs, i, j and row, the first branch condition of the partition chain,
 * the end of the final await call and most closing braces are missing, and
 * the bare numbers 823..851 in the middle are line-number residue, not code.
 * Restore from the upstream file.
 */
static void await_references(H264Context *h)
{
const int mb_xy = h->mb_xy;
const int mb_type = h->cur_pic.mb_type[mb_xy];
int ref, list;
/* all bytes 0xFF, i.e. every entry starts out as -1 ("not referenced") */
memset(refs, -1, sizeof(refs));
/* one 16-row partition */
get_lowest_part_y(h, refs, 0, 16, 0,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
} else if (IS_16X8(mb_type)) {
/* two 8-row partitions, the second one 8 rows down */
get_lowest_part_y(h, refs, 0, 8, 0,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
get_lowest_part_y(h, refs, 8, 8, 8,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
} else if (IS_8X16(mb_type)) {
/* two side-by-side 16-row partitions */
get_lowest_part_y(h, refs, 0, 16, 0,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
get_lowest_part_y(h, refs, 4, 16, 0,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
} else {
av_assert2(IS_8X8(mb_type));
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
/* four 8x8 sub-macroblocks, each with its own sub-partitioning */
for (i = 0; i < 4; i++) {
const int sub_mb_type = h->sub_mb_type[i];
const int n = 4 * i;
/* 0 for the upper two sub-macroblocks, 8 for the lower two */
int y_offset = (i & 2) << 2;
if (IS_SUB_8X8(sub_mb_type)) {
get_lowest_part_y(h, refs, n, 8, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
} else if (IS_SUB_8X4(sub_mb_type)) {
get_lowest_part_y(h, refs, n, 4, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
get_lowest_part_y(h, refs, n + 2, 4, y_offset + 4,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
} else if (IS_SUB_4X8(sub_mb_type)) {
get_lowest_part_y(h, refs, n, 8, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
get_lowest_part_y(h, refs, n + 1, 8, y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
} else {
av_assert2(IS_SUB_4X4(sub_mb_type));
for (j = 0; j < 4; j++) {
/* the lower two 4x4 blocks sit 4 rows further down */
int sub_y_offset = y_offset + 2 * (j & 2);
get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
IS_DIR(sub_mb_type, 0, 0),
IS_DIR(sub_mb_type, 0, 1),
nrefs);
/* wait on every recorded reference; the loop condition stops scanning a
 * list once nrefs[list] is zero */
for (list = h->list_count - 1; list >= 0; list--)
for (ref = 0; ref < 48 && nrefs[list]; ref++) {
if (row >= 0) {
Picture *ref_pic = &h->ref_list[list][ref];
int ref_field = ref_pic->reference - 1;
int ref_field_picture = ref_pic->field_picture;
/* height in rows, halved when the reference is a field picture */
int pic_height = 16 * h->mb_height >> ref_field_picture;
if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields
ff_thread_await_progress(&ref_pic->tf,
FFMIN((row >> 1) - !(row & 1),
pic_height - 1),
1);
ff_thread_await_progress(&ref_pic->tf,
FFMIN((row >> 1), pic_height - 1),
0);
} else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame
ff_thread_await_progress(&ref_pic->tf,
FFMIN(row * 2 + ref_field,
pic_height - 1),
0);
} else if (FIELD_PICTURE(h)) {
ff_thread_await_progress(&ref_pic->tf,
FFMIN(row, pic_height - 1),
ref_field);
} else {
ff_thread_await_progress(&ref_pic->tf,
static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
int n, int square, int height,
int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
qpel_mc_func *qpix_op,
h264_chroma_mc_func chroma_op,
int pixel_shift, int chroma_idc)
const int mx = h->mv_cache[list][scan8[n]][0] + src_x_offset * 8;
int my = h->mv_cache[list][scan8[n]][1] + src_y_offset * 8;
const int luma_xy = (mx & 3) + ((my & 3) << 2);
ptrdiff_t offset = ((mx >> 2) << pixel_shift) + (my >> 2) * h->mb_linesize;
uint8_t *src_y = pic->f.data[0] + offset;
uint8_t *src_cb, *src_cr;
int extra_width = 0;
int extra_height = 0;
int emu = 0;
const int full_mx = mx >> 2;
const int full_my = my >> 2;
const int pic_height = 16 * h->mb_height >> MB_FIELD(h);
Michael Niedermayer
committed
if (mx & 7)
extra_width -= 3;
if (my & 7)
extra_height -= 3;
if (full_mx < 0 - extra_width ||
full_my < 0 - extra_height ||
full_mx + 16 /*FIXME*/ > pic_width + extra_width ||
full_my + 16 /*FIXME*/ > pic_height + extra_height) {
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize,
src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
full_my - 2, pic_width, pic_height);
src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
emu = 1;
}
qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); // FIXME try variable height perhaps?
if (!square)
Michael Niedermayer
committed
qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
Michael Niedermayer
committed
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize,
src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2,
pic_width, pic_height);
src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
if (!square)
qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_linesize,
src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
h->mb_linesize,
16 + 5, 16 + 5 /*FIXME*/,
full_mx - 2, full_my - 2,
pic_width, pic_height);
src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
if (!square)
qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
return;
}
ysh = 3 - (chroma_idc == 2 /* yuv422 */);
if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(h)) {
Michael Niedermayer
committed
// chroma offset when predicting from a field of opposite parity
my += 2 * ((h->mb_y & 1) - (pic->reference - 1));
emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1);
Michael Niedermayer
committed
}
src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) +
(my >> ysh) * h->mb_uvlinesize;
src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) +
(my >> ysh) * h->mb_uvlinesize;
Michael Niedermayer
committed
h->vdsp.emulated_edge_mc(h->edge_emu_buffer, h->mb_uvlinesize, src_cb, h->mb_uvlinesize,
9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));