diff --git a/.gitignore b/.gitignore index cb370bb9c5b02a2c51e4425909aab1fd7c1b7a52..480fbe0171cc16418e68b27c171a5251847a1fdb 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ /doc/examples/avio_reading /doc/examples/decoding_encoding /doc/examples/demuxing_decoding +/doc/examples/extract_mvs /doc/examples/filter_audio /doc/examples/filtering_audio /doc/examples/filtering_video diff --git a/configure b/configure index 1224362e77cafedf62427e25512d8d531bb2739f..f355dad1d750458d75893916daa94c42f9269a56 100755 --- a/configure +++ b/configure @@ -1305,6 +1305,7 @@ EXAMPLE_LIST=" avio_reading_example decoding_encoding_example demuxing_decoding_example + extract_mvs_example filter_audio_example filtering_audio_example filtering_video_example @@ -2586,6 +2587,7 @@ zoompan_filter_deps="swscale" avio_reading="avformat avcodec avutil" avcodec_example_deps="avcodec avutil" demuxing_decoding_example_deps="avcodec avformat avutil" +extract_mvs_example_deps="avcodec avformat avutil" filter_audio_example_deps="avfilter avutil" filtering_audio_example_deps="avfilter avcodec avformat avutil" filtering_video_example_deps="avfilter avcodec avformat avutil" diff --git a/doc/APIchanges b/doc/APIchanges index 8b3051c2552727336a8339525216ddbb542c88cd..1bed10778d492c738fcb5a55f646add521a134c1 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -15,6 +15,9 @@ libavutil: 2014-08-09 API changes, most recent first: +2014-08-xx - xxxxxxx - lavu 54.5.100 - frame.h motion_vector.h + Add AV_FRAME_DATA_MOTION_VECTORS side data and AVMotionVector structure + 2014-08-16 - xxxxxxx - lswr 1.1.100 - swresample.h Add AVFrame based API diff --git a/doc/Makefile b/doc/Makefile index 99f588ac05d07c9318d380144da20425eba2f042..2fb9058e6a32f35ba254518748e544d57f7ae7e7 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -39,6 +39,7 @@ DOCS = $(DOCS-yes) DOC_EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE) += avio_reading DOC_EXAMPLES-$(CONFIG_AVCODEC_EXAMPLE) += avcodec DOC_EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding +DOC_EXAMPLES-$(CONFIG_EXTRACT_MVS_EXAMPLE) += extract_mvs DOC_EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE) += filter_audio DOC_EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE) += filtering_audio DOC_EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE) += filtering_video diff --git a/doc/codecs.texi b/doc/codecs.texi index 1160e5d800b049c89ccb2885ee3bcaab238ecb06..820dce51e6dbf534ba8b646eecc7c364339ceef8 100644 --- a/doc/codecs.texi +++ b/doc/codecs.texi @@ -797,6 +797,9 @@ Frame data might be split into multiple chunks. Show all frames before the first keyframe. @item skiprd Deprecated, use mpegvideo private options instead. +@item export_mvs +Export motion vectors into frame side-data (see @code{AV_FRAME_DATA_MOTION_VECTORS}) +for codecs that support it. See also @file{doc/examples/export_mvs.c}. @end table @item error @var{integer} (@emph{encoding,video}) diff --git a/doc/examples/Makefile b/doc/examples/Makefile index 03c7021e8bb14d6d88fadd2a0789d21f0d159871..07251fe3c2fd894f285f71b4a586f6aa14adef55 100644 --- a/doc/examples/Makefile +++ b/doc/examples/Makefile @@ -14,6 +14,7 @@ LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) $(LDLIBS) EXAMPLES= avio_reading \ decoding_encoding \ demuxing_decoding \ + extract_mvs \ filtering_video \ filtering_audio \ metadata \ diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c new file mode 100644 index 0000000000000000000000000000000000000000..d6fd61335e797a050dc4c7b865615a5ca8f1c734 --- /dev/null +++ b/doc/examples/extract_mvs.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2012 Stefano Sabatini + * Copyright (c) 2014 Clément Bœsch + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <libavutil/motion_vector.h> +#include <libavformat/avformat.h> + +static AVFormatContext *fmt_ctx = NULL; +static AVCodecContext *video_dec_ctx = NULL; +static AVStream *video_stream = NULL; +static const char *src_filename = NULL; + +static int video_stream_idx = -1; +static AVFrame *frame = NULL; +static AVPacket pkt; +static int video_frame_count = 0; + +static int decode_packet(int *got_frame, int cached) +{ + int decoded = pkt.size; + + *got_frame = 0; + + if (pkt.stream_index == video_stream_idx) { + int ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt); + if (ret < 0) { + fprintf(stderr, "Error decoding video frame (%s)\n", av_err2str(ret)); + return ret; + } + + if (*got_frame) { + int i; + AVFrameSideData *sd; + + video_frame_count++; + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS); + if (sd) { + const AVMotionVector *mvs = (const AVMotionVector *)sd->data; + for (i = 0; i < sd->size / sizeof(*mvs); i++) { + const AVMotionVector *mv = &mvs[i]; + printf("%d,%2d,%2d,%2d,%4d,%4d,%4d,%4d,0x%"PRIx64"\n", + video_frame_count, mv->source, + mv->w, mv->h, mv->src_x, mv->src_y, + mv->dst_x, mv->dst_y, mv->flags); + } + } + } + } + + return decoded; +} + +static int open_codec_context(int *stream_idx, + AVFormatContext *fmt_ctx, enum AVMediaType type) +{ + int ret; + AVStream *st; + AVCodecContext *dec_ctx = NULL; + AVCodec *dec = NULL; + AVDictionary *opts = NULL; + + ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0); + if (ret < 0) { + fprintf(stderr, "Could not find %s stream in input file '%s'\n", + av_get_media_type_string(type), src_filename); + return ret; + } else { + *stream_idx = ret; + st = fmt_ctx->streams[*stream_idx]; + + /* find decoder for the stream */ + dec_ctx = st->codec; + dec = avcodec_find_decoder(dec_ctx->codec_id); + if (!dec) { + fprintf(stderr, "Failed to find %s codec\n", + av_get_media_type_string(type)); + return AVERROR(EINVAL); + } + + /* Init the video decoder */ + av_dict_set(&opts, "flags2", "+export_mvs", 0); + if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) { + fprintf(stderr, "Failed to open %s codec\n", + av_get_media_type_string(type)); + return ret; + } + } + + return 0; +} + +int main(int argc, char **argv) +{ + int ret = 0, got_frame; + + if (argc != 2) { + fprintf(stderr, "Usage: %s <video>\n", argv[0]); + exit(1); + } + src_filename = argv[1]; + + av_register_all(); + + if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) { + fprintf(stderr, "Could not open source file %s\n", src_filename); + exit(1); + } + + if (avformat_find_stream_info(fmt_ctx, NULL) < 0) { + fprintf(stderr, "Could not find stream information\n"); + exit(1); + } + + if (open_codec_context(&video_stream_idx, fmt_ctx, AVMEDIA_TYPE_VIDEO) >= 0) { + video_stream = fmt_ctx->streams[video_stream_idx]; + video_dec_ctx = video_stream->codec; + } + + av_dump_format(fmt_ctx, 0, src_filename, 0); + + if (!video_stream) { + fprintf(stderr, "Could not find video stream in the input, aborting\n"); + ret = 1; + goto end; + } + + frame = av_frame_alloc(); + if (!frame) { + fprintf(stderr, "Could not allocate frame\n"); + ret = AVERROR(ENOMEM); + goto end; + } + + printf("framenum,source,blockw,blockh,srcx,srcy,dstx,dsty,flags\n"); + + /* initialize packet, set data to NULL, let the demuxer fill it */ + av_init_packet(&pkt); + pkt.data = NULL; + pkt.size = 0; + + /* read frames from the file */ + while (av_read_frame(fmt_ctx, &pkt) >= 0) { + AVPacket orig_pkt = pkt; + do { + ret = decode_packet(&got_frame, 0); + if (ret < 0) + break; + pkt.data += ret; + pkt.size -= ret; + } while (pkt.size > 0); + av_free_packet(&orig_pkt); + } + + /* flush cached frames */ + pkt.data = NULL; + pkt.size = 0; + do { + decode_packet(&got_frame, 1); + } while (got_frame); + +end: + avcodec_close(video_dec_ctx); + avformat_close_input(&fmt_ctx); + av_frame_free(&frame); + return ret < 0; +} diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 4f53359d65a57b21cd081e1ea30ffdd866c75d84..d39e7f14b5c7052a7b0af01e47a888c26274f6bf 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -767,6 +767,7 @@ typedef struct RcOverride{ #define CODEC_FLAG2_CHUNKS 0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries. #define CODEC_FLAG2_SHOW_ALL 0x00400000 ///< Show all frames before the first keyframe +#define CODEC_FLAG2_EXPORT_MVS 0x10000000 ///< Export motion vectors through frame side data /* Unsupported options : * Syntax Arithmetic coding (SAC) diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index ed4ca3561dc5e82bc9e6359a9f057715f1c2250b..6a0b822557c84db8ae7908ba3ee37f3a06c43cf7 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -31,6 +31,7 @@ #include "libavutil/avassert.h" #include "libavutil/imgutils.h" #include "libavutil/internal.h" +#include "libavutil/motion_vector.h" #include "libavutil/timer.h" #include "avcodec.h" #include "blockdsp.h" @@ -600,7 +601,8 @@ static int alloc_picture_tables(MpegEncContext *s, Picture *pic) return AVERROR(ENOMEM); } - if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv) { + if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv || + (s->avctx->flags2 & CODEC_FLAG2_EXPORT_MVS)) { int mv_size = 2 * (b8_array_size + 4) * sizeof(int16_t); int ref_index_size = 4 * mb_array_size; @@ -2106,6 +2108,24 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, draw_line(buf, sx, sy, ex, ey, w, h, stride, color); } +static int add_mb(AVMotionVector *mb, uint32_t mb_type, + int dst_x, int dst_y, + int src_x, int src_y, + int direction) +{ + if (dst_x == src_x && dst_y == src_y) + return 0; + mb->w = IS_8X8(mb_type) || IS_8X16(mb_type) ? 8 : 16; + mb->h = IS_8X8(mb_type) || IS_16X8(mb_type) ? 8 : 16; + mb->src_x = src_x; + mb->src_y = src_y; + mb->dst_x = dst_x; + mb->dst_y = dst_y; + mb->source = direction ? 1 : -1; + mb->flags = 0; // XXX: does mb_type contain extra information that could be exported here? + return 1; +} + /** * Print debugging info for the given picture. */ @@ -2114,6 +2134,87 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_ int *low_delay, int mb_width, int mb_height, int mb_stride, int quarter_sample) { + if ((avctx->flags2 & CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) { + const int shift = 1 + quarter_sample; + const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1; + const int mv_stride = (mb_width << mv_sample_log2) + + (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1); + int mb_x, mb_y, mbcount = 0; + + /* size is width * height * 2 * 4 where 2 is for directions and 4 is + * for the maximum number of MB (4 MB in case of IS_8x8) */ + AVMotionVector *mvs = av_malloc_array(mb_width * mb_height, 2 * 4 * sizeof(AVMotionVector)); + if (!mvs) + return; + + for (mb_y = 0; mb_y < mb_height; mb_y++) { + for (mb_x = 0; mb_x < mb_width; mb_x++) { + int i, direction, mb_type = mbtype_table[mb_x + mb_y * mb_stride]; + for (direction = 0; direction < 2; direction++) { + if (!USES_LIST(mb_type, direction)) + continue; + if (IS_8X8(mb_type)) { + for (i = 0; i < 4; i++) { + int sx = mb_x * 16 + 4 + 8 * (i & 1); + int sy = mb_y * 16 + 4 + 8 * (i >> 1); + int xy = (mb_x * 2 + (i & 1) + + (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1); + int mx = (motion_val[direction][xy][0] >> shift) + sx; + int my = (motion_val[direction][xy][1] >> shift) + sy; + mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction); + } + } else if (IS_16X8(mb_type)) { + for (i = 0; i < 2; i++) { + int sx = mb_x * 16 + 8; + int sy = mb_y * 16 + 4 + 8 * i; + int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1); + int mx = (motion_val[direction][xy][0] >> shift); + int my = (motion_val[direction][xy][1] >> shift); + + if (IS_INTERLACED(mb_type)) + my *= 2; + + mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction); + } + } else if (IS_8X16(mb_type)) { + for (i = 0; i < 2; i++) { + int sx = mb_x * 16 + 4 + 8 * i; + int sy = mb_y * 16 + 8; + int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1); + int mx = motion_val[direction][xy][0] >> shift; + int my = motion_val[direction][xy][1] >> shift; + + if (IS_INTERLACED(mb_type)) + my *= 2; + + mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction); + } + } else { + int sx = mb_x * 16 + 8; + int sy = mb_y * 16 + 8; + int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2; + int mx = (motion_val[direction][xy][0]>>shift) + sx; + int my = (motion_val[direction][xy][1]>>shift) + sy; + mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction); + } + } + } + } + + if (mbcount) { + AVFrameSideData *sd; + + av_log(avctx, AV_LOG_DEBUG, "Adding %d MVs info to frame %d\n", mbcount, avctx->frame_number); + sd = av_frame_new_side_data(pict, AV_FRAME_DATA_MOTION_VECTORS, mbcount * sizeof(AVMotionVector)); + if (!sd) + return; + memcpy(sd->data, mvs, mbcount * sizeof(AVMotionVector)); + } + + av_freep(&mvs); + } + + /* TODO: export all the following to make them accessible for users (and filters) */ if (avctx->hwaccel || !mbtype_table || (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)) return; diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h index 2e9dfa017867f1a95d6edb82b216f7c240316268..700053129eb5e060c0025f518ab20b175d90dc7a 100644 --- a/libavcodec/options_table.h +++ b/libavcodec/options_table.h @@ -88,6 +88,7 @@ static const AVOption avcodec_options[] = { {"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"}, {"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"}, {"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"}, +{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"}, {"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"}, {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" }, {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" }, diff --git a/libavutil/Makefile b/libavutil/Makefile index d3a92457f63263c99b5b976fcf0a4ee7e2061d90..48ae0efa4335a82cf6d386319de3b1219c45a95c 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -38,6 +38,7 @@ HEADERS = adler32.h \ mathematics.h \ md5.h \ mem.h \ + motion_vector.h \ murmur3.h \ dict.h \ old_pix_fmts.h \ diff --git a/libavutil/frame.c b/libavutil/frame.c index 12eac5540bee43487a5e62e171b9d4ae244fb738..4dc8e4ea5df6f72e5e82e76df309f2235a9df523 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -683,6 +683,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) case AV_FRAME_DATA_DOWNMIX_INFO: return "Metadata relevant to a downmix procedure"; case AV_FRAME_DATA_REPLAYGAIN: return "AVReplayGain"; case AV_FRAME_DATA_DISPLAYMATRIX: return "3x3 displaymatrix"; + case AV_FRAME_DATA_MOTION_VECTORS: return "Motion vectors"; } return NULL; } diff --git a/libavutil/frame.h b/libavutil/frame.h index dbbdd29d1be1a135873f5bbf9c7f3132d1200a67..2391d3e48ed353bfe719f0d65e03d04bce53b279 100644 --- a/libavutil/frame.h +++ b/libavutil/frame.h @@ -87,6 +87,13 @@ enum AVFrameSideDataType { * in ETSI TS 101 154 using AVActiveFormatDescription enum. */ AV_FRAME_DATA_AFD, + /** + * Motion vectors exported by some codecs (on demand through the export_mvs + * flag set in the libavcodec AVCodecContext flags2 option). + * The data is the AVMotionVector struct defined in + * libavutil/motion_vector.h. + */ + AV_FRAME_DATA_MOTION_VECTORS, }; enum AVActiveFormatDescription { diff --git a/libavutil/motion_vector.h b/libavutil/motion_vector.h new file mode 100644 index 0000000000000000000000000000000000000000..245e5116a3b6df9747388c044752ca6dfa761d1a --- /dev/null +++ b/libavutil/motion_vector.h @@ -0,0 +1,50 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_MOTION_VECTOR_H +#define AVUTIL_MOTION_VECTOR_H + +#include <stdint.h> + +typedef struct AVMotionVector { + /** + * Where the current macroblock comes from; negative value when it comes + * from the past, positive value when it comes from the future. + * XXX: set exact relative ref frame reference instead of a +/- 1 "direction". + */ + int32_t source; + /** + * Width and height of the block. + */ + uint8_t w, h; + /** + * Absolute source position. + */ + uint16_t src_x, src_y; + /** + * Absolute destination position. + */ + uint16_t dst_x, dst_y; + /** + * Extra flag information. + * Currently unused. + */ + uint64_t flags; +} AVMotionVector; + +#endif /* AVUTIL_MOTION_VECTOR_H */ diff --git a/libavutil/version.h b/libavutil/version.h index b86609b08a696b842d1fd2cfc4bf9df18b977e9d..d42209dcc9e5c55ffc3582c54a904b805071a4fe 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -56,7 +56,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 54 -#define LIBAVUTIL_VERSION_MINOR 4 +#define LIBAVUTIL_VERSION_MINOR 5 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \