    /*
     * Matroska file demuxer
    
     * Copyright (c) 2003-2008 The FFmpeg Project
    
     *
     * This file is part of FFmpeg.
     *
     * FFmpeg is free software; you can redistribute it and/or
     * modify it under the terms of the GNU Lesser General Public
     * License as published by the Free Software Foundation; either
     * version 2.1 of the License, or (at your option) any later version.
     *
     * FFmpeg is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     * Lesser General Public License for more details.
     *
     * You should have received a copy of the GNU Lesser General Public
     * License along with FFmpeg; if not, write to the Free Software
     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     */
    
    /**
    
     * @file libavformat/matroskadec.c
    
     * Matroska file demuxer
     * by Ronald Bultje <rbultje@ronald.bitfreak.net>
     * with a little help from Moritz Bunkus <moritz@bunkus.org>
    
     * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
    
     * Specs available on the Matroska project page: http://www.matroska.org/.
     */
    
    /* For ff_codec_get_id(). */
    
    #include "libavcodec/mpeg4audio.h"
    
    #include "libavutil/intfloat_readwrite.h"
    
    #include "libavutil/intreadwrite.h"
    
    #include "libavutil/avstring.h"
    
    #include "libavutil/lzo.h"
    
    /**
     * Payload kind of a syntax-table entry; ebml_parse_elem() dispatches on it.
     */
    typedef enum {
        EBML_NONE,   /* no dedicated reader: the payload is seeked over */
        EBML_UINT,   /* read with ebml_read_uint() into a uint64_t */
        EBML_FLOAT,  /* read with ebml_read_float() into a double */
        EBML_STR,    /* read with ebml_read_ascii() into a char * */
        EBML_UTF8,   /* handled exactly like EBML_STR */
        EBML_BIN,    /* read with ebml_read_binary() into an EbmlBin */
        EBML_NEST,   /* master element: children parsed with the def.n table */
        EBML_PASS,   /* no length is read; same ID is re-dispatched through def.n */
        EBML_STOP,   /* no length is read; stores 1 into an int and returns 1 */
    } EbmlType;
    
    /**
     * One row of a parsing table. A table is an array of these terminated by
     * an entry whose id is 0.
     */
    typedef const struct EbmlSyntax {
        uint32_t id;          /* element ID this row matches; 0 ends the table */
        EbmlType type;        /* how the payload is read */
        int list_elem_size;   /* if non-zero, the target is an EbmlList and a new
                               * zeroed element of this size is appended per match */
        int data_offset;      /* byte offset added to the caller's data pointer */
        union {
            uint64_t    u;    /* default for EBML_UINT */
            double      f;    /* default for EBML_FLOAT */
            const char *s;    /* default for EBML_STR / EBML_UTF8 (av_strdup'ed) */
            const struct EbmlSyntax *n;  /* child table for EBML_NEST / EBML_PASS */
        } def;
    } EbmlSyntax;
    
    /**
     * Growable array filled by ebml_parse_elem() for rows with a non-zero
     * list_elem_size; the element type is implied by the syntax row.
     */
    typedef struct {
        int nb_elem;   /* number of elements currently stored */
        void *elem;    /* av_realloc'ed storage holding nb_elem elements */
    } EbmlList;
    
    /**
     * Binary payload as stored by ebml_read_binary().
     */
    typedef struct {
        int      size;   /* payload length in bytes */
        uint8_t *data;   /* av_malloc'ed copy of the payload */
        int64_t  pos;    /* url_ftell() position taken just before the payload is read */
    } EbmlBin;
    
    
    /**
     * Values collected from the EBML header; filled through ebml_header[].
     */
    typedef struct {
        uint64_t version;          /* from EBML_ID_EBMLREADVERSION */
        uint64_t max_size;         /* from EBML_ID_EBMLMAXSIZELENGTH */
        uint64_t id_length;        /* from EBML_ID_EBMLMAXIDLENGTH */
        char    *doctype;          /* from EBML_ID_DOCTYPE */
        uint64_t doctype_version;  /* from EBML_ID_DOCTYPEREADVERSION */
    } Ebml;
    
    
    /**
     * Compression settings of one content encoding; filled through
     * matroska_track_encoding_compression[].
     */
    typedef struct {
        uint64_t algo;      /* from MATROSKA_ID_ENCODINGCOMPALGO */
        EbmlBin  settings;  /* from MATROSKA_ID_ENCODINGCOMPSETTINGS */
    } MatroskaTrackCompression;
    
    /**
     * One content encoding of a track; filled through matroska_track_encoding[].
     */
    typedef struct {
        uint64_t scope;  /* from MATROSKA_ID_ENCODINGSCOPE */
        uint64_t type;   /* from MATROSKA_ID_ENCODINGTYPE */
        MatroskaTrackCompression compression;  /* from MATROSKA_ID_ENCODINGCOMPRESSION */
    } MatroskaTrackEncoding;
    
    /**
     * Video-specific track settings; filled through matroska_track_video[].
     */
    typedef struct {
        double   frame_rate;      /* from MATROSKA_ID_VIDEOFRAMERATE */
        uint64_t display_width;   /* from MATROSKA_ID_VIDEODISPLAYWIDTH */
        uint64_t display_height;  /* from MATROSKA_ID_VIDEODISPLAYHEIGHT */
        uint64_t pixel_width;     /* from MATROSKA_ID_VIDEOPIXELWIDTH */
        uint64_t pixel_height;    /* from MATROSKA_ID_VIDEOPIXELHEIGHT */
        uint64_t fourcc;          /* from MATROSKA_ID_VIDEOCOLORSPACE */
    } MatroskaTrackVideo;
    
    /**
     * Audio-specific track settings. The first four fields are filled through
     * matroska_track_audio[]; the remaining ones are not referenced by any
     * syntax table.
     */
    typedef struct {
        double   samplerate;      /* from MATROSKA_ID_AUDIOSAMPLINGFREQ */
        double   out_samplerate;  /* from MATROSKA_ID_AUDIOOUTSAMPLINGFREQ */
        uint64_t bitdepth;        /* from MATROSKA_ID_AUDIOBITDEPTH */
        uint64_t channels;        /* from MATROSKA_ID_AUDIOCHANNELS */
    
        /* real audio header (extracted from extradata) */
        int      coded_framesize;
        int      sub_packet_h;
        int      frame_size;
        int      sub_packet_size;
        int      sub_packet_cnt;
        int      pkt_cnt;
        uint8_t *buf;
    } MatroskaTrackAudio;
    
    typedef struct {
        uint64_t num;
    
        char    *codec_id;
        EbmlBin  codec_priv;
        char    *language;
    
        uint64_t flag_default;
    
        MatroskaTrackVideo video;
        MatroskaTrackAudio audio;
        EbmlList encodings;
    
        char *filename;
        char *mime;
        EbmlBin bin;
    
    typedef struct {
        uint64_t start;
        uint64_t end;
        uint64_t uid;
        char    *title;
    
    /**
     * One track position inside a cue point; filled through matroska_index_pos[].
     */
    typedef struct {
        uint64_t track;  /* from MATROSKA_ID_CUETRACK */
        uint64_t pos;    /* from MATROSKA_ID_CUECLUSTERPOSITION */
    } MatroskaIndexPos;
    
    /**
     * One cue point; filled through matroska_index_entry[].
     */
    typedef struct {
        uint64_t time;  /* from MATROSKA_ID_CUETIME */
        EbmlList pos;   /* list of MatroskaIndexPos, one per MATROSKA_ID_CUETRACKPOSITION */
    } MatroskaIndex;
    
    
    /**
     * One simple tag; filled through matroska_simpletag[].
     * NOTE(review): matroska_simpletag[] also takes offsetof() of members
     * named lang and def, which are not declared in the struct as shown
     * here - confirm against the complete file.
     */
    typedef struct {
        char *name;    /* from MATROSKA_ID_TAGNAME */
        char *string;  /* from MATROSKA_ID_TAGSTRING */
    
        EbmlList sub;  /* nested MatroskaTag elements (MATROSKA_ID_SIMPLETAG) */
    } MatroskaTag;
    
    
    /**
     * Target description of a tag group; filled through matroska_tagtargets[].
     */
    typedef struct {
        char    *type;        /* from MATROSKA_ID_TAGTARGETS_TYPE */
        uint64_t typevalue;   /* from MATROSKA_ID_TAGTARGETS_TYPEVALUE */
        uint64_t trackuid;    /* from MATROSKA_ID_TAGTARGETS_TRACKUID */
        uint64_t chapteruid;  /* from MATROSKA_ID_TAGTARGETS_CHAPTERUID */
        uint64_t attachuid;   /* from MATROSKA_ID_TAGTARGETS_ATTACHUID */
    } MatroskaTagTarget;
    
    /**
     * One tag group: a target plus its simple tags; filled through matroska_tag[].
     */
    typedef struct {
        MatroskaTagTarget target;  /* from MATROSKA_ID_TAGTARGETS */
        EbmlList tag;              /* list of MatroskaTag (MATROSKA_ID_SIMPLETAG) */
    } MatroskaTags;
    
    
    /**
     * One seek-head entry; filled through matroska_seekhead_entry[].
     */
    typedef struct {
        uint64_t id;   /* from MATROSKA_ID_SEEKID */
        uint64_t pos;  /* from MATROSKA_ID_SEEKPOSITION; table default is -1 */
    } MatroskaSeekhead;
    
    
    typedef struct {
    
        uint64_t start;
        uint64_t length;
    
    typedef struct {
    
        /* EBML stuff */
    
        int num_levels;
        MatroskaLevel levels[EBML_MAX_DEPTH];
        int level_up;
    
    
        uint64_t time_scale;
        double   duration;
        char    *title;
    
    
        /* byte position of the segment inside the stream */
    
        /* the packet queue */
    
    
        /* What to skip before effectively reading a packet. */
        int skip_to_keyframe;
    
    /**
     * One block of a cluster; filled through matroska_blockgroup[].
     * NOTE(review): matroska_blockgroup[] also takes offsetof() of a member
     * named non_simple, which is not declared in the struct as shown here -
     * confirm against the complete file.
     */
    typedef struct {
        uint64_t duration;   /* from MATROSKA_ID_BLOCKDURATION */
        int64_t  reference;  /* from MATROSKA_ID_BLOCKREFERENCE (read via the UINT reader) */
    
        EbmlBin  bin;        /* payload of MATROSKA_ID_BLOCK or MATROSKA_ID_SIMPLEBLOCK */
    } MatroskaBlock;
    
    /**
     * One parsed cluster; filled through matroska_cluster[].
     */
    typedef struct {
        uint64_t timecode;  /* from MATROSKA_ID_CLUSTERTIMECODE */
        EbmlList blocks;    /* list of MatroskaBlock (block groups and simple blocks) */
    } MatroskaCluster;
    
    
    /*
     * Children of the EBML header element, stored into an Ebml struct.
     * The last initializer of each row is the default applied before parsing.
     * EBML_NONE rows are known IDs whose payload is skipped without the
     * "Unknown entry" log message.
     */
    static EbmlSyntax ebml_header[] = {
        { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
        { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
        { EBML_ID_EBMLMAXIDLENGTH,        EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
        { EBML_ID_DOCTYPE,                EBML_STR,  0, offsetof(Ebml,doctype), {.s="(none)"} },
        { EBML_ID_DOCTYPEREADVERSION,     EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
        { EBML_ID_EBMLVERSION,            EBML_NONE },
        { EBML_ID_DOCTYPEVERSION,         EBML_NONE },
        { 0 }
    };
    
    /* Top-level table: the EBML header master element, parsed with ebml_header[]
     * into the same data pointer (offset 0). */
    static EbmlSyntax ebml_syntax[] = {
        { EBML_ID_HEADER,                 EBML_NEST, 0, 0, {.n=ebml_header} },
        { 0 }
    };
    
    
    /*
     * Children of the segment info element, stored directly into
     * MatroskaDemuxContext. The time scale defaults to 1000000 when absent.
     * EBML_NONE rows are known IDs that are skipped silently.
     */
    static EbmlSyntax matroska_info[] = {
        { MATROSKA_ID_TIMECODESCALE,      EBML_UINT,  0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
        { MATROSKA_ID_DURATION,           EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
        { MATROSKA_ID_TITLE,              EBML_UTF8,  0, offsetof(MatroskaDemuxContext,title) },
        { MATROSKA_ID_WRITINGAPP,         EBML_NONE },
        { MATROSKA_ID_MUXINGAPP,          EBML_NONE },
        { MATROSKA_ID_DATEUTC,            EBML_NONE },
        { MATROSKA_ID_SEGMENTUID,         EBML_NONE },
        { 0 }
    };
    
    
    /*
     * Children of a track's video element, stored into MatroskaTrackVideo.
     * No row carries an explicit default, so absent values stay at zero.
     * EBML_NONE rows are known IDs that are skipped silently.
     */
    static EbmlSyntax matroska_track_video[] = {
        { MATROSKA_ID_VIDEOFRAMERATE,     EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
        { MATROSKA_ID_VIDEODISPLAYWIDTH,  EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
        { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
        { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
        { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
        { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
    
        { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
        { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
        { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
        { MATROSKA_ID_VIDEOPIXELCROPR,    EBML_NONE },
        { MATROSKA_ID_VIDEODISPLAYUNIT,   EBML_NONE },
    
        { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
        { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
        { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
        { 0 }
    };
    
    /*
     * Children of a track's audio element, stored into MatroskaTrackAudio.
     * Defaults: sampling frequency 8000.0 and one channel.
     */
    static EbmlSyntax matroska_track_audio[] = {
        { MATROSKA_ID_AUDIOSAMPLINGFREQ,  EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
        { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
        { MATROSKA_ID_AUDIOBITDEPTH,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
        { MATROSKA_ID_AUDIOCHANNELS,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
        { 0 }
    };
    
    /* Children of a content-compression element, stored into
     * MatroskaTrackCompression; the algorithm defaults to 0. */
    static EbmlSyntax matroska_track_encoding_compression[] = {
        { MATROSKA_ID_ENCODINGCOMPALGO,   EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
        { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
        { 0 }
    };
    
    /*
     * Children of one content-encoding element, stored into
     * MatroskaTrackEncoding. Defaults: scope 1, type 0. The encoding order
     * element is recognized but skipped.
     */
    static EbmlSyntax matroska_track_encoding[] = {
        { MATROSKA_ID_ENCODINGSCOPE,      EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
        { MATROSKA_ID_ENCODINGTYPE,       EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
        { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
    
        { MATROSKA_ID_ENCODINGORDER,      EBML_NONE },
    
        { 0 }
    };
    
    /* Children of the content-encodings element: each content encoding appends
     * one MatroskaTrackEncoding to MatroskaTrack.encodings. */
    static EbmlSyntax matroska_track_encodings[] = {
        { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
        { 0 }
    };
    
    /*
     * Children of one track entry, stored into MatroskaTrack.
     * Defaults: language "eng", track time scale 1.0, default flag 1.
     * The content-encodings row nests with offset 0, so its child table
     * addresses the same MatroskaTrack. EBML_NONE rows are known IDs that
     * are skipped silently.
     */
    static EbmlSyntax matroska_track[] = {
        { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
    
        { MATROSKA_ID_TRACKNAME,            EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
    
        { MATROSKA_ID_TRACKUID,             EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
    
        { MATROSKA_ID_TRACKTYPE,            EBML_UINT, 0, offsetof(MatroskaTrack,type) },
        { MATROSKA_ID_CODECID,              EBML_STR,  0, offsetof(MatroskaTrack,codec_id) },
        { MATROSKA_ID_CODECPRIVATE,         EBML_BIN,  0, offsetof(MatroskaTrack,codec_priv) },
        { MATROSKA_ID_TRACKLANGUAGE,        EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
        { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
        { MATROSKA_ID_TRACKTIMECODESCALE,   EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
        { MATROSKA_ID_TRACKFLAGDEFAULT,     EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
        { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
        { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
        { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
        { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
        { MATROSKA_ID_TRACKFLAGFORCED,      EBML_NONE },
        { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
        { MATROSKA_ID_CODECNAME,            EBML_NONE },
        { MATROSKA_ID_CODECDECODEALL,       EBML_NONE },
        { MATROSKA_ID_CODECINFOURL,         EBML_NONE },
        { MATROSKA_ID_CODECDOWNLOADURL,     EBML_NONE },
        { MATROSKA_ID_TRACKMINCACHE,        EBML_NONE },
        { MATROSKA_ID_TRACKMAXCACHE,        EBML_NONE },
    
        { MATROSKA_ID_TRACKMAXBLKADDID,     EBML_NONE },
    
        { 0 }
    };
    
    /* Children of the tracks element: each track entry appends one
     * MatroskaTrack to MatroskaDemuxContext.tracks. */
    static EbmlSyntax matroska_tracks[] = {
        { MATROSKA_ID_TRACKENTRY,         EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
        { 0 }
    };
    
    
    /* Children of one attached file, stored into MatroskaAttachement
     * (uid, file name, MIME type and the raw file data). */
    static EbmlSyntax matroska_attachment[] = {
    
        { MATROSKA_ID_FILEUID,            EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
    
        { MATROSKA_ID_FILENAME,           EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
        { MATROSKA_ID_FILEMIMETYPE,       EBML_STR,  0, offsetof(MatroskaAttachement,mime) },
        { MATROSKA_ID_FILEDATA,           EBML_BIN,  0, offsetof(MatroskaAttachement,bin) },
    
        { 0 }
    };
    
    /* Children of the attachments element: each attached file appends one
     * MatroskaAttachement to MatroskaDemuxContext.attachments. */
    static EbmlSyntax matroska_attachments[] = {
        { MATROSKA_ID_ATTACHEDFILE,       EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
        { 0 }
    };
    
    
    /* Children of a chapter display element. It is nested with offset 0 from
     * matroska_chapter_entry[], so the string lands in MatroskaChapter.title. */
    static EbmlSyntax matroska_chapter_display[] = {
        { MATROSKA_ID_CHAPSTRING,         EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
    
        { 0 }
    };
    
    /*
     * Children of one chapter atom, stored into MatroskaChapter.
     * Start and end default to AV_NOPTS_VALUE. A chapter atom nested inside
     * another atom is listed as EBML_NONE here, i.e. it is skipped.
     */
    static EbmlSyntax matroska_chapter_entry[] = {
        { MATROSKA_ID_CHAPTERTIMESTART,   EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
        { MATROSKA_ID_CHAPTERTIMEEND,     EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
        { MATROSKA_ID_CHAPTERUID,         EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
        { MATROSKA_ID_CHAPTERDISPLAY,     EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
        { MATROSKA_ID_CHAPTERFLAGHIDDEN,  EBML_NONE },
    
        { MATROSKA_ID_CHAPTERFLAGENABLED, EBML_NONE },
        { MATROSKA_ID_CHAPTERPHYSEQUIV,   EBML_NONE },
        { MATROSKA_ID_CHAPTERATOM,        EBML_NONE },
    
        { 0 }
    };
    
    /* Children of one edition entry: each chapter atom appends one
     * MatroskaChapter to MatroskaDemuxContext.chapters; the edition
     * uid and flags are recognized but skipped. */
    static EbmlSyntax matroska_chapter[] = {
        { MATROSKA_ID_CHAPTERATOM,        EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
        { MATROSKA_ID_EDITIONUID,         EBML_NONE },
        { MATROSKA_ID_EDITIONFLAGHIDDEN,  EBML_NONE },
        { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
    
        { MATROSKA_ID_EDITIONFLAGORDERED, EBML_NONE },
    
        { 0 }
    };
    
    /* Children of the chapters element: edition entries, parsed with
     * matroska_chapter[] against the same data pointer (offset 0). */
    static EbmlSyntax matroska_chapters[] = {
        { MATROSKA_ID_EDITIONENTRY,       EBML_NEST, 0, 0, {.n=matroska_chapter} },
        { 0 }
    };
    
    
    /* Children of one cue track position, stored into MatroskaIndexPos;
     * the cue block number is recognized but skipped. */
    static EbmlSyntax matroska_index_pos[] = {
        { MATROSKA_ID_CUETRACK,           EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
        { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos)   },
    
        { MATROSKA_ID_CUEBLOCKNUMBER,     EBML_NONE },
    
        { 0 }
    };
    
    /* Children of one cue point, stored into MatroskaIndex: the cue time plus
     * one MatroskaIndexPos appended to .pos per track position. */
    static EbmlSyntax matroska_index_entry[] = {
        { MATROSKA_ID_CUETIME,            EBML_UINT, 0, offsetof(MatroskaIndex,time) },
        { MATROSKA_ID_CUETRACKPOSITION,   EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
        { 0 }
    };
    
    /* Children of the cues element: each point entry appends one
     * MatroskaIndex to MatroskaDemuxContext.index. */
    static EbmlSyntax matroska_index[] = {
        { MATROSKA_ID_POINTENTRY,         EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
        { 0 }
    };
    
    
    /*
     * Children of one simple tag, stored into MatroskaTag. The language
     * defaults to "und". The table refers to itself: a nested simple tag
     * appends a MatroskaTag to the parent's .sub list.
     */
    static EbmlSyntax matroska_simpletag[] = {
        { MATROSKA_ID_TAGNAME,            EBML_UTF8, 0, offsetof(MatroskaTag,name) },
        { MATROSKA_ID_TAGSTRING,          EBML_UTF8, 0, offsetof(MatroskaTag,string) },
    
        { MATROSKA_ID_TAGLANG,            EBML_STR,  0, offsetof(MatroskaTag,lang), {.s="und"} },
        { MATROSKA_ID_TAGDEFAULT,         EBML_UINT, 0, offsetof(MatroskaTag,def) },
    
        { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
        { 0 }
    };
    
    
    /* Children of a tag targets element, stored into MatroskaTagTarget;
     * the type value defaults to 50. */
    static EbmlSyntax matroska_tagtargets[] = {
        { MATROSKA_ID_TAGTARGETS_TYPE,      EBML_STR,  0, offsetof(MatroskaTagTarget,type) },
        { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
        { MATROSKA_ID_TAGTARGETS_TRACKUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
        { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) },
        { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
        { 0 }
    };
    
    
    static EbmlSyntax matroska_tag[] = {
    
        { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
        { MATROSKA_ID_TAGTARGETS,         EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
    
    static EbmlSyntax matroska_tags[] = {
    
        { MATROSKA_ID_TAG,                EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
    
    /* Children of one seek entry, stored into MatroskaSeekhead. The position
     * defaults to -1, which becomes all-ones in the uint64_t field. */
    static EbmlSyntax matroska_seekhead_entry[] = {
        { MATROSKA_ID_SEEKID,             EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
        { MATROSKA_ID_SEEKPOSITION,       EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
        { 0 }
    };
    
    /* Children of the seek head element: each seek entry appends one
     * MatroskaSeekhead to MatroskaDemuxContext.seekhead. */
    static EbmlSyntax matroska_seekhead[] = {
        { MATROSKA_ID_SEEKENTRY,          EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
        { 0 }
    };
    
    
    /*
     * Children of the segment element. All sub-tables are nested with
     * offset 0, i.e. they address MatroskaDemuxContext directly. The cluster
     * row is EBML_STOP: ebml_parse_elem() stores 1 into has_cluster_id and
     * returns 1 without reading the cluster, which ends the nest loop.
     */
    static EbmlSyntax matroska_segment[] = {
        { MATROSKA_ID_INFO,           EBML_NEST, 0, 0, {.n=matroska_info       } },
        { MATROSKA_ID_TRACKS,         EBML_NEST, 0, 0, {.n=matroska_tracks     } },
        { MATROSKA_ID_ATTACHMENTS,    EBML_NEST, 0, 0, {.n=matroska_attachments} },
        { MATROSKA_ID_CHAPTERS,       EBML_NEST, 0, 0, {.n=matroska_chapters   } },
        { MATROSKA_ID_CUES,           EBML_NEST, 0, 0, {.n=matroska_index      } },
        { MATROSKA_ID_TAGS,           EBML_NEST, 0, 0, {.n=matroska_tags       } },
        { MATROSKA_ID_SEEKHEAD,       EBML_NEST, 0, 0, {.n=matroska_seekhead   } },
        { MATROSKA_ID_CLUSTER,        EBML_STOP, 0, offsetof(MatroskaDemuxContext,has_cluster_id) },
        { 0 }
    };
    
    /* Top-level table for the segment element; ebml_parse_elem() also records
     * matroska->segment_start when it enters this element. */
    static EbmlSyntax matroska_segments[] = {
        { MATROSKA_ID_SEGMENT,        EBML_NEST, 0, 0, {.n=matroska_segment    } },
        { 0 }
    };
    
    
    /*
     * Children of a block group, stored into MatroskaBlock. Block and simple
     * block payloads share the .bin field; the duration defaults to
     * AV_NOPTS_VALUE.
     * The row with ID 1 exists for its default: ebml_parse_nest() writes
     * non_simple = 1 when a block group is parsed, whereas the EBML_PASS path
     * used for bare simple blocks skips ebml_parse_nest() and leaves the
     * freshly zeroed element at 0. (Presumably ID 1 never matches a real
     * element - TODO confirm.)
     */
    static EbmlSyntax matroska_blockgroup[] = {
        { MATROSKA_ID_BLOCK,          EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
        { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
        { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} },
        { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
    
        { 1,                          EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
    
        { 0 }
    };
    
    /*
     * Children of a cluster, stored into MatroskaCluster. Both block groups
     * and bare simple blocks append a MatroskaBlock to .blocks; the simple
     * block row is EBML_PASS, so the same ID is re-dispatched through
     * matroska_blockgroup[] without reading a length first. Cluster position
     * and previous-size elements are recognized but skipped.
     */
    static EbmlSyntax matroska_cluster[] = {
        { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
        { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
        { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    
        { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
        { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
    
        { 0 }
    };
    
    static EbmlSyntax matroska_clusters[] = {
        { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster} },
    
        { MATROSKA_ID_INFO,           EBML_NONE },
        { MATROSKA_ID_CUES,           EBML_NONE },
        { MATROSKA_ID_TAGS,           EBML_NONE },
        { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
    
     * Return: Whether we reached the end of a level in the hierarchy or not.
    
    static int ebml_level_end(MatroskaDemuxContext *matroska)
    
        ByteIOContext *pb = matroska->ctx->pb;
    
        int64_t pos = url_ftell(pb);
    
        if (matroska->num_levels > 0) {
    
            MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
    
            if (pos - level->start >= level->length) {
    
    }
    
    /*
     * Read: an "EBML number", which is defined as a variable-length
     * array of bytes. The first byte indicates the length by giving a
     * number of 0-bits followed by a one. The position of the first
     * "one" bit inside the first byte indicates the length of this
     * number.
    
     * Returns: number of bytes read, < 0 on error
    
    static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb,
    
    {
        int len_mask = 0x80, read = 1, n = 1;
        int64_t total = 0;
    
    
        /* The first byte tells us the length in bytes - get_byte() can normally
    
         * return 0, but since that's not a valid first ebmlID byte, we can
         * use it safely here to catch EOS. */
        if (!(total = get_byte(pb))) {
            /* we might encounter EOS here */
            if (!url_feof(pb)) {
    
                int64_t pos = url_ftell(pb);
    
                av_log(matroska->ctx, AV_LOG_ERROR,
                       "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
                       pos, pos);
            }
    
            return AVERROR(EIO); /* EOS or actual I/O error */
    
        }
    
        /* get the length of the EBML number */
        while (read <= max_size && !(total & len_mask)) {
            read++;
            len_mask >>= 1;
        }
        if (read > max_size) {
    
            int64_t pos = url_ftell(pb) - 1;
    
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
                   (uint8_t) total, pos, pos);
            return AVERROR_INVALIDDATA;
        }
    
        /* read out length */
        total &= ~len_mask;
        while (n++ < read)
            total = (total << 8) | get_byte(pb);
    
        *number = total;
    
        return read;
    }
    
    /*
     * Read the next element as an unsigned int.
     * 0 is success, < 0 is failure.
     */
    
    static int ebml_read_uint(ByteIOContext *pb, int size, uint64_t *num)
    
        /* big-endian ordering; build up number */
    
        *num = 0;
        while (n++ < size)
            *num = (*num << 8) | get_byte(pb);
    
        return 0;
    }
    
    /*
     * Read the next element as a float.
     * 0 is success, < 0 is failure.
     */
    
    static int ebml_read_float(ByteIOContext *pb, int size, double *num)
    
    {
        if (size == 4) {
            *num= av_int2flt(get_be32(pb));
        } else if(size==8){
            *num= av_int2dbl(get_be64(pb));
    
            return AVERROR_INVALIDDATA;
    
        return 0;
    }
    
    /*
     * Read the next element as an ASCII string.
     * 0 is success, < 0 is failure.
     */
    
    static int ebml_read_ascii(ByteIOContext *pb, int size, char **str)
    
        /* EBML strings are usually not 0-terminated, so we allocate one
    
         * byte more, read the string and NULL-terminate it ourselves. */
    
        if (!(*str = av_malloc(size + 1)))
    
        if (get_buffer(pb, (uint8_t *) *str, size) != size) {
    
            av_free(*str);
    
    /*
     * Read the next element as binary data.
     *
     * Any buffer previously held by *bin is released first. On success
     * bin->data owns a freshly allocated copy of the payload, bin->size is
     * its length and bin->pos is the stream position at which the payload
     * started. On failure bin->data is NULL and bin->size is 0, so callers
     * never see a size that does not match the buffer contents.
     *
     * 0 is success, < 0 is failure (AVERROR(ENOMEM) or AVERROR(EIO)).
     */
    static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin)
    {
        av_free(bin->data);
        /* Keep size consistent with data while the buffer is being replaced. */
        bin->size = 0;
        if (!(bin->data = av_malloc(length)))
            return AVERROR(ENOMEM);
    
        bin->pos  = url_ftell(pb);
        if (get_buffer(pb, bin->data, length) != length) {
            /* Short read: do not hand out a partially filled buffer. */
            av_freep(&bin->data);
            return AVERROR(EIO);
        }
        bin->size = length;
    
        return 0;
    }
    
    
    /*
     * Read the next element, but only the header. The contents
     * are supposed to be sub-elements which can be read separately.
     * 0 is success, < 0 is failure.
     */
    
    static int ebml_read_master(MatroskaDemuxContext *matroska, int length)
    
        ByteIOContext *pb = matroska->ctx->pb;
    
        MatroskaLevel *level;
    
        if (matroska->num_levels >= EBML_MAX_DEPTH) {
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
    
        }
    
        level = &matroska->levels[matroska->num_levels++];
        level->start = url_ftell(pb);
        level->length = length;
    
        return 0;
    }
    
    /*
     * Read signed/unsigned "EBML" numbers.
    
     * Return: number of bytes processed, < 0 on error
    
    static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
                                     uint8_t *data, uint32_t size, uint64_t *num)
    
        ByteIOContext pb;
        init_put_byte(&pb, data, size, 0, NULL, NULL, NULL, NULL);
        return ebml_read_num(matroska, &pb, 8, num);
    
    static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
                                     uint8_t *data, uint32_t size, int64_t *num)
    
    {
        uint64_t unum;
        int res;
    
        /* read as unsigned number first */
    
        if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
    
        *num = unum - ((1LL << (7*res - 1)) - 1);
    
    static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                               EbmlSyntax *syntax, void *data);
    
    static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                             uint32_t id, void *data)
    
        for (i=0; syntax[i].id; i++)
            if (id == syntax[i].id)
                break;
    
        if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32)
    
            av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
        return ebml_parse_elem(matroska, &syntax[i], data);
    
    static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                          void *data)
    
        uint64_t id;
        int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
        id |= 1 << 7*res;
    
        return res < 0 ? res : ebml_parse_id(matroska, syntax, id, data);
    
    static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                               void *data)
    
        for (i=0; syntax[i].id; i++)
            switch (syntax[i].type) {
            case EBML_UINT:
                *(uint64_t *)((char *)data+syntax[i].data_offset) = syntax[i].def.u;
                break;
            case EBML_FLOAT:
                *(double   *)((char *)data+syntax[i].data_offset) = syntax[i].def.f;
                break;
            case EBML_STR:
            case EBML_UTF8:
                *(char    **)((char *)data+syntax[i].data_offset) = av_strdup(syntax[i].def.s);
                break;
    
        while (!res && !ebml_level_end(matroska))
            res = ebml_parse(matroska, syntax, data);
    
    static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                               EbmlSyntax *syntax, void *data)
    {
    
        ByteIOContext *pb = matroska->ctx->pb;
    
        uint32_t id = syntax->id;
    
        int res;
    
        data = (char *)data + syntax->data_offset;
        if (syntax->list_elem_size) {
            EbmlList *list = data;
            list->elem = av_realloc(list->elem, (list->nb_elem+1)*syntax->list_elem_size);
            data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
            memset(data, 0, syntax->list_elem_size);
            list->nb_elem++;
        }
    
    
        if (syntax->type != EBML_PASS && syntax->type != EBML_STOP)
    
            if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0)
    
        switch (syntax->type) {
    
        case EBML_UINT:  res = ebml_read_uint  (pb, length, data);  break;
        case EBML_FLOAT: res = ebml_read_float (pb, length, data);  break;
    
        case EBML_STR:
    
        case EBML_UTF8:  res = ebml_read_ascii (pb, length, data);  break;
    
        case EBML_BIN:   res = ebml_read_binary(pb, length, data);  break;
    
        case EBML_NEST:  if ((res=ebml_read_master(matroska, length)) < 0)
    
                             return res;
                         if (id == MATROSKA_ID_SEGMENT)
                             matroska->segment_start = url_ftell(matroska->ctx->pb);
    
                         return ebml_parse_nest(matroska, syntax->def.n, data);
    
        case EBML_PASS:  return ebml_parse_id(matroska, syntax->def.n, id, data);
    
        case EBML_STOP:  *(int *)data = 1;      return 1;
    
        default:         return url_fseek(pb,length,SEEK_CUR)<0 ? AVERROR(EIO) : 0;
    
        if (res == AVERROR_INVALIDDATA)
            av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
        else if (res == AVERROR(EIO))
            av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
        return res;
    
    }
    
    /*
     * Release everything the parser allocated inside `data` according to
     * `syntax`: strings, binary buffers and, recursively, nested elements
     * and lists. `data` itself is not freed.
     *
     * Every freed pointer is reset to NULL and list/buffer sizes are reset
     * to 0, so calling this again on the same structure is harmless.
     */
    static void ebml_free(EbmlSyntax *syntax, void *data)
    {
        int i, j;
        for (i=0; syntax[i].id; i++) {
            void *data_off = (char *)data + syntax[i].data_offset;
            switch (syntax[i].type) {
            case EBML_STR:
            case EBML_UTF8:
                av_freep(data_off);
                break;
            case EBML_BIN: {
                EbmlBin *bin = data_off;
                av_freep(&bin->data);
                bin->size = 0;
                break;
            }
            case EBML_NEST:
                if (syntax[i].list_elem_size) {
                    EbmlList *list = data_off;
                    char *ptr = list->elem;
                    for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
                        ebml_free(syntax[i].def.n, ptr);
                    /* Do not leave a dangling array or a stale count behind. */
                    av_freep(&list->elem);
                    list->nb_elem = 0;
                } else
                    ebml_free(syntax[i].def.n, data_off);
                break;
            default:
                break;
            }
        }
    }
    
    
    
    /*
     * Autodetecting...
     *
     * Checks that the buffer starts with the EBML header ID, decodes the
     * header length that follows it, and then searches the header bytes for
     * the string "matroska".
     *
     * Returns AVPROBE_SCORE_MAX when the string is found inside a header that
     * is completely contained in the probe buffer, 0 otherwise.
     */
    static int matroska_probe(AVProbeData *p)
    {
        uint64_t total = 0;
        int len_mask = 0x80, size = 1, n = 1;
    
        static const char probe_data[] = "matroska";
    
        /* EBML header? */
    
        if (AV_RB32(p->buf) != EBML_ID_HEADER)
            return 0;
    
        /* length of header */
        total = p->buf[4];
        while (size <= 8 && !(total & len_mask)) {
            size++;
            len_mask >>= 1;
        }
        if (size > 8)
          return 0;
        total &= (len_mask - 1);
        while (n < size)
            total = (total << 8) | p->buf[4 + n++];
    
    
        /* Does the probe data contain the whole header? */
    
        if (p->buf_size < 4 + size + total)
          return 0;
    
        /* A header shorter than the doctype string cannot contain it. Without
         * this check the unsigned loop bound below can wrap around for tiny
         * headers and the scan would run far past the probe buffer. */
        if (total < sizeof(probe_data)-1)
            return 0;
    
        /* The header must contain the document type 'matroska'. For now,
    
         * we don't parse the whole header but simply check for the
         * availability of that array of characters inside the header.
         * Not fully fool-proof, but good enough. */
        for (n = 4+size; n <= 4+size+total-(sizeof(probe_data)-1); n++)
            if (!memcmp(p->buf+n, probe_data, sizeof(probe_data)-1))
                return AVPROBE_SCORE_MAX;
    
        return 0;
    }
    
    /*
     * Look up a track by its track number.
     *
     * Walks matroska->tracks in order and returns the first entry whose
     * num field equals `num`. When no entry matches, an error is logged on
     * matroska->ctx and NULL is returned.
     */
    static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
                                                     int num)
    {
        MatroskaTrack *candidate = matroska->tracks.elem;
        int remaining = matroska->tracks.nb_elem;
    
        while (remaining > 0) {
            if (candidate->num == num)
                return candidate;
            candidate++;
            remaining--;
        }
    
        av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
        return NULL;
    }
    
    
    /**
     * Undo the content compression described by the first encoding entry of
     * a track.
     *
     * @param buf      in: pointer to the encoded data; out: on a 0 return,
     *                 a newly allocated buffer holding the decoded data.
     *                 The input buffer itself is never freed here.
     * @param buf_size in: size of the encoded data; out: on a 0 return,
     *                 size of the decoded data
     * @param track    track whose encodings[0].compression selects the
     *                 algorithm
     * @return for header-stripped tracks, compression.settings.size with
     *         *buf and *buf_size left untouched; 0 after a successful
     *         decompression; -1 on failure or for an algorithm not handled
     *         here (*buf and *buf_size are left untouched in that case)
     */
    static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
                                      MatroskaTrack *track)
    {
        MatroskaTrackEncoding *encodings = track->encodings.elem;
        uint8_t* data = *buf;
        int isize = *buf_size;
        uint8_t* pkt_data = NULL;
        uint8_t* grown;
        int pkt_size = isize;
        int result = 0;
        int olen;

        switch (encodings[0].compression.algo) {
        case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
            return encodings[0].compression.settings.size;

        case MATROSKA_TRACK_ENCODING_COMP_LZO:
            do {
                /* av_lzo1x_decode() overwrites its input-length argument with
                 * the number of bytes left, so every retry must start from a
                 * fresh copy of the full input size. */
                int insize = isize;
                olen = pkt_size *= 3;
                grown = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING);
                if (!grown)
                    goto failed; /* pkt_data is still valid and freed below */
                pkt_data = grown;
                result = av_lzo1x_decode(pkt_data, &olen, data, &insize);
            } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
            if (result)
                goto failed;
            /* olen now holds the unused output space */
            pkt_size -= olen;
            break;

    #if CONFIG_ZLIB
        case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
            z_stream zstream = {0};
            if (inflateInit(&zstream) != Z_OK)
                return -1;
            zstream.next_in = data;
            zstream.avail_in = isize;
            do {
                pkt_size *= 3;
                grown = av_realloc(pkt_data, pkt_size);
                if (!grown) {
                    inflateEnd(&zstream);
                    goto failed;
                }
                pkt_data = grown;
                /* continue writing right after what was already produced */
                zstream.avail_out = pkt_size - zstream.total_out;
                zstream.next_out = pkt_data + zstream.total_out;
                result = inflate(&zstream, Z_NO_FLUSH);
            } while (result==Z_OK && pkt_size<10000000);
            pkt_size = zstream.total_out;
            inflateEnd(&zstream);
            if (result != Z_STREAM_END)
                goto failed;
            break;
        }
    #endif

    #if CONFIG_BZLIB
        case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
            bz_stream bzstream = {0};
            if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
                return -1;
            bzstream.next_in = data;
            bzstream.avail_in = isize;
            do {
                pkt_size *= 3;
                grown = av_realloc(pkt_data, pkt_size);
                if (!grown) {
                    BZ2_bzDecompressEnd(&bzstream);
                    goto failed;
                }
                pkt_data = grown;
                /* continue writing right after what was already produced */
                bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
                bzstream.next_out = pkt_data + bzstream.total_out_lo32;
                result = BZ2_bzDecompress(&bzstream);
            } while (result==BZ_OK && pkt_size<10000000);
            pkt_size = bzstream.total_out_lo32;
            BZ2_bzDecompressEnd(&bzstream);
            if (result != BZ_STREAM_END)
                goto failed;
            break;
        }
    #endif

        default:
            /* Nothing was decoded: report failure instead of handing a NULL
             * buffer with a non-zero size back to the caller. */
            return -1;
        }

        *buf = pkt_data;
        *buf_size = pkt_size;
        return 0;
     failed:
        av_free(pkt_data);
        return -1;
    }
    
    
    /**
     * Rewrite the payload of a subtitle packet as a full "Dialogue:" line.
     *
     * The payload is split at its first two commas: the text between them is
     * emitted as the first field after "Dialogue: ", and everything following
     * the second comma is appended after the generated start and end times.
     * The text before the first comma is dropped. Times are computed from
     * pkt->pts, display_duration and matroska->time_scale and printed as
     * h:mm:ss.cc. The trailing "%s" relies on the payload being
     * NUL-terminated past its end (NOTE(review): presumably guaranteed by
     * the packet padding - confirm).
     *
     * On success pkt->data is freed and replaced by a newly allocated string
     * and pkt->size becomes its length. The packet is left untouched when it
     * is empty, contains fewer than two commas, or the allocation fails.
     *
     * @param matroska         demuxer context supplying time_scale
     * @param pkt              packet to rewrite in place
     * @param display_duration duration added to pkt->pts to get the end time
     */
    static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
                                        AVPacket *pkt, uint64_t display_duration)
    {
        char *line, *layer, *ptr = pkt->data, *end;
        int64_t end_pts;
        int sc, ec;
        int sh, sm, ss, eh, em, es, len;

        if (pkt->size <= 0)
            return;
        end = ptr+pkt->size;

        /* first comma: start of the field reused as the leading field */
        for (; ptr<end-1 && *ptr!=','; ptr++);
        if (*ptr != ',')
            return;
        layer = ++ptr;

        /* second comma: end of that field, start of the remaining text */
        for (; ptr<end-1 && *ptr!=','; ptr++);
        if (ptr >= end || *ptr != ',')
            return;

        end_pts = pkt->pts + display_duration;
        sc = matroska->time_scale * pkt->pts / 10000000;
        ec = matroska->time_scale * end_pts  / 10000000;
        /* split the two values into hours, minutes, seconds, hundredths */
        sh = sc/360000;  sc -= 360000*sh;
        sm = sc/  6000;  sc -=   6000*sm;
        ss = sc/   100;  sc -=    100*ss;
        eh = ec/360000;  ec -= 360000*eh;
        em = ec/  6000;  ec -=   6000*em;
        es = ec/   100;  ec -=    100*es;

        /* Allocate before touching the payload so that a failed allocation
         * leaves the packet exactly as it was. */
        len = 50 + (end-(ptr+1)) + FF_INPUT_BUFFER_PADDING_SIZE;
        if (!(line = av_malloc(len)))
            return;

        /* terminate the leading field; ptr now points at the remaining text */
        *ptr++ = '\0';
        snprintf(line,len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
                 layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
        av_free(pkt->data);
        pkt->data = line;
        pkt->size = strlen(line);
    }
    
    
    /**
     * Append the payload of @p in to @p out and dispose of @p in.
     *
     * @param out packet whose data buffer is grown and extended
     * @param in  packet whose data is appended; it is always destructed and
     *            freed, whether or not the append succeeded
     *
     * If the buffer of @p out cannot be grown, @p out keeps its previous data
     * and size and the payload of @p in is dropped; the function has no way
     * of reporting this to the caller.
     */
    static void matroska_merge_packets(AVPacket *out, AVPacket *in)
    {
        /* Nothing to append for an empty packet; skipping also avoids a
         * zero-sized reallocation. */
        if (in->size > 0) {
            /* Grow through a temporary so the old buffer is neither leaked
             * nor replaced by NULL when the allocation fails. */
            uint8_t *merged = av_realloc(out->data, out->size+in->size);
            if (merged) {
                memcpy(merged+out->size, in->data, in->size);
                out->data = merged;
                out->size += in->size;
            }
        }
        av_destruct_packet(in);
        av_free(in);
    }
    
    
    static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
                                     AVMetadata **metadata, char *prefix)
    
    {
        MatroskaTag *tags = list->elem;
    
        char key[1024];
        int i;
    
    
        for (i=0; i < list->nb_elem; i++) {
    
            const char *lang = strcmp(tags[i].lang, "und") ? tags[i].lang : NULL;
    
            if (prefix)  snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
            else         av_strlcpy(key, tags[i].name, sizeof(key));