aacdec.c

/*
 * AAC decoder
 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
 *
 * AAC LATM decoder
 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
 * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC decoder
 * @author Oded Shimon  ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */

/*
 * supported tools
 *
 * Support?             Name
 * N (code in SoC repo) gain control
 * Y                    block switching
 * Y                    window shapes - standard
 * N                    window shapes - Low Delay
 * Y                    filterbank - standard
 * N (code in SoC repo) filterbank - Scalable Sample Rate
 * Y                    Temporal Noise Shaping
 * Y                    Long Term Prediction
 * Y                    intensity stereo
 * Y                    channel coupling
 * Y                    frequency domain prediction
 * Y                    Perceptual Noise Substitution
 * Y                    Mid/Side stereo
 * N                    Scalable Inverse AAC Quantization
 * N                    Frequency Selective Switch
 * N                    upsampling filter
 * Y                    quantization & coding - AAC
 * N                    quantization & coding - TwinVQ
 * N                    quantization & coding - BSAC
 * N                    AAC Error Resilience tools
 * N                    Error Resilience payload syntax
 * N                    Error Protection tool
 * N                    CELP
 * N                    Silence Compression
 * N                    HVXC
 * N                    HVXC 4kbits/s VR
 * N                    Structured Audio tools
 * N                    Structured Audio Sample Bank Format
 * N                    MIDI
 * N                    Harmonic and Individual Lines plus Noise
 * N                    Text-To-Speech Interface
 * Y                    Spectral Band Replication
 * Y (not in this code) Layer-1
 * Y (not in this code) Layer-2
 * Y (not in this code) Layer-3
 * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
 * Y                    Parametric Stereo
 * N                    Direct Stream Transfer
 *
 * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
 *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
           Parametric Stereo.
 */

#include "libavutil/float_dsp.h"
#include "avcodec.h"
#include "internal.h"
#include "get_bits.h"
#include "dsputil.h"
#include "fft.h"
#include "fmtconvert.h"
#include "lpc.h"
#include "kbdwin.h"
#include "sinewin.h"

#include "aac.h"
#include "aactab.h"
#include "aacdectab.h"
#include "cbrt_tablegen.h"
#include "sbr.h"
#include "aacsbr.h"
#include "mpeg4audio.h"
#include "aacadtsdec.h"
#include "libavutil/intfloat.h"

#include <assert.h>
#include <errno.h>
#include <math.h>
#include <string.h>

#if ARCH_ARM
#   include "arm/aac.h"
#endif

static VLC vlc_scalefactors;
static VLC vlc_spectral[11];

#define overread_err "Input buffer exhausted before END element found\n"

static int count_channels(uint8_t (*layout)[3], int tags)
{
    int i, sum = 0;
    for (i = 0; i < tags; i++) {
        int syn_ele = layout[i][0];
        int pos     = layout[i][2];
        sum += (1 + (syn_ele == TYPE_CPE)) *
               (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
    }
    return sum;
}

/**
 * Check for the channel element in the current channel position configuration.
 * If it exists, make sure the appropriate element is allocated and map the
 * channel order to match the internal FFmpeg channel layout.
 *
 * @param   che_pos current channel position configuration
 * @param   type channel element type
 * @param   id channel element id
 * @param   channels count of the number of channels in the configuration
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static av_cold int che_configure(AACContext *ac,
                                 enum ChannelPosition che_pos,
                                 int type, int id, int *channels)
{
    if (che_pos) {
        if (!ac->che[type][id]) {
            if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
                return AVERROR(ENOMEM);
            ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
        }
        if (type != TYPE_CCE) {
            if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
                av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
                return AVERROR_INVALIDDATA;
            }
            ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
            if (type == TYPE_CPE ||
                (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
                ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
            }
        }
    } else {
        if (ac->che[type][id])
            ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
        av_freep(&ac->che[type][id]);
    }
    return 0;
}

static int frame_configure_elements(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;
    int type, id, ch, ret;

    /* set channel pointers to internal buffers by default */
    for (type = 0; type < 4; type++) {
        for (id = 0; id < MAX_ELEM_ID; id++) {
            ChannelElement *che = ac->che[type][id];
            if (che) {
                che->ch[0].ret = che->ch[0].ret_buf;
                che->ch[1].ret = che->ch[1].ret_buf;
            }
        }
    }

    /* get output buffer */
    ac->frame.nb_samples = 2048;
    if ((ret = ff_get_buffer(avctx, &ac->frame)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return ret;
    }

    /* map output channel pointers to AVFrame data */
    for (ch = 0; ch < avctx->channels; ch++) {
        if (ac->output_element[ch])
            ac->output_element[ch]->ret = (float *)ac->frame.extended_data[ch];
    }

    return 0;
}

struct elem_to_channel {
    uint64_t av_position;
    uint8_t syn_ele;
    uint8_t elem_id;
    uint8_t aac_position;
};

static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
                       uint8_t (*layout_map)[3], int offset, uint64_t left,
    uint64_t right, int pos)
{
    if (layout_map[offset][0] == TYPE_CPE) {
        e2c_vec[offset] = (struct elem_to_channel) {
            .av_position = left | right, .syn_ele = TYPE_CPE,
            .elem_id = layout_map[offset    ][1], .aac_position = pos };
        return 1;
    } else {
        e2c_vec[offset]   = (struct elem_to_channel) {
            .av_position = left, .syn_ele = TYPE_SCE,
            .elem_id = layout_map[offset    ][1], .aac_position = pos };
        e2c_vec[offset + 1] = (struct elem_to_channel) {
            .av_position = right, .syn_ele = TYPE_SCE,
            .elem_id = layout_map[offset + 1][1], .aac_position = pos };
        return 2;
    }
}

static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, int *current) {
    int num_pos_channels = 0;
    int first_cpe = 0;
    int sce_parity = 0;
    int i;
    for (i = *current; i < tags; i++) {
        if (layout_map[i][2] != pos)
            break;
        if (layout_map[i][0] == TYPE_CPE) {
            if (sce_parity) {
                if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
                    sce_parity = 0;
                } else {
                    return -1;
                }
            }
            num_pos_channels += 2;
            first_cpe = 1;
        } else {
            num_pos_channels++;
            sce_parity ^= 1;
        }
    }
    if (sce_parity &&
        ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
            return -1;
    *current = i;
    return num_pos_channels;
}

static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
{
    int i, n, total_non_cc_elements;
    struct elem_to_channel e2c_vec[4*MAX_ELEM_ID] = {{ 0 }};
    int num_front_channels, num_side_channels, num_back_channels;
    uint64_t layout;

    if (FF_ARRAY_ELEMS(e2c_vec) < tags)
        return 0;

    i = 0;
    num_front_channels =
        count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
    if (num_front_channels < 0)
        return 0;
    num_side_channels =
        count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
    if (num_side_channels < 0)
        return 0;
    num_back_channels =
        count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
    if (num_back_channels < 0)
        return 0;

    i = 0;
    if (num_front_channels & 1) {
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position = AV_CH_FRONT_CENTER, .syn_ele = TYPE_SCE,
            .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_FRONT };
        i++;
        num_front_channels--;
    }
    if (num_front_channels >= 4) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_FRONT_LEFT_OF_CENTER,
                         AV_CH_FRONT_RIGHT_OF_CENTER,
                         AAC_CHANNEL_FRONT);
        num_front_channels -= 2;
    }
    if (num_front_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_FRONT_LEFT,
                         AV_CH_FRONT_RIGHT,
                         AAC_CHANNEL_FRONT);
        num_front_channels -= 2;
    }
    while (num_front_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         UINT64_MAX,
                         UINT64_MAX,
                         AAC_CHANNEL_FRONT);
        num_front_channels -= 2;
    }

    if (num_side_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_SIDE_LEFT,
                         AV_CH_SIDE_RIGHT,
                         AAC_CHANNEL_FRONT);
        num_side_channels -= 2;
    }
    while (num_side_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         UINT64_MAX,
                         UINT64_MAX,
                         AAC_CHANNEL_SIDE);
        num_side_channels -= 2;
    }

    while (num_back_channels >= 4) {
        i += assign_pair(e2c_vec, layout_map, i,
                         UINT64_MAX,
                         UINT64_MAX,
                         AAC_CHANNEL_BACK);
        num_back_channels -= 2;
    }
    if (num_back_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_BACK_LEFT,
                         AV_CH_BACK_RIGHT,
                         AAC_CHANNEL_BACK);
        num_back_channels -= 2;
    }
    if (num_back_channels) {
        e2c_vec[i] = (struct elem_to_channel) {
          .av_position = AV_CH_BACK_CENTER, .syn_ele = TYPE_SCE,
          .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_BACK };
        i++;
        num_back_channels--;
    }

    if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
        e2c_vec[i] = (struct elem_to_channel) {
          .av_position = AV_CH_LOW_FREQUENCY, .syn_ele = TYPE_LFE,
          .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
        i++;
    }
    while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
        e2c_vec[i] = (struct elem_to_channel) {
          .av_position = UINT64_MAX, .syn_ele = TYPE_LFE,
          .elem_id = layout_map[i][1], .aac_position = AAC_CHANNEL_LFE };
        i++;
    }

    // Must choose a stable sort
    total_non_cc_elements = n = i;
    do {
        int next_n = 0;
        for (i = 1; i < n; i++) {
            if (e2c_vec[i-1].av_position > e2c_vec[i].av_position) {
                FFSWAP(struct elem_to_channel, e2c_vec[i-1], e2c_vec[i]);
                next_n = i;
            }
        }
        n = next_n;
    } while (n > 0);

    layout = 0;
    for (i = 0; i < total_non_cc_elements; i++) {
        layout_map[i][0] = e2c_vec[i].syn_ele;
        layout_map[i][1] = e2c_vec[i].elem_id;
        layout_map[i][2] = e2c_vec[i].aac_position;
        if (e2c_vec[i].av_position != UINT64_MAX) {
            layout |= e2c_vec[i].av_position;
        }
    }

    return layout;
}

/**
 * Save current output configuration if and only if it has been locked.
 */
static void push_output_configuration(AACContext *ac) {
    if (ac->oc[1].status == OC_LOCKED) {
        ac->oc[0] = ac->oc[1];
    }
    ac->oc[1].status = OC_NONE;
}

/**
 * Restore the previous output configuration if and only if the current
 * configuration is unlocked.
 */
static void pop_output_configuration(AACContext *ac) {
    if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
        ac->oc[1] = ac->oc[0];
        ac->avctx->channels = ac->oc[1].channels;
        ac->avctx->channel_layout = ac->oc[1].channel_layout;
    }
}

/**
 * Configure output channel order based on the current program configuration element.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int output_configure(AACContext *ac,
                            uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
                            enum OCStatus oc_type, int get_new_frame)
{
    AVCodecContext *avctx = ac->avctx;
    int i, channels = 0, ret;
    uint64_t layout = 0;

    if (ac->oc[1].layout_map != layout_map) {
        memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
        ac->oc[1].layout_map_tags = tags;
    }

    // Try to sniff a reasonable channel order, otherwise output the
    // channels in the order the PCE declared them.
    if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
        layout = sniff_channel_order(layout_map, tags);
    for (i = 0; i < tags; i++) {
        int type =     layout_map[i][0];
        int id =       layout_map[i][1];
        int position = layout_map[i][2];
        // Allocate or free elements depending on if they are in the
        // current program configuration.
        ret = che_configure(ac, position, type, id, &channels);
        if (ret < 0)
            return ret;
    }
    if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
        if (layout == AV_CH_FRONT_CENTER) {
            layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT;
        } else {
            layout = 0;
        }
    }

    memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
    if (layout) avctx->channel_layout = layout;
    ac->oc[1].channel_layout = layout;
    avctx->channels = ac->oc[1].channels = channels;
    ac->oc[1].status = oc_type;

    if (get_new_frame) {
        if ((ret = frame_configure_elements(ac->avctx)) < 0)
            return ret;
    }

    return 0;
}

static void flush(AVCodecContext *avctx)
{
    AACContext *ac= avctx->priv_data;
    int type, i, j;

    for (type = 3; type >= 0; type--) {
        for (i = 0; i < MAX_ELEM_ID; i++) {
            ChannelElement *che = ac->che[type][i];
            if (che) {
                for (j = 0; j <= 1; j++) {
                    memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
                }
            }
        }
    }
}

/**
 * Set up channel positions based on a default channel configuration
 * as specified in table 1.17.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int set_default_channel_config(AVCodecContext *avctx,
                                              uint8_t (*layout_map)[3],
                                              int *tags,
                                              int channel_config)
{
    if (channel_config < 1 || channel_config > 7) {
        av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
               channel_config);
        return -1;
    }
    *tags = tags_per_config[channel_config];
    memcpy(layout_map, aac_channel_layout_map[channel_config-1], *tags * sizeof(*layout_map));
    return 0;
}

static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
{
    // For PCE based channel configurations map the channels solely based on tags.
    if (!ac->oc[1].m4ac.chan_config) {
        return ac->tag_che_map[type][elem_id];
    }
    // Allow single CPE stereo files to be signalled with mono configuration.
    if (!ac->tags_mapped && type == TYPE_CPE && ac->oc[1].m4ac.chan_config == 1) {
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;
        push_output_configuration(ac);

        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");

        if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
                                       2) < 0)
            return NULL;
        if (output_configure(ac, layout_map, layout_map_tags,
                             OC_TRIAL_FRAME, 1) < 0)
            return NULL;

        ac->oc[1].m4ac.chan_config = 2;
        ac->oc[1].m4ac.ps = 0;
    }
    // And vice-versa
    if (!ac->tags_mapped && type == TYPE_SCE && ac->oc[1].m4ac.chan_config == 2) {
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;
        push_output_configuration(ac);

        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");

        if (set_default_channel_config(ac->avctx, layout_map, &layout_map_tags,
                                       1) < 0)
            return NULL;
        if (output_configure(ac, layout_map, layout_map_tags,
                             OC_TRIAL_FRAME, 1) < 0)
            return NULL;

        ac->oc[1].m4ac.chan_config = 1;
        if (ac->oc[1].m4ac.sbr)
            ac->oc[1].m4ac.ps = -1;
    }
    // For indexed channel configurations map the channels solely based on position.
    switch (ac->oc[1].m4ac.chan_config) {
    case 7:
        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
        }
    case 6:
        /* Some streams incorrectly code 5.1 audio as SCE[0] CPE[0] CPE[1] SCE[1]
           instead of SCE[0] CPE[0] CPE[1] LFE[0]. If we seem to have
           encountered such a stream, transfer the LFE[0] element to the SCE[1]'s mapping */
        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
        }
    case 5:
        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
        }
    case 4:
        if (ac->tags_mapped == 2 && ac->oc[1].m4ac.chan_config == 4 && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
    case 3:
    case 2:
        if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
        } else if (ac->oc[1].m4ac.chan_config == 2) {
            return NULL;
        }
    case 1:
        if (!ac->tags_mapped && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
        }
    default:
        return NULL;
    }
}

/**
 * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit.
 *
 * @param type speaker type/position for these channels
 */
static void decode_channel_map(uint8_t layout_map[][3],
                               enum ChannelPosition type,
                               GetBitContext *gb, int n)
{
    while (n--) {
        enum RawDataBlockType syn_ele;
        switch (type) {
        case AAC_CHANNEL_FRONT:
        case AAC_CHANNEL_BACK:
        case AAC_CHANNEL_SIDE:
            syn_ele = get_bits1(gb);
            break;
        case AAC_CHANNEL_CC:
            skip_bits1(gb);
            syn_ele = TYPE_CCE;
            break;
        case AAC_CHANNEL_LFE:
            syn_ele = TYPE_LFE;
            break;
        default:
            av_assert0(0);
        }
        layout_map[0][0] = syn_ele;
        layout_map[0][1] = get_bits(gb, 4);
        layout_map[0][2] = type;
        layout_map++;
    }
}

/**
 * Decode program configuration element; reference: table 4.2.
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
                      uint8_t (*layout_map)[3],
                      GetBitContext *gb)
{
    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
    int comment_len;
    int tags;

    skip_bits(gb, 2);  // object_type

    sampling_index = get_bits(gb, 4);
    if (m4ac->sampling_index != sampling_index)
        av_log(avctx, AV_LOG_WARNING, "Sample rate index in program config element does not match the sample rate index configured by the container.\n");

    num_front       = get_bits(gb, 4);
    num_side        = get_bits(gb, 4);
    num_back        = get_bits(gb, 4);
    num_lfe         = get_bits(gb, 2);
    num_assoc_data  = get_bits(gb, 3);
    num_cc          = get_bits(gb, 4);

    if (get_bits1(gb))
        skip_bits(gb, 4); // mono_mixdown_tag
    if (get_bits1(gb))
        skip_bits(gb, 4); // stereo_mixdown_tag

    if (get_bits1(gb))
        skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround

    if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
        return -1;
    }
    decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
    tags = num_front;
    decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
    tags += num_side;
    decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
    tags += num_back;
    decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
    tags += num_lfe;

    skip_bits_long(gb, 4 * num_assoc_data);

    decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
    tags += num_cc;

    align_get_bits(gb);

    /* comment field, first byte is length */
    comment_len = get_bits(gb, 8) * 8;
    if (get_bits_left(gb) < comment_len) {
        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
        return -1;
    }
    skip_bits_long(gb, comment_len);
    return tags;
}

/**
 * Decode GA "General Audio" specific configuration; reference: table 4.1.
 *
 * @param   ac          pointer to AACContext, may be null
 * @param   avctx       pointer to AVCCodecContext, used for logging
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
                                     GetBitContext *gb,
                                     MPEG4AudioConfig *m4ac,
                                     int channel_config)
{
    int extension_flag, ret;
    uint8_t layout_map[MAX_ELEM_ID*4][3];
    int tags = 0;

    if (get_bits1(gb)) { // frameLengthFlag
        av_log_missing_feature(avctx, "960/120 MDCT window", 1);
        return AVERROR_PATCHWELCOME;
    }

    if (get_bits1(gb))       // dependsOnCoreCoder
        skip_bits(gb, 14);   // coreCoderDelay
    extension_flag = get_bits1(gb);

    if (m4ac->object_type == AOT_AAC_SCALABLE ||
        m4ac->object_type == AOT_ER_AAC_SCALABLE)
        skip_bits(gb, 3);     // layerNr

    if (channel_config == 0) {
        skip_bits(gb, 4);  // element_instance_tag
        tags = decode_pce(avctx, m4ac, layout_map, gb);
        if (tags < 0)
            return tags;
    } else {
        if ((ret = set_default_channel_config(avctx, layout_map, &tags, channel_config)))
            return ret;
    }

    if (count_channels(layout_map, tags) > 1) {
        m4ac->ps = 0;
    } else if (m4ac->sbr == 1 && m4ac->ps == -1)
        m4ac->ps = 1;

    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
        return ret;

    if (extension_flag) {
        switch (m4ac->object_type) {
        case AOT_ER_BSAC:
            skip_bits(gb, 5);    // numOfSubFrame
            skip_bits(gb, 11);   // layer_length
            break;
        case AOT_ER_AAC_LC:
        case AOT_ER_AAC_LTP:
        case AOT_ER_AAC_SCALABLE:
        case AOT_ER_AAC_LD:
            skip_bits(gb, 3);  /* aacSectionDataResilienceFlag
                                    * aacScalefactorDataResilienceFlag
                                    * aacSpectralDataResilienceFlag
                                    */
            break;
        }
        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
    }
    return 0;
}

/**
 * Decode audio specific configuration; reference: table 1.13.
 *
 * @param   ac          pointer to AACContext, may be null
 * @param   avctx       pointer to AVCCodecContext, used for logging
 * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
 * @param   data        pointer to buffer holding an audio specific config
 * @param   bit_size    size of audio specific config or data in bits
 * @param   sync_extension look for an appended sync extension
 *
 * @return  Returns error status or number of consumed bits. <0 - error
 */
static int decode_audio_specific_config(AACContext *ac,
                                        AVCodecContext *avctx,
                                        MPEG4AudioConfig *m4ac,
                                        const uint8_t *data, int bit_size,
                                        int sync_extension)
{
    GetBitContext gb;
    int i;

    av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
    for (i = 0; i < bit_size >> 3; i++)
         av_dlog(avctx, "%02x ", data[i]);
    av_dlog(avctx, "\n");

    init_get_bits(&gb, data, bit_size);

    if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0)
        return -1;
    if (m4ac->sampling_index > 12) {
        av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index);
        return -1;
    }

    skip_bits_long(&gb, i);

    switch (m4ac->object_type) {
    case AOT_AAC_MAIN:
    case AOT_AAC_LC:
    case AOT_AAC_LTP:
        if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config))
            return -1;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Audio object type %s%d is not supported.\n",
               m4ac->sbr == 1? "SBR+" : "", m4ac->object_type);
        return -1;
    }

    av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
            m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
            m4ac->sample_rate, m4ac->sbr, m4ac->ps);

    return get_bits_count(&gb);
}

/**
 * linear congruential pseudorandom number generator
 *
 * @param   previous_val    pointer to the current state of the generator
 *
 * @return  Returns a 32-bit pseudorandom integer
 */
static av_always_inline int lcg_random(unsigned previous_val)
{
    union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
    return v.s;
}

static av_always_inline void reset_predict_state(PredictorState *ps)
{
    ps->r0   = 0.0f;
    ps->r1   = 0.0f;
    ps->cor0 = 0.0f;
    ps->cor1 = 0.0f;
    ps->var0 = 1.0f;
    ps->var1 = 1.0f;
}

static void reset_all_predictors(PredictorState *ps)
{
    int i;
    for (i = 0; i < MAX_PREDICTORS; i++)
        reset_predict_state(&ps[i]);
}

static int sample_rate_idx (int rate)
{
         if (92017 <= rate) return 0;
    else if (75132 <= rate) return 1;
    else if (55426 <= rate) return 2;
    else if (46009 <= rate) return 3;
    else if (37566 <= rate) return 4;
    else if (27713 <= rate) return 5;
    else if (23004 <= rate) return 6;
    else if (18783 <= rate) return 7;
    else if (13856 <= rate) return 8;
    else if (11502 <= rate) return 9;
    else if (9391  <= rate) return 10;
    else                    return 11;
}

static void reset_predictor_group(PredictorState *ps, int group_num)
{
    int i;
    for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
        reset_predict_state(&ps[i]);
}

#define AAC_INIT_VLC_STATIC(num, size) \
    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num], \
         ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \
        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \
        size);

static av_cold int aac_decode_init(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;

    ac->avctx = avctx;
    ac->oc[1].m4ac.sample_rate = avctx->sample_rate;

    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;

    if (avctx->extradata_size > 0) {
        if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
                                         avctx->extradata,
                                         avctx->extradata_size*8, 1) < 0)
            return -1;
    } else {
        int sr, i;
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;

        sr = sample_rate_idx(avctx->sample_rate);
        ac->oc[1].m4ac.sampling_index = sr;
        ac->oc[1].m4ac.channels = avctx->channels;
        ac->oc[1].m4ac.sbr = -1;
        ac->oc[1].m4ac.ps = -1;

        for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
            if (ff_mpeg4audio_channels[i] == avctx->channels)
                break;
        if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
            i = 0;
        }
        ac->oc[1].m4ac.chan_config = i;

        if (ac->oc[1].m4ac.chan_config) {
            int ret = set_default_channel_config(avctx, layout_map,
                &layout_map_tags, ac->oc[1].m4ac.chan_config);
            if (!ret)
                output_configure(ac, layout_map, layout_map_tags,
                                 OC_GLOBAL_HDR, 0);
            else if (avctx->err_recognition & AV_EF_EXPLODE)
                return AVERROR_INVALIDDATA;
        }
    }

    AAC_INIT_VLC_STATIC( 0, 304);
    AAC_INIT_VLC_STATIC( 1, 270);
    AAC_INIT_VLC_STATIC( 2, 550);
    AAC_INIT_VLC_STATIC( 3, 300);
    AAC_INIT_VLC_STATIC( 4, 328);
    AAC_INIT_VLC_STATIC( 5, 294);
    AAC_INIT_VLC_STATIC( 6, 306);
    AAC_INIT_VLC_STATIC( 7, 268);
    AAC_INIT_VLC_STATIC( 8, 510);
    AAC_INIT_VLC_STATIC( 9, 366);
    AAC_INIT_VLC_STATIC(10, 462);

    ff_aac_sbr_init();

    ff_dsputil_init(&ac->dsp, avctx);
    ff_fmt_convert_init(&ac->fmt_conv, avctx);
    avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);

    ac->random_state = 0x1f2e3d4c;

    ff_aac_tableinit();

    INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
                    ff_aac_scalefactor_bits, sizeof(ff_aac_scalefactor_bits[0]), sizeof(ff_aac_scalefactor_bits[0]),
                    ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
                    352);

    ff_mdct_init(&ac->mdct,       11, 1, 1.0 / (32768.0 * 1024.0));
    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0 / (32768.0 * 128.0));
    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0 * 32768.0);
    // window initialization
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows( 7);

    cbrt_tableinit();

    avcodec_get_frame_defaults(&ac->frame);
    avctx->coded_frame = &ac->frame;

    return 0;
}

/**
 * Skip data_stream_element; reference: table 4.10.
 */
static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
{
    int byte_align = get_bits1(gb);
    int count = get_bits(gb, 8);
    if (count == 255)
        count += get_bits(gb, 8);
    if (byte_align)
        align_get_bits(gb);

    if (get_bits_left(gb) < 8 * count) {
        av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
        return -1;
    }
    skip_bits_long(gb, 8 * count);
    return 0;
}

static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
                             GetBitContext *gb)
{
    int sfb;
    if (get_bits1(gb)) {
        ics->predictor_reset_group = get_bits(gb, 5);
        if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
            av_log(ac->avctx, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
            return -1;
        }
    }
    for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
        ics->prediction_used[sfb] = get_bits1(gb);
    }
    return 0;
}

/**
 * Decode Long Term Prediction data; reference: table 4.xx.
 */
static void decode_ltp(LongTermPrediction *ltp,