avconv.c


/**
 * Grow the shared audio_buf (declared outside this function) so it can hold
 * the output produced from nb_samples input samples after conversion to the
 * encoder's sample rate, channel count and sample format.
 *
 * @param dec        decoder context; only sample_rate is read
 * @param enc        encoder context; sample_rate, channels, sample_fmt and
 *                   frame_size are read
 * @param nb_samples number of input samples to provision for
 * @return 0 on success, AVERROR(EINVAL) if the computed sample count exceeds
 *         INT_MAX, the negative value returned by
 *         av_samples_get_buffer_size() if the size cannot be computed, or
 *         AVERROR(ENOMEM) if the buffer could not be allocated
 */
static int alloc_audio_output_buf(AVCodecContext *dec, AVCodecContext *enc,
                                  int nb_samples)
{
    const int64_t in_rate  = dec->sample_rate;
    const int64_t out_rate = enc->sample_rate;
    int64_t wanted;
    int bytes;

    /* input sample count rescaled to the output rate, plus one */
    wanted = (nb_samples * out_rate + in_rate) / in_rate;
    /* safety factors for resampling */
    wanted = 4 * wanted + 16;
    /* never provision less than one encoder frame */
    if (!(wanted > enc->frame_size))
        wanted = enc->frame_size;
    if (wanted > INT_MAX)
        return AVERROR(EINVAL);

    bytes = av_samples_get_buffer_size(NULL, enc->channels, wanted,
                                       enc->sample_fmt, 32);
    if (bytes < 0)
        return bytes;

    av_fast_malloc(&audio_buf, &allocated_audio_buf_size, bytes);

    return audio_buf ? 0 : AVERROR(ENOMEM);
}

/**
 * Convert one decoded audio frame to the encoder's parameters and encode it.
 *
 * The function, in order:
 *  - sizes the shared audio_buf for this frame,
 *  - (re)creates the resampler or the sample format converter when the
 *    decoder parameters differ from what the output stream was set up for,
 *  - applies audio sync: drops input samples, prepends silence, or asks the
 *    resampler to compensate for drift,
 *  - resamples or converts the samples,
 *  - feeds the result to the encoder, going through ost->fifo in
 *    encoder-frame-sized chunks unless the encoder has
 *    CODEC_CAP_VARIABLE_FRAME_SIZE.
 *
 * Allocation and setup failures are fatal (exit_program(1)). A failed sample
 * format conversion drops the frame, or exits when exit_on_error is set.
 *
 * @param s             output format context passed on to encode_audio_frame()
 * @param ost           output stream receiving the audio
 * @param ist           input stream the frame was decoded from
 * @param decoded_frame decoded samples; only data[0] and nb_samples are read
 */
static void do_audio_out(AVFormatContext *s, OutputStream *ost,
                         InputStream *ist, AVFrame *decoded_frame)
{
    uint8_t *buftmp;

    int size_out, frame_bytes, resample_changed;
    AVCodecContext *enc = ost->st->codec;
    AVCodecContext *dec = ist->st->codec;
    int osize = av_get_bytes_per_sample(enc->sample_fmt);
    int isize = av_get_bytes_per_sample(dec->sample_fmt);
    uint8_t *buf = decoded_frame->data[0];
    /* input size in bytes */
    int size     = decoded_frame->nb_samples * dec->channels * isize;

    if (alloc_audio_output_buf(dec, enc, decoded_frame->nb_samples) < 0) {
        av_log(NULL, AV_LOG_FATAL, "Error allocating audio buffer\n");
        exit_program(1);
    }

    /* a channel count or sample rate mismatch requires the resampler */
    if (enc->channels != dec->channels || enc->sample_rate != dec->sample_rate)
        ost->audio_resample = 1;

    /* did the decoder parameters change since the stream was last set up? */
    resample_changed = ost->resample_sample_fmt  != dec->sample_fmt ||
                       ost->resample_channels    != dec->channels   ||
                       ost->resample_sample_rate != dec->sample_rate;

    if ((ost->audio_resample && !ost->resample) || resample_changed) {
        if (resample_changed) {
            av_log(NULL, AV_LOG_INFO, "Input stream #%d:%d frame changed from rate:%d fmt:%s ch:%d to rate:%d fmt:%s ch:%d\n",
                   ist->file_index, ist->st->index,
                   ost->resample_sample_rate, av_get_sample_fmt_name(ost->resample_sample_fmt), ost->resample_channels,
                   dec->sample_rate, av_get_sample_fmt_name(dec->sample_fmt), dec->channels);
            ost->resample_sample_fmt  = dec->sample_fmt;
            ost->resample_channels    = dec->channels;
            ost->resample_sample_rate = dec->sample_rate;
            if (ost->resample)
                audio_resample_close(ost->resample);
        }
        /* if audio_sync_method is >1 the resampler is needed for audio drift compensation */
        if (audio_sync_method <= 1 &&
            ost->resample_sample_fmt  == enc->sample_fmt &&
            ost->resample_channels    == enc->channels   &&
            ost->resample_sample_rate == enc->sample_rate) {
            ost->resample = NULL;
            ost->audio_resample = 0;
        } else if (ost->audio_resample) {
            if (dec->sample_fmt != AV_SAMPLE_FMT_S16)
                av_log(NULL, AV_LOG_WARNING, "Using s16 intermediate sample format for resampling\n");
            ost->resample = av_audio_resample_init(enc->channels,    dec->channels,
                                                   enc->sample_rate, dec->sample_rate,
                                                   enc->sample_fmt,  dec->sample_fmt,
                                                   16, 10, 0, 0.8);
            if (!ost->resample) {
                av_log(NULL, AV_LOG_FATAL, "Can not resample %d channels @ %d Hz to %d channels @ %d Hz\n",
                       dec->channels, dec->sample_rate,
                       enc->channels, enc->sample_rate);
                exit_program(1);
            }
        }
    }

    /* Without a resampler, a differing sample format is handled by a plain
     * format converter; it is cached per (output, input) format pair. */
#define MAKE_SFMT_PAIR(a,b) ((a)+AV_SAMPLE_FMT_NB*(b))
    if (!ost->audio_resample && dec->sample_fmt != enc->sample_fmt &&
        MAKE_SFMT_PAIR(enc->sample_fmt,dec->sample_fmt) != ost->reformat_pair) {
        if (ost->reformat_ctx)
            av_audio_convert_free(ost->reformat_ctx);
        ost->reformat_ctx = av_audio_convert_alloc(enc->sample_fmt, 1,
                                                   dec->sample_fmt, 1, NULL, 0);
        if (!ost->reformat_ctx) {
            av_log(NULL, AV_LOG_FATAL, "Cannot convert %s sample format to %s sample format\n",
                   av_get_sample_fmt_name(dec->sample_fmt),
                   av_get_sample_fmt_name(enc->sample_fmt));
            exit_program(1);
        }
        ost->reformat_pair = MAKE_SFMT_PAIR(enc->sample_fmt,dec->sample_fmt);
    }

    if (audio_sync_method) {
        /* delta: input timestamp position minus what has been produced so
         * far (sync_opts plus samples still queued in the fifo), in output
         * samples. idelta is the same at the input rate, byte_delta in
         * input bytes. */
        double delta = get_sync_ipts(ost, ist->last_dts) * enc->sample_rate - ost->sync_opts -
                       av_fifo_size(ost->fifo) / (enc->channels * osize);
        int idelta = delta * dec->sample_rate / enc->sample_rate;
        int byte_delta = idelta * isize * dec->channels;

        // FIXME resample delay
        if (fabs(delta) > 50) {
            if (ist->is_start || fabs(delta) > audio_drift_threshold*enc->sample_rate) {
                if (byte_delta < 0) {
                    /* input is late: drop leading samples, at most the
                     * whole frame */
                    byte_delta = FFMAX(byte_delta, -size);
                    size += byte_delta;
                    buf  -= byte_delta;
                    av_log(NULL, AV_LOG_VERBOSE, "discarding %d audio samples\n",
                           -byte_delta / (isize * dec->channels));
                    if (!size)
                        return;
                    ist->is_start = 0;
                } else {
                    /* input is early: build silence followed by the frame
                     * in async_buf and continue with that buffer */
                    av_fast_malloc(&async_buf, &allocated_async_buf_size,
                                   byte_delta + size);
                    if (!async_buf) {
                        av_log(NULL, AV_LOG_FATAL, "Out of memory in do_audio_out\n");
                        exit_program(1);
                    }

                    /* the output buffer must now also cover the added silence */
                    if (alloc_audio_output_buf(dec, enc, decoded_frame->nb_samples + idelta) < 0) {
                        av_log(NULL, AV_LOG_FATAL, "Error allocating audio buffer\n");
                        exit_program(1);
                    }
                    ist->is_start = 0;

                    generate_silence(async_buf, dec->sample_fmt, byte_delta);
                    memcpy(async_buf + byte_delta, buf, size);
                    buf = async_buf;
                    size += byte_delta;
                    av_log(NULL, AV_LOG_VERBOSE, "adding %d audio samples of silence\n", idelta);
                }
            } else if (audio_sync_method > 1) {
                /* small drift: let the resampler stretch or squeeze, by at
                 * most audio_sync_method samples */
                int comp = av_clip(delta, -audio_sync_method, audio_sync_method);
                av_assert0(ost->audio_resample);
                av_log(NULL, AV_LOG_VERBOSE, "compensating audio timestamp drift:%f compensation:%d in:%d\n",
                       delta, comp, enc->sample_rate);
                /* NOTE(review): ost->resample is read as a pointer to an
                 * AVResampleContext pointer; this appears to rely on the
                 * layout of the resampler struct -- confirm. */
                av_resample_compensate(*(struct AVResampleContext**)ost->resample, comp, enc->sample_rate);
            }
        }
    } else
        ost->sync_opts = lrintf(get_sync_ipts(ost, ist->last_dts) * enc->sample_rate) -
                                av_fifo_size(ost->fifo) / (enc->channels * osize); // FIXME wrong

    if (ost->audio_resample) {
        buftmp = audio_buf;
        /* audio_resample() takes and returns sample counts; convert the
         * result back to bytes */
        size_out = audio_resample(ost->resample,
                                  (short *)buftmp, (short *)buf,
                                  size / (dec->channels * isize));
        size_out = size_out * enc->channels * osize;
    } else {
        buftmp = buf;
        size_out = size;
    }

    if (!ost->audio_resample && dec->sample_fmt != enc->sample_fmt) {
        const void *ibuf[6] = { buftmp };
        void *obuf[6]  = { audio_buf };
        int istride[6] = { isize };
        int ostride[6] = { osize };
        int len = size_out / istride[0];
        if (av_audio_convert(ost->reformat_ctx, obuf, ostride, ibuf, istride, len) < 0) {
            /* report through av_log() like every other diagnostic here;
             * stdout may be carrying the muxed output */
            av_log(NULL, AV_LOG_ERROR, "av_audio_convert() failed\n");
            if (exit_on_error)
                exit_program(1);
            return;
        }
        buftmp = audio_buf;
        size_out = len * osize;
    }

    /* now encode as many frames as possible */
    if (!(enc->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)) {
        /* output resampled raw samples */
        if (av_fifo_realloc2(ost->fifo, av_fifo_size(ost->fifo) + size_out) < 0) {
            av_log(NULL, AV_LOG_FATAL, "av_fifo_realloc2() failed\n");
            exit_program(1);
        }
        av_fifo_generic_write(ost->fifo, buftmp, size_out, NULL);

        frame_bytes = enc->frame_size * osize * enc->channels;

        /* encode full frames only; the remainder stays queued in the fifo */
        while (av_fifo_size(ost->fifo) >= frame_bytes) {
            av_fifo_generic_read(ost->fifo, audio_buf, frame_bytes, NULL);
            encode_audio_frame(s, ost, audio_buf, frame_bytes);
        }
    } else {
        encode_audio_frame(s, ost, buftmp, size_out);
    }
}

/**
 * Optionally deinterlace a decoded picture before any further processing.
 *
 * When do_deinterlace is set and deinterlacing succeeds, *picture is
 * overwritten with a picture backed by a newly allocated buffer and that
 * buffer is returned through *bufp. When deinterlacing is disabled or fails,
 * *picture is left as it was and *bufp is set to NULL. If the temporary
 * buffer cannot be allocated the function returns without writing *bufp.
 *
 * @param ist     input stream; its codec context gives pix_fmt/width/height
 * @param picture picture to process, updated in place
 * @param bufp    receives the allocated backing buffer, or NULL
 */
static void pre_process_video_frame(InputStream *ist, AVPicture *picture, void **bufp)
{
    AVCodecContext *avctx = ist->st->codec;
    AVPicture deint;
    uint8_t *storage;
    int bytes;

    if (!do_deinterlace) {
        *bufp = NULL;
        return;
    }

    /* deinterlace : must be done before any resize */
    bytes   = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
    storage = av_malloc(bytes);
    if (!storage)
        return;

    avpicture_fill(&deint, storage, avctx->pix_fmt, avctx->width, avctx->height);

    if (avpicture_deinterlace(&deint, picture,
                              avctx->pix_fmt, avctx->width, avctx->height) >= 0) {
        *picture = deint;
        *bufp    = storage;
        return;
    }

    /* on error keep the original, interlaced picture */
    av_log(NULL, AV_LOG_WARNING, "Deinterlacing failed\n");
    av_free(storage);
    *bufp = NULL;
}

/**
 * Encode one subtitle and write the resulting packet(s) to the output.
 *
 * The encode buffer is allocated once (1 MiB) and kept in a function-local
 * static for all later calls. DVB subtitles are written as two packets, all
 * other codecs as one.
 *
 * A missing pts is an error: the subtitle is dropped, or the program exits
 * when exit_on_error is set. Allocation and encoding failures are fatal.
 *
 * @param s   output format context
 * @param ost output stream
 * @param ist input stream; its time base is used to rescale pts
 * @param sub subtitle to encode; pts, start_display_time and
 *            end_display_time are modified
 * @param pts presentation time of the subtitle in ist->st->time_base
 */
static void do_subtitle_out(AVFormatContext *s,
                            OutputStream *ost,
                            InputStream *ist,
                            AVSubtitle *sub,
                            int64_t pts)
{
    static uint8_t *subtitle_out = NULL;
    int subtitle_out_max_size = 1024 * 1024;
    int subtitle_out_size, nb, i;
    AVCodecContext *enc;
    AVPacket pkt;

    if (pts == AV_NOPTS_VALUE) {
        av_log(NULL, AV_LOG_ERROR, "Subtitle packets must have a pts\n");
        if (exit_on_error)
            exit_program(1);
        return;
    }

    enc = ost->st->codec;

    if (!subtitle_out) {
        subtitle_out = av_malloc(subtitle_out_max_size);
        /* the buffer is handed to the encoder and to pkt.data below, so a
         * failed allocation must not go unnoticed */
        if (!subtitle_out) {
            av_log(NULL, AV_LOG_FATAL, "Failed to allocate subtitle_out\n");
            exit_program(1);
        }
    }

    /* Note: DVB subtitle need one packet to draw them and one other
       packet to clear them */
    /* XXX: signal it in the codec context ? */
    if (enc->codec_id == CODEC_ID_DVB_SUBTITLE)
        nb = 2;
    else
        nb = 1;

    for (i = 0; i < nb; i++) {
        ost->sync_opts = av_rescale_q(pts, ist->st->time_base, enc->time_base);
        if (!check_recording_time(ost))
            return;

        sub->pts = av_rescale_q(pts, ist->st->time_base, AV_TIME_BASE_Q);
        // start_display_time is required to be 0
        /* fold the start offset (rescaled from 1/1000 units) into pts; on a
         * second iteration start_display_time is already 0, so this is a
         * no-op */
        sub->pts               += av_rescale_q(sub->start_display_time, (AVRational){ 1, 1000 }, AV_TIME_BASE_Q);
        sub->end_display_time  -= sub->start_display_time;
        sub->start_display_time = 0;
        subtitle_out_size = avcodec_encode_subtitle(enc, subtitle_out,
                                                    subtitle_out_max_size, sub);
        if (subtitle_out_size < 0) {
            av_log(NULL, AV_LOG_FATAL, "Subtitle encoding failed\n");
            exit_program(1);
        }

        av_init_packet(&pkt);
        pkt.data = subtitle_out;
        pkt.size = subtitle_out_size;
        pkt.pts  = av_rescale_q(sub->pts, AV_TIME_BASE_Q, ost->st->time_base);
        if (enc->codec_id == CODEC_ID_DVB_SUBTITLE) {
            /* XXX: the pts correction is handled here. Maybe handling
               it in the codec would be better */
            /* NOTE(review): the factor 90 presumably converts 1/1000 units
             * to a 90 kHz stream time base -- confirm */
            if (i == 0)
                pkt.pts += 90 * sub->start_display_time;
            else
                pkt.pts += 90 * sub->end_display_time;
        }
        write_frame(s, &pkt, ost);
    }
}

/**
 * Encode (or pass through as raw picture) one video frame and write it,
 * duplicating or dropping it as required by the selected video sync method.
 *
 * @param s          output format context
 * @param ost        output stream; sync_opts and frame_number are advanced
 *                   once per frame written
 * @param in_picture frame to output
 * @param frame_size set to 0 on entry, then to the size of the last packet
 *                   produced by the encoder (left at 0 for raw pictures or
 *                   when nothing was output)
 * @param quality    value stored in the frame's quality field before encoding
 */
static void do_video_out(AVFormatContext *s,
                         OutputStream *ost,
                         AVFrame *in_picture,
                         int *frame_size, float quality)
{
    int nb_frames, i, ret, format_video_sync;
    AVCodecContext *enc;
    double sync_ipts, delta;

    enc = ost->st->codec;

    /* input timestamp expressed in encoder time base units, and its distance
     * from the next output timestamp */
    sync_ipts = get_sync_ipts(ost, in_picture->pts) / av_q2d(enc->time_base);
    delta = sync_ipts - ost->sync_opts;

    /* by default, we output a single frame */
    nb_frames = 1;

    *frame_size = 0;

    /* resolve VSYNC_AUTO from the muxer flags */
    format_video_sync = video_sync_method;
    if (format_video_sync == VSYNC_AUTO)
        format_video_sync = (s->oformat->flags & AVFMT_NOTIMESTAMPS) ? VSYNC_PASSTHROUGH :
                            (s->oformat->flags & AVFMT_VARIABLE_FPS) ? VSYNC_VFR : VSYNC_CFR;

    switch (format_video_sync) {
    case VSYNC_CFR:
        /* constant rate: drop when the frame is too early, duplicate when
         * it is too late */
        // FIXME set to 0.5 after we fix some dts/pts bugs like in avidec.c
        if (delta < -1.1)
            nb_frames = 0;
        else if (delta > 1.1)
            nb_frames = lrintf(delta);
        break;
    case VSYNC_VFR:
        /* variable rate: drop early frames, otherwise jump the output
         * timestamp forward instead of duplicating */
        if (delta <= -0.6)
            nb_frames = 0;
        else if (delta > 0.6)
            ost->sync_opts = lrintf(sync_ipts);
        break;
    case VSYNC_PASSTHROUGH:
        ost->sync_opts = lrintf(sync_ipts);
        break;
    default:
        av_assert0(0);
    }

    /* never exceed the frame limit of this output stream */
    nb_frames = FFMIN(nb_frames, ost->max_frames - ost->frame_number);
    if (nb_frames == 0) {
        nb_frames_drop++;
        av_log(NULL, AV_LOG_VERBOSE, "*** drop!\n");
        return;
    } else if (nb_frames > 1) {
        nb_frames_dup += nb_frames - 1;
        av_log(NULL, AV_LOG_VERBOSE, "*** %d dup!\n", nb_frames - 1);
    }

    /* remember the timestamp of the first frame of the stream */
    if (!ost->frame_number)
        ost->first_pts = ost->sync_opts;

    /* duplicates frame if needed */
    for (i = 0; i < nb_frames; i++) {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;

        if (!check_recording_time(ost))
            return;

        if (s->oformat->flags & AVFMT_RAWPICTURE &&
            enc->codec->id == CODEC_ID_RAWVIDEO) {
            /* raw pictures are written as AVPicture structure to
               avoid any copies. We support temporarily the older
               method. */
            enc->coded_frame->interlaced_frame = in_picture->interlaced_frame;
            enc->coded_frame->top_field_first  = in_picture->top_field_first;
            pkt.data   = (uint8_t *)in_picture;
            pkt.size   =  sizeof(AVPicture);
            pkt.pts    = av_rescale_q(ost->sync_opts, enc->time_base, ost->st->time_base);
            pkt.flags |= AV_PKT_FLAG_KEY;

            write_frame(s, &pkt, ost);
        } else {
            int got_packet;
            AVFrame big_picture;

            /* work on a copy so the caller's frame is not modified */
            big_picture = *in_picture;
            /* better than nothing: use input picture interlaced
               settings */
            big_picture.interlaced_frame = in_picture->interlaced_frame;
            if (ost->st->codec->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)) {
                /* -1 means "not forced": take the field order from the input */
                if (ost->top_field_first == -1)
                    big_picture.top_field_first = in_picture->top_field_first;
                else
                    big_picture.top_field_first = !!ost->top_field_first;
            }

            /* handles same_quant here. This is not correct because it may
               not be a global option */
            big_picture.quality = quality;
            if (!enc->me_threshold)
                big_picture.pict_type = 0;
            big_picture.pts = ost->sync_opts;
            /* force an I picture once the next forced keyframe time is reached */
            if (ost->forced_kf_index < ost->forced_kf_count &&
                big_picture.pts >= ost->forced_kf_pts[ost->forced_kf_index]) {
                big_picture.pict_type = AV_PICTURE_TYPE_I;
                ost->forced_kf_index++;
            }
            ret = avcodec_encode_video2(enc, &pkt, &big_picture, &got_packet);
            if (ret < 0) {
                av_log(NULL, AV_LOG_FATAL, "Video encoding failed\n");
                exit_program(1);
            }

            if (got_packet) {
                /* packet timestamps come back in the encoder time base */
                if (pkt.pts != AV_NOPTS_VALUE)
                    pkt.pts = av_rescale_q(pkt.pts, enc->time_base, ost->st->time_base);
                if (pkt.dts != AV_NOPTS_VALUE)
                    pkt.dts = av_rescale_q(pkt.dts, enc->time_base, ost->st->time_base);

                write_frame(s, &pkt, ost);
                *frame_size = pkt.size;
                video_size += pkt.size;

                /* if two pass, output log */
                if (ost->logfile && enc->stats_out) {
                    fprintf(ost->logfile, "%s", enc->stats_out);
                }
            }
        }
        ost->sync_opts++;
        /*
         * For video, number of frames in == number of packets out.
         * But there may be reordering, so we can't throw away frames on encoder
         * flush, we need to limit them here, before they go into encoder.
         */
        ost->frame_number++;
    }
}

static double psnr(double d)
{
    return -10.0 * log(d) / log(10.0);
}

/**
 * Append one line of per-frame video statistics to the vstats file.
 *
 * The file named by vstats_filename is opened on the first call; failure to
 * open it is fatal. Nothing is written for non-video streams.
 *
 * @param os         output format context (not used here)
 * @param ost        output stream the statistics are about
 * @param frame_size size in bytes of the frame just written
 */
static void do_video_stats(AVFormatContext *os, OutputStream *ost,
                           int frame_size)
{
    AVCodecContext *enc;
    int frame_no;
    double tb, elapsed, cur_kbps, mean_kbps;

    /* lazily open the statistics file on first use */
    if (!vstats_file) {
        vstats_file = fopen(vstats_filename, "w");
        if (!vstats_file) {
            perror("fopen");
            exit_program(1);
        }
    }

    enc = ost->st->codec;
    if (enc->codec_type != AVMEDIA_TYPE_VIDEO)
        return;

    frame_no = ost->frame_number;
    fprintf(vstats_file, "frame= %5d q= %2.1f ", frame_no, enc->coded_frame->quality / (float)FF_QP2LAMBDA);
    if (enc->flags & CODEC_FLAG_PSNR)
        fprintf(vstats_file, "PSNR= %6.2f ", psnr(enc->coded_frame->error[0] / (enc->width * enc->height * 255.0 * 255.0)));

    fprintf(vstats_file,"f_size= %6d ", frame_size);

    /* stream time in seconds, floored at 10 ms so the average is finite */
    tb      = av_q2d(enc->time_base);
    elapsed = ost->sync_opts * tb;
    if (elapsed < 0.01)
        elapsed = 0.01;

    cur_kbps  = (frame_size * 8) / tb / 1000.0;
    mean_kbps = (double)(video_size * 8) / elapsed / 1000.0;
    fprintf(vstats_file, "s_size= %8.0fkB time= %0.3f br= %7.1fkbits/s avg_br= %7.1fkbits/s ",
           (double)video_size / 1024, elapsed, cur_kbps, mean_kbps);
    fprintf(vstats_file, "type= %c\n", av_get_picture_type_char(enc->coded_frame->pict_type));
}

/**
 * Build and log the one-line progress report; on the last report also log
 * the size summary.
 *
 * Intermediate reports are only produced when print_stats is set and are
 * rate limited to one every 0.5 seconds (the very first call only records
 * the time). The output size is taken from output_files[0] only.
 *
 * @param output_files   output files; only element 0 is read
 * @param ost_table      output streams to report on
 * @param nb_ostreams    number of entries in ost_table
 * @param is_last_report non-zero for the final report
 * @param timer_start    start time used to compute the fps figure, in the
 *                       units returned by av_gettime()
 */
static void print_report(OutputFile *output_files,
                         OutputStream *ost_table, int nb_ostreams,
                         int is_last_report, int64_t timer_start)
{
    char buf[1024];
    OutputStream *ost;
    AVFormatContext *oc;
    int64_t total_size;
    AVCodecContext *enc;
    int frame_number, vid, i;
    double bitrate, ti1, pts;
    /* state kept between calls: time of the previous report and the
     * accumulated quantizer histogram */
    static int64_t last_time = -1;
    static int qp_histogram[52];

    if (!print_stats && !is_last_report)
        return;

    if (!is_last_report) {
        int64_t cur_time;
        /* display the report every 0.5 seconds */
        cur_time = av_gettime();
        if (last_time == -1) {
            last_time = cur_time;
            return;
        }
        if ((cur_time - last_time) < 500000)
            return;
        last_time = cur_time;
    }


    oc = output_files[0].ctx;

    total_size = avio_size(oc->pb);
    if (total_size < 0) // FIXME improve avio_size() so it works with non seekable output too
        total_size = avio_tell(oc->pb);

    /* every snprintf below appends at the current end of buf */
    buf[0] = '\0';
    ti1 = 1e10;
    vid = 0;
    for (i = 0; i < nb_ostreams; i++) {
        float q = -1;
        ost = &ost_table[i];
        enc = ost->st->codec;
        if (!ost->stream_copy && enc->coded_frame)
            q = enc->coded_frame->quality / (float)FF_QP2LAMBDA;
        /* video streams after the first one only report their quantizer */
        if (vid && enc->codec_type == AVMEDIA_TYPE_VIDEO) {
            snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "q=%2.1f ", q);
        }
        /* the first video stream gets the full frame/fps/q report */
        if (!vid && enc->codec_type == AVMEDIA_TYPE_VIDEO) {
            float t = (av_gettime() - timer_start) / 1000000.0;

            frame_number = ost->frame_number;
            snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "frame=%5d fps=%3d q=%3.1f ",
                     frame_number, (t > 1) ? (int)(frame_number / t + 0.5) : 0, q);
            if (is_last_report)
                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "L");
            if (qp_hist) {
                int j;
                int qp = lrintf(q);
                if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram))
                    qp_histogram[qp]++;
                /* one hex digit per quantizer: rounded log2 of (count + 1);
                 * only the first 32 bins are printed */
                for (j = 0; j < 32; j++)
                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log(qp_histogram[j] + 1) / log(2)));
            }
            if (enc->flags&CODEC_FLAG_PSNR) {
                int j;
                double error, error_sum = 0;
                double scale, scale_sum = 0;
                char type[3] = { 'Y','U','V' };
                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "PSNR=");
                for (j = 0; j < 3; j++) {
                    /* the last report uses the errors accumulated in the
                     * codec context over all frames, otherwise those of the
                     * current coded frame */
                    if (is_last_report) {
                        error = enc->error[j];
                        scale = enc->width * enc->height * 255.0 * 255.0 * frame_number;
                    } else {
                        error = enc->coded_frame->error[j];
                        scale = enc->width * enc->height * 255.0 * 255.0;
                    }
                    /* NOTE(review): planes 1 and 2 are scaled as a quarter
                     * of plane 0, presumably assuming 4:2:0 chroma -- confirm */
                    if (j)
                        scale /= 4;
                    error_sum += error;
                    scale_sum += scale;
                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%c:%2.2f ", type[j], psnr(error / scale));
                }
                snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "*:%2.2f ", psnr(error_sum / scale_sum));
            }
            vid = 1;
        }
        /* compute min output value */
        /* i.e. the smallest strictly positive stream pts, in seconds; ti1
         * keeps its 1e10 initial value if no stream has a positive pts */
        pts = (double)ost->st->pts.val * av_q2d(ost->st->time_base);
        if ((pts < ti1) && (pts > 0))
            ti1 = pts;
    }
    if (ti1 < 0.01)
        ti1 = 0.01;

    bitrate = (double)(total_size * 8) / ti1 / 1000.0;

    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
            "size=%8.0fkB time=%0.2f bitrate=%6.1fkbits/s",
            (double)total_size / 1024, ti1, bitrate);

    if (nb_frames_dup || nb_frames_drop)
        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d",
                nb_frames_dup, nb_frames_drop);

    /* trailing carriage return so the next report overwrites this line */
    av_log(NULL, AV_LOG_INFO, "%s    \r", buf);

    fflush(stderr);

    if (is_last_report) {
        /* overhead = bytes in the file beyond the counted stream data and
         * global headers, relative to that payload */
        int64_t raw= audio_size + video_size + extra_size;
        av_log(NULL, AV_LOG_INFO, "\n");
        av_log(NULL, AV_LOG_INFO, "video:%1.0fkB audio:%1.0fkB global headers:%1.0fkB muxing overhead %f%%\n",
               video_size / 1024.0,
               audio_size / 1024.0,
               extra_size / 1024.0,
               100.0 * (total_size - raw) / raw
        );
    }
}

/**
 * Drain every encoder that may still hold buffered data and write the
 * packets it returns.
 *
 * Streams are skipped when they are not being encoded, when they are audio
 * with an encoder frame_size <= 1, or when they are raw video written as
 * AVPicture. For audio, samples left in the fifo are encoded first as a last
 * frame, padded with silence unless the encoder has
 * CODEC_CAP_SMALL_LAST_FRAME. After that, and for video, the encoder is fed
 * NULL frames until it stops returning packets. Encoding errors are fatal.
 *
 * @param ost_table   output streams to flush
 * @param nb_ostreams number of entries in ost_table
 */
static void flush_encoders(OutputStream *ost_table, int nb_ostreams)
{
    int i, ret;

    for (i = 0; i < nb_ostreams; i++) {
        OutputStream   *ost = &ost_table[i];
        AVCodecContext *enc = ost->st->codec;
        AVFormatContext *os = output_files[ost->file_index].ctx;
        int stop_encoding = 0;

        if (!ost->encoding_needed)
            continue;

        if (ost->st->codec->codec_type == AVMEDIA_TYPE_AUDIO && enc->frame_size <= 1)
            continue;
        if (ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && (os->oformat->flags & AVFMT_RAWPICTURE) && enc->codec->id == CODEC_ID_RAWVIDEO)
            continue;

        for (;;) {
            AVPacket pkt;
            int fifo_bytes, got_packet;
            av_init_packet(&pkt);
            pkt.data = NULL;
            pkt.size = 0;

            switch (ost->st->codec->codec_type) {
            case AVMEDIA_TYPE_AUDIO:
                fifo_bytes = av_fifo_size(ost->fifo);
                if (fifo_bytes > 0) {
                    /* encode any samples remaining in fifo */
                    int frame_bytes = fifo_bytes;

                    av_fifo_generic_read(ost->fifo, audio_buf, fifo_bytes, NULL);

                    /* pad last frame with silence if needed */
                    if (!(enc->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME)) {
                        frame_bytes = enc->frame_size * enc->channels *
                                      av_get_bytes_per_sample(enc->sample_fmt);
                        /* the padding is written into audio_buf, which must
                         * be able to hold a full frame */
                        if (allocated_audio_buf_size < frame_bytes)
                            exit_program(1);
                        generate_silence(audio_buf+fifo_bytes, enc->sample_fmt, frame_bytes - fifo_bytes);
                    }
                    encode_audio_frame(os, ost, audio_buf, frame_bytes);
                } else {
                    /* flush encoder with NULL frames until it is done
                       returning packets */
                    if (encode_audio_frame(os, ost, NULL, 0) == 0) {
                        stop_encoding = 1;
                        break;
                    }
                }
                break;
            case AVMEDIA_TYPE_VIDEO:
                ret = avcodec_encode_video2(enc, &pkt, NULL, &got_packet);
                if (ret < 0) {
                    av_log(NULL, AV_LOG_FATAL, "Video encoding failed\n");
                    exit_program(1);
                }
                if (ost->logfile && enc->stats_out) {
                    fprintf(ost->logfile, "%s", enc->stats_out);
                }
                if (!got_packet) {
                    stop_encoding = 1;
                    break;
                }
                /* avcodec_encode_video2() returns 0 on success, not the
                 * packet size, so the encoded size has to be taken from the
                 * packet (as do_video_out does). It is read before the
                 * packet is handed to write_frame(). */
                video_size += pkt.size;
                /* packet timestamps come back in the encoder time base */
                if (pkt.pts != AV_NOPTS_VALUE)
                    pkt.pts = av_rescale_q(pkt.pts, enc->time_base, ost->st->time_base);
                if (pkt.dts != AV_NOPTS_VALUE)
                    pkt.dts = av_rescale_q(pkt.dts, enc->time_base, ost->st->time_base);
                write_frame(os, &pkt, ost);
                break;
            default:
                /* other stream types have nothing to flush */
                stop_encoding = 1;
            }
            if (stop_encoding)
                break;
        }
    }
}

/*
 * Tell whether a packet coming from ist should currently be written to ost:
 * ost must be fed by ist, and, when the output file has a start time, the
 * input stream must have reached it.
 * Returns 1 if the packet should be written, 0 otherwise.
 */
static int check_output_constraints(InputStream *ist, OutputStream *ost)
{
    const OutputFile *file = &output_files[ost->file_index];
    const int src_index    = ist - input_streams;
    const int before_start = file->start_time && ist->last_dts < file->start_time;

    return ost->source_index == src_index && !before_start;
}

/**
 * Copy one input packet to an output stream without re-encoding.
 *
 * Packets are skipped until the first keyframe (unless
 * copy_initial_nonkeyframes is set) and once the recording time of the
 * output file has been reached. Timestamps and duration are rescaled to the
 * output stream time base, and the output file start time is subtracted
 * from pts and dts.
 *
 * @param ist input stream the packet belongs to
 * @param ost output stream to write to
 * @param pkt packet to copy
 */
static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *pkt)
{
    OutputFile *of = &output_files[ost->file_index];
    /* output file start time expressed in the output stream time base */
    int64_t start_offset = av_rescale_q(of->start_time, AV_TIME_BASE_Q, ost->st->time_base);
    AVPacket opkt;

    av_init_packet(&opkt);

    /* do not start the stream on a non-keyframe unless explicitly allowed */
    if (!ost->frame_number && !ost->copy_initial_nonkeyframes &&
        !(pkt->flags & AV_PKT_FLAG_KEY))
        return;

    if (of->recording_time != INT64_MAX &&
        ist->last_dts >= of->recording_time + of->start_time) {
        ost->is_past_recording_time = 1;
        return;
    }

    /* account the payload in the global size counters */
    switch (ost->st->codec->codec_type) {
    case AVMEDIA_TYPE_AUDIO:
        audio_size += pkt->size;
        break;
    case AVMEDIA_TYPE_VIDEO:
        video_size += pkt->size;
        ost->sync_opts++;
        break;
    default:
        break;
    }

    opkt.pts = pkt->pts == AV_NOPTS_VALUE
             ? AV_NOPTS_VALUE
             : av_rescale_q(pkt->pts, ist->st->time_base, ost->st->time_base) - start_offset;

    /* a packet without dts takes the last dts of the input stream */
    if (pkt->dts != AV_NOPTS_VALUE)
        opkt.dts = av_rescale_q(pkt->dts, ist->st->time_base, ost->st->time_base);
    else
        opkt.dts = av_rescale_q(ist->last_dts, AV_TIME_BASE_Q, ost->st->time_base);
    opkt.dts -= start_offset;

    opkt.duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->st->time_base);
    opkt.flags    = pkt->flags;

    // FIXME the parser-based rewrite below shall be replaced by the bitstream filters
    switch (ost->st->codec->codec_id) {
    case CODEC_ID_H264:
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
    case CODEC_ID_VC1:
        /* these codecs are passed through untouched */
        opkt.data = pkt->data;
        opkt.size = pkt->size;
        break;
    default:
        /* a non-zero return installs av_destruct_packet so the data is
         * released by av_free_packet() below */
        if (av_parser_change(ist->st->parser, ost->st->codec, &opkt.data, &opkt.size, pkt->data, pkt->size, pkt->flags & AV_PKT_FLAG_KEY))
            opkt.destruct = av_destruct_packet;
        break;
    }

    write_frame(of->ctx, &opkt, ost);
    ost->st->codec->frame_number++;
    av_free_packet(&opkt);
}

/**
 * When rate emulation is enabled for the input file of ist, sleep until the
 * time elapsed since ist->start has caught up with the stream's last dts.
 *
 * @param ist input stream being read
 */
static void rate_emu_sleep(InputStream *ist)
{
    int64_t target, elapsed;

    if (!input_files[ist->file_index].rate_emu)
        return;

    target  = av_rescale(ist->last_dts, 1000000, AV_TIME_BASE);
    elapsed = av_gettime() - ist->start;
    if (target > elapsed)
        usleep(target - elapsed);
}

/**
 * Decode one audio packet and send the decoded frame to every output stream
 * that is fed by this input stream and needs encoding.
 *
 * Also advances ist->next_dts by the duration of the decoded frame and, when
 * audio_volume is not 256, scales the samples in place by audio_volume / 256.
 *
 * @param ist        input stream; decoded_frame is allocated on first use
 * @param pkt        packet to decode
 * @param got_output set by the decoder to non-zero when a frame was produced
 * @return the value returned by avcodec_decode_audio4() (negative on error),
 *         or AVERROR(ENOMEM) if the frame could not be allocated
 */
static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
{
    AVFrame *decoded_frame;
    AVCodecContext *avctx = ist->st->codec;
    int bps = av_get_bytes_per_sample(ist->st->codec->sample_fmt);
    int i, ret;

    /* allocate the frame once per input stream, reset it on later calls */
    if (!ist->decoded_frame && !(ist->decoded_frame = avcodec_alloc_frame()))
        return AVERROR(ENOMEM);
    else
        avcodec_get_frame_defaults(ist->decoded_frame);
    decoded_frame = ist->decoded_frame;

    ret = avcodec_decode_audio4(avctx, decoded_frame, got_output, pkt);
    if (ret < 0) {
        return ret;
    }

    if (!*got_output) {
        /* no audio frame */
        return ret;
    }

    /* if the decoder provides a pts, use it instead of the last packet pts.
       the decoder could be delaying output by a packet or more. */
    if (decoded_frame->pts != AV_NOPTS_VALUE)
        ist->next_dts = decoded_frame->pts;

    /* increment next_dts to use for the case where the input stream does not
       have timestamps or there are multiple frames in the packet */
    ist->next_dts += ((int64_t)AV_TIME_BASE * decoded_frame->nb_samples) /
                     avctx->sample_rate;

    // preprocess audio (volume)
    /* 256 is unity gain; samples in data[0] are scaled in place, with
     * rounding and clipping for the integer formats */
    if (audio_volume != 256) {
        int decoded_data_size = decoded_frame->nb_samples * avctx->channels * bps;
        void *samples = decoded_frame->data[0];
        switch (avctx->sample_fmt) {
        case AV_SAMPLE_FMT_U8:
        {
            uint8_t *volp = samples;
            for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
                /* unsigned 8 bit is centered on 128: remove the offset,
                 * scale, then add it back */
                int v = (((*volp - 128) * audio_volume + 128) >> 8) + 128;
                *volp++ = av_clip_uint8(v);
            }
            break;
        }
        case AV_SAMPLE_FMT_S16:
        {
            int16_t *volp = samples;
            for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
                int v = ((*volp) * audio_volume + 128) >> 8;
                *volp++ = av_clip_int16(v);
            }
            break;
        }
        case AV_SAMPLE_FMT_S32:
        {
            int32_t *volp = samples;
            for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
                /* the product needs 64 bits before clipping back to 32 */
                int64_t v = (((int64_t)*volp * audio_volume + 128) >> 8);
                *volp++ = av_clipl_int32(v);
            }
            break;
        }
        case AV_SAMPLE_FMT_FLT:
        {
            float *volp = samples;
            float scale = audio_volume / 256.f;
            for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
                *volp++ *= scale;
            }
            break;
        }
        case AV_SAMPLE_FMT_DBL:
        {
            double *volp = samples;
            double scale = audio_volume / 256.;
            for (i = 0; i < (decoded_data_size / sizeof(*volp)); i++) {
                *volp++ *= scale;
            }
            break;
        }
        default:
            av_log(NULL, AV_LOG_FATAL,
                   "Audio volume adjustment on sample format %s is not supported.\n",
                   av_get_sample_fmt_name(ist->st->codec->sample_fmt));
            exit_program(1);
        }
    }

    rate_emu_sleep(ist);

    /* hand the frame to every encoded output stream fed by this input */
    for (i = 0; i < nb_output_streams; i++) {
        OutputStream *ost = &output_streams[i];

        if (!check_output_constraints(ist, ost) || !ost->encoding_needed)
            continue;
        do_audio_out(output_files[ost->file_index].ctx, ost, ist, decoded_frame);
    }

    return ret;
}

/*
 * Decode one video packet from an input stream and feed the decoded frame
 * to every output stream that encodes from this input.
 *
 * For each matching output stream the frame is pushed into that stream's
 * video filter graph; every frame the graph makes available is then handed
 * to do_video_out() (and to do_video_stats() when vstats_filename is set
 * and the reported frame size is non-zero).
 *
 * ist        - input stream to decode from; its decoded_frame and
 *              filtered_frame are allocated on first use and reused afterwards
 * pkt        - packet to decode; its pts/dts are overwritten before decoding
 *              and its size is set to 0 once a frame has been produced
 * got_output - set by the decoder; non-zero when a frame was produced
 * pkt_pts    - in: pts to assign to the packet; out: reset to AV_NOPTS_VALUE
 *
 * Returns a negative AVERROR code on failure. Otherwise returns the value of
 * the last call that set ret: the decoder's return value, or the result of
 * the last get_filtered_video_frame() call if any filtered frame was fetched.
 */
static int transcode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_t *pkt_pts)
{
    AVFrame *decoded_frame, *filtered_frame = NULL;
    void *buffer_to_free = NULL;
    int i, ret = 0;
    float quality;
    int frame_available = 1;

    /* Lazily allocate the per-stream decode frame, otherwise reset it. */
    if (!ist->decoded_frame && !(ist->decoded_frame = avcodec_alloc_frame()))
        return AVERROR(ENOMEM);
    else
        avcodec_get_frame_defaults(ist->decoded_frame);
    decoded_frame = ist->decoded_frame;
    pkt->pts  = *pkt_pts;
    pkt->dts  = ist->last_dts;
    *pkt_pts  = AV_NOPTS_VALUE;

    ret = avcodec_decode_video2(ist->st->codec,
                                decoded_frame, got_output, pkt);
    if (ret < 0)
        return ret;

    /* The decoder's quality is only forwarded to the encoder with same_quant. */
    quality = same_quant ? decoded_frame->quality : 0;
    if (!*got_output) {
        /* no picture yet */
        return ret;
    }
    decoded_frame->pts = guess_correct_pts(&ist->pts_ctx, decoded_frame->pkt_pts,
                                           decoded_frame->pkt_dts);
    /* NOTE(review): presumably tells the caller the packet is fully consumed
     * -- confirm against the caller. */
    pkt->size = 0;
    /* May hand back a temporary buffer in buffer_to_free; released at fail:. */
    pre_process_video_frame(ist, (AVPicture *)decoded_frame, &buffer_to_free);

    rate_emu_sleep(ist);

    for (i = 0; i < nb_output_streams; i++) {
        OutputStream *ost = &output_streams[i];
        int frame_size, resample_changed;

        if (!check_output_constraints(ist, ost) || !ost->encoding_needed)
            continue;

        /* Rebuild the filter graph when the decoded frame geometry or pixel
         * format differs from what the graph was last configured for. */
        resample_changed = ost->resample_width   != decoded_frame->width  ||
                           ost->resample_height  != decoded_frame->height ||
                           ost->resample_pix_fmt != decoded_frame->format;
        if (resample_changed) {
            av_log(NULL, AV_LOG_INFO,
                    "Input stream #%d:%d frame changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s\n",
                    ist->file_index, ist->st->index,
                    ost->resample_width,  ost->resample_height,  av_get_pix_fmt_name(ost->resample_pix_fmt),
                    decoded_frame->width, decoded_frame->height, av_get_pix_fmt_name(decoded_frame->format));

            avfilter_graph_free(&ost->graph);
            if (configure_video_filters(ist, ost)) {
                av_log(NULL, AV_LOG_FATAL, "Error reinitializing filters!\n");
                exit_program(1);
            }

            ost->resample_width   = decoded_frame->width;
            ost->resample_height  = decoded_frame->height;
            ost->resample_pix_fmt = decoded_frame->format;
        }

        /* A stream-level sample aspect ratio overrides the frame's own. */
        if (ist->st->sample_aspect_ratio.num)
            decoded_frame->sample_aspect_ratio = ist->st->sample_aspect_ratio;
        if (ist->st->codec->codec->capabilities & CODEC_CAP_DR1) {
            /* Wrap the decoder's frame buffer in a filter buffer reference
             * instead of passing the AVFrame itself. */
            FrameBuffer      *buf = decoded_frame->opaque;
            AVFilterBufferRef *fb = avfilter_get_video_buffer_ref_from_arrays(
                                        decoded_frame->data, decoded_frame->linesize,
                                        AV_PERM_READ | AV_PERM_PRESERVE,
                                        ist->st->codec->width, ist->st->codec->height,
                                        ist->st->codec->pix_fmt);

            /* Creating the reference can fail; bail out before it is
             * dereferenced and before the frame buffer refcount is bumped. */
            if (!fb) {
                ret = AVERROR(ENOMEM);
                goto fail;
            }

            avfilter_copy_frame_props(fb, decoded_frame);
            fb->buf->priv           = buf;
            fb->buf->free           = filter_release_buffer;

            /* The filter buffer now holds a reference on the frame buffer;
             * filter_release_buffer is installed as its free callback. */
            buf->refcount++;
            av_buffersrc_buffer(ost->input_video_filter, fb);
        } else
            av_vsrc_buffer_add_frame(ost->input_video_filter, decoded_frame,
                                     decoded_frame->pts, decoded_frame->sample_aspect_ratio);

        /* Lazily allocate the per-stream filtered frame, otherwise reset it. */
        if (!ist->filtered_frame && !(ist->filtered_frame = avcodec_alloc_frame())) {
            ret = AVERROR(ENOMEM);
            goto fail;
        } else
            avcodec_get_frame_defaults(ist->filtered_frame);
        filtered_frame = ist->filtered_frame;

        /* Drain every frame the filter graph currently has available. */
        frame_available = avfilter_poll_frame(ost->output_video_filter->inputs[0]);
        while (frame_available) {
            AVRational ist_pts_tb;
            if ((ret = get_filtered_video_frame(ost->output_video_filter,
                                                filtered_frame, &ost->picref,
                                                &ist_pts_tb)) < 0)
                goto fail;
            filtered_frame->pts = av_rescale_q(ost->picref->pts, ist_pts_tb, AV_TIME_BASE_Q);
            if (!ost->frame_aspect_ratio)
                ost->st->codec->sample_aspect_ratio = ost->picref->video->pixel_aspect;

            do_video_out(output_files[ost->file_index].ctx, ost, filtered_frame, &frame_size,
                         same_quant ? quality : ost->st->codec->global_quality);
            if (vstats_filename && frame_size)
                do_video_stats(output_files[ost->file_index].ctx, ost, frame_size);
            /* Poll before dropping picref; the output filter pointer is
             * re-checked here since it is read again after do_video_out(). */
            frame_available = ost->output_video_filter && avfilter_poll_frame(ost->output_video_filter->inputs[0]);
            avfilter_unref_buffer(ost->picref);
        }
    }

fail:
    av_free(buffer_to_free);
    return ret;
}

static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output)
{
    AVSubtitle subtitle;
    int i, ret = avcodec_decode_subtitle2(ist->st->codec,
                                          &subtitle, got_output, pkt);