postprocess.c

/*
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
 *
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * postprocessing.
 */

/*
                        C       MMX     MMX2    3DNow   AltiVec
isVertDC                Ec      Ec                      Ec
isVertMinMaxOk          Ec      Ec                      Ec
doVertLowPass           E               e       e       Ec
doVertDefFilter         Ec      Ec      e       e       Ec
isHorizDC               Ec      Ec                      Ec
isHorizMinMaxOk         a       E                       Ec
doHorizLowPass          E               e       e       Ec
doHorizDefFilter        Ec      Ec      e       e       Ec
do_a_deblock            Ec      E       Ec      E
deRing                  E               e       e*      Ecp
Vertical RKAlgo1        E               a       a
Horizontal RKAlgo1                      a       a
Vertical X1#            a               E       E
Horizontal X1#          a               E       E
LinIpolDeinterlace      e               E       E*
CubicIpolDeinterlace    a               e       e*
LinBlendDeinterlace     e               E       E*
MedianDeinterlace#      E       Ec      Ec
TempDeNoiser#           E               e       e       Ec

* I do not have a 3DNow! CPU, so this is untested; nobody has reported it failing, so it presumably works
# more or less self-invented filters, so exactness is not very meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
p = partially optimized, still some work to do
*/

/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
write a faster and higher quality deblocking filter :)
make the main loop more flexible (variable number of blocks at once);
        the if/else stuff per block is slowing things down
compare the quality & speed of all filters
split this huge file
optimize c versions
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
...
*/

//Changelog: use git log

#include "config.h"
#include "libavutil/avutil.h"
#include "libavutil/avassert.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#undef HAVE_MMXEXT_INLINE
//#define HAVE_AMD3DNOW_INLINE
//#undef HAVE_MMX_INLINE
//#undef ARCH_X86
//#define DEBUG_BRIGHTNESS
#include "postprocess.h"
#include "postprocess_internal.h"
#include "libavutil/avstring.h"

#include "libavutil/ffversion.h"
const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;

/**
 * Return LIBPOSTPROC_VERSION_INT.
 *
 * Asserts (via av_assert0) that LIBPOSTPROC_VERSION_MICRO is at least 100
 * before returning.
 */
unsigned postproc_version(void)
{
    const unsigned versionInt = LIBPOSTPROC_VERSION_INT;

    av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
    return versionInt;
}

/**
 * Return the compile-time FFMPEG_CONFIGURATION string.
 *
 * The value comes from a macro, so the caller must not modify or free it.
 */
const char *postproc_configuration(void)
{
    const char *const configuration = FFMPEG_CONFIGURATION;

    return configuration;
}

const char *postproc_license(void)
{
#define LICENSE_PREFIX "libpostproc license: "
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}

#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

/*
 * Compile-time sizes.
 *
 * BLOCK_SIZE bounds the per-block loops of the C filters below, which work
 * on 8x8 pixel blocks.
 * NOTE(review): GET_MODE_BUFFER_SIZE, OPTIONS_ARRAY_SIZE and TEMP_STRIDE are
 * consumed outside this part of the file -- confirm their units at the use
 * sites before changing them.
 */
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

/*
 * Packed 64-bit constants, only compiled when ARCH_X86 and HAVE_INLINE_ASM
 * are both set.
 *
 * The names mirror the values: wXX repeats the 16-bit word 0x00XX in each of
 * the four word lanes, bXX repeats the byte 0xXX in each of the eight bytes.
 * NOTE(review): the leading 8 passed to DECLARE_ASM_CONST is presumably the
 * alignment in bytes -- confirm against the macro definition.
 */
#if ARCH_X86 && HAVE_INLINE_ASM
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared unconditionally, unlike the x86-only constants above.
 * NOTE(review): presumably the threshold used by the dering filter; its
 * consumer is outside this part of the file -- confirm before changing. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;


/*
 * Table of the available sub-filters, terminated by an all-NULL/zero entry.
 *
 * Each row gives a two-character name, a long name, three integers and a
 * filter constant.
 * NOTE(review): struct PPFilter is declared outside this file view; the
 * three integers are presumably a chroma default flag followed by minimum
 * luma and chroma quality levels, and the last field a mode bit mask --
 * confirm against the struct declaration.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",                1, 5, 6, DERING},
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",              1, 0, 0, BITEXACT},
    {"vi", "visualize",             1, 0, 0, VISUALIZE},
    {NULL, NULL,0,0,0,0} //End Marker
};

/*
 * Flat list of (name, replacement) string pairs, terminated by a single
 * NULL. Entries at even indices are names, the following odd entry is the
 * filter string that name stands for; "default"/"de" and "fast"/"fa" map to
 * the same replacement.
 * NOTE(review): presumably applied to user-supplied mode strings by the mode
 * parser, which is outside this part of the file -- confirm.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};

/* The horizontal functions exist only in C because the MMX
 * code is faster with vertical filters and transposing. */

/**
 * Check whether the given 8x8 block is mostly "flat" in the horizontal
 * direction.
 *
 * On each of the BLOCK_SIZE rows the 7 pairs of horizontally adjacent pixels
 * in columns 0..7 are compared. A pair counts as equal when its difference
 * lies within +-dcOffset, where dcOffset is derived from c->nonBQP and
 * c->ppMode.baseDcDiff.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two consecutive rows
 * @param c      context supplying nonBQP and the ppMode thresholds
 * @return non-zero if the number of equal pairs exceeds
 *         c->ppMode.flatnessThreshold, 0 otherwise
 */
static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
{
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
    const uint8_t *row= src;
    int flatPairs= 0;
    int line, x;

    for(line=0; line<BLOCK_SIZE; line++, row+= stride){
        for(x=0; x<7; x++){
            /* The unsigned cast folds the two-sided range test
             * -dcOffset <= diff <= dcOffset into a single comparison. */
            if(((unsigned)(row[x] - row[x+1] + dcOffset)) < dcThreshold)
                flatPairs++;
        }
    }
    return flatPairs > c->ppMode.flatnessThreshold;
}

/**
 * Check whether the middle 8x8 block of the given 8x16 block is mostly
 * "flat" in the vertical direction.
 *
 * Examination starts 4 rows below src. For BLOCK_SIZE-1 consecutive rows,
 * each of the 8 pixels is compared with the pixel directly below it. A pair
 * counts as equal when its difference lies within +-dcOffset, where dcOffset
 * is derived from c->nonBQP and c->ppMode.baseDcDiff.
 *
 * @param src    top-left pixel of the 8x16 block
 * @param stride distance in bytes between two consecutive rows
 * @param c      context supplying nonBQP and the ppMode thresholds
 * @return non-zero if the number of equal pairs exceeds
 *         c->ppMode.flatnessThreshold, 0 otherwise
 */
static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
{
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;
    const uint8_t *upper= src + stride*4; // first row of the middle 8x8 block
    int flatPairs= 0;
    int line, x;

    for(line=0; line<BLOCK_SIZE-1; line++){
        const uint8_t *lower= upper + stride;

        for(x=0; x<8; x++){
            /* Single unsigned comparison for -dcOffset <= diff <= dcOffset. */
            if(((unsigned)(upper[x] - lower[x] + dcOffset)) < dcThreshold)
                flatPairs++;
        }
        upper= lower;
    }
    return flatPairs > c->ppMode.flatnessThreshold;
}

/**
 * Sparse horizontal range check over 8 consecutive rows.
 *
 * One fixed pair of columns is sampled per row; the pairs cycle through
 * (0,5), (2,7), (4,1), (6,3) and repeat after four rows. For a non-negative
 * QP a pair passes when the difference of its two pixels lies within
 * +-2*QP.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two consecutive rows
 * @param QP     quantizer that scales the allowed difference
 * @return 0 as soon as one sampled pair is out of range, 1 if all 8 pass
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* Columns (first, second) compared on rows 0..3 of each pass. */
    static const uint8_t columnPair[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *row= src;
    int pass, k;

    for(pass=0; pass<2; pass++){
        for(k=0; k<4; k++){
            const int first=  row[columnPair[k][0]];
            const int second= row[columnPair[k][1]];

            /* Unsigned compare rejects both diff < -2*QP and diff > 2*QP. */
            if((unsigned)(first - second + 2*QP) > 4*QP)
                return 0;
            row+= stride;
        }
    }
    return 1;
}

/**
 * Sparse vertical range check, starting 4 rows below src.
 *
 * The columns are handled in groups of four. Within a group, column k
 * compares one fixed pair of rows: (0,5), (2,7), (4,1), (6,3) for
 * k = 0..3. For a non-negative QP a pair passes when the difference of its
 * two pixels lies within +-2*QP.
 *
 * @param src    top-left pixel; the rows examined begin at src + 4*stride
 * @param stride distance in bytes between two consecutive rows
 * @param QP     quantizer that scales the allowed difference
 * @return 0 as soon as one sampled pair is out of range, 1 otherwise
 */
static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* Rows (first, second) compared in columns 0..3 of each group. */
    static const uint8_t rowPair[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const uint8_t *top= src + stride*4;
    int group, k;

    for(group=0; group<BLOCK_SIZE; group+=4){
        for(k=0; k<4; k++){
            const uint8_t *column= top + group + k;
            const int first=  column[rowPair[k][0]*stride];
            const int second= column[rowPair[k][1]*stride];

            /* Unsigned compare rejects both diff < -2*QP and diff > 2*QP. */
            if((unsigned)(first - second + 2*QP) > 4*QP)
                return 0;
        }
    }
    return 1;
}

/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat; otherwise the
 *         result of isHorizMinMaxOk_C() with c->QP (1 if in range, 0 if not)
 */
static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP);
}

/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat; otherwise the
 *         result of isVertMinMaxOk_C() with c->QP (1 if in range, 0 if not)
 */
static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP);
}

static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
{
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

        if(FFABS(middleEnergy) < 8*c->QP){
            const int q=(dst[3] - dst[4])/2;
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
            d= FFMAX(d, 0);

            d= (5*d + 32) >> 6;
            d*= FFSIGN(-middleEnergy);

            if(q>0)
            {
                d = FFMAX(d, 0);
                d = FFMIN(d, q);