Newer
Older
Diego Biurrun
committed
* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
*
* AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
*
Diego Biurrun
committed
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
C MMX MMX2 3DNow AltiVec
isVertDC Ec Ec Ec
isVertMinMaxOk Ec Ec Ec
doVertLowPass E e e Ec
doVertDefFilter Ec Ec e e Ec
isHorizDC Ec Ec Ec
isHorizMinMaxOk a E Ec
doHorizLowPass E e e Ec
doHorizDefFilter Ec Ec e e Ec
do_a_deblock Ec E Ec E
deRing E e e* Ecp
Vertical RKAlgo1 E a a
Horizontal RKAlgo1 a a
Vertical X1# a E E
Horizontal X1# a E E
LinIpolDeinterlace e E E*
CubicIpolDeinterlace a e e*
LinBlendDeinterlace e E E*
MedianDeinterlace# E Ec Ec
TempDeNoiser# E e e Ec
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
# more or less self-invented filters, so the exactness is not too meaningful
E = Exact implementation
e = almost exact implementation (slightly different rounding,...)
a = alternative / approximate impl
c = checked against the other implementations (-vo md5)
Michael Niedermayer
committed
p = partially optimized, still some work to do
*/
/*
TODO:
reduce the time wasted on the mem transfer
unroll stuff if instructions depend too much on the prior one
move YScale thing to the end instead of fixing QP
write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once;
the if/else stuff per block is slowing things down)
compare the quality & speed of all filters
split this huge file
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
*/
#include "libavutil/avutil.h"
#include "libavutil/avassert.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
//#undef HAVE_MMXEXT_INLINE
//#define HAVE_AMD3DNOW_INLINE
//#undef HAVE_MMX_INLINE
#include "libavutil/avstring.h"
#include "libavutil/ffversion.h"
const char postproc_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
return LIBPOSTPROC_VERSION_INT;
}
const char *postproc_configuration(void)
return FFMPEG_CONFIGURATION;
}
const char *postproc_license(void)
{
#define LICENSE_PREFIX "libpostproc license: "
return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
#include <altivec.h>
#endif
/* Size constants used by this file. */
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
/* Loop bound of the per-block row/column loops in the C filter routines. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/* 64-bit constants declared through DECLARE_ASM_CONST with a first argument
 * of 8. The wXX values repeat a 16-bit pattern four times, the bXX values
 * repeat an 8-bit pattern eight times.
 * NOTE(review): presumably operands for the SIMD inline-assembly code --
 * confirm at the points of use. */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif
Reimar Döffinger
committed
/* NOTE(review): presumably the threshold used by the dering filter --
 * confirm at the point of use. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
static const struct PPFilter filters[]=
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
{"dr", "dering", 1, 5, 6, DERING},
{"al", "autolevels", 0, 1, 2, LEVEL_FIX},
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
Michael Niedermayer
committed
{"be", "bitexact", 1, 0, 0, BITEXACT},
{"vi", "visualize", 1, 0, 0, VISUALIZE},
{NULL, NULL,0,0,0,0} //End Marker
/**
 * Flat list of string pairs, terminated by a single NULL.
 *
 * Even indices hold a short name, the following odd index holds a
 * filter-list string.
 * NOTE(review): presumably the name is an alias that gets replaced by the
 * paired filter string when a mode string is parsed -- confirm against the
 * code that reads this table.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
/* The horizontal functions exist only in C because the MMX
* code is faster with vertical filters and transposing. */
/**
* Check if the given 8x8 Block is mostly "flat"
*/
static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
int numEq= 0;
int y;
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
const int dcThreshold= dcOffset*2 + 1;
for(y=0; y<BLOCK_SIZE; y++){
numEq += ((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold;
numEq += ((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold;
numEq += ((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold;
numEq += ((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold;
numEq += ((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold;
numEq += ((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold;
numEq += ((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold;
src+= stride;
}
return numEq > c->ppMode.flatnessThreshold;
}
/**
 * Check if the middle 8x8 Block in the given 8x16 block is flat.
 *
 * Starting 4 rows below src, each pixel of BLOCK_SIZE-1 consecutive rows
 * (columns 0..7) is compared with the pixel directly below it. A pair is
 * counted when its difference lies within [-dcOffset, dcOffset] (for
 * non-negative dcOffset).
 *
 * @param src    first pixel of the 8x16 block
 * @param stride distance between the starts of two consecutive rows
 * @param c      context; nonBQP, ppMode.baseDcDiff and
 *               ppMode.flatnessThreshold are read
 * @return 1 if the number of counted pairs exceeds
 *         c->ppMode.flatnessThreshold, 0 otherwise
 */
static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
{
    int numEq= 0;
    int x, y;
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
    const int dcThreshold= dcOffset*2 + 1;

    src+= stride*4; // src points to begin of the 8x8 Block
    for(y=0; y<BLOCK_SIZE-1; y++){
        /* The unsigned cast folds the two-sided range test into a single
         * comparison: a negative sum wraps to a large value and fails. */
        for(x=0; x<8; x++)
            numEq += ((unsigned)(src[x] - src[x+stride] + dcOffset)) < dcThreshold;
        src+= stride;
    }
    return numEq > c->ppMode.flatnessThreshold;
}
/**
 * Spot-check that horizontal pixel differences stay within 2*QP.
 *
 * One pixel pair is tested in each of 8 consecutive rows. The column pairs
 * are (0,5), (2,7), (4,1), (6,3) for rows 0..3 and the same again for
 * rows 4..7. A pair passes when its absolute difference is <= 2*QP
 * (for non-negative QP).
 *
 * @param src    first pixel of the first row to examine
 * @param stride distance between the starts of two consecutive rows
 * @param QP     quantizer value that scales the allowed difference
 * @return 1 if all 8 tested pairs pass, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    int i;

    /* (unsigned)(d + 2*QP) > 4*QP is a single-comparison form of
     * d < -2*QP || d > 2*QP: a negative sum wraps to a large value. */
    for(i=0; i<2; i++){
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
/**
 * Spot-check that vertical pixel differences stay within 2*QP.
 *
 * Works on the rows starting 4 rows below src. One pixel pair is tested in
 * each of the 8 columns. The row pairs are (0,5), (2,7), (4,1), (6,3) for
 * columns 0..3 and the same again for columns 4..7. A pair passes when its
 * absolute difference is <= 2*QP (for non-negative QP).
 *
 * @param src    first pixel of the surrounding block; rows 4..11 relative to
 *               it are read
 * @param stride distance between the starts of two consecutive rows
 * @param QP     quantizer value that scales the allowed difference
 * @return 1 if all 8 tested pairs pass, 0 as soon as one fails
 */
static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    int x;

    src+= stride*4;
    /* (unsigned)(d + 2*QP) > 4*QP is a single-comparison form of
     * d < -2*QP || d > 2*QP: a negative sum wraps to a large value. */
    for(x=0; x<BLOCK_SIZE; x+=4){
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
    }
    return 1;
}
/**
 * Combine the horizontal flatness and min/max checks into one result.
 *
 * @param src    first pixel of the block, passed on unchanged to both checks
 * @param stride distance between the starts of two consecutive rows
 * @param c      context; passed to isHorizDC_C(), and c->QP is passed to
 *               isHorizMinMaxOk_C()
 * @return 2 if isHorizDC_C() reports the block as not flat; otherwise the
 *         result of isHorizMinMaxOk_C() (1 or 0)
 */
static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP);
}
/**
 * Combine the vertical flatness and min/max checks into one result.
 *
 * @param src    first pixel of the block, passed on unchanged to both checks
 * @param stride distance between the starts of two consecutive rows
 * @param c      context; passed to isVertDC_C(), and c->QP is passed to
 *               isVertMinMaxOk_C()
 * @return 2 if isVertDC_C() reports the block as not flat; otherwise the
 *         result of isVertMinMaxOk_C() (1 or 0)
 */
static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP);
}
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
int y;
for(y=0; y<BLOCK_SIZE; y++){
const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
if(FFABS(middleEnergy) < 8*c->QP){
const int q=(dst[3] - dst[4])/2;
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
d= FFMAX(d, 0);
d= (5*d + 32) >> 6;
d*= FFSIGN(-middleEnergy);
if(q>0)
{
d = FFMAX(d, 0);
d = FFMIN(d, q);
Loading
Loading full blame...