From 8c8bbd10e0e9bcd2a8ac9bda157eefd87235e341 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 27 May 2004 16:16:09 +0000
Subject: [PATCH] faster c lowpass filter

Originally committed as revision 3164 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/libpostproc/postprocess.c          | 40 +++++++++----------
 libavcodec/libpostproc/postprocess_template.c | 39 +++++++++---------
 2 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/libavcodec/libpostproc/postprocess.c b/libavcodec/libpostproc/postprocess.c
index b7ffadbc638..c543495eb7a 100644
--- a/libavcodec/libpostproc/postprocess.c
+++ b/libavcodec/libpostproc/postprocess.c
@@ -376,32 +376,32 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
  */
 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 {
-
 	int y;
 	for(y=0; y<BLOCK_SIZE; y++)
 	{
 		const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 		const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 
-		int sums[9];
-		sums[0] = first + dst[0];
-		sums[1] = dst[0] + dst[1];
-		sums[2] = dst[1] + dst[2];
-		sums[3] = dst[2] + dst[3];
-		sums[4] = dst[3] + dst[4];
-		sums[5] = dst[4] + dst[5];
-		sums[6] = dst[5] + dst[6];
-		sums[7] = dst[6] + dst[7];
-		sums[8] = dst[7] + last;
-
-		dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
-		dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
-		dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
-		dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
-		dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
-		dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
-		dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
-		dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
+		int sums[10];
+		sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
+		sums[1] = sums[0] - first  + dst[3];
+		sums[2] = sums[1] - first  + dst[4];
+		sums[3] = sums[2] - first  + dst[5];
+		sums[4] = sums[3] - first  + dst[6];
+		sums[5] = sums[4] - dst[0] + dst[7];
+		sums[6] = sums[5] - dst[1] + last;
+		sums[7] = sums[6] - dst[2] + last;
+		sums[8] = sums[7] - dst[3] + last;
+		sums[9] = sums[8] - dst[4] + last;
+
+		dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
+		dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
+		dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
+		dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
+		dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
+		dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
+		dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
+		dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 
 		dst+= stride;
 	}
diff --git a/libavcodec/libpostproc/postprocess_template.c b/libavcodec/libpostproc/postprocess_template.c
index 0a8d8e238b1..e8e1bcac904 100644
--- a/libavcodec/libpostproc/postprocess_template.c
+++ b/libavcodec/libpostproc/postprocess_template.c
@@ -317,25 +317,26 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 		const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
 		const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
 
-		int sums[9];
-		sums[0] = first + src[l1];
-		sums[1] = src[l1] + src[l2];
-		sums[2] = src[l2] + src[l3];
-		sums[3] = src[l3] + src[l4];
-		sums[4] = src[l4] + src[l5];
-		sums[5] = src[l5] + src[l6];
-		sums[6] = src[l6] + src[l7];
-		sums[7] = src[l7] + src[l8];
-		sums[8] = src[l8] + last;
-
-		src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
-		src[l2]= ((src[l2]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
-		src[l3]= ((src[l3]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
-		src[l4]= ((src[l4]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
-		src[l5]= ((src[l5]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
-		src[l6]= ((src[l6]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
-		src[l7]= (((last + src[l7])<<2) + ((src[l8] + sums[5])<<1) + sums[3] + 8)>>4;
-		src[l8]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
+		int sums[10];
+		sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
+		sums[1] = sums[0] - first  + src[l4];
+		sums[2] = sums[1] - first  + src[l5];
+		sums[3] = sums[2] - first  + src[l6];
+		sums[4] = sums[3] - first  + src[l7];
+		sums[5] = sums[4] - src[l1] + src[l8];
+		sums[6] = sums[5] - src[l2] + last;
+		sums[7] = sums[6] - src[l3] + last;
+		sums[8] = sums[7] - src[l4] + last;
+		sums[9] = sums[8] - src[l5] + last;
+
+		src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
+		src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
+		src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
+		src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
+		src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
+		src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
+		src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
+		src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
 
 		src++;
 	}
-- 
GitLab