From bd49d4fd4962d28447cb6edfaba8a218cf60b2da Mon Sep 17 00:00:00 2001
From: Loren Merritt <lorenm@u.washington.edu>
Date: Tue, 25 Sep 2007 23:31:13 +0000
Subject: [PATCH] optimize decode_subframe_lpc(): 50%/67%/43% faster on
 core2/k8/p4, making FLAC decoding overall 24%/25%/11% faster

Originally committed as revision 10586 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/flac.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/libavcodec/flac.c b/libavcodec/flac.c
index 74b245d0e5a..d8e8813d9eb 100644
--- a/libavcodec/flac.c
+++ b/libavcodec/flac.c
@@ -359,10 +359,25 @@ static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order)
             s->decoded[channel][i] += sum >> qlevel;
         }
     } else {
-        int sum;
-        for (i = pred_order; i < s->blocksize; i++)
+        for (i = pred_order; i < s->blocksize-1; i += 2)
         {
-            sum = 0;
+            int c = coeffs[pred_order-1];
+            int s0 = c * s->decoded[channel][i-pred_order];
+            int s1 = 0;
+            for (j = pred_order-1; j > 0; j--)
+            {
+                int d = s->decoded[channel][i-j];
+                s1 += c*d;
+                c = coeffs[j-1];
+                s0 += c*d;
+            }
+            s0 = s->decoded[channel][i] += s0 >> qlevel;
+            s1 += c * s0;
+            s->decoded[channel][i+1] += s1 >> qlevel;
+        }
+        if (i < s->blocksize)
+        {
+            int sum = 0;
             for (j = 0; j < pred_order; j++)
                 sum += coeffs[j] * s->decoded[channel][i-j-1];
             s->decoded[channel][i] += sum >> qlevel;
-- 
GitLab