From 601949b898946c9c2e607b81aa63ab1307246386 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
Date: Fri, 5 Mar 2010 08:32:54 +0000
Subject: [PATCH] libswscale: Extend the unaccelerated path of the unscaled
 yuv2rgb special             converter with support for rgb444 output format.
 Patch by Janusz Krzysztofik jkrzyszt chez tis icnet pl

Originally committed as revision 30841 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
---
 libswscale/swscale.c          |  9 ++++++-
 libswscale/swscale_internal.h |  2 ++
 libswscale/yuv2rgb.c          | 49 +++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index a4e9aa7a5f9..ab77537ff25 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -27,7 +27,7 @@
   {BGR,RGB}{1,4,8,15,16} support dithering
 
   unscaled special converters (YV12=I420=IYUV, Y800=Y8)
-  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
+  YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
   x -> x
   YUV9 -> YV12
   YUV9/YV12 -> Y800
@@ -198,6 +198,13 @@ DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
 {  0,   4,   0,   4,   0,   4,   0,   4, },
 };
 
+DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
+{  8,   4,  11,   7,   8,   4,  11,   7, },
+{  2,  14,   1,  13,   2,  14,   1,  13, },
+{ 10,   6,   9,   5,  10,   6,   9,   5, },
+{  0,  12,   3,  15,   0,  12,   3,  15, },
+};
+
 DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
 { 17,   9,  23,  15,  16,   8,  22,  14, },
 {  5,  29,   3,  27,   4,  28,   2,  26, },
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 5534e467ea2..856b61c4840 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -395,6 +395,7 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_RGB565LE    \
         || (x)==PIX_FMT_RGB555BE    \
         || (x)==PIX_FMT_RGB555LE    \
+        || (x)==PIX_FMT_RGB444      \
         || (x)==PIX_FMT_RGB8        \
         || (x)==PIX_FMT_RGB4        \
         || (x)==PIX_FMT_RGB4_BYTE   \
@@ -409,6 +410,7 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_BGR565LE    \
         || (x)==PIX_FMT_BGR555BE    \
         || (x)==PIX_FMT_BGR555LE    \
+        || (x)==PIX_FMT_BGR444      \
         || (x)==PIX_FMT_BGR8        \
         || (x)==PIX_FMT_BGR4        \
         || (x)==PIX_FMT_BGR4_BYTE   \
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index babc8c311ec..c3c2e74ac9f 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -35,6 +35,7 @@
 #include "libavutil/x86_cpu.h"
 #include "libavutil/bswap.h"
 
+extern const uint8_t dither_4x4_16[4][8];
 extern const uint8_t dither_8x8_32[8][8];
 extern const uint8_t dither_8x8_73[8][8];
 extern const uint8_t dither_8x8_220[8][8];
@@ -351,6 +352,32 @@ YUV2RGBFUNC(yuv2rgb_c_8, uint8_t, 0)
 CLOSEYUV2RGBFUNC(8)
 #endif
 
+// r, g, b, dst_1, dst_2
+YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0)
+    const uint8_t *d16 = dither_4x4_16[y&3];
+#define PUTRGB12(dst,src,i,o)                                   \
+    Y = src[2*i];                                               \
+    dst[2*i]   = r[Y+d16[0+o]] + g[Y+d16[0+o]] + b[Y+d16[0+o]]; \
+    Y = src[2*i+1];                                             \
+    dst[2*i+1] = r[Y+d16[1+o]] + g[Y+d16[1+o]] + b[Y+d16[1+o]];
+
+    LOADCHROMA(0);
+    PUTRGB12(dst_1,py_1,0,0);
+    PUTRGB12(dst_2,py_2,0,0+8);
+
+    LOADCHROMA(1);
+    PUTRGB12(dst_2,py_2,1,2+8);
+    PUTRGB12(dst_1,py_1,1,2);
+
+    LOADCHROMA(2);
+    PUTRGB12(dst_1,py_1,2,4);
+    PUTRGB12(dst_2,py_2,2,4+8);
+
+    LOADCHROMA(3);
+    PUTRGB12(dst_2,py_2,3,6+8);
+    PUTRGB12(dst_1,py_1,3,6);
+CLOSEYUV2RGBFUNC(8)
+
 // r, g, b, dst_1, dst_2
 YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0)
     const uint8_t *d32 = dither_8x8_32[y&7];
@@ -553,6 +580,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
     case PIX_FMT_BGR565:
     case PIX_FMT_RGB555:
     case PIX_FMT_BGR555:     return yuv2rgb_c_16;
+    case PIX_FMT_RGB444:
+    case PIX_FMT_BGR444:     return yuv2rgb_c_12_ordered_dither;
     case PIX_FMT_RGB8:
     case PIX_FMT_BGR8:       return yuv2rgb_c_8_ordered_dither;
     case PIX_FMT_RGB4:
@@ -601,6 +630,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
                         || c->dstFormat==PIX_FMT_RGB565LE
                         || c->dstFormat==PIX_FMT_RGB555BE
                         || c->dstFormat==PIX_FMT_RGB555LE
+                        || c->dstFormat==PIX_FMT_RGB444
                         || c->dstFormat==PIX_FMT_RGB8
                         || c->dstFormat==PIX_FMT_RGB4
                         || c->dstFormat==PIX_FMT_RGB4_BYTE
@@ -701,6 +731,25 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int
         fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
         fill_gv_table(c->table_gV, 1, cgv);
         break;
+    case 12:
+        rbase = isRgb ? 8 : 0;
+        gbase = 4;
+        bbase = isRgb ? 0 : 8;
+        c->yuvTable = av_malloc(1024*3*2);
+        y_table16 = c->yuvTable;
+        yb = -(384<<16) - oy;
+        for (i = 0; i < 1024; i++) {
+            uint8_t yval = av_clip_uint8((yb + 0x8000) >> 16);
+            y_table16[i     ] = (yval >> 4)          << rbase;
+            y_table16[i+1024] = (yval >> 4) << gbase;
+            y_table16[i+2048] = (yval >> 4)          << bbase;
+            yb += cy;
+        }
+        fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
+        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
+        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
+        fill_gv_table(c->table_gV, 2, cgv);
+        break;
     case 15:
     case 16:
         rbase = isRgb ? bpp - 5 : 0;
-- 
GitLab