diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 6ab262097ca93c1812234e364c338012c227fbf4..bcf7e5be0e2266bdf7832b9d0e1bf5525091be8a 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -86,8 +86,8 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
         c->add_pixels_clamped        = ff_add_pixels_clamped_sse2;
     }
 
-    if (ARCH_X86_64 &&
-        avctx->bits_per_raw_sample == 10 && avctx->lowres == 0 &&
+    if (ARCH_X86_64 && avctx->lowres == 0) {
+        if (avctx->bits_per_raw_sample == 10 &&
         (avctx->idct_algo == FF_IDCT_AUTO ||
          avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
          avctx->idct_algo == FF_IDCT_SIMPLE)) {
@@ -104,5 +104,23 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
             c->idct      = ff_simple_idct10_avx;
             c->perm_type = FF_IDCT_PERM_TRANSPOSE;
         }
+        }
+
+        if (avctx->bits_per_raw_sample == 12 &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+            if (EXTERNAL_SSE2(cpu_flags)) {
+                c->idct_put  = ff_simple_idct12_put_sse2;
+                c->idct_add  = NULL;
+                c->idct      = ff_simple_idct12_sse2;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+            if (EXTERNAL_AVX(cpu_flags)) {
+                c->idct_put  = ff_simple_idct12_put_avx;
+                c->idct_add  = NULL;
+                c->idct      = ff_simple_idct12_avx;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+        }
     }
 }
diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h
index e8f59c1865bde63f37fb03c6a00146662c72ec7a..8eeb31e29985932198bb478fc77baaabd2c6c828 100644
--- a/libavcodec/x86/simple_idct.h
+++ b/libavcodec/x86/simple_idct.h
@@ -31,4 +31,10 @@ void ff_simple_idct10_avx(int16_t *block);
 void ff_simple_idct10_put_sse2(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct10_put_avx(uint8_t *dest, int line_size, int16_t *block);
 
+void ff_simple_idct12_sse2(int16_t *block);
+void ff_simple_idct12_avx(int16_t *block);
+
+void ff_simple_idct12_put_sse2(uint8_t *dest, int line_size, int16_t *block);
+void ff_simple_idct12_put_avx(uint8_t *dest, int line_size, int16_t *block);
+
 #endif /* AVCODEC_X86_SIMPLE_IDCT_H */
diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm
index 3af2042f08c5fa196e489c35801bdac88b58fea1..ec388f99ed08d654a5631456c85516ebc8ba1a40 100644
--- a/libavcodec/x86/simple_idct10.asm
+++ b/libavcodec/x86/simple_idct10.asm
@@ -29,9 +29,12 @@
 
 SECTION_RODATA
 
+cextern pw_2
 cextern pw_16
 cextern pw_1023
+cextern pw_4095
 pd_round_12: times 4 dd 1<<(12-1)
+pd_round_15: times 4 dd 1<<(15-1)
 pd_round_19: times 4 dd 1<<(19-1)
 
 %include "libavcodec/x86/simple_idct10_template.asm"
@@ -46,6 +49,19 @@ cglobal simple_idct10, 1, 1, 16
 cglobal simple_idct10_put, 3, 3, 16
     IDCT_FN    "", 12, "", 19, 0, pw_1023
     RET
+
+cglobal simple_idct12, 1, 1, 16
+    ; The coefficients are already 15 bits wide, so adding the offset
+    ; to the input would overflow it.
+    IDCT_FN    "", 15, pw_2, 16
+    RET
+
+cglobal simple_idct12_put, 3, 3, 16
+    ; The range is not known, so the C simple_idct range is used.
+    ; A bias applied to the input would overflow, so the bias is applied
+    ; to the output of the first butterfly instead.
+    IDCT_FN    "", 15, pw_2, 16, 0, pw_4095
+    RET
 %endmacro
 
 INIT_XMM sse2