diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index 08d54a64ad2adf1f82f2ba3e6e8d3aaad379dc1d..b412385e5318d784bba1ad6aad4713a703b807a8 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -114,6 +114,9 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
 #ifndef ff_ctz
 #define ff_ctz(v) __builtin_ctz(v)
 #endif
+#ifndef ff_ctzll
+#define ff_ctzll(v) __builtin_ctzll(v)
+#endif
 #endif
 #endif
 
@@ -158,6 +161,22 @@ static av_always_inline av_const int ff_ctz_c( int v )
 #endif
 #endif
 
+#ifndef ff_ctzll
+#define ff_ctzll ff_ctzll_c
+/* We use the De-Bruijn method outlined in:
+ * http://supertech.csail.mit.edu/papers/debruijn.pdf. */
+static av_always_inline av_const int ff_ctzll_c(long long v)
+{
+    static const int debruijn_ctz64[64] = {
+        0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
+        62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
+        63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+        51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
+    };
+    return debruijn_ctz64[(uint64_t)((v & -v) * 0x022FDD63CC95386D) >> 58];
+}
+#endif
+
 /**
  * Trailing zero bit count.
  *
diff --git a/libavutil/mathematics.c b/libavutil/mathematics.c
index 252794e46005246b08f27c734e0eaa210333e57b..16e4eba5b99873846f77b0bf4d24f4bda6a4e1fd 100644
--- a/libavutil/mathematics.c
+++ b/libavutil/mathematics.c
@@ -27,16 +27,32 @@
 #include <limits.h>
 
 #include "mathematics.h"
+#include "libavutil/intmath.h"
 #include "libavutil/common.h"
 #include "avassert.h"
 #include "version.h"
 
-int64_t av_gcd(int64_t a, int64_t b)
-{
-    if (b)
-        return av_gcd(b, a % b);
-    else
+/* Stein's binary GCD algorithm:
+ * https://en.wikipedia.org/wiki/Binary_GCD_algorithm */
+int64_t av_gcd(int64_t a, int64_t b) {
+    int za, zb, k;
+    int64_t u, v;
+    if (a == 0)
+        return b;
+    if (b == 0)
         return a;
+    za = ff_ctzll(a);
+    zb = ff_ctzll(b);
+    k  = FFMIN(za, zb);
+    u = llabs(a >> za);
+    v = llabs(b >> zb);
+    while (u != v) {
+        if (u > v)
+            FFSWAP(int64_t, v, u);
+        v -= u;
+        v >>= ff_ctzll(v);
+    }
+    return u << k;
 }
 
 int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)