diff --git a/configure b/configure
index 2f3fa2ba3de15f9755eec1eb7be0fee5deb44900..a2aad677c8beadfcd244f3a5ab1f200fa95ec71e 100755
--- a/configure
+++ b/configure
@@ -448,6 +448,7 @@ Developer options (useful when working on FFmpeg itself):
   --libfuzzer=PATH         path to libfuzzer
   --ignore-tests=TESTS     comma-separated list (without "fate-" prefix
                            in the name) of tests whose result is ignored
+  --enable-linux-perf      enable Linux Performance Monitor API
 
 NOTE: Object files are built at the place where configure is launched.
 EOF
@@ -1699,6 +1700,7 @@ CONFIG_LIST="
     $SUBSYSTEM_LIST
     autodetect
     fontconfig
+    linux_perf
     memory_poisoning
     neon_clobber_test
     ossfuzz
@@ -5015,6 +5017,7 @@ case $target_os in
     linux)
         enable dv1394
         enable section_data_rel_ro
+        enabled_any arm aarch64 && enable_weak linux_perf
         ;;
     irix*)
         target_os=irix
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 9173ed19d98fae3b493be0fdef3c5f37f73c894e..ba729ac1bf4ae023c57bcb8dd72a9a173635d7f8 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -20,6 +20,14 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include "config.h"
+
+#if CONFIG_LINUX_PERF
+# ifndef _GNU_SOURCE
+#  define _GNU_SOURCE // for syscall (performance monitoring API)
+# endif
+#endif
+
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -190,8 +198,7 @@ typedef struct CheckasmFuncVersion {
     void *func;
     int ok;
     int cpu;
-    int iterations;
-    uint64_t cycles;
+    CheckasmPerf perf;
 } CheckasmFuncVersion;
 
 /* Binary search tree node */
@@ -212,7 +219,11 @@ static struct {
     int bench_pattern_len;
     int num_checked;
     int num_failed;
+
+    /* perf */
     int nop_time;
+    int sysfd;
+
     int cpu_flag;
     const char *cpu_flag_name;
     const char *test_name;
@@ -396,7 +407,6 @@ static const char *cpu_suffix(int cpu)
     return "c";
 }
 
-#ifdef AV_READ_TIME
 static int cmp_nop(const void *a, const void *b)
 {
     return *(const uint16_t*)a - *(const uint16_t*)b;
@@ -407,10 +417,13 @@ static int measure_nop_time(void)
 {
     uint16_t nops[10000];
     int i, nop_sum = 0;
+    av_unused const int sysfd = state.sysfd;
 
+    uint64_t t = 0;
     for (i = 0; i < 10000; i++) {
-        uint64_t t = AV_READ_TIME();
-        nops[i] = AV_READ_TIME() - t;
+        PERF_START(t);
+        PERF_STOP(t);
+        nops[i] = t;
     }
 
     qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
@@ -430,8 +443,9 @@ static void print_benchs(CheckasmFunc *f)
         if (f->versions.cpu || f->versions.next) {
             CheckasmFuncVersion *v = &f->versions;
             do {
-                if (v->iterations) {
-                    int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4;
+                CheckasmPerf *p = &v->perf;
+                if (p->iterations) {
+                    int decicycles = (10*p->cycles/p->iterations - state.nop_time) / 4;
                     printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
                 }
             } while ((v = v->next));
@@ -440,7 +454,6 @@ static void print_benchs(CheckasmFunc *f)
         print_benchs(f->child[1]);
     }
 }
-#endif
 
 /* ASCIIbetical sort except preserving natural order for numbers */
 static int cmp_func_names(const char *a, const char *b)
@@ -543,6 +556,63 @@ static void print_cpu_name(void)
     }
 }
 
+#if CONFIG_LINUX_PERF
+/* Open a user-space-only CPU cycle counter into state.sysfd; 0 on success, -1 on error. */
+static int bench_init_linux(void)
+{
+    struct perf_event_attr cycle_attr = {
+        .disabled       = 1, /* the counter only runs while explicitly enabled */
+        .exclude_hv     = 1,
+        .exclude_kernel = 1,
+        .config         = PERF_COUNT_HW_CPU_CYCLES,
+        .size           = sizeof(cycle_attr),
+        .type           = PERF_TYPE_HARDWARE,
+    };
+
+    printf("benchmarking with Linux Perf Monitoring API\n");
+
+    state.sysfd = syscall(__NR_perf_event_open, &cycle_attr, 0, -1, -1, 0);
+    if (state.sysfd != -1)
+        return 0;
+    perror("syscall");
+    return -1;
+}
+#endif
+
+/* Announce FFmpeg's native timer; returns -1 when the build has none. */
+static int bench_init_ffmpeg(void)
+{
+#ifndef AV_READ_TIME
+    fprintf(stderr, "checkasm: --bench is not supported on your system\n");
+    return -1;
+#endif
+    printf("benchmarking with native FFmpeg timers\n");
+    return 0;
+}
+
+/* Set up the timer backend and measure its overhead; negative on failure. */
+static int bench_init(void)
+{
+#if CONFIG_LINUX_PERF
+    const int err = bench_init_linux();
+#else
+    const int err = bench_init_ffmpeg();
+#endif
+    if (err >= 0) {
+        state.nop_time = measure_nop_time();
+        printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
+    }
+    return err < 0 ? err : 0;
+}
+
+static void bench_uninit(void) /* release the perf event fd, if one was opened */
+{
+#if CONFIG_LINUX_PERF
+    if (state.sysfd > 0) /* 0: never opened (static state), -1: open failed */
+        close(state.sysfd);
+#endif
+}
+
 int main(int argc, char *argv[])
 {
     unsigned int seed = av_get_random_seed();
@@ -560,10 +630,8 @@ int main(int argc, char *argv[])
 
     while (argc > 1) {
         if (!strncmp(argv[1], "--bench", 7)) {
-#ifndef AV_READ_TIME
-            fprintf(stderr, "checkasm: --bench is not supported on your system\n");
-            return 1;
-#endif
+            if (bench_init() < 0)
+                return 1;
             if (argv[1][7] == '=') {
                 state.bench_pattern = argv[1] + 8;
                 state.bench_pattern_len = strlen(state.bench_pattern);
@@ -591,16 +659,13 @@ int main(int argc, char *argv[])
         ret = 1;
     } else {
         fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
-#ifdef AV_READ_TIME
         if (state.bench_pattern) {
-            state.nop_time = measure_nop_time();
-            printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
             print_benchs(state.funcs);
         }
-#endif
     }
 
     destroy_func_tree(state.funcs);
+    bench_uninit();
     return ret;
 }
 
@@ -678,11 +743,13 @@ void checkasm_fail_func(const char *msg, ...)
     }
 }
 
-/* Update benchmark results of the current function */
-void checkasm_update_bench(int iterations, uint64_t cycles)
+/* Get the benchmark context (running totals + perf fd) of the current function version */
+CheckasmPerf *checkasm_get_perf_context(void)
 {
-    state.current_func_ver->iterations += iterations;
-    state.current_func_ver->cycles += cycles;
+    CheckasmPerf *perf = &state.current_func_ver->perf;
+    /* no reset here: results of repeated bench_new() runs keep accumulating */
+    perf->sysfd = state.sysfd;
+    return perf;
 }
 
 /* Print the outcome of all tests performed since the last time this function was called */
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 3165b21086c6163fa541946c17a7abdce3a3f20b..b29a61331e4f0c7ae308859cbbe64fb503950b4a 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -25,6 +25,14 @@
 
 #include <stdint.h>
 #include "config.h"
+
+#if CONFIG_LINUX_PERF
+#include <unistd.h> // read(3)
+#include <sys/ioctl.h>
+#include <asm/unistd.h>
+#include <linux/perf_event.h>
+#endif
+
 #include "libavutil/avstring.h"
 #include "libavutil/cpu.h"
 #include "libavutil/internal.h"
@@ -58,10 +66,12 @@ void checkasm_check_vp8dsp(void);
 void checkasm_check_vp9dsp(void);
 void checkasm_check_videodsp(void);
 
+struct CheckasmPerf; /* forward declaration; the struct is defined further down in this header */
+
 void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3);
 int checkasm_bench_func(void);
 void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
-void checkasm_update_bench(int iterations, uint64_t cycles);
+struct CheckasmPerf *checkasm_get_perf_context(void); /* used by bench_new() to fetch per-function benchmark state */
 void checkasm_report(const char *name, ...) av_printf_format(1, 2);
 
 /* float compare utilities */
@@ -178,32 +188,59 @@ void checkasm_checked_call(void *func, ...);
 #define declare_new_float(ret, ...) declare_new(ret, __VA_ARGS__)
 #endif
 
+typedef struct CheckasmPerf {
+    int sysfd;       /* copy of state.sysfd, the fd the perf-based timing macros use */
+    uint64_t cycles; /* accumulated elapsed count, in units of the active timer */
+    int iterations;  /* number of measurements accumulated in cycles */
+} CheckasmPerf;
+
+#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
+/* PERF_START(t)/PERF_STOP(t) bracket a timed region; PERF_STOP stores the elapsed count in t. */
+#if CONFIG_LINUX_PERF /* this variant uses a variable named sysfd from the expanding scope */
+#define PERF_START(t) do {                              \
+    ioctl(sysfd, PERF_EVENT_IOC_RESET, 0);              \
+    ioctl(sysfd, PERF_EVENT_IOC_ENABLE, 0);             \
+} while (0)
+#define PERF_STOP(t) do {                               \
+    ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);            \
+    read(sysfd, &t, sizeof(t));                         \
+} while (0)
+#else /* native timer: t holds the start stamp between the two macros */
+#define PERF_START(t) t = AV_READ_TIME()
+#define PERF_STOP(t)  t = AV_READ_TIME() - t
+#endif /* CONFIG_LINUX_PERF */
+
 /* Benchmark the function */
-#ifdef AV_READ_TIME
 #define bench_new(...)\
     do {\
         if (checkasm_bench_func()) {\
+            struct CheckasmPerf *perf = checkasm_get_perf_context();\
+            av_unused const int sysfd = perf->sysfd; /* read by the perf variants of PERF_START/PERF_STOP */\
             func_type *tfunc = func_new;\
             uint64_t tsum = 0;\
             int ti, tcount = 0;\
+            uint64_t t = 0; /* elapsed count of the current run */\
             for (ti = 0; ti < BENCH_RUNS; ti++) {\
-                uint64_t t = AV_READ_TIME();\
+                PERF_START(t);\
                 tfunc(__VA_ARGS__);\
                 tfunc(__VA_ARGS__);\
                 tfunc(__VA_ARGS__);\
                 tfunc(__VA_ARGS__);\
-                t = AV_READ_TIME() - t;\
+                PERF_STOP(t);\
                 if (t*tcount <= tsum*4 && ti > 0) {\
                     tsum += t;\
                     tcount++;\
                 }\
             }\
             emms_c();\
-            checkasm_update_bench(tcount, tsum);\
+            perf->cycles += tsum; /* filtered total of the accepted runs, not just the last sample */\
+            perf->iterations += tcount;\
         }\
     } while (0)
 #else
 #define bench_new(...) while(0)
+#define PERF_START(t)  while(0) /* no timer available: expands to an empty loop */
+#define PERF_STOP(t)   while(0) /* likewise; t is left untouched */
 #endif
 
 #endif /* TESTS_CHECKASM_CHECKASM_H */