Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | // SPDX-License-Identifier: GPL-2.0 /* * Benchmark find_next_bit and related bit operations. * * Copyright 2020 Google LLC. */ #include <stdlib.h> #include "bench.h" #include "../util/stat.h" #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/time64.h> #include <subcmd/parse-options.h> static unsigned int outer_iterations = 5; static unsigned int inner_iterations = 100000; static const struct option options[] = { OPT_UINTEGER('i', "outer-iterations", &outer_iterations, "Number of outer iterations used"), OPT_UINTEGER('j', "inner-iterations", &inner_iterations, "Number of inner iterations used"), OPT_END() }; static const char *const bench_usage[] = { "perf bench mem find_bit <options>", NULL }; static unsigned int accumulator; static unsigned int use_of_val; static noinline void workload(int val) { use_of_val += val; accumulator++; } #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) static bool asm_test_bit(long nr, const unsigned long *addr) { bool oldbit; asm volatile("bt %2,%1" : "=@ccc" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory"); return oldbit; } #else #define asm_test_bit test_bit #endif static int do_for_each_set_bit(unsigned int num_bits) { unsigned long *to_test = bitmap_zalloc(num_bits); struct timeval start, end, diff; u64 runtime_us; struct stats fb_time_stats, tb_time_stats; double time_average, time_stddev; unsigned int bit, i, j; unsigned int set_bits, skip; init_stats(&fb_time_stats); init_stats(&tb_time_stats); for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { bitmap_zero(to_test, num_bits); skip = num_bits / set_bits; for (i = 0; i < num_bits; i += skip) __set_bit(i, to_test); for (i = 0; i < outer_iterations; i++) { #ifndef NDEBUG unsigned int old = accumulator; #endif gettimeofday(&start, NULL); for (j = 0; j < inner_iterations; j++) { for_each_set_bit(bit, to_test, num_bits) workload(bit); } gettimeofday(&end, NULL); assert(old + (inner_iterations * set_bits) == accumulator); timersub(&end, &start, &diff); runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; update_stats(&fb_time_stats, runtime_us); #ifndef NDEBUG old = accumulator; #endif gettimeofday(&start, NULL); for (j = 0; j < inner_iterations; j++) { for (bit = 0; bit < num_bits; bit++) { if (asm_test_bit(bit, to_test)) workload(bit); } } gettimeofday(&end, NULL); assert(old + (inner_iterations * set_bits) == accumulator); timersub(&end, &start, &diff); runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; update_stats(&tb_time_stats, runtime_us); } printf("%d operations %d bits set of %d bits\n", inner_iterations, set_bits, num_bits); time_average = avg_stats(&fb_time_stats); time_stddev = stddev_stats(&fb_time_stats); printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n", time_average, time_stddev); time_average = avg_stats(&tb_time_stats); time_stddev = stddev_stats(&tb_time_stats); printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n", time_average, time_stddev); if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ printf("\n"); } bitmap_free(to_test); return 0; } int bench_mem_find_bit(int argc, const char **argv) { int err = 0, i; argc = parse_options(argc, argv, options, bench_usage, 0); if (argc) { usage_with_options(bench_usage, options); exit(EXIT_FAILURE); } for (i = 1; i <= 2048; i <<= 1) do_for_each_set_bit(i); return err; } |