1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
// SPDX-License-Identifier: GPL-2.0
/*
* Benchmark find_next_bit and related bit operations.
*
* Copyright 2020 Google LLC.
*/
#include <stdlib.h>
#include "bench.h"
#include "../util/stat.h"
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/time64.h>
#include <subcmd/parse-options.h>
static unsigned int outer_iterations = 5;
static unsigned int inner_iterations = 100000;
static const struct option options[] = {
OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
"Number of outer iterations used"),
OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
"Number of inner iterations used"),
OPT_END()
};
static const char *const bench_usage[] = {
"perf bench mem find_bit <options>",
NULL
};
static unsigned int accumulator;
static unsigned int use_of_val;
static noinline void workload(int val)
{
use_of_val += val;
accumulator++;
}
#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
static bool asm_test_bit(long nr, const unsigned long *addr)
{
bool oldbit;
asm volatile("bt %2,%1"
: "=@ccc" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
return oldbit;
}
#else
#define asm_test_bit test_bit
#endif
static int do_for_each_set_bit(unsigned int num_bits)
{
unsigned long *to_test = bitmap_zalloc(num_bits);
struct timeval start, end, diff;
u64 runtime_us;
struct stats fb_time_stats, tb_time_stats;
double time_average, time_stddev;
unsigned int bit, i, j;
unsigned int set_bits, skip;
unsigned int old;
init_stats(&fb_time_stats);
init_stats(&tb_time_stats);
for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
bitmap_zero(to_test, num_bits);
skip = num_bits / set_bits;
for (i = 0; i < num_bits; i += skip)
__set_bit(i, to_test);
for (i = 0; i < outer_iterations; i++) {
old = accumulator;
gettimeofday(&start, NULL);
for (j = 0; j < inner_iterations; j++) {
for_each_set_bit(bit, to_test, num_bits)
workload(bit);
}
gettimeofday(&end, NULL);
assert(old + (inner_iterations * set_bits) == accumulator);
timersub(&end, &start, &diff);
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
update_stats(&fb_time_stats, runtime_us);
old = accumulator;
gettimeofday(&start, NULL);
for (j = 0; j < inner_iterations; j++) {
for (bit = 0; bit < num_bits; bit++) {
if (asm_test_bit(bit, to_test))
workload(bit);
}
}
gettimeofday(&end, NULL);
assert(old + (inner_iterations * set_bits) == accumulator);
timersub(&end, &start, &diff);
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
update_stats(&tb_time_stats, runtime_us);
}
printf("%d operations %d bits set of %d bits\n",
inner_iterations, set_bits, num_bits);
time_average = avg_stats(&fb_time_stats);
time_stddev = stddev_stats(&fb_time_stats);
printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
time_average, time_stddev);
time_average = avg_stats(&tb_time_stats);
time_stddev = stddev_stats(&tb_time_stats);
printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
time_average, time_stddev);
if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
printf("\n");
}
bitmap_free(to_test);
return 0;
}
int bench_mem_find_bit(int argc, const char **argv)
{
int err = 0, i;
argc = parse_options(argc, argv, options, bench_usage, 0);
if (argc) {
usage_with_options(bench_usage, options);
exit(EXIT_FAILURE);
}
for (i = 1; i <= 2048; i <<= 1)
do_for_each_set_bit(i);
return err;
}
|