1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> 4 * 5 * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing. 6 * 7 * This program is particularly useful for measuring the kernel's futex hash 8 * table/function implementation. In order for it to make sense, use with as 9 * many threads and futexes as possible. 10 */ 11 12 /* For the CLR_() macros */ 13 #include <string.h> 14 #include <pthread.h> 15 16 #include <errno.h> 17 #include <signal.h> 18 #include <stdlib.h> 19 #include <linux/compiler.h> 20 #include <linux/kernel.h> 21 #include <linux/zalloc.h> 22 #include <sys/time.h> 23 #include <sys/mman.h> 24 #include <perf/cpumap.h> 25 26 #include "../util/mutex.h" 27 #include "../util/stat.h" 28 #include <subcmd/parse-options.h> 29 #include "bench.h" 30 #include "futex.h" 31 32 #include <err.h> 33 34 static bool done = false; 35 static int futex_flag = 0; 36 37 struct timeval bench__start, bench__end, bench__runtime; 38 static struct mutex thread_lock; 39 static unsigned int threads_starting; 40 static struct stats throughput_stats; 41 static struct cond thread_parent, thread_worker; 42 43 struct worker { 44 int tid; 45 u_int32_t *futex; 46 pthread_t thread; 47 unsigned long ops; 48 }; 49 50 static struct bench_futex_parameters params = { 51 .nfutexes = 1024, 52 .runtime = 10, 53 }; 54 55 static const struct option options[] = { 56 OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), 57 OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), 58 OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"), 59 OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), 60 OPT_BOOLEAN( 'S', "shared", ¶ms.fshared, "Use shared futexes instead of private ones"), 61 OPT_BOOLEAN( 'm', "mlockall", ¶ms.mlockall, "Lock all current and future memory"), 62 OPT_END() 63 }; 64 65 static const char * const bench_futex_hash_usage[] = { 66 "perf bench futex hash <options>", 67 NULL 68 }; 69 70 static void *workerfn(void *arg) 71 { 72 int ret; 73 struct worker *w = (struct worker *) arg; 74 unsigned int i; 75 unsigned long ops = w->ops; /* avoid cacheline bouncing */ 76 77 mutex_lock(&thread_lock); 78 threads_starting--; 79 if (!threads_starting) 80 cond_signal(&thread_parent); 81 cond_wait(&thread_worker, &thread_lock); 82 mutex_unlock(&thread_lock); 83 84 do { 85 for (i = 0; i < params.nfutexes; i++, ops++) { 86 /* 87 * We want the futex calls to fail in order to stress 88 * the hashing of uaddr and not measure other steps, 89 * such as internal waitqueue handling, thus enlarging 90 * the critical region protected by hb->lock. 91 */ 92 ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag); 93 if (!params.silent && 94 (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) 95 warn("Non-expected futex return call"); 96 } 97 } while (!done); 98 99 w->ops = ops; 100 return NULL; 101 } 102 103 static void toggle_done(int sig __maybe_unused, 104 siginfo_t *info __maybe_unused, 105 void *uc __maybe_unused) 106 { 107 /* inform all threads that we're done for the day */ 108 done = true; 109 gettimeofday(&bench__end, NULL); 110 timersub(&bench__end, &bench__start, &bench__runtime); 111 } 112 113 static void print_summary(void) 114 { 115 unsigned long avg = avg_stats(&throughput_stats); 116 double stddev = stddev_stats(&throughput_stats); 117 118 printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", 119 !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), 120 (int)bench__runtime.tv_sec); 121 } 122 123 int bench_futex_hash(int argc, const char **argv) 124 { 125 int ret = 0; 126 cpu_set_t *cpuset; 127 struct sigaction act; 128 unsigned int i; 129 pthread_attr_t thread_attr; 130 struct worker *worker = NULL; 131 struct perf_cpu_map *cpu; 132 int nrcpus; 133 size_t size; 134 135 argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); 136 if (argc) { 137 usage_with_options(bench_futex_hash_usage, options); 138 exit(EXIT_FAILURE); 139 } 140 141 cpu = perf_cpu_map__new_online_cpus(); 142 if (!cpu) 143 goto errmem; 144 145 memset(&act, 0, sizeof(act)); 146 sigfillset(&act.sa_mask); 147 act.sa_sigaction = toggle_done; 148 sigaction(SIGINT, &act, NULL); 149 150 if (params.mlockall) { 151 if (mlockall(MCL_CURRENT | MCL_FUTURE)) 152 err(EXIT_FAILURE, "mlockall"); 153 } 154 155 if (!params.nthreads) /* default to the number of CPUs */ 156 params.nthreads = perf_cpu_map__nr(cpu); 157 158 worker = calloc(params.nthreads, sizeof(*worker)); 159 if (!worker) 160 goto errmem; 161 162 if (!params.fshared) 163 futex_flag = FUTEX_PRIVATE_FLAG; 164 165 printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", 166 getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime); 167 168 init_stats(&throughput_stats); 169 mutex_init(&thread_lock); 170 cond_init(&thread_parent); 171 cond_init(&thread_worker); 172 173 threads_starting = params.nthreads; 174 pthread_attr_init(&thread_attr); 175 gettimeofday(&bench__start, NULL); 176 177 nrcpus = cpu__max_cpu().cpu; 178 cpuset = CPU_ALLOC(nrcpus); 179 BUG_ON(!cpuset); 180 size = CPU_ALLOC_SIZE(nrcpus); 181 182 for (i = 0; i < params.nthreads; i++) { 183 worker[i].tid = i; 184 worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex)); 185 if (!worker[i].futex) 186 goto errmem; 187 188 CPU_ZERO_S(size, cpuset); 189 190 CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); 191 ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); 192 if (ret) { 193 CPU_FREE(cpuset); 194 err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); 195 } 196 ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, 197 (void *)(struct worker *) &worker[i]); 198 if (ret) { 199 CPU_FREE(cpuset); 200 err(EXIT_FAILURE, "pthread_create"); 201 } 202 203 } 204 CPU_FREE(cpuset); 205 pthread_attr_destroy(&thread_attr); 206 207 mutex_lock(&thread_lock); 208 while (threads_starting) 209 cond_wait(&thread_parent, &thread_lock); 210 cond_broadcast(&thread_worker); 211 mutex_unlock(&thread_lock); 212 213 sleep(params.runtime); 214 toggle_done(0, NULL, NULL); 215 216 for (i = 0; i < params.nthreads; i++) { 217 ret = pthread_join(worker[i].thread, NULL); 218 if (ret) 219 err(EXIT_FAILURE, "pthread_join"); 220 } 221 222 /* cleanup & report results */ 223 cond_destroy(&thread_parent); 224 cond_destroy(&thread_worker); 225 mutex_destroy(&thread_lock); 226 227 for (i = 0; i < params.nthreads; i++) { 228 unsigned long t = bench__runtime.tv_sec > 0 ? 229 worker[i].ops / bench__runtime.tv_sec : 0; 230 update_stats(&throughput_stats, t); 231 if (!params.silent) { 232 if (params.nfutexes == 1) 233 printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", 234 worker[i].tid, &worker[i].futex[0], t); 235 else 236 printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n", 237 worker[i].tid, &worker[i].futex[0], 238 &worker[i].futex[params.nfutexes-1], t); 239 } 240 241 zfree(&worker[i].futex); 242 } 243 244 print_summary(); 245 246 free(worker); 247 free(cpu); 248 return ret; 249 errmem: 250 err(EXIT_FAILURE, "calloc"); 251 } 252