// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Davidlohr Bueso.
 *
 * Benchmark the various operations allowed for epoll_ctl(2).
 * The idea is to concurrently stress a single epoll instance.
 */
#ifdef HAVE_EVENTFD_SUPPORT
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>

#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <perf/cpumap.h>

#include "../util/mutex.h"
#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"

#include <err.h>

#define printinfo(fmt, arg...) \
	do { if (__verbose) printf(fmt, ## arg); } while (0)

static unsigned int nthreads = 0;
static unsigned int nsecs = 8;
static bool done, __verbose, randomize;

/*
 * epoll related shared variables.
 */

/* Maximum number of nesting allowed inside epoll sets */
#define EPOLL_MAXNESTS 4

enum {
	OP_EPOLL_ADD,
	OP_EPOLL_MOD,
	OP_EPOLL_DEL,
	EPOLL_NR_OPS,
};

static int epollfd;
static int *epollfdp;
static bool noaffinity;
static unsigned int nested = 0;

/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;

static struct mutex thread_lock;
static unsigned int threads_starting;
static struct stats all_stats[EPOLL_NR_OPS];
static struct cond thread_parent, thread_worker;

struct worker {
	int tid;
	pthread_t thread;
	unsigned long ops[EPOLL_NR_OPS];
	int *fdmap;
};

static const struct option options[] = {
	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
	OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
	OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
	OPT_BOOLEAN('n', "noaffinity", &noaffinity, "Disables CPU affinity"),
	OPT_UINTEGER('N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
	OPT_BOOLEAN('R', "randomize", &randomize, "Perform random operations on random fds"),
	OPT_BOOLEAN('v', "verbose", &__verbose, "Verbose mode"),
	OPT_END()
};

static const char * const bench_epoll_ctl_usage[] = {
	"perf bench epoll ctl <options>",
	NULL
};

static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	/* inform all threads that we're done for the day */
	done = true;
	gettimeofday(&bench__end, NULL);
	timersub(&bench__end, &bench__start, &bench__runtime);
}

static void nest_epollfd(void)
{
	unsigned int i;
	struct epoll_event ev;

	if (nested > EPOLL_MAXNESTS)
		nested = EPOLL_MAXNESTS;
	printinfo("Nesting level(s): %u\n", nested);

	epollfdp = calloc(nested, sizeof(int));
	if (!epollfdp)
		err(EXIT_FAILURE, "calloc");

	for (i = 0; i < nested; i++) {
		epollfdp[i] = epoll_create(1);
		if (epollfdp[i] < 0)
			err(EXIT_FAILURE, "epoll_create");
	}

	ev.events = EPOLLHUP; /* anything */
	ev.data.u64 = i; /* any number */

	for (i = nested - 1; i; i--) {
		if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
			      epollfdp[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}

	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
		err(EXIT_FAILURE, "epoll_ctl");
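
	/*
	 * The fds now form a chain: the main epollfd watches epollfdp[0],
	 * epollfdp[0] watches epollfdp[1], and so on down to
	 * epollfdp[nested - 1].
	 */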
}

static inline void do_epoll_op(struct worker *w, int op, int fd)
{
	int error;
	struct epoll_event ev;

	ev.events = EPOLLIN;
	ev.data.u64 = fd;

	switch (op) {
	case OP_EPOLL_ADD:
		error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
		break;
	case OP_EPOLL_MOD:
		ev.events = EPOLLOUT;
		error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
		break;
	case OP_EPOLL_DEL:
		error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
		break;
	default:
		error = 1;
		break;
	}

	if (!error)
		w->ops[op]++;
}

static inline void do_random_epoll_op(struct worker *w)
{
	unsigned long rnd1 = random(), rnd2 = random();
	int op, fd;

	fd = w->fdmap[rnd1 % nfds];
	op = rnd2 % EPOLL_NR_OPS;

	do_epoll_op(w, op, fd);
}

static void *workerfn(void *arg)
{
	unsigned int i;
	struct worker *w = (struct worker *) arg;
	struct timespec ts = { .tv_sec = 0,
			       .tv_nsec = 250 };

	mutex_lock(&thread_lock);
	threads_starting--;
	if (!threads_starting)
		cond_signal(&thread_parent);
	cond_wait(&thread_worker, &thread_lock);
	mutex_unlock(&thread_lock);

	/* Let 'em loose */
	do {
		/* random */
		if (randomize) {
			do_random_epoll_op(w);
		} else {
			for (i = 0; i < nfds; i++) {
				do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
			}
		}

		nanosleep(&ts, NULL);
	} while (!done);

	return NULL;
}

static void init_fdmaps(struct worker *w, int pct)
{
	unsigned int i;
	int inc;
	struct epoll_event ev;

	if (!pct)
		return;

	inc = 100 / pct;
	for (i = 0; i < nfds; i += inc) {
		ev.data.fd = w->fdmap[i];
		ev.events = EPOLLIN;

		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}
}

static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
	pthread_attr_t thread_attr, *attrp = NULL;
	cpu_set_t *cpuset;
	unsigned int i, j;
	int ret = 0;
	int nrcpus;
	size_t size;

	if (!noaffinity)
		pthread_attr_init(&thread_attr);

	nrcpus = perf_cpu_map__nr(cpu);
	cpuset = CPU_ALLOC(nrcpus);
	BUG_ON(!cpuset);
	size = CPU_ALLOC_SIZE(nrcpus);

	for (i = 0; i < nthreads; i++) {
		struct worker *w = &worker[i];

		w->tid = i;
		w->fdmap = calloc(nfds, sizeof(int));
		if (!w->fdmap) {
			CPU_FREE(cpuset);
			return 1;
		}

		for (j = 0; j < nfds; j++) {
			w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
			if (w->fdmap[j] < 0)
				err(EXIT_FAILURE, "eventfd");
		}

		/*
		 * Let's add 50% of the fdmap to the epoll instance, and
		 * do it before any threads are started; otherwise there is
		 * an initial bias of the call failing (mod and del ops).
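		 * (epoll_ctl(2) fails with ENOENT for EPOLL_CTL_MOD and
		 * EPOLL_CTL_DEL when the fd is not already registered.)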
		 */
		if (randomize)
			init_fdmaps(w, 50);

		if (!noaffinity) {
			CPU_ZERO_S(size, cpuset);
			CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
				  size, cpuset);

			ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
			if (ret) {
				CPU_FREE(cpuset);
				err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
			}

			attrp = &thread_attr;
		}

		ret = pthread_create(&w->thread, attrp, workerfn, (void *) w);
		if (ret) {
			CPU_FREE(cpuset);
			err(EXIT_FAILURE, "pthread_create");
		}
	}

	CPU_FREE(cpuset);
	if (!noaffinity)
		pthread_attr_destroy(&thread_attr);

	return ret;
}

static void print_summary(void)
{
	int i;
	unsigned long avg[EPOLL_NR_OPS];
	double stddev[EPOLL_NR_OPS];

	for (i = 0; i < EPOLL_NR_OPS; i++) {
		avg[i] = avg_stats(&all_stats[i]);
		stddev[i] = stddev_stats(&all_stats[i]);
	}

	printf("\nAveraged %lu ADD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
	       avg[OP_EPOLL_ADD]));
	printf("Averaged %lu MOD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
	       avg[OP_EPOLL_MOD]));
	printf("Averaged %lu DEL operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
	       avg[OP_EPOLL_DEL]));
}

int bench_epoll_ctl(int argc, const char **argv)
{
	int j, ret = 0;
	struct sigaction act;
	struct worker *worker = NULL;
	struct perf_cpu_map *cpu;
	struct rlimit rl, prevrl;
	unsigned int i;

	argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
	if (argc) {
		usage_with_options(bench_epoll_ctl_usage, options);
		exit(EXIT_FAILURE);
	}

	memset(&act, 0, sizeof(act));
	sigfillset(&act.sa_mask);
	act.sa_flags = SA_SIGINFO; /* toggle_done() is a three-argument sa_sigaction handler */
	act.sa_sigaction = toggle_done;
	sigaction(SIGINT, &act, NULL);

	cpu = perf_cpu_map__new_online_cpus();
	if (!cpu)
		goto errmem;

	/* a single, main epoll instance */
	epollfd = epoll_create(1);
	if (epollfd < 0)
		err(EXIT_FAILURE, "epoll_create");

	/*
	 * Deal with nested epolls, if any.
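	 *
	 * nest_epollfd() hangs a chain of additional epoll instances off
	 * the main epollfd; the depth is clamped to EPOLL_MAXNESTS, since
	 * the kernel itself limits how deeply epoll sets can be nested.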
	 */
	if (nested)
		nest_epollfd();

	/* default to the number of CPUs */
	if (!nthreads)
		nthreads = perf_cpu_map__nr(cpu);

	worker = calloc(nthreads, sizeof(*worker));
	if (!worker)
		goto errmem;

	if (getrlimit(RLIMIT_NOFILE, &prevrl))
		err(EXIT_FAILURE, "getrlimit");
	rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
	printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
		  (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
	if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
		err(EXIT_FAILURE, "setrlimit");

	printf("Run summary [PID %d]: %u threads doing epoll_ctl ops "
	       "on %u file-descriptors for %u secs.\n\n",
	       getpid(), nthreads, nfds, nsecs);

	for (i = 0; i < EPOLL_NR_OPS; i++)
		init_stats(&all_stats[i]);

	mutex_init(&thread_lock);
	cond_init(&thread_parent);
	cond_init(&thread_worker);

	threads_starting = nthreads;

	gettimeofday(&bench__start, NULL);

	do_threads(worker, cpu);

	mutex_lock(&thread_lock);
	while (threads_starting)
		cond_wait(&thread_parent, &thread_lock);
	cond_broadcast(&thread_worker);
	mutex_unlock(&thread_lock);

	sleep(nsecs);
	toggle_done(0, NULL, NULL);
	printinfo("main thread: toggling done\n");

	for (i = 0; i < nthreads; i++) {
		ret = pthread_join(worker[i].thread, NULL);
		if (ret)
			err(EXIT_FAILURE, "pthread_join");
	}

	/* cleanup & report results */
	cond_destroy(&thread_parent);
	cond_destroy(&thread_worker);
	mutex_destroy(&thread_lock);

	for (i = 0; i < nthreads; i++) {
		unsigned long t[EPOLL_NR_OPS];

		for (j = 0; j < EPOLL_NR_OPS; j++) {
			t[j] = worker[i].ops[j];
			update_stats(&all_stats[j], t[j]);
		}

		if (nfds == 1)
			printf("[thread %2d] fdmap: %p [ add: %04lu; mod: %04lu; del: %04lu ops ]\n",
			       worker[i].tid, &worker[i].fdmap[0],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
		else
			printf("[thread %2d] fdmap: %p ... %p [ add: %04lu ops; mod: %04lu ops; del: %04lu ops ]\n",
			       worker[i].tid, &worker[i].fdmap[0],
			       &worker[i].fdmap[nfds - 1],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
	}

	print_summary();

	close(epollfd);
	perf_cpu_map__put(cpu);
	for (i = 0; i < nthreads; i++)
		free(worker[i].fdmap);

	free(worker);
	return ret;
errmem:
	err(EXIT_FAILURE, "calloc");
}
#endif // HAVE_EVENTFD_SUPPORT
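
/*
 * Example invocation (illustrative only; the output below is sketched
 * from the printf formats above, with made-up PID, pointers and counts):
 *
 *   $ perf bench epoll ctl -t 2 -f 16 -r 4
 *   Run summary [PID 1234]: 2 threads doing epoll_ctl ops on 16 file-descriptors for 4 secs.
 *
 *   [thread  0] fdmap: 0x... ... 0x... [ add: 0420 ops; mod: 0420 ops; del: 0420 ops ]
 *   [thread  1] fdmap: 0x... ... 0x... [ add: 0417 ops; mod: 0417 ops; del: 0417 ops ]
 *
 *   Averaged 418 ADD operations (+- 0.36%)
 *   Averaged 418 MOD operations (+- 0.36%)
 *   Averaged 418 DEL operations (+- 0.36%)
 */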