// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Davidlohr Bueso.
 *
 * Benchmark the various operations allowed for epoll_ctl(2).
 * The idea is to concurrently stress a single epoll instance
 */
#ifdef HAVE_EVENTFD
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>

#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <perf/cpumap.h>

#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"
#include "cpumap.h"

#include <err.h>

/* Print only when -v/--verbose was given. */
#define printinfo(fmt, arg...) \
	do { if (__verbose) printf(fmt, ## arg); } while (0)

static unsigned int nthreads = 0;
static unsigned int nsecs    = 8;
/*
 * 'start' is taken right before the workers are created; 'end' and
 * 'runtime' (end - start) are filled in by toggle_done().
 */
struct timeval start, end, runtime;
static bool done, __verbose, randomize;

/*
 * epoll related shared variables.
 */

/* Maximum number of nesting allowed inside epoll sets */
#define EPOLL_MAXNESTS 4

/* Indices into the per-worker and global per-operation counters. */
enum {
	OP_EPOLL_ADD,
	OP_EPOLL_MOD,
	OP_EPOLL_DEL,
	EPOLL_NR_OPS,
};

/* The single epoll instance every worker operates on. */
static int epollfd;
/* Extra epoll instances used for nesting; allocated by nest_epollfd(). */
static int *epollfdp;
static bool noaffinity;
static unsigned int nested = 0;

/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;

/* Start-up handshake between the parent and the workers. */
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats all_stats[EPOLL_NR_OPS];
static pthread_cond_t thread_parent, thread_worker;

/* Per-thread state. */
struct worker {
	int tid;
	pthread_t thread;
	/* number of successful epoll_ctl() calls, per operation */
	unsigned long ops[EPOLL_NR_OPS];
	/* nfds eventfd descriptors owned by this thread */
	int *fdmap;
};

static const struct option options[] = {
	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
	OPT_UINTEGER('r', "runtime", &nsecs,    "Specify runtime (in seconds)"),
	OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
	OPT_BOOLEAN( 'n', "noaffinity",  &noaffinity,   "Disables CPU affinity"),
	OPT_UINTEGER( 'N', "nested",  &nested,   "Nesting level epoll hierarchy (default is 0, no nesting)"),
	OPT_BOOLEAN( 'R', "randomize", &randomize,   "Perform random operations on random fds"),
	OPT_BOOLEAN( 'v', "verbose",  &__verbose,   "Verbose mode"),
	OPT_END()
};

static const char * const bench_epoll_ctl_usage[] = {
	"perf bench epoll ctl <options>",
	NULL
};

/*
 * Stop the benchmark: raise the 'done' flag polled by the workers and
 * record the end time and total runtime. Installed as the SIGINT handler
 * and also called directly by the main thread once the run time elapsed.
 */
static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	/* inform all threads that we're done for the day */
	done = true;
	gettimeofday(&end, NULL);
	timersub(&end, &start, &runtime);
}

/*
 * Build a chain of 'nested' epoll instances (clamped to EPOLL_MAXNESTS):
 * epollfdp[i] is registered in epollfdp[i - 1], and epollfdp[0] is
 * registered in the main epollfd. Exits the process on any failure.
 */
static void nest_epollfd(void)
{
	unsigned int i;
	struct epoll_event ev;

	if (nested > EPOLL_MAXNESTS)
		nested = EPOLL_MAXNESTS;
	printinfo("Nesting level(s): %u\n", nested);

	epollfdp = calloc(nested, sizeof(int));
	/* check the array that was just allocated, not the main epollfd */
	if (!epollfdp)
		err(EXIT_FAILURE, "calloc");

	for (i = 0; i < nested; i++) {
		epollfdp[i] = epoll_create(1);
		/* check the descriptor that was just created */
		if (epollfdp[i] < 0)
			err(EXIT_FAILURE, "epoll_create");
	}

	ev.events = EPOLLHUP; /* anything */
	ev.data.u64 = i; /* any number */

	for (i = nested - 1; i; i--) {
		if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
			      epollfdp[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}

	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
		err(EXIT_FAILURE, "epoll_ctl");
}

/*
 * Perform a single epoll_ctl() operation 'op' (one of the OP_EPOLL_*
 * values) on 'fd' against the main epoll instance. Only calls that
 * return 0 are accounted in w->ops[]; failures are silently ignored.
 */
static inline void do_epoll_op(struct worker *w, int op, int fd)
{
	int error;
	struct epoll_event ev;

	ev.events = EPOLLIN;
	ev.data.u64 = fd;

	switch (op) {
	case OP_EPOLL_ADD:
		error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
		break;
	case OP_EPOLL_MOD:
		ev.events = EPOLLOUT;
		error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
		break;
	case OP_EPOLL_DEL:
		error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
		break;
	default:
		/* unknown op: flag as error so w->ops[] is never indexed with it */
		error = 1;
		break;
	}

	if (!error)
		w->ops[op]++;
}

/* Pick a random fd from the worker's fdmap and a random operation. */
static inline void do_random_epoll_op(struct worker *w)
{
	unsigned long rnd1 = random(), rnd2 = random();
	int op, fd;

	fd = w->fdmap[rnd1 % nfds];
	op = rnd2 % EPOLL_NR_OPS;

	do_epoll_op(w, op, fd);
}

/*
 * Thread body. Reports readiness to the parent, waits on thread_worker
 * for the start broadcast, then issues epoll_ctl() operations until
 * 'done' is set. 'arg' is the thread's struct worker.
 */
static void *workerfn(void *arg)
{
	unsigned int i;
	struct worker *w = (struct worker *) arg;
	struct timespec ts = { .tv_sec = 0,
			       .tv_nsec = 250 };

	pthread_mutex_lock(&thread_lock);
	threads_starting--;
	/* the last thread to check in wakes up the parent */
	if (!threads_starting)
		pthread_cond_signal(&thread_parent);
	pthread_cond_wait(&thread_worker, &thread_lock);
	pthread_mutex_unlock(&thread_lock);

	/* Let 'em loose */
	do {
		/* random */
		if (randomize) {
			do_random_epoll_op(w);
		} else {
			/* deterministic: add, modify, then delete every fd in order */
			for (i = 0; i < nfds; i++) {
				do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
			}
		}

		nanosleep(&ts, NULL);
	} while (!done);

	return NULL;
}

/*
 * Pre-populate the main epoll instance with entries from the worker's
 * fdmap, taking every (100 / pct)-th fd. A 'pct' of 0 adds nothing.
 * Exits the process if an fd cannot be added.
 */
static void init_fdmaps(struct worker *w, int pct)
{
	unsigned int i;
	int inc;
	struct epoll_event ev;

	if (!pct)
		return;

	inc = 100/pct;
	for (i = 0; i < nfds; i+=inc) {
		ev.data.fd = w->fdmap[i];
		ev.events = EPOLLIN;

		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}
}

/*
 * Set up and start all worker threads: allocate each worker's fdmap,
 * fill it with non-blocking eventfds, optionally pre-populate the epoll
 * instance and pin the thread to a CPU taken round-robin from 'cpu'.
 *
 * Returns 0 on success and 1 if an fdmap could not be allocated; every
 * other failure exits the process.
 */
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
{
	pthread_attr_t thread_attr, *attrp = NULL;
	cpu_set_t cpuset;
	unsigned int i, j;
	int ret = 0;

	if (!noaffinity)
		pthread_attr_init(&thread_attr);

	for (i = 0; i < nthreads; i++) {
		struct worker *w = &worker[i];

		w->tid = i;
		w->fdmap = calloc(nfds, sizeof(int));
		if (!w->fdmap)
			return 1;

		for (j = 0; j < nfds; j++) {
			w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
			if (w->fdmap[j] < 0)
				err(EXIT_FAILURE, "eventfd");
		}

		/*
		 * Lets add 50% of the fdmap to the epoll instance, and
		 * do it before any threads are started; otherwise there is
		 * an initial bias of the call failing (mod and del ops).
		 */
		if (randomize)
			init_fdmaps(w, 50);

		if (!noaffinity) {
			CPU_ZERO(&cpuset);
			CPU_SET(cpu->map[i % cpu->nr], &cpuset);

			ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
			if (ret) {
				/* pthread calls return the error number; err() reports errno */
				errno = ret;
				err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
			}

			attrp = &thread_attr;
		}

		ret = pthread_create(&w->thread, attrp, workerfn,
				     (void *)(struct worker *) w);
		if (ret) {
			errno = ret;
			err(EXIT_FAILURE, "pthread_create");
		}
	}

	if (!noaffinity)
		pthread_attr_destroy(&thread_attr);

	return ret;
}

/* Print the per-operation average and relative standard deviation. */
static void print_summary(void)
{
	int i;
	unsigned long avg[EPOLL_NR_OPS];
	double stddev[EPOLL_NR_OPS];

	for (i = 0; i < EPOLL_NR_OPS; i++) {
		avg[i] = avg_stats(&all_stats[i]);
		stddev[i] = stddev_stats(&all_stats[i]);
	}

	printf("\nAveraged %lu ADD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
						   avg[OP_EPOLL_ADD]));
	printf("Averaged %lu MOD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
						   avg[OP_EPOLL_MOD]));
	printf("Averaged %lu DEL operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
						   avg[OP_EPOLL_DEL]));
}

/*
 * Entry point of "perf bench epoll ctl".
 *
 * Parses the options, creates the main (and optionally nested) epoll
 * instance, raises RLIMIT_NOFILE, starts the workers, lets them run for
 * 'nsecs' seconds (or until SIGINT) and finally reports per-thread and
 * aggregated operation counts.
 *
 * Returns the result of the last pthread_join(), i.e. 0 on success;
 * most failures terminate the process through err().
 */
int bench_epoll_ctl(int argc, const char **argv)
{
	int j, ret = 0;
	struct sigaction act;
	struct worker *worker = NULL;
	struct perf_cpu_map *cpu;
	struct rlimit rl, prevrl;
	unsigned int i;

	argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
	if (argc) {
		usage_with_options(bench_epoll_ctl_usage, options);
		exit(EXIT_FAILURE);
	}

	/*
	 * Start from a zeroed sigaction so no field is left indeterminate,
	 * and request the three-argument handler form that toggle_done()
	 * implements.
	 */
	memset(&act, 0, sizeof(act));
	sigfillset(&act.sa_mask);
	act.sa_sigaction = toggle_done;
	act.sa_flags = SA_SIGINFO;
	sigaction(SIGINT, &act, NULL);

	cpu = perf_cpu_map__new(NULL);
	if (!cpu)
		goto errmem;

	/* a single, main epoll instance */
	epollfd = epoll_create(1);
	if (epollfd < 0)
		err(EXIT_FAILURE, "epoll_create");

	/*
	 * Deal with nested epolls, if any.
	 */
	if (nested)
		nest_epollfd();

	/* default to the number of CPUs */
	if (!nthreads)
		nthreads = cpu->nr;

	worker = calloc(nthreads, sizeof(*worker));
	if (!worker)
		goto errmem;

	if (getrlimit(RLIMIT_NOFILE, &prevrl))
		err(EXIT_FAILURE, "getrlimit");
	rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
	printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
		  (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
	if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
		err(EXIT_FAILURE, "setrlimit");

	printf("Run summary [PID %d]: %u threads doing epoll_ctl ops "
	       "%u file-descriptors for %u secs.\n\n",
	       getpid(), nthreads, nfds, nsecs);

	for (i = 0; i < EPOLL_NR_OPS; i++)
		init_stats(&all_stats[i]);

	pthread_mutex_init(&thread_lock, NULL);
	pthread_cond_init(&thread_parent, NULL);
	pthread_cond_init(&thread_worker, NULL);

	threads_starting = nthreads;

	gettimeofday(&start, NULL);

	/*
	 * A non-zero return means an fdmap allocation failed and not every
	 * worker was started; bail out instead of waiting forever below.
	 */
	if (do_threads(worker, cpu))
		goto errmem;

	/* wait until every worker checked in, then release them all at once */
	pthread_mutex_lock(&thread_lock);
	while (threads_starting)
		pthread_cond_wait(&thread_parent, &thread_lock);
	pthread_cond_broadcast(&thread_worker);
	pthread_mutex_unlock(&thread_lock);

	sleep(nsecs);
	toggle_done(0, NULL, NULL);
	printinfo("main thread: toggling done\n");

	for (i = 0; i < nthreads; i++) {
		ret = pthread_join(worker[i].thread, NULL);
		if (ret) {
			/* pthread calls return the error number; err() reports errno */
			errno = ret;
			err(EXIT_FAILURE, "pthread_join");
		}
	}

	/* cleanup & report results */
	pthread_cond_destroy(&thread_parent);
	pthread_cond_destroy(&thread_worker);
	pthread_mutex_destroy(&thread_lock);

	for (i = 0; i < nthreads; i++) {
		unsigned long t[EPOLL_NR_OPS];

		for (j = 0; j < EPOLL_NR_OPS; j++) {
			t[j] = worker[i].ops[j];
			update_stats(&all_stats[j], t[j]);
		}

		if (nfds == 1)
			printf("[thread %2d] fdmap: %p [ add: %04lu ops; mod: %04lu ops; del: %04lu ops ]\n",
			       worker[i].tid, (void *)&worker[i].fdmap[0],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
		else
			printf("[thread %2d] fdmap: %p ... %p [ add: %04lu ops; mod: %04lu ops; del: %04lu ops ]\n",
			       worker[i].tid, (void *)&worker[i].fdmap[0],
			       (void *)&worker[i].fdmap[nfds-1],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
	}

	print_summary();

	close(epollfd);

	/* release the memory allocated for the run */
	for (i = 0; i < nthreads; i++)
		free(worker[i].fdmap);
	free(worker);
	free(epollfdp);

	return ret;
errmem:
	err(EXIT_FAILURE, "calloc");
}
#endif // HAVE_EVENTFD