// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Davidlohr Bueso.
 *
 * Benchmark the various operations allowed for epoll_ctl(2).
 * The idea is to concurrently stress a single epoll instance
 */
#ifdef HAVE_EVENTFD
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>

#include <errno.h>
#include <inttypes.h>
#include <signal.h>
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>

#include "../util/stat.h"
#include <subcmd/parse-options.h>
#include "bench.h"
#include "cpumap.h"

#include <err.h>

/* printf() that only produces output when -v/--verbose was given. */
#define printinfo(fmt, arg...) \
	do { if (__verbose) printf(fmt, ## arg); } while (0)

static unsigned int nthreads = 0;
static unsigned int nsecs    = 8;
struct timeval start, end, runtime;
static bool done, __verbose, randomize;

/*
 * epoll related shared variables.
 */

/* Maximum number of nesting allowed inside epoll sets */
#define EPOLL_MAXNESTS 4

/* Operation kinds; also used as indices into per-op counter arrays. */
enum {
	OP_EPOLL_ADD,
	OP_EPOLL_MOD,
	OP_EPOLL_DEL,
	EPOLL_NR_OPS,
};

static int epollfd;	/* the main epoll instance every worker operates on */
static int *epollfdp;	/* nested epoll instances, allocated by nest_epollfd() */
static bool noaffinity;
static unsigned int nested = 0;

/* amount of fds to monitor, per thread */
static unsigned int nfds = 64;

static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats all_stats[EPOLL_NR_OPS];
static pthread_cond_t thread_parent, thread_worker;

/* Per-thread state. */
struct worker {
	int tid;
	pthread_t thread;
	unsigned long ops[EPOLL_NR_OPS];	/* successful operations, per op kind */
	int *fdmap;				/* nfds eventfd descriptors owned by this thread */
};

static const struct option options[] = {
	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
	OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
	OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
	OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
	OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
	OPT_BOOLEAN( 'R', "randomize", &randomize, "Perform random operations on random fds"),
	OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
	OPT_END()
};

static const char * const bench_epoll_ctl_usage[] = {
	"perf bench epoll ctl <options>",
	NULL
};

/*
 * Stop the benchmark: raise the 'done' flag polled by the workers and
 * record the elapsed run time. Installed as the SIGINT handler and also
 * called directly by the main thread once the requested runtime elapsed.
 */
static void toggle_done(int sig __maybe_unused,
			siginfo_t *info __maybe_unused,
			void *uc __maybe_unused)
{
	/* inform all threads that we're done for the day */
	done = true;
	gettimeofday(&end, NULL);
	timersub(&end, &start, &runtime);
}

/*
 * Build a chain of 'nested' epoll instances (clamped to EPOLL_MAXNESTS):
 * each level is added to the one before it, and the first level is added
 * to the main epollfd. Only called when 'nested' is non-zero.
 * Any failure terminates the program via err().
 */
static void nest_epollfd(void)
{
	unsigned int i;
	struct epoll_event ev;

	if (nested > EPOLL_MAXNESTS)
		nested = EPOLL_MAXNESTS;
	printinfo("Nesting level(s): %d\n", nested);

	epollfdp = calloc(nested, sizeof(*epollfdp));
	/* check the array just allocated, not the main epoll descriptor */
	if (!epollfdp)
		err(EXIT_FAILURE, "calloc");

	for (i = 0; i < nested; i++) {
		epollfdp[i] = epoll_create(1);
		/* check the descriptor just created, not the main one */
		if (epollfdp[i] < 0)
			err(EXIT_FAILURE, "epoll_create");
	}

	ev.events = EPOLLHUP; /* anything */
	ev.data.u64 = i; /* any number */

	/* link level i into level i - 1, from the innermost level outwards */
	for (i = nested - 1; i; i--) {
		if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD,
			      epollfdp[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}

	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0)
		err(EXIT_FAILURE, "epoll_ctl");
}

/*
 * Perform one epoll_ctl() operation of kind 'op' on 'fd' against the main
 * epoll instance. Only calls that return 0 are accounted in w->ops[op];
 * failures are silently ignored.
 */
static inline void do_epoll_op(struct worker *w, int op, int fd)
{
	int error;
	struct epoll_event ev;

	ev.events = EPOLLIN;
	ev.data.u64 = fd;

	switch (op) {
	case OP_EPOLL_ADD:
		error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
		break;
	case OP_EPOLL_MOD:
		ev.events = EPOLLOUT;
		error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev);
		break;
	case OP_EPOLL_DEL:
		error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL);
		break;
	default:
		/* unknown op: treat as a failure so nothing is counted */
		error = 1;
		break;
	}

	if (!error)
		w->ops[op]++;
}

/* Pick a random fd from the worker's fdmap and apply a random operation. */
static inline void do_random_epoll_op(struct worker *w)
{
	unsigned long rnd1 = random(), rnd2 = random();
	int op, fd;

	fd = w->fdmap[rnd1 % nfds];
	op = rnd2 % EPOLL_NR_OPS;

	do_epoll_op(w, op, fd);
}

/*
 * Thread body. Each worker decrements threads_starting, signals the parent
 * if it was the last one to check in, and then blocks on thread_worker until
 * the parent broadcasts. Afterwards it issues epoll operations until 'done'
 * is set, sleeping 250ns between iterations.
 */
static void *workerfn(void *arg)
{
	unsigned int i;
	struct worker *w = (struct worker *) arg;
	struct timespec ts = { .tv_sec = 0,
			       .tv_nsec = 250 };

	pthread_mutex_lock(&thread_lock);
	threads_starting--;
	if (!threads_starting)
		pthread_cond_signal(&thread_parent);
	pthread_cond_wait(&thread_worker, &thread_lock);
	pthread_mutex_unlock(&thread_lock);

	/* Let 'em loose */
	do {
		/* random */
		if (randomize) {
			do_random_epoll_op(w);
		} else {
			/* deterministic mode: add, modify and delete every fd in turn */
			for (i = 0; i < nfds; i++) {
				do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]);
				do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]);
			}
		}

		nanosleep(&ts, NULL);
	} while (!done);

	return NULL;
}

/*
 * Pre-register a share of the worker's fds with the main epoll instance:
 * every (100 / pct)-th entry of w->fdmap is added with EPOLLIN.
 * A pct of 0 registers nothing. Failure terminates the program via err().
 */
static void init_fdmaps(struct worker *w, int pct)
{
	unsigned int i;
	int inc;
	struct epoll_event ev;

	if (!pct)
		return;

	inc = 100/pct;
	for (i = 0; i < nfds; i+=inc) {
		ev.data.fd = w->fdmap[i];
		ev.events = EPOLLIN;

		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0)
			err(EXIT_FAILURE, "epoll_ctl");
	}
}

/*
 * Set up and launch all worker threads: allocate each worker's fdmap, fill
 * it with non-blocking eventfds, optionally pre-populate the epoll instance
 * (random mode) and, unless affinity is disabled, bind thread i to
 * cpu->map[i % cpu->nr].
 *
 * Returns 0 on success and 1 if an fdmap allocation fails; every other
 * failure terminates the program via err().
 */
static int do_threads(struct worker *worker, struct cpu_map *cpu)
{
	pthread_attr_t thread_attr, *attrp = NULL;
	cpu_set_t cpuset;
	unsigned int i, j;
	int ret = 0;

	if (!noaffinity)
		pthread_attr_init(&thread_attr);

	for (i = 0; i < nthreads; i++) {
		struct worker *w = &worker[i];

		w->tid = i;
		w->fdmap = calloc(nfds, sizeof(int));
		if (!w->fdmap)
			return 1;

		for (j = 0; j < nfds; j++) {
			w->fdmap[j] = eventfd(0, EFD_NONBLOCK);
			if (w->fdmap[j] < 0)
				err(EXIT_FAILURE, "eventfd");
		}

		/*
		 * Lets add 50% of the fdmap to the epoll instance, and
		 * do it before any threads are started; otherwise there is
		 * an initial bias of the call failing (mod and del ops).
		 */
		if (randomize)
			init_fdmaps(w, 50);

		if (!noaffinity) {
			CPU_ZERO(&cpuset);
			CPU_SET(cpu->map[i % cpu->nr], &cpuset);

			ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
			if (ret)
				err(EXIT_FAILURE, "pthread_attr_setaffinity_np");

			attrp = &thread_attr;
		}

		ret = pthread_create(&w->thread, attrp, workerfn,
				     (void *)(struct worker *) w);
		if (ret)
			err(EXIT_FAILURE, "pthread_create");
	}

	if (!noaffinity)
		pthread_attr_destroy(&thread_attr);

	return ret;
}

/* Print the per-operation average and relative standard deviation. */
static void print_summary(void)
{
	int i;
	unsigned long avg[EPOLL_NR_OPS];
	double stddev[EPOLL_NR_OPS];

	for (i = 0; i < EPOLL_NR_OPS; i++) {
		avg[i] = avg_stats(&all_stats[i]);
		stddev[i] = stddev_stats(&all_stats[i]);
	}

	printf("\nAveraged %ld ADD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD],
						   avg[OP_EPOLL_ADD]));
	printf("Averaged %ld MOD operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD],
						   avg[OP_EPOLL_MOD]));
	printf("Averaged %ld DEL operations (+- %.2f%%)\n",
	       avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL],
						   avg[OP_EPOLL_DEL]));
}

/*
 * Entry point of "perf bench epoll ctl".
 *
 * Parses the options, creates the main (and optionally nested) epoll
 * instance(s), adjusts RLIMIT_NOFILE, starts the workers, lets them run
 * for 'nsecs' seconds (or until SIGINT), then joins them and reports the
 * per-thread and averaged operation counts.
 *
 * Returns the result of the last pthread_join(); setup failures terminate
 * the program via err()/exit().
 */
int bench_epoll_ctl(int argc, const char **argv)
{
	int j, ret = 0;
	struct sigaction act;
	struct worker *worker = NULL;
	struct cpu_map *cpu;
	struct rlimit rl, prevrl;
	unsigned int i;

	argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0);
	if (argc) {
		usage_with_options(bench_epoll_ctl_usage, options);
		exit(EXIT_FAILURE);
	}

	/*
	 * Start from a fully zeroed sigaction so no field is passed to the
	 * kernel uninitialised, and request the three-argument handler form
	 * since the handler is installed through sa_sigaction.
	 */
	memset(&act, 0, sizeof(act));
	sigfillset(&act.sa_mask);
	act.sa_sigaction = toggle_done;
	act.sa_flags = SA_SIGINFO;
	sigaction(SIGINT, &act, NULL);

	cpu = cpu_map__new(NULL);
	if (!cpu)
		goto errmem;

	/* a single, main epoll instance */
	epollfd = epoll_create(1);
	if (epollfd < 0)
		err(EXIT_FAILURE, "epoll_create");

	/*
	 * Deal with nested epolls, if any.
	 */
	if (nested)
		nest_epollfd();

	/* default to the number of CPUs */
	if (!nthreads)
		nthreads = cpu->nr;

	worker = calloc(nthreads, sizeof(*worker));
	if (!worker)
		goto errmem;

	if (getrlimit(RLIMIT_NOFILE, &prevrl))
		err(EXIT_FAILURE, "getrlimit");
	rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50;
	printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n",
		  (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max);
	if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
		err(EXIT_FAILURE, "setrlimit");

	printf("Run summary [PID %d]: %d threads doing epoll_ctl ops "
	       "%d file-descriptors for %d secs.\n\n",
	       getpid(), nthreads, nfds, nsecs);

	for (i = 0; i < EPOLL_NR_OPS; i++)
		init_stats(&all_stats[i]);

	pthread_mutex_init(&thread_lock, NULL);
	pthread_cond_init(&thread_parent, NULL);
	pthread_cond_init(&thread_worker, NULL);

	threads_starting = nthreads;

	gettimeofday(&start, NULL);

	/*
	 * A non-zero return means an fdmap allocation failed and not every
	 * thread was created; waiting on threads_starting below would then
	 * never finish, so bail out instead.
	 */
	if (do_threads(worker, cpu))
		goto errmem;

	/* wait until every worker has checked in, then release them all */
	pthread_mutex_lock(&thread_lock);
	while (threads_starting)
		pthread_cond_wait(&thread_parent, &thread_lock);
	pthread_cond_broadcast(&thread_worker);
	pthread_mutex_unlock(&thread_lock);

	sleep(nsecs);
	toggle_done(0, NULL, NULL);
	printinfo("main thread: toggling done\n");

	for (i = 0; i < nthreads; i++) {
		ret = pthread_join(worker[i].thread, NULL);
		if (ret)
			err(EXIT_FAILURE, "pthread_join");
	}

	/* cleanup & report results */
	pthread_cond_destroy(&thread_parent);
	pthread_cond_destroy(&thread_worker);
	pthread_mutex_destroy(&thread_lock);

	for (i = 0; i < nthreads; i++) {
		unsigned long t[EPOLL_NR_OPS];

		for (j = 0; j < EPOLL_NR_OPS; j++) {
			t[j] = worker[i].ops[j];
			update_stats(&all_stats[j], t[j]);
		}

		if (nfds == 1)
			printf("[thread %2d] fdmap: %p [ add: %04ld; mod: %04ld; del: %04lds ops ]\n",
			       worker[i].tid, &worker[i].fdmap[0],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
		else
			printf("[thread %2d] fdmap: %p ... %p [ add: %04ld ops; mod: %04ld ops; del: %04ld ops ]\n",
			       worker[i].tid, &worker[i].fdmap[0],
			       &worker[i].fdmap[nfds-1],
			       t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]);
	}

	print_summary();

	/* release the nested epoll instances, if any were created */
	if (epollfdp) {
		for (i = 0; i < nested; i++)
			close(epollfdp[i]);
		free(epollfdp);
		epollfdp = NULL;
	}
	close(epollfd);

	/* release per-thread fd tables and the worker array */
	for (i = 0; i < nthreads; i++)
		free(worker[i].fdmap);
	free(worker);

	return ret;
errmem:
	err(EXIT_FAILURE, "calloc");
}
#endif // HAVE_EVENTFD