1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <subcmd/parse-options.h> 4 #include <linux/hw_breakpoint.h> 5 #include <linux/perf_event.h> 6 #include <linux/time64.h> 7 #include <sys/syscall.h> 8 #include <sys/ioctl.h> 9 #include <sys/time.h> 10 #include <pthread.h> 11 #include <stddef.h> 12 #include <stdlib.h> 13 #include <unistd.h> 14 #include <stdio.h> 15 #include <errno.h> 16 #include "bench.h" 17 #include "futex.h" 18 19 struct { 20 unsigned int nbreakpoints; 21 unsigned int nparallel; 22 unsigned int nthreads; 23 } thread_params = { 24 .nbreakpoints = 1, 25 .nparallel = 1, 26 .nthreads = 1, 27 }; 28 29 static const struct option thread_options[] = { 30 OPT_UINTEGER('b', "breakpoints", &thread_params.nbreakpoints, 31 "Specify amount of breakpoints"), 32 OPT_UINTEGER('p', "parallelism", &thread_params.nparallel, "Specify amount of parallelism"), 33 OPT_UINTEGER('t', "threads", &thread_params.nthreads, "Specify amount of threads"), 34 OPT_END() 35 }; 36 37 static const char * const thread_usage[] = { 38 "perf bench breakpoint thread <options>", 39 NULL 40 }; 41 42 struct breakpoint { 43 int fd; 44 char watched; 45 }; 46 47 static int breakpoint_setup(void *addr) 48 { 49 struct perf_event_attr attr = { .size = 0, }; 50 int fd; 51 52 attr.type = PERF_TYPE_BREAKPOINT; 53 attr.size = sizeof(attr); 54 attr.inherit = 1; 55 attr.exclude_kernel = 1; 56 attr.exclude_hv = 1; 57 attr.bp_addr = (unsigned long)addr; 58 attr.bp_type = HW_BREAKPOINT_RW; 59 attr.bp_len = HW_BREAKPOINT_LEN_1; 60 fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); 61 62 if (fd < 0) 63 fd = -errno; 64 65 return fd; 66 } 67 68 static void *passive_thread(void *arg) 69 { 70 unsigned int *done = (unsigned int *)arg; 71 72 while (!__atomic_load_n(done, __ATOMIC_RELAXED)) 73 futex_wait(done, 0, NULL, 0); 74 return NULL; 75 } 76 77 static void *active_thread(void *arg) 78 { 79 unsigned int *done = (unsigned int *)arg; 80 81 while (!__atomic_load_n(done, __ATOMIC_RELAXED)); 82 return NULL; 83 } 84 85 static void *breakpoint_thread(void *arg) 86 { 87 unsigned int i, done; 88 int *repeat = (int *)arg; 89 pthread_t *threads; 90 91 threads = calloc(thread_params.nthreads, sizeof(threads[0])); 92 if (!threads) 93 exit((perror("calloc"), EXIT_FAILURE)); 94 95 while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) { 96 done = 0; 97 for (i = 0; i < thread_params.nthreads; i++) { 98 if (pthread_create(&threads[i], NULL, passive_thread, &done)) 99 exit((perror("pthread_create"), EXIT_FAILURE)); 100 } 101 __atomic_store_n(&done, 1, __ATOMIC_RELAXED); 102 futex_wake(&done, thread_params.nthreads, 0); 103 for (i = 0; i < thread_params.nthreads; i++) 104 pthread_join(threads[i], NULL); 105 } 106 free(threads); 107 return NULL; 108 } 109 110 // The benchmark creates nbreakpoints inheritable breakpoints, 111 // then starts nparallel threads which create and join bench_repeat batches of nthreads threads. 112 int bench_breakpoint_thread(int argc, const char **argv) 113 { 114 unsigned int i, result_usec; 115 int repeat = bench_repeat; 116 struct breakpoint *breakpoints; 117 pthread_t *parallel; 118 struct timeval start, stop, diff; 119 120 if (parse_options(argc, argv, thread_options, thread_usage, 0)) { 121 usage_with_options(thread_usage, thread_options); 122 exit(EXIT_FAILURE); 123 } 124 breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0])); 125 parallel = calloc(thread_params.nparallel, sizeof(parallel[0])); 126 if (!breakpoints || !parallel) 127 exit((perror("calloc"), EXIT_FAILURE)); 128 129 for (i = 0; i < thread_params.nbreakpoints; i++) { 130 breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched); 131 132 if (breakpoints[i].fd < 0) { 133 if (breakpoints[i].fd == -ENODEV) { 134 printf("Skipping perf bench breakpoint thread: No hardware support\n"); 135 return 0; 136 } 137 exit((perror("perf_event_open"), EXIT_FAILURE)); 138 } 139 } 140 gettimeofday(&start, NULL); 141 for (i = 0; i < thread_params.nparallel; i++) { 142 if (pthread_create(¶llel[i], NULL, breakpoint_thread, &repeat)) 143 exit((perror("pthread_create"), EXIT_FAILURE)); 144 } 145 for (i = 0; i < thread_params.nparallel; i++) 146 pthread_join(parallel[i], NULL); 147 gettimeofday(&stop, NULL); 148 timersub(&stop, &start, &diff); 149 for (i = 0; i < thread_params.nbreakpoints; i++) 150 close(breakpoints[i].fd); 151 free(parallel); 152 free(breakpoints); 153 switch (bench_format) { 154 case BENCH_FORMAT_DEFAULT: 155 printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n", 156 bench_repeat, thread_params.nbreakpoints, thread_params.nparallel); 157 printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", 158 (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 159 result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 160 printf(" %14lf usecs/op\n", 161 (double)result_usec / bench_repeat / thread_params.nthreads); 162 printf(" %14lf usecs/op/cpu\n", 163 (double)result_usec / bench_repeat / 164 thread_params.nthreads * thread_params.nparallel); 165 break; 166 case BENCH_FORMAT_SIMPLE: 167 printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 168 break; 169 default: 170 fprintf(stderr, "Unknown format: %d\n", bench_format); 171 exit(EXIT_FAILURE); 172 } 173 return 0; 174 } 175 176 struct { 177 unsigned int npassive; 178 unsigned int nactive; 179 } enable_params = { 180 .nactive = 0, 181 .npassive = 0, 182 }; 183 184 static const struct option enable_options[] = { 185 OPT_UINTEGER('p', "passive", &enable_params.npassive, "Specify amount of passive threads"), 186 OPT_UINTEGER('a', "active", &enable_params.nactive, "Specify amount of active threads"), 187 OPT_END() 188 }; 189 190 static const char * const enable_usage[] = { 191 "perf bench breakpoint enable <options>", 192 NULL 193 }; 194 195 // The benchmark creates an inheritable breakpoint, 196 // then starts npassive threads that block and nactive threads that actively spin 197 // and then disables and enables the breakpoint bench_repeat times. 198 int bench_breakpoint_enable(int argc, const char **argv) 199 { 200 unsigned int i, nthreads, result_usec, done = 0; 201 char watched; 202 int fd; 203 pthread_t *threads; 204 struct timeval start, stop, diff; 205 206 if (parse_options(argc, argv, enable_options, enable_usage, 0)) { 207 usage_with_options(enable_usage, enable_options); 208 exit(EXIT_FAILURE); 209 } 210 fd = breakpoint_setup(&watched); 211 212 if (fd < 0) { 213 if (fd == -ENODEV) { 214 printf("Skipping perf bench breakpoint enable: No hardware support\n"); 215 return 0; 216 } 217 exit((perror("perf_event_open"), EXIT_FAILURE)); 218 } 219 nthreads = enable_params.npassive + enable_params.nactive; 220 threads = calloc(nthreads, sizeof(threads[0])); 221 if (!threads) 222 exit((perror("calloc"), EXIT_FAILURE)); 223 224 for (i = 0; i < nthreads; i++) { 225 if (pthread_create(&threads[i], NULL, 226 i < enable_params.npassive ? passive_thread : active_thread, &done)) 227 exit((perror("pthread_create"), EXIT_FAILURE)); 228 } 229 usleep(10000); // let the threads block 230 gettimeofday(&start, NULL); 231 for (i = 0; i < bench_repeat; i++) { 232 if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0)) 233 exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)"), EXIT_FAILURE)); 234 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) 235 exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)"), EXIT_FAILURE)); 236 } 237 gettimeofday(&stop, NULL); 238 timersub(&stop, &start, &diff); 239 __atomic_store_n(&done, 1, __ATOMIC_RELAXED); 240 futex_wake(&done, enable_params.npassive, 0); 241 for (i = 0; i < nthreads; i++) 242 pthread_join(threads[i], NULL); 243 free(threads); 244 close(fd); 245 switch (bench_format) { 246 case BENCH_FORMAT_DEFAULT: 247 printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n", 248 bench_repeat, enable_params.npassive, enable_params.nactive); 249 printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", 250 (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 251 result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 252 printf(" %14lf usecs/op\n", (double)result_usec / bench_repeat); 253 break; 254 case BENCH_FORMAT_SIMPLE: 255 printf("%lu.%03lu\n", (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); 256 break; 257 default: 258 fprintf(stderr, "Unknown format: %d\n", bench_format); 259 exit(EXIT_FAILURE); 260 } 261 return 0; 262 } 263