// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)
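
/*
 * Delay injection overview: the per-architecture RSEQ_INJECT_ASM(n)
 * macros below busy-wait loop_cnt[n] times at injection point n inside
 * the rseq critical sections, which widens the race windows the tests
 * try to hit.  On i386/x86-64 the loops read the counters through the
 * asm_loop_cnt_* symbol aliases declared above (main() copies
 * loop_cnt[1..6] into them); the other architectures pass loop_cnt[]
 * in as asm operands via RSEQ_INJECT_INPUT.  RSEQ_INJECT_C(n) adds
 * C-level injection: a loop count of -1 combined with -m triggers the
 * yield/sleep/signal behaviour selected by -y/-s/-k, and
 * RSEQ_INJECT_FAILED counts aborts in nr_abort.  These macros are
 * picked up as injection hooks by rseq.h, so they must be defined
 * before the #include further down.
 */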

#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n)					\
{								\
	int loc_i, loc_nr_loops = loop_cnt[n];			\
								\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) {	\
		rseq_barrier();					\
	}							\
	if (loc_nr_loops == -1 && opt_modulo) {			\
		if (yield_mod_cnt == opt_modulo - 1) {		\
			if (opt_sleep > 0)			\
				poll(NULL, 0, opt_sleep);	\
			if (opt_yield)				\
				sched_yield();			\
			if (opt_signal)				\
				raise(SIGUSR1);			\
			yield_mod_cnt = 0;			\
		} else {					\
			yield_mod_cnt++;			\
		}						\
	}							\
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
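
/*
 * Note on the pop above: rseq_cmpnev_storeoffp_load() (helper provided
 * by rseq.h) checks that the per-cpu head is not NULL, loads it into
 * "head", and stores the word at head + offsetof(next) back into the
 * list head, all within a single rseq critical section on the owning
 * cpu.  There is no window between reading the head and unlinking it,
 * which is what makes the operation immune to ABA.  A positive return
 * value means the head compared equal to NULL (empty list); a negative
 * one means the critical section was aborted, in which case the loop
 * retries.
 */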

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};
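
/*
 * Each worker repeatedly executes rseq_offset_deref_addv() (provided by
 * rseq.h on architectures defining RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV): in
 * one rseq critical section it dereferences percpu_list_ptr, offsets to
 * the current cpu's percpu_list_entry, and increments the first word of
 * that entry's head node (its "data" field).  Workers spin with an
 * acquire load until the manager has published an initial list pointer.
 */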

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}
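
/*
 * After swapping percpu_list_ptr to the other list, the manager issues
 * an expedited rseq membarrier, which aborts any rseq critical section
 * concurrently running on the targeted cpu(s).  A worker that had
 * already dereferenced the old pointer is therefore restarted and
 * re-reads percpu_list_ptr, so once the membarrier returns no further
 * increment can reach the now-inactive list; this is what the checks
 * against expect_a/expect_b below validate.  Note that the expedited
 * rseq command is only permitted after the process has registered with
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, which test_membarrier()
 * does before spawning any thread.
 */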

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);

	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_a) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_b) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage: %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf(" [-1 loops] Number of loops for delay injection 1\n");
	printf(" [-2 loops] Number of loops for delay injection 2\n");
	printf(" [-3 loops] Number of loops for delay injection 3\n");
	printf(" [-4 loops] Number of loops for delay injection 4\n");
	printf(" [-5 loops] Number of loops for delay injection 5\n");
	printf(" [-6 loops] Number of loops for delay injection 6\n");
	printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf(" [-y] Yield\n");
	printf(" [-k] Kill thread with signal\n");
	printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf(" [-t N] Number of threads (default 200)\n");
	printf(" [-r N] Number of repetitions per thread (default 5000)\n");
	printf(" [-d] Disable rseq system call (no initialization)\n");
	printf(" [-D M] Disable rseq for each M threads\n");
	printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf(" [-v] Verbose output.\n");
	printf(" [-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}