1 // SPDX-License-Identifier: LGPL-2.1 2 #define _GNU_SOURCE 3 #include <assert.h> 4 #include <linux/membarrier.h> 5 #include <pthread.h> 6 #include <sched.h> 7 #include <stdatomic.h> 8 #include <stdint.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <syscall.h> 13 #include <unistd.h> 14 #include <poll.h> 15 #include <sys/types.h> 16 #include <signal.h> 17 #include <errno.h> 18 #include <stddef.h> 19 #include <stdbool.h> 20 21 static inline pid_t rseq_gettid(void) 22 { 23 return syscall(__NR_gettid); 24 } 25 26 #define NR_INJECT 9 27 static int loop_cnt[NR_INJECT + 1]; 28 29 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 30 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 31 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 32 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 33 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 34 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 35 36 static int opt_modulo, verbose; 37 38 static int opt_yield, opt_signal, opt_sleep, 39 opt_disable_rseq, opt_threads = 200, 40 opt_disable_mod = 0, opt_test = 's'; 41 static bool opt_rseq_legacy; 42 static long long opt_reps = 5000; 43 44 static __thread __attribute__((tls_model("initial-exec"))) 45 unsigned int signals_delivered; 46 47 #ifndef BENCHMARK 48 49 static __thread __attribute__((tls_model("initial-exec"), unused)) 50 unsigned int yield_mod_cnt, nr_abort; 51 52 #define printf_verbose(fmt, ...) \ 53 do { \ 54 if (verbose) \ 55 printf(fmt, ## __VA_ARGS__); \ 56 } while (0) 57 58 #ifdef __i386__ 59 60 #define INJECT_ASM_REG "eax" 61 62 #define RSEQ_INJECT_CLOBBER \ 63 , INJECT_ASM_REG 64 65 #define RSEQ_INJECT_ASM(n) \ 66 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 67 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 68 "jz 333f\n\t" \ 69 "222:\n\t" \ 70 "dec %%" INJECT_ASM_REG "\n\t" \ 71 "jnz 222b\n\t" \ 72 "333:\n\t" 73 74 #elif defined(__x86_64__) 75 76 #define INJECT_ASM_REG_P "rax" 77 #define INJECT_ASM_REG "eax" 78 79 #define RSEQ_INJECT_CLOBBER \ 80 , INJECT_ASM_REG_P \ 81 , INJECT_ASM_REG 82 83 #define RSEQ_INJECT_ASM(n) \ 84 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ 85 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ 86 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 87 "jz 333f\n\t" \ 88 "222:\n\t" \ 89 "dec %%" INJECT_ASM_REG "\n\t" \ 90 "jnz 222b\n\t" \ 91 "333:\n\t" 92 93 #elif defined(__s390__) 94 95 #define RSEQ_INJECT_INPUT \ 96 , [loop_cnt_1]"m"(loop_cnt[1]) \ 97 , [loop_cnt_2]"m"(loop_cnt[2]) \ 98 , [loop_cnt_3]"m"(loop_cnt[3]) \ 99 , [loop_cnt_4]"m"(loop_cnt[4]) \ 100 , [loop_cnt_5]"m"(loop_cnt[5]) \ 101 , [loop_cnt_6]"m"(loop_cnt[6]) 102 103 #define INJECT_ASM_REG "r12" 104 105 #define RSEQ_INJECT_CLOBBER \ 106 , INJECT_ASM_REG 107 108 #define RSEQ_INJECT_ASM(n) \ 109 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 110 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ 111 "je 333f\n\t" \ 112 "222:\n\t" \ 113 "ahi %%" INJECT_ASM_REG ", -1\n\t" \ 114 "jnz 222b\n\t" \ 115 "333:\n\t" 116 117 #elif defined(__ARMEL__) 118 119 #define RSEQ_INJECT_INPUT \ 120 , [loop_cnt_1]"m"(loop_cnt[1]) \ 121 , [loop_cnt_2]"m"(loop_cnt[2]) \ 122 , [loop_cnt_3]"m"(loop_cnt[3]) \ 123 , [loop_cnt_4]"m"(loop_cnt[4]) \ 124 , [loop_cnt_5]"m"(loop_cnt[5]) \ 125 , [loop_cnt_6]"m"(loop_cnt[6]) 126 127 #define INJECT_ASM_REG "r4" 128 129 #define RSEQ_INJECT_CLOBBER \ 130 , INJECT_ASM_REG 131 132 #define RSEQ_INJECT_ASM(n) \ 133 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 134 "cmp " INJECT_ASM_REG ", #0\n\t" \ 135 "beq 333f\n\t" \ 136 "222:\n\t" \ 137 "subs " INJECT_ASM_REG ", #1\n\t" \ 138 "bne 222b\n\t" \ 139 "333:\n\t" 140 141 #elif defined(__AARCH64EL__) 142 143 #define RSEQ_INJECT_INPUT \ 144 , [loop_cnt_1] "Qo" (loop_cnt[1]) \ 145 , [loop_cnt_2] "Qo" (loop_cnt[2]) \ 146 , [loop_cnt_3] "Qo" (loop_cnt[3]) \ 147 , [loop_cnt_4] "Qo" (loop_cnt[4]) \ 148 , [loop_cnt_5] "Qo" (loop_cnt[5]) \ 149 , [loop_cnt_6] "Qo" (loop_cnt[6]) 150 151 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 152 153 #define RSEQ_INJECT_ASM(n) \ 154 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ 155 " cbz " INJECT_ASM_REG ", 333f\n" \ 156 "222:\n" \ 157 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ 158 " cbnz " INJECT_ASM_REG ", 222b\n" \ 159 "333:\n" 160 161 #elif defined(__PPC__) 162 163 #define RSEQ_INJECT_INPUT \ 164 , [loop_cnt_1]"m"(loop_cnt[1]) \ 165 , [loop_cnt_2]"m"(loop_cnt[2]) \ 166 , [loop_cnt_3]"m"(loop_cnt[3]) \ 167 , [loop_cnt_4]"m"(loop_cnt[4]) \ 168 , [loop_cnt_5]"m"(loop_cnt[5]) \ 169 , [loop_cnt_6]"m"(loop_cnt[6]) 170 171 #define INJECT_ASM_REG "r18" 172 173 #define RSEQ_INJECT_CLOBBER \ 174 , INJECT_ASM_REG 175 176 #define RSEQ_INJECT_ASM(n) \ 177 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 178 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 179 "beq 333f\n\t" \ 180 "222:\n\t" \ 181 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 182 "bne 222b\n\t" \ 183 "333:\n\t" 184 185 #elif defined(__mips__) 186 187 #define RSEQ_INJECT_INPUT \ 188 , [loop_cnt_1]"m"(loop_cnt[1]) \ 189 , [loop_cnt_2]"m"(loop_cnt[2]) \ 190 , [loop_cnt_3]"m"(loop_cnt[3]) \ 191 , [loop_cnt_4]"m"(loop_cnt[4]) \ 192 , [loop_cnt_5]"m"(loop_cnt[5]) \ 193 , [loop_cnt_6]"m"(loop_cnt[6]) 194 195 #define INJECT_ASM_REG "$5" 196 197 #define RSEQ_INJECT_CLOBBER \ 198 , INJECT_ASM_REG 199 200 #define RSEQ_INJECT_ASM(n) \ 201 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 202 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 203 "222:\n\t" \ 204 "addiu " INJECT_ASM_REG ", -1\n\t" \ 205 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 206 "333:\n\t" 207 #elif defined(__riscv) 208 209 #define RSEQ_INJECT_INPUT \ 210 , [loop_cnt_1]"m"(loop_cnt[1]) \ 211 , [loop_cnt_2]"m"(loop_cnt[2]) \ 212 , [loop_cnt_3]"m"(loop_cnt[3]) \ 213 , [loop_cnt_4]"m"(loop_cnt[4]) \ 214 , [loop_cnt_5]"m"(loop_cnt[5]) \ 215 , [loop_cnt_6]"m"(loop_cnt[6]) 216 217 #define INJECT_ASM_REG "t1" 218 219 #define RSEQ_INJECT_CLOBBER \ 220 , INJECT_ASM_REG 221 222 #define RSEQ_INJECT_ASM(n) \ 223 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 224 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 225 "222:\n\t" \ 226 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ 227 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 228 "333:\n\t" 229 #elif defined(__or1k__) 230 231 #define RSEQ_INJECT_INPUT \ 232 , [loop_cnt_1]"m"(loop_cnt[1]) \ 233 , [loop_cnt_2]"m"(loop_cnt[2]) \ 234 , [loop_cnt_3]"m"(loop_cnt[3]) \ 235 , [loop_cnt_4]"m"(loop_cnt[4]) \ 236 , [loop_cnt_5]"m"(loop_cnt[5]) \ 237 , [loop_cnt_6]"m"(loop_cnt[6]) 238 239 #define INJECT_ASM_REG "r31" 240 241 #define RSEQ_INJECT_CLOBBER \ 242 , INJECT_ASM_REG 243 244 #define RSEQ_INJECT_ASM(n) \ 245 "l.lwz " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 246 "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \ 247 "l.bf 333f\n\t" \ 248 " l.nop\n\t" \ 249 "222:\n\t" \ 250 "l.addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ 251 "l.sfeqi " INJECT_ASM_REG ", 0\n\t" \ 252 "l.bf 222f\n\t" \ 253 " l.nop\n\t" \ 254 "333:\n\t" 255 #else 256 #error unsupported target 257 #endif 258 259 #define RSEQ_INJECT_FAILED \ 260 nr_abort++; 261 262 #define RSEQ_INJECT_C(n) \ 263 { \ 264 int loc_i, loc_nr_loops = loop_cnt[n]; \ 265 \ 266 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 267 rseq_barrier(); \ 268 } \ 269 if (loc_nr_loops == -1 && opt_modulo) { \ 270 if (yield_mod_cnt == opt_modulo - 1) { \ 271 if (opt_sleep > 0) \ 272 poll(NULL, 0, opt_sleep); \ 273 if (opt_yield) \ 274 sched_yield(); \ 275 if (opt_signal) \ 276 raise(SIGUSR1); \ 277 yield_mod_cnt = 0; \ 278 } else { \ 279 yield_mod_cnt++; \ 280 } \ 281 } \ 282 } 283 284 #define rseq_no_glibc true 285 286 #else 287 288 #define printf_verbose(fmt, ...) 289 #define rseq_no_glibc false 290 291 #endif /* BENCHMARK */ 292 293 #include "rseq.h" 294 295 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED; 296 297 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV 298 #define TEST_MEMBARRIER 299 300 static int sys_membarrier(int cmd, int flags, int cpu_id) 301 { 302 return syscall(__NR_membarrier, cmd, flags, cpu_id); 303 } 304 #endif 305 306 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID 307 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID 308 static 309 int get_current_cpu_id(void) 310 { 311 return rseq_current_mm_cid(); 312 } 313 static 314 bool rseq_validate_cpu_id(void) 315 { 316 return rseq_mm_cid_available(); 317 } 318 static 319 bool rseq_use_cpu_index(void) 320 { 321 return false; /* Use mm_cid */ 322 } 323 # ifdef TEST_MEMBARRIER 324 /* 325 * Membarrier does not currently support targeting a mm_cid, so 326 * issue the barrier on all cpus. 327 */ 328 static 329 int rseq_membarrier_expedited(int cpu) 330 { 331 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 332 0, 0); 333 } 334 # endif /* TEST_MEMBARRIER */ 335 #else 336 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID 337 static 338 int get_current_cpu_id(void) 339 { 340 return rseq_cpu_start(); 341 } 342 static 343 bool rseq_validate_cpu_id(void) 344 { 345 return rseq_current_cpu_raw() >= 0; 346 } 347 static 348 bool rseq_use_cpu_index(void) 349 { 350 return true; /* Use cpu_id as index. */ 351 } 352 # ifdef TEST_MEMBARRIER 353 static 354 int rseq_membarrier_expedited(int cpu) 355 { 356 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 357 MEMBARRIER_CMD_FLAG_CPU, cpu); 358 } 359 # endif /* TEST_MEMBARRIER */ 360 #endif 361 362 struct percpu_lock_entry { 363 intptr_t v; 364 } __attribute__((aligned(128))); 365 366 struct percpu_lock { 367 struct percpu_lock_entry c[CPU_SETSIZE]; 368 }; 369 370 struct test_data_entry { 371 intptr_t count; 372 } __attribute__((aligned(128))); 373 374 struct spinlock_test_data { 375 struct percpu_lock lock; 376 struct test_data_entry c[CPU_SETSIZE]; 377 }; 378 379 struct spinlock_thread_test_data { 380 struct spinlock_test_data *data; 381 long long reps; 382 int reg; 383 }; 384 385 struct inc_test_data { 386 struct test_data_entry c[CPU_SETSIZE]; 387 }; 388 389 struct inc_thread_test_data { 390 struct inc_test_data *data; 391 long long reps; 392 int reg; 393 }; 394 395 struct percpu_list_node { 396 intptr_t data; 397 struct percpu_list_node *next; 398 }; 399 400 struct percpu_list_entry { 401 struct percpu_list_node *head; 402 } __attribute__((aligned(128))); 403 404 struct percpu_list { 405 struct percpu_list_entry c[CPU_SETSIZE]; 406 }; 407 408 #define BUFFER_ITEM_PER_CPU 100 409 410 struct percpu_buffer_node { 411 intptr_t data; 412 }; 413 414 struct percpu_buffer_entry { 415 intptr_t offset; 416 intptr_t buflen; 417 struct percpu_buffer_node **array; 418 } __attribute__((aligned(128))); 419 420 struct percpu_buffer { 421 struct percpu_buffer_entry c[CPU_SETSIZE]; 422 }; 423 424 #define MEMCPY_BUFFER_ITEM_PER_CPU 100 425 426 struct percpu_memcpy_buffer_node { 427 intptr_t data1; 428 uint64_t data2; 429 }; 430 431 struct percpu_memcpy_buffer_entry { 432 intptr_t offset; 433 intptr_t buflen; 434 struct percpu_memcpy_buffer_node *array; 435 } __attribute__((aligned(128))); 436 437 struct percpu_memcpy_buffer { 438 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 439 }; 440 441 /* A simple percpu spinlock. Grabs lock on current cpu. */ 442 static int rseq_this_cpu_lock(struct percpu_lock *lock) 443 { 444 int cpu; 445 446 for (;;) { 447 int ret; 448 449 cpu = get_current_cpu_id(); 450 if (cpu < 0) { 451 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n", 452 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu); 453 abort(); 454 } 455 ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, 456 &lock->c[cpu].v, 457 0, 1, cpu); 458 if (rseq_likely(!ret)) 459 break; 460 /* Retry if comparison fails or rseq aborts. */ 461 } 462 /* 463 * Acquire semantic when taking lock after control dependency. 464 * Matches rseq_smp_store_release(). 465 */ 466 rseq_smp_acquire__after_ctrl_dep(); 467 return cpu; 468 } 469 470 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 471 { 472 assert(lock->c[cpu].v == 1); 473 /* 474 * Release lock, with release semantic. Matches 475 * rseq_smp_acquire__after_ctrl_dep(). 476 */ 477 rseq_smp_store_release(&lock->c[cpu].v, 0); 478 } 479 480 void *test_percpu_spinlock_thread(void *arg) 481 { 482 struct spinlock_thread_test_data *thread_data = arg; 483 struct spinlock_test_data *data = thread_data->data; 484 long long i, reps; 485 486 if (!opt_disable_rseq && thread_data->reg && 487 __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) 488 abort(); 489 reps = thread_data->reps; 490 for (i = 0; i < reps; i++) { 491 int cpu = rseq_this_cpu_lock(&data->lock); 492 data->c[cpu].count++; 493 rseq_percpu_unlock(&data->lock, cpu); 494 #ifndef BENCHMARK 495 if (i != 0 && !(i % (reps / 10))) 496 printf_verbose("tid %d: count %lld\n", 497 (int) rseq_gettid(), i); 498 #endif 499 } 500 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 501 (int) rseq_gettid(), nr_abort, signals_delivered); 502 if (!opt_disable_rseq && thread_data->reg && 503 rseq_unregister_current_thread()) 504 abort(); 505 return NULL; 506 } 507 508 /* 509 * A simple test which implements a sharded counter using a per-cpu 510 * lock. Obviously real applications might prefer to simply use a 511 * per-cpu increment; however, this is reasonable for a test and the 512 * lock can be extended to synchronize more complicated operations. 513 */ 514 void test_percpu_spinlock(void) 515 { 516 const int num_threads = opt_threads; 517 int i, ret; 518 uint64_t sum; 519 pthread_t test_threads[num_threads]; 520 struct spinlock_test_data data; 521 struct spinlock_thread_test_data thread_data[num_threads]; 522 523 memset(&data, 0, sizeof(data)); 524 for (i = 0; i < num_threads; i++) { 525 thread_data[i].reps = opt_reps; 526 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 527 thread_data[i].reg = 1; 528 else 529 thread_data[i].reg = 0; 530 thread_data[i].data = &data; 531 ret = pthread_create(&test_threads[i], NULL, 532 test_percpu_spinlock_thread, 533 &thread_data[i]); 534 if (ret) { 535 errno = ret; 536 perror("pthread_create"); 537 abort(); 538 } 539 } 540 541 for (i = 0; i < num_threads; i++) { 542 ret = pthread_join(test_threads[i], NULL); 543 if (ret) { 544 errno = ret; 545 perror("pthread_join"); 546 abort(); 547 } 548 } 549 550 sum = 0; 551 for (i = 0; i < CPU_SETSIZE; i++) 552 sum += data.c[i].count; 553 554 assert(sum == (uint64_t)opt_reps * num_threads); 555 } 556 557 void *test_percpu_inc_thread(void *arg) 558 { 559 struct inc_thread_test_data *thread_data = arg; 560 struct inc_test_data *data = thread_data->data; 561 long long i, reps; 562 563 if (!opt_disable_rseq && thread_data->reg && 564 __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) 565 abort(); 566 reps = thread_data->reps; 567 for (i = 0; i < reps; i++) { 568 int ret; 569 570 do { 571 int cpu; 572 573 cpu = get_current_cpu_id(); 574 ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU, 575 &data->c[cpu].count, 1, cpu); 576 } while (rseq_unlikely(ret)); 577 #ifndef BENCHMARK 578 if (i != 0 && !(i % (reps / 10))) 579 printf_verbose("tid %d: count %lld\n", 580 (int) rseq_gettid(), i); 581 #endif 582 } 583 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 584 (int) rseq_gettid(), nr_abort, signals_delivered); 585 if (!opt_disable_rseq && thread_data->reg && 586 rseq_unregister_current_thread()) 587 abort(); 588 return NULL; 589 } 590 591 void test_percpu_inc(void) 592 { 593 const int num_threads = opt_threads; 594 int i, ret; 595 uint64_t sum; 596 pthread_t test_threads[num_threads]; 597 struct inc_test_data data; 598 struct inc_thread_test_data thread_data[num_threads]; 599 600 memset(&data, 0, sizeof(data)); 601 for (i = 0; i < num_threads; i++) { 602 thread_data[i].reps = opt_reps; 603 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 604 thread_data[i].reg = 1; 605 else 606 thread_data[i].reg = 0; 607 thread_data[i].data = &data; 608 ret = pthread_create(&test_threads[i], NULL, 609 test_percpu_inc_thread, 610 &thread_data[i]); 611 if (ret) { 612 errno = ret; 613 perror("pthread_create"); 614 abort(); 615 } 616 } 617 618 for (i = 0; i < num_threads; i++) { 619 ret = pthread_join(test_threads[i], NULL); 620 if (ret) { 621 errno = ret; 622 perror("pthread_join"); 623 abort(); 624 } 625 } 626 627 sum = 0; 628 for (i = 0; i < CPU_SETSIZE; i++) 629 sum += data.c[i].count; 630 631 assert(sum == (uint64_t)opt_reps * num_threads); 632 } 633 634 void this_cpu_list_push(struct percpu_list *list, 635 struct percpu_list_node *node, 636 int *_cpu) 637 { 638 int cpu; 639 640 for (;;) { 641 intptr_t *targetptr, newval, expect; 642 int ret; 643 644 cpu = get_current_cpu_id(); 645 /* Load list->c[cpu].head with single-copy atomicity. */ 646 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 647 newval = (intptr_t)node; 648 targetptr = (intptr_t *)&list->c[cpu].head; 649 node->next = (struct percpu_list_node *)expect; 650 ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, 651 targetptr, expect, newval, cpu); 652 if (rseq_likely(!ret)) 653 break; 654 /* Retry if comparison fails or rseq aborts. */ 655 } 656 if (_cpu) 657 *_cpu = cpu; 658 } 659 660 /* 661 * Unlike a traditional lock-less linked list; the availability of a 662 * rseq primitive allows us to implement pop without concerns over 663 * ABA-type races. 664 */ 665 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 666 int *_cpu) 667 { 668 struct percpu_list_node *node = NULL; 669 int cpu; 670 671 for (;;) { 672 struct percpu_list_node *head; 673 intptr_t *targetptr, expectnot, *load; 674 long offset; 675 int ret; 676 677 cpu = get_current_cpu_id(); 678 targetptr = (intptr_t *)&list->c[cpu].head; 679 expectnot = (intptr_t)NULL; 680 offset = offsetof(struct percpu_list_node, next); 681 load = (intptr_t *)&head; 682 ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU, 683 targetptr, expectnot, 684 offset, load, cpu); 685 if (rseq_likely(!ret)) { 686 node = head; 687 break; 688 } 689 if (ret > 0) 690 break; 691 /* Retry if rseq aborts. */ 692 } 693 if (_cpu) 694 *_cpu = cpu; 695 return node; 696 } 697 698 /* 699 * __percpu_list_pop is not safe against concurrent accesses. Should 700 * only be used on lists that are not concurrently modified. 701 */ 702 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 703 { 704 struct percpu_list_node *node; 705 706 node = list->c[cpu].head; 707 if (!node) 708 return NULL; 709 list->c[cpu].head = node->next; 710 return node; 711 } 712 713 void *test_percpu_list_thread(void *arg) 714 { 715 long long i, reps; 716 struct percpu_list *list = (struct percpu_list *)arg; 717 718 if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) 719 abort(); 720 721 reps = opt_reps; 722 for (i = 0; i < reps; i++) { 723 struct percpu_list_node *node; 724 725 node = this_cpu_list_pop(list, NULL); 726 if (opt_yield) 727 sched_yield(); /* encourage shuffling */ 728 if (node) 729 this_cpu_list_push(list, node, NULL); 730 } 731 732 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 733 (int) rseq_gettid(), nr_abort, signals_delivered); 734 if (!opt_disable_rseq && rseq_unregister_current_thread()) 735 abort(); 736 737 return NULL; 738 } 739 740 /* Simultaneous modification to a per-cpu linked list from many threads. */ 741 void test_percpu_list(void) 742 { 743 const int num_threads = opt_threads; 744 int i, j, ret; 745 uint64_t sum = 0, expected_sum = 0; 746 struct percpu_list list; 747 pthread_t test_threads[num_threads]; 748 cpu_set_t allowed_cpus; 749 750 memset(&list, 0, sizeof(list)); 751 752 /* Generate list entries for every usable cpu. */ 753 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 754 for (i = 0; i < CPU_SETSIZE; i++) { 755 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) 756 continue; 757 for (j = 1; j <= 100; j++) { 758 struct percpu_list_node *node; 759 760 expected_sum += j; 761 762 node = malloc(sizeof(*node)); 763 assert(node); 764 node->data = j; 765 node->next = list.c[i].head; 766 list.c[i].head = node; 767 } 768 } 769 770 for (i = 0; i < num_threads; i++) { 771 ret = pthread_create(&test_threads[i], NULL, 772 test_percpu_list_thread, &list); 773 if (ret) { 774 errno = ret; 775 perror("pthread_create"); 776 abort(); 777 } 778 } 779 780 for (i = 0; i < num_threads; i++) { 781 ret = pthread_join(test_threads[i], NULL); 782 if (ret) { 783 errno = ret; 784 perror("pthread_join"); 785 abort(); 786 } 787 } 788 789 for (i = 0; i < CPU_SETSIZE; i++) { 790 struct percpu_list_node *node; 791 792 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) 793 continue; 794 795 while ((node = __percpu_list_pop(&list, i))) { 796 sum += node->data; 797 free(node); 798 } 799 } 800 801 /* 802 * All entries should now be accounted for (unless some external 803 * actor is interfering with our allowed affinity while this 804 * test is running). 805 */ 806 assert(sum == expected_sum); 807 } 808 809 bool this_cpu_buffer_push(struct percpu_buffer *buffer, 810 struct percpu_buffer_node *node, 811 int *_cpu) 812 { 813 bool result = false; 814 int cpu; 815 816 for (;;) { 817 intptr_t *targetptr_spec, newval_spec; 818 intptr_t *targetptr_final, newval_final; 819 intptr_t offset; 820 int ret; 821 822 cpu = get_current_cpu_id(); 823 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 824 if (offset == buffer->c[cpu].buflen) 825 break; 826 newval_spec = (intptr_t)node; 827 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 828 newval_final = offset + 1; 829 targetptr_final = &buffer->c[cpu].offset; 830 ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU, 831 targetptr_final, offset, targetptr_spec, 832 newval_spec, newval_final, cpu); 833 if (rseq_likely(!ret)) { 834 result = true; 835 break; 836 } 837 /* Retry if comparison fails or rseq aborts. */ 838 } 839 if (_cpu) 840 *_cpu = cpu; 841 return result; 842 } 843 844 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 845 int *_cpu) 846 { 847 struct percpu_buffer_node *head; 848 int cpu; 849 850 for (;;) { 851 intptr_t *targetptr, newval; 852 intptr_t offset; 853 int ret; 854 855 cpu = get_current_cpu_id(); 856 /* Load offset with single-copy atomicity. */ 857 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 858 if (offset == 0) { 859 head = NULL; 860 break; 861 } 862 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 863 newval = offset - 1; 864 targetptr = (intptr_t *)&buffer->c[cpu].offset; 865 ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, 866 targetptr, offset, 867 (intptr_t *)&buffer->c[cpu].array[offset - 1], 868 (intptr_t)head, newval, cpu); 869 if (rseq_likely(!ret)) 870 break; 871 /* Retry if comparison fails or rseq aborts. */ 872 } 873 if (_cpu) 874 *_cpu = cpu; 875 return head; 876 } 877 878 /* 879 * __percpu_buffer_pop is not safe against concurrent accesses. Should 880 * only be used on buffers that are not concurrently modified. 881 */ 882 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 883 int cpu) 884 { 885 struct percpu_buffer_node *head; 886 intptr_t offset; 887 888 offset = buffer->c[cpu].offset; 889 if (offset == 0) 890 return NULL; 891 head = buffer->c[cpu].array[offset - 1]; 892 buffer->c[cpu].offset = offset - 1; 893 return head; 894 } 895 896 void *test_percpu_buffer_thread(void *arg) 897 { 898 long long i, reps; 899 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 900 901 if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) 902 abort(); 903 904 reps = opt_reps; 905 for (i = 0; i < reps; i++) { 906 struct percpu_buffer_node *node; 907 908 node = this_cpu_buffer_pop(buffer, NULL); 909 if (opt_yield) 910 sched_yield(); /* encourage shuffling */ 911 if (node) { 912 if (!this_cpu_buffer_push(buffer, node, NULL)) { 913 /* Should increase buffer size. */ 914 abort(); 915 } 916 } 917 } 918 919 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 920 (int) rseq_gettid(), nr_abort, signals_delivered); 921 if (!opt_disable_rseq && rseq_unregister_current_thread()) 922 abort(); 923 924 return NULL; 925 } 926 927 /* Simultaneous modification to a per-cpu buffer from many threads. */ 928 void test_percpu_buffer(void) 929 { 930 const int num_threads = opt_threads; 931 int i, j, ret; 932 uint64_t sum = 0, expected_sum = 0; 933 struct percpu_buffer buffer; 934 pthread_t test_threads[num_threads]; 935 cpu_set_t allowed_cpus; 936 937 memset(&buffer, 0, sizeof(buffer)); 938 939 /* Generate list entries for every usable cpu. */ 940 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 941 for (i = 0; i < CPU_SETSIZE; i++) { 942 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) 943 continue; 944 /* Worse-case is every item in same CPU. */ 945 buffer.c[i].array = 946 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 947 BUFFER_ITEM_PER_CPU); 948 assert(buffer.c[i].array); 949 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 950 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 951 struct percpu_buffer_node *node; 952 953 expected_sum += j; 954 955 /* 956 * We could theoretically put the word-sized 957 * "data" directly in the buffer. However, we 958 * want to model objects that would not fit 959 * within a single word, so allocate an object 960 * for each node. 961 */ 962 node = malloc(sizeof(*node)); 963 assert(node); 964 node->data = j; 965 buffer.c[i].array[j - 1] = node; 966 buffer.c[i].offset++; 967 } 968 } 969 970 for (i = 0; i < num_threads; i++) { 971 ret = pthread_create(&test_threads[i], NULL, 972 test_percpu_buffer_thread, &buffer); 973 if (ret) { 974 errno = ret; 975 perror("pthread_create"); 976 abort(); 977 } 978 } 979 980 for (i = 0; i < num_threads; i++) { 981 ret = pthread_join(test_threads[i], NULL); 982 if (ret) { 983 errno = ret; 984 perror("pthread_join"); 985 abort(); 986 } 987 } 988 989 for (i = 0; i < CPU_SETSIZE; i++) { 990 struct percpu_buffer_node *node; 991 992 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) 993 continue; 994 995 while ((node = __percpu_buffer_pop(&buffer, i))) { 996 sum += node->data; 997 free(node); 998 } 999 free(buffer.c[i].array); 1000 } 1001 1002 /* 1003 * All entries should now be accounted for (unless some external 1004 * actor is interfering with our allowed affinity while this 1005 * test is running). 1006 */ 1007 assert(sum == expected_sum); 1008 } 1009 1010 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 1011 struct percpu_memcpy_buffer_node item, 1012 int *_cpu) 1013 { 1014 bool result = false; 1015 int cpu; 1016 1017 for (;;) { 1018 intptr_t *targetptr_final, newval_final, offset; 1019 char *destptr, *srcptr; 1020 size_t copylen; 1021 int ret; 1022 1023 cpu = get_current_cpu_id(); 1024 /* Load offset with single-copy atomicity. */ 1025 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 1026 if (offset == buffer->c[cpu].buflen) 1027 break; 1028 destptr = (char *)&buffer->c[cpu].array[offset]; 1029 srcptr = (char *)&item; 1030 /* copylen must be <= 4kB. */ 1031 copylen = sizeof(item); 1032 newval_final = offset + 1; 1033 targetptr_final = &buffer->c[cpu].offset; 1034 ret = rseq_cmpeqv_trymemcpy_storev( 1035 opt_mo, RSEQ_PERCPU, 1036 targetptr_final, offset, 1037 destptr, srcptr, copylen, 1038 newval_final, cpu); 1039 if (rseq_likely(!ret)) { 1040 result = true; 1041 break; 1042 } 1043 /* Retry if comparison fails or rseq aborts. */ 1044 } 1045 if (_cpu) 1046 *_cpu = cpu; 1047 return result; 1048 } 1049 1050 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 1051 struct percpu_memcpy_buffer_node *item, 1052 int *_cpu) 1053 { 1054 bool result = false; 1055 int cpu; 1056 1057 for (;;) { 1058 intptr_t *targetptr_final, newval_final, offset; 1059 char *destptr, *srcptr; 1060 size_t copylen; 1061 int ret; 1062 1063 cpu = get_current_cpu_id(); 1064 /* Load offset with single-copy atomicity. */ 1065 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 1066 if (offset == 0) 1067 break; 1068 destptr = (char *)item; 1069 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 1070 /* copylen must be <= 4kB. */ 1071 copylen = sizeof(*item); 1072 newval_final = offset - 1; 1073 targetptr_final = &buffer->c[cpu].offset; 1074 ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, 1075 targetptr_final, offset, destptr, srcptr, copylen, 1076 newval_final, cpu); 1077 if (rseq_likely(!ret)) { 1078 result = true; 1079 break; 1080 } 1081 /* Retry if comparison fails or rseq aborts. */ 1082 } 1083 if (_cpu) 1084 *_cpu = cpu; 1085 return result; 1086 } 1087 1088 /* 1089 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 1090 * only be used on buffers that are not concurrently modified. 1091 */ 1092 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 1093 struct percpu_memcpy_buffer_node *item, 1094 int cpu) 1095 { 1096 intptr_t offset; 1097 1098 offset = buffer->c[cpu].offset; 1099 if (offset == 0) 1100 return false; 1101 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 1102 buffer->c[cpu].offset = offset - 1; 1103 return true; 1104 } 1105 1106 void *test_percpu_memcpy_buffer_thread(void *arg) 1107 { 1108 long long i, reps; 1109 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 1110 1111 if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) 1112 abort(); 1113 1114 reps = opt_reps; 1115 for (i = 0; i < reps; i++) { 1116 struct percpu_memcpy_buffer_node item; 1117 bool result; 1118 1119 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 1120 if (opt_yield) 1121 sched_yield(); /* encourage shuffling */ 1122 if (result) { 1123 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 1124 /* Should increase buffer size. */ 1125 abort(); 1126 } 1127 } 1128 } 1129 1130 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 1131 (int) rseq_gettid(), nr_abort, signals_delivered); 1132 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1133 abort(); 1134 1135 return NULL; 1136 } 1137 1138 /* Simultaneous modification to a per-cpu buffer from many threads. */ 1139 void test_percpu_memcpy_buffer(void) 1140 { 1141 const int num_threads = opt_threads; 1142 int i, j, ret; 1143 uint64_t sum = 0, expected_sum = 0; 1144 struct percpu_memcpy_buffer buffer; 1145 pthread_t test_threads[num_threads]; 1146 cpu_set_t allowed_cpus; 1147 1148 memset(&buffer, 0, sizeof(buffer)); 1149 1150 /* Generate list entries for every usable cpu. */ 1151 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1152 for (i = 0; i < CPU_SETSIZE; i++) { 1153 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) 1154 continue; 1155 /* Worse-case is every item in same CPU. */ 1156 buffer.c[i].array = 1157 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1158 MEMCPY_BUFFER_ITEM_PER_CPU); 1159 assert(buffer.c[i].array); 1160 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1161 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1162 expected_sum += 2 * j + 1; 1163 1164 /* 1165 * We could theoretically put the word-sized 1166 * "data" directly in the buffer. However, we 1167 * want to model objects that would not fit 1168 * within a single word, so allocate an object 1169 * for each node. 1170 */ 1171 buffer.c[i].array[j - 1].data1 = j; 1172 buffer.c[i].array[j - 1].data2 = j + 1; 1173 buffer.c[i].offset++; 1174 } 1175 } 1176 1177 for (i = 0; i < num_threads; i++) { 1178 ret = pthread_create(&test_threads[i], NULL, 1179 test_percpu_memcpy_buffer_thread, 1180 &buffer); 1181 if (ret) { 1182 errno = ret; 1183 perror("pthread_create"); 1184 abort(); 1185 } 1186 } 1187 1188 for (i = 0; i < num_threads; i++) { 1189 ret = pthread_join(test_threads[i], NULL); 1190 if (ret) { 1191 errno = ret; 1192 perror("pthread_join"); 1193 abort(); 1194 } 1195 } 1196 1197 for (i = 0; i < CPU_SETSIZE; i++) { 1198 struct percpu_memcpy_buffer_node item; 1199 1200 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) 1201 continue; 1202 1203 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1204 sum += item.data1; 1205 sum += item.data2; 1206 } 1207 free(buffer.c[i].array); 1208 } 1209 1210 /* 1211 * All entries should now be accounted for (unless some external 1212 * actor is interfering with our allowed affinity while this 1213 * test is running). 1214 */ 1215 assert(sum == expected_sum); 1216 } 1217 1218 static void test_signal_interrupt_handler(int signo) 1219 { 1220 signals_delivered++; 1221 } 1222 1223 static int set_signal_handler(void) 1224 { 1225 int ret = 0; 1226 struct sigaction sa; 1227 sigset_t sigset; 1228 1229 ret = sigemptyset(&sigset); 1230 if (ret < 0) { 1231 perror("sigemptyset"); 1232 return ret; 1233 } 1234 1235 sa.sa_handler = test_signal_interrupt_handler; 1236 sa.sa_mask = sigset; 1237 sa.sa_flags = 0; 1238 ret = sigaction(SIGUSR1, &sa, NULL); 1239 if (ret < 0) { 1240 perror("sigaction"); 1241 return ret; 1242 } 1243 1244 printf_verbose("Signal handler set for SIGUSR1\n"); 1245 1246 return ret; 1247 } 1248 1249 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ 1250 #ifdef TEST_MEMBARRIER 1251 struct test_membarrier_thread_args { 1252 int stop; 1253 intptr_t percpu_list_ptr; 1254 }; 1255 1256 /* Worker threads modify data in their "active" percpu lists. */ 1257 void *test_membarrier_worker_thread(void *arg) 1258 { 1259 struct test_membarrier_thread_args *args = 1260 (struct test_membarrier_thread_args *)arg; 1261 const int iters = opt_reps; 1262 int i; 1263 1264 if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { 1265 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1266 errno, strerror(errno)); 1267 abort(); 1268 } 1269 1270 /* Wait for initialization. */ 1271 while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {} 1272 1273 for (i = 0; i < iters; ++i) { 1274 int ret; 1275 1276 do { 1277 int cpu = get_current_cpu_id(); 1278 1279 ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU, 1280 &args->percpu_list_ptr, 1281 sizeof(struct percpu_list_entry) * cpu, 1, cpu); 1282 } while (rseq_unlikely(ret)); 1283 } 1284 1285 if (rseq_unregister_current_thread()) { 1286 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1287 errno, strerror(errno)); 1288 abort(); 1289 } 1290 return NULL; 1291 } 1292 1293 void test_membarrier_init_percpu_list(struct percpu_list *list) 1294 { 1295 int i; 1296 1297 memset(list, 0, sizeof(*list)); 1298 for (i = 0; i < CPU_SETSIZE; i++) { 1299 struct percpu_list_node *node; 1300 1301 node = malloc(sizeof(*node)); 1302 assert(node); 1303 node->data = 0; 1304 node->next = NULL; 1305 list->c[i].head = node; 1306 } 1307 } 1308 1309 void test_membarrier_free_percpu_list(struct percpu_list *list) 1310 { 1311 int i; 1312 1313 for (i = 0; i < CPU_SETSIZE; i++) 1314 free(list->c[i].head); 1315 } 1316 1317 /* 1318 * The manager thread swaps per-cpu lists that worker threads see, 1319 * and validates that there are no unexpected modifications. 1320 */ 1321 void *test_membarrier_manager_thread(void *arg) 1322 { 1323 struct test_membarrier_thread_args *args = 1324 (struct test_membarrier_thread_args *)arg; 1325 struct percpu_list list_a, list_b; 1326 intptr_t expect_a = 0, expect_b = 0; 1327 int cpu_a = 0, cpu_b = 0; 1328 1329 if (__rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) { 1330 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", 1331 errno, strerror(errno)); 1332 abort(); 1333 } 1334 1335 /* Init lists. */ 1336 test_membarrier_init_percpu_list(&list_a); 1337 test_membarrier_init_percpu_list(&list_b); 1338 1339 __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE); 1340 1341 while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) { 1342 /* list_a is "active". */ 1343 cpu_a = rand() % CPU_SETSIZE; 1344 /* 1345 * As list_b is "inactive", we should never see changes 1346 * to list_b. 1347 */ 1348 if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) { 1349 fprintf(stderr, "Membarrier test failed\n"); 1350 abort(); 1351 } 1352 1353 /* Make list_b "active". */ 1354 __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE); 1355 if (rseq_membarrier_expedited(cpu_a) && 1356 errno != ENXIO /* missing CPU */) { 1357 perror("sys_membarrier"); 1358 abort(); 1359 } 1360 /* 1361 * Cpu A should now only modify list_b, so the values 1362 * in list_a should be stable. 1363 */ 1364 expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE); 1365 1366 cpu_b = rand() % CPU_SETSIZE; 1367 /* 1368 * As list_a is "inactive", we should never see changes 1369 * to list_a. 1370 */ 1371 if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) { 1372 fprintf(stderr, "Membarrier test failed\n"); 1373 abort(); 1374 } 1375 1376 /* Make list_a "active". */ 1377 __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE); 1378 if (rseq_membarrier_expedited(cpu_b) && 1379 errno != ENXIO /* missing CPU*/) { 1380 perror("sys_membarrier"); 1381 abort(); 1382 } 1383 /* Remember a value from list_b. */ 1384 expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE); 1385 } 1386 1387 test_membarrier_free_percpu_list(&list_a); 1388 test_membarrier_free_percpu_list(&list_b); 1389 1390 if (rseq_unregister_current_thread()) { 1391 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", 1392 errno, strerror(errno)); 1393 abort(); 1394 } 1395 return NULL; 1396 } 1397 1398 void test_membarrier(void) 1399 { 1400 const int num_threads = opt_threads; 1401 struct test_membarrier_thread_args thread_args; 1402 pthread_t worker_threads[num_threads]; 1403 pthread_t manager_thread; 1404 int i, ret; 1405 1406 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { 1407 perror("sys_membarrier"); 1408 abort(); 1409 } 1410 1411 thread_args.stop = 0; 1412 thread_args.percpu_list_ptr = 0; 1413 ret = pthread_create(&manager_thread, NULL, 1414 test_membarrier_manager_thread, &thread_args); 1415 if (ret) { 1416 errno = ret; 1417 perror("pthread_create"); 1418 abort(); 1419 } 1420 1421 for (i = 0; i < num_threads; i++) { 1422 ret = pthread_create(&worker_threads[i], NULL, 1423 test_membarrier_worker_thread, &thread_args); 1424 if (ret) { 1425 errno = ret; 1426 perror("pthread_create"); 1427 abort(); 1428 } 1429 } 1430 1431 1432 for (i = 0; i < num_threads; i++) { 1433 ret = pthread_join(worker_threads[i], NULL); 1434 if (ret) { 1435 errno = ret; 1436 perror("pthread_join"); 1437 abort(); 1438 } 1439 } 1440 1441 __atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE); 1442 ret = pthread_join(manager_thread, NULL); 1443 if (ret) { 1444 errno = ret; 1445 perror("pthread_join"); 1446 abort(); 1447 } 1448 } 1449 #else /* TEST_MEMBARRIER */ 1450 void test_membarrier(void) 1451 { 1452 fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " 1453 "Skipping membarrier test.\n"); 1454 } 1455 #endif 1456 1457 static void show_usage(int argc, char **argv) 1458 { 1459 printf("Usage : %s <OPTIONS>\n", 1460 argv[0]); 1461 printf("OPTIONS:\n"); 1462 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1463 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1464 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1465 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1466 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1467 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1468 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1469 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1470 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1471 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1472 printf(" [-y] Yield\n"); 1473 printf(" [-k] Kill thread with signal\n"); 1474 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1475 printf(" [-t N] Number of threads (default 200)\n"); 1476 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1477 printf(" [-d] Disable rseq system call (no initialization)\n"); 1478 printf(" [-D M] Disable rseq for each M threads\n"); 1479 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); 1480 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1481 printf(" [-O] Test with optimized RSEQ\n"); 1482 printf(" [-v] Verbose output.\n"); 1483 printf(" [-h] Show this help.\n"); 1484 printf("\n"); 1485 } 1486 1487 int main(int argc, char **argv) 1488 { 1489 int i; 1490 1491 for (i = 1; i < argc; i++) { 1492 if (argv[i][0] != '-') 1493 continue; 1494 switch (argv[i][1]) { 1495 case '1': 1496 case '2': 1497 case '3': 1498 case '4': 1499 case '5': 1500 case '6': 1501 case '7': 1502 case '8': 1503 case '9': 1504 if (argc < i + 2) { 1505 show_usage(argc, argv); 1506 goto error; 1507 } 1508 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1509 i++; 1510 break; 1511 case 'm': 1512 if (argc < i + 2) { 1513 show_usage(argc, argv); 1514 goto error; 1515 } 1516 opt_modulo = atol(argv[i + 1]); 1517 if (opt_modulo < 0) { 1518 show_usage(argc, argv); 1519 goto error; 1520 } 1521 i++; 1522 break; 1523 case 's': 1524 if (argc < i + 2) { 1525 show_usage(argc, argv); 1526 goto error; 1527 } 1528 opt_sleep = atol(argv[i + 1]); 1529 if (opt_sleep < 0) { 1530 show_usage(argc, argv); 1531 goto error; 1532 } 1533 i++; 1534 break; 1535 case 'y': 1536 opt_yield = 1; 1537 break; 1538 case 'k': 1539 opt_signal = 1; 1540 break; 1541 case 'd': 1542 opt_disable_rseq = 1; 1543 break; 1544 case 'D': 1545 if (argc < i + 2) { 1546 show_usage(argc, argv); 1547 goto error; 1548 } 1549 opt_disable_mod = atol(argv[i + 1]); 1550 if (opt_disable_mod < 0) { 1551 show_usage(argc, argv); 1552 goto error; 1553 } 1554 i++; 1555 break; 1556 case 't': 1557 if (argc < i + 2) { 1558 show_usage(argc, argv); 1559 goto error; 1560 } 1561 opt_threads = atol(argv[i + 1]); 1562 if (opt_threads < 0) { 1563 show_usage(argc, argv); 1564 goto error; 1565 } 1566 i++; 1567 break; 1568 case 'r': 1569 if (argc < i + 2) { 1570 show_usage(argc, argv); 1571 goto error; 1572 } 1573 opt_reps = atoll(argv[i + 1]); 1574 if (opt_reps < 0) { 1575 show_usage(argc, argv); 1576 goto error; 1577 } 1578 i++; 1579 break; 1580 case 'h': 1581 show_usage(argc, argv); 1582 goto end; 1583 case 'T': 1584 if (argc < i + 2) { 1585 show_usage(argc, argv); 1586 goto error; 1587 } 1588 opt_test = *argv[i + 1]; 1589 switch (opt_test) { 1590 case 's': 1591 case 'l': 1592 case 'i': 1593 case 'b': 1594 case 'm': 1595 case 'r': 1596 break; 1597 default: 1598 show_usage(argc, argv); 1599 goto error; 1600 } 1601 i++; 1602 break; 1603 case 'v': 1604 verbose = 1; 1605 break; 1606 case 'M': 1607 opt_mo = RSEQ_MO_RELEASE; 1608 break; 1609 case 'L': 1610 opt_rseq_legacy = true; 1611 break; 1612 default: 1613 show_usage(argc, argv); 1614 goto error; 1615 } 1616 } 1617 1618 loop_cnt_1 = loop_cnt[1]; 1619 loop_cnt_2 = loop_cnt[2]; 1620 loop_cnt_3 = loop_cnt[3]; 1621 loop_cnt_4 = loop_cnt[4]; 1622 loop_cnt_5 = loop_cnt[5]; 1623 loop_cnt_6 = loop_cnt[6]; 1624 1625 if (set_signal_handler()) 1626 goto error; 1627 1628 if (!opt_disable_rseq && __rseq_register_current_thread(rseq_no_glibc, opt_rseq_legacy)) 1629 goto error; 1630 if (!opt_disable_rseq && !rseq_validate_cpu_id()) { 1631 fprintf(stderr, "Error: cpu id getter unavailable\n"); 1632 goto error; 1633 } 1634 switch (opt_test) { 1635 case 's': 1636 printf_verbose("spinlock\n"); 1637 test_percpu_spinlock(); 1638 break; 1639 case 'l': 1640 printf_verbose("linked list\n"); 1641 test_percpu_list(); 1642 break; 1643 case 'b': 1644 printf_verbose("buffer\n"); 1645 test_percpu_buffer(); 1646 break; 1647 case 'm': 1648 printf_verbose("memcpy buffer\n"); 1649 test_percpu_memcpy_buffer(); 1650 break; 1651 case 'i': 1652 printf_verbose("counter increment\n"); 1653 test_percpu_inc(); 1654 break; 1655 case 'r': 1656 printf_verbose("membarrier\n"); 1657 test_membarrier(); 1658 break; 1659 } 1660 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1661 abort(); 1662 end: 1663 return 0; 1664 1665 error: 1666 return -1; 1667 } 1668