// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>
#include <stdbool.h>

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT 9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...) \
	do { \
		if (verbose) \
			printf(fmt, ## __VA_ARGS__); \
	} while (0)

#ifdef __i386__

#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P "rax"
#define INJECT_ASM_REG "eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__or1k__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG "r31"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l.lwz " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
	"l.bf 333f\n\t" \
	" l.nop\n\t" \
	"222:\n\t" \
	"l.addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"l.sfeqi " INJECT_ASM_REG ", 0\n\t" \
	"l.bnf 222b\n\t" \
	" l.nop\n\t" \
	"333:\n\t"
#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;

#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
#define TEST_MEMBARRIER

static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false; /* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true; /* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU 100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 &lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield(); /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield(); /* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield(); /* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

/* Worker threads modify data in their "active" percpu lists. */
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();

			ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);

	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_a) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
		if (rseq_membarrier_expedited(cpu_b) &&
		    errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
void test_membarrier(void)
{
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
		"Skipping membarrier test.\n");
}
#endif

static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf(" [-1 loops] Number of loops for delay injection 1\n");
	printf(" [-2 loops] Number of loops for delay injection 2\n");
	printf(" [-3 loops] Number of loops for delay injection 3\n");
	printf(" [-4 loops] Number of loops for delay injection 4\n");
	printf(" [-5 loops] Number of loops for delay injection 5\n");
	printf(" [-6 loops] Number of loops for delay injection 6\n");
	printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf(" [-y] Yield\n");
	printf(" [-k] Kill thread with signal\n");
	printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf(" [-t N] Number of threads (default 200)\n");
	printf(" [-r N] Number of repetitions per thread (default 5000)\n");
	printf(" [-d] Disable rseq system call (no initialization)\n");
	printf(" [-D M] Disable rseq for each M threads\n");
	printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf(" [-v] Verbose output.\n");
	printf(" [-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		fprintf(stderr, "Error: cpu id getter unavailable\n");
		goto error;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}
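
/*
 * Illustrative invocations of the options parsed in main() above. These are
 * examples only: the binary name "param_test" is an assumption based on this
 * file's name, not something stated in the source; adjust it to whatever the
 * build actually produces.
 *
 *   ./param_test -T s -t 16 -r 10000 -v   # per-cpu spinlock, 16 threads, verbose
 *   ./param_test -T l -y                  # per-cpu list, with sched_yield() shuffling
 *   ./param_test -T m -M                  # memcpy buffer, release ordering on push
 *   ./param_test -T i -7 -1 -m 10 -k      # counter increment; delay injection 7 set
 *                                         # to -1 so every 10th pass raises SIGUSR1
 */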