1 // SPDX-License-Identifier: LGPL-2.1 2 #define _GNU_SOURCE 3 #include <assert.h> 4 #include <pthread.h> 5 #include <sched.h> 6 #include <stdint.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <syscall.h> 11 #include <unistd.h> 12 #include <poll.h> 13 #include <sys/types.h> 14 #include <signal.h> 15 #include <errno.h> 16 #include <stddef.h> 17 18 static inline pid_t gettid(void) 19 { 20 return syscall(__NR_gettid); 21 } 22 23 #define NR_INJECT 9 24 static int loop_cnt[NR_INJECT + 1]; 25 26 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used)); 27 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used)); 28 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used)); 29 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used)); 30 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used)); 31 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used)); 32 33 static int opt_modulo, verbose; 34 35 static int opt_yield, opt_signal, opt_sleep, 36 opt_disable_rseq, opt_threads = 200, 37 opt_disable_mod = 0, opt_test = 's', opt_mb = 0; 38 39 #ifndef RSEQ_SKIP_FASTPATH 40 static long long opt_reps = 5000; 41 #else 42 static long long opt_reps = 100; 43 #endif 44 45 static __thread __attribute__((tls_model("initial-exec"))) 46 unsigned int signals_delivered; 47 48 #ifndef BENCHMARK 49 50 static __thread __attribute__((tls_model("initial-exec"), unused)) 51 unsigned int yield_mod_cnt, nr_abort; 52 53 #define printf_verbose(fmt, ...) \ 54 do { \ 55 if (verbose) \ 56 printf(fmt, ## __VA_ARGS__); \ 57 } while (0) 58 59 #if defined(__x86_64__) || defined(__i386__) 60 61 #define INJECT_ASM_REG "eax" 62 63 #define RSEQ_INJECT_CLOBBER \ 64 , INJECT_ASM_REG 65 66 #ifdef __i386__ 67 68 #define RSEQ_INJECT_ASM(n) \ 69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ 70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 71 "jz 333f\n\t" \ 72 "222:\n\t" \ 73 "dec %%" INJECT_ASM_REG "\n\t" \ 74 "jnz 222b\n\t" \ 75 "333:\n\t" 76 77 #elif defined(__x86_64__) 78 79 #define RSEQ_INJECT_ASM(n) \ 80 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \ 81 "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ 82 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ 83 "jz 333f\n\t" \ 84 "222:\n\t" \ 85 "dec %%" INJECT_ASM_REG "\n\t" \ 86 "jnz 222b\n\t" \ 87 "333:\n\t" 88 89 #else 90 #error "Unsupported architecture" 91 #endif 92 93 #elif defined(__s390__) 94 95 #define RSEQ_INJECT_INPUT \ 96 , [loop_cnt_1]"m"(loop_cnt[1]) \ 97 , [loop_cnt_2]"m"(loop_cnt[2]) \ 98 , [loop_cnt_3]"m"(loop_cnt[3]) \ 99 , [loop_cnt_4]"m"(loop_cnt[4]) \ 100 , [loop_cnt_5]"m"(loop_cnt[5]) \ 101 , [loop_cnt_6]"m"(loop_cnt[6]) 102 103 #define INJECT_ASM_REG "r12" 104 105 #define RSEQ_INJECT_CLOBBER \ 106 , INJECT_ASM_REG 107 108 #define RSEQ_INJECT_ASM(n) \ 109 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 110 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ 111 "je 333f\n\t" \ 112 "222:\n\t" \ 113 "ahi %%" INJECT_ASM_REG ", -1\n\t" \ 114 "jnz 222b\n\t" \ 115 "333:\n\t" 116 117 #elif defined(__ARMEL__) 118 119 #define RSEQ_INJECT_INPUT \ 120 , [loop_cnt_1]"m"(loop_cnt[1]) \ 121 , [loop_cnt_2]"m"(loop_cnt[2]) \ 122 , [loop_cnt_3]"m"(loop_cnt[3]) \ 123 , [loop_cnt_4]"m"(loop_cnt[4]) \ 124 , [loop_cnt_5]"m"(loop_cnt[5]) \ 125 , [loop_cnt_6]"m"(loop_cnt[6]) 126 127 #define INJECT_ASM_REG "r4" 128 129 #define RSEQ_INJECT_CLOBBER \ 130 , INJECT_ASM_REG 131 132 #define RSEQ_INJECT_ASM(n) \ 133 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 134 "cmp " INJECT_ASM_REG ", #0\n\t" \ 135 "beq 333f\n\t" \ 136 "222:\n\t" \ 137 "subs " INJECT_ASM_REG ", #1\n\t" \ 138 "bne 222b\n\t" \ 139 "333:\n\t" 140 141 #elif __PPC__ 142 143 #define RSEQ_INJECT_INPUT \ 144 , [loop_cnt_1]"m"(loop_cnt[1]) \ 145 , [loop_cnt_2]"m"(loop_cnt[2]) \ 146 , [loop_cnt_3]"m"(loop_cnt[3]) \ 147 , [loop_cnt_4]"m"(loop_cnt[4]) \ 148 , [loop_cnt_5]"m"(loop_cnt[5]) \ 149 , [loop_cnt_6]"m"(loop_cnt[6]) 150 151 #define INJECT_ASM_REG "r18" 152 153 #define RSEQ_INJECT_CLOBBER \ 154 , INJECT_ASM_REG 155 156 #define RSEQ_INJECT_ASM(n) \ 157 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 158 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \ 159 "beq 333f\n\t" \ 160 "222:\n\t" \ 161 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ 162 "bne 222b\n\t" \ 163 "333:\n\t" 164 165 #elif defined(__mips__) 166 167 #define RSEQ_INJECT_INPUT \ 168 , [loop_cnt_1]"m"(loop_cnt[1]) \ 169 , [loop_cnt_2]"m"(loop_cnt[2]) \ 170 , [loop_cnt_3]"m"(loop_cnt[3]) \ 171 , [loop_cnt_4]"m"(loop_cnt[4]) \ 172 , [loop_cnt_5]"m"(loop_cnt[5]) \ 173 , [loop_cnt_6]"m"(loop_cnt[6]) 174 175 #define INJECT_ASM_REG "$5" 176 177 #define RSEQ_INJECT_CLOBBER \ 178 , INJECT_ASM_REG 179 180 #define RSEQ_INJECT_ASM(n) \ 181 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ 182 "beqz " INJECT_ASM_REG ", 333f\n\t" \ 183 "222:\n\t" \ 184 "addiu " INJECT_ASM_REG ", -1\n\t" \ 185 "bnez " INJECT_ASM_REG ", 222b\n\t" \ 186 "333:\n\t" 187 188 #else 189 #error unsupported target 190 #endif 191 192 #define RSEQ_INJECT_FAILED \ 193 nr_abort++; 194 195 #define RSEQ_INJECT_C(n) \ 196 { \ 197 int loc_i, loc_nr_loops = loop_cnt[n]; \ 198 \ 199 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \ 200 rseq_barrier(); \ 201 } \ 202 if (loc_nr_loops == -1 && opt_modulo) { \ 203 if (yield_mod_cnt == opt_modulo - 1) { \ 204 if (opt_sleep > 0) \ 205 poll(NULL, 0, opt_sleep); \ 206 if (opt_yield) \ 207 sched_yield(); \ 208 if (opt_signal) \ 209 raise(SIGUSR1); \ 210 yield_mod_cnt = 0; \ 211 } else { \ 212 yield_mod_cnt++; \ 213 } \ 214 } \ 215 } 216 217 #else 218 219 #define printf_verbose(fmt, ...) 220 221 #endif /* BENCHMARK */ 222 223 #include "rseq.h" 224 225 struct percpu_lock_entry { 226 intptr_t v; 227 } __attribute__((aligned(128))); 228 229 struct percpu_lock { 230 struct percpu_lock_entry c[CPU_SETSIZE]; 231 }; 232 233 struct test_data_entry { 234 intptr_t count; 235 } __attribute__((aligned(128))); 236 237 struct spinlock_test_data { 238 struct percpu_lock lock; 239 struct test_data_entry c[CPU_SETSIZE]; 240 }; 241 242 struct spinlock_thread_test_data { 243 struct spinlock_test_data *data; 244 long long reps; 245 int reg; 246 }; 247 248 struct inc_test_data { 249 struct test_data_entry c[CPU_SETSIZE]; 250 }; 251 252 struct inc_thread_test_data { 253 struct inc_test_data *data; 254 long long reps; 255 int reg; 256 }; 257 258 struct percpu_list_node { 259 intptr_t data; 260 struct percpu_list_node *next; 261 }; 262 263 struct percpu_list_entry { 264 struct percpu_list_node *head; 265 } __attribute__((aligned(128))); 266 267 struct percpu_list { 268 struct percpu_list_entry c[CPU_SETSIZE]; 269 }; 270 271 #define BUFFER_ITEM_PER_CPU 100 272 273 struct percpu_buffer_node { 274 intptr_t data; 275 }; 276 277 struct percpu_buffer_entry { 278 intptr_t offset; 279 intptr_t buflen; 280 struct percpu_buffer_node **array; 281 } __attribute__((aligned(128))); 282 283 struct percpu_buffer { 284 struct percpu_buffer_entry c[CPU_SETSIZE]; 285 }; 286 287 #define MEMCPY_BUFFER_ITEM_PER_CPU 100 288 289 struct percpu_memcpy_buffer_node { 290 intptr_t data1; 291 uint64_t data2; 292 }; 293 294 struct percpu_memcpy_buffer_entry { 295 intptr_t offset; 296 intptr_t buflen; 297 struct percpu_memcpy_buffer_node *array; 298 } __attribute__((aligned(128))); 299 300 struct percpu_memcpy_buffer { 301 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; 302 }; 303 304 /* A simple percpu spinlock. Grabs lock on current cpu. */ 305 static int rseq_this_cpu_lock(struct percpu_lock *lock) 306 { 307 int cpu; 308 309 for (;;) { 310 int ret; 311 312 cpu = rseq_cpu_start(); 313 ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 314 0, 1, cpu); 315 if (rseq_likely(!ret)) 316 break; 317 /* Retry if comparison fails or rseq aborts. */ 318 } 319 /* 320 * Acquire semantic when taking lock after control dependency. 321 * Matches rseq_smp_store_release(). 322 */ 323 rseq_smp_acquire__after_ctrl_dep(); 324 return cpu; 325 } 326 327 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) 328 { 329 assert(lock->c[cpu].v == 1); 330 /* 331 * Release lock, with release semantic. Matches 332 * rseq_smp_acquire__after_ctrl_dep(). 333 */ 334 rseq_smp_store_release(&lock->c[cpu].v, 0); 335 } 336 337 void *test_percpu_spinlock_thread(void *arg) 338 { 339 struct spinlock_thread_test_data *thread_data = arg; 340 struct spinlock_test_data *data = thread_data->data; 341 long long i, reps; 342 343 if (!opt_disable_rseq && thread_data->reg && 344 rseq_register_current_thread()) 345 abort(); 346 reps = thread_data->reps; 347 for (i = 0; i < reps; i++) { 348 int cpu = rseq_cpu_start(); 349 350 cpu = rseq_this_cpu_lock(&data->lock); 351 data->c[cpu].count++; 352 rseq_percpu_unlock(&data->lock, cpu); 353 #ifndef BENCHMARK 354 if (i != 0 && !(i % (reps / 10))) 355 printf_verbose("tid %d: count %lld\n", (int) gettid(), i); 356 #endif 357 } 358 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 359 (int) gettid(), nr_abort, signals_delivered); 360 if (!opt_disable_rseq && thread_data->reg && 361 rseq_unregister_current_thread()) 362 abort(); 363 return NULL; 364 } 365 366 /* 367 * A simple test which implements a sharded counter using a per-cpu 368 * lock. Obviously real applications might prefer to simply use a 369 * per-cpu increment; however, this is reasonable for a test and the 370 * lock can be extended to synchronize more complicated operations. 371 */ 372 void test_percpu_spinlock(void) 373 { 374 const int num_threads = opt_threads; 375 int i, ret; 376 uint64_t sum; 377 pthread_t test_threads[num_threads]; 378 struct spinlock_test_data data; 379 struct spinlock_thread_test_data thread_data[num_threads]; 380 381 memset(&data, 0, sizeof(data)); 382 for (i = 0; i < num_threads; i++) { 383 thread_data[i].reps = opt_reps; 384 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 385 thread_data[i].reg = 1; 386 else 387 thread_data[i].reg = 0; 388 thread_data[i].data = &data; 389 ret = pthread_create(&test_threads[i], NULL, 390 test_percpu_spinlock_thread, 391 &thread_data[i]); 392 if (ret) { 393 errno = ret; 394 perror("pthread_create"); 395 abort(); 396 } 397 } 398 399 for (i = 0; i < num_threads; i++) { 400 ret = pthread_join(test_threads[i], NULL); 401 if (ret) { 402 errno = ret; 403 perror("pthread_join"); 404 abort(); 405 } 406 } 407 408 sum = 0; 409 for (i = 0; i < CPU_SETSIZE; i++) 410 sum += data.c[i].count; 411 412 assert(sum == (uint64_t)opt_reps * num_threads); 413 } 414 415 void *test_percpu_inc_thread(void *arg) 416 { 417 struct inc_thread_test_data *thread_data = arg; 418 struct inc_test_data *data = thread_data->data; 419 long long i, reps; 420 421 if (!opt_disable_rseq && thread_data->reg && 422 rseq_register_current_thread()) 423 abort(); 424 reps = thread_data->reps; 425 for (i = 0; i < reps; i++) { 426 int ret; 427 428 do { 429 int cpu; 430 431 cpu = rseq_cpu_start(); 432 ret = rseq_addv(&data->c[cpu].count, 1, cpu); 433 } while (rseq_unlikely(ret)); 434 #ifndef BENCHMARK 435 if (i != 0 && !(i % (reps / 10))) 436 printf_verbose("tid %d: count %lld\n", (int) gettid(), i); 437 #endif 438 } 439 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 440 (int) gettid(), nr_abort, signals_delivered); 441 if (!opt_disable_rseq && thread_data->reg && 442 rseq_unregister_current_thread()) 443 abort(); 444 return NULL; 445 } 446 447 void test_percpu_inc(void) 448 { 449 const int num_threads = opt_threads; 450 int i, ret; 451 uint64_t sum; 452 pthread_t test_threads[num_threads]; 453 struct inc_test_data data; 454 struct inc_thread_test_data thread_data[num_threads]; 455 456 memset(&data, 0, sizeof(data)); 457 for (i = 0; i < num_threads; i++) { 458 thread_data[i].reps = opt_reps; 459 if (opt_disable_mod <= 0 || (i % opt_disable_mod)) 460 thread_data[i].reg = 1; 461 else 462 thread_data[i].reg = 0; 463 thread_data[i].data = &data; 464 ret = pthread_create(&test_threads[i], NULL, 465 test_percpu_inc_thread, 466 &thread_data[i]); 467 if (ret) { 468 errno = ret; 469 perror("pthread_create"); 470 abort(); 471 } 472 } 473 474 for (i = 0; i < num_threads; i++) { 475 ret = pthread_join(test_threads[i], NULL); 476 if (ret) { 477 errno = ret; 478 perror("pthread_join"); 479 abort(); 480 } 481 } 482 483 sum = 0; 484 for (i = 0; i < CPU_SETSIZE; i++) 485 sum += data.c[i].count; 486 487 assert(sum == (uint64_t)opt_reps * num_threads); 488 } 489 490 void this_cpu_list_push(struct percpu_list *list, 491 struct percpu_list_node *node, 492 int *_cpu) 493 { 494 int cpu; 495 496 for (;;) { 497 intptr_t *targetptr, newval, expect; 498 int ret; 499 500 cpu = rseq_cpu_start(); 501 /* Load list->c[cpu].head with single-copy atomicity. */ 502 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); 503 newval = (intptr_t)node; 504 targetptr = (intptr_t *)&list->c[cpu].head; 505 node->next = (struct percpu_list_node *)expect; 506 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); 507 if (rseq_likely(!ret)) 508 break; 509 /* Retry if comparison fails or rseq aborts. */ 510 } 511 if (_cpu) 512 *_cpu = cpu; 513 } 514 515 /* 516 * Unlike a traditional lock-less linked list; the availability of a 517 * rseq primitive allows us to implement pop without concerns over 518 * ABA-type races. 519 */ 520 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, 521 int *_cpu) 522 { 523 struct percpu_list_node *node = NULL; 524 int cpu; 525 526 for (;;) { 527 struct percpu_list_node *head; 528 intptr_t *targetptr, expectnot, *load; 529 off_t offset; 530 int ret; 531 532 cpu = rseq_cpu_start(); 533 targetptr = (intptr_t *)&list->c[cpu].head; 534 expectnot = (intptr_t)NULL; 535 offset = offsetof(struct percpu_list_node, next); 536 load = (intptr_t *)&head; 537 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, 538 offset, load, cpu); 539 if (rseq_likely(!ret)) { 540 node = head; 541 break; 542 } 543 if (ret > 0) 544 break; 545 /* Retry if rseq aborts. */ 546 } 547 if (_cpu) 548 *_cpu = cpu; 549 return node; 550 } 551 552 /* 553 * __percpu_list_pop is not safe against concurrent accesses. Should 554 * only be used on lists that are not concurrently modified. 555 */ 556 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) 557 { 558 struct percpu_list_node *node; 559 560 node = list->c[cpu].head; 561 if (!node) 562 return NULL; 563 list->c[cpu].head = node->next; 564 return node; 565 } 566 567 void *test_percpu_list_thread(void *arg) 568 { 569 long long i, reps; 570 struct percpu_list *list = (struct percpu_list *)arg; 571 572 if (!opt_disable_rseq && rseq_register_current_thread()) 573 abort(); 574 575 reps = opt_reps; 576 for (i = 0; i < reps; i++) { 577 struct percpu_list_node *node; 578 579 node = this_cpu_list_pop(list, NULL); 580 if (opt_yield) 581 sched_yield(); /* encourage shuffling */ 582 if (node) 583 this_cpu_list_push(list, node, NULL); 584 } 585 586 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 587 (int) gettid(), nr_abort, signals_delivered); 588 if (!opt_disable_rseq && rseq_unregister_current_thread()) 589 abort(); 590 591 return NULL; 592 } 593 594 /* Simultaneous modification to a per-cpu linked list from many threads. */ 595 void test_percpu_list(void) 596 { 597 const int num_threads = opt_threads; 598 int i, j, ret; 599 uint64_t sum = 0, expected_sum = 0; 600 struct percpu_list list; 601 pthread_t test_threads[num_threads]; 602 cpu_set_t allowed_cpus; 603 604 memset(&list, 0, sizeof(list)); 605 606 /* Generate list entries for every usable cpu. */ 607 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 608 for (i = 0; i < CPU_SETSIZE; i++) { 609 if (!CPU_ISSET(i, &allowed_cpus)) 610 continue; 611 for (j = 1; j <= 100; j++) { 612 struct percpu_list_node *node; 613 614 expected_sum += j; 615 616 node = malloc(sizeof(*node)); 617 assert(node); 618 node->data = j; 619 node->next = list.c[i].head; 620 list.c[i].head = node; 621 } 622 } 623 624 for (i = 0; i < num_threads; i++) { 625 ret = pthread_create(&test_threads[i], NULL, 626 test_percpu_list_thread, &list); 627 if (ret) { 628 errno = ret; 629 perror("pthread_create"); 630 abort(); 631 } 632 } 633 634 for (i = 0; i < num_threads; i++) { 635 ret = pthread_join(test_threads[i], NULL); 636 if (ret) { 637 errno = ret; 638 perror("pthread_join"); 639 abort(); 640 } 641 } 642 643 for (i = 0; i < CPU_SETSIZE; i++) { 644 struct percpu_list_node *node; 645 646 if (!CPU_ISSET(i, &allowed_cpus)) 647 continue; 648 649 while ((node = __percpu_list_pop(&list, i))) { 650 sum += node->data; 651 free(node); 652 } 653 } 654 655 /* 656 * All entries should now be accounted for (unless some external 657 * actor is interfering with our allowed affinity while this 658 * test is running). 659 */ 660 assert(sum == expected_sum); 661 } 662 663 bool this_cpu_buffer_push(struct percpu_buffer *buffer, 664 struct percpu_buffer_node *node, 665 int *_cpu) 666 { 667 bool result = false; 668 int cpu; 669 670 for (;;) { 671 intptr_t *targetptr_spec, newval_spec; 672 intptr_t *targetptr_final, newval_final; 673 intptr_t offset; 674 int ret; 675 676 cpu = rseq_cpu_start(); 677 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 678 if (offset == buffer->c[cpu].buflen) 679 break; 680 newval_spec = (intptr_t)node; 681 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; 682 newval_final = offset + 1; 683 targetptr_final = &buffer->c[cpu].offset; 684 if (opt_mb) 685 ret = rseq_cmpeqv_trystorev_storev_release( 686 targetptr_final, offset, targetptr_spec, 687 newval_spec, newval_final, cpu); 688 else 689 ret = rseq_cmpeqv_trystorev_storev(targetptr_final, 690 offset, targetptr_spec, newval_spec, 691 newval_final, cpu); 692 if (rseq_likely(!ret)) { 693 result = true; 694 break; 695 } 696 /* Retry if comparison fails or rseq aborts. */ 697 } 698 if (_cpu) 699 *_cpu = cpu; 700 return result; 701 } 702 703 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, 704 int *_cpu) 705 { 706 struct percpu_buffer_node *head; 707 int cpu; 708 709 for (;;) { 710 intptr_t *targetptr, newval; 711 intptr_t offset; 712 int ret; 713 714 cpu = rseq_cpu_start(); 715 /* Load offset with single-copy atomicity. */ 716 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 717 if (offset == 0) { 718 head = NULL; 719 break; 720 } 721 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); 722 newval = offset - 1; 723 targetptr = (intptr_t *)&buffer->c[cpu].offset; 724 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, 725 (intptr_t *)&buffer->c[cpu].array[offset - 1], 726 (intptr_t)head, newval, cpu); 727 if (rseq_likely(!ret)) 728 break; 729 /* Retry if comparison fails or rseq aborts. */ 730 } 731 if (_cpu) 732 *_cpu = cpu; 733 return head; 734 } 735 736 /* 737 * __percpu_buffer_pop is not safe against concurrent accesses. Should 738 * only be used on buffers that are not concurrently modified. 739 */ 740 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, 741 int cpu) 742 { 743 struct percpu_buffer_node *head; 744 intptr_t offset; 745 746 offset = buffer->c[cpu].offset; 747 if (offset == 0) 748 return NULL; 749 head = buffer->c[cpu].array[offset - 1]; 750 buffer->c[cpu].offset = offset - 1; 751 return head; 752 } 753 754 void *test_percpu_buffer_thread(void *arg) 755 { 756 long long i, reps; 757 struct percpu_buffer *buffer = (struct percpu_buffer *)arg; 758 759 if (!opt_disable_rseq && rseq_register_current_thread()) 760 abort(); 761 762 reps = opt_reps; 763 for (i = 0; i < reps; i++) { 764 struct percpu_buffer_node *node; 765 766 node = this_cpu_buffer_pop(buffer, NULL); 767 if (opt_yield) 768 sched_yield(); /* encourage shuffling */ 769 if (node) { 770 if (!this_cpu_buffer_push(buffer, node, NULL)) { 771 /* Should increase buffer size. */ 772 abort(); 773 } 774 } 775 } 776 777 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 778 (int) gettid(), nr_abort, signals_delivered); 779 if (!opt_disable_rseq && rseq_unregister_current_thread()) 780 abort(); 781 782 return NULL; 783 } 784 785 /* Simultaneous modification to a per-cpu buffer from many threads. */ 786 void test_percpu_buffer(void) 787 { 788 const int num_threads = opt_threads; 789 int i, j, ret; 790 uint64_t sum = 0, expected_sum = 0; 791 struct percpu_buffer buffer; 792 pthread_t test_threads[num_threads]; 793 cpu_set_t allowed_cpus; 794 795 memset(&buffer, 0, sizeof(buffer)); 796 797 /* Generate list entries for every usable cpu. */ 798 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 799 for (i = 0; i < CPU_SETSIZE; i++) { 800 if (!CPU_ISSET(i, &allowed_cpus)) 801 continue; 802 /* Worse-case is every item in same CPU. */ 803 buffer.c[i].array = 804 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 805 BUFFER_ITEM_PER_CPU); 806 assert(buffer.c[i].array); 807 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; 808 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { 809 struct percpu_buffer_node *node; 810 811 expected_sum += j; 812 813 /* 814 * We could theoretically put the word-sized 815 * "data" directly in the buffer. However, we 816 * want to model objects that would not fit 817 * within a single word, so allocate an object 818 * for each node. 819 */ 820 node = malloc(sizeof(*node)); 821 assert(node); 822 node->data = j; 823 buffer.c[i].array[j - 1] = node; 824 buffer.c[i].offset++; 825 } 826 } 827 828 for (i = 0; i < num_threads; i++) { 829 ret = pthread_create(&test_threads[i], NULL, 830 test_percpu_buffer_thread, &buffer); 831 if (ret) { 832 errno = ret; 833 perror("pthread_create"); 834 abort(); 835 } 836 } 837 838 for (i = 0; i < num_threads; i++) { 839 ret = pthread_join(test_threads[i], NULL); 840 if (ret) { 841 errno = ret; 842 perror("pthread_join"); 843 abort(); 844 } 845 } 846 847 for (i = 0; i < CPU_SETSIZE; i++) { 848 struct percpu_buffer_node *node; 849 850 if (!CPU_ISSET(i, &allowed_cpus)) 851 continue; 852 853 while ((node = __percpu_buffer_pop(&buffer, i))) { 854 sum += node->data; 855 free(node); 856 } 857 free(buffer.c[i].array); 858 } 859 860 /* 861 * All entries should now be accounted for (unless some external 862 * actor is interfering with our allowed affinity while this 863 * test is running). 864 */ 865 assert(sum == expected_sum); 866 } 867 868 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, 869 struct percpu_memcpy_buffer_node item, 870 int *_cpu) 871 { 872 bool result = false; 873 int cpu; 874 875 for (;;) { 876 intptr_t *targetptr_final, newval_final, offset; 877 char *destptr, *srcptr; 878 size_t copylen; 879 int ret; 880 881 cpu = rseq_cpu_start(); 882 /* Load offset with single-copy atomicity. */ 883 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 884 if (offset == buffer->c[cpu].buflen) 885 break; 886 destptr = (char *)&buffer->c[cpu].array[offset]; 887 srcptr = (char *)&item; 888 /* copylen must be <= 4kB. */ 889 copylen = sizeof(item); 890 newval_final = offset + 1; 891 targetptr_final = &buffer->c[cpu].offset; 892 if (opt_mb) 893 ret = rseq_cmpeqv_trymemcpy_storev_release( 894 targetptr_final, offset, 895 destptr, srcptr, copylen, 896 newval_final, cpu); 897 else 898 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 899 offset, destptr, srcptr, copylen, 900 newval_final, cpu); 901 if (rseq_likely(!ret)) { 902 result = true; 903 break; 904 } 905 /* Retry if comparison fails or rseq aborts. */ 906 } 907 if (_cpu) 908 *_cpu = cpu; 909 return result; 910 } 911 912 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 913 struct percpu_memcpy_buffer_node *item, 914 int *_cpu) 915 { 916 bool result = false; 917 int cpu; 918 919 for (;;) { 920 intptr_t *targetptr_final, newval_final, offset; 921 char *destptr, *srcptr; 922 size_t copylen; 923 int ret; 924 925 cpu = rseq_cpu_start(); 926 /* Load offset with single-copy atomicity. */ 927 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); 928 if (offset == 0) 929 break; 930 destptr = (char *)item; 931 srcptr = (char *)&buffer->c[cpu].array[offset - 1]; 932 /* copylen must be <= 4kB. */ 933 copylen = sizeof(*item); 934 newval_final = offset - 1; 935 targetptr_final = &buffer->c[cpu].offset; 936 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, 937 offset, destptr, srcptr, copylen, 938 newval_final, cpu); 939 if (rseq_likely(!ret)) { 940 result = true; 941 break; 942 } 943 /* Retry if comparison fails or rseq aborts. */ 944 } 945 if (_cpu) 946 *_cpu = cpu; 947 return result; 948 } 949 950 /* 951 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should 952 * only be used on buffers that are not concurrently modified. 953 */ 954 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, 955 struct percpu_memcpy_buffer_node *item, 956 int cpu) 957 { 958 intptr_t offset; 959 960 offset = buffer->c[cpu].offset; 961 if (offset == 0) 962 return false; 963 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); 964 buffer->c[cpu].offset = offset - 1; 965 return true; 966 } 967 968 void *test_percpu_memcpy_buffer_thread(void *arg) 969 { 970 long long i, reps; 971 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; 972 973 if (!opt_disable_rseq && rseq_register_current_thread()) 974 abort(); 975 976 reps = opt_reps; 977 for (i = 0; i < reps; i++) { 978 struct percpu_memcpy_buffer_node item; 979 bool result; 980 981 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); 982 if (opt_yield) 983 sched_yield(); /* encourage shuffling */ 984 if (result) { 985 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { 986 /* Should increase buffer size. */ 987 abort(); 988 } 989 } 990 } 991 992 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", 993 (int) gettid(), nr_abort, signals_delivered); 994 if (!opt_disable_rseq && rseq_unregister_current_thread()) 995 abort(); 996 997 return NULL; 998 } 999 1000 /* Simultaneous modification to a per-cpu buffer from many threads. */ 1001 void test_percpu_memcpy_buffer(void) 1002 { 1003 const int num_threads = opt_threads; 1004 int i, j, ret; 1005 uint64_t sum = 0, expected_sum = 0; 1006 struct percpu_memcpy_buffer buffer; 1007 pthread_t test_threads[num_threads]; 1008 cpu_set_t allowed_cpus; 1009 1010 memset(&buffer, 0, sizeof(buffer)); 1011 1012 /* Generate list entries for every usable cpu. */ 1013 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); 1014 for (i = 0; i < CPU_SETSIZE; i++) { 1015 if (!CPU_ISSET(i, &allowed_cpus)) 1016 continue; 1017 /* Worse-case is every item in same CPU. */ 1018 buffer.c[i].array = 1019 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * 1020 MEMCPY_BUFFER_ITEM_PER_CPU); 1021 assert(buffer.c[i].array); 1022 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; 1023 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { 1024 expected_sum += 2 * j + 1; 1025 1026 /* 1027 * We could theoretically put the word-sized 1028 * "data" directly in the buffer. However, we 1029 * want to model objects that would not fit 1030 * within a single word, so allocate an object 1031 * for each node. 1032 */ 1033 buffer.c[i].array[j - 1].data1 = j; 1034 buffer.c[i].array[j - 1].data2 = j + 1; 1035 buffer.c[i].offset++; 1036 } 1037 } 1038 1039 for (i = 0; i < num_threads; i++) { 1040 ret = pthread_create(&test_threads[i], NULL, 1041 test_percpu_memcpy_buffer_thread, 1042 &buffer); 1043 if (ret) { 1044 errno = ret; 1045 perror("pthread_create"); 1046 abort(); 1047 } 1048 } 1049 1050 for (i = 0; i < num_threads; i++) { 1051 ret = pthread_join(test_threads[i], NULL); 1052 if (ret) { 1053 errno = ret; 1054 perror("pthread_join"); 1055 abort(); 1056 } 1057 } 1058 1059 for (i = 0; i < CPU_SETSIZE; i++) { 1060 struct percpu_memcpy_buffer_node item; 1061 1062 if (!CPU_ISSET(i, &allowed_cpus)) 1063 continue; 1064 1065 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { 1066 sum += item.data1; 1067 sum += item.data2; 1068 } 1069 free(buffer.c[i].array); 1070 } 1071 1072 /* 1073 * All entries should now be accounted for (unless some external 1074 * actor is interfering with our allowed affinity while this 1075 * test is running). 1076 */ 1077 assert(sum == expected_sum); 1078 } 1079 1080 static void test_signal_interrupt_handler(int signo) 1081 { 1082 signals_delivered++; 1083 } 1084 1085 static int set_signal_handler(void) 1086 { 1087 int ret = 0; 1088 struct sigaction sa; 1089 sigset_t sigset; 1090 1091 ret = sigemptyset(&sigset); 1092 if (ret < 0) { 1093 perror("sigemptyset"); 1094 return ret; 1095 } 1096 1097 sa.sa_handler = test_signal_interrupt_handler; 1098 sa.sa_mask = sigset; 1099 sa.sa_flags = 0; 1100 ret = sigaction(SIGUSR1, &sa, NULL); 1101 if (ret < 0) { 1102 perror("sigaction"); 1103 return ret; 1104 } 1105 1106 printf_verbose("Signal handler set for SIGUSR1\n"); 1107 1108 return ret; 1109 } 1110 1111 static void show_usage(int argc, char **argv) 1112 { 1113 printf("Usage : %s <OPTIONS>\n", 1114 argv[0]); 1115 printf("OPTIONS:\n"); 1116 printf(" [-1 loops] Number of loops for delay injection 1\n"); 1117 printf(" [-2 loops] Number of loops for delay injection 2\n"); 1118 printf(" [-3 loops] Number of loops for delay injection 3\n"); 1119 printf(" [-4 loops] Number of loops for delay injection 4\n"); 1120 printf(" [-5 loops] Number of loops for delay injection 5\n"); 1121 printf(" [-6 loops] Number of loops for delay injection 6\n"); 1122 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n"); 1123 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n"); 1124 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n"); 1125 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n"); 1126 printf(" [-y] Yield\n"); 1127 printf(" [-k] Kill thread with signal\n"); 1128 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n"); 1129 printf(" [-t N] Number of threads (default 200)\n"); 1130 printf(" [-r N] Number of repetitions per thread (default 5000)\n"); 1131 printf(" [-d] Disable rseq system call (no initialization)\n"); 1132 printf(" [-D M] Disable rseq for each M threads\n"); 1133 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); 1134 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); 1135 printf(" [-v] Verbose output.\n"); 1136 printf(" [-h] Show this help.\n"); 1137 printf("\n"); 1138 } 1139 1140 int main(int argc, char **argv) 1141 { 1142 int i; 1143 1144 for (i = 1; i < argc; i++) { 1145 if (argv[i][0] != '-') 1146 continue; 1147 switch (argv[i][1]) { 1148 case '1': 1149 case '2': 1150 case '3': 1151 case '4': 1152 case '5': 1153 case '6': 1154 case '7': 1155 case '8': 1156 case '9': 1157 if (argc < i + 2) { 1158 show_usage(argc, argv); 1159 goto error; 1160 } 1161 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]); 1162 i++; 1163 break; 1164 case 'm': 1165 if (argc < i + 2) { 1166 show_usage(argc, argv); 1167 goto error; 1168 } 1169 opt_modulo = atol(argv[i + 1]); 1170 if (opt_modulo < 0) { 1171 show_usage(argc, argv); 1172 goto error; 1173 } 1174 i++; 1175 break; 1176 case 's': 1177 if (argc < i + 2) { 1178 show_usage(argc, argv); 1179 goto error; 1180 } 1181 opt_sleep = atol(argv[i + 1]); 1182 if (opt_sleep < 0) { 1183 show_usage(argc, argv); 1184 goto error; 1185 } 1186 i++; 1187 break; 1188 case 'y': 1189 opt_yield = 1; 1190 break; 1191 case 'k': 1192 opt_signal = 1; 1193 break; 1194 case 'd': 1195 opt_disable_rseq = 1; 1196 break; 1197 case 'D': 1198 if (argc < i + 2) { 1199 show_usage(argc, argv); 1200 goto error; 1201 } 1202 opt_disable_mod = atol(argv[i + 1]); 1203 if (opt_disable_mod < 0) { 1204 show_usage(argc, argv); 1205 goto error; 1206 } 1207 i++; 1208 break; 1209 case 't': 1210 if (argc < i + 2) { 1211 show_usage(argc, argv); 1212 goto error; 1213 } 1214 opt_threads = atol(argv[i + 1]); 1215 if (opt_threads < 0) { 1216 show_usage(argc, argv); 1217 goto error; 1218 } 1219 i++; 1220 break; 1221 case 'r': 1222 if (argc < i + 2) { 1223 show_usage(argc, argv); 1224 goto error; 1225 } 1226 opt_reps = atoll(argv[i + 1]); 1227 if (opt_reps < 0) { 1228 show_usage(argc, argv); 1229 goto error; 1230 } 1231 i++; 1232 break; 1233 case 'h': 1234 show_usage(argc, argv); 1235 goto end; 1236 case 'T': 1237 if (argc < i + 2) { 1238 show_usage(argc, argv); 1239 goto error; 1240 } 1241 opt_test = *argv[i + 1]; 1242 switch (opt_test) { 1243 case 's': 1244 case 'l': 1245 case 'i': 1246 case 'b': 1247 case 'm': 1248 break; 1249 default: 1250 show_usage(argc, argv); 1251 goto error; 1252 } 1253 i++; 1254 break; 1255 case 'v': 1256 verbose = 1; 1257 break; 1258 case 'M': 1259 opt_mb = 1; 1260 break; 1261 default: 1262 show_usage(argc, argv); 1263 goto error; 1264 } 1265 } 1266 1267 loop_cnt_1 = loop_cnt[1]; 1268 loop_cnt_2 = loop_cnt[2]; 1269 loop_cnt_3 = loop_cnt[3]; 1270 loop_cnt_4 = loop_cnt[4]; 1271 loop_cnt_5 = loop_cnt[5]; 1272 loop_cnt_6 = loop_cnt[6]; 1273 1274 if (set_signal_handler()) 1275 goto error; 1276 1277 if (!opt_disable_rseq && rseq_register_current_thread()) 1278 goto error; 1279 switch (opt_test) { 1280 case 's': 1281 printf_verbose("spinlock\n"); 1282 test_percpu_spinlock(); 1283 break; 1284 case 'l': 1285 printf_verbose("linked list\n"); 1286 test_percpu_list(); 1287 break; 1288 case 'b': 1289 printf_verbose("buffer\n"); 1290 test_percpu_buffer(); 1291 break; 1292 case 'm': 1293 printf_verbose("memcpy buffer\n"); 1294 test_percpu_memcpy_buffer(); 1295 break; 1296 case 'i': 1297 printf_verbose("counter increment\n"); 1298 test_percpu_inc(); 1299 break; 1300 } 1301 if (!opt_disable_rseq && rseq_unregister_current_thread()) 1302 abort(); 1303 end: 1304 return 0; 1305 1306 error: 1307 return -1; 1308 } 1309