// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2025 Igalia S.L.
 *
 * Robust list test by André Almeida <andrealmeid@igalia.com>
 *
 * The robust list uAPI allows userspace to create "robust" locks, in the sense
 * that if the lock holder thread dies, the remaining threads that are waiting
 * for the lock won't block forever, waiting for a lock that will never be
 * released.
 *
 * This is achieved by userspace setting up a list where a thread enters all
 * the locks (futexes) that it is holding. The robust list is a linked list,
 * and userspace registers the start of the list with the syscall
 * set_robust_list(). If such a thread eventually dies, the kernel will walk
 * this list, waking up one thread waiting for each futex and marking the
 * futex word with the flag FUTEX_OWNER_DIED.
 *
 * See also
 *	man set_robust_list
 *	Documentation/locking/robust-futex-ABI.rst
 *	Documentation/locking/robust-futexes.rst
 */

#define _GNU_SOURCE

#include "futextest.h"
#include "../../kselftest_harness.h"

#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/wait.h>

#define STACK_SIZE (1024 * 1024)

#define FUTEX_TIMEOUT 3

#define SLEEP_US 100

static pthread_barrier_t barrier, barrier2;

static int set_robust_list(struct robust_list_head *head, size_t len)
{
	return syscall(SYS_set_robust_list, head, len);
}

static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr)
{
	return syscall(SYS_get_robust_list, pid, head, len_ptr);
}

/*
 * Basic lock struct, contains just the futex word and the robust list element.
 * Real implementations also have a *prev pointer, to easily walk the list.
 */
struct lock_struct {
	_Atomic(unsigned int) futex;
	struct robust_list list;
};
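/*
 * Note on the layout above: per Documentation/locking/robust-futex-ABI.rst,
 * when the thread exits the kernel locates the futex word of each list entry
 * by adding head->futex_offset to the entry's address. With this struct, the
 * offset computed in set_list() below is negative, because the futex word
 * sits before the list element; roughly:
 *
 *	futex word = (void *)&lock->list +
 *		     (offsetof(struct lock_struct, futex) -
 *		      offsetof(struct lock_struct, list))
 *		   = (void *)&lock->futex
 */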
/*
 * Helper function to spawn a child thread. Returns -1 on error, pid on success
 */
static int create_child(int (*fn)(void *arg), void *arg)
{
	char *stack;
	pid_t pid;

	stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
		     MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
	if (stack == MAP_FAILED)
		return -1;

	stack += STACK_SIZE;

	pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg);

	if (pid == -1)
		return -1;

	return pid;
}

/*
 * Helper function to prepare and register a robust list
 */
static int set_list(struct robust_list_head *head)
{
	int ret;

	ret = set_robust_list(head, sizeof(*head));
	if (ret)
		return ret;

	head->futex_offset = (size_t) offsetof(struct lock_struct, futex) -
			     (size_t) offsetof(struct lock_struct, list);
	head->list.next = &head->list;
	head->list_op_pending = NULL;

	return 0;
}

/*
 * A basic (and incomplete) mutex lock function with robustness
 */
static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject)
{
	_Atomic(unsigned int) *futex = &lock->futex;
	unsigned int zero = 0;
	pid_t tid = gettid();
	int ret = -1;

	/*
	 * Set list_op_pending before starting the lock, so the kernel can catch
	 * the case where the thread died during the lock operation
	 */
	head->list_op_pending = &lock->list;

	if (atomic_compare_exchange_strong(futex, &zero, tid)) {
		/*
		 * We took the lock, insert it in the robust list
		 */
		struct robust_list *list = &head->list;

		/* Error injection to test list_op_pending */
		if (error_inject)
			return 0;

		while (list->next != &head->list)
			list = list->next;

		list->next = &lock->list;
		lock->list.next = &head->list;

		ret = 0;
	} else {
		/*
		 * We didn't take the lock, wait until the owner wakes (or dies)
		 */
		struct timespec to;

		to.tv_sec = FUTEX_TIMEOUT;
		to.tv_nsec = 0;

		tid = atomic_load(futex);
		/* Kernel ignores futexes without the waiters flag */
		tid |= FUTEX_WAITERS;
		atomic_store(futex, tid);

		ret = futex_wait((futex_t *) futex, tid, &to, 0);

		/*
		 * A real mutex_lock() implementation would loop here to finally
		 * take the lock. We don't care about that, so we stop here.
		 */
	}

	head->list_op_pending = NULL;

	return ret;
}
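/*
 * For reference only: a sketch of what a matching unlock could look like. The
 * mutex_unlock() below is a hypothetical helper, not used by these tests; it
 * follows the usual robust-mutex protocol of unlinking the entry, releasing
 * the futex word and waking one waiter if the waiters flag was set:
 *
 *	static int mutex_unlock(struct lock_struct *lock, struct robust_list_head *head)
 *	{
 *		struct robust_list *prev = &head->list;
 *		unsigned int old;
 *
 *		head->list_op_pending = &lock->list;
 *
 *		// Unlink the entry (singly linked, so walk to its predecessor)
 *		while (prev->next != &head->list && prev->next != &lock->list)
 *			prev = prev->next;
 *		if (prev->next == &lock->list)
 *			prev->next = lock->list.next;
 *
 *		// Release the lock and wake one waiter if needed
 *		old = atomic_exchange(&lock->futex, 0);
 *		if (old & FUTEX_WAITERS)
 *			futex_wake((futex_t *) &lock->futex, 1, 0);
 *
 *		head->list_op_pending = NULL;
 *		return 0;
 *	}
 */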
/*
 * This child thread will succeed in taking the lock, and then will exit
 * holding it
 */
static int child_fn_lock(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return ret;
	}

	ret = mutex_lock(lock, &head, false);
	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return ret;
	}

	pthread_barrier_wait(&barrier);

	/*
	 * There's a race here: the parent thread needs to be inside
	 * futex_wait() before the child thread dies, otherwise it will miss the
	 * wakeup from handle_futex_death() that this child will emit. We wait a
	 * little bit just to make sure that this happens.
	 */
	usleep(SLEEP_US);

	return 0;
}

/*
 * Spawns a child thread that will set a robust list, take the lock, register it
 * in the robust list and die. The parent thread will wait on this futex, and
 * should be woken up when the child exits.
 */
TEST(test_robustness)
{
	struct lock_struct lock = { .futex = 0 };
	_Atomic(unsigned int) *futex = &lock.futex;
	struct robust_list_head head;
	int ret, pid, wstatus;

	ret = set_list(&head);
	ASSERT_EQ(ret, 0);

	/*
	 * Let's use a barrier to ensure that the child thread takes the lock
	 * before the parent
	 */
	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);

	pid = create_child(&child_fn_lock, &lock);
	ASSERT_NE(pid, -1);

	pthread_barrier_wait(&barrier);
	ret = mutex_lock(&lock, &head, false);

	/*
	 * futex_wait() should return 0 and the futex word should be marked with
	 * FUTEX_OWNER_DIED
	 */
	ASSERT_EQ(ret, 0);

	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);

	/* Pass only if the child didn't return an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

/*
 * The only valid value for len is sizeof(*head)
 */
TEST(test_set_robust_list_invalid_size)
{
	struct robust_list_head head;
	size_t head_size = sizeof(head);
	int ret;

	ret = set_robust_list(&head, head_size);
	ASSERT_EQ(ret, 0);

	ret = set_robust_list(&head, head_size * 2);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ret = set_robust_list(&head, head_size - 1);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ret = set_robust_list(&head, 0);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ksft_test_result_pass("%s\n", __func__);
}

/*
 * Test get_robust_list with pid = 0, getting the list of the running thread
 */
TEST(test_get_robust_list_self)
{
	struct robust_list_head head, head2, *get_head;
	size_t head_size = sizeof(head), len_ptr;
	int ret;

	ret = set_robust_list(&head, head_size);
	ASSERT_EQ(ret, 0);

	ret = get_robust_list(0, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(get_head, &head);
	ASSERT_EQ(head_size, len_ptr);

	ret = set_robust_list(&head2, head_size);
	ASSERT_EQ(ret, 0);

	ret = get_robust_list(0, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(get_head, &head2);
	ASSERT_EQ(head_size, len_ptr);

	ksft_test_result_pass("%s\n", __func__);
}
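/*
 * Note: for get_robust_list(), a pid of 0 means the calling thread. For any
 * other pid the kernel only returns the list head if the caller is allowed to
 * ptrace the target (otherwise the syscall fails with EPERM, see
 * man get_robust_list). The child here shares credentials with the parent, so
 * that permission check is not expected to trigger in these tests.
 */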
static int child_list(void *arg)
{
	struct robust_list_head *head = arg;
	int ret;

	ret = set_robust_list(head, sizeof(*head));
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return -1;
	}

	/*
	 * After setting the list head, wait until the main thread can call
	 * get_robust_list() for this thread before exiting.
	 */
	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier2);

	return 0;
}

/*
 * Test get_robust_list from another thread. We use two barriers here to ensure
 * that:
 * 1) the child thread has set the list before we try to get it from the
 *    parent
 * 2) the child thread is still alive when we try to get the list from it
 */
TEST(test_get_robust_list_child)
{
	struct robust_list_head head, *get_head;
	int ret, wstatus;
	size_t len_ptr;
	pid_t tid;

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
	ret = pthread_barrier_init(&barrier2, NULL, 2);
	ASSERT_EQ(ret, 0);

	tid = create_child(&child_list, &head);
	ASSERT_NE(tid, -1);

	pthread_barrier_wait(&barrier);

	ret = get_robust_list(tid, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(&head, get_head);

	pthread_barrier_wait(&barrier2);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);
	pthread_barrier_destroy(&barrier2);

	/* Pass only if the child didn't return an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

static int child_fn_lock_with_error(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return -1;
	}

	ret = mutex_lock(lock, &head, true);
	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return -1;
	}

	pthread_barrier_wait(&barrier);

	/* See comment at child_fn_lock() */
	usleep(SLEEP_US);

	return 0;
}

/*
 * Same as the robustness test, but inject an error so that mutex_lock() exits
 * earlier, just after setting list_op_pending and taking the lock, to test the
 * list_op_pending mechanism
 */
TEST(test_set_list_op_pending)
{
	struct lock_struct lock = { .futex = 0 };
	_Atomic(unsigned int) *futex = &lock.futex;
	struct robust_list_head head;
	int ret, wstatus;

	ret = set_list(&head);
	ASSERT_EQ(ret, 0);

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);

	ret = create_child(&child_fn_lock_with_error, &lock);
	ASSERT_NE(ret, -1);

	pthread_barrier_wait(&barrier);
	ret = mutex_lock(&lock, &head, false);

	ASSERT_EQ(ret, 0);

	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);

	/* Pass only if the child didn't return an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
	else
		ksft_test_result_fail("%s\n", __func__);
}

#define CHILD_NR 10

static int child_lock_holder(void *arg)
{
	struct lock_struct *locks = arg;
	struct robust_list_head head;
	int i;

	set_list(&head);

	for (i = 0; i < CHILD_NR; i++) {
		locks[i].futex = 0;
		mutex_lock(&locks[i], &head, false);
	}

	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier2);

	/* See comment at child_fn_lock() */
	usleep(SLEEP_US);

	return 0;
}
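/*
 * The waiter child below never registers a robust list of its own: the local
 * head is only passed so mutex_lock() has somewhere to write list_op_pending,
 * and its list is never linked or walked, since by the time it runs the lock
 * is already held by child_lock_holder(). It should be woken by
 * handle_futex_death() and find the futex word marked FUTEX_OWNER_DIED.
 */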
static int child_wait_lock(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	pthread_barrier_wait(&barrier2);
	ret = mutex_lock(lock, &head, false);

	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return -1;
	}

	if (!(lock->futex & FUTEX_OWNER_DIED)) {
		ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
		return -1;
	}

	return 0;
}

/*
 * Test a robust list with more than one element. All the waiters should wake
 * up when the holder dies
 */
TEST(test_robust_list_multiple_elements)
{
	struct lock_struct locks[CHILD_NR];
	pid_t pids[CHILD_NR + 1];
	int i, ret, wstatus;

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
	ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1);
	ASSERT_EQ(ret, 0);

	pids[0] = create_child(&child_lock_holder, locks);

	/* Wait until the holder thread takes all the locks */
	pthread_barrier_wait(&barrier);

	for (i = 0; i < CHILD_NR; i++)
		pids[i + 1] = create_child(&child_wait_lock, &locks[i]);

	/* Wait for all children to return */
	ret = 0;

	for (i = 0; i < CHILD_NR + 1; i++) {
		waitpid(pids[i], &wstatus, 0);
		if (WEXITSTATUS(wstatus))
			ret = -1;
	}

	pthread_barrier_destroy(&barrier);
	pthread_barrier_destroy(&barrier2);

	/* Pass only if no child returned an error */
	if (!ret)
		ksft_test_result_pass("%s\n", __func__);
}

static int child_circular_list(void *arg)
{
	static struct robust_list_head head;
	struct lock_struct a, b, c;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_list error\n");
		return -1;
	}

	head.list.next = &a.list;

	/*
	 * The last element should point back to the list head, but we short
	 * circuit it to create a cycle
	 */
	a.list.next = &b.list;
	b.list.next = &c.list;
	c.list.next = &a.list;

	return 0;
}

/*
 * Create a circular robust list. The kernel should be able to handle this while
 * processing the list, so it won't be trapped in an infinite loop while handling
 * a process exit
 */
TEST(test_circular_list)
{
	int wstatus;

	create_child(child_circular_list, NULL);

	wait(&wstatus);

	/* Pass only if the child didn't return an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

TEST_HARNESS_MAIN