// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2025 Igalia S.L.
 *
 * Robust list test by André Almeida <andrealmeid@igalia.com>
 *
 * The robust list uAPI allows userspace to create "robust" locks, in the sense
 * that if the lock holder thread dies, the remaining threads that are waiting
 * for the lock won't block forever, waiting for a lock that will never be
 * released.
 *
 * This is achieved by userspace setting up a list in which a thread enters all
 * the locks (futexes) that it is holding. The robust list is a linked list,
 * and userspace registers the start of the list with the syscall
 * set_robust_list(). If such a thread eventually dies, the kernel will walk
 * this list, waking up one thread waiting on each futex and marking the futex
 * word with the flag FUTEX_OWNER_DIED.
 *
 * See also
 *	man set_robust_list
 *	Documentation/locking/robust-futex-ABI.rst
 *	Documentation/locking/robust-futexes.rst
 */
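
/*
 * In outline, a robust lock implementation does the following (a simplified
 * sketch of what this test builds below, not a complete implementation):
 *
 *	set_robust_list(&head, sizeof(head));	// once per thread
 *	CAS(&lock->futex, 0, gettid());		// lock: word holds the owner TID
 *	...link lock->list into head.list...	// publish the held lock
 *	...unlink lock->list, store 0...	// unlock
 */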

#define _GNU_SOURCE

#include "futextest.h"
#include "../../kselftest_harness.h"

#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/wait.h>

#define STACK_SIZE (1024 * 1024)

#define FUTEX_TIMEOUT 3

#define SLEEP_US 100

static pthread_barrier_t barrier, barrier2;

static int set_robust_list(struct robust_list_head *head, size_t len)
{
	return syscall(SYS_set_robust_list, head, len);
}

static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr)
{
	return syscall(SYS_get_robust_list, pid, head, len_ptr);
}

/*
 * Basic lock struct; it contains just the futex word and the robust list entry.
 * Real implementations also have a *prev pointer, to easily walk the list.
 */
struct lock_struct {
	_Atomic(unsigned int) futex;
	struct robust_list list;
};
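
/*
 * Given this layout, the kernel recovers the futex word of a list entry
 * roughly as follows (illustrative, not the exact kernel code):
 *
 *	uaddr = (unsigned int *)((char *)entry + head->futex_offset);
 *
 * so futex_offset must hold the signed distance from the 'list' member back
 * to the 'futex' member (negative here, since 'futex' precedes 'list').
 */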

/*
 * Helper function to spawn a child thread. Returns -1 on error, pid on success
 */
static int create_child(int (*fn)(void *arg), void *arg)
{
	char *stack;
	pid_t pid;

	stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
		     MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
	if (stack == MAP_FAILED)
		return -1;

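	/*
	 * Stacks grow down on (nearly) all architectures that run Linux, so
	 * clone() expects a pointer to the top of the new child's stack.
	 */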
	stack += STACK_SIZE;

	pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg);

	if (pid == -1)
		return -1;

	return pid;
}

/*
 * Helper function to prepare and register a robust list
 */
static int set_list(struct robust_list_head *head)
{
	int ret;

	ret = set_robust_list(head, sizeof(*head));
	if (ret)
		return ret;

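	/*
	 * futex_offset is the signed offset the kernel adds to each robust
	 * list entry's address to find the corresponding futex word. It is
	 * negative here, as 'futex' sits before 'list' in struct lock_struct.
	 */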
	head->futex_offset = (size_t) offsetof(struct lock_struct, futex) -
			     (size_t) offsetof(struct lock_struct, list);
	head->list.next = &head->list;
	head->list_op_pending = NULL;

	return 0;
}

/*
 * A basic (and incomplete) mutex lock function with robustness
 */
static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject)
{
	_Atomic(unsigned int) *futex = &lock->futex;
	unsigned int zero = 0;
	pid_t tid = gettid();
	int ret = -1;

	/*
	 * Set list_op_pending before starting the lock, so the kernel can catch
	 * the case where the thread died during the lock operation
	 */
	head->list_op_pending = &lock->list;

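	/*
	 * The robust futex ABI requires the lock word to hold the owner's TID
	 * while locked, so handle_futex_death() can recognize which locks were
	 * held by the exiting thread.
	 */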
	if (atomic_compare_exchange_strong(futex, &zero, tid)) {
		/*
		 * We took the lock, insert it in the robust list
		 */
		struct robust_list *list = &head->list;

		/* Error injection to test list_op_pending */
		if (error_inject)
			return 0;

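		/* Walk to the tail of the circular list and append the lock */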
		while (list->next != &head->list)
			list = list->next;

		list->next = &lock->list;
		lock->list.next = &head->list;

		ret = 0;
	} else {
		/*
		 * We didn't take the lock, wait until the owner wakes us up
		 * (or dies)
		 */
		struct timespec to;

		to.tv_sec = FUTEX_TIMEOUT;
		to.tv_nsec = 0;

		tid = atomic_load(futex);
		/* The kernel ignores futexes without the waiters flag */
		tid |= FUTEX_WAITERS;
		atomic_store(futex, tid);
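		/*
		 * Note: this load-then-store is racy against a concurrent
		 * unlock; a real implementation would set FUTEX_WAITERS with a
		 * CAS loop. This test keeps it simple on purpose.
		 */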

		ret = futex_wait((futex_t *) futex, tid, &to, 0);

		/*
		 * A real mutex_lock() implementation would loop here to finally
		 * take the lock. We don't care about that, so we stop here.
		 */
	}

	head->list_op_pending = NULL;

	return ret;
}

/*
 * This child thread will succeed in taking the lock, and then will exit
 * holding it
 */
static int child_fn_lock(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return ret;
	}

	ret = mutex_lock(lock, &head, false);
	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return ret;
	}

	pthread_barrier_wait(&barrier);

	/*
	 * There's a race here: the parent thread needs to be inside
	 * futex_wait() before the child thread dies, otherwise it will miss the
	 * wakeup from handle_futex_death() that this child will emit. We wait a
	 * little bit just to make sure that this happens.
	 */
	usleep(SLEEP_US);

	return 0;
}

/*
 * Spawns a child thread that will set a robust list, take the lock, register it
 * in the robust list and die. The parent thread will wait on this futex, and
 * should be woken up when the child exits.
 */
TEST(test_robustness)
{
	struct lock_struct lock = { .futex = 0 };
	_Atomic(unsigned int) *futex = &lock.futex;
	struct robust_list_head head;
	int ret, pid, wstatus;

	ret = set_list(&head);
	ASSERT_EQ(ret, 0);

	/*
	 * Let's use a barrier to ensure that the child thread takes the lock
	 * before the parent
	 */
	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);

	pid = create_child(&child_fn_lock, &lock);
	ASSERT_NE(pid, -1);

	pthread_barrier_wait(&barrier);
	ret = mutex_lock(&lock, &head, false);

	/*
	 * futex_wait() should return 0 and the futex word should be marked with
	 * FUTEX_OWNER_DIED
	 */
	ASSERT_EQ(ret, 0);

	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

/*
 * The only valid value for len is sizeof(*head)
 */
TEST(test_set_robust_list_invalid_size)
{
	struct robust_list_head head;
	size_t head_size = sizeof(head);
	int ret;

	ret = set_robust_list(&head, head_size);
	ASSERT_EQ(ret, 0);

	ret = set_robust_list(&head, head_size * 2);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ret = set_robust_list(&head, head_size - 1);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ret = set_robust_list(&head, 0);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ksft_test_result_pass("%s\n", __func__);
}

/*
 * Test get_robust_list with pid = 0, getting the list of the running thread
 */
TEST(test_get_robust_list_self)
{
	struct robust_list_head head, head2, *get_head;
	size_t head_size = sizeof(head), len_ptr;
	int ret;

	ret = set_robust_list(&head, head_size);
	ASSERT_EQ(ret, 0);

	ret = get_robust_list(0, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(get_head, &head);
	ASSERT_EQ(head_size, len_ptr);

	ret = set_robust_list(&head2, head_size);
	ASSERT_EQ(ret, 0);

	ret = get_robust_list(0, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(get_head, &head2);
	ASSERT_EQ(head_size, len_ptr);

	ksft_test_result_pass("%s\n", __func__);
}

static int child_list(void *arg)
{
	struct robust_list_head *head = arg;
	int ret;

	ret = set_robust_list(head, sizeof(*head));
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return -1;
	}

	/*
	 * After setting the list head, wait until the main thread can call
	 * get_robust_list() for this thread before exiting.
	 */
	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier2);

	return 0;
}

/*
 * Test get_robust_list from another thread. We use two barriers here to ensure
 * that:
 *	1) the child thread sets the list before we try to get it from the
 *	   parent
 *	2) the child thread is still alive when we try to get the list from it
 */
TEST(test_get_robust_list_child)
{
	struct robust_list_head head, *get_head;
	int ret, wstatus;
	size_t len_ptr;
	pid_t tid;

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
	ret = pthread_barrier_init(&barrier2, NULL, 2);
	ASSERT_EQ(ret, 0);

	tid = create_child(&child_list, &head);
	ASSERT_NE(tid, -1);

	pthread_barrier_wait(&barrier);

	ret = get_robust_list(tid, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(&head, get_head);

	pthread_barrier_wait(&barrier2);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);
	pthread_barrier_destroy(&barrier2);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

static int child_fn_lock_with_error(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return -1;
	}

	ret = mutex_lock(lock, &head, true);
	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return -1;
	}

	pthread_barrier_wait(&barrier);

	/* See comment at child_fn_lock() */
	usleep(SLEEP_US);

	return 0;
}

/*
 * Same as the robustness test, but inject an error where mutex_lock() exits
 * early, just after setting list_op_pending and taking the lock, to test the
 * list_op_pending mechanism
 */
TEST(test_set_list_op_pending)
{
	struct lock_struct lock = { .futex = 0 };
	_Atomic(unsigned int) *futex = &lock.futex;
	struct robust_list_head head;
	int ret, wstatus;

	ret = set_list(&head);
	ASSERT_EQ(ret, 0);

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);

	ret = create_child(&child_fn_lock_with_error, &lock);
	ASSERT_NE(ret, -1);

	pthread_barrier_wait(&barrier);
	ret = mutex_lock(&lock, &head, false);

	ASSERT_EQ(ret, 0);

	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
	else
		ksft_test_result_fail("%s\n", __func__);
}

#define CHILD_NR 10

static int child_lock_holder(void *arg)
{
	struct lock_struct *locks = arg;
	struct robust_list_head head;
	int i;

	set_list(&head);

	for (i = 0; i < CHILD_NR; i++) {
		locks[i].futex = 0;
		mutex_lock(&locks[i], &head, false);
	}

	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier2);

	/* See comment at child_fn_lock() */
	usleep(SLEEP_US);

	return 0;
}

static int child_wait_lock(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

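	/*
	 * This waiter never registers a robust list of its own: the local
	 * head only absorbs the list_op_pending updates done by mutex_lock(),
	 * and is never seen by the kernel.
	 */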
	pthread_barrier_wait(&barrier2);
	ret = mutex_lock(lock, &head, false);

	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return -1;
	}

	if (!(lock->futex & FUTEX_OWNER_DIED)) {
		ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
		return -1;
	}

	return 0;
}

/*
 * Test a robust list of more than one element. All the waiters should wake when
 * the holder dies
 */
TEST(test_robust_list_multiple_elements)
{
	struct lock_struct locks[CHILD_NR];
	pid_t pids[CHILD_NR + 1];
	int i, ret, wstatus;

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
	ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1);
	ASSERT_EQ(ret, 0);

	pids[0] = create_child(&child_lock_holder, &locks);

	/* Wait until the holder thread takes the locks */
	pthread_barrier_wait(&barrier);

	for (i = 0; i < CHILD_NR; i++)
		pids[i + 1] = create_child(&child_wait_lock, &locks[i]);

	/* Wait for all children (holder + waiters) to return */
	ret = 0;

	for (i = 0; i < CHILD_NR + 1; i++) {
		waitpid(pids[i], &wstatus, 0);
		if (WEXITSTATUS(wstatus))
			ret = -1;
	}

	pthread_barrier_destroy(&barrier);
	pthread_barrier_destroy(&barrier2);

	/* Pass only if no child has returned an error */
	if (!ret)
		ksft_test_result_pass("%s\n", __func__);
}

static int child_circular_list(void *arg)
{
	static struct robust_list_head head;
	struct lock_struct a, b, c;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_list error\n");
		return -1;
	}

	head.list.next = &a.list;

	/*
	 * The last element should point back to the list head, but we short
	 * circuit it to create a cycle
	 */
	a.list.next = &b.list;
	b.list.next = &c.list;
	c.list.next = &a.list;

	return 0;
}

/*
 * Create a circular robust list. The kernel caps the number of list elements
 * it will walk (ROBUST_LIST_LIMIT), so it won't be trapped in an infinite
 * loop while handling a process exit
 */
TEST(test_circular_list)
{
	int wstatus;

	create_child(child_circular_list, NULL);

	wait(&wstatus);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

TEST_HARNESS_MAIN