xref: /linux/tools/testing/selftests/futex/functional/robust_list.c (revision 0048fbb4011ec55c32d3148b2cda56433f273375)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2025 Igalia S.L.
4  *
5  * Robust list test by André Almeida <andrealmeid@igalia.com>
6  *
7  * The robust list uAPI allows userspace to create "robust" locks, in the sense
8  * that if the lock holder thread dies, the remaining threads that are waiting
9  * for the lock won't block forever, waiting for a lock that will never be
10  * released.
11  *
12  * This is achieved by userspace setting up a list where a thread can enter all
13  * the locks (futexes) that it is holding. The robust list is a linked list, and
14  * userspace registers the start of the list with the syscall set_robust_list().
15  * If such a thread eventually dies, the kernel will walk this list, waking up one
16  * thread waiting for each futex and marking the futex word with the flag
17  * FUTEX_OWNER_DIED.
18  *
19  * See also
20  *	man set_robust_list
21  *	Documentation/locking/robust-futex-ABI.rst
22  *	Documentation/locking/robust-futexes.rst
23  */
24 
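/*
 * A minimal sketch of how a list is registered with the uAPI described above
 * (glibc provides no wrapper, so the raw syscall is used); set_list() below
 * does the same for this test's lock_struct type:
 *
 *	struct robust_list_head head;
 *
 *	head.list.next = &head.list;	(an empty list is circular)
 *	head.futex_offset = <offset from a list entry to its futex word>;
 *	head.list_op_pending = NULL;
 *	syscall(SYS_set_robust_list, &head, sizeof(head));
 */
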
25 #define _GNU_SOURCE
26 
27 #include "futextest.h"
28 #include "../../kselftest_harness.h"
29 
30 #include <errno.h>
31 #include <pthread.h>
32 #include <signal.h>
33 #include <stdatomic.h>
34 #include <stdbool.h>
35 #include <stddef.h>
36 #include <sys/mman.h>
37 #include <sys/wait.h>
38 
39 #define STACK_SIZE (1024 * 1024)
40 
41 #define FUTEX_TIMEOUT 3
42 
43 #define SLEEP_US 100
44 
45 static pthread_barrier_t barrier, barrier2;
46 
47 static int set_robust_list(struct robust_list_head *head, size_t len)
48 {
49 	return syscall(SYS_set_robust_list, head, len);
50 }
51 
52 static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr)
53 {
54 	return syscall(SYS_get_robust_list, pid, head, len_ptr);
55 }
56 
57 /*
58  * Basic lock struct; it contains just the futex word and the robust list element.
59  * Real implementations also have a *prev pointer to walk the list easily.
60  */
61 struct lock_struct {
62 	_Atomic(unsigned int)	futex;
63 	struct robust_list	list;
64 };
65 
66 /*
67  * Helper function to spawn a child thread. Returns -1 on error, pid on success
68  */
69 static int create_child(int (*fn)(void *arg), void *arg)
70 {
71 	char *stack;
72 	pid_t pid;
73 
74 	stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
75 		     MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
76 	if (stack == MAP_FAILED)
77 		return -1;
78 
79 	stack += STACK_SIZE;
80 
81 	pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg);
82 
83 	if (pid == -1)
84 		return -1;
85 
86 	return pid;
87 }
88 
89 /*
90  * Helper function to prepare and register a robust list
91  */
92 static int set_list(struct robust_list_head *head)
93 {
94 	int ret;
95 
96 	ret = set_robust_list(head, sizeof(*head));
97 	if (ret)
98 		return ret;
99 
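	/*
	 * futex_offset tells the kernel how to find the futex word from each
	 * list entry: word address = entry address + futex_offset. An empty
	 * list is a single element pointing back at the head itself.
	 */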
100 	head->futex_offset = (size_t) offsetof(struct lock_struct, futex) -
101 			     (size_t) offsetof(struct lock_struct, list);
102 	head->list.next = &head->list;
103 	head->list_op_pending = NULL;
104 
105 	return 0;
106 }
107 
108 /*
109  * A basic (and incomplete) mutex lock function with robustness
110  */
111 static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject)
112 {
113 	_Atomic(unsigned int) *futex = &lock->futex;
114 	unsigned int zero = 0;
115 	pid_t tid = gettid();
116 	int ret = -1;
117 
118 	/*
119 	 * Set list_op_pending before starting the lock, so the kernel can catch
120 	 * the case where the thread died during the lock operation
121 	 */
122 	head->list_op_pending = &lock->list;
123 
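	/*
	 * A robust lock stores the owner's TID in the futex word; this is how
	 * the kernel knows, at exit time, which futexes this thread owned.
	 */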
124 	if (atomic_compare_exchange_strong(futex, &zero, tid)) {
125 		/*
126 		 * We took the lock, insert it in the robust list
127 		 */
128 		struct robust_list *list = &head->list;
129 
130 		/* Error injection to test list_op_pending */
131 		if (error_inject)
132 			return 0;
133 
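		/* Walk to the tail and append, keeping the list circular */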
134 		while (list->next != &head->list)
135 			list = list->next;
136 
137 		list->next = &lock->list;
138 		lock->list.next = &head->list;
139 
140 		ret = 0;
141 	} else {
142 		/*
143 		 * We didn't take the lock, so wait until the owner wakes us up (or dies)
144 		 */
145 		struct timespec to;
146 
147 		to.tv_sec = FUTEX_TIMEOUT;
148 		to.tv_nsec = 0;
149 
150 		tid = atomic_load(futex);
151 		/* Kernel ignores futexes without the waiters flag */
152 		tid |= FUTEX_WAITERS;
153 		atomic_store(futex, tid);
154 
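		/*
		 * futex_wait() goes to sleep only if the futex word still
		 * holds the value read above (owner TID | FUTEX_WAITERS), so
		 * a wakeup racing with this call is not missed.
		 */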
155 		ret = futex_wait((futex_t *) futex, tid, &to, 0);
156 
157 		/*
158 		 * A real mutex_lock() implementation would loop here to finally
159 		 * take the lock. We don't care about that, so we stop here.
160 		 */
161 	}
162 
163 	head->list_op_pending = NULL;
164 
165 	return ret;
166 }
167 
168 /*
169  * This child thread will succeed in taking the lock, and then will exit while holding it
170  */
171 static int child_fn_lock(void *arg)
172 {
173 	struct lock_struct *lock = arg;
174 	struct robust_list_head head;
175 	int ret;
176 
177 	ret = set_list(&head);
178 	if (ret) {
179 		ksft_test_result_fail("set_robust_list error\n");
180 		return ret;
181 	}
182 
183 	ret = mutex_lock(lock, &head, false);
184 	if (ret) {
185 		ksft_test_result_fail("mutex_lock error\n");
186 		return ret;
187 	}
188 
189 	pthread_barrier_wait(&barrier);
190 
191 	/*
192 	 * There's a race here: the parent thread needs to be inside
193 	 * futex_wait() before the child thread dies, otherwise it will miss the
194 	 * wakeup from handle_futex_death() that this child will emit. We wait a
195 	 * little bit just to make sure that this happens.
196 	 */
197 	usleep(SLEEP_US);
198 
199 	return 0;
200 }
201 
202 /*
203  * Spawns a child thread that will set a robust list, take the lock, register it
204  * in the robust list and die. The parent thread will wait on this futex, and
205  * should be woken up when the child exits.
206  */
207 TEST(test_robustness)
208 {
209 	struct lock_struct lock = { .futex = 0 };
210 	_Atomic(unsigned int) *futex = &lock.futex;
211 	struct robust_list_head head;
212 	int ret, pid, wstatus;
213 
214 	ret = set_list(&head);
215 	ASSERT_EQ(ret, 0);
216 
217 	/*
218 	 * Let's use a barrier to ensure that the child thread takes the lock
219 	 * before the parent
220 	 */
221 	ret = pthread_barrier_init(&barrier, NULL, 2);
222 	ASSERT_EQ(ret, 0);
223 
224 	pid = create_child(&child_fn_lock, &lock);
225 	ASSERT_NE(pid, -1);
226 
227 	pthread_barrier_wait(&barrier);
228 	ret = mutex_lock(&lock, &head, false);
229 
230 	/*
231 	 * futex_wait() should return 0 and the futex word should be marked with
232 	 * FUTEX_OWNER_DIED
233 	 */
234 	ASSERT_EQ(ret, 0);
235 
236 	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);
237 
238 	wait(&wstatus);
239 	pthread_barrier_destroy(&barrier);
240 
241 	/* Pass only if the child hasn't returned an error */
242 	if (!WEXITSTATUS(wstatus))
243 		ksft_test_result_pass("%s\n", __func__);
244 }
245 
246 /*
247  * The only valid value for len is sizeof(*head)
248  */
249 TEST(test_set_robust_list_invalid_size)
250 {
251 	struct robust_list_head head;
252 	size_t head_size = sizeof(head);
253 	int ret;
254 
255 	ret = set_robust_list(&head, head_size);
256 	ASSERT_EQ(ret, 0);
257 
258 	ret = set_robust_list(&head, head_size * 2);
259 	ASSERT_EQ(ret, -1);
260 	ASSERT_EQ(errno, EINVAL);
261 
262 	ret = set_robust_list(&head, head_size - 1);
263 	ASSERT_EQ(ret, -1);
264 	ASSERT_EQ(errno, EINVAL);
265 
266 	ret = set_robust_list(&head, 0);
267 	ASSERT_EQ(ret, -1);
268 	ASSERT_EQ(errno, EINVAL);
269 
270 	ksft_test_result_pass("%s\n", __func__);
271 }
272 
273 /*
274  * Test get_robust_list() with pid = 0, getting the list of the calling thread
275  */
276 TEST(test_get_robust_list_self)
277 {
278 	struct robust_list_head head, head2, *get_head;
279 	size_t head_size = sizeof(head), len_ptr;
280 	int ret;
281 
282 	ret = set_robust_list(&head, head_size);
283 	ASSERT_EQ(ret, 0);
284 
285 	ret = get_robust_list(0, &get_head, &len_ptr);
286 	ASSERT_EQ(ret, 0);
287 	ASSERT_EQ(get_head, &head);
288 	ASSERT_EQ(head_size, len_ptr);
289 
290 	ret = set_robust_list(&head2, head_size);
291 	ASSERT_EQ(ret, 0);
292 
293 	ret = get_robust_list(0, &get_head, &len_ptr);
294 	ASSERT_EQ(ret, 0);
295 	ASSERT_EQ(get_head, &head2);
296 	ASSERT_EQ(head_size, len_ptr);
297 
298 	ksft_test_result_pass("%s\n", __func__);
299 }
300 
301 static int child_list(void *arg)
302 {
303 	struct robust_list_head *head = arg;
304 	int ret;
305 
306 	ret = set_robust_list(head, sizeof(*head));
307 	if (ret) {
308 		ksft_test_result_fail("set_robust_list error\n");
309 		return -1;
310 	}
311 
312 	/*
313 	 * After setting the list head, wait until the main thread can call
314 	 * get_robust_list() for this thread before exiting.
315 	 */
316 	pthread_barrier_wait(&barrier);
317 	pthread_barrier_wait(&barrier2);
318 
319 	return 0;
320 }
321 
322 /*
323  * Test get_robust_list from another thread. We use two barriers here to ensure
324  * that:
325  *   1) the child thread has set the list before we try to get it from the
326  *      parent
327  *   2) the child thread is still alive when we try to get the list from it
328  */
329 TEST(test_get_robust_list_child)
330 {
331 	struct robust_list_head head, *get_head;
332 	int ret, wstatus;
333 	size_t len_ptr;
334 	pid_t tid;
335 
336 	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
337 	ret = pthread_barrier_init(&barrier2, NULL, 2);
338 	ASSERT_EQ(ret, 0);
339 
340 	tid = create_child(&child_list, &head);
341 	ASSERT_NE(tid, -1);
342 
343 	pthread_barrier_wait(&barrier);
344 
345 	ret = get_robust_list(tid, &get_head, &len_ptr);
346 	ASSERT_EQ(ret, 0);
347 	ASSERT_EQ(&head, get_head);
348 
349 	pthread_barrier_wait(&barrier2);
350 
351 	wait(&wstatus);
352 	pthread_barrier_destroy(&barrier);
353 	pthread_barrier_destroy(&barrier2);
354 
355 	/* Pass only if the child hasn't returned an error */
356 	if (!WEXITSTATUS(wstatus))
357 		ksft_test_result_pass("%s\n", __func__);
358 }
359 
360 static int child_fn_lock_with_error(void *arg)
361 {
362 	struct lock_struct *lock = arg;
363 	struct robust_list_head head;
364 	int ret;
365 
366 	ret = set_list(&head);
367 	if (ret) {
368 		ksft_test_result_fail("set_robust_list error\n");
369 		return -1;
370 	}
371 
372 	ret = mutex_lock(lock, &head, true);
373 	if (ret) {
374 		ksft_test_result_fail("mutex_lock error\n");
375 		return -1;
376 	}
377 
378 	pthread_barrier_wait(&barrier);
379 
380 	/* See comment at child_fn_lock() */
381 	usleep(SLEEP_US);
382 
383 	return 0;
384 }
385 
386 /*
387  * Same as the robustness test, but inject an error so that mutex_lock() exits
388  * early, just after setting list_op_pending and taking the lock, to test the
389  * list_op_pending mechanism
390  */
391 TEST(test_set_list_op_pending)
392 {
393 	struct lock_struct lock = { .futex = 0 };
394 	_Atomic(unsigned int) *futex = &lock.futex;
395 	struct robust_list_head head;
396 	int ret, wstatus;
397 
398 	ret = set_list(&head);
399 	ASSERT_EQ(ret, 0);
400 
401 	ret = pthread_barrier_init(&barrier, NULL, 2);
402 	ASSERT_EQ(ret, 0);
403 
404 	ret = create_child(&child_fn_lock_with_error, &lock);
405 	ASSERT_NE(ret, -1);
406 
407 	pthread_barrier_wait(&barrier);
408 	ret = mutex_lock(&lock, &head, false);
409 
410 	ASSERT_EQ(ret, 0);
411 
412 	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);
413 
414 	wait(&wstatus);
415 	pthread_barrier_destroy(&barrier);
416 
417 	/* Pass only if the child hasn't returned an error */
418 	if (!WEXITSTATUS(wstatus))
419 		ksft_test_result_pass("%s\n", __func__);
420 	else
421 		ksft_test_result_fail("%s\n", __func__);
422 }
423 
424 #define CHILD_NR 10
425 
426 static int child_lock_holder(void *arg)
427 {
428 	struct lock_struct *locks = arg;
429 	struct robust_list_head head;
430 	int i;
431 
432 	set_list(&head);
433 
434 	for (i = 0; i < CHILD_NR; i++) {
435 		locks[i].futex = 0;
436 		mutex_lock(&locks[i], &head, false);
437 	}
438 
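	/*
	 * All CHILD_NR locks are now linked on this thread's robust list, so
	 * when this thread dies the kernel wakes one waiter per entry.
	 */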
439 	pthread_barrier_wait(&barrier);
440 	pthread_barrier_wait(&barrier2);
441 
442 	/* See comment at child_fn_lock() */
443 	usleep(SLEEP_US);
444 
445 	return 0;
446 }
447 
448 static int child_wait_lock(void *arg)
449 {
450 	struct lock_struct *lock = arg;
451 	struct robust_list_head head;
452 	int ret;
453 
454 	pthread_barrier_wait(&barrier2);
455 	ret = mutex_lock(lock, &head, false);
456 
457 	if (ret) {
458 		ksft_test_result_fail("mutex_lock error\n");
459 		return -1;
460 	}
461 
462 	if (!(lock->futex & FUTEX_OWNER_DIED)) {
463 		ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
464 		return -1;
465 	}
466 
467 	return 0;
468 }
469 
470 /*
471  * Test a robust list of more than one element. All the waiters should wake when
472  * the holder dies
473  */
474 TEST(test_robust_list_multiple_elements)
475 {
476 	struct lock_struct locks[CHILD_NR];
477 	pid_t pids[CHILD_NR + 1];
478 	int i, ret, wstatus;
479 
480 	ret = pthread_barrier_init(&barrier, NULL, 2);
481 	ASSERT_EQ(ret, 0);
482 	ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1);
483 	ASSERT_EQ(ret, 0);
484 
485 	pids[0] = create_child(&child_lock_holder, &locks);
486 
487 	/* Wait until the lock-holder thread takes the locks */
488 	pthread_barrier_wait(&barrier);
489 
490 	for (i = 0; i < CHILD_NR; i++)
491 		pids[i+1] = create_child(&child_wait_lock, &locks[i]);
492 
493 	/* Wait for all children to return */
494 	ret = 0;
495 
496 	for (i = 0; i < CHILD_NR + 1; i++) {
497 		waitpid(pids[i], &wstatus, 0);
498 		if (WEXITSTATUS(wstatus))
499 			ret = -1;
500 	}
501 
502 	pthread_barrier_destroy(&barrier);
503 	pthread_barrier_destroy(&barrier2);
504 
505 	/* Pass only if no child has returned an error */
506 	if (!ret)
507 		ksft_test_result_pass("%s\n", __func__);
508 }
509 
510 static int child_circular_list(void *arg)
511 {
512 	static struct robust_list_head head;
513 	struct lock_struct a, b, c;
514 	int ret;
515 
516 	ret = set_list(&head);
517 	if (ret) {
518 		ksft_test_result_fail("set_list error\n");
519 		return -1;
520 	}
521 
522 	head.list.next = &a.list;
523 
524 	/*
525 	 * The last element should point back to the list head, but we short-circuit it
526 	 */
527 	a.list.next = &b.list;
528 	b.list.next = &c.list;
529 	c.list.next = &a.list;
530 
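	/*
	 * Exit while the circular list is still registered, so the kernel has
	 * to cope with it when walking the list at process exit.
	 */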
531 	return 0;
532 }
533 
534 /*
535  * Create a circular robust list. The kernel should be able to destroy the list
536  * while processing it, so it won't be trapped in an infinite loop while handling
537  * a process exit
538  */
539 TEST(test_circular_list)
540 {
541 	int wstatus;
542 
543 	create_child(child_circular_list, NULL);
544 
545 	wait(&wstatus);
546 
547 	/* Pass only if the child hasn't returned an error */
548 	if (!WEXITSTATUS(wstatus))
549 		ksft_test_result_pass("%s\n", __func__);
550 }
551 
552 TEST_HARNESS_MAIN
553