xref: /freebsd/sys/compat/linux/linux_futex.c (revision 4ec234c813eed05c166859bba82c882e40826eb9)
1 /*	$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
2 
3 /*-
4  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by Emmanuel Dreyfus
17  * 4. The name of the author may not be used to endorse or promote
18  *    products derived from this software without specific prior written
19  *    permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 #if 0
37 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
38 #endif
39 
40 #include "opt_compat.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/imgact.h>
45 #include <sys/kernel.h>
46 #include <sys/ktr.h>
47 #include <sys/lock.h>
48 #include <sys/malloc.h>
49 #include <sys/mutex.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/queue.h>
53 #include <sys/sched.h>
54 #include <sys/sdt.h>
55 #include <sys/sx.h>
56 #include <sys/umtx.h>
57 
58 #ifdef COMPAT_LINUX32
59 #include <machine/../linux32/linux.h>
60 #include <machine/../linux32/linux32_proto.h>
61 #else
62 #include <machine/../linux/linux.h>
63 #include <machine/../linux/linux_proto.h>
64 #endif
65 #include <compat/linux/linux_dtrace.h>
66 #include <compat/linux/linux_emul.h>
67 #include <compat/linux/linux_futex.h>
68 #include <compat/linux/linux_util.h>
69 
70 /* DTrace init */
71 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
72 
73 /* Linuxulator-global DTrace probes */
74 LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked);
75 LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock);
76 
77 /**
78  * Futex part for the special DTrace module "locks".
79  */
80 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
81 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");
82 
83 /**
84  * Per futex probes.
85  */
86 LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
87 LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
88 
89 /**
90  * DTrace probes in this module.
91  */
92 LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
93     "struct waiting_proc *");
94 LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
95     "int");
96 LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
97     "int");
98 LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);
99 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
100     "uint32_t");
101 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
102 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
103     "int");
104 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
105 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
106 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");
107 LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
108     "struct waiting_proc **", "struct futex **");
109 LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
110 LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");
111 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
112     "struct waiting_proc **", "int");
113 LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
114     "struct waiting_proc *", "uint32_t *", "uint32_t");
115 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
116     "struct waiting_proc *");
117 LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");
118 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
119     "uint32_t");
120 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
121     "struct waiting_proc *", "uint32_t");
122 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
123 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");
124 LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
125     "struct futex *", "int");
126 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
127 LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
128     "struct waiting_proc *", "uint32_t");
129 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");
130 LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
131     "struct waiting_proc **", "struct l_timespec *", "uint32_t");
132 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, copyin_error, "int");
133 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, itimerfix_error, "int");
134 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
135 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");
136 LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
137     "int", "uint32_t");
138 LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
139     "int");
140 LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
141 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
142 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
143 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");
144 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
145     "struct linux_sys_futex_args *");
146 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
147 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
148 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
149 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
150     "uint32_t", "uint32_t");
151 LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
152     "uint32_t *", "uint32_t", "int", "uint32_t");
153 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
154     "uint32_t", "uint32_t");
155 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
156     "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
157 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
158     "uint32_t", "int");
159 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
160     "int", "uint32_t", "uint32_t *", "uint32_t");
161 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
162 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
163 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
164 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
165 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
166 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
167 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
168 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
169 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");
170 LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
171     "struct linux_set_robust_list_args *");
172 LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
173 LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");
174 LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
175     "struct linux_get_robust_list_args *");
176 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
177 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
178 LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, "struct proc *",
179     "uint32_t *", "int");
180 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
181 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
182 LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
183     "struct linux_robust_list **", "struct linux_robust_list **", "int *");
184 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
185 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
186 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, entry, "struct proc *");
187 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
188 LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
189 
190 static MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
191 static MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");
192 
193 struct futex;
194 
195 struct waiting_proc {
196 	uint32_t	wp_flags;
197 	struct futex	*wp_futex;
198 	TAILQ_ENTRY(waiting_proc) wp_list;
199 };
200 
201 struct futex {
202 	struct sx	f_lck;
203 	uint32_t	*f_uaddr;	/* user-supplied value, for debug */
204 	struct umtx_key	f_key;
205 	uint32_t	f_refcount;
206 	uint32_t	f_bitset;
207 	LIST_ENTRY(futex) f_list;
208 	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
209 };
210 
211 struct futex_list futex_list;
212 
/*
 * Helpers for the per-futex sx lock; "f" is a pointer to struct futex.
 * Creation and destruction of the lock are reported through the
 * futex:futex:create and futex:futex:destroy DTrace probes.
 */
#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
#define FUTEX_INIT(f)							\
	do {								\
		sx_init_flags(&(f)->f_lck, "ftlk", SX_DUPOK);		\
		LIN_SDT_PROBE1(futex, futex, create, &(f)->f_lck);	\
	} while (0)
#define FUTEX_DESTROY(f)						\
	do {								\
		LIN_SDT_PROBE1(futex, futex, destroy, &(f)->f_lck);	\
		sx_destroy(&(f)->f_lck);				\
	} while (0)
/* Assert that the caller holds the futex lock exclusively. */
#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)
227 
228 struct mtx futex_mtx;			/* protects the futex list */
229 #define FUTEXES_LOCK		do { \
230 				    mtx_lock(&futex_mtx); \
231 				    LIN_SDT_PROBE1(locks, futex_mtx, \
232 					locked, &futex_mtx); \
233 				} while (0)
234 #define FUTEXES_UNLOCK		do { \
235 				    LIN_SDT_PROBE1(locks, futex_mtx, \
236 					unlock, &futex_mtx); \
237 				    mtx_unlock(&futex_mtx); \
238 				} while (0)
239 
/*
 * Flags accepted by futex_get().
 */
/* Allocate a waiting_proc and queue it on the futex. */
#define FUTEX_CREATE_WP		0x1
/* Do not create the futex if it does not exist yet. */
#define FUTEX_DONTCREATE	0x2
/* Fail with EINVAL if the futex already exists. */
#define FUTEX_DONTEXISTS	0x4
/* The futex is shared rather than thread-private. */
#define	FUTEX_SHARED		0x8

/*
 * Bits stored in waiting_proc.wp_flags.
 */
/*
 * The waiter was requeued: it was moved from the wait list of the futex
 * its thread went to sleep on to the wait list of another futex.
 */
#define FUTEX_WP_REQUEUED	0x1
/*
 * The waiter has been woken up and already removed from the futex wait
 * list, which prevents a double wakeup.
 */
#define FUTEX_WP_REMOVED	0x2
254 
255 /* support.s */
256 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
257 int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
258 int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
259 int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
260 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
261 
262 static void
263 futex_put(struct futex *f, struct waiting_proc *wp)
264 {
265 	LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
266 
267 	FUTEX_ASSERT_LOCKED(f);
268 	if (wp != NULL) {
269 		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
270 			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
271 		free(wp, M_FUTEX_WP);
272 	}
273 
274 	FUTEXES_LOCK;
275 	if (--f->f_refcount == 0) {
276 		LIST_REMOVE(f, f_list);
277 		FUTEXES_UNLOCK;
278 		FUTEX_UNLOCK(f);
279 
280 		LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
281 		    f->f_refcount, f->f_key.shared);
282 		LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
283 		    "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
284 		umtx_key_release(&f->f_key);
285 		FUTEX_DESTROY(f);
286 		free(f, M_FUTEX);
287 
288 		LIN_SDT_PROBE0(futex, futex_put, return);
289 		return;
290 	}
291 
292 	LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
293 	    f->f_key.shared);
294 	LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
295 	    f->f_uaddr, f->f_refcount, f->f_key.shared);
296 	FUTEXES_UNLOCK;
297 	FUTEX_UNLOCK(f);
298 
299 	LIN_SDT_PROBE0(futex, futex_put, return);
300 }
301 
302 static int
303 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
304 {
305 	struct futex *f, *tmpf;
306 	struct umtx_key key;
307 	int error;
308 
309 	LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
310 
311 	*newf = tmpf = NULL;
312 
313 	error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
314 	    AUTO_SHARE : THREAD_SHARE, &key);
315 	if (error) {
316 		LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
317 		LIN_SDT_PROBE1(futex, futex_get0, return, error);
318 		return (error);
319 	}
320 retry:
321 	FUTEXES_LOCK;
322 	LIST_FOREACH(f, &futex_list, f_list) {
323 		if (umtx_key_match(&f->f_key, &key)) {
324 			if (tmpf != NULL) {
325 				FUTEX_UNLOCK(tmpf);
326 				FUTEX_DESTROY(tmpf);
327 				free(tmpf, M_FUTEX);
328 			}
329 			if (flags & FUTEX_DONTEXISTS) {
330 				FUTEXES_UNLOCK;
331 				umtx_key_release(&key);
332 
333 				LIN_SDT_PROBE1(futex, futex_get0, return,
334 				    EINVAL);
335 				return (EINVAL);
336 			}
337 
338 			/*
339 			 * Increment refcount of the found futex to
340 			 * prevent it from deallocation before FUTEX_LOCK()
341 			 */
342 			++f->f_refcount;
343 			FUTEXES_UNLOCK;
344 			umtx_key_release(&key);
345 
346 			FUTEX_LOCK(f);
347 			*newf = f;
348 			LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
349 			    f->f_refcount, f->f_key.shared);
350 			LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
351 			    uaddr, f->f_refcount, f->f_key.shared);
352 
353 			LIN_SDT_PROBE1(futex, futex_get0, return, 0);
354 			return (0);
355 		}
356 	}
357 
358 	if (flags & FUTEX_DONTCREATE) {
359 		FUTEXES_UNLOCK;
360 		umtx_key_release(&key);
361 		LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
362 		LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
363 
364 		LIN_SDT_PROBE1(futex, futex_get0, return, 0);
365 		return (0);
366 	}
367 
368 	if (tmpf == NULL) {
369 		FUTEXES_UNLOCK;
370 		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
371 		tmpf->f_uaddr = uaddr;
372 		tmpf->f_key = key;
373 		tmpf->f_refcount = 1;
374 		tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
375 		FUTEX_INIT(tmpf);
376 		TAILQ_INIT(&tmpf->f_waiting_proc);
377 
378 		/*
379 		 * Lock the new futex before an insert into the futex_list
380 		 * to prevent futex usage by other.
381 		 */
382 		FUTEX_LOCK(tmpf);
383 		goto retry;
384 	}
385 
386 	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
387 	FUTEXES_UNLOCK;
388 
389 	LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
390 	    tmpf->f_key.shared);
391 	LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
392 	    uaddr, tmpf->f_refcount, tmpf->f_key.shared);
393 	*newf = tmpf;
394 
395 	LIN_SDT_PROBE1(futex, futex_get0, return, 0);
396 	return (0);
397 }
398 
399 static int
400 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
401     uint32_t flags)
402 {
403 	int error;
404 
405 	LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
406 
407 	if (flags & FUTEX_CREATE_WP) {
408 		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
409 		(*wp)->wp_flags = 0;
410 	}
411 	error = futex_get0(uaddr, f, flags);
412 	if (error) {
413 		LIN_SDT_PROBE0(futex, futex_get, error);
414 
415 		if (flags & FUTEX_CREATE_WP)
416 			free(*wp, M_FUTEX_WP);
417 
418 		LIN_SDT_PROBE1(futex, futex_get, return, error);
419 		return (error);
420 	}
421 	if (flags & FUTEX_CREATE_WP) {
422 		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
423 		(*wp)->wp_futex = *f;
424 	}
425 
426 	LIN_SDT_PROBE1(futex, futex_get, return, error);
427 	return (error);
428 }
429 
430 static int
431 futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
432 {
433 	int error;
434 
435 	FUTEX_ASSERT_LOCKED(f);
436 	LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
437 	LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
438 	    f->f_uaddr, wp, timeout, f->f_refcount);
439 	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
440 	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
441 		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
442 
443 		if (error) {
444 			LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
445 			    f->f_uaddr, wp, wp->wp_futex->f_uaddr,
446 			    wp->wp_futex->f_refcount);
447 		}
448 
449 		LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
450 		    " %p requeued uaddr %p ref %d",
451 		    error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
452 		    wp->wp_futex->f_refcount);
453 		futex_put(f, NULL);
454 		f = wp->wp_futex;
455 		FUTEX_LOCK(f);
456 	} else {
457 		if (error) {
458 			LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
459 			    f->f_uaddr, wp);
460 		}
461 		LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
462 		    error, f->f_uaddr, wp);
463 	}
464 
465 	futex_put(f, wp);
466 
467 	LIN_SDT_PROBE1(futex, futex_sleep, return, error);
468 	return (error);
469 }
470 
471 static int
472 futex_wake(struct futex *f, int n, uint32_t bitset)
473 {
474 	struct waiting_proc *wp, *wpt;
475 	int count = 0;
476 
477 	LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
478 
479 	if (bitset == 0) {
480 		LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
481 		return (EINVAL);
482 	}
483 
484 	FUTEX_ASSERT_LOCKED(f);
485 	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
486 		LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, wp,
487 		    f->f_refcount);
488 		LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
489 		    f->f_uaddr, wp, f->f_refcount);
490 		/*
491 		 * Unless we find a matching bit in
492 		 * the bitset, continue searching.
493 		 */
494 		if (!(wp->wp_futex->f_bitset & bitset))
495 			continue;
496 
497 		wp->wp_flags |= FUTEX_WP_REMOVED;
498 		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
499 		LIN_SDT_PROBE1(futex, futex_wake, wakeup, wp);
500 		wakeup_one(wp);
501 		if (++count == n)
502 			break;
503 	}
504 
505 	LIN_SDT_PROBE1(futex, futex_wake, return, count);
506 	return (count);
507 }
508 
509 static int
510 futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
511 {
512 	struct waiting_proc *wp, *wpt;
513 	int count = 0;
514 
515 	LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
516 
517 	FUTEX_ASSERT_LOCKED(f);
518 	FUTEX_ASSERT_LOCKED(f2);
519 
520 	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
521 		if (++count <= n) {
522 			LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
523 			    f->f_uaddr, wp);
524 			wp->wp_flags |= FUTEX_WP_REMOVED;
525 			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
526 			LIN_SDT_PROBE1(futex, futex_requeue, wakeup, wp);
527 			wakeup_one(wp);
528 		} else {
529 			LIN_SDT_PROBE3(futex, futex_requeue, requeue,
530 			    f->f_uaddr, wp, f2->f_uaddr);
531 			LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
532 			    f->f_uaddr, wp, f2->f_uaddr);
533 			wp->wp_flags |= FUTEX_WP_REQUEUED;
534 			/* Move wp to wp_list of f2 futex */
535 			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
536 			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
537 
538 			/*
539 			 * Thread which sleeps on wp after waking should
540 			 * acquire f2 lock, so increment refcount of f2 to
541 			 * prevent it from premature deallocation.
542 			 */
543 			wp->wp_futex = f2;
544 			FUTEXES_LOCK;
545 			++f2->f_refcount;
546 			FUTEXES_UNLOCK;
547 			if (count - n >= n2)
548 				break;
549 		}
550 	}
551 
552 	LIN_SDT_PROBE1(futex, futex_requeue, return, count);
553 	return (count);
554 }
555 
556 static int
557 futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts,
558     uint32_t bitset)
559 {
560 	struct l_timespec timeout;
561 	struct timeval tv;
562 	int timeout_hz;
563 	int error;
564 
565 	LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, ts, bitset);
566 
567 	if (bitset == 0) {
568 		LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
569 		return (EINVAL);
570 	}
571 
572 	f->f_bitset = bitset;
573 
574 	if (ts != NULL) {
575 		error = copyin(ts, &timeout, sizeof(timeout));
576 		if (error) {
577 			LIN_SDT_PROBE1(futex, futex_wait, copyin_error, error);
578 			LIN_SDT_PROBE1(futex, futex_wait, return, error);
579 			return (error);
580 		}
581 		TIMESPEC_TO_TIMEVAL(&tv, &timeout);
582 		error = itimerfix(&tv);
583 		if (error) {
584 			LIN_SDT_PROBE1(futex, futex_wait, itimerfix_error,
585 			    error);
586 			LIN_SDT_PROBE1(futex, futex_wait, return, error);
587 			return (error);
588 		}
589 		timeout_hz = tvtohz(&tv);
590 	} else
591 		timeout_hz = 0;
592 
593 	error = futex_sleep(f, wp, timeout_hz);
594 	if (error) {
595 		LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
596 	}
597 	if (error == EWOULDBLOCK)
598 		error = ETIMEDOUT;
599 
600 	LIN_SDT_PROBE1(futex, futex_wait, return, error);
601 	return (error);
602 }
603 
604 static int
605 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
606 {
607 	int op = (encoded_op >> 28) & 7;
608 	int cmp = (encoded_op >> 24) & 15;
609 	int oparg = (encoded_op << 8) >> 20;
610 	int cmparg = (encoded_op << 20) >> 20;
611 	int oldval = 0, ret;
612 
613 	LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
614 
615 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
616 		oparg = 1 << oparg;
617 
618 	LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
619 	    cmparg);
620 
621 	/* XXX: Linux verifies access here and returns EFAULT */
622 	LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
623 
624 	switch (op) {
625 	case FUTEX_OP_SET:
626 		ret = futex_xchgl(oparg, uaddr, &oldval);
627 		break;
628 	case FUTEX_OP_ADD:
629 		ret = futex_addl(oparg, uaddr, &oldval);
630 		break;
631 	case FUTEX_OP_OR:
632 		ret = futex_orl(oparg, uaddr, &oldval);
633 		break;
634 	case FUTEX_OP_ANDN:
635 		ret = futex_andl(~oparg, uaddr, &oldval);
636 		break;
637 	case FUTEX_OP_XOR:
638 		ret = futex_xorl(oparg, uaddr, &oldval);
639 		break;
640 	default:
641 		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
642 		ret = -ENOSYS;
643 		break;
644 	}
645 
646 	if (ret) {
647 		LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
648 		return (ret);
649 	}
650 
651 	switch (cmp) {
652 	case FUTEX_OP_CMP_EQ:
653 		ret = (oldval == cmparg);
654 		break;
655 	case FUTEX_OP_CMP_NE:
656 		ret = (oldval != cmparg);
657 		break;
658 	case FUTEX_OP_CMP_LT:
659 		ret = (oldval < cmparg);
660 		break;
661 	case FUTEX_OP_CMP_GE:
662 		ret = (oldval >= cmparg);
663 		break;
664 	case FUTEX_OP_CMP_LE:
665 		ret = (oldval <= cmparg);
666 		break;
667 	case FUTEX_OP_CMP_GT:
668 		ret = (oldval > cmparg);
669 		break;
670 	default:
671 		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
672 		ret = -ENOSYS;
673 	}
674 
675 	LIN_SDT_PROBE1(futex, futex_atomic_op, return, ret);
676 	return (ret);
677 }
678 
679 int
680 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
681 {
682 	int clockrt, nrwake, op_ret, ret, val;
683 	struct linux_emuldata *em;
684 	struct waiting_proc *wp;
685 	struct futex *f, *f2;
686 	int error;
687 	uint32_t flags;
688 
689 	LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
690 
691 	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
692 		flags = 0;
693 		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
694 	} else
695 		flags = FUTEX_SHARED;
696 
697 	/*
698 	 * Currently support for switching between CLOCK_MONOTONIC and
699 	 * CLOCK_REALTIME is not present. However Linux forbids the use of
700 	 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
701 	 * FUTEX_WAIT_REQUEUE_PI.
702 	 */
703 	clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
704 	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
705 	if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
706 		args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
707 		LIN_SDT_PROBE0(futex, linux_sys_futex,
708 		    unimplemented_clockswitch);
709 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
710 		return (ENOSYS);
711 	}
712 
713 	error = 0;
714 	f = f2 = NULL;
715 
716 	switch (args->op) {
717 	case LINUX_FUTEX_WAIT:
718 		args->val3 = FUTEX_BITSET_MATCH_ANY;
719 		/* FALLTHROUGH */
720 
721 	case LINUX_FUTEX_WAIT_BITSET:
722 		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
723 		    args->val, args->val3);
724 		LINUX_CTR3(sys_futex, "WAIT uaddr %p val %d val3 %d",
725 		    args->uaddr, args->val, args->val3);
726 
727 		error = futex_get(args->uaddr, &wp, &f,
728 		    flags | FUTEX_CREATE_WP);
729 		if (error) {
730 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
731 			return (error);
732 		}
733 
734 		error = copyin(args->uaddr, &val, sizeof(val));
735 		if (error) {
736 			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
737 			    error);
738 			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
739 			    error);
740 			futex_put(f, wp);
741 
742 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
743 			return (error);
744 		}
745 		if (val != args->val) {
746 			LIN_SDT_PROBE4(futex, linux_sys_futex,
747 			    debug_wait_value_neq, args->uaddr, args->val, val,
748 			    args->val3);
749 			LINUX_CTR4(sys_futex,
750 			    "WAIT uaddr %p val %d != uval %d val3 %d",
751 			    args->uaddr, args->val, val, args->val3);
752 			futex_put(f, wp);
753 
754 			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
755 			    EWOULDBLOCK);
756 			return (EWOULDBLOCK);
757 		}
758 
759 		error = futex_wait(f, wp, args->timeout, args->val3);
760 		break;
761 
762 	case LINUX_FUTEX_WAKE:
763 		args->val3 = FUTEX_BITSET_MATCH_ANY;
764 		/* FALLTHROUGH */
765 
766 	case LINUX_FUTEX_WAKE_BITSET:
767 		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
768 		    args->val, args->val3);
769 		LINUX_CTR3(sys_futex, "WAKE uaddr %p val % d val3 %d",
770 		    args->uaddr, args->val, args->val3);
771 
772 		error = futex_get(args->uaddr, NULL, &f,
773 		    flags | FUTEX_DONTCREATE);
774 		if (error) {
775 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
776 			return (error);
777 		}
778 
779 		if (f == NULL) {
780 			td->td_retval[0] = 0;
781 
782 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
783 			return (error);
784 		}
785 		td->td_retval[0] = futex_wake(f, args->val, args->val3);
786 		futex_put(f, NULL);
787 		break;
788 
789 	case LINUX_FUTEX_CMP_REQUEUE:
790 		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
791 		    args->uaddr, args->val, args->val3, args->uaddr2,
792 		    args->timeout);
793 		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
794 		    "val %d val3 %d uaddr2 %p val2 %d",
795 		    args->uaddr, args->val, args->val3, args->uaddr2,
796 		    (int)(unsigned long)args->timeout);
797 
798 		/*
799 		 * Linux allows this, we would not, it is an incorrect
800 		 * usage of declared ABI, so return EINVAL.
801 		 */
802 		if (args->uaddr == args->uaddr2) {
803 			LIN_SDT_PROBE0(futex, linux_sys_futex,
804 			    invalid_cmp_requeue_use);
805 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
806 			return (EINVAL);
807 		}
808 
809 		error = futex_get(args->uaddr, NULL, &f, flags);
810 		if (error) {
811 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
812 			return (error);
813 		}
814 
815 		/*
816 		 * To avoid deadlocks return EINVAL if second futex
817 		 * exists at this time.
818 		 *
819 		 * Glibc fall back to FUTEX_WAKE in case of any error
820 		 * returned by FUTEX_CMP_REQUEUE.
821 		 */
822 		error = futex_get(args->uaddr2, NULL, &f2,
823 		    flags | FUTEX_DONTEXISTS);
824 		if (error) {
825 			futex_put(f, NULL);
826 
827 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
828 			return (error);
829 		}
830 		error = copyin(args->uaddr, &val, sizeof(val));
831 		if (error) {
832 			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
833 			    error);
834 			LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
835 			    error);
836 			futex_put(f2, NULL);
837 			futex_put(f, NULL);
838 
839 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
840 			return (error);
841 		}
842 		if (val != args->val3) {
843 			LIN_SDT_PROBE2(futex, linux_sys_futex,
844 			    debug_cmp_requeue_value_neq, args->val, val);
845 			LINUX_CTR2(sys_futex, "CMP_REQUEUE val %d != uval %d",
846 			    args->val, val);
847 			futex_put(f2, NULL);
848 			futex_put(f, NULL);
849 
850 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
851 			return (EAGAIN);
852 		}
853 
854 		nrwake = (int)(unsigned long)args->timeout;
855 		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
856 		futex_put(f2, NULL);
857 		futex_put(f, NULL);
858 		break;
859 
860 	case LINUX_FUTEX_WAKE_OP:
861 		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
862 		    args->uaddr, args->op, args->val, args->uaddr2, args->val3);
863 		LINUX_CTR5(sys_futex, "WAKE_OP "
864 		    "uaddr %p op %d val %x uaddr2 %p val3 %x",
865 		    args->uaddr, args->op, args->val,
866 		    args->uaddr2, args->val3);
867 
868 		error = futex_get(args->uaddr, NULL, &f, flags);
869 		if (error) {
870 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
871 			return (error);
872 		}
873 
874 		if (args->uaddr != args->uaddr2)
875 			error = futex_get(args->uaddr2, NULL, &f2, flags);
876 		if (error) {
877 			futex_put(f, NULL);
878 
879 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
880 			return (error);
881 		}
882 
883 		/*
884 		 * This function returns positive number as results and
885 		 * negative as errors
886 		 */
887 		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
888 
889 		if (op_ret < 0) {
890 			/* XXX: We don't handle the EFAULT yet. */
891 			if (op_ret != -EFAULT) {
892 				if (f2 != NULL)
893 					futex_put(f2, NULL);
894 				futex_put(f, NULL);
895 
896 				LIN_SDT_PROBE1(futex, linux_sys_futex, return,
897 				    -op_ret);
898 				return (-op_ret);
899 			} else {
900 				LIN_SDT_PROBE0(futex, linux_sys_futex,
901 				    unhandled_efault);
902 			}
903 			if (f2 != NULL)
904 				futex_put(f2, NULL);
905 			futex_put(f, NULL);
906 
907 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EFAULT);
908 			return (EFAULT);
909 		}
910 
911 		ret = futex_wake(f, args->val, args->val3);
912 
913 		if (op_ret > 0) {
914 			op_ret = 0;
915 			nrwake = (int)(unsigned long)args->timeout;
916 
917 			if (f2 != NULL)
918 				op_ret += futex_wake(f2, nrwake, args->val3);
919 			else
920 				op_ret += futex_wake(f, nrwake, args->val3);
921 			ret += op_ret;
922 
923 		}
924 		if (f2 != NULL)
925 			futex_put(f2, NULL);
926 		futex_put(f, NULL);
927 		td->td_retval[0] = ret;
928 		break;
929 
930 	case LINUX_FUTEX_LOCK_PI:
931 		/* not yet implemented */
932 		linux_msg(td,
933 			  "linux_sys_futex: "
934 			  "op LINUX_FUTEX_LOCK_PI not implemented\n");
935 		LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi);
936 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
937 		return (ENOSYS);
938 
939 	case LINUX_FUTEX_UNLOCK_PI:
940 		/* not yet implemented */
941 		linux_msg(td,
942 			  "linux_sys_futex: "
943 			  "op LINUX_FUTEX_UNLOCK_PI not implemented\n");
944 		LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi);
945 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
946 		return (ENOSYS);
947 
948 	case LINUX_FUTEX_TRYLOCK_PI:
949 		/* not yet implemented */
950 		linux_msg(td,
951 			  "linux_sys_futex: "
952 			  "op LINUX_FUTEX_TRYLOCK_PI not implemented\n");
953 		LIN_SDT_PROBE0(futex, linux_sys_futex,
954 		    unimplemented_trylock_pi);
955 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
956 		return (ENOSYS);
957 
958 	case LINUX_FUTEX_REQUEUE:
959 
960 		/*
961 		 * Glibc does not use this operation since version 2.3.3,
962 		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
963 		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
964 		 * FUTEX_REQUEUE returned EINVAL.
965 		 */
966 		em = em_find(td->td_proc, EMUL_DONTLOCK);
967 		if ((em->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
968 			linux_msg(td,
969 				  "linux_sys_futex: "
970 				  "unsupported futex_requeue op\n");
971 			em->flags |= LINUX_XDEPR_REQUEUEOP;
972 			LIN_SDT_PROBE0(futex, linux_sys_futex,
973 			    deprecated_requeue);
974 		}
975 
976 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
977 		return (EINVAL);
978 
979 	case LINUX_FUTEX_WAIT_REQUEUE_PI:
980 		/* not yet implemented */
981 		linux_msg(td,
982 			  "linux_sys_futex: "
983 			  "op FUTEX_WAIT_REQUEUE_PI not implemented\n");
984 		LIN_SDT_PROBE0(futex, linux_sys_futex,
985 		    unimplemented_wait_requeue_pi);
986 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
987 		return (ENOSYS);
988 
989 	case LINUX_FUTEX_CMP_REQUEUE_PI:
990 		/* not yet implemented */
991 		linux_msg(td,
992 			    "linux_sys_futex: "
993 			    "op LINUX_FUTEX_CMP_REQUEUE_PI not implemented\n");
994 		LIN_SDT_PROBE0(futex, linux_sys_futex,
995 		    unimplemented_cmp_requeue_pi);
996 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
997 		return (ENOSYS);
998 
999 	default:
1000 		linux_msg(td,
1001 			  "linux_sys_futex: unknown op %d\n", args->op);
1002 		LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
1003 		    args->op);
1004 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
1005 		return (ENOSYS);
1006 	}
1007 
1008 	LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
1009 	return (error);
1010 }
1011 
1012 int
1013 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
1014 {
1015 	struct linux_emuldata *em;
1016 
1017 	LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);
1018 
1019 	if (args->len != sizeof(struct linux_robust_list_head)) {
1020 		LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
1021 		LIN_SDT_PROBE1(futex, linux_set_robust_list, return, EINVAL);
1022 		return (EINVAL);
1023 	}
1024 
1025 	em = em_find(td->td_proc, EMUL_DOLOCK);
1026 	em->robust_futexes = args->head;
1027 	EMUL_UNLOCK(&emul_lock);
1028 
1029 	LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
1030 	return (0);
1031 }
1032 
1033 int
1034 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
1035 {
1036 	struct linux_emuldata *em;
1037 	struct linux_robust_list_head *head;
1038 	l_size_t len = sizeof(struct linux_robust_list_head);
1039 	int error = 0;
1040 
1041 	LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
1042 
1043 	if (!args->pid) {
1044 		em = em_find(td->td_proc, EMUL_DONTLOCK);
1045 		head = em->robust_futexes;
1046 	} else {
1047 		struct proc *p;
1048 
1049 		p = pfind(args->pid);
1050 		if (p == NULL) {
1051 			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1052 			    ESRCH);
1053 			return (ESRCH);
1054 		}
1055 
1056 		em = em_find(p, EMUL_DONTLOCK);
1057 		/* XXX: ptrace? */
1058 		if (priv_check(td, PRIV_CRED_SETUID) ||
1059 		    priv_check(td, PRIV_CRED_SETEUID) ||
1060 		    p_candebug(td, p)) {
1061 			PROC_UNLOCK(p);
1062 
1063 			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
1064 			    EPERM);
1065 			return (EPERM);
1066 		}
1067 		head = em->robust_futexes;
1068 
1069 		PROC_UNLOCK(p);
1070 	}
1071 
1072 	error = copyout(&len, args->len, sizeof(l_size_t));
1073 	if (error) {
1074 		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1075 		    error);
1076 		LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
1077 		return (EFAULT);
1078 	}
1079 
1080 	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
1081 	if (error) {
1082 		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
1083 		    error);
1084 	}
1085 
1086 	LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
1087 	return (error);
1088 }
1089 
1090 static int
1091 handle_futex_death(struct proc *p, uint32_t *uaddr, int pi)
1092 {
1093 	uint32_t uval, nval, mval;
1094 	struct futex *f;
1095 	int error;
1096 
1097 	LIN_SDT_PROBE3(futex, handle_futex_death, entry, p, uaddr, pi);
1098 
1099 retry:
1100 	error = copyin(uaddr, &uval, 4);
1101 	if (error) {
1102 		LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
1103 		LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
1104 		return (EFAULT);
1105 	}
1106 	if ((uval & FUTEX_TID_MASK) == p->p_pid) {
1107 		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
1108 		nval = casuword32(uaddr, uval, mval);
1109 
1110 		if (nval == -1) {
1111 			LIN_SDT_PROBE1(futex, handle_futex_death, return,
1112 			    EFAULT);
1113 			return (EFAULT);
1114 		}
1115 
1116 		if (nval != uval)
1117 			goto retry;
1118 
1119 		if (!pi && (uval & FUTEX_WAITERS)) {
1120 			error = futex_get(uaddr, NULL, &f,
1121 			    FUTEX_DONTCREATE | FUTEX_SHARED);
1122 			if (error) {
1123 				LIN_SDT_PROBE1(futex, handle_futex_death,
1124 				    return, error);
1125 				return (error);
1126 			}
1127 			if (f != NULL) {
1128 				futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
1129 				futex_put(f, NULL);
1130 			}
1131 		}
1132 	}
1133 
1134 	LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
1135 	return (0);
1136 }
1137 
1138 static int
1139 fetch_robust_entry(struct linux_robust_list **entry,
1140     struct linux_robust_list **head, int *pi)
1141 {
1142 	l_ulong uentry;
1143 	int error;
1144 
1145 	LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);
1146 
1147 	error = copyin((const void *)head, &uentry, sizeof(l_ulong));
1148 	if (error) {
1149 		LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, error);
1150 		LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
1151 		return (EFAULT);
1152 	}
1153 
1154 	*entry = (void *)(uentry & ~1UL);
1155 	*pi = uentry & 1;
1156 
1157 	LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
1158 	return (0);
1159 }
1160 
1161 /* This walks the list of robust futexes releasing them. */
1162 void
1163 release_futexes(struct proc *p)
1164 {
1165 	struct linux_robust_list_head *head = NULL;
1166 	struct linux_robust_list *entry, *next_entry, *pending;
1167 	unsigned int limit = 2048, pi, next_pi, pip;
1168 	struct linux_emuldata *em;
1169 	l_long futex_offset;
1170 	int rc, error;
1171 
1172 	LIN_SDT_PROBE1(futex, release_futexes, entry, p);
1173 
1174 	em = em_find(p, EMUL_DONTLOCK);
1175 	head = em->robust_futexes;
1176 
1177 	if (head == NULL) {
1178 		LIN_SDT_PROBE0(futex, release_futexes, return);
1179 		return;
1180 	}
1181 
1182 	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
1183 		LIN_SDT_PROBE0(futex, release_futexes, return);
1184 		return;
1185 	}
1186 
1187 	error = copyin(&head->futex_offset, &futex_offset,
1188 	    sizeof(futex_offset));
1189 	if (error) {
1190 		LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
1191 		LIN_SDT_PROBE0(futex, release_futexes, return);
1192 		return;
1193 	}
1194 
1195 	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
1196 		LIN_SDT_PROBE0(futex, release_futexes, return);
1197 		return;
1198 	}
1199 
1200 	while (entry != &head->list) {
1201 		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
1202 
1203 		if (entry != pending)
1204 			if (handle_futex_death(p,
1205 			    (uint32_t *)((caddr_t)entry + futex_offset), pi)) {
1206 				LIN_SDT_PROBE0(futex, release_futexes, return);
1207 				return;
1208 			}
1209 		if (rc) {
1210 			LIN_SDT_PROBE0(futex, release_futexes, return);
1211 			return;
1212 		}
1213 
1214 		entry = next_entry;
1215 		pi = next_pi;
1216 
1217 		if (!--limit)
1218 			break;
1219 
1220 		sched_relinquish(curthread);
1221 	}
1222 
1223 	if (pending)
1224 		handle_futex_death(p, (uint32_t *)((caddr_t)pending + futex_offset), pip);
1225 
1226 	LIN_SDT_PROBE0(futex, release_futexes, return);
1227 }
1228