xref: /freebsd/sys/kern/kern_umtx.c (revision 1e4896b176ff664dc9c2fce5426bf2fdf8017a7d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
5  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
6  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
7  * All rights reserved.
8  *
9  * Portions of this software were developed by Konstantin Belousov
10  * under sponsorship from the FreeBSD Foundation.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice unmodified, this list of conditions, and the following
17  *    disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_umtx_profiling.h"
38 
39 #include <sys/param.h>
40 #include <sys/kernel.h>
41 #include <sys/fcntl.h>
42 #include <sys/file.h>
43 #include <sys/filedesc.h>
44 #include <sys/limits.h>
45 #include <sys/lock.h>
46 #include <sys/malloc.h>
47 #include <sys/mman.h>
48 #include <sys/mutex.h>
49 #include <sys/priv.h>
50 #include <sys/proc.h>
51 #include <sys/resource.h>
52 #include <sys/resourcevar.h>
53 #include <sys/rwlock.h>
54 #include <sys/sbuf.h>
55 #include <sys/sched.h>
56 #include <sys/smp.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysent.h>
59 #include <sys/systm.h>
60 #include <sys/sysproto.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/taskqueue.h>
63 #include <sys/time.h>
64 #include <sys/eventhandler.h>
65 #include <sys/umtx.h>
66 
67 #include <security/mac/mac_framework.h>
68 
69 #include <vm/vm.h>
70 #include <vm/vm_param.h>
71 #include <vm/pmap.h>
72 #include <vm/vm_map.h>
73 #include <vm/vm_object.h>
74 
75 #include <machine/atomic.h>
76 #include <machine/cpu.h>
77 
78 #ifdef COMPAT_FREEBSD32
79 #include <compat/freebsd32/freebsd32.h>
80 #include <compat/freebsd32/freebsd32_proto.h>
81 #endif
82 
83 #define _UMUTEX_TRY		1
84 #define _UMUTEX_WAIT		2
85 
86 #ifdef UMTX_PROFILING
87 #define	UPROF_PERC_BIGGER(w, f, sw, sf)					\
88 	(((w) > (sw)) || ((w) == (sw) && (f) > (sf)))
89 #endif
90 
91 /* Priority inheritance mutex info. */
92 struct umtx_pi {
93 	/* Owner thread */
94 	struct thread		*pi_owner;
95 
96 	/* Reference count */
97 	int			pi_refcount;
98 
99 	/* List entry linking PI mutexes held by a thread */
100 	TAILQ_ENTRY(umtx_pi)	pi_link;
101 
102 	/* List entry in hash */
103 	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
104 
105 	/* List for waiters */
106 	TAILQ_HEAD(,umtx_q)	pi_blocked;
107 
108 	/* Identify a userland lock object */
109 	struct umtx_key		pi_key;
110 };
111 
112 /* A userland synchronization object user. */
113 struct umtx_q {
114 	/* Linked list for the hash. */
115 	TAILQ_ENTRY(umtx_q)	uq_link;
116 
117 	/* Umtx key. */
118 	struct umtx_key		uq_key;
119 
120 	/* Umtx flags. */
121 	int			uq_flags;
122 #define UQF_UMTXQ	0x0001
123 
124 	/* The thread that is waiting. */
125 	struct thread		*uq_thread;
126 
127 	/*
128 	 * Blocked on PI mutex.  Reads may hold either the chain
129 	 * lock or umtx_lock; writes must hold both the chain lock
130 	 * and umtx_lock.
131 	 */
132 	struct umtx_pi		*uq_pi_blocked;
133 
134 	/* On blocked list */
135 	TAILQ_ENTRY(umtx_q)	uq_lockq;
136 
137 	/* PI mutexes we own that other threads contend for */
138 	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
139 
140 	/* Inherited priority from PP mutex */
141 	u_char			uq_inherited_pri;
142 
143 	/* Spare queue ready to be reused */
144 	struct umtxq_queue	*uq_spare_queue;
145 
146 	/* The queue we are on */
147 	struct umtxq_queue	*uq_cur_queue;
148 };
149 
150 TAILQ_HEAD(umtxq_head, umtx_q);
151 
152 /* Per-key wait-queue */
153 struct umtxq_queue {
154 	struct umtxq_head	head;
155 	struct umtx_key		key;
156 	LIST_ENTRY(umtxq_queue)	link;
157 	int			length;
158 };
159 
160 LIST_HEAD(umtxq_list, umtxq_queue);
161 
162 /* Userland lock object's wait-queue chain */
163 struct umtxq_chain {
164 	/* Lock for this chain. */
165 	struct mtx		uc_lock;
166 
167 	/* List of sleep queues. */
168 	struct umtxq_list	uc_queue[2];
169 #define UMTX_SHARED_QUEUE	0
170 #define UMTX_EXCLUSIVE_QUEUE	1
171 
172 	LIST_HEAD(, umtxq_queue) uc_spare_queue;
173 
174 	/* Busy flag */
175 	char			uc_busy;
176 
177 	/* Chain lock waiters */
178 	int			uc_waiters;
179 
180 	/* All PI in the list */
181 	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
182 
183 #ifdef UMTX_PROFILING
184 	u_int			length;
185 	u_int			max_length;
186 #endif
187 };
188 
189 #define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
190 
191 /*
192  * Don't propagate time-sharing priority; there is a security reason.
193  * A user could create a PI mutex, let thread A lock it, and let
194  * another thread B block on it.  Because B is sleeping, its priority
195  * would be boosted, and priority propagation would boost A's priority
196  * as well.  A's priority would then never be lowered, even if A were
197  * using 100% CPU, which is unfair to other processes.
198  */
199 
200 #define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
201 			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
202 			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
203 
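/*
 * Editor's illustration (not part of the original file): with the
 * priority ranges from <sys/priority.h>, UPRI() clamps every
 * time-sharing thread to PRI_MAX_TIMESHARE before propagation, while
 * real-time priorities pass through unchanged:
 *
 *	td->td_user_pri = PRI_MIN_TIMESHARE;	UPRI(td) == PRI_MAX_TIMESHARE
 *	td->td_user_pri = PRI_MIN_REALTIME;	UPRI(td) == PRI_MIN_REALTIME
 */
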
204 #define	GOLDEN_RATIO_PRIME	2654404609U
205 #ifndef	UMTX_CHAINS
206 #define	UMTX_CHAINS		512
207 #endif
208 #define	UMTX_SHIFTS		(__WORD_BIT - 9)
209 
210 #define	GET_SHARE(flags)	\
211     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
212 
213 #define BUSY_SPINS		200
214 
215 struct abs_timeout {
216 	int clockid;
217 	bool is_abs_real;	/* TIMER_ABSTIME && CLOCK_REALTIME* */
218 	struct timespec cur;
219 	struct timespec end;
220 };
221 
222 #ifdef COMPAT_FREEBSD32
223 _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32");
224 _Static_assert(__offsetof(struct umutex, m_spare[0]) ==
225     __offsetof(struct umutex32, m_spare[0]), "m_spare32");
226 #endif
227 
228 int umtx_shm_vnobj_persistent = 0;
229 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN,
230     &umtx_shm_vnobj_persistent, 0,
231     "False forces destruction of umtx attached to file, on last close");
232 static int umtx_max_rb = 1000;
233 SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN,
234     &umtx_max_rb, 0,
235     "Maximum number of robust mutexes allowed for each thread");
236 
237 static uma_zone_t		umtx_pi_zone;
238 static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
239 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
240 static int			umtx_pi_allocated;
241 
242 static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
243     "umtx debug");
244 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
245     &umtx_pi_allocated, 0, "Allocated umtx_pi");
246 static int umtx_verbose_rb = 1;
247 SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN,
248     &umtx_verbose_rb, 0,
249     "");
250 
251 #ifdef UMTX_PROFILING
252 static long max_length;
253 SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length");
254 static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
255     "umtx chain stats");
256 #endif
257 
258 static void abs_timeout_update(struct abs_timeout *timo);
259 
260 static void umtx_shm_init(void);
261 static void umtxq_sysinit(void *);
262 static void umtxq_hash(struct umtx_key *key);
263 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
264 static void umtxq_lock(struct umtx_key *key);
265 static void umtxq_unlock(struct umtx_key *key);
266 static void umtxq_busy(struct umtx_key *key);
267 static void umtxq_unbusy(struct umtx_key *key);
268 static void umtxq_insert_queue(struct umtx_q *uq, int q);
269 static void umtxq_remove_queue(struct umtx_q *uq, int q);
270 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *);
271 static int umtxq_count(struct umtx_key *key);
272 static struct umtx_pi *umtx_pi_alloc(int);
273 static void umtx_pi_free(struct umtx_pi *pi);
274 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags,
275     bool rb);
276 static void umtx_thread_cleanup(struct thread *td);
277 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
278     struct image_params *imgp __unused);
279 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
280 
281 #define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
282 #define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
283 #define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
284 
285 static struct mtx umtx_lock;
286 
287 #ifdef UMTX_PROFILING
288 static void
289 umtx_init_profiling(void)
290 {
291 	struct sysctl_oid *chain_oid;
292 	char chain_name[10];
293 	int i;
294 
295 	for (i = 0; i < UMTX_CHAINS; ++i) {
296 		snprintf(chain_name, sizeof(chain_name), "%d", i);
297 		chain_oid = SYSCTL_ADD_NODE(NULL,
298 		    SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO,
299 		    chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
300 		    "umtx hash stats");
301 		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
302 		    "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL);
303 		SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
304 		    "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL);
305 	}
306 }
307 
308 static int
309 sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS)
310 {
311 	char buf[512];
312 	struct sbuf sb;
313 	struct umtxq_chain *uc;
314 	u_int fract, i, j, tot, whole;
315 	u_int sf0, sf1, sf2, sf3, sf4;
316 	u_int si0, si1, si2, si3, si4;
317 	u_int sw0, sw1, sw2, sw3, sw4;
318 
319 	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
320 	for (i = 0; i < 2; i++) {
321 		tot = 0;
322 		for (j = 0; j < UMTX_CHAINS; ++j) {
323 			uc = &umtxq_chains[i][j];
324 			mtx_lock(&uc->uc_lock);
325 			tot += uc->max_length;
326 			mtx_unlock(&uc->uc_lock);
327 		}
328 		if (tot == 0)
329 			sbuf_printf(&sb, "%u) Empty ", i);
330 		else {
331 			sf0 = sf1 = sf2 = sf3 = sf4 = 0;
332 			si0 = si1 = si2 = si3 = si4 = 0;
333 			sw0 = sw1 = sw2 = sw3 = sw4 = 0;
334 			for (j = 0; j < UMTX_CHAINS; j++) {
335 				uc = &umtxq_chains[i][j];
336 				mtx_lock(&uc->uc_lock);
337 				whole = uc->max_length * 100;
338 				mtx_unlock(&uc->uc_lock);
339 				fract = (whole % tot) * 100;
340 				if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) {
341 					sf0 = fract;
342 					si0 = j;
343 					sw0 = whole;
344 				} else if (UPROF_PERC_BIGGER(whole, fract, sw1,
345 				    sf1)) {
346 					sf1 = fract;
347 					si1 = j;
348 					sw1 = whole;
349 				} else if (UPROF_PERC_BIGGER(whole, fract, sw2,
350 				    sf2)) {
351 					sf2 = fract;
352 					si2 = j;
353 					sw2 = whole;
354 				} else if (UPROF_PERC_BIGGER(whole, fract, sw3,
355 				    sf3)) {
356 					sf3 = fract;
357 					si3 = j;
358 					sw3 = whole;
359 				} else if (UPROF_PERC_BIGGER(whole, fract, sw4,
360 				    sf4)) {
361 					sf4 = fract;
362 					si4 = j;
363 					sw4 = whole;
364 				}
365 			}
366 			sbuf_printf(&sb, "queue %u:\n", i);
367 			sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot,
368 			    sf0 / tot, si0);
369 			sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot,
370 			    sf1 / tot, si1);
371 			sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot,
372 			    sf2 / tot, si2);
373 			sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot,
374 			    sf3 / tot, si3);
375 			sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot,
376 			    sf4 / tot, si4);
377 		}
378 	}
379 	sbuf_trim(&sb);
380 	sbuf_finish(&sb);
381 	sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
382 	sbuf_delete(&sb);
383 	return (0);
384 }
385 
386 static int
387 sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS)
388 {
389 	struct umtxq_chain *uc;
390 	u_int i, j;
391 	int clear, error;
392 
393 	clear = 0;
394 	error = sysctl_handle_int(oidp, &clear, 0, req);
395 	if (error != 0 || req->newptr == NULL)
396 		return (error);
397 
398 	if (clear != 0) {
399 		for (i = 0; i < 2; ++i) {
400 			for (j = 0; j < UMTX_CHAINS; ++j) {
401 				uc = &umtxq_chains[i][j];
402 				mtx_lock(&uc->uc_lock);
403 				uc->length = 0;
404 				uc->max_length = 0;
405 				mtx_unlock(&uc->uc_lock);
406 			}
407 		}
408 	}
409 	return (0);
410 }
411 
412 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear,
413     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
414     sysctl_debug_umtx_chains_clear, "I",
415     "Clear umtx chains statistics");
416 SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks,
417     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
418     sysctl_debug_umtx_chains_peaks, "A",
419     "Highest peaks in chains max length");
420 #endif
421 
422 static void
423 umtxq_sysinit(void *arg __unused)
424 {
425 	int i, j;
426 
427 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
428 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
429 	for (i = 0; i < 2; ++i) {
430 		for (j = 0; j < UMTX_CHAINS; ++j) {
431 			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
432 				 MTX_DEF | MTX_DUPOK);
433 			LIST_INIT(&umtxq_chains[i][j].uc_queue[0]);
434 			LIST_INIT(&umtxq_chains[i][j].uc_queue[1]);
435 			LIST_INIT(&umtxq_chains[i][j].uc_spare_queue);
436 			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
437 			umtxq_chains[i][j].uc_busy = 0;
438 			umtxq_chains[i][j].uc_waiters = 0;
439 #ifdef UMTX_PROFILING
440 			umtxq_chains[i][j].length = 0;
441 			umtxq_chains[i][j].max_length = 0;
442 #endif
443 		}
444 	}
445 #ifdef UMTX_PROFILING
446 	umtx_init_profiling();
447 #endif
448 	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF);
449 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
450 	    EVENTHANDLER_PRI_ANY);
451 	umtx_shm_init();
452 }
453 
454 struct umtx_q *
455 umtxq_alloc(void)
456 {
457 	struct umtx_q *uq;
458 
459 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
460 	uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX,
461 	    M_WAITOK | M_ZERO);
462 	TAILQ_INIT(&uq->uq_spare_queue->head);
463 	TAILQ_INIT(&uq->uq_pi_contested);
464 	uq->uq_inherited_pri = PRI_MAX;
465 	return (uq);
466 }
467 
468 void
469 umtxq_free(struct umtx_q *uq)
470 {
471 
472 	MPASS(uq->uq_spare_queue != NULL);
473 	free(uq->uq_spare_queue, M_UMTX);
474 	free(uq, M_UMTX);
475 }
476 
477 static inline void
478 umtxq_hash(struct umtx_key *key)
479 {
480 	unsigned n;
481 
482 	n = (uintptr_t)key->info.both.a + key->info.both.b;
483 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
484 }
485 
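/*
 * Editor's note (illustrative, not in the original): umtxq_hash() is a
 * multiplicative (Fibonacci-style) hash.  The two words of the key are
 * summed, multiplied by a constant close to 2^32 divided by the golden
 * ratio, and the high-order bits of the 32-bit product select one of
 * the UMTX_CHAINS buckets; with the defaults above this is:
 *
 *	hash = ((n * 2654404609U) >> (__WORD_BIT - 9)) % 512;
 */
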
486 static inline struct umtxq_chain *
487 umtxq_getchain(struct umtx_key *key)
488 {
489 
490 	if (key->type <= TYPE_SEM)
491 		return (&umtxq_chains[1][key->hash]);
492 	return (&umtxq_chains[0][key->hash]);
493 }
494 
495 /*
496  * Lock a chain.
497  */
498 static inline void
499 umtxq_lock(struct umtx_key *key)
500 {
501 	struct umtxq_chain *uc;
502 
503 	uc = umtxq_getchain(key);
504 	mtx_lock(&uc->uc_lock);
505 }
506 
507 /*
508  * Unlock a chain.
509  */
510 static inline void
511 umtxq_unlock(struct umtx_key *key)
512 {
513 	struct umtxq_chain *uc;
514 
515 	uc = umtxq_getchain(key);
516 	mtx_unlock(&uc->uc_lock);
517 }
518 
519 /*
520  * Set the chain to the busy state when the following operation
521  * may block (a kernel mutex cannot be used).
522  */
523 static inline void
524 umtxq_busy(struct umtx_key *key)
525 {
526 	struct umtxq_chain *uc;
527 
528 	uc = umtxq_getchain(key);
529 	mtx_assert(&uc->uc_lock, MA_OWNED);
530 	if (uc->uc_busy) {
531 #ifdef SMP
532 		if (smp_cpus > 1) {
533 			int count = BUSY_SPINS;
534 			if (count > 0) {
535 				umtxq_unlock(key);
536 				while (uc->uc_busy && --count > 0)
537 					cpu_spinwait();
538 				umtxq_lock(key);
539 			}
540 		}
541 #endif
542 		while (uc->uc_busy) {
543 			uc->uc_waiters++;
544 			msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
545 			uc->uc_waiters--;
546 		}
547 	}
548 	uc->uc_busy = 1;
549 }
550 
551 /*
552  * Unbusy a chain.
553  */
554 static inline void
555 umtxq_unbusy(struct umtx_key *key)
556 {
557 	struct umtxq_chain *uc;
558 
559 	uc = umtxq_getchain(key);
560 	mtx_assert(&uc->uc_lock, MA_OWNED);
561 	KASSERT(uc->uc_busy != 0, ("not busy"));
562 	uc->uc_busy = 0;
563 	if (uc->uc_waiters)
564 		wakeup_one(uc);
565 }
566 
567 static inline void
568 umtxq_unbusy_unlocked(struct umtx_key *key)
569 {
570 
571 	umtxq_lock(key);
572 	umtxq_unbusy(key);
573 	umtxq_unlock(key);
574 }
575 
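/*
 * Editor's sketch of the assumed busy/unbusy call pattern (it mirrors
 * the do_lock_*() routines below): the busy flag keeps the chain
 * logically locked across user-memory accesses, during which the chain
 * mutex itself cannot be held because those accesses may fault and
 * sleep:
 *
 *	umtxq_lock(&key);
 *	umtxq_busy(&key);
 *	umtxq_unlock(&key);
 *	error = casueword32(...);	access user memory, may sleep
 *	umtxq_unbusy_unlocked(&key);
 */
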
576 static struct umtxq_queue *
577 umtxq_queue_lookup(struct umtx_key *key, int q)
578 {
579 	struct umtxq_queue *uh;
580 	struct umtxq_chain *uc;
581 
582 	uc = umtxq_getchain(key);
583 	UMTXQ_LOCKED_ASSERT(uc);
584 	LIST_FOREACH(uh, &uc->uc_queue[q], link) {
585 		if (umtx_key_match(&uh->key, key))
586 			return (uh);
587 	}
588 
589 	return (NULL);
590 }
591 
592 static inline void
593 umtxq_insert_queue(struct umtx_q *uq, int q)
594 {
595 	struct umtxq_queue *uh;
596 	struct umtxq_chain *uc;
597 
598 	uc = umtxq_getchain(&uq->uq_key);
599 	UMTXQ_LOCKED_ASSERT(uc);
600 	KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue"));
601 	uh = umtxq_queue_lookup(&uq->uq_key, q);
602 	if (uh != NULL) {
603 		LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link);
604 	} else {
605 		uh = uq->uq_spare_queue;
606 		uh->key = uq->uq_key;
607 		LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link);
608 #ifdef UMTX_PROFILING
609 		uc->length++;
610 		if (uc->length > uc->max_length) {
611 			uc->max_length = uc->length;
612 			if (uc->max_length > max_length)
613 				max_length = uc->max_length;
614 		}
615 #endif
616 	}
617 	uq->uq_spare_queue = NULL;
618 
619 	TAILQ_INSERT_TAIL(&uh->head, uq, uq_link);
620 	uh->length++;
621 	uq->uq_flags |= UQF_UMTXQ;
622 	uq->uq_cur_queue = uh;
623 	return;
624 }
625 
626 static inline void
627 umtxq_remove_queue(struct umtx_q *uq, int q)
628 {
629 	struct umtxq_chain *uc;
630 	struct umtxq_queue *uh;
631 
632 	uc = umtxq_getchain(&uq->uq_key);
633 	UMTXQ_LOCKED_ASSERT(uc);
634 	if (uq->uq_flags & UQF_UMTXQ) {
635 		uh = uq->uq_cur_queue;
636 		TAILQ_REMOVE(&uh->head, uq, uq_link);
637 		uh->length--;
638 		uq->uq_flags &= ~UQF_UMTXQ;
639 		if (TAILQ_EMPTY(&uh->head)) {
640 			KASSERT(uh->length == 0,
641 			    ("inconsistent umtxq_queue length"));
642 #ifdef UMTX_PROFILING
643 			uc->length--;
644 #endif
645 			LIST_REMOVE(uh, link);
646 		} else {
647 			uh = LIST_FIRST(&uc->uc_spare_queue);
648 			KASSERT(uh != NULL, ("uc_spare_queue is empty"));
649 			LIST_REMOVE(uh, link);
650 		}
651 		uq->uq_spare_queue = uh;
652 		uq->uq_cur_queue = NULL;
653 	}
654 }
655 
656 /*
657  * Return the number of waiters for the key.
658  */
659 static int
660 umtxq_count(struct umtx_key *key)
661 {
662 	struct umtxq_queue *uh;
663 
664 	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
665 	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
666 	if (uh != NULL)
667 		return (uh->length);
668 	return (0);
669 }
670 
671 /*
672  * Return the number of PI waiters and, in *first, the first
673  * waiter.
674  */
675 static int
676 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
677 {
678 	struct umtxq_queue *uh;
679 
680 	*first = NULL;
681 	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
682 	uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE);
683 	if (uh != NULL) {
684 		*first = TAILQ_FIRST(&uh->head);
685 		return (uh->length);
686 	}
687 	return (0);
688 }
689 
690 /*
691  * Wake up threads waiting on a userland object.
692  */
693 
694 static int
695 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
696 {
697 	struct umtxq_queue *uh;
698 	struct umtx_q *uq;
699 	int ret;
700 
701 	ret = 0;
702 	UMTXQ_LOCKED_ASSERT(umtxq_getchain(key));
703 	uh = umtxq_queue_lookup(key, q);
704 	if (uh != NULL) {
705 		while ((uq = TAILQ_FIRST(&uh->head)) != NULL) {
706 			umtxq_remove_queue(uq, q);
707 			wakeup(uq);
708 			if (++ret >= n_wake)
709 				return (ret);
710 		}
711 	}
712 	return (ret);
713 }
714 
715 /*
716  * Wake up specified thread.
717  */
718 static inline void
719 umtxq_signal_thread(struct umtx_q *uq)
720 {
721 
722 	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
723 	umtxq_remove(uq);
724 	wakeup(uq);
725 }
726 
727 static inline int
728 tstohz(const struct timespec *tsp)
729 {
730 	struct timeval tv;
731 
732 	TIMESPEC_TO_TIMEVAL(&tv, tsp);
733 	return tvtohz(&tv);
734 }
735 
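/*
 * Editor's illustration (assumption: default hz = 1000): a timespec of
 * { tv_sec = 1, tv_nsec = 500000000 } converts to roughly 1.5 * hz
 * sleep ticks; tvtohz() rounds up, so the sleep never ends early.
 */
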
736 static void
737 abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute,
738 	const struct timespec *timeout)
739 {
740 
741 	timo->clockid = clockid;
742 	if (!absolute) {
743 		timo->is_abs_real = false;
744 		abs_timeout_update(timo);
745 		timespecadd(&timo->cur, timeout, &timo->end);
746 	} else {
747 		timo->end = *timeout;
748 		timo->is_abs_real = clockid == CLOCK_REALTIME ||
749 		    clockid == CLOCK_REALTIME_FAST ||
750 		    clockid == CLOCK_REALTIME_PRECISE;
751 		/*
752 		 * If is_abs_real, umtxq_sleep will read the clock
753 		 * after setting td_rtcgen; otherwise, read it here.
754 		 */
755 		if (!timo->is_abs_real) {
756 			abs_timeout_update(timo);
757 		}
758 	}
759 }
760 
761 static void
762 abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime)
763 {
764 
765 	abs_timeout_init(timo, umtxtime->_clockid,
766 	    (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout);
767 }
768 
769 static inline void
770 abs_timeout_update(struct abs_timeout *timo)
771 {
772 
773 	kern_clock_gettime(curthread, timo->clockid, &timo->cur);
774 }
775 
776 static int
777 abs_timeout_gethz(struct abs_timeout *timo)
778 {
779 	struct timespec tts;
780 
781 	if (timespeccmp(&timo->end, &timo->cur, <=))
782 		return (-1);
783 	timespecsub(&timo->end, &timo->cur, &tts);
784 	return (tstohz(&tts));
785 }
786 
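/*
 * Editor's sketch of the timeout pattern used by the sleeping
 * operations below (assumed, for illustration): the deadline is fixed
 * once at entry, and every wakeup re-reads the clock and converts the
 * remaining time into ticks for msleep():
 *
 *	struct abs_timeout timo;
 *
 *	abs_timeout_init2(&timo, umtxtime);
 *	for (;;) {
 *		abs_timeout_update(&timo);
 *		timo_hz = abs_timeout_gethz(&timo);	-1 once expired
 *		...
 *	}
 */
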
787 static uint32_t
788 umtx_unlock_val(uint32_t flags, bool rb)
789 {
790 
791 	if (rb)
792 		return (UMUTEX_RB_OWNERDEAD);
793 	else if ((flags & UMUTEX_NONCONSISTENT) != 0)
794 		return (UMUTEX_RB_NOTRECOV);
795 	else
796 		return (UMUTEX_UNOWNED);
797 
798 }
799 
800 /*
801  * Put the thread into a sleep state.  Before sleeping, check
802  * whether the thread was removed from the umtx queue.
803  */
804 static inline int
805 umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime)
806 {
807 	struct umtxq_chain *uc;
808 	int error, timo;
809 
810 	if (abstime != NULL && abstime->is_abs_real) {
811 		curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
812 		abs_timeout_update(abstime);
813 	}
814 
815 	uc = umtxq_getchain(&uq->uq_key);
816 	UMTXQ_LOCKED_ASSERT(uc);
817 	for (;;) {
818 		if (!(uq->uq_flags & UQF_UMTXQ)) {
819 			error = 0;
820 			break;
821 		}
822 		if (abstime != NULL) {
823 			timo = abs_timeout_gethz(abstime);
824 			if (timo < 0) {
825 				error = ETIMEDOUT;
826 				break;
827 			}
828 		} else
829 			timo = 0;
830 		error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
831 		if (error == EINTR || error == ERESTART) {
832 			umtxq_lock(&uq->uq_key);
833 			break;
834 		}
835 		if (abstime != NULL) {
836 			if (abstime->is_abs_real)
837 				curthread->td_rtcgen =
838 				    atomic_load_acq_int(&rtc_generation);
839 			abs_timeout_update(abstime);
840 		}
841 		umtxq_lock(&uq->uq_key);
842 	}
843 
844 	curthread->td_rtcgen = 0;
845 	return (error);
846 }
847 
848 /*
849  * Convert a userspace address into a unique logical address.
850  */
851 int
852 umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
853 {
854 	struct thread *td = curthread;
855 	vm_map_t map;
856 	vm_map_entry_t entry;
857 	vm_pindex_t pindex;
858 	vm_prot_t prot;
859 	boolean_t wired;
860 
861 	key->type = type;
862 	if (share == THREAD_SHARE) {
863 		key->shared = 0;
864 		key->info.private.vs = td->td_proc->p_vmspace;
865 		key->info.private.addr = (uintptr_t)addr;
866 	} else {
867 		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
868 		map = &td->td_proc->p_vmspace->vm_map;
869 		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
870 		    &entry, &key->info.shared.object, &pindex, &prot,
871 		    &wired) != KERN_SUCCESS) {
872 			return (EFAULT);
873 		}
874 
875 		if ((share == PROCESS_SHARE) ||
876 		    (share == AUTO_SHARE &&
877 		     VM_INHERIT_SHARE == entry->inheritance)) {
878 			key->shared = 1;
879 			key->info.shared.offset = (vm_offset_t)addr -
880 			    entry->start + entry->offset;
881 			vm_object_reference(key->info.shared.object);
882 		} else {
883 			key->shared = 0;
884 			key->info.private.vs = td->td_proc->p_vmspace;
885 			key->info.private.addr = (uintptr_t)addr;
886 		}
887 		vm_map_lookup_done(map, entry);
888 	}
889 
890 	umtxq_hash(key);
891 	return (0);
892 }
893 
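/*
 * Editor's note (illustration, not in the original): for THREAD_SHARE
 * the key is (vmspace, virtual address), so it matches only within one
 * process; for PROCESS_SHARE it is (vm_object, offset within the
 * object), so two processes mapping the same object at different
 * addresses still resolve to the same key.  Callers pair the lookup
 * with a release:
 *
 *	umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE, &key);
 *	...
 *	umtx_key_release(&key);
 */
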
894 /*
895  * Release key.
896  */
897 void
898 umtx_key_release(struct umtx_key *key)
899 {
900 	if (key->shared)
901 		vm_object_deallocate(key->info.shared.object);
902 }
903 
904 /*
905  * Fetch and compare the value; sleep on the address if it is unchanged.
906  */
907 static int
908 do_wait(struct thread *td, void *addr, u_long id,
909     struct _umtx_time *timeout, int compat32, int is_private)
910 {
911 	struct abs_timeout timo;
912 	struct umtx_q *uq;
913 	u_long tmp;
914 	uint32_t tmp32;
915 	int error = 0;
916 
917 	uq = td->td_umtxq;
918 	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
919 		is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
920 		return (error);
921 
922 	if (timeout != NULL)
923 		abs_timeout_init2(&timo, timeout);
924 
925 	umtxq_lock(&uq->uq_key);
926 	umtxq_insert(uq);
927 	umtxq_unlock(&uq->uq_key);
928 	if (compat32 == 0) {
929 		error = fueword(addr, &tmp);
930 		if (error != 0)
931 			error = EFAULT;
932 	} else {
933 		error = fueword32(addr, &tmp32);
934 		if (error == 0)
935 			tmp = tmp32;
936 		else
937 			error = EFAULT;
938 	}
939 	umtxq_lock(&uq->uq_key);
940 	if (error == 0) {
941 		if (tmp == id)
942 			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
943 			    NULL : &timo);
944 		if ((uq->uq_flags & UQF_UMTXQ) == 0)
945 			error = 0;
946 		else
947 			umtxq_remove(uq);
948 	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
949 		umtxq_remove(uq);
950 	}
951 	umtxq_unlock(&uq->uq_key);
952 	umtx_key_release(&uq->uq_key);
953 	if (error == ERESTART)
954 		error = EINTR;
955 	return (error);
956 }
957 
958 /*
959  * Wake up threads sleeping on the specified address.
960  */
961 int
962 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
963 {
964 	struct umtx_key key;
965 	int ret;
966 
967 	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
968 	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
969 		return (ret);
970 	umtxq_lock(&key);
971 	umtxq_signal(&key, n_wake);
972 	umtxq_unlock(&key);
973 	umtx_key_release(&key);
974 	return (0);
975 }
976 
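/*
 * Editor's sketch (hypothetical userland counterpart, not part of this
 * file): do_wait() and kern_umtx_wake() implement the futex-style
 * primitive behind the UMTX_OP_WAIT_UINT and UMTX_OP_WAKE operations.
 * A user program could use them roughly like this, error handling
 * omitted:
 *
 *	u_int flag = 0;
 *
 *	while (atomic_load_acq_int(&flag) == 0)			waiter
 *		_umtx_op(&flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);
 *
 *	atomic_store_rel_int(&flag, 1);				waker
 *	_umtx_op(&flag, UMTX_OP_WAKE, 1, NULL, NULL);
 */
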
977 /*
978  * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
979  */
980 static int
981 do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
982     struct _umtx_time *timeout, int mode)
983 {
984 	struct abs_timeout timo;
985 	struct umtx_q *uq;
986 	uint32_t owner, old, id;
987 	int error, rv;
988 
989 	id = td->td_tid;
990 	uq = td->td_umtxq;
991 	error = 0;
992 	if (timeout != NULL)
993 		abs_timeout_init2(&timo, timeout);
994 
995 	/*
996 	 * Care must be exercised when dealing with the umtx structure.  It
997 	 * can fault on any access.
998 	 */
999 	for (;;) {
1000 		rv = fueword32(&m->m_owner, &owner);
1001 		if (rv == -1)
1002 			return (EFAULT);
1003 		if (mode == _UMUTEX_WAIT) {
1004 			if (owner == UMUTEX_UNOWNED ||
1005 			    owner == UMUTEX_CONTESTED ||
1006 			    owner == UMUTEX_RB_OWNERDEAD ||
1007 			    owner == UMUTEX_RB_NOTRECOV)
1008 				return (0);
1009 		} else {
1010 			/*
1011 			 * Robust mutex terminated.  The kernel's duty is to
1012 			 * return EOWNERDEAD to userspace.  The
1013 			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
1014 			 * by the common userspace code.
1015 			 */
1016 			if (owner == UMUTEX_RB_OWNERDEAD) {
1017 				rv = casueword32(&m->m_owner,
1018 				    UMUTEX_RB_OWNERDEAD, &owner,
1019 				    id | UMUTEX_CONTESTED);
1020 				if (rv == -1)
1021 					return (EFAULT);
1022 				if (rv == 0) {
1023 					MPASS(owner == UMUTEX_RB_OWNERDEAD);
1024 					return (EOWNERDEAD); /* success */
1025 				}
1026 				MPASS(rv == 1);
1027 				rv = thread_check_susp(td, false);
1028 				if (rv != 0)
1029 					return (rv);
1030 				continue;
1031 			}
1032 			if (owner == UMUTEX_RB_NOTRECOV)
1033 				return (ENOTRECOVERABLE);
1034 
1035 			/*
1036 			 * Try the uncontested case.  This should be
1037 			 * done in userland.
1038 			 */
1039 			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
1040 			    &owner, id);
1041 			/* The address was invalid. */
1042 			if (rv == -1)
1043 				return (EFAULT);
1044 
1045 			/* The acquire succeeded. */
1046 			if (rv == 0) {
1047 				MPASS(owner == UMUTEX_UNOWNED);
1048 				return (0);
1049 			}
1050 
1051 			/*
1052 			 * If no one owns it but it is contested, try
1053 			 * to acquire it.
1054 			 */
1055 			MPASS(rv == 1);
1056 			if (owner == UMUTEX_CONTESTED) {
1057 				rv = casueword32(&m->m_owner,
1058 				    UMUTEX_CONTESTED, &owner,
1059 				    id | UMUTEX_CONTESTED);
1060 				/* The address was invalid. */
1061 				if (rv == -1)
1062 					return (EFAULT);
1063 				if (rv == 0) {
1064 					MPASS(owner == UMUTEX_CONTESTED);
1065 					return (0);
1066 				}
1067 				if (rv == 1) {
1068 					rv = thread_check_susp(td, false);
1069 					if (rv != 0)
1070 						return (rv);
1071 				}
1072 
1073 				/*
1074 				 * If this failed, the lock has
1075 				 * changed, restart.
1076 				 */
1077 				continue;
1078 			}
1079 
1080 			/* rv == 1 but not contested, likely store failure */
1081 			rv = thread_check_susp(td, false);
1082 			if (rv != 0)
1083 				return (rv);
1084 		}
1085 
1086 		if (mode == _UMUTEX_TRY)
1087 			return (EBUSY);
1088 
1089 		/*
1090 		 * If we caught a signal, we have retried and now
1091 		 * exit immediately.
1092 		 */
1093 		if (error != 0)
1094 			return (error);
1095 
1096 		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1097 		    GET_SHARE(flags), &uq->uq_key)) != 0)
1098 			return (error);
1099 
1100 		umtxq_lock(&uq->uq_key);
1101 		umtxq_busy(&uq->uq_key);
1102 		umtxq_insert(uq);
1103 		umtxq_unlock(&uq->uq_key);
1104 
1105 		/*
1106 		 * Set the contested bit so that a release in user space
1107 		 * knows to use the system call for unlock.  If this fails,
1108 		 * either someone else has acquired the lock or it has been
1109 		 * released.
1110 		 */
1111 		rv = casueword32(&m->m_owner, owner, &old,
1112 		    owner | UMUTEX_CONTESTED);
1113 
1114 		/* The address was invalid or casueword failed to store. */
1115 		if (rv == -1 || rv == 1) {
1116 			umtxq_lock(&uq->uq_key);
1117 			umtxq_remove(uq);
1118 			umtxq_unbusy(&uq->uq_key);
1119 			umtxq_unlock(&uq->uq_key);
1120 			umtx_key_release(&uq->uq_key);
1121 			if (rv == -1)
1122 				return (EFAULT);
1123 			if (rv == 1) {
1124 				rv = thread_check_susp(td, false);
1125 				if (rv != 0)
1126 					return (rv);
1127 			}
1128 			continue;
1129 		}
1130 
1131 		/*
1132 		 * We set the contested bit, so sleep.  Otherwise the lock
1133 		 * changed and we need to retry, or we lost a race to the
1134 		 * thread unlocking the umtx.
1135 		 */
1136 		umtxq_lock(&uq->uq_key);
1137 		umtxq_unbusy(&uq->uq_key);
1138 		MPASS(old == owner);
1139 		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
1140 		    NULL : &timo);
1141 		umtxq_remove(uq);
1142 		umtxq_unlock(&uq->uq_key);
1143 		umtx_key_release(&uq->uq_key);
1144 
1145 		if (error == 0)
1146 			error = thread_check_susp(td, false);
1147 	}
1148 
1149 	return (0);
1150 }
1151 
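/*
 * Editor's sketch (assumed userland fast path, implemented by libthr
 * rather than by this file): the kernel is only entered once the
 * uncontested compare-and-swap fails, or once an unlock observes
 * UMUTEX_CONTESTED:
 *
 *	if (atomic_cmpset_acq_32(&m->m_owner, UMUTEX_UNOWNED, tid))
 *		return (0);				got the lock
 *	_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 *
 *	if (atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED))
 *		return (0);				no waiters
 *	_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
 */
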
1152 /*
1153  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1154  */
1155 static int
1156 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
1157 {
1158 	struct umtx_key key;
1159 	uint32_t owner, old, id, newlock;
1160 	int error, count;
1161 
1162 	id = td->td_tid;
1163 
1164 again:
1165 	/*
1166 	 * Make sure we own this mtx.
1167 	 */
1168 	error = fueword32(&m->m_owner, &owner);
1169 	if (error == -1)
1170 		return (EFAULT);
1171 
1172 	if ((owner & ~UMUTEX_CONTESTED) != id)
1173 		return (EPERM);
1174 
1175 	newlock = umtx_unlock_val(flags, rb);
1176 	if ((owner & UMUTEX_CONTESTED) == 0) {
1177 		error = casueword32(&m->m_owner, owner, &old, newlock);
1178 		if (error == -1)
1179 			return (EFAULT);
1180 		if (error == 1) {
1181 			error = thread_check_susp(td, false);
1182 			if (error != 0)
1183 				return (error);
1184 			goto again;
1185 		}
1186 		MPASS(old == owner);
1187 		return (0);
1188 	}
1189 
1190 	/* We should only ever be in here for contested locks */
1191 	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1192 	    &key)) != 0)
1193 		return (error);
1194 
1195 	umtxq_lock(&key);
1196 	umtxq_busy(&key);
1197 	count = umtxq_count(&key);
1198 	umtxq_unlock(&key);
1199 
1200 	/*
1201 	 * When unlocking the umtx, it must be marked as unowned if
1202 	 * there is zero or one thread waiting for it.
1203 	 * Otherwise, it must be marked as contested.
1204 	 */
1205 	if (count > 1)
1206 		newlock |= UMUTEX_CONTESTED;
1207 	error = casueword32(&m->m_owner, owner, &old, newlock);
1208 	umtxq_lock(&key);
1209 	umtxq_signal(&key, 1);
1210 	umtxq_unbusy(&key);
1211 	umtxq_unlock(&key);
1212 	umtx_key_release(&key);
1213 	if (error == -1)
1214 		return (EFAULT);
1215 	if (error == 1) {
1216 		if (old != owner)
1217 			return (EINVAL);
1218 		error = thread_check_susp(td, false);
1219 		if (error != 0)
1220 			return (error);
1221 		goto again;
1222 	}
1223 	return (0);
1224 }
1225 
1226 /*
1227  * Check if the mutex is available and wake up a waiter,
1228  * for simple mutexes only.
1229  */
1230 static int
1231 do_wake_umutex(struct thread *td, struct umutex *m)
1232 {
1233 	struct umtx_key key;
1234 	uint32_t owner;
1235 	uint32_t flags;
1236 	int error;
1237 	int count;
1238 
1239 again:
1240 	error = fueword32(&m->m_owner, &owner);
1241 	if (error == -1)
1242 		return (EFAULT);
1243 
1244 	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
1245 	    owner != UMUTEX_RB_NOTRECOV)
1246 		return (0);
1247 
1248 	error = fueword32(&m->m_flags, &flags);
1249 	if (error == -1)
1250 		return (EFAULT);
1251 
1252 	/* We should only ever be in here for contested locks */
1253 	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1254 	    &key)) != 0)
1255 		return (error);
1256 
1257 	umtxq_lock(&key);
1258 	umtxq_busy(&key);
1259 	count = umtxq_count(&key);
1260 	umtxq_unlock(&key);
1261 
1262 	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
1263 	    owner != UMUTEX_RB_NOTRECOV) {
1264 		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
1265 		    UMUTEX_UNOWNED);
1266 		if (error == -1) {
1267 			error = EFAULT;
1268 		} else if (error == 1) {
1269 			umtxq_lock(&key);
1270 			umtxq_unbusy(&key);
1271 			umtxq_unlock(&key);
1272 			umtx_key_release(&key);
1273 			error = thread_check_susp(td, false);
1274 			if (error != 0)
1275 				return (error);
1276 			goto again;
1277 		}
1278 	}
1279 
1280 	umtxq_lock(&key);
1281 	if (error == 0 && count != 0) {
1282 		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
1283 		    owner == UMUTEX_RB_OWNERDEAD ||
1284 		    owner == UMUTEX_RB_NOTRECOV);
1285 		umtxq_signal(&key, 1);
1286 	}
1287 	umtxq_unbusy(&key);
1288 	umtxq_unlock(&key);
1289 	umtx_key_release(&key);
1290 	return (error);
1291 }
1292 
1293 /*
1294  * Check if the mutex has waiters and try to fix the contention bit.
1295  */
1296 static int
1297 do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags)
1298 {
1299 	struct umtx_key key;
1300 	uint32_t owner, old;
1301 	int type;
1302 	int error;
1303 	int count;
1304 
1305 	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT |
1306 	    UMUTEX_ROBUST)) {
1307 	case 0:
1308 	case UMUTEX_ROBUST:
1309 		type = TYPE_NORMAL_UMUTEX;
1310 		break;
1311 	case UMUTEX_PRIO_INHERIT:
1312 		type = TYPE_PI_UMUTEX;
1313 		break;
1314 	case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST):
1315 		type = TYPE_PI_ROBUST_UMUTEX;
1316 		break;
1317 	case UMUTEX_PRIO_PROTECT:
1318 		type = TYPE_PP_UMUTEX;
1319 		break;
1320 	case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST):
1321 		type = TYPE_PP_ROBUST_UMUTEX;
1322 		break;
1323 	default:
1324 		return (EINVAL);
1325 	}
1326 	if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0)
1327 		return (error);
1328 
1329 	owner = 0;
1330 	umtxq_lock(&key);
1331 	umtxq_busy(&key);
1332 	count = umtxq_count(&key);
1333 	umtxq_unlock(&key);
1334 
1335 	error = fueword32(&m->m_owner, &owner);
1336 	if (error == -1)
1337 		error = EFAULT;
1338 
1339 	/*
1340 	 * Only repair the contention bit if there is a waiter: that means
1341 	 * the mutex is still being referenced by userland code;
1342 	 * otherwise, don't update any memory.
1343 	 */
1344 	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
1345 	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
1346 		error = casueword32(&m->m_owner, owner, &old,
1347 		    owner | UMUTEX_CONTESTED);
1348 		if (error == -1) {
1349 			error = EFAULT;
1350 			break;
1351 		}
1352 		if (error == 0) {
1353 			MPASS(old == owner);
1354 			break;
1355 		}
1356 		owner = old;
1357 		error = thread_check_susp(td, false);
1358 	}
1359 
1360 	umtxq_lock(&key);
1361 	if (error == EFAULT) {
1362 		umtxq_signal(&key, INT_MAX);
1363 	} else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
1364 	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
1365 		umtxq_signal(&key, 1);
1366 	umtxq_unbusy(&key);
1367 	umtxq_unlock(&key);
1368 	umtx_key_release(&key);
1369 	return (error);
1370 }
1371 
1372 static inline struct umtx_pi *
1373 umtx_pi_alloc(int flags)
1374 {
1375 	struct umtx_pi *pi;
1376 
1377 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1378 	TAILQ_INIT(&pi->pi_blocked);
1379 	atomic_add_int(&umtx_pi_allocated, 1);
1380 	return (pi);
1381 }
1382 
1383 static inline void
1384 umtx_pi_free(struct umtx_pi *pi)
1385 {
1386 	uma_zfree(umtx_pi_zone, pi);
1387 	atomic_add_int(&umtx_pi_allocated, -1);
1388 }
1389 
1390 /*
1391  * Adjust the thread's position on a umtx_pi's blocked list after its
1392  * priority has been changed.
1393  */
1394 static int
1395 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1396 {
1397 	struct umtx_q *uq, *uq1, *uq2;
1398 	struct thread *td1;
1399 
1400 	mtx_assert(&umtx_lock, MA_OWNED);
1401 	if (pi == NULL)
1402 		return (0);
1403 
1404 	uq = td->td_umtxq;
1405 
1406 	/*
1407 	 * Check if the thread needs to be moved on the blocked chain.
1408 	 * It needs to be moved if either its priority is lower than
1409 	 * the previous thread's or higher than the next thread's.
1410 	 */
1411 	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1412 	uq2 = TAILQ_NEXT(uq, uq_lockq);
1413 	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1414 	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1415 		/*
1416 		 * Remove thread from blocked chain and determine where
1417 		 * it should be moved to.
1418 		 */
1419 		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1420 		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1421 			td1 = uq1->uq_thread;
1422 			MPASS(td1->td_proc->p_magic == P_MAGIC);
1423 			if (UPRI(td1) > UPRI(td))
1424 				break;
1425 		}
1426 
1427 		if (uq1 == NULL)
1428 			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1429 		else
1430 			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1431 	}
1432 	return (1);
1433 }
1434 
1435 static struct umtx_pi *
1436 umtx_pi_next(struct umtx_pi *pi)
1437 {
1438 	struct umtx_q *uq_owner;
1439 
1440 	if (pi->pi_owner == NULL)
1441 		return (NULL);
1442 	uq_owner = pi->pi_owner->td_umtxq;
1443 	if (uq_owner == NULL)
1444 		return (NULL);
1445 	return (uq_owner->uq_pi_blocked);
1446 }
1447 
1448 /*
1449  * Floyd's Cycle-Finding Algorithm.
1450  */
1451 static bool
1452 umtx_pi_check_loop(struct umtx_pi *pi)
1453 {
1454 	struct umtx_pi *pi1;	/* fast iterator */
1455 
1456 	mtx_assert(&umtx_lock, MA_OWNED);
1457 	if (pi == NULL)
1458 		return (false);
1459 	pi1 = pi;
1460 	for (;;) {
1461 		pi = umtx_pi_next(pi);
1462 		if (pi == NULL)
1463 			break;
1464 		pi1 = umtx_pi_next(pi1);
1465 		if (pi1 == NULL)
1466 			break;
1467 		pi1 = umtx_pi_next(pi1);
1468 		if (pi1 == NULL)
1469 			break;
1470 		if (pi == pi1)
1471 			return (true);
1472 	}
1473 	return (false);
1474 }
1475 
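/*
 * Editor's note on the cycle check above (illustration, not in the
 * original): it is the classic two-iterator test.  "pi" follows one
 * owner->blocked edge per step while "pi1" follows two; if userland
 * has built a deadlock cycle in the ownership graph, the fast iterator
 * must eventually catch the slow one inside the cycle, otherwise it
 * falls off the end of the chain first and the walk terminates.
 */
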
1476 /*
1477  * Propagate priority when a thread is blocked on a POSIX
1478  * PI mutex.
1479  */
1480 static void
1481 umtx_propagate_priority(struct thread *td)
1482 {
1483 	struct umtx_q *uq;
1484 	struct umtx_pi *pi;
1485 	int pri;
1486 
1487 	mtx_assert(&umtx_lock, MA_OWNED);
1488 	pri = UPRI(td);
1489 	uq = td->td_umtxq;
1490 	pi = uq->uq_pi_blocked;
1491 	if (pi == NULL)
1492 		return;
1493 	if (umtx_pi_check_loop(pi))
1494 		return;
1495 
1496 	for (;;) {
1497 		td = pi->pi_owner;
1498 		if (td == NULL || td == curthread)
1499 			return;
1500 
1501 		MPASS(td->td_proc != NULL);
1502 		MPASS(td->td_proc->p_magic == P_MAGIC);
1503 
1504 		thread_lock(td);
1505 		if (td->td_lend_user_pri > pri)
1506 			sched_lend_user_prio(td, pri);
1507 		else {
1508 			thread_unlock(td);
1509 			break;
1510 		}
1511 		thread_unlock(td);
1512 
1513 		/*
1514 		 * Pick up the lock that td is blocked on.
1515 		 */
1516 		uq = td->td_umtxq;
1517 		pi = uq->uq_pi_blocked;
1518 		if (pi == NULL)
1519 			break;
1520 		/* Resort td on the list if needed. */
1521 		umtx_pi_adjust_thread(pi, td);
1522 	}
1523 }
1524 
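/*
 * Editor's sketch of a propagation chain (hypothetical scenario): if a
 * real-time thread C blocks on a PI mutex owned by B while B is itself
 * blocked on a PI mutex owned by A, the loop above walks
 * C -> pi(B) -> B -> pi(A) -> A, lending C's priority to B and then to
 * A, and stops as soon as it reaches an owner that already runs at
 * that priority or better.
 */
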
1525 /*
1526  * Unpropagate priority for a PI mutex when a thread blocked on
1527  * it is interrupted by signal or resumed by others.
1528  */
1529 static void
1530 umtx_repropagate_priority(struct umtx_pi *pi)
1531 {
1532 	struct umtx_q *uq, *uq_owner;
1533 	struct umtx_pi *pi2;
1534 	int pri;
1535 
1536 	mtx_assert(&umtx_lock, MA_OWNED);
1537 
1538 	if (umtx_pi_check_loop(pi))
1539 		return;
1540 	while (pi != NULL && pi->pi_owner != NULL) {
1541 		pri = PRI_MAX;
1542 		uq_owner = pi->pi_owner->td_umtxq;
1543 
1544 		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1545 			uq = TAILQ_FIRST(&pi2->pi_blocked);
1546 			if (uq != NULL) {
1547 				if (pri > UPRI(uq->uq_thread))
1548 					pri = UPRI(uq->uq_thread);
1549 			}
1550 		}
1551 
1552 		if (pri > uq_owner->uq_inherited_pri)
1553 			pri = uq_owner->uq_inherited_pri;
1554 		thread_lock(pi->pi_owner);
1555 		sched_lend_user_prio(pi->pi_owner, pri);
1556 		thread_unlock(pi->pi_owner);
1557 		if ((pi = uq_owner->uq_pi_blocked) != NULL)
1558 			umtx_pi_adjust_thread(pi, uq_owner->uq_thread);
1559 	}
1560 }
1561 
1562 /*
1563  * Insert a PI mutex into the owner thread's list.
1564  */
1565 static void
1566 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1567 {
1568 	struct umtx_q *uq_owner;
1569 
1570 	uq_owner = owner->td_umtxq;
1571 	mtx_assert(&umtx_lock, MA_OWNED);
1572 	MPASS(pi->pi_owner == NULL);
1573 	pi->pi_owner = owner;
1574 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1575 }
1576 
1577 /*
1578  * Disown a PI mutex, and remove it from the owned list.
1579  */
1580 static void
1581 umtx_pi_disown(struct umtx_pi *pi)
1582 {
1583 
1584 	mtx_assert(&umtx_lock, MA_OWNED);
1585 	TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link);
1586 	pi->pi_owner = NULL;
1587 }
1588 
1589 /*
1590  * Claim ownership of a PI mutex.
1591  */
1592 static int
1593 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1594 {
1595 	struct umtx_q *uq;
1596 	int pri;
1597 
1598 	mtx_lock(&umtx_lock);
1599 	if (pi->pi_owner == owner) {
1600 		mtx_unlock(&umtx_lock);
1601 		return (0);
1602 	}
1603 
1604 	if (pi->pi_owner != NULL) {
1605 		/*
1606 		 * Userland may have already messed up the mutex, sigh.
1607 		 */
1608 		mtx_unlock(&umtx_lock);
1609 		return (EPERM);
1610 	}
1611 	umtx_pi_setowner(pi, owner);
1612 	uq = TAILQ_FIRST(&pi->pi_blocked);
1613 	if (uq != NULL) {
1614 		pri = UPRI(uq->uq_thread);
1615 		thread_lock(owner);
1616 		if (pri < UPRI(owner))
1617 			sched_lend_user_prio(owner, pri);
1618 		thread_unlock(owner);
1619 	}
1620 	mtx_unlock(&umtx_lock);
1621 	return (0);
1622 }
1623 
1624 /*
1625  * Adjust a thread's position on the blocked list of the PI mutex it
1626  * is blocked on; this may start a new round of priority propagation.
1627  */
1628 void
1629 umtx_pi_adjust(struct thread *td, u_char oldpri)
1630 {
1631 	struct umtx_q *uq;
1632 	struct umtx_pi *pi;
1633 
1634 	uq = td->td_umtxq;
1635 	mtx_lock(&umtx_lock);
1636 	/*
1637 	 * Pick up the lock that td is blocked on.
1638 	 */
1639 	pi = uq->uq_pi_blocked;
1640 	if (pi != NULL) {
1641 		umtx_pi_adjust_thread(pi, td);
1642 		umtx_repropagate_priority(pi);
1643 	}
1644 	mtx_unlock(&umtx_lock);
1645 }
1646 
1647 /*
1648  * Sleep on a PI mutex.
1649  */
1650 static int
1651 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
1652     const char *wmesg, struct abs_timeout *timo, bool shared)
1653 {
1654 	struct thread *td, *td1;
1655 	struct umtx_q *uq1;
1656 	int error, pri;
1657 #ifdef INVARIANTS
1658 	struct umtxq_chain *uc;
1659 
1660 	uc = umtxq_getchain(&pi->pi_key);
1661 #endif
1662 	error = 0;
1663 	td = uq->uq_thread;
1664 	KASSERT(td == curthread, ("inconsistent uq_thread"));
1665 	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
1666 	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
1667 	umtxq_insert(uq);
1668 	mtx_lock(&umtx_lock);
1669 	if (pi->pi_owner == NULL) {
1670 		mtx_unlock(&umtx_lock);
1671 		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
1672 		mtx_lock(&umtx_lock);
1673 		if (td1 != NULL) {
1674 			if (pi->pi_owner == NULL)
1675 				umtx_pi_setowner(pi, td1);
1676 			PROC_UNLOCK(td1->td_proc);
1677 		}
1678 	}
1679 
1680 	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1681 		pri = UPRI(uq1->uq_thread);
1682 		if (pri > UPRI(td))
1683 			break;
1684 	}
1685 
1686 	if (uq1 != NULL)
1687 		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1688 	else
1689 		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1690 
1691 	uq->uq_pi_blocked = pi;
1692 	thread_lock(td);
1693 	td->td_flags |= TDF_UPIBLOCKED;
1694 	thread_unlock(td);
1695 	umtx_propagate_priority(td);
1696 	mtx_unlock(&umtx_lock);
1697 	umtxq_unbusy(&uq->uq_key);
1698 
1699 	error = umtxq_sleep(uq, wmesg, timo);
1700 	umtxq_remove(uq);
1701 
1702 	mtx_lock(&umtx_lock);
1703 	uq->uq_pi_blocked = NULL;
1704 	thread_lock(td);
1705 	td->td_flags &= ~TDF_UPIBLOCKED;
1706 	thread_unlock(td);
1707 	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1708 	umtx_repropagate_priority(pi);
1709 	mtx_unlock(&umtx_lock);
1710 	umtxq_unlock(&uq->uq_key);
1711 
1712 	return (error);
1713 }
1714 
1715 /*
1716  * Add a reference to a PI mutex.
1717  */
1718 static void
1719 umtx_pi_ref(struct umtx_pi *pi)
1720 {
1721 
1722 	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
1723 	pi->pi_refcount++;
1724 }
1725 
1726 /*
1727  * Drop a reference to a PI mutex; when the reference count
1728  * reaches zero, its memory is freed.
1729  */
1730 static void
1731 umtx_pi_unref(struct umtx_pi *pi)
1732 {
1733 	struct umtxq_chain *uc;
1734 
1735 	uc = umtxq_getchain(&pi->pi_key);
1736 	UMTXQ_LOCKED_ASSERT(uc);
1737 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1738 	if (--pi->pi_refcount == 0) {
1739 		mtx_lock(&umtx_lock);
1740 		if (pi->pi_owner != NULL)
1741 			umtx_pi_disown(pi);
1742 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1743 			("blocked queue not empty"));
1744 		mtx_unlock(&umtx_lock);
1745 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1746 		umtx_pi_free(pi);
1747 	}
1748 }
1749 
1750 /*
1751  * Find a PI mutex in the hash table.
1752  */
1753 static struct umtx_pi *
1754 umtx_pi_lookup(struct umtx_key *key)
1755 {
1756 	struct umtxq_chain *uc;
1757 	struct umtx_pi *pi;
1758 
1759 	uc = umtxq_getchain(key);
1760 	UMTXQ_LOCKED_ASSERT(uc);
1761 
1762 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1763 		if (umtx_key_match(&pi->pi_key, key)) {
1764 			return (pi);
1765 		}
1766 	}
1767 	return (NULL);
1768 }
1769 
1770 /*
1771  * Insert a PI mutex into the hash table.
1772  */
1773 static inline void
1774 umtx_pi_insert(struct umtx_pi *pi)
1775 {
1776 	struct umtxq_chain *uc;
1777 
1778 	uc = umtxq_getchain(&pi->pi_key);
1779 	UMTXQ_LOCKED_ASSERT(uc);
1780 	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1781 }
1782 
1783 /*
1784  * Lock a PI mutex.
1785  */
1786 static int
1787 do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
1788     struct _umtx_time *timeout, int try)
1789 {
1790 	struct abs_timeout timo;
1791 	struct umtx_q *uq;
1792 	struct umtx_pi *pi, *new_pi;
1793 	uint32_t id, old_owner, owner, old;
1794 	int error, rv;
1795 
1796 	id = td->td_tid;
1797 	uq = td->td_umtxq;
1798 
1799 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
1800 	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
1801 	    &uq->uq_key)) != 0)
1802 		return (error);
1803 
1804 	if (timeout != NULL)
1805 		abs_timeout_init2(&timo, timeout);
1806 
1807 	umtxq_lock(&uq->uq_key);
1808 	pi = umtx_pi_lookup(&uq->uq_key);
1809 	if (pi == NULL) {
1810 		new_pi = umtx_pi_alloc(M_NOWAIT);
1811 		if (new_pi == NULL) {
1812 			umtxq_unlock(&uq->uq_key);
1813 			new_pi = umtx_pi_alloc(M_WAITOK);
1814 			umtxq_lock(&uq->uq_key);
1815 			pi = umtx_pi_lookup(&uq->uq_key);
1816 			if (pi != NULL) {
1817 				umtx_pi_free(new_pi);
1818 				new_pi = NULL;
1819 			}
1820 		}
1821 		if (new_pi != NULL) {
1822 			new_pi->pi_key = uq->uq_key;
1823 			umtx_pi_insert(new_pi);
1824 			pi = new_pi;
1825 		}
1826 	}
1827 	umtx_pi_ref(pi);
1828 	umtxq_unlock(&uq->uq_key);
1829 
1830 	/*
1831 	 * Care must be exercised when dealing with the umtx structure.  It
1832 	 * can fault on any access.
1833 	 */
1834 	for (;;) {
1835 		/*
1836 		 * Try the uncontested case.  This should be done in userland.
1837 		 */
1838 		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
1839 		/* The address was invalid. */
1840 		if (rv == -1) {
1841 			error = EFAULT;
1842 			break;
1843 		}
1844 		/* The acquire succeeded. */
1845 		if (rv == 0) {
1846 			MPASS(owner == UMUTEX_UNOWNED);
1847 			error = 0;
1848 			break;
1849 		}
1850 
1851 		if (owner == UMUTEX_RB_NOTRECOV) {
1852 			error = ENOTRECOVERABLE;
1853 			break;
1854 		}
1855 
1856 		/*
1857 		 * Avoid overwriting a possible error from sleep due to a
1858 		 * pending signal with the suspension check result.
1859 		 */
1860 		if (error == 0) {
1861 			error = thread_check_susp(td, true);
1862 			if (error != 0)
1863 				break;
1864 		}
1865 
1866 		/* If no one owns it but it is contested, try to acquire it. */
1867 		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
1868 			old_owner = owner;
1869 			rv = casueword32(&m->m_owner, owner, &owner,
1870 			    id | UMUTEX_CONTESTED);
1871 			/* The address was invalid. */
1872 			if (rv == -1) {
1873 				error = EFAULT;
1874 				break;
1875 			}
1876 			if (rv == 1) {
1877 				if (error == 0) {
1878 					error = thread_check_susp(td, true);
1879 					if (error != 0)
1880 						break;
1881 				}
1882 
1883 				/*
1884 				 * If this failed, the lock could have
1885 				 * changed; restart.
1886 				 */
1887 				continue;
1888 			}
1889 
1890 			MPASS(rv == 0);
1891 			MPASS(owner == old_owner);
1892 			umtxq_lock(&uq->uq_key);
1893 			umtxq_busy(&uq->uq_key);
1894 			error = umtx_pi_claim(pi, td);
1895 			umtxq_unbusy(&uq->uq_key);
1896 			umtxq_unlock(&uq->uq_key);
1897 			if (error != 0) {
1898 				/*
1899 				 * Since we're going to return an
1900 				 * error, restore the m_owner to its
1901 				 * previous, unowned state to avoid
1902 				 * compounding the problem.
1903 				 */
1904 				(void)casuword32(&m->m_owner,
1905 				    id | UMUTEX_CONTESTED, old_owner);
1906 			}
1907 			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
1908 				error = EOWNERDEAD;
1909 			break;
1910 		}
1911 
1912 		if ((owner & ~UMUTEX_CONTESTED) == id) {
1913 			error = EDEADLK;
1914 			break;
1915 		}
1916 
1917 		if (try != 0) {
1918 			error = EBUSY;
1919 			break;
1920 		}
1921 
1922 		/*
1923 		 * If we caught a signal, we have retried and now
1924 		 * exit immediately.
1925 		 */
1926 		if (error != 0)
1927 			break;
1928 
1929 		umtxq_lock(&uq->uq_key);
1930 		umtxq_busy(&uq->uq_key);
1931 		umtxq_unlock(&uq->uq_key);
1932 
1933 		/*
1934 		 * Set the contested bit so that a release in user space
1935 		 * knows to use the system call for unlock.  If this fails,
1936 		 * either someone else has acquired the lock or it has been
1937 		 * released.
1938 		 */
1939 		rv = casueword32(&m->m_owner, owner, &old, owner |
1940 		    UMUTEX_CONTESTED);
1941 
1942 		/* The address was invalid. */
1943 		if (rv == -1) {
1944 			umtxq_unbusy_unlocked(&uq->uq_key);
1945 			error = EFAULT;
1946 			break;
1947 		}
1948 		if (rv == 1) {
1949 			umtxq_unbusy_unlocked(&uq->uq_key);
1950 			error = thread_check_susp(td, true);
1951 			if (error != 0)
1952 				break;
1953 
1954 			/*
1955 			 * The lock changed and we need to retry, or we
1956 			 * lost a race to the thread unlocking the
1957 			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
1958 			 * value for owner is impossible here.
1959 			 */
1960 			continue;
1961 		}
1962 
1963 		umtxq_lock(&uq->uq_key);
1964 
1965 		/* We set the contested bit; sleep. */
1966 		MPASS(old == owner);
1967 		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1968 		    "umtxpi", timeout == NULL ? NULL : &timo,
1969 		    (flags & USYNC_PROCESS_SHARED) != 0);
1970 		if (error != 0)
1971 			continue;
1972 
1973 		error = thread_check_susp(td, false);
1974 		if (error != 0)
1975 			break;
1976 	}
1977 
1978 	umtxq_lock(&uq->uq_key);
1979 	umtx_pi_unref(pi);
1980 	umtxq_unlock(&uq->uq_key);
1981 
1982 	umtx_key_release(&uq->uq_key);
1983 	return (error);
1984 }
1985 
1986 /*
1987  * Unlock a PI mutex.
1988  */
1989 static int
1990 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
1991 {
1992 	struct umtx_key key;
1993 	struct umtx_q *uq_first, *uq_first2, *uq_me;
1994 	struct umtx_pi *pi, *pi2;
1995 	uint32_t id, new_owner, old, owner;
1996 	int count, error, pri;
1997 
1998 	id = td->td_tid;
1999 
2000 usrloop:
2001 	/*
2002 	 * Make sure we own this mtx.
2003 	 */
2004 	error = fueword32(&m->m_owner, &owner);
2005 	if (error == -1)
2006 		return (EFAULT);
2007 
2008 	if ((owner & ~UMUTEX_CONTESTED) != id)
2009 		return (EPERM);
2010 
2011 	new_owner = umtx_unlock_val(flags, rb);
2012 
2013 	/* This should be done in userland */
2014 	if ((owner & UMUTEX_CONTESTED) == 0) {
2015 		error = casueword32(&m->m_owner, owner, &old, new_owner);
2016 		if (error == -1)
2017 			return (EFAULT);
2018 		if (error == 1) {
2019 			error = thread_check_susp(td, true);
2020 			if (error != 0)
2021 				return (error);
2022 			goto usrloop;
2023 		}
2024 		if (old == owner)
2025 			return (0);
2026 		owner = old;
2027 	}
2028 
2029 	/* We should only ever be in here for contested locks */
2030 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2031 	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
2032 	    &key)) != 0)
2033 		return (error);
2034 
2035 	umtxq_lock(&key);
2036 	umtxq_busy(&key);
2037 	count = umtxq_count_pi(&key, &uq_first);
2038 	if (uq_first != NULL) {
2039 		mtx_lock(&umtx_lock);
2040 		pi = uq_first->uq_pi_blocked;
2041 		KASSERT(pi != NULL, ("pi == NULL?"));
2042 		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
2043 			mtx_unlock(&umtx_lock);
2044 			umtxq_unbusy(&key);
2045 			umtxq_unlock(&key);
2046 			umtx_key_release(&key);
2047 			/* userland messed up the mutex */
2048 			return (EPERM);
2049 		}
2050 		uq_me = td->td_umtxq;
2051 		if (pi->pi_owner == td)
2052 			umtx_pi_disown(pi);
2053 		/* Get the highest-priority thread which is still sleeping. */
2054 		uq_first = TAILQ_FIRST(&pi->pi_blocked);
2055 		while (uq_first != NULL &&
2056 		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
2057 			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
2058 		}
2059 		pri = PRI_MAX;
2060 		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
2061 			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
2062 			if (uq_first2 != NULL) {
2063 				if (pri > UPRI(uq_first2->uq_thread))
2064 					pri = UPRI(uq_first2->uq_thread);
2065 			}
2066 		}
2067 		thread_lock(td);
2068 		sched_lend_user_prio(td, pri);
2069 		thread_unlock(td);
2070 		mtx_unlock(&umtx_lock);
2071 		if (uq_first)
2072 			umtxq_signal_thread(uq_first);
2073 	} else {
2074 		pi = umtx_pi_lookup(&key);
2075 		/*
2076 		 * A umtx_pi can exist if a signal or timeout removed the
2077 		 * last waiter from the umtxq, but there is still
2078 		 * a thread in do_lock_pi() holding the umtx_pi.
2079 		 */
2080 		if (pi != NULL) {
2081 			/*
2082 			 * The umtx_pi can be unowned, such as when a thread
2083 			 * has just entered do_lock_pi(), allocated the
2084 			 * umtx_pi, and unlocked the umtxq.
2085 			 * If the current thread owns it, it must disown it.
2086 			 */
2087 			mtx_lock(&umtx_lock);
2088 			if (pi->pi_owner == td)
2089 				umtx_pi_disown(pi);
2090 			mtx_unlock(&umtx_lock);
2091 		}
2092 	}
2093 	umtxq_unlock(&key);
2094 
2095 	/*
2096 	 * When unlocking the umtx, it must be marked as unowned if
2097 	 * there are zero or one threads waiting for it.  Otherwise,
2098 	 * it must be marked as contested.
2099 	 */
2100 
2101 	if (count > 1)
2102 		new_owner |= UMUTEX_CONTESTED;
2103 again:
2104 	error = casueword32(&m->m_owner, owner, &old, new_owner);
2105 	if (error == 1) {
2106 		error = thread_check_susp(td, false);
2107 		if (error == 0)
2108 			goto again;
2109 	}
2110 	umtxq_unbusy_unlocked(&key);
2111 	umtx_key_release(&key);
2112 	if (error == -1)
2113 		return (EFAULT);
2114 	if (error == 0 && old != owner)
2115 		return (EINVAL);
2116 	return (error);
2117 }
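
/*
 * The matching userland unlock fast path, again only a hedged sketch:
 * while UMUTEX_CONTESTED is clear the owner can release the mutex with
 * a single CAS; once the bit is set, do_unlock_pi() above must run so
 * the kernel can pick and wake the next waiter:
 *
 *	static int
 *	pi_mutex_unlock_sketch(struct umutex *m, uint32_t tid)
 *	{
 *		// Uncontested release: tid -> UMUTEX_UNOWNED.
 *		if (atomic_cmpset_rel_32(&m->m_owner, tid, UMUTEX_UNOWNED))
 *			return (0);
 *		// Contested (or not ours): the kernel validates
 *		// ownership and hands the lock off.
 *		return (_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL));
 *	}
 */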
2118 
2119 /*
2120  * Lock a PP mutex.
2121  */
2122 static int
2123 do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
2124     struct _umtx_time *timeout, int try)
2125 {
2126 	struct abs_timeout timo;
2127 	struct umtx_q *uq, *uq2;
2128 	struct umtx_pi *pi;
2129 	uint32_t ceiling;
2130 	uint32_t owner, id;
2131 	int error, pri, old_inherited_pri, su, rv;
2132 
2133 	id = td->td_tid;
2134 	uq = td->td_umtxq;
2135 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2136 	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2137 	    &uq->uq_key)) != 0)
2138 		return (error);
2139 
2140 	if (timeout != NULL)
2141 		abs_timeout_init2(&timo, timeout);
2142 
2143 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2144 	for (;;) {
2145 		old_inherited_pri = uq->uq_inherited_pri;
2146 		umtxq_lock(&uq->uq_key);
2147 		umtxq_busy(&uq->uq_key);
2148 		umtxq_unlock(&uq->uq_key);
2149 
2150 		rv = fueword32(&m->m_ceilings[0], &ceiling);
2151 		if (rv == -1) {
2152 			error = EFAULT;
2153 			goto out;
2154 		}
2155 		ceiling = RTP_PRIO_MAX - ceiling;
2156 		if (ceiling > RTP_PRIO_MAX) {
2157 			error = EINVAL;
2158 			goto out;
2159 		}
2160 
2161 		mtx_lock(&umtx_lock);
2162 		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
2163 			mtx_unlock(&umtx_lock);
2164 			error = EINVAL;
2165 			goto out;
2166 		}
2167 		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
2168 			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
2169 			thread_lock(td);
2170 			if (uq->uq_inherited_pri < UPRI(td))
2171 				sched_lend_user_prio(td, uq->uq_inherited_pri);
2172 			thread_unlock(td);
2173 		}
2174 		mtx_unlock(&umtx_lock);
2175 
2176 		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2177 		    id | UMUTEX_CONTESTED);
2178 		/* The address was invalid. */
2179 		if (rv == -1) {
2180 			error = EFAULT;
2181 			break;
2182 		}
2183 		if (rv == 0) {
2184 			MPASS(owner == UMUTEX_CONTESTED);
2185 			error = 0;
2186 			break;
2187 		}
2188 		/* rv == 1 */
2189 		if (owner == UMUTEX_RB_OWNERDEAD) {
2190 			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
2191 			    &owner, id | UMUTEX_CONTESTED);
2192 			if (rv == -1) {
2193 				error = EFAULT;
2194 				break;
2195 			}
2196 			if (rv == 0) {
2197 				MPASS(owner == UMUTEX_RB_OWNERDEAD);
2198 				error = EOWNERDEAD; /* success */
2199 				break;
2200 			}
2201 
2202 			/*
2203 			 * rv == 1; only check for suspension if we
2204 			 * have not already caught a signal.  If we
2205 			 * get an error from the check, the same
2206 			 * condition is checked by the umtxq_sleep()
2207 			 * call below, so we should clear the error
2208 			 * to avoid skipping the last loop iteration.
2209 			 */
2210 			if (error == 0) {
2211 				error = thread_check_susp(td, false);
2212 				if (error == 0) {
2213 					if (try != 0)
2214 						error = EBUSY;
2215 					else
2216 						continue;
2217 				}
2218 				error = 0;
2219 			}
2220 		} else if (owner == UMUTEX_RB_NOTRECOV) {
2221 			error = ENOTRECOVERABLE;
2222 		}
2223 
2224 		if (try != 0)
2225 			error = EBUSY;
2226 
2227 		/*
2228 		 * If we caught a signal, we have retried and now
2229 		 * exit immediately.
2230 		 */
2231 		if (error != 0)
2232 			break;
2233 
2234 		umtxq_lock(&uq->uq_key);
2235 		umtxq_insert(uq);
2236 		umtxq_unbusy(&uq->uq_key);
2237 		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
2238 		    NULL : &timo);
2239 		umtxq_remove(uq);
2240 		umtxq_unlock(&uq->uq_key);
2241 
2242 		mtx_lock(&umtx_lock);
2243 		uq->uq_inherited_pri = old_inherited_pri;
2244 		pri = PRI_MAX;
2245 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2246 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2247 			if (uq2 != NULL) {
2248 				if (pri > UPRI(uq2->uq_thread))
2249 					pri = UPRI(uq2->uq_thread);
2250 			}
2251 		}
2252 		if (pri > uq->uq_inherited_pri)
2253 			pri = uq->uq_inherited_pri;
2254 		thread_lock(td);
2255 		sched_lend_user_prio(td, pri);
2256 		thread_unlock(td);
2257 		mtx_unlock(&umtx_lock);
2258 	}
2259 
2260 	if (error != 0 && error != EOWNERDEAD) {
2261 		mtx_lock(&umtx_lock);
2262 		uq->uq_inherited_pri = old_inherited_pri;
2263 		pri = PRI_MAX;
2264 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2265 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2266 			if (uq2 != NULL) {
2267 				if (pri > UPRI(uq2->uq_thread))
2268 					pri = UPRI(uq2->uq_thread);
2269 			}
2270 		}
2271 		if (pri > uq->uq_inherited_pri)
2272 			pri = uq->uq_inherited_pri;
2273 		thread_lock(td);
2274 		sched_lend_user_prio(td, pri);
2275 		thread_unlock(td);
2276 		mtx_unlock(&umtx_lock);
2277 	}
2278 
2279 out:
2280 	umtxq_unbusy_unlocked(&uq->uq_key);
2281 	umtx_key_release(&uq->uq_key);
2282 	return (error);
2283 }
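
/*
 * A note on the ceiling arithmetic above: a userland ceiling c in
 * [0, RTP_PRIO_MAX] is inverted by "ceiling = RTP_PRIO_MAX - ceiling"
 * before being added to PRI_MIN_REALTIME, so larger user ceilings map
 * to numerically smaller (stronger) kernel priorities.  For example,
 * with RTP_PRIO_MAX == 31:
 *
 *	c == 31  ->  PRI_MIN_REALTIME + 0	(strongest)
 *	c == 0   ->  PRI_MIN_REALTIME + 31	(weakest)
 *
 * Since the subtraction is performed on an unsigned value, a c larger
 * than RTP_PRIO_MAX wraps around and is caught by the
 * "ceiling > RTP_PRIO_MAX" check.
 */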
2284 
2285 /*
2286  * Unlock a PP mutex.
2287  */
2288 static int
2289 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
2290 {
2291 	struct umtx_key key;
2292 	struct umtx_q *uq, *uq2;
2293 	struct umtx_pi *pi;
2294 	uint32_t id, owner, rceiling;
2295 	int error, pri, new_inherited_pri, su;
2296 
2297 	id = td->td_tid;
2298 	uq = td->td_umtxq;
2299 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2300 
2301 	/*
2302 	 * Make sure we own this mtx.
2303 	 */
2304 	error = fueword32(&m->m_owner, &owner);
2305 	if (error == -1)
2306 		return (EFAULT);
2307 
2308 	if ((owner & ~UMUTEX_CONTESTED) != id)
2309 		return (EPERM);
2310 
2311 	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2312 	if (error != 0)
2313 		return (error);
2314 
2315 	if (rceiling == -1)
2316 		new_inherited_pri = PRI_MAX;
2317 	else {
2318 		rceiling = RTP_PRIO_MAX - rceiling;
2319 		if (rceiling > RTP_PRIO_MAX)
2320 			return (EINVAL);
2321 		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2322 	}
2323 
2324 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2325 	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2326 	    &key)) != 0)
2327 		return (error);
2328 	umtxq_lock(&key);
2329 	umtxq_busy(&key);
2330 	umtxq_unlock(&key);
2331 	/*
2332 	 * For a priority-protected mutex, always set the unlocked
2333 	 * state to UMUTEX_CONTESTED so that userland always enters
2334 	 * the kernel to lock the mutex.  This is necessary because
2335 	 * the thread priority has to be adjusted for such mutexes.
2336 	 */
2337 	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
2338 	    UMUTEX_CONTESTED);
2339 
2340 	umtxq_lock(&key);
2341 	if (error == 0)
2342 		umtxq_signal(&key, 1);
2343 	umtxq_unbusy(&key);
2344 	umtxq_unlock(&key);
2345 
2346 	if (error == -1)
2347 		error = EFAULT;
2348 	else {
2349 		mtx_lock(&umtx_lock);
2350 		if (su != 0)
2351 			uq->uq_inherited_pri = new_inherited_pri;
2352 		pri = PRI_MAX;
2353 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2354 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2355 			if (uq2 != NULL) {
2356 				if (pri > UPRI(uq2->uq_thread))
2357 					pri = UPRI(uq2->uq_thread);
2358 			}
2359 		}
2360 		if (pri > uq->uq_inherited_pri)
2361 			pri = uq->uq_inherited_pri;
2362 		thread_lock(td);
2363 		sched_lend_user_prio(td, pri);
2364 		thread_unlock(td);
2365 		mtx_unlock(&umtx_lock);
2366 	}
2367 	umtx_key_release(&key);
2368 	return (error);
2369 }
2370 
2371 static int
2372 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2373     uint32_t *old_ceiling)
2374 {
2375 	struct umtx_q *uq;
2376 	uint32_t flags, id, owner, save_ceiling;
2377 	int error, rv, rv1;
2378 
2379 	error = fueword32(&m->m_flags, &flags);
2380 	if (error == -1)
2381 		return (EFAULT);
2382 	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2383 		return (EINVAL);
2384 	if (ceiling > RTP_PRIO_MAX)
2385 		return (EINVAL);
2386 	id = td->td_tid;
2387 	uq = td->td_umtxq;
2388 	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
2389 	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
2390 	    &uq->uq_key)) != 0)
2391 		return (error);
2392 	for (;;) {
2393 		umtxq_lock(&uq->uq_key);
2394 		umtxq_busy(&uq->uq_key);
2395 		umtxq_unlock(&uq->uq_key);
2396 
2397 		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
2398 		if (rv == -1) {
2399 			error = EFAULT;
2400 			break;
2401 		}
2402 
2403 		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
2404 		    id | UMUTEX_CONTESTED);
2405 		if (rv == -1) {
2406 			error = EFAULT;
2407 			break;
2408 		}
2409 
2410 		if (rv == 0) {
2411 			MPASS(owner == UMUTEX_CONTESTED);
2412 			rv = suword32(&m->m_ceilings[0], ceiling);
2413 			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
2414 			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
2415 			break;
2416 		}
2417 
2418 		if ((owner & ~UMUTEX_CONTESTED) == id) {
2419 			rv = suword32(&m->m_ceilings[0], ceiling);
2420 			error = rv == 0 ? 0 : EFAULT;
2421 			break;
2422 		}
2423 
2424 		if (owner == UMUTEX_RB_OWNERDEAD) {
2425 			error = EOWNERDEAD;
2426 			break;
2427 		} else if (owner == UMUTEX_RB_NOTRECOV) {
2428 			error = ENOTRECOVERABLE;
2429 			break;
2430 		}
2431 
2432 		/*
2433 		 * If we caught a signal, we have retried and now
2434 		 * exit immediately.
2435 		 */
2436 		if (error != 0)
2437 			break;
2438 
2439 		/*
2440 		 * We set the contested bit, sleep.  Otherwise the lock
2441 		 * changed and we need to retry, or we lost a race to the
2442 		 * thread unlocking the umtx.
2443 		 */
2444 		umtxq_lock(&uq->uq_key);
2445 		umtxq_insert(uq);
2446 		umtxq_unbusy(&uq->uq_key);
2447 		error = umtxq_sleep(uq, "umtxpp", NULL);
2448 		umtxq_remove(uq);
2449 		umtxq_unlock(&uq->uq_key);
2450 	}
2451 	umtxq_lock(&uq->uq_key);
2452 	if (error == 0)
2453 		umtxq_signal(&uq->uq_key, INT_MAX);
2454 	umtxq_unbusy(&uq->uq_key);
2455 	umtxq_unlock(&uq->uq_key);
2456 	umtx_key_release(&uq->uq_key);
2457 	if (error == 0 && old_ceiling != NULL) {
2458 		rv = suword32(old_ceiling, save_ceiling);
2459 		error = rv == 0 ? 0 : EFAULT;
2460 	}
2461 	return (error);
2462 }
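
/*
 * Hedged usage sketch: a pthread_mutex_setprioceiling()-style wrapper
 * reaches this function through _umtx_op() with the new ceiling in
 * "val" and an optional pointer for the old ceiling in "uaddr1":
 *
 *	uint32_t old_ceiling;
 *
 *	// On success the previous ceiling is stored in old_ceiling.
 *	if (_umtx_op(m, UMTX_OP_SET_CEILING, new_ceiling,
 *	    &old_ceiling, NULL) == -1)
 *		err(1, "UMTX_OP_SET_CEILING");
 */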
2463 
2464 /*
2465  * Lock a userland POSIX mutex.
2466  */
2467 static int
2468 do_lock_umutex(struct thread *td, struct umutex *m,
2469     struct _umtx_time *timeout, int mode)
2470 {
2471 	uint32_t flags;
2472 	int error;
2473 
2474 	error = fueword32(&m->m_flags, &flags);
2475 	if (error == -1)
2476 		return (EFAULT);
2477 
2478 	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2479 	case 0:
2480 		error = do_lock_normal(td, m, flags, timeout, mode);
2481 		break;
2482 	case UMUTEX_PRIO_INHERIT:
2483 		error = do_lock_pi(td, m, flags, timeout, mode);
2484 		break;
2485 	case UMUTEX_PRIO_PROTECT:
2486 		error = do_lock_pp(td, m, flags, timeout, mode);
2487 		break;
2488 	default:
2489 		return (EINVAL);
2490 	}
2491 	if (timeout == NULL) {
2492 		if (error == EINTR && mode != _UMUTEX_WAIT)
2493 			error = ERESTART;
2494 	} else {
2495 		/* Timed-locking is not restarted. */
2496 		if (error == ERESTART)
2497 			error = EINTR;
2498 	}
2499 	return (error);
2500 }
2501 
2502 /*
2503  * Unlock a userland POSIX mutex.
2504  */
2505 static int
2506 do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
2507 {
2508 	uint32_t flags;
2509 	int error;
2510 
2511 	error = fueword32(&m->m_flags, &flags);
2512 	if (error == -1)
2513 		return (EFAULT);
2514 
2515 	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2516 	case 0:
2517 		return (do_unlock_normal(td, m, flags, rb));
2518 	case UMUTEX_PRIO_INHERIT:
2519 		return (do_unlock_pi(td, m, flags, rb));
2520 	case UMUTEX_PRIO_PROTECT:
2521 		return (do_unlock_pp(td, m, flags, rb));
2522 	}
2523 
2524 	return (EINVAL);
2525 }
2526 
2527 static int
2528 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2529     struct timespec *timeout, u_long wflags)
2530 {
2531 	struct abs_timeout timo;
2532 	struct umtx_q *uq;
2533 	uint32_t flags, clockid, hasw;
2534 	int error;
2535 
2536 	uq = td->td_umtxq;
2537 	error = fueword32(&cv->c_flags, &flags);
2538 	if (error == -1)
2539 		return (EFAULT);
2540 	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2541 	if (error != 0)
2542 		return (error);
2543 
2544 	if ((wflags & CVWAIT_CLOCKID) != 0) {
2545 		error = fueword32(&cv->c_clockid, &clockid);
2546 		if (error == -1) {
2547 			umtx_key_release(&uq->uq_key);
2548 			return (EFAULT);
2549 		}
2550 		if (clockid < CLOCK_REALTIME ||
2551 		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
2552 			/* Only predefined hardware clock ids are allowed. */
2553 			umtx_key_release(&uq->uq_key);
2554 			return (EINVAL);
2555 		}
2556 	} else {
2557 		clockid = CLOCK_REALTIME;
2558 	}
2559 
2560 	umtxq_lock(&uq->uq_key);
2561 	umtxq_busy(&uq->uq_key);
2562 	umtxq_insert(uq);
2563 	umtxq_unlock(&uq->uq_key);
2564 
2565 	/*
2566 	 * Set c_has_waiters to 1 before releasing the user mutex;
2567 	 * also avoid dirtying the cache line when unnecessary.
2568 	 */
2569 	error = fueword32(&cv->c_has_waiters, &hasw);
2570 	if (error == 0 && hasw == 0)
2571 		suword32(&cv->c_has_waiters, 1);
2572 
2573 	umtxq_unbusy_unlocked(&uq->uq_key);
2574 
2575 	error = do_unlock_umutex(td, m, false);
2576 
2577 	if (timeout != NULL)
2578 		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
2579 		    timeout);
2580 
2581 	umtxq_lock(&uq->uq_key);
2582 	if (error == 0) {
2583 		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
2584 		    NULL : &timo);
2585 	}
2586 
2587 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
2588 		error = 0;
2589 	else {
2590 		/*
2591 		 * This must be a timeout, an interruption by a signal,
2592 		 * or a spurious wakeup; clear the c_has_waiters flag
2593 		 * when necessary.
2594 		 */
2595 		umtxq_busy(&uq->uq_key);
2596 		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
2597 			int oldlen = uq->uq_cur_queue->length;
2598 			umtxq_remove(uq);
2599 			if (oldlen == 1) {
2600 				umtxq_unlock(&uq->uq_key);
2601 				suword32(&cv->c_has_waiters, 0);
2602 				umtxq_lock(&uq->uq_key);
2603 			}
2604 		}
2605 		umtxq_unbusy(&uq->uq_key);
2606 		if (error == ERESTART)
2607 			error = EINTR;
2608 	}
2609 
2610 	umtxq_unlock(&uq->uq_key);
2611 	umtx_key_release(&uq->uq_key);
2612 	return (error);
2613 }
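
/*
 * For reference, a hedged sketch of how a condition-variable wait is
 * driven from userland; the wrapper name is an assumption, and a real
 * implementation also handles timeouts and cancellation:
 *
 *	static int
 *	cv_wait_sketch(struct ucond *cv, struct umutex *m)
 *	{
 *		int error;
 *
 *		// The kernel queues us and only then unlocks m, so a
 *		// wakeup from do_cv_signal() cannot be lost.
 *		error = _umtx_op(cv, UMTX_OP_CV_WAIT, 0, m, NULL);
 *		// On return the mutex is no longer held; reacquire it
 *		// before returning to the caller.
 *		_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
 *		return (error);
 *	}
 */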
2614 
2615 /*
2616  * Signal a userland condition variable.
2617  */
2618 static int
2619 do_cv_signal(struct thread *td, struct ucond *cv)
2620 {
2621 	struct umtx_key key;
2622 	int error, cnt, nwake;
2623 	uint32_t flags;
2624 
2625 	error = fueword32(&cv->c_flags, &flags);
2626 	if (error == -1)
2627 		return (EFAULT);
2628 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2629 		return (error);
2630 	umtxq_lock(&key);
2631 	umtxq_busy(&key);
2632 	cnt = umtxq_count(&key);
2633 	nwake = umtxq_signal(&key, 1);
2634 	if (cnt <= nwake) {
2635 		umtxq_unlock(&key);
2636 		error = suword32(&cv->c_has_waiters, 0);
2637 		if (error == -1)
2638 			error = EFAULT;
2639 		umtxq_lock(&key);
2640 	}
2641 	umtxq_unbusy(&key);
2642 	umtxq_unlock(&key);
2643 	umtx_key_release(&key);
2644 	return (error);
2645 }
2646 
2647 static int
2648 do_cv_broadcast(struct thread *td, struct ucond *cv)
2649 {
2650 	struct umtx_key key;
2651 	int error;
2652 	uint32_t flags;
2653 
2654 	error = fueword32(&cv->c_flags, &flags);
2655 	if (error == -1)
2656 		return (EFAULT);
2657 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2658 		return (error);
2659 
2660 	umtxq_lock(&key);
2661 	umtxq_busy(&key);
2662 	umtxq_signal(&key, INT_MAX);
2663 	umtxq_unlock(&key);
2664 
2665 	error = suword32(&cv->c_has_waiters, 0);
2666 	if (error == -1)
2667 		error = EFAULT;
2668 
2669 	umtxq_unbusy_unlocked(&key);
2670 
2671 	umtx_key_release(&key);
2672 	return (error);
2673 }
2674 
2675 static int
2676 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
2677     struct _umtx_time *timeout)
2678 {
2679 	struct abs_timeout timo;
2680 	struct umtx_q *uq;
2681 	uint32_t flags, wrflags;
2682 	int32_t state, oldstate;
2683 	int32_t blocked_readers;
2684 	int error, error1, rv;
2685 
2686 	uq = td->td_umtxq;
2687 	error = fueword32(&rwlock->rw_flags, &flags);
2688 	if (error == -1)
2689 		return (EFAULT);
2690 	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2691 	if (error != 0)
2692 		return (error);
2693 
2694 	if (timeout != NULL)
2695 		abs_timeout_init2(&timo, timeout);
2696 
2697 	wrflags = URWLOCK_WRITE_OWNER;
2698 	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2699 		wrflags |= URWLOCK_WRITE_WAITERS;
2700 
2701 	for (;;) {
2702 		rv = fueword32(&rwlock->rw_state, &state);
2703 		if (rv == -1) {
2704 			umtx_key_release(&uq->uq_key);
2705 			return (EFAULT);
2706 		}
2707 
2708 		/* try to lock it */
2709 		while (!(state & wrflags)) {
2710 			if (__predict_false(URWLOCK_READER_COUNT(state) ==
2711 			    URWLOCK_MAX_READERS)) {
2712 				umtx_key_release(&uq->uq_key);
2713 				return (EAGAIN);
2714 			}
2715 			rv = casueword32(&rwlock->rw_state, state,
2716 			    &oldstate, state + 1);
2717 			if (rv == -1) {
2718 				umtx_key_release(&uq->uq_key);
2719 				return (EFAULT);
2720 			}
2721 			if (rv == 0) {
2722 				MPASS(oldstate == state);
2723 				umtx_key_release(&uq->uq_key);
2724 				return (0);
2725 			}
2726 			error = thread_check_susp(td, true);
2727 			if (error != 0)
2728 				break;
2729 			state = oldstate;
2730 		}
2731 
2732 		if (error)
2733 			break;
2734 
2735 		/* grab monitor lock */
2736 		umtxq_lock(&uq->uq_key);
2737 		umtxq_busy(&uq->uq_key);
2738 		umtxq_unlock(&uq->uq_key);
2739 
2740 		/*
2741 		 * Re-read the state, in case it changed between the
2742 		 * try-lock above and the check below.
2743 		 */
2744 		rv = fueword32(&rwlock->rw_state, &state);
2745 		if (rv == -1)
2746 			error = EFAULT;
2747 
2748 		/* set read contention bit */
2749 		while (error == 0 && (state & wrflags) &&
2750 		    !(state & URWLOCK_READ_WAITERS)) {
2751 			rv = casueword32(&rwlock->rw_state, state,
2752 			    &oldstate, state | URWLOCK_READ_WAITERS);
2753 			if (rv == -1) {
2754 				error = EFAULT;
2755 				break;
2756 			}
2757 			if (rv == 0) {
2758 				MPASS(oldstate == state);
2759 				goto sleep;
2760 			}
2761 			state = oldstate;
2762 			error = thread_check_susp(td, false);
2763 			if (error != 0)
2764 				break;
2765 		}
2766 		if (error != 0) {
2767 			umtxq_unbusy_unlocked(&uq->uq_key);
2768 			break;
2769 		}
2770 
2771 		/* The state changed while setting the flags; restart. */
2772 		if (!(state & wrflags)) {
2773 			umtxq_unbusy_unlocked(&uq->uq_key);
2774 			error = thread_check_susp(td, true);
2775 			if (error != 0)
2776 				break;
2777 			continue;
2778 		}
2779 
2780 sleep:
2781 		/*
2782 		 * The contention bit is set; before sleeping, increase
2783 		 * the read-waiter count.
2784 		 */
2785 		rv = fueword32(&rwlock->rw_blocked_readers,
2786 		    &blocked_readers);
2787 		if (rv == -1) {
2788 			umtxq_unbusy_unlocked(&uq->uq_key);
2789 			error = EFAULT;
2790 			break;
2791 		}
2792 		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2793 
2794 		while (state & wrflags) {
2795 			umtxq_lock(&uq->uq_key);
2796 			umtxq_insert(uq);
2797 			umtxq_unbusy(&uq->uq_key);
2798 
2799 			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
2800 			    NULL : &timo);
2801 
2802 			umtxq_busy(&uq->uq_key);
2803 			umtxq_remove(uq);
2804 			umtxq_unlock(&uq->uq_key);
2805 			if (error)
2806 				break;
2807 			rv = fueword32(&rwlock->rw_state, &state);
2808 			if (rv == -1) {
2809 				error = EFAULT;
2810 				break;
2811 			}
2812 		}
2813 
2814 		/* Decrease the read-waiter count; maybe clear the contention bit. */
2815 		rv = fueword32(&rwlock->rw_blocked_readers,
2816 		    &blocked_readers);
2817 		if (rv == -1) {
2818 			umtxq_unbusy_unlocked(&uq->uq_key);
2819 			error = EFAULT;
2820 			break;
2821 		}
2822 		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2823 		if (blocked_readers == 1) {
2824 			rv = fueword32(&rwlock->rw_state, &state);
2825 			if (rv == -1) {
2826 				umtxq_unbusy_unlocked(&uq->uq_key);
2827 				error = EFAULT;
2828 				break;
2829 			}
2830 			for (;;) {
2831 				rv = casueword32(&rwlock->rw_state, state,
2832 				    &oldstate, state & ~URWLOCK_READ_WAITERS);
2833 				if (rv == -1) {
2834 					error = EFAULT;
2835 					break;
2836 				}
2837 				if (rv == 0) {
2838 					MPASS(oldstate == state);
2839 					break;
2840 				}
2841 				state = oldstate;
2842 				error1 = thread_check_susp(td, false);
2843 				if (error1 != 0) {
2844 					if (error == 0)
2845 						error = error1;
2846 					break;
2847 				}
2848 			}
2849 		}
2850 
2851 		umtxq_unbusy_unlocked(&uq->uq_key);
2852 		if (error != 0)
2853 			break;
2854 	}
2855 	umtx_key_release(&uq->uq_key);
2856 	if (error == ERESTART)
2857 		error = EINTR;
2858 	return (error);
2859 }
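
/*
 * The rw_state word encodes the reader count in its low bits together
 * with the URWLOCK_WRITE_OWNER, URWLOCK_WRITE_WAITERS, and
 * URWLOCK_READ_WAITERS flags.  A hedged sketch of the userland
 * read-lock fast path that the slow path above backs up (a real
 * implementation also honors the writer-preference flags):
 *
 *	int32_t state = rw->rw_state;
 *
 *	while ((state & URWLOCK_WRITE_OWNER) == 0 &&
 *	    URWLOCK_READER_COUNT(state) < URWLOCK_MAX_READERS) {
 *		// Bump the reader count; the flag bits are untouched.
 *		if (atomic_cmpset_acq_32(&rw->rw_state, state, state + 1))
 *			return (0);
 *		state = rw->rw_state;
 *	}
 *	// A writer owns the lock: fall back to UMTX_OP_RW_RDLOCK,
 *	// which sets URWLOCK_READ_WAITERS and sleeps as above.
 */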
2860 
2861 static int
2862 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, struct _umtx_time *timeout)
2863 {
2864 	struct abs_timeout timo;
2865 	struct umtx_q *uq;
2866 	uint32_t flags;
2867 	int32_t state, oldstate;
2868 	int32_t blocked_writers;
2869 	int32_t blocked_readers;
2870 	int error, error1, rv;
2871 
2872 	uq = td->td_umtxq;
2873 	error = fueword32(&rwlock->rw_flags, &flags);
2874 	if (error == -1)
2875 		return (EFAULT);
2876 	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2877 	if (error != 0)
2878 		return (error);
2879 
2880 	if (timeout != NULL)
2881 		abs_timeout_init2(&timo, timeout);
2882 
2883 	blocked_readers = 0;
2884 	for (;;) {
2885 		rv = fueword32(&rwlock->rw_state, &state);
2886 		if (rv == -1) {
2887 			umtx_key_release(&uq->uq_key);
2888 			return (EFAULT);
2889 		}
2890 		while ((state & URWLOCK_WRITE_OWNER) == 0 &&
2891 		    URWLOCK_READER_COUNT(state) == 0) {
2892 			rv = casueword32(&rwlock->rw_state, state,
2893 			    &oldstate, state | URWLOCK_WRITE_OWNER);
2894 			if (rv == -1) {
2895 				umtx_key_release(&uq->uq_key);
2896 				return (EFAULT);
2897 			}
2898 			if (rv == 0) {
2899 				MPASS(oldstate == state);
2900 				umtx_key_release(&uq->uq_key);
2901 				return (0);
2902 			}
2903 			state = oldstate;
2904 			error = thread_check_susp(td, true);
2905 			if (error != 0)
2906 				break;
2907 		}
2908 
2909 		if (error) {
2910 			if ((state & (URWLOCK_WRITE_OWNER |
2911 			    URWLOCK_WRITE_WAITERS)) == 0 &&
2912 			    blocked_readers != 0) {
2913 				umtxq_lock(&uq->uq_key);
2914 				umtxq_busy(&uq->uq_key);
2915 				umtxq_signal_queue(&uq->uq_key, INT_MAX,
2916 				    UMTX_SHARED_QUEUE);
2917 				umtxq_unbusy(&uq->uq_key);
2918 				umtxq_unlock(&uq->uq_key);
2919 			}
2920 
2921 			break;
2922 		}
2923 
2924 		/* grab monitor lock */
2925 		umtxq_lock(&uq->uq_key);
2926 		umtxq_busy(&uq->uq_key);
2927 		umtxq_unlock(&uq->uq_key);
2928 
2929 		/*
2930 		 * Re-read the state, in case it changed between the
2931 		 * try-lock above and the check below.
2932 		 */
2933 		rv = fueword32(&rwlock->rw_state, &state);
2934 		if (rv == -1)
2935 			error = EFAULT;
2936 
2937 		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
2938 		    URWLOCK_READER_COUNT(state) != 0) &&
2939 		    (state & URWLOCK_WRITE_WAITERS) == 0) {
2940 			rv = casueword32(&rwlock->rw_state, state,
2941 			    &oldstate, state | URWLOCK_WRITE_WAITERS);
2942 			if (rv == -1) {
2943 				error = EFAULT;
2944 				break;
2945 			}
2946 			if (rv == 0) {
2947 				MPASS(oldstate == state);
2948 				goto sleep;
2949 			}
2950 			state = oldstate;
2951 			error = thread_check_susp(td, false);
2952 			if (error != 0)
2953 				break;
2954 		}
2955 		if (error != 0) {
2956 			umtxq_unbusy_unlocked(&uq->uq_key);
2957 			break;
2958 		}
2959 
2960 		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
2961 		    URWLOCK_READER_COUNT(state) == 0) {
2962 			umtxq_unbusy_unlocked(&uq->uq_key);
2963 			error = thread_check_susp(td, false);
2964 			if (error != 0)
2965 				break;
2966 			continue;
2967 		}
2968 sleep:
2969 		rv = fueword32(&rwlock->rw_blocked_writers,
2970 		    &blocked_writers);
2971 		if (rv == -1) {
2972 			umtxq_unbusy_unlocked(&uq->uq_key);
2973 			error = EFAULT;
2974 			break;
2975 		}
2976 		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);
2977 
2978 		while ((state & URWLOCK_WRITE_OWNER) ||
2979 		    URWLOCK_READER_COUNT(state) != 0) {
2980 			umtxq_lock(&uq->uq_key);
2981 			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2982 			umtxq_unbusy(&uq->uq_key);
2983 
2984 			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
2985 			    NULL : &timo);
2986 
2987 			umtxq_busy(&uq->uq_key);
2988 			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2989 			umtxq_unlock(&uq->uq_key);
2990 			if (error)
2991 				break;
2992 			rv = fueword32(&rwlock->rw_state, &state);
2993 			if (rv == -1) {
2994 				error = EFAULT;
2995 				break;
2996 			}
2997 		}
2998 
2999 		rv = fueword32(&rwlock->rw_blocked_writers,
3000 		    &blocked_writers);
3001 		if (rv == -1) {
3002 			umtxq_unbusy_unlocked(&uq->uq_key);
3003 			error = EFAULT;
3004 			break;
3005 		}
3006 		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
3007 		if (blocked_writers == 1) {
3008 			rv = fueword32(&rwlock->rw_state, &state);
3009 			if (rv == -1) {
3010 				umtxq_unbusy_unlocked(&uq->uq_key);
3011 				error = EFAULT;
3012 				break;
3013 			}
3014 			for (;;) {
3015 				rv = casueword32(&rwlock->rw_state, state,
3016 				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
3017 				if (rv == -1) {
3018 					error = EFAULT;
3019 					break;
3020 				}
3021 				if (rv == 0) {
3022 					MPASS(oldstate == state);
3023 					break;
3024 				}
3025 				state = oldstate;
3026 				error1 = thread_check_susp(td, false);
3027 				/*
3028 				 * We are leaving the URWLOCK_WRITE_WAITERS
3029 				 * flag behind, but this should not harm
3030 				 * correctness.
3031 				 */
3032 				if (error1 != 0) {
3033 					if (error == 0)
3034 						error = error1;
3035 					break;
3036 				}
3037 			}
3038 			rv = fueword32(&rwlock->rw_blocked_readers,
3039 			    &blocked_readers);
3040 			if (rv == -1) {
3041 				umtxq_unbusy_unlocked(&uq->uq_key);
3042 				error = EFAULT;
3043 				break;
3044 			}
3045 		} else
3046 			blocked_readers = 0;
3047 
3048 		umtxq_unbusy_unlocked(&uq->uq_key);
3049 	}
3050 
3051 	umtx_key_release(&uq->uq_key);
3052 	if (error == ERESTART)
3053 		error = EINTR;
3054 	return (error);
3055 }
3056 
3057 static int
3058 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
3059 {
3060 	struct umtx_q *uq;
3061 	uint32_t flags;
3062 	int32_t state, oldstate;
3063 	int error, rv, q, count;
3064 
3065 	uq = td->td_umtxq;
3066 	error = fueword32(&rwlock->rw_flags, &flags);
3067 	if (error == -1)
3068 		return (EFAULT);
3069 	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
3070 	if (error != 0)
3071 		return (error);
3072 
3073 	error = fueword32(&rwlock->rw_state, &state);
3074 	if (error == -1) {
3075 		error = EFAULT;
3076 		goto out;
3077 	}
3078 	if (state & URWLOCK_WRITE_OWNER) {
3079 		for (;;) {
3080 			rv = casueword32(&rwlock->rw_state, state,
3081 			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
3082 			if (rv == -1) {
3083 				error = EFAULT;
3084 				goto out;
3085 			}
3086 			if (rv == 1) {
3087 				state = oldstate;
3088 				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
3089 					error = EPERM;
3090 					goto out;
3091 				}
3092 				error = thread_check_susp(td, true);
3093 				if (error != 0)
3094 					goto out;
3095 			} else
3096 				break;
3097 		}
3098 	} else if (URWLOCK_READER_COUNT(state) != 0) {
3099 		for (;;) {
3100 			rv = casueword32(&rwlock->rw_state, state,
3101 			    &oldstate, state - 1);
3102 			if (rv == -1) {
3103 				error = EFAULT;
3104 				goto out;
3105 			}
3106 			if (rv == 1) {
3107 				state = oldstate;
3108 				if (URWLOCK_READER_COUNT(oldstate) == 0) {
3109 					error = EPERM;
3110 					goto out;
3111 				}
3112 				error = thread_check_susp(td, true);
3113 				if (error != 0)
3114 					goto out;
3115 			} else
3116 				break;
3117 		}
3118 	} else {
3119 		error = EPERM;
3120 		goto out;
3121 	}
3122 
3123 	count = 0;
3124 
3125 	if (!(flags & URWLOCK_PREFER_READER)) {
3126 		if (state & URWLOCK_WRITE_WAITERS) {
3127 			count = 1;
3128 			q = UMTX_EXCLUSIVE_QUEUE;
3129 		} else if (state & URWLOCK_READ_WAITERS) {
3130 			count = INT_MAX;
3131 			q = UMTX_SHARED_QUEUE;
3132 		}
3133 	} else {
3134 		if (state & URWLOCK_READ_WAITERS) {
3135 			count = INT_MAX;
3136 			q = UMTX_SHARED_QUEUE;
3137 		} else if (state & URWLOCK_WRITE_WAITERS) {
3138 			count = 1;
3139 			q = UMTX_EXCLUSIVE_QUEUE;
3140 		}
3141 	}
3142 
3143 	if (count) {
3144 		umtxq_lock(&uq->uq_key);
3145 		umtxq_busy(&uq->uq_key);
3146 		umtxq_signal_queue(&uq->uq_key, count, q);
3147 		umtxq_unbusy(&uq->uq_key);
3148 		umtxq_unlock(&uq->uq_key);
3149 	}
3150 out:
3151 	umtx_key_release(&uq->uq_key);
3152 	return (error);
3153 }
3154 
3155 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
3156 static int
3157 do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
3158 {
3159 	struct abs_timeout timo;
3160 	struct umtx_q *uq;
3161 	uint32_t flags, count, count1;
3162 	int error, rv, rv1;
3163 
3164 	uq = td->td_umtxq;
3165 	error = fueword32(&sem->_flags, &flags);
3166 	if (error == -1)
3167 		return (EFAULT);
3168 	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3169 	if (error != 0)
3170 		return (error);
3171 
3172 	if (timeout != NULL)
3173 		abs_timeout_init2(&timo, timeout);
3174 
3175 again:
3176 	umtxq_lock(&uq->uq_key);
3177 	umtxq_busy(&uq->uq_key);
3178 	umtxq_insert(uq);
3179 	umtxq_unlock(&uq->uq_key);
3180 	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
3181 	if (rv == 0)
3182 		rv1 = fueword32(&sem->_count, &count);
3183 	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
3184 	    (rv == 1 && count1 == 0)) {
3185 		umtxq_lock(&uq->uq_key);
3186 		umtxq_unbusy(&uq->uq_key);
3187 		umtxq_remove(uq);
3188 		umtxq_unlock(&uq->uq_key);
3189 		if (rv == 1) {
3190 			rv = thread_check_susp(td, true);
3191 			if (rv == 0)
3192 				goto again;
3193 			error = rv;
3194 			goto out;
3195 		}
3196 		if (rv == 0)
3197 			rv = rv1;
3198 		error = rv == -1 ? EFAULT : 0;
3199 		goto out;
3200 	}
3201 	umtxq_lock(&uq->uq_key);
3202 	umtxq_unbusy(&uq->uq_key);
3203 
3204 	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3205 
3206 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
3207 		error = 0;
3208 	else {
3209 		umtxq_remove(uq);
3210 		/* A relative timeout cannot be restarted. */
3211 		if (error == ERESTART && timeout != NULL &&
3212 		    (timeout->_flags & UMTX_ABSTIME) == 0)
3213 			error = EINTR;
3214 	}
3215 	umtxq_unlock(&uq->uq_key);
3216 out:
3217 	umtx_key_release(&uq->uq_key);
3218 	return (error);
3219 }
3220 
3221 /*
3222  * Signal a userland semaphore.
3223  */
3224 static int
3225 do_sem_wake(struct thread *td, struct _usem *sem)
3226 {
3227 	struct umtx_key key;
3228 	int error, cnt;
3229 	uint32_t flags;
3230 
3231 	error = fueword32(&sem->_flags, &flags);
3232 	if (error == -1)
3233 		return (EFAULT);
3234 	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3235 		return (error);
3236 	umtxq_lock(&key);
3237 	umtxq_busy(&key);
3238 	cnt = umtxq_count(&key);
3239 	if (cnt > 0) {
3240 		/*
3241 		 * The count is greater than 0, which means the memory
3242 		 * is still being referenced by user code, so we can
3243 		 * safely update the _has_waiters flag.
3244 		 */
3245 		if (cnt == 1) {
3246 			umtxq_unlock(&key);
3247 			error = suword32(&sem->_has_waiters, 0);
3248 			umtxq_lock(&key);
3249 			if (error == -1)
3250 				error = EFAULT;
3251 		}
3252 		umtxq_signal(&key, 1);
3253 	}
3254 	umtxq_unbusy(&key);
3255 	umtxq_unlock(&key);
3256 	umtx_key_release(&key);
3257 	return (error);
3258 }
3259 #endif
3260 
3261 static int
3262 do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
3263 {
3264 	struct abs_timeout timo;
3265 	struct umtx_q *uq;
3266 	uint32_t count, flags;
3267 	int error, rv;
3268 
3269 	uq = td->td_umtxq;
3270 	flags = fuword32(&sem->_flags);
3271 	if (timeout != NULL)
3272 		abs_timeout_init2(&timo, timeout);
3273 
3274 again:
3275 	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
3276 	if (error != 0)
3277 		return (error);
3278 	umtxq_lock(&uq->uq_key);
3279 	umtxq_busy(&uq->uq_key);
3280 	umtxq_insert(uq);
3281 	umtxq_unlock(&uq->uq_key);
3282 	rv = fueword32(&sem->_count, &count);
3283 	if (rv == -1) {
3284 		umtxq_lock(&uq->uq_key);
3285 		umtxq_unbusy(&uq->uq_key);
3286 		umtxq_remove(uq);
3287 		umtxq_unlock(&uq->uq_key);
3288 		umtx_key_release(&uq->uq_key);
3289 		return (EFAULT);
3290 	}
3291 	for (;;) {
3292 		if (USEM_COUNT(count) != 0) {
3293 			umtxq_lock(&uq->uq_key);
3294 			umtxq_unbusy(&uq->uq_key);
3295 			umtxq_remove(uq);
3296 			umtxq_unlock(&uq->uq_key);
3297 			umtx_key_release(&uq->uq_key);
3298 			return (0);
3299 		}
3300 		if (count == USEM_HAS_WAITERS)
3301 			break;
3302 		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
3303 		if (rv == 0)
3304 			break;
3305 		umtxq_lock(&uq->uq_key);
3306 		umtxq_unbusy(&uq->uq_key);
3307 		umtxq_remove(uq);
3308 		umtxq_unlock(&uq->uq_key);
3309 		umtx_key_release(&uq->uq_key);
3310 		if (rv == -1)
3311 			return (EFAULT);
3312 		rv = thread_check_susp(td, true);
3313 		if (rv != 0)
3314 			return (rv);
3315 		goto again;
3316 	}
3317 	umtxq_lock(&uq->uq_key);
3318 	umtxq_unbusy(&uq->uq_key);
3319 
3320 	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
3321 
3322 	if ((uq->uq_flags & UQF_UMTXQ) == 0)
3323 		error = 0;
3324 	else {
3325 		umtxq_remove(uq);
3326 		if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
3327 			/* A relative timeout cannot be restarted. */
3328 			if (error == ERESTART)
3329 				error = EINTR;
3330 			if (error == EINTR) {
3331 				abs_timeout_update(&timo);
3332 				timespecsub(&timo.end, &timo.cur,
3333 				    &timeout->_timeout);
3334 			}
3335 		}
3336 	}
3337 	umtxq_unlock(&uq->uq_key);
3338 	umtx_key_release(&uq->uq_key);
3339 	return (error);
3340 }
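
/*
 * A hedged sketch of the userland side of the _usem2 protocol: the
 * count and the USEM_HAS_WAITERS bit share one word, so a waiter only
 * enters the kernel once the count is exhausted:
 *
 *	static int
 *	sem2_trywait_sketch(struct _usem2 *sem)
 *	{
 *		uint32_t count = sem->_count;
 *
 *		// Consume one token; only the count bits change, so
 *		// the waiters bit is preserved.
 *		while (USEM_COUNT(count) != 0) {
 *			if (atomic_cmpset_acq_32(&sem->_count, count,
 *			    count - 1))
 *				return (0);
 *			count = sem->_count;
 *		}
 *		// Out of tokens: the caller falls back to
 *		// UMTX_OP_SEM2_WAIT, i.e. do_sem2_wait() above.
 *		return (EAGAIN);
 *	}
 */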
3341 
3342 /*
3343  * Signal a userland semaphore.
3344  */
3345 static int
3346 do_sem2_wake(struct thread *td, struct _usem2 *sem)
3347 {
3348 	struct umtx_key key;
3349 	int error, cnt, rv;
3350 	uint32_t count, flags;
3351 
3352 	rv = fueword32(&sem->_flags, &flags);
3353 	if (rv == -1)
3354 		return (EFAULT);
3355 	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
3356 		return (error);
3357 	umtxq_lock(&key);
3358 	umtxq_busy(&key);
3359 	cnt = umtxq_count(&key);
3360 	if (cnt > 0) {
3361 		/*
3362 		 * If this was the last sleeping thread, clear the waiters
3363 		 * flag in _count.
3364 		 */
3365 		if (cnt == 1) {
3366 			umtxq_unlock(&key);
3367 			rv = fueword32(&sem->_count, &count);
3368 			while (rv != -1 && count & USEM_HAS_WAITERS) {
3369 				rv = casueword32(&sem->_count, count, &count,
3370 				    count & ~USEM_HAS_WAITERS);
3371 				if (rv == 1) {
3372 					rv = thread_check_susp(td, true);
3373 					if (rv != 0)
3374 						break;
3375 				}
3376 			}
3377 			if (rv == -1)
3378 				error = EFAULT;
3379 			else if (rv > 0) {
3380 				error = rv;
3381 			}
3382 			umtxq_lock(&key);
3383 		}
3384 
3385 		umtxq_signal(&key, 1);
3386 	}
3387 	umtxq_unbusy(&key);
3388 	umtxq_unlock(&key);
3389 	umtx_key_release(&key);
3390 	return (error);
3391 }
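
/*
 * And the matching post side, also only a sketch: the increment is a
 * single atomic on the fast path, and the kernel is entered only when
 * do_sem2_wait() has set USEM_HAS_WAITERS:
 *
 *	static int
 *	sem2_post_sketch(struct _usem2 *sem)
 *	{
 *		uint32_t old;
 *
 *		// Add one token; the waiters bit rides along unchanged.
 *		old = atomic_fetchadd_32(&sem->_count, 1);
 *		if ((old & USEM_HAS_WAITERS) != 0)
 *			// Wake one sleeper via do_sem2_wake() above.
 *			return (_umtx_op(sem, UMTX_OP_SEM2_WAKE, 0,
 *			    NULL, NULL));
 *		return (0);
 *	}
 */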
3392 
3393 inline int
3394 umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
3395 {
3396 	int error;
3397 
3398 	error = copyin(uaddr, tsp, sizeof(*tsp));
3399 	if (error == 0) {
3400 		if (tsp->tv_sec < 0 ||
3401 		    tsp->tv_nsec >= 1000000000 ||
3402 		    tsp->tv_nsec < 0)
3403 			error = EINVAL;
3404 	}
3405 	return (error);
3406 }
3407 
3408 static inline int
3409 umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
3410 {
3411 	int error;
3412 
3413 	if (size <= sizeof(tp->_timeout)) {
3414 		tp->_clockid = CLOCK_REALTIME;
3415 		tp->_flags = 0;
3416 		error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
3417 	} else
3418 		error = copyin(uaddr, tp, sizeof(*tp));
3419 	if (error != 0)
3420 		return (error);
3421 	if (tp->_timeout.tv_sec < 0 ||
3422 	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
3423 		return (EINVAL);
3424 	return (0);
3425 }
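
/*
 * The size-based dispatch above lets callers pass either a bare
 * struct timespec or a full struct _umtx_time.  A hedged usage sketch
 * for a timed wait; by convention the structure size travels in the
 * uaddr1 slot of _umtx_op():
 *
 *	struct _umtx_time to = {
 *		._timeout = { .tv_sec = 1, .tv_nsec = 0 },
 *		._flags = 0,			// relative timeout
 *		._clockid = CLOCK_MONOTONIC,
 *	};
 *
 *	_umtx_op(&word, UMTX_OP_WAIT_UINT_PRIVATE, expected,
 *	    (void *)sizeof(to), &to);
 */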
3426 
3427 static int
3428 umtx_copyin_robust_lists(const void *uaddr, size_t size,
3429     struct umtx_robust_lists_params *rb)
3430 {
3431 
3432 	if (size > sizeof(*rb))
3433 		return (EINVAL);
3434 	return (copyin(uaddr, rb, size));
3435 }
3436 
3437 static int
3438 umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
3439 {
3440 
3441 	/*
3442 	 * The caller should guarantee that sz == uaddr1 -
3443 	 * sizeof(_umtx_time), and we are only called if
3444 	 * sz >= sizeof(timespec), as supplied in the copyops.
3445 	 */
3446 	KASSERT(sz >= sizeof(*tsp),
3447 	    ("umtx_copyops specifies incorrect sizes"));
3448 
3449 	return (copyout(tsp, uaddr, sizeof(*tsp)));
3450 }
3451 
3452 static int
3453 __umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap,
3454     const struct umtx_copyops *ops __unused)
3455 {
3456 
3457 	return (EOPNOTSUPP);
3458 }
3459 
3460 static int
3461 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
3462     const struct umtx_copyops *ops)
3463 {
3464 	struct _umtx_time timeout, *tm_p;
3465 	int error;
3466 
3467 	if (uap->uaddr2 == NULL)
3468 		tm_p = NULL;
3469 	else {
3470 		error = ops->copyin_umtx_time(
3471 		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3472 		if (error != 0)
3473 			return (error);
3474 		tm_p = &timeout;
3475 	}
3476 	return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
3477 }
3478 
3479 static int
3480 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
3481     const struct umtx_copyops *ops)
3482 {
3483 	struct _umtx_time timeout, *tm_p;
3484 	int error;
3485 
3486 	if (uap->uaddr2 == NULL)
3487 		tm_p = NULL;
3488 	else {
3489 		error = ops->copyin_umtx_time(
3490 		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3491 		if (error != 0)
3492 			return (error);
3493 		tm_p = &timeout;
3494 	}
3495 	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0));
3496 }
3497 
3498 static int
3499 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap,
3500     const struct umtx_copyops *ops)
3501 {
3502 	struct _umtx_time *tm_p, timeout;
3503 	int error;
3504 
3505 	if (uap->uaddr2 == NULL)
3506 		tm_p = NULL;
3507 	else {
3508 		error = ops->copyin_umtx_time(
3509 		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3510 		if (error != 0)
3511 			return (error);
3512 		tm_p = &timeout;
3513 	}
3514 	return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1));
3515 }
3516 
3517 static int
3518 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap,
3519     const struct umtx_copyops *ops __unused)
3520 {
3521 
3522 	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
3523 }
3524 
3525 #define BATCH_SIZE	128
3526 static int
3527 __umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap)
3528 {
3529 	char *uaddrs[BATCH_SIZE], **upp;
3530 	int count, error, i, pos, tocopy;
3531 
3532 	upp = (char **)uap->obj;
3533 	error = 0;
3534 	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
3535 	    pos += tocopy) {
3536 		tocopy = MIN(count, BATCH_SIZE);
3537 		error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *));
3538 		if (error != 0)
3539 			break;
3540 		for (i = 0; i < tocopy; ++i) {
3541 			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
3542 		}
3543 		maybe_yield();
3544 	}
3545 	return (error);
3546 }
3547 
3548 static int
3549 __umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3550 {
3551 	uint32_t uaddrs[BATCH_SIZE], *upp;
3552 	int count, error, i, pos, tocopy;
3553 
3554 	upp = (uint32_t *)uap->obj;
3555 	error = 0;
3556 	for (count = uap->val, pos = 0; count > 0; count -= tocopy,
3557 	    pos += tocopy) {
3558 		tocopy = MIN(count, BATCH_SIZE);
3559 		error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t));
3560 		if (error != 0)
3561 			break;
3562 		for (i = 0; i < tocopy; ++i) {
3563 			kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i],
3564 			    INT_MAX, 1);
3565 		}
3566 		maybe_yield();
3567 	}
3568 	return (error);
3569 }
3570 
3571 static int
3572 __umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap,
3573     const struct umtx_copyops *ops)
3574 {
3575 
3576 	if (ops->compat32)
3577 		return (__umtx_op_nwake_private_compat32(td, uap));
3578 	return (__umtx_op_nwake_private_native(td, uap));
3579 }
3580 
3581 static int
3582 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap,
3583     const struct umtx_copyops *ops __unused)
3584 {
3585 
3586 	return (kern_umtx_wake(td, uap->obj, uap->val, 1));
3587 }
3588 
3589 static int
3590 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap,
3591    const struct umtx_copyops *ops)
3592 {
3593 	struct _umtx_time *tm_p, timeout;
3594 	int error;
3595 
3596 	/* Allow a null timespec (wait forever). */
3597 	if (uap->uaddr2 == NULL)
3598 		tm_p = NULL;
3599 	else {
3600 		error = ops->copyin_umtx_time(
3601 		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3602 		if (error != 0)
3603 			return (error);
3604 		tm_p = &timeout;
3605 	}
3606 	return (do_lock_umutex(td, uap->obj, tm_p, 0));
3607 }
3608 
3609 static int
3610 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap,
3611     const struct umtx_copyops *ops __unused)
3612 {
3613 
3614 	return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY));
3615 }
3616 
3617 static int
3618 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap,
3619     const struct umtx_copyops *ops)
3620 {
3621 	struct _umtx_time *tm_p, timeout;
3622 	int error;
3623 
3624 	/* Allow a null timespec (wait forever). */
3625 	if (uap->uaddr2 == NULL)
3626 		tm_p = NULL;
3627 	else {
3628 		error = ops->copyin_umtx_time(
3629 		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3630 		if (error != 0)
3631 			return (error);
3632 		tm_p = &timeout;
3633 	}
3634 	return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT));
3635 }
3636 
3637 static int
3638 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap,
3639     const struct umtx_copyops *ops __unused)
3640 {
3641 
3642 	return (do_wake_umutex(td, uap->obj));
3643 }
3644 
3645 static int
3646 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap,
3647     const struct umtx_copyops *ops __unused)
3648 {
3649 
3650 	return (do_unlock_umutex(td, uap->obj, false));
3651 }
3652 
3653 static int
3654 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap,
3655     const struct umtx_copyops *ops __unused)
3656 {
3657 
3658 	return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1));
3659 }
3660 
3661 static int
3662 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap,
3663     const struct umtx_copyops *ops)
3664 {
3665 	struct timespec *ts, timeout;
3666 	int error;
3667 
3668 	/* Allow a null timespec (wait forever). */
3669 	if (uap->uaddr2 == NULL)
3670 		ts = NULL;
3671 	else {
3672 		error = ops->copyin_timeout(uap->uaddr2, &timeout);
3673 		if (error != 0)
3674 			return (error);
3675 		ts = &timeout;
3676 	}
3677 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3678 }
3679 
3680 static int
3681 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap,
3682     const struct umtx_copyops *ops __unused)
3683 {
3684 
3685 	return (do_cv_signal(td, uap->obj));
3686 }
3687 
3688 static int
3689 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap,
3690     const struct umtx_copyops *ops __unused)
3691 {
3692 
3693 	return (do_cv_broadcast(td, uap->obj));
3694 }
3695 
3696 static int
3697 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap,
3698     const struct umtx_copyops *ops)
3699 {
3700 	struct _umtx_time timeout;
3701 	int error;
3702 
3703 	/* Allow a null timespec (wait forever). */
3704 	if (uap->uaddr2 == NULL) {
3705 		error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3706 	} else {
3707 		error = ops->copyin_umtx_time(uap->uaddr2,
3708 		   (size_t)uap->uaddr1, &timeout);
3709 		if (error != 0)
3710 			return (error);
3711 		error = do_rw_rdlock(td, uap->obj, uap->val, &timeout);
3712 	}
3713 	return (error);
3714 }
3715 
3716 static int
3717 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap,
3718     const struct umtx_copyops *ops)
3719 {
3720 	struct _umtx_time timeout;
3721 	int error;
3722 
3723 	/* Allow a null timespec (wait forever). */
3724 	if (uap->uaddr2 == NULL) {
3725 		error = do_rw_wrlock(td, uap->obj, 0);
3726 	} else {
3727 		error = ops->copyin_umtx_time(uap->uaddr2,
3728 		   (size_t)uap->uaddr1, &timeout);
3729 		if (error != 0)
3730 			return (error);
3731 
3732 		error = do_rw_wrlock(td, uap->obj, &timeout);
3733 	}
3734 	return (error);
3735 }
3736 
3737 static int
3738 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap,
3739     const struct umtx_copyops *ops __unused)
3740 {
3741 
3742 	return (do_rw_unlock(td, uap->obj));
3743 }
3744 
3745 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
3746 static int
3747 __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap,
3748     const struct umtx_copyops *ops)
3749 {
3750 	struct _umtx_time *tm_p, timeout;
3751 	int error;
3752 
3753 	/* Allow a null timespec (wait forever). */
3754 	if (uap->uaddr2 == NULL)
3755 		tm_p = NULL;
3756 	else {
3757 		error = ops->copyin_umtx_time(
3758 		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
3759 		if (error != 0)
3760 			return (error);
3761 		tm_p = &timeout;
3762 	}
3763 	return (do_sem_wait(td, uap->obj, tm_p));
3764 }
3765 
3766 static int
3767 __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap,
3768     const struct umtx_copyops *ops __unused)
3769 {
3770 
3771 	return (do_sem_wake(td, uap->obj));
3772 }
3773 #endif
3774 
3775 static int
3776 __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap,
3777     const struct umtx_copyops *ops __unused)
3778 {
3779 
3780 	return (do_wake2_umutex(td, uap->obj, uap->val));
3781 }
3782 
3783 static int
3784 __umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap,
3785     const struct umtx_copyops *ops)
3786 {
3787 	struct _umtx_time *tm_p, timeout;
3788 	size_t uasize;
3789 	int error;
3790 
3791 	/* Allow a null timespec (wait forever). */
3792 	if (uap->uaddr2 == NULL) {
3793 		uasize = 0;
3794 		tm_p = NULL;
3795 	} else {
3796 		uasize = (size_t)uap->uaddr1;
3797 		error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout);
3798 		if (error != 0)
3799 			return (error);
3800 		tm_p = &timeout;
3801 	}
3802 	error = do_sem2_wait(td, uap->obj, tm_p);
3803 	if (error == EINTR && uap->uaddr2 != NULL &&
3804 	    (timeout._flags & UMTX_ABSTIME) == 0 &&
3805 	    uasize >= ops->umtx_time_sz + ops->timespec_sz) {
3806 		error = ops->copyout_timeout(
3807 		    (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz),
3808 		    uasize - ops->umtx_time_sz, &timeout._timeout);
3809 		if (error == 0) {
3810 			error = EINTR;
3811 		}
3812 	}
3813 
3814 	return (error);
3815 }
3816 
3817 static int
3818 __umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap,
3819     const struct umtx_copyops *ops __unused)
3820 {
3821 
3822 	return (do_sem2_wake(td, uap->obj));
3823 }
3824 
3825 #define	USHM_OBJ_UMTX(o)						\
3826     ((struct umtx_shm_obj_list *)(&(o)->umtx_data))
3827 
3828 #define	USHMF_REG_LINKED	0x0001
3829 #define	USHMF_OBJ_LINKED	0x0002
3830 struct umtx_shm_reg {
3831 	TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link;
3832 	LIST_ENTRY(umtx_shm_reg) ushm_obj_link;
3833 	struct umtx_key		ushm_key;
3834 	struct ucred		*ushm_cred;
3835 	struct shmfd		*ushm_obj;
3836 	u_int			ushm_refcnt;
3837 	u_int			ushm_flags;
3838 };
3839 
3840 LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg);
3841 TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg);
3842 
3843 static uma_zone_t umtx_shm_reg_zone;
3844 static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS];
3845 static struct mtx umtx_shm_lock;
3846 static struct umtx_shm_reg_head umtx_shm_reg_delfree =
3847     TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree);
3848 
3849 static void umtx_shm_free_reg(struct umtx_shm_reg *reg);
3850 
3851 static void
3852 umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused)
3853 {
3854 	struct umtx_shm_reg_head d;
3855 	struct umtx_shm_reg *reg, *reg1;
3856 
3857 	TAILQ_INIT(&d);
3858 	mtx_lock(&umtx_shm_lock);
3859 	TAILQ_CONCAT(&d, &umtx_shm_reg_delfree, ushm_reg_link);
3860 	mtx_unlock(&umtx_shm_lock);
3861 	TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) {
3862 		TAILQ_REMOVE(&d, reg, ushm_reg_link);
3863 		umtx_shm_free_reg(reg);
3864 	}
3865 }
3866 
3867 static struct task umtx_shm_reg_delfree_task =
3868     TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL);
3869 
3870 static struct umtx_shm_reg *
3871 umtx_shm_find_reg_locked(const struct umtx_key *key)
3872 {
3873 	struct umtx_shm_reg *reg;
3874 	struct umtx_shm_reg_head *reg_head;
3875 
3876 	KASSERT(key->shared, ("umtx_shm_find_reg_locked: private key"));
3877 	mtx_assert(&umtx_shm_lock, MA_OWNED);
3878 	reg_head = &umtx_shm_registry[key->hash];
3879 	TAILQ_FOREACH(reg, reg_head, ushm_reg_link) {
3880 		KASSERT(reg->ushm_key.shared,
3881 		    ("non-shared key on reg %p %d", reg, reg->ushm_key.shared));
3882 		if (reg->ushm_key.info.shared.object ==
3883 		    key->info.shared.object &&
3884 		    reg->ushm_key.info.shared.offset ==
3885 		    key->info.shared.offset) {
3886 			KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM"));
3887 			KASSERT(reg->ushm_refcnt > 0,
3888 			    ("reg %p refcnt 0 onlist", reg));
3889 			KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0,
3890 			    ("reg %p not linked", reg));
3891 			reg->ushm_refcnt++;
3892 			return (reg);
3893 		}
3894 	}
3895 	return (NULL);
3896 }
3897 
3898 static struct umtx_shm_reg *
3899 umtx_shm_find_reg(const struct umtx_key *key)
3900 {
3901 	struct umtx_shm_reg *reg;
3902 
3903 	mtx_lock(&umtx_shm_lock);
3904 	reg = umtx_shm_find_reg_locked(key);
3905 	mtx_unlock(&umtx_shm_lock);
3906 	return (reg);
3907 }
3908 
3909 static void
3910 umtx_shm_free_reg(struct umtx_shm_reg *reg)
3911 {
3912 
3913 	chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0);
3914 	crfree(reg->ushm_cred);
3915 	shm_drop(reg->ushm_obj);
3916 	uma_zfree(umtx_shm_reg_zone, reg);
3917 }
3918 
3919 static bool
3920 umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force)
3921 {
3922 	bool res;
3923 
3924 	mtx_assert(&umtx_shm_lock, MA_OWNED);
3925 	KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg));
3926 	reg->ushm_refcnt--;
3927 	res = reg->ushm_refcnt == 0;
3928 	if (res || force) {
3929 		if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) {
3930 			TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash],
3931 			    reg, ushm_reg_link);
3932 			reg->ushm_flags &= ~USHMF_REG_LINKED;
3933 		}
3934 		if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) {
3935 			LIST_REMOVE(reg, ushm_obj_link);
3936 			reg->ushm_flags &= ~USHMF_OBJ_LINKED;
3937 		}
3938 	}
3939 	return (res);
3940 }
3941 
3942 static void
3943 umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force)
3944 {
3945 	vm_object_t object;
3946 	bool dofree;
3947 
3948 	if (force) {
3949 		object = reg->ushm_obj->shm_object;
3950 		VM_OBJECT_WLOCK(object);
3951 		object->flags |= OBJ_UMTXDEAD;
3952 		VM_OBJECT_WUNLOCK(object);
3953 	}
3954 	mtx_lock(&umtx_shm_lock);
3955 	dofree = umtx_shm_unref_reg_locked(reg, force);
3956 	mtx_unlock(&umtx_shm_lock);
3957 	if (dofree)
3958 		umtx_shm_free_reg(reg);
3959 }
3960 
3961 void
3962 umtx_shm_object_init(vm_object_t object)
3963 {
3964 
3965 	LIST_INIT(USHM_OBJ_UMTX(object));
3966 }
3967 
3968 void
3969 umtx_shm_object_terminated(vm_object_t object)
3970 {
3971 	struct umtx_shm_reg *reg, *reg1;
3972 	bool dofree;
3973 
3974 	if (LIST_EMPTY(USHM_OBJ_UMTX(object)))
3975 		return;
3976 
3977 	dofree = false;
3978 	mtx_lock(&umtx_shm_lock);
3979 	LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) {
3980 		if (umtx_shm_unref_reg_locked(reg, true)) {
3981 			TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg,
3982 			    ushm_reg_link);
3983 			dofree = true;
3984 		}
3985 	}
3986 	mtx_unlock(&umtx_shm_lock);
3987 	if (dofree)
3988 		taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task);
3989 }
3990 
3991 static int
3992 umtx_shm_create_reg(struct thread *td, const struct umtx_key *key,
3993     struct umtx_shm_reg **res)
3994 {
3995 	struct umtx_shm_reg *reg, *reg1;
3996 	struct ucred *cred;
3997 	int error;
3998 
3999 	reg = umtx_shm_find_reg(key);
4000 	if (reg != NULL) {
4001 		*res = reg;
4002 		return (0);
4003 	}
4004 	cred = td->td_ucred;
4005 	if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP)))
4006 		return (ENOMEM);
4007 	reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO);
4008 	reg->ushm_refcnt = 1;
4009 	bcopy(key, &reg->ushm_key, sizeof(*key));
4010 	reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false);
4011 	reg->ushm_cred = crhold(cred);
4012 	error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE);
4013 	if (error != 0) {
4014 		umtx_shm_free_reg(reg);
4015 		return (error);
4016 	}
4017 	mtx_lock(&umtx_shm_lock);
4018 	reg1 = umtx_shm_find_reg_locked(key);
4019 	if (reg1 != NULL) {
4020 		mtx_unlock(&umtx_shm_lock);
4021 		umtx_shm_free_reg(reg);
4022 		*res = reg1;
4023 		return (0);
4024 	}
4025 	reg->ushm_refcnt++;
4026 	TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link);
4027 	LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg,
4028 	    ushm_obj_link);
4029 	reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED;
4030 	mtx_unlock(&umtx_shm_lock);
4031 	*res = reg;
4032 	return (0);
4033 }
4034 
4035 static int
4036 umtx_shm_alive(struct thread *td, void *addr)
4037 {
4038 	vm_map_t map;
4039 	vm_map_entry_t entry;
4040 	vm_object_t object;
4041 	vm_pindex_t pindex;
4042 	vm_prot_t prot;
4043 	int res, ret;
4044 	boolean_t wired;
4045 
4046 	map = &td->td_proc->p_vmspace->vm_map;
4047 	res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry,
4048 	    &object, &pindex, &prot, &wired);
4049 	if (res != KERN_SUCCESS)
4050 		return (EFAULT);
4051 	if (object == NULL)
4052 		ret = EINVAL;
4053 	else
4054 		ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0;
4055 	vm_map_lookup_done(map, entry);
4056 	return (ret);
4057 }
4058 
4059 static void
4060 umtx_shm_init(void)
4061 {
4062 	int i;
4063 
4064 	umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg),
4065 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
4066 	mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF);
4067 	for (i = 0; i < nitems(umtx_shm_registry); i++)
4068 		TAILQ_INIT(&umtx_shm_registry[i]);
4069 }
4070 
4071 static int
4072 umtx_shm(struct thread *td, void *addr, u_int flags)
4073 {
4074 	struct umtx_key key;
4075 	struct umtx_shm_reg *reg;
4076 	struct file *fp;
4077 	int error, fd;
4078 
4079 	if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP |
4080 	    UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1)
4081 		return (EINVAL);
4082 	if ((flags & UMTX_SHM_ALIVE) != 0)
4083 		return (umtx_shm_alive(td, addr));
4084 	error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key);
4085 	if (error != 0)
4086 		return (error);
4087 	KASSERT(key.shared == 1, ("non-shared key"));
4088 	if ((flags & UMTX_SHM_CREAT) != 0) {
4089 		error = umtx_shm_create_reg(td, &key, &reg);
4090 	} else {
4091 		reg = umtx_shm_find_reg(&key);
4092 		if (reg == NULL)
4093 			error = ESRCH;
4094 	}
4095 	umtx_key_release(&key);
4096 	if (error != 0)
4097 		return (error);
4098 	KASSERT(reg != NULL, ("no reg"));
4099 	if ((flags & UMTX_SHM_DESTROY) != 0) {
4100 		umtx_shm_unref_reg(reg, true);
4101 	} else {
4102 #if 0
4103 #ifdef MAC
4104 		error = mac_posixshm_check_open(td->td_ucred,
4105 		    reg->ushm_obj, FFLAGS(O_RDWR));
4106 		if (error == 0)
4107 #endif
4108 			error = shm_access(reg->ushm_obj, td->td_ucred,
4109 			    FFLAGS(O_RDWR));
4110 		if (error == 0)
4111 #endif
4112 			error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL);
4113 		if (error == 0) {
4114 			shm_hold(reg->ushm_obj);
4115 			finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj,
4116 			    &shm_ops);
4117 			td->td_retval[0] = fd;
4118 			fdrop(fp, td);
4119 		}
4120 	}
4121 	umtx_shm_unref_reg(reg, false);
4122 	return (error);
4123 }
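
/*
 * Illustrative fragment (not compiled): the expected userspace pattern
 * for UMTX_SHM_CREAT is to obtain a file descriptor and map the
 * PAGE_SIZE object created by umtx_shm_create_reg().  "key_addr" is
 * hypothetical and must point into a shared mapping, since the key is
 * taken with PROCESS_SHARE.
 */
#if 0
	int fd;
	void *p;

	fd = _umtx_op(NULL, UMTX_OP_SHM, UMTX_SHM_CREAT, key_addr, NULL);
	if (fd >= 0) {
		p = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, 0);
		close(fd);	/* the mapping keeps the object referenced */
	}
#endif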
4124 
4125 static int
4126 __umtx_op_shm(struct thread *td, struct _umtx_op_args *uap,
4127     const struct umtx_copyops *ops __unused)
4128 {
4129 
4130 	return (umtx_shm(td, uap->uaddr1, uap->val));
4131 }
4132 
4133 static int
4134 __umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap,
4135     const struct umtx_copyops *ops)
4136 {
4137 	struct umtx_robust_lists_params rb;
4138 	int error;
4139 
4140 	bzero(&rb, sizeof(rb));
4141 	error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb);
4142 	if (error != 0)
4143 		return (error);
4144 
4145 	if (ops->compat32)
4146 		td->td_pflags2 |= TDP2_COMPAT32RB;
4147 	else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0)
4148 		return (EINVAL);
4149 
4150 	td->td_rb_list = rb.robust_list_offset;
4151 	td->td_rbp_list = rb.robust_priv_list_offset;
4152 	td->td_rb_inact = rb.robust_inact_offset;
4153 	return (0);
4154 }
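
/*
 * Illustrative fragment (not compiled): how a threading library might
 * register its per-thread robust list heads during thread setup.  The
 * "thr" structure and its members are hypothetical; the kernel walks
 * the registered lists in umtx_thread_cleanup() at thread exit.
 */
#if 0
	struct umtx_robust_lists_params rb = {
		.robust_list_offset = (uintptr_t)&thr->robust_list_head,
		.robust_priv_list_offset = (uintptr_t)&thr->priv_list_head,
		.robust_inact_offset = (uintptr_t)&thr->inact_slot,
	};

	if (_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL) != 0)
		abort();	/* registration is expected to succeed */
#endif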
4155 
4156 #ifdef COMPAT_FREEBSD32
4157 static inline int
4158 umtx_copyin_timeout32(const void *uaddr, struct timespec *tsp)
4159 {
4160 	struct timespec32 ts32;
4161 	int error;
4162 
4163 	error = copyin(uaddr, &ts32, sizeof(ts32));
4164 	if (error == 0) {
4165 		if (ts32.tv_sec < 0 ||
4166 		    ts32.tv_nsec >= 1000000000 ||
4167 		    ts32.tv_nsec < 0)
4168 			error = EINVAL;
4169 		else {
4170 			CP(ts32, *tsp, tv_sec);
4171 			CP(ts32, *tsp, tv_nsec);
4172 		}
4173 	}
4174 	return (error);
4175 }
4176 
4177 static inline int
4178 umtx_copyin_umtx_time32(const void *uaddr, size_t size, struct _umtx_time *tp)
4179 {
4180 	struct umtx_time32 t32;
4181 	int error;
4182 
4183 	t32._clockid = CLOCK_REALTIME;
4184 	t32._flags   = 0;
4185 	if (size <= sizeof(t32._timeout))
4186 		error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout));
4187 	else
4188 		error = copyin(uaddr, &t32, sizeof(t32));
4189 	if (error != 0)
4190 		return (error);
4191 	if (t32._timeout.tv_sec < 0 ||
4192 	    t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0)
4193 		return (EINVAL);
4194 	TS_CP(t32, *tp, _timeout);
4195 	CP(t32, *tp, _flags);
4196 	CP(t32, *tp, _clockid);
4197 	return (0);
4198 }
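
/*
 * Illustrative fragment (not compiled): for either ABI the caller
 * selects between the two accepted layouts through the size passed in
 * uaddr1; a bare timespec gets the CLOCK_REALTIME/relative defaults
 * seeded above, while a full _umtx_time carries explicit flags and
 * clock.  Shown with the native structure; variable names are
 * hypothetical.
 */
#if 0
	struct _umtx_time timo = {
		._timeout = { .tv_sec = 1, .tv_nsec = 0 },
		._flags = 0,		/* or UMTX_ABSTIME for a deadline */
		._clockid = CLOCK_MONOTONIC,
	};

	error = _umtx_op(&futex_word, UMTX_OP_WAIT_UINT, expected_value,
	    (void *)(uintptr_t)sizeof(timo), &timo);
#endif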
4199 
4200 static int
4201 umtx_copyin_robust_lists32(const void *uaddr, size_t size,
4202     struct umtx_robust_lists_params *rbp)
4203 {
4204 	struct umtx_robust_lists_params_compat32 rb32;
4205 	int error;
4206 
4207 	if (size > sizeof(rb32))
4208 		return (EINVAL);
4209 	bzero(&rb32, sizeof(rb32));
4210 	error = copyin(uaddr, &rb32, size);
4211 	if (error != 0)
4212 		return (error);
4213 	CP(rb32, *rbp, robust_list_offset);
4214 	CP(rb32, *rbp, robust_priv_list_offset);
4215 	CP(rb32, *rbp, robust_inact_offset);
4216 	return (0);
4217 }
4218 
4219 static int
4220 umtx_copyout_timeout32(void *uaddr, size_t sz, struct timespec *tsp)
4221 {
4222 	struct timespec32 remain32 = {
4223 		.tv_sec = tsp->tv_sec,
4224 		.tv_nsec = tsp->tv_nsec,
4225 	};
4226 
4227 	/*
4228 	 * The caller guarantees that sz == uaddr1 - sizeof(_umtx_time) and
4229 	 * that we are only called if sz >= sizeof(timespec), as supplied in
4230 	 * the copyops.
4231 	 */
4232 	KASSERT(sz >= sizeof(remain32),
4233 	    ("umtx_copyops specifies incorrect sizes"));
4234 
4235 	return (copyout(&remain32, uaddr, sizeof(remain32)));
4236 }
4237 #endif /* COMPAT_FREEBSD32 */
4238 
4239 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap,
4240     const struct umtx_copyops *umtx_ops);
4241 
4242 static const _umtx_op_func op_table[] = {
4243 	[UMTX_OP_RESERVED0]	= __umtx_op_unimpl,
4244 	[UMTX_OP_RESERVED1]	= __umtx_op_unimpl,
4245 	[UMTX_OP_WAIT]		= __umtx_op_wait,
4246 	[UMTX_OP_WAKE]		= __umtx_op_wake,
4247 	[UMTX_OP_MUTEX_TRYLOCK]	= __umtx_op_trylock_umutex,
4248 	[UMTX_OP_MUTEX_LOCK]	= __umtx_op_lock_umutex,
4249 	[UMTX_OP_MUTEX_UNLOCK]	= __umtx_op_unlock_umutex,
4250 	[UMTX_OP_SET_CEILING]	= __umtx_op_set_ceiling,
4251 	[UMTX_OP_CV_WAIT]	= __umtx_op_cv_wait,
4252 	[UMTX_OP_CV_SIGNAL]	= __umtx_op_cv_signal,
4253 	[UMTX_OP_CV_BROADCAST]	= __umtx_op_cv_broadcast,
4254 	[UMTX_OP_WAIT_UINT]	= __umtx_op_wait_uint,
4255 	[UMTX_OP_RW_RDLOCK]	= __umtx_op_rw_rdlock,
4256 	[UMTX_OP_RW_WRLOCK]	= __umtx_op_rw_wrlock,
4257 	[UMTX_OP_RW_UNLOCK]	= __umtx_op_rw_unlock,
4258 	[UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private,
4259 	[UMTX_OP_WAKE_PRIVATE]	= __umtx_op_wake_private,
4260 	[UMTX_OP_MUTEX_WAIT]	= __umtx_op_wait_umutex,
4261 	[UMTX_OP_MUTEX_WAKE]	= __umtx_op_wake_umutex,
4262 #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
4263 	[UMTX_OP_SEM_WAIT]	= __umtx_op_sem_wait,
4264 	[UMTX_OP_SEM_WAKE]	= __umtx_op_sem_wake,
4265 #else
4266 	[UMTX_OP_SEM_WAIT]	= __umtx_op_unimpl,
4267 	[UMTX_OP_SEM_WAKE]	= __umtx_op_unimpl,
4268 #endif
4269 	[UMTX_OP_NWAKE_PRIVATE]	= __umtx_op_nwake_private,
4270 	[UMTX_OP_MUTEX_WAKE2]	= __umtx_op_wake2_umutex,
4271 	[UMTX_OP_SEM2_WAIT]	= __umtx_op_sem2_wait,
4272 	[UMTX_OP_SEM2_WAKE]	= __umtx_op_sem2_wake,
4273 	[UMTX_OP_SHM]		= __umtx_op_shm,
4274 	[UMTX_OP_ROBUST_LISTS]	= __umtx_op_robust_lists,
4275 };
4276 
4277 static const struct umtx_copyops umtx_native_ops = {
4278 	.copyin_timeout = umtx_copyin_timeout,
4279 	.copyin_umtx_time = umtx_copyin_umtx_time,
4280 	.copyin_robust_lists = umtx_copyin_robust_lists,
4281 	.copyout_timeout = umtx_copyout_timeout,
4282 	.timespec_sz = sizeof(struct timespec),
4283 	.umtx_time_sz = sizeof(struct _umtx_time),
4284 };
4285 
4286 #ifdef COMPAT_FREEBSD32
4287 const struct umtx_copyops umtx_native_ops32 = {
4288 	.copyin_timeout = umtx_copyin_timeout32,
4289 	.copyin_umtx_time = umtx_copyin_umtx_time32,
4290 	.copyin_robust_lists = umtx_copyin_robust_lists32,
4291 	.copyout_timeout = umtx_copyout_timeout32,
4292 	.timespec_sz = sizeof(struct timespec32),
4293 	.umtx_time_sz = sizeof(struct umtx_time32),
4294 	.compat32 = true,
4295 };
4296 #endif
4297 
4298 int
4299 kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val,
4300     void *uaddr1, void *uaddr2, const struct umtx_copyops *ops)
4301 {
4302 	struct _umtx_op_args uap = {
4303 		.obj = obj,
4304 		.op = op,
4305 		.val = val,
4306 		.uaddr1 = uaddr1,
4307 		.uaddr2 = uaddr2
4308 	};
4309 
4310 	if (uap.op >= nitems(op_table))
4311 		return (EINVAL);
4312 	return ((*op_table[uap.op])(td, &uap, ops));
4313 }
4314 
4315 int
4316 sys__umtx_op(struct thread *td, struct _umtx_op_args *uap)
4317 {
4318 
4319 	return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1,
4320 	    uap->uaddr2, &umtx_native_ops));
4321 }
4322 
4323 void
4324 umtx_thread_init(struct thread *td)
4325 {
4326 
4327 	td->td_umtxq = umtxq_alloc();
4328 	td->td_umtxq->uq_thread = td;
4329 }
4330 
4331 void
4332 umtx_thread_fini(struct thread *td)
4333 {
4334 
4335 	umtxq_free(td->td_umtxq);
4336 }
4337 
4338 /*
4339  * Called when a new thread is created, e.g. by fork().
4340  */
4341 void
4342 umtx_thread_alloc(struct thread *td)
4343 {
4344 	struct umtx_q *uq;
4345 
4346 	uq = td->td_umtxq;
4347 	uq->uq_inherited_pri = PRI_MAX;
4348 
4349 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
4350 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
4351 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
4352 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
4353 }
4354 
4355 /*
4356  * exec() hook.
4357  *
4358  * Clear the robust lists for all of the process's threads, rather
4359  * than delaying the cleanup to the thread_exit hook, since the
4360  * relevant address space is destroyed right now.
4361  */
4362 static void
4363 umtx_exec_hook(void *arg __unused, struct proc *p,
4364     struct image_params *imgp __unused)
4365 {
4366 	struct thread *td;
4367 
4368 	KASSERT(p == curproc, ("need curproc"));
4369 	KASSERT((p->p_flag & P_HADTHREADS) == 0 ||
4370 	    (p->p_flag & P_STOPPED_SINGLE) != 0,
4371 	    ("curproc must be single-threaded"));
4372 	/*
4373 	 * There is no need to lock the list as only this thread can be
4374 	 * running.
4375 	 */
4376 	FOREACH_THREAD_IN_PROC(p, td) {
4377 		KASSERT(td == curthread ||
4378 		    ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)),
4379 		    ("running thread %p %p", p, td));
4380 		umtx_thread_cleanup(td);
4381 		td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0;
4382 	}
4383 }
4384 
4385 /*
4386  * thread_exit() hook.
4387  */
4388 void
4389 umtx_thread_exit(struct thread *td)
4390 {
4391 
4392 	umtx_thread_cleanup(td);
4393 }
4394 
4395 static int
4396 umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32)
4397 {
4398 	u_long res1;
4399 #ifdef COMPAT_FREEBSD32
4400 	uint32_t res32;
4401 #endif
4402 	int error;
4403 
4404 #ifdef COMPAT_FREEBSD32
4405 	if (compat32) {
4406 		error = fueword32((void *)ptr, &res32);
4407 		if (error == 0)
4408 			res1 = res32;
4409 	} else
4410 #endif
4411 	{
4412 		error = fueword((void *)ptr, &res1);
4413 	}
4414 	if (error == 0)
4415 		*res = res1;
4416 	else
4417 		error = EFAULT;
4418 	return (error);
4419 }
4420 
4421 static void
4422 umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list,
4423     bool compat32)
4424 {
4425 #ifdef COMPAT_FREEBSD32
4426 	struct umutex32 m32;
4427 
4428 	if (compat32) {
4429 		memcpy(&m32, m, sizeof(m32));
4430 		*rb_list = m32.m_rb_lnk;
4431 	} else
4432 #endif
4433 		*rb_list = m->m_rb_lnk;
4434 }
4435 
4436 static int
4437 umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact,
4438     bool compat32)
4439 {
4440 	struct umutex m;
4441 	int error;
4442 
4443 	KASSERT(td->td_proc == curproc, ("need current vmspace"));
4444 	error = copyin((void *)rbp, &m, sizeof(m));
4445 	if (error != 0)
4446 		return (error);
4447 	if (rb_list != NULL)
4448 		umtx_read_rb_list(td, &m, rb_list, compat32);
4449 	if ((m.m_flags & UMUTEX_ROBUST) == 0)
4450 		return (EINVAL);
4451 	if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid)
4452 		/* inact is cleared after unlock; allow the inconsistency */
4453 		return (inact ? 0 : EINVAL);
4454 	return (do_unlock_umutex(td, (struct umutex *)rbp, true));
4455 }
4456 
4457 static void
4458 umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact,
4459     const char *name, bool compat32)
4460 {
4461 	int error, i;
4462 	uintptr_t rbp;
4463 	bool inact;
4464 
4465 	if (rb_list == 0)
4466 		return;
4467 	error = umtx_read_uptr(td, rb_list, &rbp, compat32);
4468 	for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) {
4469 		if (rbp == *rb_inact) {
4470 			inact = true;
4471 			*rb_inact = 0;
4472 		} else
4473 			inact = false;
4474 		error = umtx_handle_rb(td, rbp, &rbp, inact, compat32);
4475 	}
4476 	if (i == umtx_max_rb && umtx_verbose_rb) {
4477 		uprintf("comm %s pid %d: reached umtx %smax rb %d\n",
4478 		    td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb);
4479 	}
4480 	if (error != 0 && umtx_verbose_rb) {
4481 		uprintf("comm %s pid %d: handling %srb error %d\n",
4482 		    td->td_proc->p_comm, td->td_proc->p_pid, name, error);
4483 	}
4484 }
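
/*
 * Illustrative fragment (not compiled): the userspace layout walked
 * above.  Robust umutexes are chained through their m_rb_lnk words and
 * the registered list head holds the address of the first mutex; a
 * hypothetical lock path would push onto the list like this.
 */
#if 0
	m->m_rb_lnk = thr->robust_list_head;	/* link to previous head */
	thr->robust_list_head = (uintptr_t)m;	/* publish the new head */
#endif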
4485 
4486 /*
4487  * Clean up the per-thread umtx state.
4488  */
4489 static void
4490 umtx_thread_cleanup(struct thread *td)
4491 {
4492 	struct umtx_q *uq;
4493 	struct umtx_pi *pi;
4494 	uintptr_t rb_inact;
4495 	bool compat32;
4496 
4497 	/*
4498 	 * Disown pi mutexes.
4499 	 */
4500 	uq = td->td_umtxq;
4501 	if (uq != NULL) {
4502 		if (uq->uq_inherited_pri != PRI_MAX ||
4503 		    !TAILQ_EMPTY(&uq->uq_pi_contested)) {
4504 			mtx_lock(&umtx_lock);
4505 			uq->uq_inherited_pri = PRI_MAX;
4506 			while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
4507 				pi->pi_owner = NULL;
4508 				TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
4509 			}
4510 			mtx_unlock(&umtx_lock);
4511 		}
4512 		sched_lend_user_prio_cond(td, PRI_MAX);
4513 	}
4514 
4515 	compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0;
4516 	td->td_pflags2 &= ~TDP2_COMPAT32RB;
4517 
4518 	if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0)
4519 		return;
4520 
4521 	/*
4522 	 * Handle terminated robust mutexes.  Must be done after
4523 	 * robust pi disown, otherwise unlock could see unowned
4524 	 * entries.
4525 	 */
4526 	rb_inact = td->td_rb_inact;
4527 	if (rb_inact != 0)
4528 		(void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32);
4529 	umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32);
4530 	umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32);
4531 	if (rb_inact != 0)
4532 		(void)umtx_handle_rb(td, rb_inact, NULL, true, compat32);
4533 }
4534