xref: /freebsd/sys/kern/kern_umtx.c (revision 9336e0699bda8a301cd2bfa37106b6ec5e32012e)
1 /*-
2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54 
55 #include <machine/cpu.h>
56 
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60 
/*
 * Values for umtx_key.type.  The type is part of a key's identity (see
 * umtx_key_match()), so objects of different types at the same address
 * never share waiters.
 */
#define TYPE_SIMPLE_LOCK	0	/* struct umtx lock word */
#define TYPE_SIMPLE_WAIT	1	/* plain wait/wake on an address */
#define TYPE_NORMAL_UMUTEX	2	/* PTHREAD_PRIO_NONE umutex */
#define TYPE_PI_UMUTEX		3	/* by name: priority-inheritance umutex */
#define TYPE_PP_UMUTEX		4	/* by name: priority-protect umutex */
#define TYPE_CV			5	/* by name: condition variable */
67 
68 /* Key to represent a unique userland synchronous object */
69 struct umtx_key {
70 	int	hash;
71 	int	type;
72 	int	shared;
73 	union {
74 		struct {
75 			vm_object_t	object;
76 			uintptr_t	offset;
77 		} shared;
78 		struct {
79 			struct vmspace	*vs;
80 			uintptr_t	addr;
81 		} private;
82 		struct {
83 			void		*a;
84 			uintptr_t	b;
85 		} both;
86 	} info;
87 };
88 
89 /* Priority inheritance mutex info. */
90 struct umtx_pi {
91 	/* Owner thread */
92 	struct thread		*pi_owner;
93 
94 	/* Reference count */
95 	int			pi_refcount;
96 
97  	/* List entry to link umtx holding by thread */
98 	TAILQ_ENTRY(umtx_pi)	pi_link;
99 
100 	/* List entry in hash */
101 	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
102 
103 	/* List for waiters */
104 	TAILQ_HEAD(,umtx_q)	pi_blocked;
105 
106 	/* Identify a userland lock object */
107 	struct umtx_key		pi_key;
108 };
109 
110 /* A userland synchronous object user. */
111 struct umtx_q {
112 	/* Linked list for the hash. */
113 	TAILQ_ENTRY(umtx_q)	uq_link;
114 
115 	/* Umtx key. */
116 	struct umtx_key		uq_key;
117 
118 	/* Umtx flags. */
119 	int			uq_flags;
120 #define UQF_UMTXQ	0x0001
121 
122 	/* The thread waits on. */
123 	struct thread		*uq_thread;
124 
125 	/*
126 	 * Blocked on PI mutex. read can use chain lock
127 	 * or umtx_lock, write must have both chain lock and
128 	 * umtx_lock being hold.
129 	 */
130 	struct umtx_pi		*uq_pi_blocked;
131 
132 	/* On blocked list */
133 	TAILQ_ENTRY(umtx_q)	uq_lockq;
134 
135 	/* Thread contending with us */
136 	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
137 
138 	/* Inherited priority from PP mutex */
139 	u_char			uq_inherited_pri;
140 };
141 
142 TAILQ_HEAD(umtxq_head, umtx_q);
143 
144 /* Userland lock object's wait-queue chain */
145 struct umtxq_chain {
146 	/* Lock for this chain. */
147 	struct mtx		uc_lock;
148 
149 	/* List of sleep queues. */
150 	struct umtxq_head	uc_queue;
151 
152 	/* Busy flag */
153 	char			uc_busy;
154 
155 	/* Chain lock waiters */
156 	int			uc_waiters;
157 
158 	/* All PI in the list */
159 	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
160 };
161 
162 #define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
163 
/*
 * Priority used for a thread in PI ordering and propagation.
 *
 * Time-sharing priorities are deliberately not propagated, for security
 * reasons.  Otherwise a user could create a PI mutex, have thread A lock
 * it and thread B block on it; B is sleeping, so its priority is boosted,
 * and propagation would boost A as well.  A would then keep the boosted
 * priority even while using 100% of the CPU, which is unfair to other
 * processes.
 *
 * Every thread whose td_user_pri lies within
 * [PRI_MIN_TIMESHARE, PRI_MAX_TIMESHARE] is therefore treated as having
 * PRI_MAX_TIMESHARE; any other thread uses td_user_pri unchanged.
 */

#define UPRI(td)							\
	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&			\
	  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?			\
	 PRI_MAX_TIMESHARE : (td)->td_user_pri)
176 
/*
 * Hash parameters: a key is multiplied by GOLDEN_RATIO_PRIME and the
 * product shifted right by UMTX_SHIFTS to select one of UMTX_CHAINS
 * chains (see umtxq_hash()).
 */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes accepted by umtx_key_get(). */
#define THREAD_SHARE		0	/* private to the process */
#define PROCESS_SHARE		1	/* shared between processes */
#define AUTO_SHARE		2	/* decide from the map entry */

/* Map the USYNC_PROCESS_SHARED flag bit to a sharing mode. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
187 
188 static uma_zone_t		umtx_pi_zone;
189 static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
190 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
191 static int			umtx_pi_allocated;
192 
193 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
194 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
195     &umtx_pi_allocated, 0, "Allocated umtx_pi");
196 
197 static void umtxq_sysinit(void *);
198 static void umtxq_hash(struct umtx_key *key);
199 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
200 static void umtxq_lock(struct umtx_key *key);
201 static void umtxq_unlock(struct umtx_key *key);
202 static void umtxq_busy(struct umtx_key *key);
203 static void umtxq_unbusy(struct umtx_key *key);
204 static void umtxq_insert(struct umtx_q *uq);
205 static void umtxq_remove(struct umtx_q *uq);
206 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
207 static int umtxq_count(struct umtx_key *key);
208 static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
209 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
210 static int umtx_key_get(void *addr, int type, int share,
211 	struct umtx_key *key);
212 static void umtx_key_release(struct umtx_key *key);
213 static struct umtx_pi *umtx_pi_alloc(int);
214 static void umtx_pi_free(struct umtx_pi *pi);
215 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
216 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
217 static void umtx_thread_cleanup(struct thread *td);
218 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
219 	struct image_params *imgp __unused);
220 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
221 
222 static struct mtx umtx_lock;
223 
224 static void
225 umtxq_sysinit(void *arg __unused)
226 {
227 	int i;
228 
229 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
230 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
231 	for (i = 0; i < UMTX_CHAINS; ++i) {
232 		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
233 			 MTX_DEF | MTX_DUPOK);
234 		TAILQ_INIT(&umtxq_chains[i].uc_queue);
235 		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
236 		umtxq_chains[i].uc_busy = 0;
237 		umtxq_chains[i].uc_waiters = 0;
238 	}
239 	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
240 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
241 	    EVENTHANDLER_PRI_ANY);
242 }
243 
244 struct umtx_q *
245 umtxq_alloc(void)
246 {
247 	struct umtx_q *uq;
248 
249 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
250 	TAILQ_INIT(&uq->uq_pi_contested);
251 	uq->uq_inherited_pri = PRI_MAX;
252 	return (uq);
253 }
254 
255 void
256 umtxq_free(struct umtx_q *uq)
257 {
258 	free(uq, M_UMTX);
259 }
260 
261 static inline void
262 umtxq_hash(struct umtx_key *key)
263 {
264 	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
265 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
266 }
267 
268 static inline int
269 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
270 {
271 	return (k1->type == k2->type &&
272 		k1->info.both.a == k2->info.both.a &&
273 	        k1->info.both.b == k2->info.both.b);
274 }
275 
276 static inline struct umtxq_chain *
277 umtxq_getchain(struct umtx_key *key)
278 {
279 	return (&umtxq_chains[key->hash]);
280 }
281 
282 /*
283  * Set chain to busy state when following operation
284  * may be blocked (kernel mutex can not be used).
285  */
286 static inline void
287 umtxq_busy(struct umtx_key *key)
288 {
289 	struct umtxq_chain *uc;
290 
291 	uc = umtxq_getchain(key);
292 	mtx_assert(&uc->uc_lock, MA_OWNED);
293 	while (uc->uc_busy != 0) {
294 		uc->uc_waiters++;
295 		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
296 		uc->uc_waiters--;
297 	}
298 	uc->uc_busy = 1;
299 }
300 
301 /*
302  * Unbusy a chain.
303  */
304 static inline void
305 umtxq_unbusy(struct umtx_key *key)
306 {
307 	struct umtxq_chain *uc;
308 
309 	uc = umtxq_getchain(key);
310 	mtx_assert(&uc->uc_lock, MA_OWNED);
311 	KASSERT(uc->uc_busy != 0, ("not busy"));
312 	uc->uc_busy = 0;
313 	if (uc->uc_waiters)
314 		wakeup_one(uc);
315 }
316 
317 /*
318  * Lock a chain.
319  */
320 static inline void
321 umtxq_lock(struct umtx_key *key)
322 {
323 	struct umtxq_chain *uc;
324 
325 	uc = umtxq_getchain(key);
326 	mtx_lock(&uc->uc_lock);
327 }
328 
329 /*
330  * Unlock a chain.
331  */
332 static inline void
333 umtxq_unlock(struct umtx_key *key)
334 {
335 	struct umtxq_chain *uc;
336 
337 	uc = umtxq_getchain(key);
338 	mtx_unlock(&uc->uc_lock);
339 }
340 
341 /*
342  * Insert a thread onto the umtx queue.
343  */
344 static inline void
345 umtxq_insert(struct umtx_q *uq)
346 {
347 	struct umtxq_chain *uc;
348 
349 	uc = umtxq_getchain(&uq->uq_key);
350 	UMTXQ_LOCKED_ASSERT(uc);
351 	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
352 	uq->uq_flags |= UQF_UMTXQ;
353 }
354 
355 /*
356  * Remove thread from the umtx queue.
357  */
358 static inline void
359 umtxq_remove(struct umtx_q *uq)
360 {
361 	struct umtxq_chain *uc;
362 
363 	uc = umtxq_getchain(&uq->uq_key);
364 	UMTXQ_LOCKED_ASSERT(uc);
365 	if (uq->uq_flags & UQF_UMTXQ) {
366 		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
367 		uq->uq_flags &= ~UQF_UMTXQ;
368 	}
369 }
370 
371 /*
372  * Check if there are multiple waiters
373  */
374 static int
375 umtxq_count(struct umtx_key *key)
376 {
377 	struct umtxq_chain *uc;
378 	struct umtx_q *uq;
379 	int count = 0;
380 
381 	uc = umtxq_getchain(key);
382 	UMTXQ_LOCKED_ASSERT(uc);
383 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
384 		if (umtx_key_match(&uq->uq_key, key)) {
385 			if (++count > 1)
386 				break;
387 		}
388 	}
389 	return (count);
390 }
391 
392 /*
393  * Check if there are multiple PI waiters and returns first
394  * waiter.
395  */
396 static int
397 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
398 {
399 	struct umtxq_chain *uc;
400 	struct umtx_q *uq;
401 	int count = 0;
402 
403 	*first = NULL;
404 	uc = umtxq_getchain(key);
405 	UMTXQ_LOCKED_ASSERT(uc);
406 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
407 		if (umtx_key_match(&uq->uq_key, key)) {
408 			if (++count > 1)
409 				break;
410 			*first = uq;
411 		}
412 	}
413 	return (count);
414 }
415 
416 /*
417  * Wake up threads waiting on an userland object.
418  */
419 static int
420 umtxq_signal(struct umtx_key *key, int n_wake)
421 {
422 	struct umtxq_chain *uc;
423 	struct umtx_q *uq, *next;
424 	int ret;
425 
426 	ret = 0;
427 	uc = umtxq_getchain(key);
428 	UMTXQ_LOCKED_ASSERT(uc);
429 	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
430 		if (umtx_key_match(&uq->uq_key, key)) {
431 			umtxq_remove(uq);
432 			wakeup(uq);
433 			if (++ret >= n_wake)
434 				break;
435 		}
436 	}
437 	return (ret);
438 }
439 
440 /*
441  * Wake up specified thread.
442  */
443 static inline void
444 umtxq_signal_thread(struct umtx_q *uq)
445 {
446 	struct umtxq_chain *uc;
447 
448 	uc = umtxq_getchain(&uq->uq_key);
449 	UMTXQ_LOCKED_ASSERT(uc);
450 	umtxq_remove(uq);
451 	wakeup(uq);
452 }
453 
454 /*
455  * Put thread into sleep state, before sleeping, check if
456  * thread was removed from umtx queue.
457  */
458 static inline int
459 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
460 {
461 	struct umtxq_chain *uc;
462 	int error;
463 
464 	uc = umtxq_getchain(&uq->uq_key);
465 	UMTXQ_LOCKED_ASSERT(uc);
466 	if (!(uq->uq_flags & UQF_UMTXQ))
467 		return (0);
468 	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
469 	if (error == EWOULDBLOCK)
470 		error = ETIMEDOUT;
471 	return (error);
472 }
473 
474 /*
475  * Convert userspace address into unique logical address.
476  */
477 static int
478 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
479 {
480 	struct thread *td = curthread;
481 	vm_map_t map;
482 	vm_map_entry_t entry;
483 	vm_pindex_t pindex;
484 	vm_prot_t prot;
485 	boolean_t wired;
486 
487 	key->type = type;
488 	if (share == THREAD_SHARE) {
489 		key->shared = 0;
490 		key->info.private.vs = td->td_proc->p_vmspace;
491 		key->info.private.addr = (uintptr_t)addr;
492 	} else {
493 		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
494 		map = &td->td_proc->p_vmspace->vm_map;
495 		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
496 		    &entry, &key->info.shared.object, &pindex, &prot,
497 		    &wired) != KERN_SUCCESS) {
498 			return EFAULT;
499 		}
500 
501 		if ((share == PROCESS_SHARE) ||
502 		    (share == AUTO_SHARE &&
503 		     VM_INHERIT_SHARE == entry->inheritance)) {
504 			key->shared = 1;
505 			key->info.shared.offset = entry->offset + entry->start -
506 				(vm_offset_t)addr;
507 			vm_object_reference(key->info.shared.object);
508 		} else {
509 			key->shared = 0;
510 			key->info.private.vs = td->td_proc->p_vmspace;
511 			key->info.private.addr = (uintptr_t)addr;
512 		}
513 		vm_map_lookup_done(map, entry);
514 	}
515 
516 	umtxq_hash(key);
517 	return (0);
518 }
519 
520 /*
521  * Release key.
522  */
523 static inline void
524 umtx_key_release(struct umtx_key *key)
525 {
526 	if (key->shared)
527 		vm_object_deallocate(key->info.shared.object);
528 }
529 
530 /*
531  * Lock a umtx object.
532  */
533 static int
534 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
535 {
536 	struct umtx_q *uq;
537 	u_long owner;
538 	u_long old;
539 	int error = 0;
540 
541 	uq = td->td_umtxq;
542 
543 	/*
544 	 * Care must be exercised when dealing with umtx structure. It
545 	 * can fault on any access.
546 	 */
547 	for (;;) {
548 		/*
549 		 * Try the uncontested case.  This should be done in userland.
550 		 */
551 		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
552 
553 		/* The acquire succeeded. */
554 		if (owner == UMTX_UNOWNED)
555 			return (0);
556 
557 		/* The address was invalid. */
558 		if (owner == -1)
559 			return (EFAULT);
560 
561 		/* If no one owns it but it is contested try to acquire it. */
562 		if (owner == UMTX_CONTESTED) {
563 			owner = casuword(&umtx->u_owner,
564 			    UMTX_CONTESTED, id | UMTX_CONTESTED);
565 
566 			if (owner == UMTX_CONTESTED)
567 				return (0);
568 
569 			/* The address was invalid. */
570 			if (owner == -1)
571 				return (EFAULT);
572 
573 			/* If this failed the lock has changed, restart. */
574 			continue;
575 		}
576 
577 		/*
578 		 * If we caught a signal, we have retried and now
579 		 * exit immediately.
580 		 */
581 		if (error != 0)
582 			return (error);
583 
584 		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
585 			AUTO_SHARE, &uq->uq_key)) != 0)
586 			return (error);
587 
588 		umtxq_lock(&uq->uq_key);
589 		umtxq_busy(&uq->uq_key);
590 		umtxq_insert(uq);
591 		umtxq_unbusy(&uq->uq_key);
592 		umtxq_unlock(&uq->uq_key);
593 
594 		/*
595 		 * Set the contested bit so that a release in user space
596 		 * knows to use the system call for unlock.  If this fails
597 		 * either some one else has acquired the lock or it has been
598 		 * released.
599 		 */
600 		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
601 
602 		/* The address was invalid. */
603 		if (old == -1) {
604 			umtxq_lock(&uq->uq_key);
605 			umtxq_remove(uq);
606 			umtxq_unlock(&uq->uq_key);
607 			umtx_key_release(&uq->uq_key);
608 			return (EFAULT);
609 		}
610 
611 		/*
612 		 * We set the contested bit, sleep. Otherwise the lock changed
613 		 * and we need to retry or we lost a race to the thread
614 		 * unlocking the umtx.
615 		 */
616 		umtxq_lock(&uq->uq_key);
617 		if (old == owner)
618 			error = umtxq_sleep(uq, "umtx", timo);
619 		umtxq_remove(uq);
620 		umtxq_unlock(&uq->uq_key);
621 		umtx_key_release(&uq->uq_key);
622 	}
623 
624 	return (0);
625 }
626 
627 /*
628  * Lock a umtx object.
629  */
630 static int
631 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
632 	struct timespec *timeout)
633 {
634 	struct timespec ts, ts2, ts3;
635 	struct timeval tv;
636 	int error;
637 
638 	if (timeout == NULL) {
639 		error = _do_lock_umtx(td, umtx, id, 0);
640 		/* Mutex locking is restarted if it is interrupted. */
641 		if (error == EINTR)
642 			error = ERESTART;
643 	} else {
644 		getnanouptime(&ts);
645 		timespecadd(&ts, timeout);
646 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
647 		for (;;) {
648 			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
649 			if (error != ETIMEDOUT)
650 				break;
651 			getnanouptime(&ts2);
652 			if (timespeccmp(&ts2, &ts, >=)) {
653 				error = ETIMEDOUT;
654 				break;
655 			}
656 			ts3 = ts;
657 			timespecsub(&ts3, &ts2);
658 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
659 		}
660 		/* Timed-locking is not restarted. */
661 		if (error == ERESTART)
662 			error = EINTR;
663 	}
664 	return (error);
665 }
666 
667 /*
668  * Unlock a umtx object.
669  */
670 static int
671 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
672 {
673 	struct umtx_key key;
674 	u_long owner;
675 	u_long old;
676 	int error;
677 	int count;
678 
679 	/*
680 	 * Make sure we own this mtx.
681 	 */
682 	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
683 	if (owner == -1)
684 		return (EFAULT);
685 
686 	if ((owner & ~UMTX_CONTESTED) != id)
687 		return (EPERM);
688 
689 	/* This should be done in userland */
690 	if ((owner & UMTX_CONTESTED) == 0) {
691 		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
692 		if (old == -1)
693 			return (EFAULT);
694 		if (old == owner)
695 			return (0);
696 		owner = old;
697 	}
698 
699 	/* We should only ever be in here for contested locks */
700 	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
701 		&key)) != 0)
702 		return (error);
703 
704 	umtxq_lock(&key);
705 	umtxq_busy(&key);
706 	count = umtxq_count(&key);
707 	umtxq_unlock(&key);
708 
709 	/*
710 	 * When unlocking the umtx, it must be marked as unowned if
711 	 * there is zero or one thread only waiting for it.
712 	 * Otherwise, it must be marked as contested.
713 	 */
714 	old = casuword(&umtx->u_owner, owner,
715 		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
716 	umtxq_lock(&key);
717 	umtxq_signal(&key,1);
718 	umtxq_unbusy(&key);
719 	umtxq_unlock(&key);
720 	umtx_key_release(&key);
721 	if (old == -1)
722 		return (EFAULT);
723 	if (old != owner)
724 		return (EINVAL);
725 	return (0);
726 }
727 
728 #ifdef COMPAT_IA32
729 
730 /*
731  * Lock a umtx object.
732  */
733 static int
734 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
735 {
736 	struct umtx_q *uq;
737 	uint32_t owner;
738 	uint32_t old;
739 	int error = 0;
740 
741 	uq = td->td_umtxq;
742 
743 	/*
744 	 * Care must be exercised when dealing with umtx structure. It
745 	 * can fault on any access.
746 	 */
747 	for (;;) {
748 		/*
749 		 * Try the uncontested case.  This should be done in userland.
750 		 */
751 		owner = casuword32(m, UMUTEX_UNOWNED, id);
752 
753 		/* The acquire succeeded. */
754 		if (owner == UMUTEX_UNOWNED)
755 			return (0);
756 
757 		/* The address was invalid. */
758 		if (owner == -1)
759 			return (EFAULT);
760 
761 		/* If no one owns it but it is contested try to acquire it. */
762 		if (owner == UMUTEX_CONTESTED) {
763 			owner = casuword32(m,
764 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
765 			if (owner == UMUTEX_CONTESTED)
766 				return (0);
767 
768 			/* The address was invalid. */
769 			if (owner == -1)
770 				return (EFAULT);
771 
772 			/* If this failed the lock has changed, restart. */
773 			continue;
774 		}
775 
776 		/*
777 		 * If we caught a signal, we have retried and now
778 		 * exit immediately.
779 		 */
780 		if (error != 0)
781 			return (error);
782 
783 		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
784 			AUTO_SHARE, &uq->uq_key)) != 0)
785 			return (error);
786 
787 		umtxq_lock(&uq->uq_key);
788 		umtxq_busy(&uq->uq_key);
789 		umtxq_insert(uq);
790 		umtxq_unbusy(&uq->uq_key);
791 		umtxq_unlock(&uq->uq_key);
792 
793 		/*
794 		 * Set the contested bit so that a release in user space
795 		 * knows to use the system call for unlock.  If this fails
796 		 * either some one else has acquired the lock or it has been
797 		 * released.
798 		 */
799 		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
800 
801 		/* The address was invalid. */
802 		if (old == -1) {
803 			umtxq_lock(&uq->uq_key);
804 			umtxq_remove(uq);
805 			umtxq_unlock(&uq->uq_key);
806 			umtx_key_release(&uq->uq_key);
807 			return (EFAULT);
808 		}
809 
810 		/*
811 		 * We set the contested bit, sleep. Otherwise the lock changed
812 		 * and we need to retry or we lost a race to the thread
813 		 * unlocking the umtx.
814 		 */
815 		umtxq_lock(&uq->uq_key);
816 		if (old == owner)
817 			error = umtxq_sleep(uq, "umtx", timo);
818 		umtxq_remove(uq);
819 		umtxq_unlock(&uq->uq_key);
820 		umtx_key_release(&uq->uq_key);
821 	}
822 
823 	return (0);
824 }
825 
826 /*
827  * Lock a umtx object.
828  */
829 static int
830 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
831 	struct timespec *timeout)
832 {
833 	struct timespec ts, ts2, ts3;
834 	struct timeval tv;
835 	int error;
836 
837 	if (timeout == NULL) {
838 		error = _do_lock_umtx32(td, m, id, 0);
839 		/* Mutex locking is restarted if it is interrupted. */
840 		if (error == EINTR)
841 			error = ERESTART;
842 	} else {
843 		getnanouptime(&ts);
844 		timespecadd(&ts, timeout);
845 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
846 		for (;;) {
847 			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
848 			if (error != ETIMEDOUT)
849 				break;
850 			getnanouptime(&ts2);
851 			if (timespeccmp(&ts2, &ts, >=)) {
852 				error = ETIMEDOUT;
853 				break;
854 			}
855 			ts3 = ts;
856 			timespecsub(&ts3, &ts2);
857 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
858 		}
859 		/* Timed-locking is not restarted. */
860 		if (error == ERESTART)
861 			error = EINTR;
862 	}
863 	return (error);
864 }
865 
866 /*
867  * Unlock a umtx object.
868  */
869 static int
870 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
871 {
872 	struct umtx_key key;
873 	uint32_t owner;
874 	uint32_t old;
875 	int error;
876 	int count;
877 
878 	/*
879 	 * Make sure we own this mtx.
880 	 */
881 	owner = fuword32(m);
882 	if (owner == -1)
883 		return (EFAULT);
884 
885 	if ((owner & ~UMUTEX_CONTESTED) != id)
886 		return (EPERM);
887 
888 	/* This should be done in userland */
889 	if ((owner & UMUTEX_CONTESTED) == 0) {
890 		old = casuword32(m, owner, UMUTEX_UNOWNED);
891 		if (old == -1)
892 			return (EFAULT);
893 		if (old == owner)
894 			return (0);
895 		owner = old;
896 	}
897 
898 	/* We should only ever be in here for contested locks */
899 	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
900 		&key)) != 0)
901 		return (error);
902 
903 	umtxq_lock(&key);
904 	umtxq_busy(&key);
905 	count = umtxq_count(&key);
906 	umtxq_unlock(&key);
907 
908 	/*
909 	 * When unlocking the umtx, it must be marked as unowned if
910 	 * there is zero or one thread only waiting for it.
911 	 * Otherwise, it must be marked as contested.
912 	 */
913 	old = casuword32(m, owner,
914 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
915 	umtxq_lock(&key);
916 	umtxq_signal(&key,1);
917 	umtxq_unbusy(&key);
918 	umtxq_unlock(&key);
919 	umtx_key_release(&key);
920 	if (old == -1)
921 		return (EFAULT);
922 	if (old != owner)
923 		return (EINVAL);
924 	return (0);
925 }
926 #endif
927 
928 /*
929  * Fetch and compare value, sleep on the address if value is not changed.
930  */
931 static int
932 do_wait(struct thread *td, void *addr, u_long id,
933 	struct timespec *timeout, int compat32)
934 {
935 	struct umtx_q *uq;
936 	struct timespec ts, ts2, ts3;
937 	struct timeval tv;
938 	u_long tmp;
939 	int error = 0;
940 
941 	uq = td->td_umtxq;
942 	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
943 	    &uq->uq_key)) != 0)
944 		return (error);
945 
946 	umtxq_lock(&uq->uq_key);
947 	umtxq_insert(uq);
948 	umtxq_unlock(&uq->uq_key);
949 	if (compat32 == 0)
950 		tmp = fuword(addr);
951         else
952 		tmp = fuword32(addr);
953 	if (tmp != id) {
954 		umtxq_lock(&uq->uq_key);
955 		umtxq_remove(uq);
956 		umtxq_unlock(&uq->uq_key);
957 	} else if (timeout == NULL) {
958 		umtxq_lock(&uq->uq_key);
959 		error = umtxq_sleep(uq, "uwait", 0);
960 		umtxq_remove(uq);
961 		umtxq_unlock(&uq->uq_key);
962 	} else {
963 		getnanouptime(&ts);
964 		timespecadd(&ts, timeout);
965 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
966 		umtxq_lock(&uq->uq_key);
967 		for (;;) {
968 			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
969 			if (!(uq->uq_flags & UQF_UMTXQ))
970 				break;
971 			if (error != ETIMEDOUT)
972 				break;
973 			umtxq_unlock(&uq->uq_key);
974 			getnanouptime(&ts2);
975 			if (timespeccmp(&ts2, &ts, >=)) {
976 				error = ETIMEDOUT;
977 				umtxq_lock(&uq->uq_key);
978 				break;
979 			}
980 			ts3 = ts;
981 			timespecsub(&ts3, &ts2);
982 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
983 			umtxq_lock(&uq->uq_key);
984 		}
985 		umtxq_remove(uq);
986 		umtxq_unlock(&uq->uq_key);
987 	}
988 	umtx_key_release(&uq->uq_key);
989 	if (error == ERESTART)
990 		error = EINTR;
991 	return (error);
992 }
993 
994 /*
995  * Wake up threads sleeping on the specified address.
996  */
997 int
998 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
999 {
1000 	struct umtx_key key;
1001 	int ret;
1002 
1003 	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
1004 	   &key)) != 0)
1005 		return (ret);
1006 	umtxq_lock(&key);
1007 	ret = umtxq_signal(&key, n_wake);
1008 	umtxq_unlock(&key);
1009 	umtx_key_release(&key);
1010 	return (0);
1011 }
1012 
1013 /*
1014  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1015  */
1016 static int
1017 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1018 	int try)
1019 {
1020 	struct umtx_q *uq;
1021 	uint32_t owner, old, id;
1022 	int error = 0;
1023 
1024 	id = td->td_tid;
1025 	uq = td->td_umtxq;
1026 
1027 	/*
1028 	 * Care must be exercised when dealing with umtx structure. It
1029 	 * can fault on any access.
1030 	 */
1031 	for (;;) {
1032 		/*
1033 		 * Try the uncontested case.  This should be done in userland.
1034 		 */
1035 		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1036 
1037 		/* The acquire succeeded. */
1038 		if (owner == UMUTEX_UNOWNED)
1039 			return (0);
1040 
1041 		/* The address was invalid. */
1042 		if (owner == -1)
1043 			return (EFAULT);
1044 
1045 		/* If no one owns it but it is contested try to acquire it. */
1046 		if (owner == UMUTEX_CONTESTED) {
1047 			owner = casuword32(&m->m_owner,
1048 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1049 
1050 			if (owner == UMUTEX_CONTESTED)
1051 				return (0);
1052 
1053 			/* The address was invalid. */
1054 			if (owner == -1)
1055 				return (EFAULT);
1056 
1057 			/* If this failed the lock has changed, restart. */
1058 			continue;
1059 		}
1060 
1061 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1062 		    (owner & ~UMUTEX_CONTESTED) == id)
1063 			return (EDEADLK);
1064 
1065 		if (try != 0)
1066 			return (EBUSY);
1067 
1068 		/*
1069 		 * If we caught a signal, we have retried and now
1070 		 * exit immediately.
1071 		 */
1072 		if (error != 0)
1073 			return (error);
1074 
1075 		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1076 		    GET_SHARE(flags), &uq->uq_key)) != 0)
1077 			return (error);
1078 
1079 		umtxq_lock(&uq->uq_key);
1080 		umtxq_busy(&uq->uq_key);
1081 		umtxq_insert(uq);
1082 		umtxq_unbusy(&uq->uq_key);
1083 		umtxq_unlock(&uq->uq_key);
1084 
1085 		/*
1086 		 * Set the contested bit so that a release in user space
1087 		 * knows to use the system call for unlock.  If this fails
1088 		 * either some one else has acquired the lock or it has been
1089 		 * released.
1090 		 */
1091 		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1092 
1093 		/* The address was invalid. */
1094 		if (old == -1) {
1095 			umtxq_lock(&uq->uq_key);
1096 			umtxq_remove(uq);
1097 			umtxq_unlock(&uq->uq_key);
1098 			umtx_key_release(&uq->uq_key);
1099 			return (EFAULT);
1100 		}
1101 
1102 		/*
1103 		 * We set the contested bit, sleep. Otherwise the lock changed
1104 		 * and we need to retry or we lost a race to the thread
1105 		 * unlocking the umtx.
1106 		 */
1107 		umtxq_lock(&uq->uq_key);
1108 		if (old == owner)
1109 			error = umtxq_sleep(uq, "umtxn", timo);
1110 		umtxq_remove(uq);
1111 		umtxq_unlock(&uq->uq_key);
1112 		umtx_key_release(&uq->uq_key);
1113 	}
1114 
1115 	return (0);
1116 }
1117 
1118 /*
1119  * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
1120  */
1121 /*
1122  * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1123  */
1124 static int
1125 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1126 {
1127 	struct umtx_key key;
1128 	uint32_t owner, old, id;
1129 	int error;
1130 	int count;
1131 
1132 	id = td->td_tid;
1133 	/*
1134 	 * Make sure we own this mtx.
1135 	 */
1136 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1137 	if (owner == -1)
1138 		return (EFAULT);
1139 
1140 	if ((owner & ~UMUTEX_CONTESTED) != id)
1141 		return (EPERM);
1142 
1143 	/* This should be done in userland */
1144 	if ((owner & UMUTEX_CONTESTED) == 0) {
1145 		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1146 		if (old == -1)
1147 			return (EFAULT);
1148 		if (old == owner)
1149 			return (0);
1150 		owner = old;
1151 	}
1152 
1153 	/* We should only ever be in here for contested locks */
1154 	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1155 	    &key)) != 0)
1156 		return (error);
1157 
1158 	umtxq_lock(&key);
1159 	umtxq_busy(&key);
1160 	count = umtxq_count(&key);
1161 	umtxq_unlock(&key);
1162 
1163 	/*
1164 	 * When unlocking the umtx, it must be marked as unowned if
1165 	 * there is zero or one thread only waiting for it.
1166 	 * Otherwise, it must be marked as contested.
1167 	 */
1168 	old = casuword32(&m->m_owner, owner,
1169 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1170 	umtxq_lock(&key);
1171 	umtxq_signal(&key,1);
1172 	umtxq_unbusy(&key);
1173 	umtxq_unlock(&key);
1174 	umtx_key_release(&key);
1175 	if (old == -1)
1176 		return (EFAULT);
1177 	if (old != owner)
1178 		return (EINVAL);
1179 	return (0);
1180 }
1181 
1182 static inline struct umtx_pi *
1183 umtx_pi_alloc(int flags)
1184 {
1185 	struct umtx_pi *pi;
1186 
1187 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1188 	TAILQ_INIT(&pi->pi_blocked);
1189 	atomic_add_int(&umtx_pi_allocated, 1);
1190 	return (pi);
1191 }
1192 
1193 static inline void
1194 umtx_pi_free(struct umtx_pi *pi)
1195 {
1196 	uma_zfree(umtx_pi_zone, pi);
1197 	atomic_add_int(&umtx_pi_allocated, -1);
1198 }
1199 
1200 /*
1201  * Adjust the thread's position on a pi_state after its priority has been
1202  * changed.
1203  */
1204 static int
1205 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1206 {
1207 	struct umtx_q *uq, *uq1, *uq2;
1208 	struct thread *td1;
1209 
1210 	mtx_assert(&umtx_lock, MA_OWNED);
1211 	if (pi == NULL)
1212 		return (0);
1213 
1214 	uq = td->td_umtxq;
1215 
1216 	/*
1217 	 * Check if the thread needs to be moved on the blocked chain.
1218 	 * It needs to be moved if either its priority is lower than
1219 	 * the previous thread or higher than the next thread.
1220 	 */
1221 	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1222 	uq2 = TAILQ_NEXT(uq, uq_lockq);
1223 	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1224 	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1225 		/*
1226 		 * Remove thread from blocked chain and determine where
1227 		 * it should be moved to.
1228 		 */
1229 		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1230 		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1231 			td1 = uq1->uq_thread;
1232 			MPASS(td1->td_proc->p_magic == P_MAGIC);
1233 			if (UPRI(td1) > UPRI(td))
1234 				break;
1235 		}
1236 
1237 		if (uq1 == NULL)
1238 			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1239 		else
1240 			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1241 	}
1242 	return (1);
1243 }
1244 
1245 /*
1246  * Propagate priority when a thread is blocked on POSIX
1247  * PI mutex.
1248  */
1249 static void
1250 umtx_propagate_priority(struct thread *td)
1251 {
1252 	struct umtx_q *uq;
1253 	struct umtx_pi *pi;
1254 	int pri;
1255 
1256 	mtx_assert(&umtx_lock, MA_OWNED);
1257 	pri = UPRI(td);
1258 	uq = td->td_umtxq;
1259 	pi = uq->uq_pi_blocked;
1260 	if (pi == NULL)
1261 		return;
1262 
1263 	for (;;) {
1264 		td = pi->pi_owner;
1265 		if (td == NULL)
1266 			return;
1267 
1268 		MPASS(td->td_proc != NULL);
1269 		MPASS(td->td_proc->p_magic == P_MAGIC);
1270 
1271 		if (UPRI(td) <= pri)
1272 			return;
1273 
1274 		thread_lock(td);
1275 		sched_lend_user_prio(td, pri);
1276 		thread_unlock(td);
1277 
1278 		/*
1279 		 * Pick up the lock that td is blocked on.
1280 		 */
1281 		uq = td->td_umtxq;
1282 		pi = uq->uq_pi_blocked;
1283 		/* Resort td on the list if needed. */
1284 		if (!umtx_pi_adjust_thread(pi, td))
1285 			break;
1286 	}
1287 }
1288 
1289 /*
1290  * Unpropagate priority for a PI mutex when a thread blocked on
1291  * it is interrupted by signal or resumed by others.
1292  */
1293 static void
1294 umtx_unpropagate_priority(struct umtx_pi *pi)
1295 {
1296 	struct umtx_q *uq, *uq_owner;
1297 	struct umtx_pi *pi2;
1298 	int pri, oldpri;
1299 
1300 	mtx_assert(&umtx_lock, MA_OWNED);
1301 
1302 	while (pi != NULL && pi->pi_owner != NULL) {
1303 		pri = PRI_MAX;
1304 		uq_owner = pi->pi_owner->td_umtxq;
1305 
1306 		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1307 			uq = TAILQ_FIRST(&pi2->pi_blocked);
1308 			if (uq != NULL) {
1309 				if (pri > UPRI(uq->uq_thread))
1310 					pri = UPRI(uq->uq_thread);
1311 			}
1312 		}
1313 
1314 		if (pri > uq_owner->uq_inherited_pri)
1315 			pri = uq_owner->uq_inherited_pri;
1316 		thread_lock(pi->pi_owner);
1317 		oldpri = pi->pi_owner->td_user_pri;
1318 		sched_unlend_user_prio(pi->pi_owner, pri);
1319 		thread_unlock(pi->pi_owner);
1320 		umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1321 		pi = uq_owner->uq_pi_blocked;
1322 	}
1323 }
1324 
1325 /*
1326  * Insert a PI mutex into owned list.
1327  */
1328 static void
1329 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1330 {
1331 	struct umtx_q *uq_owner;
1332 
1333 	uq_owner = owner->td_umtxq;
1334 	mtx_assert(&umtx_lock, MA_OWNED);
1335 	if (pi->pi_owner != NULL)
1336 		panic("pi_ower != NULL");
1337 	pi->pi_owner = owner;
1338 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1339 }
1340 
1341 /*
1342  * Claim ownership of a PI mutex.
1343  */
1344 static int
1345 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1346 {
1347 	struct umtx_q *uq, *uq_owner;
1348 
1349 	uq_owner = owner->td_umtxq;
1350 	mtx_lock_spin(&umtx_lock);
1351 	if (pi->pi_owner == owner) {
1352 		mtx_unlock_spin(&umtx_lock);
1353 		return (0);
1354 	}
1355 
1356 	if (pi->pi_owner != NULL) {
1357 		/*
1358 		 * userland may have already messed the mutex, sigh.
1359 		 */
1360 		mtx_unlock_spin(&umtx_lock);
1361 		return (EPERM);
1362 	}
1363 	umtx_pi_setowner(pi, owner);
1364 	uq = TAILQ_FIRST(&pi->pi_blocked);
1365 	if (uq != NULL) {
1366 		int pri;
1367 
1368 		pri = UPRI(uq->uq_thread);
1369 		thread_lock(owner);
1370 		if (pri < UPRI(owner))
1371 			sched_lend_user_prio(owner, pri);
1372 		thread_unlock(owner);
1373 	}
1374 	mtx_unlock_spin(&umtx_lock);
1375 	return (0);
1376 }
1377 
1378 static void
1379 umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1380 {
1381 	struct umtx_q *uq;
1382 	struct umtx_pi *pi;
1383 
1384 	uq = td->td_umtxq;
1385 	/*
1386 	 * Pick up the lock that td is blocked on.
1387 	 */
1388 	pi = uq->uq_pi_blocked;
1389 	MPASS(pi != NULL);
1390 
1391 	/* Resort the turnstile on the list. */
1392 	if (!umtx_pi_adjust_thread(pi, td))
1393 		return;
1394 
1395 	/*
1396 	 * If our priority was lowered and we are at the head of the
1397 	 * turnstile, then propagate our new priority up the chain.
1398 	 */
1399 	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1400 		umtx_propagate_priority(td);
1401 }
1402 
1403 /*
1404  * Adjust a thread's order position in its blocked PI mutex,
1405  * this may result new priority propagating process.
1406  */
1407 void
1408 umtx_pi_adjust(struct thread *td, u_char oldpri)
1409 {
1410 	struct umtx_q *uq;
1411 	struct umtx_pi *pi;
1412 
1413 	uq = td->td_umtxq;
1414 	mtx_lock_spin(&umtx_lock);
1415 	/*
1416 	 * Pick up the lock that td is blocked on.
1417 	 */
1418 	pi = uq->uq_pi_blocked;
1419 	if (pi != NULL)
1420 		umtx_pi_adjust_locked(td, oldpri);
1421 	mtx_unlock_spin(&umtx_lock);
1422 }
1423 
1424 /*
1425  * Sleep on a PI mutex.
1426  */
1427 static int
1428 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1429 	uint32_t owner, const char *wmesg, int timo)
1430 {
1431 	struct umtxq_chain *uc;
1432 	struct thread *td, *td1;
1433 	struct umtx_q *uq1;
1434 	int pri;
1435 	int error = 0;
1436 
1437 	td = uq->uq_thread;
1438 	KASSERT(td == curthread, ("inconsistent uq_thread"));
1439 	uc = umtxq_getchain(&uq->uq_key);
1440 	UMTXQ_LOCKED_ASSERT(uc);
1441 	umtxq_insert(uq);
1442 	if (pi->pi_owner == NULL) {
1443 		/* XXX
1444 		 * Current, We only support process private PI-mutex,
1445 		 * non-contended PI-mutexes are locked in userland.
1446 		 * Process shared PI-mutex should always be initialized
1447 		 * by kernel and be registered in kernel, locking should
1448 		 * always be done by kernel to avoid security problems.
1449 		 * For process private PI-mutex, we can find owner
1450 		 * thread and boost its priority safely.
1451 		 */
1452 		PROC_LOCK(curproc);
1453 		td1 = thread_find(curproc, owner);
1454 		mtx_lock_spin(&umtx_lock);
1455 		if (td1 != NULL && pi->pi_owner == NULL) {
1456 			uq1 = td1->td_umtxq;
1457 			umtx_pi_setowner(pi, td1);
1458 		}
1459 		PROC_UNLOCK(curproc);
1460 	} else {
1461 		mtx_lock_spin(&umtx_lock);
1462 	}
1463 
1464 	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1465 		pri = UPRI(uq1->uq_thread);
1466 		if (pri > UPRI(td))
1467 			break;
1468 	}
1469 
1470 	if (uq1 != NULL)
1471 		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1472 	else
1473 		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1474 
1475 	uq->uq_pi_blocked = pi;
1476 	thread_lock(td);
1477 	td->td_flags |= TDF_UPIBLOCKED;
1478 	thread_unlock(td);
1479 	mtx_unlock_spin(&umtx_lock);
1480 	umtxq_unlock(&uq->uq_key);
1481 
1482 	mtx_lock_spin(&umtx_lock);
1483 	umtx_propagate_priority(td);
1484 	mtx_unlock_spin(&umtx_lock);
1485 
1486 	umtxq_lock(&uq->uq_key);
1487 	if (uq->uq_flags & UQF_UMTXQ) {
1488 		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1489 		if (error == EWOULDBLOCK)
1490 			error = ETIMEDOUT;
1491 		if (uq->uq_flags & UQF_UMTXQ) {
1492 			umtxq_busy(&uq->uq_key);
1493 			umtxq_remove(uq);
1494 			umtxq_unbusy(&uq->uq_key);
1495 		}
1496 	}
1497 	umtxq_unlock(&uq->uq_key);
1498 
1499 	mtx_lock_spin(&umtx_lock);
1500 	uq->uq_pi_blocked = NULL;
1501 	thread_lock(td);
1502 	td->td_flags &= ~TDF_UPIBLOCKED;
1503 	thread_unlock(td);
1504 	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1505 	umtx_unpropagate_priority(pi);
1506 	mtx_unlock_spin(&umtx_lock);
1507 
1508 	umtxq_lock(&uq->uq_key);
1509 
1510 	return (error);
1511 }
1512 
1513 /*
1514  * Add reference count for a PI mutex.
1515  */
1516 static void
1517 umtx_pi_ref(struct umtx_pi *pi)
1518 {
1519 	struct umtxq_chain *uc;
1520 
1521 	uc = umtxq_getchain(&pi->pi_key);
1522 	UMTXQ_LOCKED_ASSERT(uc);
1523 	pi->pi_refcount++;
1524 }
1525 
1526 /*
1527  * Decrease reference count for a PI mutex, if the counter
1528  * is decreased to zero, its memory space is freed.
1529  */
1530 static void
1531 umtx_pi_unref(struct umtx_pi *pi)
1532 {
1533 	struct umtxq_chain *uc;
1534 	int free = 0;
1535 
1536 	uc = umtxq_getchain(&pi->pi_key);
1537 	UMTXQ_LOCKED_ASSERT(uc);
1538 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1539 	if (--pi->pi_refcount == 0) {
1540 		mtx_lock_spin(&umtx_lock);
1541 		if (pi->pi_owner != NULL) {
1542 			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1543 				pi, pi_link);
1544 			pi->pi_owner = NULL;
1545 		}
1546 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1547 			("blocked queue not empty"));
1548 		mtx_unlock_spin(&umtx_lock);
1549 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1550 		free = 1;
1551 	}
1552 	if (free)
1553 		umtx_pi_free(pi);
1554 }
1555 
1556 /*
1557  * Find a PI mutex in hash table.
1558  */
1559 static struct umtx_pi *
1560 umtx_pi_lookup(struct umtx_key *key)
1561 {
1562 	struct umtxq_chain *uc;
1563 	struct umtx_pi *pi;
1564 
1565 	uc = umtxq_getchain(key);
1566 	UMTXQ_LOCKED_ASSERT(uc);
1567 
1568 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1569 		if (umtx_key_match(&pi->pi_key, key)) {
1570 			return (pi);
1571 		}
1572 	}
1573 	return (NULL);
1574 }
1575 
1576 /*
1577  * Insert a PI mutex into hash table.
1578  */
1579 static inline void
1580 umtx_pi_insert(struct umtx_pi *pi)
1581 {
1582 	struct umtxq_chain *uc;
1583 
1584 	uc = umtxq_getchain(&pi->pi_key);
1585 	UMTXQ_LOCKED_ASSERT(uc);
1586 	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1587 }
1588 
1589 /*
1590  * Lock a PI mutex.
1591  */
1592 static int
1593 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1594 	int try)
1595 {
1596 	struct umtx_q *uq;
1597 	struct umtx_pi *pi, *new_pi;
1598 	uint32_t id, owner, old;
1599 	int error;
1600 
1601 	id = td->td_tid;
1602 	uq = td->td_umtxq;
1603 
1604 	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1605 	    &uq->uq_key)) != 0)
1606 		return (error);
1607 	umtxq_lock(&uq->uq_key);
1608 	pi = umtx_pi_lookup(&uq->uq_key);
1609 	if (pi == NULL) {
1610 		new_pi = umtx_pi_alloc(M_NOWAIT);
1611 		if (new_pi == NULL) {
1612 			umtxq_unlock(&uq->uq_key);
1613 			new_pi = umtx_pi_alloc(M_WAITOK);
1614 			new_pi->pi_key = uq->uq_key;
1615 			umtxq_lock(&uq->uq_key);
1616 			pi = umtx_pi_lookup(&uq->uq_key);
1617 			if (pi != NULL) {
1618 				umtx_pi_free(new_pi);
1619 				new_pi = NULL;
1620 			}
1621 		}
1622 		if (new_pi != NULL) {
1623 			new_pi->pi_key = uq->uq_key;
1624 			umtx_pi_insert(new_pi);
1625 			pi = new_pi;
1626 		}
1627 	}
1628 	umtx_pi_ref(pi);
1629 	umtxq_unlock(&uq->uq_key);
1630 
1631 	/*
1632 	 * Care must be exercised when dealing with umtx structure.  It
1633 	 * can fault on any access.
1634 	 */
1635 	for (;;) {
1636 		/*
1637 		 * Try the uncontested case.  This should be done in userland.
1638 		 */
1639 		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1640 
1641 		/* The acquire succeeded. */
1642 		if (owner == UMUTEX_UNOWNED) {
1643 			error = 0;
1644 			break;
1645 		}
1646 
1647 		/* The address was invalid. */
1648 		if (owner == -1) {
1649 			error = EFAULT;
1650 			break;
1651 		}
1652 
1653 		/* If no one owns it but it is contested try to acquire it. */
1654 		if (owner == UMUTEX_CONTESTED) {
1655 			owner = casuword32(&m->m_owner,
1656 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1657 
1658 			if (owner == UMUTEX_CONTESTED) {
1659 				umtxq_lock(&uq->uq_key);
1660 				error = umtx_pi_claim(pi, td);
1661 				umtxq_unlock(&uq->uq_key);
1662 				break;
1663 			}
1664 
1665 			/* The address was invalid. */
1666 			if (owner == -1) {
1667 				error = EFAULT;
1668 				break;
1669 			}
1670 
1671 			/* If this failed the lock has changed, restart. */
1672 			continue;
1673 		}
1674 
1675 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1676 		    (owner & ~UMUTEX_CONTESTED) == id) {
1677 			error = EDEADLK;
1678 			break;
1679 		}
1680 
1681 		if (try != 0) {
1682 			error = EBUSY;
1683 			break;
1684 		}
1685 
1686 		/*
1687 		 * If we caught a signal, we have retried and now
1688 		 * exit immediately.
1689 		 */
1690 		if (error != 0)
1691 			break;
1692 
1693 		umtxq_lock(&uq->uq_key);
1694 		umtxq_busy(&uq->uq_key);
1695 		umtxq_unlock(&uq->uq_key);
1696 
1697 		/*
1698 		 * Set the contested bit so that a release in user space
1699 		 * knows to use the system call for unlock.  If this fails
1700 		 * either some one else has acquired the lock or it has been
1701 		 * released.
1702 		 */
1703 		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1704 
1705 		/* The address was invalid. */
1706 		if (old == -1) {
1707 			umtxq_lock(&uq->uq_key);
1708 			umtxq_unbusy(&uq->uq_key);
1709 			umtxq_unlock(&uq->uq_key);
1710 			error = EFAULT;
1711 			break;
1712 		}
1713 
1714 		umtxq_lock(&uq->uq_key);
1715 		umtxq_unbusy(&uq->uq_key);
1716 		/*
1717 		 * We set the contested bit, sleep. Otherwise the lock changed
1718 		 * and we need to retry or we lost a race to the thread
1719 		 * unlocking the umtx.
1720 		 */
1721 		if (old == owner)
1722 			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1723 				 "umtxpi", timo);
1724 		umtxq_unlock(&uq->uq_key);
1725 	}
1726 
1727 	umtxq_lock(&uq->uq_key);
1728 	umtx_pi_unref(pi);
1729 	umtxq_unlock(&uq->uq_key);
1730 
1731 	umtx_key_release(&uq->uq_key);
1732 	return (error);
1733 }
1734 
1735 /*
1736  * Unlock a PI mutex.
1737  */
1738 static int
1739 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1740 {
1741 	struct umtx_key key;
1742 	struct umtx_q *uq_first, *uq_first2, *uq_me;
1743 	struct umtx_pi *pi, *pi2;
1744 	uint32_t owner, old, id;
1745 	int error;
1746 	int count;
1747 	int pri;
1748 
1749 	id = td->td_tid;
1750 	/*
1751 	 * Make sure we own this mtx.
1752 	 */
1753 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1754 	if (owner == -1)
1755 		return (EFAULT);
1756 
1757 	if ((owner & ~UMUTEX_CONTESTED) != id)
1758 		return (EPERM);
1759 
1760 	/* This should be done in userland */
1761 	if ((owner & UMUTEX_CONTESTED) == 0) {
1762 		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1763 		if (old == -1)
1764 			return (EFAULT);
1765 		if (old == owner)
1766 			return (0);
1767 		owner = old;
1768 	}
1769 
1770 	/* We should only ever be in here for contested locks */
1771 	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1772 	    &key)) != 0)
1773 		return (error);
1774 
1775 	umtxq_lock(&key);
1776 	umtxq_busy(&key);
1777 	count = umtxq_count_pi(&key, &uq_first);
1778 	if (uq_first != NULL) {
1779 		pi = uq_first->uq_pi_blocked;
1780 		if (pi->pi_owner != curthread) {
1781 			umtxq_unbusy(&key);
1782 			umtxq_unlock(&key);
1783 			/* userland messed the mutex */
1784 			return (EPERM);
1785 		}
1786 		uq_me = curthread->td_umtxq;
1787 		mtx_lock_spin(&umtx_lock);
1788 		pi->pi_owner = NULL;
1789 		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1790 		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1791 		pri = PRI_MAX;
1792 		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1793 			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1794 			if (uq_first2 != NULL) {
1795 				if (pri > UPRI(uq_first2->uq_thread))
1796 					pri = UPRI(uq_first2->uq_thread);
1797 			}
1798 		}
1799 		thread_lock(curthread);
1800 		sched_unlend_user_prio(curthread, pri);
1801 		thread_unlock(curthread);
1802 		mtx_unlock_spin(&umtx_lock);
1803 	}
1804 	umtxq_unlock(&key);
1805 
1806 	/*
1807 	 * When unlocking the umtx, it must be marked as unowned if
1808 	 * there is zero or one thread only waiting for it.
1809 	 * Otherwise, it must be marked as contested.
1810 	 */
1811 	old = casuword32(&m->m_owner, owner,
1812 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1813 
1814 	umtxq_lock(&key);
1815 	if (uq_first != NULL)
1816 		umtxq_signal_thread(uq_first);
1817 	umtxq_unbusy(&key);
1818 	umtxq_unlock(&key);
1819 	umtx_key_release(&key);
1820 	if (old == -1)
1821 		return (EFAULT);
1822 	if (old != owner)
1823 		return (EINVAL);
1824 	return (0);
1825 }
1826 
1827 /*
1828  * Lock a PP mutex.
1829  */
1830 static int
1831 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1832 	int try)
1833 {
1834 	struct umtx_q *uq, *uq2;
1835 	struct umtx_pi *pi;
1836 	uint32_t ceiling;
1837 	uint32_t owner, id;
1838 	int error, pri, old_inherited_pri, su;
1839 
1840 	id = td->td_tid;
1841 	uq = td->td_umtxq;
1842 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1843 	    &uq->uq_key)) != 0)
1844 		return (error);
1845 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1846 	for (;;) {
1847 		old_inherited_pri = uq->uq_inherited_pri;
1848 		umtxq_lock(&uq->uq_key);
1849 		umtxq_busy(&uq->uq_key);
1850 		umtxq_unlock(&uq->uq_key);
1851 
1852 		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1853 		if (ceiling > RTP_PRIO_MAX) {
1854 			error = EINVAL;
1855 			goto out;
1856 		}
1857 
1858 		mtx_lock_spin(&umtx_lock);
1859 		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1860 			mtx_unlock_spin(&umtx_lock);
1861 			error = EINVAL;
1862 			goto out;
1863 		}
1864 		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1865 			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1866 			thread_lock(td);
1867 			if (uq->uq_inherited_pri < UPRI(td))
1868 				sched_lend_user_prio(td, uq->uq_inherited_pri);
1869 			thread_unlock(td);
1870 		}
1871 		mtx_unlock_spin(&umtx_lock);
1872 
1873 		owner = casuword32(&m->m_owner,
1874 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1875 
1876 		if (owner == UMUTEX_CONTESTED) {
1877 			error = 0;
1878 			break;
1879 		}
1880 
1881 		/* The address was invalid. */
1882 		if (owner == -1) {
1883 			error = EFAULT;
1884 			break;
1885 		}
1886 
1887 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1888 		    (owner & ~UMUTEX_CONTESTED) == id) {
1889 			error = EDEADLK;
1890 			break;
1891 		}
1892 
1893 		if (try != 0) {
1894 			error = EBUSY;
1895 			break;
1896 		}
1897 
1898 		/*
1899 		 * If we caught a signal, we have retried and now
1900 		 * exit immediately.
1901 		 */
1902 		if (error != 0)
1903 			break;
1904 
1905 		umtxq_lock(&uq->uq_key);
1906 		umtxq_insert(uq);
1907 		umtxq_unbusy(&uq->uq_key);
1908 		error = umtxq_sleep(uq, "umtxpp", timo);
1909 		umtxq_remove(uq);
1910 		umtxq_unlock(&uq->uq_key);
1911 
1912 		mtx_lock_spin(&umtx_lock);
1913 		uq->uq_inherited_pri = old_inherited_pri;
1914 		pri = PRI_MAX;
1915 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1916 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1917 			if (uq2 != NULL) {
1918 				if (pri > UPRI(uq2->uq_thread))
1919 					pri = UPRI(uq2->uq_thread);
1920 			}
1921 		}
1922 		if (pri > uq->uq_inherited_pri)
1923 			pri = uq->uq_inherited_pri;
1924 		thread_lock(td);
1925 		sched_unlend_user_prio(td, pri);
1926 		thread_unlock(td);
1927 		mtx_unlock_spin(&umtx_lock);
1928 	}
1929 
1930 	if (error != 0) {
1931 		mtx_lock_spin(&umtx_lock);
1932 		uq->uq_inherited_pri = old_inherited_pri;
1933 		pri = PRI_MAX;
1934 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1935 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1936 			if (uq2 != NULL) {
1937 				if (pri > UPRI(uq2->uq_thread))
1938 					pri = UPRI(uq2->uq_thread);
1939 			}
1940 		}
1941 		if (pri > uq->uq_inherited_pri)
1942 			pri = uq->uq_inherited_pri;
1943 		thread_lock(td);
1944 		sched_unlend_user_prio(td, pri);
1945 		thread_unlock(td);
1946 		mtx_unlock_spin(&umtx_lock);
1947 	}
1948 
1949 out:
1950 	umtxq_lock(&uq->uq_key);
1951 	umtxq_unbusy(&uq->uq_key);
1952 	umtxq_unlock(&uq->uq_key);
1953 	umtx_key_release(&uq->uq_key);
1954 	return (error);
1955 }
1956 
1957 /*
1958  * Unlock a PP mutex.
1959  */
1960 static int
1961 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
1962 {
1963 	struct umtx_key key;
1964 	struct umtx_q *uq, *uq2;
1965 	struct umtx_pi *pi;
1966 	uint32_t owner, id;
1967 	uint32_t rceiling;
1968 	int error, pri, new_inherited_pri, su;
1969 
1970 	id = td->td_tid;
1971 	uq = td->td_umtxq;
1972 	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1973 
1974 	/*
1975 	 * Make sure we own this mtx.
1976 	 */
1977 	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1978 	if (owner == -1)
1979 		return (EFAULT);
1980 
1981 	if ((owner & ~UMUTEX_CONTESTED) != id)
1982 		return (EPERM);
1983 
1984 	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
1985 	if (error != 0)
1986 		return (error);
1987 
1988 	if (rceiling == -1)
1989 		new_inherited_pri = PRI_MAX;
1990 	else {
1991 		rceiling = RTP_PRIO_MAX - rceiling;
1992 		if (rceiling > RTP_PRIO_MAX)
1993 			return (EINVAL);
1994 		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
1995 	}
1996 
1997 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1998 	    &key)) != 0)
1999 		return (error);
2000 	umtxq_lock(&key);
2001 	umtxq_busy(&key);
2002 	umtxq_unlock(&key);
2003 	/*
2004 	 * For priority protected mutex, always set unlocked state
2005 	 * to UMUTEX_CONTESTED, so that userland always enters kernel
2006 	 * to lock the mutex, it is necessary because thread priority
2007 	 * has to be adjusted for such mutex.
2008 	 */
2009 	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2010 		UMUTEX_CONTESTED);
2011 
2012 	umtxq_lock(&key);
2013 	if (error == 0)
2014 		umtxq_signal(&key, 1);
2015 	umtxq_unbusy(&key);
2016 	umtxq_unlock(&key);
2017 
2018 	if (error == -1)
2019 		error = EFAULT;
2020 	else {
2021 		mtx_lock_spin(&umtx_lock);
2022 		if (su != 0)
2023 			uq->uq_inherited_pri = new_inherited_pri;
2024 		pri = PRI_MAX;
2025 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2026 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
2027 			if (uq2 != NULL) {
2028 				if (pri > UPRI(uq2->uq_thread))
2029 					pri = UPRI(uq2->uq_thread);
2030 			}
2031 		}
2032 		if (pri > uq->uq_inherited_pri)
2033 			pri = uq->uq_inherited_pri;
2034 		thread_lock(td);
2035 		sched_unlend_user_prio(td, pri);
2036 		thread_unlock(td);
2037 		mtx_unlock_spin(&umtx_lock);
2038 	}
2039 	umtx_key_release(&key);
2040 	return (error);
2041 }
2042 
2043 static int
2044 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2045 	uint32_t *old_ceiling)
2046 {
2047 	struct umtx_q *uq;
2048 	uint32_t save_ceiling;
2049 	uint32_t owner, id;
2050 	uint32_t flags;
2051 	int error;
2052 
2053 	flags = fuword32(&m->m_flags);
2054 	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2055 		return (EINVAL);
2056 	if (ceiling > RTP_PRIO_MAX)
2057 		return (EINVAL);
2058 	id = td->td_tid;
2059 	uq = td->td_umtxq;
2060 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2061 	   &uq->uq_key)) != 0)
2062 		return (error);
2063 	for (;;) {
2064 		umtxq_lock(&uq->uq_key);
2065 		umtxq_busy(&uq->uq_key);
2066 		umtxq_unlock(&uq->uq_key);
2067 
2068 		save_ceiling = fuword32(&m->m_ceilings[0]);
2069 
2070 		owner = casuword32(&m->m_owner,
2071 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2072 
2073 		if (owner == UMUTEX_CONTESTED) {
2074 			suword32(&m->m_ceilings[0], ceiling);
2075 			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2076 				UMUTEX_CONTESTED);
2077 			error = 0;
2078 			break;
2079 		}
2080 
2081 		/* The address was invalid. */
2082 		if (owner == -1) {
2083 			error = EFAULT;
2084 			break;
2085 		}
2086 
2087 		if ((owner & ~UMUTEX_CONTESTED) == id) {
2088 			suword32(&m->m_ceilings[0], ceiling);
2089 			error = 0;
2090 			break;
2091 		}
2092 
2093 		/*
2094 		 * If we caught a signal, we have retried and now
2095 		 * exit immediately.
2096 		 */
2097 		if (error != 0)
2098 			break;
2099 
2100 		/*
2101 		 * We set the contested bit, sleep. Otherwise the lock changed
2102 		 * and we need to retry or we lost a race to the thread
2103 		 * unlocking the umtx.
2104 		 */
2105 		umtxq_lock(&uq->uq_key);
2106 		umtxq_insert(uq);
2107 		umtxq_unbusy(&uq->uq_key);
2108 		error = umtxq_sleep(uq, "umtxpp", 0);
2109 		umtxq_remove(uq);
2110 		umtxq_unlock(&uq->uq_key);
2111 	}
2112 	umtxq_lock(&uq->uq_key);
2113 	if (error == 0)
2114 		umtxq_signal(&uq->uq_key, INT_MAX);
2115 	umtxq_unbusy(&uq->uq_key);
2116 	umtxq_unlock(&uq->uq_key);
2117 	umtx_key_release(&uq->uq_key);
2118 	if (error == 0 && old_ceiling != NULL)
2119 		suword32(old_ceiling, save_ceiling);
2120 	return (error);
2121 }
2122 
2123 static int
2124 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2125 	int try)
2126 {
2127 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2128 	case 0:
2129 		return (_do_lock_normal(td, m, flags, timo, try));
2130 	case UMUTEX_PRIO_INHERIT:
2131 		return (_do_lock_pi(td, m, flags, timo, try));
2132 	case UMUTEX_PRIO_PROTECT:
2133 		return (_do_lock_pp(td, m, flags, timo, try));
2134 	}
2135 	return (EINVAL);
2136 }
2137 
2138 /*
2139  * Lock a userland POSIX mutex.
2140  */
2141 static int
2142 do_lock_umutex(struct thread *td, struct umutex *m,
2143 	struct timespec *timeout, int try)
2144 {
2145 	struct timespec ts, ts2, ts3;
2146 	struct timeval tv;
2147 	uint32_t flags;
2148 	int error;
2149 
2150 	flags = fuword32(&m->m_flags);
2151 	if (flags == -1)
2152 		return (EFAULT);
2153 
2154 	if (timeout == NULL) {
2155 		error = _do_lock_umutex(td, m, flags, 0, try);
2156 		/* Mutex locking is restarted if it is interrupted. */
2157 		if (error == EINTR)
2158 			error = ERESTART;
2159 	} else {
2160 		getnanouptime(&ts);
2161 		timespecadd(&ts, timeout);
2162 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
2163 		for (;;) {
2164 			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
2165 			if (error != ETIMEDOUT)
2166 				break;
2167 			getnanouptime(&ts2);
2168 			if (timespeccmp(&ts2, &ts, >=)) {
2169 				error = ETIMEDOUT;
2170 				break;
2171 			}
2172 			ts3 = ts;
2173 			timespecsub(&ts3, &ts2);
2174 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2175 		}
2176 		/* Timed-locking is not restarted. */
2177 		if (error == ERESTART)
2178 			error = EINTR;
2179 	}
2180 	return (error);
2181 }
2182 
2183 /*
2184  * Unlock a userland POSIX mutex.
2185  */
2186 static int
2187 do_unlock_umutex(struct thread *td, struct umutex *m)
2188 {
2189 	uint32_t flags;
2190 
2191 	flags = fuword32(&m->m_flags);
2192 	if (flags == -1)
2193 		return (EFAULT);
2194 
2195 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2196 	case 0:
2197 		return (do_unlock_normal(td, m, flags));
2198 	case UMUTEX_PRIO_INHERIT:
2199 		return (do_unlock_pi(td, m, flags));
2200 	case UMUTEX_PRIO_PROTECT:
2201 		return (do_unlock_pp(td, m, flags));
2202 	}
2203 
2204 	return (EINVAL);
2205 }
2206 
2207 static int
2208 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2209 	struct timespec *timeout, u_long wflags)
2210 {
2211 	struct umtx_q *uq;
2212 	struct timeval tv;
2213 	struct timespec cts, ets, tts;
2214 	uint32_t flags;
2215 	int error;
2216 
2217 	uq = td->td_umtxq;
2218 	flags = fuword32(&cv->c_flags);
2219 	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2220 	if (error != 0)
2221 		return (error);
2222 	umtxq_lock(&uq->uq_key);
2223 	umtxq_busy(&uq->uq_key);
2224 	umtxq_insert(uq);
2225 	umtxq_unlock(&uq->uq_key);
2226 
2227 	/*
2228 	 * The magic thing is we should set c_has_waiters to 1 before
2229 	 * releasing user mutex.
2230 	 */
2231 	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2232 
2233 	umtxq_lock(&uq->uq_key);
2234 	umtxq_unbusy(&uq->uq_key);
2235 	umtxq_unlock(&uq->uq_key);
2236 
2237 	error = do_unlock_umutex(td, m);
2238 
2239 	umtxq_lock(&uq->uq_key);
2240 	if (error == 0) {
2241 		if ((wflags & UMTX_CHECK_UNPARKING) &&
2242 		    (td->td_pflags & TDP_WAKEUP)) {
2243 			td->td_pflags &= ~TDP_WAKEUP;
2244 			error = EINTR;
2245 		} else if (timeout == NULL) {
2246 			error = umtxq_sleep(uq, "ucond", 0);
2247 		} else {
2248 			getnanouptime(&ets);
2249 			timespecadd(&ets, timeout);
2250 			TIMESPEC_TO_TIMEVAL(&tv, timeout);
2251 			for (;;) {
2252 				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2253 				if (error != ETIMEDOUT)
2254 					break;
2255 				getnanouptime(&cts);
2256 				if (timespeccmp(&cts, &ets, >=)) {
2257 					error = ETIMEDOUT;
2258 					break;
2259 				}
2260 				tts = ets;
2261 				timespecsub(&tts, &cts);
2262 				TIMESPEC_TO_TIMEVAL(&tv, &tts);
2263 			}
2264 		}
2265 	}
2266 
2267 	if (error != 0) {
2268 		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
2269 			/*
2270 			 * If we concurrently got do_cv_signal()d
2271 			 * and we got an error or UNIX signals or a timeout,
2272 			 * then, perform another umtxq_signal to avoid
2273 			 * consuming the wakeup. This may cause supurious
2274 			 * wakeup for another thread which was just queued,
2275 			 * but SUSV3 explicitly allows supurious wakeup to
2276 			 * occur, and indeed a kernel based implementation
2277 			 * can not avoid it.
2278 			 */
2279 			if (!umtxq_signal(&uq->uq_key, 1))
2280 				error = 0;
2281 		}
2282 		if (error == ERESTART)
2283 			error = EINTR;
2284 	}
2285 	umtxq_remove(uq);
2286 	umtxq_unlock(&uq->uq_key);
2287 	umtx_key_release(&uq->uq_key);
2288 	return (error);
2289 }
2290 
2291 /*
2292  * Signal a userland condition variable.
2293  */
2294 static int
2295 do_cv_signal(struct thread *td, struct ucond *cv)
2296 {
2297 	struct umtx_key key;
2298 	int error, cnt, nwake;
2299 	uint32_t flags;
2300 
2301 	flags = fuword32(&cv->c_flags);
2302 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2303 		return (error);
2304 	umtxq_lock(&key);
2305 	umtxq_busy(&key);
2306 	cnt = umtxq_count(&key);
2307 	nwake = umtxq_signal(&key, 1);
2308 	if (cnt <= nwake) {
2309 		umtxq_unlock(&key);
2310 		error = suword32(
2311 		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2312 		umtxq_lock(&key);
2313 	}
2314 	umtxq_unbusy(&key);
2315 	umtxq_unlock(&key);
2316 	umtx_key_release(&key);
2317 	return (error);
2318 }
2319 
2320 static int
2321 do_cv_broadcast(struct thread *td, struct ucond *cv)
2322 {
2323 	struct umtx_key key;
2324 	int error;
2325 	uint32_t flags;
2326 
2327 	flags = fuword32(&cv->c_flags);
2328 	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2329 		return (error);
2330 
2331 	umtxq_lock(&key);
2332 	umtxq_busy(&key);
2333 	umtxq_signal(&key, INT_MAX);
2334 	umtxq_unlock(&key);
2335 
2336 	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2337 
2338 	umtxq_lock(&key);
2339 	umtxq_unbusy(&key);
2340 	umtxq_unlock(&key);
2341 
2342 	umtx_key_release(&key);
2343 	return (error);
2344 }
2345 
2346 int
2347 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2348     /* struct umtx *umtx */
2349 {
2350 	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2351 }
2352 
2353 int
2354 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2355     /* struct umtx *umtx */
2356 {
2357 	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2358 }
2359 
2360 static int
2361 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2362 {
2363 	struct timespec *ts, timeout;
2364 	int error;
2365 
2366 	/* Allow a null timespec (wait forever). */
2367 	if (uap->uaddr2 == NULL)
2368 		ts = NULL;
2369 	else {
2370 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2371 		if (error != 0)
2372 			return (error);
2373 		if (timeout.tv_nsec >= 1000000000 ||
2374 		    timeout.tv_nsec < 0) {
2375 			return (EINVAL);
2376 		}
2377 		ts = &timeout;
2378 	}
2379 	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2380 }
2381 
2382 static int
2383 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2384 {
2385 	return (do_unlock_umtx(td, uap->obj, uap->val));
2386 }
2387 
2388 static int
2389 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2390 {
2391 	struct timespec *ts, timeout;
2392 	int error;
2393 
2394 	if (uap->uaddr2 == NULL)
2395 		ts = NULL;
2396 	else {
2397 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2398 		if (error != 0)
2399 			return (error);
2400 		if (timeout.tv_nsec >= 1000000000 ||
2401 		    timeout.tv_nsec < 0)
2402 			return (EINVAL);
2403 		ts = &timeout;
2404 	}
2405 	return do_wait(td, uap->obj, uap->val, ts, 0);
2406 }
2407 
2408 static int
2409 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2410 {
2411 	struct timespec *ts, timeout;
2412 	int error;
2413 
2414 	if (uap->uaddr2 == NULL)
2415 		ts = NULL;
2416 	else {
2417 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2418 		if (error != 0)
2419 			return (error);
2420 		if (timeout.tv_nsec >= 1000000000 ||
2421 		    timeout.tv_nsec < 0)
2422 			return (EINVAL);
2423 		ts = &timeout;
2424 	}
2425 	return do_wait(td, uap->obj, uap->val, ts, 1);
2426 }
2427 
2428 static int
2429 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2430 {
2431 	return (kern_umtx_wake(td, uap->obj, uap->val));
2432 }
2433 
2434 static int
2435 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2436 {
2437 	struct timespec *ts, timeout;
2438 	int error;
2439 
2440 	/* Allow a null timespec (wait forever). */
2441 	if (uap->uaddr2 == NULL)
2442 		ts = NULL;
2443 	else {
2444 		error = copyin(uap->uaddr2, &timeout,
2445 		    sizeof(timeout));
2446 		if (error != 0)
2447 			return (error);
2448 		if (timeout.tv_nsec >= 1000000000 ||
2449 		    timeout.tv_nsec < 0) {
2450 			return (EINVAL);
2451 		}
2452 		ts = &timeout;
2453 	}
2454 	return do_lock_umutex(td, uap->obj, ts, 0);
2455 }
2456 
2457 static int
2458 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2459 {
2460 	return do_lock_umutex(td, uap->obj, NULL, 1);
2461 }
2462 
2463 static int
2464 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2465 {
2466 	return do_unlock_umutex(td, uap->obj);
2467 }
2468 
2469 static int
2470 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2471 {
2472 	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2473 }
2474 
2475 static int
2476 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2477 {
2478 	struct timespec *ts, timeout;
2479 	int error;
2480 
2481 	/* Allow a null timespec (wait forever). */
2482 	if (uap->uaddr2 == NULL)
2483 		ts = NULL;
2484 	else {
2485 		error = copyin(uap->uaddr2, &timeout,
2486 		    sizeof(timeout));
2487 		if (error != 0)
2488 			return (error);
2489 		if (timeout.tv_nsec >= 1000000000 ||
2490 		    timeout.tv_nsec < 0) {
2491 			return (EINVAL);
2492 		}
2493 		ts = &timeout;
2494 	}
2495 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2496 }
2497 
2498 static int
2499 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2500 {
2501 	return do_cv_signal(td, uap->obj);
2502 }
2503 
2504 static int
2505 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2506 {
2507 	return do_cv_broadcast(td, uap->obj);
2508 }
2509 
/*
 * Signature shared by every per-operation handler stored in the op
 * dispatch tables: takes the calling thread and the _umtx_op argument
 * block, and returns an int that the dispatcher passes back unchanged.
 */
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
2511 
/*
 * Native dispatch table for _umtx_op().  The table is indexed directly by
 * uap->op, so the position of each handler must match the numeric value
 * of the UMTX_OP_* constant named in its comment.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint		/* UMTX_OP_WAIT_UINT */
};
2526 
2527 int
2528 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
2529 {
2530 	if ((unsigned)uap->op < UMTX_OP_MAX)
2531 		return (*op_table[uap->op])(td, uap);
2532 	return (EINVAL);
2533 }
2534 
2535 #ifdef COMPAT_IA32
2536 int
2537 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
2538     /* struct umtx *umtx */
2539 {
2540 	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
2541 }
2542 
2543 int
2544 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
2545     /* struct umtx *umtx */
2546 {
2547 	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
2548 }
2549 
/*
 * Layout of a struct timespec as supplied by a 32-bit process: two 32-bit
 * fields.  The fields are signed so that a negative value supplied by
 * userland stays negative when it is widened into the native struct
 * timespec, instead of turning into a huge positive value that would
 * slip past the tv_nsec < 0 validation or act as a near-infinite
 * timeout.
 */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};
2554 
2555 static inline int
2556 copyin_timeout32(void *addr, struct timespec *tsp)
2557 {
2558 	struct timespec32 ts32;
2559 	int error;
2560 
2561 	error = copyin(addr, &ts32, sizeof(struct timespec32));
2562 	if (error == 0) {
2563 		tsp->tv_sec = ts32.tv_sec;
2564 		tsp->tv_nsec = ts32.tv_nsec;
2565 	}
2566 	return (error);
2567 }
2568 
2569 static int
2570 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2571 {
2572 	struct timespec *ts, timeout;
2573 	int error;
2574 
2575 	/* Allow a null timespec (wait forever). */
2576 	if (uap->uaddr2 == NULL)
2577 		ts = NULL;
2578 	else {
2579 		error = copyin_timeout32(uap->uaddr2, &timeout);
2580 		if (error != 0)
2581 			return (error);
2582 		if (timeout.tv_nsec >= 1000000000 ||
2583 		    timeout.tv_nsec < 0) {
2584 			return (EINVAL);
2585 		}
2586 		ts = &timeout;
2587 	}
2588 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2589 }
2590 
2591 static int
2592 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2593 {
2594 	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
2595 }
2596 
2597 static int
2598 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2599 {
2600 	struct timespec *ts, timeout;
2601 	int error;
2602 
2603 	if (uap->uaddr2 == NULL)
2604 		ts = NULL;
2605 	else {
2606 		error = copyin_timeout32(uap->uaddr2, &timeout);
2607 		if (error != 0)
2608 			return (error);
2609 		if (timeout.tv_nsec >= 1000000000 ||
2610 		    timeout.tv_nsec < 0)
2611 			return (EINVAL);
2612 		ts = &timeout;
2613 	}
2614 	return do_wait(td, uap->obj, uap->val, ts, 1);
2615 }
2616 
2617 static int
2618 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2619 {
2620 	struct timespec *ts, timeout;
2621 	int error;
2622 
2623 	/* Allow a null timespec (wait forever). */
2624 	if (uap->uaddr2 == NULL)
2625 		ts = NULL;
2626 	else {
2627 		error = copyin_timeout32(uap->uaddr2, &timeout);
2628 		if (error != 0)
2629 			return (error);
2630 		if (timeout.tv_nsec >= 1000000000 ||
2631 		    timeout.tv_nsec < 0)
2632 			return (EINVAL);
2633 		ts = &timeout;
2634 	}
2635 	return do_lock_umutex(td, uap->obj, ts, 0);
2636 }
2637 
2638 static int
2639 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2640 {
2641 	struct timespec *ts, timeout;
2642 	int error;
2643 
2644 	/* Allow a null timespec (wait forever). */
2645 	if (uap->uaddr2 == NULL)
2646 		ts = NULL;
2647 	else {
2648 		error = copyin_timeout32(uap->uaddr2, &timeout);
2649 		if (error != 0)
2650 			return (error);
2651 		if (timeout.tv_nsec >= 1000000000 ||
2652 		    timeout.tv_nsec < 0)
2653 			return (EINVAL);
2654 		ts = &timeout;
2655 	}
2656 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2657 }
2658 
/*
 * Dispatch table for freebsd32_umtx_op().  It is indexed directly by
 * uap->op and follows the same slot order as op_table; handlers that
 * read a timespec or operate on a 32-bit umtx are replaced by their
 * _compat32 variants.  Both wait slots use __umtx_op_wait_compat32.
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32		/* UMTX_OP_WAIT_UINT */
};
2673 
2674 int
2675 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
2676 {
2677 	if ((unsigned)uap->op < UMTX_OP_MAX)
2678 		return (*op_table_compat32[uap->op])(td,
2679 			(struct _umtx_op_args *)uap);
2680 	return (EINVAL);
2681 }
2682 #endif
2683 
2684 void
2685 umtx_thread_init(struct thread *td)
2686 {
2687 	td->td_umtxq = umtxq_alloc();
2688 	td->td_umtxq->uq_thread = td;
2689 }
2690 
2691 void
2692 umtx_thread_fini(struct thread *td)
2693 {
2694 	umtxq_free(td->td_umtxq);
2695 }
2696 
2697 /*
2698  * It will be called when new thread is created, e.g fork().
2699  */
2700 void
2701 umtx_thread_alloc(struct thread *td)
2702 {
2703 	struct umtx_q *uq;
2704 
2705 	uq = td->td_umtxq;
2706 	uq->uq_inherited_pri = PRI_MAX;
2707 
2708 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2709 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2710 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2711 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
2712 }
2713 
2714 /*
2715  * exec() hook.
2716  */
2717 static void
2718 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
2719 	struct image_params *imgp __unused)
2720 {
2721 	umtx_thread_cleanup(curthread);
2722 }
2723 
/*
 * thread_exit() hook: run the umtx cleanup for the exiting thread td by
 * forwarding it to umtx_thread_cleanup().
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
2732 
2733 /*
2734  * clean up umtx data.
2735  */
2736 static void
2737 umtx_thread_cleanup(struct thread *td)
2738 {
2739 	struct umtx_q *uq;
2740 	struct umtx_pi *pi;
2741 
2742 	if ((uq = td->td_umtxq) == NULL)
2743 		return;
2744 
2745 	mtx_lock_spin(&umtx_lock);
2746 	uq->uq_inherited_pri = PRI_MAX;
2747 	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
2748 		pi->pi_owner = NULL;
2749 		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
2750 	}
2751 	thread_lock(td);
2752 	td->td_flags &= ~TDF_UBORROWING;
2753 	thread_unlock(td);
2754 	mtx_unlock_spin(&umtx_lock);
2755 }
2756