xref: /freebsd/sys/kern/kern_umtx.c (revision 35a04710d7286aa9538917fd7f8e417dbee95b82)
1 /*-
2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54 
55 #include <machine/cpu.h>
56 
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60 
61 #define TYPE_SIMPLE_LOCK	0
62 #define TYPE_SIMPLE_WAIT	1
63 #define TYPE_NORMAL_UMUTEX	2
64 #define TYPE_PI_UMUTEX		3
65 #define TYPE_PP_UMUTEX		4
66 #define TYPE_CV			5
67 
68 /* Key to represent a unique userland synchronous object */
/*
 * Key to represent a unique userland synchronous object.
 *
 * A key names the object either by (VM object, offset) when the backing
 * memory is shared between processes, or by (vmspace, address) when it is
 * private to one process.  The "both" view aliases whichever pair is in
 * use so hashing and comparison need not distinguish the two cases.
 */
struct umtx_key {
	int	hash;		/* Bucket index computed by umtxq_hash(). */
	int	type;		/* One of the TYPE_* constants above. */
	int	shared;		/* Non-zero: info.shared view is active and
				   holds a vm_object reference. */
	union {
		struct {
			vm_object_t	object;	/* Backing VM object. */
			uintptr_t	offset;	/* Offset within the object. */
		} shared;
		struct {
			struct vmspace	*vs;	/* Owning process vmspace. */
			uintptr_t	addr;	/* Userspace address. */
		} private;
		struct {
			void		*a;	/* Aliases object / vs. */
			uintptr_t	b;	/* Aliases offset / addr. */
		} both;
	} info;
};
88 
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread; NULL while ownerless. */
	struct thread		*pi_owner;

	/* Reference count. */
	int			pi_refcount;

	/* List entry to link umtx held by a thread (uq_pi_contested). */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in the chain's uc_pi_list hash bucket. */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List of threads blocked on this PI mutex. */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identifies the userland lock object. */
	struct umtx_key		pi_key;
};
109 
/* A userland synchronous object user (one per thread, see umtxq_alloc). */
struct umtx_q {
	/* Linked list entry for the chain's wait queue. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Key of the object currently waited on. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Queued on a chain's uc_queue. */

	/* Back-pointer to the owning thread. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Entry on a umtx_pi's pi_blocked list. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes this thread owns that have waiters. */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex; PRI_MAX when none. */
	u_char			uq_inherited_pri;
};
141 
142 TAILQ_HEAD(umtxq_head, umtx_q);
143 
/* Userland lock object's wait-queue chain (one hash bucket). */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleeping umtx_q entries hashed to this chain. */
	struct umtxq_head	uc_queue;

	/* Busy flag: set while an operation that may block is in progress. */
	char			uc_busy;

	/* Number of threads sleeping in umtxq_busy() for this chain. */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain. */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
161 
162 #define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
163 
164 /*
165  * Don't propagate time-sharing priority, there is a security reason,
166  * a user can simply introduce PI-mutex, let thread A lock the mutex,
167  * and let another thread B block on the mutex, because B is
168  * sleeping, its priority will be boosted, this causes A's priority to
169  * be boosted via priority propagating too and will never be lowered even
170  * if it is using 100%CPU, this is unfair to other processes.
171  */
172 
173 #define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
174 			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
175 			 PRI_MAX_TIMESHARE : (td)->td_user_pri)
176 
177 #define	GOLDEN_RATIO_PRIME	2654404609U
178 #define	UMTX_CHAINS		128
179 #define	UMTX_SHIFTS		(__WORD_BIT - 7)
180 
181 #define THREAD_SHARE		0
182 #define PROCESS_SHARE		1
183 #define AUTO_SHARE		2
184 
185 #define	GET_SHARE(flags)	\
186     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
187 
188 static uma_zone_t		umtx_pi_zone;
189 static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
190 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
191 static int			umtx_pi_allocated;
192 
193 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
194 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
195     &umtx_pi_allocated, 0, "Allocated umtx_pi");
196 
197 static void umtxq_sysinit(void *);
198 static void umtxq_hash(struct umtx_key *key);
199 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
200 static void umtxq_lock(struct umtx_key *key);
201 static void umtxq_unlock(struct umtx_key *key);
202 static void umtxq_busy(struct umtx_key *key);
203 static void umtxq_unbusy(struct umtx_key *key);
204 static void umtxq_insert(struct umtx_q *uq);
205 static void umtxq_remove(struct umtx_q *uq);
206 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
207 static int umtxq_count(struct umtx_key *key);
208 static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
209 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
210 static int umtx_key_get(void *addr, int type, int share,
211 	struct umtx_key *key);
212 static void umtx_key_release(struct umtx_key *key);
213 static struct umtx_pi *umtx_pi_alloc(int);
214 static void umtx_pi_free(struct umtx_pi *pi);
215 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
216 static void umtx_thread_cleanup(struct thread *td);
217 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
218 	struct image_params *imgp __unused);
219 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
220 
221 static struct mtx umtx_lock;
222 
223 static void
224 umtxq_sysinit(void *arg __unused)
225 {
226 	int i;
227 
228 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
229 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
230 	for (i = 0; i < UMTX_CHAINS; ++i) {
231 		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
232 			 MTX_DEF | MTX_DUPOK);
233 		TAILQ_INIT(&umtxq_chains[i].uc_queue);
234 		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
235 		umtxq_chains[i].uc_busy = 0;
236 		umtxq_chains[i].uc_waiters = 0;
237 	}
238 	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
239 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
240 	    EVENTHANDLER_PRI_ANY);
241 }
242 
243 struct umtx_q *
244 umtxq_alloc(void)
245 {
246 	struct umtx_q *uq;
247 
248 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
249 	TAILQ_INIT(&uq->uq_pi_contested);
250 	uq->uq_inherited_pri = PRI_MAX;
251 	return (uq);
252 }
253 
/* Release a umtx_q previously obtained from umtxq_alloc(). */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
259 
260 static inline void
261 umtxq_hash(struct umtx_key *key)
262 {
263 	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
264 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
265 }
266 
267 static inline int
268 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
269 {
270 	return (k1->type == k2->type &&
271 		k1->info.both.a == k2->info.both.a &&
272 	        k1->info.both.b == k2->info.both.b);
273 }
274 
/* Map a hashed key to its wait-queue chain; umtxq_hash() must have run. */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}
280 
281 /*
282  * Set chain to busy state when following operation
283  * may be blocked (kernel mutex can not be used).
284  */
285 static inline void
286 umtxq_busy(struct umtx_key *key)
287 {
288 	struct umtxq_chain *uc;
289 
290 	uc = umtxq_getchain(key);
291 	mtx_assert(&uc->uc_lock, MA_OWNED);
292 	while (uc->uc_busy != 0) {
293 		uc->uc_waiters++;
294 		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
295 		uc->uc_waiters--;
296 	}
297 	uc->uc_busy = 1;
298 }
299 
300 /*
301  * Unbusy a chain.
302  */
303 static inline void
304 umtxq_unbusy(struct umtx_key *key)
305 {
306 	struct umtxq_chain *uc;
307 
308 	uc = umtxq_getchain(key);
309 	mtx_assert(&uc->uc_lock, MA_OWNED);
310 	KASSERT(uc->uc_busy != 0, ("not busy"));
311 	uc->uc_busy = 0;
312 	if (uc->uc_waiters)
313 		wakeup_one(uc);
314 }
315 
/*
 * Lock a chain (acquires the bucket mutex for the key's hash).
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}
327 
/*
 * Unlock a chain (releases the bucket mutex for the key's hash).
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}
339 
340 /*
341  * Insert a thread onto the umtx queue.
342  */
343 static inline void
344 umtxq_insert(struct umtx_q *uq)
345 {
346 	struct umtxq_chain *uc;
347 
348 	uc = umtxq_getchain(&uq->uq_key);
349 	UMTXQ_LOCKED_ASSERT(uc);
350 	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
351 	uq->uq_flags |= UQF_UMTXQ;
352 }
353 
354 /*
355  * Remove thread from the umtx queue.
356  */
357 static inline void
358 umtxq_remove(struct umtx_q *uq)
359 {
360 	struct umtxq_chain *uc;
361 
362 	uc = umtxq_getchain(&uq->uq_key);
363 	UMTXQ_LOCKED_ASSERT(uc);
364 	if (uq->uq_flags & UQF_UMTXQ) {
365 		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
366 		uq->uq_flags &= ~UQF_UMTXQ;
367 	}
368 }
369 
370 /*
371  * Check if there are multiple waiters
372  */
373 static int
374 umtxq_count(struct umtx_key *key)
375 {
376 	struct umtxq_chain *uc;
377 	struct umtx_q *uq;
378 	int count = 0;
379 
380 	uc = umtxq_getchain(key);
381 	UMTXQ_LOCKED_ASSERT(uc);
382 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
383 		if (umtx_key_match(&uq->uq_key, key)) {
384 			if (++count > 1)
385 				break;
386 		}
387 	}
388 	return (count);
389 }
390 
391 /*
392  * Check if there are multiple PI waiters and returns first
393  * waiter.
394  */
395 static int
396 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
397 {
398 	struct umtxq_chain *uc;
399 	struct umtx_q *uq;
400 	int count = 0;
401 
402 	*first = NULL;
403 	uc = umtxq_getchain(key);
404 	UMTXQ_LOCKED_ASSERT(uc);
405 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
406 		if (umtx_key_match(&uq->uq_key, key)) {
407 			if (++count > 1)
408 				break;
409 			*first = uq;
410 		}
411 	}
412 	return (count);
413 }
414 
415 /*
416  * Wake up threads waiting on an userland object.
417  */
418 static int
419 umtxq_signal(struct umtx_key *key, int n_wake)
420 {
421 	struct umtxq_chain *uc;
422 	struct umtx_q *uq, *next;
423 	int ret;
424 
425 	ret = 0;
426 	uc = umtxq_getchain(key);
427 	UMTXQ_LOCKED_ASSERT(uc);
428 	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
429 		if (umtx_key_match(&uq->uq_key, key)) {
430 			umtxq_remove(uq);
431 			wakeup(uq);
432 			if (++ret >= n_wake)
433 				break;
434 		}
435 	}
436 	return (ret);
437 }
438 
/*
 * Wake up specified thread: dequeue its umtx_q, then wakeup() it.
 * Chain lock must be held.
 */
static inline void
umtxq_signal_thread(struct umtx_q *uq)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_remove(uq);
	wakeup(uq);
}
452 
453 /*
454  * Put thread into sleep state, before sleeping, check if
455  * thread was removed from umtx queue.
456  */
457 static inline int
458 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
459 {
460 	struct umtxq_chain *uc;
461 	int error;
462 
463 	uc = umtxq_getchain(&uq->uq_key);
464 	UMTXQ_LOCKED_ASSERT(uc);
465 	if (!(uq->uq_flags & UQF_UMTXQ))
466 		return (0);
467 	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
468 	if (error == EWOULDBLOCK)
469 		error = ETIMEDOUT;
470 	return (error);
471 }
472 
/*
 * Convert userspace address into unique logical address.
 *
 * Fills in *key so that all threads referring to the same userland
 * object produce matching keys.  THREAD_SHARE uses the (vmspace,
 * address) pair directly.  Otherwise the address is resolved through
 * the process map: if sharing is PROCESS_SHARE, or AUTO_SHARE on a
 * map entry with VM_INHERIT_SHARE, the backing (VM object, offset)
 * pair is used and a reference is taken on the object — the caller
 * must drop it later with umtx_key_release().  Returns 0, or EFAULT
 * when the address does not resolve to a writable mapping.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/* Object-relative offset of addr within the entry. */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
518 
/*
 * Release key: drop the vm_object reference taken by umtx_key_get()
 * for shared keys; private keys hold no resources.
 */
static inline void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}
528 
/*
 * Lock a umtx object.
 *
 * Attempts the lock with compare-and-swap on the userland word; on
 * contention it sets UMTX_CONTESTED, enqueues on the key's wait queue
 * and sleeps until woken or "timo" ticks elapse (0 = no timeout).
 * "id" is the owner value stored on success.  Returns 0 on success,
 * EFAULT if the userland word cannot be accessed, or the sleep error
 * (e.g. EINTR/ERESTART/ETIMEDOUT) after one more failed acquisition
 * attempt.
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Enqueue before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* NOTREACHED: the loop only exits via return. */
	return (0);
}
625 
/*
 * Lock a umtx object.
 *
 * Timeout-handling wrapper around _do_lock_umtx().  Without a timeout
 * the wait is indefinite and an interrupted lock is restarted
 * (EINTR -> ERESTART).  With a timeout, a deadline is computed from
 * the monotonic uptime clock and the underlying lock is retried with
 * the remaining time until success, failure, or deadline expiry
 * (ETIMEDOUT); timed locking is never restarted (ERESTART -> EINTR).
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Absolute deadline = now + timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time remaining to the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
665 
/*
 * Unlock a umtx object.
 *
 * Verifies the caller owns the lock word (EPERM otherwise), clears it
 * with compare-and-swap — leaving UMTX_CONTESTED set when more than
 * one waiter remains — and wakes one waiter.  Returns 0, EFAULT on an
 * inaccessible word, or EINVAL if the word changed underneath us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race; fall through to the contested path. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
726 
727 #ifdef COMPAT_IA32
728 
/*
 * Lock a umtx object (32-bit compat).
 *
 * Identical in structure to _do_lock_umtx() but operates on a 32-bit
 * lock word via casuword32() for COMPAT_IA32 processes.  Returns 0 on
 * success, EFAULT on an inaccessible word, or the sleep error after
 * one more failed acquisition attempt.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Enqueue before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* NOTREACHED: the loop only exits via return. */
	return (0);
}
824 
/*
 * Lock a umtx object (32-bit compat).
 *
 * Timeout wrapper around _do_lock_umtx32(); mirrors do_lock_umtx():
 * untimed locks restart on interrupt (EINTR -> ERESTART), timed locks
 * retry with the remaining time against an uptime-clock deadline and
 * are never restarted (ERESTART -> EINTR).
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Absolute deadline = now + timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Retry with the time remaining to the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
864 
/*
 * Unlock a umtx object (32-bit compat).
 *
 * Mirrors do_unlock_umtx() on a 32-bit lock word: verifies ownership
 * (EPERM), clears the word — leaving UMUTEX_CONTESTED when more than
 * one waiter remains — and wakes one waiter.  Returns 0, EFAULT, or
 * EINVAL if the word changed underneath us.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race; fall through to the contested path. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
925 #endif
926 
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * Implements futex-style wait: the thread is inserted on the wait
 * queue BEFORE the userland word is fetched, so a wakeup between the
 * compare and the sleep cannot be lost (umtxq_sleep() returns
 * immediately if the entry was already dequeued).  "compat32" selects
 * a 32-bit fetch.  Returns 0 when the value differs from "id" or the
 * thread is woken, ETIMEDOUT on deadline expiry, or EINTR (ERESTART
 * is mapped to EINTR — the compare must be redone by userland).
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	if (compat32 == 0)
		tmp = fuword(addr);
        else
		tmp = fuword32(addr);
	if (tmp != id) {
		/* Value already changed; do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		/* Absolute deadline = now + timeout. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			/* Sleep again for the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
992 
/*
 * Wake up threads sleeping on the specified address.
 *
 * Wakes up to n_wake waiters on uaddr.  Returns 0 on success or the
 * umtx_key_get() error (EFAULT).  Note: the count of threads actually
 * woken (umtxq_signal()'s result) is computed but not returned.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1011 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * Same compare-and-swap/sleep structure as _do_lock_umtx() but on a
 * struct umutex 32-bit owner word keyed by the thread id.  Extra
 * semantics: with UMUTEX_ERROR_CHECK a recursive lock attempt returns
 * EDEADLK, and a non-zero "try" returns EBUSY instead of blocking.
 * Sharing (process- vs thread-private key) follows GET_SHARE(flags).
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Error-checking mutex: recursive lock is a deadlock. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		/* Trylock never blocks. */
		if (try != 0)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Enqueue before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	/* NOTREACHED: the loop only exits via return. */
	return (0);
}
1116 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Lost a race; fall through to the contested path. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1180 
1181 static inline struct umtx_pi *
1182 umtx_pi_alloc(int flags)
1183 {
1184 	struct umtx_pi *pi;
1185 
1186 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1187 	TAILQ_INIT(&pi->pi_blocked);
1188 	atomic_add_int(&umtx_pi_allocated, 1);
1189 	return (pi);
1190 }
1191 
1192 static inline void
1193 umtx_pi_free(struct umtx_pi *pi)
1194 {
1195 	uma_zfree(umtx_pi_zone, pi);
1196 	atomic_add_int(&umtx_pi_allocated, -1);
1197 }
1198 
1199 /*
1200  * Adjust the thread's position on a pi_state after its priority has been
1201  * changed.
1202  */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		/* Thread is not blocked on any PI mutex; nothing to do. */
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 * (pi_blocked is kept sorted by ascending UPRI, i.e. highest
	 * effective priority first.)
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				/* First entry with worse priority: insert before it. */
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	/* Non-zero tells the caller the thread is on a PI blocked chain. */
	return (1);
}
1243 
1244 /*
1245  * Propagate priority when a thread is blocked on POSIX
1246  * PI mutex.
1247  */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	/*
	 * Walk the chain of PI mutex owners, lending 'pri' to each owner
	 * that currently runs at a worse (numerically higher) priority.
	 * The walk stops when the chain ends, an owner is unknown, or an
	 * owner already runs at least as well as 'pri'.
	 */
	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			/* Owner already has equal or better priority. */
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			/* Owner is not blocked itself; chain ends here. */
			break;
	}
}
1287 
1288 /*
1289  * Unpropagate priority for a PI mutex when a thread blocked on
1290  * it is interrupted by signal or resumed by others.
1291  */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);

	/*
	 * Walk up the ownership chain, recomputing each owner's lent
	 * priority from scratch: the best (lowest UPRI) of the head
	 * waiter on every PI mutex the owner still holds contested,
	 * capped by the owner's own PP-inherited priority.
	 */
	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			/* Head of pi_blocked is the best-priority waiter. */
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/* Continue with whatever PI mutex this owner waits on. */
		pi = uq_owner->uq_pi_blocked;
	}
}
1321 
1322 /*
1323  * Insert a PI mutex into owned list.
1324  */
1325 static void
1326 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1327 {
1328 	struct umtx_q *uq_owner;
1329 
1330 	uq_owner = owner->td_umtxq;
1331 	mtx_assert(&umtx_lock, MA_OWNED);
1332 	if (pi->pi_owner != NULL)
1333 		panic("pi_ower != NULL");
1334 	pi->pi_owner = owner;
1335 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1336 }
1337 
1338 /*
1339  * Claim ownership of a PI mutex.
1340  */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		/* Already ours; nothing more to record. */
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	/*
	 * If threads are already blocked on this mutex, immediately lend
	 * the new owner the priority of the best waiter (queue head).
	 */
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}
1374 
1375 /*
1376  * Adjust a thread's order position in its blocked PI mutex,
1377  * this may result new priority propagating process.
1378  */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;

	mtx_assert(&umtx_lock, MA_OWNED);
	MPASS(TD_ON_UPILOCK(td));

	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 * (A lower UPRI value means a better priority.)
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
1407 
1408 /*
1409  * Sleep on a PI mutex.
1410  */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	/* Join the sleep queue for this key before touching the PI state. */
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Current, We only support process private PI-mutex,
		 * non-contended PI-mutexes are locked in userland.
		 * Process shared PI-mutex should always be initialized
		 * by kernel and be registered in kernel, locking should
		 * always be done by kernel to avoid security problems.
		 * For process private PI-mutex, we can find owner
		 * thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			/* NOTE(review): uq1 is assigned but never used here. */
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&umtx_lock);
	}

	/*
	 * Insert ourselves into pi_blocked, keeping the list sorted by
	 * ascending UPRI (best effective priority at the head).
	 */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	td->td_flags |= TDF_UPIBLOCKED;
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	/* Lend our priority along the ownership chain. */
	mtx_lock_spin(&umtx_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			/* Woken by signal/timeout: dequeue ourselves. */
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	/*
	 * No longer blocked on the PI mutex: undo the blocked-chain entry
	 * and withdraw any priority we lent to the owner chain.
	 */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	td->td_flags &= ~TDF_UPIBLOCKED;
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);

	/* Return with the chain lock held, as the caller expects. */
	umtxq_lock(&uq->uq_key);

	return (error);
}
1492 
1493 /*
1494  * Add reference count for a PI mutex.
1495  */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	/* The chain lock protects pi_refcount; no atomics needed. */
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}
1505 
1506 /*
1507  * Decrease reference count for a PI mutex, if the counter
1508  * is decreased to zero, its memory space is freed.
1509  */
1510 static void
1511 umtx_pi_unref(struct umtx_pi *pi)
1512 {
1513 	struct umtxq_chain *uc;
1514 	int free = 0;
1515 
1516 	uc = umtxq_getchain(&pi->pi_key);
1517 	UMTXQ_LOCKED_ASSERT(uc);
1518 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1519 	if (--pi->pi_refcount == 0) {
1520 		mtx_lock_spin(&umtx_lock);
1521 		if (pi->pi_owner != NULL) {
1522 			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1523 				pi, pi_link);
1524 			pi->pi_owner = NULL;
1525 		}
1526 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1527 			("blocked queue not empty"));
1528 		mtx_unlock_spin(&umtx_lock);
1529 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1530 		free = 1;
1531 	}
1532 	if (free)
1533 		umtx_pi_free(pi);
1534 }
1535 
1536 /*
1537  * Find a PI mutex in hash table.
1538  */
1539 static struct umtx_pi *
1540 umtx_pi_lookup(struct umtx_key *key)
1541 {
1542 	struct umtxq_chain *uc;
1543 	struct umtx_pi *pi;
1544 
1545 	uc = umtxq_getchain(key);
1546 	UMTXQ_LOCKED_ASSERT(uc);
1547 
1548 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1549 		if (umtx_key_match(&pi->pi_key, key)) {
1550 			return (pi);
1551 		}
1552 	}
1553 	return (NULL);
1554 }
1555 
1556 /*
1557  * Insert a PI mutex into hash table.
1558  */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	/* Chain lock guards uc_pi_list; caller must hold it. */
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}
1568 
1569 /*
1570  * Lock a PI mutex.
1571  */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/*
	 * Find (or create) the kernel-side PI state for this mutex and
	 * pin it with a reference for the duration of the lock attempt.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			/*
			 * M_NOWAIT failed: drop the chain lock, allocate
			 * with M_WAITOK, and re-check for a PI entry that
			 * may have been inserted while we slept.
			 */
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race; discard our allocation. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* Won a contested mutex: record ownership
				 * and pick up any waiter priority. */
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			/* Error-checking mutex: relocking is a deadlock. */
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtxq_unlock(&uq->uq_key);
	}

	/* Drop our reference to the PI state taken above. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1714 
1715 /*
1716  * Unlock a PI mutex.
1717  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* Contested bit appeared while we looked; take slow path. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		/*
		 * Give up ownership and recompute our own lent priority
		 * from the remaining contested PI mutexes we still hold.
		 */
		uq_me = curthread->td_umtxq;
		mtx_lock_spin(&umtx_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(curthread);
		sched_unlend_user_prio(curthread, pri);
		thread_unlock(curthread);
		mtx_unlock_spin(&umtx_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	/* Wake the best-priority waiter captured above, if any. */
	umtxq_lock(&key);
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		/* Userland changed the lock word underneath us. */
		return (EINVAL);
	return (0);
}
1806 
1807 /*
1808  * Lock a PP mutex.
1809  */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* su: whether we are privileged to raise to realtime priority. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Translate the userland ceiling (0..RTP_PRIO_MAX) into a
		 * kernel priority offset; out-of-range values (including a
		 * fuword32 fault, which returns -1) land above RTP_PRIO_MAX.
		 */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			/* Our priority is above the ceiling: protocol error. */
			mtx_unlock_spin(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			/* Boost ourselves to the ceiling priority. */
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock_spin(&umtx_lock);

		/*
		 * PP mutexes are always released to UMUTEX_CONTESTED, so
		 * acquisition always goes through this CAS.
		 */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Undo the speculative boost before retrying: restore the
		 * previous inherited priority and recompute our lent
		 * priority from the PI mutexes we still hold contested.
		 */
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

	/* On failure, undo the boost the same way before returning. */
	if (error != 0) {
		mtx_lock_spin(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
1936 
1937 /*
1938  * Unlock a PP mutex.
1939  */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* su: whether we are privileged to use realtime priorities. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to restore on unlock. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		/* suword32 faulted on the userland word. */
		error = EFAULT;
	else {
		/*
		 * Drop to the restored ceiling and recompute our lent
		 * priority from any still-contested PI mutexes we hold.
		 */
		mtx_lock_spin(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_unlend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock_spin(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2022 
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	/* Ceiling changes only make sense for PP mutexes. */
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/*
		 * The ceiling may only be changed while holding the mutex,
		 * so briefly acquire it (PP mutexes idle at CONTESTED).
		 */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Acquired: store the new ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((owner & ~UMUTEX_CONTESTED) == id) {
			/* We already own it; just update the ceiling. */
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		/* Wake everyone so they re-evaluate the new ceiling. */
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		/* Report the previous ceiling back to userland. */
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2102 
2103 static int
2104 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2105 	int try)
2106 {
2107 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2108 	case 0:
2109 		return (_do_lock_normal(td, m, flags, timo, try));
2110 	case UMUTEX_PRIO_INHERIT:
2111 		return (_do_lock_pi(td, m, flags, timo, try));
2112 	case UMUTEX_PRIO_PROTECT:
2113 		return (_do_lock_pp(td, m, flags, timo, try));
2114 	}
2115 	return (EINVAL);
2116 }
2117 
2118 /*
2119  * Lock a userland POSIX mutex.
2120  */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/*
		 * Compute the absolute deadline, then retry the lock with
		 * shrinking relative timeouts until it succeeds, fails, or
		 * the deadline passes.
		 */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Remaining time = deadline - now. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2162 
2163 /*
2164  * Unlock a userland POSIX mutex.
2165  */
2166 static int
2167 do_unlock_umutex(struct thread *td, struct umutex *m)
2168 {
2169 	uint32_t flags;
2170 
2171 	flags = fuword32(&m->m_flags);
2172 	if (flags == -1)
2173 		return (EFAULT);
2174 
2175 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2176 	case 0:
2177 		return (do_unlock_normal(td, m, flags));
2178 	case UMUTEX_PRIO_INHERIT:
2179 		return (do_unlock_pi(td, m, flags));
2180 	case UMUTEX_PRIO_PROTECT:
2181 		return (do_unlock_pp(td, m, flags));
2182 	}
2183 
2184 	return (EINVAL);
2185 }
2186 
/*
 * Wait on a userland condition variable, atomically releasing the
 * associated userland mutex.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.  Otherwise a signaller could observe no
	 * waiters and skip the kernel wakeup entirely.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	/* We are queued; safe to drop the user mutex now. */
	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* Pending unpark: consume it rather than sleep. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/* Absolute deadline; re-sleep for the remainder
			 * after each premature timeout. */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2270 
2271 /*
2272  * Signal a userland condition variable.
2273  */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * That was the last waiter; clear the userland
		 * has-waiters hint.  The chain stays busy across the
		 * unlock so a new waiter cannot race the store.
		 */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2299 
/*
 * Broadcast a userland condition variable: wake every waiter and
 * clear the userland has-waiters hint.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/* Chain is still busy, so no new waiter can race this store. */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2325 
2326 int
2327 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2328     /* struct umtx *umtx */
2329 {
2330 	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2331 }
2332 
2333 int
2334 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2335     /* struct umtx *umtx */
2336 {
2337 	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2338 }
2339 
2340 static int
2341 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2342 {
2343 	struct timespec *ts, timeout;
2344 	int error;
2345 
2346 	/* Allow a null timespec (wait forever). */
2347 	if (uap->uaddr2 == NULL)
2348 		ts = NULL;
2349 	else {
2350 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2351 		if (error != 0)
2352 			return (error);
2353 		if (timeout.tv_nsec >= 1000000000 ||
2354 		    timeout.tv_nsec < 0) {
2355 			return (EINVAL);
2356 		}
2357 		ts = &timeout;
2358 	}
2359 	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2360 }
2361 
2362 static int
2363 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2364 {
2365 	return (do_unlock_umtx(td, uap->obj, uap->val));
2366 }
2367 
2368 static int
2369 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2370 {
2371 	struct timespec *ts, timeout;
2372 	int error;
2373 
2374 	if (uap->uaddr2 == NULL)
2375 		ts = NULL;
2376 	else {
2377 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2378 		if (error != 0)
2379 			return (error);
2380 		if (timeout.tv_nsec >= 1000000000 ||
2381 		    timeout.tv_nsec < 0)
2382 			return (EINVAL);
2383 		ts = &timeout;
2384 	}
2385 	return do_wait(td, uap->obj, uap->val, ts, 0);
2386 }
2387 
2388 static int
2389 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2390 {
2391 	struct timespec *ts, timeout;
2392 	int error;
2393 
2394 	if (uap->uaddr2 == NULL)
2395 		ts = NULL;
2396 	else {
2397 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2398 		if (error != 0)
2399 			return (error);
2400 		if (timeout.tv_nsec >= 1000000000 ||
2401 		    timeout.tv_nsec < 0)
2402 			return (EINVAL);
2403 		ts = &timeout;
2404 	}
2405 	return do_wait(td, uap->obj, uap->val, ts, 1);
2406 }
2407 
2408 static int
2409 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2410 {
2411 	return (kern_umtx_wake(td, uap->obj, uap->val));
2412 }
2413 
2414 static int
2415 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2416 {
2417 	struct timespec *ts, timeout;
2418 	int error;
2419 
2420 	/* Allow a null timespec (wait forever). */
2421 	if (uap->uaddr2 == NULL)
2422 		ts = NULL;
2423 	else {
2424 		error = copyin(uap->uaddr2, &timeout,
2425 		    sizeof(timeout));
2426 		if (error != 0)
2427 			return (error);
2428 		if (timeout.tv_nsec >= 1000000000 ||
2429 		    timeout.tv_nsec < 0) {
2430 			return (EINVAL);
2431 		}
2432 		ts = &timeout;
2433 	}
2434 	return do_lock_umutex(td, uap->obj, ts, 0);
2435 }
2436 
2437 static int
2438 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2439 {
2440 	return do_lock_umutex(td, uap->obj, NULL, 1);
2441 }
2442 
2443 static int
2444 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2445 {
2446 	return do_unlock_umutex(td, uap->obj);
2447 }
2448 
2449 static int
2450 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2451 {
2452 	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2453 }
2454 
2455 static int
2456 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2457 {
2458 	struct timespec *ts, timeout;
2459 	int error;
2460 
2461 	/* Allow a null timespec (wait forever). */
2462 	if (uap->uaddr2 == NULL)
2463 		ts = NULL;
2464 	else {
2465 		error = copyin(uap->uaddr2, &timeout,
2466 		    sizeof(timeout));
2467 		if (error != 0)
2468 			return (error);
2469 		if (timeout.tv_nsec >= 1000000000 ||
2470 		    timeout.tv_nsec < 0) {
2471 			return (EINVAL);
2472 		}
2473 		ts = &timeout;
2474 	}
2475 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2476 }
2477 
2478 static int
2479 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2480 {
2481 	return do_cv_signal(td, uap->obj);
2482 }
2483 
2484 static int
2485 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2486 {
2487 	return do_cv_broadcast(td, uap->obj);
2488 }
2489 
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op(); indexed by the UMTX_OP_* opcode,
 * so entry order must match the opcode values.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_uint		/* UMTX_OP_WAIT_UINT */
};
2506 
2507 int
2508 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
2509 {
2510 	if ((unsigned)uap->op < UMTX_OP_MAX)
2511 		return (*op_table[uap->op])(td, uap);
2512 	return (EINVAL);
2513 }
2514 
2515 #ifdef COMPAT_IA32
2516 int
2517 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
2518     /* struct umtx *umtx */
2519 {
2520 	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
2521 }
2522 
2523 int
2524 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
2525     /* struct umtx *umtx */
2526 {
2527 	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
2528 }
2529 
/*
 * 32-bit ABI layout of struct timespec.  The fields must be signed,
 * matching the native struct timespec: with unsigned fields, a negative
 * tv_nsec copied in from userland widens to a large positive count (so
 * the callers' tv_nsec < 0 checks can never fire) and a negative tv_sec
 * silently becomes a huge timeout.
 */
struct timespec32 {
	int32_t tv_sec;
	int32_t tv_nsec;
};
2534 
2535 static inline int
2536 copyin_timeout32(void *addr, struct timespec *tsp)
2537 {
2538 	struct timespec32 ts32;
2539 	int error;
2540 
2541 	error = copyin(addr, &ts32, sizeof(struct timespec32));
2542 	if (error == 0) {
2543 		tsp->tv_sec = ts32.tv_sec;
2544 		tsp->tv_nsec = ts32.tv_nsec;
2545 	}
2546 	return (error);
2547 }
2548 
2549 static int
2550 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2551 {
2552 	struct timespec *ts, timeout;
2553 	int error;
2554 
2555 	/* Allow a null timespec (wait forever). */
2556 	if (uap->uaddr2 == NULL)
2557 		ts = NULL;
2558 	else {
2559 		error = copyin_timeout32(uap->uaddr2, &timeout);
2560 		if (error != 0)
2561 			return (error);
2562 		if (timeout.tv_nsec >= 1000000000 ||
2563 		    timeout.tv_nsec < 0) {
2564 			return (EINVAL);
2565 		}
2566 		ts = &timeout;
2567 	}
2568 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2569 }
2570 
2571 static int
2572 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2573 {
2574 	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
2575 }
2576 
2577 static int
2578 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2579 {
2580 	struct timespec *ts, timeout;
2581 	int error;
2582 
2583 	if (uap->uaddr2 == NULL)
2584 		ts = NULL;
2585 	else {
2586 		error = copyin_timeout32(uap->uaddr2, &timeout);
2587 		if (error != 0)
2588 			return (error);
2589 		if (timeout.tv_nsec >= 1000000000 ||
2590 		    timeout.tv_nsec < 0)
2591 			return (EINVAL);
2592 		ts = &timeout;
2593 	}
2594 	return do_wait(td, uap->obj, uap->val, ts, 1);
2595 }
2596 
2597 static int
2598 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2599 {
2600 	struct timespec *ts, timeout;
2601 	int error;
2602 
2603 	/* Allow a null timespec (wait forever). */
2604 	if (uap->uaddr2 == NULL)
2605 		ts = NULL;
2606 	else {
2607 		error = copyin_timeout32(uap->uaddr2, &timeout);
2608 		if (error != 0)
2609 			return (error);
2610 		if (timeout.tv_nsec >= 1000000000 ||
2611 		    timeout.tv_nsec < 0)
2612 			return (EINVAL);
2613 		ts = &timeout;
2614 	}
2615 	return do_lock_umutex(td, uap->obj, ts, 0);
2616 }
2617 
2618 static int
2619 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2620 {
2621 	struct timespec *ts, timeout;
2622 	int error;
2623 
2624 	/* Allow a null timespec (wait forever). */
2625 	if (uap->uaddr2 == NULL)
2626 		ts = NULL;
2627 	else {
2628 		error = copyin_timeout32(uap->uaddr2, &timeout);
2629 		if (error != 0)
2630 			return (error);
2631 		if (timeout.tv_nsec >= 1000000000 ||
2632 		    timeout.tv_nsec < 0)
2633 			return (EINVAL);
2634 		ts = &timeout;
2635 	}
2636 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2637 }
2638 
/*
 * 32-bit compat dispatch table; entry order must match the UMTX_OP_*
 * opcode values and therefore op_table above.
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT */
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast,		/* UMTX_OP_CV_BROADCAST */
	__umtx_op_wait_compat32		/* UMTX_OP_WAIT_UINT */
};
2653 
2654 int
2655 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
2656 {
2657 	if ((unsigned)uap->op < UMTX_OP_MAX)
2658 		return (*op_table_compat32[uap->op])(td,
2659 			(struct _umtx_op_args *)uap);
2660 	return (EINVAL);
2661 }
2662 #endif
2663 
2664 void
2665 umtx_thread_init(struct thread *td)
2666 {
2667 	td->td_umtxq = umtxq_alloc();
2668 	td->td_umtxq->uq_thread = td;
2669 }
2670 
2671 void
2672 umtx_thread_fini(struct thread *td)
2673 {
2674 	umtxq_free(td->td_umtxq);
2675 }
2676 
2677 /*
2678  * It will be called when new thread is created, e.g fork().
2679  */
2680 void
2681 umtx_thread_alloc(struct thread *td)
2682 {
2683 	struct umtx_q *uq;
2684 
2685 	uq = td->td_umtxq;
2686 	uq->uq_inherited_pri = PRI_MAX;
2687 
2688 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2689 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2690 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2691 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
2692 }
2693 
2694 /*
2695  * exec() hook.
2696  */
2697 static void
2698 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
2699 	struct image_params *imgp __unused)
2700 {
2701 	umtx_thread_cleanup(curthread);
2702 }
2703 
2704 /*
2705  * thread_exit() hook.
2706  */
2707 void
2708 umtx_thread_exit(struct thread *td)
2709 {
2710 	umtx_thread_cleanup(td);
2711 }
2712 
2713 /*
2714  * clean up umtx data.
2715  */
2716 static void
2717 umtx_thread_cleanup(struct thread *td)
2718 {
2719 	struct umtx_q *uq;
2720 	struct umtx_pi *pi;
2721 
2722 	if ((uq = td->td_umtxq) == NULL)
2723 		return;
2724 
2725 	mtx_lock_spin(&umtx_lock);
2726 	uq->uq_inherited_pri = PRI_MAX;
2727 	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
2728 		pi->pi_owner = NULL;
2729 		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
2730 	}
2731 	td->td_flags &= ~TDF_UBORROWING;
2732 	mtx_unlock_spin(&umtx_lock);
2733 }
2734