xref: /freebsd/sys/kern/kern_umtx.c (revision 99f3b482da15db6f2ae4702d592b53dd36d4cf55)
1 /*-
2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #include <sys/kernel.h>
33 #include <sys/limits.h>
34 #include <sys/lock.h>
35 #include <sys/malloc.h>
36 #include <sys/mutex.h>
37 #include <sys/proc.h>
38 #include <sys/sched.h>
39 #include <sys/sysctl.h>
40 #include <sys/sysent.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/eventhandler.h>
44 #include <sys/umtx.h>
45 
46 #include <vm/vm.h>
47 #include <vm/vm_param.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_map.h>
50 #include <vm/vm_object.h>
51 
52 #define TYPE_SIMPLE_LOCK	0
53 #define TYPE_SIMPLE_WAIT	1
54 #define TYPE_NORMAL_UMUTEX	2
55 #define TYPE_PI_UMUTEX		3
56 #define TYPE_PP_UMUTEX		4
57 #define TYPE_CV			5
58 
59 /* Key representing a unique userland synchronization object */
60 struct umtx_key {
61 	int	hash;
62 	int	type;
63 	int	shared;
64 	union {
65 		struct {
66 			vm_object_t	object;
67 			uintptr_t	offset;
68 		} shared;
69 		struct {
70 			struct vmspace	*vs;
71 			uintptr_t	addr;
72 		} private;
73 		struct {
74 			void		*a;
75 			uintptr_t	b;
76 		} both;
77 	} info;
78 };
79 
80 /* Priority inheritance mutex info. */
81 struct umtx_pi {
82 	/* Owner thread */
83 	struct thread		*pi_owner;
84 
85 	/* Reference count */
86 	int			pi_refcount;
87 
88 	/* List entry linking PI mutexes held by a thread */
89 	TAILQ_ENTRY(umtx_pi)	pi_link;
90 
91 	/* List entry in hash */
92 	TAILQ_ENTRY(umtx_pi)	pi_hashlink;
93 
94 	/* List for waiters */
95 	TAILQ_HEAD(,umtx_q)	pi_blocked;
96 
97 	/* Identify a userland lock object */
98 	struct umtx_key		pi_key;
99 };
100 
101 /* A user (waiter) of a userland synchronization object. */
102 struct umtx_q {
103 	/* Linked list for the hash. */
104 	TAILQ_ENTRY(umtx_q)	uq_link;
105 
106 	/* Umtx key. */
107 	struct umtx_key		uq_key;
108 
109 	/* Umtx flags. */
110 	int			uq_flags;
111 #define UQF_UMTXQ	0x0001
112 
113 	/* The waiting thread. */
114 	struct thread		*uq_thread;
115 
116 	/*
117 	 * The PI mutex this thread is blocked on.  Reads may hold
118 	 * either the chain lock or sched_lock; writes must hold
119 	 * both the chain lock and sched_lock.
120 	 */
121 	struct umtx_pi		*uq_pi_blocked;
122 
123 	/* On blocked list */
124 	TAILQ_ENTRY(umtx_q)	uq_lockq;
125 
126 	/* PI mutexes we own for which other threads contend */
127 	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;
128 
129 	/* Inherited priority from PP mutex */
130 	u_char			uq_inherited_pri;
131 };
132 
133 TAILQ_HEAD(umtxq_head, umtx_q);
134 
135 /* Userland lock object's wait-queue chain */
136 struct umtxq_chain {
137 	/* Lock for this chain. */
138 	struct mtx		uc_lock;
139 
140 	/* List of sleep queues. */
141 	struct umtxq_head	uc_queue;
142 
143 	/* Busy flag */
144 	char			uc_busy;
145 
146 	/* Chain lock waiters */
147 	int			uc_waiters;
148 
149 	/* All PI mutexes hashed to this chain */
150 	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
151 };
152 
153 #define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)
154 
155 /*
156  * Don't propagate time-sharing priority; there is a security reason.
157  * A user could simply create a PI mutex, let thread A lock it, and
158  * let another thread B block on it.  Because B is sleeping, its
159  * priority would be boosted, and priority propagation would boost
160  * A's priority too; A's priority would then never be lowered, even
161  * if it were using 100% CPU, which is unfair to other processes.
162  */
163 
164 #define UPRI(td)	(((td)->td_ksegrp->kg_user_pri >= PRI_MIN_TIMESHARE &&\
165 			  (td)->td_ksegrp->kg_user_pri <= PRI_MAX_TIMESHARE) ?\
166 			 PRI_MAX_TIMESHARE : (td)->td_ksegrp->kg_user_pri)
167 
168 #define	GOLDEN_RATIO_PRIME	2654404609U
169 #define	UMTX_CHAINS		128
170 #define	UMTX_SHIFTS		(__WORD_BIT - 7)
171 
172 #define THREAD_SHARE		0
173 #define PROCESS_SHARE		1
174 #define AUTO_SHARE		2
175 
176 #define	GET_SHARE(flags)	\
177     (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
178 
179 static uma_zone_t		umtx_pi_zone;
180 static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
181 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
182 static int			umtx_pi_allocated;
183 
184 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
185 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
186     &umtx_pi_allocated, 0, "Allocated umtx_pi");
187 
188 static void umtxq_sysinit(void *);
189 static void umtxq_hash(struct umtx_key *key);
190 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
191 static void umtxq_lock(struct umtx_key *key);
192 static void umtxq_unlock(struct umtx_key *key);
193 static void umtxq_busy(struct umtx_key *key);
194 static void umtxq_unbusy(struct umtx_key *key);
195 static void umtxq_insert(struct umtx_q *uq);
196 static void umtxq_remove(struct umtx_q *uq);
197 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
198 static int umtxq_count(struct umtx_key *key);
199 static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
200 static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
201 static int umtx_key_get(void *addr, int type, int share,
202 	struct umtx_key *key);
203 static void umtx_key_release(struct umtx_key *key);
204 static struct umtx_pi *umtx_pi_alloc(void);
205 static void umtx_pi_free(struct umtx_pi *pi);
206 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
207 static void umtx_thread_cleanup(struct thread *td);
208 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
209 	struct image_params *imgp __unused);
210 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
211 
212 static void
213 umtxq_sysinit(void *arg __unused)
214 {
215 	int i;
216 
217 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
218 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
219 	for (i = 0; i < UMTX_CHAINS; ++i) {
220 		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
221 			 MTX_DEF | MTX_DUPOK);
222 		TAILQ_INIT(&umtxq_chains[i].uc_queue);
223 		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
224 		umtxq_chains[i].uc_busy = 0;
225 		umtxq_chains[i].uc_waiters = 0;
226 	}
227 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
228 	    EVENTHANDLER_PRI_ANY);
229 }
230 
231 struct umtx_q *
232 umtxq_alloc(void)
233 {
234 	struct umtx_q *uq;
235 
236 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
237 	TAILQ_INIT(&uq->uq_pi_contested);
238 	uq->uq_inherited_pri = PRI_MAX;
239 	return (uq);
240 }
241 
242 void
243 umtxq_free(struct umtx_q *uq)
244 {
245 	free(uq, M_UMTX);
246 }
247 
248 static inline void
249 umtxq_hash(struct umtx_key *key)
250 {
251 	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
252 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
253 }
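
/*
 * The chain index above is a classic multiplicative (Fibonacci) hash.
 * A minimal, self-contained userland sketch of the same scheme follows,
 * assuming a 32-bit unsigned word so that the shift below equals
 * __WORD_BIT - 7 = 25; the SKETCH_* names are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_GOLDEN_RATIO_PRIME	2654404609U
#define SKETCH_CHAINS			128
#define SKETCH_SHIFTS			(32 - 7)

static unsigned
sketch_hash(void *a, uintptr_t b)
{
	unsigned n = (uintptr_t)a + b;

	/* Multiply, keep the top 7 bits, fold into 0..127. */
	return (((n * SKETCH_GOLDEN_RATIO_PRIME) >> SKETCH_SHIFTS) %
	    SKETCH_CHAINS);
}

int
main(void)
{
	int x;

	printf("chain %u\n", sketch_hash(&x, 0));
	return (0);
}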
254 
255 static inline int
256 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
257 {
258 	return (k1->type == k2->type &&
259 		k1->info.both.a == k2->info.both.a &&
260 	        k1->info.both.b == k2->info.both.b);
261 }
262 
263 static inline struct umtxq_chain *
264 umtxq_getchain(struct umtx_key *key)
265 {
266 	return (&umtxq_chains[key->hash]);
267 }
268 
269 /*
270  * Set a chain to the busy state when the following operation
271  * may block (a kernel mutex cannot be used for it).
272  */
273 static inline void
274 umtxq_busy(struct umtx_key *key)
275 {
276 	struct umtxq_chain *uc;
277 
278 	uc = umtxq_getchain(key);
279 	mtx_assert(&uc->uc_lock, MA_OWNED);
280 	while (uc->uc_busy != 0) {
281 		uc->uc_waiters++;
282 		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
283 		uc->uc_waiters--;
284 	}
285 	uc->uc_busy = 1;
286 }
287 
288 /*
289  * Unbusy a chain.
290  */
291 static inline void
292 umtxq_unbusy(struct umtx_key *key)
293 {
294 	struct umtxq_chain *uc;
295 
296 	uc = umtxq_getchain(key);
297 	mtx_assert(&uc->uc_lock, MA_OWNED);
298 	KASSERT(uc->uc_busy != 0, ("not busy"));
299 	uc->uc_busy = 0;
300 	if (uc->uc_waiters)
301 		wakeup_one(uc);
302 }
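
/*
 * The uc_busy/uc_waiters pair above implements a sleepable critical
 * section on top of a mutex: msleep() atomically drops and retakes the
 * chain lock around the wait.  A minimal userland sketch of the same
 * pattern, using a condition variable in place of msleep()/wakeup_one();
 * the sketch_* names are illustrative, and lock/cv must be initialized
 * by the caller.
 */
#include <pthread.h>

struct sketch_chain {
	pthread_mutex_t	lock;		/* like uc_lock */
	pthread_cond_t	cv;
	int		busy;		/* like uc_busy */
	int		waiters;	/* like uc_waiters */
};

/* Caller holds c->lock, mirroring UMTXQ_LOCKED_ASSERT(). */
static void
sketch_busy(struct sketch_chain *c)
{
	while (c->busy != 0) {
		c->waiters++;
		pthread_cond_wait(&c->cv, &c->lock);
		c->waiters--;
	}
	c->busy = 1;
}

static void
sketch_unbusy(struct sketch_chain *c)
{
	c->busy = 0;
	if (c->waiters)
		pthread_cond_signal(&c->cv);	/* like wakeup_one() */
}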
303 
304 /*
305  * Lock a chain.
306  */
307 static inline void
308 umtxq_lock(struct umtx_key *key)
309 {
310 	struct umtxq_chain *uc;
311 
312 	uc = umtxq_getchain(key);
313 	mtx_lock(&uc->uc_lock);
314 }
315 
316 /*
317  * Unlock a chain.
318  */
319 static inline void
320 umtxq_unlock(struct umtx_key *key)
321 {
322 	struct umtxq_chain *uc;
323 
324 	uc = umtxq_getchain(key);
325 	mtx_unlock(&uc->uc_lock);
326 }
327 
328 /*
329  * Insert a thread onto the umtx queue.
330  */
331 static inline void
332 umtxq_insert(struct umtx_q *uq)
333 {
334 	struct umtxq_chain *uc;
335 
336 	uc = umtxq_getchain(&uq->uq_key);
337 	UMTXQ_LOCKED_ASSERT(uc);
338 	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
339 	uq->uq_flags |= UQF_UMTXQ;
340 }
341 
342 /*
343  * Remove thread from the umtx queue.
344  */
345 static inline void
346 umtxq_remove(struct umtx_q *uq)
347 {
348 	struct umtxq_chain *uc;
349 
350 	uc = umtxq_getchain(&uq->uq_key);
351 	UMTXQ_LOCKED_ASSERT(uc);
352 	if (uq->uq_flags & UQF_UMTXQ) {
353 		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
354 		uq->uq_flags &= ~UQF_UMTXQ;
355 	}
356 }
357 
358 /*
359  * Check whether there are multiple waiters.
360  */
361 static int
362 umtxq_count(struct umtx_key *key)
363 {
364 	struct umtxq_chain *uc;
365 	struct umtx_q *uq;
366 	int count = 0;
367 
368 	uc = umtxq_getchain(key);
369 	UMTXQ_LOCKED_ASSERT(uc);
370 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
371 		if (umtx_key_match(&uq->uq_key, key)) {
372 			if (++count > 1)
373 				break;
374 		}
375 	}
376 	return (count);
377 }
378 
379 /*
380  * Check if there are multiple PI waiters and return the
381  * first waiter.
382  */
383 static int
384 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
385 {
386 	struct umtxq_chain *uc;
387 	struct umtx_q *uq;
388 	int count = 0;
389 
390 	*first = NULL;
391 	uc = umtxq_getchain(key);
392 	UMTXQ_LOCKED_ASSERT(uc);
393 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
394 		if (umtx_key_match(&uq->uq_key, key)) {
395 			if (++count > 1)
396 				break;
397 			*first = uq;
398 		}
399 	}
400 	return (count);
401 }
402 
403 /*
404  * Wake up threads waiting on a userland object.
405  */
406 static int
407 umtxq_signal(struct umtx_key *key, int n_wake)
408 {
409 	struct umtxq_chain *uc;
410 	struct umtx_q *uq, *next;
411 	int ret;
412 
413 	ret = 0;
414 	uc = umtxq_getchain(key);
415 	UMTXQ_LOCKED_ASSERT(uc);
416 	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
417 		if (umtx_key_match(&uq->uq_key, key)) {
418 			umtxq_remove(uq);
419 			wakeup(uq);
420 			if (++ret >= n_wake)
421 				break;
422 		}
423 	}
424 	return (ret);
425 }
426 
427 /*
428  * Wake up specified thread.
429  */
430 static inline void
431 umtxq_signal_thread(struct umtx_q *uq)
432 {
433 	struct umtxq_chain *uc;
434 
435 	uc = umtxq_getchain(&uq->uq_key);
436 	UMTXQ_LOCKED_ASSERT(uc);
437 	umtxq_remove(uq);
438 	wakeup(uq);
439 }
440 
441 /*
442  * Put the thread into a sleep state; before sleeping, check
443  * whether the thread was removed from the umtx queue.
444  */
445 static inline int
446 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
447 {
448 	struct umtxq_chain *uc;
449 	int error;
450 
451 	uc = umtxq_getchain(&uq->uq_key);
452 	UMTXQ_LOCKED_ASSERT(uc);
453 	if (!(uq->uq_flags & UQF_UMTXQ))
454 		return (0);
455 	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
456 	if (error == EWOULDBLOCK)
457 		error = ETIMEDOUT;
458 	return (error);
459 }
460 
461 /*
462  * Convert a userspace address into a unique logical key.
463  */
464 static int
465 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
466 {
467 	struct thread *td = curthread;
468 	vm_map_t map;
469 	vm_map_entry_t entry;
470 	vm_pindex_t pindex;
471 	vm_prot_t prot;
472 	boolean_t wired;
473 
474 	key->type = type;
475 	if (share == THREAD_SHARE) {
476 		key->shared = 0;
477 		key->info.private.vs = td->td_proc->p_vmspace;
478 		key->info.private.addr = (uintptr_t)addr;
479 	} else if (share == PROCESS_SHARE || share == AUTO_SHARE) {
480 		map = &td->td_proc->p_vmspace->vm_map;
481 		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
482 		    &entry, &key->info.shared.object, &pindex, &prot,
483 		    &wired) != KERN_SUCCESS) {
484 			return (EFAULT);
485 		}
486 
487 		if ((share == PROCESS_SHARE) ||
488 		    (share == AUTO_SHARE &&
489 		     VM_INHERIT_SHARE == entry->inheritance)) {
490 			key->shared = 1;
491 			key->info.shared.offset = entry->offset + entry->start -
492 				(vm_offset_t)addr;
493 			vm_object_reference(key->info.shared.object);
494 		} else {
495 			key->shared = 0;
496 			key->info.private.vs = td->td_proc->p_vmspace;
497 			key->info.private.addr = (uintptr_t)addr;
498 		}
499 		vm_map_lookup_done(map, entry);
500 	}
501 
502 	umtxq_hash(key);
503 	return (0);
504 }
505 
506 /*
507  * Release key.
508  */
509 static inline void
510 umtx_key_release(struct umtx_key *key)
511 {
512 	if (key->shared)
513 		vm_object_deallocate(key->info.shared.object);
514 }
515 
516 /*
517  * Lock a umtx object.
518  */
519 static int
520 _do_lock(struct thread *td, struct umtx *umtx, uintptr_t id, int timo)
521 {
522 	struct umtx_q *uq;
523 	intptr_t owner;
524 	intptr_t old;
525 	int error = 0;
526 
527 	uq = td->td_umtxq;
528 
529 	/*
530 	 * Care must be exercised when dealing with the umtx structure:
531 	 * it can fault on any access.
532 	 */
533 	for (;;) {
534 		/*
535 		 * Try the uncontested case.  This should be done in userland.
536 		 */
537 		owner = casuptr((intptr_t *)&umtx->u_owner, UMTX_UNOWNED, id);
538 
539 		/* The acquire succeeded. */
540 		if (owner == UMTX_UNOWNED)
541 			return (0);
542 
543 		/* The address was invalid. */
544 		if (owner == -1)
545 			return (EFAULT);
546 
547 		/* If no one owns it but it is contested, try to acquire it. */
548 		if (owner == UMTX_CONTESTED) {
549 			owner = casuptr((intptr_t *)&umtx->u_owner,
550 			    UMTX_CONTESTED, id | UMTX_CONTESTED);
551 
552 			if (owner == UMTX_CONTESTED)
553 				return (0);
554 
555 			/* The address was invalid. */
556 			if (owner == -1)
557 				return (EFAULT);
558 
559 			/* If this failed the lock has changed, restart. */
560 			continue;
561 		}
562 
563 		/*
564 		 * If we caught a signal, we have retried and now
565 		 * exit immediately.
566 		 */
567 		if (error != 0)
568 			return (error);
569 
570 		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
571 			AUTO_SHARE, &uq->uq_key)) != 0)
572 			return (error);
573 
574 		umtxq_lock(&uq->uq_key);
575 		umtxq_busy(&uq->uq_key);
576 		umtxq_insert(uq);
577 		umtxq_unbusy(&uq->uq_key);
578 		umtxq_unlock(&uq->uq_key);
579 
580 		/*
581 		 * Set the contested bit so that a release in user space
582 		 * knows to use the system call for unlock.  If this fails,
583 		 * either someone else has acquired the lock or it has been
584 		 * released.
585 		 */
586 		old = casuptr((intptr_t *)&umtx->u_owner, owner,
587 		    owner | UMTX_CONTESTED);
588 
589 		/* The address was invalid. */
590 		if (old == -1) {
591 			umtxq_lock(&uq->uq_key);
592 			umtxq_remove(uq);
593 			umtxq_unlock(&uq->uq_key);
594 			umtx_key_release(&uq->uq_key);
595 			return (EFAULT);
596 		}
597 
598 		/*
599 		 * If we set the contested bit, sleep.  Otherwise the lock
600 		 * changed and we need to retry, or we lost a race to the
601 		 * thread unlocking the umtx.
602 		 */
603 		umtxq_lock(&uq->uq_key);
604 		if (old == owner)
605 			error = umtxq_sleep(uq, "umtx", timo);
606 		umtxq_remove(uq);
607 		umtxq_unlock(&uq->uq_key);
608 		umtx_key_release(&uq->uq_key);
609 	}
610 
611 	return (0);
612 }
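
/*
 * As the comments above note, the uncontested cases "should be done in
 * userland".  A minimal userland sketch of that division of labor,
 * assuming the _umtx_op(2) wrapper and the struct umtx/UMTX_* constants
 * that <sys/umtx.h> exposes to userland; sketch_* names are illustrative.
 */
#include <sys/types.h>
#include <sys/thr.h>
#include <sys/umtx.h>
#include <machine/atomic.h>

static void
sketch_lock(struct umtx *mtx)
{
	long tid;

	thr_self(&tid);
	/* Fast path: CAS from UNOWNED to our thread id, no syscall. */
	if (atomic_cmpset_acq_long(&mtx->u_owner, UMTX_UNOWNED,
	    (u_long)tid))
		return;
	/* Slow path: the kernel queues us and sets UMTX_CONTESTED. */
	_umtx_op(mtx, UMTX_OP_LOCK, (u_long)tid, NULL, NULL);
}

static void
sketch_unlock(struct umtx *mtx)
{
	long tid;

	thr_self(&tid);
	/* The fast path is valid only while the contested bit is clear. */
	if (atomic_cmpset_rel_long(&mtx->u_owner, (u_long)tid,
	    UMTX_UNOWNED))
		return;
	/* Contested: do_unlock() below picks a waiter and wakes it. */
	_umtx_op(mtx, UMTX_OP_UNLOCK, (u_long)tid, NULL, NULL);
}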
613 
614 /*
615  * Lock a umtx object.
616  */
617 static int
618 do_lock(struct thread *td, struct umtx *umtx, uintptr_t id,
619 	struct timespec *timeout)
620 {
621 	struct timespec ts, ts2, ts3;
622 	struct timeval tv;
623 	int error;
624 
625 	if (timeout == NULL) {
626 		error = _do_lock(td, umtx, id, 0);
627 		/* Mutex locking is restarted if it is interrupted. */
628 		if (error == EINTR)
629 			error = ERESTART;
630 	} else {
631 		getnanouptime(&ts);
632 		timespecadd(&ts, timeout);
633 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
634 		for (;;) {
635 			error = _do_lock(td, umtx, id, tvtohz(&tv));
636 			if (error != ETIMEDOUT)
637 				break;
638 			getnanouptime(&ts2);
639 			if (timespeccmp(&ts2, &ts, >=)) {
640 				error = ETIMEDOUT;
641 				break;
642 			}
643 			ts3 = ts;
644 			timespecsub(&ts3, &ts2);
645 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
646 		}
647 		/* Timed-locking is not restarted. */
648 		if (error == ERESTART)
649 			error = EINTR;
650 	}
651 	return (error);
652 }
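
/*
 * The loop above turns a relative timeout into an absolute uptime
 * deadline and re-derives the remaining interval after each premature
 * timeout.  A minimal userland sketch of the same bookkeeping, assuming
 * clock_gettime(CLOCK_MONOTONIC) as the uptime source.
 */
#include <time.h>

/* Store deadline - now into *rem; return 0 if the deadline has passed. */
static int
sketch_remaining(const struct timespec *deadline, struct timespec *rem)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	if (now.tv_sec > deadline->tv_sec ||
	    (now.tv_sec == deadline->tv_sec &&
	    now.tv_nsec >= deadline->tv_nsec))
		return (0);		/* timed out */
	rem->tv_sec = deadline->tv_sec - now.tv_sec;
	rem->tv_nsec = deadline->tv_nsec - now.tv_nsec;
	if (rem->tv_nsec < 0) {
		rem->tv_sec--;
		rem->tv_nsec += 1000000000L;
	}
	return (1);
}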
653 
654 /*
655  * Unlock a umtx object.
656  */
657 static int
658 do_unlock(struct thread *td, struct umtx *umtx, uintptr_t id)
659 {
660 	struct umtx_key key;
661 	intptr_t owner;
662 	intptr_t old;
663 	int error;
664 	int count;
665 
666 	/*
667 	 * Make sure we own this mtx.
668 	 *
669 	 * XXX Need a {fu,su}ptr; this is not correct on archs where
670 	 * sizeof(intptr_t) != sizeof(long).
671 	 */
672 	owner = fuword(&umtx->u_owner);
673 	if (owner == -1)
674 		return (EFAULT);
675 
676 	if ((owner & ~UMTX_CONTESTED) != id)
677 		return (EPERM);
678 
679 	/* This should be done in userland */
680 	if ((owner & UMTX_CONTESTED) == 0) {
681 		old = casuptr((intptr_t *)&umtx->u_owner, owner,
682 			UMTX_UNOWNED);
683 		if (old == -1)
684 			return (EFAULT);
685 		if (old == owner)
686 			return (0);
687 		owner = old;
688 	}
689 
690 	/* We should only ever be in here for contested locks */
691 	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
692 		&key)) != 0)
693 		return (error);
694 
695 	umtxq_lock(&key);
696 	umtxq_busy(&key);
697 	count = umtxq_count(&key);
698 	umtxq_unlock(&key);
699 
700 	/*
701 	 * When unlocking the umtx, it must be marked as unowned if
702 	 * no more than one thread is waiting for it.
703 	 * Otherwise, it must be marked as contested.
704 	 */
705 	old = casuptr((intptr_t *)&umtx->u_owner, owner,
706 			count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
707 	umtxq_lock(&key);
708 	umtxq_signal(&key, 1);
709 	umtxq_unbusy(&key);
710 	umtxq_unlock(&key);
711 	umtx_key_release(&key);
712 	if (old == -1)
713 		return (EFAULT);
714 	if (old != owner)
715 		return (EINVAL);
716 	return (0);
717 }
718 
719 /*
720  * Fetch and compare value, sleep on the address if value is not changed.
721  */
722 static int
723 do_wait(struct thread *td, struct umtx *umtx, uintptr_t id, struct timespec *timeout)
724 {
725 	struct umtx_q *uq;
726 	struct timespec ts, ts2, ts3;
727 	struct timeval tv;
728 	uintptr_t tmp;
729 	int error = 0;
730 
731 	uq = td->td_umtxq;
732 	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_WAIT, AUTO_SHARE,
733 	    &uq->uq_key)) != 0)
734 		return (error);
735 
736 	umtxq_lock(&uq->uq_key);
737 	umtxq_insert(uq);
738 	umtxq_unlock(&uq->uq_key);
739 	tmp = fuword(&umtx->u_owner);
740 	if (tmp != id) {
741 		umtxq_lock(&uq->uq_key);
742 		umtxq_remove(uq);
743 		umtxq_unlock(&uq->uq_key);
744 	} else if (timeout == NULL) {
745 		umtxq_lock(&uq->uq_key);
746 		error = umtxq_sleep(uq, "ucond", 0);
747 		umtxq_remove(uq);
748 		umtxq_unlock(&uq->uq_key);
749 	} else {
750 		getnanouptime(&ts);
751 		timespecadd(&ts, timeout);
752 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
753 		umtxq_lock(&uq->uq_key);
754 		for (;;) {
755 			error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
756 			if (!(uq->uq_flags & UQF_UMTXQ))
757 				break;
758 			if (error != ETIMEDOUT)
759 				break;
760 			umtxq_unlock(&uq->uq_key);
761 			getnanouptime(&ts2);
762 			if (timespeccmp(&ts2, &ts, >=)) {
763 				error = ETIMEDOUT;
764 				umtxq_lock(&uq->uq_key);
765 				break;
766 			}
767 			ts3 = ts;
768 			timespecsub(&ts3, &ts2);
769 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
770 			umtxq_lock(&uq->uq_key);
771 		}
772 		umtxq_remove(uq);
773 		umtxq_unlock(&uq->uq_key);
774 	}
775 	umtx_key_release(&uq->uq_key);
776 	if (error == ERESTART)
777 		error = EINTR;
778 	return (error);
779 }
780 
781 /*
782  * Wake up threads sleeping on the specified address.
783  */
784 int
785 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
786 {
787 	struct umtx_key key;
788 	int ret;
789 
790 	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
791 	   &key)) != 0)
792 		return (ret);
793 	umtxq_lock(&key);
794 	ret = umtxq_signal(&key, n_wake);
795 	umtxq_unlock(&key);
796 	umtx_key_release(&key);
797 	return (0);
798 }
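
/*
 * do_wait() and kern_umtx_wake() form a futex-style compare-and-sleep
 * primitive: sleep only while the word still holds the expected value,
 * and wake up to n_wake sleepers.  A minimal userland sketch, assuming
 * the _umtx_op(2) wrapper; the plain store below ignores memory-ordering
 * concerns for brevity.
 */
#include <sys/types.h>
#include <sys/umtx.h>

/* Block until *w no longer holds 'seen'. */
static void
sketch_wait_for_change(struct umtx *w, u_long seen)
{
	/*
	 * The kernel re-reads the word after queueing us, so a wakeup
	 * that slips in between our test and the syscall is not lost.
	 */
	while (w->u_owner == seen)
		_umtx_op(w, UMTX_OP_WAIT, seen, NULL, NULL);
}

/* Publish a new value, then wake one sleeper. */
static void
sketch_store_and_wake(struct umtx *w, u_long newval)
{
	w->u_owner = newval;
	_umtx_op(w, UMTX_OP_WAKE, 1, NULL, NULL);
}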
799 
800 /*
801  * Lock a PTHREAD_PRIO_NONE protocol POSIX mutex.
802  */
803 static int
804 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
805 	int try)
806 {
807 	struct umtx_q *uq;
808 	uint32_t owner, old, id;
809 	int error = 0;
810 
811 	id = td->td_tid;
812 	uq = td->td_umtxq;
813 
814 	/*
815 	 * Care must be exercised when dealing with the umtx structure:
816 	 * it can fault on any access.
817 	 */
818 	for (;;) {
819 		/*
820 		 * Try the uncontested case.  This should be done in userland.
821 		 */
822 		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
823 
824 		/* The acquire succeeded. */
825 		if (owner == UMUTEX_UNOWNED)
826 			return (0);
827 
828 		/* The address was invalid. */
829 		if (owner == -1)
830 			return (EFAULT);
831 
832 		/* If no one owns it but it is contested, try to acquire it. */
833 		if (owner == UMUTEX_CONTESTED) {
834 			owner = casuword32(&m->m_owner,
835 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
836 
837 			if (owner == UMUTEX_CONTESTED)
838 				return (0);
839 
840 			/* The address was invalid. */
841 			if (owner == -1)
842 				return (EFAULT);
843 
844 			/* If this failed the lock has changed, restart. */
845 			continue;
846 		}
847 
848 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
849 		    (owner & ~UMUTEX_CONTESTED) == id)
850 			return (EDEADLK);
851 
852 		if (try != 0)
853 			return (EBUSY);
854 
855 		/*
856 		 * If we caught a signal, we have retried and now
857 		 * exit immediately.
858 		 */
859 		if (error != 0)
860 			return (error);
861 
862 		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
863 		    GET_SHARE(flags), &uq->uq_key)) != 0)
864 			return (error);
865 
866 		umtxq_lock(&uq->uq_key);
867 		umtxq_busy(&uq->uq_key);
868 		umtxq_insert(uq);
869 		umtxq_unbusy(&uq->uq_key);
870 		umtxq_unlock(&uq->uq_key);
871 
872 		/*
873 		 * Set the contested bit so that a release in user space
874 		 * knows to use the system call for unlock.  If this fails,
875 		 * either someone else has acquired the lock or it has been
876 		 * released.
877 		 */
878 		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
879 
880 		/* The address was invalid. */
881 		if (old == -1) {
882 			umtxq_lock(&uq->uq_key);
883 			umtxq_remove(uq);
884 			umtxq_unlock(&uq->uq_key);
885 			umtx_key_release(&uq->uq_key);
886 			return (EFAULT);
887 		}
888 
889 		/*
890 		 * If we set the contested bit, sleep.  Otherwise the lock
891 		 * changed and we need to retry, or we lost a race to the
892 		 * thread unlocking the umtx.
893 		 */
894 		umtxq_lock(&uq->uq_key);
895 		if (old == owner)
896 			error = umtxq_sleep(uq, "umtxn", timo);
897 		umtxq_remove(uq);
898 		umtxq_unlock(&uq->uq_key);
899 		umtx_key_release(&uq->uq_key);
900 	}
901 
902 	return (0);
903 }
904 
908 /*
909  * Unlock a PTHREAD_PRIO_NONE protocol POSIX mutex.
910  */
911 static int
912 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
913 {
914 	struct umtx_key key;
915 	uint32_t owner, old, id;
916 	int error;
917 	int count;
918 
919 	id = td->td_tid;
920 	/*
921 	 * Make sure we own this mtx.
922 	 */
923 	owner = fuword32(&m->m_owner);
924 	if (owner == -1)
925 		return (EFAULT);
926 
927 	if ((owner & ~UMUTEX_CONTESTED) != id)
928 		return (EPERM);
929 
930 	/* This should be done in userland */
931 	if ((owner & UMUTEX_CONTESTED) == 0) {
932 		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
933 		if (old == -1)
934 			return (EFAULT);
935 		if (old == owner)
936 			return (0);
937 		owner = old;
938 	}
939 
940 	/* We should only ever be in here for contested locks */
941 	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
942 	    &key)) != 0)
943 		return (error);
944 
945 	umtxq_lock(&key);
946 	umtxq_busy(&key);
947 	count = umtxq_count(&key);
948 	umtxq_unlock(&key);
949 
950 	/*
951 	 * When unlocking the umtx, it must be marked as unowned if
952 	 * there is zero or one thread only waiting for it.
953 	 * Otherwise, it must be marked as contested.
954 	 */
955 	old = casuword32(&m->m_owner, owner,
956 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
957 	umtxq_lock(&key);
958 	umtxq_signal(&key, 1);
959 	umtxq_unbusy(&key);
960 	umtxq_unlock(&key);
961 	umtx_key_release(&key);
962 	if (old == -1)
963 		return (EFAULT);
964 	if (old != owner)
965 		return (EINVAL);
966 	return (0);
967 }
968 
969 static inline struct umtx_pi *
970 umtx_pi_alloc(void)
971 {
972 	struct umtx_pi *pi;
973 
974 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | M_WAITOK);
975 	TAILQ_INIT(&pi->pi_blocked);
976 	atomic_add_int(&umtx_pi_allocated, 1);
977 	return (pi);
978 }
979 
980 static inline void
981 umtx_pi_free(struct umtx_pi *pi)
982 {
983 	uma_zfree(umtx_pi_zone, pi);
984 	atomic_add_int(&umtx_pi_allocated, -1);
985 }
986 
987 /*
988  * Adjust the thread's position on the PI mutex's blocked list after
989  * the thread's priority has been changed.
990  */
991 static int
992 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
993 {
994 	struct umtx_q *uq, *uq1, *uq2;
995 	struct thread *td1;
996 
997 	mtx_assert(&sched_lock, MA_OWNED);
998 	if (pi == NULL)
999 		return (0);
1000 
1001 	uq = td->td_umtxq;
1002 
1003 	/*
1004 	 * Check if the thread needs to be moved on the blocked chain.
1005 	 * It needs to be moved if either its priority is lower than
1006 	 * the previous thread's or higher than the next thread's.
1007 	 */
1008 	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1009 	uq2 = TAILQ_NEXT(uq, uq_lockq);
1010 	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1011 	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1012 		/*
1013 		 * Remove thread from blocked chain and determine where
1014 		 * it should be moved to.
1015 		 */
1016 		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1017 		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1018 			td1 = uq1->uq_thread;
1019 			MPASS(td1->td_proc->p_magic == P_MAGIC);
1020 			if (UPRI(td1) > UPRI(td))
1021 				break;
1022 		}
1023 
1024 		if (uq1 == NULL)
1025 			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1026 		else
1027 			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1028 	}
1029 	return (1);
1030 }
1031 
1032 /*
1033  * Propagate priority when a thread is blocked on POSIX
1034  * PI mutex.
1035  */
1036 static void
1037 umtx_propagate_priority(struct thread *td)
1038 {
1039 	struct umtx_q *uq;
1040 	struct umtx_pi *pi;
1041 	int pri;
1042 
1043 	mtx_assert(&sched_lock, MA_OWNED);
1044 	pri = UPRI(td);
1045 	uq = td->td_umtxq;
1046 	pi = uq->uq_pi_blocked;
1047 	if (pi == NULL)
1048 		return;
1049 
1050 	for (;;) {
1051 		td = pi->pi_owner;
1052 		if (td == NULL)
1053 			return;
1054 
1055 		MPASS(td->td_proc != NULL);
1056 		MPASS(td->td_proc->p_magic == P_MAGIC);
1057 
1058 		if (UPRI(td) <= pri)
1059 			return;
1060 
1061 		sched_lend_user_prio(td, pri);
1062 
1063 		/*
1064 		 * Pick up the lock that td is blocked on.
1065 		 */
1066 		uq = td->td_umtxq;
1067 		pi = uq->uq_pi_blocked;
1068 		/* Resort td on the list if needed. */
1069 		if (!umtx_pi_adjust_thread(pi, td))
1070 			break;
1071 	}
1072 }
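
/*
 * The propagation above is what lets PTHREAD_PRIO_INHERIT mutexes solve
 * priority inversion: a sleeping high-priority waiter lends its priority
 * to the current owner.  A minimal userland sketch that creates such a
 * mutex, so that contended locks reach _do_lock_pi() below; the
 * sketch_* name is illustrative.
 */
#include <pthread.h>

static int
sketch_make_pi_mutex(pthread_mutex_t *m)
{
	pthread_mutexattr_t attr;
	int error;

	error = pthread_mutexattr_init(&attr);
	if (error == 0)
		error = pthread_mutexattr_setprotocol(&attr,
		    PTHREAD_PRIO_INHERIT);
	if (error == 0)
		error = pthread_mutex_init(m, &attr);
	pthread_mutexattr_destroy(&attr);
	return (error);
}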
1073 
1074 /*
1075  * Unpropagate priority for a PI mutex when a thread blocked on
1076  * it is interrupted by a signal or resumed by other threads.
1077  */
1078 static void
1079 umtx_unpropagate_priority(struct umtx_pi *pi)
1080 {
1081 	struct umtx_q *uq, *uq_owner;
1082 	struct umtx_pi *pi2;
1083 	int pri;
1084 
1085 	mtx_assert(&sched_lock, MA_OWNED);
1086 
1087 	while (pi != NULL && pi->pi_owner != NULL) {
1088 		pri = PRI_MAX;
1089 		uq_owner = pi->pi_owner->td_umtxq;
1090 
1091 		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1092 			uq = TAILQ_FIRST(&pi2->pi_blocked);
1093 			if (uq != NULL) {
1094 				if (pri > UPRI(uq->uq_thread))
1095 					pri = UPRI(uq->uq_thread);
1096 			}
1097 		}
1098 
1099 		if (pri > uq_owner->uq_inherited_pri)
1100 			pri = uq_owner->uq_inherited_pri;
1101 		sched_unlend_user_prio(pi->pi_owner, pri);
1102 		pi = uq_owner->uq_pi_blocked;
1103 	}
1104 }
1105 
1106 /*
1107  * Insert a PI mutex into the owning thread's list.
1108  */
1109 static void
1110 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1111 {
1112 	struct umtx_q *uq_owner;
1113 
1114 	uq_owner = owner->td_umtxq;
1115 	mtx_assert(&sched_lock, MA_OWNED);
1116 	if (pi->pi_owner != NULL)
1117 		panic("pi_owner != NULL");
1118 	pi->pi_owner = owner;
1119 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1120 }
1121 
1122 /*
1123  * Claim ownership of a PI mutex.
1124  */
1125 static int
1126 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1127 {
1128 	struct umtx_q *uq, *uq_owner;
1129 
1130 	uq_owner = owner->td_umtxq;
1131 	mtx_lock_spin(&sched_lock);
1132 	if (pi->pi_owner == owner) {
1133 		mtx_unlock_spin(&sched_lock);
1134 		return (0);
1135 	}
1136 
1137 	if (pi->pi_owner != NULL) {
1138 		/*
1139 		 * Userland may have already corrupted the mutex, sigh.
1140 		 */
1141 		mtx_unlock_spin(&sched_lock);
1142 		return (EPERM);
1143 	}
1144 	umtx_pi_setowner(pi, owner);
1145 	uq = TAILQ_FIRST(&pi->pi_blocked);
1146 	if (uq != NULL) {
1147 		int pri;
1148 
1149 		pri = UPRI(uq->uq_thread);
1150 		if (pri < UPRI(owner))
1151 			sched_lend_user_prio(owner, pri);
1152 	}
1153 	mtx_unlock_spin(&sched_lock);
1154 	return (0);
1155 }
1156 
1157 /*
1158  * Adjust a thread's position on the blocked list of its PI mutex;
1159  * this may trigger a new round of priority propagation.
1160  */
1161 void
1162 umtx_pi_adjust(struct thread *td, u_char oldpri)
1163 {
1164 	struct umtx_q *uq;
1165 	struct umtx_pi *pi;
1166 
1167 	uq = td->td_umtxq;
1168 
1169 	mtx_assert(&sched_lock, MA_OWNED);
1170 	MPASS(TD_ON_UPILOCK(td));
1171 
1172 	/*
1173 	 * Pick up the lock that td is blocked on.
1174 	 */
1175 	pi = uq->uq_pi_blocked;
1176 	MPASS(pi != NULL);
1177 
1178 	/* Resort the turnstile on the list. */
1179 	if (!umtx_pi_adjust_thread(pi, td))
1180 		return;
1181 
1182 	/*
1183 	 * If our priority was lowered and we are at the head of the
1184 	 * turnstile, then propagate our new priority up the chain.
1185 	 */
1186 	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1187 		umtx_propagate_priority(td);
1188 }
1189 
1190 /*
1191  * Sleep on a PI mutex.
1192  */
1193 static int
1194 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1195 	uint32_t owner, const char *wmesg, int timo)
1196 {
1197 	struct umtxq_chain *uc;
1198 	struct thread *td, *td1;
1199 	struct umtx_q *uq1;
1200 	int pri;
1201 	int error = 0;
1202 
1203 	td = uq->uq_thread;
1204 	KASSERT(td == curthread, ("inconsistent uq_thread"));
1205 	uc = umtxq_getchain(&uq->uq_key);
1206 	UMTXQ_LOCKED_ASSERT(uc);
1207 	umtxq_insert(uq);
1208 	if (pi->pi_owner == NULL) {
1209 		/* XXX
1210 		 * Currently, we only support process-private PI mutexes;
1211 		 * non-contended PI mutexes are locked in userland.
1212 		 * Process-shared PI mutexes should always be initialized
1213 		 * and registered by the kernel, and locking should always
1214 		 * be done by the kernel, to avoid security problems.
1215 		 * For a process-private PI mutex, we can find the owner
1216 		 * thread and boost its priority safely.
1217 		 */
1218 		PROC_LOCK(curproc);
1219 		td1 = thread_find(curproc, owner);
1220 		mtx_lock_spin(&sched_lock);
1221 		if (td1 != NULL && pi->pi_owner == NULL) {
1222 			uq1 = td1->td_umtxq;
1223 			umtx_pi_setowner(pi, td1);
1224 		}
1225 		PROC_UNLOCK(curproc);
1226 	} else {
1227 		mtx_lock_spin(&sched_lock);
1228 	}
1229 
1230 	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1231 		pri = UPRI(uq1->uq_thread);
1232 		if (pri > UPRI(td))
1233 			break;
1234 	}
1235 
1236 	if (uq1 != NULL)
1237 		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1238 	else
1239 		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1240 
1241 	uq->uq_pi_blocked = pi;
1242 	td->td_flags |= TDF_UPIBLOCKED;
1243 	mtx_unlock_spin(&sched_lock);
1244 	umtxq_unlock(&uq->uq_key);
1245 
1246 	mtx_lock_spin(&sched_lock);
1247 	umtx_propagate_priority(td);
1248 	mtx_unlock_spin(&sched_lock);
1249 
1250 	umtxq_lock(&uq->uq_key);
1251 	if (uq->uq_flags & UQF_UMTXQ) {
1252 		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1253 		if (error == EWOULDBLOCK)
1254 			error = ETIMEDOUT;
1255 		if (uq->uq_flags & UQF_UMTXQ) {
1256 			umtxq_busy(&uq->uq_key);
1257 			umtxq_remove(uq);
1258 			umtxq_unbusy(&uq->uq_key);
1259 		}
1260 	}
1261 	umtxq_unlock(&uq->uq_key);
1262 
1263 	mtx_lock_spin(&sched_lock);
1264 	uq->uq_pi_blocked = NULL;
1265 	td->td_flags &= ~TDF_UPIBLOCKED;
1266 	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1267 	umtx_unpropagate_priority(pi);
1268 	mtx_unlock_spin(&sched_lock);
1269 
1270 	umtxq_lock(&uq->uq_key);
1271 
1272 	return (error);
1273 }
1274 
1275 /*
1276  * Add reference count for a PI mutex.
1277  */
1278 static void
1279 umtx_pi_ref(struct umtx_pi *pi)
1280 {
1281 	struct umtxq_chain *uc;
1282 
1283 	uc = umtxq_getchain(&pi->pi_key);
1284 	UMTXQ_LOCKED_ASSERT(uc);
1285 	pi->pi_refcount++;
1286 }
1287 
1288 /*
1289  * Decrease the reference count of a PI mutex; when the counter
1290  * drops to zero, its memory is freed.
1291  */
1292 static void
1293 umtx_pi_unref(struct umtx_pi *pi)
1294 {
1295 	struct umtxq_chain *uc;
1296 	int free = 0;
1297 
1298 	uc = umtxq_getchain(&pi->pi_key);
1299 	UMTXQ_LOCKED_ASSERT(uc);
1300 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1301 	if (--pi->pi_refcount == 0) {
1302 		mtx_lock_spin(&sched_lock);
1303 		if (pi->pi_owner != NULL) {
1304 			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1305 				pi, pi_link);
1306 			pi->pi_owner = NULL;
1307 		}
1308 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1309 			("blocked queue not empty"));
1310 		mtx_unlock_spin(&sched_lock);
1311 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1312 		free = 1;
1313 	}
1314 	if (free)
1315 		umtx_pi_free(pi);
1316 }
1317 
1318 /*
1319  * Find a PI mutex in the hash table.
1320  */
1321 static struct umtx_pi *
1322 umtx_pi_lookup(struct umtx_key *key)
1323 {
1324 	struct umtxq_chain *uc;
1325 	struct umtx_pi *pi;
1326 
1327 	uc = umtxq_getchain(key);
1328 	UMTXQ_LOCKED_ASSERT(uc);
1329 
1330 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1331 		if (umtx_key_match(&pi->pi_key, key)) {
1332 			return (pi);
1333 		}
1334 	}
1335 	return (NULL);
1336 }
1337 
1338 /*
1339  * Insert a PI mutex into the hash table.
1340  */
1341 static inline void
1342 umtx_pi_insert(struct umtx_pi *pi)
1343 {
1344 	struct umtxq_chain *uc;
1345 
1346 	uc = umtxq_getchain(&pi->pi_key);
1347 	UMTXQ_LOCKED_ASSERT(uc);
1348 	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1349 }
1350 
1351 /*
1352  * Lock a PI mutex.
1353  */
1354 static int
1355 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1356 	int try)
1357 {
1358 	struct umtx_q *uq;
1359 	struct umtx_pi *pi, *new_pi;
1360 	uint32_t id, owner, old;
1361 	int error;
1362 
1363 	id = td->td_tid;
1364 	uq = td->td_umtxq;
1365 
1366 	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1367 	    &uq->uq_key)) != 0)
1368 		return (error);
1369 	for (;;) {
1370 		pi = NULL;
1371 		umtxq_lock(&uq->uq_key);
1372 		pi = umtx_pi_lookup(&uq->uq_key);
1373 		if (pi == NULL) {
1374 			umtxq_unlock(&uq->uq_key);
1375 			new_pi = umtx_pi_alloc();
1376 			new_pi->pi_key = uq->uq_key;
1377 			umtxq_lock(&uq->uq_key);
1378 			pi = umtx_pi_lookup(&uq->uq_key);
1379 			if (pi != NULL)
1380 				umtx_pi_free(new_pi);
1381 			else {
1382 				umtx_pi_insert(new_pi);
1383 				pi = new_pi;
1384 			}
1385 		}
1386 
1387 		umtx_pi_ref(pi);
1388 		umtxq_unlock(&uq->uq_key);
1389 
1390 		/*
1391 		 * Care must be exercised when dealing with the umtx structure:
1392 		 * it can fault on any access.
1393 		 */
1394 
1395 		/*
1396 		 * Try the uncontested case.  This should be done in userland.
1397 		 */
1398 		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1399 
1400 		/* The acquire succeeded. */
1401 		if (owner == UMUTEX_UNOWNED) {
1402 			error = 0;
1403 			break;
1404 		}
1405 
1406 		/* The address was invalid. */
1407 		if (owner == -1) {
1408 			error = EFAULT;
1409 			break;
1410 		}
1411 
1412 		/* If no one owns it but it is contested, try to acquire it. */
1413 		if (owner == UMUTEX_CONTESTED) {
1414 			owner = casuword32(&m->m_owner,
1415 			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1416 
1417 			if (owner == UMUTEX_CONTESTED) {
1418 				umtxq_lock(&uq->uq_key);
1419 				error = umtx_pi_claim(pi, td);
1420 				umtxq_unlock(&uq->uq_key);
1421 				break;
1422 			}
1423 
1424 			/* The address was invalid. */
1425 			if (owner == -1) {
1426 				error = EFAULT;
1427 				break;
1428 			}
1429 
1430 			/* If this failed the lock has changed, restart. */
1431 			umtxq_lock(&uq->uq_key);
1432 			umtx_pi_unref(pi);
1433 			umtxq_unlock(&uq->uq_key);
1434 			pi = NULL;
1435 			continue;
1436 		}
1437 
1438 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1439 		    (owner & ~UMUTEX_CONTESTED) == id) {
1440 			error = EDEADLK;
1441 			break;
1442 		}
1443 
1444 		if (try != 0) {
1445 			error = EBUSY;
1446 			break;
1447 		}
1448 
1449 		/*
1450 		 * If we caught a signal, we have retried and now
1451 		 * exit immediately.
1452 		 */
1453 		if (error != 0)
1454 			break;
1455 
1456 		umtxq_lock(&uq->uq_key);
1457 		umtxq_busy(&uq->uq_key);
1458 		umtxq_unlock(&uq->uq_key);
1459 
1460 		/*
1461 		 * Set the contested bit so that a release in user space
1462 		 * knows to use the system call for unlock.  If this fails,
1463 		 * either someone else has acquired the lock or it has been
1464 		 * released.
1465 		 */
1466 		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1467 
1468 		/* The address was invalid. */
1469 		if (old == -1) {
1470 			umtxq_lock(&uq->uq_key);
1471 			umtxq_unbusy(&uq->uq_key);
1472 			umtxq_unlock(&uq->uq_key);
1473 			error = EFAULT;
1474 			break;
1475 		}
1476 
1477 		umtxq_lock(&uq->uq_key);
1478 		umtxq_unbusy(&uq->uq_key);
1479 		/*
1480 		 * If we set the contested bit, sleep.  Otherwise the lock
1481 		 * changed and we need to retry, or we lost a race to the
1482 		 * thread unlocking the umtx.
1483 		 */
1484 		if (old == owner)
1485 			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1486 				 "umtxpi", timo);
1487 		umtx_pi_unref(pi);
1488 		umtxq_unlock(&uq->uq_key);
1489 		pi = NULL;
1490 	}
1491 
1492 	if (pi != NULL) {
1493 		umtxq_lock(&uq->uq_key);
1494 		umtx_pi_unref(pi);
1495 		umtxq_unlock(&uq->uq_key);
1496 	}
1497 
1498 	umtx_key_release(&uq->uq_key);
1499 	return (error);
1500 }
1501 
1502 /*
1503  * Unlock a PI mutex.
1504  */
1505 static int
1506 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1507 {
1508 	struct umtx_key key;
1509 	struct umtx_q *uq_first, *uq_first2, *uq_me;
1510 	struct umtx_pi *pi, *pi2;
1511 	uint32_t owner, old, id;
1512 	int error;
1513 	int count;
1514 	int pri;
1515 
1516 	id = td->td_tid;
1517 	/*
1518 	 * Make sure we own this mtx.
1519 	 */
1520 	owner = fuword32(&m->m_owner);
1521 	if (owner == -1)
1522 		return (EFAULT);
1523 
1524 	if ((owner & ~UMUTEX_CONTESTED) != id)
1525 		return (EPERM);
1526 
1527 	/* This should be done in userland */
1528 	if ((owner & UMUTEX_CONTESTED) == 0) {
1529 		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1530 		if (old == -1)
1531 			return (EFAULT);
1532 		if (old == owner)
1533 			return (0);
1534 		owner = old;
1535 	}
1536 
1537 	/* We should only ever be in here for contested locks */
1538 	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1539 	    &key)) != 0)
1540 		return (error);
1541 
1542 	umtxq_lock(&key);
1543 	umtxq_busy(&key);
1544 	count = umtxq_count_pi(&key, &uq_first);
1545 	if (uq_first != NULL) {
1546 		pi = uq_first->uq_pi_blocked;
1547 		if (pi->pi_owner != curthread) {
1548 			umtxq_unbusy(&key);
1549 			umtxq_unlock(&key);
1550 			/* userland corrupted the mutex */
1551 			return (EPERM);
1552 		}
1553 		uq_me = curthread->td_umtxq;
1554 		mtx_lock_spin(&sched_lock);
1555 		pi->pi_owner = NULL;
1556 		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1557 		uq_first = TAILQ_FIRST(&pi->pi_blocked);
1558 		pri = PRI_MAX;
1559 		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1560 			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1561 			if (uq_first2 != NULL) {
1562 				if (pri > UPRI(uq_first2->uq_thread))
1563 					pri = UPRI(uq_first2->uq_thread);
1564 			}
1565 		}
1566 		sched_unlend_user_prio(curthread, pri);
1567 		mtx_unlock_spin(&sched_lock);
1568 	}
1569 	umtxq_unlock(&key);
1570 
1571 	/*
1572 	 * When unlocking the umtx, it must be marked as unowned if
1573 	 * there is zero or one thread only waiting for it.
1574 	 * Otherwise, it must be marked as contested.
1575 	 */
1576 	old = casuword32(&m->m_owner, owner,
1577 		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1578 
1579 	umtxq_lock(&key);
1580 	if (uq_first != NULL)
1581 		umtxq_signal_thread(uq_first);
1582 	umtxq_unbusy(&key);
1583 	umtxq_unlock(&key);
1584 	umtx_key_release(&key);
1585 	if (old == -1)
1586 		return (EFAULT);
1587 	if (old != owner)
1588 		return (EINVAL);
1589 	return (0);
1590 }
1591 
1592 /*
1593  * Lock a PP mutex.
1594  */
1595 static int
1596 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1597 	int try)
1598 {
1599 	struct umtx_q *uq, *uq2;
1600 	struct umtx_pi *pi;
1601 	uint32_t ceiling;
1602 	uint32_t owner, id;
1603 	int error, pri, old_inherited_pri, su;
1604 
1605 	id = td->td_tid;
1606 	uq = td->td_umtxq;
1607 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1608 	    &uq->uq_key)) != 0)
1609 		return (error);
1610 	su = (suser(td) == 0);
1611 	for (;;) {
1612 		old_inherited_pri = uq->uq_inherited_pri;
1613 		umtxq_lock(&uq->uq_key);
1614 		umtxq_busy(&uq->uq_key);
1615 		umtxq_unlock(&uq->uq_key);
1616 
1617 		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1618 		if (ceiling > RTP_PRIO_MAX) {
1619 			error = EINVAL;
1620 			goto out;
1621 		}
1622 
1623 		mtx_lock_spin(&sched_lock);
1624 		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1625 			mtx_unlock_spin(&sched_lock);
1626 			error = EINVAL;
1627 			goto out;
1628 		}
1629 		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1630 			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1631 			if (uq->uq_inherited_pri < UPRI(td))
1632 				sched_lend_user_prio(td, uq->uq_inherited_pri);
1633 		}
1634 		mtx_unlock_spin(&sched_lock);
1635 
1636 		owner = casuword32(&m->m_owner,
1637 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1638 
1639 		if (owner == UMUTEX_CONTESTED) {
1640 			error = 0;
1641 			break;
1642 		}
1643 
1644 		/* The address was invalid. */
1645 		if (owner == -1) {
1646 			error = EFAULT;
1647 			break;
1648 		}
1649 
1650 		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1651 		    (owner & ~UMUTEX_CONTESTED) == id) {
1652 			error = EDEADLK;
1653 			break;
1654 		}
1655 
1656 		if (try != 0) {
1657 			error = EBUSY;
1658 			break;
1659 		}
1660 
1661 		/*
1662 		 * If we caught a signal, we have retried and now
1663 		 * exit immediately.
1664 		 */
1665 		if (error != 0)
1666 			break;
1667 
1668 		umtxq_lock(&uq->uq_key);
1669 		umtxq_insert(uq);
1670 		umtxq_unbusy(&uq->uq_key);
1671 		error = umtxq_sleep(uq, "umtxpp", timo);
1672 		umtxq_remove(uq);
1673 		umtxq_unlock(&uq->uq_key);
1674 
1675 		mtx_lock_spin(&sched_lock);
1676 		uq->uq_inherited_pri = old_inherited_pri;
1677 		pri = PRI_MAX;
1678 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1679 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1680 			if (uq2 != NULL) {
1681 				if (pri > UPRI(uq2->uq_thread))
1682 					pri = UPRI(uq2->uq_thread);
1683 			}
1684 		}
1685 		if (pri > uq->uq_inherited_pri)
1686 			pri = uq->uq_inherited_pri;
1687 		sched_unlend_user_prio(td, pri);
1688 		mtx_unlock_spin(&sched_lock);
1689 	}
1690 
1691 	if (error != 0) {
1692 		mtx_lock_spin(&sched_lock);
1693 		uq->uq_inherited_pri = old_inherited_pri;
1694 		pri = PRI_MAX;
1695 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1696 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1697 			if (uq2 != NULL) {
1698 				if (pri > UPRI(uq2->uq_thread))
1699 					pri = UPRI(uq2->uq_thread);
1700 			}
1701 		}
1702 		if (pri > uq->uq_inherited_pri)
1703 			pri = uq->uq_inherited_pri;
1704 		sched_unlend_user_prio(td, pri);
1705 		mtx_unlock_spin(&sched_lock);
1706 	}
1707 
1708 out:
1709 	umtxq_lock(&uq->uq_key);
1710 	umtxq_unbusy(&uq->uq_key);
1711 	umtxq_unlock(&uq->uq_key);
1712 	umtx_key_release(&uq->uq_key);
1713 	return (error);
1714 }
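
/*
 * A PP (priority-protected, PTHREAD_PRIO_PROTECT) mutex raises its owner
 * to the ceiling priority on every acquisition, which is why
 * _do_lock_pp() above maps m_ceilings[0] into the realtime range before
 * lending priority.  A minimal userland sketch that creates one; the
 * sketch_* name is illustrative.
 */
#include <pthread.h>

static int
sketch_make_pp_mutex(pthread_mutex_t *m, int ceiling)
{
	pthread_mutexattr_t attr;
	int error;

	error = pthread_mutexattr_init(&attr);
	if (error == 0)
		error = pthread_mutexattr_setprotocol(&attr,
		    PTHREAD_PRIO_PROTECT);
	if (error == 0)
		error = pthread_mutexattr_setprioceiling(&attr, ceiling);
	if (error == 0)
		error = pthread_mutex_init(m, &attr);
	pthread_mutexattr_destroy(&attr);
	return (error);
}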
1715 
1716 /*
1717  * Unlock a PP mutex.
1718  */
1719 static int
1720 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
1721 {
1722 	struct umtx_key key;
1723 	struct umtx_q *uq, *uq2;
1724 	struct umtx_pi *pi;
1725 	uint32_t owner, id;
1726 	uint32_t rceiling;
1727 	int error, pri, new_inherited_pri, su;
1728 
1729 	id = td->td_tid;
1730 	uq = td->td_umtxq;
1731 	su = (suser(td) == 0);
1732 
1733 	/*
1734 	 * Make sure we own this mtx.
1735 	 */
1736 	owner = fuword32(&m->m_owner);
1737 	if (owner == -1)
1738 		return (EFAULT);
1739 
1740 	if ((owner & ~UMUTEX_CONTESTED) != id)
1741 		return (EPERM);
1742 
1743 	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
1744 	if (error != 0)
1745 		return (error);
1746 
1747 	if (rceiling == -1)
1748 		new_inherited_pri = PRI_MAX;
1749 	else {
1750 		rceiling = RTP_PRIO_MAX - rceiling;
1751 		if (rceiling > RTP_PRIO_MAX)
1752 			return (EINVAL);
1753 		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
1754 	}
1755 
1756 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1757 	    &key)) != 0)
1758 		return (error);
1759 	umtxq_lock(&key);
1760 	umtxq_busy(&key);
1761 	umtxq_unlock(&key);
1762 	/*
1763 	 * For a priority-protected mutex, always set the unlocked state
1764 	 * to UMUTEX_CONTESTED so that userland always enters the kernel
1765 	 * to lock the mutex; this is necessary because thread priority
1766 	 * has to be adjusted for such mutexes.
1767 	 */
1768 	error = suword32(&m->m_owner, UMUTEX_CONTESTED);
1769 
1770 	umtxq_lock(&key);
1771 	if (error == 0)
1772 		umtxq_signal(&key, 1);
1773 	umtxq_unbusy(&key);
1774 	umtxq_unlock(&key);
1775 
1776 	if (error == -1)
1777 		error = EFAULT;
1778 	else {
1779 		mtx_lock_spin(&sched_lock);
1780 		if (su != 0)
1781 			uq->uq_inherited_pri = new_inherited_pri;
1782 		pri = PRI_MAX;
1783 		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1784 			uq2 = TAILQ_FIRST(&pi->pi_blocked);
1785 			if (uq2 != NULL) {
1786 				if (pri > UPRI(uq2->uq_thread))
1787 					pri = UPRI(uq2->uq_thread);
1788 			}
1789 		}
1790 		if (pri > uq->uq_inherited_pri)
1791 			pri = uq->uq_inherited_pri;
1792 		sched_unlend_user_prio(td, pri);
1793 		mtx_unlock_spin(&sched_lock);
1794 	}
1795 	umtx_key_release(&key);
1796 	return (error);
1797 }
1798 
1799 static int
1800 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
1801 	uint32_t *old_ceiling)
1802 {
1803 	struct umtx_q *uq;
1804 	uint32_t save_ceiling;
1805 	uint32_t owner, id;
1806 	uint32_t flags;
1807 	int error;
1808 
1809 	flags = fuword32(&m->m_flags);
1810 	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
1811 		return (EINVAL);
1812 	if (ceiling > RTP_PRIO_MAX)
1813 		return (EINVAL);
1814 	id = td->td_tid;
1815 	uq = td->td_umtxq;
1816 	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1817 	   &uq->uq_key)) != 0)
1818 		return (error);
1819 	for (;;) {
1820 		umtxq_lock(&uq->uq_key);
1821 		umtxq_busy(&uq->uq_key);
1822 		umtxq_unlock(&uq->uq_key);
1823 
1824 		save_ceiling = fuword32(&m->m_ceilings[0]);
1825 
1826 		owner = casuword32(&m->m_owner,
1827 		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1828 
1829 		if (owner == UMUTEX_CONTESTED) {
1830 			suword32(&m->m_ceilings[0], ceiling);
1831 			suword32(&m->m_owner, UMUTEX_CONTESTED);
1832 			error = 0;
1833 			break;
1834 		}
1835 
1836 		/* The address was invalid. */
1837 		if (owner == -1) {
1838 			error = EFAULT;
1839 			break;
1840 		}
1841 
1842 		if ((owner & ~UMUTEX_CONTESTED) == id) {
1843 			suword32(&m->m_ceilings[0], ceiling);
1844 			error = 0;
1845 			break;
1846 		}
1847 
1848 		/*
1849 		 * If we caught a signal, we have retried and now
1850 		 * exit immediately.
1851 		 */
1852 		if (error != 0)
1853 			break;
1854 
1855 		/*
1856 		 * If we set the contested bit, sleep.  Otherwise the lock
1857 		 * changed and we need to retry, or we lost a race to the
1858 		 * thread unlocking the umtx.
1859 		 */
1860 		umtxq_lock(&uq->uq_key);
1861 		umtxq_insert(uq);
1862 		umtxq_unbusy(&uq->uq_key);
1863 		error = umtxq_sleep(uq, "umtxpp", 0);
1864 		umtxq_remove(uq);
1865 		umtxq_unlock(&uq->uq_key);
1866 	}
1867 	umtxq_lock(&uq->uq_key);
1868 	if (error == 0)
1869 		umtxq_signal(&uq->uq_key, INT_MAX);
1870 	umtxq_unbusy(&uq->uq_key);
1871 	umtxq_unlock(&uq->uq_key);
1872 	umtx_key_release(&uq->uq_key);
1873 	if (error == 0 && old_ceiling != NULL)
1874 		suword32(old_ceiling, save_ceiling);
1875 	return (error);
1876 }
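
/*
 * The natural userland entry point for do_set_ceiling() is POSIX's
 * pthread_mutex_setprioceiling(), presumably routed here through
 * UMTX_OP_SET_CEILING; POSIX specifies that the mutex is locked, the
 * ceiling swapped, and the old ceiling returned.  A minimal sketch:
 */
#include <pthread.h>

static int
sketch_change_ceiling(pthread_mutex_t *m, int ceiling, int *old)
{
	return (pthread_mutex_setprioceiling(m, ceiling, old));
}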
1877 
1878 static int
1879 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
1880 	int try)
1881 {
1882 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1883 	case 0:
1884 		return (_do_lock_normal(td, m, flags, timo, try));
1885 	case UMUTEX_PRIO_INHERIT:
1886 		return (_do_lock_pi(td, m, flags, timo, try));
1887 	case UMUTEX_PRIO_PROTECT:
1888 		return (_do_lock_pp(td, m, flags, timo, try));
1889 	}
1890 	return (EINVAL);
1891 }
1892 
1893 /*
1894  * Lock a userland POSIX mutex.
1895  */
1896 static int
1897 do_lock_umutex(struct thread *td, struct umutex *m,
1898 	struct timespec *timeout, int try)
1899 {
1900 	struct timespec ts, ts2, ts3;
1901 	struct timeval tv;
1902 	uint32_t flags;
1903 	int error;
1904 
1905 	flags = fuword32(&m->m_flags);
1906 	if (flags == -1)
1907 		return (EFAULT);
1908 
1909 	if (timeout == NULL) {
1910 		error = _do_lock_umutex(td, m, flags, 0, try);
1911 		/* Mutex locking is restarted if it is interrupted. */
1912 		if (error == EINTR)
1913 			error = ERESTART;
1914 	} else {
1915 		getnanouptime(&ts);
1916 		timespecadd(&ts, timeout);
1917 		TIMESPEC_TO_TIMEVAL(&tv, timeout);
1918 		for (;;) {
1919 			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
1920 			if (error != ETIMEDOUT)
1921 				break;
1922 			getnanouptime(&ts2);
1923 			if (timespeccmp(&ts2, &ts, >=)) {
1924 				error = ETIMEDOUT;
1925 				break;
1926 			}
1927 			ts3 = ts;
1928 			timespecsub(&ts3, &ts2);
1929 			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
1930 		}
1931 		/* Timed-locking is not restarted. */
1932 		if (error == ERESTART)
1933 			error = EINTR;
1934 	}
1935 	return (error);
1936 }
1937 
1938 /*
1939  * Unlock a userland POSIX mutex.
1940  */
1941 static int
1942 do_unlock_umutex(struct thread *td, struct umutex *m)
1943 {
1944 	uint32_t flags;
1945 
1946 	flags = fuword32(&m->m_flags);
1947 	if (flags == -1)
1948 		return (EFAULT);
1949 
1950 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
1951 	case 0:
1952 		return (do_unlock_normal(td, m, flags));
1953 	case UMUTEX_PRIO_INHERIT:
1954 		return (do_unlock_pi(td, m, flags));
1955 	case UMUTEX_PRIO_PROTECT:
1956 		return (do_unlock_pp(td, m, flags));
1957 	}
1958 
1959 	return (EINVAL);
1960 }
1961 
1962 int
1963 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
1964     /* struct umtx *umtx */
1965 {
1966 	return (_do_lock(td, uap->umtx, td->td_tid, 0));
1967 }
1968 
1969 int
1970 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
1971     /* struct umtx *umtx */
1972 {
1973 	return (do_unlock(td, uap->umtx, td->td_tid));
1974 }
1975 
1976 int
1977 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
1978 {
1979 	struct timespec timeout;
1980 	struct timespec *ts;
1981 	int error;
1982 
1983 	switch(uap->op) {
1984 	case UMTX_OP_MUTEX_LOCK:
1985 		/* Allow a null timespec (wait forever). */
1986 		if (uap->uaddr2 == NULL)
1987 			ts = NULL;
1988 		else {
1989 			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
1990 			if (error != 0)
1991 				break;
1992 			if (timeout.tv_nsec >= 1000000000 ||
1993 			    timeout.tv_nsec < 0) {
1994 				error = EINVAL;
1995 				break;
1996 			}
1997 			ts = &timeout;
1998 		}
1999 		error = do_lock_umutex(td, uap->obj, ts, 0);
2000 		break;
2001 	case UMTX_OP_MUTEX_UNLOCK:
2002 		error = do_unlock_umutex(td, uap->obj);
2003 		break;
2004 	case UMTX_OP_LOCK:
2005 		/* Allow a null timespec (wait forever). */
2006 		if (uap->uaddr2 == NULL)
2007 			ts = NULL;
2008 		else {
2009 			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2010 			if (error != 0)
2011 				break;
2012 			if (timeout.tv_nsec >= 1000000000 ||
2013 			    timeout.tv_nsec < 0) {
2014 				error = EINVAL;
2015 				break;
2016 			}
2017 			ts = &timeout;
2018 		}
2019 		error = do_lock(td, uap->obj, uap->val, ts);
2020 		break;
2021 	case UMTX_OP_UNLOCK:
2022 		error = do_unlock(td, uap->obj, uap->val);
2023 		break;
2024 	case UMTX_OP_WAIT:
2025 		/* Allow a null timespec (wait forever). */
2026 		if (uap->uaddr2 == NULL)
2027 			ts = NULL;
2028 		else {
2029 			error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2030 			if (error != 0)
2031 				break;
2032 			if (timeout.tv_nsec >= 1000000000 ||
2033 			    timeout.tv_nsec < 0) {
2034 				error = EINVAL;
2035 				break;
2036 			}
2037 			ts = &timeout;
2038 		}
2039 		error = do_wait(td, uap->obj, uap->val, ts);
2040 		break;
2041 	case UMTX_OP_WAKE:
2042 		error = kern_umtx_wake(td, uap->obj, uap->val);
2043 		break;
2044 	case UMTX_OP_MUTEX_TRYLOCK:
2045 		error = do_lock_umutex(td, uap->obj, NULL, 1);
2046 		break;
2047 	case UMTX_OP_SET_CEILING:
2048 		error = do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2049 		break;
2050 	default:
2051 		error = EINVAL;
2052 		break;
2053 	}
2054 	return (error);
2055 }
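
/*
 * A minimal userland sketch of calling the multiplexer directly with a
 * relative timeout, matching the validation above (tv_nsec must lie in
 * [0, 1000000000)); the timeout travels through uaddr2.  Assumes the
 * _umtx_op(2) wrapper; the sketch_* name is illustrative.
 */
#include <sys/types.h>
#include <sys/umtx.h>
#include <errno.h>
#include <time.h>

static int
sketch_timed_wait(struct umtx *w, u_long seen, time_t secs)
{
	struct timespec ts;

	ts.tv_sec = secs;
	ts.tv_nsec = 0;
	if (_umtx_op(w, UMTX_OP_WAIT, seen, NULL, &ts) == -1)
		return (errno);	/* e.g. ETIMEDOUT, EINTR, EFAULT */
	return (0);
}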
2056 
2057 void
2058 umtx_thread_init(struct thread *td)
2059 {
2060 	td->td_umtxq = umtxq_alloc();
2061 	td->td_umtxq->uq_thread = td;
2062 }
2063 
2064 void
2065 umtx_thread_fini(struct thread *td)
2066 {
2067 	umtxq_free(td->td_umtxq);
2068 }
2069 
2070 /*
2071  * Called when a new thread is created, e.g. by fork().
2072  */
2073 void
2074 umtx_thread_alloc(struct thread *td)
2075 {
2076 	struct umtx_q *uq;
2077 
2078 	uq = td->td_umtxq;
2079 	uq->uq_inherited_pri = PRI_MAX;
2080 
2081 	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
2082 	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
2083 	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
2084 	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
2085 }
2086 
2087 /*
2088  * exec() hook.
2089  */
2090 static void
2091 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
2092 	struct image_params *imgp __unused)
2093 {
2094 	umtx_thread_cleanup(curthread);
2095 }
2096 
2097 /*
2098  * thread_exit() hook.
2099  */
2100 void
2101 umtx_thread_exit(struct thread *td)
2102 {
2103 	umtx_thread_cleanup(td);
2104 }
2105 
2106 /*
2107  * Clean up umtx data.
2108  */
2109 static void
2110 umtx_thread_cleanup(struct thread *td)
2111 {
2112 	struct umtx_q *uq;
2113 	struct umtx_pi *pi;
2114 
2115 	if ((uq = td->td_umtxq) == NULL)
2116 		return;
2117 
2118 	mtx_lock_spin(&sched_lock);
2119 	uq->uq_inherited_pri = PRI_MAX;
2120 	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
2121 		pi->pi_owner = NULL;
2122 		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
2123 	}
2124 	td->td_flags &= ~TDF_UBORROWING;
2125 	mtx_unlock_spin(&sched_lock);
2126 }
2127