xref: /freebsd/sys/kern/kern_umtx.c (revision d139ce67c0b39ab6532275f7baff67d220fe8001)
1 /*-
2  * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice unmodified, this list of conditions, and the following
11  *    disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54 
55 #include <machine/cpu.h>
56 
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60 
/* Types of userland synchronization objects, stored in umtx_key.type. */
#define TYPE_SIMPLE_LOCK	0
#define TYPE_SIMPLE_WAIT	1
#define TYPE_NORMAL_UMUTEX	2
#define TYPE_PI_UMUTEX		3
#define TYPE_PP_UMUTEX		4
#define TYPE_CV			5
67 
/* Key to represent a unique userland synchronous object */
struct umtx_key {
	int	hash;		/* Bucket index into umtxq_chains[]. */
	int	type;		/* One of the TYPE_* object types. */
	int	shared;		/* Non-zero: process-shared (shared form below). */
	union {
		/* Process-shared form: backing VM object plus offset. */
		struct {
			vm_object_t	object;
			uintptr_t	offset;
		} shared;
		/* Process-private form: owning vmspace plus user address. */
		struct {
			struct vmspace	*vs;
			uintptr_t	addr;
		} private;
		/* Type-punned view used for hashing and comparison. */
		struct {
			void		*a;
			uintptr_t	b;
		} both;
	} info;
};
88 
/* Priority inheritance mutex info. */
struct umtx_pi {
	/* Owner thread */
	struct thread		*pi_owner;

	/* Reference count */
	int			pi_refcount;

	/* List entry linking PI mutexes held by a thread */
	TAILQ_ENTRY(umtx_pi)	pi_link;

	/* List entry in hash */
	TAILQ_ENTRY(umtx_pi)	pi_hashlink;

	/* List for waiters */
	TAILQ_HEAD(,umtx_q)	pi_blocked;

	/* Identify a userland lock object */
	struct umtx_key		pi_key;
};
109 
/* A userland synchronous object user. */
struct umtx_q {
	/* Linked list for the hash. */
	TAILQ_ENTRY(umtx_q)	uq_link;

	/* Umtx key. */
	struct umtx_key		uq_key;

	/* Umtx flags. */
	int			uq_flags;
#define UQF_UMTXQ	0x0001	/* Entry is on a chain's wait queue. */

	/* The thread this queue entry belongs to. */
	struct thread		*uq_thread;

	/*
	 * Blocked on PI mutex. Readers may hold either the chain lock
	 * or sched_lock; writers must hold both the chain lock and
	 * sched_lock.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* On blocked list */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* Threads contending with us */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex */
	u_char			uq_inherited_pri;
};
141 
TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* List of sleep queues. */
	struct umtxq_head	uc_queue;

	/* Busy flag; serializes operations that may block. */
	char			uc_busy;

	/* Number of threads waiting for the busy flag to clear. */
	int			uc_waiters;

	/* All PI mutexes hashed onto this chain */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};
161 
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Don't propagate time-sharing priority, there is a security reason,
 * a user can simply introduce PI-mutex, let thread A lock the mutex,
 * and let another thread B block on the mutex, because B is
 * sleeping, its priority will be boosted, this causes A's priority to
 * be boosted via priority propagating too and will never be lowered even
 * if it is using 100%CPU, this is unfair to other processes.
 */

/* Effective user priority for propagation; time-share clamps to the max. */
#define UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			  (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Multiplicative hash constant; see umtxq_hash(). */
#define	GOLDEN_RATIO_PRIME	2654404609U
/* Number of wait-queue chains (hash buckets). */
#define	UMTX_CHAINS		128
/* Take the top 7 bits of the multiplicative hash. */
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes for umtx_key_get(). */
#define THREAD_SHARE		0
#define PROCESS_SHARE		1
#define AUTO_SHARE		2

#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
187 
/* Zone for struct umtx_pi allocations. */
static uma_zone_t		umtx_pi_zone;
/* Hash table of wait-queue chains. */
static struct umtxq_chain	umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Count of live umtx_pi records, exported below via sysctl. */
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");
SYSCTL_DECL(_kern_threads);
/* Default adaptive-spin count when the user mutex specifies none. */
static int			umtx_dflt_spins = 0;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_dflt_spins, CTLFLAG_RW,
    &umtx_dflt_spins, 0, "default umtx spin count");
/* Upper bound on the adaptive-spin count. */
static int			umtx_max_spins = 3000;
SYSCTL_INT(_kern_threads, OID_AUTO, umtx_max_spins, CTLFLAG_RW,
    &umtx_max_spins, 0, "max umtx spin count");

static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
227 
228 static void
229 umtxq_sysinit(void *arg __unused)
230 {
231 	int i;
232 
233 	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
234 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
235 	for (i = 0; i < UMTX_CHAINS; ++i) {
236 		mtx_init(&umtxq_chains[i].uc_lock, "umtxql", NULL,
237 			 MTX_DEF | MTX_DUPOK);
238 		TAILQ_INIT(&umtxq_chains[i].uc_queue);
239 		TAILQ_INIT(&umtxq_chains[i].uc_pi_list);
240 		umtxq_chains[i].uc_busy = 0;
241 		umtxq_chains[i].uc_waiters = 0;
242 	}
243 	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
244 	    EVENTHANDLER_PRI_ANY);
245 }
246 
247 struct umtx_q *
248 umtxq_alloc(void)
249 {
250 	struct umtx_q *uq;
251 
252 	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
253 	TAILQ_INIT(&uq->uq_pi_contested);
254 	uq->uq_inherited_pri = PRI_MAX;
255 	return (uq);
256 }
257 
/*
 * Release a queue entry previously obtained from umtxq_alloc().
 */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}
263 
264 static inline void
265 umtxq_hash(struct umtx_key *key)
266 {
267 	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
268 	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
269 }
270 
271 static inline int
272 umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
273 {
274 	return (k1->type == k2->type &&
275 		k1->info.both.a == k2->info.both.a &&
276 	        k1->info.both.b == k2->info.both.b);
277 }
278 
/*
 * Map an already-hashed key to its wait-queue chain.
 */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	return (&umtxq_chains[key->hash]);
}
284 
/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
 * The chain lock must be held on entry; msleep() drops and reacquires
 * it while waiting for the busy flag to clear.
 */
static inline void
umtxq_busy(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_assert(&uc->uc_lock, MA_OWNED);
	while (uc->uc_busy != 0) {
		uc->uc_waiters++;
		msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
		uc->uc_waiters--;
	}
	uc->uc_busy = 1;
}
303 
304 /*
305  * Unbusy a chain.
306  */
307 static inline void
308 umtxq_unbusy(struct umtx_key *key)
309 {
310 	struct umtxq_chain *uc;
311 
312 	uc = umtxq_getchain(key);
313 	mtx_assert(&uc->uc_lock, MA_OWNED);
314 	KASSERT(uc->uc_busy != 0, ("not busy"));
315 	uc->uc_busy = 0;
316 	if (uc->uc_waiters)
317 		wakeup_one(uc);
318 }
319 
320 /*
321  * Lock a chain.
322  */
323 static inline void
324 umtxq_lock(struct umtx_key *key)
325 {
326 	struct umtxq_chain *uc;
327 
328 	uc = umtxq_getchain(key);
329 	mtx_lock(&uc->uc_lock);
330 }
331 
332 /*
333  * Unlock a chain.
334  */
335 static inline void
336 umtxq_unlock(struct umtx_key *key)
337 {
338 	struct umtxq_chain *uc;
339 
340 	uc = umtxq_getchain(key);
341 	mtx_unlock(&uc->uc_lock);
342 }
343 
344 /*
345  * Insert a thread onto the umtx queue.
346  */
347 static inline void
348 umtxq_insert(struct umtx_q *uq)
349 {
350 	struct umtxq_chain *uc;
351 
352 	uc = umtxq_getchain(&uq->uq_key);
353 	UMTXQ_LOCKED_ASSERT(uc);
354 	TAILQ_INSERT_TAIL(&uc->uc_queue, uq, uq_link);
355 	uq->uq_flags |= UQF_UMTXQ;
356 }
357 
358 /*
359  * Remove thread from the umtx queue.
360  */
361 static inline void
362 umtxq_remove(struct umtx_q *uq)
363 {
364 	struct umtxq_chain *uc;
365 
366 	uc = umtxq_getchain(&uq->uq_key);
367 	UMTXQ_LOCKED_ASSERT(uc);
368 	if (uq->uq_flags & UQF_UMTXQ) {
369 		TAILQ_REMOVE(&uc->uc_queue, uq, uq_link);
370 		uq->uq_flags &= ~UQF_UMTXQ;
371 	}
372 }
373 
374 /*
375  * Check if there are multiple waiters
376  */
377 static int
378 umtxq_count(struct umtx_key *key)
379 {
380 	struct umtxq_chain *uc;
381 	struct umtx_q *uq;
382 	int count = 0;
383 
384 	uc = umtxq_getchain(key);
385 	UMTXQ_LOCKED_ASSERT(uc);
386 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
387 		if (umtx_key_match(&uq->uq_key, key)) {
388 			if (++count > 1)
389 				break;
390 		}
391 	}
392 	return (count);
393 }
394 
395 /*
396  * Check if there are multiple PI waiters and returns first
397  * waiter.
398  */
399 static int
400 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
401 {
402 	struct umtxq_chain *uc;
403 	struct umtx_q *uq;
404 	int count = 0;
405 
406 	*first = NULL;
407 	uc = umtxq_getchain(key);
408 	UMTXQ_LOCKED_ASSERT(uc);
409 	TAILQ_FOREACH(uq, &uc->uc_queue, uq_link) {
410 		if (umtx_key_match(&uq->uq_key, key)) {
411 			if (++count > 1)
412 				break;
413 			*first = uq;
414 		}
415 	}
416 	return (count);
417 }
418 
419 /*
420  * Wake up threads waiting on an userland object.
421  */
422 static int
423 umtxq_signal(struct umtx_key *key, int n_wake)
424 {
425 	struct umtxq_chain *uc;
426 	struct umtx_q *uq, *next;
427 	int ret;
428 
429 	ret = 0;
430 	uc = umtxq_getchain(key);
431 	UMTXQ_LOCKED_ASSERT(uc);
432 	TAILQ_FOREACH_SAFE(uq, &uc->uc_queue, uq_link, next) {
433 		if (umtx_key_match(&uq->uq_key, key)) {
434 			umtxq_remove(uq);
435 			wakeup(uq);
436 			if (++ret >= n_wake)
437 				break;
438 		}
439 	}
440 	return (ret);
441 }
442 
443 /*
444  * Wake up specified thread.
445  */
446 static inline void
447 umtxq_signal_thread(struct umtx_q *uq)
448 {
449 	struct umtxq_chain *uc;
450 
451 	uc = umtxq_getchain(&uq->uq_key);
452 	UMTXQ_LOCKED_ASSERT(uc);
453 	umtxq_remove(uq);
454 	wakeup(uq);
455 }
456 
/*
 * Put thread into sleep state, before sleeping, check if
 * thread was removed from umtx queue.
 *
 * Returns 0 immediately if the entry was already dequeued (woken),
 * otherwise the msleep() result; EWOULDBLOCK is mapped to ETIMEDOUT.
 * PCATCH makes the sleep interruptible by signals.
 */
static inline int
umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	int error;

	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	if (!(uq->uq_flags & UQF_UMTXQ))
		return (0);
	error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
	if (error == EWOULDBLOCK)
		error = ETIMEDOUT;
	return (error);
}
476 
/*
 * Convert userspace address into unique logical address.
 *
 * THREAD_SHARE keys use the (vmspace, address) pair directly.  For
 * PROCESS_SHARE and AUTO_SHARE the address is resolved through
 * vm_map_lookup(); AUTO_SHARE falls back to a private key when the
 * map entry is not inherited-shared.  A shared key takes a reference
 * on the backing VM object which must be dropped via
 * umtx_key_release().
 *
 * Returns 0 on success or EFAULT if the address does not resolve.
 */
static int
umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return EFAULT;
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		     VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			/*
			 * NOTE(review): this combines entry offset/start and
			 * the address into an identifier used only for
			 * matching; verify it is stable across all mappings
			 * of the same object.
			 */
			key->info.shared.offset = entry->offset + entry->start -
				(vm_offset_t)addr;
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}
522 
523 /*
524  * Release key.
525  */
526 static inline void
527 umtx_key_release(struct umtx_key *key)
528 {
529 	if (key->shared)
530 		vm_object_deallocate(key->info.shared.object);
531 }
532 
/*
 * Lock a umtx object.
 *
 * td   - the locking thread; its td_umtxq entry is used for queuing.
 * umtx - user address of the lock word.
 * id   - the caller's identifier, stored as the owner on success.
 * timo - sleep timeout in ticks, 0 for no timeout.
 *
 * Returns 0 on success, EFAULT when the user address faults, or the
 * error from an interrupted/timed-out sleep (after one more retry).
 */
static int
_do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
{
	struct umtx_q *uq;
	u_long owner;
	u_long old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMTX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMTX_CONTESTED) {
			owner = casuword(&umtx->u_owner,
			    UMTX_CONTESTED, id | UMTX_CONTESTED);

			if (owner == UMTX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
629 
/*
 * Lock a umtx object.
 *
 * With no timeout the operation is restartable (EINTR -> ERESTART).
 * A timed lock instead converts ERESTART back to EINTR so the
 * syscall is not transparently restarted with a stale deadline.
 */
static int
do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx(td, umtx, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Absolute deadline = now + timeout, on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Re-arm the sleep with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
669 
/*
 * Unlock a umtx object.
 *
 * Returns 0 on success, EFAULT on a faulting user address, EPERM if
 * the caller does not own the lock, and EINVAL if the owner word
 * changed underneath us.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
730 
731 #ifdef COMPAT_IA32
732 
/*
 * Lock a umtx object.
 *
 * 32-bit compat variant of _do_lock_umtx(): operates on a 32-bit
 * lock word with casuword32() and the UMUTEX_* constants; the
 * control flow mirrors the native version.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
828 
/*
 * Lock a umtx object.
 *
 * 32-bit compat wrapper mirroring do_lock_umtx(): untimed locks are
 * restartable (EINTR -> ERESTART); timed locks convert ERESTART back
 * to EINTR so a stale deadline is not silently reused.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/* Absolute deadline = now + timeout, on the uptime clock. */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Re-arm the sleep with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
868 
/*
 * Unlock a umtx object.
 *
 * 32-bit compat variant of do_unlock_umtx(); same error contract
 * (EFAULT / EPERM / EINVAL), using fuword32()/casuword32().
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
929 #endif
930 
/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The thread is queued BEFORE the word is re-read so that a
 * concurrent store-and-wake cannot be missed.  A timed wait uses an
 * absolute deadline on the uptime clock; ERESTART is mapped to EINTR.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	    &uq->uq_key)) != 0)
		return (error);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	/*
	 * NOTE(review): fuword32() sign-extends through its int return
	 * into the u_long tmp on 64-bit platforms; a 32-bit value with
	 * the high bit set may never compare equal to id — confirm the
	 * id range used by compat32 callers.
	 */
	if (compat32 == 0)
		tmp = fuword(addr);
	else
		tmp = fuword32(addr);
	if (tmp != id) {
		/* Value already changed: do not sleep. */
		umtxq_lock(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else if (timeout == NULL) {
		umtxq_lock(&uq->uq_key);
		error = umtxq_sleep(uq, "uwait", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		umtxq_lock(&uq->uq_key);
		for (;;) {
			error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
			if (!(uq->uq_flags & UQF_UMTXQ))
				break;
			if (error != ETIMEDOUT)
				break;
			umtxq_unlock(&uq->uq_key);
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				umtxq_lock(&uq->uq_key);
				break;
			}
			/* Re-arm the sleep with the remaining time. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
			umtxq_lock(&uq->uq_key);
		}
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}
996 
/*
 * Wake up threads sleeping on the specified address.
 *
 * Wakes at most n_wake waiters.  Returns 0 on success or EFAULT if
 * the address cannot be resolved to a key.  The count of threads
 * actually woken is computed but not reported to the caller.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, AUTO_SHARE,
	   &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	ret = umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}
1015 
/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * flags - UMUTEX_* flags from the user mutex word.
 * timo  - sleep timeout in ticks (0 = no timeout).
 * try   - non-zero for trylock semantics: return EBUSY instead of
 *         sleeping when the mutex is held.
 *
 * On SMP an adaptive spin is attempted while the owner is observed
 * running on another CPU, bounded by the mutex's own spin count (or
 * umtx_dflt_spins) and capped at umtx_max_spins, before falling back
 * to sleeping in the kernel.
 */
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	uint32_t owner, old, id;
#ifdef SMP
	int spincount;
#endif
	int error = 0;

	id = td->td_tid;
	uq = td->td_umtxq;

#ifdef SMP
	/* Only spin when another CPU could be running the owner. */
	if (smp_cpus > 1) {
		spincount = fuword32(&m->m_spincount);
		if (spincount == 0)
			spincount = umtx_dflt_spins;
		if (spincount > umtx_max_spins)
			spincount = umtx_max_spins;
	} else
		spincount = 0;
#endif

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
#ifdef SMP
try_unowned:
#endif
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
#ifdef SMP
try_contested:
#endif
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id)
			return (EDEADLK);

		if (try != 0)
			return (EBUSY);

#ifdef SMP
		if (spincount > 0 && (owner & ~UMUTEX_CONTESTED) != id) {
			int i, found = 0;
			struct pcpu *pcpu = NULL;

			/* Look for a cpu the owner is running on */
			for (i = 0; i < MAXCPU; i++) {
				if (CPU_ABSENT(i))
					continue;
				pcpu = pcpu_find(i);
				if ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid) {
					found = 1;
					break;
				}
			}

			/* Owner not running anywhere: no point spinning. */
			if (__predict_false(!found))
				goto end_spin;

			/*
			 * Spin while the owner stays on that CPU, re-reading
			 * the lock word; bail out if we should reschedule or
			 * have a pending signal/stop.
			 */
			while ((owner & ~UMUTEX_CONTESTED) == pcpu->pc_curtid &&
			       (owner & ~UMUTEX_CONTESTED) != id) {
				if (--spincount <= 0)
					break;
				if ((td->td_flags &
			    	    (TDF_NEEDRESCHED|TDF_ASTPENDING|TDF_NEEDSIGCHK)) ||
				     P_SHOULDSTOP(td->td_proc))
					break;
				owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
				if (owner == UMUTEX_UNOWNED)
					goto try_unowned;
				if (owner == UMUTEX_CONTESTED)
					goto try_contested;
				cpu_spinwait();
			}
		}
end_spin:
		/* Spin only once; later iterations go straight to sleep. */
		spincount = 0;

#endif

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}
1180 
/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	/* Busy the chain so the waiter count stays meaningful. */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
1244 
1245 static inline struct umtx_pi *
1246 umtx_pi_alloc(int flags)
1247 {
1248 	struct umtx_pi *pi;
1249 
1250 	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1251 	TAILQ_INIT(&pi->pi_blocked);
1252 	atomic_add_int(&umtx_pi_allocated, 1);
1253 	return (pi);
1254 }
1255 
1256 static inline void
1257 umtx_pi_free(struct umtx_pi *pi)
1258 {
1259 	uma_zfree(umtx_pi_zone, pi);
1260 	atomic_add_int(&umtx_pi_allocated, -1);
1261 }
1262 
1263 /*
1264  * Adjust the thread's position on a pi_state after its priority has been
1265  * changed.
1266  */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Not blocked on any PI mutex: nothing to resort. */
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread or higher than the next thread.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		/* Find the first entry with a worse priority than td. */
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	/* Non-zero return: td was (still is) blocked on pi. */
	return (1);
}
1307 
1308 /*
1309  * Propagate priority when a thread is blocked on POSIX
1310  * PI mutex.
1311  */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	/*
	 * Walk up the chain of lock owners: lend our priority to the
	 * owner of the mutex we block on, then to the owner of the
	 * mutex that thread blocks on, and so on, until an owner
	 * already runs at an equal or better priority or the chain
	 * ends (owner not blocked on a PI mutex).
	 */
	for (;;) {
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		/* Owner already at least as urgent as we are: stop. */
		if (UPRI(td) <= pri)
			return;

		sched_lend_user_prio(td, pri);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}
1349 
1350 /*
1351  * Unpropagate priority for a PI mutex when a thread blocked on
1352  * it is interrupted by signal or resumed by others.
1353  */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/*
		 * Recompute the best priority among the heads of the
		 * blocked queues of all PI mutexes the owner still
		 * holds; each queue head is its best-priority waiter.
		 */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		/* Never drop below the owner's own inherited priority. */
		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		/* Continue with the mutex the owner itself blocks on. */
		pi = uq_owner->uq_pi_blocked;
	}
}
1381 
1382 /*
1383  * Insert a PI mutex into owned list.
1384  */
1385 static void
1386 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1387 {
1388 	struct umtx_q *uq_owner;
1389 
1390 	uq_owner = owner->td_umtxq;
1391 	mtx_assert(&sched_lock, MA_OWNED);
1392 	if (pi->pi_owner != NULL)
1393 		panic("pi_ower != NULL");
1394 	pi->pi_owner = owner;
1395 	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1396 }
1397 
1398 /*
1399  * Claim ownership of a PI mutex.
1400  */
1401 static int
1402 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1403 {
1404 	struct umtx_q *uq, *uq_owner;
1405 
1406 	uq_owner = owner->td_umtxq;
1407 	mtx_lock_spin(&sched_lock);
1408 	if (pi->pi_owner == owner) {
1409 		mtx_unlock_spin(&sched_lock);
1410 		return (0);
1411 	}
1412 
1413 	if (pi->pi_owner != NULL) {
1414 		/*
1415 		 * userland may have already messed the mutex, sigh.
1416 		 */
1417 		mtx_unlock_spin(&sched_lock);
1418 		return (EPERM);
1419 	}
1420 	umtx_pi_setowner(pi, owner);
1421 	uq = TAILQ_FIRST(&pi->pi_blocked);
1422 	if (uq != NULL) {
1423 		int pri;
1424 
1425 		pri = UPRI(uq->uq_thread);
1426 		if (pri < UPRI(owner))
1427 			sched_lend_user_prio(owner, pri);
1428 	}
1429 	mtx_unlock_spin(&sched_lock);
1430 	return (0);
1431 }
1432 
1433 /*
1434  * Adjust a thread's order position in its blocked PI mutex,
1435  * this may result new priority propagating process.
1436  */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;

	mtx_assert(&sched_lock, MA_OWNED);
	MPASS(TD_ON_UPILOCK(td));

	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 * (Numerically lower priority value means more urgent.)
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}
1465 
1466 /*
1467  * Sleep on a PI mutex.
1468  */
1469 static int
1470 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1471 	uint32_t owner, const char *wmesg, int timo)
1472 {
1473 	struct umtxq_chain *uc;
1474 	struct thread *td, *td1;
1475 	struct umtx_q *uq1;
1476 	int pri;
1477 	int error = 0;
1478 
1479 	td = uq->uq_thread;
1480 	KASSERT(td == curthread, ("inconsistent uq_thread"));
1481 	uc = umtxq_getchain(&uq->uq_key);
1482 	UMTXQ_LOCKED_ASSERT(uc);
1483 	umtxq_insert(uq);
1484 	if (pi->pi_owner == NULL) {
1485 		/* XXX
1486 		 * Current, We only support process private PI-mutex,
1487 		 * non-contended PI-mutexes are locked in userland.
1488 		 * Process shared PI-mutex should always be initialized
1489 		 * by kernel and be registered in kernel, locking should
1490 		 * always be done by kernel to avoid security problems.
1491 		 * For process private PI-mutex, we can find owner
1492 		 * thread and boost its priority safely.
1493 		 */
1494 		PROC_LOCK(curproc);
1495 		td1 = thread_find(curproc, owner);
1496 		mtx_lock_spin(&sched_lock);
1497 		if (td1 != NULL && pi->pi_owner == NULL) {
1498 			uq1 = td1->td_umtxq;
1499 			umtx_pi_setowner(pi, td1);
1500 		}
1501 		PROC_UNLOCK(curproc);
1502 	} else {
1503 		mtx_lock_spin(&sched_lock);
1504 	}
1505 
1506 	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1507 		pri = UPRI(uq1->uq_thread);
1508 		if (pri > UPRI(td))
1509 			break;
1510 	}
1511 
1512 	if (uq1 != NULL)
1513 		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1514 	else
1515 		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1516 
1517 	uq->uq_pi_blocked = pi;
1518 	td->td_flags |= TDF_UPIBLOCKED;
1519 	mtx_unlock_spin(&sched_lock);
1520 	umtxq_unlock(&uq->uq_key);
1521 
1522 	mtx_lock_spin(&sched_lock);
1523 	umtx_propagate_priority(td);
1524 	mtx_unlock_spin(&sched_lock);
1525 
1526 	umtxq_lock(&uq->uq_key);
1527 	if (uq->uq_flags & UQF_UMTXQ) {
1528 		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1529 		if (error == EWOULDBLOCK)
1530 			error = ETIMEDOUT;
1531 		if (uq->uq_flags & UQF_UMTXQ) {
1532 			umtxq_busy(&uq->uq_key);
1533 			umtxq_remove(uq);
1534 			umtxq_unbusy(&uq->uq_key);
1535 		}
1536 	}
1537 	umtxq_unlock(&uq->uq_key);
1538 
1539 	mtx_lock_spin(&sched_lock);
1540 	uq->uq_pi_blocked = NULL;
1541 	td->td_flags &= ~TDF_UPIBLOCKED;
1542 	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1543 	umtx_unpropagate_priority(pi);
1544 	mtx_unlock_spin(&sched_lock);
1545 
1546 	umtxq_lock(&uq->uq_key);
1547 
1548 	return (error);
1549 }
1550 
1551 /*
1552  * Add reference count for a PI mutex.
1553  */
1554 static void
1555 umtx_pi_ref(struct umtx_pi *pi)
1556 {
1557 	struct umtxq_chain *uc;
1558 
1559 	uc = umtxq_getchain(&pi->pi_key);
1560 	UMTXQ_LOCKED_ASSERT(uc);
1561 	pi->pi_refcount++;
1562 }
1563 
1564 /*
1565  * Decrease reference count for a PI mutex, if the counter
1566  * is decreased to zero, its memory space is freed.
1567  */
1568 static void
1569 umtx_pi_unref(struct umtx_pi *pi)
1570 {
1571 	struct umtxq_chain *uc;
1572 	int free = 0;
1573 
1574 	uc = umtxq_getchain(&pi->pi_key);
1575 	UMTXQ_LOCKED_ASSERT(uc);
1576 	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1577 	if (--pi->pi_refcount == 0) {
1578 		mtx_lock_spin(&sched_lock);
1579 		if (pi->pi_owner != NULL) {
1580 			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1581 				pi, pi_link);
1582 			pi->pi_owner = NULL;
1583 		}
1584 		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1585 			("blocked queue not empty"));
1586 		mtx_unlock_spin(&sched_lock);
1587 		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1588 		free = 1;
1589 	}
1590 	if (free)
1591 		umtx_pi_free(pi);
1592 }
1593 
1594 /*
1595  * Find a PI mutex in hash table.
1596  */
1597 static struct umtx_pi *
1598 umtx_pi_lookup(struct umtx_key *key)
1599 {
1600 	struct umtxq_chain *uc;
1601 	struct umtx_pi *pi;
1602 
1603 	uc = umtxq_getchain(key);
1604 	UMTXQ_LOCKED_ASSERT(uc);
1605 
1606 	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1607 		if (umtx_key_match(&pi->pi_key, key)) {
1608 			return (pi);
1609 		}
1610 	}
1611 	return (NULL);
1612 }
1613 
1614 /*
1615  * Insert a PI mutex into hash table.
1616  */
1617 static inline void
1618 umtx_pi_insert(struct umtx_pi *pi)
1619 {
1620 	struct umtxq_chain *uc;
1621 
1622 	uc = umtxq_getchain(&pi->pi_key);
1623 	UMTXQ_LOCKED_ASSERT(uc);
1624 	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1625 }
1626 
1627 /*
1628  * Lock a PI mutex.
1629  */
static int
_do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, owner, old;
	int error;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/*
	 * Find or create the kernel-side PI bookkeeping structure for
	 * this mutex.  If the M_NOWAIT allocation fails, the chain
	 * lock is dropped for an M_WAITOK retry, so the lookup must
	 * be redone in case another thread inserted one meanwhile.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			new_pi->pi_key = uq->uq_key;
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				/* Lost the race: discard our copy. */
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	/* Hold a reference so pi survives while we are off the queue. */
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(&m->m_owner,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

			if (owner == UMUTEX_CONTESTED) {
				/* Got it: record kernel-side ownership. */
				umtxq_lock(&uq->uq_key);
				error = umtx_pi_claim(pi, td);
				umtxq_unlock(&uq->uq_key);
				break;
			}

			/* The address was invalid. */
			if (owner == -1) {
				error = EFAULT;
				break;
			}

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/* Error-check mutexes report recursive lock attempts. */
		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			error = EFAULT;
			break;
		}

		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		if (old == owner)
			error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
				 "umtxpi", timo);
		umtxq_unlock(&uq->uq_key);
	}

	/* Drop the reference taken at entry. */
	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}
1772 
1773 /*
1774  * Unlock a PI mutex.
1775  */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t owner, old, id;
	int error;
	int count;
	int pri;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		/* Fast path: uncontested, release with a single CAS. */
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		pi = uq_first->uq_pi_blocked;
		if (pi->pi_owner != curthread) {
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			/* userland messed the mutex */
			return (EPERM);
		}
		uq_me = curthread->td_umtxq;
		/*
		 * Give up ownership and recompute the priority lent to
		 * us from the remaining contested mutexes we still own.
		 */
		mtx_lock_spin(&sched_lock);
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		sched_unlend_user_prio(curthread, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);

	umtxq_lock(&key);
	/* Wake the queue head (best-priority waiter), if any. */
	if (uq_first != NULL)
		umtxq_signal_thread(uq_first);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	/* Owner word changed underneath us: userland misuse. */
	if (old != owner)
		return (EINVAL);
	return (0);
}
1862 
1863 /*
1864  * Lock a PP mutex.
1865  */
static int
_do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
	int try)
{
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	/* su: whether the thread may assume realtime (ceiling) priority. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/* Translate the userland ceiling into a kernel priority. */
		ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock_spin(&sched_lock);
		/* Caller more urgent than the ceiling: protocol violation. */
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock_spin(&sched_lock);
			error = EINVAL;
			goto out;
		}
		/* Boost ourselves to the ceiling priority if permitted. */
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
		}
		mtx_unlock_spin(&sched_lock);

		/* PP mutexes are always locked through the kernel. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
		    (owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Did not get the lock this round: restore the priority
		 * held before the ceiling boost, bounded by the waiters
		 * of mutexes we still own.
		 */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

	if (error != 0) {
		/* Loop exited with an error: undo any ceiling boost. */
		mtx_lock_spin(&sched_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}

out:
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
1986 
1987 /*
1988  * Unlock a PP mutex.
1989  */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t owner, id;
	uint32_t rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	/* su: whether the thread may assume realtime (ceiling) priority. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* m_ceilings[1] holds the priority to fall back to. */
	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* A stored -1 maps to PRI_MAX, i.e. no realtime priority kept. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
		UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		/*
		 * Recompute the priority lent to us now that this
		 * mutex no longer contributes its ceiling.
		 */
		mtx_lock_spin(&sched_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		sched_unlend_user_prio(td, pri);
		mtx_unlock_spin(&sched_lock);
	}
	umtx_key_release(&key);
	return (error);
}
2070 
/*
 * Change the priority ceiling of a PP mutex, storing the previous
 * ceiling through *old_ceiling when requested.  The mutex is briefly
 * acquired (or must already be owned) so the update is atomic with
 * respect to other lockers.
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
	uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t save_ceiling;
	uint32_t owner, id;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
	   &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		save_ceiling = fuword32(&m->m_ceilings[0]);

		/* Try to take the (kernel-managed) lock ourselves. */
		owner = casuword32(&m->m_owner,
		    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);

		if (owner == UMUTEX_CONTESTED) {
			/* Lock was free: update the ceiling, then release. */
			suword32(&m->m_ceilings[0], ceiling);
			suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
				UMUTEX_CONTESTED);
			error = 0;
			break;
		}

		/* The address was invalid. */
		if (owner == -1) {
			error = EFAULT;
			break;
		}

		/* We already hold the mutex: update in place. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			suword32(&m->m_ceilings[0], ceiling);
			error = 0;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", 0);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	/* Wake everyone so they revalidate against the new ceiling. */
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL)
		suword32(old_ceiling, save_ceiling);
	return (error);
}
2150 
2151 static int
2152 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2153 	int try)
2154 {
2155 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2156 	case 0:
2157 		return (_do_lock_normal(td, m, flags, timo, try));
2158 	case UMUTEX_PRIO_INHERIT:
2159 		return (_do_lock_pi(td, m, flags, timo, try));
2160 	case UMUTEX_PRIO_PROTECT:
2161 		return (_do_lock_pp(td, m, flags, timo, try));
2162 	}
2163 	return (EINVAL);
2164 }
2165 
2166 /*
2167  * Lock a userland POSIX mutex.
2168  */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
	struct timespec *timeout, int try)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	uint32_t flags;
	int error;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	if (timeout == NULL) {
		error = _do_lock_umutex(td, m, flags, 0, try);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		/*
		 * Compute the absolute deadline on the uptime clock,
		 * then re-issue the timed lock with the remaining
		 * interval after each timeout until the deadline passes.
		 */
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}
2210 
2211 /*
2212  * Unlock a userland POSIX mutex.
2213  */
2214 static int
2215 do_unlock_umutex(struct thread *td, struct umutex *m)
2216 {
2217 	uint32_t flags;
2218 
2219 	flags = fuword32(&m->m_flags);
2220 	if (flags == -1)
2221 		return (EFAULT);
2222 
2223 	switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2224 	case 0:
2225 		return (do_unlock_normal(td, m, flags));
2226 	case UMUTEX_PRIO_INHERIT:
2227 		return (do_unlock_pi(td, m, flags));
2228 	case UMUTEX_PRIO_PROTECT:
2229 		return (do_unlock_pp(td, m, flags));
2230 	}
2231 
2232 	return (EINVAL);
2233 }
2234 
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * The magic thing is we should set c_has_waiters to 1 before
	 * releasing user mutex.
	 */
	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Release the associated mutex.  We are already queued, so a
	 * wakeup between the unlock and the sleep is not lost.
	 */
	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if ((wflags & UMTX_CHECK_UNPARKING) &&
		    (td->td_pflags & TDP_WAKEUP)) {
			/* An unpark raced with us: report interruption. */
			td->td_pflags &= ~TDP_WAKEUP;
			error = EINTR;
		} else if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
			/*
			 * Timed wait: retry the sleep with the remaining
			 * interval until the absolute deadline passes.
			 */
			getnanouptime(&ets);
			timespecadd(&ets, timeout);
			TIMESPEC_TO_TIMEVAL(&tv, timeout);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
				getnanouptime(&cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if (error != 0) {
		if ((uq->uq_flags & UQF_UMTXQ) == 0) {
			/*
			 * If we concurrently got do_cv_signal()d
			 * and we got an error or UNIX signals or a timeout,
			 * then, perform another umtxq_signal to avoid
			 * consuming the wakeup. This may cause spurious
			 * wakeup for another thread which was just queued,
			 * but SUSV3 explicitly allows spurious wakeup to
			 * occur, and indeed a kernel based implementation
			 * can not avoid it.
			 */
			if (!umtxq_signal(&uq->uq_key, 1))
				error = 0;
		}
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_remove(uq);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}
2318 
2319 /*
2320  * Signal a userland condition variable.
2321  */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/*
		 * All queued waiters have been woken: clear the
		 * userland has-waiters hint.  The chain lock must be
		 * dropped around the user-space store, which can fault;
		 * the busy mark keeps other wakers serialized meanwhile.
		 */
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
2347 
/*
 * Wake all threads waiting on a userland condition variable.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	flags = fuword32(&cv->c_flags);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	/*
	 * Clear the userland has-waiters hint with the chain lock
	 * dropped (the store may fault); the busy mark holds off
	 * concurrent wakers until the store completes.
	 */
	error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);

	umtxq_lock(&key);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	umtx_key_release(&key);
	return (error);
}
2373 
2374 int
2375 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2376     /* struct umtx *umtx */
2377 {
2378 	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2379 }
2380 
2381 int
2382 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2383     /* struct umtx *umtx */
2384 {
2385 	return do_unlock_umtx(td, uap->umtx, td->td_tid);
2386 }
2387 
2388 static int
2389 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2390 {
2391 	struct timespec *ts, timeout;
2392 	int error;
2393 
2394 	/* Allow a null timespec (wait forever). */
2395 	if (uap->uaddr2 == NULL)
2396 		ts = NULL;
2397 	else {
2398 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2399 		if (error != 0)
2400 			return (error);
2401 		if (timeout.tv_nsec >= 1000000000 ||
2402 		    timeout.tv_nsec < 0) {
2403 			return (EINVAL);
2404 		}
2405 		ts = &timeout;
2406 	}
2407 	return (do_lock_umtx(td, uap->obj, uap->val, ts));
2408 }
2409 
/*
 * _umtx_op() handler for UMTX_OP_UNLOCK: release the umtx held with
 * the owner id given in uap->val.
 */
static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}
2415 
2416 static int
2417 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2418 {
2419 	struct timespec *ts, timeout;
2420 	int error;
2421 
2422 	if (uap->uaddr2 == NULL)
2423 		ts = NULL;
2424 	else {
2425 		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2426 		if (error != 0)
2427 			return (error);
2428 		if (timeout.tv_nsec >= 1000000000 ||
2429 		    timeout.tv_nsec < 0)
2430 			return (EINVAL);
2431 		ts = &timeout;
2432 	}
2433 	return do_wait(td, uap->obj, uap->val, ts, 0);
2434 }
2435 
/*
 * _umtx_op() handler for UMTX_OP_WAKE: wake up to uap->val threads
 * waiting on the address uap->obj.
 */
static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val));
}
2441 
2442 static int
2443 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2444 {
2445 	struct timespec *ts, timeout;
2446 	int error;
2447 
2448 	/* Allow a null timespec (wait forever). */
2449 	if (uap->uaddr2 == NULL)
2450 		ts = NULL;
2451 	else {
2452 		error = copyin(uap->uaddr2, &timeout,
2453 		    sizeof(timeout));
2454 		if (error != 0)
2455 			return (error);
2456 		if (timeout.tv_nsec >= 1000000000 ||
2457 		    timeout.tv_nsec < 0) {
2458 			return (EINVAL);
2459 		}
2460 		ts = &timeout;
2461 	}
2462 	return do_lock_umutex(td, uap->obj, ts, 0);
2463 }
2464 
2465 static int
2466 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2467 {
2468 	return do_lock_umutex(td, uap->obj, NULL, 1);
2469 }
2470 
2471 static int
2472 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2473 {
2474 	return do_unlock_umutex(td, uap->obj);
2475 }
2476 
2477 static int
2478 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2479 {
2480 	return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2481 }
2482 
2483 static int
2484 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2485 {
2486 	struct timespec *ts, timeout;
2487 	int error;
2488 
2489 	/* Allow a null timespec (wait forever). */
2490 	if (uap->uaddr2 == NULL)
2491 		ts = NULL;
2492 	else {
2493 		error = copyin(uap->uaddr2, &timeout,
2494 		    sizeof(timeout));
2495 		if (error != 0)
2496 			return (error);
2497 		if (timeout.tv_nsec >= 1000000000 ||
2498 		    timeout.tv_nsec < 0) {
2499 			return (EINVAL);
2500 		}
2501 		ts = &timeout;
2502 	}
2503 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2504 }
2505 
2506 static int
2507 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2508 {
2509 	return do_cv_signal(td, uap->obj);
2510 }
2511 
2512 static int
2513 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2514 {
2515 	return do_cv_broadcast(td, uap->obj);
2516 }
2517 
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);

/*
 * Dispatch table for _umtx_op().  Entries must stay in UMTX_OP_*
 * value order, since the op code indexes the table directly.
 */
static _umtx_op_func op_table[] = {
	__umtx_op_lock_umtx,		/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx,		/* UMTX_OP_UNLOCK */
	__umtx_op_wait,			/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex,		/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK */
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait,		/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
};
2533 
/*
 * System call: dispatch a userland umtx operation to its handler via
 * op_table.
 */
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
	/* The unsigned compare also rejects negative op values. */
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table[uap->op])(td, uap);
	return (EINVAL);
}
2541 
2542 #ifdef COMPAT_IA32
2543 
/*
 * 32-bit compat system call: lock a 32-bit userland umtx for the
 * calling thread (no timeout).
 */
int
freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
}
2550 
/*
 * 32-bit compat system call: unlock a 32-bit userland umtx owned by
 * the calling thread.
 */
int
freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
}
2557 
/*
 * 32-bit layout of struct timespec as passed by COMPAT_IA32 callers.
 * NOTE(review): both fields are unsigned, so a negative tv_nsec from
 * a 32-bit process arrives as a large positive value; the
 * >= 1000000000 range checks in the compat32 handlers still reject
 * it, but the explicit "tv_nsec < 0" checks there can never fire --
 * confirm this is intentional.
 */
struct timespec32 {
	u_int32_t tv_sec;
	u_int32_t tv_nsec;
};
2562 
2563 static inline int
2564 copyin_timeout32(void *addr, struct timespec *tsp)
2565 {
2566 	struct timespec32 ts32;
2567 	int error;
2568 
2569 	error = copyin(addr, &ts32, sizeof(struct timespec32));
2570 	if (error == 0) {
2571 		tsp->tv_sec = ts32.tv_sec;
2572 		tsp->tv_nsec = ts32.tv_nsec;
2573 	}
2574 	return (error);
2575 }
2576 
2577 static int
2578 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
2579 {
2580 	struct timespec *ts, timeout;
2581 	int error;
2582 
2583 	/* Allow a null timespec (wait forever). */
2584 	if (uap->uaddr2 == NULL)
2585 		ts = NULL;
2586 	else {
2587 		error = copyin_timeout32(uap->uaddr2, &timeout);
2588 		if (error != 0)
2589 			return (error);
2590 		if (timeout.tv_nsec >= 1000000000 ||
2591 		    timeout.tv_nsec < 0) {
2592 			return (EINVAL);
2593 		}
2594 		ts = &timeout;
2595 	}
2596 	return (do_lock_umtx32(td, uap->obj, uap->val, ts));
2597 }
2598 
/*
 * Compat32 handler for UMTX_OP_UNLOCK: release the 32-bit umtx held
 * with the owner id given in uap->val.
 */
static int
__umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
}
2604 
2605 static int
2606 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2607 {
2608 	struct timespec *ts, timeout;
2609 	int error;
2610 
2611 	if (uap->uaddr2 == NULL)
2612 		ts = NULL;
2613 	else {
2614 		error = copyin_timeout32(uap->uaddr2, &timeout);
2615 		if (error != 0)
2616 			return (error);
2617 		if (timeout.tv_nsec >= 1000000000 ||
2618 		    timeout.tv_nsec < 0)
2619 			return (EINVAL);
2620 		ts = &timeout;
2621 	}
2622 	return do_wait(td, uap->obj, uap->val, ts, 1);
2623 }
2624 
2625 static int
2626 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
2627 {
2628 	struct timespec *ts, timeout;
2629 	int error;
2630 
2631 	/* Allow a null timespec (wait forever). */
2632 	if (uap->uaddr2 == NULL)
2633 		ts = NULL;
2634 	else {
2635 		error = copyin_timeout32(uap->uaddr2, &timeout);
2636 		if (error != 0)
2637 			return (error);
2638 		if (timeout.tv_nsec >= 1000000000 ||
2639 		    timeout.tv_nsec < 0)
2640 			return (EINVAL);
2641 		ts = &timeout;
2642 	}
2643 	return do_lock_umutex(td, uap->obj, ts, 0);
2644 }
2645 
2646 static int
2647 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
2648 {
2649 	struct timespec *ts, timeout;
2650 	int error;
2651 
2652 	/* Allow a null timespec (wait forever). */
2653 	if (uap->uaddr2 == NULL)
2654 		ts = NULL;
2655 	else {
2656 		error = copyin_timeout32(uap->uaddr2, &timeout);
2657 		if (error != 0)
2658 			return (error);
2659 		if (timeout.tv_nsec >= 1000000000 ||
2660 		    timeout.tv_nsec < 0)
2661 			return (EINVAL);
2662 		ts = &timeout;
2663 	}
2664 	return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2665 }
2666 
/*
 * Compat32 dispatch table for freebsd32_umtx_op().  Entries must stay
 * in UMTX_OP_* value order and mirror op_table above; only the
 * handlers that take a timespec argument need compat32 variants.
 *
 * (The MUTEX_TRYLOCK/MUTEX_LOCK labels below were previously swapped;
 * the function order — trylock before lock — matches op_table and the
 * UMTX_OP_* values.)
 */
static _umtx_op_func op_table_compat32[] = {
	__umtx_op_lock_umtx_compat32,	/* UMTX_OP_LOCK */
	__umtx_op_unlock_umtx_compat32,	/* UMTX_OP_UNLOCK */
	__umtx_op_wait_compat32,	/* UMTX_OP_WAIT */
	__umtx_op_wake,			/* UMTX_OP_WAKE */
	__umtx_op_trylock_umutex,	/* UMTX_OP_MUTEX_TRYLOCK */
	__umtx_op_lock_umutex_compat32,	/* UMTX_OP_MUTEX_LOCK */
	__umtx_op_unlock_umutex,	/* UMTX_OP_MUTEX_UNLOCK	*/
	__umtx_op_set_ceiling,		/* UMTX_OP_SET_CEILING */
	__umtx_op_cv_wait_compat32,	/* UMTX_OP_CV_WAIT*/
	__umtx_op_cv_signal,		/* UMTX_OP_CV_SIGNAL */
	__umtx_op_cv_broadcast		/* UMTX_OP_CV_BROADCAST */
};
2680 
/*
 * 32-bit compat system call: dispatch through op_table_compat32 so
 * timespec arguments are converted from their 32-bit layout.
 */
int
freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
{
	/* The unsigned compare also rejects negative op values. */
	if ((unsigned)uap->op < UMTX_OP_MAX)
		return (*op_table_compat32[uap->op])(td,
			(struct _umtx_op_args *)uap);
	return (EINVAL);
}
2689 #endif
2690 
/*
 * Allocate and attach the per-thread umtx queue structure; called
 * when the thread structure is first initialized.
 */
void
umtx_thread_init(struct thread *td)
{
	td->td_umtxq = umtxq_alloc();
	td->td_umtxq->uq_thread = td;
}
2697 
/*
 * Release the per-thread umtx queue structure; counterpart of
 * umtx_thread_init().
 */
void
umtx_thread_fini(struct thread *td)
{
	umtxq_free(td->td_umtxq);
}
2703 
/*
 * It will be called when a new thread is created, e.g. fork().
 * Resets the inherited priority and asserts that a recycled thread
 * carries no stale umtx state.
 */
void
umtx_thread_alloc(struct thread *td)
{
	struct umtx_q *uq;

	uq = td->td_umtxq;
	/* PRI_MAX means "no priority inherited from a PI mutex". */
	uq->uq_inherited_pri = PRI_MAX;

	KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
	KASSERT(uq->uq_thread == td, ("uq_thread != td"));
	KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
	KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
}
2720 
/*
 * exec() hook: drop the executing thread's umtx state, since the old
 * address space (and any userland locks in it) is going away.
 */
static void
umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused)
{
	umtx_thread_cleanup(curthread);
}
2730 
/*
 * thread_exit() hook: drop the exiting thread's umtx state.
 */
void
umtx_thread_exit(struct thread *td)
{
	umtx_thread_cleanup(td);
}
2739 
/*
 * Clean up a thread's umtx data: reset its inherited priority, disown
 * every PI mutex it still holds contested, and clear the
 * priority-borrowing flag.  All state is protected by sched_lock.
 */
static void
umtx_thread_cleanup(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	if ((uq = td->td_umtxq) == NULL)
		return;

	mtx_lock_spin(&sched_lock);
	uq->uq_inherited_pri = PRI_MAX;
	/* Orphan any PI mutexes still owned by this thread. */
	while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
		pi->pi_owner = NULL;
		TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
	}
	td->td_flags &= ~TDF_UBORROWING;
	mtx_unlock_spin(&sched_lock);
}
2761