xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision 86aa9539fef591a363b06a0ebd3aa7a07f4c1579)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
5  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
6  * Copyright (c) 2015, 2016 The FreeBSD Foundation
7  *
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Konstantin Belousov
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by John Birrell.
24  * 4. Neither the name of the author nor the names of any co-contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
#include "namespace.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdlib.h>
#include <string.h>
#include "un-namespace.h"
53 
54 #include "thr_private.h"
55 
/*
 * Compile-time check that a struct pthread_mutex is no larger than one
 * page.  NOTE(review): presumably required because process-shared
 * mutexes are stored in the off-page returned by
 * __thr_pshared_offpage() -- confirm against the off-page allocator.
 */
_Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, how many times to spin trying to take the lock
 * before entering the kernel to block.  mutex_init_body() uses this
 * value as the spin count when _thr_spinloops is zero.
 */
#define MUTEX_ADAPTIVE_SPINS	2000
64 
/*
 * Prototypes
 *
 * The first group declares non-static entry points defined in this file
 * (timed lock and the spin/yield loop tunable accessors).  The second
 * group forward-declares file-local helpers that are used before their
 * definitions appear.
 */
int	__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
		const struct timespec * __restrict abstime);
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

/* Handling of a lock attempt on a mutex the caller already owns. */
static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
/* Common unlock path shared by the public unlock and the cv helpers. */
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
/* Slow path of locking: spin, yield, then sleep in the kernel. */
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
/* One-time registration of the thread's robust list heads. */
static void	mutex_init_robust(struct pthread *curthread);
/* Index into curthread->mq[] for the given mutex. */
static int	mutex_qidx(struct pthread_mutex *m);
/* Flag tests on m->m_lock.m_flags. */
static bool	is_robust_mutex(struct pthread_mutex *m);
static bool	is_pshared_mutex(struct pthread_mutex *m);
87 
/*
 * Symbol aliases: each implementation below is exported under the
 * public pthread_* name (weak) and under underscore-prefixed names
 * (weak or strong) so that the same function body serves all spellings.
 */
__weak_reference(__Tthr_mutex_init, pthread_mutex_init);
__weak_reference(__Tthr_mutex_init, __pthread_mutex_init);
__strong_reference(__Tthr_mutex_init, _pthread_mutex_init);
__weak_reference(__Tthr_mutex_lock, pthread_mutex_lock);
__weak_reference(__Tthr_mutex_lock, __pthread_mutex_lock);
__strong_reference(__Tthr_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__Tthr_mutex_trylock, pthread_mutex_trylock);
__weak_reference(__Tthr_mutex_trylock, __pthread_mutex_trylock);
__strong_reference(__Tthr_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_Tthr_mutex_consistent, pthread_mutex_consistent);
__weak_reference(_Tthr_mutex_consistent, _pthread_mutex_consistent);
__strong_reference(_Tthr_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_thr_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_thr_mutex_destroy, _pthread_mutex_destroy);
__weak_reference(_thr_mutex_unlock, pthread_mutex_unlock);
__weak_reference(_thr_mutex_unlock, _pthread_mutex_unlock);

/* Priority ceiling accessors. */
__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

/* Non-portable spin loop count accessors. */
__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

/* Non-portable yield loop count accessors and the ownership query. */
__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
121 
/*
 * Clear both queue linkages (m_qe and m_pqe) of a mutex.  This is a
 * no-op unless the library is built with _PTHREADS_INVARIANTS; the
 * cleared pointers are what the mutex_assert_*() checks inspect.
 */
static void
mutex_init_link(struct pthread_mutex *m)
{
#ifdef _PTHREADS_INVARIANTS
	/* Owned-mutex queue entry. */
	m->m_qe.tqe_next = NULL;
	m->m_qe.tqe_prev = NULL;
	/* Private (non process-shared) queue entry. */
	m->m_pqe.tqe_next = NULL;
	m->m_pqe.tqe_prev = NULL;
#endif
}
133 
134 static void
135 mutex_assert_is_owned(struct pthread_mutex *m __unused)
136 {
137 
138 #if defined(_PTHREADS_INVARIANTS)
139 	if (__predict_false(m->m_qe.tqe_prev == NULL))
140 		PANIC("mutex %p own %#x is not on list %p %p",
141 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
142 #endif
143 }
144 
145 static void
146 mutex_assert_not_owned(struct pthread *curthread __unused,
147     struct pthread_mutex *m __unused)
148 {
149 
150 #if defined(_PTHREADS_INVARIANTS)
151 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
152 	    m->m_qe.tqe_next != NULL))
153 		PANIC("mutex %p own %#x is on list %p %p",
154 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
155 	if (__predict_false(is_robust_mutex(m) &&
156 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
157 	    (is_pshared_mutex(m) && curthread->robust_list ==
158 	    (uintptr_t)&m->m_lock) ||
159 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
160 	    (uintptr_t)&m->m_lock))))
161 		PANIC(
162     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
163 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
164 		    m->m_rb_prev, (void *)curthread->robust_list,
165 		    (void *)curthread->priv_robust_list);
166 #endif
167 }
168 
169 static bool
170 is_pshared_mutex(struct pthread_mutex *m)
171 {
172 
173 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
174 }
175 
176 static bool
177 is_robust_mutex(struct pthread_mutex *m)
178 {
179 
180 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
181 }
182 
183 int
184 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
185 {
186 
187 #if defined(_PTHREADS_INVARIANTS)
188 	if (__predict_false(curthread->inact_mtx != 0))
189 		PANIC("inact_mtx enter");
190 #endif
191 	if (!is_robust_mutex(m))
192 		return (0);
193 
194 	mutex_init_robust(curthread);
195 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
196 	return (1);
197 }
198 
199 void
200 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
201 {
202 
203 #if defined(_PTHREADS_INVARIANTS)
204 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
205 		PANIC("inact_mtx leave");
206 #endif
207 	curthread->inact_mtx = 0;
208 }
209 
210 static int
211 mutex_check_attr(const struct pthread_mutex_attr *attr)
212 {
213 
214 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
215 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
216 		return (EINVAL);
217 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
218 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
219 		return (EINVAL);
220 	return (0);
221 }
222 
223 static void
224 mutex_init_robust(struct pthread *curthread)
225 {
226 	struct umtx_robust_lists_params rb;
227 
228 	if (curthread == NULL)
229 		curthread = _get_curthread();
230 	if (curthread->robust_inited)
231 		return;
232 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
233 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
234 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
235 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
236 	curthread->robust_inited = 1;
237 }
238 
239 static void
240 mutex_init_body(struct pthread_mutex *pmutex,
241     const struct pthread_mutex_attr *attr)
242 {
243 
244 	pmutex->m_flags = attr->m_type;
245 	pmutex->m_count = 0;
246 	pmutex->m_spinloops = 0;
247 	pmutex->m_yieldloops = 0;
248 	mutex_init_link(pmutex);
249 	switch (attr->m_protocol) {
250 	case PTHREAD_PRIO_NONE:
251 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
252 		pmutex->m_lock.m_flags = 0;
253 		break;
254 	case PTHREAD_PRIO_INHERIT:
255 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
256 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
257 		break;
258 	case PTHREAD_PRIO_PROTECT:
259 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
260 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
261 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
262 		break;
263 	}
264 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
265 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
266 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
267 		mutex_init_robust(NULL);
268 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
269 	}
270 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
271 		pmutex->m_spinloops =
272 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
273 		pmutex->m_yieldloops = _thr_yieldloops;
274 	}
275 }
276 
277 static int
278 mutex_init(pthread_mutex_t *mutex,
279     const struct pthread_mutex_attr *mutex_attr,
280     void *(calloc_cb)(size_t, size_t))
281 {
282 	const struct pthread_mutex_attr *attr;
283 	struct pthread_mutex *pmutex;
284 	int error;
285 
286 	if (mutex_attr == NULL) {
287 		attr = &_pthread_mutexattr_default;
288 	} else {
289 		attr = mutex_attr;
290 		error = mutex_check_attr(attr);
291 		if (error != 0)
292 			return (error);
293 	}
294 	if ((pmutex = (pthread_mutex_t)
295 		calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
296 		return (ENOMEM);
297 	mutex_init_body(pmutex, attr);
298 	*mutex = pmutex;
299 	return (0);
300 }
301 
302 static int
303 init_static(struct pthread *thread, pthread_mutex_t *mutex)
304 {
305 	int ret;
306 
307 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
308 
309 	if (*mutex == THR_MUTEX_INITIALIZER)
310 		ret = mutex_init(mutex, &_pthread_mutexattr_default,
311 		    __thr_calloc);
312 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
313 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
314 		    __thr_calloc);
315 	else
316 		ret = 0;
317 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
318 
319 	return (ret);
320 }
321 
322 static void
323 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
324 {
325 	struct pthread_mutex *m2;
326 
327 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
328 	if (m2 != NULL)
329 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
330 	else
331 		m->m_lock.m_ceilings[1] = -1;
332 }
333 
334 static void
335 shared_mutex_init(struct pthread_mutex *pmtx, const struct
336     pthread_mutex_attr *mutex_attr)
337 {
338 	static const struct pthread_mutex_attr foobar_mutex_attr = {
339 		.m_type = PTHREAD_MUTEX_DEFAULT,
340 		.m_protocol = PTHREAD_PRIO_NONE,
341 		.m_ceiling = 0,
342 		.m_pshared = PTHREAD_PROCESS_SHARED,
343 		.m_robust = PTHREAD_MUTEX_STALLED,
344 	};
345 	bool done;
346 
347 	/*
348 	 * Hack to allow multiple pthread_mutex_init() calls on the
349 	 * same process-shared mutex.  We rely on kernel allocating
350 	 * zeroed offpage for the mutex, i.e. the
351 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
352 	 */
353 	for (done = false; !done;) {
354 		switch (pmtx->m_ps) {
355 		case PMUTEX_INITSTAGE_DONE:
356 			atomic_thread_fence_acq();
357 			done = true;
358 			break;
359 		case PMUTEX_INITSTAGE_ALLOC:
360 			if (atomic_cmpset_int(&pmtx->m_ps,
361 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
362 				if (mutex_attr == NULL)
363 					mutex_attr = &foobar_mutex_attr;
364 				mutex_init_body(pmtx, mutex_attr);
365 				atomic_store_rel_int(&pmtx->m_ps,
366 				    PMUTEX_INITSTAGE_DONE);
367 				done = true;
368 			}
369 			break;
370 		case PMUTEX_INITSTAGE_BUSY:
371 			_pthread_yield();
372 			break;
373 		default:
374 			PANIC("corrupted offpage");
375 			break;
376 		}
377 	}
378 }
379 
380 int
381 __Tthr_mutex_init(pthread_mutex_t * __restrict mutex,
382     const pthread_mutexattr_t * __restrict mutex_attr)
383 {
384 	struct pthread_mutex *pmtx;
385 	int ret;
386 
387 	if (mutex_attr != NULL) {
388 		ret = mutex_check_attr(*mutex_attr);
389 		if (ret != 0)
390 			return (ret);
391 	}
392 	if (mutex_attr == NULL ||
393 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
394 		__thr_malloc_init();
395 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
396 		    __thr_calloc));
397 	}
398 	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
399 	if (pmtx == NULL)
400 		return (EFAULT);
401 	*mutex = THR_PSHARED_PTR;
402 	shared_mutex_init(pmtx, *mutex_attr);
403 	return (0);
404 }
405 
406 /* This function is used internally by malloc. */
407 int
408 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
409     void *(calloc_cb)(size_t, size_t))
410 {
411 	static const struct pthread_mutex_attr attr = {
412 		.m_type = PTHREAD_MUTEX_NORMAL,
413 		.m_protocol = PTHREAD_PRIO_NONE,
414 		.m_ceiling = 0,
415 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
416 		.m_robust = PTHREAD_MUTEX_STALLED,
417 	};
418 	int ret;
419 
420 	ret = mutex_init(mutex, &attr, calloc_cb);
421 	if (ret == 0)
422 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
423 	return (ret);
424 }
425 
426 /*
427  * Fix mutex ownership for child process.
428  *
429  * Process private mutex ownership is transmitted from the forking
430  * thread to the child process.
431  *
432  * Process shared mutex should not be inherited because owner is
433  * forking thread which is in parent process, they are removed from
434  * the owned mutex list.
435  */
436 static void
437 queue_fork(struct pthread *curthread, struct mutex_queue *q,
438     struct mutex_queue *qp, uint bit)
439 {
440 	struct pthread_mutex *m;
441 
442 	TAILQ_INIT(q);
443 	TAILQ_FOREACH(m, qp, m_pqe) {
444 		TAILQ_INSERT_TAIL(q, m, m_qe);
445 		m->m_lock.m_owner = TID(curthread) | bit;
446 	}
447 }
448 
449 void
450 _mutex_fork(struct pthread *curthread)
451 {
452 
453 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
454 	    &curthread->mq[TMQ_NORM_PRIV], 0);
455 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
456 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
457 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
458 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
459 	curthread->robust_list = 0;
460 }
461 
462 int
463 _thr_mutex_destroy(pthread_mutex_t *mutex)
464 {
465 	pthread_mutex_t m, m1;
466 	int ret;
467 
468 	m = *mutex;
469 	if (m < THR_MUTEX_DESTROYED) {
470 		ret = 0;
471 	} else if (m == THR_MUTEX_DESTROYED) {
472 		ret = EINVAL;
473 	} else {
474 		if (m == THR_PSHARED_PTR) {
475 			m1 = __thr_pshared_offpage(mutex, 0);
476 			if (m1 != NULL) {
477 				mutex_assert_not_owned(_get_curthread(), m1);
478 				__thr_pshared_destroy(mutex);
479 			}
480 			*mutex = THR_MUTEX_DESTROYED;
481 			return (0);
482 		}
483 		if (PMUTEX_OWNER_ID(m) != 0 &&
484 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
485 			ret = EBUSY;
486 		} else {
487 			*mutex = THR_MUTEX_DESTROYED;
488 			mutex_assert_not_owned(_get_curthread(), m);
489 			__thr_free(m);
490 			ret = 0;
491 		}
492 	}
493 
494 	return (ret);
495 }
496 
497 static int
498 mutex_qidx(struct pthread_mutex *m)
499 {
500 
501 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
502 		return (TMQ_NORM);
503 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
504 }
505 
506 /*
507  * Both enqueue_mutex() and dequeue_mutex() operate on the
508  * thread-private linkage of the locked mutexes and on the robust
509  * linkage.
510  *
511  * Robust list, as seen by kernel, must be consistent even in the case
512  * of thread termination at arbitrary moment.  Since either enqueue or
513  * dequeue for list walked by kernel consists of rewriting a single
514  * forward pointer, it is safe.  On the other hand, rewrite of the
515  * back pointer is not atomic WRT the forward one, but kernel does not
516  * care.
517  */
518 static void
519 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
520     int error)
521 {
522 	struct pthread_mutex *m1;
523 	uintptr_t *rl;
524 	int qidx;
525 
526 	/* Add to the list of owned mutexes: */
527 	if (error != EOWNERDEAD)
528 		mutex_assert_not_owned(curthread, m);
529 	qidx = mutex_qidx(m);
530 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
531 	if (!is_pshared_mutex(m))
532 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
533 	if (is_robust_mutex(m)) {
534 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
535 		    &curthread->priv_robust_list;
536 		m->m_rb_prev = NULL;
537 		if (*rl != 0) {
538 			m1 = __containerof((void *)*rl,
539 			    struct pthread_mutex, m_lock);
540 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
541 			m1->m_rb_prev = m;
542 		} else {
543 			m1 = NULL;
544 			m->m_lock.m_rb_lnk = 0;
545 		}
546 		*rl = (uintptr_t)&m->m_lock;
547 	}
548 }
549 
/*
 * Remove a mutex from the bookkeeping of the owning thread: the robust
 * list (for robust mutexes), the owned-mutex queue, and the private
 * queue (for non process-shared mutexes).  For priority-protect
 * mutexes the inherited ceiling is refreshed afterwards, and finally
 * the queue linkage is reset via mutex_init_link().
 */
static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *mp, *mn;
	int qidx;

	mutex_assert_is_owned(m);
	qidx = mutex_qidx(m);
	if (is_robust_mutex(m)) {
		/* Unlink from the robust list; mp is the predecessor. */
		mp = m->m_rb_prev;
		if (mp == NULL) {
			/* m is the list head: advance the head pointer. */
			if (is_pshared_mutex(m)) {
				curthread->robust_list = m->m_lock.m_rb_lnk;
			} else {
				curthread->priv_robust_list =
				    m->m_lock.m_rb_lnk;
			}
		} else {
			/* Bypass m in the predecessor's forward link. */
			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
		}
		if (m->m_lock.m_rb_lnk != 0) {
			/* Fix the back pointer of the successor, if any. */
			mn = __containerof((void *)m->m_lock.m_rb_lnk,
			    struct pthread_mutex, m_lock);
			mn->m_rb_prev = m->m_rb_prev;
		}
		m->m_lock.m_rb_lnk = 0;
		m->m_rb_prev = NULL;
	}
	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
	/* The private queue sits at the index right after the owned one. */
	if (!is_pshared_mutex(m))
		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
		set_inherited_priority(curthread, m);
	mutex_init_link(m);
}
585 
586 static int
587 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
588 {
589 	int ret;
590 
591 	*m = *mutex;
592 	ret = 0;
593 	if (*m == THR_PSHARED_PTR) {
594 		*m = __thr_pshared_offpage(mutex, 0);
595 		if (*m == NULL)
596 			ret = EINVAL;
597 		else
598 			shared_mutex_init(*m, NULL);
599 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
600 		if (*m == THR_MUTEX_DESTROYED) {
601 			ret = EINVAL;
602 		} else {
603 			ret = init_static(_get_curthread(), mutex);
604 			if (ret == 0)
605 				*m = *mutex;
606 		}
607 	}
608 	return (ret);
609 }
610 
611 int
612 __Tthr_mutex_trylock(pthread_mutex_t *mutex)
613 {
614 	struct pthread *curthread;
615 	struct pthread_mutex *m;
616 	uint32_t id;
617 	int ret, robust;
618 
619 	ret = check_and_init_mutex(mutex, &m);
620 	if (ret != 0)
621 		return (ret);
622 	curthread = _get_curthread();
623 	id = TID(curthread);
624 	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
625 		THR_CRITICAL_ENTER(curthread);
626 	robust = _mutex_enter_robust(curthread, m);
627 	ret = _thr_umutex_trylock(&m->m_lock, id);
628 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
629 		enqueue_mutex(curthread, m, ret);
630 		if (ret == EOWNERDEAD)
631 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
632 	} else if (PMUTEX_OWNER_ID(m) == id) {
633 		ret = mutex_self_trylock(m);
634 	} /* else {} */
635 	if (robust)
636 		_mutex_leave_robust(curthread, m);
637 	if (ret != 0 && ret != EOWNERDEAD &&
638 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
639 		THR_CRITICAL_LEAVE(curthread);
640 	return (ret);
641 }
642 
/*
 * Slow path of mutex locking, used after the initial try-lock failed.
 *
 * If the caller already owns the mutex, the result is decided by
 * mutex_self_lock().  Otherwise the lock is attempted in up to three
 * phases: a spin loop (m_spinloops iterations, skipped when
 * _thr_is_smp is zero), a yield loop (m_yieldloops iterations), and
 * finally a blocking kernel lock, optionally bounded by abstime.
 * Priority-protect, priority-inherit, robust and non-consistent
 * mutexes go straight to the kernel.
 *
 * On success (0 or EOWNERDEAD) the mutex is queued on the thread's
 * owned lists.  Returns EINVAL when abstime->tv_nsec is out of range.
 */
static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
	uint32_t id, owner;
	int count, ret;

	id = TID(curthread);
	/* Relock by the current owner is handled per mutex type. */
	if (PMUTEX_OWNER_ID(m) == id)
		return (mutex_self_lock(m, abstime));

	/*
	 * For adaptive mutexes, spin for a bit in the expectation
	 * that if the application requests this mutex type then
	 * the lock is likely to be released quickly and it is
	 * faster than entering the kernel
	 */
	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
		goto sleep_in_kernel;

	/* Spinning is skipped when _thr_is_smp is zero. */
	if (!_thr_is_smp)
		goto yield_loop;

	count = m->m_spinloops;
	while (count--) {
		owner = m->m_lock.m_owner;
		/* Unowned if nothing but the contested bit is set. */
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			/* Keep the contested bit while installing our id. */
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
		CPU_SPINWAIT;
	}

yield_loop:
	count = m->m_yieldloops;
	while (count--) {
		_sched_yield();
		owner = m->m_lock.m_owner;
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
	}

sleep_in_kernel:
	if (abstime == NULL)
		ret = __thr_umutex_lock(&m->m_lock, id);
	else if (__predict_false(abstime->tv_nsec < 0 ||
	    abstime->tv_nsec >= 1000000000))
		ret = EINVAL;
	else
		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
	if (ret == 0 || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		/* The previous owner died: state needs to be recovered. */
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	}
	return (ret);
}
710 
711 static inline int
712 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
713     bool cvattach, bool rb_onlist)
714 {
715 	struct pthread *curthread;
716 	int ret, robust;
717 
718 	robust = 0;  /* pacify gcc */
719 	curthread  = _get_curthread();
720 	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
721 		THR_CRITICAL_ENTER(curthread);
722 	if (!rb_onlist)
723 		robust = _mutex_enter_robust(curthread, m);
724 	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
725 	if (ret == 0 || ret == EOWNERDEAD) {
726 		enqueue_mutex(curthread, m, ret);
727 		if (ret == EOWNERDEAD)
728 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
729 	} else {
730 		ret = mutex_lock_sleep(curthread, m, abstime);
731 	}
732 	if (!rb_onlist && robust)
733 		_mutex_leave_robust(curthread, m);
734 	if (ret != 0 && ret != EOWNERDEAD &&
735 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
736 		THR_CRITICAL_LEAVE(curthread);
737 	return (ret);
738 }
739 
740 int
741 __Tthr_mutex_lock(pthread_mutex_t *mutex)
742 {
743 	struct pthread_mutex *m;
744 	int ret;
745 
746 	_thr_check_init();
747 	ret = check_and_init_mutex(mutex, &m);
748 	if (ret == 0)
749 		ret = mutex_lock_common(m, NULL, false, false);
750 	return (ret);
751 }
752 
753 int
754 __pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
755     const struct timespec * __restrict abstime)
756 {
757 	struct pthread_mutex *m;
758 	int ret;
759 
760 	_thr_check_init();
761 	ret = check_and_init_mutex(mutex, &m);
762 	if (ret == 0)
763 		ret = mutex_lock_common(m, abstime, false, false);
764 	return (ret);
765 }
766 
767 int
768 _thr_mutex_unlock(pthread_mutex_t *mutex)
769 {
770 	struct pthread_mutex *mp;
771 
772 	if (*mutex == THR_PSHARED_PTR) {
773 		mp = __thr_pshared_offpage(mutex, 0);
774 		if (mp == NULL)
775 			return (EINVAL);
776 		shared_mutex_init(mp, NULL);
777 	} else {
778 		mp = *mutex;
779 	}
780 	return (mutex_unlock_common(mp, false, NULL));
781 }
782 
783 int
784 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
785 {
786 	int error;
787 
788 	error = mutex_lock_common(m, NULL, true, rb_onlist);
789 	if (error == 0 || error == EOWNERDEAD)
790 		m->m_count = count;
791 	return (error);
792 }
793 
794 int
795 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
796 {
797 
798 	/*
799 	 * Clear the count in case this is a recursive mutex.
800 	 */
801 	*count = m->m_count;
802 	m->m_count = 0;
803 	(void)mutex_unlock_common(m, true, defer);
804         return (0);
805 }
806 
807 int
808 _mutex_cv_attach(struct pthread_mutex *m, int count)
809 {
810 	struct pthread *curthread;
811 
812 	curthread = _get_curthread();
813 	enqueue_mutex(curthread, m, 0);
814 	m->m_count = count;
815 	return (0);
816 }
817 
818 int
819 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
820 {
821 	struct pthread *curthread;
822 	int deferred, error;
823 
824 	curthread = _get_curthread();
825 	if ((error = _mutex_owned(curthread, mp)) != 0)
826 		return (error);
827 
828 	/*
829 	 * Clear the count in case this is a recursive mutex.
830 	 */
831 	*recurse = mp->m_count;
832 	mp->m_count = 0;
833 	dequeue_mutex(curthread, mp);
834 
835 	/* Will this happen in real-world ? */
836         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
837 		deferred = 1;
838 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
839 	} else
840 		deferred = 0;
841 
842 	if (deferred)  {
843 		_thr_wake_all(curthread->defer_waiters,
844 		    curthread->nwaiter_defer);
845 		curthread->nwaiter_defer = 0;
846 	}
847 	return (0);
848 }
849 
850 static int
851 mutex_self_trylock(struct pthread_mutex *m)
852 {
853 	int ret;
854 
855 	switch (PMUTEX_TYPE(m->m_flags)) {
856 	case PTHREAD_MUTEX_ERRORCHECK:
857 	case PTHREAD_MUTEX_NORMAL:
858 	case PTHREAD_MUTEX_ADAPTIVE_NP:
859 		ret = EBUSY;
860 		break;
861 
862 	case PTHREAD_MUTEX_RECURSIVE:
863 		/* Increment the lock count: */
864 		if (m->m_count + 1 > 0) {
865 			m->m_count++;
866 			ret = 0;
867 		} else
868 			ret = EAGAIN;
869 		break;
870 
871 	default:
872 		/* Trap invalid mutex types; */
873 		ret = EINVAL;
874 	}
875 
876 	return (ret);
877 }
878 
879 static int
880 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
881 {
882 	struct timespec	ts1, ts2;
883 	int ret;
884 
885 	switch (PMUTEX_TYPE(m->m_flags)) {
886 	case PTHREAD_MUTEX_ERRORCHECK:
887 	case PTHREAD_MUTEX_ADAPTIVE_NP:
888 		if (abstime) {
889 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
890 			    abstime->tv_nsec >= 1000000000) {
891 				ret = EINVAL;
892 			} else {
893 				clock_gettime(CLOCK_REALTIME, &ts1);
894 				TIMESPEC_SUB(&ts2, abstime, &ts1);
895 				__sys_nanosleep(&ts2, NULL);
896 				ret = ETIMEDOUT;
897 			}
898 		} else {
899 			/*
900 			 * POSIX specifies that mutexes should return
901 			 * EDEADLK if a recursive lock is detected.
902 			 */
903 			ret = EDEADLK;
904 		}
905 		break;
906 
907 	case PTHREAD_MUTEX_NORMAL:
908 		/*
909 		 * What SS2 define as a 'normal' mutex.  Intentionally
910 		 * deadlock on attempts to get a lock you already own.
911 		 */
912 		ret = 0;
913 		if (abstime) {
914 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
915 			    abstime->tv_nsec >= 1000000000) {
916 				ret = EINVAL;
917 			} else {
918 				clock_gettime(CLOCK_REALTIME, &ts1);
919 				TIMESPEC_SUB(&ts2, abstime, &ts1);
920 				__sys_nanosleep(&ts2, NULL);
921 				ret = ETIMEDOUT;
922 			}
923 		} else {
924 			ts1.tv_sec = 30;
925 			ts1.tv_nsec = 0;
926 			for (;;)
927 				__sys_nanosleep(&ts1, NULL);
928 		}
929 		break;
930 
931 	case PTHREAD_MUTEX_RECURSIVE:
932 		/* Increment the lock count: */
933 		if (m->m_count + 1 > 0) {
934 			m->m_count++;
935 			ret = 0;
936 		} else
937 			ret = EAGAIN;
938 		break;
939 
940 	default:
941 		/* Trap invalid mutex types; */
942 		ret = EINVAL;
943 	}
944 
945 	return (ret);
946 }
947 
948 static int
949 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
950 {
951 	struct pthread *curthread;
952 	uint32_t id;
953 	int deferred, error, private, robust;
954 
955 	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
956 		if (m == THR_MUTEX_DESTROYED)
957 			return (EINVAL);
958 		return (EPERM);
959 	}
960 
961 	curthread = _get_curthread();
962 	id = TID(curthread);
963 
964 	/*
965 	 * Check if the running thread is not the owner of the mutex.
966 	 */
967 	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
968 		return (EPERM);
969 
970 	error = 0;
971 	private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
972 	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
973 	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
974 		m->m_count--;
975 	} else {
976 		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
977 			deferred = 1;
978 			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
979         	} else
980 			deferred = 0;
981 
982 		robust = _mutex_enter_robust(curthread, m);
983 		dequeue_mutex(curthread, m);
984 		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
985 		if (deferred)  {
986 			if (mtx_defer == NULL) {
987 				_thr_wake_all(curthread->defer_waiters,
988 				    curthread->nwaiter_defer);
989 				curthread->nwaiter_defer = 0;
990 			} else
991 				*mtx_defer = 1;
992 		}
993 		if (robust)
994 			_mutex_leave_robust(curthread, m);
995 	}
996 	if (!cv && private)
997 		THR_CRITICAL_LEAVE(curthread);
998 	return (error);
999 }
1000 
1001 int
1002 _pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
1003     int * __restrict prioceiling)
1004 {
1005 	struct pthread_mutex *m;
1006 
1007 	if (*mutex == THR_PSHARED_PTR) {
1008 		m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
1009 		if (m == NULL)
1010 			return (EINVAL);
1011 		shared_mutex_init(m, NULL);
1012 	} else {
1013 		m = *mutex;
1014 		if (m <= THR_MUTEX_DESTROYED)
1015 			return (EINVAL);
1016 	}
1017 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1018 		return (EINVAL);
1019 	*prioceiling = m->m_lock.m_ceilings[0];
1020 	return (0);
1021 }
1022 
1023 int
1024 _pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
1025     int ceiling, int * __restrict old_ceiling)
1026 {
1027 	struct pthread *curthread;
1028 	struct pthread_mutex *m, *m1, *m2;
1029 	struct mutex_queue *q, *qp;
1030 	int qidx, ret;
1031 
1032 	if (*mutex == THR_PSHARED_PTR) {
1033 		m = __thr_pshared_offpage(mutex, 0);
1034 		if (m == NULL)
1035 			return (EINVAL);
1036 		shared_mutex_init(m, NULL);
1037 	} else {
1038 		m = *mutex;
1039 		if (m <= THR_MUTEX_DESTROYED)
1040 			return (EINVAL);
1041 	}
1042 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1043 		return (EINVAL);
1044 
1045 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1046 	if (ret != 0)
1047 		return (ret);
1048 
1049 	curthread = _get_curthread();
1050 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1051 		mutex_assert_is_owned(m);
1052 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1053 		m2 = TAILQ_NEXT(m, m_qe);
1054 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1055 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1056 			qidx = mutex_qidx(m);
1057 			q = &curthread->mq[qidx];
1058 			qp = &curthread->mq[qidx + 1];
1059 			TAILQ_REMOVE(q, m, m_qe);
1060 			if (!is_pshared_mutex(m))
1061 				TAILQ_REMOVE(qp, m, m_pqe);
1062 			TAILQ_FOREACH(m2, q, m_qe) {
1063 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1064 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1065 					if (!is_pshared_mutex(m)) {
1066 						while (m2 != NULL &&
1067 						    is_pshared_mutex(m2)) {
1068 							m2 = TAILQ_PREV(m2,
1069 							    mutex_queue, m_qe);
1070 						}
1071 						if (m2 == NULL) {
1072 							TAILQ_INSERT_HEAD(qp,
1073 							    m, m_pqe);
1074 						} else {
1075 							TAILQ_INSERT_BEFORE(m2,
1076 							    m, m_pqe);
1077 						}
1078 					}
1079 					return (0);
1080 				}
1081 			}
1082 			TAILQ_INSERT_TAIL(q, m, m_qe);
1083 			if (!is_pshared_mutex(m))
1084 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1085 		}
1086 	}
1087 	return (0);
1088 }
1089 
1090 int
1091 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1092 {
1093 	struct pthread_mutex *m;
1094 	int ret;
1095 
1096 	ret = check_and_init_mutex(mutex, &m);
1097 	if (ret == 0)
1098 		*count = m->m_spinloops;
1099 	return (ret);
1100 }
1101 
1102 int
1103 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1104 {
1105 	struct pthread_mutex *m;
1106 	int ret;
1107 
1108 	ret = check_and_init_mutex(mutex, &m);
1109 	if (ret == 0)
1110 		m->m_spinloops = count;
1111 	return (ret);
1112 }
1113 
1114 int
1115 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1116 {
1117 	struct pthread_mutex *m;
1118 	int ret;
1119 
1120 	ret = check_and_init_mutex(mutex, &m);
1121 	if (ret == 0)
1122 		*count = m->m_yieldloops;
1123 	return (ret);
1124 }
1125 
1126 int
1127 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1128 {
1129 	struct pthread_mutex *m;
1130 	int ret;
1131 
1132 	ret = check_and_init_mutex(mutex, &m);
1133 	if (ret == 0)
1134 		m->m_yieldloops = count;
1135 	return (0);
1136 }
1137 
1138 int
1139 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1140 {
1141 	struct pthread_mutex *m;
1142 
1143 	if (*mutex == THR_PSHARED_PTR) {
1144 		m = __thr_pshared_offpage(mutex, 0);
1145 		if (m == NULL)
1146 			return (0);
1147 		shared_mutex_init(m, NULL);
1148 	} else {
1149 		m = *mutex;
1150 		if (m <= THR_MUTEX_DESTROYED)
1151 			return (0);
1152 	}
1153 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1154 }
1155 
1156 int
1157 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1158 {
1159 
1160 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1161 		if (mp == THR_MUTEX_DESTROYED)
1162 			return (EINVAL);
1163 		return (EPERM);
1164 	}
1165 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1166 		return (EPERM);
1167 	return (0);
1168 }
1169 
1170 int
1171 _Tthr_mutex_consistent(pthread_mutex_t *mutex)
1172 {
1173 	struct pthread_mutex *m;
1174 	struct pthread *curthread;
1175 
1176 	if (*mutex == THR_PSHARED_PTR) {
1177 		m = __thr_pshared_offpage(mutex, 0);
1178 		if (m == NULL)
1179 			return (EINVAL);
1180 		shared_mutex_init(m, NULL);
1181 	} else {
1182 		m = *mutex;
1183 		if (m <= THR_MUTEX_DESTROYED)
1184 			return (EINVAL);
1185 	}
1186 	curthread = _get_curthread();
1187 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1188 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1189 		return (EINVAL);
1190 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1191 		return (EPERM);
1192 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1193 	return (0);
1194 }
1195