xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision 8aac90f18aef7c9eea906c3ff9a001ca7b94f375)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
5  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
6  * Copyright (c) 2015, 2016 The FreeBSD Foundation
7  *
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Konstantin Belousov
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by John Birrell.
24  * 4. Neither the name of the author nor the names of any co-contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40 
#include "namespace.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdlib.h>
#include <string.h>
#include "un-namespace.h"
50 
51 #include "thr_private.h"
52 
/*
 * Compile-time bound on the size of struct pthread_mutex: per the
 * assertion message, the structure has to fit in an off-page, and
 * THR_PAGE_SIZE_MIN is the limit checked here.
 */
_Static_assert(sizeof(struct pthread_mutex) <= THR_PAGE_SIZE_MIN,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, how many times to spin trying to take the
 * lock before entering the kernel to block.  mutex_init_body() uses
 * this value when _thr_spinloops is zero.
 */
#define MUTEX_ADAPTIVE_SPINS	2000
61 
/*
 * Prototypes
 *
 * The first group declares externally visible entry points defined in
 * this file; the static group forward-declares file-local helpers that
 * are used before their definitions.
 */
int	__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
		const struct timespec * __restrict abstime);
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

/* File-local helpers. */
static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
static void	mutex_init_robust(struct pthread *curthread);
static int	mutex_qidx(struct pthread_mutex *m);
static bool	is_robust_mutex(struct pthread_mutex *m);
static bool	is_pshared_mutex(struct pthread_mutex *m);
84 
/*
 * Symbol aliases.  Each line binds an additional (weak or strong) name
 * to one of the implementations defined in this file.
 */
__weak_reference(__Tthr_mutex_init, pthread_mutex_init);
__weak_reference(__Tthr_mutex_init, __pthread_mutex_init);
__strong_reference(__Tthr_mutex_init, _pthread_mutex_init);
__weak_reference(__Tthr_mutex_lock, pthread_mutex_lock);
__weak_reference(__Tthr_mutex_lock, __pthread_mutex_lock);
__strong_reference(__Tthr_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__Tthr_mutex_trylock, pthread_mutex_trylock);
__weak_reference(__Tthr_mutex_trylock, __pthread_mutex_trylock);
__strong_reference(__Tthr_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_Tthr_mutex_consistent, pthread_mutex_consistent);
__weak_reference(_Tthr_mutex_consistent, _pthread_mutex_consistent);
__strong_reference(_Tthr_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_thr_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_thr_mutex_destroy, _pthread_mutex_destroy);
__weak_reference(_thr_mutex_unlock, pthread_mutex_unlock);
__weak_reference(_thr_mutex_unlock, _pthread_mutex_unlock);

/* Priority ceiling accessors. */
__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

/* Non-portable (_np) spin-loop tuning. */
__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

/* Non-portable (_np) yield-loop tuning and ownership query. */
__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
118 
119 static void
120 mutex_init_link(struct pthread_mutex *m __unused)
121 {
122 
123 #if defined(_PTHREADS_INVARIANTS)
124 	m->m_qe.tqe_prev = NULL;
125 	m->m_qe.tqe_next = NULL;
126 	m->m_pqe.tqe_prev = NULL;
127 	m->m_pqe.tqe_next = NULL;
128 #endif
129 }
130 
131 static void
132 mutex_assert_is_owned(struct pthread_mutex *m __unused)
133 {
134 
135 #if defined(_PTHREADS_INVARIANTS)
136 	if (__predict_false(m->m_qe.tqe_prev == NULL))
137 		PANIC("mutex %p own %#x is not on list %p %p",
138 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
139 #endif
140 }
141 
142 static void
143 mutex_assert_not_owned(struct pthread *curthread __unused,
144     struct pthread_mutex *m __unused)
145 {
146 
147 #if defined(_PTHREADS_INVARIANTS)
148 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
149 	    m->m_qe.tqe_next != NULL))
150 		PANIC("mutex %p own %#x is on list %p %p",
151 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
152 	if (__predict_false(is_robust_mutex(m) &&
153 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
154 	    (is_pshared_mutex(m) && curthread->robust_list ==
155 	    (uintptr_t)&m->m_lock) ||
156 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
157 	    (uintptr_t)&m->m_lock))))
158 		PANIC(
159     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
160 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
161 		    m->m_rb_prev, (void *)curthread->robust_list,
162 		    (void *)curthread->priv_robust_list);
163 #endif
164 }
165 
166 static bool
167 is_pshared_mutex(struct pthread_mutex *m)
168 {
169 
170 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
171 }
172 
173 static bool
174 is_robust_mutex(struct pthread_mutex *m)
175 {
176 
177 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
178 }
179 
180 int
181 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
182 {
183 
184 #if defined(_PTHREADS_INVARIANTS)
185 	if (__predict_false(curthread->inact_mtx != 0))
186 		PANIC("inact_mtx enter");
187 #endif
188 	if (!is_robust_mutex(m))
189 		return (0);
190 
191 	mutex_init_robust(curthread);
192 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
193 	return (1);
194 }
195 
196 void
197 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
198 {
199 
200 #if defined(_PTHREADS_INVARIANTS)
201 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
202 		PANIC("inact_mtx leave");
203 #endif
204 	curthread->inact_mtx = 0;
205 }
206 
207 static int
208 mutex_check_attr(const struct pthread_mutex_attr *attr)
209 {
210 
211 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
212 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
213 		return (EINVAL);
214 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
215 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
216 		return (EINVAL);
217 	return (0);
218 }
219 
220 static void
221 mutex_init_robust(struct pthread *curthread)
222 {
223 	struct umtx_robust_lists_params rb;
224 
225 	if (curthread == NULL)
226 		curthread = _get_curthread();
227 	if (curthread->robust_inited)
228 		return;
229 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
230 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
231 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
232 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
233 	curthread->robust_inited = 1;
234 }
235 
236 static void
237 mutex_init_body(struct pthread_mutex *pmutex,
238     const struct pthread_mutex_attr *attr)
239 {
240 
241 	pmutex->m_flags = attr->m_type;
242 	pmutex->m_count = 0;
243 	pmutex->m_spinloops = 0;
244 	pmutex->m_yieldloops = 0;
245 	mutex_init_link(pmutex);
246 	switch (attr->m_protocol) {
247 	case PTHREAD_PRIO_NONE:
248 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
249 		pmutex->m_lock.m_flags = 0;
250 		break;
251 	case PTHREAD_PRIO_INHERIT:
252 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
253 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
254 		break;
255 	case PTHREAD_PRIO_PROTECT:
256 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
257 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
258 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
259 		break;
260 	}
261 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
262 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
263 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
264 		mutex_init_robust(NULL);
265 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
266 	}
267 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
268 		pmutex->m_spinloops =
269 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
270 		pmutex->m_yieldloops = _thr_yieldloops;
271 	}
272 }
273 
274 static int
275 mutex_init(pthread_mutex_t *mutex,
276     const struct pthread_mutex_attr *mutex_attr,
277     void *(calloc_cb)(size_t, size_t))
278 {
279 	const struct pthread_mutex_attr *attr;
280 	struct pthread_mutex *pmutex;
281 	int error;
282 
283 	if (mutex_attr == NULL) {
284 		attr = &_pthread_mutexattr_default;
285 	} else {
286 		attr = mutex_attr;
287 		error = mutex_check_attr(attr);
288 		if (error != 0)
289 			return (error);
290 	}
291 	if ((pmutex = (pthread_mutex_t)calloc_cb(1,
292 	    sizeof(struct pthread_mutex))) == NULL)
293 		return (ENOMEM);
294 	mutex_init_body(pmutex, attr);
295 	*mutex = pmutex;
296 	return (0);
297 }
298 
299 static int
300 init_static(struct pthread *thread, pthread_mutex_t *mutex)
301 {
302 	int ret;
303 
304 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
305 
306 	if (*mutex == THR_MUTEX_INITIALIZER)
307 		ret = mutex_init(mutex, &_pthread_mutexattr_default,
308 		    __thr_calloc);
309 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
310 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
311 		    __thr_calloc);
312 	else
313 		ret = 0;
314 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
315 
316 	return (ret);
317 }
318 
319 static void
320 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
321 {
322 	struct pthread_mutex *m2;
323 
324 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
325 	if (m2 != NULL)
326 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
327 	else
328 		m->m_lock.m_ceilings[1] = -1;
329 }
330 
331 static void
332 shared_mutex_init(struct pthread_mutex *pmtx, const struct
333     pthread_mutex_attr *mutex_attr)
334 {
335 	static const struct pthread_mutex_attr foobar_mutex_attr = {
336 		.m_type = PTHREAD_MUTEX_DEFAULT,
337 		.m_protocol = PTHREAD_PRIO_NONE,
338 		.m_ceiling = 0,
339 		.m_pshared = PTHREAD_PROCESS_SHARED,
340 		.m_robust = PTHREAD_MUTEX_STALLED,
341 	};
342 	bool done;
343 
344 	/*
345 	 * Hack to allow multiple pthread_mutex_init() calls on the
346 	 * same process-shared mutex.  We rely on kernel allocating
347 	 * zeroed offpage for the mutex, i.e. the
348 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
349 	 */
350 	for (done = false; !done;) {
351 		switch (pmtx->m_ps) {
352 		case PMUTEX_INITSTAGE_DONE:
353 			atomic_thread_fence_acq();
354 			done = true;
355 			break;
356 		case PMUTEX_INITSTAGE_ALLOC:
357 			if (atomic_cmpset_int(&pmtx->m_ps,
358 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
359 				if (mutex_attr == NULL)
360 					mutex_attr = &foobar_mutex_attr;
361 				mutex_init_body(pmtx, mutex_attr);
362 				atomic_store_rel_int(&pmtx->m_ps,
363 				    PMUTEX_INITSTAGE_DONE);
364 				done = true;
365 			}
366 			break;
367 		case PMUTEX_INITSTAGE_BUSY:
368 			_pthread_yield();
369 			break;
370 		default:
371 			PANIC("corrupted offpage");
372 			break;
373 		}
374 	}
375 }
376 
377 int
378 __Tthr_mutex_init(pthread_mutex_t * __restrict mutex,
379     const pthread_mutexattr_t * __restrict mutex_attr)
380 {
381 	struct pthread_mutex *pmtx;
382 	int ret;
383 
384 	_thr_check_init();
385 
386 	if (mutex_attr != NULL) {
387 		ret = mutex_check_attr(*mutex_attr);
388 		if (ret != 0)
389 			return (ret);
390 	}
391 	if (mutex_attr == NULL ||
392 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
393 		__thr_malloc_init();
394 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
395 		    __thr_calloc));
396 	}
397 	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
398 	if (pmtx == NULL)
399 		return (EFAULT);
400 	*mutex = THR_PSHARED_PTR;
401 	shared_mutex_init(pmtx, *mutex_attr);
402 	return (0);
403 }
404 
405 /* This function is used internally by malloc. */
406 int
407 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
408     void *(calloc_cb)(size_t, size_t))
409 {
410 	static const struct pthread_mutex_attr attr = {
411 		.m_type = PTHREAD_MUTEX_NORMAL,
412 		.m_protocol = PTHREAD_PRIO_NONE,
413 		.m_ceiling = 0,
414 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
415 		.m_robust = PTHREAD_MUTEX_STALLED,
416 	};
417 	int ret;
418 
419 	ret = mutex_init(mutex, &attr, calloc_cb);
420 	if (ret == 0)
421 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
422 	return (ret);
423 }
424 
/*
 * Fix mutex ownership for child process.
 *
 * Ownership of process-private mutexes is transferred from the
 * forking thread to the child process.
 *
 * Process-shared mutexes are not inherited, because their owner is
 * the forking thread in the parent process; they are dropped from the
 * child's lists of owned mutexes.
 */
435 static void
436 queue_fork(struct pthread *curthread, struct mutex_queue *q,
437     struct mutex_queue *qp, uint bit)
438 {
439 	struct pthread_mutex *m;
440 
441 	TAILQ_INIT(q);
442 	TAILQ_FOREACH(m, qp, m_pqe) {
443 		TAILQ_INSERT_TAIL(q, m, m_qe);
444 		m->m_lock.m_owner = TID(curthread) | bit;
445 	}
446 }
447 
448 void
449 _mutex_fork(struct pthread *curthread)
450 {
451 
452 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
453 	    &curthread->mq[TMQ_NORM_PRIV], 0);
454 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
455 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
456 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
457 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
458 	curthread->robust_list = 0;
459 }
460 
461 int
462 _thr_mutex_destroy(pthread_mutex_t *mutex)
463 {
464 	pthread_mutex_t m, m1;
465 	int ret;
466 
467 	m = *mutex;
468 	if (m < THR_MUTEX_DESTROYED) {
469 		ret = 0;
470 	} else if (m == THR_MUTEX_DESTROYED) {
471 		ret = EINVAL;
472 	} else {
473 		if (m == THR_PSHARED_PTR) {
474 			m1 = __thr_pshared_offpage(mutex, 0);
475 			if (m1 != NULL) {
476 				if ((uint32_t)m1->m_lock.m_owner !=
477 				    UMUTEX_RB_OWNERDEAD) {
478 					mutex_assert_not_owned(
479 					    _get_curthread(), m1);
480 				}
481 				__thr_pshared_destroy(mutex);
482 			}
483 			*mutex = THR_MUTEX_DESTROYED;
484 			return (0);
485 		}
486 		if (PMUTEX_OWNER_ID(m) != 0 &&
487 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
488 			ret = EBUSY;
489 		} else {
490 			*mutex = THR_MUTEX_DESTROYED;
491 			mutex_assert_not_owned(_get_curthread(), m);
492 			__thr_free(m);
493 			ret = 0;
494 		}
495 	}
496 
497 	return (ret);
498 }
499 
500 static int
501 mutex_qidx(struct pthread_mutex *m)
502 {
503 
504 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
505 		return (TMQ_NORM);
506 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
507 }
508 
509 /*
510  * Both enqueue_mutex() and dequeue_mutex() operate on the
511  * thread-private linkage of the locked mutexes and on the robust
512  * linkage.
513  *
514  * Robust list, as seen by kernel, must be consistent even in the case
515  * of thread termination at arbitrary moment.  Since either enqueue or
516  * dequeue for list walked by kernel consists of rewriting a single
517  * forward pointer, it is safe.  On the other hand, rewrite of the
518  * back pointer is not atomic WRT the forward one, but kernel does not
519  * care.
520  */
/*
 * Record mutex m, which curthread has just acquired, on the thread's
 * owned-mutex queue(s) and, for a robust mutex, at the head of the
 * matching robust list.  `error' is the result of the lock operation;
 * the "not already owned" assertion is skipped when it is EOWNERDEAD.
 */
static void
enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
    int error)
{
	struct pthread_mutex *m1;
	uintptr_t *rl;
	int qidx;

	/* Add to the list of owned mutexes: */
	if (error != EOWNERDEAD)
		mutex_assert_not_owned(curthread, m);
	qidx = mutex_qidx(m);
	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
	/* Process-private mutexes are also kept on the queue at qidx + 1. */
	if (!is_pshared_mutex(m))
		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
	if (is_robust_mutex(m)) {
		/* Pick the robust list head matching the sharing mode. */
		rl = is_pshared_mutex(m) ? &curthread->robust_list :
		    &curthread->priv_robust_list;
		/* m becomes the new head, so it has no predecessor. */
		m->m_rb_prev = NULL;
		if (*rl != 0) {
			/* Link m in front of the current head m1. */
			m1 = __containerof((void *)*rl,
			    struct pthread_mutex, m_lock);
			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
			m1->m_rb_prev = m;
		} else {
			m1 = NULL;
			m->m_lock.m_rb_lnk = 0;
		}
		/* Store the head last, after m's forward link is in place. */
		*rl = (uintptr_t)&m->m_lock;
	}
}
552 
/*
 * Remove mutex m from curthread's robust list (if robust) and from the
 * owned-mutex queue(s), then, for a priority-protect mutex, refresh its
 * second ceiling slot via set_inherited_priority().  Finally the queue
 * linkage is reset with mutex_init_link().
 */
static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *mp, *mn;
	int qidx;

	mutex_assert_is_owned(m);
	qidx = mutex_qidx(m);
	if (is_robust_mutex(m)) {
		mp = m->m_rb_prev;
		if (mp == NULL) {
			/* m is the list head: advance the head past it. */
			if (is_pshared_mutex(m)) {
				curthread->robust_list = m->m_lock.m_rb_lnk;
			} else {
				curthread->priv_robust_list =
				    m->m_lock.m_rb_lnk;
			}
		} else {
			/* Unlink m by bypassing it in the forward chain. */
			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
		}
		if (m->m_lock.m_rb_lnk != 0) {
			/* Fix the successor's back pointer. */
			mn = __containerof((void *)m->m_lock.m_rb_lnk,
			    struct pthread_mutex, m_lock);
			mn->m_rb_prev = m->m_rb_prev;
		}
		m->m_lock.m_rb_lnk = 0;
		m->m_rb_prev = NULL;
	}
	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
	/* Process-private mutexes are also on the queue at qidx + 1. */
	if (!is_pshared_mutex(m))
		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
		set_inherited_priority(curthread, m);
	mutex_init_link(m);
}
588 
589 static int
590 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
591 {
592 	int ret;
593 
594 	*m = *mutex;
595 	ret = 0;
596 	if (__predict_false(*m == THR_PSHARED_PTR)) {
597 		*m = __thr_pshared_offpage(mutex, 0);
598 		if (*m == NULL)
599 			ret = EINVAL;
600 		else
601 			shared_mutex_init(*m, NULL);
602 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
603 		if (*m == THR_MUTEX_DESTROYED) {
604 			ret = EINVAL;
605 		} else {
606 			ret = init_static(_get_curthread(), mutex);
607 			if (ret == 0)
608 				*m = *mutex;
609 		}
610 	}
611 	return (ret);
612 }
613 
/*
 * pthread_mutex_trylock() implementation.
 *
 * Returns 0 or EOWNERDEAD when the lock was acquired, the result of
 * mutex_self_trylock() when the caller already owns the mutex, or the
 * error from _thr_umutex_trylock() / check_and_init_mutex() otherwise.
 */
int
__Tthr_mutex_trylock(pthread_mutex_t *mutex)
{
	struct pthread *curthread;
	struct pthread_mutex *m;
	uint32_t id;
	int ret, robust;

	ret = check_and_init_mutex(mutex, &m);
	if (ret != 0)
		return (ret);
	curthread = _get_curthread();
	id = TID(curthread);
	/*
	 * For PMUTEX_FLAG_PRIVATE mutexes a critical section is entered
	 * here; it is left below only when the lock was not acquired.
	 */
	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	robust = _mutex_enter_robust(curthread, m);
	ret = _thr_umutex_trylock(&m->m_lock, id);
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		/* The previous owner died: mark the state inconsistent. */
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else if (PMUTEX_OWNER_ID(m) == id) {
		/* Already owned by the caller: type-specific handling. */
		ret = mutex_self_trylock(m);
	} /* else {} */
	if (robust)
		_mutex_leave_robust(curthread, m);
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}
645 
/*
 * Slow path of mutex locking, entered after the initial try-lock
 * failed.
 *
 * If the caller already owns the mutex the request is handed to
 * mutex_self_lock().  Otherwise the lock is attempted by spinning
 * (m_spinloops iterations, SMP only), then by yielding (m_yieldloops
 * iterations), and finally by blocking in the kernel, optionally with
 * the absolute timeout `abstime'.  Mutexes with any of the
 * priority-protect, priority-inherit, robust or non-consistent flags
 * skip the user-space loops.
 *
 * Returns 0 or EOWNERDEAD when the lock was acquired (the mutex is then
 * enqueued on the thread's owned lists), EINVAL for an out-of-range
 * abstime->tv_nsec, or the error from the kernel lock operation.
 */
static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
	uint32_t id, owner;
	int count, ret;

	id = TID(curthread);
	if (PMUTEX_OWNER_ID(m) == id)
		return (mutex_self_lock(m, abstime));

	/*
	 * For adaptive mutexes, spin for a bit in the expectation
	 * that if the application requests this mutex type then
	 * the lock is likely to be released quickly and it is
	 * faster than entering the kernel
	 */
	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
		goto sleep_in_kernel;

	/* Spinning is only attempted when _thr_is_smp is set. */
	if (!_thr_is_smp)
		goto yield_loop;

	count = m->m_spinloops;
	while (count--) {
		owner = m->m_lock.m_owner;
		/* Unowned (ignoring the contested bit): try to claim it. */
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
		CPU_SPINWAIT;
	}

yield_loop:
	count = m->m_yieldloops;
	while (count--) {
		_sched_yield();
		owner = m->m_lock.m_owner;
		/* Same claim attempt as in the spin loop above. */
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
	}

sleep_in_kernel:
	if (abstime == NULL)
		ret = __thr_umutex_lock(&m->m_lock, id);
	else if (__predict_false(abstime->tv_nsec < 0 ||
	    abstime->tv_nsec >= 1000000000))
		ret = EINVAL;
	else
		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
	if (ret == 0 || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		/* The previous owner died: mark the state inconsistent. */
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	}
	return (ret);
}
713 
/*
 * Common body of the lock, timed-lock and condition-variable re-lock
 * paths.
 *
 * m		the resolved mutex object
 * abstime	absolute timeout, or NULL to wait indefinitely
 * cvattach	true when called from _mutex_cv_lock(); the
 *		critical-section enter/leave for PMUTEX_FLAG_PRIVATE
 *		mutexes is then skipped
 * rb_onlist	true to skip the _mutex_enter_robust() /
 *		_mutex_leave_robust() bracketing
 *
 * Returns 0 or EOWNERDEAD when the lock was acquired, otherwise the
 * error from mutex_lock_sleep().
 */
static __always_inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
    bool cvattach, bool rb_onlist)
{
	struct pthread *curthread;
	int ret, robust;

	robust = 0;  /* pacify gcc */
	curthread  = _get_curthread();
	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	if (!rb_onlist)
		robust = _mutex_enter_robust(curthread, m);
	/* Fast path: one try-lock before falling back to the slow path. */
	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else {
		ret = mutex_lock_sleep(curthread, m, abstime);
	}
	if (!rb_onlist && robust)
		_mutex_leave_robust(curthread, m);
	/* The critical section is left here only if the lock was not taken. */
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}
742 
743 int
744 __Tthr_mutex_lock(pthread_mutex_t *mutex)
745 {
746 	struct pthread_mutex *m;
747 	int ret;
748 
749 	_thr_check_init();
750 	ret = check_and_init_mutex(mutex, &m);
751 	if (ret == 0)
752 		ret = mutex_lock_common(m, NULL, false, false);
753 	return (ret);
754 }
755 
756 int
757 __pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
758     const struct timespec * __restrict abstime)
759 {
760 	struct pthread_mutex *m;
761 	int ret;
762 
763 	_thr_check_init();
764 	ret = check_and_init_mutex(mutex, &m);
765 	if (ret == 0)
766 		ret = mutex_lock_common(m, abstime, false, false);
767 	return (ret);
768 }
769 
770 int
771 _thr_mutex_unlock(pthread_mutex_t *mutex)
772 {
773 	struct pthread_mutex *mp;
774 
775 	if (*mutex == THR_PSHARED_PTR) {
776 		mp = __thr_pshared_offpage(mutex, 0);
777 		if (mp == NULL)
778 			return (EINVAL);
779 		shared_mutex_init(mp, NULL);
780 	} else {
781 		mp = *mutex;
782 	}
783 	return (mutex_unlock_common(mp, false, NULL));
784 }
785 
786 int
787 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
788 {
789 	int error;
790 
791 	error = mutex_lock_common(m, NULL, true, rb_onlist);
792 	if (error == 0 || error == EOWNERDEAD)
793 		m->m_count = count;
794 	return (error);
795 }
796 
797 int
798 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
799 {
800 
801 	/*
802 	 * Clear the count in case this is a recursive mutex.
803 	 */
804 	*count = m->m_count;
805 	m->m_count = 0;
806 	(void)mutex_unlock_common(m, true, defer);
807         return (0);
808 }
809 
810 int
811 _mutex_cv_attach(struct pthread_mutex *m, int count)
812 {
813 	struct pthread *curthread;
814 
815 	curthread = _get_curthread();
816 	enqueue_mutex(curthread, m, 0);
817 	m->m_count = count;
818 	return (0);
819 }
820 
821 int
822 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
823 {
824 	struct pthread *curthread;
825 	int deferred, error;
826 
827 	curthread = _get_curthread();
828 	if ((error = _mutex_owned(curthread, mp)) != 0)
829 		return (error);
830 
831 	/*
832 	 * Clear the count in case this is a recursive mutex.
833 	 */
834 	*recurse = mp->m_count;
835 	mp->m_count = 0;
836 	dequeue_mutex(curthread, mp);
837 
838 	/* Will this happen in real-world ? */
839         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
840 		deferred = 1;
841 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
842 	} else
843 		deferred = 0;
844 
845 	if (deferred)  {
846 		_thr_wake_all(curthread->defer_waiters,
847 		    curthread->nwaiter_defer);
848 		curthread->nwaiter_defer = 0;
849 	}
850 	return (0);
851 }
852 
853 static int
854 mutex_self_trylock(struct pthread_mutex *m)
855 {
856 	int ret;
857 
858 	switch (PMUTEX_TYPE(m->m_flags)) {
859 	case PTHREAD_MUTEX_ERRORCHECK:
860 	case PTHREAD_MUTEX_NORMAL:
861 	case PTHREAD_MUTEX_ADAPTIVE_NP:
862 		ret = EBUSY;
863 		break;
864 
865 	case PTHREAD_MUTEX_RECURSIVE:
866 		/* Increment the lock count: */
867 		if (m->m_count + 1 > 0) {
868 			m->m_count++;
869 			ret = 0;
870 		} else
871 			ret = EAGAIN;
872 		break;
873 
874 	default:
875 		/* Trap invalid mutex types; */
876 		ret = EINVAL;
877 	}
878 
879 	return (ret);
880 }
881 
882 static int
883 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
884 {
885 	struct timespec	ts1, ts2;
886 	int ret;
887 
888 	switch (PMUTEX_TYPE(m->m_flags)) {
889 	case PTHREAD_MUTEX_ERRORCHECK:
890 	case PTHREAD_MUTEX_ADAPTIVE_NP:
891 		if (abstime) {
892 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
893 			    abstime->tv_nsec >= 1000000000) {
894 				ret = EINVAL;
895 			} else {
896 				clock_gettime(CLOCK_REALTIME, &ts1);
897 				TIMESPEC_SUB(&ts2, abstime, &ts1);
898 				__sys_nanosleep(&ts2, NULL);
899 				ret = ETIMEDOUT;
900 			}
901 		} else {
902 			/*
903 			 * POSIX specifies that mutexes should return
904 			 * EDEADLK if a recursive lock is detected.
905 			 */
906 			ret = EDEADLK;
907 		}
908 		break;
909 
910 	case PTHREAD_MUTEX_NORMAL:
911 		/*
912 		 * What SS2 define as a 'normal' mutex.  Intentionally
913 		 * deadlock on attempts to get a lock you already own.
914 		 */
915 		ret = 0;
916 		if (abstime) {
917 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
918 			    abstime->tv_nsec >= 1000000000) {
919 				ret = EINVAL;
920 			} else {
921 				clock_gettime(CLOCK_REALTIME, &ts1);
922 				TIMESPEC_SUB(&ts2, abstime, &ts1);
923 				__sys_nanosleep(&ts2, NULL);
924 				ret = ETIMEDOUT;
925 			}
926 		} else {
927 			ts1.tv_sec = 30;
928 			ts1.tv_nsec = 0;
929 			for (;;)
930 				__sys_nanosleep(&ts1, NULL);
931 		}
932 		break;
933 
934 	case PTHREAD_MUTEX_RECURSIVE:
935 		/* Increment the lock count: */
936 		if (m->m_count + 1 > 0) {
937 			m->m_count++;
938 			ret = 0;
939 		} else
940 			ret = EAGAIN;
941 		break;
942 
943 	default:
944 		/* Trap invalid mutex types; */
945 		ret = EINVAL;
946 	}
947 
948 	return (ret);
949 }
950 
/*
 * Common body of the unlock paths.
 *
 * m		the resolved mutex object (may still be a static
 *		initializer or destroyed marker value)
 * cv		true when called from _mutex_cv_unlock(); the
 *		critical-section leave for PMUTEX_FLAG_PRIVATE mutexes is
 *		then skipped
 * mtx_defer	passed through to _thr_umutex_unlock2(); when non-NULL
 *		and the mutex had PMUTEX_FLAG_DEFERRED set, *mtx_defer is
 *		set to 1 instead of waking the deferred waiters here
 *
 * Returns EINVAL for a destroyed mutex, EPERM when the mutex was never
 * initialized or the caller is not the owner, otherwise 0 or the error
 * from _thr_umutex_unlock2().
 */
static __always_inline int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
	struct pthread *curthread;
	uint32_t id;
	int deferred, error, private, robust;

	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
		if (m == THR_MUTEX_DESTROYED)
			return (EINVAL);
		return (EPERM);
	}

	curthread = _get_curthread();
	id = TID(curthread);

	/*
	 * Check if the running thread is not the owner of the mutex.
	 */
	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
		return (EPERM);

	error = 0;
	private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
		/* Nested recursive lock: only drop one recursion level. */
		m->m_count--;
	} else {
		/* Note and clear a pending deferred wakeup request. */
		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
			deferred = 1;
			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
        	} else
			deferred = 0;

		robust = _mutex_enter_robust(curthread, m);
		/* Unlink from the owned lists before releasing the lock. */
		dequeue_mutex(curthread, m);
		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
		if (deferred)  {
			if (mtx_defer == NULL) {
				_thr_wake_all(curthread->defer_waiters,
				    curthread->nwaiter_defer);
				curthread->nwaiter_defer = 0;
			} else
				*mtx_defer = 1;
		}
		if (robust)
			_mutex_leave_robust(curthread, m);
	}
	if (!cv && private)
		THR_CRITICAL_LEAVE(curthread);
	return (error);
}
1003 
1004 int
1005 _pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
1006     int * __restrict prioceiling)
1007 {
1008 	struct pthread_mutex *m;
1009 
1010 	if (*mutex == THR_PSHARED_PTR) {
1011 		m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
1012 		if (m == NULL)
1013 			return (EINVAL);
1014 		shared_mutex_init(m, NULL);
1015 	} else {
1016 		m = *mutex;
1017 		if (m <= THR_MUTEX_DESTROYED)
1018 			return (EINVAL);
1019 	}
1020 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1021 		return (EINVAL);
1022 	*prioceiling = m->m_lock.m_ceilings[0];
1023 	return (0);
1024 }
1025 
1026 int
1027 _pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
1028     int ceiling, int * __restrict old_ceiling)
1029 {
1030 	struct pthread *curthread;
1031 	struct pthread_mutex *m, *m1, *m2;
1032 	struct mutex_queue *q, *qp;
1033 	int qidx, ret;
1034 
1035 	if (*mutex == THR_PSHARED_PTR) {
1036 		m = __thr_pshared_offpage(mutex, 0);
1037 		if (m == NULL)
1038 			return (EINVAL);
1039 		shared_mutex_init(m, NULL);
1040 	} else {
1041 		m = *mutex;
1042 		if (m <= THR_MUTEX_DESTROYED)
1043 			return (EINVAL);
1044 	}
1045 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1046 		return (EINVAL);
1047 
1048 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1049 	if (ret != 0)
1050 		return (ret);
1051 
1052 	curthread = _get_curthread();
1053 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1054 		mutex_assert_is_owned(m);
1055 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1056 		m2 = TAILQ_NEXT(m, m_qe);
1057 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1058 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1059 			qidx = mutex_qidx(m);
1060 			q = &curthread->mq[qidx];
1061 			qp = &curthread->mq[qidx + 1];
1062 			TAILQ_REMOVE(q, m, m_qe);
1063 			if (!is_pshared_mutex(m))
1064 				TAILQ_REMOVE(qp, m, m_pqe);
1065 			TAILQ_FOREACH(m2, q, m_qe) {
1066 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1067 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1068 					if (!is_pshared_mutex(m)) {
1069 						while (m2 != NULL &&
1070 						    is_pshared_mutex(m2)) {
1071 							m2 = TAILQ_PREV(m2,
1072 							    mutex_queue, m_qe);
1073 						}
1074 						if (m2 == NULL) {
1075 							TAILQ_INSERT_HEAD(qp,
1076 							    m, m_pqe);
1077 						} else {
1078 							TAILQ_INSERT_BEFORE(m2,
1079 							    m, m_pqe);
1080 						}
1081 					}
1082 					return (0);
1083 				}
1084 			}
1085 			TAILQ_INSERT_TAIL(q, m, m_qe);
1086 			if (!is_pshared_mutex(m))
1087 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1088 		}
1089 	}
1090 	return (0);
1091 }
1092 
1093 int
1094 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1095 {
1096 	struct pthread_mutex *m;
1097 	int ret;
1098 
1099 	ret = check_and_init_mutex(mutex, &m);
1100 	if (ret == 0)
1101 		*count = m->m_spinloops;
1102 	return (ret);
1103 }
1104 
1105 int
1106 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1107 {
1108 	struct pthread_mutex *m;
1109 	int ret;
1110 
1111 	ret = check_and_init_mutex(mutex, &m);
1112 	if (ret == 0)
1113 		m->m_spinloops = count;
1114 	return (ret);
1115 }
1116 
1117 int
1118 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1119 {
1120 	struct pthread_mutex *m;
1121 	int ret;
1122 
1123 	ret = check_and_init_mutex(mutex, &m);
1124 	if (ret == 0)
1125 		*count = m->m_yieldloops;
1126 	return (ret);
1127 }
1128 
1129 int
1130 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1131 {
1132 	struct pthread_mutex *m;
1133 	int ret;
1134 
1135 	ret = check_and_init_mutex(mutex, &m);
1136 	if (ret == 0)
1137 		m->m_yieldloops = count;
1138 	return (0);
1139 }
1140 
1141 int
1142 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1143 {
1144 	struct pthread_mutex *m;
1145 
1146 	if (*mutex == THR_PSHARED_PTR) {
1147 		m = __thr_pshared_offpage(mutex, 0);
1148 		if (m == NULL)
1149 			return (0);
1150 		shared_mutex_init(m, NULL);
1151 	} else {
1152 		m = *mutex;
1153 		if (m <= THR_MUTEX_DESTROYED)
1154 			return (0);
1155 	}
1156 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1157 }
1158 
1159 int
1160 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1161 {
1162 
1163 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1164 		if (mp == THR_MUTEX_DESTROYED)
1165 			return (EINVAL);
1166 		return (EPERM);
1167 	}
1168 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1169 		return (EPERM);
1170 	return (0);
1171 }
1172 
1173 int
1174 _Tthr_mutex_consistent(pthread_mutex_t *mutex)
1175 {
1176 	struct pthread_mutex *m;
1177 	struct pthread *curthread;
1178 
1179 	if (*mutex == THR_PSHARED_PTR) {
1180 		m = __thr_pshared_offpage(mutex, 0);
1181 		if (m == NULL)
1182 			return (EINVAL);
1183 		shared_mutex_init(m, NULL);
1184 	} else {
1185 		m = *mutex;
1186 		if (m <= THR_MUTEX_DESTROYED)
1187 			return (EINVAL);
1188 	}
1189 	curthread = _get_curthread();
1190 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1191 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1192 		return (EINVAL);
1193 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1194 		return (EPERM);
1195 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1196 	return (0);
1197 }
1198