xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision b23dbabb7f3edb3f323a64f03e37be2c9a8b2a45)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
5  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
6  * Copyright (c) 2015, 2016 The FreeBSD Foundation
7  *
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Konstantin Belousov
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by John Birrell.
24  * 4. Neither the name of the author nor the names of any co-contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
#include "namespace.h"
#include <sys/param.h>
#include <sys/queue.h>
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdlib.h>
#include <string.h>
#include "un-namespace.h"
53 
54 #include "thr_private.h"
55 
56 _Static_assert(sizeof(struct pthread_mutex) <= THR_PAGE_SIZE_MIN,
57     "pthread_mutex is too large for off-page");
58 
/*
 * Default number of trylock spins performed on an adaptive mutex before
 * the thread gives up and enters the kernel to block.  It is used when
 * _thr_spinloops is zero.
 */
#define	MUTEX_ADAPTIVE_SPINS	2000
64 
/*
 * Prototypes for the non-static entry points defined in this file.
 */
int	__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
		const struct timespec * __restrict abstime);

/* Spin-loop tuning. */
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);

/* Yield-loop tuning. */
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

/*
 * File-local helpers that are referenced before their definitions.
 */
static bool	is_pshared_mutex(struct pthread_mutex *m);
static bool	is_robust_mutex(struct pthread_mutex *m);
static void	mutex_init_robust(struct pthread *curthread);
static int	mutex_qidx(struct pthread_mutex *m);
static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
87 
/*
 * Exported names.  Each group binds the public and the underscore-prefixed
 * spellings of one entry point to its implementation in this file.
 */

/* pthread_mutex_init() */
__weak_reference(__Tthr_mutex_init, pthread_mutex_init);
__weak_reference(__Tthr_mutex_init, __pthread_mutex_init);
__strong_reference(__Tthr_mutex_init, _pthread_mutex_init);

/* pthread_mutex_lock() */
__weak_reference(__Tthr_mutex_lock, pthread_mutex_lock);
__weak_reference(__Tthr_mutex_lock, __pthread_mutex_lock);
__strong_reference(__Tthr_mutex_lock, _pthread_mutex_lock);

/* pthread_mutex_timedlock() */
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);

/* pthread_mutex_trylock() */
__weak_reference(__Tthr_mutex_trylock, pthread_mutex_trylock);
__weak_reference(__Tthr_mutex_trylock, __pthread_mutex_trylock);
__strong_reference(__Tthr_mutex_trylock, _pthread_mutex_trylock);

/* pthread_mutex_consistent() */
__weak_reference(_Tthr_mutex_consistent, pthread_mutex_consistent);
__weak_reference(_Tthr_mutex_consistent, _pthread_mutex_consistent);
__strong_reference(_Tthr_mutex_consistent, __pthread_mutex_consistent);

/*
 * Single-underscore versions are provided for libc internal usage; for
 * destroy and unlock there is no difference between libc and application
 * usage.
 */
__weak_reference(_thr_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_thr_mutex_destroy, _pthread_mutex_destroy);
__weak_reference(_thr_mutex_unlock, pthread_mutex_unlock);
__weak_reference(_thr_mutex_unlock, _pthread_mutex_unlock);

/* Priority ceiling accessors. */
__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

/* Spin-loop tuning. */
__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

/* Yield-loop tuning. */
__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);

/* Ownership query. */
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
121 
122 static void
123 mutex_init_link(struct pthread_mutex *m __unused)
124 {
125 
126 #if defined(_PTHREADS_INVARIANTS)
127 	m->m_qe.tqe_prev = NULL;
128 	m->m_qe.tqe_next = NULL;
129 	m->m_pqe.tqe_prev = NULL;
130 	m->m_pqe.tqe_next = NULL;
131 #endif
132 }
133 
134 static void
135 mutex_assert_is_owned(struct pthread_mutex *m __unused)
136 {
137 
138 #if defined(_PTHREADS_INVARIANTS)
139 	if (__predict_false(m->m_qe.tqe_prev == NULL))
140 		PANIC("mutex %p own %#x is not on list %p %p",
141 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
142 #endif
143 }
144 
145 static void
146 mutex_assert_not_owned(struct pthread *curthread __unused,
147     struct pthread_mutex *m __unused)
148 {
149 
150 #if defined(_PTHREADS_INVARIANTS)
151 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
152 	    m->m_qe.tqe_next != NULL))
153 		PANIC("mutex %p own %#x is on list %p %p",
154 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
155 	if (__predict_false(is_robust_mutex(m) &&
156 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
157 	    (is_pshared_mutex(m) && curthread->robust_list ==
158 	    (uintptr_t)&m->m_lock) ||
159 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
160 	    (uintptr_t)&m->m_lock))))
161 		PANIC(
162     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
163 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
164 		    m->m_rb_prev, (void *)curthread->robust_list,
165 		    (void *)curthread->priv_robust_list);
166 #endif
167 }
168 
169 static bool
170 is_pshared_mutex(struct pthread_mutex *m)
171 {
172 
173 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
174 }
175 
176 static bool
177 is_robust_mutex(struct pthread_mutex *m)
178 {
179 
180 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
181 }
182 
183 int
184 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
185 {
186 
187 #if defined(_PTHREADS_INVARIANTS)
188 	if (__predict_false(curthread->inact_mtx != 0))
189 		PANIC("inact_mtx enter");
190 #endif
191 	if (!is_robust_mutex(m))
192 		return (0);
193 
194 	mutex_init_robust(curthread);
195 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
196 	return (1);
197 }
198 
199 void
200 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
201 {
202 
203 #if defined(_PTHREADS_INVARIANTS)
204 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
205 		PANIC("inact_mtx leave");
206 #endif
207 	curthread->inact_mtx = 0;
208 }
209 
210 static int
211 mutex_check_attr(const struct pthread_mutex_attr *attr)
212 {
213 
214 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
215 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
216 		return (EINVAL);
217 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
218 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
219 		return (EINVAL);
220 	return (0);
221 }
222 
223 static void
224 mutex_init_robust(struct pthread *curthread)
225 {
226 	struct umtx_robust_lists_params rb;
227 
228 	if (curthread == NULL)
229 		curthread = _get_curthread();
230 	if (curthread->robust_inited)
231 		return;
232 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
233 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
234 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
235 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
236 	curthread->robust_inited = 1;
237 }
238 
239 static void
240 mutex_init_body(struct pthread_mutex *pmutex,
241     const struct pthread_mutex_attr *attr)
242 {
243 
244 	pmutex->m_flags = attr->m_type;
245 	pmutex->m_count = 0;
246 	pmutex->m_spinloops = 0;
247 	pmutex->m_yieldloops = 0;
248 	mutex_init_link(pmutex);
249 	switch (attr->m_protocol) {
250 	case PTHREAD_PRIO_NONE:
251 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
252 		pmutex->m_lock.m_flags = 0;
253 		break;
254 	case PTHREAD_PRIO_INHERIT:
255 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
256 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
257 		break;
258 	case PTHREAD_PRIO_PROTECT:
259 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
260 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
261 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
262 		break;
263 	}
264 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
265 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
266 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
267 		mutex_init_robust(NULL);
268 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
269 	}
270 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
271 		pmutex->m_spinloops =
272 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
273 		pmutex->m_yieldloops = _thr_yieldloops;
274 	}
275 }
276 
277 static int
278 mutex_init(pthread_mutex_t *mutex,
279     const struct pthread_mutex_attr *mutex_attr,
280     void *(calloc_cb)(size_t, size_t))
281 {
282 	const struct pthread_mutex_attr *attr;
283 	struct pthread_mutex *pmutex;
284 	int error;
285 
286 	if (mutex_attr == NULL) {
287 		attr = &_pthread_mutexattr_default;
288 	} else {
289 		attr = mutex_attr;
290 		error = mutex_check_attr(attr);
291 		if (error != 0)
292 			return (error);
293 	}
294 	if ((pmutex = (pthread_mutex_t)calloc_cb(1,
295 	    sizeof(struct pthread_mutex))) == NULL)
296 		return (ENOMEM);
297 	mutex_init_body(pmutex, attr);
298 	*mutex = pmutex;
299 	return (0);
300 }
301 
302 static int
303 init_static(struct pthread *thread, pthread_mutex_t *mutex)
304 {
305 	int ret;
306 
307 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
308 
309 	if (*mutex == THR_MUTEX_INITIALIZER)
310 		ret = mutex_init(mutex, &_pthread_mutexattr_default,
311 		    __thr_calloc);
312 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
313 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
314 		    __thr_calloc);
315 	else
316 		ret = 0;
317 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
318 
319 	return (ret);
320 }
321 
322 static void
323 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
324 {
325 	struct pthread_mutex *m2;
326 
327 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
328 	if (m2 != NULL)
329 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
330 	else
331 		m->m_lock.m_ceilings[1] = -1;
332 }
333 
334 static void
335 shared_mutex_init(struct pthread_mutex *pmtx, const struct
336     pthread_mutex_attr *mutex_attr)
337 {
338 	static const struct pthread_mutex_attr foobar_mutex_attr = {
339 		.m_type = PTHREAD_MUTEX_DEFAULT,
340 		.m_protocol = PTHREAD_PRIO_NONE,
341 		.m_ceiling = 0,
342 		.m_pshared = PTHREAD_PROCESS_SHARED,
343 		.m_robust = PTHREAD_MUTEX_STALLED,
344 	};
345 	bool done;
346 
347 	/*
348 	 * Hack to allow multiple pthread_mutex_init() calls on the
349 	 * same process-shared mutex.  We rely on kernel allocating
350 	 * zeroed offpage for the mutex, i.e. the
351 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
352 	 */
353 	for (done = false; !done;) {
354 		switch (pmtx->m_ps) {
355 		case PMUTEX_INITSTAGE_DONE:
356 			atomic_thread_fence_acq();
357 			done = true;
358 			break;
359 		case PMUTEX_INITSTAGE_ALLOC:
360 			if (atomic_cmpset_int(&pmtx->m_ps,
361 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
362 				if (mutex_attr == NULL)
363 					mutex_attr = &foobar_mutex_attr;
364 				mutex_init_body(pmtx, mutex_attr);
365 				atomic_store_rel_int(&pmtx->m_ps,
366 				    PMUTEX_INITSTAGE_DONE);
367 				done = true;
368 			}
369 			break;
370 		case PMUTEX_INITSTAGE_BUSY:
371 			_pthread_yield();
372 			break;
373 		default:
374 			PANIC("corrupted offpage");
375 			break;
376 		}
377 	}
378 }
379 
380 int
381 __Tthr_mutex_init(pthread_mutex_t * __restrict mutex,
382     const pthread_mutexattr_t * __restrict mutex_attr)
383 {
384 	struct pthread_mutex *pmtx;
385 	int ret;
386 
387 	_thr_check_init();
388 
389 	if (mutex_attr != NULL) {
390 		ret = mutex_check_attr(*mutex_attr);
391 		if (ret != 0)
392 			return (ret);
393 	}
394 	if (mutex_attr == NULL ||
395 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
396 		__thr_malloc_init();
397 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
398 		    __thr_calloc));
399 	}
400 	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
401 	if (pmtx == NULL)
402 		return (EFAULT);
403 	*mutex = THR_PSHARED_PTR;
404 	shared_mutex_init(pmtx, *mutex_attr);
405 	return (0);
406 }
407 
408 /* This function is used internally by malloc. */
409 int
410 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
411     void *(calloc_cb)(size_t, size_t))
412 {
413 	static const struct pthread_mutex_attr attr = {
414 		.m_type = PTHREAD_MUTEX_NORMAL,
415 		.m_protocol = PTHREAD_PRIO_NONE,
416 		.m_ceiling = 0,
417 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
418 		.m_robust = PTHREAD_MUTEX_STALLED,
419 	};
420 	int ret;
421 
422 	ret = mutex_init(mutex, &attr, calloc_cb);
423 	if (ret == 0)
424 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
425 	return (ret);
426 }
427 
428 /*
429  * Fix mutex ownership for child process.
430  *
431  * Process private mutex ownership is transmitted from the forking
432  * thread to the child process.
433  *
434  * Process shared mutex should not be inherited because owner is
435  * forking thread which is in parent process, they are removed from
436  * the owned mutex list.
437  */
438 static void
439 queue_fork(struct pthread *curthread, struct mutex_queue *q,
440     struct mutex_queue *qp, uint bit)
441 {
442 	struct pthread_mutex *m;
443 
444 	TAILQ_INIT(q);
445 	TAILQ_FOREACH(m, qp, m_pqe) {
446 		TAILQ_INSERT_TAIL(q, m, m_qe);
447 		m->m_lock.m_owner = TID(curthread) | bit;
448 	}
449 }
450 
451 void
452 _mutex_fork(struct pthread *curthread)
453 {
454 
455 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
456 	    &curthread->mq[TMQ_NORM_PRIV], 0);
457 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
458 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
459 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
460 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
461 	curthread->robust_list = 0;
462 }
463 
464 int
465 _thr_mutex_destroy(pthread_mutex_t *mutex)
466 {
467 	pthread_mutex_t m, m1;
468 	int ret;
469 
470 	m = *mutex;
471 	if (m < THR_MUTEX_DESTROYED) {
472 		ret = 0;
473 	} else if (m == THR_MUTEX_DESTROYED) {
474 		ret = EINVAL;
475 	} else {
476 		if (m == THR_PSHARED_PTR) {
477 			m1 = __thr_pshared_offpage(mutex, 0);
478 			if (m1 != NULL) {
479 				if ((uint32_t)m1->m_lock.m_owner !=
480 				    UMUTEX_RB_OWNERDEAD) {
481 					mutex_assert_not_owned(
482 					    _get_curthread(), m1);
483 				}
484 				__thr_pshared_destroy(mutex);
485 			}
486 			*mutex = THR_MUTEX_DESTROYED;
487 			return (0);
488 		}
489 		if (PMUTEX_OWNER_ID(m) != 0 &&
490 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
491 			ret = EBUSY;
492 		} else {
493 			*mutex = THR_MUTEX_DESTROYED;
494 			mutex_assert_not_owned(_get_curthread(), m);
495 			__thr_free(m);
496 			ret = 0;
497 		}
498 	}
499 
500 	return (ret);
501 }
502 
503 static int
504 mutex_qidx(struct pthread_mutex *m)
505 {
506 
507 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
508 		return (TMQ_NORM);
509 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
510 }
511 
512 /*
513  * Both enqueue_mutex() and dequeue_mutex() operate on the
514  * thread-private linkage of the locked mutexes and on the robust
515  * linkage.
516  *
517  * Robust list, as seen by kernel, must be consistent even in the case
518  * of thread termination at arbitrary moment.  Since either enqueue or
519  * dequeue for list walked by kernel consists of rewriting a single
520  * forward pointer, it is safe.  On the other hand, rewrite of the
521  * back pointer is not atomic WRT the forward one, but kernel does not
522  * care.
523  */
524 static void
525 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
526     int error)
527 {
528 	struct pthread_mutex *m1;
529 	uintptr_t *rl;
530 	int qidx;
531 
532 	/* Add to the list of owned mutexes: */
533 	if (error != EOWNERDEAD)
534 		mutex_assert_not_owned(curthread, m);
535 	qidx = mutex_qidx(m);
536 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
537 	if (!is_pshared_mutex(m))
538 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
539 	if (is_robust_mutex(m)) {
540 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
541 		    &curthread->priv_robust_list;
542 		m->m_rb_prev = NULL;
543 		if (*rl != 0) {
544 			m1 = __containerof((void *)*rl,
545 			    struct pthread_mutex, m_lock);
546 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
547 			m1->m_rb_prev = m;
548 		} else {
549 			m1 = NULL;
550 			m->m_lock.m_rb_lnk = 0;
551 		}
552 		*rl = (uintptr_t)&m->m_lock;
553 	}
554 }
555 
556 static void
557 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
558 {
559 	struct pthread_mutex *mp, *mn;
560 	int qidx;
561 
562 	mutex_assert_is_owned(m);
563 	qidx = mutex_qidx(m);
564 	if (is_robust_mutex(m)) {
565 		mp = m->m_rb_prev;
566 		if (mp == NULL) {
567 			if (is_pshared_mutex(m)) {
568 				curthread->robust_list = m->m_lock.m_rb_lnk;
569 			} else {
570 				curthread->priv_robust_list =
571 				    m->m_lock.m_rb_lnk;
572 			}
573 		} else {
574 			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
575 		}
576 		if (m->m_lock.m_rb_lnk != 0) {
577 			mn = __containerof((void *)m->m_lock.m_rb_lnk,
578 			    struct pthread_mutex, m_lock);
579 			mn->m_rb_prev = m->m_rb_prev;
580 		}
581 		m->m_lock.m_rb_lnk = 0;
582 		m->m_rb_prev = NULL;
583 	}
584 	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
585 	if (!is_pshared_mutex(m))
586 		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
587 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
588 		set_inherited_priority(curthread, m);
589 	mutex_init_link(m);
590 }
591 
592 static int
593 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
594 {
595 	int ret;
596 
597 	*m = *mutex;
598 	ret = 0;
599 	if (__predict_false(*m == THR_PSHARED_PTR)) {
600 		*m = __thr_pshared_offpage(mutex, 0);
601 		if (*m == NULL)
602 			ret = EINVAL;
603 		else
604 			shared_mutex_init(*m, NULL);
605 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
606 		if (*m == THR_MUTEX_DESTROYED) {
607 			ret = EINVAL;
608 		} else {
609 			ret = init_static(_get_curthread(), mutex);
610 			if (ret == 0)
611 				*m = *mutex;
612 		}
613 	}
614 	return (ret);
615 }
616 
617 int
618 __Tthr_mutex_trylock(pthread_mutex_t *mutex)
619 {
620 	struct pthread *curthread;
621 	struct pthread_mutex *m;
622 	uint32_t id;
623 	int ret, robust;
624 
625 	ret = check_and_init_mutex(mutex, &m);
626 	if (ret != 0)
627 		return (ret);
628 	curthread = _get_curthread();
629 	id = TID(curthread);
630 	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
631 		THR_CRITICAL_ENTER(curthread);
632 	robust = _mutex_enter_robust(curthread, m);
633 	ret = _thr_umutex_trylock(&m->m_lock, id);
634 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
635 		enqueue_mutex(curthread, m, ret);
636 		if (ret == EOWNERDEAD)
637 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
638 	} else if (PMUTEX_OWNER_ID(m) == id) {
639 		ret = mutex_self_trylock(m);
640 	} /* else {} */
641 	if (robust)
642 		_mutex_leave_robust(curthread, m);
643 	if (ret != 0 && ret != EOWNERDEAD &&
644 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
645 		THR_CRITICAL_LEAVE(curthread);
646 	return (ret);
647 }
648 
649 static int
650 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
651     const struct timespec *abstime)
652 {
653 	uint32_t id, owner;
654 	int count, ret;
655 
656 	id = TID(curthread);
657 	if (PMUTEX_OWNER_ID(m) == id)
658 		return (mutex_self_lock(m, abstime));
659 
660 	/*
661 	 * For adaptive mutexes, spin for a bit in the expectation
662 	 * that if the application requests this mutex type then
663 	 * the lock is likely to be released quickly and it is
664 	 * faster than entering the kernel
665 	 */
666 	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
667 	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
668 		goto sleep_in_kernel;
669 
670 	if (!_thr_is_smp)
671 		goto yield_loop;
672 
673 	count = m->m_spinloops;
674 	while (count--) {
675 		owner = m->m_lock.m_owner;
676 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
677 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
678 			    id | owner)) {
679 				ret = 0;
680 				goto done;
681 			}
682 		}
683 		CPU_SPINWAIT;
684 	}
685 
686 yield_loop:
687 	count = m->m_yieldloops;
688 	while (count--) {
689 		_sched_yield();
690 		owner = m->m_lock.m_owner;
691 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
692 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
693 			    id | owner)) {
694 				ret = 0;
695 				goto done;
696 			}
697 		}
698 	}
699 
700 sleep_in_kernel:
701 	if (abstime == NULL)
702 		ret = __thr_umutex_lock(&m->m_lock, id);
703 	else if (__predict_false(abstime->tv_nsec < 0 ||
704 	    abstime->tv_nsec >= 1000000000))
705 		ret = EINVAL;
706 	else
707 		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
708 done:
709 	if (ret == 0 || ret == EOWNERDEAD) {
710 		enqueue_mutex(curthread, m, ret);
711 		if (ret == EOWNERDEAD)
712 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
713 	}
714 	return (ret);
715 }
716 
717 static __always_inline int
718 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
719     bool cvattach, bool rb_onlist)
720 {
721 	struct pthread *curthread;
722 	int ret, robust;
723 
724 	robust = 0;  /* pacify gcc */
725 	curthread  = _get_curthread();
726 	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
727 		THR_CRITICAL_ENTER(curthread);
728 	if (!rb_onlist)
729 		robust = _mutex_enter_robust(curthread, m);
730 	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
731 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
732 		enqueue_mutex(curthread, m, ret);
733 		if (ret == EOWNERDEAD)
734 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
735 	} else {
736 		ret = mutex_lock_sleep(curthread, m, abstime);
737 	}
738 	if (!rb_onlist && robust)
739 		_mutex_leave_robust(curthread, m);
740 	if (ret != 0 && ret != EOWNERDEAD &&
741 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
742 		THR_CRITICAL_LEAVE(curthread);
743 	return (ret);
744 }
745 
/*
 * pthread_mutex_lock() implementation: resolve (and, if needed,
 * initialize) the mutex, then lock it without a timeout.
 */
int
__Tthr_mutex_lock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	int error;

	_thr_check_init();
	error = check_and_init_mutex(mutex, &m);
	if (error != 0)
		return (error);
	return (mutex_lock_common(m, NULL, false, false));
}
758 
/*
 * pthread_mutex_timedlock() implementation: resolve (and, if needed,
 * initialize) the mutex, then lock it with abstime as absolute timeout.
 */
int
__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{
	struct pthread_mutex *m;
	int error;

	_thr_check_init();
	error = check_and_init_mutex(mutex, &m);
	if (error != 0)
		return (error);
	return (mutex_lock_common(m, abstime, false, false));
}
772 
773 int
774 _thr_mutex_unlock(pthread_mutex_t *mutex)
775 {
776 	struct pthread_mutex *mp;
777 
778 	if (*mutex == THR_PSHARED_PTR) {
779 		mp = __thr_pshared_offpage(mutex, 0);
780 		if (mp == NULL)
781 			return (EINVAL);
782 		shared_mutex_init(mp, NULL);
783 	} else {
784 		mp = *mutex;
785 	}
786 	return (mutex_unlock_common(mp, false, NULL));
787 }
788 
789 int
790 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
791 {
792 	int error;
793 
794 	error = mutex_lock_common(m, NULL, true, rb_onlist);
795 	if (error == 0 || error == EOWNERDEAD)
796 		m->m_count = count;
797 	return (error);
798 }
799 
800 int
801 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
802 {
803 
804 	/*
805 	 * Clear the count in case this is a recursive mutex.
806 	 */
807 	*count = m->m_count;
808 	m->m_count = 0;
809 	(void)mutex_unlock_common(m, true, defer);
810         return (0);
811 }
812 
813 int
814 _mutex_cv_attach(struct pthread_mutex *m, int count)
815 {
816 	struct pthread *curthread;
817 
818 	curthread = _get_curthread();
819 	enqueue_mutex(curthread, m, 0);
820 	m->m_count = count;
821 	return (0);
822 }
823 
824 int
825 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
826 {
827 	struct pthread *curthread;
828 	int deferred, error;
829 
830 	curthread = _get_curthread();
831 	if ((error = _mutex_owned(curthread, mp)) != 0)
832 		return (error);
833 
834 	/*
835 	 * Clear the count in case this is a recursive mutex.
836 	 */
837 	*recurse = mp->m_count;
838 	mp->m_count = 0;
839 	dequeue_mutex(curthread, mp);
840 
841 	/* Will this happen in real-world ? */
842         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
843 		deferred = 1;
844 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
845 	} else
846 		deferred = 0;
847 
848 	if (deferred)  {
849 		_thr_wake_all(curthread->defer_waiters,
850 		    curthread->nwaiter_defer);
851 		curthread->nwaiter_defer = 0;
852 	}
853 	return (0);
854 }
855 
856 static int
857 mutex_self_trylock(struct pthread_mutex *m)
858 {
859 	int ret;
860 
861 	switch (PMUTEX_TYPE(m->m_flags)) {
862 	case PTHREAD_MUTEX_ERRORCHECK:
863 	case PTHREAD_MUTEX_NORMAL:
864 	case PTHREAD_MUTEX_ADAPTIVE_NP:
865 		ret = EBUSY;
866 		break;
867 
868 	case PTHREAD_MUTEX_RECURSIVE:
869 		/* Increment the lock count: */
870 		if (m->m_count + 1 > 0) {
871 			m->m_count++;
872 			ret = 0;
873 		} else
874 			ret = EAGAIN;
875 		break;
876 
877 	default:
878 		/* Trap invalid mutex types; */
879 		ret = EINVAL;
880 	}
881 
882 	return (ret);
883 }
884 
885 static int
886 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
887 {
888 	struct timespec	ts1, ts2;
889 	int ret;
890 
891 	switch (PMUTEX_TYPE(m->m_flags)) {
892 	case PTHREAD_MUTEX_ERRORCHECK:
893 	case PTHREAD_MUTEX_ADAPTIVE_NP:
894 		if (abstime) {
895 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
896 			    abstime->tv_nsec >= 1000000000) {
897 				ret = EINVAL;
898 			} else {
899 				clock_gettime(CLOCK_REALTIME, &ts1);
900 				TIMESPEC_SUB(&ts2, abstime, &ts1);
901 				__sys_nanosleep(&ts2, NULL);
902 				ret = ETIMEDOUT;
903 			}
904 		} else {
905 			/*
906 			 * POSIX specifies that mutexes should return
907 			 * EDEADLK if a recursive lock is detected.
908 			 */
909 			ret = EDEADLK;
910 		}
911 		break;
912 
913 	case PTHREAD_MUTEX_NORMAL:
914 		/*
915 		 * What SS2 define as a 'normal' mutex.  Intentionally
916 		 * deadlock on attempts to get a lock you already own.
917 		 */
918 		ret = 0;
919 		if (abstime) {
920 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
921 			    abstime->tv_nsec >= 1000000000) {
922 				ret = EINVAL;
923 			} else {
924 				clock_gettime(CLOCK_REALTIME, &ts1);
925 				TIMESPEC_SUB(&ts2, abstime, &ts1);
926 				__sys_nanosleep(&ts2, NULL);
927 				ret = ETIMEDOUT;
928 			}
929 		} else {
930 			ts1.tv_sec = 30;
931 			ts1.tv_nsec = 0;
932 			for (;;)
933 				__sys_nanosleep(&ts1, NULL);
934 		}
935 		break;
936 
937 	case PTHREAD_MUTEX_RECURSIVE:
938 		/* Increment the lock count: */
939 		if (m->m_count + 1 > 0) {
940 			m->m_count++;
941 			ret = 0;
942 		} else
943 			ret = EAGAIN;
944 		break;
945 
946 	default:
947 		/* Trap invalid mutex types; */
948 		ret = EINVAL;
949 	}
950 
951 	return (ret);
952 }
953 
954 static __always_inline int
955 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
956 {
957 	struct pthread *curthread;
958 	uint32_t id;
959 	int deferred, error, private, robust;
960 
961 	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
962 		if (m == THR_MUTEX_DESTROYED)
963 			return (EINVAL);
964 		return (EPERM);
965 	}
966 
967 	curthread = _get_curthread();
968 	id = TID(curthread);
969 
970 	/*
971 	 * Check if the running thread is not the owner of the mutex.
972 	 */
973 	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
974 		return (EPERM);
975 
976 	error = 0;
977 	private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
978 	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
979 	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
980 		m->m_count--;
981 	} else {
982 		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
983 			deferred = 1;
984 			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
985         	} else
986 			deferred = 0;
987 
988 		robust = _mutex_enter_robust(curthread, m);
989 		dequeue_mutex(curthread, m);
990 		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
991 		if (deferred)  {
992 			if (mtx_defer == NULL) {
993 				_thr_wake_all(curthread->defer_waiters,
994 				    curthread->nwaiter_defer);
995 				curthread->nwaiter_defer = 0;
996 			} else
997 				*mtx_defer = 1;
998 		}
999 		if (robust)
1000 			_mutex_leave_robust(curthread, m);
1001 	}
1002 	if (!cv && private)
1003 		THR_CRITICAL_LEAVE(curthread);
1004 	return (error);
1005 }
1006 
1007 int
1008 _pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
1009     int * __restrict prioceiling)
1010 {
1011 	struct pthread_mutex *m;
1012 
1013 	if (*mutex == THR_PSHARED_PTR) {
1014 		m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
1015 		if (m == NULL)
1016 			return (EINVAL);
1017 		shared_mutex_init(m, NULL);
1018 	} else {
1019 		m = *mutex;
1020 		if (m <= THR_MUTEX_DESTROYED)
1021 			return (EINVAL);
1022 	}
1023 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1024 		return (EINVAL);
1025 	*prioceiling = m->m_lock.m_ceilings[0];
1026 	return (0);
1027 }
1028 
1029 int
1030 _pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
1031     int ceiling, int * __restrict old_ceiling)
1032 {
1033 	struct pthread *curthread;
1034 	struct pthread_mutex *m, *m1, *m2;
1035 	struct mutex_queue *q, *qp;
1036 	int qidx, ret;
1037 
1038 	if (*mutex == THR_PSHARED_PTR) {
1039 		m = __thr_pshared_offpage(mutex, 0);
1040 		if (m == NULL)
1041 			return (EINVAL);
1042 		shared_mutex_init(m, NULL);
1043 	} else {
1044 		m = *mutex;
1045 		if (m <= THR_MUTEX_DESTROYED)
1046 			return (EINVAL);
1047 	}
1048 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1049 		return (EINVAL);
1050 
1051 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1052 	if (ret != 0)
1053 		return (ret);
1054 
1055 	curthread = _get_curthread();
1056 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1057 		mutex_assert_is_owned(m);
1058 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1059 		m2 = TAILQ_NEXT(m, m_qe);
1060 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1061 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1062 			qidx = mutex_qidx(m);
1063 			q = &curthread->mq[qidx];
1064 			qp = &curthread->mq[qidx + 1];
1065 			TAILQ_REMOVE(q, m, m_qe);
1066 			if (!is_pshared_mutex(m))
1067 				TAILQ_REMOVE(qp, m, m_pqe);
1068 			TAILQ_FOREACH(m2, q, m_qe) {
1069 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1070 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1071 					if (!is_pshared_mutex(m)) {
1072 						while (m2 != NULL &&
1073 						    is_pshared_mutex(m2)) {
1074 							m2 = TAILQ_PREV(m2,
1075 							    mutex_queue, m_qe);
1076 						}
1077 						if (m2 == NULL) {
1078 							TAILQ_INSERT_HEAD(qp,
1079 							    m, m_pqe);
1080 						} else {
1081 							TAILQ_INSERT_BEFORE(m2,
1082 							    m, m_pqe);
1083 						}
1084 					}
1085 					return (0);
1086 				}
1087 			}
1088 			TAILQ_INSERT_TAIL(q, m, m_qe);
1089 			if (!is_pshared_mutex(m))
1090 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1091 		}
1092 	}
1093 	return (0);
1094 }
1095 
1096 int
1097 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1098 {
1099 	struct pthread_mutex *m;
1100 	int ret;
1101 
1102 	ret = check_and_init_mutex(mutex, &m);
1103 	if (ret == 0)
1104 		*count = m->m_spinloops;
1105 	return (ret);
1106 }
1107 
1108 int
1109 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1110 {
1111 	struct pthread_mutex *m;
1112 	int ret;
1113 
1114 	ret = check_and_init_mutex(mutex, &m);
1115 	if (ret == 0)
1116 		m->m_spinloops = count;
1117 	return (ret);
1118 }
1119 
1120 int
1121 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1122 {
1123 	struct pthread_mutex *m;
1124 	int ret;
1125 
1126 	ret = check_and_init_mutex(mutex, &m);
1127 	if (ret == 0)
1128 		*count = m->m_yieldloops;
1129 	return (ret);
1130 }
1131 
1132 int
1133 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1134 {
1135 	struct pthread_mutex *m;
1136 	int ret;
1137 
1138 	ret = check_and_init_mutex(mutex, &m);
1139 	if (ret == 0)
1140 		m->m_yieldloops = count;
1141 	return (0);
1142 }
1143 
1144 int
1145 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1146 {
1147 	struct pthread_mutex *m;
1148 
1149 	if (*mutex == THR_PSHARED_PTR) {
1150 		m = __thr_pshared_offpage(mutex, 0);
1151 		if (m == NULL)
1152 			return (0);
1153 		shared_mutex_init(m, NULL);
1154 	} else {
1155 		m = *mutex;
1156 		if (m <= THR_MUTEX_DESTROYED)
1157 			return (0);
1158 	}
1159 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1160 }
1161 
1162 int
1163 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1164 {
1165 
1166 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1167 		if (mp == THR_MUTEX_DESTROYED)
1168 			return (EINVAL);
1169 		return (EPERM);
1170 	}
1171 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1172 		return (EPERM);
1173 	return (0);
1174 }
1175 
1176 int
1177 _Tthr_mutex_consistent(pthread_mutex_t *mutex)
1178 {
1179 	struct pthread_mutex *m;
1180 	struct pthread *curthread;
1181 
1182 	if (*mutex == THR_PSHARED_PTR) {
1183 		m = __thr_pshared_offpage(mutex, 0);
1184 		if (m == NULL)
1185 			return (EINVAL);
1186 		shared_mutex_init(m, NULL);
1187 	} else {
1188 		m = *mutex;
1189 		if (m <= THR_MUTEX_DESTROYED)
1190 			return (EINVAL);
1191 	}
1192 	curthread = _get_curthread();
1193 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1194 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1195 		return (EINVAL);
1196 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1197 		return (EPERM);
1198 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1199 	return (0);
1200 }
1201