xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision f81cdf24ba5436367377f7c8e8f51f6df2a75ca7)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
5  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
6  * Copyright (c) 2015, 2016 The FreeBSD Foundation
7  *
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Konstantin Belousov
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by John Birrell.
24  * 4. Neither the name of the author nor the names of any co-contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40 
#include <sys/cdefs.h>
#include "namespace.h"
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <pthread.h>
#include <pthread_np.h>
#include "un-namespace.h"

#include "thr_private.h"
53 
/*
 * Compile-time guard: struct pthread_mutex must not be larger than
 * THR_PAGE_SIZE_MIN.  Process-shared mutexes in this file live in an
 * "off-page" obtained from __thr_pshared_offpage().
 */
_Static_assert(sizeof(struct pthread_mutex) <= THR_PAGE_SIZE_MIN,
    "pthread_mutex is too large for off-page");

/*
 * For adaptive mutexes, how many times to spin doing trylock2
 * before entering the kernel to block
 *
 * mutex_init_body() uses this value as the spin count only when the
 * global _thr_spinloops is zero.
 */
#define MUTEX_ADAPTIVE_SPINS	2000
62 
/*
 * Prototypes
 *
 * The non-static names declared here are either defined as functions
 * later in this file or created by the symbol reference macros below
 * (_pthread_mutex_setspinloops_np and _pthread_mutex_setyieldloops_np
 * are strong references to their double-underscore counterparts).
 */
int	__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
		const struct timespec * __restrict abstime);
int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);

/*
 * File-local helpers that are referenced before their definitions.
 */
static int	mutex_self_trylock(pthread_mutex_t);
static int	mutex_self_lock(pthread_mutex_t,
				const struct timespec *abstime);
static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
				const struct timespec *);
static void	mutex_init_robust(struct pthread *curthread);
static int	mutex_qidx(struct pthread_mutex *m);
static bool	is_robust_mutex(struct pthread_mutex *m);
static bool	is_pshared_mutex(struct pthread_mutex *m);
85 
/*
 * Export table: each macro below binds the alias named by its second
 * argument to the implementation named by its first argument, as a
 * weak or strong reference respectively.
 */

/* Initialization, locking, try-locking and consistency recovery. */
__weak_reference(__Tthr_mutex_init, pthread_mutex_init);
__weak_reference(__Tthr_mutex_init, __pthread_mutex_init);
__strong_reference(__Tthr_mutex_init, _pthread_mutex_init);
__weak_reference(__Tthr_mutex_lock, pthread_mutex_lock);
__weak_reference(__Tthr_mutex_lock, __pthread_mutex_lock);
__strong_reference(__Tthr_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__Tthr_mutex_trylock, pthread_mutex_trylock);
__weak_reference(__Tthr_mutex_trylock, __pthread_mutex_trylock);
__strong_reference(__Tthr_mutex_trylock, _pthread_mutex_trylock);
__weak_reference(_Tthr_mutex_consistent, pthread_mutex_consistent);
__weak_reference(_Tthr_mutex_consistent, _pthread_mutex_consistent);
__strong_reference(_Tthr_mutex_consistent, __pthread_mutex_consistent);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_thr_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_thr_mutex_destroy, _pthread_mutex_destroy);
__weak_reference(_thr_mutex_unlock, pthread_mutex_unlock);
__weak_reference(_thr_mutex_unlock, _pthread_mutex_unlock);

/* Priority ceiling accessors. */
__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

/* Non-portable spin loop count accessors. */
__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

/* Non-portable yield loop count accessors and the ownership query. */
__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
119 
120 static void
121 mutex_init_link(struct pthread_mutex *m __unused)
122 {
123 
124 #if defined(_PTHREADS_INVARIANTS)
125 	m->m_qe.tqe_prev = NULL;
126 	m->m_qe.tqe_next = NULL;
127 	m->m_pqe.tqe_prev = NULL;
128 	m->m_pqe.tqe_next = NULL;
129 #endif
130 }
131 
132 static void
133 mutex_assert_is_owned(struct pthread_mutex *m __unused)
134 {
135 
136 #if defined(_PTHREADS_INVARIANTS)
137 	if (__predict_false(m->m_qe.tqe_prev == NULL))
138 		PANIC("mutex %p own %#x is not on list %p %p",
139 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
140 #endif
141 }
142 
143 static void
144 mutex_assert_not_owned(struct pthread *curthread __unused,
145     struct pthread_mutex *m __unused)
146 {
147 
148 #if defined(_PTHREADS_INVARIANTS)
149 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
150 	    m->m_qe.tqe_next != NULL))
151 		PANIC("mutex %p own %#x is on list %p %p",
152 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
153 	if (__predict_false(is_robust_mutex(m) &&
154 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
155 	    (is_pshared_mutex(m) && curthread->robust_list ==
156 	    (uintptr_t)&m->m_lock) ||
157 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
158 	    (uintptr_t)&m->m_lock))))
159 		PANIC(
160     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
161 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
162 		    m->m_rb_prev, (void *)curthread->robust_list,
163 		    (void *)curthread->priv_robust_list);
164 #endif
165 }
166 
167 static bool
168 is_pshared_mutex(struct pthread_mutex *m)
169 {
170 
171 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
172 }
173 
174 static bool
175 is_robust_mutex(struct pthread_mutex *m)
176 {
177 
178 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
179 }
180 
181 int
182 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
183 {
184 
185 #if defined(_PTHREADS_INVARIANTS)
186 	if (__predict_false(curthread->inact_mtx != 0))
187 		PANIC("inact_mtx enter");
188 #endif
189 	if (!is_robust_mutex(m))
190 		return (0);
191 
192 	mutex_init_robust(curthread);
193 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
194 	return (1);
195 }
196 
197 void
198 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
199 {
200 
201 #if defined(_PTHREADS_INVARIANTS)
202 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
203 		PANIC("inact_mtx leave");
204 #endif
205 	curthread->inact_mtx = 0;
206 }
207 
208 static int
209 mutex_check_attr(const struct pthread_mutex_attr *attr)
210 {
211 
212 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
213 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
214 		return (EINVAL);
215 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
216 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
217 		return (EINVAL);
218 	return (0);
219 }
220 
221 static void
222 mutex_init_robust(struct pthread *curthread)
223 {
224 	struct umtx_robust_lists_params rb;
225 
226 	if (curthread == NULL)
227 		curthread = _get_curthread();
228 	if (curthread->robust_inited)
229 		return;
230 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
231 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
232 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
233 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
234 	curthread->robust_inited = 1;
235 }
236 
237 static void
238 mutex_init_body(struct pthread_mutex *pmutex,
239     const struct pthread_mutex_attr *attr)
240 {
241 
242 	pmutex->m_flags = attr->m_type;
243 	pmutex->m_count = 0;
244 	pmutex->m_spinloops = 0;
245 	pmutex->m_yieldloops = 0;
246 	mutex_init_link(pmutex);
247 	switch (attr->m_protocol) {
248 	case PTHREAD_PRIO_NONE:
249 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
250 		pmutex->m_lock.m_flags = 0;
251 		break;
252 	case PTHREAD_PRIO_INHERIT:
253 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
254 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
255 		break;
256 	case PTHREAD_PRIO_PROTECT:
257 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
258 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
259 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
260 		break;
261 	}
262 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
263 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
264 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
265 		mutex_init_robust(NULL);
266 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
267 	}
268 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
269 		pmutex->m_spinloops =
270 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
271 		pmutex->m_yieldloops = _thr_yieldloops;
272 	}
273 }
274 
275 static int
276 mutex_init(pthread_mutex_t *mutex,
277     const struct pthread_mutex_attr *mutex_attr,
278     void *(calloc_cb)(size_t, size_t))
279 {
280 	const struct pthread_mutex_attr *attr;
281 	struct pthread_mutex *pmutex;
282 	int error;
283 
284 	if (mutex_attr == NULL) {
285 		attr = &_pthread_mutexattr_default;
286 	} else {
287 		attr = mutex_attr;
288 		error = mutex_check_attr(attr);
289 		if (error != 0)
290 			return (error);
291 	}
292 	if ((pmutex = (pthread_mutex_t)calloc_cb(1,
293 	    sizeof(struct pthread_mutex))) == NULL)
294 		return (ENOMEM);
295 	mutex_init_body(pmutex, attr);
296 	*mutex = pmutex;
297 	return (0);
298 }
299 
300 static int
301 init_static(struct pthread *thread, pthread_mutex_t *mutex)
302 {
303 	int ret;
304 
305 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
306 
307 	if (*mutex == THR_MUTEX_INITIALIZER)
308 		ret = mutex_init(mutex, &_pthread_mutexattr_default,
309 		    __thr_calloc);
310 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
311 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
312 		    __thr_calloc);
313 	else
314 		ret = 0;
315 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
316 
317 	return (ret);
318 }
319 
320 static void
321 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
322 {
323 	struct pthread_mutex *m2;
324 
325 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
326 	if (m2 != NULL)
327 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
328 	else
329 		m->m_lock.m_ceilings[1] = -1;
330 }
331 
332 static void
333 shared_mutex_init(struct pthread_mutex *pmtx, const struct
334     pthread_mutex_attr *mutex_attr)
335 {
336 	static const struct pthread_mutex_attr foobar_mutex_attr = {
337 		.m_type = PTHREAD_MUTEX_DEFAULT,
338 		.m_protocol = PTHREAD_PRIO_NONE,
339 		.m_ceiling = 0,
340 		.m_pshared = PTHREAD_PROCESS_SHARED,
341 		.m_robust = PTHREAD_MUTEX_STALLED,
342 	};
343 	bool done;
344 
345 	/*
346 	 * Hack to allow multiple pthread_mutex_init() calls on the
347 	 * same process-shared mutex.  We rely on kernel allocating
348 	 * zeroed offpage for the mutex, i.e. the
349 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
350 	 */
351 	for (done = false; !done;) {
352 		switch (pmtx->m_ps) {
353 		case PMUTEX_INITSTAGE_DONE:
354 			atomic_thread_fence_acq();
355 			done = true;
356 			break;
357 		case PMUTEX_INITSTAGE_ALLOC:
358 			if (atomic_cmpset_int(&pmtx->m_ps,
359 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
360 				if (mutex_attr == NULL)
361 					mutex_attr = &foobar_mutex_attr;
362 				mutex_init_body(pmtx, mutex_attr);
363 				atomic_store_rel_int(&pmtx->m_ps,
364 				    PMUTEX_INITSTAGE_DONE);
365 				done = true;
366 			}
367 			break;
368 		case PMUTEX_INITSTAGE_BUSY:
369 			_pthread_yield();
370 			break;
371 		default:
372 			PANIC("corrupted offpage");
373 			break;
374 		}
375 	}
376 }
377 
378 int
379 __Tthr_mutex_init(pthread_mutex_t * __restrict mutex,
380     const pthread_mutexattr_t * __restrict mutex_attr)
381 {
382 	struct pthread_mutex *pmtx;
383 	int ret;
384 
385 	_thr_check_init();
386 
387 	if (mutex_attr != NULL) {
388 		ret = mutex_check_attr(*mutex_attr);
389 		if (ret != 0)
390 			return (ret);
391 	}
392 	if (mutex_attr == NULL ||
393 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
394 		__thr_malloc_init();
395 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
396 		    __thr_calloc));
397 	}
398 	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
399 	if (pmtx == NULL)
400 		return (EFAULT);
401 	*mutex = THR_PSHARED_PTR;
402 	shared_mutex_init(pmtx, *mutex_attr);
403 	return (0);
404 }
405 
406 /* This function is used internally by malloc. */
407 int
408 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
409     void *(calloc_cb)(size_t, size_t))
410 {
411 	static const struct pthread_mutex_attr attr = {
412 		.m_type = PTHREAD_MUTEX_NORMAL,
413 		.m_protocol = PTHREAD_PRIO_NONE,
414 		.m_ceiling = 0,
415 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
416 		.m_robust = PTHREAD_MUTEX_STALLED,
417 	};
418 	int ret;
419 
420 	ret = mutex_init(mutex, &attr, calloc_cb);
421 	if (ret == 0)
422 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
423 	return (ret);
424 }
425 
426 /*
427  * Fix mutex ownership for child process.
428  *
429  * Process private mutex ownership is transmitted from the forking
430  * thread to the child process.
431  *
432  * Process shared mutex should not be inherited because owner is
433  * forking thread which is in parent process, they are removed from
434  * the owned mutex list.
435  */
436 static void
437 queue_fork(struct pthread *curthread, struct mutex_queue *q,
438     struct mutex_queue *qp, uint bit)
439 {
440 	struct pthread_mutex *m;
441 
442 	TAILQ_INIT(q);
443 	TAILQ_FOREACH(m, qp, m_pqe) {
444 		TAILQ_INSERT_TAIL(q, m, m_qe);
445 		m->m_lock.m_owner = TID(curthread) | bit;
446 	}
447 }
448 
449 void
450 _mutex_fork(struct pthread *curthread)
451 {
452 
453 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
454 	    &curthread->mq[TMQ_NORM_PRIV], 0);
455 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
456 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
457 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
458 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
459 	curthread->robust_list = 0;
460 }
461 
462 int
463 _thr_mutex_destroy(pthread_mutex_t *mutex)
464 {
465 	pthread_mutex_t m, m1;
466 	int ret;
467 
468 	m = *mutex;
469 	if (m < THR_MUTEX_DESTROYED) {
470 		ret = 0;
471 	} else if (m == THR_MUTEX_DESTROYED) {
472 		ret = EINVAL;
473 	} else {
474 		if (m == THR_PSHARED_PTR) {
475 			m1 = __thr_pshared_offpage(mutex, 0);
476 			if (m1 != NULL) {
477 				if ((uint32_t)m1->m_lock.m_owner !=
478 				    UMUTEX_RB_OWNERDEAD) {
479 					mutex_assert_not_owned(
480 					    _get_curthread(), m1);
481 				}
482 				__thr_pshared_destroy(mutex);
483 			}
484 			*mutex = THR_MUTEX_DESTROYED;
485 			return (0);
486 		}
487 		if (PMUTEX_OWNER_ID(m) != 0 &&
488 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
489 			ret = EBUSY;
490 		} else {
491 			*mutex = THR_MUTEX_DESTROYED;
492 			mutex_assert_not_owned(_get_curthread(), m);
493 			__thr_free(m);
494 			ret = 0;
495 		}
496 	}
497 
498 	return (ret);
499 }
500 
501 static int
502 mutex_qidx(struct pthread_mutex *m)
503 {
504 
505 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
506 		return (TMQ_NORM);
507 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
508 }
509 
510 /*
511  * Both enqueue_mutex() and dequeue_mutex() operate on the
512  * thread-private linkage of the locked mutexes and on the robust
513  * linkage.
514  *
515  * Robust list, as seen by kernel, must be consistent even in the case
516  * of thread termination at arbitrary moment.  Since either enqueue or
517  * dequeue for list walked by kernel consists of rewriting a single
518  * forward pointer, it is safe.  On the other hand, rewrite of the
519  * back pointer is not atomic WRT the forward one, but kernel does not
520  * care.
521  */
522 static void
523 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
524     int error)
525 {
526 	struct pthread_mutex *m1;
527 	uintptr_t *rl;
528 	int qidx;
529 
530 	/* Add to the list of owned mutexes: */
531 	if (error != EOWNERDEAD)
532 		mutex_assert_not_owned(curthread, m);
533 	qidx = mutex_qidx(m);
534 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
535 	if (!is_pshared_mutex(m))
536 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
537 	if (is_robust_mutex(m)) {
538 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
539 		    &curthread->priv_robust_list;
540 		m->m_rb_prev = NULL;
541 		if (*rl != 0) {
542 			m1 = __containerof((void *)*rl,
543 			    struct pthread_mutex, m_lock);
544 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
545 			m1->m_rb_prev = m;
546 		} else {
547 			m1 = NULL;
548 			m->m_lock.m_rb_lnk = 0;
549 		}
550 		*rl = (uintptr_t)&m->m_lock;
551 	}
552 }
553 
/*
 * Remove mutex m from the bookkeeping of curthread.
 *
 * A robust mutex is first unlinked from the robust list it is on (the
 * shared or the private one), then m is removed from the owned queue
 * and, if process-private, from the private-only queue.  For a
 * priority-protect mutex m_ceilings[1] is refreshed through
 * set_inherited_priority().  Finally the queue linkage is reset with
 * mutex_init_link().
 */
static void
dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *mp, *mn;
	int qidx;

	mutex_assert_is_owned(m);
	qidx = mutex_qidx(m);
	if (is_robust_mutex(m)) {
		mp = m->m_rb_prev;
		if (mp == NULL) {
			/* m is the list head: advance the head past it. */
			if (is_pshared_mutex(m)) {
				curthread->robust_list = m->m_lock.m_rb_lnk;
			} else {
				curthread->priv_robust_list =
				    m->m_lock.m_rb_lnk;
			}
		} else {
			/* Bypass m in the forward chain. */
			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
		}
		if (m->m_lock.m_rb_lnk != 0) {
			/* Fix the back pointer of the successor. */
			mn = __containerof((void *)m->m_lock.m_rb_lnk,
			    struct pthread_mutex, m_lock);
			mn->m_rb_prev = m->m_rb_prev;
		}
		m->m_lock.m_rb_lnk = 0;
		m->m_rb_prev = NULL;
	}
	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
	if (!is_pshared_mutex(m))
		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
		set_inherited_priority(curthread, m);
	mutex_init_link(m);
}
589 
590 static int
591 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
592 {
593 	int ret;
594 
595 	*m = *mutex;
596 	ret = 0;
597 	if (__predict_false(*m == THR_PSHARED_PTR)) {
598 		*m = __thr_pshared_offpage(mutex, 0);
599 		if (*m == NULL)
600 			ret = EINVAL;
601 		else
602 			shared_mutex_init(*m, NULL);
603 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
604 		if (*m == THR_MUTEX_DESTROYED) {
605 			ret = EINVAL;
606 		} else {
607 			ret = init_static(_get_curthread(), mutex);
608 			if (ret == 0)
609 				*m = *mutex;
610 		}
611 	}
612 	return (ret);
613 }
614 
/*
 * pthread_mutex_trylock() implementation.
 *
 * Returns 0 when the lock was taken, EOWNERDEAD when it was taken in
 * an inconsistent state, the result of mutex_self_trylock() when the
 * caller already owns the mutex, or the error from the resolve step or
 * from _thr_umutex_trylock().
 */
int
__Tthr_mutex_trylock(pthread_mutex_t *mutex)
{
	struct pthread *curthread;
	struct pthread_mutex *m;
	uint32_t id;
	int ret, robust;

	ret = check_and_init_mutex(mutex, &m);
	if (ret != 0)
		return (ret);
	curthread = _get_curthread();
	id = TID(curthread);
	/*
	 * For a PMUTEX_FLAG_PRIVATE mutex the critical section entered
	 * here is left below only on failure; after a successful lock it
	 * is left by mutex_unlock_common().
	 */
	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	robust = _mutex_enter_robust(curthread, m);
	ret = _thr_umutex_trylock(&m->m_lock, id);
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		/* The mutex stays marked until made consistent again. */
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else if (PMUTEX_OWNER_ID(m) == id) {
		/* Already ours: the outcome depends on the mutex type. */
		ret = mutex_self_trylock(m);
	} /* else {} */
	if (robust)
		_mutex_leave_robust(curthread, m);
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}
646 
/*
 * Slow path of mutex locking, entered when the initial try-lock in
 * mutex_lock_common() failed.
 *
 * If the caller already owns the mutex the result is decided by
 * mutex_self_lock().  Otherwise the function optionally spins, then
 * optionally yields, and finally blocks in the kernel, with a timeout
 * when abstime is not NULL.  On success (0 or EOWNERDEAD) the mutex is
 * added to the owned queues of curthread.
 *
 * Returns 0, EOWNERDEAD, EINVAL for an out-of-range abstime->tv_nsec,
 * or the error from the kernel lock operation.
 */
static int
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
    const struct timespec *abstime)
{
	uint32_t id, owner;
	int count, ret;

	id = TID(curthread);
	if (PMUTEX_OWNER_ID(m) == id)
		return (mutex_self_lock(m, abstime));

	/*
	 * For adaptive mutexes, spin for a bit in the expectation
	 * that if the application requests this mutex type then
	 * the lock is likely to be released quickly and it is
	 * faster than entering the kernel
	 */
	/* Priority, robust and non-consistent mutexes never spin. */
	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
		goto sleep_in_kernel;

	/* Skip the spin phase when _thr_is_smp is not set. */
	if (!_thr_is_smp)
		goto yield_loop;

	count = m->m_spinloops;
	while (count--) {
		owner = m->m_lock.m_owner;
		/* Unowned apart from a possible UMUTEX_CONTESTED bit? */
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			/* Keep the contested bit while installing our id. */
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
		CPU_SPINWAIT;
	}

yield_loop:
	count = m->m_yieldloops;
	while (count--) {
		_sched_yield();
		owner = m->m_lock.m_owner;
		if ((owner & ~UMUTEX_CONTESTED) == 0) {
			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
			    id | owner)) {
				ret = 0;
				goto done;
			}
		}
	}

sleep_in_kernel:
	if (abstime == NULL)
		ret = __thr_umutex_lock(&m->m_lock, id);
	else if (__predict_false(abstime->tv_nsec < 0 ||
	    abstime->tv_nsec >= 1000000000))
		ret = EINVAL;
	else
		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
done:
	if (ret == 0 || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	}
	return (ret);
}
714 
/*
 * Common body of the blocking lock operations.
 *
 * m         - resolved mutex to lock.
 * abstime   - absolute timeout, or NULL to wait without a timeout.
 * cvattach  - when true, the THR_CRITICAL_ENTER/THR_CRITICAL_LEAVE
 *             handling for PMUTEX_FLAG_PRIVATE mutexes is skipped.
 * rb_onlist - when true, the _mutex_enter_robust() /
 *             _mutex_leave_robust() bracket is skipped.
 *
 * Returns 0, EOWNERDEAD, or the error from mutex_lock_sleep().
 */
static __always_inline int
mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
    bool cvattach, bool rb_onlist)
{
	struct pthread *curthread;
	int ret, robust;

	robust = 0;  /* pacify gcc */
	curthread  = _get_curthread();
	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
		THR_CRITICAL_ENTER(curthread);
	if (!rb_onlist)
		robust = _mutex_enter_robust(curthread, m);
	/* Fast path: one uncontended attempt before the slow path. */
	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
		enqueue_mutex(curthread, m, ret);
		if (ret == EOWNERDEAD)
			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
	} else {
		ret = mutex_lock_sleep(curthread, m, abstime);
	}
	if (!rb_onlist && robust)
		_mutex_leave_robust(curthread, m);
	/*
	 * Only a failed lock leaves the critical section here; after a
	 * successful lock it is left by mutex_unlock_common().
	 */
	if (ret != 0 && ret != EOWNERDEAD &&
	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
		THR_CRITICAL_LEAVE(curthread);
	return (ret);
}
743 
/*
 * pthread_mutex_lock() implementation: resolve (and lazily
 * initialize) the mutex, then lock it without a timeout.  Returns the
 * resolve error or the result of mutex_lock_common().
 */
int
__Tthr_mutex_lock(pthread_mutex_t *mutex)
{
	struct pthread_mutex *m;
	int error;

	_thr_check_init();
	error = check_and_init_mutex(mutex, &m);
	if (error != 0)
		return (error);
	return (mutex_lock_common(m, NULL, false, false));
}
756 
/*
 * pthread_mutex_timedlock() implementation: resolve (and lazily
 * initialize) the mutex, then lock it with abstime as the absolute
 * timeout.  Returns the resolve error or the result of
 * mutex_lock_common().
 */
int
__pthread_mutex_timedlock(pthread_mutex_t * __restrict mutex,
    const struct timespec * __restrict abstime)
{
	struct pthread_mutex *m;
	int error;

	_thr_check_init();
	error = check_and_init_mutex(mutex, &m);
	if (error != 0)
		return (error);
	return (mutex_lock_common(m, abstime, false, false));
}
770 
771 int
772 _thr_mutex_unlock(pthread_mutex_t *mutex)
773 {
774 	struct pthread_mutex *mp;
775 
776 	if (*mutex == THR_PSHARED_PTR) {
777 		mp = __thr_pshared_offpage(mutex, 0);
778 		if (mp == NULL)
779 			return (EINVAL);
780 		shared_mutex_init(mp, NULL);
781 	} else {
782 		mp = *mutex;
783 	}
784 	return (mutex_unlock_common(mp, false, NULL));
785 }
786 
787 int
788 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
789 {
790 	int error;
791 
792 	error = mutex_lock_common(m, NULL, true, rb_onlist);
793 	if (error == 0 || error == EOWNERDEAD)
794 		m->m_count = count;
795 	return (error);
796 }
797 
798 int
799 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
800 {
801 
802 	/*
803 	 * Clear the count in case this is a recursive mutex.
804 	 */
805 	*count = m->m_count;
806 	m->m_count = 0;
807 	(void)mutex_unlock_common(m, true, defer);
808         return (0);
809 }
810 
811 int
812 _mutex_cv_attach(struct pthread_mutex *m, int count)
813 {
814 	struct pthread *curthread;
815 
816 	curthread = _get_curthread();
817 	enqueue_mutex(curthread, m, 0);
818 	m->m_count = count;
819 	return (0);
820 }
821 
822 int
823 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
824 {
825 	struct pthread *curthread;
826 	int deferred, error;
827 
828 	curthread = _get_curthread();
829 	if ((error = _mutex_owned(curthread, mp)) != 0)
830 		return (error);
831 
832 	/*
833 	 * Clear the count in case this is a recursive mutex.
834 	 */
835 	*recurse = mp->m_count;
836 	mp->m_count = 0;
837 	dequeue_mutex(curthread, mp);
838 
839 	/* Will this happen in real-world ? */
840         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
841 		deferred = 1;
842 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
843 	} else
844 		deferred = 0;
845 
846 	if (deferred)  {
847 		_thr_wake_all(curthread->defer_waiters,
848 		    curthread->nwaiter_defer);
849 		curthread->nwaiter_defer = 0;
850 	}
851 	return (0);
852 }
853 
854 static int
855 mutex_self_trylock(struct pthread_mutex *m)
856 {
857 	int ret;
858 
859 	switch (PMUTEX_TYPE(m->m_flags)) {
860 	case PTHREAD_MUTEX_ERRORCHECK:
861 	case PTHREAD_MUTEX_NORMAL:
862 	case PTHREAD_MUTEX_ADAPTIVE_NP:
863 		ret = EBUSY;
864 		break;
865 
866 	case PTHREAD_MUTEX_RECURSIVE:
867 		/* Increment the lock count: */
868 		if (m->m_count + 1 > 0) {
869 			m->m_count++;
870 			ret = 0;
871 		} else
872 			ret = EAGAIN;
873 		break;
874 
875 	default:
876 		/* Trap invalid mutex types; */
877 		ret = EINVAL;
878 	}
879 
880 	return (ret);
881 }
882 
883 static int
884 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
885 {
886 	struct timespec	ts1, ts2;
887 	int ret;
888 
889 	switch (PMUTEX_TYPE(m->m_flags)) {
890 	case PTHREAD_MUTEX_ERRORCHECK:
891 	case PTHREAD_MUTEX_ADAPTIVE_NP:
892 		if (abstime) {
893 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
894 			    abstime->tv_nsec >= 1000000000) {
895 				ret = EINVAL;
896 			} else {
897 				clock_gettime(CLOCK_REALTIME, &ts1);
898 				TIMESPEC_SUB(&ts2, abstime, &ts1);
899 				__sys_nanosleep(&ts2, NULL);
900 				ret = ETIMEDOUT;
901 			}
902 		} else {
903 			/*
904 			 * POSIX specifies that mutexes should return
905 			 * EDEADLK if a recursive lock is detected.
906 			 */
907 			ret = EDEADLK;
908 		}
909 		break;
910 
911 	case PTHREAD_MUTEX_NORMAL:
912 		/*
913 		 * What SS2 define as a 'normal' mutex.  Intentionally
914 		 * deadlock on attempts to get a lock you already own.
915 		 */
916 		ret = 0;
917 		if (abstime) {
918 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
919 			    abstime->tv_nsec >= 1000000000) {
920 				ret = EINVAL;
921 			} else {
922 				clock_gettime(CLOCK_REALTIME, &ts1);
923 				TIMESPEC_SUB(&ts2, abstime, &ts1);
924 				__sys_nanosleep(&ts2, NULL);
925 				ret = ETIMEDOUT;
926 			}
927 		} else {
928 			ts1.tv_sec = 30;
929 			ts1.tv_nsec = 0;
930 			for (;;)
931 				__sys_nanosleep(&ts1, NULL);
932 		}
933 		break;
934 
935 	case PTHREAD_MUTEX_RECURSIVE:
936 		/* Increment the lock count: */
937 		if (m->m_count + 1 > 0) {
938 			m->m_count++;
939 			ret = 0;
940 		} else
941 			ret = EAGAIN;
942 		break;
943 
944 	default:
945 		/* Trap invalid mutex types; */
946 		ret = EINVAL;
947 	}
948 
949 	return (ret);
950 }
951 
/*
 * Common body of the unlock operations.
 *
 * m         - resolved mutex; static-initializer and destroyed values
 *             are rejected.
 * cv        - when true, the THR_CRITICAL_LEAVE for a
 *             PMUTEX_FLAG_PRIVATE mutex is skipped.
 * mtx_defer - passed on to _thr_umutex_unlock2(); when not NULL and a
 *             wakeup was deferred on the mutex, *mtx_defer is set to 1
 *             instead of waking the deferred waiters here.
 *
 * Returns EINVAL for a destroyed mutex, EPERM when the calling thread
 * is not the owner, otherwise 0 or the error from the unlock.
 */
static __always_inline int
mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
{
	struct pthread *curthread;
	uint32_t id;
	int deferred, error, private, robust;

	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
		if (m == THR_MUTEX_DESTROYED)
			return (EINVAL);
		return (EPERM);
	}

	curthread = _get_curthread();
	id = TID(curthread);

	/*
	 * Check if the running thread is not the owner of the mutex.
	 */
	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
		return (EPERM);

	error = 0;
	private = (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0;
	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
		/* Nested hold of a recursive mutex: just drop one level. */
		m->m_count--;
	} else {
		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
			deferred = 1;
			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
        	} else
			deferred = 0;

		robust = _mutex_enter_robust(curthread, m);
		/* Leave the owned queues before releasing the lock word. */
		dequeue_mutex(curthread, m);
		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
		if (deferred)  {
			if (mtx_defer == NULL) {
				_thr_wake_all(curthread->defer_waiters,
				    curthread->nwaiter_defer);
				curthread->nwaiter_defer = 0;
			} else
				*mtx_defer = 1;
		}
		if (robust)
			_mutex_leave_robust(curthread, m);
	}
	/* Pairs with the THR_CRITICAL_ENTER done by the lock paths. */
	if (!cv && private)
		THR_CRITICAL_LEAVE(curthread);
	return (error);
}
1004 
1005 int
1006 _pthread_mutex_getprioceiling(const pthread_mutex_t * __restrict mutex,
1007     int * __restrict prioceiling)
1008 {
1009 	struct pthread_mutex *m;
1010 
1011 	if (*mutex == THR_PSHARED_PTR) {
1012 		m = __thr_pshared_offpage(__DECONST(void *, mutex), 0);
1013 		if (m == NULL)
1014 			return (EINVAL);
1015 		shared_mutex_init(m, NULL);
1016 	} else {
1017 		m = *mutex;
1018 		if (m <= THR_MUTEX_DESTROYED)
1019 			return (EINVAL);
1020 	}
1021 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1022 		return (EINVAL);
1023 	*prioceiling = m->m_lock.m_ceilings[0];
1024 	return (0);
1025 }
1026 
1027 int
1028 _pthread_mutex_setprioceiling(pthread_mutex_t * __restrict mutex,
1029     int ceiling, int * __restrict old_ceiling)
1030 {
1031 	struct pthread *curthread;
1032 	struct pthread_mutex *m, *m1, *m2;
1033 	struct mutex_queue *q, *qp;
1034 	int qidx, ret;
1035 
1036 	if (*mutex == THR_PSHARED_PTR) {
1037 		m = __thr_pshared_offpage(mutex, 0);
1038 		if (m == NULL)
1039 			return (EINVAL);
1040 		shared_mutex_init(m, NULL);
1041 	} else {
1042 		m = *mutex;
1043 		if (m <= THR_MUTEX_DESTROYED)
1044 			return (EINVAL);
1045 	}
1046 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1047 		return (EINVAL);
1048 
1049 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1050 	if (ret != 0)
1051 		return (ret);
1052 
1053 	curthread = _get_curthread();
1054 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1055 		mutex_assert_is_owned(m);
1056 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1057 		m2 = TAILQ_NEXT(m, m_qe);
1058 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1059 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1060 			qidx = mutex_qidx(m);
1061 			q = &curthread->mq[qidx];
1062 			qp = &curthread->mq[qidx + 1];
1063 			TAILQ_REMOVE(q, m, m_qe);
1064 			if (!is_pshared_mutex(m))
1065 				TAILQ_REMOVE(qp, m, m_pqe);
1066 			TAILQ_FOREACH(m2, q, m_qe) {
1067 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1068 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1069 					if (!is_pshared_mutex(m)) {
1070 						while (m2 != NULL &&
1071 						    is_pshared_mutex(m2)) {
1072 							m2 = TAILQ_PREV(m2,
1073 							    mutex_queue, m_qe);
1074 						}
1075 						if (m2 == NULL) {
1076 							TAILQ_INSERT_HEAD(qp,
1077 							    m, m_pqe);
1078 						} else {
1079 							TAILQ_INSERT_BEFORE(m2,
1080 							    m, m_pqe);
1081 						}
1082 					}
1083 					return (0);
1084 				}
1085 			}
1086 			TAILQ_INSERT_TAIL(q, m, m_qe);
1087 			if (!is_pshared_mutex(m))
1088 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1089 		}
1090 	}
1091 	return (0);
1092 }
1093 
1094 int
1095 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1096 {
1097 	struct pthread_mutex *m;
1098 	int ret;
1099 
1100 	ret = check_and_init_mutex(mutex, &m);
1101 	if (ret == 0)
1102 		*count = m->m_spinloops;
1103 	return (ret);
1104 }
1105 
1106 int
1107 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1108 {
1109 	struct pthread_mutex *m;
1110 	int ret;
1111 
1112 	ret = check_and_init_mutex(mutex, &m);
1113 	if (ret == 0)
1114 		m->m_spinloops = count;
1115 	return (ret);
1116 }
1117 
1118 int
1119 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1120 {
1121 	struct pthread_mutex *m;
1122 	int ret;
1123 
1124 	ret = check_and_init_mutex(mutex, &m);
1125 	if (ret == 0)
1126 		*count = m->m_yieldloops;
1127 	return (ret);
1128 }
1129 
1130 int
1131 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1132 {
1133 	struct pthread_mutex *m;
1134 	int ret;
1135 
1136 	ret = check_and_init_mutex(mutex, &m);
1137 	if (ret == 0)
1138 		m->m_yieldloops = count;
1139 	return (0);
1140 }
1141 
1142 int
1143 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1144 {
1145 	struct pthread_mutex *m;
1146 
1147 	if (*mutex == THR_PSHARED_PTR) {
1148 		m = __thr_pshared_offpage(mutex, 0);
1149 		if (m == NULL)
1150 			return (0);
1151 		shared_mutex_init(m, NULL);
1152 	} else {
1153 		m = *mutex;
1154 		if (m <= THR_MUTEX_DESTROYED)
1155 			return (0);
1156 	}
1157 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1158 }
1159 
1160 int
1161 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1162 {
1163 
1164 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1165 		if (mp == THR_MUTEX_DESTROYED)
1166 			return (EINVAL);
1167 		return (EPERM);
1168 	}
1169 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1170 		return (EPERM);
1171 	return (0);
1172 }
1173 
1174 int
1175 _Tthr_mutex_consistent(pthread_mutex_t *mutex)
1176 {
1177 	struct pthread_mutex *m;
1178 	struct pthread *curthread;
1179 
1180 	if (*mutex == THR_PSHARED_PTR) {
1181 		m = __thr_pshared_offpage(mutex, 0);
1182 		if (m == NULL)
1183 			return (EINVAL);
1184 		shared_mutex_init(m, NULL);
1185 	} else {
1186 		m = *mutex;
1187 		if (m <= THR_MUTEX_DESTROYED)
1188 			return (EINVAL);
1189 	}
1190 	curthread = _get_curthread();
1191 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1192 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1193 		return (EINVAL);
1194 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1195 		return (EPERM);
1196 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1197 	return (0);
1198 }
1199