xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision ee5cf11617a9b7f034d95c639bd4d27d1f09e848)
1 /*
2  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
3  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
5  *
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Konstantin Belousov
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by John Birrell.
22  * 4. Neither the name of the author nor the names of any co-contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "namespace.h"
43 #include <stdlib.h>
44 #include <errno.h>
45 #include <string.h>
46 #include <sys/param.h>
47 #include <sys/queue.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 #include "un-namespace.h"
51 
52 #include "thr_private.h"
53 
54 _Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
55     "pthread_mutex is too large for off-page");
56 
57 /*
58  * For adaptive mutexes, how many times to spin doing trylock2
59  * before entering the kernel to block.
60  */
61 #define MUTEX_ADAPTIVE_SPINS	2000
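/*
 * A minimal usage sketch (not part of this file): requesting the
 * adaptive type and overriding the spin count through the non-portable
 * pthread_mutex_setspinloops_np() interface implemented further down.
 * The value 4000 is an arbitrary illustration, not a recommendation.
 */
#if 0
#include <pthread.h>
#include <pthread_np.h>

static pthread_mutex_t adaptive_mtx;

static int
make_adaptive_mutex(void)
{
	pthread_mutexattr_t attr;
	int error;

	pthread_mutexattr_init(&attr);
	/* Spin briefly on contention before sleeping in the kernel. */
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
	error = pthread_mutex_init(&adaptive_mtx, &attr);
	pthread_mutexattr_destroy(&attr);
	if (error != 0)
		return (error);
	/* Replace the MUTEX_ADAPTIVE_SPINS-derived default. */
	return (pthread_mutex_setspinloops_np(&adaptive_mtx, 4000));
}
#endif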
62 
63 /*
64  * Prototypes
65  */
66 int	__pthread_mutex_consistent(pthread_mutex_t *mutex);
67 int	__pthread_mutex_init(pthread_mutex_t *mutex,
68 		const pthread_mutexattr_t *mutex_attr);
69 int	__pthread_mutex_trylock(pthread_mutex_t *mutex);
70 int	__pthread_mutex_lock(pthread_mutex_t *mutex);
71 int	__pthread_mutex_timedlock(pthread_mutex_t *mutex,
72 		const struct timespec *abstime);
73 int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
74     		void *(calloc_cb)(size_t, size_t));
75 int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
76 int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
77 int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
78 int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
79 int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
80 int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
81 
82 static int	mutex_self_trylock(pthread_mutex_t);
83 static int	mutex_self_lock(pthread_mutex_t,
84 				const struct timespec *abstime);
85 static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
86 static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
87 				const struct timespec *);
88 static void	mutex_init_robust(struct pthread *curthread);
89 static int	mutex_qidx(struct pthread_mutex *m);
90 static bool	is_robust_mutex(struct pthread_mutex *m);
91 static bool	is_pshared_mutex(struct pthread_mutex *m);
92 
93 __weak_reference(__pthread_mutex_init, pthread_mutex_init);
94 __strong_reference(__pthread_mutex_init, _pthread_mutex_init);
95 __weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
96 __strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
97 __weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
98 __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
99 __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
100 __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);
101 __weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
102 __strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);
103 
104 /* Single underscore versions provided for libc internal usage: */
105 /* No difference between libc and application usage of these: */
106 __weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
107 __weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);
108 
109 __weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
110 __weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);
111 
112 __weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
113 __strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
114 __weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);
115 
116 __weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
117 __strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
118 __weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
119 __weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
120 
121 static void
122 mutex_init_link(struct pthread_mutex *m)
123 {
124 
125 #if defined(_PTHREADS_INVARIANTS)
126 	m->m_qe.tqe_prev = NULL;
127 	m->m_qe.tqe_next = NULL;
128 	m->m_pqe.tqe_prev = NULL;
129 	m->m_pqe.tqe_next = NULL;
130 #endif
131 }
132 
133 static void
134 mutex_assert_is_owned(struct pthread_mutex *m __unused)
135 {
136 
137 #if defined(_PTHREADS_INVARIANTS)
138 	if (__predict_false(m->m_qe.tqe_prev == NULL)) {
139 		char msg[128];
140 		snprintf(msg, sizeof(msg),
141 		    "mutex %p own %#x is not on list %p %p",
142 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
143 		PANIC(msg);
144 	}
145 #endif
146 }
147 
148 static void
149 mutex_assert_not_owned(struct pthread *curthread __unused,
150     struct pthread_mutex *m __unused)
151 {
152 
153 #if defined(_PTHREADS_INVARIANTS)
154 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
155 	    m->m_qe.tqe_next != NULL)) {
156 		char msg[128];
157 		snprintf(msg, sizeof(msg),
158 		    "mutex %p own %#x is on list %p %p",
159 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
160 		PANIC(msg);
161 	}
162 	if (__predict_false(is_robust_mutex(m) &&
163 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
164 	    (is_pshared_mutex(m) && curthread->robust_list ==
165 	    (uintptr_t)&m->m_lock) ||
166 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
167 	    (uintptr_t)&m->m_lock)))) {
168 		char msg[128];
169 		snprintf(msg, sizeof(msg),
170     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
171 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
172 		    m->m_rb_prev, (void *)curthread->robust_list,
173 		    (void *)curthread->priv_robust_list);
174 		PANIC(msg);
175 	}
176 #endif
177 }
178 
179 static bool
180 is_pshared_mutex(struct pthread_mutex *m)
181 {
182 
183 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
184 }
185 
186 static bool
187 is_robust_mutex(struct pthread_mutex *m)
188 {
189 
190 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
191 }
192 
193 int
194 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
195 {
196 
197 #if defined(_PTHREADS_INVARIANTS)
198 	if (__predict_false(curthread->inact_mtx != 0))
199 		PANIC("inact_mtx enter");
200 #endif
201 	if (!is_robust_mutex(m))
202 		return (0);
203 
204 	mutex_init_robust(curthread);
205 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
206 	return (1);
207 }
208 
209 void
210 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
211 {
212 
213 #if defined(_PTHREADS_INVARIANTS)
214 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
215 		PANIC("inact_mtx leave");
216 #endif
217 	curthread->inact_mtx = 0;
218 }
219 
220 static int
221 mutex_check_attr(const struct pthread_mutex_attr *attr)
222 {
223 
224 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
225 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
226 		return (EINVAL);
227 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
228 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
229 		return (EINVAL);
230 	return (0);
231 }
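/*
 * A small sketch (not part of this file) of attribute settings that
 * pass the checks above: a recursive type combined with the
 * PTHREAD_PRIO_INHERIT protocol.  The combination is only an example.
 */
#if 0
#include <pthread.h>

static int
init_recursive_pi_mutex(pthread_mutex_t *mp)
{
	pthread_mutexattr_t attr;
	int error;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
	error = pthread_mutex_init(mp, &attr);
	pthread_mutexattr_destroy(&attr);
	return (error);
}
#endif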
232 
233 static void
234 mutex_init_robust(struct pthread *curthread)
235 {
236 	struct umtx_robust_lists_params rb;
237 
238 	if (curthread == NULL)
239 		curthread = _get_curthread();
240 	if (curthread->robust_inited)
241 		return;
242 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
243 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
244 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
245 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
246 	curthread->robust_inited = 1;
247 }
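/*
 * A minimal sketch (not part of this file), assuming the POSIX robust
 * mutex attribute interface provided by this libthr: creating a robust
 * mutex, whose initialization reaches mutex_init_robust() above.  The
 * EOWNERDEAD recovery side is sketched near pthread_mutex_consistent()
 * at the end of the file.
 */
#if 0
#include <pthread.h>

static int
init_robust_mutex(pthread_mutex_t *mp)
{
	pthread_mutexattr_t attr;
	int error;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
	error = pthread_mutex_init(mp, &attr);
	pthread_mutexattr_destroy(&attr);
	return (error);
}
#endif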
248 
249 static void
250 mutex_init_body(struct pthread_mutex *pmutex,
251     const struct pthread_mutex_attr *attr)
252 {
253 
254 	pmutex->m_flags = attr->m_type;
255 	pmutex->m_count = 0;
256 	pmutex->m_spinloops = 0;
257 	pmutex->m_yieldloops = 0;
258 	mutex_init_link(pmutex);
259 	switch (attr->m_protocol) {
260 	case PTHREAD_PRIO_NONE:
261 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
262 		pmutex->m_lock.m_flags = 0;
263 		break;
264 	case PTHREAD_PRIO_INHERIT:
265 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
266 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
267 		break;
268 	case PTHREAD_PRIO_PROTECT:
269 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
270 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
271 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
272 		break;
273 	}
274 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
275 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
276 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
277 		mutex_init_robust(NULL);
278 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
279 	}
280 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
281 		pmutex->m_spinloops =
282 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
283 		pmutex->m_yieldloops = _thr_yieldloops;
284 	}
285 }
286 
287 static int
288 mutex_init(pthread_mutex_t *mutex,
289     const struct pthread_mutex_attr *mutex_attr,
290     void *(calloc_cb)(size_t, size_t))
291 {
292 	const struct pthread_mutex_attr *attr;
293 	struct pthread_mutex *pmutex;
294 	int error;
295 
296 	if (mutex_attr == NULL) {
297 		attr = &_pthread_mutexattr_default;
298 	} else {
299 		attr = mutex_attr;
300 		error = mutex_check_attr(attr);
301 		if (error != 0)
302 			return (error);
303 	}
304 	if ((pmutex = (pthread_mutex_t)
305 		calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
306 		return (ENOMEM);
307 	mutex_init_body(pmutex, attr);
308 	*mutex = pmutex;
309 	return (0);
310 }
311 
312 static int
313 init_static(struct pthread *thread, pthread_mutex_t *mutex)
314 {
315 	int ret;
316 
317 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
318 
319 	if (*mutex == THR_MUTEX_INITIALIZER)
320 		ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
321 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
322 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
323 		    calloc);
324 	else
325 		ret = 0;
326 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
327 
328 	return (ret);
329 }
330 
331 static void
332 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
333 {
334 	struct pthread_mutex *m2;
335 
336 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
337 	if (m2 != NULL)
338 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
339 	else
340 		m->m_lock.m_ceilings[1] = -1;
341 }
342 
343 static void
344 shared_mutex_init(struct pthread_mutex *pmtx, const struct
345     pthread_mutex_attr *mutex_attr)
346 {
347 	static const struct pthread_mutex_attr foobar_mutex_attr = {
348 		.m_type = PTHREAD_MUTEX_DEFAULT,
349 		.m_protocol = PTHREAD_PRIO_NONE,
350 		.m_ceiling = 0,
351 		.m_pshared = PTHREAD_PROCESS_SHARED,
352 		.m_robust = PTHREAD_MUTEX_STALLED,
353 	};
354 	bool done;
355 
356 	/*
357 	 * Hack to allow multiple pthread_mutex_init() calls on the
358 	 * same process-shared mutex.  We rely on the kernel allocating
359 	 * a zeroed offpage for the mutex, i.e. the
360 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
361 	 */
362 	for (done = false; !done;) {
363 		switch (pmtx->m_ps) {
364 		case PMUTEX_INITSTAGE_DONE:
365 			atomic_thread_fence_acq();
366 			done = true;
367 			break;
368 		case PMUTEX_INITSTAGE_ALLOC:
369 			if (atomic_cmpset_int(&pmtx->m_ps,
370 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
371 				if (mutex_attr == NULL)
372 					mutex_attr = &foobar_mutex_attr;
373 				mutex_init_body(pmtx, mutex_attr);
374 				atomic_store_rel_int(&pmtx->m_ps,
375 				    PMUTEX_INITSTAGE_DONE);
376 				done = true;
377 			}
378 			break;
379 		case PMUTEX_INITSTAGE_BUSY:
380 			_pthread_yield();
381 			break;
382 		default:
383 			PANIC("corrupted offpage");
384 			break;
385 		}
386 	}
387 }
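/*
 * A minimal sketch (not part of this file): a process-shared mutex
 * placed in MAP_SHARED anonymous memory.  Every process mapping the
 * region may call pthread_mutex_init() on it; the PMUTEX_INITSTAGE_*
 * protocol above makes the repeated initialization safe.  Error
 * handling is abbreviated.
 */
#if 0
#include <sys/mman.h>
#include <pthread.h>

static pthread_mutex_t *
create_shared_mutex(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t *mp;

	mp = mmap(NULL, sizeof(*mp), PROT_READ | PROT_WRITE,
	    MAP_SHARED | MAP_ANON, -1, 0);
	if (mp == MAP_FAILED)
		return (NULL);
	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	if (pthread_mutex_init(mp, &attr) != 0) {
		munmap(mp, sizeof(*mp));
		mp = NULL;
	}
	pthread_mutexattr_destroy(&attr);
	return (mp);
}
#endif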
388 
389 int
390 __pthread_mutex_init(pthread_mutex_t *mutex,
391     const pthread_mutexattr_t *mutex_attr)
392 {
393 	struct pthread_mutex *pmtx;
394 	int ret;
395 
396 	if (mutex_attr != NULL) {
397 		ret = mutex_check_attr(*mutex_attr);
398 		if (ret != 0)
399 			return (ret);
400 	}
401 	if (mutex_attr == NULL ||
402 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
403 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
404 		    calloc));
405 	}
406 	pmtx = __thr_pshared_offpage(mutex, 1);
407 	if (pmtx == NULL)
408 		return (EFAULT);
409 	*mutex = THR_PSHARED_PTR;
410 	shared_mutex_init(pmtx, *mutex_attr);
411 	return (0);
412 }
413 
414 /* This function is used internally by malloc. */
415 int
416 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
417     void *(calloc_cb)(size_t, size_t))
418 {
419 	static const struct pthread_mutex_attr attr = {
420 		.m_type = PTHREAD_MUTEX_NORMAL,
421 		.m_protocol = PTHREAD_PRIO_NONE,
422 		.m_ceiling = 0,
423 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
424 		.m_robust = PTHREAD_MUTEX_STALLED,
425 	};
426 	int ret;
427 
428 	ret = mutex_init(mutex, &attr, calloc_cb);
429 	if (ret == 0)
430 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
431 	return (ret);
432 }
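/*
 * A hedged sketch (not part of this file) of how a malloc
 * implementation might use the callback variant so that creating its
 * own locks does not recurse into malloc.  bootstrap_calloc() is a
 * hypothetical allocator-internal routine, not a real interface.
 */
#if 0
#include <stddef.h>
#include <pthread.h>

void	*bootstrap_calloc(size_t number, size_t size);	/* hypothetical */
int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
	    void *(calloc_cb)(size_t, size_t));

static pthread_mutex_t malloc_lock;

static void
malloc_locks_init(void)
{
	(void)_pthread_mutex_init_calloc_cb(&malloc_lock, bootstrap_calloc);
}
#endif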
433 
434 /*
435  * Fix mutex ownership for the child process.
436  *
437  * Process-private mutex ownership is transmitted from the forking
438  * thread to the child process.
439  *
440  * Process-shared mutexes should not be inherited: their owner is the
441  * forking thread, which lives in the parent process, so they are
442  * removed from the owned mutex list.
443  */
444 static void
445 queue_fork(struct pthread *curthread, struct mutex_queue *q,
446     struct mutex_queue *qp, uint bit)
447 {
448 	struct pthread_mutex *m;
449 
450 	TAILQ_INIT(q);
451 	TAILQ_FOREACH(m, qp, m_pqe) {
452 		TAILQ_INSERT_TAIL(q, m, m_qe);
453 		m->m_lock.m_owner = TID(curthread) | bit;
454 	}
455 }
456 
457 void
458 _mutex_fork(struct pthread *curthread)
459 {
460 
461 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
462 	    &curthread->mq[TMQ_NORM_PRIV], 0);
463 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
464 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
465 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
466 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
467 	curthread->robust_list = 0;
468 }
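/*
 * A small sketch (not part of this file) of the ownership rule
 * documented above: a process-private mutex held across fork() remains
 * owned by the lone thread in the child, so the child may unlock it.
 * The usual caveats about calling non-async-signal-safe functions in
 * the child of a threaded process still apply.
 */
#if 0
#include <sys/types.h>
#include <unistd.h>
#include <pthread.h>

static pthread_mutex_t priv_mtx = PTHREAD_MUTEX_INITIALIZER;

static void
fork_with_lock_held(void)
{
	pid_t pid;

	pthread_mutex_lock(&priv_mtx);
	pid = fork();
	if (pid == 0) {
		/* Ownership was transferred by _mutex_fork(). */
		pthread_mutex_unlock(&priv_mtx);
		_exit(0);
	}
	pthread_mutex_unlock(&priv_mtx);
}
#endif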
469 
470 int
471 _pthread_mutex_destroy(pthread_mutex_t *mutex)
472 {
473 	pthread_mutex_t m, m1;
474 	int ret;
475 
476 	m = *mutex;
477 	if (m < THR_MUTEX_DESTROYED) {
478 		ret = 0;
479 	} else if (m == THR_MUTEX_DESTROYED) {
480 		ret = EINVAL;
481 	} else {
482 		if (m == THR_PSHARED_PTR) {
483 			m1 = __thr_pshared_offpage(mutex, 0);
484 			if (m1 != NULL) {
485 				mutex_assert_not_owned(_get_curthread(), m1);
486 				__thr_pshared_destroy(mutex);
487 			}
488 			*mutex = THR_MUTEX_DESTROYED;
489 			return (0);
490 		}
491 		if (PMUTEX_OWNER_ID(m) != 0 &&
492 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
493 			ret = EBUSY;
494 		} else {
495 			*mutex = THR_MUTEX_DESTROYED;
496 			mutex_assert_not_owned(_get_curthread(), m);
497 			free(m);
498 			ret = 0;
499 		}
500 	}
501 
502 	return (ret);
503 }
504 
505 static int
506 mutex_qidx(struct pthread_mutex *m)
507 {
508 
509 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
510 		return (TMQ_NORM);
511 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
512 }
513 
514 /*
515  * Both enqueue_mutex() and dequeue_mutex() operate on the
516  * thread-private linkage of the locked mutexes and on the robust
517  * linkage.
518  *
519  * The robust list, as seen by the kernel, must be consistent even if
520  * the thread terminates at an arbitrary moment.  Since both enqueue
521  * and dequeue on the list walked by the kernel consist of rewriting a
522  * single forward pointer, they are safe.  On the other hand, the
523  * rewrite of the back pointer is not atomic WRT the forward one, but
524  * the kernel does not care.
525  */
526 static void
527 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
528     int error)
529 {
530 	struct pthread_mutex *m1;
531 	uintptr_t *rl;
532 	int qidx;
533 
534 	/* Add to the list of owned mutexes: */
535 	if (error != EOWNERDEAD)
536 		mutex_assert_not_owned(curthread, m);
537 	qidx = mutex_qidx(m);
538 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
539 	if (!is_pshared_mutex(m))
540 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
541 	if (is_robust_mutex(m)) {
542 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
543 		    &curthread->priv_robust_list;
544 		m->m_rb_prev = NULL;
545 		if (*rl != 0) {
546 			m1 = __containerof((void *)*rl,
547 			    struct pthread_mutex, m_lock);
548 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
549 			m1->m_rb_prev = m;
550 		} else {
551 			m1 = NULL;
552 			m->m_lock.m_rb_lnk = 0;
553 		}
554 		*rl = (uintptr_t)&m->m_lock;
555 	}
556 }
557 
558 static void
559 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
560 {
561 	struct pthread_mutex *mp, *mn;
562 	int qidx;
563 
564 	mutex_assert_is_owned(m);
565 	qidx = mutex_qidx(m);
566 	if (is_robust_mutex(m)) {
567 		mp = m->m_rb_prev;
568 		if (mp == NULL) {
569 			if (is_pshared_mutex(m)) {
570 				curthread->robust_list = m->m_lock.m_rb_lnk;
571 			} else {
572 				curthread->priv_robust_list =
573 				    m->m_lock.m_rb_lnk;
574 			}
575 		} else {
576 			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
577 		}
578 		if (m->m_lock.m_rb_lnk != 0) {
579 			mn = __containerof((void *)m->m_lock.m_rb_lnk,
580 			    struct pthread_mutex, m_lock);
581 			mn->m_rb_prev = m->m_rb_prev;
582 		}
583 		m->m_lock.m_rb_lnk = 0;
584 		m->m_rb_prev = NULL;
585 	}
586 	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
587 	if (!is_pshared_mutex(m))
588 		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
589 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
590 		set_inherited_priority(curthread, m);
591 	mutex_init_link(m);
592 }
593 
594 static int
595 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
596 {
597 	int ret;
598 
599 	*m = *mutex;
600 	ret = 0;
601 	if (*m == THR_PSHARED_PTR) {
602 		*m = __thr_pshared_offpage(mutex, 0);
603 		if (*m == NULL)
604 			ret = EINVAL;
605 		else
606 			shared_mutex_init(*m, NULL);
607 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
608 		if (*m == THR_MUTEX_DESTROYED) {
609 			ret = EINVAL;
610 		} else {
611 			ret = init_static(_get_curthread(), mutex);
612 			if (ret == 0)
613 				*m = *mutex;
614 		}
615 	}
616 	return (ret);
617 }
618 
619 int
620 __pthread_mutex_trylock(pthread_mutex_t *mutex)
621 {
622 	struct pthread *curthread;
623 	struct pthread_mutex *m;
624 	uint32_t id;
625 	int ret, robust;
626 
627 	ret = check_and_init_mutex(mutex, &m);
628 	if (ret != 0)
629 		return (ret);
630 	curthread = _get_curthread();
631 	id = TID(curthread);
632 	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
633 		THR_CRITICAL_ENTER(curthread);
634 	robust = _mutex_enter_robust(curthread, m);
635 	ret = _thr_umutex_trylock(&m->m_lock, id);
636 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
637 		enqueue_mutex(curthread, m, ret);
638 		if (ret == EOWNERDEAD)
639 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
640 	} else if (PMUTEX_OWNER_ID(m) == id) {
641 		ret = mutex_self_trylock(m);
642 	} /* else {} */
643 	if (robust)
644 		_mutex_leave_robust(curthread, m);
645 	if ((ret == 0 || ret == EOWNERDEAD) &&
646 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
647 		THR_CRITICAL_LEAVE(curthread);
648 	return (ret);
649 }
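/*
 * A minimal sketch (not part of this file) of the non-blocking path:
 * EBUSY means another thread holds the lock; for robust mutexes
 * EOWNERDEAD means the lock was acquired but the previous owner died,
 * so the protected state may need repair before the mutex is marked
 * consistent.
 */
#if 0
#include <errno.h>
#include <pthread.h>

static int
try_enter(pthread_mutex_t *mp)
{
	int error;

	error = pthread_mutex_trylock(mp);
	if (error == EBUSY) {
		/* Someone else holds the lock; do not block. */
		return (error);
	}
	if (error == EOWNERDEAD) {
		/*
		 * Robust mutex acquired, but the previous owner died;
		 * repair state and mark it consistent before use.
		 */
		error = pthread_mutex_consistent(mp);
	}
	return (error);
}
#endif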
650 
651 static int
652 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
653     const struct timespec *abstime)
654 {
655 	uint32_t id, owner;
656 	int count, ret;
657 
658 	id = TID(curthread);
659 	if (PMUTEX_OWNER_ID(m) == id)
660 		return (mutex_self_lock(m, abstime));
661 
662 	/*
663 	 * For adaptive mutexes, spin for a bit in the expectation
664 	 * that if the application requests this mutex type then
665 	 * the lock is likely to be released quickly, so spinning is
666 	 * faster than entering the kernel.
667 	 */
668 	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
669 	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
670 		goto sleep_in_kernel;
671 
672 	if (!_thr_is_smp)
673 		goto yield_loop;
674 
675 	count = m->m_spinloops;
676 	while (count--) {
677 		owner = m->m_lock.m_owner;
678 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
679 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
680 			    id | owner)) {
681 				ret = 0;
682 				goto done;
683 			}
684 		}
685 		CPU_SPINWAIT;
686 	}
687 
688 yield_loop:
689 	count = m->m_yieldloops;
690 	while (count--) {
691 		_sched_yield();
692 		owner = m->m_lock.m_owner;
693 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
694 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
695 			    id | owner)) {
696 				ret = 0;
697 				goto done;
698 			}
699 		}
700 	}
701 
702 sleep_in_kernel:
703 	if (abstime == NULL)
704 		ret = __thr_umutex_lock(&m->m_lock, id);
705 	else if (__predict_false(abstime->tv_nsec < 0 ||
706 	    abstime->tv_nsec >= 1000000000))
707 		ret = EINVAL;
708 	else
709 		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
710 done:
711 	if (ret == 0 || ret == EOWNERDEAD) {
712 		enqueue_mutex(curthread, m, ret);
713 		if (ret == EOWNERDEAD)
714 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
715 	}
716 	return (ret);
717 }
718 
719 static inline int
720 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
721     bool cvattach, bool rb_onlist)
722 {
723 	struct pthread *curthread;
724 	int ret, robust;
725 
726 	curthread  = _get_curthread();
727 	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
728 		THR_CRITICAL_ENTER(curthread);
729 	if (!rb_onlist)
730 		robust = _mutex_enter_robust(curthread, m);
731 	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
732 	if (ret == 0 || ret == EOWNERDEAD) {
733 		enqueue_mutex(curthread, m, ret);
734 		if (ret == EOWNERDEAD)
735 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
736 	} else {
737 		ret = mutex_lock_sleep(curthread, m, abstime);
738 	}
739 	if (!rb_onlist && robust)
740 		_mutex_leave_robust(curthread, m);
741 	if (ret != 0 && ret != EOWNERDEAD &&
742 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
743 		THR_CRITICAL_LEAVE(curthread);
744 	return (ret);
745 }
746 
747 int
748 __pthread_mutex_lock(pthread_mutex_t *mutex)
749 {
750 	struct pthread_mutex *m;
751 	int ret;
752 
753 	_thr_check_init();
754 	ret = check_and_init_mutex(mutex, &m);
755 	if (ret == 0)
756 		ret = mutex_lock_common(m, NULL, false, false);
757 	return (ret);
758 }
759 
760 int
761 __pthread_mutex_timedlock(pthread_mutex_t *mutex,
762     const struct timespec *abstime)
763 {
764 	struct pthread_mutex *m;
765 	int ret;
766 
767 	_thr_check_init();
768 	ret = check_and_init_mutex(mutex, &m);
769 	if (ret == 0)
770 		ret = mutex_lock_common(m, abstime, false, false);
771 	return (ret);
772 }
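/*
 * A minimal sketch (not part of this file): pthread_mutex_timedlock()
 * takes an absolute CLOCK_REALTIME deadline, and the code above
 * rejects tv_nsec outside [0, 1000000000).  The five-second timeout is
 * an arbitrary example.
 */
#if 0
#include <pthread.h>
#include <time.h>

static int
lock_with_timeout(pthread_mutex_t *mp)
{
	struct timespec abstime;

	clock_gettime(CLOCK_REALTIME, &abstime);
	abstime.tv_sec += 5;
	return (pthread_mutex_timedlock(mp, &abstime));	/* 0 or ETIMEDOUT */
}
#endif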
773 
774 int
775 _pthread_mutex_unlock(pthread_mutex_t *mutex)
776 {
777 	struct pthread_mutex *mp;
778 
779 	if (*mutex == THR_PSHARED_PTR) {
780 		mp = __thr_pshared_offpage(mutex, 0);
781 		if (mp == NULL)
782 			return (EINVAL);
783 		shared_mutex_init(mp, NULL);
784 	} else {
785 		mp = *mutex;
786 	}
787 	return (mutex_unlock_common(mp, false, NULL));
788 }
789 
790 int
791 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
792 {
793 	int error;
794 
795 	error = mutex_lock_common(m, NULL, true, rb_onlist);
796 	if (error == 0 || error == EOWNERDEAD)
797 		m->m_count = count;
798 	return (error);
799 }
800 
801 int
802 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
803 {
804 
805 	/*
806 	 * Clear the count in case this is a recursive mutex.
807 	 */
808 	*count = m->m_count;
809 	m->m_count = 0;
810 	(void)mutex_unlock_common(m, true, defer);
811         return (0);
812 }
813 
814 int
815 _mutex_cv_attach(struct pthread_mutex *m, int count)
816 {
817 	struct pthread *curthread;
818 
819 	curthread = _get_curthread();
820 	enqueue_mutex(curthread, m, 0);
821 	m->m_count = count;
822 	return (0);
823 }
824 
825 int
826 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
827 {
828 	struct pthread *curthread;
829 	int deferred, error;
830 
831 	curthread = _get_curthread();
832 	if ((error = _mutex_owned(curthread, mp)) != 0)
833 		return (error);
834 
835 	/*
836 	 * Clear the count in case this is a recursive mutex.
837 	 */
838 	*recurse = mp->m_count;
839 	mp->m_count = 0;
840 	dequeue_mutex(curthread, mp);
841 
842 	/* Will this happen in the real world? */
843         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
844 		deferred = 1;
845 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
846 	} else
847 		deferred = 0;
848 
849 	if (deferred)  {
850 		_thr_wake_all(curthread->defer_waiters,
851 		    curthread->nwaiter_defer);
852 		curthread->nwaiter_defer = 0;
853 	}
854 	return (0);
855 }
856 
857 static int
858 mutex_self_trylock(struct pthread_mutex *m)
859 {
860 	int ret;
861 
862 	switch (PMUTEX_TYPE(m->m_flags)) {
863 	case PTHREAD_MUTEX_ERRORCHECK:
864 	case PTHREAD_MUTEX_NORMAL:
865 	case PTHREAD_MUTEX_ADAPTIVE_NP:
866 		ret = EBUSY;
867 		break;
868 
869 	case PTHREAD_MUTEX_RECURSIVE:
870 		/* Increment the lock count: */
871 		if (m->m_count + 1 > 0) {
872 			m->m_count++;
873 			ret = 0;
874 		} else
875 			ret = EAGAIN;
876 		break;
877 
878 	default:
879 		/* Trap invalid mutex types. */
880 		ret = EINVAL;
881 	}
882 
883 	return (ret);
884 }
885 
886 static int
887 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
888 {
889 	struct timespec	ts1, ts2;
890 	int ret;
891 
892 	switch (PMUTEX_TYPE(m->m_flags)) {
893 	case PTHREAD_MUTEX_ERRORCHECK:
894 	case PTHREAD_MUTEX_ADAPTIVE_NP:
895 		if (abstime) {
896 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
897 			    abstime->tv_nsec >= 1000000000) {
898 				ret = EINVAL;
899 			} else {
900 				clock_gettime(CLOCK_REALTIME, &ts1);
901 				TIMESPEC_SUB(&ts2, abstime, &ts1);
902 				__sys_nanosleep(&ts2, NULL);
903 				ret = ETIMEDOUT;
904 			}
905 		} else {
906 			/*
907 			 * POSIX specifies that mutexes should return
908 			 * EDEADLK if a recursive lock is detected.
909 			 */
910 			ret = EDEADLK;
911 		}
912 		break;
913 
914 	case PTHREAD_MUTEX_NORMAL:
915 		/*
916 		 * What SS2 defines as a 'normal' mutex.  Intentionally
917 		 * deadlock on attempts to get a lock you already own.
918 		 */
919 		ret = 0;
920 		if (abstime) {
921 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
922 			    abstime->tv_nsec >= 1000000000) {
923 				ret = EINVAL;
924 			} else {
925 				clock_gettime(CLOCK_REALTIME, &ts1);
926 				TIMESPEC_SUB(&ts2, abstime, &ts1);
927 				__sys_nanosleep(&ts2, NULL);
928 				ret = ETIMEDOUT;
929 			}
930 		} else {
931 			ts1.tv_sec = 30;
932 			ts1.tv_nsec = 0;
933 			for (;;)
934 				__sys_nanosleep(&ts1, NULL);
935 		}
936 		break;
937 
938 	case PTHREAD_MUTEX_RECURSIVE:
939 		/* Increment the lock count: */
940 		if (m->m_count + 1 > 0) {
941 			m->m_count++;
942 			ret = 0;
943 		} else
944 			ret = EAGAIN;
945 		break;
946 
947 	default:
948 		/* Trap invalid mutex types. */
949 		ret = EINVAL;
950 	}
951 
952 	return (ret);
953 }
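/*
 * A minimal sketch (not part of this file) of the error-checking case
 * above: relocking a PTHREAD_MUTEX_ERRORCHECK mutex fails with EDEADLK
 * instead of deadlocking, while a normal-type mutex would block
 * forever.
 */
#if 0
#include <pthread.h>

static int
relock_errorcheck(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t mtx;
	int error;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&mtx, &attr);
	pthread_mutexattr_destroy(&attr);

	pthread_mutex_lock(&mtx);
	error = pthread_mutex_lock(&mtx);	/* EDEADLK, does not block */
	pthread_mutex_unlock(&mtx);
	pthread_mutex_destroy(&mtx);
	return (error);
}
#endif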
954 
955 static int
956 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
957 {
958 	struct pthread *curthread;
959 	uint32_t id;
960 	int deferred, error, robust;
961 
962 	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
963 		if (m == THR_MUTEX_DESTROYED)
964 			return (EINVAL);
965 		return (EPERM);
966 	}
967 
968 	curthread = _get_curthread();
969 	id = TID(curthread);
970 
971 	/*
972 	 * Check if the running thread is not the owner of the mutex.
973 	 */
974 	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
975 		return (EPERM);
976 
977 	error = 0;
978 	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
979 	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
980 		m->m_count--;
981 	} else {
982 		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
983 			deferred = 1;
984 			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
985         	} else
986 			deferred = 0;
987 
988 		robust = _mutex_enter_robust(curthread, m);
989 		dequeue_mutex(curthread, m);
990 		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
991 		if (deferred)  {
992 			if (mtx_defer == NULL) {
993 				_thr_wake_all(curthread->defer_waiters,
994 				    curthread->nwaiter_defer);
995 				curthread->nwaiter_defer = 0;
996 			} else
997 				*mtx_defer = 1;
998 		}
999 		if (robust)
1000 			_mutex_leave_robust(curthread, m);
1001 	}
1002 	if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE)
1003 		THR_CRITICAL_LEAVE(curthread);
1004 	return (error);
1005 }
1006 
1007 int
1008 _pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
1009     int *prioceiling)
1010 {
1011 	struct pthread_mutex *m;
1012 
1013 	if (*mutex == THR_PSHARED_PTR) {
1014 		m = __thr_pshared_offpage(mutex, 0);
1015 		if (m == NULL)
1016 			return (EINVAL);
1017 		shared_mutex_init(m, NULL);
1018 	} else {
1019 		m = *mutex;
1020 		if (m <= THR_MUTEX_DESTROYED)
1021 			return (EINVAL);
1022 	}
1023 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1024 		return (EINVAL);
1025 	*prioceiling = m->m_lock.m_ceilings[0];
1026 	return (0);
1027 }
1028 
1029 int
1030 _pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
1031     int ceiling, int *old_ceiling)
1032 {
1033 	struct pthread *curthread;
1034 	struct pthread_mutex *m, *m1, *m2;
1035 	struct mutex_queue *q, *qp;
1036 	int qidx, ret;
1037 
1038 	if (*mutex == THR_PSHARED_PTR) {
1039 		m = __thr_pshared_offpage(mutex, 0);
1040 		if (m == NULL)
1041 			return (EINVAL);
1042 		shared_mutex_init(m, NULL);
1043 	} else {
1044 		m = *mutex;
1045 		if (m <= THR_MUTEX_DESTROYED)
1046 			return (EINVAL);
1047 	}
1048 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1049 		return (EINVAL);
1050 
1051 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1052 	if (ret != 0)
1053 		return (ret);
1054 
1055 	curthread = _get_curthread();
1056 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1057 		mutex_assert_is_owned(m);
1058 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1059 		m2 = TAILQ_NEXT(m, m_qe);
1060 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1061 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1062 			qidx = mutex_qidx(m);
1063 			q = &curthread->mq[qidx];
1064 			qp = &curthread->mq[qidx + 1];
1065 			TAILQ_REMOVE(q, m, m_qe);
1066 			if (!is_pshared_mutex(m))
1067 				TAILQ_REMOVE(qp, m, m_pqe);
1068 			TAILQ_FOREACH(m2, q, m_qe) {
1069 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1070 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1071 					if (!is_pshared_mutex(m)) {
1072 						while (m2 != NULL &&
1073 						    is_pshared_mutex(m2)) {
1074 							m2 = TAILQ_PREV(m2,
1075 							    mutex_queue, m_qe);
1076 						}
1077 						if (m2 == NULL) {
1078 							TAILQ_INSERT_HEAD(qp,
1079 							    m, m_pqe);
1080 						} else {
1081 							TAILQ_INSERT_BEFORE(m2,
1082 							    m, m_pqe);
1083 						}
1084 					}
1085 					return (0);
1086 				}
1087 			}
1088 			TAILQ_INSERT_TAIL(q, m, m_qe);
1089 			if (!is_pshared_mutex(m))
1090 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1091 		}
1092 	}
1093 	return (0);
1094 }
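/*
 * A minimal sketch (not part of this file): priority ceilings only
 * apply to PTHREAD_PRIO_PROTECT mutexes, matching the
 * UMUTEX_PRIO_PROTECT checks above.  The ceiling values 20 and 25 are
 * arbitrary examples.
 */
#if 0
#include <pthread.h>

static int
init_pp_mutex(pthread_mutex_t *mp)
{
	pthread_mutexattr_t attr;
	int error, old;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
	pthread_mutexattr_setprioceiling(&attr, 20);
	error = pthread_mutex_init(mp, &attr);
	pthread_mutexattr_destroy(&attr);
	if (error != 0)
		return (error);
	return (pthread_mutex_setprioceiling(mp, 25, &old));
}
#endif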
1095 
1096 int
1097 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1098 {
1099 	struct pthread_mutex *m;
1100 	int ret;
1101 
1102 	ret = check_and_init_mutex(mutex, &m);
1103 	if (ret == 0)
1104 		*count = m->m_spinloops;
1105 	return (ret);
1106 }
1107 
1108 int
1109 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1110 {
1111 	struct pthread_mutex *m;
1112 	int ret;
1113 
1114 	ret = check_and_init_mutex(mutex, &m);
1115 	if (ret == 0)
1116 		m->m_spinloops = count;
1117 	return (ret);
1118 }
1119 
1120 int
1121 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1122 {
1123 	struct pthread_mutex *m;
1124 	int ret;
1125 
1126 	ret = check_and_init_mutex(mutex, &m);
1127 	if (ret == 0)
1128 		*count = m->m_yieldloops;
1129 	return (ret);
1130 }
1131 
1132 int
1133 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1134 {
1135 	struct pthread_mutex *m;
1136 	int ret;
1137 
1138 	ret = check_and_init_mutex(mutex, &m);
1139 	if (ret == 0)
1140 		m->m_yieldloops = count;
1141 	return (ret);
1142 }
1143 
1144 int
1145 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1146 {
1147 	struct pthread_mutex *m;
1148 
1149 	if (*mutex == THR_PSHARED_PTR) {
1150 		m = __thr_pshared_offpage(mutex, 0);
1151 		if (m == NULL)
1152 			return (0);
1153 		shared_mutex_init(m, NULL);
1154 	} else {
1155 		m = *mutex;
1156 		if (m <= THR_MUTEX_DESTROYED)
1157 			return (0);
1158 	}
1159 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1160 }
1161 
1162 int
1163 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1164 {
1165 
1166 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1167 		if (mp == THR_MUTEX_DESTROYED)
1168 			return (EINVAL);
1169 		return (EPERM);
1170 	}
1171 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1172 		return (EPERM);
1173 	return (0);
1174 }
1175 
1176 int
1177 _pthread_mutex_consistent(pthread_mutex_t *mutex)
1178 {
1179 	struct pthread_mutex *m;
1180 	struct pthread *curthread;
1181 
1182 	if (*mutex == THR_PSHARED_PTR) {
1183 		m = __thr_pshared_offpage(mutex, 0);
1184 		if (m == NULL)
1185 			return (EINVAL);
1186 		shared_mutex_init(m, NULL);
1187 	} else {
1188 		m = *mutex;
1189 		if (m <= THR_MUTEX_DESTROYED)
1190 			return (EINVAL);
1191 	}
1192 	curthread = _get_curthread();
1193 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1194 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1195 		return (EINVAL);
1196 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1197 		return (EPERM);
1198 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1199 	return (0);
1200 }
1201
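/*
 * A minimal sketch (not part of this file) of robust-mutex recovery
 * through the function above: EOWNERDEAD grants ownership but flags
 * the protected state as possibly inconsistent; repair_shared_state()
 * stands in for whatever application-specific repair is required.
 */
#if 0
#include <errno.h>
#include <pthread.h>

void	repair_shared_state(void);	/* hypothetical application hook */

static int
lock_robust(pthread_mutex_t *mp)
{
	int error;

	error = pthread_mutex_lock(mp);
	if (error == EOWNERDEAD) {
		repair_shared_state();
		error = pthread_mutex_consistent(mp);
	}
	return (error);
}
#endif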