xref: /freebsd/lib/libthr/thread/thr_mutex.c (revision be996c05224c3d82f26f94315c760776c3f2896c)
1 /*
2  * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>.
3  * Copyright (c) 2006 David Xu <davidxu@freebsd.org>.
4  * Copyright (c) 2015, 2016 The FreeBSD Foundation
5  *
6  * All rights reserved.
7  *
8  * Portions of this software were developed by Konstantin Belousov
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by John Birrell.
22  * 4. Neither the name of the author nor the names of any co-contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "namespace.h"
43 #include <stdlib.h>
44 #include <errno.h>
45 #include <string.h>
46 #include <sys/param.h>
47 #include <sys/queue.h>
48 #include <pthread.h>
49 #include <pthread_np.h>
50 #include "un-namespace.h"
51 
52 #include "thr_private.h"
53 
54 _Static_assert(sizeof(struct pthread_mutex) <= PAGE_SIZE,
55     "pthread_mutex is too large for off-page");
56 
/*
 * For adaptive mutexes, the number of times to spin attempting
 * trylock2 before entering the kernel to block.
 */
61 #define MUTEX_ADAPTIVE_SPINS	2000
62 
63 /*
64  * Prototypes
65  */
66 int	__pthread_mutex_consistent(pthread_mutex_t *mutex);
67 int	__pthread_mutex_init(pthread_mutex_t *mutex,
68 		const pthread_mutexattr_t *mutex_attr);
69 int	__pthread_mutex_trylock(pthread_mutex_t *mutex);
70 int	__pthread_mutex_lock(pthread_mutex_t *mutex);
71 int	__pthread_mutex_timedlock(pthread_mutex_t *mutex,
72 		const struct timespec *abstime);
73 int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
74     		void *(calloc_cb)(size_t, size_t));
75 int	_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count);
76 int	_pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
77 int	__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count);
78 int	_pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
79 int	_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count);
80 int	__pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count);
81 
82 static int	mutex_self_trylock(pthread_mutex_t);
83 static int	mutex_self_lock(pthread_mutex_t,
84 				const struct timespec *abstime);
85 static int	mutex_unlock_common(struct pthread_mutex *, bool, int *);
86 static int	mutex_lock_sleep(struct pthread *, pthread_mutex_t,
87 				const struct timespec *);
88 static void	mutex_init_robust(struct pthread *curthread);
89 static int	mutex_qidx(struct pthread_mutex *m);
90 static bool	is_robust_mutex(struct pthread_mutex *m);
91 static bool	is_pshared_mutex(struct pthread_mutex *m);
92 
93 __weak_reference(__pthread_mutex_init, pthread_mutex_init);
94 __strong_reference(__pthread_mutex_init, _pthread_mutex_init);
95 __weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
96 __strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
97 __weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
98 __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
99 __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
100 __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);
101 __weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent);
102 __strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent);
103 
104 /* Single underscore versions provided for libc internal usage: */
105 /* No difference between libc and application usage of these: */
106 __weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
107 __weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);
108 
109 __weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
110 __weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);
111 
112 __weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
113 __strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
114 __weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);
115 
116 __weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
117 __strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
118 __weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
119 __weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);
120 
/*
 * Reset both queue linkages (m_qe and m_pqe) of a mutex to NULL, i.e.
 * to the state the invariant checks treat as "not on any list".
 * The stores are compiled in only when _PTHREADS_INVARIANTS is defined;
 * otherwise this function does nothing.
 */
static void
mutex_init_link(struct pthread_mutex *m)
{

#ifdef _PTHREADS_INVARIANTS
	m->m_qe.tqe_next = NULL;
	m->m_qe.tqe_prev = NULL;
	m->m_pqe.tqe_next = NULL;
	m->m_pqe.tqe_prev = NULL;
#endif
}
132 
133 static void
134 mutex_assert_is_owned(struct pthread_mutex *m __unused)
135 {
136 
137 #if defined(_PTHREADS_INVARIANTS)
138 	if (__predict_false(m->m_qe.tqe_prev == NULL))
139 		PANIC("mutex %p own %#x is not on list %p %p",
140 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
141 #endif
142 }
143 
144 static void
145 mutex_assert_not_owned(struct pthread *curthread __unused,
146     struct pthread_mutex *m __unused)
147 {
148 
149 #if defined(_PTHREADS_INVARIANTS)
150 	if (__predict_false(m->m_qe.tqe_prev != NULL ||
151 	    m->m_qe.tqe_next != NULL))
152 		PANIC("mutex %p own %#x is on list %p %p",
153 		    m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next);
154 	if (__predict_false(is_robust_mutex(m) &&
155 	    (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL ||
156 	    (is_pshared_mutex(m) && curthread->robust_list ==
157 	    (uintptr_t)&m->m_lock) ||
158 	    (!is_pshared_mutex(m) && curthread->priv_robust_list ==
159 	    (uintptr_t)&m->m_lock))))
160 		PANIC(
161     "mutex %p own %#x is on robust linkage %p %p head %p phead %p",
162 		    m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk,
163 		    m->m_rb_prev, (void *)curthread->robust_list,
164 		    (void *)curthread->priv_robust_list);
165 #endif
166 }
167 
168 static bool
169 is_pshared_mutex(struct pthread_mutex *m)
170 {
171 
172 	return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0);
173 }
174 
175 static bool
176 is_robust_mutex(struct pthread_mutex *m)
177 {
178 
179 	return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0);
180 }
181 
182 int
183 _mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m)
184 {
185 
186 #if defined(_PTHREADS_INVARIANTS)
187 	if (__predict_false(curthread->inact_mtx != 0))
188 		PANIC("inact_mtx enter");
189 #endif
190 	if (!is_robust_mutex(m))
191 		return (0);
192 
193 	mutex_init_robust(curthread);
194 	curthread->inact_mtx = (uintptr_t)&m->m_lock;
195 	return (1);
196 }
197 
198 void
199 _mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused)
200 {
201 
202 #if defined(_PTHREADS_INVARIANTS)
203 	if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock))
204 		PANIC("inact_mtx leave");
205 #endif
206 	curthread->inact_mtx = 0;
207 }
208 
209 static int
210 mutex_check_attr(const struct pthread_mutex_attr *attr)
211 {
212 
213 	if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
214 	    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
215 		return (EINVAL);
216 	if (attr->m_protocol < PTHREAD_PRIO_NONE ||
217 	    attr->m_protocol > PTHREAD_PRIO_PROTECT)
218 		return (EINVAL);
219 	return (0);
220 }
221 
222 static void
223 mutex_init_robust(struct pthread *curthread)
224 {
225 	struct umtx_robust_lists_params rb;
226 
227 	if (curthread == NULL)
228 		curthread = _get_curthread();
229 	if (curthread->robust_inited)
230 		return;
231 	rb.robust_list_offset = (uintptr_t)&curthread->robust_list;
232 	rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list;
233 	rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx;
234 	_umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL);
235 	curthread->robust_inited = 1;
236 }
237 
238 static void
239 mutex_init_body(struct pthread_mutex *pmutex,
240     const struct pthread_mutex_attr *attr)
241 {
242 
243 	pmutex->m_flags = attr->m_type;
244 	pmutex->m_count = 0;
245 	pmutex->m_spinloops = 0;
246 	pmutex->m_yieldloops = 0;
247 	mutex_init_link(pmutex);
248 	switch (attr->m_protocol) {
249 	case PTHREAD_PRIO_NONE:
250 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
251 		pmutex->m_lock.m_flags = 0;
252 		break;
253 	case PTHREAD_PRIO_INHERIT:
254 		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
255 		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
256 		break;
257 	case PTHREAD_PRIO_PROTECT:
258 		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
259 		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
260 		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
261 		break;
262 	}
263 	if (attr->m_pshared == PTHREAD_PROCESS_SHARED)
264 		pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED;
265 	if (attr->m_robust == PTHREAD_MUTEX_ROBUST) {
266 		mutex_init_robust(NULL);
267 		pmutex->m_lock.m_flags |= UMUTEX_ROBUST;
268 	}
269 	if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) {
270 		pmutex->m_spinloops =
271 		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
272 		pmutex->m_yieldloops = _thr_yieldloops;
273 	}
274 }
275 
276 static int
277 mutex_init(pthread_mutex_t *mutex,
278     const struct pthread_mutex_attr *mutex_attr,
279     void *(calloc_cb)(size_t, size_t))
280 {
281 	const struct pthread_mutex_attr *attr;
282 	struct pthread_mutex *pmutex;
283 	int error;
284 
285 	if (mutex_attr == NULL) {
286 		attr = &_pthread_mutexattr_default;
287 	} else {
288 		attr = mutex_attr;
289 		error = mutex_check_attr(attr);
290 		if (error != 0)
291 			return (error);
292 	}
293 	if ((pmutex = (pthread_mutex_t)
294 		calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
295 		return (ENOMEM);
296 	mutex_init_body(pmutex, attr);
297 	*mutex = pmutex;
298 	return (0);
299 }
300 
301 static int
302 init_static(struct pthread *thread, pthread_mutex_t *mutex)
303 {
304 	int ret;
305 
306 	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);
307 
308 	if (*mutex == THR_MUTEX_INITIALIZER)
309 		ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc);
310 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
311 		ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default,
312 		    calloc);
313 	else
314 		ret = 0;
315 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
316 
317 	return (ret);
318 }
319 
320 static void
321 set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
322 {
323 	struct pthread_mutex *m2;
324 
325 	m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue);
326 	if (m2 != NULL)
327 		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
328 	else
329 		m->m_lock.m_ceilings[1] = -1;
330 }
331 
332 static void
333 shared_mutex_init(struct pthread_mutex *pmtx, const struct
334     pthread_mutex_attr *mutex_attr)
335 {
336 	static const struct pthread_mutex_attr foobar_mutex_attr = {
337 		.m_type = PTHREAD_MUTEX_DEFAULT,
338 		.m_protocol = PTHREAD_PRIO_NONE,
339 		.m_ceiling = 0,
340 		.m_pshared = PTHREAD_PROCESS_SHARED,
341 		.m_robust = PTHREAD_MUTEX_STALLED,
342 	};
343 	bool done;
344 
345 	/*
346 	 * Hack to allow multiple pthread_mutex_init() calls on the
347 	 * same process-shared mutex.  We rely on kernel allocating
348 	 * zeroed offpage for the mutex, i.e. the
349 	 * PMUTEX_INITSTAGE_ALLOC value must be zero.
350 	 */
351 	for (done = false; !done;) {
352 		switch (pmtx->m_ps) {
353 		case PMUTEX_INITSTAGE_DONE:
354 			atomic_thread_fence_acq();
355 			done = true;
356 			break;
357 		case PMUTEX_INITSTAGE_ALLOC:
358 			if (atomic_cmpset_int(&pmtx->m_ps,
359 			    PMUTEX_INITSTAGE_ALLOC, PMUTEX_INITSTAGE_BUSY)) {
360 				if (mutex_attr == NULL)
361 					mutex_attr = &foobar_mutex_attr;
362 				mutex_init_body(pmtx, mutex_attr);
363 				atomic_store_rel_int(&pmtx->m_ps,
364 				    PMUTEX_INITSTAGE_DONE);
365 				done = true;
366 			}
367 			break;
368 		case PMUTEX_INITSTAGE_BUSY:
369 			_pthread_yield();
370 			break;
371 		default:
372 			PANIC("corrupted offpage");
373 			break;
374 		}
375 	}
376 }
377 
378 int
379 __pthread_mutex_init(pthread_mutex_t *mutex,
380     const pthread_mutexattr_t *mutex_attr)
381 {
382 	struct pthread_mutex *pmtx;
383 	int ret;
384 
385 	if (mutex_attr != NULL) {
386 		ret = mutex_check_attr(*mutex_attr);
387 		if (ret != 0)
388 			return (ret);
389 	}
390 	if (mutex_attr == NULL ||
391 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
392 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
393 		    calloc));
394 	}
395 	pmtx = __thr_pshared_offpage(mutex, 1);
396 	if (pmtx == NULL)
397 		return (EFAULT);
398 	*mutex = THR_PSHARED_PTR;
399 	shared_mutex_init(pmtx, *mutex_attr);
400 	return (0);
401 }
402 
403 /* This function is used internally by malloc. */
404 int
405 _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
406     void *(calloc_cb)(size_t, size_t))
407 {
408 	static const struct pthread_mutex_attr attr = {
409 		.m_type = PTHREAD_MUTEX_NORMAL,
410 		.m_protocol = PTHREAD_PRIO_NONE,
411 		.m_ceiling = 0,
412 		.m_pshared = PTHREAD_PROCESS_PRIVATE,
413 		.m_robust = PTHREAD_MUTEX_STALLED,
414 	};
415 	int ret;
416 
417 	ret = mutex_init(mutex, &attr, calloc_cb);
418 	if (ret == 0)
419 		(*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE;
420 	return (ret);
421 }
422 
423 /*
424  * Fix mutex ownership for child process.
425  *
426  * Process private mutex ownership is transmitted from the forking
427  * thread to the child process.
428  *
429  * Process shared mutex should not be inherited because owner is
430  * forking thread which is in parent process, they are removed from
431  * the owned mutex list.
432  */
433 static void
434 queue_fork(struct pthread *curthread, struct mutex_queue *q,
435     struct mutex_queue *qp, uint bit)
436 {
437 	struct pthread_mutex *m;
438 
439 	TAILQ_INIT(q);
440 	TAILQ_FOREACH(m, qp, m_pqe) {
441 		TAILQ_INSERT_TAIL(q, m, m_qe);
442 		m->m_lock.m_owner = TID(curthread) | bit;
443 	}
444 }
445 
446 void
447 _mutex_fork(struct pthread *curthread)
448 {
449 
450 	queue_fork(curthread, &curthread->mq[TMQ_NORM],
451 	    &curthread->mq[TMQ_NORM_PRIV], 0);
452 	queue_fork(curthread, &curthread->mq[TMQ_NORM_PP],
453 	    &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED);
454 	queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP],
455 	    &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED);
456 	curthread->robust_list = 0;
457 }
458 
459 int
460 _pthread_mutex_destroy(pthread_mutex_t *mutex)
461 {
462 	pthread_mutex_t m, m1;
463 	int ret;
464 
465 	m = *mutex;
466 	if (m < THR_MUTEX_DESTROYED) {
467 		ret = 0;
468 	} else if (m == THR_MUTEX_DESTROYED) {
469 		ret = EINVAL;
470 	} else {
471 		if (m == THR_PSHARED_PTR) {
472 			m1 = __thr_pshared_offpage(mutex, 0);
473 			if (m1 != NULL) {
474 				mutex_assert_not_owned(_get_curthread(), m1);
475 				__thr_pshared_destroy(mutex);
476 			}
477 			*mutex = THR_MUTEX_DESTROYED;
478 			return (0);
479 		}
480 		if (PMUTEX_OWNER_ID(m) != 0 &&
481 		    (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) {
482 			ret = EBUSY;
483 		} else {
484 			*mutex = THR_MUTEX_DESTROYED;
485 			mutex_assert_not_owned(_get_curthread(), m);
486 			free(m);
487 			ret = 0;
488 		}
489 	}
490 
491 	return (ret);
492 }
493 
494 static int
495 mutex_qidx(struct pthread_mutex *m)
496 {
497 
498 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
499 		return (TMQ_NORM);
500 	return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP);
501 }
502 
503 /*
504  * Both enqueue_mutex() and dequeue_mutex() operate on the
505  * thread-private linkage of the locked mutexes and on the robust
506  * linkage.
507  *
508  * Robust list, as seen by kernel, must be consistent even in the case
509  * of thread termination at arbitrary moment.  Since either enqueue or
510  * dequeue for list walked by kernel consists of rewriting a single
511  * forward pointer, it is safe.  On the other hand, rewrite of the
512  * back pointer is not atomic WRT the forward one, but kernel does not
513  * care.
514  */
515 static void
516 enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m,
517     int error)
518 {
519 	struct pthread_mutex *m1;
520 	uintptr_t *rl;
521 	int qidx;
522 
523 	/* Add to the list of owned mutexes: */
524 	if (error != EOWNERDEAD)
525 		mutex_assert_not_owned(curthread, m);
526 	qidx = mutex_qidx(m);
527 	TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe);
528 	if (!is_pshared_mutex(m))
529 		TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe);
530 	if (is_robust_mutex(m)) {
531 		rl = is_pshared_mutex(m) ? &curthread->robust_list :
532 		    &curthread->priv_robust_list;
533 		m->m_rb_prev = NULL;
534 		if (*rl != 0) {
535 			m1 = __containerof((void *)*rl,
536 			    struct pthread_mutex, m_lock);
537 			m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock;
538 			m1->m_rb_prev = m;
539 		} else {
540 			m1 = NULL;
541 			m->m_lock.m_rb_lnk = 0;
542 		}
543 		*rl = (uintptr_t)&m->m_lock;
544 	}
545 }
546 
547 static void
548 dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m)
549 {
550 	struct pthread_mutex *mp, *mn;
551 	int qidx;
552 
553 	mutex_assert_is_owned(m);
554 	qidx = mutex_qidx(m);
555 	if (is_robust_mutex(m)) {
556 		mp = m->m_rb_prev;
557 		if (mp == NULL) {
558 			if (is_pshared_mutex(m)) {
559 				curthread->robust_list = m->m_lock.m_rb_lnk;
560 			} else {
561 				curthread->priv_robust_list =
562 				    m->m_lock.m_rb_lnk;
563 			}
564 		} else {
565 			mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk;
566 		}
567 		if (m->m_lock.m_rb_lnk != 0) {
568 			mn = __containerof((void *)m->m_lock.m_rb_lnk,
569 			    struct pthread_mutex, m_lock);
570 			mn->m_rb_prev = m->m_rb_prev;
571 		}
572 		m->m_lock.m_rb_lnk = 0;
573 		m->m_rb_prev = NULL;
574 	}
575 	TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe);
576 	if (!is_pshared_mutex(m))
577 		TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe);
578 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) != 0)
579 		set_inherited_priority(curthread, m);
580 	mutex_init_link(m);
581 }
582 
583 static int
584 check_and_init_mutex(pthread_mutex_t *mutex, struct pthread_mutex **m)
585 {
586 	int ret;
587 
588 	*m = *mutex;
589 	ret = 0;
590 	if (*m == THR_PSHARED_PTR) {
591 		*m = __thr_pshared_offpage(mutex, 0);
592 		if (*m == NULL)
593 			ret = EINVAL;
594 		else
595 			shared_mutex_init(*m, NULL);
596 	} else if (__predict_false(*m <= THR_MUTEX_DESTROYED)) {
597 		if (*m == THR_MUTEX_DESTROYED) {
598 			ret = EINVAL;
599 		} else {
600 			ret = init_static(_get_curthread(), mutex);
601 			if (ret == 0)
602 				*m = *mutex;
603 		}
604 	}
605 	return (ret);
606 }
607 
608 int
609 __pthread_mutex_trylock(pthread_mutex_t *mutex)
610 {
611 	struct pthread *curthread;
612 	struct pthread_mutex *m;
613 	uint32_t id;
614 	int ret, robust;
615 
616 	ret = check_and_init_mutex(mutex, &m);
617 	if (ret != 0)
618 		return (ret);
619 	curthread = _get_curthread();
620 	id = TID(curthread);
621 	if (m->m_flags & PMUTEX_FLAG_PRIVATE)
622 		THR_CRITICAL_ENTER(curthread);
623 	robust = _mutex_enter_robust(curthread, m);
624 	ret = _thr_umutex_trylock(&m->m_lock, id);
625 	if (__predict_true(ret == 0) || ret == EOWNERDEAD) {
626 		enqueue_mutex(curthread, m, ret);
627 		if (ret == EOWNERDEAD)
628 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
629 	} else if (PMUTEX_OWNER_ID(m) == id) {
630 		ret = mutex_self_trylock(m);
631 	} /* else {} */
632 	if (robust)
633 		_mutex_leave_robust(curthread, m);
634 	if ((ret == 0 || ret == EOWNERDEAD) &&
635 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0)
636 		THR_CRITICAL_LEAVE(curthread);
637 	return (ret);
638 }
639 
640 static int
641 mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
642     const struct timespec *abstime)
643 {
644 	uint32_t id, owner;
645 	int count, ret;
646 
647 	id = TID(curthread);
648 	if (PMUTEX_OWNER_ID(m) == id)
649 		return (mutex_self_lock(m, abstime));
650 
651 	/*
652 	 * For adaptive mutexes, spin for a bit in the expectation
653 	 * that if the application requests this mutex type then
654 	 * the lock is likely to be released quickly and it is
655 	 * faster than entering the kernel
656 	 */
657 	if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT |
658 	    UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0))
659 		goto sleep_in_kernel;
660 
661 	if (!_thr_is_smp)
662 		goto yield_loop;
663 
664 	count = m->m_spinloops;
665 	while (count--) {
666 		owner = m->m_lock.m_owner;
667 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
668 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
669 			    id | owner)) {
670 				ret = 0;
671 				goto done;
672 			}
673 		}
674 		CPU_SPINWAIT;
675 	}
676 
677 yield_loop:
678 	count = m->m_yieldloops;
679 	while (count--) {
680 		_sched_yield();
681 		owner = m->m_lock.m_owner;
682 		if ((owner & ~UMUTEX_CONTESTED) == 0) {
683 			if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner,
684 			    id | owner)) {
685 				ret = 0;
686 				goto done;
687 			}
688 		}
689 	}
690 
691 sleep_in_kernel:
692 	if (abstime == NULL)
693 		ret = __thr_umutex_lock(&m->m_lock, id);
694 	else if (__predict_false(abstime->tv_nsec < 0 ||
695 	    abstime->tv_nsec >= 1000000000))
696 		ret = EINVAL;
697 	else
698 		ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
699 done:
700 	if (ret == 0 || ret == EOWNERDEAD) {
701 		enqueue_mutex(curthread, m, ret);
702 		if (ret == EOWNERDEAD)
703 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
704 	}
705 	return (ret);
706 }
707 
708 static inline int
709 mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime,
710     bool cvattach, bool rb_onlist)
711 {
712 	struct pthread *curthread;
713 	int ret, robust;
714 
715 	curthread  = _get_curthread();
716 	if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE)
717 		THR_CRITICAL_ENTER(curthread);
718 	if (!rb_onlist)
719 		robust = _mutex_enter_robust(curthread, m);
720 	ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread));
721 	if (ret == 0 || ret == EOWNERDEAD) {
722 		enqueue_mutex(curthread, m, ret);
723 		if (ret == EOWNERDEAD)
724 			m->m_lock.m_flags |= UMUTEX_NONCONSISTENT;
725 	} else {
726 		ret = mutex_lock_sleep(curthread, m, abstime);
727 	}
728 	if (!rb_onlist && robust)
729 		_mutex_leave_robust(curthread, m);
730 	if (ret != 0 && ret != EOWNERDEAD &&
731 	    (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach)
732 		THR_CRITICAL_LEAVE(curthread);
733 	return (ret);
734 }
735 
736 int
737 __pthread_mutex_lock(pthread_mutex_t *mutex)
738 {
739 	struct pthread_mutex *m;
740 	int ret;
741 
742 	_thr_check_init();
743 	ret = check_and_init_mutex(mutex, &m);
744 	if (ret == 0)
745 		ret = mutex_lock_common(m, NULL, false, false);
746 	return (ret);
747 }
748 
749 int
750 __pthread_mutex_timedlock(pthread_mutex_t *mutex,
751     const struct timespec *abstime)
752 {
753 	struct pthread_mutex *m;
754 	int ret;
755 
756 	_thr_check_init();
757 	ret = check_and_init_mutex(mutex, &m);
758 	if (ret == 0)
759 		ret = mutex_lock_common(m, abstime, false, false);
760 	return (ret);
761 }
762 
763 int
764 _pthread_mutex_unlock(pthread_mutex_t *mutex)
765 {
766 	struct pthread_mutex *mp;
767 
768 	if (*mutex == THR_PSHARED_PTR) {
769 		mp = __thr_pshared_offpage(mutex, 0);
770 		if (mp == NULL)
771 			return (EINVAL);
772 		shared_mutex_init(mp, NULL);
773 	} else {
774 		mp = *mutex;
775 	}
776 	return (mutex_unlock_common(mp, false, NULL));
777 }
778 
779 int
780 _mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist)
781 {
782 	int error;
783 
784 	error = mutex_lock_common(m, NULL, true, rb_onlist);
785 	if (error == 0 || error == EOWNERDEAD)
786 		m->m_count = count;
787 	return (error);
788 }
789 
790 int
791 _mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer)
792 {
793 
794 	/*
795 	 * Clear the count in case this is a recursive mutex.
796 	 */
797 	*count = m->m_count;
798 	m->m_count = 0;
799 	(void)mutex_unlock_common(m, true, defer);
800         return (0);
801 }
802 
803 int
804 _mutex_cv_attach(struct pthread_mutex *m, int count)
805 {
806 	struct pthread *curthread;
807 
808 	curthread = _get_curthread();
809 	enqueue_mutex(curthread, m, 0);
810 	m->m_count = count;
811 	return (0);
812 }
813 
814 int
815 _mutex_cv_detach(struct pthread_mutex *mp, int *recurse)
816 {
817 	struct pthread *curthread;
818 	int deferred, error;
819 
820 	curthread = _get_curthread();
821 	if ((error = _mutex_owned(curthread, mp)) != 0)
822 		return (error);
823 
824 	/*
825 	 * Clear the count in case this is a recursive mutex.
826 	 */
827 	*recurse = mp->m_count;
828 	mp->m_count = 0;
829 	dequeue_mutex(curthread, mp);
830 
831 	/* Will this happen in real-world ? */
832         if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
833 		deferred = 1;
834 		mp->m_flags &= ~PMUTEX_FLAG_DEFERRED;
835 	} else
836 		deferred = 0;
837 
838 	if (deferred)  {
839 		_thr_wake_all(curthread->defer_waiters,
840 		    curthread->nwaiter_defer);
841 		curthread->nwaiter_defer = 0;
842 	}
843 	return (0);
844 }
845 
846 static int
847 mutex_self_trylock(struct pthread_mutex *m)
848 {
849 	int ret;
850 
851 	switch (PMUTEX_TYPE(m->m_flags)) {
852 	case PTHREAD_MUTEX_ERRORCHECK:
853 	case PTHREAD_MUTEX_NORMAL:
854 	case PTHREAD_MUTEX_ADAPTIVE_NP:
855 		ret = EBUSY;
856 		break;
857 
858 	case PTHREAD_MUTEX_RECURSIVE:
859 		/* Increment the lock count: */
860 		if (m->m_count + 1 > 0) {
861 			m->m_count++;
862 			ret = 0;
863 		} else
864 			ret = EAGAIN;
865 		break;
866 
867 	default:
868 		/* Trap invalid mutex types; */
869 		ret = EINVAL;
870 	}
871 
872 	return (ret);
873 }
874 
875 static int
876 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime)
877 {
878 	struct timespec	ts1, ts2;
879 	int ret;
880 
881 	switch (PMUTEX_TYPE(m->m_flags)) {
882 	case PTHREAD_MUTEX_ERRORCHECK:
883 	case PTHREAD_MUTEX_ADAPTIVE_NP:
884 		if (abstime) {
885 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
886 			    abstime->tv_nsec >= 1000000000) {
887 				ret = EINVAL;
888 			} else {
889 				clock_gettime(CLOCK_REALTIME, &ts1);
890 				TIMESPEC_SUB(&ts2, abstime, &ts1);
891 				__sys_nanosleep(&ts2, NULL);
892 				ret = ETIMEDOUT;
893 			}
894 		} else {
895 			/*
896 			 * POSIX specifies that mutexes should return
897 			 * EDEADLK if a recursive lock is detected.
898 			 */
899 			ret = EDEADLK;
900 		}
901 		break;
902 
903 	case PTHREAD_MUTEX_NORMAL:
904 		/*
905 		 * What SS2 define as a 'normal' mutex.  Intentionally
906 		 * deadlock on attempts to get a lock you already own.
907 		 */
908 		ret = 0;
909 		if (abstime) {
910 			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
911 			    abstime->tv_nsec >= 1000000000) {
912 				ret = EINVAL;
913 			} else {
914 				clock_gettime(CLOCK_REALTIME, &ts1);
915 				TIMESPEC_SUB(&ts2, abstime, &ts1);
916 				__sys_nanosleep(&ts2, NULL);
917 				ret = ETIMEDOUT;
918 			}
919 		} else {
920 			ts1.tv_sec = 30;
921 			ts1.tv_nsec = 0;
922 			for (;;)
923 				__sys_nanosleep(&ts1, NULL);
924 		}
925 		break;
926 
927 	case PTHREAD_MUTEX_RECURSIVE:
928 		/* Increment the lock count: */
929 		if (m->m_count + 1 > 0) {
930 			m->m_count++;
931 			ret = 0;
932 		} else
933 			ret = EAGAIN;
934 		break;
935 
936 	default:
937 		/* Trap invalid mutex types; */
938 		ret = EINVAL;
939 	}
940 
941 	return (ret);
942 }
943 
944 static int
945 mutex_unlock_common(struct pthread_mutex *m, bool cv, int *mtx_defer)
946 {
947 	struct pthread *curthread;
948 	uint32_t id;
949 	int deferred, error, robust;
950 
951 	if (__predict_false(m <= THR_MUTEX_DESTROYED)) {
952 		if (m == THR_MUTEX_DESTROYED)
953 			return (EINVAL);
954 		return (EPERM);
955 	}
956 
957 	curthread = _get_curthread();
958 	id = TID(curthread);
959 
960 	/*
961 	 * Check if the running thread is not the owner of the mutex.
962 	 */
963 	if (__predict_false(PMUTEX_OWNER_ID(m) != id))
964 		return (EPERM);
965 
966 	error = 0;
967 	if (__predict_false(PMUTEX_TYPE(m->m_flags) ==
968 	    PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) {
969 		m->m_count--;
970 	} else {
971 		if ((m->m_flags & PMUTEX_FLAG_DEFERRED) != 0) {
972 			deferred = 1;
973 			m->m_flags &= ~PMUTEX_FLAG_DEFERRED;
974         	} else
975 			deferred = 0;
976 
977 		robust = _mutex_enter_robust(curthread, m);
978 		dequeue_mutex(curthread, m);
979 		error = _thr_umutex_unlock2(&m->m_lock, id, mtx_defer);
980 		if (deferred)  {
981 			if (mtx_defer == NULL) {
982 				_thr_wake_all(curthread->defer_waiters,
983 				    curthread->nwaiter_defer);
984 				curthread->nwaiter_defer = 0;
985 			} else
986 				*mtx_defer = 1;
987 		}
988 		if (robust)
989 			_mutex_leave_robust(curthread, m);
990 	}
991 	if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE)
992 		THR_CRITICAL_LEAVE(curthread);
993 	return (error);
994 }
995 
996 int
997 _pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
998     int *prioceiling)
999 {
1000 	struct pthread_mutex *m;
1001 
1002 	if (*mutex == THR_PSHARED_PTR) {
1003 		m = __thr_pshared_offpage(mutex, 0);
1004 		if (m == NULL)
1005 			return (EINVAL);
1006 		shared_mutex_init(m, NULL);
1007 	} else {
1008 		m = *mutex;
1009 		if (m <= THR_MUTEX_DESTROYED)
1010 			return (EINVAL);
1011 	}
1012 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1013 		return (EINVAL);
1014 	*prioceiling = m->m_lock.m_ceilings[0];
1015 	return (0);
1016 }
1017 
1018 int
1019 _pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
1020     int ceiling, int *old_ceiling)
1021 {
1022 	struct pthread *curthread;
1023 	struct pthread_mutex *m, *m1, *m2;
1024 	struct mutex_queue *q, *qp;
1025 	int qidx, ret;
1026 
1027 	if (*mutex == THR_PSHARED_PTR) {
1028 		m = __thr_pshared_offpage(mutex, 0);
1029 		if (m == NULL)
1030 			return (EINVAL);
1031 		shared_mutex_init(m, NULL);
1032 	} else {
1033 		m = *mutex;
1034 		if (m <= THR_MUTEX_DESTROYED)
1035 			return (EINVAL);
1036 	}
1037 	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
1038 		return (EINVAL);
1039 
1040 	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
1041 	if (ret != 0)
1042 		return (ret);
1043 
1044 	curthread = _get_curthread();
1045 	if (PMUTEX_OWNER_ID(m) == TID(curthread)) {
1046 		mutex_assert_is_owned(m);
1047 		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
1048 		m2 = TAILQ_NEXT(m, m_qe);
1049 		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
1050 		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
1051 			qidx = mutex_qidx(m);
1052 			q = &curthread->mq[qidx];
1053 			qp = &curthread->mq[qidx + 1];
1054 			TAILQ_REMOVE(q, m, m_qe);
1055 			if (!is_pshared_mutex(m))
1056 				TAILQ_REMOVE(qp, m, m_pqe);
1057 			TAILQ_FOREACH(m2, q, m_qe) {
1058 				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
1059 					TAILQ_INSERT_BEFORE(m2, m, m_qe);
1060 					if (!is_pshared_mutex(m)) {
1061 						while (m2 != NULL &&
1062 						    is_pshared_mutex(m2)) {
1063 							m2 = TAILQ_PREV(m2,
1064 							    mutex_queue, m_qe);
1065 						}
1066 						if (m2 == NULL) {
1067 							TAILQ_INSERT_HEAD(qp,
1068 							    m, m_pqe);
1069 						} else {
1070 							TAILQ_INSERT_BEFORE(m2,
1071 							    m, m_pqe);
1072 						}
1073 					}
1074 					return (0);
1075 				}
1076 			}
1077 			TAILQ_INSERT_TAIL(q, m, m_qe);
1078 			if (!is_pshared_mutex(m))
1079 				TAILQ_INSERT_TAIL(qp, m, m_pqe);
1080 		}
1081 	}
1082 	return (0);
1083 }
1084 
1085 int
1086 _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
1087 {
1088 	struct pthread_mutex *m;
1089 	int ret;
1090 
1091 	ret = check_and_init_mutex(mutex, &m);
1092 	if (ret == 0)
1093 		*count = m->m_spinloops;
1094 	return (ret);
1095 }
1096 
1097 int
1098 __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
1099 {
1100 	struct pthread_mutex *m;
1101 	int ret;
1102 
1103 	ret = check_and_init_mutex(mutex, &m);
1104 	if (ret == 0)
1105 		m->m_spinloops = count;
1106 	return (ret);
1107 }
1108 
1109 int
1110 _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
1111 {
1112 	struct pthread_mutex *m;
1113 	int ret;
1114 
1115 	ret = check_and_init_mutex(mutex, &m);
1116 	if (ret == 0)
1117 		*count = m->m_yieldloops;
1118 	return (ret);
1119 }
1120 
1121 int
1122 __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count)
1123 {
1124 	struct pthread_mutex *m;
1125 	int ret;
1126 
1127 	ret = check_and_init_mutex(mutex, &m);
1128 	if (ret == 0)
1129 		m->m_yieldloops = count;
1130 	return (0);
1131 }
1132 
1133 int
1134 _pthread_mutex_isowned_np(pthread_mutex_t *mutex)
1135 {
1136 	struct pthread_mutex *m;
1137 
1138 	if (*mutex == THR_PSHARED_PTR) {
1139 		m = __thr_pshared_offpage(mutex, 0);
1140 		if (m == NULL)
1141 			return (0);
1142 		shared_mutex_init(m, NULL);
1143 	} else {
1144 		m = *mutex;
1145 		if (m <= THR_MUTEX_DESTROYED)
1146 			return (0);
1147 	}
1148 	return (PMUTEX_OWNER_ID(m) == TID(_get_curthread()));
1149 }
1150 
1151 int
1152 _mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp)
1153 {
1154 
1155 	if (__predict_false(mp <= THR_MUTEX_DESTROYED)) {
1156 		if (mp == THR_MUTEX_DESTROYED)
1157 			return (EINVAL);
1158 		return (EPERM);
1159 	}
1160 	if (PMUTEX_OWNER_ID(mp) != TID(curthread))
1161 		return (EPERM);
1162 	return (0);
1163 }
1164 
1165 int
1166 _pthread_mutex_consistent(pthread_mutex_t *mutex)
1167 {
1168 	struct pthread_mutex *m;
1169 	struct pthread *curthread;
1170 
1171 	if (*mutex == THR_PSHARED_PTR) {
1172 		m = __thr_pshared_offpage(mutex, 0);
1173 		if (m == NULL)
1174 			return (EINVAL);
1175 		shared_mutex_init(m, NULL);
1176 	} else {
1177 		m = *mutex;
1178 		if (m <= THR_MUTEX_DESTROYED)
1179 			return (EINVAL);
1180 	}
1181 	curthread = _get_curthread();
1182 	if ((m->m_lock.m_flags & (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) !=
1183 	    (UMUTEX_ROBUST | UMUTEX_NONCONSISTENT))
1184 		return (EINVAL);
1185 	if (PMUTEX_OWNER_ID(m) != TID(curthread))
1186 		return (EPERM);
1187 	m->m_lock.m_flags &= ~UMUTEX_NONCONSISTENT;
1188 	return (0);
1189 }
1190