xref: /titanic_50/usr/src/uts/common/os/mutex.c (revision 75d94465dbafa487b716482dc36d5150a4ec9853)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Big Theory Statement for mutual exclusion locking primitives.
28  *
29  * A mutex serializes multiple threads so that only one thread
30  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
31  * for a full description of the interfaces and programming model.
32  * The rest of this comment describes the implementation.
33  *
34  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
35  * determines the type based solely on the iblock cookie (PIL) argument.
36  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
37  *
38  * Spin mutexes block interrupts and spin until the lock becomes available.
39  * A thread may not sleep, or call any function that might sleep, while
40  * holding a spin mutex.  With few exceptions, spin mutexes should only
41  * be used to synchronize with interrupt handlers.
42  *
43  * Adaptive mutexes (the default type) spin if the owner is running on
44  * another CPU and block otherwise.  This policy is based on the assumption
45  * that mutex hold times are typically short enough that the time spent
46  * spinning is less than the time it takes to block.  If you need mutual
47  * exclusion semantics with long hold times, consider an rwlock(9F) as
48  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
49  * mutual exclusion for long periods of time, it's probably not scalable.
50  *
51  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
52  * so mutex_enter() assumes that the lock is adaptive.  We get away
53  * with this by structuring mutexes so that an attempt to acquire a
54  * spin mutex as adaptive always fails.  When mutex_enter() fails
55  * it punts to mutex_vector_enter(), which does all the hard stuff.
56  *
57  * mutex_vector_enter() first checks the type.  If it's a spin mutex,
58  * we just call lock_set_spl() and return.  If it's an adaptive mutex,
59  * we check to see what the owner is doing.  If the owner is running,
60  * we spin until the lock becomes available; if not, we mark the lock
61  * as having waiters and block.
62  *
63  * Blocking on a mutex is a surprisingly delicate dance because, for speed,
64  * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
65  * a little harder in the (rarely-executed) blocking path to make sure
66  * we don't block on a mutex that's just been released -- otherwise we
67  * might never be woken up.
68  *
69  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
70  * in the face of preemption and relaxed memory ordering is as follows:
71  *
72  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
73  *     to restart.  Each platform must enforce this by checking the
74  *     interrupted PC in the interrupt handler (or on return from trap --
75  *     whichever is more convenient for the platform).  If the PC
76  *     lies within the critical region of mutex_exit(), the interrupt
77  *     handler must reset the PC back to the beginning of mutex_exit().
78  *     The critical region consists of all instructions up to, but not
79  *     including, the store that clears the lock (which, of course,
80  *     must never be executed twice.)
81  *
82  *     This ensures that the owner will always check for waiters after
83  *     resuming from a previous preemption.
84  *
85  * (2) A thread resuming in mutex_exit() does (at least) the following:
86  *
87  *	when resuming:	set CPU_THREAD = owner
88  *			membar #StoreLoad
89  *
90  *	in mutex_exit:	check waiters bit; do wakeup if set
91  *			membar #LoadStore|#StoreStore
92  *			clear owner
93  *			(at this point, other threads may or may not grab
94  *			the lock, and we may or may not reacquire it)
95  *
96  *	when blocking:	membar #StoreStore (due to disp_lock_enter())
97  *			set CPU_THREAD = (possibly) someone else
98  *
99  * (3) A thread blocking in mutex_vector_enter() does the following:
100  *
101  *			set waiters bit
102  *			membar #StoreLoad (via membar_enter())
103  *			check CPU_THREAD for owner's t_cpu
104  *				continue if owner running
105  *			membar #LoadLoad (via membar_consumer())
106  *			check owner and waiters bit; abort if either changed
107  *			block
108  *
109  * Thus the global memory orderings for (2) and (3) are as follows:
110  *
111  * (2M) mutex_exit() memory order:
112  *
113  *			STORE	CPU_THREAD = owner
114  *			LOAD	waiters bit
115  *			STORE	owner = NULL
116  *			STORE	CPU_THREAD = (possibly) someone else
117  *
118  * (3M) mutex_vector_enter() memory order:
119  *
120  *			STORE	waiters bit = 1
121  *			LOAD	CPU_THREAD for each CPU
122  *			LOAD	owner and waiters bit
123  *
124  * It has been verified by exhaustive simulation that all possible global
125  * memory orderings of (2M) interleaved with (3M) result in correct
126  * behavior.  Moreover, these ordering constraints are minimal: changing
127  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
128  * windows for missed wakeups.  Note: the possibility that other threads
129  * may grab the lock after the owner drops it can be factored out of the
130  * memory ordering analysis because mutex_vector_enter() won't block
131  * if the lock isn't still owned by the same thread.
132  *
133  * The only requirements of code outside the mutex implementation are
134  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
135  * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
136  * (3) mutex_owner_running() preemption fixup in interrupt handlers
137  * or trap returns.
138  * Note: idle threads cannot grab adaptive locks (since they cannot block),
139  * so the membar may be safely omitted when resuming an idle thread.
140  *
141  * When a mutex has waiters, mutex_vector_exit() has several options:
142  *
143  * (1) Choose a waiter and make that thread the owner before waking it;
144  *     this is known as "direct handoff" of ownership.
145  *
146  * (2) Drop the lock and wake one waiter.
147  *
148  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
149  *
150  * In many ways (1) is the cleanest solution, but if a lock is moderately
151  * contended it defeats the adaptive spin logic.  If we make some other
152  * thread the owner, but he's not ONPROC yet, then all other threads on
153  * other cpus that try to get the lock will conclude that the owner is
154  * blocked, so they'll block too.  And so on -- it escalates quickly,
155  * with every thread taking the blocking path rather than the spin path.
156  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
157  *
158  * Option (2) is the next most natural-seeming option, but it has several
159  * annoying properties.  If there's more than one waiter, we must preserve
160  * the waiters bit on an unheld lock.  On cas-capable platforms, where
161  * the waiters bit is part of the lock word, this means that both 0x0
162  * and 0x1 represent unheld locks, so we have to cas against *both*.
163  * Priority inheritance also gets more complicated, because a lock can
164  * have waiters but no owner to whom priority can be willed.  So while
165  * it is possible to make option (2) work, it's surprisingly vile.
166  *
167  * Option (3), the least-intuitive at first glance, is what we actually do.
168  * It has the advantage that because you always wake all waiters, you
169  * never have to preserve the waiters bit.  Waking all waiters seems like
170  * begging for a thundering herd problem, but consider: under option (2),
171  * every thread that grabs and drops the lock will wake one waiter -- so
172  * if the lock is fairly active, all waiters will be awakened very quickly
173  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
174  * The blocking case is rare; the more common case (by 3-4 orders of
175  * magnitude) is that one or more threads spin waiting to get the lock.
176  * Only direct handoff can prevent the thundering herd problem, but as
177  * mentioned earlier, that would tend to defeat the adaptive spin logic.
178  * In practice, option (3) works well because the blocking case is rare.
179  */
180 
181 /*
182  * delayed lock retry with exponential delay for spin locks
183  *
184  * It is noted above that for both the spin locks and the adaptive locks,
185  * spinning is the dominant mode of operation.  So long as there is only
186  * one thread waiting on a lock, the naive spin loop works very well in
187  * cache based architectures.  The lock data structure is pulled into the
188  * cache of the processor with the waiting/spinning thread and no further
189  * memory traffic is generated until the lock is released.  Unfortunately,
190  * once two or more threads are waiting on a lock, the naive spin has
191  * the property of generating maximum memory traffic from each spinning
192  * thread as the spinning threads contend for the lock data structure.
193  *
194  * By executing a delay loop before retrying a lock, a waiting thread
195  * can reduce its memory traffic by a large factor, depending on the
196  * size of the delay loop.  A large delay loop greatly reduces the memory
197  * traffic, but has the drawback of having a period of time when
198  * no thread is attempting to gain the lock even though several threads
199  * might be waiting.  A small delay loop has the drawback of not
200  * much reduction in memory traffic, but reduces the potential idle time.
201  * The theory of the exponential delay code is to start with a short
202  * delay loop and double the waiting time on each iteration, up to
203  * a preselected maximum.
204  */
205 
206 #include <sys/param.h>
207 #include <sys/time.h>
208 #include <sys/cpuvar.h>
209 #include <sys/thread.h>
210 #include <sys/debug.h>
211 #include <sys/cmn_err.h>
212 #include <sys/sobject.h>
213 #include <sys/turnstile.h>
214 #include <sys/systm.h>
215 #include <sys/mutex_impl.h>
216 #include <sys/spl.h>
217 #include <sys/lockstat.h>
218 #include <sys/atomic.h>
219 #include <sys/cpu.h>
220 #include <sys/stack.h>
221 #include <sys/archsystm.h>
222 #include <sys/machsystm.h>
223 #include <sys/x_call.h>
224 
225 /*
226  * The sobj_ops vector exports a set of functions needed when a thread
227  * is asleep on a synchronization object of this type.
228  */
229 static sobj_ops_t mutex_sobj_ops = {
230 	SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
231 };
232 
233 /*
234  * If the system panics on a mutex, save the address of the offending
235  * mutex in panic_mutex_addr, and save the contents in panic_mutex.
236  */
237 static mutex_impl_t panic_mutex;
238 static mutex_impl_t *panic_mutex_addr;
239 
240 static void
mutex_panic(char * msg,mutex_impl_t * lp)241 mutex_panic(char *msg, mutex_impl_t *lp)
242 {
243 	if (panicstr)
244 		return;
245 
246 	if (atomic_cas_ptr(&panic_mutex_addr, NULL, lp) == NULL)
247 		panic_mutex = *lp;
248 
249 	panic("%s, lp=%p owner=%p thread=%p",
250 	    msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
251 	    (void *)curthread);
252 }
253 
254 /* "tunables" for per-platform backoff constants. */
255 uint_t mutex_backoff_cap = 0;
256 ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
257 ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
258 uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
259 
/*
 * Callable wrapper around the MUTEX_SYNC() macro.
 */
void
mutex_sync(void)
{
	MUTEX_SYNC();
}
265 
266 /* calculate the backoff interval */
267 uint_t
default_lock_backoff(uint_t backoff)268 default_lock_backoff(uint_t backoff)
269 {
270 	uint_t cap;		/* backoff cap calculated */
271 
272 	if (backoff == 0) {
273 		backoff = mutex_backoff_base;
274 		/* first call just sets the base */
275 		return (backoff);
276 	}
277 
278 	/* set cap */
279 	if (mutex_backoff_cap == 0) {
280 		/*
281 		 * For a contended lock, in the worst case a load + cas may
282 		 * be queued  at the controller for each contending CPU.
283 		 * Therefore, to avoid queueing, the accesses for all CPUS must
284 		 * be spread out in time over an interval of (ncpu *
285 		 * cap-factor).  Maximum backoff is set to this value, and
286 		 * actual backoff is a random number from 0 to the current max.
287 		 */
288 		cap = ncpus_online * mutex_cap_factor;
289 	} else {
290 		cap = mutex_backoff_cap;
291 	}
292 
293 	/* calculate new backoff value */
294 	backoff <<= mutex_backoff_shift;	/* increase backoff */
295 	if (backoff > cap) {
296 		if (cap < mutex_backoff_base)
297 			backoff = mutex_backoff_base;
298 		else
299 			backoff = cap;
300 	}
301 
302 	return (backoff);
303 }
304 
305 /*
306  * default delay function for mutexes.
307  */
308 void
default_lock_delay(uint_t backoff)309 default_lock_delay(uint_t backoff)
310 {
311 	ulong_t rnd;		/* random factor */
312 	uint_t cur_backoff;	/* calculated backoff */
313 	uint_t backctr;
314 
315 	/*
316 	 * Modify backoff by a random amount to avoid lockstep, and to
317 	 * make it probable that some thread gets a small backoff, and
318 	 * re-checks quickly
319 	 */
320 	rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
321 	cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
322 	    mutex_backoff_base;
323 
324 	/*
325 	 * Delay before trying
326 	 * to touch the mutex data structure.
327 	 */
328 	for (backctr = cur_backoff; backctr; backctr--) {
329 		MUTEX_DELAY();
330 	};
331 }
332 
333 uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
334 void (*mutex_lock_delay)(uint_t) = default_lock_delay;
335 void (*mutex_delay)(void) = mutex_delay_default;
336 
337 /*
338  * mutex_vector_enter() is called from the assembly mutex_enter() routine
339  * if the lock is held or is not of type MUTEX_ADAPTIVE.
340  */
341 void
mutex_vector_enter(mutex_impl_t * lp)342 mutex_vector_enter(mutex_impl_t *lp)
343 {
344 	kthread_id_t	owner;
345 	kthread_id_t	lastowner = MUTEX_NO_OWNER; /* track owner changes */
346 	hrtime_t	sleep_time = 0;	/* how long we slept */
347 	hrtime_t	spin_time = 0;	/* how long we spun */
348 	cpu_t 		*cpup;
349 	turnstile_t	*ts;
350 	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
351 	uint_t		backoff = 0;	/* current backoff */
352 	int		changecnt = 0;	/* count of owner changes */
353 
354 	ASSERT_STACK_ALIGNED();
355 
356 	if (MUTEX_TYPE_SPIN(lp)) {
357 		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
358 		    &lp->m_spin.m_oldspl);
359 		return;
360 	}
361 
362 	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
363 		mutex_panic("mutex_enter: bad mutex", lp);
364 		return;
365 	}
366 
367 	/*
368 	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
369 	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
370 	 * so we must verify by disabling preemption and loading CPU again.
371 	 */
372 	cpup = CPU;
373 	if (CPU_ON_INTR(cpup) && !panicstr) {
374 		kpreempt_disable();
375 		if (CPU_ON_INTR(CPU))
376 			mutex_panic("mutex_enter: adaptive at high PIL", lp);
377 		kpreempt_enable();
378 	}
379 
380 	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);
381 
382 	spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);
383 
384 	backoff = mutex_lock_backoff(0);	/* set base backoff */
385 	for (;;) {
386 		mutex_lock_delay(backoff); /* backoff delay */
387 
388 		if (panicstr)
389 			return;
390 
391 		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
392 			if (mutex_adaptive_tryenter(lp)) {
393 				break;
394 			}
395 			/* increase backoff only on failed attempt. */
396 			backoff = mutex_lock_backoff(backoff);
397 			changecnt++;
398 			continue;
399 		} else if (lastowner != owner) {
400 			lastowner = owner;
401 			backoff = mutex_lock_backoff(backoff);
402 			changecnt++;
403 		}
404 
405 		if (changecnt >= ncpus_online) {
406 			backoff = mutex_lock_backoff(0);
407 			changecnt = 0;
408 		}
409 
410 		if (owner == curthread)
411 			mutex_panic("recursive mutex_enter", lp);
412 
413 		/*
414 		 * If lock is held but owner is not yet set, spin.
415 		 * (Only relevant for platforms that don't have cas.)
416 		 */
417 		if (owner == MUTEX_NO_OWNER)
418 			continue;
419 
420 		if (mutex_owner_running(lp) != NULL)  {
421 			continue;
422 		}
423 
424 		/*
425 		 * The owner appears not to be running, so block.
426 		 * See the Big Theory Statement for memory ordering issues.
427 		 */
428 		ts = turnstile_lookup(lp);
429 		MUTEX_SET_WAITERS(lp);
430 		membar_enter();
431 
432 		/*
433 		 * Recheck whether owner is running after waiters bit hits
434 		 * global visibility (above).  If owner is running, spin.
435 		 */
436 		if (mutex_owner_running(lp) != NULL) {
437 			turnstile_exit(lp);
438 			continue;
439 		}
440 		membar_consumer();
441 
442 		/*
443 		 * If owner and waiters bit are unchanged, block.
444 		 */
445 		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
446 			sleep_time -= gethrtime();
447 			(void) turnstile_block(ts, TS_WRITER_Q, lp,
448 			    &mutex_sobj_ops, NULL, NULL);
449 			sleep_time += gethrtime();
450 			/* reset backoff after turnstile */
451 			backoff = mutex_lock_backoff(0);
452 		} else {
453 			turnstile_exit(lp);
454 		}
455 	}
456 
457 	ASSERT(MUTEX_OWNER(lp) == curthread);
458 
459 	if (sleep_time != 0) {
460 		/*
461 		 * Note, sleep time is the sum of all the sleeping we
462 		 * did.
463 		 */
464 		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
465 	}
466 
467 	/* record spin time, don't count sleep time */
468 	if (spin_time != 0) {
469 		LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
470 		    spin_time + sleep_time);
471 	}
472 
473 	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
474 }
475 
476 /*
477  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
478  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
479  */
480 int
mutex_vector_tryenter(mutex_impl_t * lp)481 mutex_vector_tryenter(mutex_impl_t *lp)
482 {
483 	int s;
484 
485 	if (MUTEX_TYPE_ADAPTIVE(lp))
486 		return (0);		/* we already tried in assembly */
487 
488 	if (!MUTEX_TYPE_SPIN(lp)) {
489 		mutex_panic("mutex_tryenter: bad mutex", lp);
490 		return (0);
491 	}
492 
493 	s = splr(lp->m_spin.m_minspl);
494 	if (lock_try(&lp->m_spin.m_spinlock)) {
495 		lp->m_spin.m_oldspl = (ushort_t)s;
496 		return (1);
497 	}
498 	splx(s);
499 	return (0);
500 }
501 
502 /*
503  * mutex_vector_exit() is called from mutex_exit() if the lock is not
504  * adaptive, has waiters, or is not owned by the current thread (panic).
505  */
506 void
mutex_vector_exit(mutex_impl_t * lp)507 mutex_vector_exit(mutex_impl_t *lp)
508 {
509 	turnstile_t *ts;
510 
511 	if (MUTEX_TYPE_SPIN(lp)) {
512 		lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
513 		return;
514 	}
515 
516 	if (MUTEX_OWNER(lp) != curthread) {
517 		mutex_panic("mutex_exit: not owner", lp);
518 		return;
519 	}
520 
521 	ts = turnstile_lookup(lp);
522 	MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
523 	if (ts == NULL)
524 		turnstile_exit(lp);
525 	else
526 		turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
527 	LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
528 }
529 
530 int
mutex_owned(const kmutex_t * mp)531 mutex_owned(const kmutex_t *mp)
532 {
533 	const mutex_impl_t *lp = (const mutex_impl_t *)mp;
534 
535 	if (panicstr || quiesce_active)
536 		return (1);
537 
538 	if (MUTEX_TYPE_ADAPTIVE(lp))
539 		return (MUTEX_OWNER(lp) == curthread);
540 	return (LOCK_HELD(&lp->m_spin.m_spinlock));
541 }
542 
543 kthread_t *
mutex_owner(const kmutex_t * mp)544 mutex_owner(const kmutex_t *mp)
545 {
546 	const mutex_impl_t *lp = (const mutex_impl_t *)mp;
547 	kthread_id_t t;
548 
549 	if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
550 		return (t);
551 	return (NULL);
552 }
553 
554 /*
555  * The iblock cookie 'ibc' is the spl level associated with the lock;
556  * this alone determines whether the lock will be ADAPTIVE or SPIN.
557  *
558  * Adaptive mutexes created in zeroed memory do not need to call
559  * mutex_init() as their allocation in this fashion guarantees
560  * their initialization.
561  *   eg adaptive mutexes created as static within the BSS or allocated
562  *      by kmem_zalloc().
563  */
564 /* ARGSUSED */
565 void
mutex_init(kmutex_t * mp,char * name,kmutex_type_t type,void * ibc)566 mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
567 {
568 	mutex_impl_t *lp = (mutex_impl_t *)mp;
569 
570 	ASSERT(ibc < (void *)KERNELBASE);	/* see 1215173 */
571 
572 	if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
573 		ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
574 		MUTEX_SET_TYPE(lp, MUTEX_SPIN);
575 		LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
576 		LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
577 		lp->m_spin.m_minspl = (int)(intptr_t)ibc;
578 	} else {
579 #ifdef MUTEX_ALIGN
580 		static int misalign_cnt = 0;
581 
582 		if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
583 		    (misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
584 			/*
585 			 * The mutex is not aligned and may cross a cache line.
586 			 * This is not supported and may cause a panic.
587 			 * Show a warning that the mutex is not aligned
588 			 * and attempt to identify the origin.
589 			 * Unaligned mutexes are not (supposed to be)
590 			 * possible on SPARC.
591 			 */
592 			char *funcname;
593 			ulong_t offset = 0;
594 
595 			funcname = modgetsymname((uintptr_t)caller(), &offset);
596 			cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
597 			    "aligned; caller %s+%lx in module %s. "
598 			    "This is unsupported and may cause a panic. "
599 			    "Please report this to the kernel module supplier.",
600 			    (void *)lp, MUTEX_ALIGN,
601 			    funcname ? funcname : "unknown", offset,
602 			    mod_containing_pc(caller()));
603 			misalign_cnt++;
604 			if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
605 				cmn_err(CE_WARN, "mutex_init: further unaligned"
606 				    " mutex warnings will be suppressed.");
607 			}
608 		}
609 #endif	/* MUTEX_ALIGN */
610 		ASSERT(type != MUTEX_SPIN);
611 
612 		MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
613 		MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
614 	}
615 }
616 
617 void
mutex_destroy(kmutex_t * mp)618 mutex_destroy(kmutex_t *mp)
619 {
620 	mutex_impl_t *lp = (mutex_impl_t *)mp;
621 
622 	if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
623 		MUTEX_DESTROY(lp);
624 	} else if (MUTEX_TYPE_SPIN(lp)) {
625 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
626 		MUTEX_DESTROY(lp);
627 	} else if (MUTEX_TYPE_ADAPTIVE(lp)) {
628 		LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
629 		if (MUTEX_OWNER(lp) != curthread)
630 			mutex_panic("mutex_destroy: not owner", lp);
631 		if (MUTEX_HAS_WAITERS(lp)) {
632 			turnstile_t *ts = turnstile_lookup(lp);
633 			turnstile_exit(lp);
634 			if (ts != NULL)
635 				mutex_panic("mutex_destroy: has waiters", lp);
636 		}
637 		MUTEX_DESTROY(lp);
638 	} else {
639 		mutex_panic("mutex_destroy: bad mutex", lp);
640 	}
641 }
642 
643 /*
644  * Simple C support for the cases where spin locks miss on the first try.
645  */
646 void
lock_set_spin(lock_t * lp)647 lock_set_spin(lock_t *lp)
648 {
649 	int loop_count = 0;
650 	uint_t backoff = 0;	/* current backoff */
651 	hrtime_t spin_time = 0;	/* how long we spun */
652 
653 	if (panicstr)
654 		return;
655 
656 	if (ncpus == 1)
657 		panic("lock_set: %p lock held and only one CPU", (void *)lp);
658 
659 	spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
660 
661 	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
662 		if (panicstr)
663 			return;
664 		loop_count++;
665 
666 		if (ncpus_online == loop_count) {
667 			backoff = mutex_lock_backoff(0);
668 			loop_count = 0;
669 		} else {
670 			backoff = mutex_lock_backoff(backoff);
671 		}
672 		mutex_lock_delay(backoff);
673 	}
674 
675 	LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
676 
677 	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
678 }
679 
680 void
lock_set_spl_spin(lock_t * lp,int new_pil,ushort_t * old_pil_addr,int old_pil)681 lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
682 {
683 	int loop_count = 0;
684 	uint_t backoff = 0;	/* current backoff */
685 	hrtime_t spin_time = 0;	/* how long we spun */
686 
687 	if (panicstr)
688 		return;
689 
690 	if (ncpus == 1)
691 		panic("lock_set_spl: %p lock held and only one CPU",
692 		    (void *)lp);
693 
694 	ASSERT(new_pil > LOCK_LEVEL);
695 
696 	spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
697 
698 	do {
699 		splx(old_pil);
700 		while (LOCK_HELD(lp)) {
701 			loop_count++;
702 
703 			if (panicstr) {
704 				*old_pil_addr = (ushort_t)splr(new_pil);
705 				return;
706 			}
707 			if (ncpus_online == loop_count) {
708 				backoff = mutex_lock_backoff(0);
709 				loop_count = 0;
710 			} else {
711 				backoff = mutex_lock_backoff(backoff);
712 			}
713 			mutex_lock_delay(backoff);
714 		}
715 		old_pil = splr(new_pil);
716 	} while (!lock_spin_try(lp));
717 
718 	*old_pil_addr = (ushort_t)old_pil;
719 
720 	LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
721 
722 	LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
723 }
724