xref: /freebsd/sys/kern/kern_sx.c (revision 00a5db46de56179184c0f000eaacad695e2b0859)
1 /*-
2  * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
3  * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice(s), this list of conditions and the following disclaimer as
11  *    the first lines of this file unmodified other than the possible
12  *    addition of one or more copyright notices.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice(s), this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
27  * DAMAGE.
28  */
29 
30 /*
31  * Shared/exclusive locks.  This implementation attempts to ensure
32  * deterministic lock granting behavior, so that slocks and xlocks are
33  * interleaved.
34  *
35  * Priority propagation will not generally raise the priority of lock holders,
36  * so should not be relied upon in combination with sx locks.
37  */
38 
39 #include "opt_adaptive_sx.h"
40 #include "opt_ddb.h"
41 #include "opt_kdtrace.h"
42 
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
45 
46 #include <sys/param.h>
47 #include <sys/ktr.h>
48 #include <sys/lock.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/sleepqueue.h>
52 #include <sys/sx.h>
53 #include <sys/systm.h>
54 
55 #ifdef ADAPTIVE_SX
56 #include <machine/cpu.h>
57 #endif
58 
59 #ifdef DDB
60 #include <ddb/ddb.h>
61 #endif
62 
63 #if !defined(SMP) && defined(ADAPTIVE_SX)
64 #error "You must have SMP to enable the ADAPTIVE_SX option"
65 #endif
66 
67 CTASSERT(((SX_ADAPTIVESPIN | SX_RECURSE) & LO_CLASSFLAGS) ==
68     (SX_ADAPTIVESPIN | SX_RECURSE));
69 
70 /* Handy macros for sleep queues. */
71 #define	SQ_EXCLUSIVE_QUEUE	0
72 #define	SQ_SHARED_QUEUE		1
73 
74 /*
75  * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
76  * drop Giant anytime we have to sleep or if we adaptively spin.
77  */
78 #define	GIANT_DECLARE							\
79 	int _giantcnt = 0;						\
80 	WITNESS_SAVE_DECL(Giant)					\
81 
82 #define	GIANT_SAVE() do {						\
83 	if (mtx_owned(&Giant)) {					\
84 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
85 		while (mtx_owned(&Giant)) {				\
86 			_giantcnt++;					\
87 			mtx_unlock(&Giant);				\
88 		}							\
89 	}								\
90 } while (0)
91 
92 #define GIANT_RESTORE() do {						\
93 	if (_giantcnt > 0) {						\
94 		mtx_assert(&Giant, MA_NOTOWNED);			\
95 		while (_giantcnt--)					\
96 			mtx_lock(&Giant);				\
97 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
98 	}								\
99 } while (0)
100 
101 /*
102  * Returns true if an exclusive lock is recursed.  It assumes
103  * curthread currently has an exclusive lock.
104  */
105 #define	sx_recurse		lock_object.lo_data
106 #define	sx_recursed(sx)		((sx)->sx_recurse != 0)
107 
108 static void	assert_sx(struct lock_object *lock, int what);
109 #ifdef DDB
110 static void	db_show_sx(struct lock_object *lock);
111 #endif
112 static void	lock_sx(struct lock_object *lock, int how);
113 #ifdef KDTRACE_HOOKS
114 static int	owner_sx(struct lock_object *lock, struct thread **owner);
115 #endif
116 static int	unlock_sx(struct lock_object *lock);
117 
118 struct lock_class lock_class_sx = {
119 	.lc_name = "sx",
120 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
121 	.lc_assert = assert_sx,
122 #ifdef DDB
123 	.lc_ddb_show = db_show_sx,
124 #endif
125 	.lc_lock = lock_sx,
126 	.lc_unlock = unlock_sx,
127 #ifdef KDTRACE_HOOKS
128 	.lc_owner = owner_sx,
129 #endif
130 };
131 
132 #ifndef INVARIANTS
133 #define	_sx_assert(sx, what, file, line)
134 #endif
135 
136 void
137 assert_sx(struct lock_object *lock, int what)
138 {
139 
140 	sx_assert((struct sx *)lock, what);
141 }
142 
143 void
144 lock_sx(struct lock_object *lock, int how)
145 {
146 	struct sx *sx;
147 
148 	sx = (struct sx *)lock;
149 	if (how)
150 		sx_xlock(sx);
151 	else
152 		sx_slock(sx);
153 }
154 
155 int
156 unlock_sx(struct lock_object *lock)
157 {
158 	struct sx *sx;
159 
160 	sx = (struct sx *)lock;
161 	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
162 	if (sx_xlocked(sx)) {
163 		sx_xunlock(sx);
164 		return (1);
165 	} else {
166 		sx_sunlock(sx);
167 		return (0);
168 	}
169 }
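
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): the lock_sx()/unlock_sx() class methods above are what
 * allow generic sleep code such as sx_sleep() to drop an sx lock across
 * a sleep and reacquire it in the same mode afterwards.
 */
static __unused void
example_wait_for_flag(struct sx *lock, int *flagp)
{

	sx_xlock(lock);
	/* sx_sleep() drops the sx lock while asleep and retakes it after. */
	while (*flagp == 0)
		sx_sleep(flagp, lock, 0, "exwait", 0);
	*flagp = 0;
	sx_xunlock(lock);
}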
170 
171 #ifdef KDTRACE_HOOKS
172 int
173 owner_sx(struct lock_object *lock, struct thread **owner)
174 {
175 	struct sx *sx = (struct sx *)lock;
176 	uintptr_t x = sx->sx_lock;
177 
178 	*owner = (struct thread *)SX_OWNER(x);
179 	return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
180 	    (*owner != NULL));
181 }
182 #endif
183 
184 void
185 sx_sysinit(void *arg)
186 {
187 	struct sx_args *sargs = arg;
188 
189 	sx_init(sargs->sa_sx, sargs->sa_desc);
190 }
191 
192 void
193 sx_init_flags(struct sx *sx, const char *description, int opts)
194 {
195 	int flags;
196 
197 	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
198 	    SX_NOPROFILE | SX_ADAPTIVESPIN)) == 0);
199 
200 	flags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
201 	if (opts & SX_DUPOK)
202 		flags |= LO_DUPOK;
203 	if (opts & SX_NOPROFILE)
204 		flags |= LO_NOPROFILE;
205 	if (!(opts & SX_NOWITNESS))
206 		flags |= LO_WITNESS;
207 	if (opts & SX_QUIET)
208 		flags |= LO_QUIET;
209 
210 	flags |= opts & (SX_ADAPTIVESPIN | SX_RECURSE);
211 	sx->sx_lock = SX_LOCK_UNLOCKED;
212 	sx->sx_recurse = 0;
213 	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
214 }
215 
216 void
217 sx_destroy(struct sx *sx)
218 {
219 
220 	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
221 	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
222 	sx->sx_lock = SX_LOCK_DESTROYED;
223 	lock_destroy(&sx->lock_object);
224 }
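
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): a typical consumer initializes the lock once, takes it
 * shared around read-only accesses and exclusive around updates, and
 * destroys it when the protected data goes away.
 */
static struct sx example_lock;
static int example_data;

static __unused void
example_setup(void)
{

	sx_init(&example_lock, "example data lock");
}

static __unused int
example_read(void)
{
	int v;

	sx_slock(&example_lock);	/* many readers may hold this at once */
	v = example_data;
	sx_sunlock(&example_lock);
	return (v);
}

static __unused void
example_write(int v)
{

	sx_xlock(&example_lock);	/* writers exclude readers and writers */
	example_data = v;
	sx_xunlock(&example_lock);
}

static __unused void
example_teardown(void)
{

	sx_destroy(&example_lock);
}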
225 
226 int
227 _sx_slock(struct sx *sx, int opts, const char *file, int line)
228 {
229 	int error = 0;
230 
231 	MPASS(curthread != NULL);
232 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
233 	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
234 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
235 	error = __sx_slock(sx, opts, file, line);
236 	if (!error) {
237 		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
238 		WITNESS_LOCK(&sx->lock_object, 0, file, line);
239 		curthread->td_locks++;
240 	}
241 
242 	return (error);
243 }
244 
245 int
246 _sx_try_slock(struct sx *sx, const char *file, int line)
247 {
248 	uintptr_t x;
249 
250 	for (;;) {
251 		x = sx->sx_lock;
252 		KASSERT(x != SX_LOCK_DESTROYED,
253 		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
254 		if (!(x & SX_LOCK_SHARED))
255 			break;
256 		if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) {
257 			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
258 			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
259 			curthread->td_locks++;
260 			return (1);
261 		}
262 	}
263 
264 	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
265 	return (0);
266 }
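
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): the try variants never sleep, so they suit contexts
 * that must not block; on failure the caller defers or retries.
 */
static __unused int
example_peek(struct sx *lock, const int *datap, int *valp)
{

	if (!sx_try_slock(lock))
		return (0);	/* would have had to sleep; let the caller retry */
	*valp = *datap;
	sx_sunlock(lock);
	return (1);
}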
267 
268 int
269 _sx_xlock(struct sx *sx, int opts, const char *file, int line)
270 {
271 	int error = 0;
272 
273 	MPASS(curthread != NULL);
274 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
275 	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
276 	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
277 	    line, NULL);
278 	error = __sx_xlock(sx, curthread, opts, file, line);
279 	if (!error) {
280 		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
281 		    file, line);
282 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
283 		curthread->td_locks++;
284 	}
285 
286 	return (error);
287 }
288 
289 int
290 _sx_try_xlock(struct sx *sx, const char *file, int line)
291 {
292 	int rval;
293 
294 	MPASS(curthread != NULL);
295 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
296 	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
297 
298 	if (sx_xlocked(sx) && (sx->lock_object.lo_flags & SX_RECURSE) != 0) {
299 		sx->sx_recurse++;
300 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
301 		rval = 1;
302 	} else
303 		rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
304 		    (uintptr_t)curthread);
305 	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
306 	if (rval) {
307 		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
308 		    file, line);
309 		curthread->td_locks++;
310 	}
311 
312 	return (rval);
313 }
314 
315 void
316 _sx_sunlock(struct sx *sx, const char *file, int line)
317 {
318 
319 	MPASS(curthread != NULL);
320 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
321 	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
322 	_sx_assert(sx, SA_SLOCKED, file, line);
323 	curthread->td_locks--;
324 	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
325 	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
326 	__sx_sunlock(sx, file, line);
327 	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_SUNLOCK_RELEASE, sx);
328 }
329 
330 void
331 _sx_xunlock(struct sx *sx, const char *file, int line)
332 {
333 
334 	MPASS(curthread != NULL);
335 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
336 	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
337 	_sx_assert(sx, SA_XLOCKED, file, line);
338 	curthread->td_locks--;
339 	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
340 	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
341 	    line);
342 	if (!sx_recursed(sx))
343 		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_XUNLOCK_RELEASE, sx);
344 	__sx_xunlock(sx, curthread, file, line);
345 }
346 
347 /*
348  * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
349  * This will only succeed if this thread holds a single shared lock.
350  * Return 1 if the upgrade succeeded, 0 otherwise.
351  */
352 int
353 _sx_try_upgrade(struct sx *sx, const char *file, int line)
354 {
355 	uintptr_t x;
356 	int success;
357 
358 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
359 	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
360 	_sx_assert(sx, SA_SLOCKED, file, line);
361 
362 	/*
363 	 * Try to switch from one shared lock to an exclusive lock.  We need
364 	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
365 	 * we will wake up the exclusive waiters when we drop the lock.
366 	 */
367 	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
368 	success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
369 	    (uintptr_t)curthread | x);
370 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
371 	if (success) {
372 		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
373 		    file, line);
374 		LOCKSTAT_RECORD0(LS_SX_TRYUPGRADE_UPGRADE, sx);
375 	}
376 	return (success);
377 }
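
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): since the upgrade only succeeds for a lone sharer,
 * callers fall back to dropping the shared lock and taking the
 * exclusive lock outright, revalidating whatever was read under the
 * shared lock.
 */
static __unused void
example_promote(struct sx *lock)
{

	sx_slock(lock);
	/* ... read under the shared lock ... */
	if (!sx_try_upgrade(lock)) {
		sx_sunlock(lock);
		sx_xlock(lock);
		/* The state may have changed while unlocked; revalidate. */
	}
	/* ... modify under the exclusive lock ... */
	sx_xunlock(lock);
}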
378 
379 /*
380  * Downgrade an unrecursed exclusive lock into a single shared lock.
381  */
382 void
383 _sx_downgrade(struct sx *sx, const char *file, int line)
384 {
385 	uintptr_t x;
386 	int wakeup_swapper;
387 
388 	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
389 	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
390 	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
391 #ifndef INVARIANTS
392 	if (sx_recursed(sx))
393 		panic("downgrade of a recursed lock");
394 #endif
395 
396 	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
397 
398 	/*
399 	 * Try to switch from an exclusive lock with no shared waiters
400 	 * to one sharer with no shared waiters.  If there are
401 	 * exclusive waiters, we don't need to lock the sleep queue so
402 	 * long as we preserve the flag.  We do one quick try and if
403 	 * that fails we grab the sleepq lock to keep the flags from
404 	 * changing and do it the slow way.
405 	 *
406 	 * We have to lock the sleep queue if there are shared waiters
407 	 * so we can wake them up.
408 	 */
409 	x = sx->sx_lock;
410 	if (!(x & SX_LOCK_SHARED_WAITERS) &&
411 	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
412 	    (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
413 		LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
414 		return;
415 	}
416 
417 	/*
418 	 * Lock the sleep queue so we can read the waiters bits
419 	 * without any races and wakeup any shared waiters.
420 	 */
421 	sleepq_lock(&sx->lock_object);
422 
423 	/*
424 	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
425 	 * shared lock.  If there are any shared waiters, wake them up.
426 	 */
427 	wakeup_swapper = 0;
428 	x = sx->sx_lock;
429 	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
430 	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
431 	if (x & SX_LOCK_SHARED_WAITERS)
432 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
433 		    0, SQ_SHARED_QUEUE);
434 	sleepq_release(&sx->lock_object);
435 
436 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
437 	LOCKSTAT_RECORD0(LS_SX_DOWNGRADE_DOWNGRADE, sx);
438 
439 	if (wakeup_swapper)
440 		kick_proc0();
441 }
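
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): a writer that needs exclusivity only for the update
 * itself can downgrade afterwards, letting readers in while it finishes
 * a longer read-only pass over the data.
 */
static __unused void
example_update_then_scan(struct sx *lock)
{

	sx_xlock(lock);
	/* ... perform the update that requires exclusivity ... */
	sx_downgrade(lock);
	/* ... read-only work continues; other readers may now enter ... */
	sx_sunlock(lock);
}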
442 
443 /*
444  * This function represents the so-called 'hard case' for sx_xlock
445  * operation.  All 'easy case' failures are redirected to this.  Note
446  * that ideally this would be a static function, but it needs to be
447  * accessible from at least sx.h.
448  */
449 int
450 _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
451     int line)
452 {
453 	GIANT_DECLARE;
454 #ifdef ADAPTIVE_SX
455 	volatile struct thread *owner;
456 #endif
457 	uintptr_t x;
458 #ifdef LOCK_PROFILING
459 	uint64_t waittime = 0;
460 	int contested = 0;
461 #endif
462 	int error = 0;
463 #ifdef	KDTRACE_HOOKS
464 	uint64_t spin_cnt = 0;
465 	uint64_t sleep_cnt = 0;
466 	int64_t sleep_time = 0;
467 #endif
468 
469 	/* If we already hold an exclusive lock, then recurse. */
470 	if (sx_xlocked(sx)) {
471 		KASSERT((sx->lock_object.lo_flags & SX_RECURSE) != 0,
472 	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
473 		    sx->lock_object.lo_name, file, line));
474 		sx->sx_recurse++;
475 		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
476 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
477 			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
478 		return (0);
479 	}
480 
481 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
482 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
483 		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
484 
485 	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
486 #ifdef KDTRACE_HOOKS
487 		spin_cnt++;
488 #endif
489 		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
490 		    &waittime);
491 #ifdef ADAPTIVE_SX
492 		/*
493 		 * If the lock is write locked and the owner is
494 		 * running on another CPU, spin until the owner stops
495 		 * running or the state of the lock changes.
496 		 */
497 		x = sx->sx_lock;
498 		if (!(x & SX_LOCK_SHARED) &&
499 		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
500 			x = SX_OWNER(x);
501 			owner = (struct thread *)x;
502 			if (TD_IS_RUNNING(owner)) {
503 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
504 					CTR3(KTR_LOCK,
505 					    "%s: spinning on %p held by %p",
506 					    __func__, sx, owner);
507 				GIANT_SAVE();
508 				while (SX_OWNER(sx->sx_lock) == x &&
509 				    TD_IS_RUNNING(owner)) {
510 					cpu_spinwait();
511 #ifdef KDTRACE_HOOKS
512 					spin_cnt++;
513 #endif
514 				}
515 				continue;
516 			}
517 		}
518 #endif
519 
520 		sleepq_lock(&sx->lock_object);
521 		x = sx->sx_lock;
522 
523 		/*
524 		 * If the lock was released while spinning on the
525 		 * sleep queue chain lock, try again.
526 		 */
527 		if (x == SX_LOCK_UNLOCKED) {
528 			sleepq_release(&sx->lock_object);
529 			continue;
530 		}
531 
532 #ifdef ADAPTIVE_SX
533 		/*
534 		 * The current lock owner might have started executing
535 		 * on another CPU (or the lock could have changed
536 		 * owners) while we were waiting on the sleep queue
537 		 * chain lock.  If so, drop the sleep queue lock and try
538 		 * again.
539 		 */
540 		if (!(x & SX_LOCK_SHARED) &&
541 		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
542 			owner = (struct thread *)SX_OWNER(x);
543 			if (TD_IS_RUNNING(owner)) {
544 				sleepq_release(&sx->lock_object);
545 				continue;
546 			}
547 		}
548 #endif
549 
550 		/*
551 		 * If an exclusive lock was released with both shared
552 		 * and exclusive waiters and a shared waiter hasn't
553 		 * woken up and acquired the lock yet, sx_lock will be
554 		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
555 		 * If we see that value, try to acquire it once.  Note
556 		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
557 		 * as there are other exclusive waiters still.  If we
558 		 * fail, restart the loop.
559 		 */
560 		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
561 			if (atomic_cmpset_acq_ptr(&sx->sx_lock,
562 			    SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
563 			    tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
564 				sleepq_release(&sx->lock_object);
565 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
566 				    __func__, sx);
567 				break;
568 			}
569 			sleepq_release(&sx->lock_object);
570 			continue;
571 		}
572 
573 		/*
574 		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag.  If we
575 		 * fail, then loop back and retry.
576 		 */
577 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
578 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
579 			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
580 				sleepq_release(&sx->lock_object);
581 				continue;
582 			}
583 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
584 				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
585 				    __func__, sx);
586 		}
587 
588 		/*
589 		 * Since we have been unable to acquire the exclusive
590 		 * lock and the exclusive waiters flag is set, we have
591 		 * to sleep.
592 		 */
593 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
594 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
595 			    __func__, sx);
596 
597 #ifdef KDTRACE_HOOKS
598 		sleep_time -= lockstat_nsecs();
599 #endif
600 		GIANT_SAVE();
601 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
602 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
603 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
604 		if (!(opts & SX_INTERRUPTIBLE))
605 			sleepq_wait(&sx->lock_object, 0);
606 		else
607 			error = sleepq_wait_sig(&sx->lock_object, 0);
608 #ifdef KDTRACE_HOOKS
609 		sleep_time += lockstat_nsecs();
610 		sleep_cnt++;
611 #endif
612 		if (error) {
613 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
614 				CTR2(KTR_LOCK,
615 			"%s: interruptible sleep by %p suspended by signal",
616 				    __func__, sx);
617 			break;
618 		}
619 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
620 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
621 			    __func__, sx);
622 	}
623 
624 	GIANT_RESTORE();
625 	if (!error)
626 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx,
627 		    contested, waittime, file, line);
628 #ifdef KDTRACE_HOOKS
629 	if (sleep_time)
630 		LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time);
631 	if (spin_cnt > sleep_cnt)
632 		LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
633 #endif
634 	return (error);
635 }
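
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): with SX_INTERRUPTIBLE (via the sx_xlock_sig() wrapper),
 * the acquire can fail if the sleep is interrupted by a signal, so the
 * caller must check the return value and bail out without touching the
 * protected data.
 */
static __unused int
example_write_sig(struct sx *lock, int *datap, int v)
{
	int error;

	error = sx_xlock_sig(lock);
	if (error != 0)
		return (error);	/* interrupted; the lock is not held */
	*datap = v;
	sx_xunlock(lock);
	return (0);
}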
636 
637 /*
638  * This function represents the so-called 'hard case' for sx_xunlock
639  * operation.  All 'easy case' failures are redirected to this.  Note
640  * that ideally this would be a static function, but it needs to be
641  * accessible from at least sx.h.
642  */
643 void
644 _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
645 {
646 	uintptr_t x;
647 	int queue, wakeup_swapper;
648 
649 	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
650 
651 	/* If the lock is recursed, then unrecurse one level. */
652 	if (sx_xlocked(sx) && sx_recursed(sx)) {
653 		if ((--sx->sx_recurse) == 0)
654 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
655 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
656 			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
657 		return;
658 	}
659 	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
660 	    SX_LOCK_EXCLUSIVE_WAITERS));
661 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
662 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
663 
664 	sleepq_lock(&sx->lock_object);
665 	x = SX_LOCK_UNLOCKED;
666 
667 	/*
668 	 * The wakeup algorithm here is quite simple and probably not
669 	 * ideal.  It gives precedence to shared waiters if they are
670 	 * present.  In that case, we have to preserve the state of
671 	 * the exclusive waiters flag.
672 	 */
673 	if (sx->sx_lock & SX_LOCK_SHARED_WAITERS) {
674 		queue = SQ_SHARED_QUEUE;
675 		x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
676 	} else
677 		queue = SQ_EXCLUSIVE_QUEUE;
678 
679 	/* Wake up all the waiters for the specific queue. */
680 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
681 		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
682 		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
683 		    "exclusive");
684 	atomic_store_rel_ptr(&sx->sx_lock, x);
685 	wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
686 	    queue);
687 	sleepq_release(&sx->lock_object);
688 	if (wakeup_swapper)
689 		kick_proc0();
690 }
691 
692 /*
693  * This function represents the so-called 'hard case' for sx_slock
694  * operation.  All 'easy case' failures are redirected to this.  Note
695  * that ideally this would be a static function, but it needs to be
696  * accessible from at least sx.h.
697  */
698 int
699 _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
700 {
701 	GIANT_DECLARE;
702 #ifdef ADAPTIVE_SX
703 	volatile struct thread *owner;
704 #endif
705 #ifdef LOCK_PROFILING
706 	uint64_t waittime = 0;
707 	int contested = 0;
708 #endif
709 	uintptr_t x;
710 	int error = 0;
711 #ifdef KDTRACE_HOOKS
712 	uint64_t spin_cnt = 0;
713 	uint64_t sleep_cnt = 0;
714 	int64_t sleep_time = 0;
715 #endif
716 
717 	/*
718 	 * As with rwlocks, we make no attempt to block new shared
719 	 * locks once there is an exclusive waiter.
720 	 */
721 	for (;;) {
722 #ifdef KDTRACE_HOOKS
723 		spin_cnt++;
724 #endif
725 		x = sx->sx_lock;
726 
727 		/*
728 		 * If no other thread has an exclusive lock then try to bump up
729 		 * the count of sharers.  Since we have to preserve the state
730 		 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
731 		 * shared lock, loop back and retry.
732 		 */
733 		if (x & SX_LOCK_SHARED) {
734 			MPASS(!(x & SX_LOCK_SHARED_WAITERS));
735 			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
736 			    x + SX_ONE_SHARER)) {
737 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
738 					CTR4(KTR_LOCK,
739 					    "%s: %p succeed %p -> %p", __func__,
740 					    sx, (void *)x,
741 					    (void *)(x + SX_ONE_SHARER));
742 				break;
743 			}
744 			continue;
745 		}
746 		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
747 		    &waittime);
748 
749 #ifdef ADAPTIVE_SX
750 		/*
751 		 * If the owner is running on another CPU, spin until
752 		 * the owner stops running or the state of the lock
753 		 * changes.
754 		 */
755 		if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
756 			x = SX_OWNER(x);
757 			owner = (struct thread *)x;
758 			if (TD_IS_RUNNING(owner)) {
759 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
760 					CTR3(KTR_LOCK,
761 					    "%s: spinning on %p held by %p",
762 					    __func__, sx, owner);
763 				GIANT_SAVE();
764 				while (SX_OWNER(sx->sx_lock) == x &&
765 				    TD_IS_RUNNING(owner)) {
766 #ifdef KDTRACE_HOOKS
767 					spin_cnt++;
768 #endif
769 					cpu_spinwait();
770 				}
771 				continue;
772 			}
773 		}
774 #endif
775 
776 		/*
777 		 * Some other thread already has an exclusive lock, so
778 		 * start the process of blocking.
779 		 */
780 		sleepq_lock(&sx->lock_object);
781 		x = sx->sx_lock;
782 
783 		/*
784 		 * The lock could have been released while we spun.
785 		 * In this case loop back and retry.
786 		 */
787 		if (x & SX_LOCK_SHARED) {
788 			sleepq_release(&sx->lock_object);
789 			continue;
790 		}
791 
792 #ifdef ADAPTIVE_SX
793 		/*
794 		 * If the owner is running on another CPU, spin until
795 		 * the owner stops running or the state of the lock
796 		 * changes.
797 		 */
798 		if (!(x & SX_LOCK_SHARED) &&
799 		    (sx->lock_object.lo_flags & SX_ADAPTIVESPIN)) {
800 			owner = (struct thread *)SX_OWNER(x);
801 			if (TD_IS_RUNNING(owner)) {
802 				sleepq_release(&sx->lock_object);
803 				continue;
804 			}
805 		}
806 #endif
807 
808 		/*
809 		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
810 		 * fail to set it, drop the sleep queue lock and loop
811 		 * back.
812 		 */
813 		if (!(x & SX_LOCK_SHARED_WAITERS)) {
814 			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
815 			    x | SX_LOCK_SHARED_WAITERS)) {
816 				sleepq_release(&sx->lock_object);
817 				continue;
818 			}
819 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
820 				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
821 				    __func__, sx);
822 		}
823 
824 		/*
825 		 * Since we have been unable to acquire the shared lock,
826 		 * we have to sleep.
827 		 */
828 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
829 			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
830 			    __func__, sx);
831 
832 #ifdef KDTRACE_HOOKS
833 		sleep_time -= lockstat_nsecs();
834 #endif
835 		GIANT_SAVE();
836 		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
837 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
838 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
839 		if (!(opts & SX_INTERRUPTIBLE))
840 			sleepq_wait(&sx->lock_object, 0);
841 		else
842 			error = sleepq_wait_sig(&sx->lock_object, 0);
843 #ifdef KDTRACE_HOOKS
844 		sleep_time += lockstat_nsecs();
845 		sleep_cnt++;
846 #endif
847 		if (error) {
848 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
849 				CTR2(KTR_LOCK,
850 			"%s: interruptible sleep by %p suspended by signal",
851 				    __func__, sx);
852 			break;
853 		}
854 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
855 			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
856 			    __func__, sx);
857 	}
858 	if (error == 0)
859 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx,
860 		    contested, waittime, file, line);
861 #ifdef KDTRACE_HOOKS
862 	if (sleep_time)
863 		LOCKSTAT_RECORD1(LS_SX_SLOCK_BLOCK, sx, sleep_time);
864 	if (spin_cnt > sleep_cnt)
865 		LOCKSTAT_RECORD1(LS_SX_SLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
866 #endif
867 	GIANT_RESTORE();
868 	return (error);
869 }
870 
871 /*
872  * This function represents the so-called 'hard case' for sx_sunlock
873  * operation.  All 'easy case' failures are redirected to this.  Note
874  * that ideally this would be a static function, but it needs to be
875  * accessible from at least sx.h.
876  */
877 void
878 _sx_sunlock_hard(struct sx *sx, const char *file, int line)
879 {
880 	uintptr_t x;
881 	int wakeup_swapper;
882 
883 	for (;;) {
884 		x = sx->sx_lock;
885 
886 		/*
887 		 * We should never have shared waiters while at least one
888 		 * thread holds a shared lock.
889 		 */
890 		KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
891 		    ("%s: waiting sharers", __func__));
892 
893 		/*
894 		 * See if there is more than one shared lock held.  If
895 		 * so, just drop one and return.
896 		 */
897 		if (SX_SHARERS(x) > 1) {
898 			if (atomic_cmpset_ptr(&sx->sx_lock, x,
899 			    x - SX_ONE_SHARER)) {
900 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
901 					CTR4(KTR_LOCK,
902 					    "%s: %p succeeded %p -> %p",
903 					    __func__, sx, (void *)x,
904 					    (void *)(x - SX_ONE_SHARER));
905 				break;
906 			}
907 			continue;
908 		}
909 
910 		/*
911 		 * If there aren't any waiters for an exclusive lock,
912 		 * then try to drop it quickly.
913 		 */
914 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
915 			MPASS(x == SX_SHARERS_LOCK(1));
916 			if (atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1),
917 			    SX_LOCK_UNLOCKED)) {
918 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
919 					CTR2(KTR_LOCK, "%s: %p last succeeded",
920 					    __func__, sx);
921 				break;
922 			}
923 			continue;
924 		}
925 
926 		/*
927 		 * At this point, there should just be one sharer with
928 		 * exclusive waiters.
929 		 */
930 		MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));
931 
932 		sleepq_lock(&sx->lock_object);
933 
934 		/*
935 		 * The wakeup semantic here is quite simple: just wake up
936 		 * all the exclusive waiters.  Note that the state of the
937 		 * lock could have changed, so if the cmpset fails, loop
938 		 * back and retry.
939 		 */
940 		if (!atomic_cmpset_ptr(&sx->sx_lock,
941 		    SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
942 		    SX_LOCK_UNLOCKED)) {
943 			sleepq_release(&sx->lock_object);
944 			continue;
945 		}
946 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
947 			CTR2(KTR_LOCK, "%s: %p waking up all threads on"
948 			    " exclusive queue", __func__, sx);
949 		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
950 		    0, SQ_EXCLUSIVE_QUEUE);
951 		sleepq_release(&sx->lock_object);
952 		if (wakeup_swapper)
953 			kick_proc0();
954 		break;
955 	}
956 }
957 
958 #ifdef INVARIANT_SUPPORT
959 #ifndef INVARIANTS
960 #undef	_sx_assert
961 #endif
962 
963 /*
964  * In the non-WITNESS case, sx_assert() can only detect that at least
965  * *some* thread owns an slock, but it cannot guarantee that *this*
966  * thread owns an slock.
967  */
968 void
969 _sx_assert(struct sx *sx, int what, const char *file, int line)
970 {
971 #ifndef WITNESS
972 	int slocked = 0;
973 #endif
974 
975 	if (panicstr != NULL)
976 		return;
977 	switch (what) {
978 	case SA_SLOCKED:
979 	case SA_SLOCKED | SA_NOTRECURSED:
980 	case SA_SLOCKED | SA_RECURSED:
981 #ifndef WITNESS
982 		slocked = 1;
983 		/* FALLTHROUGH */
984 #endif
985 	case SA_LOCKED:
986 	case SA_LOCKED | SA_NOTRECURSED:
987 	case SA_LOCKED | SA_RECURSED:
988 #ifdef WITNESS
989 		witness_assert(&sx->lock_object, what, file, line);
990 #else
991 		/*
992 		 * If some other thread has an exclusive lock or we
993 		 * have one and are asserting a shared lock, fail.
994 		 * Also, if no one has a lock at all, fail.
995 		 */
996 		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
997 		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
998 		    sx_xholder(sx) != curthread)))
999 			panic("Lock %s not %slocked @ %s:%d\n",
1000 			    sx->lock_object.lo_name, slocked ? "share " : "",
1001 			    file, line);
1002 
1003 		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
1004 			if (sx_recursed(sx)) {
1005 				if (what & SA_NOTRECURSED)
1006 					panic("Lock %s recursed @ %s:%d\n",
1007 					    sx->lock_object.lo_name, file,
1008 					    line);
1009 			} else if (what & SA_RECURSED)
1010 				panic("Lock %s not recursed @ %s:%d\n",
1011 				    sx->lock_object.lo_name, file, line);
1012 		}
1013 #endif
1014 		break;
1015 	case SA_XLOCKED:
1016 	case SA_XLOCKED | SA_NOTRECURSED:
1017 	case SA_XLOCKED | SA_RECURSED:
1018 		if (sx_xholder(sx) != curthread)
1019 			panic("Lock %s not exclusively locked @ %s:%d\n",
1020 			    sx->lock_object.lo_name, file, line);
1021 		if (sx_recursed(sx)) {
1022 			if (what & SA_NOTRECURSED)
1023 				panic("Lock %s recursed @ %s:%d\n",
1024 				    sx->lock_object.lo_name, file, line);
1025 		} else if (what & SA_RECURSED)
1026 			panic("Lock %s not recursed @ %s:%d\n",
1027 			    sx->lock_object.lo_name, file, line);
1028 		break;
1029 	case SA_UNLOCKED:
1030 #ifdef WITNESS
1031 		witness_assert(&sx->lock_object, what, file, line);
1032 #else
1033 		/*
1034 		 * If we hold an exclusive lock, fail.  We can't
1035 		 * reliably check whether we hold a shared lock or
1036 		 * not.
1037 		 */
1038 		if (sx_xholder(sx) == curthread)
1039 			panic("Lock %s exclusively locked @ %s:%d\n",
1040 			    sx->lock_object.lo_name, file, line);
1041 #endif
1042 		break;
1043 	default:
1044 		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
1045 		    line);
1046 	}
1047 }
1048 #endif	/* INVARIANT_SUPPORT */
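
/*
 * Usage sketch (illustrative example; the "example_*" names are
 * hypothetical): consumers typically document their locking protocol by
 * calling sx_assert() on entry; under INVARIANTS a violation panics,
 * otherwise the assertion compiles away.
 */
static __unused void
example_modify_locked(struct sx *lock, int *datap, int v)
{

	sx_assert(lock, SA_XLOCKED);	/* caller must hold the exclusive lock */
	*datap = v;
}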
1049 
1050 #ifdef DDB
1051 static void
1052 db_show_sx(struct lock_object *lock)
1053 {
1054 	struct thread *td;
1055 	struct sx *sx;
1056 
1057 	sx = (struct sx *)lock;
1058 
1059 	db_printf(" state: ");
1060 	if (sx->sx_lock == SX_LOCK_UNLOCKED)
1061 		db_printf("UNLOCKED\n");
1062 	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
1063 		db_printf("DESTROYED\n");
1064 		return;
1065 	} else if (sx->sx_lock & SX_LOCK_SHARED)
1066 		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
1067 	else {
1068 		td = sx_xholder(sx);
1069 		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1070 		    td->td_tid, td->td_proc->p_pid, td->td_name);
1071 		if (sx_recursed(sx))
1072 			db_printf(" recursed: %d\n", sx->sx_recurse);
1073 	}
1074 
1075 	db_printf(" waiters: ");
1076 	switch (sx->sx_lock &
1077 	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
1078 	case SX_LOCK_SHARED_WAITERS:
1079 		db_printf("shared\n");
1080 		break;
1081 	case SX_LOCK_EXCLUSIVE_WAITERS:
1082 		db_printf("exclusive\n");
1083 		break;
1084 	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
1085 		db_printf("exclusive and shared\n");
1086 		break;
1087 	default:
1088 		db_printf("none\n");
1089 	}
1090 }
1091 
1092 /*
1093  * Check to see if a thread that is blocked on a sleep queue is actually
1094  * blocked on an sx lock.  If so, output some details and return true.
1095  * If the lock has an exclusive owner, return that in *ownerp.
1096  */
1097 int
1098 sx_chain(struct thread *td, struct thread **ownerp)
1099 {
1100 	struct sx *sx;
1101 
1102 	/*
1103 	 * Check to see if this thread is blocked on an sx lock.
1104 	 * First, we check the lock class.  If that is ok, then we
1105 	 * compare the lock name against the wait message.
1106 	 */
1107 	sx = td->td_wchan;
1108 	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
1109 	    sx->lock_object.lo_name != td->td_wmesg)
1110 		return (0);
1111 
1112 	/* We think we have an sx lock, so output some details. */
1113 	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
1114 	*ownerp = sx_xholder(sx);
1115 	if (sx->sx_lock & SX_LOCK_SHARED)
1116 		db_printf("SLOCK (count %ju)\n",
1117 		    (uintmax_t)SX_SHARERS(sx->sx_lock));
1118 	else
1119 		db_printf("XLOCK\n");
1120 	return (1);
1121 }
1122 #endif
1123