xref: /freebsd/sys/kern/kern_rwlock.c (revision f856af0466c076beef4ea9b15d088e1119a945b8)
1 /*-
2  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * Machine independent bits of reader/writer lock implementation.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_ddb.h"
38 
39 #include <sys/param.h>
40 #include <sys/ktr.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/rwlock.h>
45 #include <sys/systm.h>
46 #include <sys/turnstile.h>
47 #include <sys/lock_profile.h>
48 #include <machine/cpu.h>
49 
50 #ifdef DDB
51 #include <ddb/ddb.h>
52 
53 static void	db_show_rwlock(struct lock_object *lock);
54 #endif
55 
56 struct lock_class lock_class_rw = {
57 	"rw",
58 	LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
59 #ifdef DDB
60 	db_show_rwlock
61 #endif
62 };
63 
64 /*
65  * Return a pointer to the owning thread if the lock is write-locked or
66  * NULL if the lock is unlocked or read-locked.
67  */
68 #define	rw_wowner(rw)							\
69 	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
70 	    (struct thread *)RW_OWNER((rw)->rw_lock))
71 
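/*
 * An illustrative sketch of how the rw_lock word is encoded as used below
 * (see sys/rwlock.h for the authoritative definitions):
 *
 *	RW_UNLOCKED			unlocked; equivalent to a read lock
 *					with no readers and no waiters
 *	RW_READERS_LOCK(n) [| flags]	read-locked by 'n' readers
 *	owning thread pointer [| flags]	write-locked (RW_LOCK_READ clear)
 *
 * where 'flags' may include RW_LOCK_READ_WAITERS and/or
 * RW_LOCK_WRITE_WAITERS.
 */
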
72 /*
73  * Return a pointer to the thread that should receive any priority lent
74  * by threads that block on this lock.  Currently this is identical to
75  * rw_wowner().
76  */
77 #define	rw_owner(rw)		rw_wowner(rw)
78 
79 #ifndef INVARIANTS
80 #define	_rw_assert(rw, what, file, line)
81 #endif
82 
83 void
84 rw_init(struct rwlock *rw, const char *name)
85 {
86 
87 	rw->rw_lock = RW_UNLOCKED;
88 
89 	lock_profile_object_init(&rw->rw_object, &lock_class_rw, name);
90 	lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS |
91 	    LO_RECURSABLE | LO_UPGRADABLE);
92 }
93 
94 void
95 rw_destroy(struct rwlock *rw)
96 {
97 
98 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
99 	lock_profile_object_destroy(&rw->rw_object);
100 	lock_destroy(&rw->rw_object);
101 }
102 
103 void
104 rw_sysinit(void *arg)
105 {
106 	struct rw_args *args = arg;
107 
108 	rw_init(args->ra_rw, args->ra_desc);
109 }
110 
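/*
 * A minimal usage sketch of the consumer-facing API that wraps the
 * functions in this file ('example_lock' is hypothetical; the rw_*lock()
 * macros come from sys/rwlock.h):
 *
 *	struct rwlock example_lock;
 *
 *	rw_init(&example_lock, "example");
 *	rw_rlock(&example_lock);	(read shared data)
 *	rw_runlock(&example_lock);
 *	rw_wlock(&example_lock);	(modify shared data)
 *	rw_wunlock(&example_lock);
 *	rw_destroy(&example_lock);
 */
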
111 void
112 _rw_wlock(struct rwlock *rw, const char *file, int line)
113 {
114 	uint64_t waitstart;
115 
116 	MPASS(curthread != NULL);
117 	KASSERT(rw_wowner(rw) != curthread,
118 	    ("%s (%s): wlock already held @ %s:%d", __func__,
119 	    rw->rw_object.lo_name, file, line));
120 	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
121 	    line);
122 	lock_profile_waitstart(&waitstart);
123 	__rw_wlock(rw, curthread, file, line);
124 	lock_profile_obtain_lock_success(&rw->rw_object, waitstart, file, line);
125 	LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line);
126 	WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
127 	curthread->td_locks++;
128 }
129 
130 void
131 _rw_wunlock(struct rwlock *rw, const char *file, int line)
132 {
133 
134 	MPASS(curthread != NULL);
135 	_rw_assert(rw, RA_WLOCKED, file, line);
136 	curthread->td_locks--;
137 	WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
138 	LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line);
139 	lock_profile_release_lock(&rw->rw_object);
140 	__rw_wunlock(rw, curthread, file, line);
141 }
142 
143 void
144 _rw_rlock(struct rwlock *rw, const char *file, int line)
145 {
146 #ifdef SMP
147 	volatile struct thread *owner;
148 #endif
149 	uint64_t waitstart;
150 	int contested;
151 	uintptr_t x;
152 
153 	KASSERT(rw_wowner(rw) != curthread,
154 	    ("%s (%s): wlock already held @ %s:%d", __func__,
155 	    rw->rw_object.lo_name, file, line));
156 	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line);
157 
158 	/*
159 	 * Note that we make no attempt to block new read locks once a
160 	 * writer has blocked on the lock.  The reason is that we
161 	 * currently allow read locks to recurse and we don't keep
162 	 * track of all the holders of read locks.  Thus, if we blocked
163 	 * new readers once a writer had blocked and a reader then
164 	 * tried to recurse on its read lock, we would deadlock: the
165 	 * reader would be blocked on the writer, and the writer would
166 	 * be blocked waiting for the reader to release its original
167 	 * read lock.
168 	 */
169 	lock_profile_waitstart(&waitstart);
170 	for (;;) {
171 		/*
172 		 * Handle the easy case.  If no other thread has a write
173 		 * lock, then try to bump up the count of read locks.  Note
174 		 * that we have to preserve the current state of the
175 		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
176 		 * read lock, then rw_lock must have changed, so restart
177 		 * the loop.  Note that this handles the case of a
178 		 * completely unlocked rwlock since such a lock is encoded
179 		 * as a read lock with no waiters.
180 		 */
181 		x = rw->rw_lock;
182 		if (x & RW_LOCK_READ) {
183 
184 			/*
185 			 * The RW_LOCK_READ_WAITERS flag should only be set
186 			 * if another thread currently holds a write lock,
187 			 * and in that case RW_LOCK_READ should be clear.
188 			 */
189 			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
190 			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
191 			    x + RW_ONE_READER)) {
192 				lock_profile_obtain_lock_success(&rw->rw_object, waitstart, file, line);
193 				if (LOCK_LOG_TEST(&rw->rw_object, 0))
194 					CTR4(KTR_LOCK,
195 					    "%s: %p succeed %p -> %p", __func__,
196 					    rw, (void *)x,
197 					    (void *)(x + RW_ONE_READER));
198 				break;
199 			}
200 			cpu_spinwait();
201 			lock_profile_obtain_lock_failed(&rw->rw_object, &contested);
202 			continue;
203 		}
204 
205 		/*
206 		 * Okay, now it's the hard case.  Some other thread already
207 		 * has a write lock, so acquire the turnstile lock so we can
208 		 * begin the process of blocking.
209 		 */
210 		turnstile_lock(&rw->rw_object);
211 
212 		/*
213 		 * The lock might have been released while we spun, so
214 		 * recheck its state and restart the loop if there is no
215 		 * longer a write lock.
216 		 */
217 		x = rw->rw_lock;
218 		if (x & RW_LOCK_READ) {
219 			turnstile_release(&rw->rw_object);
220 			cpu_spinwait();
221 			continue;
222 		}
223 
224 		/*
225 		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
226 		 * flag is already set, then we can go ahead and block.  If
227 		 * it is not set, then try to set it.  If we fail to set it,
228 		 * drop the turnstile lock and restart the loop.
229 		 */
230 		if (!(x & RW_LOCK_READ_WAITERS)) {
231 			if (!atomic_cmpset_ptr(&rw->rw_lock, x,
232 			    x | RW_LOCK_READ_WAITERS)) {
233 				turnstile_release(&rw->rw_object);
234 				cpu_spinwait();
235 				continue;
236 			}
237 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
238 				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
239 				    __func__, rw);
240 		}
241 
242 #ifdef SMP
243 		/*
244 		 * If the owner is running on another CPU, spin until
245 		 * the owner stops running or the state of the lock
246 		 * changes.
247 		 */
248 		owner = (struct thread *)RW_OWNER(x);
249 		if (TD_IS_RUNNING(owner)) {
250 			lock_profile_obtain_lock_failed(&rw->rw_object, &contested);
251 			turnstile_release(&rw->rw_object);
252 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
253 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
254 				    __func__, rw, owner);
255 			while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
256 			    TD_IS_RUNNING(owner))
257 				cpu_spinwait();
258 			continue;
259 		}
260 #endif
261 
262 		/*
263 		 * We were unable to acquire the lock and the read waiters
264 		 * flag is set, so we must block on the turnstile.
265 		 */
266 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
267 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
268 			    rw);
269 		turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE);
270 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
271 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
272 			    __func__, rw);
273 	}
274 
275 	/*
276 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons,
277 	 * however: turnstiles currently don't like owners changing between
278 	 * calls to turnstile_wait().
279 	 */
280 
281 	LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line);
282 	WITNESS_LOCK(&rw->rw_object, 0, file, line);
283 	curthread->td_locks++;
284 }
285 
286 void
287 _rw_runlock(struct rwlock *rw, const char *file, int line)
288 {
289 	struct turnstile *ts;
290 	uintptr_t x;
291 
292 	_rw_assert(rw, RA_RLOCKED, file, line);
293 	curthread->td_locks--;
294 	WITNESS_UNLOCK(&rw->rw_object, 0, file, line);
295 	LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line);
296 
297 	/* TODO: drop "owner of record" here. */
298 
299 	for (;;) {
300 		/*
301 		 * See if there is more than one read lock held.  If so,
302 		 * just drop one and return.
303 		 */
304 		x = rw->rw_lock;
305 		if (RW_READERS(x) > 1) {
306 			if (atomic_cmpset_ptr(&rw->rw_lock, x,
307 			    x - RW_ONE_READER)) {
308 				if (LOCK_LOG_TEST(&rw->rw_object, 0))
309 					CTR4(KTR_LOCK,
310 					    "%s: %p succeeded %p -> %p",
311 					    __func__, rw, (void *)x,
312 					    (void *)(x - RW_ONE_READER));
313 				break;
314 			}
315 			continue;
316 		} else
317 			lock_profile_release_lock(&rw->rw_object);
318 
320 		/*
321 		 * We should never have read waiters while at least one
322 		 * thread holds a read lock.  (See the note above.)
323 		 */
324 		KASSERT(!(x & RW_LOCK_READ_WAITERS),
325 		    ("%s: waiting readers", __func__));
326 
327 		/*
328 		 * If there aren't any waiters for a write lock, then try
329 		 * to drop it quickly.
330 		 */
331 		if (!(x & RW_LOCK_WRITE_WAITERS)) {
332 
333 			/*
334 			 * There shouldn't be any flags set and we should
335 			 * be the only read lock.  If we fail to release
336 			 * the single read lock, then another thread might
337 			 * have just acquired a read lock, so go back up
338 			 * to the multiple read locks case.
339 			 */
340 			MPASS(x == RW_READERS_LOCK(1));
341 			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
342 			    RW_UNLOCKED)) {
343 				if (LOCK_LOG_TEST(&rw->rw_object, 0))
344 					CTR2(KTR_LOCK, "%s: %p last succeeded",
345 					    __func__, rw);
346 				break;
347 			}
348 			continue;
349 		}
350 
351 		/*
352 		 * There should just be one reader with one or more
353 		 * writers waiting.
354 		 */
355 		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));
356 
357 		/*
358 		 * Ok, we know we have a waiting writer and we think we
359 		 * are the last reader, so grab the turnstile lock.
360 		 */
361 		turnstile_lock(&rw->rw_object);
362 
363 		/*
364 		 * Try to drop our lock, leaving the lock in an unlocked
365 		 * state.
366 		 *
367 		 * If you wanted to do explicit lock handoff you'd have to
368 		 * do it here.  You'd also want to use turnstile_signal(),
369 		 * and you'd have to handle the race where a higher
370 		 * priority thread blocks on the write lock before the
371 		 * thread you wake up actually runs and "steals" the lock.
372 		 * For now it's a lot simpler to just wake up all of the
373 		 * waiters.
374 		 *
375 		 * As above, if we fail, then another thread might have
376 		 * acquired a read lock, so drop the turnstile lock and
377 		 * restart.
378 		 */
379 		if (!atomic_cmpset_ptr(&rw->rw_lock,
380 		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
381 			turnstile_release(&rw->rw_object);
382 			continue;
383 		}
384 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
385 			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
386 			    __func__, rw);
387 
388 		/*
389 		 * Ok.  The lock is released and all that's left is to
390 		 * wake up the waiters.  Note that the lock might not be
391 		 * free anymore, but in that case the writers will just
392 		 * block again if they run before the new lock holder(s)
393 		 * release the lock.
394 		 */
395 		ts = turnstile_lookup(&rw->rw_object);
396 		MPASS(ts != NULL);
397 		turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
398 		turnstile_unpend(ts, TS_SHARED_LOCK);
399 		break;
400 	}
401 }
402 
403 /*
404  * This function is called when we are unable to obtain a write lock on the
405  * first try.  This means that at least one other thread holds either a
406  * read or write lock.
407  */
408 void
409 _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
410 {
411 #ifdef SMP
412 	volatile struct thread *owner;
413 #endif
414 	int contested;
415 	uintptr_t v;
416 
417 	if (LOCK_LOG_TEST(&rw->rw_object, 0))
418 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
419 		    rw->rw_object.lo_name, (void *)rw->rw_lock, file, line);
420 
421 	while (!_rw_write_lock(rw, tid)) {
422 		turnstile_lock(&rw->rw_object);
423 		v = rw->rw_lock;
424 
425 		/*
426 		 * If the lock was released while spinning on the
427 		 * turnstile chain lock, try again.
428 		 */
429 		if (v == RW_UNLOCKED) {
430 			turnstile_release(&rw->rw_object);
431 			cpu_spinwait();
432 			continue;
433 		}
434 
435 		/*
436 		 * If the lock was released by a writer with both readers
437 		 * and writers waiting and a reader hasn't woken up and
438 		 * acquired the lock yet, rw_lock will be set to the
439 		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
440 		 * that value, try to acquire it once.  Note that we have
441 		 * to preserve the RW_LOCK_WRITE_WAITERS flag, as other
442 		 * writers are still waiting.  If we fail, restart the
443 		 * loop.
444 		 */
445 		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
446 			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
447 			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
448 			    tid | RW_LOCK_WRITE_WAITERS)) {
449 				turnstile_claim(&rw->rw_object);
450 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
451 				    __func__, rw);
452 				break;
453 			}
454 			turnstile_release(&rw->rw_object);
455 			cpu_spinwait();
456 			lock_profile_obtain_lock_failed(&rw->rw_object, &contested);
457 			continue;
458 		}
459 
460 		/*
461 		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
462 		 * set it.  If we fail to set it, then loop back and try
463 		 * again.
464 		 */
465 		if (!(v & RW_LOCK_WRITE_WAITERS)) {
466 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
467 			    v | RW_LOCK_WRITE_WAITERS)) {
468 				turnstile_release(&rw->rw_object);
469 				cpu_spinwait();
470 				lock_profile_obtain_lock_failed(&rw->rw_object, &contested);
471 				continue;
472 			}
473 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
474 				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
475 				    __func__, rw);
476 		}
477 
478 #ifdef SMP
479 		/*
480 		 * If the lock is write locked and the owner is
481 		 * running on another CPU, spin until the owner stops
482 		 * running or the state of the lock changes.
483 		 */
484 		owner = (struct thread *)RW_OWNER(v);
485 		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
486 			lock_profile_obtain_lock_failed(&rw->rw_object, &contested);
487 			turnstile_release(&rw->rw_object);
488 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
489 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
490 				    __func__, rw, owner);
491 			while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
492 			    TD_IS_RUNNING(owner))
493 				cpu_spinwait();
494 			continue;
495 		}
496 #endif
497 
498 		/*
499 		 * We were unable to acquire the lock and the write waiters
500 		 * flag is set, so we must block on the turnstile.
501 		 */
502 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
503 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
504 			    rw);
505 		turnstile_wait(&rw->rw_object, rw_owner(rw),
506 		    TS_EXCLUSIVE_QUEUE);
507 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
508 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
509 			    __func__, rw);
510 	}
511 }
512 
513 /*
514  * This function is called if the first try at releasing a write lock failed.
515  * This means that one of the two waiter bits must be set, indicating that at
516  * least one thread is waiting on this lock.
517  */
518 void
519 _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
520 {
521 	struct turnstile *ts;
522 	uintptr_t v;
523 	int queue;
524 
525 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
526 	    ("%s: neither of the waiter flags are set", __func__));
527 
528 	if (LOCK_LOG_TEST(&rw->rw_object, 0))
529 		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
530 
531 	turnstile_lock(&rw->rw_object);
532 	ts = turnstile_lookup(&rw->rw_object);
533 
534 #ifdef SMP
535 	/*
536 	 * There might not be a turnstile for this lock if all of
537 	 * the waiters are adaptively spinning.  In that case, just
538 	 * reset the lock to the unlocked state and return.
539 	 */
540 	if (ts == NULL) {
541 		atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED);
542 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
543 			CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw);
544 		turnstile_release(&rw->rw_object);
545 		return;
546 	}
547 #else
548 	MPASS(ts != NULL);
549 #endif
550 
551 	/*
552 	 * Use the same algorithm as sx locks for now: prefer waking up shared
553 	 * waiters, if we have any, over writers.  This is probably not ideal.
554 	 *
555 	 * 'v' is the value we are going to write back to rw_lock.  If we
556 	 * have waiters on both queues, we need to preserve the state of
557 	 * the waiter flag for the queue we don't wake up.  For now this is
558 	 * hardcoded for the algorithm mentioned above.
559 	 *
560 	 * In the case of both readers and writers waiting, we wake up
561 	 * the readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If
562 	 * a new writer comes in before a reader it will claim the lock up
563 	 * above.  There is probably a potential priority inversion in
564 	 * there that could be worked around either by waking both queues
565 	 * of waiters or doing some complicated lock handoff gymnastics.
566 	 *
567 	 * Note that in the SMP case, if both flags are set, there might
568 	 * not be any actual writers on the turnstile as they might all
569 	 * be spinning.  In that case, we don't want to preserve the
570 	 * RW_LOCK_WRITE_WAITERS flag as the turnstile is going to go
571 	 * away once we wake up all the readers.
572 	 */
573 	v = RW_UNLOCKED;
574 	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
575 		queue = TS_SHARED_QUEUE;
576 #ifdef SMP
577 		if (rw->rw_lock & RW_LOCK_WRITE_WAITERS &&
578 		    !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
579 			v |= RW_LOCK_WRITE_WAITERS;
580 #else
581 		v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
582 #endif
583 	} else
584 		queue = TS_EXCLUSIVE_QUEUE;
585 
586 #ifdef SMP
587 	/*
588 	 * We have to make sure that we actually have waiters to
589 	 * wake up.  If they are all spinning, then we just need to
590 	 * disown the turnstile and return.
591 	 */
592 	if (turnstile_empty(ts, queue)) {
593 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
594 			CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw);
595 		atomic_store_rel_ptr(&rw->rw_lock, v);
596 		turnstile_disown(ts);
597 		return;
598 	}
599 #endif
600 
601 	/* Wake up all waiters for the specific queue. */
602 	if (LOCK_LOG_TEST(&rw->rw_object, 0))
603 		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
604 		    queue == TS_SHARED_QUEUE ? "read" : "write");
605 	turnstile_broadcast(ts, queue);
606 	atomic_store_rel_ptr(&rw->rw_lock, v);
607 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
608 }
609 
610 /*
611  * Attempt to do a non-blocking upgrade from a read lock to a write
612  * lock.  This will only succeed if this thread holds a single read
613  * lock.  Returns true if the upgrade succeeded and false otherwise.
614  */
615 int
616 _rw_try_upgrade(struct rwlock *rw, const char *file, int line)
617 {
618 	uintptr_t v, tid;
619 	int success;
620 
621 	_rw_assert(rw, RA_RLOCKED, file, line);
622 
623 	/*
624 	 * Attempt to switch from one reader to a writer.  If there
625 	 * are any write waiters, then we will have to lock the
626 	 * turnstile first to prevent races with another writer
627 	 * calling turnstile_wait() before we have claimed this
628 	 * turnstile.  So, do the simple case of no waiters first.
629 	 */
630 	tid = (uintptr_t)curthread;
631 	if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) {
632 		success = atomic_cmpset_acq_ptr(&rw->rw_lock,
633 		    RW_READERS_LOCK(1), tid);
634 		goto out;
635 	}
636 
637 	/*
638 	 * Ok, we think we have write waiters, so lock the
639 	 * turnstile.
640 	 */
641 	turnstile_lock(&rw->rw_object);
642 
643 	/*
644 	 * Try to switch from one reader to a writer again.  This time
645 	 * we honor the current state of the RW_LOCK_WRITE_WAITERS
646 	 * flag.  If we obtain the lock with the flag set, then claim
647 	 * ownership of the turnstile.  In the SMP case it is possible
648 	 * that there is no associated turnstile even though there are
649 	 * waiters, if all of the waiters are spinning.
650 	 */
651 	v = rw->rw_lock & RW_LOCK_WRITE_WAITERS;
652 	success = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
653 	    tid | v);
654 #ifdef SMP
655 	if (success && v && turnstile_lookup(&rw->rw_object) != NULL)
656 #else
657 	if (success && v)
658 #endif
659 		turnstile_claim(&rw->rw_object);
660 	else
661 		turnstile_release(&rw->rw_object);
662 out:
663 	LOCK_LOG_TRY("WUPGRADE", &rw->rw_object, 0, success, file, line);
664 	if (success)
665 		WITNESS_UPGRADE(&rw->rw_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
666 		    file, line);
667 	return (success);
668 }
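
/*
 * Since the upgrade is non-blocking, callers typically fall back to
 * dropping the read lock and reacquiring the lock exclusively when the
 * upgrade fails, e.g. (a sketch with a hypothetical 'example_lock'; the
 * revalidation step depends on the caller's data):
 *
 *	rw_rlock(&example_lock);
 *	...
 *	if (!rw_try_upgrade(&example_lock)) {
 *		rw_runlock(&example_lock);
 *		rw_wlock(&example_lock);
 *		(revalidate state: it may have changed while unlocked)
 *	}
 *	...	(modify shared data)
 *	rw_wunlock(&example_lock);
 */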
669 
670 /*
671  * Downgrade a write lock into a single read lock.
672  */
673 void
674 _rw_downgrade(struct rwlock *rw, const char *file, int line)
675 {
676 	struct turnstile *ts;
677 	uintptr_t tid, v;
678 
679 	_rw_assert(rw, RA_WLOCKED, file, line);
680 
681 	WITNESS_DOWNGRADE(&rw->rw_object, 0, file, line);
682 
683 	/*
684 	 * Convert from a writer to a single reader.  First we handle
685 	 * the easy case with no waiters.  If there are any waiters, we
686 	 * lock the turnstile, "disown" the lock, and awaken any read
687 	 * waiters.
688 	 */
689 	tid = (uintptr_t)curthread;
690 	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
691 		goto out;
692 
693 	/*
694 	 * Ok, we think we have waiters, so lock the turnstile so we can
695 	 * read the waiter flags without any races.
696 	 */
697 	turnstile_lock(&rw->rw_object);
698 	v = rw->rw_lock;
699 	MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS));
700 
701 	/*
702 	 * Downgrade from a write lock while preserving
703 	 * RW_LOCK_WRITE_WAITERS and give up ownership of the
704 	 * turnstile.  If there are any read waiters, wake them up.
705 	 *
706 	 * For SMP, we have to allow for the fact that all of the
707 	 * read waiters might be spinning.  In that case, act as if
708 	 * RW_LOCK_READ_WAITERS is not set.  Also, only preserve
709 	 * the RW_LOCK_WRITE_WAITERS flag if at least one writer is
710 	 * blocked on the turnstile.
711 	 */
712 	ts = turnstile_lookup(&rw->rw_object);
713 #ifdef SMP
714 	if (ts == NULL)
715 		v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS);
716 	else if (v & RW_LOCK_READ_WAITERS &&
717 	    turnstile_empty(ts, TS_SHARED_QUEUE))
718 		v &= ~RW_LOCK_READ_WAITERS;
719 	else if (v & RW_LOCK_WRITE_WAITERS &&
720 	    turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
721 		v &= ~RW_LOCK_WRITE_WAITERS;
722 #else
723 	MPASS(ts != NULL);
724 #endif
725 	if (v & RW_LOCK_READ_WAITERS)
726 		turnstile_broadcast(ts, TS_SHARED_QUEUE);
727 	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) |
728 	    (v & RW_LOCK_WRITE_WAITERS));
729 	if (v & RW_LOCK_READ_WAITERS)
730 		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
731 #ifdef SMP
732 	else if (ts == NULL)
733 		turnstile_release(&rw->rw_object);
734 #endif
735 	else
736 		turnstile_disown(ts);
737 out:
738 	LOCK_LOG_LOCK("WDOWNGRADE", &rw->rw_object, 0, 0, file, line);
739 }
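
/*
 * A common downgrade pattern (a sketch with a hypothetical 'example_lock'):
 * update the data while holding the lock exclusively, then downgrade so
 * that other readers may proceed while this thread keeps a stable,
 * read-only view:
 *
 *	rw_wlock(&example_lock);
 *	...	(modify shared data)
 *	rw_downgrade(&example_lock);
 *	...	(continue reading shared data)
 *	rw_runlock(&example_lock);
 */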
740 
741 #ifdef INVARIANT_SUPPORT
742 #ifndef INVARIANTS
743 #undef _rw_assert
744 #endif
745 
746 /*
747  * In the non-WITNESS case, rw_assert() can only detect that at least
748  * *some* thread owns an rlock, but it cannot guarantee that *this*
749  * thread owns an rlock.
750  */
751 void
752 _rw_assert(struct rwlock *rw, int what, const char *file, int line)
753 {
754 
755 	if (panicstr != NULL)
756 		return;
757 	switch (what) {
758 	case RA_LOCKED:
759 	case RA_RLOCKED:
760 #ifdef WITNESS
761 		witness_assert(&rw->rw_object, what, file, line);
762 #else
763 		/*
764 		 * If some other thread has a write lock or we have one
765 		 * and are asserting a read lock, fail.  Also, if no one
766 		 * has a lock at all, fail.
767 		 */
768 		if (rw->rw_lock == RW_UNLOCKED ||
769 		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
770 		    rw_wowner(rw) != curthread)))
771 			panic("Lock %s not %slocked @ %s:%d\n",
772 			    rw->rw_object.lo_name, (what == RA_RLOCKED) ?
773 			    "read " : "", file, line);
774 #endif
775 		break;
776 	case RA_WLOCKED:
777 		if (rw_wowner(rw) != curthread)
778 			panic("Lock %s not exclusively locked @ %s:%d\n",
779 			    rw->rw_object.lo_name, file, line);
780 		break;
781 	case RA_UNLOCKED:
782 #ifdef WITNESS
783 		witness_assert(&rw->rw_object, what, file, line);
784 #else
785 		/*
786 		 * If we hold a write lock, fail.  We can't reliably check
787 		 * to see if we hold a read lock or not.
788 		 */
789 		if (rw_wowner(rw) == curthread)
790 			panic("Lock %s exclusively locked @ %s:%d\n",
791 			    rw->rw_object.lo_name, file, line);
792 #endif
793 		break;
794 	default:
795 		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
796 		    line);
797 	}
798 }
799 #endif /* INVARIANT_SUPPORT */
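
/*
 * Callers normally use the rw_assert() wrapper macro (a no-op unless
 * INVARIANTS is enabled) to document and enforce locking requirements at
 * function entry, e.g. (a sketch with a hypothetical lock and function):
 *
 *	static void
 *	example_modify(void)
 *	{
 *
 *		rw_assert(&example_lock, RA_WLOCKED);
 *		...
 *	}
 */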
800 
801 #ifdef DDB
802 static void
803 db_show_rwlock(struct lock_object *lock)
804 {
805 	struct rwlock *rw;
806 	struct thread *td;
807 
808 	rw = (struct rwlock *)lock;
809 
810 	db_printf(" state: ");
811 	if (rw->rw_lock == RW_UNLOCKED)
812 		db_printf("UNLOCKED\n");
813 	else if (rw->rw_lock & RW_LOCK_READ)
814 		db_printf("RLOCK: %jd locks\n",
815 		    (intmax_t)(RW_READERS(rw->rw_lock)));
816 	else {
817 		td = rw_wowner(rw);
818 		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
819 		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
820 	}
821 	db_printf(" waiters: ");
822 	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
823 	case RW_LOCK_READ_WAITERS:
824 		db_printf("readers\n");
825 		break;
826 	case RW_LOCK_WRITE_WAITERS:
827 		db_printf("writers\n");
828 		break;
829 	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
830 		db_printf("readers and writers\n");
831 		break;
832 	default:
833 		db_printf("none\n");
834 		break;
835 	}
836 }
837 
838 #endif
839