xref: /freebsd/sys/kern/kern_rwlock.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the author nor the names of any co-contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * Machine independent bits of reader/writer lock implementation.
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_ddb.h"
38 
39 #include <sys/param.h>
40 #include <sys/ktr.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/rwlock.h>
45 #include <sys/systm.h>
46 #include <sys/turnstile.h>
47 
48 #include <machine/cpu.h>
49 
50 #ifdef DDB
51 #include <ddb/ddb.h>
52 
53 static void	db_show_rwlock(struct lock_object *lock);
54 #endif
55 
56 struct lock_class lock_class_rw = {
57 	"rw",
58 	LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
59 #ifdef DDB
60 	db_show_rwlock
61 #endif
62 };
63 
64 /*
65  * Return a pointer to the owning thread if the lock is write-locked or
66  * NULL if the lock is unlocked or read-locked.
67  */
68 #define	rw_wowner(rw)							\
69 	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
70 	    (struct thread *)RW_OWNER((rw)->rw_lock))
71 
72 /*
73  * Return a pointer to the owning thread of this lock, which should receive
74  * any priority lent by threads that block on this lock.  Currently this
75  * is identical to rw_wowner().
76  */
77 #define	rw_owner(rw)		rw_wowner(rw)
78 
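/*
 * Overview of the lock word (an illustrative summary; the authoritative
 * bit definitions live in sys/rwlock.h): all of the state is packed into
 * the single word rw->rw_lock, roughly as follows:
 *
 *	read-locked:	RW_LOCK_READ plus optional waiter flags plus a
 *			reader count (RW_READERS() extracts the count and
 *			RW_ONE_READER is the increment used below).
 *	write-locked:	the owning thread pointer with the waiter flags in
 *			its low bits; RW_OWNER() masks the flags back off.
 *	unlocked:	encoded as a read lock with zero readers and no
 *			waiters (RW_UNLOCKED).
 *
 * Keeping everything in one word is what lets the fast paths acquire and
 * release the lock with a single atomic_cmpset on rw_lock.
 */
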
79 #ifndef INVARIANTS
80 #define	_rw_assert(rw, what, file, line)
81 #endif
82 
83 void
84 rw_init(struct rwlock *rw, const char *name)
85 {
86 
87 	rw->rw_lock = RW_UNLOCKED;
88 
89 	lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS |
90 	    LO_RECURSABLE | LO_UPGRADABLE);
91 }
92 
93 void
94 rw_destroy(struct rwlock *rw)
95 {
96 
97 	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
98 	lock_destroy(&rw->rw_object);
99 }
100 
101 void
102 rw_sysinit(void *arg)
103 {
104 	struct rw_args *args = arg;
105 
106 	rw_init(args->ra_rw, args->ra_desc);
107 }
108 
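/*
 * A minimal consumer sketch (illustrative only; foo_lock and foo_count
 * are made-up names).  Callers normally use the rw_rlock(), rw_runlock(),
 * rw_wlock() and rw_wunlock() wrappers from sys/rwlock.h, which supply
 * the file and line arguments to the functions below:
 *
 *	struct rwlock foo_lock;
 *	int foo_count;
 *
 *	rw_init(&foo_lock, "foo count");
 *
 *	rw_rlock(&foo_lock);		shared: many readers at once
 *	value = foo_count;
 *	rw_runlock(&foo_lock);
 *
 *	rw_wlock(&foo_lock);		exclusive: a single writer
 *	foo_count++;
 *	rw_wunlock(&foo_lock);
 *
 *	rw_destroy(&foo_lock);
 */
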
109 void
110 _rw_wlock(struct rwlock *rw, const char *file, int line)
111 {
112 
113 	MPASS(curthread != NULL);
114 	KASSERT(rw_wowner(rw) != curthread,
115 	    ("%s (%s): wlock already held @ %s:%d", __func__,
116 	    rw->rw_object.lo_name, file, line));
117 	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
118 	    line);
119 	__rw_wlock(rw, curthread, file, line);
120 	LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line);
121 	WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
122 }
123 
124 void
125 _rw_wunlock(struct rwlock *rw, const char *file, int line)
126 {
127 
128 	MPASS(curthread != NULL);
129 	_rw_assert(rw, RA_WLOCKED, file, line);
130 	WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
131 	LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line);
132 	__rw_wunlock(rw, curthread, file, line);
133 }
134 
135 void
136 _rw_rlock(struct rwlock *rw, const char *file, int line)
137 {
138 #ifdef SMP
139 	volatile struct thread *owner;
140 #endif
141 	uintptr_t x;
142 
143 	KASSERT(rw_wowner(rw) != curthread,
144 	    ("%s (%s): wlock already held @ %s:%d", __func__,
145 	    rw->rw_object.lo_name, file, line));
146 	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line);
147 
148 	/*
149 	 * Note that we make no attempt to block new read locks once a
150 	 * writer has blocked on the lock.  The reason is that we
151 	 * currently allow read locks to recurse and we don't keep
152 	 * track of all the holders of read locks.  Thus, if we were to
153 	 * block readers once a writer blocked and a reader then tried
154 	 * to recurse on its read lock, we would end up in a deadlock:
155 	 * the reader would be blocked on the writer, and the writer
156 	 * would be blocked waiting for the reader to release its
157 	 * original read lock.
158 	 */
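	/*
	 * Concretely: thread A read-locks rw and thread B then blocks in
	 * rw_wlock().  If new readers were held off behind B, a second
	 * rw_rlock() by A would sleep waiting for B while A still held
	 * its first read lock, so A and B would each be waiting on the
	 * other.
	 */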
159 	for (;;) {
160 		/*
161 		 * Handle the easy case.  If no other thread has a write
162 		 * lock, then try to bump up the count of read locks.  Note
163 		 * that we have to preserve the current state of the
164 		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
165 		 * read lock, then rw_lock must have changed, so restart
166 		 * the loop.  Note that this handles the case of a
167 		 * completely unlocked rwlock since such a lock is encoded
168 		 * as a read lock with no waiters.
169 		 */
170 		x = rw->rw_lock;
171 		if (x & RW_LOCK_READ) {
172 
173 			/*
174 			 * The RW_LOCK_READ_WAITERS flag should only be set
175 			 * if another thread currently holds a write lock,
176 			 * and in that case RW_LOCK_READ should be clear.
177 			 */
178 			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
179 			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
180 			    x + RW_ONE_READER)) {
181 				if (LOCK_LOG_TEST(&rw->rw_object, 0))
182 					CTR4(KTR_LOCK,
183 					    "%s: %p succeeded %p -> %p", __func__,
184 					    rw, (void *)x,
185 					    (void *)(x + RW_ONE_READER));
186 				break;
187 			}
188 			cpu_spinwait();
189 			continue;
190 		}
191 
192 		/*
193 		 * Okay, now it's the hard case.  Some other thread already
194 		 * has a write lock, so acquire the turnstile lock so we can
195 		 * begin the process of blocking.
196 		 */
197 		turnstile_lock(&rw->rw_object);
198 
199 		/*
200 		 * The lock might have been released while we spun, so
201 		 * recheck its state and restart the loop if there is no
202 		 * longer a write lock.
203 		 */
204 		x = rw->rw_lock;
205 		if (x & RW_LOCK_READ) {
206 			turnstile_release(&rw->rw_object);
207 			cpu_spinwait();
208 			continue;
209 		}
210 
211 		/*
212 		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
213 		 * flag is already set, then we can go ahead and block.  If
214 		 * it is not set, then try to set it.  If we fail to set it,
215 		 * drop the turnstile lock and restart the loop.
216 		 */
217 		if (!(x & RW_LOCK_READ_WAITERS)) {
218 			if (!atomic_cmpset_ptr(&rw->rw_lock, x,
219 			    x | RW_LOCK_READ_WAITERS)) {
220 				turnstile_release(&rw->rw_object);
221 				cpu_spinwait();
222 				continue;
223 			}
224 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
225 				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
226 				    __func__, rw);
227 		}
228 
229 #ifdef SMP
230 		/*
231 		 * If the owner is running on another CPU, spin until
232 		 * the owner stops running or the state of the lock
233 		 * changes.
234 		 */
235 		owner = (struct thread *)RW_OWNER(x);
236 		if (TD_IS_RUNNING(owner)) {
237 			turnstile_release(&rw->rw_object);
238 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
239 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
240 				    __func__, rw, owner);
241 			while ((struct thread *)RW_OWNER(rw->rw_lock) ==
242 			    owner && TD_IS_RUNNING(owner))
243 				cpu_spinwait();
244 			continue;
245 		}
246 #endif
247 
248 		/*
249 		 * We were unable to acquire the lock and the read waiters
250 		 * flag is set, so we must block on the turnstile.
251 		 */
252 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
253 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
254 			    rw);
255 		turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE);
256 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
257 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
258 			    __func__, rw);
259 	}
260 
261 	/*
262 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
263 	 * however.  turnstiles don't like owners changing between calls to
264 	 * turnstile_wait() currently.
265 	 */
266 
267 	LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line);
268 	WITNESS_LOCK(&rw->rw_object, 0, file, line);
269 }
270 
271 void
272 _rw_runlock(struct rwlock *rw, const char *file, int line)
273 {
274 	struct turnstile *ts;
275 	uintptr_t x;
276 
277 	_rw_assert(rw, RA_RLOCKED, file, line);
278 	WITNESS_UNLOCK(&rw->rw_object, 0, file, line);
279 	LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line);
280 
281 	/* TODO: drop "owner of record" here. */
282 
283 	for (;;) {
284 		/*
285 		 * See if there is more than one read lock held.  If so,
286 		 * just drop one and return.
287 		 */
288 		x = rw->rw_lock;
289 		if (RW_READERS(x) > 1) {
290 			if (atomic_cmpset_ptr(&rw->rw_lock, x,
291 			    x - RW_ONE_READER)) {
292 				if (LOCK_LOG_TEST(&rw->rw_object, 0))
293 					CTR4(KTR_LOCK,
294 					    "%s: %p succeeded %p -> %p",
295 					    __func__, rw, (void *)x,
296 					    (void *)(x - RW_ONE_READER));
297 				break;
298 			}
299 			continue;
300 		}
301 
302 		/*
303 		 * We should never have read waiters while at least one
304 		 * thread holds a read lock.  (See note above)
305 		 */
306 		KASSERT(!(x & RW_LOCK_READ_WAITERS),
307 		    ("%s: waiting readers", __func__));
308 
309 		/*
310 		 * If there aren't any waiters for a write lock, then try
311 		 * to drop it quickly.
312 		 */
313 		if (!(x & RW_LOCK_WRITE_WAITERS)) {
314 
315 			/*
316 			 * There shouldn't be any flags set and we should
317 			 * be the only read lock.  If we fail to release
318 			 * the single read lock, then another thread might
319 			 * have just acquired a read lock, so go back up
320 			 * to the multiple read locks case.
321 			 */
322 			MPASS(x == RW_READERS_LOCK(1));
323 			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
324 			    RW_UNLOCKED)) {
325 				if (LOCK_LOG_TEST(&rw->rw_object, 0))
326 					CTR2(KTR_LOCK, "%s: %p last succeeded",
327 					    __func__, rw);
328 				break;
329 			}
330 			continue;
331 		}
332 
333 		/*
334 		 * There should just be one reader with one or more
335 		 * writers waiting.
336 		 */
337 		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));
338 
339 		/*
340 		 * Ok, we know we have a waiting writer and we think we
341 		 * are the last reader, so grab the turnstile lock.
342 		 */
343 		turnstile_lock(&rw->rw_object);
344 
345 		/*
346 		 * Try to drop our lock, leaving the lock in an unlocked
347 		 * state.
348 		 *
349 		 * If you wanted to do explicit lock handoff you'd have to
350 		 * do it here.  You'd also want to use turnstile_signal()
351 		 * and you'd have to handle the race where a higher
352 		 * priority thread blocks on the write lock before the
353 		 * thread you wake up actually runs, and let the new thread
354 		 * "steal" the lock.  For now it's a lot simpler to just
355 		 * wake up all of the waiters.
356 		 *
357 		 * As above, if we fail, then another thread might have
358 		 * acquired a read lock, so drop the turnstile lock and
359 		 * restart.
360 		 */
361 		if (!atomic_cmpset_ptr(&rw->rw_lock,
362 		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
363 			turnstile_release(&rw->rw_object);
364 			continue;
365 		}
366 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
367 			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
368 			    __func__, rw);
369 
370 		/*
371 		 * Ok.  The lock is released and all that's left is to
372 		 * wake up the waiters.  Note that the lock might not be
373 		 * free anymore, but in that case the writers will just
374 		 * block again if they run before the new lock holder(s)
375 		 * release the lock.
376 		 */
377 		ts = turnstile_lookup(&rw->rw_object);
378 		MPASS(ts != NULL);
379 		turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
380 		turnstile_unpend(ts, TS_SHARED_LOCK);
381 		break;
382 	}
383 }
384 
385 /*
386  * This function is called when we are unable to obtain a write lock on the
387  * first try.  This means that at least one other thread holds either a
388  * read or write lock.
389  */
390 void
391 _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
392 {
393 #ifdef SMP
394 	volatile struct thread *owner;
395 #endif
396 	uintptr_t v;
397 
398 	if (LOCK_LOG_TEST(&rw->rw_object, 0))
399 		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
400 		    rw->rw_object.lo_name, (void *)rw->rw_lock, file, line);
401 
402 	while (!_rw_write_lock(rw, tid)) {
403 		turnstile_lock(&rw->rw_object);
404 		v = rw->rw_lock;
405 
406 		/*
407 		 * If the lock was released while spinning on the
408 		 * turnstile chain lock, try again.
409 		 */
410 		if (v == RW_UNLOCKED) {
411 			turnstile_release(&rw->rw_object);
412 			cpu_spinwait();
413 			continue;
414 		}
415 
416 		/*
417 		 * If the lock was released by a writer with both readers
418 		 * and writers waiting and a reader hasn't woken up and
419 		 * acquired the lock yet, rw_lock will be set to the
420 		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
421 		 * that value, try to acquire it once.  Note that we have
422 		 * to preserve the RW_LOCK_WRITE_WAITERS flag since other
423 		 * writers are still waiting.  If we fail, restart the
424 		 * loop.
425 		 */
426 		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
427 			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
428 			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
429 			    tid | RW_LOCK_WRITE_WAITERS)) {
430 				turnstile_claim(&rw->rw_object);
431 				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
432 				    __func__, rw);
433 				break;
434 			}
435 			turnstile_release(&rw->rw_object);
436 			cpu_spinwait();
437 			continue;
438 		}
439 
440 		/*
441 		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
442 		 * set it.  If we fail to set it, then loop back and try
443 		 * again.
444 		 */
445 		if (!(v & RW_LOCK_WRITE_WAITERS)) {
446 			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
447 			    v | RW_LOCK_WRITE_WAITERS)) {
448 				turnstile_release(&rw->rw_object);
449 				cpu_spinwait();
450 				continue;
451 			}
452 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
453 				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
454 				    __func__, rw);
455 		}
456 
457 #ifdef SMP
458 		/*
459 		 * If the lock is write locked and the owner is
460 		 * running on another CPU, spin until the owner stops
461 		 * running or the state of the lock changes.
462 		 */
463 		owner = (struct thread *)RW_OWNER(v);
464 		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
465 			turnstile_release(&rw->rw_object);
466 			if (LOCK_LOG_TEST(&rw->rw_object, 0))
467 				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
468 				    __func__, rw, owner);
469 		while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
470 			    TD_IS_RUNNING(owner))
471 				cpu_spinwait();
472 			continue;
473 		}
474 #endif
475 
476 		/*
477 		 * We were unable to acquire the lock and the write waiters
478 		 * flag is set, so we must block on the turnstile.
479 		 */
480 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
481 			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
482 			    rw);
483 		turnstile_wait(&rw->rw_object, rw_owner(rw),
484 		    TS_EXCLUSIVE_QUEUE);
485 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
486 			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
487 			    __func__, rw);
488 	}
489 }
490 
491 /*
492  * This function is called if the first try at releasing a write lock failed.
493  * This means that one of the two waiter bits must be set, indicating that
494  * at least one thread is waiting on this lock.
495  */
496 void
497 _rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
498 {
499 	struct turnstile *ts;
500 	uintptr_t v;
501 	int queue;
502 
503 	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
504 	    ("%s: neither of the waiter flags are set", __func__));
505 
506 	if (LOCK_LOG_TEST(&rw->rw_object, 0))
507 		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
508 
509 	turnstile_lock(&rw->rw_object);
510 	ts = turnstile_lookup(&rw->rw_object);
511 
512 #ifdef SMP
513 	/*
514 	 * There might not be a turnstile for this lock if all of
515 	 * the waiters are adaptively spinning.  In that case, just
516 	 * reset the lock to the unlocked state and return.
517 	 */
518 	if (ts == NULL) {
519 		atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED);
520 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
521 			CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw);
522 		turnstile_release(&rw->rw_object);
523 		return;
524 	}
525 #else
526 	MPASS(ts != NULL);
527 #endif
528 
529 	/*
530 	 * Use the same algorithm as sx locks for now.  Prefer waking shared
531 	 * waiters over writers if we have any.  This is probably not ideal.
532 	 *
533 	 * 'v' is the value we are going to write back to rw_lock.  If we
534 	 * have waiters on both queues, we need to preserve the state of
535 	 * the waiter flag for the queue we don't wake up.  For now this is
536 	 * hardcoded for the algorithm mentioned above.
537 	 *
538 	 * In the case of both readers and writers waiting, we wake up the
539 	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
540 	 * new writer comes in before a reader it will claim the lock up
541 	 * above.  There is probably a potential priority inversion in
542 	 * there that could be worked around either by waking both queues
543 	 * of waiters or doing some complicated lock handoff gymnastics.
544 	 *
545 	 * Note that in the SMP case, if both flags are set, there might
546 	 * not be any actual writers on the turnstile as they might all
547 	 * be spinning.  In that case, we don't want to preserve the
548 	 * RW_LOCK_WRITE_WAITERS flag as the turnstile is going to go
549 	 * away once we wake up all the readers.
550 	 */
551 	v = RW_UNLOCKED;
552 	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
553 		queue = TS_SHARED_QUEUE;
554 #ifdef SMP
555 		if (rw->rw_lock & RW_LOCK_WRITE_WAITERS &&
556 		    !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
557 			v |= RW_LOCK_WRITE_WAITERS;
558 #else
559 		v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
560 #endif
561 	} else
562 		queue = TS_EXCLUSIVE_QUEUE;
563 
564 #ifdef SMP
565 	/*
566 	 * We have to make sure that we actually have waiters to
567 	 * wake up.  If they are all spinning, then we just need to
568 	 * disown the turnstile and return.
569 	 */
570 	if (turnstile_empty(ts, queue)) {
571 		if (LOCK_LOG_TEST(&rw->rw_object, 0))
572 			CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw);
573 		atomic_store_rel_ptr(&rw->rw_lock, v);
574 		turnstile_disown(ts);
575 		return;
576 	}
577 #endif
578 
579 	/* Wake up all waiters for the specific queue. */
580 	if (LOCK_LOG_TEST(&rw->rw_object, 0))
581 		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
582 		    queue == TS_SHARED_QUEUE ? "read" : "write");
583 	turnstile_broadcast(ts, queue);
584 	atomic_store_rel_ptr(&rw->rw_lock, v);
585 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
586 }
587 
588 /*
589  * Attempt to do a non-blocking upgrade from a read lock to a write
590  * lock.  This will only succeed if this thread holds a single read
591  * lock.  Returns true if the upgrade succeeded and false otherwise.
592  */
593 int
594 _rw_try_upgrade(struct rwlock *rw, const char *file, int line)
595 {
596 	uintptr_t v, tid;
597 	int success;
598 
599 	_rw_assert(rw, RA_RLOCKED, file, line);
600 
601 	/*
602 	 * Attempt to switch from one reader to a writer.  If there
603 	 * are any write waiters, then we will have to lock the
604 	 * turnstile first to prevent races with another writer
605 	 * calling turnstile_wait() before we have claimed this
606 	 * turnstile.  So, do the simple case of no waiters first.
607 	 */
608 	tid = (uintptr_t)curthread;
609 	if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) {
610 		success = atomic_cmpset_acq_ptr(&rw->rw_lock,
611 		    RW_READERS_LOCK(1), tid);
612 		goto out;
613 	}
614 
615 	/*
616 	 * Ok, we think we have write waiters, so lock the
617 	 * turnstile.
618 	 */
619 	turnstile_lock(&rw->rw_object);
620 
621 	/*
622 	 * Try to switch from one reader to a writer again.  This time
623 	 * we honor the current state of the RW_LOCK_WRITE_WAITERS
624 	 * flag.  If we obtain the lock with the flag set, then claim
625 	 * ownership of the turnstile.  In the SMP case it is possible
626 	 * for there to not be an associated turnstile even though there
627 	 * are waiters if all of the waiters are spinning.
628 	 */
629 	v = rw->rw_lock & RW_LOCK_WRITE_WAITERS;
630 	success = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
631 	    tid | v);
632 #ifdef SMP
633 	if (success && v && turnstile_lookup(&rw->rw_object) != NULL)
634 #else
635 	if (success && v)
636 #endif
637 		turnstile_claim(&rw->rw_object);
638 	else
639 		turnstile_release(&rw->rw_object);
640 out:
641 	LOCK_LOG_TRY("WUPGRADE", &rw->rw_object, 0, success, file, line);
642 	if (success)
643 		WITNESS_UPGRADE(&rw->rw_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
644 		    file, line);
645 	return (success);
646 }
647 
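/*
 * Typical consumer pattern for the upgrade path (an illustrative sketch;
 * foo_lock, foo_lookup() and key are made-up names).  Because the upgrade
 * can fail, callers must be prepared to drop the read lock, take the
 * write lock outright and re-validate whatever they looked up:
 *
 *	rw_rlock(&foo_lock);
 *	obj = foo_lookup(key);
 *	if (!rw_try_upgrade(&foo_lock)) {
 *		rw_runlock(&foo_lock);
 *		rw_wlock(&foo_lock);
 *		obj = foo_lookup(key);		state may have changed
 *	}
 *	... modify obj under the write lock ...
 *	rw_wunlock(&foo_lock);
 */
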
648 /*
649  * Downgrade a write lock into a single read lock.
650  */
651 void
652 _rw_downgrade(struct rwlock *rw, const char *file, int line)
653 {
654 	struct turnstile *ts;
655 	uintptr_t tid, v;
656 
657 	_rw_assert(rw, RA_WLOCKED, file, line);
658 
659 	WITNESS_DOWNGRADE(&rw->rw_object, 0, file, line);
660 
661 	/*
662 	 * Convert from a writer to a single reader.  First we handle
663 	 * the easy case with no waiters.  If there are any waiters, we
664 	 * lock the turnstile, "disown" the lock, and awaken any read
665 	 * waiters.
666 	 */
667 	tid = (uintptr_t)curthread;
668 	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
669 		goto out;
670 
671 	/*
672 	 * Ok, we think we have waiters, so lock the turnstile so we can
673 	 * read the waiter flags without any races.
674 	 */
675 	turnstile_lock(&rw->rw_object);
676 	v = rw->rw_lock;
677 	MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS));
678 
679 	/*
680 	 * Downgrade from a write lock while preserving
681 	 * RW_LOCK_WRITE_WAITERS and give up ownership of the
682 	 * turnstile.  If there are any read waiters, wake them up.
683 	 *
684 	 * For SMP, we have to allow for the fact that all of the
685 	 * read waiters might be spinning.  In that case, act as if
686 	 * RW_LOCK_READ_WAITERS is not set.  Also, only preserve
687 	 * the RW_LOCK_WRITE_WAITERS flag if at least one writer is
688 	 * blocked on the turnstile.
689 	 */
690 	ts = turnstile_lookup(&rw->rw_object);
691 #ifdef SMP
692 	if (ts == NULL)
693 		v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS);
694 	else if (v & RW_LOCK_READ_WAITERS &&
695 	    turnstile_empty(ts, TS_SHARED_QUEUE))
696 		v &= ~RW_LOCK_READ_WAITERS;
697 	else if (v & RW_LOCK_WRITE_WAITERS &&
698 	    turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
699 		v &= ~RW_LOCK_WRITE_WAITERS;
700 #else
701 	MPASS(ts != NULL);
702 #endif
703 	if (v & RW_LOCK_READ_WAITERS)
704 		turnstile_broadcast(ts, TS_SHARED_QUEUE);
705 	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) |
706 	    (v & RW_LOCK_WRITE_WAITERS));
707 	if (v & RW_LOCK_READ_WAITERS)
708 		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
709 #ifdef SMP
710 	else if (ts == NULL)
711 		turnstile_release(&rw->rw_object);
712 #endif
713 	else
714 		turnstile_disown(ts);
715 out:
716 	LOCK_LOG_LOCK("WDOWNGRADE", &rw->rw_object, 0, 0, file, line);
717 }
718 
719 #ifdef INVARIANT_SUPPORT
720 #ifndef INVARIANTS
721 #undef _rw_assert
722 #endif
723 
724 /*
725  * In the non-WITNESS case, rw_assert() can only detect that at least
726  * *some* thread owns an rlock, but it cannot guarantee that *this*
727  * thread owns an rlock.
728  */
729 void
730 _rw_assert(struct rwlock *rw, int what, const char *file, int line)
731 {
732 
733 	if (panicstr != NULL)
734 		return;
735 	switch (what) {
736 	case RA_LOCKED:
737 	case RA_RLOCKED:
738 #ifdef WITNESS
739 		witness_assert(&rw->rw_object, what, file, line);
740 #else
741 		/*
742 		 * If some other thread has a write lock or we have one
743 		 * and are asserting a read lock, fail.  Also, if no one
744 		 * has a lock at all, fail.
745 		 */
746 		if (rw->rw_lock == RW_UNLOCKED ||
747 		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
748 		    rw_wowner(rw) != curthread)))
749 			panic("Lock %s not %slocked @ %s:%d\n",
750 			    rw->rw_object.lo_name, (what == RA_RLOCKED) ?
751 			    "read " : "", file, line);
752 #endif
753 		break;
754 	case RA_WLOCKED:
755 		if (rw_wowner(rw) != curthread)
756 			panic("Lock %s not exclusively locked @ %s:%d\n",
757 			    rw->rw_object.lo_name, file, line);
758 		break;
759 	case RA_UNLOCKED:
760 #ifdef WITNESS
761 		witness_assert(&rw->rw_object, what, file, line);
762 #else
763 		/*
764 		 * If we hold a write lock, fail.  We can't reliably check
765 		 * to see if we hold a read lock or not.
766 		 */
767 		if (rw_wowner(rw) == curthread)
768 			panic("Lock %s exclusively locked @ %s:%d\n",
769 			    rw->rw_object.lo_name, file, line);
770 #endif
771 		break;
772 	default:
773 		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
774 		    line);
775 	}
776 }
777 #endif /* INVARIANT_SUPPORT */
778 
779 #ifdef DDB
780 void
781 db_show_rwlock(struct lock_object *lock)
782 {
783 	struct rwlock *rw;
784 	struct thread *td;
785 
786 	rw = (struct rwlock *)lock;
787 
788 	db_printf(" state: ");
789 	if (rw->rw_lock == RW_UNLOCKED)
790 		db_printf("UNLOCKED\n");
791 	else if (rw->rw_lock & RW_LOCK_READ)
792 		db_printf("RLOCK: %jd locks\n",
793 		    (intmax_t)(RW_READERS(rw->rw_lock)));
794 	else {
795 		td = rw_wowner(rw);
796 		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
797 		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
798 	}
799 	db_printf(" waiters: ");
800 	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
801 	case RW_LOCK_READ_WAITERS:
802 		db_printf("readers\n");
803 		break;
804 	case RW_LOCK_WRITE_WAITERS:
805 		db_printf("writers\n");
806 		break;
807 	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
808 		db_printf("readers and writers\n");
809 		break;
810 	default:
811 		db_printf("none\n");
812 		break;
813 	}
814 }
815 
816 #endif
817