1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Machine independent bits of reader/writer lock implementation.
30 */
31
32 #include <sys/cdefs.h>
33 #include "opt_ddb.h"
34 #include "opt_hwpmc_hooks.h"
35 #include "opt_no_adaptive_rwlocks.h"
36
37 #include <sys/param.h>
38 #include <sys/kdb.h>
39 #include <sys/ktr.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/rwlock.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/sysctl.h>
48 #include <sys/systm.h>
49 #include <sys/turnstile.h>
50
51 #include <machine/cpu.h>
52
53 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
54 #define ADAPTIVE_RWLOCKS
55 #endif
56
57 #ifdef HWPMC_HOOKS
58 #include <sys/pmckern.h>
59 PMC_SOFT_DECLARE( , , lock, failed);
60 #endif
61
/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
 */
66 #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock))
67
68 #ifdef DDB
69 #include <ddb/ddb.h>
70
71 static void db_show_rwlock(const struct lock_object *lock);
72 #endif
73 static void assert_rw(const struct lock_object *lock, int what);
74 static void lock_rw(struct lock_object *lock, uintptr_t how);
75 static int trylock_rw(struct lock_object *lock, uintptr_t how);
76 #ifdef KDTRACE_HOOKS
77 static int owner_rw(const struct lock_object *lock, struct thread **owner);
78 #endif
79 static uintptr_t unlock_rw(struct lock_object *lock);
80
81 struct lock_class lock_class_rw = {
82 .lc_name = "rw",
83 .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
84 .lc_assert = assert_rw,
85 #ifdef DDB
86 .lc_ddb_show = db_show_rwlock,
87 #endif
88 .lc_lock = lock_rw,
89 .lc_trylock = trylock_rw,
90 .lc_unlock = unlock_rw,
91 #ifdef KDTRACE_HOOKS
92 .lc_owner = owner_rw,
93 #endif
94 };
95
96 #ifdef ADAPTIVE_RWLOCKS
97 #ifdef RWLOCK_CUSTOM_BACKOFF
98 static u_short __read_frequently rowner_retries;
99 static u_short __read_frequently rowner_loops;
100 static SYSCTL_NODE(_debug, OID_AUTO, rwlock,
101 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
102 "rwlock debugging");
103 SYSCTL_U16(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
104 SYSCTL_U16(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
105
106 static struct lock_delay_config __read_frequently rw_delay;
107
108 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
109 0, "");
110 SYSCTL_U16(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
111 0, "");
112
113 static void
rw_lock_delay_init(void *arg __unused)
115 {
116
117 lock_delay_default_init(&rw_delay);
118 rowner_retries = 10;
119 rowner_loops = max(10000, rw_delay.max);
120 }
121 LOCK_DELAY_SYSINIT(rw_lock_delay_init);
122 #else
123 #define rw_delay locks_delay
124 #define rowner_retries locks_delay_retries
125 #define rowner_loops locks_delay_loops
126 #endif
127 #endif
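/*
 * Example (sketch, not part of the upstream code): when the kernel is built
 * with RWLOCK_CUSTOM_BACKOFF, the adaptive-spin knobs declared above are
 * exported as read-write sysctls and can be inspected or tuned at run time:
 *
 *	# sysctl debug.rwlock
 *	# sysctl debug.rwlock.retry=20
 *
 * Otherwise the lock-wide locks_delay/locks_delay_retries/locks_delay_loops
 * tunables shared with the other lock types are used instead.
 */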
128
129 /*
130 * Return a pointer to the owning thread if the lock is write-locked or
131 * NULL if the lock is unlocked or read-locked.
132 */
133
134 #define lv_rw_wowner(v) \
135 ((v) & RW_LOCK_READ ? NULL : \
136 (struct thread *)RW_OWNER((v)))
137
138 #define rw_wowner(rw) lv_rw_wowner(RW_READ_VALUE(rw))
139
/*
 * Returns whether the write owner is recursed.  Write ownership is not
 * assured here and should be checked by the caller beforehand.
 */
144 #define rw_recursed(rw) ((rw)->rw_recurse != 0)
145
/*
 * Return true if curthread holds the lock.
 */
149 #define rw_wlocked(rw) (rw_wowner((rw)) == curthread)
150
151 /*
152 * Return a pointer to the owning thread for this lock who should receive
153 * any priority lent by threads that block on this lock. Currently this
154 * is identical to rw_wowner().
155 */
156 #define rw_owner(rw) rw_wowner(rw)
157
158 #ifndef INVARIANTS
159 #define __rw_assert(c, what, file, line)
160 #endif
161
162 static void
assert_rw(const struct lock_object *lock, int what)
164 {
165
166 rw_assert((const struct rwlock *)lock, what);
167 }
168
169 static void
lock_rw(struct lock_object *lock, uintptr_t how)
171 {
172 struct rwlock *rw;
173
174 rw = (struct rwlock *)lock;
175 if (how)
176 rw_rlock(rw);
177 else
178 rw_wlock(rw);
179 }
180
181 static int
trylock_rw(struct lock_object *lock, uintptr_t how)
183 {
184 struct rwlock *rw;
185
186 rw = (struct rwlock *)lock;
187 if (how)
188 return (rw_try_rlock(rw));
189 else
190 return (rw_try_wlock(rw));
191 }
192
193 static uintptr_t
unlock_rw(struct lock_object *lock)
195 {
196 struct rwlock *rw;
197
198 rw = (struct rwlock *)lock;
199 rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
200 if (rw->rw_lock & RW_LOCK_READ) {
201 rw_runlock(rw);
202 return (1);
203 } else {
204 rw_wunlock(rw);
205 return (0);
206 }
207 }
208
209 #ifdef KDTRACE_HOOKS
210 static int
owner_rw(const struct lock_object *lock, struct thread **owner)
212 {
213 const struct rwlock *rw = (const struct rwlock *)lock;
214 uintptr_t x = rw->rw_lock;
215
216 *owner = rw_wowner(rw);
217 return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
218 (*owner != NULL));
219 }
220 #endif
221
222 void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
224 {
225 struct rwlock *rw;
226 int flags;
227
228 rw = rwlock2rw(c);
229
230 MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
231 RW_RECURSE | RW_NEW)) == 0);
232 ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
233 ("%s: rw_lock not aligned for %s: %p", __func__, name,
234 &rw->rw_lock));
235
236 flags = LO_UPGRADABLE;
237 if (opts & RW_DUPOK)
238 flags |= LO_DUPOK;
239 if (opts & RW_NOPROFILE)
240 flags |= LO_NOPROFILE;
241 if (!(opts & RW_NOWITNESS))
242 flags |= LO_WITNESS;
243 if (opts & RW_RECURSE)
244 flags |= LO_RECURSABLE;
245 if (opts & RW_QUIET)
246 flags |= LO_QUIET;
247 if (opts & RW_NEW)
248 flags |= LO_NEW;
249
250 lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
251 rw->rw_lock = RW_UNLOCKED;
252 rw->rw_recurse = 0;
253 }
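/*
 * Example usage (sketch, following rwlock(9); foo_lock is a placeholder and
 * not part of this file):
 *
 *	static struct rwlock foo_lock;
 *
 *	rw_init(&foo_lock, "foo lock");
 * or, with options:
 *	rw_init_flags(&foo_lock, "foo lock", RW_RECURSE | RW_DUPOK);
 *
 * Both macros eventually reach _rw_init_flags() with the lock cookie.
 */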
254
255 void
_rw_destroy(volatile uintptr_t *c)
257 {
258 struct rwlock *rw;
259
260 rw = rwlock2rw(c);
261
262 KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
263 KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
264 rw->rw_lock = RW_DESTROYED;
265 lock_destroy(&rw->lock_object);
266 }
267
268 void
rw_sysinit(void *arg)
270 {
271 struct rw_args *args;
272
273 args = arg;
274 rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
275 args->ra_flags);
276 }
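/*
 * Example (sketch): rw_sysinit() is normally reached through the
 * RW_SYSINIT() macro, which arranges for the lock to be initialized at
 * boot via SYSINIT(9); bar_lock below is a placeholder:
 *
 *	static struct rwlock bar_lock;
 *	RW_SYSINIT(bar_lock_init, &bar_lock, "bar lock");
 */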
277
278 int
_rw_wowned(const volatile uintptr_t *c)
280 {
281
282 return (rw_wowner(rwlock2rw(c)) == curthread);
283 }
284
285 void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
287 {
288 struct rwlock *rw;
289 uintptr_t tid, v;
290
291 rw = rwlock2rw(c);
292
293 KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
294 !TD_IS_IDLETHREAD(curthread),
295 ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
296 curthread, rw->lock_object.lo_name, file, line));
297 KASSERT(rw->rw_lock != RW_DESTROYED,
298 ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
299 WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
300 line, NULL);
301 tid = (uintptr_t)curthread;
302 v = RW_UNLOCKED;
303 if (!_rw_write_lock_fetch(rw, &v, tid))
304 _rw_wlock_hard(rw, v, file, line);
305 else
306 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
307 0, 0, file, line, LOCKSTAT_WRITER);
308
309 LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
310 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
311 TD_LOCKS_INC(curthread);
312 }
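/*
 * Example (sketch): the common write-side pattern served by the fast path
 * above, with the hard path entered only on contention; foo_lock is a
 * placeholder:
 *
 *	rw_wlock(&foo_lock);
 *	... modify data protected by foo_lock ...
 *	rw_wunlock(&foo_lock);
 */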
313
314 int
__rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
316 {
317 struct thread *td;
318 uintptr_t tid, v;
319 int rval;
320 bool recursed;
321
322 td = curthread;
323 tid = (uintptr_t)td;
324 if (SCHEDULER_STOPPED())
325 return (1);
326
327 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
328 ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
329 curthread, rw->lock_object.lo_name, file, line));
330 KASSERT(rw->rw_lock != RW_DESTROYED,
331 ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
332
333 rval = 1;
334 recursed = false;
335 v = RW_UNLOCKED;
336 for (;;) {
337 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
338 break;
339 if (v == RW_UNLOCKED)
340 continue;
341 if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
342 rw->rw_recurse++;
343 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
344 break;
345 }
346 rval = 0;
347 break;
348 }
349
350 LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
351 if (rval) {
352 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
353 file, line);
354 if (!recursed)
355 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
356 rw, 0, 0, file, line, LOCKSTAT_WRITER);
357 TD_LOCKS_INC(curthread);
358 }
359 return (rval);
360 }
361
362 int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
364 {
365 struct rwlock *rw;
366
367 rw = rwlock2rw(c);
368 return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
369 }
370
371 void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
373 {
374 struct rwlock *rw;
375
376 rw = rwlock2rw(c);
377
378 KASSERT(rw->rw_lock != RW_DESTROYED,
379 ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
380 __rw_assert(c, RA_WLOCKED, file, line);
381 WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
382 LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
383 line);
384
385 #ifdef LOCK_PROFILING
386 _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
387 #else
388 __rw_wunlock(rw, curthread, file, line);
389 #endif
390
391 TD_LOCKS_DEC(curthread);
392 }
393
/*
 * Determines whether a new reader can acquire the lock.  Succeeds if the
 * lock is read-locked (or unlocked, which is encoded the same way) and has
 * no write waiters or spinners.  Also succeeds if the reader already owns
 * a read lock and the lock is read-locked, to prevent deadlock from reader
 * recursion.  Failing otherwise gives writers priority over readers.
 */
401 static __always_inline bool
__rw_can_read(struct thread *td, uintptr_t v, bool fp)
403 {
404
405 if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
406 == RW_LOCK_READ)
407 return (true);
408 if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
409 return (true);
410 return (false);
411 }
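/*
 * Example (sketch): with v == RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS
 * the first test above fails, so a brand new reader (fp == true, or a
 * thread with td_rw_rlocks == 0) is turned away rather than starving the
 * waiting writer, while a thread that already holds read locks is still
 * admitted on the slow path to avoid deadlocking on its own recursion.
 */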
412
413 static __always_inline bool
__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
    LOCK_FILE_LINE_ARG_DEF)
416 {
417
418 /*
419 * Handle the easy case. If no other thread has a write
420 * lock, then try to bump up the count of read locks. Note
421 * that we have to preserve the current state of the
422 * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a
423 * read lock, then rw_lock must have changed, so restart
424 * the loop. Note that this handles the case of a
425 * completely unlocked rwlock since such a lock is encoded
426 * as a read lock with no waiters.
427 */
428 while (__rw_can_read(td, *vp, fp)) {
429 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
430 *vp + RW_ONE_READER)) {
431 if (LOCK_LOG_TEST(&rw->lock_object, 0))
432 CTR4(KTR_LOCK,
433 "%s: %p succeed %p -> %p", __func__,
434 rw, (void *)*vp,
435 (void *)(*vp + RW_ONE_READER));
436 td->td_rw_rlocks++;
437 return (true);
438 }
439 }
440 return (false);
441 }
442
443 static void __noinline
__rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
446 {
447 struct turnstile *ts;
448 struct thread *owner;
449 #ifdef ADAPTIVE_RWLOCKS
450 int spintries = 0;
451 int i, n;
452 #endif
453 #ifdef LOCK_PROFILING
454 uint64_t waittime = 0;
455 int contested = 0;
456 #endif
457 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
458 struct lock_delay_arg lda;
459 #endif
460 #ifdef KDTRACE_HOOKS
461 u_int sleep_cnt = 0;
462 int64_t sleep_time = 0;
463 int64_t all_time = 0;
464 uintptr_t state = 0;
465 #endif
466 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
467 int doing_lockprof = 0;
468 #endif
469
470 #ifdef KDTRACE_HOOKS
471 if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
472 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
473 goto out_lockstat;
474 doing_lockprof = 1;
475 all_time -= lockstat_nsecs(&rw->lock_object);
476 }
477 state = v;
478 #endif
479 #ifdef LOCK_PROFILING
480 doing_lockprof = 1;
481 #endif
482
483 if (SCHEDULER_STOPPED())
484 return;
485
486 #if defined(ADAPTIVE_RWLOCKS)
487 lock_delay_arg_init(&lda, &rw_delay);
488 #elif defined(KDTRACE_HOOKS)
489 lock_delay_arg_init_noadapt(&lda);
490 #endif
491
492 #ifdef HWPMC_HOOKS
493 PMC_SOFT_CALL( , , lock, failed);
494 #endif
495 lock_profile_obtain_lock_failed(&rw->lock_object, false,
496 &contested, &waittime);
497
498 THREAD_CONTENDS_ON_LOCK(&rw->lock_object);
499
500 for (;;) {
501 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
502 break;
503 #ifdef KDTRACE_HOOKS
504 lda.spin_cnt++;
505 #endif
506
507 #ifdef ADAPTIVE_RWLOCKS
508 /*
509 * If the owner is running on another CPU, spin until
510 * the owner stops running or the state of the lock
511 * changes.
512 */
513 if ((v & RW_LOCK_READ) == 0) {
514 owner = (struct thread *)RW_OWNER(v);
515 if (TD_IS_RUNNING(owner)) {
516 if (LOCK_LOG_TEST(&rw->lock_object, 0))
517 CTR3(KTR_LOCK,
518 "%s: spinning on %p held by %p",
519 __func__, rw, owner);
520 KTR_STATE1(KTR_SCHED, "thread",
521 sched_tdname(curthread), "spinning",
522 "lockname:\"%s\"", rw->lock_object.lo_name);
523 do {
524 lock_delay(&lda);
525 v = RW_READ_VALUE(rw);
526 owner = lv_rw_wowner(v);
527 } while (owner != NULL && TD_IS_RUNNING(owner));
528 KTR_STATE0(KTR_SCHED, "thread",
529 sched_tdname(curthread), "running");
530 continue;
531 }
532 } else {
533 if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
534 MPASS(!__rw_can_read(td, v, false));
535 lock_delay_spin(2);
536 v = RW_READ_VALUE(rw);
537 continue;
538 }
539 if (spintries < rowner_retries) {
540 spintries++;
541 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
542 "spinning", "lockname:\"%s\"",
543 rw->lock_object.lo_name);
544 n = RW_READERS(v);
545 for (i = 0; i < rowner_loops; i += n) {
546 lock_delay_spin(n);
547 v = RW_READ_VALUE(rw);
548 if (!(v & RW_LOCK_READ))
549 break;
550 n = RW_READERS(v);
551 if (n == 0)
552 break;
553 if (__rw_can_read(td, v, false))
554 break;
555 }
556 #ifdef KDTRACE_HOOKS
557 lda.spin_cnt += rowner_loops - i;
558 #endif
559 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
560 "running");
561 if (i < rowner_loops)
562 continue;
563 }
564 }
565 #endif
566
/*
 * Okay, now it's the hard case.  Some other thread already
 * has a write lock or there are write waiters present, so
 * acquire the turnstile lock and begin the process of
 * blocking.
 */
573 ts = turnstile_trywait(&rw->lock_object);
574
575 /*
576 * The lock might have been released while we spun, so
577 * recheck its state and restart the loop if needed.
578 */
579 v = RW_READ_VALUE(rw);
580 retry_ts:
581 if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
582 __rw_can_read(td, v, false)) {
583 turnstile_cancel(ts);
584 continue;
585 }
586
587 owner = lv_rw_wowner(v);
588
589 #ifdef ADAPTIVE_RWLOCKS
590 /*
591 * The current lock owner might have started executing
592 * on another CPU (or the lock could have changed
593 * owners) while we were waiting on the turnstile
594 * chain lock. If so, drop the turnstile lock and try
595 * again.
596 */
597 if (owner != NULL) {
598 if (TD_IS_RUNNING(owner)) {
599 turnstile_cancel(ts);
600 continue;
601 }
602 }
603 #endif
604
605 /*
606 * The lock is held in write mode or it already has waiters.
607 */
608 MPASS(!__rw_can_read(td, v, false));
609
610 /*
611 * If the RW_LOCK_READ_WAITERS flag is already set, then
612 * we can go ahead and block. If it is not set then try
613 * to set it. If we fail to set it drop the turnstile
614 * lock and restart the loop.
615 */
616 if (!(v & RW_LOCK_READ_WAITERS)) {
617 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
618 v | RW_LOCK_READ_WAITERS))
619 goto retry_ts;
620 if (LOCK_LOG_TEST(&rw->lock_object, 0))
621 CTR2(KTR_LOCK, "%s: %p set read waiters flag",
622 __func__, rw);
623 }
624
625 /*
626 * We were unable to acquire the lock and the read waiters
627 * flag is set, so we must block on the turnstile.
628 */
629 if (LOCK_LOG_TEST(&rw->lock_object, 0))
630 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
631 rw);
632 #ifdef KDTRACE_HOOKS
633 sleep_time -= lockstat_nsecs(&rw->lock_object);
634 #endif
635 MPASS(owner == rw_owner(rw));
636 turnstile_wait(ts, owner, TS_SHARED_QUEUE);
637 #ifdef KDTRACE_HOOKS
638 sleep_time += lockstat_nsecs(&rw->lock_object);
639 sleep_cnt++;
640 #endif
641 if (LOCK_LOG_TEST(&rw->lock_object, 0))
642 CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
643 __func__, rw);
644 v = RW_READ_VALUE(rw);
645 }
646 THREAD_CONTENTION_DONE(&rw->lock_object);
647 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
648 if (__predict_true(!doing_lockprof))
649 return;
650 #endif
651 #ifdef KDTRACE_HOOKS
652 all_time += lockstat_nsecs(&rw->lock_object);
653 if (sleep_time)
654 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
655 LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
656 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
657
658 /* Record only the loops spinning and not sleeping. */
659 if (lda.spin_cnt > sleep_cnt)
660 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
661 LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
662 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
663 out_lockstat:
664 #endif
665 /*
666 * TODO: acquire "owner of record" here. Here be turnstile dragons
667 * however. turnstiles don't like owners changing between calls to
668 * turnstile_wait() currently.
669 */
670 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
671 waittime, file, line, LOCKSTAT_READER);
672 }
673
674 void
__rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
676 {
677 struct thread *td;
678 uintptr_t v;
679
680 td = curthread;
681
682 KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
683 !TD_IS_IDLETHREAD(td),
684 ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
685 td, rw->lock_object.lo_name, file, line));
686 KASSERT(rw->rw_lock != RW_DESTROYED,
687 ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
688 KASSERT(rw_wowner(rw) != td,
689 ("rw_rlock: wlock already held for %s @ %s:%d",
690 rw->lock_object.lo_name, file, line));
691 WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
692
693 v = RW_READ_VALUE(rw);
694 if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
695 !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
696 __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
697 else
698 lock_profile_obtain_lock_success(&rw->lock_object, false, 0, 0,
699 file, line);
700
701 LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
702 WITNESS_LOCK(&rw->lock_object, 0, file, line);
703 TD_LOCKS_INC(curthread);
704 }
705
706 void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
708 {
709 struct rwlock *rw;
710
711 rw = rwlock2rw(c);
712 __rw_rlock_int(rw LOCK_FILE_LINE_ARG);
713 }
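/*
 * Example (sketch): the matching read-side pattern, with foo_lock again a
 * placeholder:
 *
 *	rw_rlock(&foo_lock);
 *	... read data protected by foo_lock ...
 *	rw_runlock(&foo_lock);
 */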
714
715 int
__rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
717 {
718 uintptr_t x;
719
720 if (SCHEDULER_STOPPED())
721 return (1);
722
723 KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
724 ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
725 curthread, rw->lock_object.lo_name, file, line));
726
727 x = rw->rw_lock;
728 for (;;) {
729 KASSERT(rw->rw_lock != RW_DESTROYED,
730 ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
731 if (!(x & RW_LOCK_READ))
732 break;
733 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
734 LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
735 line);
736 WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
737 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
738 rw, 0, 0, file, line, LOCKSTAT_READER);
739 TD_LOCKS_INC(curthread);
740 curthread->td_rw_rlocks++;
741 return (1);
742 }
743 }
744
745 LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
746 return (0);
747 }
748
749 int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
751 {
752 struct rwlock *rw;
753
754 rw = rwlock2rw(c);
755 return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
756 }
757
758 static __always_inline bool
__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
760 {
761
762 for (;;) {
763 if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
764 if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
765 *vp - RW_ONE_READER)) {
766 if (LOCK_LOG_TEST(&rw->lock_object, 0))
767 CTR4(KTR_LOCK,
768 "%s: %p succeeded %p -> %p",
769 __func__, rw, (void *)*vp,
770 (void *)(*vp - RW_ONE_READER));
771 td->td_rw_rlocks--;
772 return (true);
773 }
774 continue;
775 }
776 break;
777 }
778 return (false);
779 }
780
781 static void __noinline
__rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
784 {
785 struct turnstile *ts;
786 uintptr_t setv, passedv, queue;
787
788 if (SCHEDULER_STOPPED())
789 return;
790
791 passedv = v;
792 if (__rw_runlock_try(rw, td, &v))
793 goto out_lockstat;
794
795 /*
796 * Ok, we know we have waiters and we think we are the
797 * last reader, so grab the turnstile lock.
798 */
799 turnstile_chain_lock(&rw->lock_object);
800 v = RW_READ_VALUE(rw);
801 for (;;) {
802 if (__rw_runlock_try(rw, td, &v))
803 break;
804
805 MPASS(v & RW_LOCK_WAITERS);
806
807 /*
 * Try to drop our lock leaving the lock in an unlocked
809 * state.
810 *
811 * If you wanted to do explicit lock handoff you'd have to
812 * do it here. You'd also want to use turnstile_signal()
813 * and you'd have to handle the race where a higher
814 * priority thread blocks on the write lock before the
815 * thread you wakeup actually runs and have the new thread
816 * "steal" the lock. For now it's a lot simpler to just
817 * wakeup all of the waiters.
818 *
819 * As above, if we fail, then another thread might have
820 * acquired a read lock, so drop the turnstile lock and
821 * restart.
822 */
823 setv = RW_UNLOCKED;
824 queue = TS_SHARED_QUEUE;
825 if (v & RW_LOCK_WRITE_WAITERS) {
826 queue = TS_EXCLUSIVE_QUEUE;
827 setv |= (v & RW_LOCK_READ_WAITERS);
828 }
829 setv |= (v & RW_LOCK_WRITE_SPINNER);
830 if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
831 continue;
832 if (LOCK_LOG_TEST(&rw->lock_object, 0))
833 CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
834 __func__, rw);
835
836 /*
837 * Ok. The lock is released and all that's left is to
838 * wake up the waiters. Note that the lock might not be
839 * free anymore, but in that case the writers will just
840 * block again if they run before the new lock holder(s)
841 * release the lock.
842 */
843 ts = turnstile_lookup(&rw->lock_object);
844 if (__predict_false(ts == NULL)) {
845 panic("got NULL turnstile on rwlock %p passedv %p v %p",
846 rw, (void *)passedv, (void *)v);
847 }
848 turnstile_broadcast(ts, queue);
849 turnstile_unpend(ts);
850 td->td_rw_rlocks--;
851 break;
852 }
853 turnstile_chain_unlock(&rw->lock_object);
854 out_lockstat:
855 LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
856 }
857
858 void
_rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
860 {
861 struct thread *td;
862 uintptr_t v;
863
864 KASSERT(rw->rw_lock != RW_DESTROYED,
865 ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
866 __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
867 WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
868 LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
869
870 td = curthread;
871 v = RW_READ_VALUE(rw);
872
873 if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
874 !__rw_runlock_try(rw, td, &v)))
875 __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
876 else
877 lock_profile_release_lock(&rw->lock_object, false);
878
879 TD_LOCKS_DEC(curthread);
880 }
881
882 void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
884 {
885 struct rwlock *rw;
886
887 rw = rwlock2rw(c);
888 _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
889 }
890
891 #ifdef ADAPTIVE_RWLOCKS
892 static inline void
rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
894 {
895
896 if (v & RW_LOCK_WRITE_SPINNER)
897 return;
898 if (*in_critical) {
899 critical_exit();
900 *in_critical = false;
901 (*extra_work)--;
902 }
903 }
904 #else
905 #define rw_drop_critical(v, in_critical, extra_work) do { } while (0)
906 #endif
907
908 /*
909 * This function is called when we are unable to obtain a write lock on the
910 * first try. This means that at least one other thread holds either a
911 * read or write lock.
912 */
913 void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
915 {
916 uintptr_t tid;
917 struct rwlock *rw;
918 struct turnstile *ts;
919 struct thread *owner;
920 #ifdef ADAPTIVE_RWLOCKS
921 int spintries = 0;
922 int i, n;
923 enum { READERS, WRITER } sleep_reason = READERS;
924 bool in_critical = false;
925 #endif
926 uintptr_t setv;
927 #ifdef LOCK_PROFILING
928 uint64_t waittime = 0;
929 int contested = 0;
930 #endif
931 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
932 struct lock_delay_arg lda;
933 #endif
934 #ifdef KDTRACE_HOOKS
935 u_int sleep_cnt = 0;
936 int64_t sleep_time = 0;
937 int64_t all_time = 0;
938 uintptr_t state = 0;
939 #endif
940 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
941 int doing_lockprof = 0;
942 #endif
943 int extra_work = 0;
944
945 tid = (uintptr_t)curthread;
946 rw = rwlock2rw(c);
947
948 #ifdef KDTRACE_HOOKS
949 if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
950 while (v == RW_UNLOCKED) {
951 if (_rw_write_lock_fetch(rw, &v, tid))
952 goto out_lockstat;
953 }
954 extra_work = 1;
955 doing_lockprof = 1;
956 all_time -= lockstat_nsecs(&rw->lock_object);
957 }
958 state = v;
959 #endif
960 #ifdef LOCK_PROFILING
961 extra_work = 1;
962 doing_lockprof = 1;
963 #endif
964
965 if (SCHEDULER_STOPPED())
966 return;
967
968 if (__predict_false(v == RW_UNLOCKED))
969 v = RW_READ_VALUE(rw);
970
971 if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
972 KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
973 ("%s: recursing but non-recursive rw %s @ %s:%d\n",
974 __func__, rw->lock_object.lo_name, file, line));
975 rw->rw_recurse++;
976 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
977 if (LOCK_LOG_TEST(&rw->lock_object, 0))
978 CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
979 return;
980 }
981
982 if (LOCK_LOG_TEST(&rw->lock_object, 0))
983 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
984 rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
985
986 #if defined(ADAPTIVE_RWLOCKS)
987 lock_delay_arg_init(&lda, &rw_delay);
988 #elif defined(KDTRACE_HOOKS)
989 lock_delay_arg_init_noadapt(&lda);
990 #endif
991
992 #ifdef HWPMC_HOOKS
993 PMC_SOFT_CALL( , , lock, failed);
994 #endif
995 lock_profile_obtain_lock_failed(&rw->lock_object, false,
996 &contested, &waittime);
997
998 THREAD_CONTENDS_ON_LOCK(&rw->lock_object);
999
1000 for (;;) {
1001 if (v == RW_UNLOCKED) {
1002 if (_rw_write_lock_fetch(rw, &v, tid))
1003 break;
1004 continue;
1005 }
1006 #ifdef KDTRACE_HOOKS
1007 lda.spin_cnt++;
1008 #endif
1009
1010 #ifdef ADAPTIVE_RWLOCKS
1011 if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
1012 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
1013 break;
1014 continue;
1015 }
1016
1017 /*
1018 * If the lock is write locked and the owner is
1019 * running on another CPU, spin until the owner stops
1020 * running or the state of the lock changes.
1021 */
1022 if (!(v & RW_LOCK_READ)) {
1023 rw_drop_critical(v, &in_critical, &extra_work);
1024 sleep_reason = WRITER;
1025 owner = lv_rw_wowner(v);
1026 if (!TD_IS_RUNNING(owner))
1027 goto ts;
1028 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1029 CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
1030 __func__, rw, owner);
1031 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
1032 "spinning", "lockname:\"%s\"",
1033 rw->lock_object.lo_name);
1034 do {
1035 lock_delay(&lda);
1036 v = RW_READ_VALUE(rw);
1037 owner = lv_rw_wowner(v);
1038 } while (owner != NULL && TD_IS_RUNNING(owner));
1039 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
1040 "running");
1041 continue;
1042 } else if (RW_READERS(v) > 0) {
1043 sleep_reason = READERS;
1044 if (spintries == rowner_retries)
1045 goto ts;
1046 if (!(v & RW_LOCK_WRITE_SPINNER)) {
1047 if (!in_critical) {
1048 critical_enter();
1049 in_critical = true;
1050 extra_work++;
1051 }
1052 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
1053 v | RW_LOCK_WRITE_SPINNER)) {
1054 critical_exit();
1055 in_critical = false;
1056 extra_work--;
1057 continue;
1058 }
1059 }
1060 spintries++;
1061 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
1062 "spinning", "lockname:\"%s\"",
1063 rw->lock_object.lo_name);
1064 n = RW_READERS(v);
1065 for (i = 0; i < rowner_loops; i += n) {
1066 lock_delay_spin(n);
1067 v = RW_READ_VALUE(rw);
1068 if (!(v & RW_LOCK_WRITE_SPINNER))
1069 break;
1070 if (!(v & RW_LOCK_READ))
1071 break;
1072 n = RW_READERS(v);
1073 if (n == 0)
1074 break;
1075 }
1076 #ifdef KDTRACE_HOOKS
1077 lda.spin_cnt += i;
1078 #endif
1079 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
1080 "running");
1081 if (i < rowner_loops)
1082 continue;
1083 }
1084 ts:
1085 #endif
1086 ts = turnstile_trywait(&rw->lock_object);
1087 v = RW_READ_VALUE(rw);
1088 retry_ts:
1089 owner = lv_rw_wowner(v);
1090
1091 #ifdef ADAPTIVE_RWLOCKS
1092 /*
1093 * The current lock owner might have started executing
1094 * on another CPU (or the lock could have changed
1095 * owners) while we were waiting on the turnstile
1096 * chain lock. If so, drop the turnstile lock and try
1097 * again.
1098 */
1099 if (owner != NULL) {
1100 if (TD_IS_RUNNING(owner)) {
1101 turnstile_cancel(ts);
1102 rw_drop_critical(v, &in_critical, &extra_work);
1103 continue;
1104 }
1105 } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
1106 turnstile_cancel(ts);
1107 rw_drop_critical(v, &in_critical, &extra_work);
1108 continue;
1109 }
1110 #endif
/*
 * Check the waiters flags on this rwlock.  If the lock was released
 * without leaving any pending waiters queued, simply try to acquire
 * it.  If a pending waiters queue is present, claim the lock
 * ownership and maintain the pending queue.
 */
1118 setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
1119 if ((v & ~setv) == RW_UNLOCKED) {
1120 setv &= ~RW_LOCK_WRITE_SPINNER;
1121 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
1122 if (setv)
1123 turnstile_claim(ts);
1124 else
1125 turnstile_cancel(ts);
1126 break;
1127 }
1128 goto retry_ts;
1129 }
1130
1131 #ifdef ADAPTIVE_RWLOCKS
1132 if (in_critical) {
1133 if ((v & RW_LOCK_WRITE_SPINNER) ||
1134 !((v & RW_LOCK_WRITE_WAITERS))) {
1135 setv = v & ~RW_LOCK_WRITE_SPINNER;
1136 setv |= RW_LOCK_WRITE_WAITERS;
1137 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
1138 goto retry_ts;
1139 }
1140 critical_exit();
1141 in_critical = false;
1142 extra_work--;
1143 } else {
1144 #endif
1145 /*
1146 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
1147 * set it. If we fail to set it, then loop back and try
1148 * again.
1149 */
1150 if (!(v & RW_LOCK_WRITE_WAITERS)) {
1151 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
1152 v | RW_LOCK_WRITE_WAITERS))
1153 goto retry_ts;
1154 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1155 CTR2(KTR_LOCK, "%s: %p set write waiters flag",
1156 __func__, rw);
1157 }
1158 #ifdef ADAPTIVE_RWLOCKS
1159 }
1160 #endif
1161 /*
1162 * We were unable to acquire the lock and the write waiters
1163 * flag is set, so we must block on the turnstile.
1164 */
1165 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1166 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
1167 rw);
1168 #ifdef KDTRACE_HOOKS
1169 sleep_time -= lockstat_nsecs(&rw->lock_object);
1170 #endif
1171 MPASS(owner == rw_owner(rw));
1172 turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
1173 #ifdef KDTRACE_HOOKS
1174 sleep_time += lockstat_nsecs(&rw->lock_object);
1175 sleep_cnt++;
1176 #endif
1177 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1178 CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
1179 __func__, rw);
1180 #ifdef ADAPTIVE_RWLOCKS
1181 spintries = 0;
1182 #endif
1183 v = RW_READ_VALUE(rw);
1184 }
1185 THREAD_CONTENTION_DONE(&rw->lock_object);
1186 if (__predict_true(!extra_work))
1187 return;
1188 #ifdef ADAPTIVE_RWLOCKS
1189 if (in_critical)
1190 critical_exit();
1191 #endif
1192 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
1193 if (__predict_true(!doing_lockprof))
1194 return;
1195 #endif
1196 #ifdef KDTRACE_HOOKS
1197 all_time += lockstat_nsecs(&rw->lock_object);
1198 if (sleep_time)
1199 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
1200 LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
1201 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
1202
1203 /* Record only the loops spinning and not sleeping. */
1204 if (lda.spin_cnt > sleep_cnt)
1205 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
1206 LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
1207 (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
1208 out_lockstat:
1209 #endif
1210 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
1211 waittime, file, line, LOCKSTAT_WRITER);
1212 }
1213
1214 /*
1215 * This function is called if lockstat is active or the first try at releasing
1216 * a write lock failed. The latter means that the lock is recursed or one of
1217 * the 2 waiter bits must be set indicating that at least one thread is waiting
1218 * on this lock.
1219 */
1220 void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
1222 {
1223 struct rwlock *rw;
1224 struct turnstile *ts;
1225 uintptr_t tid, setv, passedv;
1226 int queue;
1227
1228 tid = (uintptr_t)curthread;
1229 if (SCHEDULER_STOPPED())
1230 return;
1231
1232 rw = rwlock2rw(c);
1233 if (__predict_false(v == tid))
1234 v = RW_READ_VALUE(rw);
1235
1236 if (v & RW_LOCK_WRITER_RECURSED) {
1237 if (--(rw->rw_recurse) == 0)
1238 atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
1239 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1240 CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
1241 return;
1242 }
1243
1244 LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
1245 if (v == tid && _rw_write_unlock(rw, tid))
1246 return;
1247
1248 KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
1249 ("%s: neither of the waiter flags are set", __func__));
1250
1251 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1252 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
1253
1254 turnstile_chain_lock(&rw->lock_object);
1255
1256 /*
1257 * Use the same algo as sx locks for now. Prefer waking up shared
1258 * waiters if we have any over writers. This is probably not ideal.
1259 *
1260 * 'v' is the value we are going to write back to rw_lock. If we
1261 * have waiters on both queues, we need to preserve the state of
1262 * the waiter flag for the queue we don't wake up. For now this is
1263 * hardcoded for the algorithm mentioned above.
1264 *
1265 * In the case of both readers and writers waiting we wakeup the
1266 * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a
1267 * new writer comes in before a reader it will claim the lock up
1268 * above. There is probably a potential priority inversion in
1269 * there that could be worked around either by waking both queues
1270 * of waiters or doing some complicated lock handoff gymnastics.
1271 */
1272 setv = RW_UNLOCKED;
1273 passedv = v;
1274 v = RW_READ_VALUE(rw);
1275 queue = TS_SHARED_QUEUE;
1276 if (v & RW_LOCK_WRITE_WAITERS) {
1277 queue = TS_EXCLUSIVE_QUEUE;
1278 setv |= (v & RW_LOCK_READ_WAITERS);
1279 }
1280 atomic_store_rel_ptr(&rw->rw_lock, setv);
1281
1282 /* Wake up all waiters for the specific queue. */
1283 if (LOCK_LOG_TEST(&rw->lock_object, 0))
1284 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
1285 queue == TS_SHARED_QUEUE ? "read" : "write");
1286
1287 ts = turnstile_lookup(&rw->lock_object);
1288 if (__predict_false(ts == NULL)) {
1289 panic("got NULL turnstile on rwlock %p passedv %p v %p", rw,
1290 (void *)passedv, (void *)v);
1291 }
1292 turnstile_broadcast(ts, queue);
1293 turnstile_unpend(ts);
1294 turnstile_chain_unlock(&rw->lock_object);
1295 }
1296
1297 /*
1298 * Attempt to do a non-blocking upgrade from a read lock to a write
1299 * lock. This will only succeed if this thread holds a single read
1300 * lock. Returns true if the upgrade succeeded and false otherwise.
1301 */
1302 int
__rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
1304 {
1305 uintptr_t v, setv, tid;
1306 struct turnstile *ts;
1307 int success;
1308
1309 if (SCHEDULER_STOPPED())
1310 return (1);
1311
1312 KASSERT(rw->rw_lock != RW_DESTROYED,
1313 ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
1314 __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
1315
1316 /*
1317 * Attempt to switch from one reader to a writer. If there
1318 * are any write waiters, then we will have to lock the
1319 * turnstile first to prevent races with another writer
1320 * calling turnstile_wait() before we have claimed this
1321 * turnstile. So, do the simple case of no waiters first.
1322 */
1323 tid = (uintptr_t)curthread;
1324 success = 0;
1325 v = RW_READ_VALUE(rw);
1326 for (;;) {
1327 if (RW_READERS(v) > 1)
1328 break;
1329 if (!(v & RW_LOCK_WAITERS)) {
1330 success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
1331 if (!success)
1332 continue;
1333 break;
1334 }
1335
1336 /*
1337 * Ok, we think we have waiters, so lock the turnstile.
1338 */
1339 ts = turnstile_trywait(&rw->lock_object);
1340 v = RW_READ_VALUE(rw);
1341 retry_ts:
1342 if (RW_READERS(v) > 1) {
1343 turnstile_cancel(ts);
1344 break;
1345 }
1346 /*
1347 * Try to switch from one reader to a writer again. This time
1348 * we honor the current state of the waiters flags.
1349 * If we obtain the lock with the flags set, then claim
1350 * ownership of the turnstile.
1351 */
1352 setv = tid | (v & RW_LOCK_WAITERS);
1353 success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
1354 if (success) {
1355 if (v & RW_LOCK_WAITERS)
1356 turnstile_claim(ts);
1357 else
1358 turnstile_cancel(ts);
1359 break;
1360 }
1361 goto retry_ts;
1362 }
1363 LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
1364 if (success) {
1365 curthread->td_rw_rlocks--;
1366 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
1367 file, line);
1368 LOCKSTAT_RECORD0(rw__upgrade, rw);
1369 }
1370 return (success);
1371 }
1372
1373 int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
1375 {
1376 struct rwlock *rw;
1377
1378 rw = rwlock2rw(c);
1379 return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
1380 }
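/*
 * Example (sketch): a common lookup-then-modify pattern built on the
 * non-blocking upgrade; if the upgrade fails, the read lock is dropped and
 * the write lock is taken outright, re-validating the lookup as needed.
 * foo_lock is a placeholder:
 *
 *	rw_rlock(&foo_lock);
 *	if (!rw_try_upgrade(&foo_lock)) {
 *		rw_runlock(&foo_lock);
 *		rw_wlock(&foo_lock);
 *		... re-check lookup results ...
 *	}
 *	... modify ...
 *	rw_wunlock(&foo_lock);
 */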
1381
1382 /*
1383 * Downgrade a write lock into a single read lock.
1384 */
1385 void
__rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
1387 {
1388 struct turnstile *ts;
1389 uintptr_t tid, v;
1390 int rwait, wwait;
1391
1392 if (SCHEDULER_STOPPED())
1393 return;
1394
1395 KASSERT(rw->rw_lock != RW_DESTROYED,
1396 ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
1397 __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
1398 #ifndef INVARIANTS
1399 if (rw_recursed(rw))
1400 panic("downgrade of a recursed lock");
1401 #endif
1402
1403 WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
1404
1405 /*
1406 * Convert from a writer to a single reader. First we handle
1407 * the easy case with no waiters. If there are any waiters, we
1408 * lock the turnstile and "disown" the lock.
1409 */
1410 tid = (uintptr_t)curthread;
1411 if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
1412 goto out;
1413
1414 /*
1415 * Ok, we think we have waiters, so lock the turnstile so we can
1416 * read the waiter flags without any races.
1417 */
1418 turnstile_chain_lock(&rw->lock_object);
1419 v = rw->rw_lock & RW_LOCK_WAITERS;
1420 rwait = v & RW_LOCK_READ_WAITERS;
1421 wwait = v & RW_LOCK_WRITE_WAITERS;
1422 MPASS(rwait | wwait);
1423
1424 /*
1425 * Downgrade from a write lock while preserving waiters flag
1426 * and give up ownership of the turnstile.
1427 */
1428 ts = turnstile_lookup(&rw->lock_object);
1429 MPASS(ts != NULL);
1430 if (!wwait)
1431 v &= ~RW_LOCK_READ_WAITERS;
1432 atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
1433 /*
1434 * Wake other readers if there are no writers pending. Otherwise they
1435 * won't be able to acquire the lock anyway.
1436 */
1437 if (rwait && !wwait) {
1438 turnstile_broadcast(ts, TS_SHARED_QUEUE);
1439 turnstile_unpend(ts);
1440 } else
1441 turnstile_disown(ts);
1442 turnstile_chain_unlock(&rw->lock_object);
1443 out:
1444 curthread->td_rw_rlocks++;
1445 LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
1446 LOCKSTAT_RECORD0(rw__downgrade, rw);
1447 }
1448
1449 void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
1451 {
1452 struct rwlock *rw;
1453
1454 rw = rwlock2rw(c);
1455 __rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
1456 }
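/*
 * Example (sketch): downgrading lets a thread finish exclusive setup and
 * then continue reading while other readers proceed concurrently; foo_lock
 * is a placeholder:
 *
 *	rw_wlock(&foo_lock);
 *	... initialize data protected by foo_lock ...
 *	rw_downgrade(&foo_lock);
 *	... keep reading the data ...
 *	rw_runlock(&foo_lock);
 */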
1457
1458 #ifdef INVARIANT_SUPPORT
1459 #ifndef INVARIANTS
1460 #undef __rw_assert
1461 #endif
1462
1463 /*
1464 * In the non-WITNESS case, rw_assert() can only detect that at least
1465 * *some* thread owns an rlock, but it cannot guarantee that *this*
1466 * thread owns an rlock.
1467 */
1468 void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
1470 {
1471 const struct rwlock *rw;
1472
1473 if (SCHEDULER_STOPPED())
1474 return;
1475
1476 rw = rwlock2rw(c);
1477
1478 switch (what) {
1479 case RA_LOCKED:
1480 case RA_LOCKED | RA_RECURSED:
1481 case RA_LOCKED | RA_NOTRECURSED:
1482 case RA_RLOCKED:
1483 case RA_RLOCKED | RA_RECURSED:
1484 case RA_RLOCKED | RA_NOTRECURSED:
1485 #ifdef WITNESS
1486 witness_assert(&rw->lock_object, what, file, line);
1487 #else
1488 /*
1489 * If some other thread has a write lock or we have one
1490 * and are asserting a read lock, fail. Also, if no one
1491 * has a lock at all, fail.
1492 */
1493 if (rw->rw_lock == RW_UNLOCKED ||
1494 (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
1495 rw_wowner(rw) != curthread)))
1496 panic("Lock %s not %slocked @ %s:%d\n",
1497 rw->lock_object.lo_name, (what & RA_RLOCKED) ?
1498 "read " : "", file, line);
1499
1500 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
1501 if (rw_recursed(rw)) {
1502 if (what & RA_NOTRECURSED)
1503 panic("Lock %s recursed @ %s:%d\n",
1504 rw->lock_object.lo_name, file,
1505 line);
1506 } else if (what & RA_RECURSED)
1507 panic("Lock %s not recursed @ %s:%d\n",
1508 rw->lock_object.lo_name, file, line);
1509 }
1510 #endif
1511 break;
1512 case RA_WLOCKED:
1513 case RA_WLOCKED | RA_RECURSED:
1514 case RA_WLOCKED | RA_NOTRECURSED:
1515 if (rw_wowner(rw) != curthread)
1516 panic("Lock %s not exclusively locked @ %s:%d\n",
1517 rw->lock_object.lo_name, file, line);
1518 if (rw_recursed(rw)) {
1519 if (what & RA_NOTRECURSED)
1520 panic("Lock %s recursed @ %s:%d\n",
1521 rw->lock_object.lo_name, file, line);
1522 } else if (what & RA_RECURSED)
1523 panic("Lock %s not recursed @ %s:%d\n",
1524 rw->lock_object.lo_name, file, line);
1525 break;
1526 case RA_UNLOCKED:
1527 #ifdef WITNESS
1528 witness_assert(&rw->lock_object, what, file, line);
1529 #else
1530 /*
1531 * If we hold a write lock fail. We can't reliably check
1532 * to see if we hold a read lock or not.
1533 */
1534 if (rw_wowner(rw) == curthread)
1535 panic("Lock %s exclusively locked @ %s:%d\n",
1536 rw->lock_object.lo_name, file, line);
1537 #endif
1538 break;
1539 default:
1540 panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
1541 line);
1542 }
1543 }
1544 #endif /* INVARIANT_SUPPORT */
1545
1546 #ifdef DDB
1547 static void
db_show_rwlock(const struct lock_object *lock)
1549 {
1550 const struct rwlock *rw;
1551 struct thread *td;
1552
1553 rw = (const struct rwlock *)lock;
1554
1555 db_printf(" state: ");
1556 if (rw->rw_lock == RW_UNLOCKED)
1557 db_printf("UNLOCKED\n");
1558 else if (rw->rw_lock == RW_DESTROYED) {
1559 db_printf("DESTROYED\n");
1560 return;
1561 } else if (rw->rw_lock & RW_LOCK_READ)
1562 db_printf("RLOCK: %ju locks\n",
1563 (uintmax_t)(RW_READERS(rw->rw_lock)));
1564 else {
1565 td = rw_wowner(rw);
1566 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1567 td->td_tid, td->td_proc->p_pid, td->td_name);
1568 if (rw_recursed(rw))
1569 db_printf(" recursed: %u\n", rw->rw_recurse);
1570 }
1571 db_printf(" waiters: ");
1572 switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
1573 case RW_LOCK_READ_WAITERS:
1574 db_printf("readers\n");
1575 break;
1576 case RW_LOCK_WRITE_WAITERS:
1577 db_printf("writers\n");
1578 break;
1579 case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
1580 db_printf("readers and writers\n");
1581 break;
1582 default:
1583 db_printf("none\n");
1584 break;
1585 }
1586 }
1587
1588 #endif
1589