/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kassert.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/sleepqueue.h>
#include <sys/smr.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

/*
 * Immediately after initialization (subject to 'rangelock_cheat'
 * below), and until a request arrives that conflicts with already
 * granted ones based on type, rangelocks serve requests in the
 * "cheating" mode.  In this mode, a rangelock behaves like an sx
 * lock, as if each request covered the whole range of the protected
 * object.  On receiving a conflicting request (any request while a
 * write request is effective, or any write request while some read
 * requests are effective), all requests granted in "cheating" mode
 * are drained, and the rangelock then switches to keeping track of
 * the precise range of each new request.
 *
 * The normal sx implementation is not used, to avoid bloating the
 * structures (most importantly, vnodes) that embed rangelocks.
 *
 * Cheating greatly helps the very common pattern where a file is
 * first written single-threaded and then read by many processes.
 *
 * A lock is in cheat mode when the RL_CHEAT_CHEATING bit is set in
 * lock->head.  Special cookies are returned in this mode, and
 * trylocks behave the same as normal locks but do not drain.
 */

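/*
 * Typical caller pattern, as a minimal sketch.  The 'obj' structure
 * and its 'rl' member are hypothetical; the rangelock_*() calls are
 * the interface implemented in this file:
 *
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(&obj->rl, off, off + len);
 *	... modify bytes [off, off + len) of the protected object ...
 *	rangelock_unlock(&obj->rl, cookie);
 *
 * While the lock stays in cheating mode, the returned cookie is one
 * of the RL_RET_CHEAT_* constants below rather than a pointer to an
 * rl_q_entry.
 */
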
static int rangelock_cheat = 1;
SYSCTL_INT(_debug, OID_AUTO, rangelock_cheat, CTLFLAG_RWTUN,
    &rangelock_cheat, 0,
    "");

#define	RL_CHEAT_MASK		0x7
#define	RL_CHEAT_CHEATING	0x1
/* #define	RL_CHEAT_RLOCKED	0x0 */
#define	RL_CHEAT_WLOCKED	0x2
#define	RL_CHEAT_DRAINING	0x4

#define	RL_CHEAT_READER		0x8

#define	RL_RET_CHEAT_RLOCKED	0x1100
#define	RL_RET_CHEAT_WLOCKED	0x2200

static void
rangelock_cheat_drain(struct rangelock *lock)
{
	uintptr_t v;

	DROP_GIANT();
	for (;;) {
		v = atomic_load_ptr(&lock->head);
		if ((v & RL_CHEAT_DRAINING) == 0)
			break;
		sleepq_add(&lock->head, NULL, "ranged1", 0, 0);
		sleepq_wait(&lock->head, PRI_USER);
		sleepq_lock(&lock->head);
	}
	sleepq_release(&lock->head);
	PICKUP_GIANT();
}

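/*
 * Return protocol of rangelock_cheat_lock(), as implemented below:
 * false means the lock is not (or is no longer) in cheating mode and
 * the caller must fall back to the range-tracking path; true means
 * the request was handled here, with *cookie set to one of the
 * RL_RET_CHEAT_* values, or to NULL for a failed trylock.
 */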
static bool
rangelock_cheat_lock(struct rangelock *lock, int locktype, bool trylock,
    void **cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	if ((v & RL_CHEAT_DRAINING) != 0) {
drain:
		if (trylock) {
			*cookie = NULL;
			return (true);
		}
		sleepq_lock(&lock->head);
drain1:
		rangelock_cheat_drain(lock);
		return (false);
	}

	switch (locktype) {
	case RL_LOCK_READ:
		for (;;) {
			if ((v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = (v & ~RL_CHEAT_MASK) + RL_CHEAT_READER;
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_RLOCKED;
		break;
	case RL_LOCK_WRITE:
		for (;;) {
			if ((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER ||
			    (v & RL_CHEAT_WLOCKED) != 0) {
				if (trylock) {
					*cookie = NULL;
					return (true);
				}
				x = v | RL_CHEAT_DRAINING;
				sleepq_lock(&lock->head);
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					goto drain1;
				sleepq_release(&lock->head);
				/* Possibly forgive passed conflict */
			} else {
				x = RL_CHEAT_WLOCKED | RL_CHEAT_CHEATING;
				if (atomic_fcmpset_acq_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
			if ((v & RL_CHEAT_CHEATING) == 0)
				return (false);
			if ((v & RL_CHEAT_DRAINING) != 0)
				goto drain;
		}
		*(uintptr_t *)cookie = RL_RET_CHEAT_WLOCKED;
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_unlock(struct rangelock *lock, void *cookie)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);

	MPASS((uintptr_t)cookie == RL_RET_CHEAT_WLOCKED ||
	    (uintptr_t)cookie == RL_RET_CHEAT_RLOCKED);

	switch ((uintptr_t)cookie) {
	case RL_RET_CHEAT_RLOCKED:
		for (;;) {
			MPASS((v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER);
			MPASS((v & RL_CHEAT_WLOCKED) == 0);
			x = (v & ~RL_CHEAT_MASK) - RL_CHEAT_READER;
			if ((v & RL_CHEAT_DRAINING) != 0) {
				if (x != 0) {
					x |= RL_CHEAT_DRAINING |
					    RL_CHEAT_CHEATING;
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0)
						break;
				} else {
					sleepq_lock(&lock->head);
					if (atomic_fcmpset_rel_ptr(&lock->head,
					    &v, x) != 0) {
						sleepq_broadcast(
						    &lock->head,
						    SLEEPQ_SLEEP, 0, 0);
						sleepq_release(&lock->head);
						break;
					}
					sleepq_release(&lock->head);
				}
			} else {
				x |= RL_CHEAT_CHEATING;
				if (atomic_fcmpset_rel_ptr(&lock->head, &v,
				    x) != 0)
					break;
			}
		}
		break;
	case RL_RET_CHEAT_WLOCKED:
		for (;;) {
			MPASS((v & RL_CHEAT_WLOCKED) != 0);
			if ((v & RL_CHEAT_DRAINING) != 0) {
				sleepq_lock(&lock->head);
				atomic_store_ptr(&lock->head, 0);
				sleepq_broadcast(&lock->head,
				    SLEEPQ_SLEEP, 0, 0);
				sleepq_release(&lock->head);
				break;
			} else {
				if (atomic_fcmpset_ptr(&lock->head, &v,
				    RL_CHEAT_CHEATING) != 0)
					break;
			}
		}
		break;
	default:
		__assert_unreachable();
		break;
	}
	return (true);
}

static bool
rangelock_cheat_destroy(struct rangelock *lock)
{
	uintptr_t v;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return (false);
	MPASS(v == RL_CHEAT_CHEATING);
	return (true);
}

/*
 * Implementation of range locks based on the paper
 * https://doi.org/10.1145/3342195.3387533
 * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
 * Scalable Range Locks for Scalable Address Spaces and Beyond
 * by Alex Kogan, Dave Dice, and Shady Issa
 */

static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);

/*
 * rl_q_next links all granted ranges in the lock.  We cannot free an
 * rl_q_entry while in the smr section, and cannot reuse the rl_q_next
 * linkage since other threads might follow it even after a CAS removed
 * the range.  Use rl_q_free for a local list of ranges to be removed
 * after the smr section is dropped.
 */
struct rl_q_entry {
	struct rl_q_entry *rl_q_next;
	struct rl_q_entry *rl_q_free;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
#ifdef INVARIANTS
	struct thread	*rl_q_owner;
#endif
};
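
/*
 * A minimal sketch of how the low bit of rl_q_next is used; the
 * helpers named here are defined below.  The owner marks its entry
 * on unlock by setting the bit (rl_e_mark()), and list walkers strip
 * the bit before following the link:
 *
 *	next = rl_q_load(&cur->rl_q_next);
 *	if (rl_e_is_marked(next)) {
 *		next = rl_e_unmark(next);
 *		... cur is already unlocked, try to unlink it ...
 *	}
 */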

static uma_zone_t rl_entry_zone;
static smr_t rl_smr;

static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);

static void
rangelock_sys_init(void)
{
	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
	    UMA_ZONE_SMR);
	rl_smr = uma_zone_get_smr(rl_entry_zone);
}
SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
{
	struct rl_q_entry *e;

	e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
	e->rl_q_next = NULL;
	e->rl_q_free = NULL;
	e->rl_q_start = start;
	e->rl_q_end = end;
	e->rl_q_flags = flags;
#ifdef INVARIANTS
	e->rl_q_owner = curthread;
#endif
	return (e);
}

void
rangelock_init(struct rangelock *lock)
{
	lock->sleepers = false;
	atomic_store_ptr(&lock->head, rangelock_cheat ? RL_CHEAT_CHEATING : 0);
}

void
rangelock_destroy(struct rangelock *lock)
{
	MPASS(!lock->sleepers);
	if (!rangelock_cheat_destroy(lock))
		rangelock_noncheating_destroy(lock);
	DEBUG_POISON_POINTER(*(void **)&lock->head);
}

static bool
rl_e_is_marked(const struct rl_q_entry *e)
{
	return (((uintptr_t)e & 1) != 0);
}

static struct rl_q_entry *
rl_e_unmark_unchecked(const struct rl_q_entry *e)
{
	return ((struct rl_q_entry *)((uintptr_t)e & ~1));
}

static struct rl_q_entry *
rl_e_unmark(const struct rl_q_entry *e)
{
	MPASS(rl_e_is_marked(e));
	return (rl_e_unmark_unchecked(e));
}

static void
rl_e_mark(struct rl_q_entry *e)
{
#if defined(INVARIANTS)
	int r = atomic_testandset_ptr((uintptr_t *)&e->rl_q_next, 0);
	MPASS(r == 0);
#else
	atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
#endif
}

static struct rl_q_entry *
rl_q_load(struct rl_q_entry **p)
{
	return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
}

static bool
rl_e_is_rlock(const struct rl_q_entry *e)
{
	return ((e->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ);
}

static void
rangelock_free_free(struct rl_q_entry *free)
{
	struct rl_q_entry *x, *xp;

	for (x = free; x != NULL; x = xp) {
		MPASS(!rl_e_is_marked(x));
		xp = x->rl_q_free;
		MPASS(!rl_e_is_marked(xp));
		uma_zfree_smr(rl_entry_zone, x);
	}
}

static void
rangelock_unlock_int(struct rangelock *lock, struct rl_q_entry *e)
{
	bool sleepers;

	MPASS(lock != NULL && e != NULL);
	MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
	MPASS(e->rl_q_owner == curthread);

	rl_e_mark(e);
	sleepers = lock->sleepers;
	lock->sleepers = false;
	if (sleepers)
		sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie)
{
	if (rangelock_cheat_unlock(lock, cookie))
		return;

	sleepq_lock(&lock->sleepers);
	rangelock_unlock_int(lock, cookie);
	sleepq_release(&lock->sleepers);
}

/*
 * result: -1 if e1 before e2, or both locks are readers and e1
 *            starts before or at e2
 *          1 if e1 after e2, or both locks are readers and e1
 *            starts after e2
 *          0 if e1 and e2 overlap and at least one lock is writer
 */
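/*
 * For example, with e1 covering [0, 100) and e2 covering [50, 150),
 * the result is -1 when both entries are read locks (they may
 * coexist, ordered by start offset), and 0 when either of them is a
 * write lock, since the ranges overlap.
 */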
static int
rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
{
	bool rds;

	if (e1 == NULL)
		return (1);
	if (e2->rl_q_start >= e1->rl_q_end)
		return (-1);
	rds = rl_e_is_rlock(e1) && rl_e_is_rlock(e2);
	if (e2->rl_q_start >= e1->rl_q_start && rds)
		return (-1);
	if (e1->rl_q_start >= e2->rl_q_end)
		return (1);
	if (e1->rl_q_start >= e2->rl_q_start && rds)
		return (1);
	return (0);
}

static void
rl_insert_sleep(struct rangelock *lock)
{
	smr_exit(rl_smr);
	DROP_GIANT();
	lock->sleepers = true;
	sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
	sleepq_wait(&lock->sleepers, PRI_USER);
	PICKUP_GIANT();
	smr_enter(rl_smr);
}

static bool
rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
    struct rl_q_entry *new)
{
	MPASS(!rl_e_is_marked(old));
	return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
	    (uintptr_t)new) != 0);
}

static void
rangelock_noncheating_destroy(struct rangelock *lock)
{
	struct rl_q_entry *cur, *free, *next, **prev;

	free = NULL;
again:
	smr_enter(rl_smr);
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));

	for (;;) {
		if (cur == NULL)
			break;
		if (rl_e_is_marked(cur))
			goto again;

		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
				cur->rl_q_owner = NULL;
#endif
				cur->rl_q_free = free;
				free = cur;
				cur = next;
				continue;
			}
			smr_exit(rl_smr);
			goto again;
		}

		sleepq_lock(&lock->sleepers);
		if (!rl_e_is_marked(cur)) {
			rl_insert_sleep(lock);
			goto again;
		}
	}
	smr_exit(rl_smr);
	rangelock_free_free(free);
}

enum RL_INSERT_RES {
	RL_TRYLOCK_FAILED,
	RL_LOCK_SUCCESS,
	RL_LOCK_RETRY,
};

static enum RL_INSERT_RES
rl_r_validate(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = &e->rl_q_next;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* nobody can unlock e yet */
	for (;;) {
		if (cur == NULL || cur->rl_q_start >= e->rl_q_end)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (rl_e_is_rlock(cur)) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		if (!rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
			sleepq_lock(&lock->sleepers);
			if (rl_e_is_marked(rl_q_load(&cur->rl_q_next))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			rangelock_unlock_int(lock, e);
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			return (RL_LOCK_RETRY);
		}
	}
}

static enum RL_INSERT_RES
rl_w_validate(struct rangelock *lock, struct rl_q_entry *e,
    bool trylock, struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	MPASS(!rl_e_is_marked(cur));	/* head is not marked */
	for (;;) {
		if (cur == e)
			return (RL_LOCK_SUCCESS);
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			next = rl_e_unmark(next);
			if (rl_q_cas(prev, cur, next)) {
				cur->rl_q_free = *free;
				*free = cur;
				cur = next;
				continue;
			}
			goto again;
		}
		if (cur->rl_q_end <= e->rl_q_start) {
			prev = &cur->rl_q_next;
			cur = rl_e_unmark_unchecked(rl_q_load(prev));
			continue;
		}
		sleepq_lock(&lock->sleepers);
		/* Reload after sleepq is locked */
		next = rl_q_load(&cur->rl_q_next);
		if (rl_e_is_marked(next)) {
			sleepq_release(&lock->sleepers);
			goto again;
		}
		rangelock_unlock_int(lock, e);
		if (trylock) {
			sleepq_release(&lock->sleepers);
			return (RL_TRYLOCK_FAILED);
		}
		rl_insert_sleep(lock);
		return (RL_LOCK_RETRY);
	}
}

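/*
 * Insertion works in two phases, following the scheme from the paper
 * cited above: first link the new entry into the list, which is kept
 * sorted by range start, then validate that no conflicting granted
 * range remains (rl_r_validate() for readers, rl_w_validate() for
 * writers).  Along the way, entries whose rl_q_next is marked are
 * already unlocked; they are unlinked and collected on the *free
 * list.
 */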
static enum RL_INSERT_RES
rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
    struct rl_q_entry **free)
{
	struct rl_q_entry *cur, *next, **prev;
	int r;

again:
	prev = (struct rl_q_entry **)&lock->head;
	cur = rl_q_load(prev);
	if (cur == NULL && rl_q_cas(prev, NULL, e))
		return (RL_LOCK_SUCCESS);

	for (;;) {
		if (cur != NULL) {
			if (rl_e_is_marked(cur))
				goto again;

			next = rl_q_load(&cur->rl_q_next);
			if (rl_e_is_marked(next)) {
				next = rl_e_unmark(next);
				if (rl_q_cas(prev, cur, next)) {
#ifdef INVARIANTS
					cur->rl_q_owner = NULL;
#endif
					cur->rl_q_free = *free;
					*free = cur;
					cur = next;
					continue;
				}
				goto again;
			}
		}

		MPASS(!rl_e_is_marked(cur));
		r = rl_e_compare(cur, e);
		if (r == -1) {
			prev = &cur->rl_q_next;
			cur = rl_q_load(prev);
		} else if (r == 0) {
			sleepq_lock(&lock->sleepers);
			if (__predict_false(rl_e_is_marked(rl_q_load(
			    &cur->rl_q_next)))) {
				sleepq_release(&lock->sleepers);
				continue;
			}
			if (trylock) {
				sleepq_release(&lock->sleepers);
				return (RL_TRYLOCK_FAILED);
			}
			rl_insert_sleep(lock);
			/* e is still valid */
			goto again;
		} else /* r == 1 */ {
			e->rl_q_next = cur;
			if (rl_q_cas(prev, cur, e)) {
				atomic_thread_fence_acq();
				return (rl_e_is_rlock(e) ?
				    rl_r_validate(lock, e, trylock, free) :
				    rl_w_validate(lock, e, trylock, free));
			}
			/* Reset rl_q_next in case we hit fast path. */
			e->rl_q_next = NULL;
			cur = rl_q_load(prev);
		}
	}
}

static struct rl_q_entry *
rangelock_lock_int(struct rangelock *lock, bool trylock, vm_ooffset_t start,
    vm_ooffset_t end, int locktype)
{
	struct rl_q_entry *e, *free;
	void *cookie;
	enum RL_INSERT_RES res;

	if (rangelock_cheat_lock(lock, locktype, trylock, &cookie))
		return (cookie);
	for (res = RL_LOCK_RETRY; res == RL_LOCK_RETRY;) {
		free = NULL;
		e = rlqentry_alloc(start, end, locktype);
		smr_enter(rl_smr);
		res = rl_insert(lock, e, trylock, &free);
		smr_exit(rl_smr);
		if (res == RL_TRYLOCK_FAILED) {
			MPASS(trylock);
			e->rl_q_free = free;
			free = e;
			e = NULL;
		}
		rangelock_free_free(free);
	}
	return (e);
}

void *
rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_READ));
}

void *
rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_READ));
}

void *
rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, false, start, end, RL_LOCK_WRITE));
}

void *
rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
{
	return (rangelock_lock_int(lock, true, start, end, RL_LOCK_WRITE));
}

/*
 * If the caller asserts that it may hold several ranges on the same
 * lock simultaneously, switch to non-cheat mode.  Cheat mode cannot
 * handle this, hanging either in the drain or in endless trylock
 * retries.
 */
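/*
 * A minimal sketch of a caller holding two ranges at once; the
 * surrounding object is hypothetical, only the rangelock_*() calls
 * are the interface from this file:
 *
 *	rangelock_may_recurse(&obj->rl);
 *	c1 = rangelock_wlock(&obj->rl, 0, 4096);
 *	c2 = rangelock_rlock(&obj->rl, 8192, 12288);
 *	...
 *	rangelock_unlock(&obj->rl, c2);
 *	rangelock_unlock(&obj->rl, c1);
 *
 * Without the rangelock_may_recurse() call, the second acquisition
 * could self-deadlock in cheat mode: the drain would wait for the
 * write lock held by the same thread.
 */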
void
rangelock_may_recurse(struct rangelock *lock)
{
	uintptr_t v, x;

	v = atomic_load_ptr(&lock->head);
	if ((v & RL_CHEAT_CHEATING) == 0)
		return;

	sleepq_lock(&lock->head);
	for (;;) {
		if ((v & RL_CHEAT_CHEATING) == 0) {
			sleepq_release(&lock->head);
			return;
		}

		/* Cheating and locked, drain. */
		if ((v & RL_CHEAT_WLOCKED) != 0 ||
		    (v & ~RL_CHEAT_MASK) >= RL_CHEAT_READER) {
			x = v | RL_CHEAT_DRAINING;
			if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
				rangelock_cheat_drain(lock);
				return;
			}
			continue;
		}

		/* Cheating and unlocked, clear RL_CHEAT_CHEATING. */
		x = 0;
		if (atomic_fcmpset_ptr(&lock->head, &v, x) != 0) {
			sleepq_release(&lock->head);
			return;
		}
	}
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
}
#endif	/* INVARIANT_SUPPORT */

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(rangelock, db_show_rangelock)
{
	struct rangelock *lock;
	struct rl_q_entry *e, *x;
	uintptr_t v;

	if (!have_addr) {
		db_printf("show rangelock addr\n");
		return;
	}

	lock = (struct rangelock *)addr;
	db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
	v = lock->head;
	if ((v & RL_CHEAT_CHEATING) != 0) {
		db_printf(" cheating head %#jx\n", (uintmax_t)v);
		return;
	}
	for (e = (struct rl_q_entry *)(lock->head);;) {
		x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
		if (x == NULL)
			break;
		db_printf(" entry %p marked %d %d start %#jx end %#jx "
		    "flags %x next %p",
		    e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
		    x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
#ifdef INVARIANTS
		db_printf(" owner %p (%d)", x->rl_q_owner,
		    x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
#endif
		db_printf("\n");
		e = x->rl_q_next;
	}
}

#endif	/* DDB */