/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
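
/*
 * Reader/writer locks over byte ranges of a single object.  Lock
 * requests are kept on the rl_waiters queue in arrival order and are
 * granted once they are compatible with every request ahead of them.
 * The caller supplies the interlock mutex (ilk) that protects the
 * rangelock itself; each lock operation returns an opaque cookie that
 * is later passed to the unlock operations.
 */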

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t	rl_q_start, rl_q_end;
	int	rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}
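
/*
 * Usage sketch (hypothetical consumer; the struct and field names are
 * illustrative, not from this file): a rangelock is typically embedded
 * next to the mutex that interlocks it, and the cookie returned by a
 * lock call is kept for the later unlock:
 *
 *	struct obj {
 *		struct mtx o_ilk;
 *		struct rangelock o_rl;
 *	};
 *
 *	rangelock_init(&obj->o_rl);
 *	cookie = rangelock_wlock(&obj->o_rl, off, off + len, &obj->o_ilk);
 *	...modify bytes in [off, off + len)...
 *	rangelock_unlock(&obj->o_rl, cookie, &obj->o_ilk);
 */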

/*
 * Two lock entries are compatible if their ranges do not overlap, or
 * both entries are for read.  This helper tests the overlap half of
 * that rule: it returns non-zero when the half-open ranges
 * [rl_q_start, rl_q_end) of the two entries intersect.
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}

/*
 * Recalculate lock->rl_currdep after an unlock or enqueue.  Walk the
 * waiters starting at rl_currdep, granting every entry that is
 * compatible with all granted entries ahead of it, and stop at the
 * first entry that remains blocked; rl_currdep is left pointing at it
 * (or at NULL when everything could be granted).
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *nextentry, *entry1;

	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
		nextentry = TAILQ_NEXT(entry, rl_q_link);
		if (entry->rl_q_flags & RL_LOCK_READ) {
			/* Reads must not overlap with granted writes. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    !(entry1->rl_q_flags & RL_LOCK_READ);
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}
		} else {
			/* Write must not overlap with any granted locks. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    entry1 != entry;
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}

			/* Move grantable write locks to the front. */
			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
		}

		/* Grant this lock. */
		entry->rl_q_flags |= RL_LOCK_GRANTED;
		wakeup(entry);
	}
out:
	lock->rl_currdep = entry;
}
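
/*
 * Worked example (hypothetical ranges): suppose a write lock on
 * [0, 10) is granted and the queue then holds a waiting read for
 * [5, 15) followed by a waiting read for [20, 30).  The first read
 * overlaps the granted write, so the scan above stops there and
 * rl_currdep keeps pointing at it; the read for [20, 30) is not
 * granted even though its range is free.  Granting strictly in queue
 * order means a stream of later compatible requests cannot starve an
 * earlier blocked one.
 */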

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk, bool do_calc_block)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);

	if (!do_calc_block) {
		/*
		 * This is the case where rangelock_enqueue() has been called
		 * with trylock == true and just inserted this entry in the
		 * queue.  If rl_currdep is this entry, it must be advanced to
		 * the next entry in the rl_waiters list; this entry was just
		 * appended, so it is the last one and the next entry is NULL.
		 */
		if (lock->rl_currdep == entry) {
			KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
			    ("rangelock_enqueue: next entry not NULL"));
			lock->rl_currdep = NULL;
		}
	} else
		KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	if (do_calc_block)
		rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk, true);
}

/*
 * Unlock the trailing sub-range of a granted lock: [start, end) stays
 * locked and the rest of the granted range is released.  Returns the
 * cookie for the retained range, or NULL when end matches the granted
 * end and the whole lock is dropped.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk, true);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}
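
/*
 * Example for rangelock_unlock_range() (hypothetical offsets): a
 * caller holding a granted lock on [0, 100) that still needs [0, 50)
 * can trim it with
 *
 *	cookie = rangelock_unlock_range(lock, cookie, 0, 50, ilk);
 *
 * which releases [50, 100) and returns the cookie for the retained
 * range.  Passing end == 100 would release everything and return NULL.
 */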

/*
 * Add the lock request to the rangelock's queue of pending requests.
 * Sleep until the request can be granted, unless trylock is true, in
 * which case NULL is returned instead of sleeping when the request
 * cannot be granted immediately.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk, bool trylock)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	/*
	 * If rl_currdep == NULL, there is no entry waiting for a conflicting
	 * range to be resolved, so set rl_currdep to this entry.  If there is
	 * no conflicting entry for this entry, rl_currdep will be set back to
	 * NULL by rangelock_calc_block().
	 */
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
		if (trylock) {
			/*
			 * For this case, the range is not actually locked
			 * yet, but removal from the list requires the same
			 * steps, except for not doing a rangelock_calc_block()
			 * call, since rangelock_calc_block() was called above.
			 */
			rangelock_unlock_locked(lock, entry, ilk, false);
			return (NULL);
		}
		msleep(entry, ilk, 0, "range", 0);
	}
	mtx_unlock(ilk);
	return (entry);
}
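
/*
 * Public lock operations.  Each returns the cookie for the granted
 * range, to be passed later to rangelock_unlock() or
 * rangelock_unlock_range().  The trylock variants never sleep and
 * return NULL when the request cannot be granted immediately.
 */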

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
}

void *
rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
}

void *
rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
}