/*-
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}
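/*
 * Usage sketch (illustrative, not part of this file): a consumer
 * embeds a struct rangelock next to a mutex that serves as the
 * interlock ("ilk") passed to the lock and unlock routines; the
 * routines acquire and drop the interlock internally, so the caller
 * does not hold it across the calls.  The names "foo_node",
 * "fn_interlock" and "fn_rangelock" below are hypothetical:
 *
 *	struct foo_node {
 *		struct mtx	 fn_interlock;
 *		struct rangelock fn_rangelock;
 *	};
 *
 *	void *cookie;
 *
 *	rangelock_init(&fn->fn_rangelock);
 *	cookie = rangelock_rlock(&fn->fn_rangelock, off, off + len,
 *	    &fn->fn_interlock);
 *	... read bytes [off, off + len) ...
 *	rangelock_unlock(&fn->fn_rangelock, cookie, &fn->fn_interlock);
 */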
/*
 * Check the supplied rl_q_entries for compatibility.  Returns true
 * if the entries conflict (are incompatible), false if they are
 * compatible.
 *
 * Two entries are compatible if their ranges do not overlap, or if
 * both entries are for read.
 */
static int
rangelock_incompatible(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
		return (0);
	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}

/*
 * Recalculate the lock->rl_currdep after an unlock.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *entry1, *whead;

	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
	    lock->rl_currdep != NULL)
		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
	for (entry = lock->rl_currdep; entry != NULL;
	    entry = TAILQ_NEXT(entry, rl_q_link)) {
		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
			if (rangelock_incompatible(entry, entry1))
				goto out;
			if (entry1 == entry)
				break;
		}
	}
out:
	lock->rl_currdep = entry;
	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
		if (whead == lock->rl_currdep)
			break;
		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
			whead->rl_q_flags |= RL_LOCK_GRANTED;
			wakeup(whead);
		}
	}
}
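/*
 * Worked example (illustrative): suppose rl_waiters holds, in FIFO
 * order, a granted read lock R[0, 10), a write request W[5, 15) and
 * a read request R[20, 30).  The scan above stops at W[5, 15)
 * because it overlaps the granted read lock, so rl_currdep points at
 * it and both W[5, 15) and R[20, 30) keep waiting.  R[20, 30) is
 * compatible with everything ahead of it, but granting it would let
 * a steady stream of readers starve the writer; FIFO ordering
 * prevents that.  Once R[0, 10) is unlocked, W[5, 15) is granted and
 * woken up.
 */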
static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);
	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk);
}

/*
 * Unlock a sub-range of the granted lock: the locked range shrinks
 * from [start, old end) to [start, end), releasing the tail.  A full
 * unlock (end == old end) frees the entry and returns NULL; otherwise
 * the cookie remains valid for the shrunk range and is returned.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}
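/*
 * Partial-unlock sketch (illustrative): a caller that write-locked
 * [0, 100) but turns out to need only the first half can release the
 * tail while keeping the prefix locked.  "rl" and "ilk" stand for a
 * rangelock and its interlock as in the usage sketch above:
 *
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(rl, 0, 100, ilk);
 *	cookie = rangelock_unlock_range(rl, cookie, 0, 50, ilk);
 *	... [0, 50) is still write-locked, [50, 100) is free ...
 *	rangelock_unlock(rl, cookie, ilk);
 */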
/*
 * Add the lock request to the queue of pending requests for the
 * rangelock.  Sleep until the request can be granted.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
		msleep(entry, ilk, 0, "range", 0);
	mtx_unlock(ilk);
	return (entry);
}

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
}
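/*
 * Blocking example (illustrative): thread A holds a write lock over
 * [0, 100).  Thread B calls rangelock_rlock(lock, 50, 60, ilk): its
 * entry is queued after A's, rangelock_calc_block() sees the overlap
 * with the granted write entry and leaves rl_currdep pointing at B's
 * entry, so B msleep()s in rangelock_enqueue().  When A calls
 * rangelock_unlock(), A's entry is removed from the queue,
 * rangelock_calc_block() marks B's entry RL_LOCK_GRANTED and
 * wakeup()s it, and B returns its entry as the lock cookie.  A's
 * freed entry is cached in td_rlqe, so the next lock request by the
 * same thread avoids a uma_zalloc() call.
 */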