/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}

/*
 * Two entries are compatible if their ranges do not overlap, or both
 * entries are for read.
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}
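
/*
 * The rl_waiters queue holds both granted and still-waiting requests.
 * Granted write requests are kept at the head of the list; every entry
 * in front of rl_currdep has been granted, and rl_currdep points to the
 * first request that is still blocked by a conflicting range (it is
 * NULL when no request is waiting).  rangelock_calc_block() below
 * re-establishes this invariant after the set of granted locks changes.
 */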

/*
 * Recalculate the lock->rl_currdep after an unlock.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *nextentry, *entry1;

	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
		nextentry = TAILQ_NEXT(entry, rl_q_link);
		if (entry->rl_q_flags & RL_LOCK_READ) {
			/* Reads must not overlap with granted writes. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    !(entry1->rl_q_flags & RL_LOCK_READ);
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}
		} else {
			/* Write must not overlap with any granted locks. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    entry1 != entry;
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}

			/* Move grantable write locks to the front. */
			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
		}

		/* Grant this lock. */
		entry->rl_q_flags |= RL_LOCK_GRANTED;
		wakeup(entry);
	}
out:
	lock->rl_currdep = entry;
}

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk, bool do_calc_block)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);

	if (!do_calc_block) {
		/*
		 * This is the case where rangelock_enqueue() has been called
		 * with trylock == true and just inserted this entry in the
		 * queue.
		 * If rl_currdep is this entry, rl_currdep needs to
		 * be set to the next entry in the rl_waiters list.
		 * However, since this entry is the last entry in the
		 * list, the next entry is NULL.
		 */
		if (lock->rl_currdep == entry) {
			KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
			    ("rangelock_enqueue: next entry not NULL"));
			lock->rl_currdep = NULL;
		}
	} else
		KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	if (do_calc_block)
		rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk, true);
}

/*
 * Unlock the sub-range of granted lock.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk, true);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}

/*
 * Add the lock request to the queue of the pending requests for
 * rangelock.  Sleep until the request can be granted unless trylock == true.
 */
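/*
 * The queue entry doubles as the opaque cookie that is returned to the
 * caller and later passed back to rangelock_unlock() or
 * rangelock_unlock_range().  The interlock ilk is acquired and dropped
 * inside this function, so it is not held on return.  With trylock ==
 * true, NULL is returned instead of sleeping when the range cannot be
 * granted immediately.
 */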
218 */ 219 static void * 220 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode, 221 struct mtx *ilk, bool trylock) 222 { 223 struct rl_q_entry *entry; 224 struct thread *td; 225 226 MPASS(lock != NULL && ilk != NULL); 227 228 td = curthread; 229 if (td->td_rlqe != NULL) { 230 entry = td->td_rlqe; 231 td->td_rlqe = NULL; 232 } else 233 entry = rlqentry_alloc(); 234 MPASS(entry != NULL); 235 entry->rl_q_flags = mode; 236 entry->rl_q_start = start; 237 entry->rl_q_end = end; 238 239 mtx_lock(ilk); 240 /* 241 * XXXKIB TODO. Check that a thread does not try to enqueue a 242 * lock that is incompatible with another request from the same 243 * thread. 244 */ 245 246 TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link); 247 /* 248 * If rl_currdep == NULL, there is no entry waiting for a conflicting 249 * range to be resolved, so set rl_currdep to this entry. If there is 250 * no conflicting entry for this entry, rl_currdep will be set back to 251 * NULL by rangelock_calc_block(). 252 */ 253 if (lock->rl_currdep == NULL) 254 lock->rl_currdep = entry; 255 rangelock_calc_block(lock); 256 while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) { 257 if (trylock) { 258 /* 259 * For this case, the range is not actually locked 260 * yet, but removal from the list requires the same 261 * steps, except for not doing a rangelock_calc_block() 262 * call, since rangelock_calc_block() was called above. 263 */ 264 rangelock_unlock_locked(lock, entry, ilk, false); 265 return (NULL); 266 } 267 msleep(entry, ilk, 0, "range", 0); 268 } 269 mtx_unlock(ilk); 270 return (entry); 271 } 272 273 void * 274 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk) 275 { 276 277 return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false)); 278 } 279 280 void * 281 rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end, 282 struct mtx *ilk) 283 { 284 285 return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true)); 286 } 287 288 void * 289 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk) 290 { 291 292 return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false)); 293 } 294 295 void * 296 rangelock_trywlock(struct rangelock *lock, off_t start, off_t end, 297 struct mtx *ilk) 298 { 299 300 return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true)); 301 } 302 303 #ifdef INVARIANT_SUPPORT 304 void 305 _rangelock_cookie_assert(void *cookie, int what, const char *file, int line) 306 { 307 struct rl_q_entry *entry; 308 int flags; 309 310 MPASS(cookie != NULL); 311 entry = cookie; 312 flags = entry->rl_q_flags; 313 switch (what) { 314 case RCA_LOCKED: 315 if ((flags & RL_LOCK_GRANTED) == 0) 316 panic("rangelock not held @ %s:%d\n", file, line); 317 break; 318 case RCA_RLOCKED: 319 if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) != 320 (RL_LOCK_GRANTED | RL_LOCK_READ)) 321 panic("rangelock not rlocked @ %s:%d\n", file, line); 322 break; 323 case RCA_WLOCKED: 324 if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) != 325 (RL_LOCK_GRANTED | RL_LOCK_WRITE)) 326 panic("rangelock not wlocked @ %s:%d\n", file, line); 327 break; 328 default: 329 panic("Unknown rangelock assertion: %d @ %s:%d", what, file, 330 line); 331 } 332 } 333 #endif /* INVARIANT_SUPPORT */ 334