/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

struct rl_q_entry {
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	off_t		rl_q_start, rl_q_end;
	int		rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

	return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

	uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

	TAILQ_INIT(&lock->rl_waiters);
	lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}

/*
 * Two entries are compatible if their ranges do not overlap, or both
 * entries are for read.
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
		return (1);
	return (0);
}
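
/*
 * Ranges are treated as half-open intervals [rl_q_start, rl_q_end): for
 * example, [0, 10) and [10, 20) do not overlap, so requests over those
 * ranges are always compatible, while [0, 10) and [5, 15) do overlap.
 */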

/*
 * Recalculate the lock->rl_currdep after an unlock.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
	struct rl_q_entry *entry, *nextentry, *entry1;

	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
		nextentry = TAILQ_NEXT(entry, rl_q_link);
		if (entry->rl_q_flags & RL_LOCK_READ) {
			/* Reads must not overlap with granted writes. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    !(entry1->rl_q_flags & RL_LOCK_READ);
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}
		} else {
			/* Write must not overlap with any granted locks. */
			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
			    entry1 != entry;
			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
				if (ranges_overlap(entry, entry1))
					goto out;
			}

			/* Move grantable write locks to the front. */
			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
		}

		/* Grant this lock. */
		entry->rl_q_flags |= RL_LOCK_GRANTED;
		wakeup(entry);
	}
out:
	lock->rl_currdep = entry;
}

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk, bool do_calc_block)
{

	MPASS(lock != NULL && entry != NULL && ilk != NULL);
	mtx_assert(ilk, MA_OWNED);

	if (!do_calc_block) {
		/*
		 * This is the case where rangelock_enqueue() has been called
		 * with trylock == true and just inserted this entry in the
		 * queue.
		 * If rl_currdep is this entry, rl_currdep needs to
		 * be set to the next entry in the rl_waiters list.
		 * However, since this entry is the last entry in the
		 * list, the next entry is NULL.
		 */
		if (lock->rl_currdep == entry) {
			KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
			    ("rangelock_enqueue: next entry not NULL"));
			lock->rl_currdep = NULL;
		}
	} else
		KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
	if (do_calc_block)
		rangelock_calc_block(lock);
	mtx_unlock(ilk);
	if (curthread->td_rlqe == NULL)
		curthread->td_rlqe = entry;
	else
		rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, cookie, ilk, true);
}

/*
 * Unlock the sub-range of granted lock.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;
	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
	    ("Unlocking non-granted lock"));
	KASSERT(entry->rl_q_start == start, ("wrong start"));
	KASSERT(entry->rl_q_end >= end, ("wrong end"));

	mtx_lock(ilk);
	if (entry->rl_q_end == end) {
		rangelock_unlock_locked(lock, cookie, ilk, true);
		return (NULL);
	}
	entry->rl_q_end = end;
	rangelock_calc_block(lock);
	mtx_unlock(ilk);
	return (cookie);
}
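
/*
 * Note that rangelock_unlock_range() shrinks the granted range rather than
 * splitting it: the caller keeps [start, end) locked, only the tail from end
 * up to the old rl_q_end is made available to waiters, and the returned
 * cookie (when not NULL) still refers to a granted lock that must be
 * released later.
 */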

/*
 * Add the lock request to the queue of the pending requests for
 * rangelock. Sleep until the request can be granted unless trylock == true.
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk, bool trylock)
{
	struct rl_q_entry *entry;
	struct thread *td;

	MPASS(lock != NULL && ilk != NULL);

	td = curthread;
	if (td->td_rlqe != NULL) {
		entry = td->td_rlqe;
		td->td_rlqe = NULL;
	} else
		entry = rlqentry_alloc();
	MPASS(entry != NULL);
	entry->rl_q_flags = mode;
	entry->rl_q_start = start;
	entry->rl_q_end = end;

	mtx_lock(ilk);
	/*
	 * XXXKIB TODO. Check that a thread does not try to enqueue a
	 * lock that is incompatible with another request from the same
	 * thread.
	 */

	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
	/*
	 * If rl_currdep == NULL, there is no entry waiting for a conflicting
	 * range to be resolved, so set rl_currdep to this entry. If there is
	 * no conflicting entry for this entry, rl_currdep will be set back to
	 * NULL by rangelock_calc_block().
	 */
	if (lock->rl_currdep == NULL)
		lock->rl_currdep = entry;
	rangelock_calc_block(lock);
	while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
		if (trylock) {
			/*
			 * For this case, the range is not actually locked
			 * yet, but removal from the list requires the same
			 * steps, except for not doing a rangelock_calc_block()
			 * call, since rangelock_calc_block() was called above.
			 */
			rangelock_unlock_locked(lock, entry, ilk, false);
			return (NULL);
		}
		msleep(entry, ilk, 0, "range", 0);
	}
	mtx_unlock(ilk);
	return (entry);
}

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
}

void *
rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
}

void *
rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
	struct rl_q_entry *entry;
	int flags;

	MPASS(cookie != NULL);
	entry = cookie;
	flags = entry->rl_q_flags;
	switch (what) {
	case RCA_LOCKED:
		if ((flags & RL_LOCK_GRANTED) == 0)
			panic("rangelock not held @ %s:%d\n", file, line);
		break;
	case RCA_RLOCKED:
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_READ))
			panic("rangelock not rlocked @ %s:%d\n", file, line);
		break;
	case RCA_WLOCKED:
		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
		    (RL_LOCK_GRANTED | RL_LOCK_WRITE))
			panic("rangelock not wlocked @ %s:%d\n", file, line);
		break;
	default:
		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */
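
/*
 * Illustrative usage sketch (the "foo" structure and field names below are
 * hypothetical and not part of this file): a consumer embeds a rangelock
 * next to the mutex that serves as the interlock, initializes it once, and
 * then brackets access to a byte range with a lock/unlock pair; the vnode
 * code, for instance, pairs v_rl with the vnode interlock.
 *
 *	struct foo {
 *		struct mtx	foo_interlock;
 *		struct rangelock foo_rangelock;
 *	};
 *
 *	rangelock_init(&foo->foo_rangelock);
 *	...
 *	void *cookie;
 *
 *	cookie = rangelock_wlock(&foo->foo_rangelock, off, off + len,
 *	    &foo->foo_interlock);
 *	... modify the bytes in [off, off + len) ...
 *	rangelock_unlock(&foo->foo_rangelock, cookie, &foo->foo_interlock);
 *
 * The interlock is acquired and dropped inside rangelock_enqueue() and
 * rangelock_unlock(), so the caller must not hold it around these calls,
 * and rangelock_destroy() may be called only after every cookie has been
 * released.
 */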