/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangelock.h>
#include <sys/systm.h>

#include <vm/uma.h>

struct rl_q_entry {
        TAILQ_ENTRY(rl_q_entry) rl_q_link;
        off_t rl_q_start, rl_q_end;
        int rl_q_flags;
};

static uma_zone_t rl_entry_zone;

static void
rangelock_sys_init(void)
{

        rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);

static struct rl_q_entry *
rlqentry_alloc(void)
{

        return (uma_zalloc(rl_entry_zone, M_WAITOK));
}

void
rlqentry_free(struct rl_q_entry *rleq)
{

        uma_zfree(rl_entry_zone, rleq);
}

void
rangelock_init(struct rangelock *lock)
{

        TAILQ_INIT(&lock->rl_waiters);
        lock->rl_currdep = NULL;
}

void
rangelock_destroy(struct rangelock *lock)
{

        KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
}

/*
 * Two entries are compatible if their ranges do not overlap, or both
 * entries are for read.
 */
static int
ranges_overlap(const struct rl_q_entry *e1,
    const struct rl_q_entry *e2)
{

        if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
                return (1);
        return (0);
}

/*
 * Recalculate the lock->rl_currdep after an unlock.
 */
static void
rangelock_calc_block(struct rangelock *lock)
{
        struct rl_q_entry *entry, *nextentry, *entry1;

        for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
                nextentry = TAILQ_NEXT(entry, rl_q_link);
                if (entry->rl_q_flags & RL_LOCK_READ) {
                        /* Reads must not overlap with granted writes. */
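                        /*
                         * Granted write locks are moved to the head of
                         * rl_waiters when they are granted (see below), so
                         * scanning the leading run of write entries visits
                         * every granted write.
                         */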
                        for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
                            !(entry1->rl_q_flags & RL_LOCK_READ);
                            entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
                                if (ranges_overlap(entry, entry1))
                                        goto out;
                        }
                } else {
                        /* Write must not overlap with any granted locks. */
                        for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
                            entry1 != entry;
                            entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
                                if (ranges_overlap(entry, entry1))
                                        goto out;
                        }

                        /* Move grantable write locks to the front. */
                        TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
                        TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
                }

                /* Grant this lock. */
                entry->rl_q_flags |= RL_LOCK_GRANTED;
                wakeup(entry);
        }
out:
        lock->rl_currdep = entry;
}

static void
rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
    struct mtx *ilk, bool do_calc_block)
{

        MPASS(lock != NULL && entry != NULL && ilk != NULL);
        mtx_assert(ilk, MA_OWNED);

        if (!do_calc_block) {
                /*
                 * This is the case where rangelock_enqueue() has been called
                 * with trylock == true and just inserted this entry in the
                 * queue.
                 * If rl_currdep is this entry, rl_currdep needs to
                 * be set to the next entry in the rl_waiters list.
                 * However, since this entry is the last entry in the
                 * list, the next entry is NULL.
                 */
                if (lock->rl_currdep == entry) {
                        KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
                            ("rangelock_enqueue: next entry not NULL"));
                        lock->rl_currdep = NULL;
                }
        } else
                KASSERT(entry != lock->rl_currdep, ("stuck currdep"));

        TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
        if (do_calc_block)
                rangelock_calc_block(lock);
        mtx_unlock(ilk);
        if (curthread->td_rlqe == NULL)
                curthread->td_rlqe = entry;
        else
                rlqentry_free(entry);
}

void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{

        MPASS(lock != NULL && cookie != NULL && ilk != NULL);

        mtx_lock(ilk);
        rangelock_unlock_locked(lock, cookie, ilk, true);
}

/*
 * Unlock the sub-range of granted lock.
 */
void *
rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
    off_t end, struct mtx *ilk)
{
        struct rl_q_entry *entry;

        MPASS(lock != NULL && cookie != NULL && ilk != NULL);
        entry = cookie;
        KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
            ("Unlocking non-granted lock"));
        KASSERT(entry->rl_q_start == start, ("wrong start"));
        KASSERT(entry->rl_q_end >= end, ("wrong end"));

        mtx_lock(ilk);
        if (entry->rl_q_end == end) {
                rangelock_unlock_locked(lock, cookie, ilk, true);
                return (NULL);
        }
        entry->rl_q_end = end;
        rangelock_calc_block(lock);
        mtx_unlock(ilk);
        return (cookie);
}

/*
 * Add the lock request to the queue of the pending requests for
 * rangelock.  Sleep until the request can be granted unless trylock == true.
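 *
 * The new queue entry is returned as the cookie that the caller later passes
 * to rangelock_unlock().  If trylock == true and the range cannot be granted
 * immediately, the entry is removed from the queue again and NULL is
 * returned.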
 */
static void *
rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
    struct mtx *ilk, bool trylock)
{
        struct rl_q_entry *entry;
        struct thread *td;

        MPASS(lock != NULL && ilk != NULL);

        td = curthread;
        if (td->td_rlqe != NULL) {
                entry = td->td_rlqe;
                td->td_rlqe = NULL;
        } else
                entry = rlqentry_alloc();
        MPASS(entry != NULL);
        entry->rl_q_flags = mode;
        entry->rl_q_start = start;
        entry->rl_q_end = end;

        mtx_lock(ilk);
        /*
         * XXXKIB TODO. Check that a thread does not try to enqueue a
         * lock that is incompatible with another request from the same
         * thread.
         */

        TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
        /*
         * If rl_currdep == NULL, there is no entry waiting for a conflicting
         * range to be resolved, so set rl_currdep to this entry.  If there is
         * no conflicting entry for this entry, rl_currdep will be set back to
         * NULL by rangelock_calc_block().
         */
        if (lock->rl_currdep == NULL)
                lock->rl_currdep = entry;
        rangelock_calc_block(lock);
        while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
                if (trylock) {
                        /*
                         * For this case, the range is not actually locked
                         * yet, but removal from the list requires the same
                         * steps, except for not doing a rangelock_calc_block()
                         * call, since rangelock_calc_block() was called above.
                         */
                        rangelock_unlock_locked(lock, entry, ilk, false);
                        return (NULL);
                }
                msleep(entry, ilk, 0, "range", 0);
        }
        mtx_unlock(ilk);
        return (entry);
}

void *
rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

        return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
}

void *
rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

        return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
}

void *
rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
{

        return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
}

void *
rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
    struct mtx *ilk)
{

        return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
}

#ifdef INVARIANT_SUPPORT
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
        struct rl_q_entry *entry;
        int flags;

        MPASS(cookie != NULL);
        entry = cookie;
        flags = entry->rl_q_flags;
        switch (what) {
        case RCA_LOCKED:
                if ((flags & RL_LOCK_GRANTED) == 0)
                        panic("rangelock not held @ %s:%d\n", file, line);
                break;
        case RCA_RLOCKED:
                if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
                    (RL_LOCK_GRANTED | RL_LOCK_READ))
                        panic("rangelock not rlocked @ %s:%d\n", file, line);
                break;
        case RCA_WLOCKED:
                if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
                    (RL_LOCK_GRANTED | RL_LOCK_WRITE))
                        panic("rangelock not wlocked @ %s:%d\n", file, line);
                break;
        default:
                panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
                    line);
        }
}
#endif  /* INVARIANT_SUPPORT */
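
#if 0
/*
 * Illustrative usage sketch, not compiled: the structure and identifiers
 * below (example_obj, ilk, rl, example_write) are hypothetical.  A consumer
 * embeds a struct rangelock next to the data it protects and supplies its
 * own interlock mutex to every call; the returned cookie identifies the
 * granted range for the later unlock.
 */
struct example_obj {
        struct mtx      ilk;    /* interlock passed to the rangelock calls */
        struct rangelock rl;    /* byte-range lock over the object's data */
};

static void
example_write(struct example_obj *obj, off_t start, off_t end)
{
        void *cookie;

        cookie = rangelock_wlock(&obj->rl, start, end, &obj->ilk);
        /* ... modify the byte range [start, end) ... */
        rangelock_unlock(&obj->rl, cookie, &obj->ilk);
}
/*
 * The trylock variants, rangelock_tryrlock() and rangelock_trywlock(),
 * return NULL instead of sleeping when the range cannot be granted
 * immediately.
 */
#endif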