xref: /freebsd/sys/kern/kern_rangelock.c (revision 7ef62cebc2f965b0f640263e179276928885e33d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/mutex.h>
36 #include <sys/proc.h>
37 #include <sys/rangelock.h>
38 #include <sys/systm.h>
39 
40 #include <vm/uma.h>
41 
42 struct rl_q_entry {
43 	TAILQ_ENTRY(rl_q_entry) rl_q_link;
44 	off_t		rl_q_start, rl_q_end;
45 	int		rl_q_flags;
46 };
47 
48 static uma_zone_t rl_entry_zone;
49 
50 static void
51 rangelock_sys_init(void)
52 {
53 
54 	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
55 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
56 }
57 SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
58 
59 static struct rl_q_entry *
60 rlqentry_alloc(void)
61 {
62 
63 	return (uma_zalloc(rl_entry_zone, M_WAITOK));
64 }
65 
66 void
67 rlqentry_free(struct rl_q_entry *rleq)
68 {
69 
70 	uma_zfree(rl_entry_zone, rleq);
71 }
72 
73 void
74 rangelock_init(struct rangelock *lock)
75 {
76 
77 	TAILQ_INIT(&lock->rl_waiters);
78 	lock->rl_currdep = NULL;
79 }
80 
81 void
82 rangelock_destroy(struct rangelock *lock)
83 {
84 
85 	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
86 }
87 
88 /*
89  * Two entries are compatible if their ranges do not overlap, or both
90  * entries are for read.
91  */
92 static int
93 ranges_overlap(const struct rl_q_entry *e1,
94     const struct rl_q_entry *e2)
95 {
96 
97 	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
98 		return (1);
99 	return (0);
100 }
101 
102 /*
103  * Recalculate the lock->rl_currdep after an unlock.
104  */
105 static void
106 rangelock_calc_block(struct rangelock *lock)
107 {
108 	struct rl_q_entry *entry, *nextentry, *entry1;
109 
110 	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
111 		nextentry = TAILQ_NEXT(entry, rl_q_link);
112 		if (entry->rl_q_flags & RL_LOCK_READ) {
113 			/* Reads must not overlap with granted writes. */
114 			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
115 			    !(entry1->rl_q_flags & RL_LOCK_READ);
116 			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
117 				if (ranges_overlap(entry, entry1))
118 					goto out;
119 			}
120 		} else {
121 			/* Write must not overlap with any granted locks. */
122 			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
123 			    entry1 != entry;
124 			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
125 				if (ranges_overlap(entry, entry1))
126 					goto out;
127 			}
128 
129 			/* Move grantable write locks to the front. */
130 			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
131 			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
132 		}
133 
134 		/* Grant this lock. */
135 		entry->rl_q_flags |= RL_LOCK_GRANTED;
136 		wakeup(entry);
137 	}
138 out:
139 	lock->rl_currdep = entry;
140 }
141 
142 static void
143 rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
144     struct mtx *ilk, bool do_calc_block)
145 {
146 
147 	MPASS(lock != NULL && entry != NULL && ilk != NULL);
148 	mtx_assert(ilk, MA_OWNED);
149 
150 	if (!do_calc_block) {
151 		/*
152 		 * This is the case where rangelock_enqueue() has been called
153 		 * with trylock == true and just inserted this entry in the
154 		 * queue.
155 		 * If rl_currdep is this entry, rl_currdep needs to
156 		 * be set to the next entry in the rl_waiters list.
157 		 * However, since this entry is the last entry in the
158 		 * list, the next entry is NULL.
159 		 */
160 		if (lock->rl_currdep == entry) {
161 			KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
162 			    ("rangelock_enqueue: next entry not NULL"));
163 			lock->rl_currdep = NULL;
164 		}
165 	} else
166 		KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
167 
168 	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
169 	if (do_calc_block)
170 		rangelock_calc_block(lock);
171 	mtx_unlock(ilk);
172 	if (curthread->td_rlqe == NULL)
173 		curthread->td_rlqe = entry;
174 	else
175 		rlqentry_free(entry);
176 }
177 
/*
 * Release the lock identified by 'cookie'.  The interlock 'ilk' is
 * acquired here and released by rangelock_unlock_locked(), which also
 * re-evaluates the remaining requests.
 */
void
rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
{
	struct rl_q_entry *entry;

	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
	entry = cookie;

	mtx_lock(ilk);
	rangelock_unlock_locked(lock, entry, ilk, true);
}
187 
188 /*
189  * Unlock the sub-range of granted lock.
190  */
191 void *
192 rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
193     off_t end, struct mtx *ilk)
194 {
195 	struct rl_q_entry *entry;
196 
197 	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
198 	entry = cookie;
199 	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
200 	    ("Unlocking non-granted lock"));
201 	KASSERT(entry->rl_q_start == start, ("wrong start"));
202 	KASSERT(entry->rl_q_end >= end, ("wrong end"));
203 
204 	mtx_lock(ilk);
205 	if (entry->rl_q_end == end) {
206 		rangelock_unlock_locked(lock, cookie, ilk, true);
207 		return (NULL);
208 	}
209 	entry->rl_q_end = end;
210 	rangelock_calc_block(lock);
211 	mtx_unlock(ilk);
212 	return (cookie);
213 }
214 
215 /*
216  * Add the lock request to the queue of the pending requests for
217  * rangelock.  Sleep until the request can be granted unless trylock == true.
218  */
219 static void *
220 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
221     struct mtx *ilk, bool trylock)
222 {
223 	struct rl_q_entry *entry;
224 	struct thread *td;
225 
226 	MPASS(lock != NULL && ilk != NULL);
227 
228 	td = curthread;
229 	if (td->td_rlqe != NULL) {
230 		entry = td->td_rlqe;
231 		td->td_rlqe = NULL;
232 	} else
233 		entry = rlqentry_alloc();
234 	MPASS(entry != NULL);
235 	entry->rl_q_flags = mode;
236 	entry->rl_q_start = start;
237 	entry->rl_q_end = end;
238 
239 	mtx_lock(ilk);
240 	/*
241 	 * XXXKIB TODO. Check that a thread does not try to enqueue a
242 	 * lock that is incompatible with another request from the same
243 	 * thread.
244 	 */
245 
246 	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
247 	/*
248 	 * If rl_currdep == NULL, there is no entry waiting for a conflicting
249 	 * range to be resolved, so set rl_currdep to this entry.  If there is
250 	 * no conflicting entry for this entry, rl_currdep will be set back to
251 	 * NULL by rangelock_calc_block().
252 	 */
253 	if (lock->rl_currdep == NULL)
254 		lock->rl_currdep = entry;
255 	rangelock_calc_block(lock);
256 	while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
257 		if (trylock) {
258 			/*
259 			 * For this case, the range is not actually locked
260 			 * yet, but removal from the list requires the same
261 			 * steps, except for not doing a rangelock_calc_block()
262 			 * call, since rangelock_calc_block() was called above.
263 			 */
264 			rangelock_unlock_locked(lock, entry, ilk, false);
265 			return (NULL);
266 		}
267 		msleep(entry, ilk, 0, "range", 0);
268 	}
269 	mtx_unlock(ilk);
270 	return (entry);
271 }
272 
273 void *
274 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
275 {
276 
277 	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
278 }
279 
280 void *
281 rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
282     struct mtx *ilk)
283 {
284 
285 	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
286 }
287 
288 void *
289 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
290 {
291 
292 	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
293 }
294 
295 void *
296 rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
297     struct mtx *ilk)
298 {
299 
300 	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
301 }
302 
#ifdef INVARIANT_SUPPORT
/*
 * Panic unless the lock behind 'cookie' is in the state named by
 * 'what':
 *
 *   RCA_LOCKED   the lock is granted, in either mode;
 *   RCA_RLOCKED  the lock is granted and is a read lock;
 *   RCA_WLOCKED  the lock is granted and is a write lock.
 *
 * Any other value of 'what' is itself reported with a panic.  'file'
 * and 'line' are included in the panic message.
 */
void
_rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
{
	const struct rl_q_entry *entry;
	int flags, need;

	MPASS(cookie != NULL);
	entry = cookie;
	flags = entry->rl_q_flags;

	switch (what) {
	case RCA_LOCKED:
		need = RL_LOCK_GRANTED;
		if ((flags & need) != need)
			panic("rangelock not held @ %s:%d\n", file, line);
		break;
	case RCA_RLOCKED:
		need = RL_LOCK_GRANTED | RL_LOCK_READ;
		if ((flags & need) != need)
			panic("rangelock not rlocked @ %s:%d\n", file, line);
		break;
	case RCA_WLOCKED:
		need = RL_LOCK_GRANTED | RL_LOCK_WRITE;
		if ((flags & need) != need)
			panic("rangelock not wlocked @ %s:%d\n", file, line);
		break;
	default:
		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */
334