xref: /freebsd/sys/kern/kern_rangelock.c (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/param.h>
30 #include <sys/kernel.h>
31 #include <sys/lock.h>
32 #include <sys/mutex.h>
33 #include <sys/proc.h>
34 #include <sys/rangelock.h>
35 #include <sys/systm.h>
36 
37 #include <vm/uma.h>
38 
39 struct rl_q_entry {
40 	TAILQ_ENTRY(rl_q_entry) rl_q_link;
41 	off_t		rl_q_start, rl_q_end;
42 	int		rl_q_flags;
43 };
44 
45 static uma_zone_t rl_entry_zone;
46 
47 static void
48 rangelock_sys_init(void)
49 {
50 
51 	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
52 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
53 }
54 SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
55 
56 static struct rl_q_entry *
57 rlqentry_alloc(void)
58 {
59 
60 	return (uma_zalloc(rl_entry_zone, M_WAITOK));
61 }
62 
63 void
64 rlqentry_free(struct rl_q_entry *rleq)
65 {
66 
67 	uma_zfree(rl_entry_zone, rleq);
68 }
69 
70 void
71 rangelock_init(struct rangelock *lock)
72 {
73 
74 	TAILQ_INIT(&lock->rl_waiters);
75 	lock->rl_currdep = NULL;
76 }
77 
78 void
79 rangelock_destroy(struct rangelock *lock)
80 {
81 
82 	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
83 }
84 
85 /*
86  * Two entries are compatible if their ranges do not overlap, or both
87  * entries are for read.
88  */
89 static int
90 ranges_overlap(const struct rl_q_entry *e1,
91     const struct rl_q_entry *e2)
92 {
93 
94 	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
95 		return (1);
96 	return (0);
97 }
98 
99 /*
100  * Recalculate the lock->rl_currdep after an unlock.
101  */
102 static void
103 rangelock_calc_block(struct rangelock *lock)
104 {
105 	struct rl_q_entry *entry, *nextentry, *entry1;
106 
107 	for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
108 		nextentry = TAILQ_NEXT(entry, rl_q_link);
109 		if (entry->rl_q_flags & RL_LOCK_READ) {
110 			/* Reads must not overlap with granted writes. */
111 			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
112 			    !(entry1->rl_q_flags & RL_LOCK_READ);
113 			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
114 				if (ranges_overlap(entry, entry1))
115 					goto out;
116 			}
117 		} else {
118 			/* Write must not overlap with any granted locks. */
119 			for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
120 			    entry1 != entry;
121 			    entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
122 				if (ranges_overlap(entry, entry1))
123 					goto out;
124 			}
125 
126 			/* Move grantable write locks to the front. */
127 			TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
128 			TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
129 		}
130 
131 		/* Grant this lock. */
132 		entry->rl_q_flags |= RL_LOCK_GRANTED;
133 		wakeup(entry);
134 	}
135 out:
136 	lock->rl_currdep = entry;
137 }
138 
139 static void
140 rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
141     struct mtx *ilk, bool do_calc_block)
142 {
143 
144 	MPASS(lock != NULL && entry != NULL && ilk != NULL);
145 	mtx_assert(ilk, MA_OWNED);
146 
147 	if (!do_calc_block) {
148 		/*
149 		 * This is the case where rangelock_enqueue() has been called
150 		 * with trylock == true and just inserted this entry in the
151 		 * queue.
152 		 * If rl_currdep is this entry, rl_currdep needs to
153 		 * be set to the next entry in the rl_waiters list.
154 		 * However, since this entry is the last entry in the
155 		 * list, the next entry is NULL.
156 		 */
157 		if (lock->rl_currdep == entry) {
158 			KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
159 			    ("rangelock_enqueue: next entry not NULL"));
160 			lock->rl_currdep = NULL;
161 		}
162 	} else
163 		KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
164 
165 	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
166 	if (do_calc_block)
167 		rangelock_calc_block(lock);
168 	mtx_unlock(ilk);
169 	if (curthread->td_rlqe == NULL)
170 		curthread->td_rlqe = entry;
171 	else
172 		rlqentry_free(entry);
173 }
174 
175 void
176 rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
177 {
178 
179 	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
180 
181 	mtx_lock(ilk);
182 	rangelock_unlock_locked(lock, cookie, ilk, true);
183 }
184 
185 /*
186  * Unlock the sub-range of granted lock.
187  */
188 void *
189 rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
190     off_t end, struct mtx *ilk)
191 {
192 	struct rl_q_entry *entry;
193 
194 	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
195 	entry = cookie;
196 	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
197 	    ("Unlocking non-granted lock"));
198 	KASSERT(entry->rl_q_start == start, ("wrong start"));
199 	KASSERT(entry->rl_q_end >= end, ("wrong end"));
200 
201 	mtx_lock(ilk);
202 	if (entry->rl_q_end == end) {
203 		rangelock_unlock_locked(lock, cookie, ilk, true);
204 		return (NULL);
205 	}
206 	entry->rl_q_end = end;
207 	rangelock_calc_block(lock);
208 	mtx_unlock(ilk);
209 	return (cookie);
210 }
211 
212 /*
213  * Add the lock request to the queue of the pending requests for
214  * rangelock.  Sleep until the request can be granted unless trylock == true.
215  */
216 static void *
217 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
218     struct mtx *ilk, bool trylock)
219 {
220 	struct rl_q_entry *entry;
221 	struct thread *td;
222 
223 	MPASS(lock != NULL && ilk != NULL);
224 
225 	td = curthread;
226 	if (td->td_rlqe != NULL) {
227 		entry = td->td_rlqe;
228 		td->td_rlqe = NULL;
229 	} else
230 		entry = rlqentry_alloc();
231 	MPASS(entry != NULL);
232 	entry->rl_q_flags = mode;
233 	entry->rl_q_start = start;
234 	entry->rl_q_end = end;
235 
236 	mtx_lock(ilk);
237 	/*
238 	 * XXXKIB TODO. Check that a thread does not try to enqueue a
239 	 * lock that is incompatible with another request from the same
240 	 * thread.
241 	 */
242 
243 	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
244 	/*
245 	 * If rl_currdep == NULL, there is no entry waiting for a conflicting
246 	 * range to be resolved, so set rl_currdep to this entry.  If there is
247 	 * no conflicting entry for this entry, rl_currdep will be set back to
248 	 * NULL by rangelock_calc_block().
249 	 */
250 	if (lock->rl_currdep == NULL)
251 		lock->rl_currdep = entry;
252 	rangelock_calc_block(lock);
253 	while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
254 		if (trylock) {
255 			/*
256 			 * For this case, the range is not actually locked
257 			 * yet, but removal from the list requires the same
258 			 * steps, except for not doing a rangelock_calc_block()
259 			 * call, since rangelock_calc_block() was called above.
260 			 */
261 			rangelock_unlock_locked(lock, entry, ilk, false);
262 			return (NULL);
263 		}
264 		msleep(entry, ilk, 0, "range", 0);
265 	}
266 	mtx_unlock(ilk);
267 	return (entry);
268 }
269 
270 void *
271 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
272 {
273 
274 	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
275 }
276 
277 void *
278 rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
279     struct mtx *ilk)
280 {
281 
282 	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
283 }
284 
285 void *
286 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
287 {
288 
289 	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
290 }
291 
292 void *
293 rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
294     struct mtx *ilk)
295 {
296 
297 	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
298 }
299 
300 #ifdef INVARIANT_SUPPORT
301 void
302 _rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
303 {
304 	struct rl_q_entry *entry;
305 	int flags;
306 
307 	MPASS(cookie != NULL);
308 	entry = cookie;
309 	flags = entry->rl_q_flags;
310 	switch (what) {
311 	case RCA_LOCKED:
312 		if ((flags & RL_LOCK_GRANTED) == 0)
313 			panic("rangelock not held @ %s:%d\n", file, line);
314 		break;
315 	case RCA_RLOCKED:
316 		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
317 		    (RL_LOCK_GRANTED | RL_LOCK_READ))
318 			panic("rangelock not rlocked @ %s:%d\n", file, line);
319 		break;
320 	case RCA_WLOCKED:
321 		if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
322 		    (RL_LOCK_GRANTED | RL_LOCK_WRITE))
323 			panic("rangelock not wlocked @ %s:%d\n", file, line);
324 		break;
325 	default:
326 		panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
327 		    line);
328 	}
329 }
330 #endif	/* INVARIANT_SUPPORT */
331