xref: /freebsd/sys/kern/kern_rangelock.c (revision b85e1f7d05ac7889868f2364474e02b6a024af25)
1 /*-
2  * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/kernel.h>
32 #include <sys/lock.h>
33 #include <sys/mutex.h>
34 #include <sys/proc.h>
35 #include <sys/rangelock.h>
36 #include <sys/systm.h>
37 
38 #include <vm/uma.h>
39 
/*
 * One queued range lock request.  Entries are linked on the owning
 * rangelock's rl_waiters tail queue in arrival order.
 */
struct rl_q_entry {
	/* Linkage on the rangelock's rl_waiters queue. */
	TAILQ_ENTRY(rl_q_entry) rl_q_link;
	/* First offset of the requested range. */
	off_t		rl_q_start;
	/*
	 * End offset of the requested range; the overlap test in
	 * rangelock_incompatible() treats it as exclusive.
	 */
	off_t		rl_q_end;
	/* Lock mode passed by the caller, plus RL_LOCK_GRANTED once granted. */
	int		rl_q_flags;
};
45 
/* UMA zone backing struct rl_q_entry; created in rangelock_sys_init(). */
static uma_zone_t rl_entry_zone;
47 
48 static void
49 rangelock_sys_init(void)
50 {
51 
52 	rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
53 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
54 }
55 SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
56 
57 static struct rl_q_entry *
58 rlqentry_alloc(void)
59 {
60 
61 	return (uma_zalloc(rl_entry_zone, M_WAITOK));
62 }
63 
64 void
65 rlqentry_free(struct rl_q_entry *rleq)
66 {
67 
68 	uma_zfree(rl_entry_zone, rleq);
69 }
70 
71 void
72 rangelock_init(struct rangelock *lock)
73 {
74 
75 	TAILQ_INIT(&lock->rl_waiters);
76 	lock->rl_currdep = NULL;
77 }
78 
79 void
80 rangelock_destroy(struct rangelock *lock)
81 {
82 
83 	KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
84 }
85 
86 /*
87  * Verifies the supplied rl_q_entries for compatibility.  Returns true
88  * if the rangelock queue entries are not compatible, false if they are.
89  *
90  * Two entries are compatible if their ranges do not overlap, or both
91  * entries are for read.
92  */
93 static int
94 rangelock_incompatible(const struct rl_q_entry *e1,
95     const struct rl_q_entry *e2)
96 {
97 
98 	if ((e1->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ &&
99 	    (e2->rl_q_flags & RL_LOCK_TYPE_MASK) == RL_LOCK_READ)
100 		return (0);
101 	if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
102 		return (1);
103 	return (0);
104 }
105 
106 /*
107  * Recalculate the lock->rl_currdep after an unlock.
108  */
109 static void
110 rangelock_calc_block(struct rangelock *lock)
111 {
112 	struct rl_q_entry *entry, *entry1, *whead;
113 
114 	if (lock->rl_currdep == TAILQ_FIRST(&lock->rl_waiters) &&
115 	    lock->rl_currdep != NULL)
116 		lock->rl_currdep = TAILQ_NEXT(lock->rl_currdep, rl_q_link);
117 	for (entry = lock->rl_currdep; entry != NULL;
118 	     entry = TAILQ_NEXT(entry, rl_q_link)) {
119 		TAILQ_FOREACH(entry1, &lock->rl_waiters, rl_q_link) {
120 			if (rangelock_incompatible(entry, entry1))
121 				goto out;
122 			if (entry1 == entry)
123 				break;
124 		}
125 	}
126 out:
127 	lock->rl_currdep = entry;
128 	TAILQ_FOREACH(whead, &lock->rl_waiters, rl_q_link) {
129 		if (whead == lock->rl_currdep)
130 			break;
131 		if (!(whead->rl_q_flags & RL_LOCK_GRANTED)) {
132 			whead->rl_q_flags |= RL_LOCK_GRANTED;
133 			wakeup(whead);
134 		}
135 	}
136 }
137 
138 static void
139 rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
140     struct mtx *ilk)
141 {
142 
143 	MPASS(lock != NULL && entry != NULL && ilk != NULL);
144 	mtx_assert(ilk, MA_OWNED);
145 	KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
146 
147 	TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
148 	rangelock_calc_block(lock);
149 	mtx_unlock(ilk);
150 	if (curthread->td_rlqe == NULL)
151 		curthread->td_rlqe = entry;
152 	else
153 		rlqentry_free(entry);
154 }
155 
156 void
157 rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
158 {
159 
160 	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
161 
162 	mtx_lock(ilk);
163 	rangelock_unlock_locked(lock, cookie, ilk);
164 }
165 
166 /*
167  * Unlock the sub-range of granted lock.
168  */
169 void *
170 rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
171     off_t end, struct mtx *ilk)
172 {
173 	struct rl_q_entry *entry;
174 
175 	MPASS(lock != NULL && cookie != NULL && ilk != NULL);
176 	entry = cookie;
177 	KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
178 	    ("Unlocking non-granted lock"));
179 	KASSERT(entry->rl_q_start == start, ("wrong start"));
180 	KASSERT(entry->rl_q_end >= end, ("wrong end"));
181 
182 	mtx_lock(ilk);
183 	if (entry->rl_q_end == end) {
184 		rangelock_unlock_locked(lock, cookie, ilk);
185 		return (NULL);
186 	}
187 	entry->rl_q_end = end;
188 	rangelock_calc_block(lock);
189 	mtx_unlock(ilk);
190 	return (cookie);
191 }
192 
193 /*
194  * Add the lock request to the queue of the pending requests for
195  * rangelock.  Sleep until the request can be granted.
196  */
197 static void *
198 rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
199     struct mtx *ilk)
200 {
201 	struct rl_q_entry *entry;
202 	struct thread *td;
203 
204 	MPASS(lock != NULL && ilk != NULL);
205 
206 	td = curthread;
207 	if (td->td_rlqe != NULL) {
208 		entry = td->td_rlqe;
209 		td->td_rlqe = NULL;
210 	} else
211 		entry = rlqentry_alloc();
212 	MPASS(entry != NULL);
213 	entry->rl_q_flags = mode;
214 	entry->rl_q_start = start;
215 	entry->rl_q_end = end;
216 
217 	mtx_lock(ilk);
218 	/*
219 	 * XXXKIB TODO. Check that a thread does not try to enqueue a
220 	 * lock that is incompatible with another request from the same
221 	 * thread.
222 	 */
223 
224 	TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
225 	if (lock->rl_currdep == NULL)
226 		lock->rl_currdep = entry;
227 	rangelock_calc_block(lock);
228 	while (!(entry->rl_q_flags & RL_LOCK_GRANTED))
229 		msleep(entry, ilk, 0, "range", 0);
230 	mtx_unlock(ilk);
231 	return (entry);
232 }
233 
234 void *
235 rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
236 {
237 
238 	return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk));
239 }
240 
241 void *
242 rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
243 {
244 
245 	return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk));
246 }
247