/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already
	 * write-locked; take the slow path, which takes the lock, instead.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
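
/*
 * Illustrative sketch (not part of this API): a reader can pair
 * mmap_lock_speculate_try_begin() with mmap_lock_speculate_retry() to walk mm
 * state without taking mmap_lock, falling back to the lock when speculation
 * is unavailable or was invalidated by a writer. walk_speculatively(),
 * do_speculative_walk() and do_locked_walk() are hypothetical placeholders,
 * not in-tree functions; the speculative walk must itself be safe to run
 * concurrently with writers (e.g. under RCU).
 *
 *	static int walk_speculatively(struct mm_struct *mm)
 *	{
 *		unsigned int seq;
 *		int ret;
 *
 *		if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *			ret = do_speculative_walk(mm);
 *			// Sequence unchanged: no writer raced us, result is valid.
 *			if (!mmap_lock_speculate_retry(mm, seq))
 *				return ret;
 *		}
 *		// Slow path: mmap_lock was write-locked or the walk was invalidated.
 *		mmap_read_lock(mm);
 *		ret = do_locked_walk(mm);
 *		mmap_read_unlock(mm);
 *		return ret;
 *	}
 */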

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield a
 * false locked result to avoid performance overhead, in which case we fall
 * back to using mmap_lock. The function should never yield a false unlocked
 * result. A false locked result is possible if mm_lock_seq overflows or if
 * the vma gets reused and attached to a different mm before we lock it.
 * Returns the vma on success, NULL on failure to lock, and an
 * ERR_PTR(-EAGAIN) if the vma got detached.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{
	int oldcnt;

	/*
	 * Check before locking. A race might cause a false locked result.
	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
	 * ACQUIRE semantics, because this is just a lockless check whose result
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

	/*
	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
	 * Acquire fence is required here to avoid reordering against later
	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
	 */
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT))) {
		/* return EAGAIN if vma got detached from under us */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
	}

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	/*
	 * Overflow of vm_lock_seq/mm_lock_seq might produce a false locked
	 * result. A false unlocked result is impossible because we modify and
	 * check vma->vm_lock_seq under vma->vm_refcnt protection and
	 * mm->mm_lock_seq modification invalidates all existing locks.
	 *
	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
	 * racing with vma_end_write_all(), we only start reading from the VMA
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		vma_refcount_put(vma);
		return NULL;
	}

	return vma;
}
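
/*
 * Illustrative sketch of how a caller might consume the three possible
 * results of vma_start_read(); the lookup step and labels are hypothetical.
 * lock_vma_under_rcu() (declared below) is the main in-tree consumer.
 *
 *	// vma obtained from an RCU-protected lookup (details omitted)
 *	vma = vma_start_read(mm, vma);
 *	if (IS_ERR(vma))	// ERR_PTR(-EAGAIN): vma was detached, redo the lookup
 *		goto retry;
 *	if (!vma)		// failed (possibly spuriously): fall back to mmap_lock
 *		goto fallback;
 *	// ... read-side work on the locked vma ...
 *	vma_end_read(vma);
 */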

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This functionality is used to obtain a vma read lock and then
 * drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This functionality is used to obtain a vma read lock and then
 * drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}
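
/*
 * Illustrative sketch (hypothetical caller): converting an mmap read lock
 * into a per-VMA read lock so that the coarser lock can be dropped early.
 * find_vma() is assumed to come from <linux/mm.h>.
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);
 *		// operate on the vma under its per-VMA read lock only
 *		vma_end_read(vma);
 *	} else {
 *		// locking the vma failed (or no vma): keep using mmap_lock
 *		mmap_read_unlock(mm);
 *	}
 */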

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * The current task is holding mmap_write_lock, so neither
	 * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq);
}
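
/*
 * Illustrative sketch (hypothetical caller): a writer holding mmap_write_lock
 * write-locks a VMA before modifying it so that per-VMA-lock readers are
 * excluded; the VMA write locks are then released for all VMAs when
 * vma_end_write_all() runs on unlock/downgrade. modify_the_vma() is a
 * placeholder for the actual modification.
 *
 *	mmap_write_lock(mm);
 *	vma_start_write(vma);
 *	modify_the_vma(vma);
 *	mmap_write_unlock(mm);	// also ends all VMA write locks
 */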

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
		{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
		{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
		unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}
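
/*
 * Illustrative sketch (hypothetical caller): the killable variant returns
 * -EINTR when the sleeping acquisition is interrupted by a fatal signal, and
 * callers typically just propagate that error.
 *
 *	if (mmap_write_lock_killable(mm))
 *		return -EINTR;
 *	// ... modify the address space ...
 *	mmap_write_unlock(mm);
 *	return 0;
 */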

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))
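
/*
 * Illustrative sketch (hypothetical caller): the guard class defined above
 * enables scope-based read locking via <linux/cleanup.h>; the lock is
 * dropped automatically when the scope ends.
 *
 *	static void inspect_mm(struct mm_struct *mm)
 *	{
 *		guard(mmap_read_lock)(mm);
 *		// mmap_lock is read-locked for the rest of this scope
 *	}
 */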

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */