/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already
	 * write-locked; take the slow path, which takes the lock, instead.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
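
/*
 * Illustrative sketch (not part of this header): a typical speculative
 * reader samples the sequence count, does its lockless work, and then
 * validates it; on failure it falls back to taking mmap_lock:
 *
 *	unsigned int seq;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		...speculative, lockless work...
 *		if (!mmap_lock_speculate_retry(mm, seq))
 *			return;		(speculation held, we are done)
 *	}
 *	mmap_read_lock(mm);	(slow path)
 *	...
 *	mmap_read_unlock(mm);
 */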

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {

		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Try to read-lock a vma. The function is allowed to occasionally yield a
 * false locked result to avoid performance overhead, in which case we fall
 * back to using mmap_lock. The function should never yield a false unlocked
 * result. A false locked result is possible if mm_lock_seq overflows or if
 * the vma gets reused and attached to a different mm before we lock it.
 * Returns the vma on success, NULL on failure to lock and ERR_PTR(-EAGAIN)
 * if the vma got detached.
 *
 * WARNING! The vma passed to this function cannot be used if the function
 * fails to lock it because in certain cases the RCU lock is dropped and then
 * reacquired. Once the RCU lock is dropped the vma can be concurrently freed.
 */
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
{
	int oldcnt;

	/*
	 * Check before locking. A race might cause false locked result.
	 * We can use READ_ONCE() for the mm_lock_seq here, and don't need
	 * ACQUIRE semantics, because this is just a lockless check whose result
	 * we don't rely on for anything - the mm_lock_seq read against which we
	 * need ordering is below.
	 */
	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
		return NULL;

	/*
	 * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
	 * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
	 * Acquire fence is required here to avoid reordering against later
	 * vm_lock_seq check and checks inside lock_vma_under_rcu().
	 */
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT))) {
		/* return EAGAIN if vma got detached from under us */
		return oldcnt ? NULL : ERR_PTR(-EAGAIN);
	}

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);

	/*
	 * If vma got attached to another mm from under us, that mm is not
	 * stable and can be freed in the narrow window after vma->vm_refcnt
	 * is dropped and before rcuwait_wake_up(mm) is called. Grab it before
	 * releasing vma->vm_refcnt.
	 */
	if (unlikely(vma->vm_mm != mm)) {
		/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
		struct mm_struct *other_mm = vma->vm_mm;

		/*
		 * __mmdrop() is a heavy operation and we don't need RCU
		 * protection here. Release RCU lock during these operations.
		 * We reinstate the RCU read lock as the caller expects it to
		 * be held when this function returns even on error.
		 */
		rcu_read_unlock();
		mmgrab(other_mm);
		vma_refcount_put(vma);
		mmdrop(other_mm);
		rcu_read_lock();
		return NULL;
	}

	/*
	 * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
	 * False unlocked result is impossible because we modify and check
	 * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
	 * modification invalidates all existing locks.
	 *
	 * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
	 * racing with vma_end_write_all(), we only start reading from the VMA
	 * after it has been unlocked.
	 * This pairs with RELEASE semantics in vma_end_write_all().
	 */
	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
		vma_refcount_put(vma);
		return NULL;
	}

	return vma;
}
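
/*
 * Illustrative sketch (not part of this header) of the intended calling
 * pattern, loosely modelled on lock_vma_under_rcu(); lookup and
 * revalidation details are elided:
 *
 *	rcu_read_lock();
 *	vma = ...look up the vma covering the faulting address...;
 *	vma = vma_start_read(mm, vma);
 *	rcu_read_unlock();
 *	if (IS_ERR_OR_NULL(vma))
 *		...fall back to mmap_read_lock()...;
 *	...handle the fault under the per-VMA read lock...
 *	vma_end_read(vma);
 */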

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This functionality is used to obtain a vma read lock and then
 * drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This functionality is used to obtain a vma read lock and then
 * drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}
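
/*
 * Illustrative sketch (not part of this header): take the vma read lock
 * while already holding the mmap read lock, then drop the mmap lock and
 * keep operating on the locked vma:
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);
 *		...work on vma under its per-VMA read lock...
 *		vma_end_read(vma);
 *	} else {
 *		...proceed under (or release) the mmap read lock...
 *	}
 */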

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * The current task is holding mmap_write_lock, so neither
	 * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq);
}
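
/*
 * Illustrative sketch (not part of this header): modifications of a vma are
 * made with the mmap write lock held and the vma itself write-locked, which
 * excludes per-VMA lock readers until the mmap lock is released:
 *
 *	mmap_write_lock(mm);
 *	vma = find_vma(mm, addr);
 *	vma_start_write(vma);
 *	...modify the vma...
 *	mmap_write_unlock(mm);	(vma_end_write_all() runs implicitly)
 */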

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}
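
/*
 * Illustrative sketch (not part of this header), assuming the usual
 * insertion path: a new vma is write-locked and inserted into the vma tree
 * under mmap_write_lock before being marked attached:
 *
 *	mmap_write_lock(mm);
 *	vma_start_write(vma);
 *	...insert vma into the mm's vma tree...
 *	vma_mark_attached(vma);
 *	mmap_write_unlock(mm);
 */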

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);

/*
 * Locks the next vma pointed to by the iterator. Confirms that the locked
 * vma has not been modified and will retry under mmap_lock protection if a
 * modification was detected. Should be called from an RCU read section.
 * Returns either a valid locked VMA, NULL if there are no more VMAs, or
 * ERR_PTR(-EINTR) if the process was interrupted.
 */
struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
				     struct vma_iterator *iter,
				     unsigned long address);
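
/*
 * Illustrative sketch (not part of this header) of lockless iteration with
 * lock_next_vma(); error handling and iterator positioning are simplified:
 *
 *	VMA_ITERATOR(vmi, mm, addr);
 *
 *	rcu_read_lock();
 *	for (;;) {
 *		vma = lock_next_vma(mm, &vmi, addr);
 *		if (IS_ERR_OR_NULL(vma))
 *			break;		(no more VMAs, or ERR_PTR(-EINTR))
 *		addr = vma->vm_end;
 *		...copy out whatever is needed from vma...
 *		vma_end_read(vma);
 *	}
 *	rcu_read_unlock();
 */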

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
						    struct vm_area_struct *vma)
	{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
	{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
							 unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}
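
/*
 * Illustrative sketch (not part of this header): the killable variant must
 * have its return value checked; a fatal signal makes it return -EINTR
 * without the lock held:
 *
 *	if (mmap_write_lock_killable(mm))
 *		return -EINTR;
 *	...modify the address space...
 *	mmap_write_unlock(mm);
 */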

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}
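
/*
 * Illustrative sketch (not part of this header): a common pattern is to do
 * the minimal work under the write lock, downgrade, and finish the
 * long-running part under the read lock (as some munmap paths do):
 *
 *	mmap_write_lock(mm);
 *	...detach the region from the vma tree...
 *	mmap_write_downgrade(mm);
 *	...zap page tables and free pages...
 *	mmap_read_unlock(mm);
 */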

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))
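
/*
 * Illustrative sketch (not part of this header): the guard defined above
 * pairs mmap_read_lock()/mmap_read_unlock() automatically via
 * <linux/cleanup.h>:
 *
 *	scoped_guard(mmap_read_lock, mm) {
 *		vma = find_vma(mm, addr);
 *		...
 *	}	(mmap_read_unlock(mm) runs here)
 *
 * or, for function scope:
 *
 *	guard(mmap_read_lock)(mm);
 */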

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */