/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
        .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
                                           bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
                                                   bool write)
{
        if (tracepoint_enabled(mmap_lock_start_locking))
                __mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
                                                      bool write, bool success)
{
        if (tracepoint_enabled(mmap_lock_acquire_returned))
                __mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
        if (tracepoint_enabled(mmap_lock_released))
                __mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
                                                   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
                                                      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
        rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
        rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
        seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
        do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
        ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
        do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
        /*
         * Since mmap_lock is a sleeping lock, and waiting for it to become
         * unlocked is more or less equivalent to taking it ourselves, don't
         * bother with the speculative path if mmap_lock is already
         * write-locked; take the slow path, which takes the lock, instead.
         */
        return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
        return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
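
/*
 * Illustrative sketch (not an API contract of this header): a typical
 * speculative section samples the sequence count, reads mm state without
 * taking mmap_lock, and falls back to a locked slow path if a writer was or
 * became active. read_state_locklessly() and locked_slow_path() are
 * hypothetical placeholders.
 *
 *	unsigned int seq;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		read_state_locklessly(mm);
 *		if (!mmap_lock_speculate_retry(mm, seq))
 *			return;
 *	}
 *	locked_slow_path(mm);
 */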

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        static struct lock_class_key lockdep_key;

        lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
        if (reset_refcnt)
                refcount_set(&vma->vm_refcnt, 0);
        vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
        /*
         * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
         * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
         * a detached vma happens only in vma_mark_detached() and is a rare
         * case, therefore most of the time there will be no unnecessary wakeup.
         */
        return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
}
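
/*
 * Worked example of the vm_refcnt encoding assumed above (inferred from the
 * refcounting in this file, not a separate guarantee): an attached, unlocked
 * vma has vm_refcnt == 1 and each reader adds 1, while a writer adds
 * VMA_LOCK_OFFSET. Hence VMA_LOCK_OFFSET + 1 means "writer, attached, no
 * readers" and VMA_LOCK_OFFSET alone means "writer, detached, no readers";
 * these are the only two states in which the reader dropping its reference
 * needs to wake the writer.
 */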

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
        /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
        struct mm_struct *mm = vma->vm_mm;
        int oldcnt;

        rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
        if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
                if (is_vma_writer_only(oldcnt - 1))
                        rcuwait_wake_up(&mm->vma_writer_wait);
        }
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * cannot fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This function is used to obtain a vma read lock and then drop the
 * mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
        int oldcnt;

        mmap_assert_locked(vma->vm_mm);
        if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
                                                              VMA_REF_LIMIT)))
                return false;

        rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
        return true;
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * cannot fail (nobody can concurrently write-lock the vma). vma_start_read()
 * should not be used in such cases because it might fail due to mm_lock_seq
 * overflow. This function is used to obtain a vma read lock and then drop the
 * mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
        return vma_start_read_locked_nested(vma, 0);
}
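
/*
 * Illustrative sketch of the pattern described above (hedged, not taken from
 * a specific caller): take the vma read lock while mmap_lock is held for
 * read, then drop mmap_lock and keep working on the vma under its own lock.
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);
 *		... operate on the vma ...
 *		vma_end_read(vma);
 *	} else {
 *		mmap_read_unlock(mm);
 *	}
 */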

static inline void vma_end_read(struct vm_area_struct *vma)
{
        vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
        mmap_assert_write_locked(vma->vm_mm);

        /*
         * The current task is holding mmap_write_lock, so neither
         * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
         */
        *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
        return (vma->vm_lock_seq == *mm_lock_seq);
}

void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
        unsigned int mm_lock_seq;

        if (__is_vma_write_locked(vma, &mm_lock_seq))
                return;

        __vma_start_write(vma, mm_lock_seq);
}
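
/*
 * Illustrative sketch: code modifying a VMA while holding mmap_lock for write
 * is expected to write-lock the vma first so that per-VMA-lock readers are
 * excluded until the mmap_lock is released or downgraded (all vma write locks
 * are dropped together by vma_end_write_all()).
 *
 *	mmap_write_lock(mm);
 *	vma_start_write(vma);
 *	... modify the vma ...
 *	mmap_write_unlock(mm);
 */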

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
        unsigned int mm_lock_seq;

        VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
        unsigned int mm_lock_seq;

        VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
                      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
        WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
        WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
        vma_assert_write_locked(vma);
        vma_assert_detached(vma);
        refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
                                          unsigned long address);

/*
 * Locks the next vma pointed to by the iterator. Confirms that the locked vma
 * has not been modified and will retry under mmap_lock protection if a
 * modification was detected. Must be called from within an RCU read-side
 * critical section.
 * Returns either a valid locked VMA, NULL if there are no more VMAs, or
 * -EINTR if the process was interrupted.
 */
struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
                                     struct vma_iterator *iter,
                                     unsigned long address);
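
/*
 * Illustrative iteration sketch (a simplification; in-tree callers handle
 * iterator resets and error reporting in more detail). Note that the next
 * start address is sampled before the vma read lock is dropped.
 *
 *	rcu_read_lock();
 *	vma_iter_init(&vmi, mm, addr);
 *	for (;;) {
 *		vma = lock_next_vma(mm, &vmi, addr);
 *		if (!vma || IS_ERR(vma))
 *			break;
 *		addr = vma->vm_end;
 *		... use the vma ...
 *		vma_end_read(vma);
 *	}
 *	rcu_read_unlock();
 */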

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
        return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
        return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
                                                    struct vm_area_struct *vma)
{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
                                                        unsigned long address)
{
        return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
        mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
        __mmap_lock_trace_start_locking(mm, true);
        down_write(&mm->mmap_lock);
        mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
        __mmap_lock_trace_start_locking(mm, true);
        down_write_nested(&mm->mmap_lock, subclass);
        mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
        int ret;

        __mmap_lock_trace_start_locking(mm, true);
        ret = down_write_killable(&mm->mmap_lock);
        if (!ret)
                mm_lock_seqcount_begin(mm);
        __mmap_lock_trace_acquire_returned(mm, true, ret == 0);
        return ret;
}

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
        mmap_assert_write_locked(mm);
        mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
        __mmap_lock_trace_released(mm, true);
        vma_end_write_all(mm);
        up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
        __mmap_lock_trace_acquire_returned(mm, false, true);
        vma_end_write_all(mm);
        downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
        __mmap_lock_trace_start_locking(mm, false);
        down_read(&mm->mmap_lock);
        __mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
        int ret;

        __mmap_lock_trace_start_locking(mm, false);
        ret = down_read_killable(&mm->mmap_lock);
        __mmap_lock_trace_acquire_returned(mm, false, ret == 0);
        return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
        bool ret;

        __mmap_lock_trace_start_locking(mm, false);
        ret = down_read_trylock(&mm->mmap_lock) != 0;
        __mmap_lock_trace_acquire_returned(mm, false, ret);
        return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
        __mmap_lock_trace_released(mm, false);
        up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
             mmap_read_lock(_T), mmap_read_unlock(_T))
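
/*
 * The guard above wires mmap_read_lock()/mmap_read_unlock() into the
 * scope-based cleanup helpers from <linux/cleanup.h>, e.g.:
 *
 *	guard(mmap_read_lock)(mm);
 *
 * which holds the mmap read lock until the end of the enclosing scope, or:
 *
 *	scoped_guard(mmap_read_lock, mm) {
 *		... mmap_lock held for read here ...
 *	}
 */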

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
        __mmap_lock_trace_released(mm, false);
        up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
        return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */