xref: /linux/include/linux/mmap_lock.h (revision 7203ca412fc8e8a0588e9adc0f777d3163f8dff3)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already
	 * write-locked; take the slow path, which acquires the lock, instead.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
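
/*
 * Example usage (an illustrative sketch only; the speculative work between
 * the two helpers is a placeholder for whatever the caller reads):
 *
 *	unsigned int seq;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		... lockless, speculative read of mm state ...
 *		if (!mmap_lock_speculate_retry(mm, seq))
 *			return;		// the speculative read was consistent
 *	}
 *	// otherwise fall back to taking mmap_read_lock(mm) and redo the work
 */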

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

static inline bool is_vma_writer_only(int refcnt)
{
	/*
	 * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
	 * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
	 * a detached vma happens only in vma_mark_detached() and is a rare
	 * case, therefore most of the time there will be no unnecessary wakeup.
	 */
	return (refcnt & VMA_LOCK_OFFSET) && refcnt <= VMA_LOCK_OFFSET + 1;
}

static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
	struct mm_struct *mm = vma->vm_mm;
	int oldcnt;

	rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
	if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
		if (is_vma_writer_only(oldcnt - 1))
			rcuwait_wake_up(&mm->vma_writer_wait);
	}
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma).
 * vma_start_read() should not be used in such cases because it might fail
 * due to mm_lock_seq overflow. This functionality is used to obtain a vma
 * read lock and then drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VMA_REF_LIMIT)))
		return false;

	rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
	return true;
}

/*
 * Use only while holding the mmap read lock, which guarantees that locking
 * will not fail (nobody can concurrently write-lock the vma).
 * vma_start_read() should not be used in such cases because it might fail
 * due to mm_lock_seq overflow. This functionality is used to obtain a vma
 * read lock and then drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}
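
/*
 * Example usage (an illustrative sketch only; "addr" and the find_vma()
 * lookup are placeholders for whatever lookup the caller already does):
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);
 *		... operate on vma under its per-VMA read lock ...
 *		vma_end_read(vma);
 *	} else {
 *		... keep working under the mmap read lock instead ...
 *		mmap_read_unlock(mm);
 *	}
 */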

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}

/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
static inline bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
{
	mmap_assert_write_locked(vma->vm_mm);

	/*
	 * The current task is holding mmap_write_lock, so neither
	 * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
	 */
	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
	return (vma->vm_lock_seq == *mm_lock_seq);
}

int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
		int state);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return;

	__vma_start_write(vma, mm_lock_seq, TASK_UNINTERRUPTIBLE);
}
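
/*
 * Example usage (an illustrative sketch; the find_vma() lookup and the
 * "modify" step stand in for whatever the caller actually does):
 *
 *	mmap_write_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma) {
 *		vma_start_write(vma);
 *		... modify the VMA; per-VMA readers are now excluded ...
 *	}
 *	mmap_write_unlock(mm);	// also drops all VMA write locks
 */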

/**
 * vma_start_write_killable - Begin writing to a VMA.
 * @vma: The VMA we are going to modify.
 *
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 *
 * Context: May sleep while waiting for readers to drop the vma read lock.
 * Caller must already hold the mmap_lock for write.
 *
 * Return: 0 for a successful acquisition.  -EINTR if a fatal signal was
 * received.
 */
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	if (__is_vma_write_locked(vma, &mm_lock_seq))
		return 0;
	return __vma_start_write(vma, mm_lock_seq, TASK_KILLABLE);
}
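
/*
 * Example usage (an illustrative sketch; error handling is reduced to the
 * minimum needed to show the -EINTR case):
 *
 *	mmap_write_lock(mm);
 *	if (vma_start_write_killable(vma)) {
 *		mmap_write_unlock(mm);
 *		return -EINTR;		// interrupted by a fatal signal
 *	}
 *	... modify the VMA ...
 *	mmap_write_unlock(mm);
 */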

static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int mm_lock_seq;

	VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
		      !__is_vma_write_locked(vma, &mm_lock_seq), vma);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void vma_mark_detached(struct vm_area_struct *vma);

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);

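/*
 * Example usage of lock_vma_under_rcu() (an illustrative sketch modelled on
 * the page fault fast path; the label and the fault handling are placeholders):
 *
 *	vma = lock_vma_under_rcu(mm, address);
 *	if (!vma)
 *		goto fall_back_to_mmap_lock;
 *	... handle the fault against this read-locked VMA ...
 *	vma_end_read(vma);
 */
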
/*
 * Locks the next vma pointed to by the iterator. Confirms the locked vma has
 * not been modified and will retry under mmap_lock protection if a
 * modification was detected. Should be called from within an RCU read
 * section. Returns either a valid locked VMA, NULL if there are no more VMAs,
 * or -EINTR if the process was interrupted.
 */
struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
				     struct vma_iterator *iter,
				     unsigned long address);
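
/*
 * Example iteration step (an illustrative sketch; the vma_iterator setup and
 * the per-VMA work are placeholders, and error handling is elided):
 *
 *	rcu_read_lock();
 *	vma = lock_next_vma(mm, &iter, addr);
 *	if (!IS_ERR_OR_NULL(vma)) {
 *		... inspect the read-locked vma ...
 *		vma_end_read(vma);
 *	}
 *	rcu_read_unlock();
 */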

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma) { return 0; }
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
		{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
		unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}
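
/*
 * Example usage (an illustrative sketch; the work between lock and unlock is
 * a placeholder):
 *
 *	if (mmap_write_lock_killable(mm))
 *		return -EINTR;
 *	... modify the address space ...
 *	mmap_write_unlock(mm);
 */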

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}
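
/*
 * Example usage of mmap_write_downgrade() (an illustrative sketch; the work
 * done under each lock mode is a placeholder):
 *
 *	mmap_write_lock(mm);
 *	... set up or adjust VMAs ...
 *	mmap_write_downgrade(mm);
 *	... longer-running work that only needs the read lock ...
 *	mmap_read_unlock(mm);
 */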

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}
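
/*
 * Example usage (an illustrative sketch; the VMA walk is a placeholder):
 *
 *	mmap_read_lock(mm);
 *	... walk or inspect VMAs; the layout cannot change underneath us ...
 *	mmap_read_unlock(mm);
 */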

static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}

static inline bool mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}
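
/*
 * Example usage of mmap_read_trylock() (an illustrative sketch; the fallback
 * policy is a placeholder and depends on the caller):
 *
 *	if (!mmap_read_trylock(mm))
 *		return -EAGAIN;		// or block with mmap_read_lock(mm)
 *	... read-only access to the address space ...
 *	mmap_read_unlock(mm);
 */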

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))

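/*
 * Example usage of the guard (an illustrative sketch; see linux/cleanup.h for
 * the guard()/scoped_guard() machinery itself):
 *
 *	guard(mmap_read_lock)(mm);	// unlocked automatically at scope exit
 *
 * or:
 *
 *	scoped_guard(mmap_read_lock, mm) {
 *		... read-only access to the address space ...
 *	}
 */
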
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */