xref: /linux/tools/testing/vma/include/dup.h (revision eeccf287a2a517954b57cf9d733b3cf5d47afa34)
1 /* SPDX-License-Identifier: GPL-2.0+ */
2 
3 #pragma once
4 
5 /* Forward declarations to avoid header cycle. */
6 struct vm_area_struct;
7 static inline void vma_start_write(struct vm_area_struct *vma);
8 
9 extern const struct vm_operations_struct vma_dummy_vm_ops;
10 extern unsigned long stack_guard_gap;
12 extern unsigned long rlimit(unsigned int limit);
13 struct task_struct *get_current(void);
14 
15 #define MMF_HAS_MDWE	28
16 #define current get_current()
17 
18 /*
19  * Define the task command name length as enum, then it can be visible to
20  * BPF programs.
21  */
22 enum {
23 	TASK_COMM_LEN = 16,
24 };
25 
26 /* PARTIALLY implemented types. */
27 struct mm_struct {
28 	struct maple_tree mm_mt;
29 	int map_count;			/* number of VMAs */
30 	unsigned long total_vm;	   /* Total pages mapped */
31 	unsigned long locked_vm;   /* Pages that have PG_mlocked set */
32 	unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
33 	unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
34 	unsigned long stack_vm;	   /* VM_STACK */
35 
36 	unsigned long def_flags;
37 
38 	mm_flags_t flags; /* Must use mm_flags_* helpers to access */
39 };
40 struct address_space {
41 	struct rb_root_cached	i_mmap;
42 	unsigned long		flags;
43 	atomic_t		i_mmap_writable;
44 };
45 struct file_operations {
46 	int (*mmap)(struct file *, struct vm_area_struct *);
47 	int (*mmap_prepare)(struct vm_area_desc *);
48 };
49 struct file {
50 	struct address_space	*f_mapping;
51 	const struct file_operations	*f_op;
52 };
53 struct anon_vma_chain {
54 	struct anon_vma *anon_vma;
55 	struct list_head same_vma;
56 };
57 struct task_struct {
58 	char comm[TASK_COMM_LEN];
59 	pid_t pid;
60 	struct mm_struct *mm;
61 
62 	/* Used for emulating ABI behavior of previous Linux versions: */
63 	unsigned int			personality;
64 };
65 
66 struct kref {
67 	refcount_t refcount;
68 };
69 
70 struct anon_vma_name {
71 	struct kref kref;
72 	/* The name needs to be at the end because it is dynamically sized. */
73 	char name[];
74 };
75 
76 /*
77  * Contains declarations that are DUPLICATED from kernel source in order to
78  * facilitate userland VMA testing.
79  *
80  * These must be kept in sync with kernel source.
81  */
82 
83 #define VMA_LOCK_OFFSET	0x40000000
84 
85 typedef struct { unsigned long v; } freeptr_t;
86 
87 #define VM_NONE		0x00000000
88 
89 typedef int __bitwise vma_flag_t;
90 
91 #define ACCESS_PRIVATE(p, member) ((p)->member)
92 
93 #define DECLARE_VMA_BIT(name, bitnum) \
94 	VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
95 #define DECLARE_VMA_BIT_ALIAS(name, aliased) \
96 	VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
97 enum {
98 	DECLARE_VMA_BIT(READ, 0),
99 	DECLARE_VMA_BIT(WRITE, 1),
100 	DECLARE_VMA_BIT(EXEC, 2),
101 	DECLARE_VMA_BIT(SHARED, 3),
102 	/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
103 	DECLARE_VMA_BIT(MAYREAD, 4),	/* limits for mprotect() etc. */
104 	DECLARE_VMA_BIT(MAYWRITE, 5),
105 	DECLARE_VMA_BIT(MAYEXEC, 6),
106 	DECLARE_VMA_BIT(MAYSHARE, 7),
107 	DECLARE_VMA_BIT(GROWSDOWN, 8),	/* general info on the segment */
108 #ifdef CONFIG_MMU
109 	DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
110 #else
111 	/* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
112 	DECLARE_VMA_BIT(MAYOVERLAY, 9),
113 #endif /* CONFIG_MMU */
114 	/* Page-ranges managed without "struct page", just pure PFN */
115 	DECLARE_VMA_BIT(PFNMAP, 10),
116 	DECLARE_VMA_BIT(MAYBE_GUARD, 11),
117 	DECLARE_VMA_BIT(UFFD_WP, 12),	/* wrprotect pages tracking */
118 	DECLARE_VMA_BIT(LOCKED, 13),
119 	DECLARE_VMA_BIT(IO, 14),	/* Memory mapped I/O or similar */
120 	DECLARE_VMA_BIT(SEQ_READ, 15),	/* App will access data sequentially */
121 	DECLARE_VMA_BIT(RAND_READ, 16),	/* App will not benefit from clustered reads */
122 	DECLARE_VMA_BIT(DONTCOPY, 17),	/* Do not copy this vma on fork */
123 	DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
124 	DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
125 	DECLARE_VMA_BIT(ACCOUNT, 20),	/* Is a VM accounted object */
126 	DECLARE_VMA_BIT(NORESERVE, 21),	/* should the VM suppress accounting */
127 	DECLARE_VMA_BIT(HUGETLB, 22),	/* Huge TLB Page VM */
128 	DECLARE_VMA_BIT(SYNC, 23),	/* Synchronous page faults */
129 	DECLARE_VMA_BIT(ARCH_1, 24),	/* Architecture-specific flag */
130 	DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
131 	DECLARE_VMA_BIT(DONTDUMP, 26),	/* Do not include in the core dump */
132 	DECLARE_VMA_BIT(SOFTDIRTY, 27),	/* NOT soft dirty clean area */
133 	DECLARE_VMA_BIT(MIXEDMAP, 28),	/* Can contain struct page and pure PFN pages */
134 	DECLARE_VMA_BIT(HUGEPAGE, 29),	/* MADV_HUGEPAGE marked this vma */
135 	DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
136 	DECLARE_VMA_BIT(MERGEABLE, 31),	/* KSM may merge identical pages */
137 	/* These bits are reused, we define specific uses below. */
138 	DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
139 	DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
140 	DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
141 	DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
142 	DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
143 	DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
144 	DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
145 	/*
146 	 * This flag is used to connect VFIO to arch specific KVM code. It
147 	 * indicates that the memory under this VMA is safe for use with any
148 	 * non-cachable memory type inside KVM. Some VFIO devices, on some
149 	 * platforms, are thought to be unsafe and can cause machine crashes
150 	 * if KVM does not lock down the memory type.
151 	 */
152 	DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
153 #ifdef CONFIG_PPC32
154 	DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
155 #else
156 	DECLARE_VMA_BIT(DROPPABLE, 40),
157 #endif
158 	DECLARE_VMA_BIT(UFFD_MINOR, 41),
159 	DECLARE_VMA_BIT(SEALED, 42),
160 	/* Flags that reuse flags above. */
161 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
162 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
163 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
164 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
165 	DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
166 #if defined(CONFIG_X86_USER_SHADOW_STACK)
167 	/*
168 	 * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
169 	 * support in core mm.
170 	 *
171 	 * These VMAs will get a single end guard page. This helps userspace
172 	 * protect itself from attacks. A single page is enough for current
173 	 * shadow stack archs (x86). See the comments near alloc_shstk() in
174 	 * arch/x86/kernel/shstk.c for more details on the guard size.
175 	 */
176 	DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
177 #elif defined(CONFIG_ARM64_GCS)
178 	/*
179 	 * arm64's Guarded Control Stack implements similar functionality and
180 	 * has similar constraints to shadow stacks.
181 	 */
182 	DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
183 #endif
184 	DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1),		/* Strong Access Ordering (powerpc) */
185 	DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1),		/* parisc */
186 	DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1),	/* sparc64 */
187 	DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1),	/* arm64 */
188 	DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1),	/* sparc64, arm64 */
189 	DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1),	/* !CONFIG_MMU */
190 	DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4),	/* arm64 */
191 	DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
192 #ifdef CONFIG_STACK_GROWSUP
193 	DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
194 	DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
195 #else
196 	DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
197 #endif
198 };
199 
200 #define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
201 #define VM_READ		INIT_VM_FLAG(READ)
202 #define VM_WRITE	INIT_VM_FLAG(WRITE)
203 #define VM_EXEC		INIT_VM_FLAG(EXEC)
204 #define VM_SHARED	INIT_VM_FLAG(SHARED)
205 #define VM_MAYREAD	INIT_VM_FLAG(MAYREAD)
206 #define VM_MAYWRITE	INIT_VM_FLAG(MAYWRITE)
207 #define VM_MAYEXEC	INIT_VM_FLAG(MAYEXEC)
208 #define VM_MAYSHARE	INIT_VM_FLAG(MAYSHARE)
209 #define VM_GROWSDOWN	INIT_VM_FLAG(GROWSDOWN)
210 #ifdef CONFIG_MMU
211 #define VM_UFFD_MISSING	INIT_VM_FLAG(UFFD_MISSING)
212 #else
213 #define VM_UFFD_MISSING	VM_NONE
214 #define VM_MAYOVERLAY	INIT_VM_FLAG(MAYOVERLAY)
215 #endif
216 #define VM_PFNMAP	INIT_VM_FLAG(PFNMAP)
217 #define VM_MAYBE_GUARD	INIT_VM_FLAG(MAYBE_GUARD)
218 #define VM_UFFD_WP	INIT_VM_FLAG(UFFD_WP)
219 #define VM_LOCKED	INIT_VM_FLAG(LOCKED)
220 #define VM_IO		INIT_VM_FLAG(IO)
221 #define VM_SEQ_READ	INIT_VM_FLAG(SEQ_READ)
222 #define VM_RAND_READ	INIT_VM_FLAG(RAND_READ)
223 #define VM_DONTCOPY	INIT_VM_FLAG(DONTCOPY)
224 #define VM_DONTEXPAND	INIT_VM_FLAG(DONTEXPAND)
225 #define VM_LOCKONFAULT	INIT_VM_FLAG(LOCKONFAULT)
226 #define VM_ACCOUNT	INIT_VM_FLAG(ACCOUNT)
227 #define VM_NORESERVE	INIT_VM_FLAG(NORESERVE)
228 #define VM_HUGETLB	INIT_VM_FLAG(HUGETLB)
229 #define VM_SYNC		INIT_VM_FLAG(SYNC)
230 #define VM_ARCH_1	INIT_VM_FLAG(ARCH_1)
231 #define VM_WIPEONFORK	INIT_VM_FLAG(WIPEONFORK)
232 #define VM_DONTDUMP	INIT_VM_FLAG(DONTDUMP)
233 #ifdef CONFIG_MEM_SOFT_DIRTY
234 #define VM_SOFTDIRTY	INIT_VM_FLAG(SOFTDIRTY)
235 #else
236 #define VM_SOFTDIRTY	VM_NONE
237 #endif
238 #define VM_MIXEDMAP	INIT_VM_FLAG(MIXEDMAP)
239 #define VM_HUGEPAGE	INIT_VM_FLAG(HUGEPAGE)
240 #define VM_NOHUGEPAGE	INIT_VM_FLAG(NOHUGEPAGE)
241 #define VM_MERGEABLE	INIT_VM_FLAG(MERGEABLE)
242 #define VM_STACK	INIT_VM_FLAG(STACK)
243 #ifdef CONFIG_STACK_GROWSUP
244 #define VM_STACK_EARLY	INIT_VM_FLAG(STACK_EARLY)
245 #else
246 #define VM_STACK_EARLY	VM_NONE
247 #endif
248 #ifdef CONFIG_ARCH_HAS_PKEYS
249 #define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
250 /* Despite the naming, these are FLAGS not bits. */
251 #define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
252 #define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
253 #define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
254 #if CONFIG_ARCH_PKEY_BITS > 3
255 #define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
256 #else
257 #define VM_PKEY_BIT3  VM_NONE
258 #endif /* CONFIG_ARCH_PKEY_BITS > 3 */
259 #if CONFIG_ARCH_PKEY_BITS > 4
260 #define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
261 #else
262 #define VM_PKEY_BIT4  VM_NONE
263 #endif /* CONFIG_ARCH_PKEY_BITS > 4 */
264 #endif /* CONFIG_ARCH_HAS_PKEYS */
265 #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
266 #define VM_SHADOW_STACK	INIT_VM_FLAG(SHADOW_STACK)
267 #else
268 #define VM_SHADOW_STACK	VM_NONE
269 #endif
270 #if defined(CONFIG_PPC64)
271 #define VM_SAO		INIT_VM_FLAG(SAO)
272 #elif defined(CONFIG_PARISC)
273 #define VM_GROWSUP	INIT_VM_FLAG(GROWSUP)
274 #elif defined(CONFIG_SPARC64)
275 #define VM_SPARC_ADI	INIT_VM_FLAG(SPARC_ADI)
276 #define VM_ARCH_CLEAR	INIT_VM_FLAG(ARCH_CLEAR)
277 #elif defined(CONFIG_ARM64)
278 #define VM_ARM64_BTI	INIT_VM_FLAG(ARM64_BTI)
279 #define VM_ARCH_CLEAR	INIT_VM_FLAG(ARCH_CLEAR)
280 #elif !defined(CONFIG_MMU)
281 #define VM_MAPPED_COPY	INIT_VM_FLAG(MAPPED_COPY)
282 #endif
283 #ifndef VM_GROWSUP
284 #define VM_GROWSUP	VM_NONE
285 #endif
286 #ifdef CONFIG_ARM64_MTE
287 #define VM_MTE		INIT_VM_FLAG(MTE)
288 #define VM_MTE_ALLOWED	INIT_VM_FLAG(MTE_ALLOWED)
289 #else
290 #define VM_MTE		VM_NONE
291 #define VM_MTE_ALLOWED	VM_NONE
292 #endif
293 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
294 #define VM_UFFD_MINOR	INIT_VM_FLAG(UFFD_MINOR)
295 #else
296 #define VM_UFFD_MINOR	VM_NONE
297 #endif
298 #ifdef CONFIG_64BIT
299 #define VM_ALLOW_ANY_UNCACHED	INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
300 #define VM_SEALED		INIT_VM_FLAG(SEALED)
301 #else
302 #define VM_ALLOW_ANY_UNCACHED	VM_NONE
303 #define VM_SEALED		VM_NONE
304 #endif
305 #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
306 #define VM_DROPPABLE		INIT_VM_FLAG(DROPPABLE)
307 #else
308 #define VM_DROPPABLE		VM_NONE
309 #endif
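
/*
 * Illustrative note (not part of the kernel header): INIT_VM_FLAG() simply
 * expands to BIT() of the bit number declared in the enum above, so e.g.:
 *
 *	VM_READ    == BIT(0) == 0x00000001
 *	VM_WRITE   == BIT(1) == 0x00000002
 *	VM_MAYREAD == BIT(4) == 0x00000010
 *
 * which is why mprotect() can rely on VM_MAYREAD >> 4 == VM_READ, as noted
 * in the enum comment.
 */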
310 
311 /* Bits set in the VMA until the stack is in its final location */
312 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
313 
314 #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
315 
316 /* Common data flag combinations */
317 #define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
318 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
319 #define VM_DATA_FLAGS_NON_EXEC	(VM_READ | VM_WRITE | VM_MAYREAD | \
320 				 VM_MAYWRITE | VM_MAYEXEC)
321 #define VM_DATA_FLAGS_EXEC	(VM_READ | VM_WRITE | VM_EXEC | \
322 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
323 
324 #ifndef VM_DATA_DEFAULT_FLAGS		/* arch can override this */
325 #define VM_DATA_DEFAULT_FLAGS  VM_DATA_FLAGS_EXEC
326 #endif
327 
328 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
329 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
330 #endif
331 
332 #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
333 
334 #define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
335 
336 /* VMA basic access permission flags */
337 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
338 
339 /*
340  * Special vmas that are non-mergable, non-mlock()able.
341  */
342 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
343 
344 #define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
345 #define TASK_SIZE_LOW		DEFAULT_MAP_WINDOW
346 #define TASK_SIZE_MAX		DEFAULT_MAP_WINDOW
347 #define STACK_TOP		TASK_SIZE_LOW
348 #define STACK_TOP_MAX		TASK_SIZE_MAX
349 
350 /* This mask represents all the VMA flag bits used by mlock */
351 #define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)
352 
358 #define RLIMIT_STACK		3	/* max stack size */
359 #define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */
360 
361 #define CAP_IPC_LOCK         14
362 
363 #define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
364 
365 #define VM_IGNORE_MERGE VM_STICKY
366 
367 #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
368 
369 #define pgprot_val(x)		((x).pgprot)
370 #define __pgprot(x)		((pgprot_t) { (x) } )
371 
372 #define for_each_vma(__vmi, __vma)					\
373 	while (((__vma) = vma_next(&(__vmi))) != NULL)
374 
375 /* The MM code likes to work with exclusive end addresses */
376 #define for_each_vma_range(__vmi, __vma, __end)				\
377 	while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
378 
379 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
380 
381 #define PHYS_PFN(x)	((unsigned long)((x) >> PAGE_SHIFT))
382 
383 #define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
384 #define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)
385 
386 #define AS_MM_ALL_LOCKS 2
387 
388 #define swap(a, b) \
389 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
390 
391 /*
392  * Flags for bug emulation.
393  *
394  * These occupy the top three bytes.
395  */
396 enum {
397 	READ_IMPLIES_EXEC =	0x0400000,
398 };
399 
400 struct vma_iterator {
401 	struct ma_state mas;
402 };
403 
404 #define VMA_ITERATOR(name, __mm, __addr)				\
405 	struct vma_iterator name = {					\
406 		.mas = {						\
407 			.tree = &(__mm)->mm_mt,				\
408 			.index = __addr,				\
409 			.node = NULL,					\
410 			.status = ma_start,				\
411 		},							\
412 	}
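
/*
 * Illustrative sketch (not part of the kernel header) of how the iterator is
 * typically combined with for_each_vma() above; assumes the caller already
 * holds the appropriate mmap lock for @mm, and process() is a hypothetical
 * per-VMA callback:
 *
 *	struct vm_area_struct *vma;
 *	VMA_ITERATOR(vmi, mm, 0);
 *
 *	for_each_vma(vmi, vma)
 *		process(vma);
 */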
413 
414 #define DEFINE_MUTEX(mutexname) \
415 	struct mutex mutexname = {}
416 
417 #define DECLARE_BITMAP(name, bits) \
418 	unsigned long name[BITS_TO_LONGS(bits)]
419 
420 #define EMPTY_VMA_FLAGS ((vma_flags_t){ })
421 
422 /* What action should be taken after an .mmap_prepare call is complete? */
423 enum mmap_action_type {
424 	MMAP_NOTHING,		/* Mapping is complete, no further action. */
425 	MMAP_REMAP_PFN,		/* Remap PFN range. */
426 	MMAP_IO_REMAP_PFN,	/* I/O remap PFN range. */
427 };
428 
429 /*
430  * Describes an action an mmap_prepare hook can instruct to be taken to complete
431  * the mapping of a VMA. Specified in vm_area_desc.
432  */
433 struct mmap_action {
434 	union {
435 		/* Remap range. */
436 		struct {
437 			unsigned long start;
438 			unsigned long start_pfn;
439 			unsigned long size;
440 			pgprot_t pgprot;
441 		} remap;
442 	};
443 	enum mmap_action_type type;
444 
445 	/*
446 	 * If specified, this hook is invoked after the selected action has been
447 	 * successfully completed. Note that the VMA write lock is still held.
448 	 *
449 	 * The absolute minimum ought to be done here.
450 	 *
451 	 * Returns 0 on success, or an error code.
452 	 */
453 	int (*success_hook)(const struct vm_area_struct *vma);
454 
455 	/*
456 	 * If specified, this hook is invoked when an error occurs while
457 	 * attempting the selected action.
458 	 *
459 	 * The hook can return an error code in order to filter the error, but
460 	 * it is not valid to clear the error here.
461 	 */
462 	int (*error_hook)(int err);
463 
464 	/*
465 	 * This should be set in rare instances where the operation requires
466 	 * that the rmap not be able to access the VMA until the VMA is
467 	 * completely set up.
468 	 */
469 	bool hide_from_rmap_until_complete :1;
470 };
471 
472 /* Operations which modify VMAs. */
473 enum vma_operation {
474 	VMA_OP_SPLIT,
475 	VMA_OP_MERGE_UNFAULTED,
476 	VMA_OP_REMAP,
477 	VMA_OP_FORK,
478 };
479 
480 /*
481  * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
482  * manipulate mutable fields which will cause those fields to be updated in the
483  * resultant VMA.
484  *
485  * Helper functions are not required for manipulating any field.
486  */
487 struct vm_area_desc {
488 	/* Immutable state. */
489 	const struct mm_struct *const mm;
490 	struct file *const file; /* May vary from vm_file in stacked callers. */
491 	unsigned long start;
492 	unsigned long end;
493 
494 	/* Mutable fields. Populated with initial state. */
495 	pgoff_t pgoff;
496 	struct file *vm_file;
497 	union {
498 		vm_flags_t vm_flags;
499 		vma_flags_t vma_flags;
500 	};
501 	pgprot_t page_prot;
502 
503 	/* Write-only fields. */
504 	const struct vm_operations_struct *vm_ops;
505 	void *private_data;
506 
507 	/* Take further action? */
508 	struct mmap_action action;
509 };
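
/*
 * Illustrative sketch (not taken from kernel source) of an .mmap_prepare hook
 * filling in the descriptor above; my_mmap_prepare(), my_pfn and the flag
 * choices are hypothetical:
 *
 *	static int my_mmap_prepare(struct vm_area_desc *desc)
 *	{
 *		desc->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND;
 *		desc->action.type = MMAP_REMAP_PFN;
 *		desc->action.remap.start = desc->start;
 *		desc->action.remap.start_pfn = my_pfn;
 *		desc->action.remap.size = desc->end - desc->start;
 *		desc->action.remap.pgprot = desc->page_prot;
 *		return 0;
 *	}
 */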
510 
511 struct vm_area_struct {
512 	/* The first cache line has the info for VMA tree walking. */
513 
514 	union {
515 		struct {
516 			/* VMA covers [vm_start; vm_end) addresses within mm */
517 			unsigned long vm_start;
518 			unsigned long vm_end;
519 		};
520 		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
521 	};
522 
523 	struct mm_struct *vm_mm;	/* The address space we belong to. */
524 	pgprot_t vm_page_prot;          /* Access permissions of this VMA. */
525 
526 	/*
527 	 * Flags, see mm.h.
528 	 * To modify use vm_flags_{init|reset|set|clear|mod} functions.
529 	 */
530 	union {
531 		const vm_flags_t vm_flags;
532 		vma_flags_t flags;
533 	};
534 
535 #ifdef CONFIG_PER_VMA_LOCK
536 	/*
537 	 * Can only be written (using WRITE_ONCE()) while holding both:
538 	 *  - mmap_lock (in write mode)
539 	 *  - vm_refcnt bit at VMA_LOCK_OFFSET is set
540 	 * Can be read reliably while holding one of:
541 	 *  - mmap_lock (in read or write mode)
542 	 *  - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
543 	 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
544 	 * while holding nothing (except RCU to keep the VMA struct allocated).
545 	 *
546 	 * This sequence counter is explicitly allowed to overflow; sequence
547 	 * counter reuse can only lead to occasional unnecessary use of the
548 	 * slowpath.
549 	 */
550 	unsigned int vm_lock_seq;
551 #endif
552 
553 	/*
554 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
555 	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
556 	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
557 	 * or brk vma (with NULL file) can only be in an anon_vma list.
558 	 */
559 	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
560 					  * page_table_lock */
561 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
562 
563 	/* Function pointers to deal with this struct. */
564 	const struct vm_operations_struct *vm_ops;
565 
566 	/* Information about our backing store: */
567 	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
568 					   units */
569 	struct file * vm_file;		/* File we map to (can be NULL). */
570 	void * vm_private_data;		/* was vm_pte (shared mem) */
571 
572 #ifdef CONFIG_SWAP
573 	atomic_long_t swap_readahead_info;
574 #endif
575 #ifndef CONFIG_MMU
576 	struct vm_region *vm_region;	/* NOMMU mapping region */
577 #endif
578 #ifdef CONFIG_NUMA
579 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
580 #endif
581 #ifdef CONFIG_NUMA_BALANCING
582 	struct vma_numab_state *numab_state;	/* NUMA Balancing state */
583 #endif
584 #ifdef CONFIG_PER_VMA_LOCK
585 	/* Unstable RCU readers are allowed to read this. */
586 	refcount_t vm_refcnt;
587 #endif
588 	/*
589 	 * For areas with an address space and backing store,
590 	 * linkage into the address_space->i_mmap interval tree.
591 	 *
592 	 */
593 	struct {
594 		struct rb_node rb;
595 		unsigned long rb_subtree_last;
596 	} shared;
597 #ifdef CONFIG_ANON_VMA_NAME
598 	/*
599 	 * For private and shared anonymous mappings, a pointer to a null
600 	 * terminated string containing the name given to the vma, or NULL if
601 	 * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
602 	 */
603 	struct anon_vma_name *anon_name;
604 #endif
605 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
606 } __randomize_layout;
607 
608 struct vm_operations_struct {
609 	void (*open)(struct vm_area_struct * area);
610 	/**
611 	 * @close: Called when the VMA is being removed from the MM.
612 	 * Context: User context.  May sleep.  Caller holds mmap_lock.
613 	 */
614 	void (*close)(struct vm_area_struct * area);
615 	/* Called any time before splitting to check if it's allowed */
616 	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
617 	int (*mremap)(struct vm_area_struct *area);
618 	/*
619 	 * Called by mprotect() to make driver-specific permission
620 	 * checks before mprotect() is finalised.   The VMA must not
621 	 * be modified.  Returns 0 if mprotect() can proceed.
622 	 */
623 	int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
624 			unsigned long end, unsigned long newflags);
625 	vm_fault_t (*fault)(struct vm_fault *vmf);
626 	vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
627 	vm_fault_t (*map_pages)(struct vm_fault *vmf,
628 			pgoff_t start_pgoff, pgoff_t end_pgoff);
629 	unsigned long (*pagesize)(struct vm_area_struct * area);
630 
631 	/* notification that a previously read-only page is about to become
632 	 * writable, if an error is returned it will cause a SIGBUS */
633 	vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
634 
635 	/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
636 	vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
637 
638 	/* called by access_process_vm when get_user_pages() fails, typically
639 	 * for use by special VMAs. See also generic_access_phys() for a generic
640 	 * implementation useful for any iomem mapping.
641 	 */
642 	int (*access)(struct vm_area_struct *vma, unsigned long addr,
643 		      void *buf, int len, int write);
644 
645 	/* Called by the /proc/PID/maps code to ask the vma whether it
646 	 * has a special name.  Returning non-NULL will also cause this
647 	 * vma to be dumped unconditionally. */
648 	const char *(*name)(struct vm_area_struct *vma);
649 
650 #ifdef CONFIG_NUMA
651 	/*
652 	 * set_policy() op must add a reference to any non-NULL @new mempolicy
653 	 * to hold the policy upon return.  Caller should pass NULL @new to
654 	 * remove a policy and fall back to surrounding context--i.e. do not
655 	 * install a MPOL_DEFAULT policy, nor the task or system default
656 	 * mempolicy.
657 	 */
658 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
659 
660 	/*
661 	 * get_policy() op must add reference [mpol_get()] to any policy at
662 	 * (vma,addr) marked as MPOL_SHARED.  The shared policy infrastructure
663 	 * in mm/mempolicy.c will do this automatically.
664 	 * get_policy() must NOT add a ref if the policy at (vma,addr) is not
665 	 * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
666 	 * If no [shared/vma] mempolicy exists at the addr, get_policy() op
667 	 * must return NULL--i.e., do not "fallback" to task or system default
668 	 * policy.
669 	 */
670 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
671 					unsigned long addr, pgoff_t *ilx);
672 #endif
673 #ifdef CONFIG_FIND_NORMAL_PAGE
674 	/*
675 	 * Called by vm_normal_page() for special PTEs in @vma at @addr. This
676 	 * allows for returning a "normal" page from vm_normal_page() even
677 	 * though the PTE indicates that the "struct page" either does not exist
678 	 * or should not be touched: "special".
679 	 *
680 	 * Do not add new users: this really only works when a "normal" page
681 	 * was mapped, but then the PTE got changed to something weird (+
682 	 * marked special) that would not make pte_pfn() identify the originally
683 	 * inserted page.
684 	 */
685 	struct page *(*find_normal_page)(struct vm_area_struct *vma,
686 					 unsigned long addr);
687 #endif /* CONFIG_FIND_NORMAL_PAGE */
688 };
689 
690 struct vm_unmapped_area_info {
691 #define VM_UNMAPPED_AREA_TOPDOWN 1
692 	unsigned long flags;
693 	unsigned long length;
694 	unsigned long low_limit;
695 	unsigned long high_limit;
696 	unsigned long align_mask;
697 	unsigned long align_offset;
698 	unsigned long start_gap;
699 };
700 
701 struct pagetable_move_control {
702 	struct vm_area_struct *old; /* Source VMA. */
703 	struct vm_area_struct *new; /* Destination VMA. */
704 	unsigned long old_addr; /* Address from which the move begins. */
705 	unsigned long old_end; /* Exclusive address at which old range ends. */
706 	unsigned long new_addr; /* Address to move page tables to. */
707 	unsigned long len_in; /* Bytes to remap specified by user. */
708 
709 	bool need_rmap_locks; /* Do rmap locks need to be taken? */
710 	bool for_stack; /* Is this an early temp stack being moved? */
711 };
712 
713 #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)	\
714 	struct pagetable_move_control name = {				\
715 		.old = old_,						\
716 		.new = new_,						\
717 		.old_addr = old_addr_,					\
718 		.old_end = (old_addr_) + (len_),			\
719 		.new_addr = new_addr_,					\
720 		.len_in = len_,						\
721 	}
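
/*
 * Illustrative usage (not from kernel source), assuming hypothetical source
 * and destination VMAs for a mremap()-style move of len bytes:
 *
 *	PAGETABLE_MOVE(pmc, old_vma, new_vma, old_addr, new_addr, len);
 *
 * pmc.old_end is derived as old_addr + len, and pmc.need_rmap_locks /
 * pmc.for_stack default to false.
 */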
722 
723 static inline void vma_iter_invalidate(struct vma_iterator *vmi)
724 {
725 	mas_pause(&vmi->mas);
726 }
727 
728 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
729 {
730 	return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
731 }
732 
733 static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
734 {
735 	return __pgprot(vm_flags);
736 }
737 
738 static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
739 {
740 	return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
741 }
742 
743 /*
744  * Copy value to the first system word of VMA flags, non-atomically.
745  *
746  * IMPORTANT: This does not overwrite bytes past the first system word. The
747  * caller must account for this.
748  */
749 static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
750 {
751 	*ACCESS_PRIVATE(flags, __vma_flags) = value;
752 }
753 
754 /*
755  * Copy value to the first system word of VMA flags ONCE, non-atomically.
756  *
757  * IMPORTANT: This does not overwrite bytes past the first system word. The
758  * caller must account for this.
759  */
760 static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
761 {
762 	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
763 
764 	WRITE_ONCE(*bitmap, value);
765 }
766 
767 /* Update the first system word of VMA flags setting bits, non-atomically. */
768 static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
769 {
770 	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
771 
772 	*bitmap |= value;
773 }
774 
775 /* Update the first system word of VMA flags clearing bits, non-atomically. */
776 static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
777 {
778 	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
779 
780 	*bitmap &= ~value;
781 }
782 
783 static inline void vma_flags_clear_all(vma_flags_t *flags)
784 {
785 	bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
786 }
787 
788 static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
789 {
790 	unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
791 
792 	__set_bit((__force int)bit, bitmap);
793 }
794 
795 /* Use when VMA is not part of the VMA tree and needs no locking */
796 static inline void vm_flags_init(struct vm_area_struct *vma,
797 				 vm_flags_t flags)
798 {
799 	vma_flags_clear_all(&vma->flags);
800 	vma_flags_overwrite_word(&vma->flags, flags);
801 }
802 
803 /*
804  * Use when VMA is part of the VMA tree and modifications need coordination
805  * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
806  * it should be locked explicitly beforehand.
807  */
808 static inline void vm_flags_reset(struct vm_area_struct *vma,
809 				  vm_flags_t flags)
810 {
811 	vma_assert_write_locked(vma);
812 	vm_flags_init(vma, flags);
813 }
814 
815 static inline void vm_flags_reset_once(struct vm_area_struct *vma,
816 				       vm_flags_t flags)
817 {
818 	vma_assert_write_locked(vma);
819 	/*
820 	 * The user should only be interested in avoiding reordering of
821 	 * assignment to the first word.
822 	 */
823 	vma_flags_clear_all(&vma->flags);
824 	vma_flags_overwrite_word_once(&vma->flags, flags);
825 }
826 
827 static inline void vm_flags_set(struct vm_area_struct *vma,
828 				vm_flags_t flags)
829 {
830 	vma_start_write(vma);
831 	vma_flags_set_word(&vma->flags, flags);
832 }
833 
834 static inline void vm_flags_clear(struct vm_area_struct *vma,
835 				  vm_flags_t flags)
836 {
837 	vma_start_write(vma);
838 	vma_flags_clear_word(&vma->flags, flags);
839 }
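
/*
 * Illustrative contrast (not from kernel source) between the helpers above:
 * a VMA that is not yet in the VMA tree can use vm_flags_init(), whereas a
 * tree-attached VMA must be write-locked, which vm_flags_set() and
 * vm_flags_clear() do via vma_start_write():
 *
 *	vm_flags_init(new_vma, VM_READ | VM_MAYREAD);	// detached, no lock
 *	vm_flags_set(vma, VM_LOCKED);			// write-locks vma first
 */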
840 
841 static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);
842 
843 #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
844 					 (const vma_flag_t []){__VA_ARGS__})
845 
846 static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
847 		vma_flags_t to_test)
848 {
849 	const unsigned long *bitmap = flags->__vma_flags;
850 	const unsigned long *bitmap_to_test = to_test.__vma_flags;
851 
852 	return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
853 }
854 
855 #define vma_flags_test(flags, ...) \
856 	vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
857 
858 static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
859 		vma_flags_t to_test)
860 {
861 	const unsigned long *bitmap = flags->__vma_flags;
862 	const unsigned long *bitmap_to_test = to_test.__vma_flags;
863 
864 	return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
865 }
866 
867 #define vma_flags_test_all(flags, ...) \
868 	vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
869 
870 static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
871 {
872 	unsigned long *bitmap = flags->__vma_flags;
873 	const unsigned long *bitmap_to_set = to_set.__vma_flags;
874 
875 	bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
876 }
877 
878 #define vma_flags_set(flags, ...) \
879 	vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
880 
881 static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
882 {
883 	unsigned long *bitmap = flags->__vma_flags;
884 	const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
885 
886 	bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
887 }
888 
889 #define vma_flags_clear(flags, ...) \
890 	vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
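
/*
 * Illustrative sketch (not part of the kernel header) of the variadic bitmap
 * helpers above, operating on a local flag set; note vma_flags_test() is true
 * if *any* of the named bits is set:
 *
 *	vma_flags_t flags = EMPTY_VMA_FLAGS;
 *
 *	vma_flags_set(&flags, VMA_READ_BIT, VMA_MAYREAD_BIT);
 *	if (vma_flags_test(&flags, VMA_READ_BIT, VMA_WRITE_BIT))
 *		vma_flags_clear(&flags, VMA_MAYREAD_BIT);
 */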
891 
892 static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
893 					   vma_flags_t flags)
894 {
895 	return vma_flags_test_all_mask(&vma->flags, flags);
896 }
897 
898 #define vma_test_all_flags(vma, ...) \
899 	vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
900 
901 static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
902 {
903 	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
904 		(VM_SHARED | VM_MAYWRITE);
905 }
906 
907 static inline void vma_set_flags_mask(struct vm_area_struct *vma,
908 				      vma_flags_t flags)
909 {
910 	vma_flags_set_mask(&vma->flags, flags);
911 }
912 
913 #define vma_set_flags(vma, ...) \
914 	vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
915 
916 static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
917 					    vma_flags_t flags)
918 {
919 	return vma_flags_test_mask(&desc->vma_flags, flags);
920 }
921 
922 #define vma_desc_test_flags(desc, ...) \
923 	vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
924 
925 static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
926 					   vma_flags_t flags)
927 {
928 	vma_flags_set_mask(&desc->vma_flags, flags);
929 }
930 
931 #define vma_desc_set_flags(desc, ...) \
932 	vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
933 
934 static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
935 					     vma_flags_t flags)
936 {
937 	vma_flags_clear_mask(&desc->vma_flags, flags);
938 }
939 
940 #define vma_desc_clear_flags(desc, ...) \
941 	vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
942 
943 static inline bool is_shared_maywrite(const vma_flags_t *flags)
944 {
945 	return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
946 }
947 
948 static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
949 {
950 	return is_shared_maywrite(&vma->flags);
951 }
952 
953 static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
954 {
955 	/*
956 	 * Uses mas_find() to get the first VMA when the iterator starts.
957 	 * Calling mas_next() could skip the first entry.
958 	 */
959 	return mas_find(&vmi->mas, ULONG_MAX);
960 }
961 
962 /*
963  * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
964  * assertions should be made either under mmap_write_lock or when the object
965  * has been isolated under mmap_write_lock, ensuring no competing writers.
966  */
967 static inline void vma_assert_attached(struct vm_area_struct *vma)
968 {
969 	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
970 }
971 
972 static inline void vma_assert_detached(struct vm_area_struct *vma)
973 {
974 	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
975 }
976 
977 static inline void vma_assert_write_locked(struct vm_area_struct *);
978 static inline void vma_mark_attached(struct vm_area_struct *vma)
979 {
980 	vma_assert_write_locked(vma);
981 	vma_assert_detached(vma);
982 	refcount_set_release(&vma->vm_refcnt, 1);
983 }
984 
985 static inline void vma_mark_detached(struct vm_area_struct *vma)
986 {
987 	vma_assert_write_locked(vma);
988 	vma_assert_attached(vma);
989 	/* We are the only writer, so no need to use vma_refcount_put(). */
990 	if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
991 		/*
992 		 * Reader must have temporarily raised vm_refcnt but it will
993 		 * drop it without using the vma since vma is write-locked.
994 		 */
995 	}
996 }
997 
998 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
999 {
1000 	memset(vma, 0, sizeof(*vma));
1001 	vma->vm_mm = mm;
1002 	vma->vm_ops = &vma_dummy_vm_ops;
1003 	INIT_LIST_HEAD(&vma->anon_vma_chain);
1004 	vma->vm_lock_seq = UINT_MAX;
1005 }
1006 
1007 /*
1008  * These are defined in vma.h, but sadly vm_stat_account() is referenced by
1009  * kernel/fork.c, so we have to make these broadly available there, and temporarily
1010  * define them here to resolve the dependency cycle.
1011  */
1012 #define is_exec_mapping(flags) \
1013 	((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)
1014 
1015 #define is_stack_mapping(flags) \
1016 	(((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))
1017 
1018 #define is_data_mapping(flags) \
1019 	((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)
1020 
1021 static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
1022 				   long npages)
1023 {
1024 	WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
1025 
1026 	if (is_exec_mapping(flags))
1027 		mm->exec_vm += npages;
1028 	else if (is_stack_mapping(flags))
1029 		mm->stack_vm += npages;
1030 	else if (is_data_mapping(flags))
1031 		mm->data_vm += npages;
1032 }
1033 
1034 #undef is_exec_mapping
1035 #undef is_stack_mapping
1036 #undef is_data_mapping
1037 
1038 static inline void vm_unacct_memory(long pages)
1039 {
1040 	vm_acct_memory(-pages);
1041 }
1042 
1043 static inline void mapping_allow_writable(struct address_space *mapping)
1044 {
1045 	atomic_inc(&mapping->i_mmap_writable);
1046 }
1047 
1048 static inline
1049 struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
1050 {
1051 	return mas_find(&vmi->mas, max - 1);
1052 }
1053 
1054 static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
1055 			unsigned long start, unsigned long end, gfp_t gfp)
1056 {
1057 	__mas_set_range(&vmi->mas, start, end - 1);
1058 	mas_store_gfp(&vmi->mas, NULL, gfp);
1059 	if (unlikely(mas_is_err(&vmi->mas)))
1060 		return -ENOMEM;
1061 
1062 	return 0;
1063 }
1064 
1065 static inline void vma_set_anonymous(struct vm_area_struct *vma)
1066 {
1067 	vma->vm_ops = NULL;
1068 }
1069 
1070 /* Declared in vma.h. */
1071 static inline void set_vma_from_desc(struct vm_area_struct *vma,
1072 		struct vm_area_desc *desc);
1073 
1074 static inline int __compat_vma_mmap(const struct file_operations *f_op,
1075 		struct file *file, struct vm_area_struct *vma)
1076 {
1077 	struct vm_area_desc desc = {
1078 		.mm = vma->vm_mm,
1079 		.file = file,
1080 		.start = vma->vm_start,
1081 		.end = vma->vm_end,
1082 
1083 		.pgoff = vma->vm_pgoff,
1084 		.vm_file = vma->vm_file,
1085 		.vm_flags = vma->vm_flags,
1086 		.page_prot = vma->vm_page_prot,
1087 
1088 		.action.type = MMAP_NOTHING, /* Default */
1089 	};
1090 	int err;
1091 
1092 	err = f_op->mmap_prepare(&desc);
1093 	if (err)
1094 		return err;
1095 
1096 	mmap_action_prepare(&desc.action, &desc);
1097 	set_vma_from_desc(vma, &desc);
1098 	return mmap_action_complete(&desc.action, vma);
1099 }
1100 
1101 static inline int compat_vma_mmap(struct file *file,
1102 		struct vm_area_struct *vma)
1103 {
1104 	return __compat_vma_mmap(file->f_op, file, vma);
1105 }
1106 
1107 
1108 static inline void vma_iter_init(struct vma_iterator *vmi,
1109 		struct mm_struct *mm, unsigned long addr)
1110 {
1111 	mas_init(&vmi->mas, &mm->mm_mt, addr);
1112 }
1113 
1114 static inline unsigned long vma_pages(struct vm_area_struct *vma)
1115 {
1116 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
1117 }
1118 
1119 static inline void mmap_assert_locked(struct mm_struct *);
1120 static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
1121 						unsigned long start_addr,
1122 						unsigned long end_addr)
1123 {
1124 	unsigned long index = start_addr;
1125 
1126 	mmap_assert_locked(mm);
1127 	return mt_find(&mm->mm_mt, &index, end_addr - 1);
1128 }
1129 
1130 static inline
1131 struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
1132 {
1133 	return mtree_load(&mm->mm_mt, addr);
1134 }
1135 
1136 static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
1137 {
1138 	return mas_prev(&vmi->mas, 0);
1139 }
1140 
1141 static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
1142 {
1143 	mas_set(&vmi->mas, addr);
1144 }
1145 
1146 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
1147 {
1148 	return !vma->vm_ops;
1149 }
1150 
1151 /* Defined in vma.h, so temporarily define here to avoid circular dependency. */
1152 #define vma_iter_load(vmi) \
1153 	mas_walk(&(vmi)->mas)
1154 
1155 static inline struct vm_area_struct *
1156 find_vma_prev(struct mm_struct *mm, unsigned long addr,
1157 			struct vm_area_struct **pprev)
1158 {
1159 	struct vm_area_struct *vma;
1160 	VMA_ITERATOR(vmi, mm, addr);
1161 
1162 	vma = vma_iter_load(&vmi);
1163 	*pprev = vma_prev(&vmi);
1164 	if (!vma)
1165 		vma = vma_next(&vmi);
1166 	return vma;
1167 }
1168 
1169 #undef vma_iter_load
1170 
1171 static inline void vma_iter_free(struct vma_iterator *vmi)
1172 {
1173 	mas_destroy(&vmi->mas);
1174 }
1175 
1176 static inline
1177 struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
1178 {
1179 	return mas_next_range(&vmi->mas, ULONG_MAX);
1180 }
1181 
1182 bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
1183 
1184 /* Update vma->vm_page_prot to reflect vma->vm_flags. */
1185 static inline void vma_set_page_prot(struct vm_area_struct *vma)
1186 {
1187 	vm_flags_t vm_flags = vma->vm_flags;
1188 	pgprot_t vm_page_prot;
1189 
1190 	/* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
1191 	vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));
1192 
1193 	if (vma_wants_writenotify(vma, vm_page_prot)) {
1194 		vm_flags &= ~VM_SHARED;
1195 		/* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
1196 		vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
1197 	}
1198 	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
1199 	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
1200 }
1201 
1202 static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
1203 {
1204 	if (vma->vm_flags & VM_GROWSDOWN)
1205 		return stack_guard_gap;
1206 
1207 	/* See reasoning around the VM_SHADOW_STACK definition */
1208 	if (vma->vm_flags & VM_SHADOW_STACK)
1209 		return PAGE_SIZE;
1210 
1211 	return 0;
1212 }
1213 
1214 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
1215 {
1216 	unsigned long gap = stack_guard_start_gap(vma);
1217 	unsigned long vm_start = vma->vm_start;
1218 
1219 	vm_start -= gap;
1220 	if (vm_start > vma->vm_start)
1221 		vm_start = 0;
1222 	return vm_start;
1223 }
1224 
1225 static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
1226 {
1227 	unsigned long vm_end = vma->vm_end;
1228 
1229 	if (vma->vm_flags & VM_GROWSUP) {
1230 		vm_end += stack_guard_gap;
1231 		if (vm_end < vma->vm_end)
1232 			vm_end = -PAGE_SIZE;
1233 	}
1234 	return vm_end;
1235 }
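
/*
 * Worked example (not from kernel source), assuming 4KiB pages and the common
 * default stack_guard_gap of 256 pages (1MiB): for a VM_GROWSDOWN stack VMA
 * with vm_start == 0x7f0000000000, vm_start_gap() returns 0x7efffff00000,
 * i.e. the guard gap is carved out below the stack, while vm_end_gap() only
 * extends vm_end for VM_GROWSUP mappings.
 */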
1236 
1237 static inline bool vma_is_accessible(struct vm_area_struct *vma)
1238 {
1239 	return vma->vm_flags & VM_ACCESS_FLAGS;
1240 }
1241 
1242 static inline bool mlock_future_ok(const struct mm_struct *mm,
1243 		vm_flags_t vm_flags, unsigned long bytes)
1244 {
1245 	unsigned long locked_pages, limit_pages;
1246 
1247 	if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
1248 		return true;
1249 
1250 	locked_pages = bytes >> PAGE_SHIFT;
1251 	locked_pages += mm->locked_vm;
1252 
1253 	limit_pages = rlimit(RLIMIT_MEMLOCK);
1254 	limit_pages >>= PAGE_SHIFT;
1255 
1256 	return locked_pages <= limit_pages;
1257 }
1258 
1259 static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
1260 {
1261 	/* If MDWE is disabled, we have nothing to deny. */
1262 	if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
1263 		return false;
1264 
1265 	/* If the new VMA is not executable, we have nothing to deny. */
1266 	if (!(new & VM_EXEC))
1267 		return false;
1268 
1269 	/* Under MDWE we do not accept newly writably executable VMAs... */
1270 	if (new & VM_WRITE)
1271 		return true;
1272 
1273 	/* ...nor previously non-executable VMAs becoming executable. */
1274 	if (!(old & VM_EXEC))
1275 		return true;
1276 
1277 	return false;
1278 }
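
/*
 * Illustrative examples (not from kernel source), assuming MMF_HAS_MDWE is
 * set for the current process:
 *
 *	map_deny_write_exec(VM_READ, VM_READ | VM_WRITE)	-> false (not exec)
 *	map_deny_write_exec(VM_READ, VM_READ | VM_EXEC)		-> true  (gains exec)
 *	map_deny_write_exec(VM_EXEC, VM_EXEC | VM_WRITE)	-> true  (write + exec)
 *	map_deny_write_exec(VM_READ | VM_EXEC, VM_READ | VM_EXEC) -> false
 */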
1279 
1280 static inline int mapping_map_writable(struct address_space *mapping)
1281 {
1282 	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
1283 		0 : -EPERM;
1284 }
1285 
1286 /* Did the driver provide valid mmap hook configuration? */
1287 static inline bool can_mmap_file(struct file *file)
1288 {
1289 	bool has_mmap = file->f_op->mmap;
1290 	bool has_mmap_prepare = file->f_op->mmap_prepare;
1291 
1292 	/* Hooks are mutually exclusive. */
1293 	if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
1294 		return false;
1295 	if (!has_mmap && !has_mmap_prepare)
1296 		return false;
1297 
1298 	return true;
1299 }
1300 
1301 static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
1302 {
1303 	if (file->f_op->mmap_prepare)
1304 		return compat_vma_mmap(file, vma);
1305 
1306 	return file->f_op->mmap(file, vma);
1307 }
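
/*
 * Illustrative sketch (not from kernel source): a driver providing only the
 * newer hook, which can_mmap_file() accepts and vfs_mmap() above routes
 * through compat_vma_mmap() / __compat_vma_mmap(). my_mmap_prepare() is the
 * hypothetical hook sketched earlier:
 *
 *	static const struct file_operations my_fops = {
 *		.mmap_prepare = my_mmap_prepare,
 *	};
 */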
1308 
1309 static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
1310 {
1311 	return file->f_op->mmap_prepare(desc);
1312 }
1313 
1314 static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
1315 {
1316 	/* Changing an anonymous vma with this is illegal */
1317 	get_file(file);
1318 	swap(vma->vm_file, file);
1319 	fput(file);
1320 }
1321