xref: /linux/tools/testing/vma/vma_internal.h (revision c4fb7f0a79771dfd18838bfc5015650a9730e9c0)
1 /* SPDX-License-Identifier: GPL-2.0+ */
2 /*
3  * vma_internal.h
4  *
5  * Header providing userland wrappers and shims for the functionality provided
6  * by mm/vma_internal.h.
7  *
8  * We make the header guard the same as mm/vma_internal.h, so if this shim
9  * header is included, it precludes the inclusion of the kernel one.
10  */
11 
12 #ifndef __MM_VMA_INTERNAL_H
13 #define __MM_VMA_INTERNAL_H
14 
/* Annotation keywords are defined to expand to nothing in this shim. */
#define __private
#define __bitwise
#define __randomize_layout

/* Configuration switches this header is built with. */
#define CONFIG_MMU
#define CONFIG_PER_VMA_LOCK
21 
22 #include <stdlib.h>
23 
24 #include <linux/list.h>
25 #include <linux/maple_tree.h>
26 #include <linux/mm.h>
27 #include <linux/rbtree.h>
28 #include <linux/refcount.h>
29 #include <linux/slab.h>
30 
/* Globals defined outside this header. */
extern unsigned long stack_guard_gap;

#ifndef CONFIG_MMU
/* Without an MMU both minimum mmap addresses are the constant zero. */
#define mmap_min_addr		0UL
#define dac_mmap_min_addr	0UL
#else
extern unsigned long mmap_min_addr;
extern unsigned long dac_mmap_min_addr;
#endif
39 
/*
 * VM debug assertions forward directly to the generic WARN/BUG helpers. The
 * _vmg and _vma arguments are accepted but not used.
 */
#define VM_WARN_ON(_expr)		(WARN_ON(_expr))
#define VM_WARN_ON_ONCE(_expr)		(WARN_ON_ONCE(_expr))
#define VM_WARN_ON_VMG(_expr, _vmg)	(WARN_ON(_expr))
#define VM_BUG_ON(_expr)		(BUG_ON(_expr))
#define VM_BUG_ON_VMA(_expr, _vma)	(BUG_ON(_expr))
45 
#define MMF_HAS_MDWE	28

/* VMA flag bits used by this shim. */
#define VM_NONE		0x00000000
#define VM_READ		0x00000001
#define VM_WRITE	0x00000002
#define VM_EXEC		0x00000004
#define VM_SHARED	0x00000008
#define VM_MAYREAD	0x00000010
#define VM_MAYWRITE	0x00000020
#define VM_MAYEXEC	0x00000040
#define VM_GROWSDOWN	0x00000100
#define VM_PFNMAP	0x00000400
#define VM_LOCKED	0x00002000
#define VM_IO           0x00004000
#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
#define VM_DONTEXPAND	0x00040000
#define VM_LOCKONFAULT	0x00080000
#define VM_ACCOUNT	0x00100000
#define VM_NORESERVE	0x00200000
#define VM_MIXEDMAP	0x10000000
#define VM_SHADOW_STACK	VM_NONE
#define VM_SOFTDIRTY	0
#define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
#define VM_GROWSUP	VM_NONE

#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

/*
 * VM_STACK is defined exactly once, by this conditional. An additional
 * unconditional definition would be an incompatible macro redefinition
 * whenever CONFIG_STACK_GROWSUP is set.
 */
#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK	VM_GROWSUP
#define VM_STACK_EARLY	VM_GROWSDOWN
#else
#define VM_STACK	VM_GROWSDOWN
#define VM_STACK_EARLY	0
#endif
83 
/* Address-space limits, all derived from a single 47-bit window. */
#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
#define TASK_SIZE_LOW		DEFAULT_MAP_WINDOW
#define TASK_SIZE_MAX		DEFAULT_MAP_WINDOW
#define STACK_TOP		TASK_SIZE_LOW
#define STACK_TOP_MAX		TASK_SIZE_MAX

/* Every VMA flag bit that mlock makes use of. */
#define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)

/* VM_EXEC when the current task's personality has READ_IMPLIES_EXEC set. */
#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)

#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC

#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)

/* Stack mappings start from the data defaults. */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
105 
/* Resource limit indices. */
#define RLIMIT_STACK		3	/* max stack size */
#define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */

#define CAP_IPC_LOCK         14

/* VM_SEALED only gets a real bit when CONFIG_64BIT is defined. */
#ifndef CONFIG_64BIT
#define VM_SEALED	VM_NONE
#else
#define VM_SEALED_BIT	42
#define VM_SEALED	BIT(VM_SEALED_BIT)
#endif

#define FIRST_USER_ADDRESS	0UL
#define USER_PGTABLES_CEILING	0UL
120 
/* There is no per-VMA policy in this shim. */
#define vma_policy(vma) NULL

/* Expands to nothing. */
#define down_write_nest_lock(sem, nest_lock)

/* Convert between pgprot_t and its raw value. */
#define pgprot_val(x)		((x).pgprot)
#define __pgprot(x)		((pgprot_t) { (x) } )

/* Visit each VMA that vma_next() yields until it returns NULL. */
#define for_each_vma(__vmi, __vma)					\
	while (((__vma) = vma_next(&(__vmi))) != NULL)

/* The MM code likes to work with exclusive end addresses */
#define for_each_vma_range(__vmi, __vma, __end)				\
	while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)

/* Bits of @p that lie outside PAGE_MASK. */
#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)

#define PHYS_PFN(x)	((unsigned long)((x) >> PAGE_SHIFT))

/* The atomic bit operations map onto their double-underscore variants. */
#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)

#define TASK_SIZE ((1UL << 47) - PAGE_SIZE)

#define AS_MM_ALL_LOCKS 2

/* We hardcode this for now. */
#define sysctl_max_map_count 0x1000000UL
148 
149 #define pgoff_t unsigned long
150 typedef unsigned long	pgprotval_t;
151 typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
152 typedef unsigned long vm_flags_t;
153 typedef __bitwise unsigned int vm_fault_t;
154 
/*
 * The shared stubs do not implement this; either way it amounts to an
 * fprintf(stderr, ...) :)
 */
#define pr_warn_once pr_err

/*
 * Evaluates to @expr. The expansion is parenthesised so that operators
 * surrounding the macro invocation cannot rebind inside the argument.
 */
#define data_race(expr) (expr)

/* Expands to nothing. */
#define ASSERT_EXCLUSIVE_WRITER(x)
164 
/**
 * swap - swap values of @a and @b
 * @a: first value
 * @b: second value
 *
 * Both arguments must be modifiable lvalues and each is evaluated more than
 * once. __typeof__ is used rather than typeof so that the macro also builds
 * in strict ISO modes, where the bare typeof keyword is not available.
 */
#define swap(a, b) \
	do { __typeof__(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
172 
173 struct kref {
174 	refcount_t refcount;
175 };
176 
/*
 * The task command name length is an enum constant rather than a macro so
 * that it is visible to BPF programs.
 */
enum {
	TASK_COMM_LEN = 16,
};
184 
/*
 * Personality flags used for bug emulation.
 *
 * These occupy the top three bytes.
 */
enum {
	READ_IMPLIES_EXEC = 0x0400000,
};
193 
194 struct task_struct {
195 	char comm[TASK_COMM_LEN];
196 	pid_t pid;
197 	struct mm_struct *mm;
198 
199 	/* Used for emulating ABI behavior of previous Linux versions: */
200 	unsigned int			personality;
201 };
202 
/* `current` expands to a call of get_current(), which is defined elsewhere. */
struct task_struct *get_current(void);
#define current get_current()
205 
206 struct anon_vma {
207 	struct anon_vma *root;
208 	struct rb_root_cached rb_root;
209 
210 	/* Test fields. */
211 	bool was_cloned;
212 	bool was_unlinked;
213 };
214 
215 struct anon_vma_chain {
216 	struct anon_vma *anon_vma;
217 	struct list_head same_vma;
218 };
219 
220 struct anon_vma_name {
221 	struct kref kref;
222 	/* The name needs to be at the end because it is dynamically sized. */
223 	char name[];
224 };
225 
226 struct vma_iterator {
227 	struct ma_state mas;
228 };
229 
/*
 * Declare a vma_iterator called @name over the maple tree of @__mm, with its
 * index set to @__addr and its state set to ma_start.
 */
#define VMA_ITERATOR(name, __mm, __addr)				\
	struct vma_iterator name = {					\
		.mas = {						\
			.tree = &(__mm)->mm_mt,				\
			.index = (__addr),				\
			.node = NULL,					\
			.status = ma_start,				\
		},							\
	}
239 
240 struct address_space {
241 	struct rb_root_cached	i_mmap;
242 	unsigned long		flags;
243 	atomic_t		i_mmap_writable;
244 };
245 
/* Types that only need to exist by name; they carry no members. */
struct vm_userfaultfd_ctx {};
struct mempolicy {};
struct mmu_gather {};
struct mutex {};

/* Define an (empty) mutex object called @mutexname. */
#define DEFINE_MUTEX(mutexname) \
	struct mutex mutexname = {}
252 
253 struct mm_struct {
254 	struct maple_tree mm_mt;
255 	int map_count;			/* number of VMAs */
256 	unsigned long total_vm;	   /* Total pages mapped */
257 	unsigned long locked_vm;   /* Pages that have PG_mlocked set */
258 	unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
259 	unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
260 	unsigned long stack_vm;	   /* VM_STACK */
261 
262 	unsigned long def_flags;
263 
264 	unsigned long flags; /* Must use atomic bitops to access */
265 };
266 
267 struct vm_area_struct;
268 
269 /*
270  * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
271  * manipulate mutable fields which will cause those fields to be updated in the
272  * resultant VMA.
273  *
274  * Helper functions are not required for manipulating any field.
275  */
276 struct vm_area_desc {
277 	/* Immutable state. */
278 	struct mm_struct *mm;
279 	unsigned long start;
280 	unsigned long end;
281 
282 	/* Mutable fields. Populated with initial state. */
283 	pgoff_t pgoff;
284 	struct file *file;
285 	vm_flags_t vm_flags;
286 	pgprot_t page_prot;
287 
288 	/* Write-only fields. */
289 	const struct vm_operations_struct *vm_ops;
290 	void *private_data;
291 };
292 
/* The two mmap-related file callbacks this shim declares. */
struct file_operations {
	int (*mmap)(struct file *, struct vm_area_struct *);
	int (*mmap_prepare)(struct vm_area_desc *);
};
297 
/* A file is reduced to its mapping and its operations table. */
struct file {
	struct address_space *f_mapping;
	const struct file_operations *f_op;
};
302 
#define VMA_LOCK_OFFSET	0x40000000

/* Single-member wrapper around an unsigned long. */
typedef struct {
	unsigned long v;
} freeptr_t;
306 
307 struct vm_area_struct {
308 	/* The first cache line has the info for VMA tree walking. */
309 
310 	union {
311 		struct {
312 			/* VMA covers [vm_start; vm_end) addresses within mm */
313 			unsigned long vm_start;
314 			unsigned long vm_end;
315 		};
316 		freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
317 	};
318 
319 	struct mm_struct *vm_mm;	/* The address space we belong to. */
320 	pgprot_t vm_page_prot;          /* Access permissions of this VMA. */
321 
322 	/*
323 	 * Flags, see mm.h.
324 	 * To modify use vm_flags_{init|reset|set|clear|mod} functions.
325 	 */
326 	union {
327 		const vm_flags_t vm_flags;
328 		vm_flags_t __private __vm_flags;
329 	};
330 
331 #ifdef CONFIG_PER_VMA_LOCK
332 	/*
333 	 * Can only be written (using WRITE_ONCE()) while holding both:
334 	 *  - mmap_lock (in write mode)
335 	 *  - vm_refcnt bit at VMA_LOCK_OFFSET is set
336 	 * Can be read reliably while holding one of:
337 	 *  - mmap_lock (in read or write mode)
338 	 *  - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
339 	 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
340 	 * while holding nothing (except RCU to keep the VMA struct allocated).
341 	 *
342 	 * This sequence counter is explicitly allowed to overflow; sequence
343 	 * counter reuse can only lead to occasional unnecessary use of the
344 	 * slowpath.
345 	 */
346 	unsigned int vm_lock_seq;
347 #endif
348 
349 	/*
350 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
351 	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
352 	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
353 	 * or brk vma (with NULL file) can only be in an anon_vma list.
354 	 */
355 	struct list_head anon_vma_chain; /* Serialized by mmap_lock &
356 					  * page_table_lock */
357 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
358 
359 	/* Function pointers to deal with this struct. */
360 	const struct vm_operations_struct *vm_ops;
361 
362 	/* Information about our backing store: */
363 	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
364 					   units */
365 	struct file * vm_file;		/* File we map to (can be NULL). */
366 	void * vm_private_data;		/* was vm_pte (shared mem) */
367 
368 #ifdef CONFIG_SWAP
369 	atomic_long_t swap_readahead_info;
370 #endif
371 #ifndef CONFIG_MMU
372 	struct vm_region *vm_region;	/* NOMMU mapping region */
373 #endif
374 #ifdef CONFIG_NUMA
375 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
376 #endif
377 #ifdef CONFIG_NUMA_BALANCING
378 	struct vma_numab_state *numab_state;	/* NUMA Balancing state */
379 #endif
380 #ifdef CONFIG_PER_VMA_LOCK
381 	/* Unstable RCU readers are allowed to read this. */
382 	refcount_t vm_refcnt;
383 #endif
384 	/*
385 	 * For areas with an address space and backing store,
386 	 * linkage into the address_space->i_mmap interval tree.
387 	 *
388 	 */
389 	struct {
390 		struct rb_node rb;
391 		unsigned long rb_subtree_last;
392 	} shared;
393 #ifdef CONFIG_ANON_VMA_NAME
394 	/*
395 	 * For private and shared anonymous mappings, a pointer to a null
396 	 * terminated string containing the name given to the vma, or NULL if
397 	 * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
398 	 */
399 	struct anon_vma_name *anon_name;
400 #endif
401 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
402 } __randomize_layout;
403 
/* Only needed by name in callback signatures; it has no members. */
struct vm_fault {};
405 
406 struct vm_operations_struct {
407 	void (*open)(struct vm_area_struct * area);
408 	/**
409 	 * @close: Called when the VMA is being removed from the MM.
410 	 * Context: User context.  May sleep.  Caller holds mmap_lock.
411 	 */
412 	void (*close)(struct vm_area_struct * area);
413 	/* Called any time before splitting to check if it's allowed */
414 	int (*may_split)(struct vm_area_struct *area, unsigned long addr);
415 	int (*mremap)(struct vm_area_struct *area);
416 	/*
417 	 * Called by mprotect() to make driver-specific permission
418 	 * checks before mprotect() is finalised.   The VMA must not
419 	 * be modified.  Returns 0 if mprotect() can proceed.
420 	 */
421 	int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
422 			unsigned long end, unsigned long newflags);
423 	vm_fault_t (*fault)(struct vm_fault *vmf);
424 	vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
425 	vm_fault_t (*map_pages)(struct vm_fault *vmf,
426 			pgoff_t start_pgoff, pgoff_t end_pgoff);
427 	unsigned long (*pagesize)(struct vm_area_struct * area);
428 
429 	/* notification that a previously read-only page is about to become
430 	 * writable, if an error is returned it will cause a SIGBUS */
431 	vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
432 
433 	/* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
434 	vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
435 
436 	/* called by access_process_vm when get_user_pages() fails, typically
437 	 * for use by special VMAs. See also generic_access_phys() for a generic
438 	 * implementation useful for any iomem mapping.
439 	 */
440 	int (*access)(struct vm_area_struct *vma, unsigned long addr,
441 		      void *buf, int len, int write);
442 
443 	/* Called by the /proc/PID/maps code to ask the vma whether it
444 	 * has a special name.  Returning non-NULL will also cause this
445 	 * vma to be dumped unconditionally. */
446 	const char *(*name)(struct vm_area_struct *vma);
447 
448 #ifdef CONFIG_NUMA
449 	/*
450 	 * set_policy() op must add a reference to any non-NULL @new mempolicy
451 	 * to hold the policy upon return.  Caller should pass NULL @new to
452 	 * remove a policy and fall back to surrounding context--i.e. do not
453 	 * install a MPOL_DEFAULT policy, nor the task or system default
454 	 * mempolicy.
455 	 */
456 	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
457 
458 	/*
459 	 * get_policy() op must add reference [mpol_get()] to any policy at
460 	 * (vma,addr) marked as MPOL_SHARED.  The shared policy infrastructure
461 	 * in mm/mempolicy.c will do this automatically.
462 	 * get_policy() must NOT add a ref if the policy at (vma,addr) is not
463 	 * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
464 	 * If no [shared/vma] mempolicy exists at the addr, get_policy() op
465 	 * must return NULL--i.e., do not "fallback" to task or system default
466 	 * policy.
467 	 */
468 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
469 					unsigned long addr, pgoff_t *ilx);
470 #endif
471 	/*
472 	 * Called by vm_normal_page() for special PTEs to find the
473 	 * page for @addr.  This is useful if the default behavior
474 	 * (using pte_page()) would not find the correct page.
475 	 */
476 	struct page *(*find_special_page)(struct vm_area_struct *vma,
477 					  unsigned long addr);
478 };
479 
/* Value for vm_unmapped_area_info.flags. */
#define VM_UNMAPPED_AREA_TOPDOWN 1

/* Parameters of an unmapped-area search. */
struct vm_unmapped_area_info {
	unsigned long flags;
	unsigned long length;
	unsigned long low_limit;
	unsigned long high_limit;
	unsigned long align_mask;
	unsigned long align_offset;
	unsigned long start_gap;
};
490 
/* State describing a move of page tables from one VMA to another. */
struct pagetable_move_control {
	struct vm_area_struct *old;	/* Source VMA. */
	struct vm_area_struct *new;	/* Destination VMA. */
	unsigned long old_addr;		/* Address from which the move begins. */
	unsigned long old_end;		/* Exclusive address at which old range ends. */
	unsigned long new_addr;		/* Address to move page tables to. */
	unsigned long len_in;		/* Bytes to remap specified by user. */

	bool need_rmap_locks;		/* Do rmap locks need to be taken? */
	bool for_stack;			/* Is this an early temp stack being moved? */
};

/*
 * Declare a pagetable_move_control called @name. old_end is computed as
 * @old_addr_ + @len_; the fields not listed are zero-initialised.
 */
#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)	\
	struct pagetable_move_control name = {				\
		.old = (old_),						\
		.new = (new_),						\
		.old_addr = (old_addr_),				\
		.old_end = (old_addr_) + (len_),			\
		.new_addr = (new_addr_),				\
		.len_in = (len_),					\
	}
512 
513 static inline void vma_iter_invalidate(struct vma_iterator *vmi)
514 {
515 	mas_pause(&vmi->mas);
516 }
517 
518 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
519 {
520 	return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
521 }
522 
523 static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
524 {
525 	return __pgprot(vm_flags);
526 }
527 
528 static inline bool is_shared_maywrite(vm_flags_t vm_flags)
529 {
530 	return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
531 		(VM_SHARED | VM_MAYWRITE);
532 }
533 
534 static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
535 {
536 	return is_shared_maywrite(vma->vm_flags);
537 }
538 
539 static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
540 {
541 	/*
542 	 * Uses mas_find() to get the first VMA when the iterator starts.
543 	 * Calling mas_next() could skip the first entry.
544 	 */
545 	return mas_find(&vmi->mas, ULONG_MAX);
546 }
547 
548 /*
549  * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
550  * assertions should be made either under mmap_write_lock or when the object
551  * has been isolated under mmap_write_lock, ensuring no competing writers.
552  */
553 static inline void vma_assert_attached(struct vm_area_struct *vma)
554 {
555 	WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
556 }
557 
558 static inline void vma_assert_detached(struct vm_area_struct *vma)
559 {
560 	WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
561 }
562 
563 static inline void vma_assert_write_locked(struct vm_area_struct *);
564 static inline void vma_mark_attached(struct vm_area_struct *vma)
565 {
566 	vma_assert_write_locked(vma);
567 	vma_assert_detached(vma);
568 	refcount_set_release(&vma->vm_refcnt, 1);
569 }
570 
571 static inline void vma_mark_detached(struct vm_area_struct *vma)
572 {
573 	vma_assert_write_locked(vma);
574 	vma_assert_attached(vma);
575 	/* We are the only writer, so no need to use vma_refcount_put(). */
576 	if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
577 		/*
578 		 * Reader must have temporarily raised vm_refcnt but it will
579 		 * drop it without using the vma since vma is write-locked.
580 		 */
581 	}
582 }
583 
/* Both of these are defined outside this header. */
extern const struct vm_operations_struct vma_dummy_vm_ops;

unsigned long rlimit(unsigned int limit);
587 
588 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
589 {
590 	memset(vma, 0, sizeof(*vma));
591 	vma->vm_mm = mm;
592 	vma->vm_ops = &vma_dummy_vm_ops;
593 	INIT_LIST_HEAD(&vma->anon_vma_chain);
594 	vma->vm_lock_seq = UINT_MAX;
595 }
596 
597 /*
598  * These are defined in vma.h, but sadly vm_stat_account() is referenced by
599  * kernel/fork.c, so we have to these broadly available there, and temporarily
600  * define them here to resolve the dependency cycle.
601  */
602 
603 #define is_exec_mapping(flags) \
604 	((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)
605 
606 #define is_stack_mapping(flags) \
607 	(((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))
608 
609 #define is_data_mapping(flags) \
610 	((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)
611 
612 static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
613 				   long npages)
614 {
615 	WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
616 
617 	if (is_exec_mapping(flags))
618 		mm->exec_vm += npages;
619 	else if (is_stack_mapping(flags))
620 		mm->stack_vm += npages;
621 	else if (is_data_mapping(flags))
622 		mm->data_vm += npages;
623 }
624 
625 #undef is_exec_mapping
626 #undef is_stack_mapping
627 #undef is_data_mapping
628 
/* Currently stubbed but we may later wish to un-stub. */
static inline void vm_acct_memory(long pages);

/* Un-account @pages by accounting the negated amount. */
static inline void vm_unacct_memory(long pages)
{
	long delta = -pages;

	vm_acct_memory(delta);
}
635 
636 static inline void mapping_allow_writable(struct address_space *mapping)
637 {
638 	atomic_inc(&mapping->i_mmap_writable);
639 }
640 
641 static inline void vma_set_range(struct vm_area_struct *vma,
642 				 unsigned long start, unsigned long end,
643 				 pgoff_t pgoff)
644 {
645 	vma->vm_start = start;
646 	vma->vm_end = end;
647 	vma->vm_pgoff = pgoff;
648 }
649 
650 static inline
651 struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
652 {
653 	return mas_find(&vmi->mas, max - 1);
654 }
655 
656 static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
657 			unsigned long start, unsigned long end, gfp_t gfp)
658 {
659 	__mas_set_range(&vmi->mas, start, end - 1);
660 	mas_store_gfp(&vmi->mas, NULL, gfp);
661 	if (unlikely(mas_is_err(&vmi->mas)))
662 		return -ENOMEM;
663 
664 	return 0;
665 }
666 
667 static inline void mmap_assert_locked(struct mm_struct *);
668 static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
669 						unsigned long start_addr,
670 						unsigned long end_addr)
671 {
672 	unsigned long index = start_addr;
673 
674 	mmap_assert_locked(mm);
675 	return mt_find(&mm->mm_mt, &index, end_addr - 1);
676 }
677 
678 static inline
679 struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
680 {
681 	return mtree_load(&mm->mm_mt, addr);
682 }
683 
684 static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
685 {
686 	return mas_prev(&vmi->mas, 0);
687 }
688 
689 static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
690 {
691 	mas_set(&vmi->mas, addr);
692 }
693 
694 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
695 {
696 	return !vma->vm_ops;
697 }
698 
699 /* Defined in vma.h, so temporarily define here to avoid circular dependency. */
700 #define vma_iter_load(vmi) \
701 	mas_walk(&(vmi)->mas)
702 
703 static inline struct vm_area_struct *
704 find_vma_prev(struct mm_struct *mm, unsigned long addr,
705 			struct vm_area_struct **pprev)
706 {
707 	struct vm_area_struct *vma;
708 	VMA_ITERATOR(vmi, mm, addr);
709 
710 	vma = vma_iter_load(&vmi);
711 	*pprev = vma_prev(&vmi);
712 	if (!vma)
713 		vma = vma_next(&vmi);
714 	return vma;
715 }
716 
717 #undef vma_iter_load
718 
719 static inline void vma_iter_init(struct vma_iterator *vmi,
720 		struct mm_struct *mm, unsigned long addr)
721 {
722 	mas_init(&vmi->mas, &mm->mm_mt, addr);
723 }
724 
725 /* Stubbed functions. */
726 
727 static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
728 {
729 	return NULL;
730 }
731 
732 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
733 					struct vm_userfaultfd_ctx vm_ctx)
734 {
735 	return true;
736 }
737 
738 static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
739 				    struct anon_vma_name *anon_name2)
740 {
741 	return true;
742 }
743 
/* Stub: does nothing. */
static inline void might_sleep(void)
{
}
747 
748 static inline unsigned long vma_pages(struct vm_area_struct *vma)
749 {
750 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
751 }
752 
/* The functions below are stubs that do nothing. */

static inline void fput(struct file *file)
{
}

static inline void mpol_put(struct mempolicy *pol)
{
}

static inline void lru_add_drain(void)
{
}

static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
}

static inline void update_hiwater_rss(struct mm_struct *mm)
{
}

static inline void update_hiwater_vm(struct mm_struct *mm)
{
}
776 
/* Stub: no mappings are actually torn down. */
static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
			      struct vm_area_struct *vma,
			      unsigned long start_addr, unsigned long end_addr,
			      unsigned long tree_end, bool mm_wr_locked)
{
}

/* Stub: no page tables are freed. */
static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
				 struct vm_area_struct *vma,
				 unsigned long floor, unsigned long ceiling,
				 bool mm_wr_locked)
{
}

/* Stub: does nothing. */
static inline void mapping_unmap_writable(struct address_space *mapping)
{
}

/* Stub: does nothing. */
static inline void flush_dcache_mmap_lock(struct address_space *mapping)
{
}

/* Stub: does nothing. */
static inline void tlb_finish_mmu(struct mmu_gather *tlb)
{
}
801 
/* Stub: hands @f straight back. */
static inline struct file *get_file(struct file *f)
{
	return f;
}

/* Stub: always succeeds. */
static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
{
	return 0;
}
811 
812 static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
813 {
814 	/* For testing purposes. We indicate that an anon_vma has been cloned. */
815 	if (src->anon_vma != NULL) {
816 		dst->anon_vma = src->anon_vma;
817 		dst->anon_vma->was_cloned = true;
818 	}
819 
820 	return 0;
821 }
822 
823 static inline void vma_start_write(struct vm_area_struct *vma)
824 {
825 	/* Used to indicate to tests that a write operation has begun. */
826 	vma->vm_lock_seq++;
827 }
828 
/* Stub: does nothing. */
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end,
					 struct vm_area_struct *next)
{
}
835 
/*
 * Stub: does nothing. The parameters are named because omitting parameter
 * names in a function definition is only valid from C23 onwards.
 */
static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr)
{
}
837 
838 static inline void vma_iter_free(struct vma_iterator *vmi)
839 {
840 	mas_destroy(&vmi->mas);
841 }
842 
843 static inline
844 struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
845 {
846 	return mas_next_range(&vmi->mas, ULONG_MAX);
847 }
848 
/* Stub: @pages is ignored and nothing is accounted. */
static inline void vm_acct_memory(long pages)
{
}
852 
/* The functions below are stubs that do nothing. */

static inline void vma_interval_tree_insert(struct vm_area_struct *vma,
					    struct rb_root_cached *rb)
{
}

static inline void vma_interval_tree_remove(struct vm_area_struct *vma,
					    struct rb_root_cached *rb)
{
}

static inline void flush_dcache_mmap_unlock(struct address_space *mapping)
{
}

static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc,
						 struct rb_root_cached *rb)
{
}

static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc,
						 struct rb_root_cached *rb)
{
}
876 
/* Stub: does nothing. */
static inline void uprobe_mmap(struct vm_area_struct *vma)
{
}

/* Stub: does nothing. */
static inline void uprobe_munmap(struct vm_area_struct *vma,
				 unsigned long start, unsigned long end)
{
}
885 
/* Stub: no lock is taken. */
static inline void i_mmap_lock_write(struct address_space *mapping)
{
}

/* Stub: no lock is taken. */
static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
}

/* Stub: nothing is checked. */
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
}
897 
898 static inline void unlink_anon_vmas(struct vm_area_struct *vma)
899 {
900 	/* For testing purposes, indicate that the anon_vma was unlinked. */
901 	vma->anon_vma->was_unlinked = true;
902 }
903 
/* Stub: no lock is released. */
static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
}

/* Stub: no lock is released. */
static inline void i_mmap_unlock_write(struct address_space *mapping)
{
}

/* Stub: nothing is merged. */
static inline void anon_vma_merge(struct vm_area_struct *vma,
				  struct vm_area_struct *next)
{
}
916 
/* Stub: always reports success and leaves @unmaps untouched. */
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end,
					 struct list_head *unmaps)
{
	return 0;
}
924 
/* The mmap lock operations are stubs: no lock state is kept. */

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
}

/* Always reports success (0). */
static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
	return 0;
}
941 
/* Stub: modification is always permitted. */
static inline bool can_modify_mm(struct mm_struct *mm,
				 unsigned long start, unsigned long end)
{
	return true;
}

/* Stub: does nothing. */
static inline void arch_unmap(struct mm_struct *mm,
			      unsigned long start, unsigned long end)
{
}
954 
955 static inline void mmap_assert_locked(struct mm_struct *mm)
956 {
957 }
958 
959 static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
960 {
961 	return true;
962 }
963 
964 static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
965 			  vm_flags_t vm_flags)
966 {
967 }
968 
/* Stub: always true. */
static inline bool mapping_can_writeback(struct address_space *mapping)
{
	return true;
}

/* Stub: always false. */
static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma)
{
	return false;
}

/* Stub: always false. */
static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
{
	return false;
}

/* Stub: always false. */
static inline bool userfaultfd_wp(struct vm_area_struct *vma)
{
	return false;
}

/* Stub: nothing is checked. */
static inline void mmap_assert_write_locked(struct mm_struct *mm)
{
}
992 
/* Stub: no lock is taken. */
static inline void mutex_lock(struct mutex *lock)
{
}

/* Stub: no lock is released. */
static inline void mutex_unlock(struct mutex *lock)
{
}

/* Stub: every mutex is reported as locked. */
static inline bool mutex_is_locked(struct mutex *lock)
{
	return true;
}
1005 
/* Stub: @p is ignored and no signal is ever reported as pending. */
static inline bool signal_pending(void *p)
{
	return false;
}
1010 
1011 static inline bool is_file_hugepages(struct file *file)
1012 {
1013 	return false;
1014 }
1015 
1016 static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
1017 {
1018 	return 0;
1019 }
1020 
1021 static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags,
1022 				 unsigned long npages)
1023 {
1024 	return true;
1025 }
1026 
1027 static inline void vm_flags_init(struct vm_area_struct *vma,
1028 				 vm_flags_t flags)
1029 {
1030 	vma->__vm_flags = flags;
1031 }
1032 
1033 static inline void vm_flags_set(struct vm_area_struct *vma,
1034 				vm_flags_t flags)
1035 {
1036 	vma_start_write(vma);
1037 	vma->__vm_flags |= flags;
1038 }
1039 
1040 static inline void vm_flags_clear(struct vm_area_struct *vma,
1041 				  vm_flags_t flags)
1042 {
1043 	vma_start_write(vma);
1044 	vma->__vm_flags &= ~flags;
1045 }
1046 
/* Stub: always reports success (0). */
static inline int shmem_zero_setup(struct vm_area_struct *vma)
{
	return 0;
}
1051 
1052 static inline void vma_set_anonymous(struct vm_area_struct *vma)
1053 {
1054 	vma->vm_ops = NULL;
1055 }
1056 
/* Stub: does nothing. */
static inline void ksm_add_vma(struct vm_area_struct *vma)
{
}

/* Stub: does nothing. */
static inline void perf_event_mmap(struct vm_area_struct *vma)
{
}

/* Stub: always false. */
static inline bool vma_is_dax(struct vm_area_struct *vma)
{
	return false;
}

/* Stub: there is never a gate VMA. */
static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
	return NULL;
}
1074 
1075 bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
1076 
1077 /* Update vma->vm_page_prot to reflect vma->vm_flags. */
1078 static inline void vma_set_page_prot(struct vm_area_struct *vma)
1079 {
1080 	vm_flags_t vm_flags = vma->vm_flags;
1081 	pgprot_t vm_page_prot;
1082 
1083 	/* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
1084 	vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));
1085 
1086 	if (vma_wants_writenotify(vma, vm_page_prot)) {
1087 		vm_flags &= ~VM_SHARED;
1088 		/* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
1089 		vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
1090 	}
1091 	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
1092 	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
1093 }
1094 
1095 static inline bool arch_validate_flags(vm_flags_t flags)
1096 {
1097 	return true;
1098 }
1099 
/* Userland shim: intentionally a no-op. */
static inline void vma_close(struct vm_area_struct *vma)
{
}
1103 
/* Userland shim: touches neither @file nor @vma and always returns 0. */
static inline int mmap_file(struct file *file, struct vm_area_struct *vma)
{
	return 0;
}
1108 
1109 static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
1110 {
1111 	if (vma->vm_flags & VM_GROWSDOWN)
1112 		return stack_guard_gap;
1113 
1114 	/* See reasoning around the VM_SHADOW_STACK definition */
1115 	if (vma->vm_flags & VM_SHADOW_STACK)
1116 		return PAGE_SIZE;
1117 
1118 	return 0;
1119 }
1120 
1121 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
1122 {
1123 	unsigned long gap = stack_guard_start_gap(vma);
1124 	unsigned long vm_start = vma->vm_start;
1125 
1126 	vm_start -= gap;
1127 	if (vm_start > vma->vm_start)
1128 		vm_start = 0;
1129 	return vm_start;
1130 }
1131 
1132 static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
1133 {
1134 	unsigned long vm_end = vma->vm_end;
1135 
1136 	if (vma->vm_flags & VM_GROWSUP) {
1137 		vm_end += stack_guard_gap;
1138 		if (vm_end < vma->vm_end)
1139 			vm_end = -PAGE_SIZE;
1140 	}
1141 	return vm_end;
1142 }
1143 
/* Userland shim: always returns 0 for any @mm, @addr and @len. */
static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}
1149 
1150 static inline bool vma_is_accessible(struct vm_area_struct *vma)
1151 {
1152 	return vma->vm_flags & VM_ACCESS_FLAGS;
1153 }
1154 
/* Userland shim: every capability @cap is treated as held; always true. */
static inline bool capable(int cap)
{
	return true;
}
1159 
1160 static inline bool mlock_future_ok(struct mm_struct *mm, vm_flags_t vm_flags,
1161 			unsigned long bytes)
1162 {
1163 	unsigned long locked_pages, limit_pages;
1164 
1165 	if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
1166 		return true;
1167 
1168 	locked_pages = bytes >> PAGE_SHIFT;
1169 	locked_pages += mm->locked_vm;
1170 
1171 	limit_pages = rlimit(RLIMIT_MEMLOCK);
1172 	limit_pages >>= PAGE_SHIFT;
1173 
1174 	return locked_pages <= limit_pages;
1175 }
1176 
1177 static inline int __anon_vma_prepare(struct vm_area_struct *vma)
1178 {
1179 	struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
1180 
1181 	if (!anon_vma)
1182 		return -ENOMEM;
1183 
1184 	anon_vma->root = anon_vma;
1185 	vma->anon_vma = anon_vma;
1186 
1187 	return 0;
1188 }
1189 
1190 static inline int anon_vma_prepare(struct vm_area_struct *vma)
1191 {
1192 	if (likely(vma->anon_vma))
1193 		return 0;
1194 
1195 	return __anon_vma_prepare(vma);
1196 }
1197 
/* Userland shim: intentionally a no-op. */
static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
					      struct list_head *uf)
{
}
1202 
1203 /*
1204  * Denies creating a writable executable mapping or gaining executable permissions.
1205  *
1206  * This denies the following:
1207  *
1208  *     a)      mmap(PROT_WRITE | PROT_EXEC)
1209  *
1210  *     b)      mmap(PROT_WRITE)
1211  *             mprotect(PROT_EXEC)
1212  *
1213  *     c)      mmap(PROT_WRITE)
1214  *             mprotect(PROT_READ)
1215  *             mprotect(PROT_EXEC)
1216  *
1217  * But allows the following:
1218  *
1219  *     d)      mmap(PROT_READ | PROT_EXEC)
1220  *             mmap(PROT_READ | PROT_EXEC | PROT_BTI)
1221  *
1222  * This is only applicable if the user has set the Memory-Deny-Write-Execute
1223  * (MDWE) protection mask for the current process.
1224  *
1225  * @old specifies the VMA flags the VMA originally possessed, and @new the ones
1226  * we propose to set.
1227  *
1228  * Return: false if proposed change is OK, true if not ok and should be denied.
1229  */
1230 static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
1231 {
1232 	/* If MDWE is disabled, we have nothing to deny. */
1233 	if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
1234 		return false;
1235 
1236 	/* If the new VMA is not executable, we have nothing to deny. */
1237 	if (!(new & VM_EXEC))
1238 		return false;
1239 
1240 	/* Under MDWE we do not accept newly writably executable VMAs... */
1241 	if (new & VM_WRITE)
1242 		return true;
1243 
1244 	/* ...nor previously non-executable VMAs becoming executable. */
1245 	if (!(old & VM_EXEC))
1246 		return true;
1247 
1248 	return false;
1249 }
1250 
1251 static inline int mapping_map_writable(struct address_space *mapping)
1252 {
1253 	int c = atomic_read(&mapping->i_mmap_writable);
1254 
1255 	/* Derived from the raw_atomic_inc_unless_negative() implementation. */
1256 	do {
1257 		if (c < 0)
1258 			return -EPERM;
1259 	} while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
1260 
1261 	return 0;
1262 }
1263 
/* Userland shim: moves nothing and always returns 0. */
static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
{
	return 0;
}
1268 
/* Userland shim: intentionally a no-op; all arguments are ignored. */
static inline void free_pgd_range(struct mmu_gather *tlb,
			unsigned long addr, unsigned long end,
			unsigned long floor, unsigned long ceiling)
{
}
1274 
/* Userland shim: does nothing to @mm and always returns 0. */
static inline int ksm_execve(struct mm_struct *mm)
{
	return 0;
}
1279 
/* Userland shim: intentionally a no-op. */
static inline void ksm_exit(struct mm_struct *mm)
{
}
1283 
1284 static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
1285 {
1286 	if (reset_refcnt)
1287 		refcount_set(&vma->vm_refcnt, 0);
1288 }
1289 
/* Userland shim: intentionally a no-op. */
static inline void vma_numab_state_init(struct vm_area_struct *vma)
{
}
1293 
/* Userland shim: intentionally a no-op. */
static inline void vma_numab_state_free(struct vm_area_struct *vma)
{
}
1297 
/* Userland shim: intentionally a no-op; nothing is copied to @new_vma. */
static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
				     struct vm_area_struct *new_vma)
{
}
1302 
/* Userland shim: intentionally a no-op. */
static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
}
1306 
1307 /* Declared in vma.h. */
1308 static inline void set_vma_from_desc(struct vm_area_struct *vma,
1309 		struct vm_area_desc *desc);
1310 
1311 static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma,
1312 		struct vm_area_desc *desc);
1313 
1314 static int compat_vma_mmap_prepare(struct file *file,
1315 		struct vm_area_struct *vma)
1316 {
1317 	struct vm_area_desc desc;
1318 	int err;
1319 
1320 	err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc));
1321 	if (err)
1322 		return err;
1323 	set_vma_from_desc(vma, &desc);
1324 
1325 	return 0;
1326 }
1327 
1328 /* Did the driver provide valid mmap hook configuration? */
1329 static inline bool can_mmap_file(struct file *file)
1330 {
1331 	bool has_mmap = file->f_op->mmap;
1332 	bool has_mmap_prepare = file->f_op->mmap_prepare;
1333 
1334 	/* Hooks are mutually exclusive. */
1335 	if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
1336 		return false;
1337 	if (!has_mmap && !has_mmap_prepare)
1338 		return false;
1339 
1340 	return true;
1341 }
1342 
1343 static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
1344 {
1345 	if (file->f_op->mmap_prepare)
1346 		return compat_vma_mmap_prepare(file, vma);
1347 
1348 	return file->f_op->mmap(file, vma);
1349 }
1350 
1351 static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
1352 {
1353 	return file->f_op->mmap_prepare(desc);
1354 }
1355 
/* Userland shim: intentionally a no-op. */
static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}
1359 
1360 static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
1361 {
1362 	/* Changing an anonymous vma with this is illegal */
1363 	get_file(file);
1364 	swap(vma->vm_file, file);
1365 	fput(file);
1366 }
1367 
/* Userland shim: unconditionally reports false; @file is never inspected. */
static inline bool shmem_file(struct file *file)
{
	return false;
}
1372 
1373 static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm,
1374 		const struct file *file, vm_flags_t vm_flags)
1375 {
1376 	return vm_flags;
1377 }
1378 
1379 #endif	/* __MM_VMA_INTERNAL_H */
1380