1 /* SPDX-License-Identifier: GPL-2.0+ */
2
3 #pragma once
4
5 /* Forward declarations to avoid header cycle. */
6 struct vm_area_struct;
7 static inline void vma_start_write(struct vm_area_struct *vma);
8
9 extern const struct vm_operations_struct vma_dummy_vm_ops;
10 extern unsigned long stack_guard_gap;
12 extern unsigned long rlimit(unsigned int limit);
13 struct task_struct *get_current(void);
14
15 #define MMF_HAS_MDWE 28
16 #define current get_current()
17
18 /*
19 * Define the task command name length as enum, then it can be visible to
20 * BPF programs.
21 */
22 enum {
23 TASK_COMM_LEN = 16,
24 };
25
26 /* PARTIALLY implemented types. */
27 struct mm_struct {
28 struct maple_tree mm_mt;
29 int map_count; /* number of VMAs */
30 unsigned long total_vm; /* Total pages mapped */
31 unsigned long locked_vm; /* Pages that have PG_mlocked set */
32 unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
33 unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
34 unsigned long stack_vm; /* VM_STACK */
35
36 unsigned long def_flags;
37
38 mm_flags_t flags; /* Must use mm_flags_* helpers to access */
39 };
40 struct address_space {
41 struct rb_root_cached i_mmap;
42 unsigned long flags;
43 atomic_t i_mmap_writable;
44 };
45 struct file_operations {
46 int (*mmap)(struct file *, struct vm_area_struct *);
47 int (*mmap_prepare)(struct vm_area_desc *);
48 };
49 struct file {
50 struct address_space *f_mapping;
51 const struct file_operations *f_op;
52 };
53 struct anon_vma_chain {
54 struct anon_vma *anon_vma;
55 struct list_head same_vma;
56 };
57 struct task_struct {
58 char comm[TASK_COMM_LEN];
59 pid_t pid;
60 struct mm_struct *mm;
61
62 /* Used for emulating ABI behavior of previous Linux versions: */
63 unsigned int personality;
64 };
65
66 struct kref {
67 refcount_t refcount;
68 };
69
70 struct anon_vma_name {
71 struct kref kref;
72 /* The name needs to be at the end because it is dynamically sized. */
73 char name[];
74 };
75
76 /*
77 * Contains declarations that are DUPLICATED from kernel source in order to
78 * facilitate userland VMA testing.
79 *
80 * These must be kept in sync with kernel source.
81 */
82
83 #define VMA_LOCK_OFFSET 0x40000000
84
85 typedef struct { unsigned long v; } freeptr_t;
86
87 #define VM_NONE 0x00000000
88
89 typedef int __bitwise vma_flag_t;
90
91 #define ACCESS_PRIVATE(p, member) ((p)->member)
92
93 #define DECLARE_VMA_BIT(name, bitnum) \
94 VMA_ ## name ## _BIT = ((__force vma_flag_t)bitnum)
95 #define DECLARE_VMA_BIT_ALIAS(name, aliased) \
96 VMA_ ## name ## _BIT = VMA_ ## aliased ## _BIT
97 enum {
98 DECLARE_VMA_BIT(READ, 0),
99 DECLARE_VMA_BIT(WRITE, 1),
100 DECLARE_VMA_BIT(EXEC, 2),
101 DECLARE_VMA_BIT(SHARED, 3),
102 /* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
103 DECLARE_VMA_BIT(MAYREAD, 4), /* limits for mprotect() etc. */
104 DECLARE_VMA_BIT(MAYWRITE, 5),
105 DECLARE_VMA_BIT(MAYEXEC, 6),
106 DECLARE_VMA_BIT(MAYSHARE, 7),
107 DECLARE_VMA_BIT(GROWSDOWN, 8), /* general info on the segment */
108 #ifdef CONFIG_MMU
109 DECLARE_VMA_BIT(UFFD_MISSING, 9),/* missing pages tracking */
110 #else
111 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
112 DECLARE_VMA_BIT(MAYOVERLAY, 9),
113 #endif /* CONFIG_MMU */
114 /* Page-ranges managed without "struct page", just pure PFN */
115 DECLARE_VMA_BIT(PFNMAP, 10),
116 DECLARE_VMA_BIT(MAYBE_GUARD, 11),
117 DECLARE_VMA_BIT(UFFD_WP, 12), /* wrprotect pages tracking */
118 DECLARE_VMA_BIT(LOCKED, 13),
119 DECLARE_VMA_BIT(IO, 14), /* Memory mapped I/O or similar */
120 DECLARE_VMA_BIT(SEQ_READ, 15), /* App will access data sequentially */
121 DECLARE_VMA_BIT(RAND_READ, 16), /* App will not benefit from clustered reads */
122 DECLARE_VMA_BIT(DONTCOPY, 17), /* Do not copy this vma on fork */
123 DECLARE_VMA_BIT(DONTEXPAND, 18),/* Cannot expand with mremap() */
124 DECLARE_VMA_BIT(LOCKONFAULT, 19),/* Lock pages covered when faulted in */
125 DECLARE_VMA_BIT(ACCOUNT, 20), /* Is a VM accounted object */
126 DECLARE_VMA_BIT(NORESERVE, 21), /* should the VM suppress accounting */
127 DECLARE_VMA_BIT(HUGETLB, 22), /* Huge TLB Page VM */
128 DECLARE_VMA_BIT(SYNC, 23), /* Synchronous page faults */
129 DECLARE_VMA_BIT(ARCH_1, 24), /* Architecture-specific flag */
130 DECLARE_VMA_BIT(WIPEONFORK, 25),/* Wipe VMA contents in child. */
131 DECLARE_VMA_BIT(DONTDUMP, 26), /* Do not include in the core dump */
132 DECLARE_VMA_BIT(SOFTDIRTY, 27), /* NOT soft dirty clean area */
133 DECLARE_VMA_BIT(MIXEDMAP, 28), /* Can contain struct page and pure PFN pages */
134 DECLARE_VMA_BIT(HUGEPAGE, 29), /* MADV_HUGEPAGE marked this vma */
135 DECLARE_VMA_BIT(NOHUGEPAGE, 30),/* MADV_NOHUGEPAGE marked this vma */
136 DECLARE_VMA_BIT(MERGEABLE, 31), /* KSM may merge identical pages */
137 /* These bits are reused, we define specific uses below. */
138 DECLARE_VMA_BIT(HIGH_ARCH_0, 32),
139 DECLARE_VMA_BIT(HIGH_ARCH_1, 33),
140 DECLARE_VMA_BIT(HIGH_ARCH_2, 34),
141 DECLARE_VMA_BIT(HIGH_ARCH_3, 35),
142 DECLARE_VMA_BIT(HIGH_ARCH_4, 36),
143 DECLARE_VMA_BIT(HIGH_ARCH_5, 37),
144 DECLARE_VMA_BIT(HIGH_ARCH_6, 38),
145 /*
146 * This flag is used to connect VFIO to arch specific KVM code. It
147 * indicates that the memory under this VMA is safe for use with any
148 * non-cachable memory type inside KVM. Some VFIO devices, on some
149 * platforms, are thought to be unsafe and can cause machine crashes
150 * if KVM does not lock down the memory type.
151 */
152 DECLARE_VMA_BIT(ALLOW_ANY_UNCACHED, 39),
153 #ifdef CONFIG_PPC32
154 DECLARE_VMA_BIT_ALIAS(DROPPABLE, ARCH_1),
155 #else
156 DECLARE_VMA_BIT(DROPPABLE, 40),
157 #endif
158 DECLARE_VMA_BIT(UFFD_MINOR, 41),
159 DECLARE_VMA_BIT(SEALED, 42),
160 /* Flags that reuse flags above. */
161 DECLARE_VMA_BIT_ALIAS(PKEY_BIT0, HIGH_ARCH_0),
162 DECLARE_VMA_BIT_ALIAS(PKEY_BIT1, HIGH_ARCH_1),
163 DECLARE_VMA_BIT_ALIAS(PKEY_BIT2, HIGH_ARCH_2),
164 DECLARE_VMA_BIT_ALIAS(PKEY_BIT3, HIGH_ARCH_3),
165 DECLARE_VMA_BIT_ALIAS(PKEY_BIT4, HIGH_ARCH_4),
166 #if defined(CONFIG_X86_USER_SHADOW_STACK)
167 /*
168 * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
169 * support in core mm.
170 *
171 * These VMAs will get a single end guard page. This helps userspace
172 * protect itself from attacks. A single page is enough for current
173 * shadow stack archs (x86). See the comments near alloc_shstk() in
174 * arch/x86/kernel/shstk.c for more details on the guard size.
175 */
176 DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_5),
177 #elif defined(CONFIG_ARM64_GCS)
178 /*
179 * arm64's Guarded Control Stack implements similar functionality and
180 * has similar constraints to shadow stacks.
181 */
182 DECLARE_VMA_BIT_ALIAS(SHADOW_STACK, HIGH_ARCH_6),
183 #endif
184 DECLARE_VMA_BIT_ALIAS(SAO, ARCH_1), /* Strong Access Ordering (powerpc) */
185 DECLARE_VMA_BIT_ALIAS(GROWSUP, ARCH_1), /* parisc */
186 DECLARE_VMA_BIT_ALIAS(SPARC_ADI, ARCH_1), /* sparc64 */
187 DECLARE_VMA_BIT_ALIAS(ARM64_BTI, ARCH_1), /* arm64 */
188 DECLARE_VMA_BIT_ALIAS(ARCH_CLEAR, ARCH_1), /* sparc64, arm64 */
189 DECLARE_VMA_BIT_ALIAS(MAPPED_COPY, ARCH_1), /* !CONFIG_MMU */
190 DECLARE_VMA_BIT_ALIAS(MTE, HIGH_ARCH_4), /* arm64 */
191 DECLARE_VMA_BIT_ALIAS(MTE_ALLOWED, HIGH_ARCH_5),/* arm64 */
192 #ifdef CONFIG_STACK_GROWSUP
193 DECLARE_VMA_BIT_ALIAS(STACK, GROWSUP),
194 DECLARE_VMA_BIT_ALIAS(STACK_EARLY, GROWSDOWN),
195 #else
196 DECLARE_VMA_BIT_ALIAS(STACK, GROWSDOWN),
197 #endif
198 };
199
200 #define INIT_VM_FLAG(name) BIT((__force int) VMA_ ## name ## _BIT)
201 #define VM_READ INIT_VM_FLAG(READ)
202 #define VM_WRITE INIT_VM_FLAG(WRITE)
203 #define VM_EXEC INIT_VM_FLAG(EXEC)
204 #define VM_SHARED INIT_VM_FLAG(SHARED)
205 #define VM_MAYREAD INIT_VM_FLAG(MAYREAD)
206 #define VM_MAYWRITE INIT_VM_FLAG(MAYWRITE)
207 #define VM_MAYEXEC INIT_VM_FLAG(MAYEXEC)
208 #define VM_MAYSHARE INIT_VM_FLAG(MAYSHARE)
209 #define VM_GROWSDOWN INIT_VM_FLAG(GROWSDOWN)
210 #ifdef CONFIG_MMU
211 #define VM_UFFD_MISSING INIT_VM_FLAG(UFFD_MISSING)
212 #else
213 #define VM_UFFD_MISSING VM_NONE
214 #define VM_MAYOVERLAY INIT_VM_FLAG(MAYOVERLAY)
215 #endif
216 #define VM_PFNMAP INIT_VM_FLAG(PFNMAP)
217 #define VM_MAYBE_GUARD INIT_VM_FLAG(MAYBE_GUARD)
218 #define VM_UFFD_WP INIT_VM_FLAG(UFFD_WP)
219 #define VM_LOCKED INIT_VM_FLAG(LOCKED)
220 #define VM_IO INIT_VM_FLAG(IO)
221 #define VM_SEQ_READ INIT_VM_FLAG(SEQ_READ)
222 #define VM_RAND_READ INIT_VM_FLAG(RAND_READ)
223 #define VM_DONTCOPY INIT_VM_FLAG(DONTCOPY)
224 #define VM_DONTEXPAND INIT_VM_FLAG(DONTEXPAND)
225 #define VM_LOCKONFAULT INIT_VM_FLAG(LOCKONFAULT)
226 #define VM_ACCOUNT INIT_VM_FLAG(ACCOUNT)
227 #define VM_NORESERVE INIT_VM_FLAG(NORESERVE)
228 #define VM_HUGETLB INIT_VM_FLAG(HUGETLB)
229 #define VM_SYNC INIT_VM_FLAG(SYNC)
230 #define VM_ARCH_1 INIT_VM_FLAG(ARCH_1)
231 #define VM_WIPEONFORK INIT_VM_FLAG(WIPEONFORK)
232 #define VM_DONTDUMP INIT_VM_FLAG(DONTDUMP)
233 #ifdef CONFIG_MEM_SOFT_DIRTY
234 #define VM_SOFTDIRTY INIT_VM_FLAG(SOFTDIRTY)
235 #else
236 #define VM_SOFTDIRTY VM_NONE
237 #endif
238 #define VM_MIXEDMAP INIT_VM_FLAG(MIXEDMAP)
239 #define VM_HUGEPAGE INIT_VM_FLAG(HUGEPAGE)
240 #define VM_NOHUGEPAGE INIT_VM_FLAG(NOHUGEPAGE)
241 #define VM_MERGEABLE INIT_VM_FLAG(MERGEABLE)
242 #define VM_STACK INIT_VM_FLAG(STACK)
243 #ifdef CONFIG_STACK_GROWSUP
244 #define VM_STACK_EARLY INIT_VM_FLAG(STACK_EARLY)
245 #else
246 #define VM_STACK_EARLY VM_NONE
247 #endif
248 #ifdef CONFIG_ARCH_HAS_PKEYS
249 #define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
250 /* Despite the naming, these are FLAGS not bits. */
251 #define VM_PKEY_BIT0 INIT_VM_FLAG(PKEY_BIT0)
252 #define VM_PKEY_BIT1 INIT_VM_FLAG(PKEY_BIT1)
253 #define VM_PKEY_BIT2 INIT_VM_FLAG(PKEY_BIT2)
254 #if CONFIG_ARCH_PKEY_BITS > 3
255 #define VM_PKEY_BIT3 INIT_VM_FLAG(PKEY_BIT3)
256 #else
257 #define VM_PKEY_BIT3 VM_NONE
258 #endif /* CONFIG_ARCH_PKEY_BITS > 3 */
259 #if CONFIG_ARCH_PKEY_BITS > 4
260 #define VM_PKEY_BIT4 INIT_VM_FLAG(PKEY_BIT4)
261 #else
262 #define VM_PKEY_BIT4 VM_NONE
263 #endif /* CONFIG_ARCH_PKEY_BITS > 4 */
264 #endif /* CONFIG_ARCH_HAS_PKEYS */
265 #if defined(CONFIG_X86_USER_SHADOW_STACK) || defined(CONFIG_ARM64_GCS)
266 #define VM_SHADOW_STACK INIT_VM_FLAG(SHADOW_STACK)
267 #else
268 #define VM_SHADOW_STACK VM_NONE
269 #endif
270 #if defined(CONFIG_PPC64)
271 #define VM_SAO INIT_VM_FLAG(SAO)
272 #elif defined(CONFIG_PARISC)
273 #define VM_GROWSUP INIT_VM_FLAG(GROWSUP)
274 #elif defined(CONFIG_SPARC64)
275 #define VM_SPARC_ADI INIT_VM_FLAG(SPARC_ADI)
276 #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
277 #elif defined(CONFIG_ARM64)
278 #define VM_ARM64_BTI INIT_VM_FLAG(ARM64_BTI)
279 #define VM_ARCH_CLEAR INIT_VM_FLAG(ARCH_CLEAR)
280 #elif !defined(CONFIG_MMU)
281 #define VM_MAPPED_COPY INIT_VM_FLAG(MAPPED_COPY)
282 #endif
283 #ifndef VM_GROWSUP
284 #define VM_GROWSUP VM_NONE
285 #endif
286 #ifdef CONFIG_ARM64_MTE
287 #define VM_MTE INIT_VM_FLAG(MTE)
288 #define VM_MTE_ALLOWED INIT_VM_FLAG(MTE_ALLOWED)
289 #else
290 #define VM_MTE VM_NONE
291 #define VM_MTE_ALLOWED VM_NONE
292 #endif
293 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
294 #define VM_UFFD_MINOR INIT_VM_FLAG(UFFD_MINOR)
295 #else
296 #define VM_UFFD_MINOR VM_NONE
297 #endif
298 #ifdef CONFIG_64BIT
299 #define VM_ALLOW_ANY_UNCACHED INIT_VM_FLAG(ALLOW_ANY_UNCACHED)
300 #define VM_SEALED INIT_VM_FLAG(SEALED)
301 #else
302 #define VM_ALLOW_ANY_UNCACHED VM_NONE
303 #define VM_SEALED VM_NONE
304 #endif
305 #if defined(CONFIG_64BIT) || defined(CONFIG_PPC32)
306 #define VM_DROPPABLE INIT_VM_FLAG(DROPPABLE)
307 #else
308 #define VM_DROPPABLE VM_NONE
309 #endif
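/*
 * Illustrative example, not duplicated from kernel source: the comment on
 * VMA_MAYREAD_BIT above relies on VM_MAYREAD >> 4 == VM_READ (and likewise
 * for the write/exec/share bits). A hedged sketch of how mprotect()-style
 * code can exploit that layout; example_vm_flags_within_limits() is a
 * hypothetical name.
 */
static inline bool example_vm_flags_within_limits(unsigned long vm_flags)
{
	/* Shifting the VM_MAY* limits down yields the access bits they permit. */
	unsigned long allowed = (vm_flags & (VM_MAYREAD | VM_MAYWRITE |
					     VM_MAYEXEC | VM_MAYSHARE)) >> 4;

	return !((vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) & ~allowed);
}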
310
311 /* Bits set in the VMA until the stack is in its final location */
312 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
313
314 #define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
315
316 /* Common data flag combinations */
317 #define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \
318 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
319 #define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \
320 VM_MAYWRITE | VM_MAYEXEC)
321 #define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \
322 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
323
324 #ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */
325 #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC
326 #endif
327
328 #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
329 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
330 #endif
331
332 #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
333
334 #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
335
336 /* VMA basic access permission flags */
337 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
338
339 /*
340 * Special vmas that are non-mergable, non-mlock()able.
341 */
342 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
343
344 #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
345 #define TASK_SIZE_LOW DEFAULT_MAP_WINDOW
346 #define TASK_SIZE_MAX DEFAULT_MAP_WINDOW
347 #define STACK_TOP TASK_SIZE_LOW
348 #define STACK_TOP_MAX TASK_SIZE_MAX
349
350 /* This mask represents all the VMA flag bits used by mlock */
351 #define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT)
352
357
358 #define RLIMIT_STACK 3 /* max stack size */
359 #define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
360
361 #define CAP_IPC_LOCK 14
362
363 #define VM_STICKY (VM_SOFTDIRTY | VM_MAYBE_GUARD)
364
365 #define VM_IGNORE_MERGE VM_STICKY
366
367 #define VM_COPY_ON_FORK (VM_PFNMAP | VM_MIXEDMAP | VM_UFFD_WP | VM_MAYBE_GUARD)
368
369 #define pgprot_val(x) ((x).pgprot)
370 #define __pgprot(x) ((pgprot_t) { (x) } )
371
372 #define for_each_vma(__vmi, __vma) \
373 while (((__vma) = vma_next(&(__vmi))) != NULL)
374
375 /* The MM code likes to work with exclusive end addresses */
376 #define for_each_vma_range(__vmi, __vma, __end) \
377 while (((__vma) = vma_find(&(__vmi), (__end))) != NULL)
378
379 #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
380
381 #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))
382
383 #define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr)
384 #define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr)
385
386 #define AS_MM_ALL_LOCKS 2
387
388 #define swap(a, b) \
389 do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
390
391 /*
392 * Flags for bug emulation.
393 *
394 * These occupy the top three bytes.
395 */
396 enum {
397 READ_IMPLIES_EXEC = 0x0400000,
398 };
399
400 struct vma_iterator {
401 struct ma_state mas;
402 };
403
404 #define VMA_ITERATOR(name, __mm, __addr) \
405 struct vma_iterator name = { \
406 .mas = { \
407 .tree = &(__mm)->mm_mt, \
408 .index = __addr, \
409 .node = NULL, \
410 .status = ma_start, \
411 }, \
412 }
413
414 #define DEFINE_MUTEX(mutexname) \
415 struct mutex mutexname = {}
416
417 #define DECLARE_BITMAP(name, bits) \
418 unsigned long name[BITS_TO_LONGS(bits)]
419
420 #define EMPTY_VMA_FLAGS ((vma_flags_t){ })
421
422 /* What action should be taken after an .mmap_prepare call is complete? */
423 enum mmap_action_type {
424 MMAP_NOTHING, /* Mapping is complete, no further action. */
425 MMAP_REMAP_PFN, /* Remap PFN range. */
426 MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
427 };
428
429 /*
430 * Describes an action an mmap_prepare hook can instruct to be taken to complete
431 * the mapping of a VMA. Specified in vm_area_desc.
432 */
433 struct mmap_action {
434 union {
435 /* Remap range. */
436 struct {
437 unsigned long start;
438 unsigned long start_pfn;
439 unsigned long size;
440 pgprot_t pgprot;
441 } remap;
442 };
443 enum mmap_action_type type;
444
445 /*
446 * If specified, this hook is invoked after the selected action has been
447 * successfully completed. Note that the VMA write lock is still held.
448 *
449 * The absolute minimum ought to be done here.
450 *
451 * Returns 0 on success, or an error code.
452 */
453 int (*success_hook)(const struct vm_area_struct *vma);
454
455 /*
456 * If specified, this hook is invoked when an error occurs while
457 * attempting the selected action.
458 *
459 * The hook can return an error code in order to filter the error, but
460 * it is not valid to clear the error here.
461 */
462 int (*error_hook)(int err);
463
464 /*
465 * This should be set in rare instances where the operation requires
466 * that rmap not be able to access the VMA until it is
467 * completely set up.
468 */
469 bool hide_from_rmap_until_complete :1;
470 };
471
472 /* Operations which modify VMAs. */
473 enum vma_operation {
474 VMA_OP_SPLIT,
475 VMA_OP_MERGE_UNFAULTED,
476 VMA_OP_REMAP,
477 VMA_OP_FORK,
478 };
479
480 /*
481 * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
482 * manipulate mutable fields which will cause those fields to be updated in the
483 * resultant VMA.
484 *
485 * Helper functions are not required for manipulating any field.
486 */
487 struct vm_area_desc {
488 /* Immutable state. */
489 const struct mm_struct *const mm;
490 struct file *const file; /* May vary from vm_file in stacked callers. */
491 unsigned long start;
492 unsigned long end;
493
494 /* Mutable fields. Populated with initial state. */
495 pgoff_t pgoff;
496 struct file *vm_file;
497 union {
498 vm_flags_t vm_flags;
499 vma_flags_t vma_flags;
500 };
501 pgprot_t page_prot;
502
503 /* Write-only fields. */
504 const struct vm_operations_struct *vm_ops;
505 void *private_data;
506
507 /* Take further action? */
508 struct mmap_action action;
509 };
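/*
 * Illustrative sketch, not duplicated from kernel source: how a PFN-remapping
 * driver's .mmap_prepare hook might fill in the descriptor and action above.
 * example_mmap_prepare() and base_pfn are hypothetical.
 */
static inline int example_mmap_prepare(struct vm_area_desc *desc)
{
	const unsigned long base_pfn = 0x1000;	/* hypothetical device PFN */

	desc->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;

	desc->action.type = MMAP_REMAP_PFN;
	desc->action.remap.start = desc->start;
	desc->action.remap.start_pfn = base_pfn + desc->pgoff;
	desc->action.remap.size = desc->end - desc->start;
	desc->action.remap.pgprot = desc->page_prot;

	return 0;
}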
510
511 struct vm_area_struct {
512 /* The first cache line has the info for VMA tree walking. */
513
514 union {
515 struct {
516 /* VMA covers [vm_start; vm_end) addresses within mm */
517 unsigned long vm_start;
518 unsigned long vm_end;
519 };
520 freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */
521 };
522
523 struct mm_struct *vm_mm; /* The address space we belong to. */
524 pgprot_t vm_page_prot; /* Access permissions of this VMA. */
525
526 /*
527 * Flags, see mm.h.
528 * To modify use vm_flags_{init|reset|set|clear|mod} functions.
529 */
530 union {
531 const vm_flags_t vm_flags;
532 vma_flags_t flags;
533 };
534
535 #ifdef CONFIG_PER_VMA_LOCK
536 /*
537 * Can only be written (using WRITE_ONCE()) while holding both:
538 * - mmap_lock (in write mode)
539 * - vm_refcnt bit at VMA_LOCK_OFFSET is set
540 * Can be read reliably while holding one of:
541 * - mmap_lock (in read or write mode)
542 * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
543 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
544 * while holding nothing (except RCU to keep the VMA struct allocated).
545 *
546 * This sequence counter is explicitly allowed to overflow; sequence
547 * counter reuse can only lead to occasional unnecessary use of the
548 * slowpath.
549 */
550 unsigned int vm_lock_seq;
551 #endif
552
553 /*
554 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
555 * list, after a COW of one of the file pages. A MAP_SHARED vma
556 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
557 * or brk vma (with NULL file) can only be in an anon_vma list.
558 */
559 struct list_head anon_vma_chain; /* Serialized by mmap_lock &
560 * page_table_lock */
561 struct anon_vma *anon_vma; /* Serialized by page_table_lock */
562
563 /* Function pointers to deal with this struct. */
564 const struct vm_operations_struct *vm_ops;
565
566 /* Information about our backing store: */
567 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
568 units */
569 struct file * vm_file; /* File we map to (can be NULL). */
570 void * vm_private_data; /* was vm_pte (shared mem) */
571
572 #ifdef CONFIG_SWAP
573 atomic_long_t swap_readahead_info;
574 #endif
575 #ifndef CONFIG_MMU
576 struct vm_region *vm_region; /* NOMMU mapping region */
577 #endif
578 #ifdef CONFIG_NUMA
579 struct mempolicy *vm_policy; /* NUMA policy for the VMA */
580 #endif
581 #ifdef CONFIG_NUMA_BALANCING
582 struct vma_numab_state *numab_state; /* NUMA Balancing state */
583 #endif
584 #ifdef CONFIG_PER_VMA_LOCK
585 /* Unstable RCU readers are allowed to read this. */
586 refcount_t vm_refcnt;
587 #endif
588 /*
589 * For areas with an address space and backing store,
590 * linkage into the address_space->i_mmap interval tree.
591 *
592 */
593 struct {
594 struct rb_node rb;
595 unsigned long rb_subtree_last;
596 } shared;
597 #ifdef CONFIG_ANON_VMA_NAME
598 /*
599 * For private and shared anonymous mappings, a pointer to a null
600 * terminated string containing the name given to the vma, or NULL if
601 * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
602 */
603 struct anon_vma_name *anon_name;
604 #endif
605 struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
606 } __randomize_layout;
607
608 struct vm_operations_struct {
609 void (*open)(struct vm_area_struct * area);
610 /**
611 * @close: Called when the VMA is being removed from the MM.
612 * Context: User context. May sleep. Caller holds mmap_lock.
613 */
614 void (*close)(struct vm_area_struct * area);
615 /* Called any time before splitting to check if it's allowed */
616 int (*may_split)(struct vm_area_struct *area, unsigned long addr);
617 int (*mremap)(struct vm_area_struct *area);
618 /*
619 * Called by mprotect() to make driver-specific permission
620 * checks before mprotect() is finalised. The VMA must not
621 * be modified. Returns 0 if mprotect() can proceed.
622 */
623 int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
624 unsigned long end, unsigned long newflags);
625 vm_fault_t (*fault)(struct vm_fault *vmf);
626 vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
627 vm_fault_t (*map_pages)(struct vm_fault *vmf,
628 pgoff_t start_pgoff, pgoff_t end_pgoff);
629 unsigned long (*pagesize)(struct vm_area_struct * area);
630
631 /* notification that a previously read-only page is about to become
632 * writable; if an error is returned it will cause a SIGBUS */
633 vm_fault_t (*page_mkwrite)(struct vm_fault *vmf);
634
635 /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */
636 vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf);
637
638 /* called by access_process_vm when get_user_pages() fails, typically
639 * for use by special VMAs. See also generic_access_phys() for a generic
640 * implementation useful for any iomem mapping.
641 */
642 int (*access)(struct vm_area_struct *vma, unsigned long addr,
643 void *buf, int len, int write);
644
645 /* Called by the /proc/PID/maps code to ask the vma whether it
646 * has a special name. Returning non-NULL will also cause this
647 * vma to be dumped unconditionally. */
648 const char *(*name)(struct vm_area_struct *vma);
649
650 #ifdef CONFIG_NUMA
651 /*
652 * set_policy() op must add a reference to any non-NULL @new mempolicy
653 * to hold the policy upon return. Caller should pass NULL @new to
654 * remove a policy and fall back to surrounding context--i.e. do not
655 * install a MPOL_DEFAULT policy, nor the task or system default
656 * mempolicy.
657 */
658 int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
659
660 /*
661 * get_policy() op must add reference [mpol_get()] to any policy at
662 * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
663 * in mm/mempolicy.c will do this automatically.
664 * get_policy() must NOT add a ref if the policy at (vma,addr) is not
665 * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
666 * If no [shared/vma] mempolicy exists at the addr, get_policy() op
667 * must return NULL--i.e., do not "fallback" to task or system default
668 * policy.
669 */
670 struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
671 unsigned long addr, pgoff_t *ilx);
672 #endif
673 #ifdef CONFIG_FIND_NORMAL_PAGE
674 /*
675 * Called by vm_normal_page() for special PTEs in @vma at @addr. This
676 * allows for returning a "normal" page from vm_normal_page() even
677 * though the PTE indicates that the "struct page" either does not exist
678 * or should not be touched: "special".
679 *
680 * Do not add new users: this really only works when a "normal" page
681 * was mapped, but then the PTE got changed to something weird (+
682 * marked special) that would not make pte_pfn() identify the originally
683 * inserted page.
684 */
685 struct page *(*find_normal_page)(struct vm_area_struct *vma,
686 unsigned long addr);
687 #endif /* CONFIG_FIND_NORMAL_PAGE */
688 };
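/*
 * Illustrative example, not duplicated from kernel source: a minimal set of
 * operations as a driver might supply them. The example_* identifiers are
 * hypothetical; only the hook names are real.
 */
static void example_vm_close(struct vm_area_struct *area)
{
	/* Tear down whatever was stashed in area->vm_private_data. */
}

static int example_vm_may_split(struct vm_area_struct *area, unsigned long addr)
{
	/* Returning 0 permits splitting this mapping at @addr. */
	return 0;
}

static const struct vm_operations_struct example_vm_ops = {
	.close		= example_vm_close,
	.may_split	= example_vm_may_split,
};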
689
690 struct vm_unmapped_area_info {
691 #define VM_UNMAPPED_AREA_TOPDOWN 1
692 unsigned long flags;
693 unsigned long length;
694 unsigned long low_limit;
695 unsigned long high_limit;
696 unsigned long align_mask;
697 unsigned long align_offset;
698 unsigned long start_gap;
699 };
700
701 struct pagetable_move_control {
702 struct vm_area_struct *old; /* Source VMA. */
703 struct vm_area_struct *new; /* Destination VMA. */
704 unsigned long old_addr; /* Address from which the move begins. */
705 unsigned long old_end; /* Exclusive address at which old range ends. */
706 unsigned long new_addr; /* Address to move page tables to. */
707 unsigned long len_in; /* Bytes to remap specified by user. */
708
709 bool need_rmap_locks; /* Do rmap locks need to be taken? */
710 bool for_stack; /* Is this an early temp stack being moved? */
711 };
712
713 #define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_) \
714 struct pagetable_move_control name = { \
715 .old = old_, \
716 .new = new_, \
717 .old_addr = old_addr_, \
718 .old_end = (old_addr_) + (len_), \
719 .new_addr = new_addr_, \
720 .len_in = len_, \
721 }
722
723 static inline void vma_iter_invalidate(struct vma_iterator *vmi)
724 {
725 mas_pause(&vmi->mas);
726 }
727
728 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
729 {
730 return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot));
731 }
732
733 static inline pgprot_t vm_get_page_prot(vm_flags_t vm_flags)
734 {
735 return __pgprot(vm_flags);
736 }
737
738 static inline bool mm_flags_test(int flag, const struct mm_struct *mm)
739 {
740 return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags));
741 }
742
743 /*
744 * Copy value to the first system word of VMA flags, non-atomically.
745 *
746 * IMPORTANT: This does not overwrite bytes past the first system word. The
747 * caller must account for this.
748 */
749 static inline void vma_flags_overwrite_word(vma_flags_t *flags, unsigned long value)
750 {
751 *ACCESS_PRIVATE(flags, __vma_flags) = value;
752 }
753
754 /*
755 * Copy value to the first system word of VMA flags ONCE, non-atomically.
756 *
757 * IMPORTANT: This does not overwrite bytes past the first system word. The
758 * caller must account for this.
759 */
760 static inline void vma_flags_overwrite_word_once(vma_flags_t *flags, unsigned long value)
761 {
762 unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
763
764 WRITE_ONCE(*bitmap, value);
765 }
766
767 /* Update the first system word of VMA flags setting bits, non-atomically. */
768 static inline void vma_flags_set_word(vma_flags_t *flags, unsigned long value)
769 {
770 unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
771
772 *bitmap |= value;
773 }
774
775 /* Update the first system word of VMA flags clearing bits, non-atomically. */
776 static inline void vma_flags_clear_word(vma_flags_t *flags, unsigned long value)
777 {
778 unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
779
780 *bitmap &= ~value;
781 }
782
783 static inline void vma_flags_clear_all(vma_flags_t *flags)
784 {
785 bitmap_zero(ACCESS_PRIVATE(flags, __vma_flags), NUM_VMA_FLAG_BITS);
786 }
787
788 static inline void vma_flag_set(vma_flags_t *flags, vma_flag_t bit)
789 {
790 unsigned long *bitmap = ACCESS_PRIVATE(flags, __vma_flags);
791
792 __set_bit((__force int)bit, bitmap);
793 }
794
795 /* Use when VMA is not part of the VMA tree and needs no locking */
796 static inline void vm_flags_init(struct vm_area_struct *vma,
797 vm_flags_t flags)
798 {
799 vma_flags_clear_all(&vma->flags);
800 vma_flags_overwrite_word(&vma->flags, flags);
801 }
802
803 /*
804 * Use when VMA is part of the VMA tree and modifications need coordination.
805 * Note: vm_flags_reset and vm_flags_reset_once do not lock the vma and
806 * it should be locked explicitly beforehand.
807 */
808 static inline void vm_flags_reset(struct vm_area_struct *vma,
809 vm_flags_t flags)
810 {
811 vma_assert_write_locked(vma);
812 vm_flags_init(vma, flags);
813 }
814
815 static inline void vm_flags_reset_once(struct vm_area_struct *vma,
816 vm_flags_t flags)
817 {
818 vma_assert_write_locked(vma);
819 /*
820 * The user should only be interested in avoiding reordering of
821 * assignment to the first word.
822 */
823 vma_flags_clear_all(&vma->flags);
824 vma_flags_overwrite_word_once(&vma->flags, flags);
825 }
826
827 static inline void vm_flags_set(struct vm_area_struct *vma,
828 vm_flags_t flags)
829 {
830 vma_start_write(vma);
831 vma_flags_set_word(&vma->flags, flags);
832 }
833
834 static inline void vm_flags_clear(struct vm_area_struct *vma,
835 vm_flags_t flags)
836 {
837 vma_start_write(vma);
838 vma_flags_clear_word(&vma->flags, flags);
839 }
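/*
 * Illustrative example, not kernel source: typical use of the helpers above.
 * vm_flags_init() is for VMAs not yet in the tree; once a VMA is in the tree,
 * the write-locking vm_flags_set()/vm_flags_clear() variants must be used.
 * example_mark_vma_mlocked() is a hypothetical name.
 */
static inline void example_mark_vma_mlocked(struct vm_area_struct *vma,
					    bool lock_on_fault)
{
	vm_flags_set(vma, lock_on_fault ? (VM_LOCKED | VM_LOCKONFAULT) : VM_LOCKED);
}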
840
841 static inline vma_flags_t __mk_vma_flags(size_t count, const vma_flag_t *bits);
842
843 #define mk_vma_flags(...) __mk_vma_flags(COUNT_ARGS(__VA_ARGS__), \
844 (const vma_flag_t []){__VA_ARGS__})
845
846 static __always_inline bool vma_flags_test_mask(const vma_flags_t *flags,
847 vma_flags_t to_test)
848 {
849 const unsigned long *bitmap = flags->__vma_flags;
850 const unsigned long *bitmap_to_test = to_test.__vma_flags;
851
852 return bitmap_intersects(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
853 }
854
855 #define vma_flags_test(flags, ...) \
856 vma_flags_test_mask(flags, mk_vma_flags(__VA_ARGS__))
857
858 static __always_inline bool vma_flags_test_all_mask(const vma_flags_t *flags,
859 vma_flags_t to_test)
860 {
861 const unsigned long *bitmap = flags->__vma_flags;
862 const unsigned long *bitmap_to_test = to_test.__vma_flags;
863
864 return bitmap_subset(bitmap_to_test, bitmap, NUM_VMA_FLAG_BITS);
865 }
866
867 #define vma_flags_test_all(flags, ...) \
868 vma_flags_test_all_mask(flags, mk_vma_flags(__VA_ARGS__))
869
870 static __always_inline void vma_flags_set_mask(vma_flags_t *flags, vma_flags_t to_set)
871 {
872 unsigned long *bitmap = flags->__vma_flags;
873 const unsigned long *bitmap_to_set = to_set.__vma_flags;
874
875 bitmap_or(bitmap, bitmap, bitmap_to_set, NUM_VMA_FLAG_BITS);
876 }
877
878 #define vma_flags_set(flags, ...) \
879 vma_flags_set_mask(flags, mk_vma_flags(__VA_ARGS__))
880
881 static __always_inline void vma_flags_clear_mask(vma_flags_t *flags, vma_flags_t to_clear)
882 {
883 unsigned long *bitmap = flags->__vma_flags;
884 const unsigned long *bitmap_to_clear = to_clear.__vma_flags;
885
886 bitmap_andnot(bitmap, bitmap, bitmap_to_clear, NUM_VMA_FLAG_BITS);
887 }
888
889 #define vma_flags_clear(flags, ...) \
890 vma_flags_clear_mask(flags, mk_vma_flags(__VA_ARGS__))
891
892 static inline bool vma_test_all_flags_mask(const struct vm_area_struct *vma,
893 vma_flags_t flags)
894 {
895 return vma_flags_test_all_mask(&vma->flags, flags);
896 }
897
898 #define vma_test_all_flags(vma, ...) \
899 vma_test_all_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
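/*
 * Illustrative example, not kernel source: the variadic helpers above test
 * several bits at once; vma_flags_test() is satisfied by any of the listed
 * bits, while vma_flags_test_all() requires all of them.
 * example_vma_is_special_mapping() is a hypothetical name.
 */
static inline bool example_vma_is_special_mapping(const struct vm_area_struct *vma)
{
	return vma_flags_test(&vma->flags, VMA_IO_BIT, VMA_DONTEXPAND_BIT,
			      VMA_PFNMAP_BIT, VMA_MIXEDMAP_BIT);
}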
900
901 static inline bool is_shared_maywrite_vm_flags(vm_flags_t vm_flags)
902 {
903 return (vm_flags & (VM_SHARED | VM_MAYWRITE)) ==
904 (VM_SHARED | VM_MAYWRITE);
905 }
906
907 static inline void vma_set_flags_mask(struct vm_area_struct *vma,
908 vma_flags_t flags)
909 {
910 vma_flags_set_mask(&vma->flags, flags);
911 }
912
913 #define vma_set_flags(vma, ...) \
914 vma_set_flags_mask(vma, mk_vma_flags(__VA_ARGS__))
915
916 static inline bool vma_desc_test_flags_mask(const struct vm_area_desc *desc,
917 vma_flags_t flags)
918 {
919 return vma_flags_test_mask(&desc->vma_flags, flags);
920 }
921
922 #define vma_desc_test_flags(desc, ...) \
923 vma_desc_test_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
924
925 static inline void vma_desc_set_flags_mask(struct vm_area_desc *desc,
926 vma_flags_t flags)
927 {
928 vma_flags_set_mask(&desc->vma_flags, flags);
929 }
930
931 #define vma_desc_set_flags(desc, ...) \
932 vma_desc_set_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
933
934 static inline void vma_desc_clear_flags_mask(struct vm_area_desc *desc,
935 vma_flags_t flags)
936 {
937 vma_flags_clear_mask(&desc->vma_flags, flags);
938 }
939
940 #define vma_desc_clear_flags(desc, ...) \
941 vma_desc_clear_flags_mask(desc, mk_vma_flags(__VA_ARGS__))
942
943 static inline bool is_shared_maywrite(const vma_flags_t *flags)
944 {
945 return vma_flags_test_all(flags, VMA_SHARED_BIT, VMA_MAYWRITE_BIT);
946 }
947
948 static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma)
949 {
950 return is_shared_maywrite(&vma->flags);
951 }
952
953 static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
954 {
955 /*
956 * Uses mas_find() to get the first VMA when the iterator starts.
957 * Calling mas_next() could skip the first entry.
958 */
959 return mas_find(&vmi->mas, ULONG_MAX);
960 }
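/*
 * Illustrative example, not kernel source: driving the iterator with
 * VMA_ITERATOR() and the for_each_vma() helper defined earlier.
 * example_count_vmas() is a hypothetical name.
 */
static inline int example_count_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	int count = 0;
	VMA_ITERATOR(vmi, mm, 0);

	for_each_vma(vmi, vma)
		count++;

	return count;
}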
961
962 /*
963 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
964 * assertions should be made either under mmap_write_lock or when the object
965 * has been isolated under mmap_write_lock, ensuring no competing writers.
966 */
967 static inline void vma_assert_attached(struct vm_area_struct *vma)
968 {
969 WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
970 }
971
972 static inline void vma_assert_detached(struct vm_area_struct *vma)
973 {
974 WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
975 }
976
977 static inline void vma_assert_write_locked(struct vm_area_struct *);
978 static inline void vma_mark_attached(struct vm_area_struct *vma)
979 {
980 vma_assert_write_locked(vma);
981 vma_assert_detached(vma);
982 refcount_set_release(&vma->vm_refcnt, 1);
983 }
984
985 static inline void vma_mark_detached(struct vm_area_struct *vma)
986 {
987 vma_assert_write_locked(vma);
988 vma_assert_attached(vma);
989 /* We are the only writer, so no need to use vma_refcount_put(). */
990 if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
991 /*
992 * Reader must have temporarily raised vm_refcnt but it will
993 * drop it without using the vma since vma is write-locked.
994 */
995 }
996 }
997
998 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
999 {
1000 memset(vma, 0, sizeof(*vma));
1001 vma->vm_mm = mm;
1002 vma->vm_ops = &vma_dummy_vm_ops;
1003 INIT_LIST_HEAD(&vma->anon_vma_chain);
1004 vma->vm_lock_seq = UINT_MAX;
1005 }
1006
1007 /*
1008 * These are defined in vma.h, but sadly vm_stat_account() is referenced by
1009 * kernel/fork.c, so we have to make these broadly available there, and temporarily
1010 * define them here to resolve the dependency cycle.
1011 */
1012 #define is_exec_mapping(flags) \
1013 ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC)
1014
1015 #define is_stack_mapping(flags) \
1016 (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK))
1017
1018 #define is_data_mapping(flags) \
1019 ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE)
1020
1021 static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags,
1022 long npages)
1023 {
1024 WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
1025
1026 if (is_exec_mapping(flags))
1027 mm->exec_vm += npages;
1028 else if (is_stack_mapping(flags))
1029 mm->stack_vm += npages;
1030 else if (is_data_mapping(flags))
1031 mm->data_vm += npages;
1032 }
1033
1034 #undef is_exec_mapping
1035 #undef is_stack_mapping
1036 #undef is_data_mapping
1037
1038 static inline void vm_unacct_memory(long pages)
1039 {
1040 vm_acct_memory(-pages);
1041 }
1042
1043 static inline void mapping_allow_writable(struct address_space *mapping)
1044 {
1045 atomic_inc(&mapping->i_mmap_writable);
1046 }
1047
1048 static inline
1049 struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
1050 {
1051 return mas_find(&vmi->mas, max - 1);
1052 }
1053
1054 static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
1055 unsigned long start, unsigned long end, gfp_t gfp)
1056 {
1057 __mas_set_range(&vmi->mas, start, end - 1);
1058 mas_store_gfp(&vmi->mas, NULL, gfp);
1059 if (unlikely(mas_is_err(&vmi->mas)))
1060 return -ENOMEM;
1061
1062 return 0;
1063 }
1064
1065 static inline void vma_set_anonymous(struct vm_area_struct *vma)
1066 {
1067 vma->vm_ops = NULL;
1068 }
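/*
 * Illustrative example, not kernel source: the minimal construction sequence
 * implied by the helpers above for a VMA that is not yet in the tree.
 * example_setup_anon_vma() is a hypothetical name.
 */
static inline void example_setup_anon_vma(struct vm_area_struct *vma,
					  struct mm_struct *mm,
					  unsigned long start, unsigned long end,
					  vm_flags_t flags)
{
	vma_init(vma, mm);
	vma->vm_start = start;
	vma->vm_end = end;
	vm_flags_init(vma, flags);	/* Not yet in the tree, no lock needed. */
	vma_set_anonymous(vma);
}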
1069
1070 /* Declared in vma.h. */
1071 static inline void set_vma_from_desc(struct vm_area_struct *vma,
1072 struct vm_area_desc *desc);
1073
1074 static inline int __compat_vma_mmap(const struct file_operations *f_op,
1075 struct file *file, struct vm_area_struct *vma)
1076 {
1077 struct vm_area_desc desc = {
1078 .mm = vma->vm_mm,
1079 .file = file,
1080 .start = vma->vm_start,
1081 .end = vma->vm_end,
1082
1083 .pgoff = vma->vm_pgoff,
1084 .vm_file = vma->vm_file,
1085 .vm_flags = vma->vm_flags,
1086 .page_prot = vma->vm_page_prot,
1087
1088 .action.type = MMAP_NOTHING, /* Default */
1089 };
1090 int err;
1091
1092 err = f_op->mmap_prepare(&desc);
1093 if (err)
1094 return err;
1095
1096 mmap_action_prepare(&desc.action, &desc);
1097 set_vma_from_desc(vma, &desc);
1098 return mmap_action_complete(&desc.action, vma);
1099 }
1100
1101 static inline int compat_vma_mmap(struct file *file,
1102 struct vm_area_struct *vma)
1103 {
1104 return __compat_vma_mmap(file->f_op, file, vma);
1105 }
1106
1107
1108 static inline void vma_iter_init(struct vma_iterator *vmi,
1109 struct mm_struct *mm, unsigned long addr)
1110 {
1111 mas_init(&vmi->mas, &mm->mm_mt, addr);
1112 }
1113
1114 static inline unsigned long vma_pages(struct vm_area_struct *vma)
1115 {
1116 return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
1117 }
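/*
 * Illustrative example, not kernel source: pairing vm_stat_account() with
 * vma_pages() when a VMA has just been inserted. example_account_new_vma()
 * is a hypothetical name.
 */
static inline void example_account_new_vma(struct mm_struct *mm,
					   struct vm_area_struct *vma)
{
	vm_stat_account(mm, vma->vm_flags, vma_pages(vma));
}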
1118
1119 static inline void mmap_assert_locked(struct mm_struct *);
1120 static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
1121 unsigned long start_addr,
1122 unsigned long end_addr)
1123 {
1124 unsigned long index = start_addr;
1125
1126 mmap_assert_locked(mm);
1127 return mt_find(&mm->mm_mt, &index, end_addr - 1);
1128 }
1129
1130 static inline
1131 struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
1132 {
1133 return mtree_load(&mm->mm_mt, addr);
1134 }
1135
1136 static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
1137 {
1138 return mas_prev(&vmi->mas, 0);
1139 }
1140
1141 static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr)
1142 {
1143 mas_set(&vmi->mas, addr);
1144 }
1145
1146 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
1147 {
1148 return !vma->vm_ops;
1149 }
1150
1151 /* Defined in vma.h, so temporarily define here to avoid circular dependency. */
1152 #define vma_iter_load(vmi) \
1153 mas_walk(&(vmi)->mas)
1154
1155 static inline struct vm_area_struct *
1156 find_vma_prev(struct mm_struct *mm, unsigned long addr,
1157 struct vm_area_struct **pprev)
1158 {
1159 struct vm_area_struct *vma;
1160 VMA_ITERATOR(vmi, mm, addr);
1161
1162 vma = vma_iter_load(&vmi);
1163 *pprev = vma_prev(&vmi);
1164 if (!vma)
1165 vma = vma_next(&vmi);
1166 return vma;
1167 }
1168
1169 #undef vma_iter_load
1170
1171 static inline void vma_iter_free(struct vma_iterator *vmi)
1172 {
1173 mas_destroy(&vmi->mas);
1174 }
1175
1176 static inline
1177 struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi)
1178 {
1179 return mas_next_range(&vmi->mas, ULONG_MAX);
1180 }
1181
1182 bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
1183
1184 /* Update vma->vm_page_prot to reflect vma->vm_flags. */
1185 static inline void vma_set_page_prot(struct vm_area_struct *vma)
1186 {
1187 vm_flags_t vm_flags = vma->vm_flags;
1188 pgprot_t vm_page_prot;
1189
1190 /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
1191 vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags));
1192
1193 if (vma_wants_writenotify(vma, vm_page_prot)) {
1194 vm_flags &= ~VM_SHARED;
1195 /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */
1196 vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags));
1197 }
1198 /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
1199 WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
1200 }
1201
1202 static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
1203 {
1204 if (vma->vm_flags & VM_GROWSDOWN)
1205 return stack_guard_gap;
1206
1207 /* See reasoning around the VM_SHADOW_STACK definition */
1208 if (vma->vm_flags & VM_SHADOW_STACK)
1209 return PAGE_SIZE;
1210
1211 return 0;
1212 }
1213
1214 static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
1215 {
1216 unsigned long gap = stack_guard_start_gap(vma);
1217 unsigned long vm_start = vma->vm_start;
1218
1219 vm_start -= gap;
1220 if (vm_start > vma->vm_start)
1221 vm_start = 0;
1222 return vm_start;
1223 }
1224
1225 static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
1226 {
1227 unsigned long vm_end = vma->vm_end;
1228
1229 if (vma->vm_flags & VM_GROWSUP) {
1230 vm_end += stack_guard_gap;
1231 if (vm_end < vma->vm_end)
1232 vm_end = -PAGE_SIZE;
1233 }
1234 return vm_end;
1235 }
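/*
 * Illustrative example, not kernel source: checking whether a candidate range
 * would intrude on an existing VMA once its guard gaps are included, using the
 * helpers above. example_range_hits_guarded_vma() is a hypothetical name.
 */
static inline bool example_range_hits_guarded_vma(struct vm_area_struct *vma,
						  unsigned long start,
						  unsigned long end)
{
	return start < vm_end_gap(vma) && end > vm_start_gap(vma);
}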
1236
1237 static inline bool vma_is_accessible(struct vm_area_struct *vma)
1238 {
1239 return vma->vm_flags & VM_ACCESS_FLAGS;
1240 }
1241
1242 static inline bool mlock_future_ok(const struct mm_struct *mm,
1243 vm_flags_t vm_flags, unsigned long bytes)
1244 {
1245 unsigned long locked_pages, limit_pages;
1246
1247 if (!(vm_flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
1248 return true;
1249
1250 locked_pages = bytes >> PAGE_SHIFT;
1251 locked_pages += mm->locked_vm;
1252
1253 limit_pages = rlimit(RLIMIT_MEMLOCK);
1254 limit_pages >>= PAGE_SHIFT;
1255
1256 return locked_pages <= limit_pages;
1257 }
1258
1259 static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
1260 {
1261 /* If MDWE is disabled, we have nothing to deny. */
1262 if (!mm_flags_test(MMF_HAS_MDWE, current->mm))
1263 return false;
1264
1265 /* If the new VMA is not executable, we have nothing to deny. */
1266 if (!(new & VM_EXEC))
1267 return false;
1268
1269 /* Under MDWE we do not accept newly writably executable VMAs... */
1270 if (new & VM_WRITE)
1271 return true;
1272
1273 /* ...nor previously non-executable VMAs becoming executable. */
1274 if (!(old & VM_EXEC))
1275 return true;
1276
1277 return false;
1278 }
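/*
 * Illustrative example, not kernel source: an mprotect()-style permission
 * check built on map_deny_write_exec(). example_mprotect_allowed() is a
 * hypothetical name; -EACCES is simply the errno such callers conventionally
 * return.
 */
static inline int example_mprotect_allowed(struct vm_area_struct *vma,
					   unsigned long newflags)
{
	if (map_deny_write_exec(vma->vm_flags, newflags))
		return -EACCES;

	return 0;
}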
1279
1280 static inline int mapping_map_writable(struct address_space *mapping)
1281 {
1282 return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
1283 0 : -EPERM;
1284 }
1285
1286 /* Did the driver provide valid mmap hook configuration? */
1287 static inline bool can_mmap_file(struct file *file)
1288 {
1289 bool has_mmap = file->f_op->mmap;
1290 bool has_mmap_prepare = file->f_op->mmap_prepare;
1291
1292 /* Hooks are mutually exclusive. */
1293 if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
1294 return false;
1295 if (!has_mmap && !has_mmap_prepare)
1296 return false;
1297
1298 return true;
1299 }
1300
1301 static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
1302 {
1303 if (file->f_op->mmap_prepare)
1304 return compat_vma_mmap(file, vma);
1305
1306 return file->f_op->mmap(file, vma);
1307 }
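/*
 * Illustrative example, not kernel source: how a mapping path might combine
 * the hook-validity check with the dispatch above. example_do_file_mmap() is
 * a hypothetical name; -ENODEV mirrors the errno conventionally used when a
 * file cannot be mapped.
 */
static inline int example_do_file_mmap(struct file *file,
				       struct vm_area_struct *vma)
{
	if (!can_mmap_file(file))
		return -ENODEV;

	return vfs_mmap(file, vma);
}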
1308
1309 static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
1310 {
1311 return file->f_op->mmap_prepare(desc);
1312 }
1313
1314 static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
1315 {
1316 /* Changing an anonymous vma with this is illegal */
1317 get_file(file);
1318 swap(vma->vm_file, file);
1319 fput(file);
1320 }
1321