xref: /linux/mm/mseal.c (revision 7203ca412fc8e8a0588e9adc0f777d3163f8dff3)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Implement mseal() syscall.
 *
 *  Copyright (c) 2023,2024 Google, Inc.
 *
 *  Author: Jeff Xu <jeffxu@chromium.org>
 */

#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include "internal.h"

/*
 * mseal() disallows an input range which contains unmapped ranges (VMA holes).
 *
 * It disallows unmapped regions from start to end whether they exist at the
 * start, in the middle, or at the end of the range, or any combination thereof.
 *
 * This is because after sealing a range, there's nothing to stop memory mapping
 * of ranges in the remaining gaps later, meaning that the user might then
 * wrongly consider the entirety of the mseal()'d range to be sealed when it
 * in fact isn't.
 */
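
/*
 * For example (illustrative userspace sketch; the addresses, sizes and call
 * sequence are hypothetical):
 *
 *	p = mmap(NULL, 3 * page_size, ...);	maps [p, p + 3 * page_size)
 *	munmap(p + page_size, page_size);	punch a hole in the middle
 *	mseal(p, 3 * page_size, 0);		rejected with -ENOMEM
 *
 * The request is rejected because the middle page is unmapped, even though
 * both endpoints of the requested range lie within valid VMAs.
 */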

/*
 * Does the [start, end) range contain any unmapped memory?
 *
 * We ensure that:
 * - start is part of a valid VMA.
 * - end is part of a valid VMA.
 * - no gap (unallocated memory) exists between start and end.
 */
static bool range_contains_unmapped(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	unsigned long prev_end = start;
	VMA_ITERATOR(vmi, mm, start);

	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_start > prev_end)
			return true;

		prev_end = vma->vm_end;
	}

	return prev_end < end;
}

static int mseal_apply(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma, *prev;
	unsigned long curr_start = start;
	VMA_ITERATOR(vmi, mm, start);

	/* We know there are no gaps so this will be non-NULL. */
	vma = vma_iter_load(&vmi);
	prev = vma_prev(&vmi);
	if (start > vma->vm_start)
		prev = vma;

	for_each_vma_range(vmi, vma, end) {
		const unsigned long curr_end = MIN(vma->vm_end, end);

		if (!(vma->vm_flags & VM_SEALED)) {
			vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;

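			/*
			 * vma_modify_flags() splits the VMA as needed so that
			 * only [curr_start, curr_end) is affected, and may
			 * merge it with adjacent VMAs that are compatible
			 * with the new flags.
			 */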
			vma = vma_modify_flags(&vmi, prev, vma, curr_start,
					       curr_end, &vm_flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);
			vm_flags_set(vma, VM_SEALED);
		}

		prev = vma;
		curr_start = curr_end;
	}

	return 0;
}

/*
 * mseal(2) seals the VM's metadata (the VMAs) against modification by
 * selected syscalls.
 *
 * addr/len: VM address range.
 *
 *  The address range given by addr/len must meet:
 *   start (addr) must be in a valid VMA.
 *   end (addr + len) must be in a valid VMA.
 *   no gap (unallocated memory) between start and end.
 *   start (addr) must be page aligned.
 *
 *  len: len will be page aligned implicitly.
 *
 *   The VMA operations below are blocked after sealing.
 *   1> Unmapping, moving to another location, and shrinking
 *	the size, via munmap() and mremap(); these can leave an
 *	empty space which can then be replaced with a VMA carrying
 *	a new set of attributes.
 *   2> Moving or expanding a different VMA into the current location,
 *	via mremap().
 *   3> Modifying a VMA via mmap(MAP_FIXED).
 *   4> Size expansion, via mremap(), does not appear to pose any
 *	specific risks to sealed VMAs. It is included anyway because
 *	the use case is unclear. In any case, users can rely on
 *	merging to expand a sealed VMA.
 *   5> mprotect() and pkey_mprotect().
 *   6> Some destructive madvise() behaviors (e.g. MADV_DONTNEED)
 *	for anonymous memory, when users don't have write permission to the
 *	memory. Those behaviors can alter region contents by discarding pages,
 *	effectively a memset(0) for anonymous memory.
 *
 *  flags: reserved.
 *
 * return values:
 *  zero: success.
 *  -EINVAL:
 *   invalid input flags.
 *   start address is not page aligned.
 *   address range (start + len) overflows.
 *  -ENOMEM:
 *   addr is not a valid address (not allocated).
 *   end (start + len) is not a valid address.
 *   a gap (unallocated memory) between start and end.
 *  -EPERM:
 *   sealing is not supported on 32-bit architectures.
 * Note:
 *  the user can call mseal(2) multiple times; adding a seal to already
 *  sealed memory is a no-op (no error).
 *
 *  unseal() is not supported.
 *
 *  (An illustrative userspace usage sketch appears at the end of this file.)
 */
int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	size_t len;
	int ret = 0;
	unsigned long end;
	struct mm_struct *mm = current->mm;

	/* Verify flags not set. */
	if (flags)
		return -EINVAL;

	start = untagged_addr(start);
	if (!PAGE_ALIGNED(start))
		return -EINVAL;

	len = PAGE_ALIGN(len_in);
	/* Check to see whether len was rounded up from small -ve to zero. */
	if (len_in && !len)
		return -EINVAL;

	end = start + len;
	if (end < start)
		return -EINVAL;

	if (end == start)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	if (range_contains_unmapped(mm, start, end)) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Second pass; this should succeed unless there are errors from
	 * vma_modify_flags(), e.g. a merge/split error, or the process
	 * reaching the maximum supported number of VMAs. However, those
	 * cases should be rare.
	 */
	ret = mseal_apply(mm, start, end);

out:
	mmap_write_unlock(mm);
	return ret;
}

SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}
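
/*
 * Illustrative userspace usage sketch (not part of the kernel build; the
 * mapping size and the use of a raw syscall(2) call are assumptions, since
 * a libc may not yet provide an mseal() wrapper):
 *
 *	#include <sys/mman.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	size_t len = 4096;
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	syscall(SYS_mseal, p, len, 0);		seals [p, p + len)
 *
 *	munmap(p, len);				now fails, errno == EPERM
 *	mprotect(p, len, PROT_READ);		now fails, errno == EPERM
 *	syscall(SYS_mseal, p, len, 0);		sealing again succeeds (no-op)
 */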