1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Implement mseal() syscall.
4 *
5 * Copyright (c) 2023,2024 Google, Inc.
6 *
7 * Author: Jeff Xu <jeffxu@chromium.org>
8 */
9
10 #include <linux/mempolicy.h>
11 #include <linux/mman.h>
12 #include <linux/mm.h>
13 #include <linux/mm_inline.h>
14 #include <linux/syscalls.h>
15 #include <linux/sched.h>
16 #include "internal.h"
17
/*
 * mseal() disallows an input range which contains unmapped ranges (VMA holes).
 *
 * It disallows unmapped regions from start to end whether they exist at the
 * start, in the middle, or at the end of the range, or any combination thereof.
 *
 * This is because after sealing a range, there's nothing to stop memory mapping
 * of ranges in the remaining gaps later, meaning that the user might then
 * wrongly consider the entirety of the mseal()'d range to be sealed when it
 * in fact isn't.
 */
29
30 /*
31 * Does the [start, end) range contain any unmapped memory?
32 *
33 * We ensure that:
34 * - start is part of a valid VMA.
35 * - end is part of a valid VMA.
36 * - no gap (unallocated memory) exists between start and end.
37 */
range_contains_unmapped(struct mm_struct * mm,unsigned long start,unsigned long end)38 static bool range_contains_unmapped(struct mm_struct *mm,
39 unsigned long start, unsigned long end)
40 {
41 struct vm_area_struct *vma;
42 unsigned long prev_end = start;
43 VMA_ITERATOR(vmi, current->mm, start);
44
45 for_each_vma_range(vmi, vma, end) {
46 if (vma->vm_start > prev_end)
47 return true;
48
49 prev_end = vma->vm_end;
50 }
51
52 return prev_end < end;
53 }
54
mseal_apply(struct mm_struct * mm,unsigned long start,unsigned long end)55 static int mseal_apply(struct mm_struct *mm,
56 unsigned long start, unsigned long end)
57 {
58 struct vm_area_struct *vma, *prev;
59 VMA_ITERATOR(vmi, mm, start);
60
61 /* We know there are no gaps so this will be non-NULL. */
62 vma = vma_iter_load(&vmi);
63 prev = vma_prev(&vmi);
64 if (start > vma->vm_start)
65 prev = vma;
66
67 for_each_vma_range(vmi, vma, end) {
68 const unsigned long curr_start = MAX(vma->vm_start, start);
69 const unsigned long curr_end = MIN(vma->vm_end, end);
70
71 if (!vma_test(vma, VMA_SEALED_BIT)) {
72 vma_flags_t vma_flags = vma->flags;
73
74 vma_flags_set(&vma_flags, VMA_SEALED_BIT);
75
76 vma = vma_modify_flags(&vmi, prev, vma, curr_start,
77 curr_end, &vma_flags);
78 if (IS_ERR(vma))
79 return PTR_ERR(vma);
80 vma_start_write(vma);
81 vma_set_flags(vma, VMA_SEALED_BIT);
82 }
83
84 prev = vma;
85 }
86
87 return 0;
88 }
89
90 /*
91 * mseal(2) seals the VM's meta data from
92 * selected syscalls.
93 *
94 * addr/len: VM address range.
95 *
96 * The address range by addr/len must meet:
97 * start (addr) must be in a valid VMA.
98 * end (addr + len) must be in a valid VMA.
99 * no gap (unallocated memory) between start and end.
100 * start (addr) must be page aligned.
101 *
102 * len: len will be page aligned implicitly.
103 *
104 * Below VMA operations are blocked after sealing.
105 * 1> Unmapping, moving to another location, and shrinking
106 * the size, via munmap() and mremap(), can leave an empty
107 * space, therefore can be replaced with a VMA with a new
108 * set of attributes.
109 * 2> Moving or expanding a different vma into the current location,
110 * via mremap().
111 * 3> Modifying a VMA via mmap(MAP_FIXED).
112 * 4> Size expansion, via mremap(), does not appear to pose any
113 * specific risks to sealed VMAs. It is included anyway because
114 * the use case is unclear. In any case, users can rely on
115 * merging to expand a sealed VMA.
116 * 5> mprotect and pkey_mprotect.
117 * 6> Some destructive madvice() behavior (e.g. MADV_DONTNEED)
118 * for anonymous memory, when users don't have write permission to the
119 * memory. Those behaviors can alter region contents by discarding pages,
120 * effectively a memset(0) for anonymous memory.
121 *
122 * flags: reserved.
123 *
124 * return values:
125 * zero: success.
126 * -EINVAL:
127 * invalid input flags.
128 * start address is not page aligned.
129 * Address range (start + len) overflow.
130 * -ENOMEM:
131 * addr is not a valid address (not allocated).
132 * end (start + len) is not a valid address.
133 * a gap (unallocated memory) between start and end.
134 * -EPERM:
135 * - In 32 bit architecture, sealing is not supported.
136 * Note:
137 * user can call mseal(2) multiple times, adding a seal on an
138 * already sealed memory is a no-action (no error).
139 *
140 * unseal() is not supported.
141 */
do_mseal(unsigned long start,size_t len_in,unsigned long flags)142 int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
143 {
144 size_t len;
145 int ret = 0;
146 unsigned long end;
147 struct mm_struct *mm = current->mm;
148
149 /* Verify flags not set. */
150 if (flags)
151 return -EINVAL;
152
153 start = untagged_addr(start);
154 if (!PAGE_ALIGNED(start))
155 return -EINVAL;
156
157 len = PAGE_ALIGN(len_in);
158 /* Check to see whether len was rounded up from small -ve to zero. */
159 if (len_in && !len)
160 return -EINVAL;
161
162 end = start + len;
163 if (end < start)
164 return -EINVAL;
165
166 if (end == start)
167 return 0;
168
169 if (mmap_write_lock_killable(mm))
170 return -EINTR;
171
172 if (range_contains_unmapped(mm, start, end)) {
173 ret = -ENOMEM;
174 goto out;
175 }
176
177 /*
178 * Second pass, this should success, unless there are errors
179 * from vma_modify_flags, e.g. merge/split error, or process
180 * reaching the max supported VMAs, however, those cases shall
181 * be rare.
182 */
183 ret = mseal_apply(mm, start, end);
184
185 out:
186 mmap_write_unlock(mm);
187 return ret;
188 }
189
/*
 * mseal(2) system call entry point: passes start, len and flags straight
 * through to do_mseal() and returns its result.
 */
SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long, flags)
{
	return do_mseal(start, len, flags);
}
195