1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Implement mseal() syscall. 4 * 5 * Copyright (c) 2023,2024 Google, Inc. 6 * 7 * Author: Jeff Xu <jeffxu@chromium.org> 8 */ 9 10 #include <linux/mempolicy.h> 11 #include <linux/mman.h> 12 #include <linux/mm.h> 13 #include <linux/mm_inline.h> 14 #include <linux/syscalls.h> 15 #include <linux/sched.h> 16 #include "internal.h" 17 18 /* 19 * mseal() disallows an input range which contain unmapped ranges (VMA holes). 20 * 21 * It disallows unmapped regions from start to end whether they exist at the 22 * start, in the middle, or at the end of the range, or any combination thereof. 23 * 24 * This is because after sealing a range, there's nothing to stop memory mapping 25 * of ranges in the remaining gaps later, meaning that the user might then 26 * wrongly consider the entirety of the mseal()'d range to be sealed when it 27 * in fact isn't. 28 */ 29 30 /* 31 * Does the [start, end) range contain any unmapped memory? 32 * 33 * We ensure that: 34 * - start is part of a valid VMA. 35 * - end is part of a valid VMA. 36 * - no gap (unallocated memory) exists between start and end. 37 */ 38 static bool range_contains_unmapped(struct mm_struct *mm, 39 unsigned long start, unsigned long end) 40 { 41 struct vm_area_struct *vma; 42 unsigned long prev_end = start; 43 VMA_ITERATOR(vmi, current->mm, start); 44 45 for_each_vma_range(vmi, vma, end) { 46 if (vma->vm_start > prev_end) 47 return true; 48 49 prev_end = vma->vm_end; 50 } 51 52 return prev_end < end; 53 } 54 55 static int mseal_apply(struct mm_struct *mm, 56 unsigned long start, unsigned long end) 57 { 58 struct vm_area_struct *vma, *prev; 59 VMA_ITERATOR(vmi, mm, start); 60 61 /* We know there are no gaps so this will be non-NULL. */ 62 vma = vma_iter_load(&vmi); 63 prev = vma_prev(&vmi); 64 if (start > vma->vm_start) 65 prev = vma; 66 67 for_each_vma_range(vmi, vma, end) { 68 const unsigned long curr_start = MAX(vma->vm_start, start); 69 const unsigned long curr_end = MIN(vma->vm_end, end); 70 71 if (!vma_test(vma, VMA_SEALED_BIT)) { 72 vma_flags_t vma_flags = vma->flags; 73 74 vma_flags_set(&vma_flags, VMA_SEALED_BIT); 75 76 vma = vma_modify_flags(&vmi, prev, vma, curr_start, 77 curr_end, &vma_flags); 78 if (IS_ERR(vma)) 79 return PTR_ERR(vma); 80 vma_start_write(vma); 81 vma_set_flags(vma, VMA_SEALED_BIT); 82 } 83 84 prev = vma; 85 } 86 87 return 0; 88 } 89 90 /* 91 * mseal(2) seals the VM's meta data from 92 * selected syscalls. 93 * 94 * addr/len: VM address range. 95 * 96 * The address range by addr/len must meet: 97 * start (addr) must be in a valid VMA. 98 * end (addr + len) must be in a valid VMA. 99 * no gap (unallocated memory) between start and end. 100 * start (addr) must be page aligned. 101 * 102 * len: len will be page aligned implicitly. 103 * 104 * Below VMA operations are blocked after sealing. 105 * 1> Unmapping, moving to another location, and shrinking 106 * the size, via munmap() and mremap(), can leave an empty 107 * space, therefore can be replaced with a VMA with a new 108 * set of attributes. 109 * 2> Moving or expanding a different vma into the current location, 110 * via mremap(). 111 * 3> Modifying a VMA via mmap(MAP_FIXED). 112 * 4> Size expansion, via mremap(), does not appear to pose any 113 * specific risks to sealed VMAs. It is included anyway because 114 * the use case is unclear. In any case, users can rely on 115 * merging to expand a sealed VMA. 116 * 5> mprotect and pkey_mprotect. 117 * 6> Some destructive madvice() behavior (e.g. MADV_DONTNEED) 118 * for anonymous memory, when users don't have write permission to the 119 * memory. Those behaviors can alter region contents by discarding pages, 120 * effectively a memset(0) for anonymous memory. 121 * 122 * flags: reserved. 123 * 124 * return values: 125 * zero: success. 126 * -EINVAL: 127 * invalid input flags. 128 * start address is not page aligned. 129 * Address range (start + len) overflow. 130 * -ENOMEM: 131 * addr is not a valid address (not allocated). 132 * end (start + len) is not a valid address. 133 * a gap (unallocated memory) between start and end. 134 * -EPERM: 135 * - In 32 bit architecture, sealing is not supported. 136 * Note: 137 * user can call mseal(2) multiple times, adding a seal on an 138 * already sealed memory is a no-action (no error). 139 * 140 * unseal() is not supported. 141 */ 142 int do_mseal(unsigned long start, size_t len_in, unsigned long flags) 143 { 144 size_t len; 145 int ret = 0; 146 unsigned long end; 147 struct mm_struct *mm = current->mm; 148 149 /* Verify flags not set. */ 150 if (flags) 151 return -EINVAL; 152 153 start = untagged_addr(start); 154 if (!PAGE_ALIGNED(start)) 155 return -EINVAL; 156 157 len = PAGE_ALIGN(len_in); 158 /* Check to see whether len was rounded up from small -ve to zero. */ 159 if (len_in && !len) 160 return -EINVAL; 161 162 end = start + len; 163 if (end < start) 164 return -EINVAL; 165 166 if (end == start) 167 return 0; 168 169 if (mmap_write_lock_killable(mm)) 170 return -EINTR; 171 172 if (range_contains_unmapped(mm, start, end)) { 173 ret = -ENOMEM; 174 goto out; 175 } 176 177 /* 178 * Second pass, this should success, unless there are errors 179 * from vma_modify_flags, e.g. merge/split error, or process 180 * reaching the max supported VMAs, however, those cases shall 181 * be rare. 182 */ 183 ret = mseal_apply(mm, start, end); 184 185 out: 186 mmap_write_unlock(mm); 187 return ret; 188 } 189 190 SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long, 191 flags) 192 { 193 return do_mseal(start, len, flags); 194 } 195