// SPDX-License-Identifier: GPL-2.0
/*
 *  Implement mseal() syscall.
 *
 *  Copyright (c) 2023,2024 Google, Inc.
 *
 *  Author: Jeff Xu <jeffxu@chromium.org>
 */

#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include "internal.h"

/*
 * mseal() disallows an input range that contains unmapped ranges (VMA holes).
 *
 * It disallows unmapped regions from start to end whether they exist at the
 * start, in the middle, or at the end of the range, or any combination thereof.
 *
 * This is because after sealing a range, there's nothing to stop memory mapping
 * of ranges in the remaining gaps later, meaning that the user might then
 * wrongly consider the entirety of the mseal()'d range to be sealed when it
 * in fact isn't.
 */

/*
 * Does the [start, end) range contain any unmapped memory?
 *
 * We ensure that:
 * - start is part of a valid VMA.
 * - end is part of a valid VMA.
 * - no gap (unallocated memory) exists between start and end.
 */
static bool range_contains_unmapped(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	unsigned long prev_end = start;
	VMA_ITERATOR(vmi, mm, start);

	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_start > prev_end)
			return true;

		prev_end = vma->vm_end;
	}

	return prev_end < end;
}

static int mseal_apply(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma, *prev;
	unsigned long curr_start = start;
	VMA_ITERATOR(vmi, mm, start);

	/* We know there are no gaps so this will be non-NULL. */
	vma = vma_iter_load(&vmi);
	prev = vma_prev(&vmi);
	if (start > vma->vm_start)
		prev = vma;

	for_each_vma_range(vmi, vma, end) {
		unsigned long curr_end = MIN(vma->vm_end, end);

		if (!(vma->vm_flags & VM_SEALED)) {
			vma = vma_modify_flags(&vmi, prev, vma,
					curr_start, curr_end,
					vma->vm_flags | VM_SEALED);
			if (IS_ERR(vma))
				return PTR_ERR(vma);
			vm_flags_set(vma, VM_SEALED);
		}

		prev = vma;
		curr_start = curr_end;
	}

	return 0;
}

/*
 * mseal(2) seals the VM's metadata from selected syscalls.
 *
 * addr/len: VM address range.
 *
 * The address range given by addr/len must meet:
 *	start (addr) must be in a valid VMA.
 *	end (addr + len) must be in a valid VMA.
 *	no gap (unallocated memory) between start and end.
 *	start (addr) must be page aligned.
 *
 * len: len will be page aligned implicitly.
 *
 * Below VMA operations are blocked after sealing:
 * 1> Unmapping, moving to another location, and shrinking the size,
 *    via munmap() and mremap(). These can leave an empty space in the
 *    address range, which can then be filled by a VMA with a new set
 *    of attributes.
 * 2> Moving or expanding a different VMA into the current location,
 *    via mremap().
 * 3> Modifying a VMA via mmap(MAP_FIXED).
 * 4> Size expansion, via mremap(). This does not appear to pose any
 *    specific risks to sealed VMAs; it is included anyway because the
 *    use case is unclear. In any case, users can rely on merging to
 *    expand a sealed VMA.
 * 5> mprotect() and pkey_mprotect().
 * 6> Some destructive madvise() behaviors (e.g. MADV_DONTNEED) for
 *    anonymous memory, when users don't have write permission to the
 *    memory. Those behaviors can alter region contents by discarding
 *    pages, effectively a memset(0) for anonymous memory.
 *
 * flags: reserved.
 *
 * return values:
 *  zero: success.
 *  -EINVAL:
 *	invalid input flags.
 *	start address is not page aligned.
 *	address range (start + len) overflows.
 *  -ENOMEM:
 *	addr is not a valid address (not allocated).
 *	end (start + len) is not a valid address.
 *	a gap (unallocated memory) exists between start and end.
 *  -EPERM:
 *	sealing is not supported on 32-bit architectures.
 *
 * Note: the user can call mseal(2) multiple times; sealing an already
 * sealed region is a no-op (no error).
 *
 * unseal() is not supported.
 */
int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	size_t len;
	int ret = 0;
	unsigned long end;
	struct mm_struct *mm = current->mm;

	/* Verify flags not set. */
	if (flags)
		return -EINVAL;

	start = untagged_addr(start);
	if (!PAGE_ALIGNED(start))
		return -EINVAL;

	len = PAGE_ALIGN(len_in);
	/* Check to see whether len was rounded up from small -ve to zero. */
	if (len_in && !len)
		return -EINVAL;

	end = start + len;
	if (end < start)
		return -EINVAL;

	if (end == start)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	if (range_contains_unmapped(mm, start, end)) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Second pass: this should succeed unless vma_modify_flags()
	 * fails, e.g. on a merge/split error or when the process hits
	 * the maximum supported VMA count. Those cases should be rare.
	 */
	ret = mseal_apply(mm, start, end);

out:
	mmap_write_unlock(mm);
	return ret;
}

SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}
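
/*
 * Userspace usage sketch (illustrative only, not part of this file's
 * build): a minimal example of sealing an anonymous mapping and
 * observing that munmap() is subsequently refused with EPERM. It
 * assumes kernel/libc headers that expose SYS_mseal; on systems
 * lacking it, the raw syscall number from the kernel's syscall table
 * would be needed instead.
 *
 *	#include <stdio.h>
 *	#include <sys/mman.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		long page = sysconf(_SC_PAGESIZE);
 *		void *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
 *			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *		if (p == MAP_FAILED)
 *			return 1;
 *
 *		// Seal the whole (gap-free) range; flags must be 0.
 *		if (syscall(SYS_mseal, p, 2 * page, 0))
 *			return 1;
 *
 *		// The sealed range can no longer be unmapped: munmap()
 *		// fails with EPERM and the mapping stays intact.
 *		if (munmap(p, 2 * page) != 0)
 *			perror("munmap on sealed range");
 *
 *		return 0;
 *	}
 */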