// SPDX-License-Identifier: GPL-2.0
/*
 *  Implement mseal() syscall.
 *
 *  Copyright (c) 2023,2024 Google, Inc.
 *
 *  Author: Jeff Xu <jeffxu@chromium.org>
 */

#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include "internal.h"

/*
 * mseal() disallows an input range which contains unmapped ranges (VMA holes).
 *
 * It disallows unmapped regions from start to end, whether they exist at the
 * start, in the middle, or at the end of the range, or any combination thereof.
 *
 * This is because after sealing a range, there's nothing to stop memory mapping
 * of ranges in the remaining gaps later, meaning that the user might then
 * wrongly consider the entirety of the mseal()'d range to be sealed when it
 * in fact isn't.
 */

/*
 * Does the [start, end) range contain any unmapped memory?
 *
 * We ensure that:
 * - start is part of a valid VMA.
 * - end is part of a valid VMA.
 * - no gap (unallocated memory) exists between start and end.
 */
static bool range_contains_unmapped(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	unsigned long prev_end = start;
	VMA_ITERATOR(vmi, mm, start);

	for_each_vma_range(vmi, vma, end) {
		/* A gap before this VMA means the range contains a hole. */
		if (vma->vm_start > prev_end)
			return true;

		prev_end = vma->vm_end;
	}

	/* A gap after the last VMA also counts as unmapped memory. */
	return prev_end < end;
}

static int mseal_apply(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma, *prev;
	unsigned long curr_start = start;
	VMA_ITERATOR(vmi, mm, start);

	/* We know there are no gaps, so this will be non-NULL. */
	vma = vma_iter_load(&vmi);
	prev = vma_prev(&vmi);
	if (start > vma->vm_start)
		prev = vma;

	for_each_vma_range(vmi, vma, end) {
		const unsigned long curr_end = MIN(vma->vm_end, end);

		if (!(vma->vm_flags & VM_SEALED)) {
			vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;

			vma = vma_modify_flags(&vmi, prev, vma, curr_start,
					curr_end, &vm_flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);
			vm_flags_set(vma, VM_SEALED);
		}

		prev = vma;
		curr_start = curr_end;
	}

	return 0;
}

/*
 * mseal(2) seals the VM's metadata from
 * selected syscalls.
 *
 * addr/len: VM address range.
 *
 * The address range given by addr/len must meet:
 *	start (addr) must be in a valid VMA.
 *	end (addr + len) must be in a valid VMA.
 *	no gap (unallocated memory) between start and end.
 *	start (addr) must be page aligned.
 *
 * len: len will be page aligned implicitly.
 *
 * The following VMA operations are blocked after sealing:
 * 1> Unmapping, moving to another location, and shrinking the size,
 *    via munmap() and mremap(). These can leave an empty space in the
 *    address range, which can then be filled by a VMA with a new set
 *    of attributes.
 * 2> Moving or expanding a different VMA into the current location,
 *    via mremap().
 * 3> Modifying a VMA via mmap(MAP_FIXED).
 * 4> Size expansion, via mremap(), does not appear to pose any
 *    specific risks to sealed VMAs. It is included anyway because
 *    the use case is unclear. In any case, users can rely on
 *    merging to expand a sealed VMA.
 * 5> mprotect() and pkey_mprotect().
 * 6> Some destructive madvise() behaviors (e.g. MADV_DONTNEED)
 *    for anonymous memory, when users don't have write permission to the
 *    memory.
 *    Those behaviors can alter region contents by discarding pages,
 *    effectively a memset(0) for anonymous memory.
 *
 * flags: reserved.
 *
 * return values:
 *  zero: success.
 *  -EINVAL:
 *	invalid input flags.
 *	start address is not page aligned.
 *	address range (start + len) overflows.
 *  -ENOMEM:
 *	addr is not a valid address (not allocated).
 *	end (start + len) is not a valid address.
 *	a gap (unallocated memory) exists between start and end.
 *  -EPERM:
 *	- On 32-bit architectures, sealing is not supported.
 *
 * Note:
 *  user can call mseal(2) multiple times; sealing memory that is
 *  already sealed is a no-op (no error). An illustrative userspace
 *  sketch appears at the end of this file.
 *
 *  unseal() is not supported.
 */
int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	size_t len;
	int ret = 0;
	unsigned long end;
	struct mm_struct *mm = current->mm;

	/* Verify that no flags are set; the argument is reserved. */
	if (flags)
		return -EINVAL;

	start = untagged_addr(start);
	if (!PAGE_ALIGNED(start))
		return -EINVAL;

	len = PAGE_ALIGN(len_in);
	/* Check whether len was rounded up from a small negative value to zero. */
	if (len_in && !len)
		return -EINVAL;

	end = start + len;
	if (end < start)
		return -EINVAL;

	if (end == start)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	if (range_contains_unmapped(mm, start, end)) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Second pass: this should succeed, unless vma_modify_flags()
	 * returns an error, e.g. a merge/split failure, or the process
	 * reaching the maximum supported number of VMAs. Those cases
	 * should be rare.
	 */
	ret = mseal_apply(mm, start, end);

out:
	mmap_write_unlock(mm);
	return ret;
}

SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}
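
/*
 * Illustrative userspace sketch (not kernel code): one way a process might
 * exercise mseal(2) and observe the behaviors documented above. This is a
 * hypothetical example, assuming the libc exposes the syscall number as
 * __NR_mseal via <sys/syscall.h>; error handling is elided except where
 * the result is the point.
 *
 *	#include <sys/mman.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	long page = sysconf(_SC_PAGESIZE);
 *	char *p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	// Sealing an intact, page-aligned range succeeds; sealing it
 *	// again is a no-op.
 *	syscall(__NR_mseal, p, 3 * page, 0);	// returns 0
 *	syscall(__NR_mseal, p, 3 * page, 0);	// returns 0 (already sealed)
 *
 *	// Sealed mappings reject the blocked operations with EPERM.
 *	munmap(p, 3 * page);			// fails, errno == EPERM
 *	mprotect(p, page, PROT_READ);		// fails, errno == EPERM
 *
 *	// A range containing a hole is rejected up front with ENOMEM,
 *	// per range_contains_unmapped(): punch a hole in a second,
 *	// unsealed mapping and try to seal across it.
 *	char *q = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
 *		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	munmap(q + page, page);
 *	syscall(__NR_mseal, q, 3 * page, 0);	// fails, errno == ENOMEM
 */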