// SPDX-License-Identifier: GPL-2.0-only

/*
 * Functions explicitly implemented for exec functionality which however are
 * explicitly VMA-only logic.
 */

#include "vma_internal.h"
#include "vma.h"

/*
 * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
 * this VMA and its relocated range, which will now reside at [vma->vm_start -
 * shift, vma->vm_end - shift).
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable temporary stack relocation.
 *
 * Returns 0 on success, -EFAULT if another VMA lies between the VMA and its
 * destination, or -ENOMEM if the VMA could not be expanded or the page tables
 * could not be fully moved.
 */
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
{
	/*
	 * The process proceeds as follows:
	 *
	 * 1) Use shift to calculate the new vma endpoints.
	 * 2) Extend vma to cover both the old and new ranges. This ensures the
	 *    arguments passed to subsequent functions are consistent.
	 * 3) Move vma's page tables to the new range.
	 * 4) Free up any cleared pgd range.
	 * 5) Shrink the vma to cover only the new range.
	 */

	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	VMA_ITERATOR(vmi, mm, new_start);
	VMG_STATE(vmg, mm, &vmi, new_start, old_end, EMPTY_VMA_FLAGS,
			vma->vm_pgoff);
	struct vm_area_struct *next;
	struct mmu_gather tlb;
	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);

	/* A downwards relocation must never produce an inverted range. */
	BUG_ON(new_start > new_end);

	/*
	 * ensure there are no vmas between where we want to go
	 * and where we are - i.e. the first VMA at or after new_start must be
	 * this one.
	 */
	if (vma != vma_next(&vmi))
		return -EFAULT;

	/* Step the iterator back so it is positioned for the expansion. */
	vma_iter_prev_range(&vmi);
	/*
	 * cover the whole range: [new_start, old_end)
	 */
	vmg.target = vma;
	if (vma_expand(&vmg))
		return -ENOMEM;

	/*
	 * move the page tables downwards, on failure we rely on
	 * process cleanup to remove whatever mess we made.
	 */
	pmc.for_stack = true;
	if (length != move_page_tables(&pmc))
		return -ENOMEM;

	tlb_gather_mmu(&tlb, mm);
	next = vma_next(&vmi);
	if (new_end > old_start) {
		/*
		 * when the old and new regions overlap clear from new_end.
		 */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/*
		 * otherwise, clean from old_start; this is done to not touch
		 * the address space in [new_end, old_start) - some
		 * architectures have constraints on va-space that make this
		 * illegal (IA64) - for the others it's just a little faster.
		 */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	/* Reposition the iterator on the expanded VMA before shrinking it. */
	vma_prev(&vmi);
	/* Shrink the vma to just the new range */
	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}

/*
 * Establish the stack VMA in an execve'd process, located temporarily at the
 * maximum stack address provided by the architecture.
 *
 * We later relocate this downwards in relocate_vma_down().
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable initialisation.
 *
 * On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the
 * maximum addressable location in the stack (that is capable of storing a
 * system word of data).
 *
 * On failure, returns a negative error code (-ENOMEM, -EINTR if interrupted
 * while waiting for the mmap write lock, or an error from ksm_execve() or
 * insert_vm_struct()) and sets *vmap to NULL. *top_mem_p is left untouched on
 * failure.
 */
int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
			  unsigned long *top_mem_p)
{
	/* Mark the stack as still under construction until exec finalises it. */
	unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
	int err;
	struct vm_area_struct *vma = vm_area_alloc(mm);

	if (!vma)
		return -ENOMEM;

	vma_set_anonymous(vma);

	if (mmap_write_lock_killable(mm)) {
		err = -EINTR;
		goto err_free;
	}

	/*
	 * Need to be called with mmap write lock
	 * held, to avoid race with ksmd.
	 */
	err = ksm_execve(mm);
	if (err)
		goto err_ksm;

	/*
	 * Place the stack at the largest stack address the architecture
	 * supports. Later, we'll move this to an appropriate place. We don't
	 * use STACK_TOP because that can depend on attributes which aren't
	 * configured yet.
	 */
	/* Sanity check: the temporary setup marker must not be a permanent stack flag. */
	VM_WARN_ON_ONCE(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
	vma->vm_end = STACK_TOP_MAX;
	vma->vm_start = vma->vm_end - PAGE_SIZE;
	if (pgtable_supports_soft_dirty())
		flags |= VM_SOFTDIRTY;
	vm_flags_init(vma, flags);
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	err = insert_vm_struct(mm, vma);
	if (err)
		goto err;

	/* The stack is the only mapping so far, so it accounts for everything. */
	mm->stack_vm = mm->total_vm = 1;
	mmap_write_unlock(mm);
	*vmap = vma;
	/* Highest address capable of holding a full system word. */
	*top_mem_p = vma->vm_end - sizeof(void *);
	return 0;

	/* Error unwind: undo in reverse order of acquisition. */
err:
	ksm_exit(mm);
err_ksm:
	mmap_write_unlock(mm);
err_free:
	*vmap = NULL;
	vm_area_free(vma);
	return err;
}