// SPDX-License-Identifier: GPL-2.0-only

/*
 * Functions implemented for exec functionality which, however, are
 * explicitly VMA-only logic.
 */

#include "vma_internal.h"
#include "vma.h"

/*
 * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
 * this VMA and its relocated range, which will now reside at [vma->vm_start -
 * shift, vma->vm_end - shift).
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable temporary stack relocation.
 */
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
{
	/*
	 * The process proceeds as follows:
	 *
	 * 1) Use shift to calculate the new vma endpoints.
	 * 2) Extend vma to cover both the old and new ranges. This ensures the
	 *    arguments passed to subsequent functions are consistent.
	 * 3) Move vma's page tables to the new range.
	 * 4) Free up any cleared pgd range.
	 * 5) Shrink the vma to cover only the new range.
	 */

	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	VMA_ITERATOR(vmi, mm, new_start);
	VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff);
	struct vm_area_struct *next;
	struct mmu_gather tlb;
	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);

	BUG_ON(new_start > new_end);

	/*
	 * ensure there are no vmas between where we want to go
	 * and where we are
	 */
	if (vma != vma_next(&vmi))
		return -EFAULT;

	vma_iter_prev_range(&vmi);
	/*
	 * cover the whole range: [new_start, old_end)
	 */
	vmg.target = vma;
	if (vma_expand(&vmg))
		return -ENOMEM;

	/*
	 * move the page tables downwards; on failure we rely on
	 * process cleanup to remove whatever mess we made.
	 */
	pmc.for_stack = true;
	if (length != move_page_tables(&pmc))
		return -ENOMEM;

	tlb_gather_mmu(&tlb, mm);
	next = vma_next(&vmi);
	if (new_end > old_start) {
		/*
		 * when the old and new regions overlap, clear from new_end.
		 */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/*
		 * otherwise, clean from old_start; this is done so as not to
		 * touch the address space in [new_end, old_start). Some
		 * architectures have constraints on va-space that make this
		 * illegal (IA64) - for the others it's just a little faster.
		 */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	vma_prev(&vmi);
	/* Shrink the vma to just the new range */
	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}

/*
 * Establish the stack VMA in an execve'd process, located temporarily at the
 * maximum stack address provided by the architecture.
 *
 * We later relocate this downwards in relocate_vma_down().
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable initialisation.
 *
 * On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the
 * maximum addressable location in the stack (that is capable of storing a
 * system word of data).
 */
int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
			  unsigned long *top_mem_p)
{
	int err;
	struct vm_area_struct *vma = vm_area_alloc(mm);

	if (!vma)
		return -ENOMEM;

	vma_set_anonymous(vma);

	if (mmap_write_lock_killable(mm)) {
		err = -EINTR;
		goto err_free;
	}

	/*
	 * Needs to be called with the mmap write lock
	 * held, to avoid a race with ksmd.
	 */
	err = ksm_execve(mm);
	if (err)
		goto err_ksm;

	/*
	 * Place the stack at the largest stack address the architecture
	 * supports. Later, we'll move this to an appropriate place. We don't
	 * use STACK_TOP because that can depend on attributes which aren't
	 * configured yet.
	 */
	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
	vma->vm_end = STACK_TOP_MAX;
	vma->vm_start = vma->vm_end - PAGE_SIZE;
	vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	err = insert_vm_struct(mm, vma);
	if (err)
		goto err;

	mm->stack_vm = mm->total_vm = 1;
	mmap_write_unlock(mm);
	*vmap = vma;
	*top_mem_p = vma->vm_end - sizeof(void *);
	return 0;

err:
	ksm_exit(mm);
err_ksm:
	mmap_write_unlock(mm);
err_free:
	*vmap = NULL;
	vm_area_free(vma);
	return err;
}
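
/*
 * Illustrative sketch only (not compiled): a rough outline of how an exec
 * implementation might use the two helpers above, loosely modelled on the
 * way fs/exec.c builds the temporary stack and later shifts it into place.
 * The function name and the shift calculation below are hypothetical and
 * are not part of this file's interface.
 */
#if 0
static int example_exec_stack_setup(struct mm_struct *mm,
				    unsigned long final_stack_top)
{
	struct vm_area_struct *vma;
	unsigned long top_mem;
	unsigned long shift;
	int err;

	/* Create the temporary stack VMA just below STACK_TOP_MAX. */
	err = create_init_stack_vma(mm, &vma, &top_mem);
	if (err)
		return err;

	/*
	 * ... argument and environment strings would be copied into the
	 * temporary stack here, growing it downwards from top_mem ...
	 */

	/*
	 * Once the final stack location is known, shift the VMA and its page
	 * tables down to their permanent position. relocate_vma_down()
	 * expects the caller to hold the mmap write lock (as fs/exec.c does).
	 */
	shift = vma->vm_end - PAGE_ALIGN(final_stack_top);

	if (mmap_write_lock_killable(mm))
		return -EINTR;
	err = relocate_vma_down(vma, shift);
	mmap_write_unlock(mm);

	return err;
}
#endif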