xref: /linux/mm/vma_exec.c (revision 40286d6379aacfcc053253ef78dc78b09addffda)
// SPDX-License-Identifier: GPL-2.0-only

/*
 * Functions implemented for exec functionality which are, however,
 * explicitly VMA-only logic.
 */

#include "vma_internal.h"
#include "vma.h"

/*
 * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
 * this VMA and its relocated range, which will now reside at [vma->vm_start -
 * shift, vma->vm_end - shift).
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable temporary stack relocation.
 */
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
{
	/*
	 * The process proceeds as follows:
	 *
	 * 1) Use shift to calculate the new vma endpoints.
	 * 2) Extend vma to cover both the old and new ranges.  This ensures the
	 *    arguments passed to subsequent functions are consistent.
	 * 3) Move vma's page tables to the new range.
	 * 4) Free up any cleared pgd range.
	 * 5) Shrink the vma to cover only the new range.
	 */
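	/*
	 * A worked example, with illustrative addresses only (not taken from
	 * the source): given an old range of [0x8000, 0xa000) and a shift of
	 * 0x3000, step 2 expands the VMA to cover [0x5000, 0xa000), step 3
	 * moves its page tables down by 0x3000, and step 5 shrinks the VMA to
	 * the new range [0x5000, 0x7000).
	 */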

	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	VMA_ITERATOR(vmi, mm, new_start);
	VMG_STATE(vmg, mm, &vmi, new_start, old_end, EMPTY_VMA_FLAGS,
		  vma->vm_pgoff);
	struct vm_area_struct *next;
	struct mmu_gather tlb;
	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);

	BUG_ON(new_start > new_end);

	/*
	 * Ensure there are no VMAs between where we want to go and where we
	 * are.
	 */
	if (vma != vma_next(&vmi))
		return -EFAULT;

	vma_iter_prev_range(&vmi);
	/*
	 * Cover the whole range: [new_start, old_end).
	 */
	vmg.target = vma;
	if (vma_expand(&vmg))
		return -ENOMEM;

	/*
	 * Move the page tables downwards; on failure we rely on process
	 * cleanup to remove whatever mess we made.
	 */
	pmc.for_stack = true;
	if (length != move_page_tables(&pmc))
		return -ENOMEM;

	tlb_gather_mmu(&tlb, mm);
	next = vma_next(&vmi);
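	/*
	 * The two cases below, with illustrative addresses only: the old and
	 * new ranges overlap iff shift < length. Shifting the 0x4000-byte VMA
	 * [0x8000, 0xc000) down by 0x1000 gives new_end == 0xb000 > old_start,
	 * so only [0xb000, 0xc000) needs clearing; shifting it down by 0x5000
	 * gives new_end == 0x7000 <= old_start, so all of [0x8000, 0xc000) is
	 * cleared.
	 */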
	if (new_end > old_start) {
		/*
		 * When the old and new regions overlap, clear from new_end.
		 */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/*
		 * Otherwise, clean from old_start. This is done to avoid
		 * touching the address space in [new_end, old_start), as some
		 * architectures have constraints on the VA space that make
		 * this illegal (IA64); for the others it's just a little
		 * faster.
		 */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	vma_prev(&vmi);
	/* Shrink the vma to just the new range. */
	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}
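
/*
 * Example usage, a sketch only, modelled loosely on fs/exec.c's
 * setup_arg_pages(), which relocates the temporary stack once the final
 * stack address is known (final_stack_addr is a hypothetical name for that
 * address):
 *
 *	unsigned long shift = vma->vm_start - final_stack_addr;
 *
 *	if (relocate_vma_down(vma, shift))
 *		return -ENOMEM;
 */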

/*
 * Establish the stack VMA in an execve'd process, located temporarily at the
 * maximum stack address provided by the architecture.
 *
 * We later relocate this downwards in relocate_vma_down().
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable initialisation.
 *
 * On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the
 * highest addressable location in the stack that is capable of storing a
 * system word of data.
 */
int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
			  unsigned long *top_mem_p)
{
	unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
	int err;
	struct vm_area_struct *vma = vm_area_alloc(mm);

	if (!vma)
		return -ENOMEM;

	vma_set_anonymous(vma);

	if (mmap_write_lock_killable(mm)) {
		err = -EINTR;
		goto err_free;
	}

	/*
	 * This needs to be called with the mmap write lock held, to avoid
	 * racing with ksmd.
	 */
	err = ksm_execve(mm);
	if (err)
		goto err_ksm;

	/*
	 * Place the stack at the largest stack address the architecture
	 * supports. Later, we'll move this to an appropriate place. We don't
	 * use STACK_TOP because that can depend on attributes which aren't
	 * configured yet.
	 */
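	/*
	 * For instance, on x86-64 with 4-level page tables, STACK_TOP_MAX
	 * sits one page below the 128 TiB user address space limit, while
	 * STACK_TOP can end up lower, e.g. for 32-bit compat tasks.
	 */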
	VM_WARN_ON_ONCE(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
	vma->vm_end = STACK_TOP_MAX;
	vma->vm_start = vma->vm_end - PAGE_SIZE;
	if (pgtable_supports_soft_dirty())
		flags |= VM_SOFTDIRTY;
	vm_flags_init(vma, flags);
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	err = insert_vm_struct(mm, vma);
	if (err)
		goto err;

	mm->stack_vm = mm->total_vm = 1;
	mmap_write_unlock(mm);
	*vmap = vma;
	*top_mem_p = vma->vm_end - sizeof(void *);
	return 0;

err:
	ksm_exit(mm);
err_ksm:
	mmap_write_unlock(mm);
err_free:
	*vmap = NULL;
	vm_area_free(vma);
	return err;
}
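
/*
 * Example usage, a sketch only, modelled loosely on the bprm setup in
 * fs/exec.c, which establishes the temporary stack and records the initial
 * stack pointer used while copying arguments:
 *
 *	struct vm_area_struct *vma;
 *	unsigned long top;
 *	int err;
 *
 *	err = create_init_stack_vma(bprm->mm, &vma, &top);
 *	if (err)
 *		return err;
 *	bprm->vma = vma;
 *	bprm->p = top;
 *
 * The stack VMA is later moved to its final address via relocate_vma_down().
 */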