xref: /linux/mm/vma_exec.c (revision 00c010e130e58301db2ea0cec1eadc931e1cb8cf)
16c36ac1eSLorenzo Stoakes // SPDX-License-Identifier: GPL-2.0-only
26c36ac1eSLorenzo Stoakes 
36c36ac1eSLorenzo Stoakes /*
46c36ac1eSLorenzo Stoakes  * Functions explicitly implemented for exec functionality which however are
56c36ac1eSLorenzo Stoakes  * explicitly VMA-only logic.
66c36ac1eSLorenzo Stoakes  */
76c36ac1eSLorenzo Stoakes 
86c36ac1eSLorenzo Stoakes #include "vma_internal.h"
96c36ac1eSLorenzo Stoakes #include "vma.h"
106c36ac1eSLorenzo Stoakes 
/*
 * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
 * this VMA and its relocated range, which will now reside at [vma->vm_start -
 * shift, vma->vm_end - shift).
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable temporary stack relocation.
 *
 * Returns 0 on success, -EFAULT if another VMA occupies the destination
 * range, or -ENOMEM if the VMA expansion or page table move fails.
 *
 * NOTE(review): presumably the caller holds the mmap write lock (exec-time
 * single-threaded setup) - confirm against the exec callers.
 */
int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
{
	/*
	 * The process proceeds as follows:
	 *
	 * 1) Use shift to calculate the new vma endpoints.
	 * 2) Extend vma to cover both the old and new ranges.  This ensures the
	 *    arguments passed to subsequent functions are consistent.
	 * 3) Move vma's page tables to the new range.
	 * 4) Free up any cleared pgd range.
	 * 5) Shrink the vma to cover only the new range.
	 */

	struct mm_struct *mm = vma->vm_mm;
	unsigned long old_start = vma->vm_start;
	unsigned long old_end = vma->vm_end;
	unsigned long length = old_end - old_start;
	unsigned long new_start = old_start - shift;
	unsigned long new_end = old_end - shift;
	/* Iterator starts at the destination so we can probe for occupants. */
	VMA_ITERATOR(vmi, mm, new_start);
	/* Merge state describing the combined span [new_start, old_end). */
	VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff);
	struct vm_area_struct *next;
	struct mmu_gather tlb;
	/* Describes the page table move: vma's tables, old_start -> new_start. */
	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);

	/* Moving downwards can never invert the range. */
	BUG_ON(new_start > new_end);

	/*
	 * ensure there are no vmas between where we want to go
	 * and where we are
	 */
	if (vma != vma_next(&vmi))
		return -EFAULT;

	/* Reposition the iterator on the gap preceding vma for the expand. */
	vma_iter_prev_range(&vmi);
	/*
	 * cover the whole range: [new_start, old_end)
	 */
	vmg.middle = vma;
	if (vma_expand(&vmg))
		return -ENOMEM;

	/*
	 * move the page tables downwards, on failure we rely on
	 * process cleanup to remove whatever mess we made.
	 */
	pmc.for_stack = true;
	if (length != move_page_tables(&pmc))
		return -ENOMEM;

	/* Tear down the page tables left behind in the vacated region. */
	tlb_gather_mmu(&tlb, mm);
	next = vma_next(&vmi);
	if (new_end > old_start) {
		/*
		 * when the old and new regions overlap clear from new_end.
		 */
		free_pgd_range(&tlb, new_end, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	} else {
		/*
		 * otherwise, clean from old_start; this is done to not touch
		 * the address space in [new_end, old_start) some architectures
		 * have constraints on va-space that make this illegal (IA64) -
		 * for the others its just a little faster.
		 */
		free_pgd_range(&tlb, old_start, old_end, new_end,
			next ? next->vm_start : USER_PGTABLES_CEILING);
	}
	tlb_finish_mmu(&tlb);

	/* Step the iterator back onto vma itself before shrinking it. */
	vma_prev(&vmi);
	/* Shrink the vma to just the new range */
	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
}
93*dd7a6246SLorenzo Stoakes 
/*
 * Establish the stack VMA in an execve'd process, located temporarily at the
 * maximum stack address provided by the architecture.
 *
 * We later relocate this downwards in relocate_vma_down().
 *
 * This function is almost certainly NOT what you want for anything other than
 * early executable initialisation.
 *
 * On success, returns 0 and sets *vmap to the stack VMA and *top_mem_p to the
 * maximum addressable location in the stack (that is capable of storing a
 * system word of data).
 *
 * On failure, returns a negative errno (-ENOMEM, -EINTR, or the error from
 * ksm_execve()/insert_vm_struct()), sets *vmap to NULL and frees the VMA.
 */
int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
			  unsigned long *top_mem_p)
{
	int err;
	struct vm_area_struct *vma = vm_area_alloc(mm);

	if (!vma)
		return -ENOMEM;

	/* The stack is anonymous memory - no backing file. */
	vma_set_anonymous(vma);

	/* Killable: a fatal signal during early exec aborts with -EINTR. */
	if (mmap_write_lock_killable(mm)) {
		err = -EINTR;
		goto err_free;
	}

	/*
	 * Need to be called with mmap write lock
	 * held, to avoid race with ksmd.
	 */
	err = ksm_execve(mm);
	if (err)
		goto err_ksm;

	/*
	 * Place the stack at the largest stack address the architecture
	 * supports. Later, we'll move this to an appropriate place. We don't
	 * use STACK_TOP because that can depend on attributes which aren't
	 * configured yet.
	 */
	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
	vma->vm_end = STACK_TOP_MAX;
	vma->vm_start = vma->vm_end - PAGE_SIZE;
	/*
	 * VM_STACK_INCOMPLETE_SETUP marks the VMA as mid-construction; flags
	 * must be final before deriving the page protection below.
	 */
	vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	err = insert_vm_struct(mm, vma);
	if (err)
		goto err;

	/* This single stack page is the process's only mapping so far. */
	mm->stack_vm = mm->total_vm = 1;
	mmap_write_unlock(mm);
	*vmap = vma;
	/* Highest address able to hold a full word of data. */
	*top_mem_p = vma->vm_end - sizeof(void *);
	return 0;

	/* Unwind in reverse order of acquisition: ksm, lock, then the VMA. */
err:
	ksm_exit(mm);
err_ksm:
	mmap_write_unlock(mm);
err_free:
	*vmap = NULL;
	vm_area_free(vma);
	return err;
}
162