xref: /linux/arch/x86/kernel/sys_x86_64.c (revision 7f4f3b14e8079ecde096bd734af10e30d40c27b7)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/compat.h>
3 #include <linux/errno.h>
4 #include <linux/sched.h>
5 #include <linux/sched/mm.h>
6 #include <linux/syscalls.h>
7 #include <linux/mm.h>
8 #include <linux/fs.h>
9 #include <linux/smp.h>
10 #include <linux/sem.h>
11 #include <linux/msg.h>
12 #include <linux/shm.h>
13 #include <linux/stat.h>
14 #include <linux/mman.h>
15 #include <linux/file.h>
16 #include <linux/utsname.h>
17 #include <linux/personality.h>
18 #include <linux/random.h>
19 #include <linux/uaccess.h>
20 #include <linux/elf.h>
21 #include <linux/hugetlb.h>
22 
23 #include <asm/elf.h>
24 #include <asm/ia32.h>
25 
26 /*
27  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
28  */
29 static unsigned long get_align_mask(struct file *filp)
30 {
31 	if (filp && is_file_hugepages(filp))
32 		return huge_page_mask_align(filp);
33 	/* handle 32- and 64-bit case with a single conditional */
34 	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
35 		return 0;
36 
37 	if (!(current->flags & PF_RANDOMIZE))
38 		return 0;
39 
40 	return va_align.mask;
41 }
42 
43 /*
44  * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
45  * va_align.bits, [12:upper_bit), are set to a random value instead of
46  * zeroing them. This random value is computed once per boot. This form
47  * of ASLR is known as "per-boot ASLR".
48  *
49  * To achieve this, the random value is added to the info.align_offset
50  * value before calling vm_unmapped_area() or ORed directly to the
51  * address.
52  */
53 static unsigned long get_align_bits(void)
54 {
55 	return va_align.bits & get_align_mask(NULL);
56 }
57 
58 static int __init control_va_addr_alignment(char *str)
59 {
60 	/* guard against enabling this on other CPU families */
61 	if (va_align.flags < 0)
62 		return 1;
63 
64 	if (*str == 0)
65 		return 1;
66 
67 	if (!strcmp(str, "32"))
68 		va_align.flags = ALIGN_VA_32;
69 	else if (!strcmp(str, "64"))
70 		va_align.flags = ALIGN_VA_64;
71 	else if (!strcmp(str, "off"))
72 		va_align.flags = 0;
73 	else if (!strcmp(str, "on"))
74 		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
75 	else
76 		pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
77 
78 	return 1;
79 }
80 __setup("align_va_addr=", control_va_addr_alignment);
81 
82 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
83 		unsigned long, prot, unsigned long, flags,
84 		unsigned long, fd, unsigned long, off)
85 {
86 	if (off & ~PAGE_MASK)
87 		return -EINVAL;
88 
89 	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
90 }
91 
92 static void find_start_end(unsigned long addr, unsigned long flags,
93 		unsigned long *begin, unsigned long *end)
94 {
95 	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
96 		/* This is usually used needed to map code in small
97 		   model, so it needs to be in the first 31bit. Limit
98 		   it to that.  This means we need to move the
99 		   unmapped base down for this case. This can give
100 		   conflicts with the heap, but we assume that glibc
101 		   malloc knows how to fall back to mmap. Give it 1GB
102 		   of playground for now. -AK */
103 		*begin = 0x40000000;
104 		*end = 0x80000000;
105 		if (current->flags & PF_RANDOMIZE) {
106 			*begin = randomize_page(*begin, 0x02000000);
107 		}
108 		return;
109 	}
110 
111 	*begin	= get_mmap_base(1);
112 	if (in_32bit_syscall())
113 		*end = task_size_32bit();
114 	else
115 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
116 }
117 
118 static inline unsigned long stack_guard_placement(vm_flags_t vm_flags)
119 {
120 	if (vm_flags & VM_SHADOW_STACK)
121 		return PAGE_SIZE;
122 
123 	return 0;
124 }
125 
126 unsigned long
127 arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len,
128 		       unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
129 {
130 	struct mm_struct *mm = current->mm;
131 	struct vm_area_struct *vma;
132 	struct vm_unmapped_area_info info = {};
133 	unsigned long begin, end;
134 
135 	if (flags & MAP_FIXED)
136 		return addr;
137 
138 	find_start_end(addr, flags, &begin, &end);
139 
140 	if (len > end)
141 		return -ENOMEM;
142 
143 	if (addr) {
144 		addr = PAGE_ALIGN(addr);
145 		vma = find_vma(mm, addr);
146 		if (end - len >= addr &&
147 		    (!vma || addr + len <= vm_start_gap(vma)))
148 			return addr;
149 	}
150 
151 	info.length = len;
152 	info.low_limit = begin;
153 	info.high_limit = end;
154 	if (!(filp && is_file_hugepages(filp))) {
155 		info.align_offset = pgoff << PAGE_SHIFT;
156 		info.start_gap = stack_guard_placement(vm_flags);
157 	}
158 	if (filp) {
159 		info.align_mask = get_align_mask(filp);
160 		info.align_offset += get_align_bits();
161 	}
162 
163 	return vm_unmapped_area(&info);
164 }
165 
166 unsigned long
167 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr0,
168 			  unsigned long len, unsigned long pgoff,
169 			  unsigned long flags, vm_flags_t vm_flags)
170 {
171 	struct vm_area_struct *vma;
172 	struct mm_struct *mm = current->mm;
173 	unsigned long addr = addr0;
174 	struct vm_unmapped_area_info info = {};
175 
176 	/* requested length too big for entire address space */
177 	if (len > TASK_SIZE)
178 		return -ENOMEM;
179 
180 	/* No address checking. See comment at mmap_address_hint_valid() */
181 	if (flags & MAP_FIXED)
182 		return addr;
183 
184 	/* for MAP_32BIT mappings we force the legacy mmap base */
185 	if (!in_32bit_syscall() && (flags & MAP_32BIT))
186 		goto bottomup;
187 
188 	/* requesting a specific address */
189 	if (addr) {
190 		addr &= PAGE_MASK;
191 		if (!mmap_address_hint_valid(addr, len))
192 			goto get_unmapped_area;
193 
194 		vma = find_vma(mm, addr);
195 		if (!vma || addr + len <= vm_start_gap(vma))
196 			return addr;
197 	}
198 get_unmapped_area:
199 
200 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
201 	info.length = len;
202 	if (!in_32bit_syscall() && (flags & MAP_ABOVE4G))
203 		info.low_limit = SZ_4G;
204 	else
205 		info.low_limit = PAGE_SIZE;
206 
207 	info.high_limit = get_mmap_base(0);
208 	if (!(filp && is_file_hugepages(filp))) {
209 		info.start_gap = stack_guard_placement(vm_flags);
210 		info.align_offset = pgoff << PAGE_SHIFT;
211 	}
212 
213 	/*
214 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
215 	 * in the full address space.
216 	 *
217 	 * !in_32bit_syscall() check to avoid high addresses for x32
218 	 * (and make it no op on native i386).
219 	 */
220 	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
221 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
222 
223 	if (filp) {
224 		info.align_mask = get_align_mask(filp);
225 		info.align_offset += get_align_bits();
226 	}
227 	addr = vm_unmapped_area(&info);
228 	if (!(addr & ~PAGE_MASK))
229 		return addr;
230 	VM_BUG_ON(addr != -ENOMEM);
231 
232 bottomup:
233 	/*
234 	 * A failed mmap() very likely causes application failure,
235 	 * so fall back to the bottom-up function here. This scenario
236 	 * can happen with large stack limits and large mmap()
237 	 * allocations.
238 	 */
239 	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags, 0);
240 }
241