xref: /linux/arch/x86/kernel/sys_x86_64.c (revision 6fdcba32711044c35c0e1b094cbd8f3f0b4472c9)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/compat.h>
3 #include <linux/errno.h>
4 #include <linux/sched.h>
5 #include <linux/sched/mm.h>
6 #include <linux/syscalls.h>
7 #include <linux/mm.h>
8 #include <linux/fs.h>
9 #include <linux/smp.h>
10 #include <linux/sem.h>
11 #include <linux/msg.h>
12 #include <linux/shm.h>
13 #include <linux/stat.h>
14 #include <linux/mman.h>
15 #include <linux/file.h>
16 #include <linux/utsname.h>
17 #include <linux/personality.h>
18 #include <linux/random.h>
19 #include <linux/uaccess.h>
20 #include <linux/elf.h>
21 
22 #include <asm/elf.h>
23 #include <asm/ia32.h>
24 #include <asm/syscalls.h>
25 #include <asm/mpx.h>
26 
27 /*
28  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
29  */
30 static unsigned long get_align_mask(void)
31 {
32 	/* handle 32- and 64-bit case with a single conditional */
33 	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
34 		return 0;
35 
36 	if (!(current->flags & PF_RANDOMIZE))
37 		return 0;
38 
39 	return va_align.mask;
40 }
41 
42 /*
43  * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
44  * va_align.bits, [12:upper_bit), are set to a random value instead of
45  * zeroing them. This random value is computed once per boot. This form
46  * of ASLR is known as "per-boot ASLR".
47  *
48  * To achieve this, the random value is added to the info.align_offset
49  * value before calling vm_unmapped_area() or ORed directly to the
50  * address.
51  */
52 static unsigned long get_align_bits(void)
53 {
54 	return va_align.bits & get_align_mask();
55 }
56 
/*
 * Round @addr up to the boundary described by get_align_mask() and then
 * OR in the alignment bits from get_align_bits(). When the mask is 0 the
 * address is returned unchanged.
 */
unsigned long align_vdso_addr(unsigned long addr)
{
	const unsigned long mask = get_align_mask();
	const unsigned long rounded_up = (addr + mask) & ~mask;

	return rounded_up | get_align_bits();
}
63 
64 static int __init control_va_addr_alignment(char *str)
65 {
66 	/* guard against enabling this on other CPU families */
67 	if (va_align.flags < 0)
68 		return 1;
69 
70 	if (*str == 0)
71 		return 1;
72 
73 	if (*str == '=')
74 		str++;
75 
76 	if (!strcmp(str, "32"))
77 		va_align.flags = ALIGN_VA_32;
78 	else if (!strcmp(str, "64"))
79 		va_align.flags = ALIGN_VA_64;
80 	else if (!strcmp(str, "off"))
81 		va_align.flags = 0;
82 	else if (!strcmp(str, "on"))
83 		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
84 	else
85 		return 0;
86 
87 	return 1;
88 }
89 __setup("align_va_addr", control_va_addr_alignment);
90 
91 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
92 		unsigned long, prot, unsigned long, flags,
93 		unsigned long, fd, unsigned long, off)
94 {
95 	long error;
96 	error = -EINVAL;
97 	if (off & ~PAGE_MASK)
98 		goto out;
99 
100 	error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
101 out:
102 	return error;
103 }
104 
105 static void find_start_end(unsigned long addr, unsigned long flags,
106 		unsigned long *begin, unsigned long *end)
107 {
108 	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
109 		/* This is usually used needed to map code in small
110 		   model, so it needs to be in the first 31bit. Limit
111 		   it to that.  This means we need to move the
112 		   unmapped base down for this case. This can give
113 		   conflicts with the heap, but we assume that glibc
114 		   malloc knows how to fall back to mmap. Give it 1GB
115 		   of playground for now. -AK */
116 		*begin = 0x40000000;
117 		*end = 0x80000000;
118 		if (current->flags & PF_RANDOMIZE) {
119 			*begin = randomize_page(*begin, 0x02000000);
120 		}
121 		return;
122 	}
123 
124 	*begin	= get_mmap_base(1);
125 	if (in_32bit_syscall())
126 		*end = task_size_32bit();
127 	else
128 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
129 }
130 
131 unsigned long
132 arch_get_unmapped_area(struct file *filp, unsigned long addr,
133 		unsigned long len, unsigned long pgoff, unsigned long flags)
134 {
135 	struct mm_struct *mm = current->mm;
136 	struct vm_area_struct *vma;
137 	struct vm_unmapped_area_info info;
138 	unsigned long begin, end;
139 
140 	addr = mpx_unmapped_area_check(addr, len, flags);
141 	if (IS_ERR_VALUE(addr))
142 		return addr;
143 
144 	if (flags & MAP_FIXED)
145 		return addr;
146 
147 	find_start_end(addr, flags, &begin, &end);
148 
149 	if (len > end)
150 		return -ENOMEM;
151 
152 	if (addr) {
153 		addr = PAGE_ALIGN(addr);
154 		vma = find_vma(mm, addr);
155 		if (end - len >= addr &&
156 		    (!vma || addr + len <= vm_start_gap(vma)))
157 			return addr;
158 	}
159 
160 	info.flags = 0;
161 	info.length = len;
162 	info.low_limit = begin;
163 	info.high_limit = end;
164 	info.align_mask = 0;
165 	info.align_offset = pgoff << PAGE_SHIFT;
166 	if (filp) {
167 		info.align_mask = get_align_mask();
168 		info.align_offset += get_align_bits();
169 	}
170 	return vm_unmapped_area(&info);
171 }
172 
173 unsigned long
174 arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
175 			  const unsigned long len, const unsigned long pgoff,
176 			  const unsigned long flags)
177 {
178 	struct vm_area_struct *vma;
179 	struct mm_struct *mm = current->mm;
180 	unsigned long addr = addr0;
181 	struct vm_unmapped_area_info info;
182 
183 	addr = mpx_unmapped_area_check(addr, len, flags);
184 	if (IS_ERR_VALUE(addr))
185 		return addr;
186 
187 	/* requested length too big for entire address space */
188 	if (len > TASK_SIZE)
189 		return -ENOMEM;
190 
191 	/* No address checking. See comment at mmap_address_hint_valid() */
192 	if (flags & MAP_FIXED)
193 		return addr;
194 
195 	/* for MAP_32BIT mappings we force the legacy mmap base */
196 	if (!in_32bit_syscall() && (flags & MAP_32BIT))
197 		goto bottomup;
198 
199 	/* requesting a specific address */
200 	if (addr) {
201 		addr &= PAGE_MASK;
202 		if (!mmap_address_hint_valid(addr, len))
203 			goto get_unmapped_area;
204 
205 		vma = find_vma(mm, addr);
206 		if (!vma || addr + len <= vm_start_gap(vma))
207 			return addr;
208 	}
209 get_unmapped_area:
210 
211 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
212 	info.length = len;
213 	info.low_limit = PAGE_SIZE;
214 	info.high_limit = get_mmap_base(0);
215 
216 	/*
217 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
218 	 * in the full address space.
219 	 *
220 	 * !in_32bit_syscall() check to avoid high addresses for x32
221 	 * (and make it no op on native i386).
222 	 */
223 	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
224 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
225 
226 	info.align_mask = 0;
227 	info.align_offset = pgoff << PAGE_SHIFT;
228 	if (filp) {
229 		info.align_mask = get_align_mask();
230 		info.align_offset += get_align_bits();
231 	}
232 	addr = vm_unmapped_area(&info);
233 	if (!(addr & ~PAGE_MASK))
234 		return addr;
235 	VM_BUG_ON(addr != -ENOMEM);
236 
237 bottomup:
238 	/*
239 	 * A failed mmap() very likely causes application failure,
240 	 * so fall back to the bottom-up function here. This scenario
241 	 * can happen with large stack limits and large mmap()
242 	 * allocations.
243 	 */
244 	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
245 }
246