xref: /linux/arch/x86/kernel/sys_x86_64.c (revision 5a558f369ef89c6fd8170ee1137274fcc08517ae)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/compat.h>
3 #include <linux/errno.h>
4 #include <linux/sched.h>
5 #include <linux/sched/mm.h>
6 #include <linux/syscalls.h>
7 #include <linux/mm.h>
8 #include <linux/fs.h>
9 #include <linux/smp.h>
10 #include <linux/sem.h>
11 #include <linux/msg.h>
12 #include <linux/shm.h>
13 #include <linux/stat.h>
14 #include <linux/mman.h>
15 #include <linux/file.h>
16 #include <linux/utsname.h>
17 #include <linux/personality.h>
18 #include <linux/random.h>
19 #include <linux/uaccess.h>
20 #include <linux/elf.h>
21 
22 #include <asm/elf.h>
23 #include <asm/ia32.h>
24 
25 /*
26  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
27  */
28 static unsigned long get_align_mask(void)
29 {
30 	/* handle 32- and 64-bit case with a single conditional */
31 	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
32 		return 0;
33 
34 	if (!(current->flags & PF_RANDOMIZE))
35 		return 0;
36 
37 	return va_align.mask;
38 }
39 
40 /*
41  * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
42  * va_align.bits, [12:upper_bit), are set to a random value instead of
43  * zeroing them. This random value is computed once per boot. This form
44  * of ASLR is known as "per-boot ASLR".
45  *
46  * To achieve this, the random value is added to the info.align_offset
47  * value before calling vm_unmapped_area() or ORed directly to the
48  * address.
49  */
50 static unsigned long get_align_bits(void)
51 {
52 	return va_align.bits & get_align_mask();
53 }
54 
55 static int __init control_va_addr_alignment(char *str)
56 {
57 	/* guard against enabling this on other CPU families */
58 	if (va_align.flags < 0)
59 		return 1;
60 
61 	if (*str == 0)
62 		return 1;
63 
64 	if (!strcmp(str, "32"))
65 		va_align.flags = ALIGN_VA_32;
66 	else if (!strcmp(str, "64"))
67 		va_align.flags = ALIGN_VA_64;
68 	else if (!strcmp(str, "off"))
69 		va_align.flags = 0;
70 	else if (!strcmp(str, "on"))
71 		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
72 	else
73 		pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
74 
75 	return 1;
76 }
77 __setup("align_va_addr=", control_va_addr_alignment);
78 
79 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
80 		unsigned long, prot, unsigned long, flags,
81 		unsigned long, fd, unsigned long, off)
82 {
83 	if (off & ~PAGE_MASK)
84 		return -EINVAL;
85 
86 	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
87 }
88 
89 static void find_start_end(unsigned long addr, unsigned long flags,
90 		unsigned long *begin, unsigned long *end)
91 {
92 	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
93 		/* This is usually used needed to map code in small
94 		   model, so it needs to be in the first 31bit. Limit
95 		   it to that.  This means we need to move the
96 		   unmapped base down for this case. This can give
97 		   conflicts with the heap, but we assume that glibc
98 		   malloc knows how to fall back to mmap. Give it 1GB
99 		   of playground for now. -AK */
100 		*begin = 0x40000000;
101 		*end = 0x80000000;
102 		if (current->flags & PF_RANDOMIZE) {
103 			*begin = randomize_page(*begin, 0x02000000);
104 		}
105 		return;
106 	}
107 
108 	*begin	= get_mmap_base(1);
109 	if (in_32bit_syscall())
110 		*end = task_size_32bit();
111 	else
112 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
113 }
114 
115 static inline unsigned long stack_guard_placement(vm_flags_t vm_flags)
116 {
117 	if (vm_flags & VM_SHADOW_STACK)
118 		return PAGE_SIZE;
119 
120 	return 0;
121 }
122 
123 unsigned long
124 arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len,
125 		       unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
126 {
127 	struct mm_struct *mm = current->mm;
128 	struct vm_area_struct *vma;
129 	struct vm_unmapped_area_info info = {};
130 	unsigned long begin, end;
131 
132 	if (flags & MAP_FIXED)
133 		return addr;
134 
135 	find_start_end(addr, flags, &begin, &end);
136 
137 	if (len > end)
138 		return -ENOMEM;
139 
140 	if (addr) {
141 		addr = PAGE_ALIGN(addr);
142 		vma = find_vma(mm, addr);
143 		if (end - len >= addr &&
144 		    (!vma || addr + len <= vm_start_gap(vma)))
145 			return addr;
146 	}
147 
148 	info.length = len;
149 	info.low_limit = begin;
150 	info.high_limit = end;
151 	info.align_offset = pgoff << PAGE_SHIFT;
152 	info.start_gap = stack_guard_placement(vm_flags);
153 	if (filp) {
154 		info.align_mask = get_align_mask();
155 		info.align_offset += get_align_bits();
156 	}
157 	return vm_unmapped_area(&info);
158 }
159 
160 unsigned long
161 arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr0,
162 			  unsigned long len, unsigned long pgoff,
163 			  unsigned long flags, vm_flags_t vm_flags)
164 {
165 	struct vm_area_struct *vma;
166 	struct mm_struct *mm = current->mm;
167 	unsigned long addr = addr0;
168 	struct vm_unmapped_area_info info = {};
169 
170 	/* requested length too big for entire address space */
171 	if (len > TASK_SIZE)
172 		return -ENOMEM;
173 
174 	/* No address checking. See comment at mmap_address_hint_valid() */
175 	if (flags & MAP_FIXED)
176 		return addr;
177 
178 	/* for MAP_32BIT mappings we force the legacy mmap base */
179 	if (!in_32bit_syscall() && (flags & MAP_32BIT))
180 		goto bottomup;
181 
182 	/* requesting a specific address */
183 	if (addr) {
184 		addr &= PAGE_MASK;
185 		if (!mmap_address_hint_valid(addr, len))
186 			goto get_unmapped_area;
187 
188 		vma = find_vma(mm, addr);
189 		if (!vma || addr + len <= vm_start_gap(vma))
190 			return addr;
191 	}
192 get_unmapped_area:
193 
194 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
195 	info.length = len;
196 	if (!in_32bit_syscall() && (flags & MAP_ABOVE4G))
197 		info.low_limit = SZ_4G;
198 	else
199 		info.low_limit = PAGE_SIZE;
200 
201 	info.high_limit = get_mmap_base(0);
202 	info.start_gap = stack_guard_placement(vm_flags);
203 
204 	/*
205 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
206 	 * in the full address space.
207 	 *
208 	 * !in_32bit_syscall() check to avoid high addresses for x32
209 	 * (and make it no op on native i386).
210 	 */
211 	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
212 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
213 
214 	info.align_offset = pgoff << PAGE_SHIFT;
215 	if (filp) {
216 		info.align_mask = get_align_mask();
217 		info.align_offset += get_align_bits();
218 	}
219 	addr = vm_unmapped_area(&info);
220 	if (!(addr & ~PAGE_MASK))
221 		return addr;
222 	VM_BUG_ON(addr != -ENOMEM);
223 
224 bottomup:
225 	/*
226 	 * A failed mmap() very likely causes application failure,
227 	 * so fall back to the bottom-up function here. This scenario
228 	 * can happen with large stack limits and large mmap()
229 	 * allocations.
230 	 */
231 	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
232 }
233 
234 unsigned long
235 arch_get_unmapped_area(struct file *filp, unsigned long addr,
236 		unsigned long len, unsigned long pgoff, unsigned long flags)
237 {
238 	return arch_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, 0);
239 }
240 
241 unsigned long
242 arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
243 			  const unsigned long len, const unsigned long pgoff,
244 			  const unsigned long flags)
245 {
246 	return arch_get_unmapped_area_topdown_vmflags(filp, addr, len, pgoff, flags, 0);
247 }
248