xref: /linux/arch/x86/kernel/sys_x86_64.c (revision be239684b18e1cdcafcf8c7face4a2f562c745ad)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/compat.h>
3 #include <linux/errno.h>
4 #include <linux/sched.h>
5 #include <linux/sched/mm.h>
6 #include <linux/syscalls.h>
7 #include <linux/mm.h>
8 #include <linux/fs.h>
9 #include <linux/smp.h>
10 #include <linux/sem.h>
11 #include <linux/msg.h>
12 #include <linux/shm.h>
13 #include <linux/stat.h>
14 #include <linux/mman.h>
15 #include <linux/file.h>
16 #include <linux/utsname.h>
17 #include <linux/personality.h>
18 #include <linux/random.h>
19 #include <linux/uaccess.h>
20 #include <linux/elf.h>
21 
22 #include <asm/elf.h>
23 #include <asm/ia32.h>
24 
25 /*
26  * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
27  */
28 static unsigned long get_align_mask(void)
29 {
30 	/* handle 32- and 64-bit case with a single conditional */
31 	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
32 		return 0;
33 
34 	if (!(current->flags & PF_RANDOMIZE))
35 		return 0;
36 
37 	return va_align.mask;
38 }
39 
40 /*
41  * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
42  * va_align.bits, [12:upper_bit), are set to a random value instead of
43  * zeroing them. This random value is computed once per boot. This form
44  * of ASLR is known as "per-boot ASLR".
45  *
46  * To achieve this, the random value is added to the info.align_offset
47  * value before calling vm_unmapped_area() or ORed directly to the
48  * address.
49  */
50 static unsigned long get_align_bits(void)
51 {
52 	return va_align.bits & get_align_mask();
53 }
54 
/*
 * Round @addr up to the boundary given by the current alignment mask and
 * OR the per-boot alignment bits into the result. With a zero mask the
 * address is returned unchanged.
 */
unsigned long align_vdso_addr(unsigned long addr)
{
	unsigned long mask = get_align_mask();
	unsigned long rounded = (addr + mask) & ~mask;

	return rounded | get_align_bits();
}
61 
62 static int __init control_va_addr_alignment(char *str)
63 {
64 	/* guard against enabling this on other CPU families */
65 	if (va_align.flags < 0)
66 		return 1;
67 
68 	if (*str == 0)
69 		return 1;
70 
71 	if (!strcmp(str, "32"))
72 		va_align.flags = ALIGN_VA_32;
73 	else if (!strcmp(str, "64"))
74 		va_align.flags = ALIGN_VA_64;
75 	else if (!strcmp(str, "off"))
76 		va_align.flags = 0;
77 	else if (!strcmp(str, "on"))
78 		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
79 	else
80 		pr_warn("invalid option value: 'align_va_addr=%s'\n", str);
81 
82 	return 1;
83 }
84 __setup("align_va_addr=", control_va_addr_alignment);
85 
86 SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
87 		unsigned long, prot, unsigned long, flags,
88 		unsigned long, fd, unsigned long, off)
89 {
90 	if (off & ~PAGE_MASK)
91 		return -EINVAL;
92 
93 	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
94 }
95 
96 static void find_start_end(unsigned long addr, unsigned long flags,
97 		unsigned long *begin, unsigned long *end)
98 {
99 	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
100 		/* This is usually used needed to map code in small
101 		   model, so it needs to be in the first 31bit. Limit
102 		   it to that.  This means we need to move the
103 		   unmapped base down for this case. This can give
104 		   conflicts with the heap, but we assume that glibc
105 		   malloc knows how to fall back to mmap. Give it 1GB
106 		   of playground for now. -AK */
107 		*begin = 0x40000000;
108 		*end = 0x80000000;
109 		if (current->flags & PF_RANDOMIZE) {
110 			*begin = randomize_page(*begin, 0x02000000);
111 		}
112 		return;
113 	}
114 
115 	*begin	= get_mmap_base(1);
116 	if (in_32bit_syscall())
117 		*end = task_size_32bit();
118 	else
119 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
120 }
121 
122 unsigned long
123 arch_get_unmapped_area(struct file *filp, unsigned long addr,
124 		unsigned long len, unsigned long pgoff, unsigned long flags)
125 {
126 	struct mm_struct *mm = current->mm;
127 	struct vm_area_struct *vma;
128 	struct vm_unmapped_area_info info;
129 	unsigned long begin, end;
130 
131 	if (flags & MAP_FIXED)
132 		return addr;
133 
134 	find_start_end(addr, flags, &begin, &end);
135 
136 	if (len > end)
137 		return -ENOMEM;
138 
139 	if (addr) {
140 		addr = PAGE_ALIGN(addr);
141 		vma = find_vma(mm, addr);
142 		if (end - len >= addr &&
143 		    (!vma || addr + len <= vm_start_gap(vma)))
144 			return addr;
145 	}
146 
147 	info.flags = 0;
148 	info.length = len;
149 	info.low_limit = begin;
150 	info.high_limit = end;
151 	info.align_mask = 0;
152 	info.align_offset = pgoff << PAGE_SHIFT;
153 	if (filp) {
154 		info.align_mask = get_align_mask();
155 		info.align_offset += get_align_bits();
156 	}
157 	return vm_unmapped_area(&info);
158 }
159 
160 unsigned long
161 arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
162 			  const unsigned long len, const unsigned long pgoff,
163 			  const unsigned long flags)
164 {
165 	struct vm_area_struct *vma;
166 	struct mm_struct *mm = current->mm;
167 	unsigned long addr = addr0;
168 	struct vm_unmapped_area_info info;
169 
170 	/* requested length too big for entire address space */
171 	if (len > TASK_SIZE)
172 		return -ENOMEM;
173 
174 	/* No address checking. See comment at mmap_address_hint_valid() */
175 	if (flags & MAP_FIXED)
176 		return addr;
177 
178 	/* for MAP_32BIT mappings we force the legacy mmap base */
179 	if (!in_32bit_syscall() && (flags & MAP_32BIT))
180 		goto bottomup;
181 
182 	/* requesting a specific address */
183 	if (addr) {
184 		addr &= PAGE_MASK;
185 		if (!mmap_address_hint_valid(addr, len))
186 			goto get_unmapped_area;
187 
188 		vma = find_vma(mm, addr);
189 		if (!vma || addr + len <= vm_start_gap(vma))
190 			return addr;
191 	}
192 get_unmapped_area:
193 
194 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
195 	info.length = len;
196 	if (!in_32bit_syscall() && (flags & MAP_ABOVE4G))
197 		info.low_limit = SZ_4G;
198 	else
199 		info.low_limit = PAGE_SIZE;
200 
201 	info.high_limit = get_mmap_base(0);
202 
203 	/*
204 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
205 	 * in the full address space.
206 	 *
207 	 * !in_32bit_syscall() check to avoid high addresses for x32
208 	 * (and make it no op on native i386).
209 	 */
210 	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
211 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
212 
213 	info.align_mask = 0;
214 	info.align_offset = pgoff << PAGE_SHIFT;
215 	if (filp) {
216 		info.align_mask = get_align_mask();
217 		info.align_offset += get_align_bits();
218 	}
219 	addr = vm_unmapped_area(&info);
220 	if (!(addr & ~PAGE_MASK))
221 		return addr;
222 	VM_BUG_ON(addr != -ENOMEM);
223 
224 bottomup:
225 	/*
226 	 * A failed mmap() very likely causes application failure,
227 	 * so fall back to the bottom-up function here. This scenario
228 	 * can happen with large stack limits and large mmap()
229 	 * allocations.
230 	 */
231 	return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
232 }
233