xref: /linux/arch/x86/kernel/sys_x86_64.c (revision f49f4ab95c301dbccad0efe85296d908b8ae7ad4)
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/smp.h>
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/stat.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/utsname.h>
#include <linux/personality.h>
#include <linux/random.h>
#include <linux/uaccess.h>
#include <linux/elf.h>

#include <asm/ia32.h>
#include <asm/syscalls.h>

/*
 * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
 *
 * @flags denotes the allocation direction - bottom-up or top-down -
 * or vDSO; see the call sites below.
 */
unsigned long align_addr(unsigned long addr, struct file *filp,
			 enum align_flags flags)
{
	unsigned long tmp_addr;

	/*
	 * Handle the 32- and 64-bit cases with a single conditional:
	 * mmap_is_ia32() is 1 for 32-bit tasks, so the bit tested is
	 * ALIGN_VA_32 for them and ALIGN_VA_64 otherwise.
	 */
	if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
		return addr;

	if (!(current->flags & PF_RANDOMIZE))
		return addr;

	if (!((flags & ALIGN_VDSO) || filp))
		return addr;

	tmp_addr = addr;

	/*
	 * We need an address that is <= the original one, but only
	 * when going in the top-down direction.
	 */
	if (!(flags & ALIGN_TOPDOWN))
		tmp_addr += va_align.mask;

	tmp_addr &= ~va_align.mask;

	return tmp_addr;
}
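/*
 * Worked example (the mask value is hypothetical, for illustration only):
 * with va_align.mask == 0x7000 and a page-aligned request of 0x12345000,
 *
 *   bottom-up: (0x12345000 + 0x7000) & ~0x7000 == 0x12348000  (rounded up)
 *   top-down:   0x12345000           & ~0x7000 == 0x12340000  (rounded down)
 *
 * i.e. the top-down path never hands back an address above the one it
 * was given.
 */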

static int __init control_va_addr_alignment(char *str)
{
	/* guard against enabling this on other CPU families */
	if (va_align.flags < 0)
		return 1;

	if (*str == 0)
		return 1;

	if (*str == '=')
		str++;

	if (!strcmp(str, "32"))
		va_align.flags = ALIGN_VA_32;
	else if (!strcmp(str, "64"))
		va_align.flags = ALIGN_VA_64;
	else if (!strcmp(str, "off"))
		va_align.flags = 0;
	else if (!strcmp(str, "on"))
		va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
	else
		return 0;

	return 1;
}
__setup("align_va_addr", control_va_addr_alignment);
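/*
 * The parser above is attached to the "align_va_addr=" kernel command line
 * parameter: "align_va_addr=32" or "=64" restricts the alignment to 32-bit
 * or 64-bit mmaps respectively, "align_va_addr=on" enables it for both and
 * "align_va_addr=off" disables it.
 */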

/*
 * The 64-bit mmap() entry point: @off is a byte offset that must be
 * page-aligned; it is converted to a page offset for sys_mmap_pgoff().
 */
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, off)
{
	long error;
	error = -EINVAL;
	if (off & ~PAGE_MASK)
		goto out;

	error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
out:
	return error;
}

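/*
 * Pick the search window [*begin, *end) for the bottom-up allocator.
 * MAP_32BIT requests from 64-bit tasks are confined to the 1GB window at
 * 0x40000000 (with the start randomized by up to 32MB when PF_RANDOMIZE
 * is set), e.g. an anonymous mmap() with MAP_32BIT issued by a 64-bit
 * task ends up somewhere in [0x40000000, 0x80000000).  Everything else
 * uses the usual TASK_UNMAPPED_BASE..TASK_SIZE range.
 */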
static void find_start_end(unsigned long flags, unsigned long *begin,
			   unsigned long *end)
{
	if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
		unsigned long new_begin;
		/*
		 * This is usually used to map code in the small code
		 * model, so it needs to fit in the first 31 bits.  Limit
		 * it to that.  This means we need to move the unmapped
		 * base down for this case.  This can give conflicts
		 * with the heap, but we assume that glibc malloc knows
		 * how to fall back to mmap.  Give it 1GB of playground
		 * for now. -AK
		 */
		*begin = 0x40000000;
		*end = 0x80000000;
		if (current->flags & PF_RANDOMIZE) {
			new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
			if (new_begin)
				*begin = new_begin;
		}
	} else {
		*begin = TASK_UNMAPPED_BASE;
		*end = TASK_SIZE;
	}
}

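/*
 * Legacy bottom-up search.  mm->free_area_cache remembers where the last
 * successful search ended and mm->cached_hole_size tracks the largest free
 * hole seen below that point, so most requests can resume the walk from
 * the cached address instead of rescanning from 'begin'.
 */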
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;
	unsigned long begin, end;

	if (flags & MAP_FIXED)
		return addr;

	find_start_end(flags, &begin, &end);

	if (len > end)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (end - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32))
	    && len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = begin;
	}
	addr = mm->free_area_cache;
	if (addr < begin)
		addr = begin;
	start_addr = addr;

full_search:

	addr = align_addr(addr, filp, 0);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (end - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != begin) {
				start_addr = addr = begin;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		addr = vma->vm_end;
		addr = align_addr(addr, filp, 0);
	}
}

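/*
 * Top-down variant: start just below mm->free_area_cache (initially
 * mm->mmap_base) and walk towards lower addresses.  MAP_32BIT requests
 * from 64-bit tasks, and searches that find no hole at all, fall back to
 * the bottom-up allocator above.
 */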
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0, start_addr;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* for MAP_32BIT mappings we force the legacy mmap base */
	if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
		goto bottomup;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
				(!vma || addr + len <= vma->vm_start))
			return addr;
	}

	/* check if free_area_cache is useful for us */
	if (len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = mm->mmap_base;
	}

try_again:
	/* either no address requested or can't fit in requested address hole */
	start_addr = addr = mm->free_area_cache;

	if (addr < len)
		goto fail;

	addr -= len;
	do {
		addr = align_addr(addr, filp, ALIGN_TOPDOWN);

		/*
		 * Lookup failure means no vma is above this address,
		 * else if new region fits below vma->vm_start,
		 * return with success:
		 */
		vma = find_vma(mm, addr);
		if (!vma || addr + len <= vma->vm_start)
			/* remember the address as a hint for next time */
			return mm->free_area_cache = addr;

		/* remember the largest hole we saw so far */
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start - len;
	} while (len < vma->vm_start);

fail:
	/*
	 * If the hint left us with no space for the requested
	 * mapping, then try again:
	 */
	if (start_addr != mm->mmap_base) {
		mm->free_area_cache = mm->mmap_base;
		mm->cached_hole_size = 0;
		goto try_again;
	}

bottomup:
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here.  This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->cached_hole_size = ~0UL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = mm->mmap_base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
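/*
 * Which of the two allocators a process actually uses is decided outside
 * this file: mm->get_unmapped_area is pointed at one of them when the mmap
 * layout is chosen at exec time (typically via arch_pick_mmap_layout()).
 */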