xref: /linux/arch/um/kernel/tlb.c (revision a8fe58cec351c25e09c393bf46117c0c47b5a17c)
1 /*
2  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3  * Licensed under the GPL
4  */
5 
6 #include <linux/mm.h>
7 #include <linux/module.h>
8 #include <linux/sched.h>
9 #include <asm/pgtable.h>
10 #include <asm/tlbflush.h>
11 #include <as-layout.h>
12 #include <mem_user.h>
13 #include <os.h>
14 #include <skas.h>
15 #include <kern_util.h>
16 
17 struct host_vm_change {
18 	struct host_vm_op {
19 		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
20 		union {
21 			struct {
22 				unsigned long addr;
23 				unsigned long len;
24 				unsigned int prot;
25 				int fd;
26 				__u64 offset;
27 			} mmap;
28 			struct {
29 				unsigned long addr;
30 				unsigned long len;
31 			} munmap;
32 			struct {
33 				unsigned long addr;
34 				unsigned long len;
35 				unsigned int prot;
36 			} mprotect;
37 		} u;
38 	} ops[1];
39 	int index;
40 	struct mm_id *id;
41 	void *data;
42 	int force;
43 };
44 
/*
 * Build an empty host_vm_change for the address space @mm_ (a
 * struct mm_struct pointer expression) with the given @force_ setting.
 *
 * The parameter names intentionally differ from the structure's field
 * names: the preprocessor substitutes parameters inside designators as
 * well, so a parameter called "force" would rewrite ".force" into
 * ".<argument>" and only compile when the argument happens to be spelled
 * "force".  Both parameters are parenthesised so arbitrary expressions
 * may be passed.
 */
#define INIT_HVC(mm_, force_) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .id		= &(mm_)->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= (force_) })
52 
53 static void report_enomem(void)
54 {
55 	printk(KERN_ERR "UML ran out of memory on the host side! "
56 			"This can happen due to a memory limitation or "
57 			"vm.max_map_count has been reached.\n");
58 }
59 
60 static int do_ops(struct host_vm_change *hvc, int end,
61 		  int finished)
62 {
63 	struct host_vm_op *op;
64 	int i, ret = 0;
65 
66 	for (i = 0; i < end && !ret; i++) {
67 		op = &hvc->ops[i];
68 		switch (op->type) {
69 		case MMAP:
70 			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
71 				  op->u.mmap.prot, op->u.mmap.fd,
72 				  op->u.mmap.offset, finished, &hvc->data);
73 			break;
74 		case MUNMAP:
75 			ret = unmap(hvc->id, op->u.munmap.addr,
76 				    op->u.munmap.len, finished, &hvc->data);
77 			break;
78 		case MPROTECT:
79 			ret = protect(hvc->id, op->u.mprotect.addr,
80 				      op->u.mprotect.len, op->u.mprotect.prot,
81 				      finished, &hvc->data);
82 			break;
83 		default:
84 			printk(KERN_ERR "Unknown op type %d in do_ops\n",
85 			       op->type);
86 			BUG();
87 			break;
88 		}
89 	}
90 
91 	if (ret == -ENOMEM)
92 		report_enomem();
93 
94 	return ret;
95 }
96 
97 static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
98 		    unsigned int prot, struct host_vm_change *hvc)
99 {
100 	__u64 offset;
101 	struct host_vm_op *last;
102 	int fd, ret = 0;
103 
104 	fd = phys_mapping(phys, &offset);
105 	if (hvc->index != 0) {
106 		last = &hvc->ops[hvc->index - 1];
107 		if ((last->type == MMAP) &&
108 		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
109 		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
110 		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
111 			last->u.mmap.len += len;
112 			return 0;
113 		}
114 	}
115 
116 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
117 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
118 		hvc->index = 0;
119 	}
120 
121 	hvc->ops[hvc->index++] = ((struct host_vm_op)
122 				  { .type	= MMAP,
123 				    .u = { .mmap = { .addr	= virt,
124 						     .len	= len,
125 						     .prot	= prot,
126 						     .fd	= fd,
127 						     .offset	= offset }
128 			   } });
129 	return ret;
130 }
131 
132 static int add_munmap(unsigned long addr, unsigned long len,
133 		      struct host_vm_change *hvc)
134 {
135 	struct host_vm_op *last;
136 	int ret = 0;
137 
138 	if ((addr >= STUB_START) && (addr < STUB_END))
139 		return -EINVAL;
140 
141 	if (hvc->index != 0) {
142 		last = &hvc->ops[hvc->index - 1];
143 		if ((last->type == MUNMAP) &&
144 		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
145 			last->u.munmap.len += len;
146 			return 0;
147 		}
148 	}
149 
150 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
151 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
152 		hvc->index = 0;
153 	}
154 
155 	hvc->ops[hvc->index++] = ((struct host_vm_op)
156 				  { .type	= MUNMAP,
157 			     	    .u = { .munmap = { .addr	= addr,
158 						       .len	= len } } });
159 	return ret;
160 }
161 
162 static int add_mprotect(unsigned long addr, unsigned long len,
163 			unsigned int prot, struct host_vm_change *hvc)
164 {
165 	struct host_vm_op *last;
166 	int ret = 0;
167 
168 	if (hvc->index != 0) {
169 		last = &hvc->ops[hvc->index - 1];
170 		if ((last->type == MPROTECT) &&
171 		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
172 		   (last->u.mprotect.prot == prot)) {
173 			last->u.mprotect.len += len;
174 			return 0;
175 		}
176 	}
177 
178 	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
179 		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
180 		hvc->index = 0;
181 	}
182 
183 	hvc->ops[hvc->index++] = ((struct host_vm_op)
184 				  { .type	= MPROTECT,
185 			     	    .u = { .mprotect = { .addr	= addr,
186 							 .len	= len,
187 							 .prot	= prot } } });
188 	return ret;
189 }
190 
/*
 * Advance @value by @step and truncate the sum to a multiple of @step.
 * For a power-of-two @step this yields the first @step boundary strictly
 * above @value (an already aligned @value moves on by a full @step).
 * @step is evaluated twice.
 */
#define ADD_ROUND(value, step) (((value) + (step)) & ~((step) - 1))
192 
193 static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
194 				   unsigned long end,
195 				   struct host_vm_change *hvc)
196 {
197 	pte_t *pte;
198 	int r, w, x, prot, ret = 0;
199 
200 	pte = pte_offset_kernel(pmd, addr);
201 	do {
202 		if ((addr >= STUB_START) && (addr < STUB_END))
203 			continue;
204 
205 		r = pte_read(*pte);
206 		w = pte_write(*pte);
207 		x = pte_exec(*pte);
208 		if (!pte_young(*pte)) {
209 			r = 0;
210 			w = 0;
211 		} else if (!pte_dirty(*pte))
212 			w = 0;
213 
214 		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
215 			(x ? UM_PROT_EXEC : 0));
216 		if (hvc->force || pte_newpage(*pte)) {
217 			if (pte_present(*pte))
218 				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
219 					       PAGE_SIZE, prot, hvc);
220 			else
221 				ret = add_munmap(addr, PAGE_SIZE, hvc);
222 		} else if (pte_newprot(*pte))
223 			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
224 		*pte = pte_mkuptodate(*pte);
225 	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
226 	return ret;
227 }
228 
229 static inline int update_pmd_range(pud_t *pud, unsigned long addr,
230 				   unsigned long end,
231 				   struct host_vm_change *hvc)
232 {
233 	pmd_t *pmd;
234 	unsigned long next;
235 	int ret = 0;
236 
237 	pmd = pmd_offset(pud, addr);
238 	do {
239 		next = pmd_addr_end(addr, end);
240 		if (!pmd_present(*pmd)) {
241 			if (hvc->force || pmd_newpage(*pmd)) {
242 				ret = add_munmap(addr, next - addr, hvc);
243 				pmd_mkuptodate(*pmd);
244 			}
245 		}
246 		else ret = update_pte_range(pmd, addr, next, hvc);
247 	} while (pmd++, addr = next, ((addr < end) && !ret));
248 	return ret;
249 }
250 
251 static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
252 				   unsigned long end,
253 				   struct host_vm_change *hvc)
254 {
255 	pud_t *pud;
256 	unsigned long next;
257 	int ret = 0;
258 
259 	pud = pud_offset(pgd, addr);
260 	do {
261 		next = pud_addr_end(addr, end);
262 		if (!pud_present(*pud)) {
263 			if (hvc->force || pud_newpage(*pud)) {
264 				ret = add_munmap(addr, next - addr, hvc);
265 				pud_mkuptodate(*pud);
266 			}
267 		}
268 		else ret = update_pmd_range(pud, addr, next, hvc);
269 	} while (pud++, addr = next, ((addr < end) && !ret));
270 	return ret;
271 }
272 
273 void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
274 		      unsigned long end_addr, int force)
275 {
276 	pgd_t *pgd;
277 	struct host_vm_change hvc;
278 	unsigned long addr = start_addr, next;
279 	int ret = 0;
280 
281 	hvc = INIT_HVC(mm, force);
282 	pgd = pgd_offset(mm, addr);
283 	do {
284 		next = pgd_addr_end(addr, end_addr);
285 		if (!pgd_present(*pgd)) {
286 			if (force || pgd_newpage(*pgd)) {
287 				ret = add_munmap(addr, next - addr, &hvc);
288 				pgd_mkuptodate(*pgd);
289 			}
290 		}
291 		else ret = update_pud_range(pgd, addr, next, &hvc);
292 	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
293 
294 	if (!ret)
295 		ret = do_ops(&hvc, hvc.index, 1);
296 
297 	/* This is not an else because ret is modified above */
298 	if (ret) {
299 		printk(KERN_ERR "fix_range_common: failed, killing current "
300 		       "process: %d\n", task_tgid_vnr(current));
301 		/* We are under mmap_sem, release it such that current can terminate */
302 		up_write(&current->mm->mmap_sem);
303 		force_sig(SIGKILL, current);
304 		do_signal(&current->thread.regs);
305 	}
306 }
307 
308 static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
309 {
310 	struct mm_struct *mm;
311 	pgd_t *pgd;
312 	pud_t *pud;
313 	pmd_t *pmd;
314 	pte_t *pte;
315 	unsigned long addr, last;
316 	int updated = 0, err;
317 
318 	mm = &init_mm;
319 	for (addr = start; addr < end;) {
320 		pgd = pgd_offset(mm, addr);
321 		if (!pgd_present(*pgd)) {
322 			last = ADD_ROUND(addr, PGDIR_SIZE);
323 			if (last > end)
324 				last = end;
325 			if (pgd_newpage(*pgd)) {
326 				updated = 1;
327 				err = os_unmap_memory((void *) addr,
328 						      last - addr);
329 				if (err < 0)
330 					panic("munmap failed, errno = %d\n",
331 					      -err);
332 			}
333 			addr = last;
334 			continue;
335 		}
336 
337 		pud = pud_offset(pgd, addr);
338 		if (!pud_present(*pud)) {
339 			last = ADD_ROUND(addr, PUD_SIZE);
340 			if (last > end)
341 				last = end;
342 			if (pud_newpage(*pud)) {
343 				updated = 1;
344 				err = os_unmap_memory((void *) addr,
345 						      last - addr);
346 				if (err < 0)
347 					panic("munmap failed, errno = %d\n",
348 					      -err);
349 			}
350 			addr = last;
351 			continue;
352 		}
353 
354 		pmd = pmd_offset(pud, addr);
355 		if (!pmd_present(*pmd)) {
356 			last = ADD_ROUND(addr, PMD_SIZE);
357 			if (last > end)
358 				last = end;
359 			if (pmd_newpage(*pmd)) {
360 				updated = 1;
361 				err = os_unmap_memory((void *) addr,
362 						      last - addr);
363 				if (err < 0)
364 					panic("munmap failed, errno = %d\n",
365 					      -err);
366 			}
367 			addr = last;
368 			continue;
369 		}
370 
371 		pte = pte_offset_kernel(pmd, addr);
372 		if (!pte_present(*pte) || pte_newpage(*pte)) {
373 			updated = 1;
374 			err = os_unmap_memory((void *) addr,
375 					      PAGE_SIZE);
376 			if (err < 0)
377 				panic("munmap failed, errno = %d\n",
378 				      -err);
379 			if (pte_present(*pte))
380 				map_memory(addr,
381 					   pte_val(*pte) & PAGE_MASK,
382 					   PAGE_SIZE, 1, 1, 1);
383 		}
384 		else if (pte_newprot(*pte)) {
385 			updated = 1;
386 			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
387 		}
388 		addr += PAGE_SIZE;
389 	}
390 	return updated;
391 }
392 
393 void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
394 {
395 	pgd_t *pgd;
396 	pud_t *pud;
397 	pmd_t *pmd;
398 	pte_t *pte;
399 	struct mm_struct *mm = vma->vm_mm;
400 	void *flush = NULL;
401 	int r, w, x, prot, err = 0;
402 	struct mm_id *mm_id;
403 
404 	address &= PAGE_MASK;
405 	pgd = pgd_offset(mm, address);
406 	if (!pgd_present(*pgd))
407 		goto kill;
408 
409 	pud = pud_offset(pgd, address);
410 	if (!pud_present(*pud))
411 		goto kill;
412 
413 	pmd = pmd_offset(pud, address);
414 	if (!pmd_present(*pmd))
415 		goto kill;
416 
417 	pte = pte_offset_kernel(pmd, address);
418 
419 	r = pte_read(*pte);
420 	w = pte_write(*pte);
421 	x = pte_exec(*pte);
422 	if (!pte_young(*pte)) {
423 		r = 0;
424 		w = 0;
425 	} else if (!pte_dirty(*pte)) {
426 		w = 0;
427 	}
428 
429 	mm_id = &mm->context.id;
430 	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
431 		(x ? UM_PROT_EXEC : 0));
432 	if (pte_newpage(*pte)) {
433 		if (pte_present(*pte)) {
434 			unsigned long long offset;
435 			int fd;
436 
437 			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
438 			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
439 				  1, &flush);
440 		}
441 		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
442 	}
443 	else if (pte_newprot(*pte))
444 		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
445 
446 	if (err) {
447 		if (err == -ENOMEM)
448 			report_enomem();
449 
450 		goto kill;
451 	}
452 
453 	*pte = pte_mkuptodate(*pte);
454 
455 	return;
456 
457 kill:
458 	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
459 	force_sig(SIGKILL, current);
460 }
461 
462 pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
463 {
464 	return pgd_offset(mm, address);
465 }
466 
467 pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
468 {
469 	return pud_offset(pgd, address);
470 }
471 
472 pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
473 {
474 	return pmd_offset(pud, address);
475 }
476 
477 pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
478 {
479 	return pte_offset_kernel(pmd, address);
480 }
481 
482 pte_t *addr_pte(struct task_struct *task, unsigned long addr)
483 {
484 	pgd_t *pgd = pgd_offset(task->mm, addr);
485 	pud_t *pud = pud_offset(pgd, addr);
486 	pmd_t *pmd = pmd_offset(pud, addr);
487 
488 	return pte_offset_map(pmd, addr);
489 }
490 
491 void flush_tlb_all(void)
492 {
493 	flush_tlb_mm(current->mm);
494 }
495 
/*
 * Synchronise host mappings for the kernel range [@start, @end).
 * The "anything changed" result of the common helper is not needed here.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	(void) flush_tlb_kernel_range_common(start, end);
}
500 
501 void flush_tlb_kernel_vm(void)
502 {
503 	flush_tlb_kernel_range_common(start_vm, end_vm);
504 }
505 
506 void __flush_tlb_one(unsigned long addr)
507 {
508 	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
509 }
510 
/* File-local alias that forwards all arguments to fix_range_common(). */
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	fix_range_common(mm,
			 start_addr,
			 end_addr,
			 force);
}
516 
517 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
518 		     unsigned long end)
519 {
520 	if (vma->vm_mm == NULL)
521 		flush_tlb_kernel_range_common(start, end);
522 	else fix_range(vma->vm_mm, start, end, 0);
523 }
524 EXPORT_SYMBOL(flush_tlb_range);
525 
526 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
527 			unsigned long end)
528 {
529 	/*
530 	 * Don't bother flushing if this address space is about to be
531 	 * destroyed.
532 	 */
533 	if (atomic_read(&mm->mm_users) == 0)
534 		return;
535 
536 	fix_range(mm, start, end, 0);
537 }
538 
539 void flush_tlb_mm(struct mm_struct *mm)
540 {
541 	struct vm_area_struct *vma = mm->mmap;
542 
543 	while (vma != NULL) {
544 		fix_range(mm, vma->vm_start, vma->vm_end, 0);
545 		vma = vma->vm_next;
546 	}
547 }
548 
549 void force_flush_all(void)
550 {
551 	struct mm_struct *mm = current->mm;
552 	struct vm_area_struct *vma = mm->mmap;
553 
554 	while (vma != NULL) {
555 		fix_range(mm, vma->vm_start, vma->vm_end, 1);
556 		vma = vma->vm_next;
557 	}
558 }
559