/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

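/*
 * Forward an arbitrary hypercall from userspace to the hypervisor: the
 * operation number and up to five arguments are passed through unchanged
 * via privcmd_call().
 */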
static long privcmd_ioctl_hypercall(void __user *udata)
{
	struct privcmd_hypercall hypercall;
	long ret;

	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
		return -EFAULT;

	ret = privcmd_call(hypercall.op,
			   hypercall.arg[0], hypercall.arg[1],
			   hypercall.arg[2], hypercall.arg[3],
			   hypercall.arg[4]);

	return ret;
}

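/* Free every page on the list and leave the list re-initialised (empty). */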
static void free_page_list(struct list_head *pages)
{
	struct page *p, *n;

	list_for_each_entry_safe(p, n, pages, lru)
		__free_page(p);

	INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
			unsigned nelem, size_t size,
			const void __user *data)
{
	unsigned pageidx;
	void *pagedata;
	int ret;

	if (size > PAGE_SIZE)
		return 0;

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* quiet, gcc */
	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page = alloc_page(GFP_KERNEL);

			ret = -ENOMEM;
			if (page == NULL)
				goto fail;

			pagedata = page_address(page);

			list_add_tail(&page->lru, pagelist);
			pageidx = 0;
		}

		ret = -EFAULT;
		if (copy_from_user(pagedata + pageidx, data, size))
			goto fail;

		data += size;
		pageidx += size;
	}

	ret = 0;

fail:
	return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
			  struct list_head *pos,
			  int (*fn)(void *data, void *state),
			  void *state)
{
	void *pagedata;
	unsigned pageidx;
	int ret = 0;

	BUG_ON(size > PAGE_SIZE);

	pageidx = PAGE_SIZE;
	pagedata = NULL;	/* hush, gcc */

	while (nelem--) {
		if (pageidx > PAGE_SIZE-size) {
			struct page *page;
			pos = pos->next;
			page = list_entry(pos, struct page, lru);
			pagedata = page_address(page);
			pageidx = 0;
		}

		ret = (*fn)(pagedata + pageidx, state);
		if (ret)
			break;
		pageidx += size;
	}

	return ret;
}

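/* Per-call state threaded through traverse_pages() for IOCTL_PRIVCMD_MMAP. */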
struct mmap_mfn_state {
	unsigned long va;
	struct vm_area_struct *vma;
	domid_t domain;
};

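/*
 * traverse_pages() callback: map one contiguous chunk of foreign frames,
 * described by a privcmd_mmap_entry, into the caller's VMA.
 */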
static int mmap_mfn_range(void *data, void *state)
{
	struct privcmd_mmap_entry *msg = data;
	struct mmap_mfn_state *st = state;
	struct vm_area_struct *vma = st->vma;
	int rc;

	/* Do not allow range to wrap the address space. */
	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
		return -EINVAL;

	/* Range chunks must be contiguous in va space. */
	if ((msg->va != st->va) ||
	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
		return -EINVAL;

	rc = xen_remap_domain_mfn_range(vma,
					msg->va & PAGE_MASK,
					msg->mfn, msg->npages,
					vma->vm_page_prot,
					st->domain, NULL);
	if (rc < 0)
		return rc;

	st->va += msg->npages << PAGE_SHIFT;

	return 0;
}

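/*
 * IOCTL_PRIVCMD_MMAP: map a set of foreign frame ranges, described by an
 * array of privcmd_mmap_entry structures, into a single previously
 * mmap()ed VMA.
 */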
static long privcmd_ioctl_mmap(void __user *udata)
{
	struct privcmd_mmap mmapcmd;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc;
	LIST_HEAD(pagelist);
	struct mmap_mfn_state state;

	if (!xen_initial_domain())
		return -EPERM;

	/* We only support privcmd_ioctl_mmap_batch for auto translated. */
	if (xen_feature(XENFEAT_auto_translated_physmap))
		return -ENOSYS;

	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
		return -EFAULT;

	rc = gather_array(&pagelist,
			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			  mmapcmd.entry);

	if (rc || list_empty(&pagelist))
		goto out;

	down_write(&mm->mmap_sem);

	{
		struct page *page = list_first_entry(&pagelist,
						     struct page, lru);
		struct privcmd_mmap_entry *msg = page_address(page);

		vma = find_vma(mm, msg->va);
		rc = -EINVAL;

		if (!vma || (msg->va != vma->vm_start) ||
		    !privcmd_enforce_singleshot_mapping(vma))
			goto out_up;
	}

	state.va = vma->vm_start;
	state.vma = vma;
	state.domain = mmapcmd.dom;

	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
			    &pagelist,
			    mmap_mfn_range, &state);

out_up:
	up_write(&mm->mmap_sem);

out:
	free_page_list(&pagelist);

	return rc;
}

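/* State shared by the two traverse_pages() passes of PRIVCMD_MMAPBATCH{,_V2}. */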
struct mmap_batch_state {
	domid_t domain;
	unsigned long va;
	struct vm_area_struct *vma;
	int index;
	/* A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	/* An array for individual errors */
	int *err;

	/* User-space mfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_mfn;
};

/* Auto-translated dom0 note: if the domU being created is PV, then the mfn
 * is a real mfn (machine address on the bus). If the domU is auto-translated,
 * the mfn is actually a pfn (the input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	struct vm_area_struct *vma = st->vma;
	struct page **pages = vma->vm_private_data;
	struct page *cur_page = NULL;
	int ret;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		cur_page = pages[st->index++];

	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
					 st->vma->vm_page_prot, st->domain,
					 &cur_page);

	/* Store error code for second pass. */
	*(st->err++) = ret;

	/* And see if it affects the global_error. */
	if (ret < 0) {
		if (ret == -ENOENT)
			st->global_error = -ENOENT;
		else {
			/* Record that at least one error has happened. */
			if (st->global_error == 0)
				st->global_error = 1;
		}
	}
	st->va += PAGE_SIZE;

	return 0;
}

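/*
 * Second-pass traverse_pages() callback for V1: write each frame's error,
 * if any, back into the caller's mfn array using the top-nibble encoding.
 */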
static int mmap_return_errors_v1(void *data, void *state)
{
	xen_pfn_t *mfnp = data;
	struct mmap_batch_state *st = state;
	int err = *(st->err++);

	/*
	 * V1 encodes the error codes in the top nibble of the 32-bit mfn
	 * (with its known limitations vis-a-vis 64-bit callers).  Only tag
	 * frames that actually failed; successful frames are copied back
	 * unchanged.
	 */
	if (err)
		*mfnp |= (err == -ENOENT) ?
					PRIVCMD_MMAPBATCH_PAGED_ERROR :
					PRIVCMD_MMAPBATCH_MFN_ERROR;
	return __put_user(*mfnp, st->user_mfn++);
}

/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
	int rc;
	struct page **pages;

	pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
	if (pages == NULL)
		return -ENOMEM;

	rc = alloc_xenballooned_pages(numpgs, pages, 0);
	if (rc != 0) {
		pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
			numpgs, rc);
		kfree(pages);
		return -ENOMEM;
	}
	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
	vma->vm_private_data = pages;

	return 0;
}

static struct vm_operations_struct privcmd_vm_ops;

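/*
 * IOCTL_PRIVCMD_MMAPBATCH and IOCTL_PRIVCMD_MMAPBATCH_V2: map an array of
 * individual foreign frames into a previously mmap()ed VMA, recording a
 * per-frame error code.  V1 reports errors by rewriting the caller's mfn
 * array in place; V2 uses a separate error array.
 */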
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
	int ret;
	struct privcmd_mmapbatch_v2 m;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long nr_pages;
	LIST_HEAD(pagelist);
	int *err_array = NULL;
	struct mmap_batch_state state;

	if (!xen_initial_domain())
		return -EPERM;

	switch (version) {
	case 1:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
			return -EFAULT;
		/* Returns per-frame error in m.arr. */
		m.err = NULL;
		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
			return -EFAULT;
		break;
	case 2:
		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
			return -EFAULT;
		/* Returns per-frame error code in m.err. */
		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
			return -EFAULT;
		break;
	default:
		return -EINVAL;
	}

	nr_pages = m.num;
	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
		return -EINVAL;

	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

	if (ret)
		goto out;
	if (list_empty(&pagelist)) {
		ret = -EINVAL;
		goto out;
	}

	err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
	if (err_array == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	down_write(&mm->mmap_sem);

	vma = find_vma(mm, m.addr);
	if (!vma ||
	    vma->vm_ops != &privcmd_vm_ops ||
	    (m.addr != vma->vm_start) ||
	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
	    !privcmd_enforce_singleshot_mapping(vma)) {
		up_write(&mm->mmap_sem);
		ret = -EINVAL;
		goto out;
	}
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		ret = alloc_empty_pages(vma, m.num);
		if (ret < 0) {
			up_write(&mm->mmap_sem);
			goto out;
		}
	}

	state.domain        = m.dom;
	state.vma           = vma;
	state.va            = m.addr;
	state.index         = 0;
	state.global_error  = 0;
	state.err           = err_array;

	/* mmap_batch_fn guarantees ret == 0 */
	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
			     &pagelist, mmap_batch_fn, &state));

	up_write(&mm->mmap_sem);

	if (version == 1) {
		if (state.global_error) {
			/* Write back errors in second pass. */
			state.user_mfn = (xen_pfn_t *)m.arr;
			state.err      = err_array;
			ret = traverse_pages(m.num, sizeof(xen_pfn_t),
					     &pagelist, mmap_return_errors_v1, &state);
		} else
			ret = 0;

	} else if (version == 2) {
		ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
		if (ret)
			ret = -EFAULT;
	}

	/* If we have not had any EFAULT-like global errors then set the global
	 * error to -ENOENT if necessary. */
	if ((ret == 0) && (state.global_error == -ENOENT))
		ret = -ENOENT;

out:
	kfree(err_array);
	free_page_list(&pagelist);

	return ret;
}

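/* Top-level ioctl dispatcher for /dev/xen/privcmd. */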
static long privcmd_ioctl(struct file *file,
			  unsigned int cmd, unsigned long data)
{
	int ret = -ENOSYS;
	void __user *udata = (void __user *) data;

	switch (cmd) {
	case IOCTL_PRIVCMD_HYPERCALL:
		ret = privcmd_ioctl_hypercall(udata);
		break;

	case IOCTL_PRIVCMD_MMAP:
		ret = privcmd_ioctl_mmap(udata);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH:
		ret = privcmd_ioctl_mmap_batch(udata, 1);
		break;

	case IOCTL_PRIVCMD_MMAPBATCH_V2:
		ret = privcmd_ioctl_mmap_batch(udata, 2);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

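/*
 * VMA close handler: for auto-translated domains, tear down the foreign
 * mappings and return the ballooned pages backing them.
 */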
static void privcmd_close(struct vm_area_struct *vma)
{
	struct page **pages = vma->vm_private_data;
	int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
		return;

	xen_unmap_domain_mfn_range(vma, numpgs, pages);
	free_xenballooned_pages(numpgs, pages);
	kfree(pages);
}

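/*
 * Mappings must be established explicitly through the ioctls above, so any
 * fault on a privcmd VMA is an error.
 */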
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
	       vma, vma->vm_start, vma->vm_end,
	       vmf->pgoff, vmf->virtual_address);

	return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
	.close = privcmd_close,
	.fault = privcmd_fault
};

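/*
 * Set up an empty privcmd VMA: flag it as an un-copyable I/O mapping and
 * install privcmd_vm_ops; the actual mappings are created later by ioctl.
 */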
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
			 VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}

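/*
 * Atomically claim the VMA's private data so that it can only ever be used
 * for a single mapping operation.
 */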
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}

const struct file_operations xen_privcmd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = privcmd_ioctl,
	.mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "xen/privcmd",
	.fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	err = misc_register(&privcmd_dev);
	if (err != 0) {
		printk(KERN_ERR "Could not register Xen privcmd device\n");
		return err;
	}
	return 0;
}

static void __exit privcmd_exit(void)
{
	misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);