xref: /freebsd/sys/dev/xen/privcmd/privcmd.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*
2  * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/uio.h>
31 #include <sys/bus.h>
32 #include <sys/malloc.h>
33 #include <sys/kernel.h>
34 #include <sys/lock.h>
35 #include <sys/mutex.h>
36 #include <sys/rwlock.h>
37 #include <sys/selinfo.h>
38 #include <sys/poll.h>
39 #include <sys/conf.h>
40 #include <sys/fcntl.h>
41 #include <sys/ioccom.h>
42 #include <sys/rman.h>
43 #include <sys/tree.h>
44 #include <sys/module.h>
45 #include <sys/proc.h>
46 #include <sys/bitset.h>
47 
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_extern.h>
51 #include <vm/vm_kern.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_pager.h>
56 
57 #include <machine/md_var.h>
58 
59 #include <xen/xen-os.h>
60 #include <xen/hypervisor.h>
61 #include <xen/privcmd.h>
62 #include <xen/error.h>
63 
64 MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");
65 
66 #define MAX_DMOP_BUFFERS 16
67 
68 struct privcmd_map {
69 	vm_object_t mem;
70 	vm_size_t size;
71 	struct resource *pseudo_phys_res;
72 	int pseudo_phys_res_id;
73 	vm_paddr_t phys_base_addr;
74 	boolean_t mapped;
75 	BITSET_DEFINE_VAR() *err;
76 };
77 
78 static d_ioctl_t     privcmd_ioctl;
79 static d_open_t      privcmd_open;
80 static d_mmap_single_t	privcmd_mmap_single;
81 
82 static struct cdevsw privcmd_devsw = {
83 	.d_version = D_VERSION,
84 	.d_ioctl = privcmd_ioctl,
85 	.d_mmap_single = privcmd_mmap_single,
86 	.d_open = privcmd_open,
87 	.d_name = "privcmd",
88 };
89 
90 static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
91     vm_ooffset_t foff, struct ucred *cred, u_short *color);
92 static void privcmd_pg_dtor(void *handle);
93 static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
94     int prot, vm_page_t *mres);
95 
96 static struct cdev_pager_ops privcmd_pg_ops = {
97 	.cdev_pg_fault = privcmd_pg_fault,
98 	.cdev_pg_ctor =	privcmd_pg_ctor,
99 	.cdev_pg_dtor =	privcmd_pg_dtor,
100 };
101 
102 struct per_user_data {
103 	domid_t dom;
104 };
105 
106 static device_t privcmd_dev = NULL;
107 
108 /*------------------------- Privcmd Pager functions --------------------------*/
109 static int
110 privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
111     vm_ooffset_t foff, struct ucred *cred, u_short *color)
112 {
113 
114 	return (0);
115 }
116 
117 static void
118 privcmd_pg_dtor(void *handle)
119 {
120 	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
121 	struct privcmd_map *map = handle;
122 	int error __diagused;
123 	vm_size_t i;
124 	vm_page_t m;
125 
126 	/*
127 	 * Remove the mappings from the used pages. This will remove the
128 	 * underlying p2m bindings in Xen second stage translation.
129 	 */
130 	if (map->mapped == true) {
131 		VM_OBJECT_WLOCK(map->mem);
132 retry:
133 		for (i = 0; i < map->size; i++) {
134 			m = vm_page_lookup(map->mem, i);
135 			if (m == NULL)
136 				continue;
137 			if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
138 				goto retry;
139 			cdev_pager_free_page(map->mem, m);
140 		}
141 		VM_OBJECT_WUNLOCK(map->mem);
142 
143 		for (i = 0; i < map->size; i++) {
144 			rm.gpfn = atop(map->phys_base_addr) + i;
145 			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
146 		}
147 		free(map->err, M_PRIVCMD);
148 	}
149 
150 	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
151 	    map->pseudo_phys_res);
152 	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
153 
154 	free(map, M_PRIVCMD);
155 }
156 
157 static int
158 privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
159     int prot, vm_page_t *mres)
160 {
161 	struct privcmd_map *map = object->handle;
162 	vm_pindex_t pidx;
163 	vm_page_t page;
164 
165 	if (map->mapped != true)
166 		return (VM_PAGER_FAIL);
167 
168 	pidx = OFF_TO_IDX(offset);
169 	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
170 		return (VM_PAGER_FAIL);
171 
172 	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
173 	if (page == NULL)
174 		return (VM_PAGER_FAIL);
175 
176 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
177 	    ("not fictitious %p", page));
178 	KASSERT(vm_page_wired(page), ("page %p not wired", page));
179 	KASSERT(!vm_page_busied(page), ("page %p is busy", page));
180 
181 	vm_page_busy_acquire(page, 0);
182 	vm_page_valid(page);
183 
184 	if (*mres != NULL)
185 		vm_page_replace(page, object, pidx, *mres);
186 	else
187 		vm_page_insert(page, object, pidx);
188 	*mres = page;
189 	return (VM_PAGER_OK);
190 }
191 
192 /*----------------------- Privcmd char device methods ------------------------*/
193 static int
194 privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
195     vm_object_t *object, int nprot)
196 {
197 	struct privcmd_map *map;
198 
199 	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
200 
201 	map->size = OFF_TO_IDX(size);
202 	map->pseudo_phys_res_id = 0;
203 
204 	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
205 	    &map->pseudo_phys_res_id, size);
206 	if (map->pseudo_phys_res == NULL) {
207 		free(map, M_PRIVCMD);
208 		return (ENOMEM);
209 	}
210 
211 	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
212 	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
213 	    size, nprot, *offset, NULL);
214 	if (map->mem == NULL) {
215 		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
216 		    map->pseudo_phys_res);
217 		free(map, M_PRIVCMD);
218 		return (ENOMEM);
219 	}
220 
221 	*object = map->mem;
222 
223 	return (0);
224 }
225 
226 static struct privcmd_map *
227 setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
228 {
229 	vm_map_t map;
230 	vm_map_entry_t entry;
231 	vm_object_t mem;
232 	vm_pindex_t pindex;
233 	vm_prot_t prot;
234 	boolean_t wired;
235 	struct privcmd_map *umap;
236 	int error;
237 
238 	if ((num == 0) || ((addr & PAGE_MASK) != 0))
239 		return NULL;
240 
241 	map = &td->td_proc->p_vmspace->vm_map;
242 	error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
243 	    &prot, &wired);
244 	if (error != KERN_SUCCESS || (entry->start != addr) ||
245 	    (entry->end != addr + (num * PAGE_SIZE)))
246 		return NULL;
247 
248 	vm_map_lookup_done(map, entry);
249 	if ((mem->type != OBJT_MGTDEVICE) ||
250 	    (mem->un_pager.devp.ops != &privcmd_pg_ops))
251 		return NULL;
252 
253 	umap = mem->handle;
254 	/* Allocate a bitset to store broken page mappings. */
255 	umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);
256 
257 	return umap;
258 }
259 
260 static int
261 privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
262 	      int mode, struct thread *td)
263 {
264 	int error;
265 	unsigned int i;
266 	void *data;
267 	const struct per_user_data *u;
268 
269 	error = devfs_get_cdevpriv(&data);
270 	if (error != 0)
271 		return (EINVAL);
272 	/*
273 	 * Constify user-data to prevent unintended changes to the restriction
274 	 * limits.
275 	 */
276 	u = data;
277 
278 	switch (cmd) {
279 	case IOCTL_PRIVCMD_HYPERCALL: {
280 		struct ioctl_privcmd_hypercall *hcall;
281 
282 		hcall = (struct ioctl_privcmd_hypercall *)arg;
283 
284 		/* Forbid hypercalls if restricted. */
285 		if (u->dom != DOMID_INVALID) {
286 			error = EPERM;
287 			break;
288 		}
289 
290 #ifdef __amd64__
291 		/*
292 		 * The hypervisor page table walker will refuse to access
293 		 * user-space pages if SMAP is enabled, so temporary disable it
294 		 * while performing the hypercall.
295 		 */
296 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
297 			stac();
298 #endif
299 		error = privcmd_hypercall(hcall->op, hcall->arg[0],
300 		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
301 #ifdef __amd64__
302 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
303 			clac();
304 #endif
305 		if (error >= 0) {
306 			hcall->retval = error;
307 			error = 0;
308 		} else {
309 			error = xen_translate_error(error);
310 			hcall->retval = 0;
311 		}
312 		break;
313 	}
314 	case IOCTL_PRIVCMD_MMAPBATCH: {
315 		struct ioctl_privcmd_mmapbatch *mmap;
316 		struct xen_add_to_physmap_batch add;
317 		xen_ulong_t *idxs;
318 		xen_pfn_t *gpfns;
319 		int *errs;
320 		unsigned int index;
321 		struct privcmd_map *umap;
322 		uint16_t num;
323 
324 		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
325 
326 		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
327 			error = EPERM;
328 			break;
329 		}
330 
331 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
332 		if (umap == NULL) {
333 			error = EINVAL;
334 			break;
335 		}
336 
337 		add.domid = DOMID_SELF;
338 		add.space = XENMAPSPACE_gmfn_foreign;
339 		add.u.foreign_domid = mmap->dom;
340 
341 		/*
342 		 * The 'size' field in the xen_add_to_physmap_range only
343 		 * allows for UINT16_MAX mappings in a single hypercall.
344 		 */
345 		num = MIN(mmap->num, UINT16_MAX);
346 
347 		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
348 		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
349 		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
350 
351 		set_xen_guest_handle(add.idxs, idxs);
352 		set_xen_guest_handle(add.gpfns, gpfns);
353 		set_xen_guest_handle(add.errs, errs);
354 
355 		for (index = 0; index < mmap->num; index += num) {
356 			num = MIN(mmap->num - index, UINT16_MAX);
357 			add.size = num;
358 
359 			error = copyin(&mmap->arr[index], idxs,
360 			    sizeof(idxs[0]) * num);
361 			if (error != 0)
362 				goto mmap_out;
363 
364 			for (i = 0; i < num; i++)
365 				gpfns[i] = atop(umap->phys_base_addr +
366 				    (i + index) * PAGE_SIZE);
367 
368 			bzero(errs, sizeof(*errs) * num);
369 
370 			error = HYPERVISOR_memory_op(
371 			    XENMEM_add_to_physmap_batch, &add);
372 			if (error != 0) {
373 				error = xen_translate_error(error);
374 				goto mmap_out;
375 			}
376 
377 			for (i = 0; i < num; i++) {
378 				if (errs[i] != 0) {
379 					errs[i] = xen_translate_error(errs[i]);
380 
381 					/* Mark the page as invalid. */
382 					BIT_SET(mmap->num, index + i,
383 					    umap->err);
384 				}
385 			}
386 
387 			error = copyout(errs, &mmap->err[index],
388 			    sizeof(errs[0]) * num);
389 			if (error != 0)
390 				goto mmap_out;
391 		}
392 
393 		umap->mapped = true;
394 
395 mmap_out:
396 		free(idxs, M_PRIVCMD);
397 		free(gpfns, M_PRIVCMD);
398 		free(errs, M_PRIVCMD);
399 		if (!umap->mapped)
400 			free(umap->err, M_PRIVCMD);
401 
402 		break;
403 	}
404 	case IOCTL_PRIVCMD_MMAP_RESOURCE: {
405 		struct ioctl_privcmd_mmapresource *mmap;
406 		struct xen_mem_acquire_resource adq;
407 		xen_pfn_t *gpfns;
408 		struct privcmd_map *umap;
409 
410 		mmap = (struct ioctl_privcmd_mmapresource *)arg;
411 
412 		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
413 			error = EPERM;
414 			break;
415 		}
416 
417 		bzero(&adq, sizeof(adq));
418 
419 		adq.domid = mmap->dom;
420 		adq.type = mmap->type;
421 		adq.id = mmap->id;
422 
423 		/* Shortcut for getting the resource size. */
424 		if (mmap->addr == 0 && mmap->num == 0) {
425 			error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
426 			    &adq);
427 			if (error != 0)
428 				error = xen_translate_error(error);
429 			else
430 				mmap->num = adq.nr_frames;
431 			break;
432 		}
433 
434 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
435 		if (umap == NULL) {
436 			error = EINVAL;
437 			break;
438 		}
439 
440 		adq.nr_frames = mmap->num;
441 		adq.frame = mmap->idx;
442 
443 		gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
444 		for (i = 0; i < mmap->num; i++)
445 			gpfns[i] = atop(umap->phys_base_addr) + i;
446 		set_xen_guest_handle(adq.frame_list, gpfns);
447 
448 		error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
449 		if (error != 0)
450 			error = xen_translate_error(error);
451 		else
452 			umap->mapped = true;
453 
454 		free(gpfns, M_PRIVCMD);
455 		if (!umap->mapped)
456 			free(umap->err, M_PRIVCMD);
457 
458 		break;
459 	}
460 	case IOCTL_PRIVCMD_DM_OP: {
461 		const struct ioctl_privcmd_dmop *dmop;
462 		struct privcmd_dmop_buf *bufs;
463 		struct xen_dm_op_buf *hbufs;
464 
465 		dmop = (struct ioctl_privcmd_dmop *)arg;
466 
467 		if (u->dom != DOMID_INVALID && u->dom != dmop->dom) {
468 			error = EPERM;
469 			break;
470 		}
471 
472 		if (dmop->num == 0)
473 			break;
474 
475 		if (dmop->num > MAX_DMOP_BUFFERS) {
476 			error = E2BIG;
477 			break;
478 		}
479 
480 		bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK);
481 
482 		error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num);
483 		if (error != 0) {
484 			free(bufs, M_PRIVCMD);
485 			break;
486 		}
487 
488 		hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK);
489 		for (i = 0; i < dmop->num; i++) {
490 			set_xen_guest_handle(hbufs[i].h, bufs[i].uptr);
491 			hbufs[i].size = bufs[i].size;
492 		}
493 
494 #ifdef __amd64__
495 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
496 			stac();
497 #endif
498 		error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs);
499 #ifdef __amd64__
500 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
501 			clac();
502 #endif
503 		if (error != 0)
504 			error = xen_translate_error(error);
505 
506 		free(bufs, M_PRIVCMD);
507 		free(hbufs, M_PRIVCMD);
508 
509 
510 		break;
511 	}
512 	case IOCTL_PRIVCMD_RESTRICT: {
513 		struct per_user_data *u;
514 		domid_t dom;
515 
516 		dom = *(domid_t *)arg;
517 
518 		error = devfs_get_cdevpriv((void **)&u);
519 		if (error != 0)
520 			break;
521 
522 		if (u->dom != DOMID_INVALID && u->dom != dom) {
523 			error = -EINVAL;
524 			break;
525 		}
526 		u->dom = dom;
527 
528 		break;
529 	}
530 	default:
531 		error = ENOSYS;
532 		break;
533 	}
534 
535 	return (error);
536 }
537 
538 static void
539 user_release(void *arg)
540 {
541 
542 	free(arg, M_PRIVCMD);
543 }
544 
545 static int
546 privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td)
547 {
548 	struct per_user_data *u;
549 	int error;
550 
551 	u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK);
552 	u->dom = DOMID_INVALID;
553 
554 	/* Assign the allocated per_user_data to this open instance. */
555 	error = devfs_set_cdevpriv(u, user_release);
556 	if (error != 0) {
557 		free(u, M_PRIVCMD);
558 	}
559 
560 	return (error);
561 }
562 
563 /*------------------ Private Device Attachment Functions  --------------------*/
564 static void
565 privcmd_identify(driver_t *driver, device_t parent)
566 {
567 
568 	KASSERT(xen_domain(),
569 	    ("Trying to attach privcmd device on non Xen domain"));
570 
571 	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
572 		panic("unable to attach privcmd user-space device");
573 }
574 
575 static int
576 privcmd_probe(device_t dev)
577 {
578 
579 	privcmd_dev = dev;
580 	device_set_desc(dev, "Xen privileged interface user-space device");
581 	return (BUS_PROBE_NOWILDCARD);
582 }
583 
584 static int
585 privcmd_attach(device_t dev)
586 {
587 
588 	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
589 	    GID_WHEEL, 0600, "xen/privcmd");
590 	return (0);
591 }
592 
593 /*-------------------- Private Device Attachment Data  -----------------------*/
594 static device_method_t privcmd_methods[] = {
595 	DEVMETHOD(device_identify,	privcmd_identify),
596 	DEVMETHOD(device_probe,		privcmd_probe),
597 	DEVMETHOD(device_attach,	privcmd_attach),
598 
599 	DEVMETHOD_END
600 };
601 
602 static driver_t privcmd_driver = {
603 	"privcmd",
604 	privcmd_methods,
605 	0,
606 };
607 
608 DRIVER_MODULE(privcmd, xenpv, privcmd_driver, 0, 0);
609 MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
610