xref: /freebsd/sys/dev/xen/privcmd/privcmd.c (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 /*
2  * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/uio.h>
33 #include <sys/bus.h>
34 #include <sys/malloc.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/rwlock.h>
39 #include <sys/selinfo.h>
40 #include <sys/poll.h>
41 #include <sys/conf.h>
42 #include <sys/fcntl.h>
43 #include <sys/ioccom.h>
44 #include <sys/rman.h>
45 #include <sys/tree.h>
46 #include <sys/module.h>
47 #include <sys/proc.h>
48 #include <sys/bitset.h>
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_pager.h>
58 
59 #include <machine/md_var.h>
60 
61 #include <xen/xen-os.h>
62 #include <xen/hypervisor.h>
63 #include <xen/privcmd.h>
64 #include <xen/error.h>
65 
66 MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");
67 
68 #define MAX_DMOP_BUFFERS 16
69 
70 struct privcmd_map {
71 	vm_object_t mem;
72 	vm_size_t size;
73 	struct resource *pseudo_phys_res;
74 	int pseudo_phys_res_id;
75 	vm_paddr_t phys_base_addr;
76 	boolean_t mapped;
77 	BITSET_DEFINE_VAR() *err;
78 };
79 
80 static d_ioctl_t     privcmd_ioctl;
81 static d_open_t      privcmd_open;
82 static d_mmap_single_t	privcmd_mmap_single;
83 
84 static struct cdevsw privcmd_devsw = {
85 	.d_version = D_VERSION,
86 	.d_ioctl = privcmd_ioctl,
87 	.d_mmap_single = privcmd_mmap_single,
88 	.d_open = privcmd_open,
89 	.d_name = "privcmd",
90 };
91 
92 static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
93     vm_ooffset_t foff, struct ucred *cred, u_short *color);
94 static void privcmd_pg_dtor(void *handle);
95 static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
96     int prot, vm_page_t *mres);
97 
98 static struct cdev_pager_ops privcmd_pg_ops = {
99 	.cdev_pg_fault = privcmd_pg_fault,
100 	.cdev_pg_ctor =	privcmd_pg_ctor,
101 	.cdev_pg_dtor =	privcmd_pg_dtor,
102 };
103 
104 struct per_user_data {
105 	domid_t dom;
106 };
107 
108 static device_t privcmd_dev = NULL;
109 
110 /*------------------------- Privcmd Pager functions --------------------------*/
111 static int
112 privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
113     vm_ooffset_t foff, struct ucred *cred, u_short *color)
114 {
115 
116 	return (0);
117 }
118 
119 static void
120 privcmd_pg_dtor(void *handle)
121 {
122 	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
123 	struct privcmd_map *map = handle;
124 	int error;
125 	vm_size_t i;
126 	vm_page_t m;
127 
128 	/*
129 	 * Remove the mappings from the used pages. This will remove the
130 	 * underlying p2m bindings in Xen second stage translation.
131 	 */
132 	if (map->mapped == true) {
133 		VM_OBJECT_WLOCK(map->mem);
134 retry:
135 		for (i = 0; i < map->size; i++) {
136 			m = vm_page_lookup(map->mem, i);
137 			if (m == NULL)
138 				continue;
139 			if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
140 				goto retry;
141 			cdev_pager_free_page(map->mem, m);
142 		}
143 		VM_OBJECT_WUNLOCK(map->mem);
144 
145 		for (i = 0; i < map->size; i++) {
146 			rm.gpfn = atop(map->phys_base_addr) + i;
147 			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
148 		}
149 		free(map->err, M_PRIVCMD);
150 	}
151 
152 	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
153 	    map->pseudo_phys_res);
154 	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
155 
156 	free(map, M_PRIVCMD);
157 }
158 
159 static int
160 privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
161     int prot, vm_page_t *mres)
162 {
163 	struct privcmd_map *map = object->handle;
164 	vm_pindex_t pidx;
165 	vm_page_t page;
166 
167 	if (map->mapped != true)
168 		return (VM_PAGER_FAIL);
169 
170 	pidx = OFF_TO_IDX(offset);
171 	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
172 		return (VM_PAGER_FAIL);
173 
174 	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
175 	if (page == NULL)
176 		return (VM_PAGER_FAIL);
177 
178 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
179 	    ("not fictitious %p", page));
180 	KASSERT(vm_page_wired(page), ("page %p not wired", page));
181 	KASSERT(!vm_page_busied(page), ("page %p is busy", page));
182 
183 	vm_page_busy_acquire(page, 0);
184 	vm_page_valid(page);
185 
186 	if (*mres != NULL)
187 		vm_page_replace(page, object, pidx, *mres);
188 	else
189 		vm_page_insert(page, object, pidx);
190 	*mres = page;
191 	return (VM_PAGER_OK);
192 }
193 
194 /*----------------------- Privcmd char device methods ------------------------*/
195 static int
196 privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
197     vm_object_t *object, int nprot)
198 {
199 	struct privcmd_map *map;
200 
201 	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
202 
203 	map->size = OFF_TO_IDX(size);
204 	map->pseudo_phys_res_id = 0;
205 
206 	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
207 	    &map->pseudo_phys_res_id, size);
208 	if (map->pseudo_phys_res == NULL) {
209 		free(map, M_PRIVCMD);
210 		return (ENOMEM);
211 	}
212 
213 	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
214 	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
215 	    size, nprot, *offset, NULL);
216 	if (map->mem == NULL) {
217 		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
218 		    map->pseudo_phys_res);
219 		free(map, M_PRIVCMD);
220 		return (ENOMEM);
221 	}
222 
223 	*object = map->mem;
224 
225 	return (0);
226 }
227 
228 static struct privcmd_map *
229 setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
230 {
231 	vm_map_t map;
232 	vm_map_entry_t entry;
233 	vm_object_t mem;
234 	vm_pindex_t pindex;
235 	vm_prot_t prot;
236 	boolean_t wired;
237 	struct privcmd_map *umap;
238 	int error;
239 
240 	if ((num == 0) || ((addr & PAGE_MASK) != 0))
241 		return NULL;
242 
243 	map = &td->td_proc->p_vmspace->vm_map;
244 	error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
245 	    &prot, &wired);
246 	if (error != KERN_SUCCESS || (entry->start != addr) ||
247 	    (entry->end != addr + (num * PAGE_SIZE)))
248 		return NULL;
249 
250 	vm_map_lookup_done(map, entry);
251 	if ((mem->type != OBJT_MGTDEVICE) ||
252 	    (mem->un_pager.devp.ops != &privcmd_pg_ops))
253 		return NULL;
254 
255 	umap = mem->handle;
256 	/* Allocate a bitset to store broken page mappings. */
257 	umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);
258 
259 	return umap;
260 }
261 
262 static int
263 privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
264 	      int mode, struct thread *td)
265 {
266 	int error;
267 	unsigned int i;
268 	void *data;
269 	const struct per_user_data *u;
270 
271 	error = devfs_get_cdevpriv(&data);
272 	if (error != 0)
273 		return (EINVAL);
274 	/*
275 	 * Constify user-data to prevent unintended changes to the restriction
276 	 * limits.
277 	 */
278 	u = data;
279 
280 	switch (cmd) {
281 	case IOCTL_PRIVCMD_HYPERCALL: {
282 		struct ioctl_privcmd_hypercall *hcall;
283 
284 		hcall = (struct ioctl_privcmd_hypercall *)arg;
285 
286 		/* Forbid hypercalls if restricted. */
287 		if (u->dom != DOMID_INVALID) {
288 			error = EPERM;
289 			break;
290 		}
291 
292 #ifdef __amd64__
293 		/*
294 		 * The hypervisor page table walker will refuse to access
295 		 * user-space pages if SMAP is enabled, so temporary disable it
296 		 * while performing the hypercall.
297 		 */
298 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
299 			stac();
300 #endif
301 		error = privcmd_hypercall(hcall->op, hcall->arg[0],
302 		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
303 #ifdef __amd64__
304 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
305 			clac();
306 #endif
307 		if (error >= 0) {
308 			hcall->retval = error;
309 			error = 0;
310 		} else {
311 			error = xen_translate_error(error);
312 			hcall->retval = 0;
313 		}
314 		break;
315 	}
316 	case IOCTL_PRIVCMD_MMAPBATCH: {
317 		struct ioctl_privcmd_mmapbatch *mmap;
318 		struct xen_add_to_physmap_range add;
319 		xen_ulong_t *idxs;
320 		xen_pfn_t *gpfns;
321 		int *errs;
322 		unsigned int index;
323 		struct privcmd_map *umap;
324 		uint16_t num;
325 
326 		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
327 
328 		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
329 			error = EPERM;
330 			break;
331 		}
332 
333 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
334 		if (umap == NULL) {
335 			error = EINVAL;
336 			break;
337 		}
338 
339 		add.domid = DOMID_SELF;
340 		add.space = XENMAPSPACE_gmfn_foreign;
341 		add.foreign_domid = mmap->dom;
342 
343 		/*
344 		 * The 'size' field in the xen_add_to_physmap_range only
345 		 * allows for UINT16_MAX mappings in a single hypercall.
346 		 */
347 		num = MIN(mmap->num, UINT16_MAX);
348 
349 		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
350 		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
351 		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
352 
353 		set_xen_guest_handle(add.idxs, idxs);
354 		set_xen_guest_handle(add.gpfns, gpfns);
355 		set_xen_guest_handle(add.errs, errs);
356 
357 		for (index = 0; index < mmap->num; index += num) {
358 			num = MIN(mmap->num - index, UINT16_MAX);
359 			add.size = num;
360 
361 			error = copyin(&mmap->arr[index], idxs,
362 			    sizeof(idxs[0]) * num);
363 			if (error != 0)
364 				goto mmap_out;
365 
366 			for (i = 0; i < num; i++)
367 				gpfns[i] = atop(umap->phys_base_addr +
368 				    (i + index) * PAGE_SIZE);
369 
370 			bzero(errs, sizeof(*errs) * num);
371 
372 			error = HYPERVISOR_memory_op(
373 			    XENMEM_add_to_physmap_range, &add);
374 			if (error != 0) {
375 				error = xen_translate_error(error);
376 				goto mmap_out;
377 			}
378 
379 			for (i = 0; i < num; i++) {
380 				if (errs[i] != 0) {
381 					errs[i] = xen_translate_error(errs[i]);
382 
383 					/* Mark the page as invalid. */
384 					BIT_SET(mmap->num, index + i,
385 					    umap->err);
386 				}
387 			}
388 
389 			error = copyout(errs, &mmap->err[index],
390 			    sizeof(errs[0]) * num);
391 			if (error != 0)
392 				goto mmap_out;
393 		}
394 
395 		umap->mapped = true;
396 
397 mmap_out:
398 		free(idxs, M_PRIVCMD);
399 		free(gpfns, M_PRIVCMD);
400 		free(errs, M_PRIVCMD);
401 		if (!umap->mapped)
402 			free(umap->err, M_PRIVCMD);
403 
404 		break;
405 	}
406 	case IOCTL_PRIVCMD_MMAP_RESOURCE: {
407 		struct ioctl_privcmd_mmapresource *mmap;
408 		struct xen_mem_acquire_resource adq;
409 		xen_pfn_t *gpfns;
410 		struct privcmd_map *umap;
411 
412 		mmap = (struct ioctl_privcmd_mmapresource *)arg;
413 
414 		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
415 			error = EPERM;
416 			break;
417 		}
418 
419 		bzero(&adq, sizeof(adq));
420 
421 		adq.domid = mmap->dom;
422 		adq.type = mmap->type;
423 		adq.id = mmap->id;
424 
425 		/* Shortcut for getting the resource size. */
426 		if (mmap->addr == 0 && mmap->num == 0) {
427 			error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
428 			    &adq);
429 			if (error != 0) {
430 				error = xen_translate_error(error);
431 				break;
432 			}
433 			error = copyout(&adq.nr_frames, &mmap->num,
434 			    sizeof(mmap->num));
435 			break;
436 		}
437 
438 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
439 		if (umap == NULL) {
440 			error = EINVAL;
441 			break;
442 		}
443 
444 		adq.nr_frames = mmap->num;
445 		adq.frame = mmap->idx;
446 
447 		gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
448 		for (i = 0; i < mmap->num; i++)
449 			gpfns[i] = atop(umap->phys_base_addr) + i;
450 		set_xen_guest_handle(adq.frame_list, gpfns);
451 
452 		error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
453 		if (error != 0)
454 			error = xen_translate_error(error);
455 		else
456 			umap->mapped = true;
457 
458 		free(gpfns, M_PRIVCMD);
459 		if (!umap->mapped)
460 			free(umap->err, M_PRIVCMD);
461 
462 		break;
463 	}
464 	case IOCTL_PRIVCMD_DM_OP: {
465 		const struct ioctl_privcmd_dmop *dmop;
466 		struct privcmd_dmop_buf *bufs;
467 		struct xen_dm_op_buf *hbufs;
468 
469 		dmop = (struct ioctl_privcmd_dmop *)arg;
470 
471 		if (u->dom != DOMID_INVALID && u->dom != dmop->dom) {
472 			error = EPERM;
473 			break;
474 		}
475 
476 		if (dmop->num == 0)
477 			break;
478 
479 		if (dmop->num > MAX_DMOP_BUFFERS) {
480 			error = E2BIG;
481 			break;
482 		}
483 
484 		bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK);
485 
486 		error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num);
487 		if (error != 0) {
488 			free(bufs, M_PRIVCMD);
489 			break;
490 		}
491 
492 		hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK);
493 		for (i = 0; i < dmop->num; i++) {
494 			set_xen_guest_handle(hbufs[i].h, bufs[i].uptr);
495 			hbufs[i].size = bufs[i].size;
496 		}
497 
498 #ifdef __amd64__
499 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
500 			stac();
501 #endif
502 		error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs);
503 #ifdef __amd64__
504 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
505 			clac();
506 #endif
507 		if (error != 0)
508 			error = xen_translate_error(error);
509 
510 		free(bufs, M_PRIVCMD);
511 		free(hbufs, M_PRIVCMD);
512 
513 
514 		break;
515 	}
516 	case IOCTL_PRIVCMD_RESTRICT: {
517 		struct per_user_data *u;
518 		domid_t dom;
519 
520 		dom = *(domid_t *)arg;
521 
522 		error = devfs_get_cdevpriv((void **)&u);
523 		if (error != 0)
524 			break;
525 
526 		if (u->dom != DOMID_INVALID && u->dom != dom) {
527 			error = -EINVAL;
528 			break;
529 		}
530 		u->dom = dom;
531 
532 		break;
533 	}
534 	default:
535 		error = ENOSYS;
536 		break;
537 	}
538 
539 	return (error);
540 }
541 
542 static void
543 user_release(void *arg)
544 {
545 
546 	free(arg, M_PRIVCMD);
547 }
548 
549 static int
550 privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td)
551 {
552 	struct per_user_data *u;
553 	int error;
554 
555 	u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK);
556 	u->dom = DOMID_INVALID;
557 
558 	/* Assign the allocated per_user_data to this open instance. */
559 	error = devfs_set_cdevpriv(u, user_release);
560 	if (error != 0) {
561 		free(u, M_PRIVCMD);
562 	}
563 
564 	return (error);
565 }
566 
567 /*------------------ Private Device Attachment Functions  --------------------*/
568 static void
569 privcmd_identify(driver_t *driver, device_t parent)
570 {
571 
572 	KASSERT(xen_domain(),
573 	    ("Trying to attach privcmd device on non Xen domain"));
574 
575 	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
576 		panic("unable to attach privcmd user-space device");
577 }
578 
579 static int
580 privcmd_probe(device_t dev)
581 {
582 
583 	privcmd_dev = dev;
584 	device_set_desc(dev, "Xen privileged interface user-space device");
585 	return (BUS_PROBE_NOWILDCARD);
586 }
587 
588 static int
589 privcmd_attach(device_t dev)
590 {
591 
592 	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
593 	    GID_WHEEL, 0600, "xen/privcmd");
594 	return (0);
595 }
596 
597 /*-------------------- Private Device Attachment Data  -----------------------*/
598 static device_method_t privcmd_methods[] = {
599 	DEVMETHOD(device_identify,	privcmd_identify),
600 	DEVMETHOD(device_probe,		privcmd_probe),
601 	DEVMETHOD(device_attach,	privcmd_attach),
602 
603 	DEVMETHOD_END
604 };
605 
606 static driver_t privcmd_driver = {
607 	"privcmd",
608 	privcmd_methods,
609 	0,
610 };
611 
612 devclass_t privcmd_devclass;
613 
614 DRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
615 MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
616