xref: /freebsd/sys/dev/xen/privcmd/privcmd.c (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 /*
2  * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/uio.h>
30 #include <sys/bus.h>
31 #include <sys/malloc.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/rwlock.h>
36 #include <sys/selinfo.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/fcntl.h>
40 #include <sys/ioccom.h>
41 #include <sys/rman.h>
42 #include <sys/tree.h>
43 #include <sys/module.h>
44 #include <sys/proc.h>
45 #include <sys/bitset.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_extern.h>
50 #include <vm/vm_kern.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54 #include <vm/vm_pager.h>
55 
56 #include <machine/md_var.h>
57 
58 #include <xen/xen-os.h>
59 #include <xen/hypervisor.h>
60 #include <xen/privcmd.h>
61 #include <xen/error.h>
62 
/* Malloc type used for every allocation made by this driver. */
MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");

/* Largest number of buffers accepted by a single IOCTL_PRIVCMD_DM_OP call. */
#define MAX_DMOP_BUFFERS 16
66 
/*
 * State attached to every VM object created by privcmd_mmap_single().  It is
 * used as the pager handle, so the pager callbacks and the mapping ioctls can
 * find it from the VM object.
 */
struct privcmd_map {
	/* VM object returned by cdev_pager_allocate() for this mapping. */
	vm_object_t mem;
	/* Length of the mapping, in pages. */
	vm_size_t size;
	/* Resource obtained from xenmem_alloc() backing the mapping. */
	struct resource *pseudo_phys_res;
	/* Resource id filled in by xenmem_alloc(); needed by xenmem_free(). */
	int pseudo_phys_res_id;
	/* Start address of pseudo_phys_res. */
	vm_paddr_t phys_base_addr;
	/* Set once one of the mapping ioctls has populated the area. */
	boolean_t mapped;
	/* One bit per page; a set bit marks a page whose mapping failed. */
	BITSET_DEFINE_VAR() *err;
};
76 
77 static d_ioctl_t     privcmd_ioctl;
78 static d_open_t      privcmd_open;
79 static d_mmap_single_t	privcmd_mmap_single;
80 
81 static struct cdevsw privcmd_devsw = {
82 	.d_version = D_VERSION,
83 	.d_ioctl = privcmd_ioctl,
84 	.d_mmap_single = privcmd_mmap_single,
85 	.d_open = privcmd_open,
86 	.d_name = "privcmd",
87 };
88 
89 static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
90     vm_ooffset_t foff, struct ucred *cred, u_short *color);
91 static void privcmd_pg_dtor(void *handle);
92 static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
93     int prot, vm_page_t *mres);
94 
95 static struct cdev_pager_ops privcmd_pg_ops = {
96 	.cdev_pg_fault = privcmd_pg_fault,
97 	.cdev_pg_ctor =	privcmd_pg_ctor,
98 	.cdev_pg_dtor =	privcmd_pg_dtor,
99 };
100 
/*
 * Per-open state, allocated in privcmd_open() and stored as cdevpriv data.
 */
struct per_user_data {
	/*
	 * Domain this open instance is restricted to, or DOMID_INVALID when
	 * no restriction has been set through IOCTL_PRIVCMD_RESTRICT.
	 */
	domid_t dom;
};

/* Device instance recorded by privcmd_probe(); passed to xenmem_alloc/free. */
static device_t privcmd_dev = NULL;
106 
107 /*------------------------- Privcmd Pager functions --------------------------*/
/*
 * Pager constructor callback.  All the per-mapping state is prepared by
 * privcmd_mmap_single() before the pager is allocated, so there is nothing
 * to do here; the callback ignores its arguments and always returns 0.
 */
static int
privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	return (0);
}
115 
116 static void
117 privcmd_pg_dtor(void *handle)
118 {
119 	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
120 	struct privcmd_map *map = handle;
121 	int error __diagused;
122 	vm_size_t i;
123 
124 	/*
125 	 * Remove the mappings from the used pages. This will remove the
126 	 * underlying p2m bindings in Xen second stage translation.
127 	 */
128 	if (map->mapped == true) {
129 		cdev_mgtdev_pager_free_pages(map->mem);
130 		for (i = 0; i < map->size; i++) {
131 			rm.gpfn = atop(map->phys_base_addr) + i;
132 			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
133 		}
134 		free(map->err, M_PRIVCMD);
135 	}
136 
137 	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
138 	    map->pseudo_phys_res);
139 	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
140 
141 	free(map, M_PRIVCMD);
142 }
143 
144 static int
145 privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
146     int prot, vm_page_t *mres)
147 {
148 	struct privcmd_map *map = object->handle;
149 	vm_pindex_t pidx;
150 	vm_page_t page;
151 
152 	if (map->mapped != true)
153 		return (VM_PAGER_FAIL);
154 
155 	pidx = OFF_TO_IDX(offset);
156 	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
157 		return (VM_PAGER_FAIL);
158 
159 	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
160 	if (page == NULL)
161 		return (VM_PAGER_FAIL);
162 
163 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
164 	    ("not fictitious %p", page));
165 	KASSERT(vm_page_wired(page), ("page %p not wired", page));
166 	KASSERT(!vm_page_busied(page), ("page %p is busy", page));
167 
168 	vm_page_busy_acquire(page, 0);
169 	vm_page_valid(page);
170 
171 	if (*mres != NULL)
172 		vm_page_replace(page, object, pidx, *mres);
173 	else
174 		vm_page_insert(page, object, pidx);
175 	*mres = page;
176 	return (VM_PAGER_OK);
177 }
178 
179 /*----------------------- Privcmd char device methods ------------------------*/
180 static int
181 privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
182     vm_object_t *object, int nprot)
183 {
184 	struct privcmd_map *map;
185 
186 	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
187 
188 	map->size = OFF_TO_IDX(size);
189 	map->pseudo_phys_res_id = 0;
190 
191 	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
192 	    &map->pseudo_phys_res_id, size);
193 	if (map->pseudo_phys_res == NULL) {
194 		free(map, M_PRIVCMD);
195 		return (ENOMEM);
196 	}
197 
198 	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
199 	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
200 	    size, nprot, *offset, NULL);
201 	if (map->mem == NULL) {
202 		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
203 		    map->pseudo_phys_res);
204 		free(map, M_PRIVCMD);
205 		return (ENOMEM);
206 	}
207 
208 	*object = map->mem;
209 
210 	return (0);
211 }
212 
213 static struct privcmd_map *
214 setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
215 {
216 	vm_map_t map;
217 	vm_map_entry_t entry;
218 	vm_object_t mem;
219 	vm_pindex_t pindex;
220 	vm_prot_t prot;
221 	boolean_t wired;
222 	struct privcmd_map *umap;
223 	int error;
224 
225 	if ((num == 0) || ((addr & PAGE_MASK) != 0))
226 		return NULL;
227 
228 	map = &td->td_proc->p_vmspace->vm_map;
229 	error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
230 	    &prot, &wired);
231 	if (error != KERN_SUCCESS || (entry->start != addr) ||
232 	    (entry->end != addr + (num * PAGE_SIZE)))
233 		return NULL;
234 
235 	vm_map_lookup_done(map, entry);
236 	if ((mem->type != OBJT_MGTDEVICE) ||
237 	    (mem->un_pager.devp.ops != &privcmd_pg_ops))
238 		return NULL;
239 
240 	umap = mem->handle;
241 	/* Allocate a bitset to store broken page mappings. */
242 	umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);
243 
244 	return umap;
245 }
246 
247 static int
248 privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
249 	      int mode, struct thread *td)
250 {
251 	int error;
252 	unsigned int i;
253 	void *data;
254 	const struct per_user_data *u;
255 
256 	error = devfs_get_cdevpriv(&data);
257 	if (error != 0)
258 		return (EINVAL);
259 	/*
260 	 * Constify user-data to prevent unintended changes to the restriction
261 	 * limits.
262 	 */
263 	u = data;
264 
265 	switch (cmd) {
266 	case IOCTL_PRIVCMD_HYPERCALL: {
267 		struct ioctl_privcmd_hypercall *hcall;
268 
269 		hcall = (struct ioctl_privcmd_hypercall *)arg;
270 
271 		/* Forbid hypercalls if restricted. */
272 		if (u->dom != DOMID_INVALID) {
273 			error = EPERM;
274 			break;
275 		}
276 
277 #ifdef __amd64__
278 		/*
279 		 * The hypervisor page table walker will refuse to access
280 		 * user-space pages if SMAP is enabled, so temporary disable it
281 		 * while performing the hypercall.
282 		 */
283 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
284 			stac();
285 #endif
286 		error = privcmd_hypercall(hcall->op, hcall->arg[0],
287 		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
288 #ifdef __amd64__
289 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
290 			clac();
291 #endif
292 		if (error >= 0) {
293 			hcall->retval = error;
294 			error = 0;
295 		} else {
296 			error = xen_translate_error(error);
297 			hcall->retval = 0;
298 		}
299 		break;
300 	}
301 	case IOCTL_PRIVCMD_MMAPBATCH: {
302 		struct ioctl_privcmd_mmapbatch *mmap;
303 		struct xen_add_to_physmap_batch add;
304 		xen_ulong_t *idxs;
305 		xen_pfn_t *gpfns;
306 		int *errs;
307 		unsigned int index;
308 		struct privcmd_map *umap;
309 		uint16_t num;
310 
311 		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
312 
313 		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
314 			error = EPERM;
315 			break;
316 		}
317 
318 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
319 		if (umap == NULL) {
320 			error = EINVAL;
321 			break;
322 		}
323 
324 		add.domid = DOMID_SELF;
325 		add.space = XENMAPSPACE_gmfn_foreign;
326 		add.u.foreign_domid = mmap->dom;
327 
328 		/*
329 		 * The 'size' field in the xen_add_to_physmap_range only
330 		 * allows for UINT16_MAX mappings in a single hypercall.
331 		 */
332 		num = MIN(mmap->num, UINT16_MAX);
333 
334 		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
335 		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
336 		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
337 
338 		set_xen_guest_handle(add.idxs, idxs);
339 		set_xen_guest_handle(add.gpfns, gpfns);
340 		set_xen_guest_handle(add.errs, errs);
341 
342 		for (index = 0; index < mmap->num; index += num) {
343 			num = MIN(mmap->num - index, UINT16_MAX);
344 			add.size = num;
345 
346 			error = copyin(&mmap->arr[index], idxs,
347 			    sizeof(idxs[0]) * num);
348 			if (error != 0)
349 				goto mmap_out;
350 
351 			for (i = 0; i < num; i++)
352 				gpfns[i] = atop(umap->phys_base_addr +
353 				    (i + index) * PAGE_SIZE);
354 
355 			bzero(errs, sizeof(*errs) * num);
356 
357 			error = HYPERVISOR_memory_op(
358 			    XENMEM_add_to_physmap_batch, &add);
359 			if (error != 0) {
360 				error = xen_translate_error(error);
361 				goto mmap_out;
362 			}
363 
364 			for (i = 0; i < num; i++) {
365 				if (errs[i] != 0) {
366 					errs[i] = xen_translate_error(errs[i]);
367 
368 					/* Mark the page as invalid. */
369 					BIT_SET(mmap->num, index + i,
370 					    umap->err);
371 				}
372 			}
373 
374 			error = copyout(errs, &mmap->err[index],
375 			    sizeof(errs[0]) * num);
376 			if (error != 0)
377 				goto mmap_out;
378 		}
379 
380 		umap->mapped = true;
381 
382 mmap_out:
383 		free(idxs, M_PRIVCMD);
384 		free(gpfns, M_PRIVCMD);
385 		free(errs, M_PRIVCMD);
386 		if (!umap->mapped)
387 			free(umap->err, M_PRIVCMD);
388 
389 		break;
390 	}
391 	case IOCTL_PRIVCMD_MMAP_RESOURCE: {
392 		struct ioctl_privcmd_mmapresource *mmap;
393 		struct xen_mem_acquire_resource adq;
394 		xen_pfn_t *gpfns;
395 		struct privcmd_map *umap;
396 
397 		mmap = (struct ioctl_privcmd_mmapresource *)arg;
398 
399 		if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
400 			error = EPERM;
401 			break;
402 		}
403 
404 		bzero(&adq, sizeof(adq));
405 
406 		adq.domid = mmap->dom;
407 		adq.type = mmap->type;
408 		adq.id = mmap->id;
409 
410 		/* Shortcut for getting the resource size. */
411 		if (mmap->addr == 0 && mmap->num == 0) {
412 			error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
413 			    &adq);
414 			if (error != 0)
415 				error = xen_translate_error(error);
416 			else
417 				mmap->num = adq.nr_frames;
418 			break;
419 		}
420 
421 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
422 		if (umap == NULL) {
423 			error = EINVAL;
424 			break;
425 		}
426 
427 		adq.nr_frames = mmap->num;
428 		adq.frame = mmap->idx;
429 
430 		gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
431 		for (i = 0; i < mmap->num; i++)
432 			gpfns[i] = atop(umap->phys_base_addr) + i;
433 		set_xen_guest_handle(adq.frame_list, gpfns);
434 
435 		error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
436 		if (error != 0)
437 			error = xen_translate_error(error);
438 		else
439 			umap->mapped = true;
440 
441 		free(gpfns, M_PRIVCMD);
442 		if (!umap->mapped)
443 			free(umap->err, M_PRIVCMD);
444 
445 		break;
446 	}
447 	case IOCTL_PRIVCMD_DM_OP: {
448 		const struct ioctl_privcmd_dmop *dmop;
449 		struct privcmd_dmop_buf *bufs;
450 		struct xen_dm_op_buf *hbufs;
451 
452 		dmop = (struct ioctl_privcmd_dmop *)arg;
453 
454 		if (u->dom != DOMID_INVALID && u->dom != dmop->dom) {
455 			error = EPERM;
456 			break;
457 		}
458 
459 		if (dmop->num == 0)
460 			break;
461 
462 		if (dmop->num > MAX_DMOP_BUFFERS) {
463 			error = E2BIG;
464 			break;
465 		}
466 
467 		bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK);
468 
469 		error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num);
470 		if (error != 0) {
471 			free(bufs, M_PRIVCMD);
472 			break;
473 		}
474 
475 		hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK);
476 		for (i = 0; i < dmop->num; i++) {
477 			set_xen_guest_handle(hbufs[i].h, bufs[i].uptr);
478 			hbufs[i].size = bufs[i].size;
479 		}
480 
481 #ifdef __amd64__
482 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
483 			stac();
484 #endif
485 		error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs);
486 #ifdef __amd64__
487 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
488 			clac();
489 #endif
490 		if (error != 0)
491 			error = xen_translate_error(error);
492 
493 		free(bufs, M_PRIVCMD);
494 		free(hbufs, M_PRIVCMD);
495 
496 
497 		break;
498 	}
499 	case IOCTL_PRIVCMD_RESTRICT: {
500 		struct per_user_data *u;
501 		domid_t dom;
502 
503 		dom = *(domid_t *)arg;
504 
505 		error = devfs_get_cdevpriv((void **)&u);
506 		if (error != 0)
507 			break;
508 
509 		if (u->dom != DOMID_INVALID && u->dom != dom) {
510 			error = -EINVAL;
511 			break;
512 		}
513 		u->dom = dom;
514 
515 		break;
516 	}
517 	default:
518 		error = ENOSYS;
519 		break;
520 	}
521 
522 	return (error);
523 }
524 
/*
 * Destructor registered with devfs_set_cdevpriv() in privcmd_open(): frees
 * the per_user_data allocated there.
 */
static void
user_release(void *arg)
{

	free(arg, M_PRIVCMD);
}
531 
532 static int
533 privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td)
534 {
535 	struct per_user_data *u;
536 	int error;
537 
538 	u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK);
539 	u->dom = DOMID_INVALID;
540 
541 	/* Assign the allocated per_user_data to this open instance. */
542 	error = devfs_set_cdevpriv(u, user_release);
543 	if (error != 0) {
544 		free(u, M_PRIVCMD);
545 	}
546 
547 	return (error);
548 }
549 
550 /*------------------ Private Device Attachment Functions  --------------------*/
551 static void
552 privcmd_identify(driver_t *driver, device_t parent)
553 {
554 
555 	KASSERT(xen_domain(),
556 	    ("Trying to attach privcmd device on non Xen domain"));
557 
558 	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
559 		panic("unable to attach privcmd user-space device");
560 }
561 
562 static int
563 privcmd_probe(device_t dev)
564 {
565 
566 	privcmd_dev = dev;
567 	device_set_desc(dev, "Xen privileged interface user-space device");
568 	return (BUS_PROBE_NOWILDCARD);
569 }
570 
571 static int
572 privcmd_attach(device_t dev)
573 {
574 
575 	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
576 	    GID_WHEEL, 0600, "xen/privcmd");
577 	return (0);
578 }
579 
580 /*-------------------- Private Device Attachment Data  -----------------------*/
/* Device interface methods implemented by the privcmd driver. */
static device_method_t privcmd_methods[] = {
	DEVMETHOD(device_identify,	privcmd_identify),
	DEVMETHOD(device_probe,		privcmd_probe),
	DEVMETHOD(device_attach,	privcmd_attach),

	DEVMETHOD_END
};
588 
/* Driver description: name, method table and softc size (none is used). */
static driver_t privcmd_driver = {
	"privcmd",
	privcmd_methods,
	0,
};

/* Register the driver as a child of the xenpv bus and depend on it. */
DRIVER_MODULE(privcmd, xenpv, privcmd_driver, 0, 0);
MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
597