xref: /freebsd/sys/dev/xen/privcmd/privcmd.c (revision 658860e2d07065b4203bb3e7779bee0512c65c92)
1 /*
2  * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/uio.h>
33 #include <sys/bus.h>
34 #include <sys/malloc.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/rwlock.h>
39 #include <sys/selinfo.h>
40 #include <sys/poll.h>
41 #include <sys/conf.h>
42 #include <sys/fcntl.h>
43 #include <sys/ioccom.h>
44 #include <sys/rman.h>
45 #include <sys/tree.h>
46 #include <sys/module.h>
47 #include <sys/proc.h>
48 #include <sys/bitset.h>
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_pager.h>
58 
59 #include <machine/md_var.h>
60 
61 #include <xen/xen-os.h>
62 #include <xen/hypervisor.h>
63 #include <xen/privcmd.h>
64 #include <xen/error.h>
65 
66 MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");
67 
/*
 * Per-mapping state.  One instance is allocated by privcmd_mmap_single(),
 * used as the handle of the device pager object, and released by
 * privcmd_pg_dtor().
 */
struct privcmd_map {
	/* Device pager object returned by cdev_pager_allocate(). */
	vm_object_t mem;
	/* Size of the mapping, in pages. */
	vm_size_t size;
	/* Range obtained from xenmem_alloc() that backs the mapping. */
	struct resource *pseudo_phys_res;
	/* Resource id associated with pseudo_phys_res. */
	int pseudo_phys_res_id;
	/* Start address of pseudo_phys_res (rman_get_start()). */
	vm_paddr_t phys_base_addr;
	/* Set to true once one of the mapping ioctls has succeeded. */
	boolean_t mapped;
	/* One bit per page; a set bit makes faults on that page fail. */
	BITSET_DEFINE_VAR() *err;
};
77 
/* Character device entry points, defined further down in this file. */
static d_ioctl_t     privcmd_ioctl;
static d_mmap_single_t	privcmd_mmap_single;

/* Switch table of the privcmd device: only ioctl and mmap are provided. */
static struct cdevsw privcmd_devsw = {
	.d_version = D_VERSION,
	.d_ioctl = privcmd_ioctl,
	.d_mmap_single = privcmd_mmap_single,
	.d_name = "privcmd",
};
87 
/* Device pager callbacks, defined in the pager section below. */
static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void privcmd_pg_dtor(void *handle);
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

/*
 * Pager operations handed to cdev_pager_allocate().  The address of this
 * table is also what setup_virtual_area() compares against to recognize
 * objects created by this driver.
 */
static struct cdev_pager_ops privcmd_pg_ops = {
	.cdev_pg_fault = privcmd_pg_fault,
	.cdev_pg_ctor =	privcmd_pg_ctor,
	.cdev_pg_dtor =	privcmd_pg_dtor,
};
99 
/*
 * Device instance recorded by privcmd_probe(); passed to xenmem_alloc() and
 * xenmem_free() when mappings are created and destroyed.
 */
static device_t privcmd_dev = NULL;
101 
102 /*------------------------- Privcmd Pager functions --------------------------*/
/*
 * Pager constructor.  No per-object setup is done here: every argument is
 * ignored and the call always succeeds (returns 0).
 */
static int
privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	return (0);
}
110 
111 static void
112 privcmd_pg_dtor(void *handle)
113 {
114 	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
115 	struct privcmd_map *map = handle;
116 	int error;
117 	vm_size_t i;
118 	vm_page_t m;
119 
120 	/*
121 	 * Remove the mappings from the used pages. This will remove the
122 	 * underlying p2m bindings in Xen second stage translation.
123 	 */
124 	if (map->mapped == true) {
125 		VM_OBJECT_WLOCK(map->mem);
126 retry:
127 		for (i = 0; i < map->size; i++) {
128 			m = vm_page_lookup(map->mem, i);
129 			if (m == NULL)
130 				continue;
131 			if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
132 				goto retry;
133 			cdev_pager_free_page(map->mem, m);
134 		}
135 		VM_OBJECT_WUNLOCK(map->mem);
136 
137 		for (i = 0; i < map->size; i++) {
138 			rm.gpfn = atop(map->phys_base_addr) + i;
139 			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
140 		}
141 		free(map->err, M_PRIVCMD);
142 	}
143 
144 	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
145 	    map->pseudo_phys_res);
146 	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
147 
148 	free(map, M_PRIVCMD);
149 }
150 
151 static int
152 privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
153     int prot, vm_page_t *mres)
154 {
155 	struct privcmd_map *map = object->handle;
156 	vm_pindex_t pidx;
157 	vm_page_t page;
158 
159 	if (map->mapped != true)
160 		return (VM_PAGER_FAIL);
161 
162 	pidx = OFF_TO_IDX(offset);
163 	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
164 		return (VM_PAGER_FAIL);
165 
166 	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
167 	if (page == NULL)
168 		return (VM_PAGER_FAIL);
169 
170 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
171 	    ("not fictitious %p", page));
172 	KASSERT(vm_page_wired(page), ("page %p not wired", page));
173 	KASSERT(!vm_page_busied(page), ("page %p is busy", page));
174 
175 	vm_page_busy_acquire(page, 0);
176 	vm_page_valid(page);
177 
178 	if (*mres != NULL)
179 		vm_page_replace(page, object, pidx, *mres);
180 	else
181 		vm_page_insert(page, object, pidx);
182 	*mres = page;
183 	return (VM_PAGER_OK);
184 }
185 
186 /*----------------------- Privcmd char device methods ------------------------*/
187 static int
188 privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
189     vm_object_t *object, int nprot)
190 {
191 	struct privcmd_map *map;
192 
193 	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
194 
195 	map->size = OFF_TO_IDX(size);
196 	map->pseudo_phys_res_id = 0;
197 
198 	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
199 	    &map->pseudo_phys_res_id, size);
200 	if (map->pseudo_phys_res == NULL) {
201 		free(map, M_PRIVCMD);
202 		return (ENOMEM);
203 	}
204 
205 	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
206 	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
207 	    size, nprot, *offset, NULL);
208 	if (map->mem == NULL) {
209 		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
210 		    map->pseudo_phys_res);
211 		free(map, M_PRIVCMD);
212 		return (ENOMEM);
213 	}
214 
215 	*object = map->mem;
216 
217 	return (0);
218 }
219 
220 static struct privcmd_map *
221 setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
222 {
223 	vm_map_t map;
224 	vm_map_entry_t entry;
225 	vm_object_t mem;
226 	vm_pindex_t pindex;
227 	vm_prot_t prot;
228 	boolean_t wired;
229 	struct privcmd_map *umap;
230 	int error;
231 
232 	if ((num == 0) || ((addr & PAGE_MASK) != 0))
233 		return NULL;
234 
235 	map = &td->td_proc->p_vmspace->vm_map;
236 	error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
237 	    &prot, &wired);
238 	if (error != KERN_SUCCESS || (entry->start != addr) ||
239 	    (entry->end != addr + (num * PAGE_SIZE)))
240 		return NULL;
241 
242 	vm_map_lookup_done(map, entry);
243 	if ((mem->type != OBJT_MGTDEVICE) ||
244 	    (mem->un_pager.devp.ops != &privcmd_pg_ops))
245 		return NULL;
246 
247 	umap = mem->handle;
248 	/* Allocate a bitset to store broken page mappings. */
249 	umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);
250 
251 	return umap;
252 }
253 
254 static int
255 privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
256 	      int mode, struct thread *td)
257 {
258 	int error;
259 	unsigned int i;
260 
261 	switch (cmd) {
262 	case IOCTL_PRIVCMD_HYPERCALL: {
263 		struct ioctl_privcmd_hypercall *hcall;
264 
265 		hcall = (struct ioctl_privcmd_hypercall *)arg;
266 #ifdef __amd64__
267 		/*
268 		 * The hypervisor page table walker will refuse to access
269 		 * user-space pages if SMAP is enabled, so temporary disable it
270 		 * while performing the hypercall.
271 		 */
272 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
273 			stac();
274 #endif
275 		error = privcmd_hypercall(hcall->op, hcall->arg[0],
276 		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
277 #ifdef __amd64__
278 		if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
279 			clac();
280 #endif
281 		if (error >= 0) {
282 			hcall->retval = error;
283 			error = 0;
284 		} else {
285 			error = xen_translate_error(error);
286 			hcall->retval = 0;
287 		}
288 		break;
289 	}
290 	case IOCTL_PRIVCMD_MMAPBATCH: {
291 		struct ioctl_privcmd_mmapbatch *mmap;
292 		struct xen_add_to_physmap_range add;
293 		xen_ulong_t *idxs;
294 		xen_pfn_t *gpfns;
295 		int *errs;
296 		unsigned int index;
297 		struct privcmd_map *umap;
298 		uint16_t num;
299 
300 		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
301 
302 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
303 		if (umap == NULL) {
304 			error = EINVAL;
305 			break;
306 		}
307 
308 		add.domid = DOMID_SELF;
309 		add.space = XENMAPSPACE_gmfn_foreign;
310 		add.foreign_domid = mmap->dom;
311 
312 		/*
313 		 * The 'size' field in the xen_add_to_physmap_range only
314 		 * allows for UINT16_MAX mappings in a single hypercall.
315 		 */
316 		num = MIN(mmap->num, UINT16_MAX);
317 
318 		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
319 		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
320 		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
321 
322 		set_xen_guest_handle(add.idxs, idxs);
323 		set_xen_guest_handle(add.gpfns, gpfns);
324 		set_xen_guest_handle(add.errs, errs);
325 
326 		for (index = 0; index < mmap->num; index += num) {
327 			num = MIN(mmap->num - index, UINT16_MAX);
328 			add.size = num;
329 
330 			error = copyin(&mmap->arr[index], idxs,
331 			    sizeof(idxs[0]) * num);
332 			if (error != 0)
333 				goto mmap_out;
334 
335 			for (i = 0; i < num; i++)
336 				gpfns[i] = atop(umap->phys_base_addr +
337 				    (i + index) * PAGE_SIZE);
338 
339 			bzero(errs, sizeof(*errs) * num);
340 
341 			error = HYPERVISOR_memory_op(
342 			    XENMEM_add_to_physmap_range, &add);
343 			if (error != 0) {
344 				error = xen_translate_error(error);
345 				goto mmap_out;
346 			}
347 
348 			for (i = 0; i < num; i++) {
349 				if (errs[i] != 0) {
350 					errs[i] = xen_translate_error(errs[i]);
351 
352 					/* Mark the page as invalid. */
353 					BIT_SET(mmap->num, index + i,
354 					    umap->err);
355 				}
356 			}
357 
358 			error = copyout(errs, &mmap->err[index],
359 			    sizeof(errs[0]) * num);
360 			if (error != 0)
361 				goto mmap_out;
362 		}
363 
364 		umap->mapped = true;
365 
366 mmap_out:
367 		free(idxs, M_PRIVCMD);
368 		free(gpfns, M_PRIVCMD);
369 		free(errs, M_PRIVCMD);
370 		if (!umap->mapped)
371 			free(umap->err, M_PRIVCMD);
372 
373 		break;
374 	}
375 	case IOCTL_PRIVCMD_MMAP_RESOURCE: {
376 		struct ioctl_privcmd_mmapresource *mmap;
377 		struct xen_mem_acquire_resource adq;
378 		xen_pfn_t *gpfns;
379 		struct privcmd_map *umap;
380 
381 		mmap = (struct ioctl_privcmd_mmapresource *)arg;
382 
383 		bzero(&adq, sizeof(adq));
384 
385 		adq.domid = mmap->dom;
386 		adq.type = mmap->type;
387 		adq.id = mmap->id;
388 
389 		/* Shortcut for getting the resource size. */
390 		if (mmap->addr == 0 && mmap->num == 0) {
391 			error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
392 			    &adq);
393 			if (error != 0) {
394 				error = xen_translate_error(error);
395 				break;
396 			}
397 			error = copyout(&adq.nr_frames, &mmap->num,
398 			    sizeof(mmap->num));
399 			break;
400 		}
401 
402 		umap = setup_virtual_area(td, mmap->addr, mmap->num);
403 		if (umap == NULL) {
404 			error = EINVAL;
405 			break;
406 		}
407 
408 		adq.nr_frames = mmap->num;
409 		adq.frame = mmap->idx;
410 
411 		gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
412 		for (i = 0; i < mmap->num; i++)
413 			gpfns[i] = atop(umap->phys_base_addr) + i;
414 		set_xen_guest_handle(adq.frame_list, gpfns);
415 
416 		error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
417 		if (error != 0)
418 			error = xen_translate_error(error);
419 		else
420 			umap->mapped = true;
421 
422 		free(gpfns, M_PRIVCMD);
423 		if (!umap->mapped)
424 			free(umap->err, M_PRIVCMD);
425 
426 		break;
427 	}
428 	default:
429 		error = ENOSYS;
430 		break;
431 	}
432 
433 	return (error);
434 }
435 
436 /*------------------ Private Device Attachment Functions  --------------------*/
437 static void
438 privcmd_identify(driver_t *driver, device_t parent)
439 {
440 
441 	KASSERT(xen_domain(),
442 	    ("Trying to attach privcmd device on non Xen domain"));
443 
444 	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
445 		panic("unable to attach privcmd user-space device");
446 }
447 
448 static int
449 privcmd_probe(device_t dev)
450 {
451 
452 	privcmd_dev = dev;
453 	device_set_desc(dev, "Xen privileged interface user-space device");
454 	return (BUS_PROBE_NOWILDCARD);
455 }
456 
/*
 * Attach method: create the "xen/privcmd" character device node, owned by
 * root:wheel with mode 0600.  The result of make_dev_credf() is not
 * checked; the method always returns 0.
 */
static int
privcmd_attach(device_t dev)
{

	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
	    GID_WHEEL, 0600, "xen/privcmd");
	return (0);
}
465 
466 /*-------------------- Private Device Attachment Data  -----------------------*/
/* Device interface methods implemented by this driver. */
static device_method_t privcmd_methods[] = {
	DEVMETHOD(device_identify,	privcmd_identify),
	DEVMETHOD(device_probe,		privcmd_probe),
	DEVMETHOD(device_attach,	privcmd_attach),

	DEVMETHOD_END
};
474 
/* Driver description: name, method table and a softc size of 0. */
static driver_t privcmd_driver = {
	"privcmd",
	privcmd_methods,
	0,
};
480 
/* Device class handle filled in by DRIVER_MODULE() below. */
devclass_t privcmd_devclass;

/* Register the driver under "xenpv" and declare the dependency on it. */
DRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
485