xref: /freebsd/sys/dev/xen/privcmd/privcmd.c (revision 63d1fd5970ec814904aa0f4580b10a0d302d08b2)
1 /*
2  * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/uio.h>
33 #include <sys/bus.h>
34 #include <sys/malloc.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/rwlock.h>
39 #include <sys/selinfo.h>
40 #include <sys/poll.h>
41 #include <sys/conf.h>
42 #include <sys/fcntl.h>
43 #include <sys/ioccom.h>
44 #include <sys/rman.h>
45 #include <sys/tree.h>
46 #include <sys/module.h>
47 #include <sys/proc.h>
48 #include <sys/bitset.h>
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_pager.h>
58 #include <vm/vm_phys.h>
59 
60 #include <machine/md_var.h>
61 
62 #include <xen/xen-os.h>
63 #include <xen/hypervisor.h>
64 #include <xen/privcmd.h>
65 #include <xen/error.h>
66 
67 MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");
68 
/*
 * Per-mapping state.  One instance is allocated by privcmd_mmap_single()
 * for every mmap(2) of the device and is used as the handle of the cdev
 * pager object that backs the mapping; privcmd_pg_dtor() releases it.
 */
struct privcmd_map {
	/* VM object returned by cdev_pager_allocate() for this mapping. */
	vm_object_t mem;
	/* Length of the mapping, in pages. */
	vm_size_t size;
	/* Memory range obtained from xenmem_alloc(). */
	struct resource *pseudo_phys_res;
	/* Resource id handed to xenmem_alloc() / xenmem_free(). */
	int pseudo_phys_res_id;
	/* Start address of pseudo_phys_res. */
	vm_paddr_t phys_base_addr;
	/* Set once IOCTL_PRIVCMD_MMAPBATCH has populated the range. */
	boolean_t mapped;
	/* Bitset with one bit per page; a set bit marks a failed mapping. */
	BITSET_DEFINE_VAR() *err;
};
78 
/* Character device entry points, defined further down in this file. */
static d_ioctl_t     privcmd_ioctl;
static d_mmap_single_t	privcmd_mmap_single;

/*
 * Switch table of the privcmd character device.  Only the ioctl and
 * mmap_single entry points are provided.
 */
static struct cdevsw privcmd_devsw = {
	.d_version = D_VERSION,
	.d_ioctl = privcmd_ioctl,
	.d_mmap_single = privcmd_mmap_single,
	.d_name = "privcmd",
};
88 
/* Pager callbacks, defined in the "Privcmd Pager functions" section. */
static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void privcmd_pg_dtor(void *handle);
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

/*
 * Pager operations passed to cdev_pager_allocate() by privcmd_mmap_single().
 * privcmd_ioctl() also compares an object's ops pointer against this table
 * to recognise mappings created by this driver.
 */
static struct cdev_pager_ops privcmd_pg_ops = {
	.cdev_pg_fault = privcmd_pg_fault,
	.cdev_pg_ctor =	privcmd_pg_ctor,
	.cdev_pg_dtor =	privcmd_pg_dtor,
};

/*
 * Device handle recorded by privcmd_probe(); it is the device argument of
 * the xenmem_alloc() / xenmem_free() calls in this file.
 */
static device_t privcmd_dev = NULL;
102 
103 /*------------------------- Privcmd Pager functions --------------------------*/
104 static int
105 privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
106     vm_ooffset_t foff, struct ucred *cred, u_short *color)
107 {
108 
109 	return (0);
110 }
111 
112 static void
113 privcmd_pg_dtor(void *handle)
114 {
115 	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
116 	struct privcmd_map *map = handle;
117 	int error;
118 	vm_size_t i;
119 	vm_page_t m;
120 
121 	/*
122 	 * Remove the mappings from the used pages. This will remove the
123 	 * underlying p2m bindings in Xen second stage translation.
124 	 */
125 	if (map->mapped == true) {
126 		VM_OBJECT_WLOCK(map->mem);
127 retry:
128 		for (i = 0; i < map->size; i++) {
129 			m = vm_page_lookup(map->mem, i);
130 			if (m == NULL)
131 				continue;
132 			if (vm_page_sleep_if_busy(m, "pcmdum"))
133 				goto retry;
134 			cdev_pager_free_page(map->mem, m);
135 		}
136 		VM_OBJECT_WUNLOCK(map->mem);
137 
138 		for (i = 0; i < map->size; i++) {
139 			rm.gpfn = atop(map->phys_base_addr) + i;
140 			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
141 		}
142 		free(map->err, M_PRIVCMD);
143 	}
144 
145 	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
146 	    map->pseudo_phys_res);
147 	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
148 
149 	free(map, M_PRIVCMD);
150 }
151 
152 static int
153 privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
154     int prot, vm_page_t *mres)
155 {
156 	struct privcmd_map *map = object->handle;
157 	vm_pindex_t pidx;
158 	vm_page_t page, oldm;
159 
160 	if (map->mapped != true)
161 		return (VM_PAGER_FAIL);
162 
163 	pidx = OFF_TO_IDX(offset);
164 	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
165 		return (VM_PAGER_FAIL);
166 
167 	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
168 	if (page == NULL)
169 		return (VM_PAGER_FAIL);
170 
171 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
172 	    ("not fictitious %p", page));
173 	KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page));
174 	KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page));
175 
176 	if (*mres != NULL) {
177 		oldm = *mres;
178 		vm_page_lock(oldm);
179 		vm_page_free(oldm);
180 		vm_page_unlock(oldm);
181 		*mres = NULL;
182 	}
183 
184 	vm_page_insert(page, object, pidx);
185 	page->valid = VM_PAGE_BITS_ALL;
186 	vm_page_xbusy(page);
187 	*mres = page;
188 	return (VM_PAGER_OK);
189 }
190 
191 /*----------------------- Privcmd char device methods ------------------------*/
192 static int
193 privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
194     vm_object_t *object, int nprot)
195 {
196 	struct privcmd_map *map;
197 
198 	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
199 
200 	map->size = OFF_TO_IDX(size);
201 	map->pseudo_phys_res_id = 0;
202 
203 	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
204 	    &map->pseudo_phys_res_id, size);
205 	if (map->pseudo_phys_res == NULL) {
206 		free(map, M_PRIVCMD);
207 		return (ENOMEM);
208 	}
209 
210 	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
211 	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
212 	    size, nprot, *offset, NULL);
213 	if (map->mem == NULL) {
214 		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
215 		    map->pseudo_phys_res);
216 		free(map, M_PRIVCMD);
217 		return (ENOMEM);
218 	}
219 
220 	*object = map->mem;
221 
222 	return (0);
223 }
224 
225 static int
226 privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
227 	      int mode, struct thread *td)
228 {
229 	int error, i;
230 
231 	switch (cmd) {
232 	case IOCTL_PRIVCMD_HYPERCALL: {
233 		struct ioctl_privcmd_hypercall *hcall;
234 
235 		hcall = (struct ioctl_privcmd_hypercall *)arg;
236 
237 		error = privcmd_hypercall(hcall->op, hcall->arg[0],
238 		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
239 		if (error >= 0) {
240 			hcall->retval = error;
241 			error = 0;
242 		} else {
243 			error = xen_translate_error(error);
244 			hcall->retval = 0;
245 		}
246 		break;
247 	}
248 	case IOCTL_PRIVCMD_MMAPBATCH: {
249 		struct ioctl_privcmd_mmapbatch *mmap;
250 		vm_map_t map;
251 		vm_map_entry_t entry;
252 		vm_object_t mem;
253 		vm_pindex_t pindex;
254 		vm_prot_t prot;
255 		boolean_t wired;
256 		struct xen_add_to_physmap_range add;
257 		xen_ulong_t *idxs;
258 		xen_pfn_t *gpfns;
259 		int *errs, index;
260 		struct privcmd_map *umap;
261 		uint16_t num;
262 
263 		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
264 
265 		if ((mmap->num == 0) ||
266 		    ((mmap->addr & PAGE_MASK) != 0)) {
267 			error = EINVAL;
268 			break;
269 		}
270 
271 		map = &td->td_proc->p_vmspace->vm_map;
272 		error = vm_map_lookup(&map, mmap->addr, VM_PROT_NONE, &entry,
273 		    &mem, &pindex, &prot, &wired);
274 		if (error != KERN_SUCCESS) {
275 			error = EINVAL;
276 			break;
277 		}
278 		if ((entry->start != mmap->addr) ||
279 		    (entry->end != mmap->addr + (mmap->num * PAGE_SIZE))) {
280 			vm_map_lookup_done(map, entry);
281 			error = EINVAL;
282 			break;
283 		}
284 		vm_map_lookup_done(map, entry);
285 		if ((mem->type != OBJT_MGTDEVICE) ||
286 		    (mem->un_pager.devp.ops != &privcmd_pg_ops)) {
287 			error = EINVAL;
288 			break;
289 		}
290 		umap = mem->handle;
291 
292 		add.domid = DOMID_SELF;
293 		add.space = XENMAPSPACE_gmfn_foreign;
294 		add.foreign_domid = mmap->dom;
295 
296 		/*
297 		 * The 'size' field in the xen_add_to_physmap_range only
298 		 * allows for UINT16_MAX mappings in a single hypercall.
299 		 */
300 		num = MIN(mmap->num, UINT16_MAX);
301 
302 		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
303 		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
304 		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
305 
306 		set_xen_guest_handle(add.idxs, idxs);
307 		set_xen_guest_handle(add.gpfns, gpfns);
308 		set_xen_guest_handle(add.errs, errs);
309 
310 		/* Allocate a bitset to store broken page mappings. */
311 		umap->err = BITSET_ALLOC(mmap->num, M_PRIVCMD,
312 		    M_WAITOK | M_ZERO);
313 
314 		for (index = 0; index < mmap->num; index += num) {
315 			num = MIN(mmap->num - index, UINT16_MAX);
316 			add.size = num;
317 
318 			error = copyin(&mmap->arr[index], idxs,
319 			    sizeof(idxs[0]) * num);
320 			if (error != 0)
321 				goto mmap_out;
322 
323 			for (i = 0; i < num; i++)
324 				gpfns[i] = atop(umap->phys_base_addr +
325 				    (i + index) * PAGE_SIZE);
326 
327 			bzero(errs, sizeof(*errs) * num);
328 
329 			error = HYPERVISOR_memory_op(
330 			    XENMEM_add_to_physmap_range, &add);
331 			if (error != 0) {
332 				error = xen_translate_error(error);
333 				goto mmap_out;
334 			}
335 
336 			for (i = 0; i < num; i++) {
337 				if (errs[i] != 0) {
338 					errs[i] = xen_translate_error(errs[i]);
339 
340 					/* Mark the page as invalid. */
341 					BIT_SET(mmap->num, index + i,
342 					    umap->err);
343 				}
344 			}
345 
346 			error = copyout(errs, &mmap->err[index],
347 			    sizeof(errs[0]) * num);
348 			if (error != 0)
349 				goto mmap_out;
350 		}
351 
352 		umap->mapped = true;
353 
354 mmap_out:
355 		free(idxs, M_PRIVCMD);
356 		free(gpfns, M_PRIVCMD);
357 		free(errs, M_PRIVCMD);
358 		if (!umap->mapped)
359 			free(umap->err, M_PRIVCMD);
360 
361 		break;
362 	}
363 
364 	default:
365 		error = ENOSYS;
366 		break;
367 	}
368 
369 	return (error);
370 }
371 
372 /*------------------ Private Device Attachment Functions  --------------------*/
373 static void
374 privcmd_identify(driver_t *driver, device_t parent)
375 {
376 
377 	KASSERT(xen_domain(),
378 	    ("Trying to attach privcmd device on non Xen domain"));
379 
380 	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
381 		panic("unable to attach privcmd user-space device");
382 }
383 
384 static int
385 privcmd_probe(device_t dev)
386 {
387 
388 	privcmd_dev = dev;
389 	device_set_desc(dev, "Xen privileged interface user-space device");
390 	return (BUS_PROBE_NOWILDCARD);
391 }
392 
393 static int
394 privcmd_attach(device_t dev)
395 {
396 
397 	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
398 	    GID_WHEEL, 0600, "xen/privcmd");
399 	return (0);
400 }
401 
402 /*-------------------- Private Device Attachment Data  -----------------------*/
/* Newbus methods implemented by the privcmd driver. */
static device_method_t privcmd_methods[] = {
	DEVMETHOD(device_identify,	privcmd_identify),
	DEVMETHOD(device_probe,		privcmd_probe),
	DEVMETHOD(device_attach,	privcmd_attach),

	DEVMETHOD_END
};
410 
/*
 * Driver description: name, method table and a third field of 0
 * (presumably the softc size, i.e. no per-device softc -- confirm against
 * driver_t).
 */
static driver_t privcmd_driver = {
	"privcmd",
	privcmd_methods,
	0,
};
416 
/* Device class handed to DRIVER_MODULE() below. */
devclass_t privcmd_devclass;

/* Register the driver on the xenpv bus and declare the dependency on it. */
DRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
421