/*
 * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/uio.h>
30 #include <sys/bus.h>
31 #include <sys/malloc.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/rwlock.h>
36 #include <sys/selinfo.h>
37 #include <sys/poll.h>
38 #include <sys/conf.h>
39 #include <sys/fcntl.h>
40 #include <sys/ioccom.h>
41 #include <sys/rman.h>
42 #include <sys/tree.h>
43 #include <sys/module.h>
44 #include <sys/proc.h>
45 #include <sys/bitset.h>
46
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_extern.h>
50 #include <vm/vm_kern.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54 #include <vm/vm_pager.h>
55
56 #include <machine/md_var.h>
57
58 #include <xen/xen-os.h>
59 #include <xen/hypervisor.h>
60 #include <xen/privcmd.h>
61 #include <xen/error.h>
62
/* Malloc type used to tag every allocation made by this driver. */
MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");

/* Largest buffer count accepted by IOCTL_PRIVCMD_DM_OP (else E2BIG). */
#define MAX_DMOP_BUFFERS 16
66
/*
 * Per-mmap() state.  One instance is allocated in privcmd_mmap_single(),
 * used as the handle of the OBJT_MGTDEVICE object created there, and
 * released by privcmd_pg_dtor().
 */
struct privcmd_map {
	vm_object_t mem;		/* object from cdev_pager_allocate() */
	vm_size_t size;			/* length of the area, in pages */
	struct resource *pseudo_phys_res; /* range from xenmem_alloc() */
	int pseudo_phys_res_id;		/* id paired with pseudo_phys_res */
	vm_paddr_t phys_base_addr;	/* rman_get_start() of that range */
	boolean_t mapped;		/* set once an ioctl populated the area */
	BITSET_DEFINE_VAR() *err;	/* bit set => that page failed to map */
};
76
/* Character device entry points implemented further down in this file. */
static d_ioctl_t privcmd_ioctl;
static d_open_t privcmd_open;
static d_mmap_single_t privcmd_mmap_single;

/* Switch table for the "privcmd" character device. */
static struct cdevsw privcmd_devsw = {
	.d_version = D_VERSION,
	.d_ioctl = privcmd_ioctl,
	.d_mmap_single = privcmd_mmap_single,
	.d_open = privcmd_open,
	.d_name = "privcmd",
};
88
/* Pager callbacks implemented further down in this file. */
static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color);
static void privcmd_pg_dtor(void *handle);
static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
    int prot, vm_page_t *mres);

/*
 * Pager operations handed to cdev_pager_allocate().  The address of this
 * table is also what setup_virtual_area() compares against to recognise
 * objects created by this driver.
 */
static struct cdev_pager_ops privcmd_pg_ops = {
	.cdev_pg_fault = privcmd_pg_fault,
	.cdev_pg_ctor = privcmd_pg_ctor,
	.cdev_pg_dtor = privcmd_pg_dtor,
};
100
/*
 * State private to one open() of the device.  'dom' starts out as
 * DOMID_INVALID (no restriction); IOCTL_PRIVCMD_RESTRICT stores a domain
 * id here and the other ioctls then refuse requests for any other domain.
 */
struct per_user_data {
	domid_t dom;
};
104
/*
 * The privcmd device, recorded by privcmd_probe() and passed to
 * xenmem_alloc()/xenmem_free() for every mapping.
 */
static device_t privcmd_dev = NULL;
106
107 /*------------------------- Privcmd Pager functions --------------------------*/
108 static int
privcmd_pg_ctor(void * handle,vm_ooffset_t size,vm_prot_t prot,vm_ooffset_t foff,struct ucred * cred,u_short * color)109 privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
110 vm_ooffset_t foff, struct ucred *cred, u_short *color)
111 {
112
113 return (0);
114 }
115
116 static void
privcmd_pg_dtor(void * handle)117 privcmd_pg_dtor(void *handle)
118 {
119 struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
120 struct privcmd_map *map = handle;
121 int error __diagused;
122 vm_size_t i;
123
124 /*
125 * Remove the mappings from the used pages. This will remove the
126 * underlying p2m bindings in Xen second stage translation.
127 */
128 if (map->mapped == true) {
129 cdev_mgtdev_pager_free_pages(map->mem);
130 for (i = 0; i < map->size; i++) {
131 rm.gpfn = atop(map->phys_base_addr) + i;
132 HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
133 }
134 free(map->err, M_PRIVCMD);
135 }
136
137 error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
138 map->pseudo_phys_res);
139 KASSERT(error == 0, ("Unable to release memory resource: %d", error));
140
141 free(map, M_PRIVCMD);
142 }
143
144 static int
privcmd_pg_fault(vm_object_t object,vm_ooffset_t offset,int prot,vm_page_t * mres)145 privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
146 int prot, vm_page_t *mres)
147 {
148 struct privcmd_map *map = object->handle;
149 vm_pindex_t pidx;
150 vm_page_t page;
151
152 if (map->mapped != true)
153 return (VM_PAGER_FAIL);
154
155 pidx = OFF_TO_IDX(offset);
156 if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
157 return (VM_PAGER_FAIL);
158
159 page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
160 if (page == NULL)
161 return (VM_PAGER_FAIL);
162
163 KASSERT((page->flags & PG_FICTITIOUS) != 0,
164 ("not fictitious %p", page));
165 KASSERT(vm_page_wired(page), ("page %p not wired", page));
166 KASSERT(!vm_page_busied(page), ("page %p is busy", page));
167
168 vm_page_busy_acquire(page, 0);
169 vm_page_valid(page);
170
171 if (*mres != NULL)
172 vm_page_replace(page, object, pidx, *mres);
173 else
174 vm_page_insert(page, object, pidx);
175 *mres = page;
176 return (VM_PAGER_OK);
177 }
178
179 /*----------------------- Privcmd char device methods ------------------------*/
180 static int
privcmd_mmap_single(struct cdev * cdev,vm_ooffset_t * offset,vm_size_t size,vm_object_t * object,int nprot)181 privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
182 vm_object_t *object, int nprot)
183 {
184 struct privcmd_map *map;
185
186 map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
187
188 map->size = OFF_TO_IDX(size);
189 map->pseudo_phys_res_id = 0;
190
191 map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
192 &map->pseudo_phys_res_id, size);
193 if (map->pseudo_phys_res == NULL) {
194 free(map, M_PRIVCMD);
195 return (ENOMEM);
196 }
197
198 map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
199 map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
200 size, nprot, *offset, NULL);
201 if (map->mem == NULL) {
202 xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
203 map->pseudo_phys_res);
204 free(map, M_PRIVCMD);
205 return (ENOMEM);
206 }
207
208 *object = map->mem;
209
210 return (0);
211 }
212
213 static struct privcmd_map *
setup_virtual_area(struct thread * td,unsigned long addr,unsigned long num)214 setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num)
215 {
216 vm_map_t map;
217 vm_map_entry_t entry;
218 vm_object_t mem;
219 vm_pindex_t pindex;
220 vm_prot_t prot;
221 boolean_t wired;
222 struct privcmd_map *umap;
223 int error;
224
225 if ((num == 0) || ((addr & PAGE_MASK) != 0))
226 return NULL;
227
228 map = &td->td_proc->p_vmspace->vm_map;
229 error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex,
230 &prot, &wired);
231 if (error != KERN_SUCCESS || (entry->start != addr) ||
232 (entry->end != addr + (num * PAGE_SIZE)))
233 return NULL;
234
235 vm_map_lookup_done(map, entry);
236 if ((mem->type != OBJT_MGTDEVICE) ||
237 (mem->un_pager.devp.ops != &privcmd_pg_ops))
238 return NULL;
239
240 umap = mem->handle;
241 /* Allocate a bitset to store broken page mappings. */
242 umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO);
243
244 return umap;
245 }
246
247 static int
privcmd_ioctl(struct cdev * dev,unsigned long cmd,caddr_t arg,int mode,struct thread * td)248 privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
249 int mode, struct thread *td)
250 {
251 int error;
252 unsigned int i;
253 void *data;
254 const struct per_user_data *u;
255
256 error = devfs_get_cdevpriv(&data);
257 if (error != 0)
258 return (EINVAL);
259 /*
260 * Constify user-data to prevent unintended changes to the restriction
261 * limits.
262 */
263 u = data;
264
265 switch (cmd) {
266 case IOCTL_PRIVCMD_HYPERCALL: {
267 struct ioctl_privcmd_hypercall *hcall;
268
269 hcall = (struct ioctl_privcmd_hypercall *)arg;
270
271 /* Forbid hypercalls if restricted. */
272 if (u->dom != DOMID_INVALID) {
273 error = EPERM;
274 break;
275 }
276
277 #ifdef __amd64__
278 /*
279 * The hypervisor page table walker will refuse to access
280 * user-space pages if SMAP is enabled, so temporary disable it
281 * while performing the hypercall.
282 */
283 if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
284 stac();
285 #endif
286 error = privcmd_hypercall(hcall->op, hcall->arg[0],
287 hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
288 #ifdef __amd64__
289 if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
290 clac();
291 #endif
292 if (error >= 0) {
293 hcall->retval = error;
294 error = 0;
295 } else {
296 error = xen_translate_error(error);
297 hcall->retval = 0;
298 }
299 break;
300 }
301 case IOCTL_PRIVCMD_MMAPBATCH: {
302 struct ioctl_privcmd_mmapbatch *mmap;
303 struct xen_add_to_physmap_batch add;
304 xen_ulong_t *idxs;
305 xen_pfn_t *gpfns;
306 int *errs;
307 unsigned int index;
308 struct privcmd_map *umap;
309 uint16_t num;
310
311 mmap = (struct ioctl_privcmd_mmapbatch *)arg;
312
313 if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
314 error = EPERM;
315 break;
316 }
317
318 umap = setup_virtual_area(td, mmap->addr, mmap->num);
319 if (umap == NULL) {
320 error = EINVAL;
321 break;
322 }
323
324 add.domid = DOMID_SELF;
325 add.space = XENMAPSPACE_gmfn_foreign;
326 add.u.foreign_domid = mmap->dom;
327
328 /*
329 * The 'size' field in the xen_add_to_physmap_range only
330 * allows for UINT16_MAX mappings in a single hypercall.
331 */
332 num = MIN(mmap->num, UINT16_MAX);
333
334 idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
335 gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
336 errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
337
338 set_xen_guest_handle(add.idxs, idxs);
339 set_xen_guest_handle(add.gpfns, gpfns);
340 set_xen_guest_handle(add.errs, errs);
341
342 for (index = 0; index < mmap->num; index += num) {
343 num = MIN(mmap->num - index, UINT16_MAX);
344 add.size = num;
345
346 error = copyin(&mmap->arr[index], idxs,
347 sizeof(idxs[0]) * num);
348 if (error != 0)
349 goto mmap_out;
350
351 for (i = 0; i < num; i++)
352 gpfns[i] = atop(umap->phys_base_addr +
353 (i + index) * PAGE_SIZE);
354
355 bzero(errs, sizeof(*errs) * num);
356
357 error = HYPERVISOR_memory_op(
358 XENMEM_add_to_physmap_batch, &add);
359 if (error != 0) {
360 error = xen_translate_error(error);
361 goto mmap_out;
362 }
363
364 for (i = 0; i < num; i++) {
365 if (errs[i] != 0) {
366 errs[i] = xen_translate_error(errs[i]);
367
368 /* Mark the page as invalid. */
369 BIT_SET(mmap->num, index + i,
370 umap->err);
371 }
372 }
373
374 error = copyout(errs, &mmap->err[index],
375 sizeof(errs[0]) * num);
376 if (error != 0)
377 goto mmap_out;
378 }
379
380 umap->mapped = true;
381
382 mmap_out:
383 free(idxs, M_PRIVCMD);
384 free(gpfns, M_PRIVCMD);
385 free(errs, M_PRIVCMD);
386 if (!umap->mapped)
387 free(umap->err, M_PRIVCMD);
388
389 break;
390 }
391 case IOCTL_PRIVCMD_MMAP_RESOURCE: {
392 struct ioctl_privcmd_mmapresource *mmap;
393 struct xen_mem_acquire_resource adq;
394 xen_pfn_t *gpfns;
395 struct privcmd_map *umap;
396
397 mmap = (struct ioctl_privcmd_mmapresource *)arg;
398
399 if (u->dom != DOMID_INVALID && u->dom != mmap->dom) {
400 error = EPERM;
401 break;
402 }
403
404 bzero(&adq, sizeof(adq));
405
406 adq.domid = mmap->dom;
407 adq.type = mmap->type;
408 adq.id = mmap->id;
409
410 /* Shortcut for getting the resource size. */
411 if (mmap->addr == 0 && mmap->num == 0) {
412 error = HYPERVISOR_memory_op(XENMEM_acquire_resource,
413 &adq);
414 if (error != 0)
415 error = xen_translate_error(error);
416 else
417 mmap->num = adq.nr_frames;
418 break;
419 }
420
421 umap = setup_virtual_area(td, mmap->addr, mmap->num);
422 if (umap == NULL) {
423 error = EINVAL;
424 break;
425 }
426
427 adq.nr_frames = mmap->num;
428 adq.frame = mmap->idx;
429
430 gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK);
431 for (i = 0; i < mmap->num; i++)
432 gpfns[i] = atop(umap->phys_base_addr) + i;
433 set_xen_guest_handle(adq.frame_list, gpfns);
434
435 error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq);
436 if (error != 0)
437 error = xen_translate_error(error);
438 else
439 umap->mapped = true;
440
441 free(gpfns, M_PRIVCMD);
442 if (!umap->mapped)
443 free(umap->err, M_PRIVCMD);
444
445 break;
446 }
447 case IOCTL_PRIVCMD_DM_OP: {
448 const struct ioctl_privcmd_dmop *dmop;
449 struct privcmd_dmop_buf *bufs;
450 struct xen_dm_op_buf *hbufs;
451
452 dmop = (struct ioctl_privcmd_dmop *)arg;
453
454 if (u->dom != DOMID_INVALID && u->dom != dmop->dom) {
455 error = EPERM;
456 break;
457 }
458
459 if (dmop->num == 0)
460 break;
461
462 if (dmop->num > MAX_DMOP_BUFFERS) {
463 error = E2BIG;
464 break;
465 }
466
467 bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK);
468
469 error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num);
470 if (error != 0) {
471 free(bufs, M_PRIVCMD);
472 break;
473 }
474
475 hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK);
476 for (i = 0; i < dmop->num; i++) {
477 set_xen_guest_handle(hbufs[i].h, bufs[i].uptr);
478 hbufs[i].size = bufs[i].size;
479 }
480
481 #ifdef __amd64__
482 if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
483 stac();
484 #endif
485 error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs);
486 #ifdef __amd64__
487 if (cpu_stdext_feature & CPUID_STDEXT_SMAP)
488 clac();
489 #endif
490 if (error != 0)
491 error = xen_translate_error(error);
492
493 free(bufs, M_PRIVCMD);
494 free(hbufs, M_PRIVCMD);
495
496
497 break;
498 }
499 case IOCTL_PRIVCMD_RESTRICT: {
500 struct per_user_data *u;
501 domid_t dom;
502
503 dom = *(domid_t *)arg;
504
505 error = devfs_get_cdevpriv((void **)&u);
506 if (error != 0)
507 break;
508
509 if (u->dom != DOMID_INVALID && u->dom != dom) {
510 error = -EINVAL;
511 break;
512 }
513 u->dom = dom;
514
515 break;
516 }
517 default:
518 error = ENOSYS;
519 break;
520 }
521
522 return (error);
523 }
524
525 static void
user_release(void * arg)526 user_release(void *arg)
527 {
528
529 free(arg, M_PRIVCMD);
530 }
531
532 static int
privcmd_open(struct cdev * dev,int flag,int otyp,struct thread * td)533 privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td)
534 {
535 struct per_user_data *u;
536 int error;
537
538 u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK);
539 u->dom = DOMID_INVALID;
540
541 /* Assign the allocated per_user_data to this open instance. */
542 error = devfs_set_cdevpriv(u, user_release);
543 if (error != 0) {
544 free(u, M_PRIVCMD);
545 }
546
547 return (error);
548 }
549
550 /*------------------ Private Device Attachment Functions --------------------*/
551 static void
privcmd_identify(driver_t * driver,device_t parent)552 privcmd_identify(driver_t *driver, device_t parent)
553 {
554
555 KASSERT(xen_domain(),
556 ("Trying to attach privcmd device on non Xen domain"));
557
558 if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
559 panic("unable to attach privcmd user-space device");
560 }
561
562 static int
privcmd_probe(device_t dev)563 privcmd_probe(device_t dev)
564 {
565
566 privcmd_dev = dev;
567 device_set_desc(dev, "Xen privileged interface user-space device");
568 return (BUS_PROBE_NOWILDCARD);
569 }
570
571 static int
privcmd_attach(device_t dev)572 privcmd_attach(device_t dev)
573 {
574
575 make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
576 GID_WHEEL, 0600, "xen/privcmd");
577 return (0);
578 }
579
580 /*-------------------- Private Device Attachment Data -----------------------*/
/* Device interface methods implemented by this driver. */
static device_method_t privcmd_methods[] = {
	DEVMETHOD(device_identify, privcmd_identify),
	DEVMETHOD(device_probe, privcmd_probe),
	DEVMETHOD(device_attach, privcmd_attach),

	DEVMETHOD_END
};
588
/* Driver description: the "privcmd" name and its method table. */
static driver_t privcmd_driver = {
	"privcmd",
	privcmd_methods,
	0,
};
594
/* Register the driver under xenpv and declare the dependency on it. */
DRIVER_MODULE(privcmd, xenpv, privcmd_driver, 0, 0);
MODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
597