1843e1988Sjohnlev /* 2843e1988Sjohnlev * CDDL HEADER START 3843e1988Sjohnlev * 4843e1988Sjohnlev * The contents of this file are subject to the terms of the 5843e1988Sjohnlev * Common Development and Distribution License (the "License"). 6843e1988Sjohnlev * You may not use this file except in compliance with the License. 7843e1988Sjohnlev * 8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing. 10843e1988Sjohnlev * See the License for the specific language governing permissions 11843e1988Sjohnlev * and limitations under the License. 12843e1988Sjohnlev * 13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18843e1988Sjohnlev * 19843e1988Sjohnlev * CDDL HEADER END 20843e1988Sjohnlev */ 21843e1988Sjohnlev 22843e1988Sjohnlev /* 23*a576ab5bSrab * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24843e1988Sjohnlev * Use is subject to license terms. 25843e1988Sjohnlev */ 26843e1988Sjohnlev 27843e1988Sjohnlev #pragma ident "%Z%%M% %I% %E% SMI" 28843e1988Sjohnlev 29843e1988Sjohnlev #include <sys/types.h> 30843e1988Sjohnlev #include <sys/file.h> 31843e1988Sjohnlev #include <sys/errno.h> 32843e1988Sjohnlev #include <sys/open.h> 33843e1988Sjohnlev #include <sys/cred.h> 34843e1988Sjohnlev #include <sys/conf.h> 35843e1988Sjohnlev #include <sys/stat.h> 36843e1988Sjohnlev #include <sys/modctl.h> 37843e1988Sjohnlev #include <sys/ddi.h> 38843e1988Sjohnlev #include <sys/sunddi.h> 39843e1988Sjohnlev #include <sys/vmsystm.h> 40843e1988Sjohnlev #include <sys/sdt.h> 41843e1988Sjohnlev #include <sys/hypervisor.h> 42843e1988Sjohnlev #include <sys/xen_errno.h> 43843e1988Sjohnlev 44843e1988Sjohnlev #include <vm/hat_i86.h> 45843e1988Sjohnlev #include <vm/hat_pte.h> 46843e1988Sjohnlev #include <vm/seg_mf.h> 47843e1988Sjohnlev 48843e1988Sjohnlev #include <xen/sys/privcmd.h> 49843e1988Sjohnlev #include <sys/privcmd_impl.h> 50843e1988Sjohnlev 51843e1988Sjohnlev static dev_info_t *privcmd_devi; 52843e1988Sjohnlev 53843e1988Sjohnlev /*ARGSUSED*/ 54843e1988Sjohnlev static int 55843e1988Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 56843e1988Sjohnlev { 57843e1988Sjohnlev switch (cmd) { 58843e1988Sjohnlev case DDI_INFO_DEVT2DEVINFO: 59843e1988Sjohnlev case DDI_INFO_DEVT2INSTANCE: 60843e1988Sjohnlev break; 61843e1988Sjohnlev default: 62843e1988Sjohnlev return (DDI_FAILURE); 63843e1988Sjohnlev } 64843e1988Sjohnlev 65843e1988Sjohnlev switch (getminor((dev_t)arg)) { 66843e1988Sjohnlev case PRIVCMD_MINOR: 67843e1988Sjohnlev break; 68843e1988Sjohnlev default: 69843e1988Sjohnlev return (DDI_FAILURE); 70843e1988Sjohnlev } 71843e1988Sjohnlev 72843e1988Sjohnlev if (cmd == DDI_INFO_DEVT2INSTANCE) 73843e1988Sjohnlev *result = 0; 74843e1988Sjohnlev else 75843e1988Sjohnlev *result = privcmd_devi; 76843e1988Sjohnlev return (DDI_SUCCESS); 77843e1988Sjohnlev } 78843e1988Sjohnlev 79843e1988Sjohnlev static int 80843e1988Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 81843e1988Sjohnlev { 82843e1988Sjohnlev if (cmd != DDI_ATTACH) 83843e1988Sjohnlev return (DDI_FAILURE); 84843e1988Sjohnlev 85843e1988Sjohnlev if (ddi_create_minor_node(devi, PRIVCMD_NODE, 86843e1988Sjohnlev S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) 87843e1988Sjohnlev return (DDI_FAILURE); 88843e1988Sjohnlev 89843e1988Sjohnlev privcmd_devi = devi; 90843e1988Sjohnlev ddi_report_dev(devi); 91843e1988Sjohnlev return (DDI_SUCCESS); 92843e1988Sjohnlev } 93843e1988Sjohnlev 94843e1988Sjohnlev static int 95843e1988Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 96843e1988Sjohnlev { 97843e1988Sjohnlev if (cmd != DDI_DETACH) 98843e1988Sjohnlev return (DDI_FAILURE); 99843e1988Sjohnlev ddi_remove_minor_node(devi, NULL); 100843e1988Sjohnlev privcmd_devi = NULL; 101843e1988Sjohnlev return (DDI_SUCCESS); 102843e1988Sjohnlev } 103843e1988Sjohnlev 104843e1988Sjohnlev /*ARGSUSED1*/ 105843e1988Sjohnlev static int 106843e1988Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr) 107843e1988Sjohnlev { 108843e1988Sjohnlev return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO); 109843e1988Sjohnlev } 110843e1988Sjohnlev 111843e1988Sjohnlev /* 112843e1988Sjohnlev * Map a contiguous set of machine frames in a foreign domain. 113843e1988Sjohnlev * Used in the following way: 114843e1988Sjohnlev * 115843e1988Sjohnlev * privcmd_mmap_t p; 116843e1988Sjohnlev * privcmd_mmap_entry_t e; 117843e1988Sjohnlev * 118843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 119843e1988Sjohnlev * p.num = number of privcmd_mmap_entry_t's 120843e1988Sjohnlev * p.dom = domid; 121843e1988Sjohnlev * p.entry = &e; 122843e1988Sjohnlev * e.va = addr; 123843e1988Sjohnlev * e.mfn = mfn; 124843e1988Sjohnlev * e.npages = btopr(size); 125843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p); 126843e1988Sjohnlev */ 127843e1988Sjohnlev /*ARGSUSED2*/ 128843e1988Sjohnlev int 129843e1988Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr) 130843e1988Sjohnlev { 131843e1988Sjohnlev privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd; 132843e1988Sjohnlev privcmd_mmap_entry_t *umme; 133843e1988Sjohnlev struct as *as = curproc->p_as; 134843e1988Sjohnlev struct seg *seg; 135843e1988Sjohnlev int i, error = 0; 136843e1988Sjohnlev 137843e1988Sjohnlev if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode)) 138843e1988Sjohnlev return (EFAULT); 139843e1988Sjohnlev 140843e1988Sjohnlev DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num, 141843e1988Sjohnlev privcmd_mmap_entry_t *, mmc->entry); 142843e1988Sjohnlev 143843e1988Sjohnlev if (mmc->dom == DOMID_SELF) { 144843e1988Sjohnlev error = ENOTSUP; /* Too paranoid? */ 145843e1988Sjohnlev goto done; 146843e1988Sjohnlev } 147843e1988Sjohnlev 148843e1988Sjohnlev for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) { 149843e1988Sjohnlev privcmd_mmap_entry_t __mmapent, *mme = &__mmapent; 150843e1988Sjohnlev caddr_t addr; 151843e1988Sjohnlev 152843e1988Sjohnlev if (ddi_copyin(umme, mme, sizeof (*mme), mode)) { 153843e1988Sjohnlev error = EFAULT; 154843e1988Sjohnlev break; 155843e1988Sjohnlev } 156843e1988Sjohnlev 157843e1988Sjohnlev DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn, 158843e1988Sjohnlev ulong_t, mme->npages); 159843e1988Sjohnlev 160843e1988Sjohnlev if (mme->mfn == MFN_INVALID) { 161843e1988Sjohnlev error = EINVAL; 162843e1988Sjohnlev break; 163843e1988Sjohnlev } 164843e1988Sjohnlev 165843e1988Sjohnlev addr = (caddr_t)mme->va; 166843e1988Sjohnlev 167843e1988Sjohnlev /* 168843e1988Sjohnlev * Find the segment we want to mess with, then add 169843e1988Sjohnlev * the mfn range to the segment. 170843e1988Sjohnlev */ 171843e1988Sjohnlev AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 172843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 173843e1988Sjohnlev addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size) 174843e1988Sjohnlev error = EINVAL; 175843e1988Sjohnlev else 176843e1988Sjohnlev error = segmf_add_mfns(seg, addr, 177843e1988Sjohnlev mme->mfn, mme->npages, mmc->dom); 178843e1988Sjohnlev AS_LOCK_EXIT(as, &as->a_lock); 179843e1988Sjohnlev 180843e1988Sjohnlev if (error != 0) 181843e1988Sjohnlev break; 182843e1988Sjohnlev } 183843e1988Sjohnlev 184843e1988Sjohnlev done: 185843e1988Sjohnlev DTRACE_XPV1(mmap__end, int, error); 186843e1988Sjohnlev 187843e1988Sjohnlev return (error); 188843e1988Sjohnlev } 189843e1988Sjohnlev 190843e1988Sjohnlev /* 191843e1988Sjohnlev * Set up the address range to map to an array of mfns in 192843e1988Sjohnlev * a foreign domain. Used in the following way: 193843e1988Sjohnlev * 194843e1988Sjohnlev * privcmd_mmap_batch_t p; 195843e1988Sjohnlev * 196843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 197843e1988Sjohnlev * p.num = number of pages 198843e1988Sjohnlev * p.dom = domid 199843e1988Sjohnlev * p.addr = addr; 200843e1988Sjohnlev * p.arr = array of mfns, indexed 0 .. p.num - 1 201843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p); 202843e1988Sjohnlev */ 203843e1988Sjohnlev /*ARGSUSED2*/ 204843e1988Sjohnlev static int 205843e1988Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) 206843e1988Sjohnlev { 207843e1988Sjohnlev privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch; 208843e1988Sjohnlev struct as *as = curproc->p_as; 209843e1988Sjohnlev struct seg *seg; 210843e1988Sjohnlev int i, error = 0; 211843e1988Sjohnlev caddr_t addr; 212843e1988Sjohnlev ulong_t *ulp; 213843e1988Sjohnlev 214843e1988Sjohnlev if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode)) 215843e1988Sjohnlev return (EFAULT); 216843e1988Sjohnlev 217843e1988Sjohnlev DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num, 218843e1988Sjohnlev caddr_t, mmb->addr); 219843e1988Sjohnlev 220843e1988Sjohnlev addr = (caddr_t)mmb->addr; 221843e1988Sjohnlev AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 222843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 223843e1988Sjohnlev addr + ptob(mmb->num) > seg->s_base + seg->s_size) { 224843e1988Sjohnlev error = EINVAL; 225843e1988Sjohnlev goto done; 226843e1988Sjohnlev } 227843e1988Sjohnlev 228843e1988Sjohnlev for (i = 0, ulp = mmb->arr; 229843e1988Sjohnlev i < mmb->num; i++, addr += PAGESIZE, ulp++) { 230843e1988Sjohnlev mfn_t mfn; 231843e1988Sjohnlev 232843e1988Sjohnlev if (fulword(ulp, &mfn) != 0) { 233843e1988Sjohnlev error = EFAULT; 234843e1988Sjohnlev break; 235843e1988Sjohnlev } 236843e1988Sjohnlev 237843e1988Sjohnlev if (mfn == MFN_INVALID) { 238*a576ab5bSrab /* 239*a576ab5bSrab * This mfn is invalid and should not be added to 240*a576ab5bSrab * segmf, as we'd only cause an immediate EFAULT when 241*a576ab5bSrab * we tried to fault it in. 242*a576ab5bSrab */ 243*a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB; 244*a576ab5bSrab continue; 245843e1988Sjohnlev } 246843e1988Sjohnlev 247843e1988Sjohnlev if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0) 248843e1988Sjohnlev continue; 249843e1988Sjohnlev 250843e1988Sjohnlev /* 251843e1988Sjohnlev * Tell the process that this MFN could not be mapped, so it 252843e1988Sjohnlev * won't later try to access it. 253843e1988Sjohnlev */ 254*a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB; 255843e1988Sjohnlev if (sulword(ulp, mfn) != 0) { 256843e1988Sjohnlev error = EFAULT; 257843e1988Sjohnlev break; 258843e1988Sjohnlev } 259843e1988Sjohnlev } 260843e1988Sjohnlev 261843e1988Sjohnlev done: 262843e1988Sjohnlev AS_LOCK_EXIT(as, &as->a_lock); 263843e1988Sjohnlev 264843e1988Sjohnlev DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t, 265843e1988Sjohnlev mmb->addr); 266843e1988Sjohnlev 267843e1988Sjohnlev return (error); 268843e1988Sjohnlev } 269843e1988Sjohnlev 270843e1988Sjohnlev /*ARGSUSED*/ 271843e1988Sjohnlev static int 272843e1988Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval) 273843e1988Sjohnlev { 274843e1988Sjohnlev if ((mode & FMODELS) != FNATIVE) 275843e1988Sjohnlev return (EOVERFLOW); 276843e1988Sjohnlev 277843e1988Sjohnlev /* 278843e1988Sjohnlev * Everything is a -native- data type. 279843e1988Sjohnlev */ 280843e1988Sjohnlev 281843e1988Sjohnlev switch (cmd) { 282843e1988Sjohnlev case IOCTL_PRIVCMD_HYPERCALL: 283843e1988Sjohnlev return (do_privcmd_hypercall((void *)arg, mode, cr, rval)); 284843e1988Sjohnlev case IOCTL_PRIVCMD_MMAP: 285843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 286843e1988Sjohnlev return (do_privcmd_mmap((void *)arg, mode, cr)); 287843e1988Sjohnlev break; 288843e1988Sjohnlev case IOCTL_PRIVCMD_MMAPBATCH: 289843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 290843e1988Sjohnlev return (do_privcmd_mmapbatch((void *)arg, mode, cr)); 291843e1988Sjohnlev break; 292843e1988Sjohnlev default: 293843e1988Sjohnlev break; 294843e1988Sjohnlev } 295843e1988Sjohnlev return (EINVAL); 296843e1988Sjohnlev } 297843e1988Sjohnlev 298843e1988Sjohnlev /* 299843e1988Sjohnlev * The real magic happens in the segmf segment driver. 300843e1988Sjohnlev */ 301843e1988Sjohnlev /*ARGSUSED8*/ 302843e1988Sjohnlev static int 303843e1988Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, 304843e1988Sjohnlev off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr) 305843e1988Sjohnlev { 306843e1988Sjohnlev struct segmf_crargs a; 307843e1988Sjohnlev int error; 308843e1988Sjohnlev 309843e1988Sjohnlev as_rangelock(as); 310843e1988Sjohnlev if ((flags & MAP_FIXED) == 0) { 311843e1988Sjohnlev map_addr(addrp, len, (offset_t)off, 0, flags); 312843e1988Sjohnlev if (*addrp == NULL) { 313843e1988Sjohnlev error = ENOMEM; 314843e1988Sjohnlev goto rangeunlock; 315843e1988Sjohnlev } 316843e1988Sjohnlev } else { 317843e1988Sjohnlev /* 318843e1988Sjohnlev * User specified address 319843e1988Sjohnlev */ 320843e1988Sjohnlev (void) as_unmap(as, *addrp, len); 321843e1988Sjohnlev } 322843e1988Sjohnlev 323843e1988Sjohnlev /* 324843e1988Sjohnlev * The mapping *must* be MAP_SHARED at offset 0. 325843e1988Sjohnlev * 326843e1988Sjohnlev * (Foreign pages are treated like device memory; the 327843e1988Sjohnlev * ioctl interface allows the backing objects to be 328843e1988Sjohnlev * arbitrarily redefined to point at any machine frame.) 329843e1988Sjohnlev */ 330843e1988Sjohnlev if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) { 331843e1988Sjohnlev error = EINVAL; 332843e1988Sjohnlev goto rangeunlock; 333843e1988Sjohnlev } 334843e1988Sjohnlev 335843e1988Sjohnlev a.dev = dev; 336843e1988Sjohnlev a.prot = (uchar_t)prot; 337843e1988Sjohnlev a.maxprot = (uchar_t)maxprot; 338843e1988Sjohnlev error = as_map(as, *addrp, len, segmf_create, &a); 339843e1988Sjohnlev 340843e1988Sjohnlev rangeunlock: 341843e1988Sjohnlev as_rangeunlock(as); 342843e1988Sjohnlev return (error); 343843e1988Sjohnlev } 344843e1988Sjohnlev 345843e1988Sjohnlev static struct cb_ops privcmd_cb_ops = { 346843e1988Sjohnlev privcmd_open, 347843e1988Sjohnlev nulldev, /* close */ 348843e1988Sjohnlev nodev, /* strategy */ 349843e1988Sjohnlev nodev, /* print */ 350843e1988Sjohnlev nodev, /* dump */ 351843e1988Sjohnlev nodev, /* read */ 352843e1988Sjohnlev nodev, /* write */ 353843e1988Sjohnlev privcmd_ioctl, 354843e1988Sjohnlev nodev, /* devmap */ 355843e1988Sjohnlev nodev, /* mmap */ 356843e1988Sjohnlev privcmd_segmap, 357843e1988Sjohnlev nochpoll, /* poll */ 358843e1988Sjohnlev ddi_prop_op, 359843e1988Sjohnlev NULL, 360843e1988Sjohnlev D_64BIT | D_NEW | D_MP 361843e1988Sjohnlev }; 362843e1988Sjohnlev 363843e1988Sjohnlev static struct dev_ops privcmd_dv_ops = { 364843e1988Sjohnlev DEVO_REV, 365843e1988Sjohnlev 0, 366843e1988Sjohnlev privcmd_getinfo, 367843e1988Sjohnlev nulldev, /* identify */ 368843e1988Sjohnlev nulldev, /* probe */ 369843e1988Sjohnlev privcmd_attach, 370843e1988Sjohnlev privcmd_detach, 371843e1988Sjohnlev nodev, /* reset */ 372843e1988Sjohnlev &privcmd_cb_ops, 373843e1988Sjohnlev 0 /* struct bus_ops */ 374843e1988Sjohnlev }; 375843e1988Sjohnlev 376843e1988Sjohnlev static struct modldrv modldrv = { 377843e1988Sjohnlev &mod_driverops, 378843e1988Sjohnlev "privcmd driver %I%", 379843e1988Sjohnlev &privcmd_dv_ops 380843e1988Sjohnlev }; 381843e1988Sjohnlev 382843e1988Sjohnlev static struct modlinkage modl = { 383843e1988Sjohnlev MODREV_1, 384843e1988Sjohnlev &modldrv 385843e1988Sjohnlev }; 386843e1988Sjohnlev 387843e1988Sjohnlev int 388843e1988Sjohnlev _init(void) 389843e1988Sjohnlev { 390843e1988Sjohnlev return (mod_install(&modl)); 391843e1988Sjohnlev } 392843e1988Sjohnlev 393843e1988Sjohnlev int 394843e1988Sjohnlev _fini(void) 395843e1988Sjohnlev { 396843e1988Sjohnlev return (mod_remove(&modl)); 397843e1988Sjohnlev } 398843e1988Sjohnlev 399843e1988Sjohnlev int 400843e1988Sjohnlev _info(struct modinfo *modinfo) 401843e1988Sjohnlev { 402843e1988Sjohnlev return (mod_info(&modl, modinfo)); 403843e1988Sjohnlev } 404