1843e1988Sjohnlev /* 2843e1988Sjohnlev * CDDL HEADER START 3843e1988Sjohnlev * 4843e1988Sjohnlev * The contents of this file are subject to the terms of the 5843e1988Sjohnlev * Common Development and Distribution License (the "License"). 6843e1988Sjohnlev * You may not use this file except in compliance with the License. 7843e1988Sjohnlev * 8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing. 10843e1988Sjohnlev * See the License for the specific language governing permissions 11843e1988Sjohnlev * and limitations under the License. 12843e1988Sjohnlev * 13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each 14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the 16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying 17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18843e1988Sjohnlev * 19843e1988Sjohnlev * CDDL HEADER END 20843e1988Sjohnlev */ 21843e1988Sjohnlev 22843e1988Sjohnlev /* 23a576ab5bSrab * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24843e1988Sjohnlev * Use is subject to license terms. 25843e1988Sjohnlev */ 26843e1988Sjohnlev 27843e1988Sjohnlev #include <sys/types.h> 28843e1988Sjohnlev #include <sys/file.h> 29843e1988Sjohnlev #include <sys/errno.h> 30843e1988Sjohnlev #include <sys/open.h> 31843e1988Sjohnlev #include <sys/cred.h> 32843e1988Sjohnlev #include <sys/conf.h> 33843e1988Sjohnlev #include <sys/stat.h> 34843e1988Sjohnlev #include <sys/modctl.h> 35843e1988Sjohnlev #include <sys/ddi.h> 36843e1988Sjohnlev #include <sys/sunddi.h> 37843e1988Sjohnlev #include <sys/vmsystm.h> 38843e1988Sjohnlev #include <sys/sdt.h> 39843e1988Sjohnlev #include <sys/hypervisor.h> 40843e1988Sjohnlev #include <sys/xen_errno.h> 41b26a64aeSjohnlev #include <sys/policy.h> 42843e1988Sjohnlev 43843e1988Sjohnlev #include <vm/hat_i86.h> 44843e1988Sjohnlev #include <vm/hat_pte.h> 45843e1988Sjohnlev #include <vm/seg_mf.h> 46843e1988Sjohnlev 47843e1988Sjohnlev #include <xen/sys/privcmd.h> 48843e1988Sjohnlev #include <sys/privcmd_impl.h> 49843e1988Sjohnlev 50843e1988Sjohnlev static dev_info_t *privcmd_devi; 51843e1988Sjohnlev 52843e1988Sjohnlev /*ARGSUSED*/ 53843e1988Sjohnlev static int 54843e1988Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 55843e1988Sjohnlev { 56843e1988Sjohnlev switch (cmd) { 57843e1988Sjohnlev case DDI_INFO_DEVT2DEVINFO: 58843e1988Sjohnlev case DDI_INFO_DEVT2INSTANCE: 59843e1988Sjohnlev break; 60843e1988Sjohnlev default: 61843e1988Sjohnlev return (DDI_FAILURE); 62843e1988Sjohnlev } 63843e1988Sjohnlev 64843e1988Sjohnlev switch (getminor((dev_t)arg)) { 65843e1988Sjohnlev case PRIVCMD_MINOR: 66843e1988Sjohnlev break; 67843e1988Sjohnlev default: 68843e1988Sjohnlev return (DDI_FAILURE); 69843e1988Sjohnlev } 70843e1988Sjohnlev 71843e1988Sjohnlev if (cmd == DDI_INFO_DEVT2INSTANCE) 72843e1988Sjohnlev *result = 0; 73843e1988Sjohnlev else 74843e1988Sjohnlev *result = privcmd_devi; 75843e1988Sjohnlev return (DDI_SUCCESS); 76843e1988Sjohnlev } 77843e1988Sjohnlev 78843e1988Sjohnlev static int 79843e1988Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 80843e1988Sjohnlev { 81843e1988Sjohnlev if (cmd != DDI_ATTACH) 82843e1988Sjohnlev return (DDI_FAILURE); 83843e1988Sjohnlev 84843e1988Sjohnlev if (ddi_create_minor_node(devi, PRIVCMD_NODE, 85843e1988Sjohnlev S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) 86843e1988Sjohnlev return (DDI_FAILURE); 87843e1988Sjohnlev 88843e1988Sjohnlev privcmd_devi = devi; 89843e1988Sjohnlev ddi_report_dev(devi); 90843e1988Sjohnlev return (DDI_SUCCESS); 91843e1988Sjohnlev } 92843e1988Sjohnlev 93843e1988Sjohnlev static int 94843e1988Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 95843e1988Sjohnlev { 96843e1988Sjohnlev if (cmd != DDI_DETACH) 97843e1988Sjohnlev return (DDI_FAILURE); 98843e1988Sjohnlev ddi_remove_minor_node(devi, NULL); 99843e1988Sjohnlev privcmd_devi = NULL; 100843e1988Sjohnlev return (DDI_SUCCESS); 101843e1988Sjohnlev } 102843e1988Sjohnlev 103843e1988Sjohnlev /*ARGSUSED1*/ 104843e1988Sjohnlev static int 105843e1988Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr) 106843e1988Sjohnlev { 107843e1988Sjohnlev return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO); 108843e1988Sjohnlev } 109843e1988Sjohnlev 110843e1988Sjohnlev /* 111843e1988Sjohnlev * Map a contiguous set of machine frames in a foreign domain. 112843e1988Sjohnlev * Used in the following way: 113843e1988Sjohnlev * 114843e1988Sjohnlev * privcmd_mmap_t p; 115843e1988Sjohnlev * privcmd_mmap_entry_t e; 116843e1988Sjohnlev * 117843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 118843e1988Sjohnlev * p.num = number of privcmd_mmap_entry_t's 119843e1988Sjohnlev * p.dom = domid; 120843e1988Sjohnlev * p.entry = &e; 121843e1988Sjohnlev * e.va = addr; 122843e1988Sjohnlev * e.mfn = mfn; 123843e1988Sjohnlev * e.npages = btopr(size); 124843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p); 125843e1988Sjohnlev */ 126843e1988Sjohnlev /*ARGSUSED2*/ 127843e1988Sjohnlev int 128843e1988Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr) 129843e1988Sjohnlev { 130843e1988Sjohnlev privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd; 131843e1988Sjohnlev privcmd_mmap_entry_t *umme; 132843e1988Sjohnlev struct as *as = curproc->p_as; 133843e1988Sjohnlev struct seg *seg; 134843e1988Sjohnlev int i, error = 0; 135843e1988Sjohnlev 136843e1988Sjohnlev if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode)) 137843e1988Sjohnlev return (EFAULT); 138843e1988Sjohnlev 139843e1988Sjohnlev DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num, 140843e1988Sjohnlev privcmd_mmap_entry_t *, mmc->entry); 141843e1988Sjohnlev 142843e1988Sjohnlev if (mmc->dom == DOMID_SELF) { 143843e1988Sjohnlev error = ENOTSUP; /* Too paranoid? */ 144843e1988Sjohnlev goto done; 145843e1988Sjohnlev } 146843e1988Sjohnlev 147843e1988Sjohnlev for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) { 148843e1988Sjohnlev privcmd_mmap_entry_t __mmapent, *mme = &__mmapent; 149843e1988Sjohnlev caddr_t addr; 150843e1988Sjohnlev 151843e1988Sjohnlev if (ddi_copyin(umme, mme, sizeof (*mme), mode)) { 152843e1988Sjohnlev error = EFAULT; 153843e1988Sjohnlev break; 154843e1988Sjohnlev } 155843e1988Sjohnlev 156843e1988Sjohnlev DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn, 157843e1988Sjohnlev ulong_t, mme->npages); 158843e1988Sjohnlev 159843e1988Sjohnlev if (mme->mfn == MFN_INVALID) { 160843e1988Sjohnlev error = EINVAL; 161843e1988Sjohnlev break; 162843e1988Sjohnlev } 163843e1988Sjohnlev 164843e1988Sjohnlev addr = (caddr_t)mme->va; 165843e1988Sjohnlev 166843e1988Sjohnlev /* 167843e1988Sjohnlev * Find the segment we want to mess with, then add 168843e1988Sjohnlev * the mfn range to the segment. 169843e1988Sjohnlev */ 170843e1988Sjohnlev AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 171843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 172843e1988Sjohnlev addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size) 173843e1988Sjohnlev error = EINVAL; 174843e1988Sjohnlev else 175843e1988Sjohnlev error = segmf_add_mfns(seg, addr, 176843e1988Sjohnlev mme->mfn, mme->npages, mmc->dom); 177843e1988Sjohnlev AS_LOCK_EXIT(as, &as->a_lock); 178843e1988Sjohnlev 179843e1988Sjohnlev if (error != 0) 180843e1988Sjohnlev break; 181843e1988Sjohnlev } 182843e1988Sjohnlev 183843e1988Sjohnlev done: 184843e1988Sjohnlev DTRACE_XPV1(mmap__end, int, error); 185843e1988Sjohnlev 186843e1988Sjohnlev return (error); 187843e1988Sjohnlev } 188843e1988Sjohnlev 189843e1988Sjohnlev /* 190843e1988Sjohnlev * Set up the address range to map to an array of mfns in 191843e1988Sjohnlev * a foreign domain. Used in the following way: 192843e1988Sjohnlev * 193843e1988Sjohnlev * privcmd_mmap_batch_t p; 194843e1988Sjohnlev * 195843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 196843e1988Sjohnlev * p.num = number of pages 197843e1988Sjohnlev * p.dom = domid 198843e1988Sjohnlev * p.addr = addr; 199843e1988Sjohnlev * p.arr = array of mfns, indexed 0 .. p.num - 1 200843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p); 201843e1988Sjohnlev */ 202843e1988Sjohnlev /*ARGSUSED2*/ 203843e1988Sjohnlev static int 204843e1988Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) 205843e1988Sjohnlev { 206843e1988Sjohnlev privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch; 207843e1988Sjohnlev struct as *as = curproc->p_as; 208843e1988Sjohnlev struct seg *seg; 209843e1988Sjohnlev int i, error = 0; 210843e1988Sjohnlev caddr_t addr; 211843e1988Sjohnlev ulong_t *ulp; 212843e1988Sjohnlev 213843e1988Sjohnlev if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode)) 214843e1988Sjohnlev return (EFAULT); 215843e1988Sjohnlev 216843e1988Sjohnlev DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num, 217843e1988Sjohnlev caddr_t, mmb->addr); 218843e1988Sjohnlev 219843e1988Sjohnlev addr = (caddr_t)mmb->addr; 220843e1988Sjohnlev AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 221843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 222843e1988Sjohnlev addr + ptob(mmb->num) > seg->s_base + seg->s_size) { 223843e1988Sjohnlev error = EINVAL; 224843e1988Sjohnlev goto done; 225843e1988Sjohnlev } 226843e1988Sjohnlev 227843e1988Sjohnlev for (i = 0, ulp = mmb->arr; 228843e1988Sjohnlev i < mmb->num; i++, addr += PAGESIZE, ulp++) { 229843e1988Sjohnlev mfn_t mfn; 230843e1988Sjohnlev 231843e1988Sjohnlev if (fulword(ulp, &mfn) != 0) { 232843e1988Sjohnlev error = EFAULT; 233843e1988Sjohnlev break; 234843e1988Sjohnlev } 235843e1988Sjohnlev 236843e1988Sjohnlev if (mfn == MFN_INVALID) { 237a576ab5bSrab /* 238a576ab5bSrab * This mfn is invalid and should not be added to 239a576ab5bSrab * segmf, as we'd only cause an immediate EFAULT when 240a576ab5bSrab * we tried to fault it in. 241a576ab5bSrab */ 242a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB; 243a576ab5bSrab continue; 244843e1988Sjohnlev } 245843e1988Sjohnlev 246843e1988Sjohnlev if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0) 247843e1988Sjohnlev continue; 248843e1988Sjohnlev 249843e1988Sjohnlev /* 250843e1988Sjohnlev * Tell the process that this MFN could not be mapped, so it 251843e1988Sjohnlev * won't later try to access it. 252843e1988Sjohnlev */ 253a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB; 254843e1988Sjohnlev if (sulword(ulp, mfn) != 0) { 255843e1988Sjohnlev error = EFAULT; 256843e1988Sjohnlev break; 257843e1988Sjohnlev } 258843e1988Sjohnlev } 259843e1988Sjohnlev 260843e1988Sjohnlev done: 261843e1988Sjohnlev AS_LOCK_EXIT(as, &as->a_lock); 262843e1988Sjohnlev 263843e1988Sjohnlev DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t, 264843e1988Sjohnlev mmb->addr); 265843e1988Sjohnlev 266843e1988Sjohnlev return (error); 267843e1988Sjohnlev } 268843e1988Sjohnlev 269843e1988Sjohnlev /*ARGSUSED*/ 270843e1988Sjohnlev static int 271843e1988Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval) 272843e1988Sjohnlev { 273b26a64aeSjohnlev if (secpolicy_xvm_control(cr)) 274b26a64aeSjohnlev return (EPERM); 275843e1988Sjohnlev 276843e1988Sjohnlev /* 277843e1988Sjohnlev * Everything is a -native- data type. 278843e1988Sjohnlev */ 279b26a64aeSjohnlev if ((mode & FMODELS) != FNATIVE) 280b26a64aeSjohnlev return (EOVERFLOW); 281843e1988Sjohnlev 282843e1988Sjohnlev switch (cmd) { 283843e1988Sjohnlev case IOCTL_PRIVCMD_HYPERCALL: 284843e1988Sjohnlev return (do_privcmd_hypercall((void *)arg, mode, cr, rval)); 285843e1988Sjohnlev case IOCTL_PRIVCMD_MMAP: 286843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 287843e1988Sjohnlev return (do_privcmd_mmap((void *)arg, mode, cr)); 288843e1988Sjohnlev break; 289843e1988Sjohnlev case IOCTL_PRIVCMD_MMAPBATCH: 290843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 291843e1988Sjohnlev return (do_privcmd_mmapbatch((void *)arg, mode, cr)); 292843e1988Sjohnlev break; 293843e1988Sjohnlev default: 294843e1988Sjohnlev break; 295843e1988Sjohnlev } 296843e1988Sjohnlev return (EINVAL); 297843e1988Sjohnlev } 298843e1988Sjohnlev 299843e1988Sjohnlev /* 300843e1988Sjohnlev * The real magic happens in the segmf segment driver. 301843e1988Sjohnlev */ 302843e1988Sjohnlev /*ARGSUSED8*/ 303843e1988Sjohnlev static int 304843e1988Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, 305843e1988Sjohnlev off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr) 306843e1988Sjohnlev { 307843e1988Sjohnlev struct segmf_crargs a; 308843e1988Sjohnlev int error; 309843e1988Sjohnlev 310b26a64aeSjohnlev if (secpolicy_xvm_control(cr)) 311b26a64aeSjohnlev return (EPERM); 312b26a64aeSjohnlev 313843e1988Sjohnlev as_rangelock(as); 314843e1988Sjohnlev if ((flags & MAP_FIXED) == 0) { 315843e1988Sjohnlev map_addr(addrp, len, (offset_t)off, 0, flags); 316843e1988Sjohnlev if (*addrp == NULL) { 317843e1988Sjohnlev error = ENOMEM; 318843e1988Sjohnlev goto rangeunlock; 319843e1988Sjohnlev } 320843e1988Sjohnlev } else { 321843e1988Sjohnlev /* 322843e1988Sjohnlev * User specified address 323843e1988Sjohnlev */ 324843e1988Sjohnlev (void) as_unmap(as, *addrp, len); 325843e1988Sjohnlev } 326843e1988Sjohnlev 327843e1988Sjohnlev /* 328843e1988Sjohnlev * The mapping *must* be MAP_SHARED at offset 0. 329843e1988Sjohnlev * 330843e1988Sjohnlev * (Foreign pages are treated like device memory; the 331843e1988Sjohnlev * ioctl interface allows the backing objects to be 332843e1988Sjohnlev * arbitrarily redefined to point at any machine frame.) 333843e1988Sjohnlev */ 334843e1988Sjohnlev if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) { 335843e1988Sjohnlev error = EINVAL; 336843e1988Sjohnlev goto rangeunlock; 337843e1988Sjohnlev } 338843e1988Sjohnlev 339843e1988Sjohnlev a.dev = dev; 340843e1988Sjohnlev a.prot = (uchar_t)prot; 341843e1988Sjohnlev a.maxprot = (uchar_t)maxprot; 342843e1988Sjohnlev error = as_map(as, *addrp, len, segmf_create, &a); 343843e1988Sjohnlev 344843e1988Sjohnlev rangeunlock: 345843e1988Sjohnlev as_rangeunlock(as); 346843e1988Sjohnlev return (error); 347843e1988Sjohnlev } 348843e1988Sjohnlev 349843e1988Sjohnlev static struct cb_ops privcmd_cb_ops = { 350843e1988Sjohnlev privcmd_open, 351843e1988Sjohnlev nulldev, /* close */ 352843e1988Sjohnlev nodev, /* strategy */ 353843e1988Sjohnlev nodev, /* print */ 354843e1988Sjohnlev nodev, /* dump */ 355843e1988Sjohnlev nodev, /* read */ 356843e1988Sjohnlev nodev, /* write */ 357843e1988Sjohnlev privcmd_ioctl, 358843e1988Sjohnlev nodev, /* devmap */ 359843e1988Sjohnlev nodev, /* mmap */ 360843e1988Sjohnlev privcmd_segmap, 361843e1988Sjohnlev nochpoll, /* poll */ 362843e1988Sjohnlev ddi_prop_op, 363843e1988Sjohnlev NULL, 364843e1988Sjohnlev D_64BIT | D_NEW | D_MP 365843e1988Sjohnlev }; 366843e1988Sjohnlev 367843e1988Sjohnlev static struct dev_ops privcmd_dv_ops = { 368843e1988Sjohnlev DEVO_REV, 369843e1988Sjohnlev 0, 370843e1988Sjohnlev privcmd_getinfo, 371843e1988Sjohnlev nulldev, /* identify */ 372843e1988Sjohnlev nulldev, /* probe */ 373843e1988Sjohnlev privcmd_attach, 374843e1988Sjohnlev privcmd_detach, 375843e1988Sjohnlev nodev, /* reset */ 376843e1988Sjohnlev &privcmd_cb_ops, 377*19397407SSherry Moore 0, /* struct bus_ops */ 378*19397407SSherry Moore NULL, /* power */ 379*19397407SSherry Moore ddi_quiesce_not_needed, /* quiesce */ 380843e1988Sjohnlev }; 381843e1988Sjohnlev 382843e1988Sjohnlev static struct modldrv modldrv = { 383843e1988Sjohnlev &mod_driverops, 384613b2871SRichard Bean "privcmd driver", 385843e1988Sjohnlev &privcmd_dv_ops 386843e1988Sjohnlev }; 387843e1988Sjohnlev 388843e1988Sjohnlev static struct modlinkage modl = { 389843e1988Sjohnlev MODREV_1, 390843e1988Sjohnlev &modldrv 391843e1988Sjohnlev }; 392843e1988Sjohnlev 393843e1988Sjohnlev int 394843e1988Sjohnlev _init(void) 395843e1988Sjohnlev { 396843e1988Sjohnlev return (mod_install(&modl)); 397843e1988Sjohnlev } 398843e1988Sjohnlev 399843e1988Sjohnlev int 400843e1988Sjohnlev _fini(void) 401843e1988Sjohnlev { 402843e1988Sjohnlev return (mod_remove(&modl)); 403843e1988Sjohnlev } 404843e1988Sjohnlev 405843e1988Sjohnlev int 406843e1988Sjohnlev _info(struct modinfo *modinfo) 407843e1988Sjohnlev { 408843e1988Sjohnlev return (mod_info(&modl, modinfo)); 409843e1988Sjohnlev } 410