/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
25843e1988Sjohnlev */ 26843e1988Sjohnlev 27349b53ddSStuart Maybee #include <sys/xpv_user.h> 28349b53ddSStuart Maybee 29843e1988Sjohnlev #include <sys/types.h> 30843e1988Sjohnlev #include <sys/file.h> 31843e1988Sjohnlev #include <sys/errno.h> 32843e1988Sjohnlev #include <sys/open.h> 33843e1988Sjohnlev #include <sys/cred.h> 34843e1988Sjohnlev #include <sys/conf.h> 35843e1988Sjohnlev #include <sys/stat.h> 36843e1988Sjohnlev #include <sys/modctl.h> 37843e1988Sjohnlev #include <sys/ddi.h> 38843e1988Sjohnlev #include <sys/sunddi.h> 39843e1988Sjohnlev #include <sys/vmsystm.h> 40843e1988Sjohnlev #include <sys/sdt.h> 41843e1988Sjohnlev #include <sys/hypervisor.h> 42843e1988Sjohnlev #include <sys/xen_errno.h> 43b26a64aeSjohnlev #include <sys/policy.h> 44843e1988Sjohnlev 45843e1988Sjohnlev #include <vm/hat_i86.h> 46843e1988Sjohnlev #include <vm/hat_pte.h> 47843e1988Sjohnlev #include <vm/seg_mf.h> 48843e1988Sjohnlev 49843e1988Sjohnlev #include <xen/sys/privcmd.h> 50843e1988Sjohnlev #include <sys/privcmd_impl.h> 51843e1988Sjohnlev 52843e1988Sjohnlev static dev_info_t *privcmd_devi; 53843e1988Sjohnlev 54843e1988Sjohnlev /*ARGSUSED*/ 55843e1988Sjohnlev static int 56843e1988Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 57843e1988Sjohnlev { 58843e1988Sjohnlev switch (cmd) { 59843e1988Sjohnlev case DDI_INFO_DEVT2DEVINFO: 60843e1988Sjohnlev case DDI_INFO_DEVT2INSTANCE: 61843e1988Sjohnlev break; 62843e1988Sjohnlev default: 63843e1988Sjohnlev return (DDI_FAILURE); 64843e1988Sjohnlev } 65843e1988Sjohnlev 66843e1988Sjohnlev switch (getminor((dev_t)arg)) { 67843e1988Sjohnlev case PRIVCMD_MINOR: 68843e1988Sjohnlev break; 69843e1988Sjohnlev default: 70843e1988Sjohnlev return (DDI_FAILURE); 71843e1988Sjohnlev } 72843e1988Sjohnlev 73843e1988Sjohnlev if (cmd == DDI_INFO_DEVT2INSTANCE) 74843e1988Sjohnlev *result = 0; 75843e1988Sjohnlev else 76843e1988Sjohnlev *result = privcmd_devi; 77843e1988Sjohnlev return (DDI_SUCCESS); 78843e1988Sjohnlev } 
79843e1988Sjohnlev 80843e1988Sjohnlev static int 81843e1988Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 82843e1988Sjohnlev { 83843e1988Sjohnlev if (cmd != DDI_ATTACH) 84843e1988Sjohnlev return (DDI_FAILURE); 85843e1988Sjohnlev 86843e1988Sjohnlev if (ddi_create_minor_node(devi, PRIVCMD_NODE, 87843e1988Sjohnlev S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) 88843e1988Sjohnlev return (DDI_FAILURE); 89843e1988Sjohnlev 90843e1988Sjohnlev privcmd_devi = devi; 91843e1988Sjohnlev ddi_report_dev(devi); 92843e1988Sjohnlev return (DDI_SUCCESS); 93843e1988Sjohnlev } 94843e1988Sjohnlev 95843e1988Sjohnlev static int 96843e1988Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 97843e1988Sjohnlev { 98843e1988Sjohnlev if (cmd != DDI_DETACH) 99843e1988Sjohnlev return (DDI_FAILURE); 100843e1988Sjohnlev ddi_remove_minor_node(devi, NULL); 101843e1988Sjohnlev privcmd_devi = NULL; 102843e1988Sjohnlev return (DDI_SUCCESS); 103843e1988Sjohnlev } 104843e1988Sjohnlev 105843e1988Sjohnlev /*ARGSUSED1*/ 106843e1988Sjohnlev static int 107843e1988Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr) 108843e1988Sjohnlev { 109843e1988Sjohnlev return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO); 110843e1988Sjohnlev } 111843e1988Sjohnlev 112843e1988Sjohnlev /* 113843e1988Sjohnlev * Map a contiguous set of machine frames in a foreign domain. 
114843e1988Sjohnlev * Used in the following way: 115843e1988Sjohnlev * 116843e1988Sjohnlev * privcmd_mmap_t p; 117843e1988Sjohnlev * privcmd_mmap_entry_t e; 118843e1988Sjohnlev * 119843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 120843e1988Sjohnlev * p.num = number of privcmd_mmap_entry_t's 121843e1988Sjohnlev * p.dom = domid; 122843e1988Sjohnlev * p.entry = &e; 123843e1988Sjohnlev * e.va = addr; 124843e1988Sjohnlev * e.mfn = mfn; 125843e1988Sjohnlev * e.npages = btopr(size); 126843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p); 127843e1988Sjohnlev */ 128843e1988Sjohnlev /*ARGSUSED2*/ 129843e1988Sjohnlev int 130843e1988Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr) 131843e1988Sjohnlev { 132843e1988Sjohnlev privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd; 133843e1988Sjohnlev privcmd_mmap_entry_t *umme; 134843e1988Sjohnlev struct as *as = curproc->p_as; 135843e1988Sjohnlev struct seg *seg; 136843e1988Sjohnlev int i, error = 0; 137843e1988Sjohnlev 138843e1988Sjohnlev if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode)) 139843e1988Sjohnlev return (EFAULT); 140843e1988Sjohnlev 141843e1988Sjohnlev DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num, 142843e1988Sjohnlev privcmd_mmap_entry_t *, mmc->entry); 143843e1988Sjohnlev 144843e1988Sjohnlev if (mmc->dom == DOMID_SELF) { 145843e1988Sjohnlev error = ENOTSUP; /* Too paranoid? 
*/ 146843e1988Sjohnlev goto done; 147843e1988Sjohnlev } 148843e1988Sjohnlev 149843e1988Sjohnlev for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) { 150843e1988Sjohnlev privcmd_mmap_entry_t __mmapent, *mme = &__mmapent; 151843e1988Sjohnlev caddr_t addr; 152843e1988Sjohnlev 153843e1988Sjohnlev if (ddi_copyin(umme, mme, sizeof (*mme), mode)) { 154843e1988Sjohnlev error = EFAULT; 155843e1988Sjohnlev break; 156843e1988Sjohnlev } 157843e1988Sjohnlev 158843e1988Sjohnlev DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn, 159843e1988Sjohnlev ulong_t, mme->npages); 160843e1988Sjohnlev 161843e1988Sjohnlev if (mme->mfn == MFN_INVALID) { 162843e1988Sjohnlev error = EINVAL; 163843e1988Sjohnlev break; 164843e1988Sjohnlev } 165843e1988Sjohnlev 166843e1988Sjohnlev addr = (caddr_t)mme->va; 167843e1988Sjohnlev 168843e1988Sjohnlev /* 169843e1988Sjohnlev * Find the segment we want to mess with, then add 170843e1988Sjohnlev * the mfn range to the segment. 171843e1988Sjohnlev */ 172*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER); 173843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 174843e1988Sjohnlev addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size) 175843e1988Sjohnlev error = EINVAL; 176843e1988Sjohnlev else 177843e1988Sjohnlev error = segmf_add_mfns(seg, addr, 178843e1988Sjohnlev mme->mfn, mme->npages, mmc->dom); 179*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 180843e1988Sjohnlev 181843e1988Sjohnlev if (error != 0) 182843e1988Sjohnlev break; 183843e1988Sjohnlev } 184843e1988Sjohnlev 185843e1988Sjohnlev done: 186843e1988Sjohnlev DTRACE_XPV1(mmap__end, int, error); 187843e1988Sjohnlev 188843e1988Sjohnlev return (error); 189843e1988Sjohnlev } 190843e1988Sjohnlev 191843e1988Sjohnlev /* 192843e1988Sjohnlev * Set up the address range to map to an array of mfns in 193843e1988Sjohnlev * a foreign domain. 
Used in the following way: 194843e1988Sjohnlev * 195843e1988Sjohnlev * privcmd_mmap_batch_t p; 196843e1988Sjohnlev * 197843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 198843e1988Sjohnlev * p.num = number of pages 199843e1988Sjohnlev * p.dom = domid 200843e1988Sjohnlev * p.addr = addr; 201843e1988Sjohnlev * p.arr = array of mfns, indexed 0 .. p.num - 1 202843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p); 203843e1988Sjohnlev */ 204843e1988Sjohnlev /*ARGSUSED2*/ 205843e1988Sjohnlev static int 206843e1988Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) 207843e1988Sjohnlev { 208843e1988Sjohnlev privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch; 209843e1988Sjohnlev struct as *as = curproc->p_as; 210843e1988Sjohnlev struct seg *seg; 211843e1988Sjohnlev int i, error = 0; 212843e1988Sjohnlev caddr_t addr; 213843e1988Sjohnlev ulong_t *ulp; 214843e1988Sjohnlev 215843e1988Sjohnlev if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode)) 216843e1988Sjohnlev return (EFAULT); 217843e1988Sjohnlev 218843e1988Sjohnlev DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num, 219843e1988Sjohnlev caddr_t, mmb->addr); 220843e1988Sjohnlev 221843e1988Sjohnlev addr = (caddr_t)mmb->addr; 222*dc32d872SJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER); 223843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 224843e1988Sjohnlev addr + ptob(mmb->num) > seg->s_base + seg->s_size) { 225843e1988Sjohnlev error = EINVAL; 226843e1988Sjohnlev goto done; 227843e1988Sjohnlev } 228843e1988Sjohnlev 229843e1988Sjohnlev for (i = 0, ulp = mmb->arr; 230843e1988Sjohnlev i < mmb->num; i++, addr += PAGESIZE, ulp++) { 231843e1988Sjohnlev mfn_t mfn; 232843e1988Sjohnlev 233843e1988Sjohnlev if (fulword(ulp, &mfn) != 0) { 234843e1988Sjohnlev error = EFAULT; 235843e1988Sjohnlev break; 236843e1988Sjohnlev } 237843e1988Sjohnlev 238843e1988Sjohnlev if (mfn == MFN_INVALID) { 239a576ab5bSrab /* 240a576ab5bSrab * This mfn is invalid and should not be added to 
241a576ab5bSrab * segmf, as we'd only cause an immediate EFAULT when 242a576ab5bSrab * we tried to fault it in. 243a576ab5bSrab */ 244a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB; 245a576ab5bSrab continue; 246843e1988Sjohnlev } 247843e1988Sjohnlev 248843e1988Sjohnlev if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0) 249843e1988Sjohnlev continue; 250843e1988Sjohnlev 251843e1988Sjohnlev /* 252843e1988Sjohnlev * Tell the process that this MFN could not be mapped, so it 253843e1988Sjohnlev * won't later try to access it. 254843e1988Sjohnlev */ 255a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB; 256843e1988Sjohnlev if (sulword(ulp, mfn) != 0) { 257843e1988Sjohnlev error = EFAULT; 258843e1988Sjohnlev break; 259843e1988Sjohnlev } 260843e1988Sjohnlev } 261843e1988Sjohnlev 262843e1988Sjohnlev done: 263*dc32d872SJosef 'Jeff' Sipek AS_LOCK_EXIT(as); 264843e1988Sjohnlev 265843e1988Sjohnlev DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t, 266843e1988Sjohnlev mmb->addr); 267843e1988Sjohnlev 268843e1988Sjohnlev return (error); 269843e1988Sjohnlev } 270843e1988Sjohnlev 271843e1988Sjohnlev /*ARGSUSED*/ 272843e1988Sjohnlev static int 273843e1988Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval) 274843e1988Sjohnlev { 275b26a64aeSjohnlev if (secpolicy_xvm_control(cr)) 276b26a64aeSjohnlev return (EPERM); 277843e1988Sjohnlev 278843e1988Sjohnlev /* 279843e1988Sjohnlev * Everything is a -native- data type. 
280843e1988Sjohnlev */ 281b26a64aeSjohnlev if ((mode & FMODELS) != FNATIVE) 282b26a64aeSjohnlev return (EOVERFLOW); 283843e1988Sjohnlev 284843e1988Sjohnlev switch (cmd) { 285843e1988Sjohnlev case IOCTL_PRIVCMD_HYPERCALL: 286843e1988Sjohnlev return (do_privcmd_hypercall((void *)arg, mode, cr, rval)); 287843e1988Sjohnlev case IOCTL_PRIVCMD_MMAP: 288843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 289843e1988Sjohnlev return (do_privcmd_mmap((void *)arg, mode, cr)); 290843e1988Sjohnlev break; 291843e1988Sjohnlev case IOCTL_PRIVCMD_MMAPBATCH: 292843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 293843e1988Sjohnlev return (do_privcmd_mmapbatch((void *)arg, mode, cr)); 294843e1988Sjohnlev break; 295843e1988Sjohnlev default: 296843e1988Sjohnlev break; 297843e1988Sjohnlev } 298843e1988Sjohnlev return (EINVAL); 299843e1988Sjohnlev } 300843e1988Sjohnlev 301843e1988Sjohnlev /* 302843e1988Sjohnlev * The real magic happens in the segmf segment driver. 303843e1988Sjohnlev */ 304843e1988Sjohnlev /*ARGSUSED8*/ 305843e1988Sjohnlev static int 306843e1988Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, 307843e1988Sjohnlev off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr) 308843e1988Sjohnlev { 309843e1988Sjohnlev struct segmf_crargs a; 310843e1988Sjohnlev int error; 311843e1988Sjohnlev 312b26a64aeSjohnlev if (secpolicy_xvm_control(cr)) 313b26a64aeSjohnlev return (EPERM); 314b26a64aeSjohnlev 315843e1988Sjohnlev as_rangelock(as); 316843e1988Sjohnlev if ((flags & MAP_FIXED) == 0) { 317843e1988Sjohnlev map_addr(addrp, len, (offset_t)off, 0, flags); 318843e1988Sjohnlev if (*addrp == NULL) { 319843e1988Sjohnlev error = ENOMEM; 320843e1988Sjohnlev goto rangeunlock; 321843e1988Sjohnlev } 322843e1988Sjohnlev } else { 323843e1988Sjohnlev /* 324843e1988Sjohnlev * User specified address 325843e1988Sjohnlev */ 326843e1988Sjohnlev (void) as_unmap(as, *addrp, len); 327843e1988Sjohnlev } 328843e1988Sjohnlev 329843e1988Sjohnlev /* 330843e1988Sjohnlev * 
The mapping *must* be MAP_SHARED at offset 0. 331843e1988Sjohnlev * 332843e1988Sjohnlev * (Foreign pages are treated like device memory; the 333843e1988Sjohnlev * ioctl interface allows the backing objects to be 334843e1988Sjohnlev * arbitrarily redefined to point at any machine frame.) 335843e1988Sjohnlev */ 336843e1988Sjohnlev if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) { 337843e1988Sjohnlev error = EINVAL; 338843e1988Sjohnlev goto rangeunlock; 339843e1988Sjohnlev } 340843e1988Sjohnlev 341843e1988Sjohnlev a.dev = dev; 342843e1988Sjohnlev a.prot = (uchar_t)prot; 343843e1988Sjohnlev a.maxprot = (uchar_t)maxprot; 344843e1988Sjohnlev error = as_map(as, *addrp, len, segmf_create, &a); 345843e1988Sjohnlev 346843e1988Sjohnlev rangeunlock: 347843e1988Sjohnlev as_rangeunlock(as); 348843e1988Sjohnlev return (error); 349843e1988Sjohnlev } 350843e1988Sjohnlev 351843e1988Sjohnlev static struct cb_ops privcmd_cb_ops = { 352843e1988Sjohnlev privcmd_open, 353843e1988Sjohnlev nulldev, /* close */ 354843e1988Sjohnlev nodev, /* strategy */ 355843e1988Sjohnlev nodev, /* print */ 356843e1988Sjohnlev nodev, /* dump */ 357843e1988Sjohnlev nodev, /* read */ 358843e1988Sjohnlev nodev, /* write */ 359843e1988Sjohnlev privcmd_ioctl, 360843e1988Sjohnlev nodev, /* devmap */ 361843e1988Sjohnlev nodev, /* mmap */ 362843e1988Sjohnlev privcmd_segmap, 363843e1988Sjohnlev nochpoll, /* poll */ 364843e1988Sjohnlev ddi_prop_op, 365843e1988Sjohnlev NULL, 366843e1988Sjohnlev D_64BIT | D_NEW | D_MP 367843e1988Sjohnlev }; 368843e1988Sjohnlev 369843e1988Sjohnlev static struct dev_ops privcmd_dv_ops = { 370843e1988Sjohnlev DEVO_REV, 371843e1988Sjohnlev 0, 372843e1988Sjohnlev privcmd_getinfo, 373843e1988Sjohnlev nulldev, /* identify */ 374843e1988Sjohnlev nulldev, /* probe */ 375843e1988Sjohnlev privcmd_attach, 376843e1988Sjohnlev privcmd_detach, 377843e1988Sjohnlev nodev, /* reset */ 378843e1988Sjohnlev &privcmd_cb_ops, 37919397407SSherry Moore 0, /* struct bus_ops */ 38019397407SSherry Moore NULL, 
/* power */ 38119397407SSherry Moore ddi_quiesce_not_needed, /* quiesce */ 382843e1988Sjohnlev }; 383843e1988Sjohnlev 384843e1988Sjohnlev static struct modldrv modldrv = { 385843e1988Sjohnlev &mod_driverops, 386613b2871SRichard Bean "privcmd driver", 387843e1988Sjohnlev &privcmd_dv_ops 388843e1988Sjohnlev }; 389843e1988Sjohnlev 390843e1988Sjohnlev static struct modlinkage modl = { 391843e1988Sjohnlev MODREV_1, 392843e1988Sjohnlev &modldrv 393843e1988Sjohnlev }; 394843e1988Sjohnlev 395843e1988Sjohnlev int 396843e1988Sjohnlev _init(void) 397843e1988Sjohnlev { 398843e1988Sjohnlev return (mod_install(&modl)); 399843e1988Sjohnlev } 400843e1988Sjohnlev 401843e1988Sjohnlev int 402843e1988Sjohnlev _fini(void) 403843e1988Sjohnlev { 404843e1988Sjohnlev return (mod_remove(&modl)); 405843e1988Sjohnlev } 406843e1988Sjohnlev 407843e1988Sjohnlev int 408843e1988Sjohnlev _info(struct modinfo *modinfo) 409843e1988Sjohnlev { 410843e1988Sjohnlev return (mod_info(&modl, modinfo)); 411843e1988Sjohnlev } 412