/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
25*843e1988Sjohnlev */ 26*843e1988Sjohnlev 27*843e1988Sjohnlev #pragma ident "%Z%%M% %I% %E% SMI" 28*843e1988Sjohnlev 29*843e1988Sjohnlev #include <sys/types.h> 30*843e1988Sjohnlev #include <sys/file.h> 31*843e1988Sjohnlev #include <sys/errno.h> 32*843e1988Sjohnlev #include <sys/open.h> 33*843e1988Sjohnlev #include <sys/cred.h> 34*843e1988Sjohnlev #include <sys/conf.h> 35*843e1988Sjohnlev #include <sys/stat.h> 36*843e1988Sjohnlev #include <sys/modctl.h> 37*843e1988Sjohnlev #include <sys/ddi.h> 38*843e1988Sjohnlev #include <sys/sunddi.h> 39*843e1988Sjohnlev #include <sys/vmsystm.h> 40*843e1988Sjohnlev #include <sys/sdt.h> 41*843e1988Sjohnlev #include <sys/hypervisor.h> 42*843e1988Sjohnlev #include <sys/xen_errno.h> 43*843e1988Sjohnlev 44*843e1988Sjohnlev #include <vm/hat_i86.h> 45*843e1988Sjohnlev #include <vm/hat_pte.h> 46*843e1988Sjohnlev #include <vm/seg_mf.h> 47*843e1988Sjohnlev 48*843e1988Sjohnlev #include <xen/sys/privcmd.h> 49*843e1988Sjohnlev #include <sys/privcmd_impl.h> 50*843e1988Sjohnlev 51*843e1988Sjohnlev static dev_info_t *privcmd_devi; 52*843e1988Sjohnlev 53*843e1988Sjohnlev /*ARGSUSED*/ 54*843e1988Sjohnlev static int 55*843e1988Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result) 56*843e1988Sjohnlev { 57*843e1988Sjohnlev switch (cmd) { 58*843e1988Sjohnlev case DDI_INFO_DEVT2DEVINFO: 59*843e1988Sjohnlev case DDI_INFO_DEVT2INSTANCE: 60*843e1988Sjohnlev break; 61*843e1988Sjohnlev default: 62*843e1988Sjohnlev return (DDI_FAILURE); 63*843e1988Sjohnlev } 64*843e1988Sjohnlev 65*843e1988Sjohnlev switch (getminor((dev_t)arg)) { 66*843e1988Sjohnlev case PRIVCMD_MINOR: 67*843e1988Sjohnlev break; 68*843e1988Sjohnlev default: 69*843e1988Sjohnlev return (DDI_FAILURE); 70*843e1988Sjohnlev } 71*843e1988Sjohnlev 72*843e1988Sjohnlev if (cmd == DDI_INFO_DEVT2INSTANCE) 73*843e1988Sjohnlev *result = 0; 74*843e1988Sjohnlev else 75*843e1988Sjohnlev *result = privcmd_devi; 76*843e1988Sjohnlev return (DDI_SUCCESS); 77*843e1988Sjohnlev } 
78*843e1988Sjohnlev 79*843e1988Sjohnlev static int 80*843e1988Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 81*843e1988Sjohnlev { 82*843e1988Sjohnlev if (cmd != DDI_ATTACH) 83*843e1988Sjohnlev return (DDI_FAILURE); 84*843e1988Sjohnlev 85*843e1988Sjohnlev if (ddi_create_minor_node(devi, PRIVCMD_NODE, 86*843e1988Sjohnlev S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS) 87*843e1988Sjohnlev return (DDI_FAILURE); 88*843e1988Sjohnlev 89*843e1988Sjohnlev privcmd_devi = devi; 90*843e1988Sjohnlev ddi_report_dev(devi); 91*843e1988Sjohnlev return (DDI_SUCCESS); 92*843e1988Sjohnlev } 93*843e1988Sjohnlev 94*843e1988Sjohnlev static int 95*843e1988Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 96*843e1988Sjohnlev { 97*843e1988Sjohnlev if (cmd != DDI_DETACH) 98*843e1988Sjohnlev return (DDI_FAILURE); 99*843e1988Sjohnlev ddi_remove_minor_node(devi, NULL); 100*843e1988Sjohnlev privcmd_devi = NULL; 101*843e1988Sjohnlev return (DDI_SUCCESS); 102*843e1988Sjohnlev } 103*843e1988Sjohnlev 104*843e1988Sjohnlev /*ARGSUSED1*/ 105*843e1988Sjohnlev static int 106*843e1988Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr) 107*843e1988Sjohnlev { 108*843e1988Sjohnlev return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO); 109*843e1988Sjohnlev } 110*843e1988Sjohnlev 111*843e1988Sjohnlev /* 112*843e1988Sjohnlev * Map a contiguous set of machine frames in a foreign domain. 
113*843e1988Sjohnlev * Used in the following way: 114*843e1988Sjohnlev * 115*843e1988Sjohnlev * privcmd_mmap_t p; 116*843e1988Sjohnlev * privcmd_mmap_entry_t e; 117*843e1988Sjohnlev * 118*843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 119*843e1988Sjohnlev * p.num = number of privcmd_mmap_entry_t's 120*843e1988Sjohnlev * p.dom = domid; 121*843e1988Sjohnlev * p.entry = &e; 122*843e1988Sjohnlev * e.va = addr; 123*843e1988Sjohnlev * e.mfn = mfn; 124*843e1988Sjohnlev * e.npages = btopr(size); 125*843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p); 126*843e1988Sjohnlev */ 127*843e1988Sjohnlev /*ARGSUSED2*/ 128*843e1988Sjohnlev int 129*843e1988Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr) 130*843e1988Sjohnlev { 131*843e1988Sjohnlev privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd; 132*843e1988Sjohnlev privcmd_mmap_entry_t *umme; 133*843e1988Sjohnlev struct as *as = curproc->p_as; 134*843e1988Sjohnlev struct seg *seg; 135*843e1988Sjohnlev int i, error = 0; 136*843e1988Sjohnlev 137*843e1988Sjohnlev if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode)) 138*843e1988Sjohnlev return (EFAULT); 139*843e1988Sjohnlev 140*843e1988Sjohnlev DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num, 141*843e1988Sjohnlev privcmd_mmap_entry_t *, mmc->entry); 142*843e1988Sjohnlev 143*843e1988Sjohnlev if (mmc->dom == DOMID_SELF) { 144*843e1988Sjohnlev error = ENOTSUP; /* Too paranoid? 
*/ 145*843e1988Sjohnlev goto done; 146*843e1988Sjohnlev } 147*843e1988Sjohnlev 148*843e1988Sjohnlev for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) { 149*843e1988Sjohnlev privcmd_mmap_entry_t __mmapent, *mme = &__mmapent; 150*843e1988Sjohnlev caddr_t addr; 151*843e1988Sjohnlev 152*843e1988Sjohnlev if (ddi_copyin(umme, mme, sizeof (*mme), mode)) { 153*843e1988Sjohnlev error = EFAULT; 154*843e1988Sjohnlev break; 155*843e1988Sjohnlev } 156*843e1988Sjohnlev 157*843e1988Sjohnlev DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn, 158*843e1988Sjohnlev ulong_t, mme->npages); 159*843e1988Sjohnlev 160*843e1988Sjohnlev if (mme->mfn == MFN_INVALID) { 161*843e1988Sjohnlev error = EINVAL; 162*843e1988Sjohnlev break; 163*843e1988Sjohnlev } 164*843e1988Sjohnlev 165*843e1988Sjohnlev addr = (caddr_t)mme->va; 166*843e1988Sjohnlev 167*843e1988Sjohnlev /* 168*843e1988Sjohnlev * Find the segment we want to mess with, then add 169*843e1988Sjohnlev * the mfn range to the segment. 170*843e1988Sjohnlev */ 171*843e1988Sjohnlev AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 172*843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 173*843e1988Sjohnlev addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size) 174*843e1988Sjohnlev error = EINVAL; 175*843e1988Sjohnlev else 176*843e1988Sjohnlev error = segmf_add_mfns(seg, addr, 177*843e1988Sjohnlev mme->mfn, mme->npages, mmc->dom); 178*843e1988Sjohnlev AS_LOCK_EXIT(as, &as->a_lock); 179*843e1988Sjohnlev 180*843e1988Sjohnlev if (error != 0) 181*843e1988Sjohnlev break; 182*843e1988Sjohnlev } 183*843e1988Sjohnlev 184*843e1988Sjohnlev done: 185*843e1988Sjohnlev DTRACE_XPV1(mmap__end, int, error); 186*843e1988Sjohnlev 187*843e1988Sjohnlev return (error); 188*843e1988Sjohnlev } 189*843e1988Sjohnlev 190*843e1988Sjohnlev /* 191*843e1988Sjohnlev * Set up the address range to map to an array of mfns in 192*843e1988Sjohnlev * a foreign domain. 
Used in the following way: 193*843e1988Sjohnlev * 194*843e1988Sjohnlev * privcmd_mmap_batch_t p; 195*843e1988Sjohnlev * 196*843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0); 197*843e1988Sjohnlev * p.num = number of pages 198*843e1988Sjohnlev * p.dom = domid 199*843e1988Sjohnlev * p.addr = addr; 200*843e1988Sjohnlev * p.arr = array of mfns, indexed 0 .. p.num - 1 201*843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p); 202*843e1988Sjohnlev */ 203*843e1988Sjohnlev /*ARGSUSED2*/ 204*843e1988Sjohnlev static int 205*843e1988Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr) 206*843e1988Sjohnlev { 207*843e1988Sjohnlev privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch; 208*843e1988Sjohnlev struct as *as = curproc->p_as; 209*843e1988Sjohnlev struct seg *seg; 210*843e1988Sjohnlev int i, error = 0; 211*843e1988Sjohnlev caddr_t addr; 212*843e1988Sjohnlev ulong_t *ulp; 213*843e1988Sjohnlev 214*843e1988Sjohnlev if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode)) 215*843e1988Sjohnlev return (EFAULT); 216*843e1988Sjohnlev 217*843e1988Sjohnlev DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num, 218*843e1988Sjohnlev caddr_t, mmb->addr); 219*843e1988Sjohnlev 220*843e1988Sjohnlev addr = (caddr_t)mmb->addr; 221*843e1988Sjohnlev AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 222*843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL || 223*843e1988Sjohnlev addr + ptob(mmb->num) > seg->s_base + seg->s_size) { 224*843e1988Sjohnlev error = EINVAL; 225*843e1988Sjohnlev goto done; 226*843e1988Sjohnlev } 227*843e1988Sjohnlev 228*843e1988Sjohnlev for (i = 0, ulp = mmb->arr; 229*843e1988Sjohnlev i < mmb->num; i++, addr += PAGESIZE, ulp++) { 230*843e1988Sjohnlev mfn_t mfn; 231*843e1988Sjohnlev 232*843e1988Sjohnlev if (fulword(ulp, &mfn) != 0) { 233*843e1988Sjohnlev error = EFAULT; 234*843e1988Sjohnlev break; 235*843e1988Sjohnlev } 236*843e1988Sjohnlev 237*843e1988Sjohnlev if (mfn == MFN_INVALID) { 238*843e1988Sjohnlev error = EINVAL; 
239*843e1988Sjohnlev break; 240*843e1988Sjohnlev } 241*843e1988Sjohnlev 242*843e1988Sjohnlev if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0) 243*843e1988Sjohnlev continue; 244*843e1988Sjohnlev 245*843e1988Sjohnlev /* 246*843e1988Sjohnlev * Tell the process that this MFN could not be mapped, so it 247*843e1988Sjohnlev * won't later try to access it. 248*843e1988Sjohnlev */ 249*843e1988Sjohnlev mfn |= 0xf0000000; 250*843e1988Sjohnlev if (sulword(ulp, mfn) != 0) { 251*843e1988Sjohnlev error = EFAULT; 252*843e1988Sjohnlev break; 253*843e1988Sjohnlev } 254*843e1988Sjohnlev } 255*843e1988Sjohnlev 256*843e1988Sjohnlev done: 257*843e1988Sjohnlev AS_LOCK_EXIT(as, &as->a_lock); 258*843e1988Sjohnlev 259*843e1988Sjohnlev DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t, 260*843e1988Sjohnlev mmb->addr); 261*843e1988Sjohnlev 262*843e1988Sjohnlev return (error); 263*843e1988Sjohnlev } 264*843e1988Sjohnlev 265*843e1988Sjohnlev /*ARGSUSED*/ 266*843e1988Sjohnlev static int 267*843e1988Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval) 268*843e1988Sjohnlev { 269*843e1988Sjohnlev if ((mode & FMODELS) != FNATIVE) 270*843e1988Sjohnlev return (EOVERFLOW); 271*843e1988Sjohnlev 272*843e1988Sjohnlev /* 273*843e1988Sjohnlev * Everything is a -native- data type. 
274*843e1988Sjohnlev */ 275*843e1988Sjohnlev 276*843e1988Sjohnlev switch (cmd) { 277*843e1988Sjohnlev case IOCTL_PRIVCMD_HYPERCALL: 278*843e1988Sjohnlev return (do_privcmd_hypercall((void *)arg, mode, cr, rval)); 279*843e1988Sjohnlev case IOCTL_PRIVCMD_MMAP: 280*843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 281*843e1988Sjohnlev return (do_privcmd_mmap((void *)arg, mode, cr)); 282*843e1988Sjohnlev break; 283*843e1988Sjohnlev case IOCTL_PRIVCMD_MMAPBATCH: 284*843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info)) 285*843e1988Sjohnlev return (do_privcmd_mmapbatch((void *)arg, mode, cr)); 286*843e1988Sjohnlev break; 287*843e1988Sjohnlev default: 288*843e1988Sjohnlev break; 289*843e1988Sjohnlev } 290*843e1988Sjohnlev return (EINVAL); 291*843e1988Sjohnlev } 292*843e1988Sjohnlev 293*843e1988Sjohnlev /* 294*843e1988Sjohnlev * The real magic happens in the segmf segment driver. 295*843e1988Sjohnlev */ 296*843e1988Sjohnlev /*ARGSUSED8*/ 297*843e1988Sjohnlev static int 298*843e1988Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, 299*843e1988Sjohnlev off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr) 300*843e1988Sjohnlev { 301*843e1988Sjohnlev struct segmf_crargs a; 302*843e1988Sjohnlev int error; 303*843e1988Sjohnlev 304*843e1988Sjohnlev as_rangelock(as); 305*843e1988Sjohnlev if ((flags & MAP_FIXED) == 0) { 306*843e1988Sjohnlev map_addr(addrp, len, (offset_t)off, 0, flags); 307*843e1988Sjohnlev if (*addrp == NULL) { 308*843e1988Sjohnlev error = ENOMEM; 309*843e1988Sjohnlev goto rangeunlock; 310*843e1988Sjohnlev } 311*843e1988Sjohnlev } else { 312*843e1988Sjohnlev /* 313*843e1988Sjohnlev * User specified address 314*843e1988Sjohnlev */ 315*843e1988Sjohnlev (void) as_unmap(as, *addrp, len); 316*843e1988Sjohnlev } 317*843e1988Sjohnlev 318*843e1988Sjohnlev /* 319*843e1988Sjohnlev * The mapping *must* be MAP_SHARED at offset 0. 
320*843e1988Sjohnlev * 321*843e1988Sjohnlev * (Foreign pages are treated like device memory; the 322*843e1988Sjohnlev * ioctl interface allows the backing objects to be 323*843e1988Sjohnlev * arbitrarily redefined to point at any machine frame.) 324*843e1988Sjohnlev */ 325*843e1988Sjohnlev if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) { 326*843e1988Sjohnlev error = EINVAL; 327*843e1988Sjohnlev goto rangeunlock; 328*843e1988Sjohnlev } 329*843e1988Sjohnlev 330*843e1988Sjohnlev a.dev = dev; 331*843e1988Sjohnlev a.prot = (uchar_t)prot; 332*843e1988Sjohnlev a.maxprot = (uchar_t)maxprot; 333*843e1988Sjohnlev error = as_map(as, *addrp, len, segmf_create, &a); 334*843e1988Sjohnlev 335*843e1988Sjohnlev rangeunlock: 336*843e1988Sjohnlev as_rangeunlock(as); 337*843e1988Sjohnlev return (error); 338*843e1988Sjohnlev } 339*843e1988Sjohnlev 340*843e1988Sjohnlev static struct cb_ops privcmd_cb_ops = { 341*843e1988Sjohnlev privcmd_open, 342*843e1988Sjohnlev nulldev, /* close */ 343*843e1988Sjohnlev nodev, /* strategy */ 344*843e1988Sjohnlev nodev, /* print */ 345*843e1988Sjohnlev nodev, /* dump */ 346*843e1988Sjohnlev nodev, /* read */ 347*843e1988Sjohnlev nodev, /* write */ 348*843e1988Sjohnlev privcmd_ioctl, 349*843e1988Sjohnlev nodev, /* devmap */ 350*843e1988Sjohnlev nodev, /* mmap */ 351*843e1988Sjohnlev privcmd_segmap, 352*843e1988Sjohnlev nochpoll, /* poll */ 353*843e1988Sjohnlev ddi_prop_op, 354*843e1988Sjohnlev NULL, 355*843e1988Sjohnlev D_64BIT | D_NEW | D_MP 356*843e1988Sjohnlev }; 357*843e1988Sjohnlev 358*843e1988Sjohnlev static struct dev_ops privcmd_dv_ops = { 359*843e1988Sjohnlev DEVO_REV, 360*843e1988Sjohnlev 0, 361*843e1988Sjohnlev privcmd_getinfo, 362*843e1988Sjohnlev nulldev, /* identify */ 363*843e1988Sjohnlev nulldev, /* probe */ 364*843e1988Sjohnlev privcmd_attach, 365*843e1988Sjohnlev privcmd_detach, 366*843e1988Sjohnlev nodev, /* reset */ 367*843e1988Sjohnlev &privcmd_cb_ops, 368*843e1988Sjohnlev 0 /* struct bus_ops */ 369*843e1988Sjohnlev }; 
370*843e1988Sjohnlev 371*843e1988Sjohnlev static struct modldrv modldrv = { 372*843e1988Sjohnlev &mod_driverops, 373*843e1988Sjohnlev "privcmd driver %I%", 374*843e1988Sjohnlev &privcmd_dv_ops 375*843e1988Sjohnlev }; 376*843e1988Sjohnlev 377*843e1988Sjohnlev static struct modlinkage modl = { 378*843e1988Sjohnlev MODREV_1, 379*843e1988Sjohnlev &modldrv 380*843e1988Sjohnlev }; 381*843e1988Sjohnlev 382*843e1988Sjohnlev int 383*843e1988Sjohnlev _init(void) 384*843e1988Sjohnlev { 385*843e1988Sjohnlev return (mod_install(&modl)); 386*843e1988Sjohnlev } 387*843e1988Sjohnlev 388*843e1988Sjohnlev int 389*843e1988Sjohnlev _fini(void) 390*843e1988Sjohnlev { 391*843e1988Sjohnlev return (mod_remove(&modl)); 392*843e1988Sjohnlev } 393*843e1988Sjohnlev 394*843e1988Sjohnlev int 395*843e1988Sjohnlev _info(struct modinfo *modinfo) 396*843e1988Sjohnlev { 397*843e1988Sjohnlev return (mod_info(&modl, modinfo)); 398*843e1988Sjohnlev } 399