/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21843e1988Sjohnlev
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26843e1988Sjohnlev
27349b53ddSStuart Maybee #include <sys/xpv_user.h>
28349b53ddSStuart Maybee
29843e1988Sjohnlev #include <sys/types.h>
30843e1988Sjohnlev #include <sys/file.h>
31843e1988Sjohnlev #include <sys/errno.h>
32843e1988Sjohnlev #include <sys/open.h>
33843e1988Sjohnlev #include <sys/cred.h>
34843e1988Sjohnlev #include <sys/conf.h>
35843e1988Sjohnlev #include <sys/stat.h>
36843e1988Sjohnlev #include <sys/modctl.h>
37843e1988Sjohnlev #include <sys/ddi.h>
38843e1988Sjohnlev #include <sys/sunddi.h>
39843e1988Sjohnlev #include <sys/vmsystm.h>
40843e1988Sjohnlev #include <sys/sdt.h>
41843e1988Sjohnlev #include <sys/hypervisor.h>
42843e1988Sjohnlev #include <sys/xen_errno.h>
43b26a64aeSjohnlev #include <sys/policy.h>
44843e1988Sjohnlev
45843e1988Sjohnlev #include <vm/hat_i86.h>
46843e1988Sjohnlev #include <vm/hat_pte.h>
47843e1988Sjohnlev #include <vm/seg_mf.h>
48843e1988Sjohnlev
49843e1988Sjohnlev #include <xen/sys/privcmd.h>
50843e1988Sjohnlev #include <sys/privcmd_impl.h>
51843e1988Sjohnlev
52843e1988Sjohnlev static dev_info_t *privcmd_devi;
53843e1988Sjohnlev
54843e1988Sjohnlev /*ARGSUSED*/
55843e1988Sjohnlev static int
privcmd_getinfo(dev_info_t * devi,ddi_info_cmd_t cmd,void * arg,void ** result)56843e1988Sjohnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
57843e1988Sjohnlev {
58843e1988Sjohnlev switch (cmd) {
59843e1988Sjohnlev case DDI_INFO_DEVT2DEVINFO:
60843e1988Sjohnlev case DDI_INFO_DEVT2INSTANCE:
61843e1988Sjohnlev break;
62843e1988Sjohnlev default:
63843e1988Sjohnlev return (DDI_FAILURE);
64843e1988Sjohnlev }
65843e1988Sjohnlev
66843e1988Sjohnlev switch (getminor((dev_t)arg)) {
67843e1988Sjohnlev case PRIVCMD_MINOR:
68843e1988Sjohnlev break;
69843e1988Sjohnlev default:
70843e1988Sjohnlev return (DDI_FAILURE);
71843e1988Sjohnlev }
72843e1988Sjohnlev
73843e1988Sjohnlev if (cmd == DDI_INFO_DEVT2INSTANCE)
74843e1988Sjohnlev *result = 0;
75843e1988Sjohnlev else
76843e1988Sjohnlev *result = privcmd_devi;
77843e1988Sjohnlev return (DDI_SUCCESS);
78843e1988Sjohnlev }
79843e1988Sjohnlev
80843e1988Sjohnlev static int
privcmd_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)81843e1988Sjohnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
82843e1988Sjohnlev {
83843e1988Sjohnlev if (cmd != DDI_ATTACH)
84843e1988Sjohnlev return (DDI_FAILURE);
85843e1988Sjohnlev
86843e1988Sjohnlev if (ddi_create_minor_node(devi, PRIVCMD_NODE,
87843e1988Sjohnlev S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
88843e1988Sjohnlev return (DDI_FAILURE);
89843e1988Sjohnlev
90843e1988Sjohnlev privcmd_devi = devi;
91843e1988Sjohnlev ddi_report_dev(devi);
92843e1988Sjohnlev return (DDI_SUCCESS);
93843e1988Sjohnlev }
94843e1988Sjohnlev
95843e1988Sjohnlev static int
privcmd_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)96843e1988Sjohnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
97843e1988Sjohnlev {
98843e1988Sjohnlev if (cmd != DDI_DETACH)
99843e1988Sjohnlev return (DDI_FAILURE);
100843e1988Sjohnlev ddi_remove_minor_node(devi, NULL);
101843e1988Sjohnlev privcmd_devi = NULL;
102843e1988Sjohnlev return (DDI_SUCCESS);
103843e1988Sjohnlev }
104843e1988Sjohnlev
105843e1988Sjohnlev /*ARGSUSED1*/
106843e1988Sjohnlev static int
privcmd_open(dev_t * dev,int flag,int otyp,cred_t * cr)107843e1988Sjohnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
108843e1988Sjohnlev {
109843e1988Sjohnlev return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
110843e1988Sjohnlev }
111843e1988Sjohnlev
112843e1988Sjohnlev /*
113843e1988Sjohnlev * Map a contiguous set of machine frames in a foreign domain.
114843e1988Sjohnlev * Used in the following way:
115843e1988Sjohnlev *
116843e1988Sjohnlev * privcmd_mmap_t p;
117843e1988Sjohnlev * privcmd_mmap_entry_t e;
118843e1988Sjohnlev *
119843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
120843e1988Sjohnlev * p.num = number of privcmd_mmap_entry_t's
121843e1988Sjohnlev * p.dom = domid;
122843e1988Sjohnlev * p.entry = &e;
123843e1988Sjohnlev * e.va = addr;
124843e1988Sjohnlev * e.mfn = mfn;
125843e1988Sjohnlev * e.npages = btopr(size);
126843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
127843e1988Sjohnlev */
128843e1988Sjohnlev /*ARGSUSED2*/
129843e1988Sjohnlev int
do_privcmd_mmap(void * uarg,int mode,cred_t * cr)130843e1988Sjohnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
131843e1988Sjohnlev {
132843e1988Sjohnlev privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
133843e1988Sjohnlev privcmd_mmap_entry_t *umme;
134843e1988Sjohnlev struct as *as = curproc->p_as;
135843e1988Sjohnlev struct seg *seg;
136843e1988Sjohnlev int i, error = 0;
137843e1988Sjohnlev
138843e1988Sjohnlev if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
139843e1988Sjohnlev return (EFAULT);
140843e1988Sjohnlev
141843e1988Sjohnlev DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
142843e1988Sjohnlev privcmd_mmap_entry_t *, mmc->entry);
143843e1988Sjohnlev
144843e1988Sjohnlev if (mmc->dom == DOMID_SELF) {
145843e1988Sjohnlev error = ENOTSUP; /* Too paranoid? */
146843e1988Sjohnlev goto done;
147843e1988Sjohnlev }
148843e1988Sjohnlev
149843e1988Sjohnlev for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
150843e1988Sjohnlev privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
151843e1988Sjohnlev caddr_t addr;
152843e1988Sjohnlev
153843e1988Sjohnlev if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
154843e1988Sjohnlev error = EFAULT;
155843e1988Sjohnlev break;
156843e1988Sjohnlev }
157843e1988Sjohnlev
158843e1988Sjohnlev DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
159843e1988Sjohnlev ulong_t, mme->npages);
160843e1988Sjohnlev
161843e1988Sjohnlev if (mme->mfn == MFN_INVALID) {
162843e1988Sjohnlev error = EINVAL;
163843e1988Sjohnlev break;
164843e1988Sjohnlev }
165843e1988Sjohnlev
166843e1988Sjohnlev addr = (caddr_t)mme->va;
167843e1988Sjohnlev
168843e1988Sjohnlev /*
169843e1988Sjohnlev * Find the segment we want to mess with, then add
170843e1988Sjohnlev * the mfn range to the segment.
171843e1988Sjohnlev */
172*fd435bccSJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER);
173843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL ||
174843e1988Sjohnlev addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
175843e1988Sjohnlev error = EINVAL;
176843e1988Sjohnlev else
177843e1988Sjohnlev error = segmf_add_mfns(seg, addr,
178843e1988Sjohnlev mme->mfn, mme->npages, mmc->dom);
179*fd435bccSJosef 'Jeff' Sipek AS_LOCK_EXIT(as);
180843e1988Sjohnlev
181843e1988Sjohnlev if (error != 0)
182843e1988Sjohnlev break;
183843e1988Sjohnlev }
184843e1988Sjohnlev
185843e1988Sjohnlev done:
186843e1988Sjohnlev DTRACE_XPV1(mmap__end, int, error);
187843e1988Sjohnlev
188843e1988Sjohnlev return (error);
189843e1988Sjohnlev }
190843e1988Sjohnlev
191843e1988Sjohnlev /*
192843e1988Sjohnlev * Set up the address range to map to an array of mfns in
193843e1988Sjohnlev * a foreign domain. Used in the following way:
194843e1988Sjohnlev *
195843e1988Sjohnlev * privcmd_mmap_batch_t p;
196843e1988Sjohnlev *
197843e1988Sjohnlev * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
198843e1988Sjohnlev * p.num = number of pages
199843e1988Sjohnlev * p.dom = domid
200843e1988Sjohnlev * p.addr = addr;
201843e1988Sjohnlev * p.arr = array of mfns, indexed 0 .. p.num - 1
202843e1988Sjohnlev * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
203843e1988Sjohnlev */
204843e1988Sjohnlev /*ARGSUSED2*/
205843e1988Sjohnlev static int
do_privcmd_mmapbatch(void * uarg,int mode,cred_t * cr)206843e1988Sjohnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
207843e1988Sjohnlev {
208843e1988Sjohnlev privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
209843e1988Sjohnlev struct as *as = curproc->p_as;
210843e1988Sjohnlev struct seg *seg;
211843e1988Sjohnlev int i, error = 0;
212843e1988Sjohnlev caddr_t addr;
213843e1988Sjohnlev ulong_t *ulp;
214843e1988Sjohnlev
215843e1988Sjohnlev if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
216843e1988Sjohnlev return (EFAULT);
217843e1988Sjohnlev
218843e1988Sjohnlev DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
219843e1988Sjohnlev caddr_t, mmb->addr);
220843e1988Sjohnlev
221843e1988Sjohnlev addr = (caddr_t)mmb->addr;
222*fd435bccSJosef 'Jeff' Sipek AS_LOCK_ENTER(as, RW_READER);
223843e1988Sjohnlev if ((seg = as_findseg(as, addr, 0)) == NULL ||
224843e1988Sjohnlev addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
225843e1988Sjohnlev error = EINVAL;
226843e1988Sjohnlev goto done;
227843e1988Sjohnlev }
228843e1988Sjohnlev
229843e1988Sjohnlev for (i = 0, ulp = mmb->arr;
230843e1988Sjohnlev i < mmb->num; i++, addr += PAGESIZE, ulp++) {
231843e1988Sjohnlev mfn_t mfn;
232843e1988Sjohnlev
233843e1988Sjohnlev if (fulword(ulp, &mfn) != 0) {
234843e1988Sjohnlev error = EFAULT;
235843e1988Sjohnlev break;
236843e1988Sjohnlev }
237843e1988Sjohnlev
238843e1988Sjohnlev if (mfn == MFN_INVALID) {
239a576ab5bSrab /*
240a576ab5bSrab * This mfn is invalid and should not be added to
241a576ab5bSrab * segmf, as we'd only cause an immediate EFAULT when
242a576ab5bSrab * we tried to fault it in.
243a576ab5bSrab */
244a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB;
245a576ab5bSrab continue;
246843e1988Sjohnlev }
247843e1988Sjohnlev
248843e1988Sjohnlev if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
249843e1988Sjohnlev continue;
250843e1988Sjohnlev
251843e1988Sjohnlev /*
252843e1988Sjohnlev * Tell the process that this MFN could not be mapped, so it
253843e1988Sjohnlev * won't later try to access it.
254843e1988Sjohnlev */
255a576ab5bSrab mfn |= XEN_DOMCTL_PFINFO_XTAB;
256843e1988Sjohnlev if (sulword(ulp, mfn) != 0) {
257843e1988Sjohnlev error = EFAULT;
258843e1988Sjohnlev break;
259843e1988Sjohnlev }
260843e1988Sjohnlev }
261843e1988Sjohnlev
262843e1988Sjohnlev done:
263*fd435bccSJosef 'Jeff' Sipek AS_LOCK_EXIT(as);
264843e1988Sjohnlev
265843e1988Sjohnlev DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
266843e1988Sjohnlev mmb->addr);
267843e1988Sjohnlev
268843e1988Sjohnlev return (error);
269843e1988Sjohnlev }
270843e1988Sjohnlev
271843e1988Sjohnlev /*ARGSUSED*/
272843e1988Sjohnlev static int
privcmd_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cr,int * rval)273843e1988Sjohnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
274843e1988Sjohnlev {
275b26a64aeSjohnlev if (secpolicy_xvm_control(cr))
276b26a64aeSjohnlev return (EPERM);
277843e1988Sjohnlev
278843e1988Sjohnlev /*
279843e1988Sjohnlev * Everything is a -native- data type.
280843e1988Sjohnlev */
281b26a64aeSjohnlev if ((mode & FMODELS) != FNATIVE)
282b26a64aeSjohnlev return (EOVERFLOW);
283843e1988Sjohnlev
284843e1988Sjohnlev switch (cmd) {
285843e1988Sjohnlev case IOCTL_PRIVCMD_HYPERCALL:
286843e1988Sjohnlev return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
287843e1988Sjohnlev case IOCTL_PRIVCMD_MMAP:
288843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info))
289843e1988Sjohnlev return (do_privcmd_mmap((void *)arg, mode, cr));
290843e1988Sjohnlev break;
291843e1988Sjohnlev case IOCTL_PRIVCMD_MMAPBATCH:
292843e1988Sjohnlev if (DOMAIN_IS_PRIVILEGED(xen_info))
293843e1988Sjohnlev return (do_privcmd_mmapbatch((void *)arg, mode, cr));
294843e1988Sjohnlev break;
295843e1988Sjohnlev default:
296843e1988Sjohnlev break;
297843e1988Sjohnlev }
298843e1988Sjohnlev return (EINVAL);
299843e1988Sjohnlev }
300843e1988Sjohnlev
301843e1988Sjohnlev /*
302843e1988Sjohnlev * The real magic happens in the segmf segment driver.
303843e1988Sjohnlev */
304843e1988Sjohnlev /*ARGSUSED8*/
305843e1988Sjohnlev static int
privcmd_segmap(dev_t dev,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cr)306843e1988Sjohnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
307843e1988Sjohnlev off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
308843e1988Sjohnlev {
309843e1988Sjohnlev struct segmf_crargs a;
310843e1988Sjohnlev int error;
311843e1988Sjohnlev
312b26a64aeSjohnlev if (secpolicy_xvm_control(cr))
313b26a64aeSjohnlev return (EPERM);
314b26a64aeSjohnlev
315843e1988Sjohnlev as_rangelock(as);
316843e1988Sjohnlev if ((flags & MAP_FIXED) == 0) {
317843e1988Sjohnlev map_addr(addrp, len, (offset_t)off, 0, flags);
318843e1988Sjohnlev if (*addrp == NULL) {
319843e1988Sjohnlev error = ENOMEM;
320843e1988Sjohnlev goto rangeunlock;
321843e1988Sjohnlev }
322843e1988Sjohnlev } else {
323843e1988Sjohnlev /*
324843e1988Sjohnlev * User specified address
325843e1988Sjohnlev */
326843e1988Sjohnlev (void) as_unmap(as, *addrp, len);
327843e1988Sjohnlev }
328843e1988Sjohnlev
329843e1988Sjohnlev /*
330843e1988Sjohnlev * The mapping *must* be MAP_SHARED at offset 0.
331843e1988Sjohnlev *
332843e1988Sjohnlev * (Foreign pages are treated like device memory; the
333843e1988Sjohnlev * ioctl interface allows the backing objects to be
334843e1988Sjohnlev * arbitrarily redefined to point at any machine frame.)
335843e1988Sjohnlev */
336843e1988Sjohnlev if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
337843e1988Sjohnlev error = EINVAL;
338843e1988Sjohnlev goto rangeunlock;
339843e1988Sjohnlev }
340843e1988Sjohnlev
341843e1988Sjohnlev a.dev = dev;
342843e1988Sjohnlev a.prot = (uchar_t)prot;
343843e1988Sjohnlev a.maxprot = (uchar_t)maxprot;
344843e1988Sjohnlev error = as_map(as, *addrp, len, segmf_create, &a);
345843e1988Sjohnlev
346843e1988Sjohnlev rangeunlock:
347843e1988Sjohnlev as_rangeunlock(as);
348843e1988Sjohnlev return (error);
349843e1988Sjohnlev }
350843e1988Sjohnlev
351843e1988Sjohnlev static struct cb_ops privcmd_cb_ops = {
352843e1988Sjohnlev privcmd_open,
353843e1988Sjohnlev nulldev, /* close */
354843e1988Sjohnlev nodev, /* strategy */
355843e1988Sjohnlev nodev, /* print */
356843e1988Sjohnlev nodev, /* dump */
357843e1988Sjohnlev nodev, /* read */
358843e1988Sjohnlev nodev, /* write */
359843e1988Sjohnlev privcmd_ioctl,
360843e1988Sjohnlev nodev, /* devmap */
361843e1988Sjohnlev nodev, /* mmap */
362843e1988Sjohnlev privcmd_segmap,
363843e1988Sjohnlev nochpoll, /* poll */
364843e1988Sjohnlev ddi_prop_op,
365843e1988Sjohnlev NULL,
366843e1988Sjohnlev D_64BIT | D_NEW | D_MP
367843e1988Sjohnlev };
368843e1988Sjohnlev
369843e1988Sjohnlev static struct dev_ops privcmd_dv_ops = {
370843e1988Sjohnlev DEVO_REV,
371843e1988Sjohnlev 0,
372843e1988Sjohnlev privcmd_getinfo,
373843e1988Sjohnlev nulldev, /* identify */
374843e1988Sjohnlev nulldev, /* probe */
375843e1988Sjohnlev privcmd_attach,
376843e1988Sjohnlev privcmd_detach,
377843e1988Sjohnlev nodev, /* reset */
378843e1988Sjohnlev &privcmd_cb_ops,
37919397407SSherry Moore 0, /* struct bus_ops */
38019397407SSherry Moore NULL, /* power */
38119397407SSherry Moore ddi_quiesce_not_needed, /* quiesce */
382843e1988Sjohnlev };
383843e1988Sjohnlev
384843e1988Sjohnlev static struct modldrv modldrv = {
385843e1988Sjohnlev &mod_driverops,
386613b2871SRichard Bean "privcmd driver",
387843e1988Sjohnlev &privcmd_dv_ops
388843e1988Sjohnlev };
389843e1988Sjohnlev
390843e1988Sjohnlev static struct modlinkage modl = {
391843e1988Sjohnlev MODREV_1,
392843e1988Sjohnlev &modldrv
393843e1988Sjohnlev };
394843e1988Sjohnlev
395843e1988Sjohnlev int
_init(void)396843e1988Sjohnlev _init(void)
397843e1988Sjohnlev {
398843e1988Sjohnlev return (mod_install(&modl));
399843e1988Sjohnlev }
400843e1988Sjohnlev
401843e1988Sjohnlev int
_fini(void)402843e1988Sjohnlev _fini(void)
403843e1988Sjohnlev {
404843e1988Sjohnlev return (mod_remove(&modl));
405843e1988Sjohnlev }
406843e1988Sjohnlev
407843e1988Sjohnlev int
_info(struct modinfo * modinfo)408843e1988Sjohnlev _info(struct modinfo *modinfo)
409843e1988Sjohnlev {
410843e1988Sjohnlev return (mod_info(&modl, modinfo));
411843e1988Sjohnlev }
412