1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/xpv_user.h>
28
29 #include <sys/types.h>
30 #include <sys/file.h>
31 #include <sys/errno.h>
32 #include <sys/open.h>
33 #include <sys/cred.h>
34 #include <sys/conf.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/vmsystm.h>
40 #include <sys/sdt.h>
41 #include <sys/hypervisor.h>
42 #include <sys/xen_errno.h>
43 #include <sys/policy.h>
44
45 #include <vm/hat_i86.h>
46 #include <vm/hat_pte.h>
47 #include <vm/seg_mf.h>
48
49 #include <xen/sys/privcmd.h>
50 #include <sys/privcmd_impl.h>
51
52 static dev_info_t *privcmd_devi;
53
54 /*ARGSUSED*/
55 static int
privcmd_getinfo(dev_info_t * devi,ddi_info_cmd_t cmd,void * arg,void ** result)56 privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
57 {
58 switch (cmd) {
59 case DDI_INFO_DEVT2DEVINFO:
60 case DDI_INFO_DEVT2INSTANCE:
61 break;
62 default:
63 return (DDI_FAILURE);
64 }
65
66 switch (getminor((dev_t)arg)) {
67 case PRIVCMD_MINOR:
68 break;
69 default:
70 return (DDI_FAILURE);
71 }
72
73 if (cmd == DDI_INFO_DEVT2INSTANCE)
74 *result = 0;
75 else
76 *result = privcmd_devi;
77 return (DDI_SUCCESS);
78 }
79
80 static int
privcmd_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)81 privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
82 {
83 if (cmd != DDI_ATTACH)
84 return (DDI_FAILURE);
85
86 if (ddi_create_minor_node(devi, PRIVCMD_NODE,
87 S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
88 return (DDI_FAILURE);
89
90 privcmd_devi = devi;
91 ddi_report_dev(devi);
92 return (DDI_SUCCESS);
93 }
94
95 static int
privcmd_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)96 privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
97 {
98 if (cmd != DDI_DETACH)
99 return (DDI_FAILURE);
100 ddi_remove_minor_node(devi, NULL);
101 privcmd_devi = NULL;
102 return (DDI_SUCCESS);
103 }
104
105 /*ARGSUSED1*/
106 static int
privcmd_open(dev_t * dev,int flag,int otyp,cred_t * cr)107 privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
108 {
109 return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
110 }
111
112 /*
113 * Map a contiguous set of machine frames in a foreign domain.
114 * Used in the following way:
115 *
116 * privcmd_mmap_t p;
117 * privcmd_mmap_entry_t e;
118 *
119 * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
120 * p.num = number of privcmd_mmap_entry_t's
121 * p.dom = domid;
122 * p.entry = &e;
123 * e.va = addr;
124 * e.mfn = mfn;
125 * e.npages = btopr(size);
126 * ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
127 */
128 /*ARGSUSED2*/
129 int
do_privcmd_mmap(void * uarg,int mode,cred_t * cr)130 do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
131 {
132 privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
133 privcmd_mmap_entry_t *umme;
134 struct as *as = curproc->p_as;
135 struct seg *seg;
136 int i, error = 0;
137
138 if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
139 return (EFAULT);
140
141 DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
142 privcmd_mmap_entry_t *, mmc->entry);
143
144 if (mmc->dom == DOMID_SELF) {
145 error = ENOTSUP; /* Too paranoid? */
146 goto done;
147 }
148
149 for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
150 privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
151 caddr_t addr;
152
153 if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
154 error = EFAULT;
155 break;
156 }
157
158 DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
159 ulong_t, mme->npages);
160
161 if (mme->mfn == MFN_INVALID) {
162 error = EINVAL;
163 break;
164 }
165
166 addr = (caddr_t)mme->va;
167
168 /*
169 * Find the segment we want to mess with, then add
170 * the mfn range to the segment.
171 */
172 AS_LOCK_ENTER(as, RW_READER);
173 if ((seg = as_findseg(as, addr, 0)) == NULL ||
174 addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
175 error = EINVAL;
176 else
177 error = segmf_add_mfns(seg, addr,
178 mme->mfn, mme->npages, mmc->dom);
179 AS_LOCK_EXIT(as);
180
181 if (error != 0)
182 break;
183 }
184
185 done:
186 DTRACE_XPV1(mmap__end, int, error);
187
188 return (error);
189 }
190
191 /*
192 * Set up the address range to map to an array of mfns in
193 * a foreign domain. Used in the following way:
194 *
195 * privcmd_mmap_batch_t p;
196 *
197 * addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
198 * p.num = number of pages
199 * p.dom = domid
200 * p.addr = addr;
201 * p.arr = array of mfns, indexed 0 .. p.num - 1
202 * ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
203 */
204 /*ARGSUSED2*/
205 static int
do_privcmd_mmapbatch(void * uarg,int mode,cred_t * cr)206 do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
207 {
208 privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
209 struct as *as = curproc->p_as;
210 struct seg *seg;
211 int i, error = 0;
212 caddr_t addr;
213 ulong_t *ulp;
214
215 if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
216 return (EFAULT);
217
218 DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
219 caddr_t, mmb->addr);
220
221 addr = (caddr_t)mmb->addr;
222 AS_LOCK_ENTER(as, RW_READER);
223 if ((seg = as_findseg(as, addr, 0)) == NULL ||
224 addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
225 error = EINVAL;
226 goto done;
227 }
228
229 for (i = 0, ulp = mmb->arr;
230 i < mmb->num; i++, addr += PAGESIZE, ulp++) {
231 mfn_t mfn;
232
233 if (fulword(ulp, &mfn) != 0) {
234 error = EFAULT;
235 break;
236 }
237
238 if (mfn == MFN_INVALID) {
239 /*
240 * This mfn is invalid and should not be added to
241 * segmf, as we'd only cause an immediate EFAULT when
242 * we tried to fault it in.
243 */
244 mfn |= XEN_DOMCTL_PFINFO_XTAB;
245 continue;
246 }
247
248 if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
249 continue;
250
251 /*
252 * Tell the process that this MFN could not be mapped, so it
253 * won't later try to access it.
254 */
255 mfn |= XEN_DOMCTL_PFINFO_XTAB;
256 if (sulword(ulp, mfn) != 0) {
257 error = EFAULT;
258 break;
259 }
260 }
261
262 done:
263 AS_LOCK_EXIT(as);
264
265 DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
266 mmb->addr);
267
268 return (error);
269 }
270
271 /*ARGSUSED*/
272 static int
privcmd_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cr,int * rval)273 privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
274 {
275 if (secpolicy_xvm_control(cr))
276 return (EPERM);
277
278 /*
279 * Everything is a -native- data type.
280 */
281 if ((mode & FMODELS) != FNATIVE)
282 return (EOVERFLOW);
283
284 switch (cmd) {
285 case IOCTL_PRIVCMD_HYPERCALL:
286 return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
287 case IOCTL_PRIVCMD_MMAP:
288 if (DOMAIN_IS_PRIVILEGED(xen_info))
289 return (do_privcmd_mmap((void *)arg, mode, cr));
290 break;
291 case IOCTL_PRIVCMD_MMAPBATCH:
292 if (DOMAIN_IS_PRIVILEGED(xen_info))
293 return (do_privcmd_mmapbatch((void *)arg, mode, cr));
294 break;
295 default:
296 break;
297 }
298 return (EINVAL);
299 }
300
301 /*
302 * The real magic happens in the segmf segment driver.
303 */
304 /*ARGSUSED8*/
305 static int
privcmd_segmap(dev_t dev,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cr)306 privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
307 off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
308 {
309 struct segmf_crargs a;
310 int error;
311
312 if (secpolicy_xvm_control(cr))
313 return (EPERM);
314
315 as_rangelock(as);
316 if ((flags & MAP_FIXED) == 0) {
317 map_addr(addrp, len, (offset_t)off, 0, flags);
318 if (*addrp == NULL) {
319 error = ENOMEM;
320 goto rangeunlock;
321 }
322 } else {
323 /*
324 * User specified address
325 */
326 (void) as_unmap(as, *addrp, len);
327 }
328
329 /*
330 * The mapping *must* be MAP_SHARED at offset 0.
331 *
332 * (Foreign pages are treated like device memory; the
333 * ioctl interface allows the backing objects to be
334 * arbitrarily redefined to point at any machine frame.)
335 */
336 if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
337 error = EINVAL;
338 goto rangeunlock;
339 }
340
341 a.dev = dev;
342 a.prot = (uchar_t)prot;
343 a.maxprot = (uchar_t)maxprot;
344 error = as_map(as, *addrp, len, segmf_create, &a);
345
346 rangeunlock:
347 as_rangeunlock(as);
348 return (error);
349 }
350
351 static struct cb_ops privcmd_cb_ops = {
352 privcmd_open,
353 nulldev, /* close */
354 nodev, /* strategy */
355 nodev, /* print */
356 nodev, /* dump */
357 nodev, /* read */
358 nodev, /* write */
359 privcmd_ioctl,
360 nodev, /* devmap */
361 nodev, /* mmap */
362 privcmd_segmap,
363 nochpoll, /* poll */
364 ddi_prop_op,
365 NULL,
366 D_64BIT | D_NEW | D_MP
367 };
368
369 static struct dev_ops privcmd_dv_ops = {
370 DEVO_REV,
371 0,
372 privcmd_getinfo,
373 nulldev, /* identify */
374 nulldev, /* probe */
375 privcmd_attach,
376 privcmd_detach,
377 nodev, /* reset */
378 &privcmd_cb_ops,
379 0, /* struct bus_ops */
380 NULL, /* power */
381 ddi_quiesce_not_needed, /* quiesce */
382 };
383
384 static struct modldrv modldrv = {
385 &mod_driverops,
386 "privcmd driver",
387 &privcmd_dv_ops
388 };
389
390 static struct modlinkage modl = {
391 MODREV_1,
392 &modldrv
393 };
394
395 int
_init(void)396 _init(void)
397 {
398 return (mod_install(&modl));
399 }
400
401 int
_fini(void)402 _fini(void)
403 {
404 return (mod_remove(&modl));
405 }
406
407 int
_info(struct modinfo * modinfo)408 _info(struct modinfo *modinfo)
409 {
410 return (mod_info(&modl, modinfo));
411 }
412