xref: /titanic_50/usr/src/uts/i86xpv/io/privcmd_hcall.c (revision 98e8d17584d08c481c8a827f2311c1e3e6aceabb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/xpv_user.h>
27 
28 #include <sys/types.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/open.h>
32 #include <sys/cred.h>
33 #include <sys/conf.h>
34 #include <sys/stat.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/vmsystm.h>
39 #include <sys/hypervisor.h>
40 #include <sys/xen_errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/sdt.h>
43 
44 #include <xen/sys/privcmd.h>
45 #include <sys/privcmd_impl.h>
46 
/*
 * Descriptor for one user buffer staged in kernel memory around a
 * hypercall.  It is filled in by import_buffer() and consumed by
 * export_buffer().
 */
typedef struct import_export {
	void *			ie_uaddr;	/* user-space address */
	void *			ie_kaddr;	/* kernel-side copy */
	size_t			ie_size;	/* buffer size in bytes */
	uint32_t		ie_flags;	/* IE_EXPORT and/or IE_FREE */
} import_export_t;

/* Empty descriptor: export_buffer() returns immediately when given this. */
static import_export_t null_ie = {NULL, NULL, 0, 0};

#define	IE_IMPORT	0x0001		/* Data needs to be copied in */
#define	IE_EXPORT	0x0002		/* Data needs to be copied out */
#define	IE_FREE		0x0004		/* ie_kaddr was kmem_alloc'ed by us */
#define	IE_IMPEXP	(IE_IMPORT | IE_EXPORT)
60 
/*
 * Fetch the pointer stored in the Xen guest handle located at 'field'.
 * The handle is viewed through a one-pointer structure overlay, so this
 * must track Xen's definition of a handle.
 */
static void *
uaddr_from_handle(void *field)
{
	struct { void *p; } *handle = field;
	void *uaddr;

	/*LINTED: constant in conditional context*/
	get_xen_guest_handle(uaddr, (*handle));
	return (uaddr);
}
71 
72 
73 /*
74  * Import a buffer from user-space.  If the caller provides a kernel
75  * address, we import to that address.  If not, we kmem_alloc() the space
76  * ourselves.
77  */
78 static int
79 import_buffer(import_export_t *iep, void *uaddr, void *kaddr, size_t size,
80     uint32_t flags)
81 {
82 	iep->ie_uaddr = uaddr;
83 	iep->ie_size = size;
84 	iep->ie_flags = flags & IE_EXPORT;
85 
86 	if (size == 0 || uaddr == NULL) {
87 		*iep = null_ie;
88 		return (0);
89 	}
90 
91 	if (kaddr == NULL) {
92 		iep->ie_kaddr = kmem_alloc(size, KM_SLEEP);
93 		iep->ie_flags |= IE_FREE;
94 	} else {
95 		iep->ie_kaddr = kaddr;
96 		iep->ie_flags &= ~IE_FREE;
97 	}
98 
99 	if ((flags & IE_IMPORT) &&
100 	    (ddi_copyin(uaddr, iep->ie_kaddr, size, 0) != 0)) {
101 		if (iep->ie_flags & IE_FREE) {
102 			kmem_free(iep->ie_kaddr, iep->ie_size);
103 			iep->ie_kaddr = NULL;
104 			iep->ie_flags = 0;
105 		}
106 		return (-X_EFAULT);
107 	}
108 
109 	return (0);
110 }
111 
112 static void
113 export_buffer(import_export_t *iep, int *error)
114 {
115 	int copy_err = 0;
116 
117 	if (iep->ie_size == 0 || iep->ie_uaddr == NULL)
118 		return;
119 
120 	/*
121 	 * If the buffer was marked for export initially, and if the
122 	 * hypercall completed successfully, resync the user-space buffer
123 	 * with our in-kernel buffer.
124 	 */
125 	if ((iep->ie_flags & IE_EXPORT) && (*error >= 0) &&
126 	    (ddi_copyout(iep->ie_kaddr, iep->ie_uaddr, iep->ie_size, 0) != 0))
127 		copy_err = -X_EFAULT;
128 	if (iep->ie_flags & IE_FREE) {
129 		kmem_free(iep->ie_kaddr, iep->ie_size);
130 		iep->ie_kaddr = NULL;
131 		iep->ie_flags = 0;
132 	}
133 
134 	if (copy_err != 0 && *error >= 0)
135 		*error = copy_err;
136 }
137 
138 /*
139  * Xen 'op' structures often include pointers disguised as 'handles', which
140  * refer to addresses in user space.  This routine copies a buffer
141  * associated with an embedded pointer into kernel space, and replaces the
142  * pointer to userspace with a pointer to the new kernel buffer.
143  *
144  * Note: if Xen ever redefines the structure of a 'handle', this routine
145  * (specifically the definition of 'hdl') will need to be updated.
146  */
147 static int
148 import_handle(import_export_t *iep, void *field, size_t size, int flags)
149 {
150 	struct { void *p; } *hdl = field;
151 	void *ptr;
152 	int err;
153 
154 	ptr = uaddr_from_handle(field);
155 	err = import_buffer(iep, ptr, NULL, size, (flags));
156 	if (err == 0) {
157 		/*LINTED: constant in conditional context*/
158 		set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr));
159 	}
160 	return (err);
161 }
162 
163 static int
164 privcmd_HYPERVISOR_mmu_update(mmu_update_t *ureq, int count, int *scount,
165     domid_t domid)
166 {
167 	mmu_update_t *kreq, single_kreq;
168 	import_export_t cnt_ie, req_ie;
169 	int error, kscount, bytes;
170 
171 	bytes = count * sizeof (*kreq);
172 	kreq = (count == 1) ? &single_kreq : kmem_alloc(bytes, KM_SLEEP);
173 
174 	error = import_buffer(&cnt_ie, scount, &kscount, sizeof (kscount),
175 	    IE_IMPEXP);
176 	if (error != 0)
177 		req_ie = null_ie;
178 	else
179 		error = import_buffer(&req_ie, ureq, kreq, bytes, IE_IMPEXP);
180 
181 	DTRACE_XPV3(mmu__update__start, int, domid, int, count, mmu_update_t *,
182 	    ((error == -X_EFAULT) ? ureq : kreq));
183 
184 	if (error == 0)
185 		error = HYPERVISOR_mmu_update(kreq, count, &kscount, domid);
186 	export_buffer(&cnt_ie, &error);
187 	export_buffer(&req_ie, &error);
188 	if (count != 1)
189 		kmem_free(kreq, bytes);
190 
191 	DTRACE_XPV1(mmu__update__end, int, error);
192 	return (error);
193 }
194 
/*
 * Issue a domctl hypercall for user space.
 *
 * The op structure at 'opp' is copied into a kernel-stack copy, its
 * interface version is checked, and for commands that embed a guest
 * handle the referenced buffer is staged in kernel memory with
 * import_handle().  The hypercall is then issued on the kernel copy and
 * both the op structure and the staged buffer are handed to
 * export_buffer().
 *
 * Returns the hypercall's result, -X_EACCES on an interface version
 * mismatch, -X_EINVAL for an unrecognized command, or the error from
 * importing a buffer.
 *
 * NOTE(review): 'op' is copied back to user space after import_handle()
 * has replaced its embedded handle with a kernel address -- confirm
 * whether user space is meant to see that value.
 */
static int
privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
{
	xen_domctl_t op;
	import_export_t op_ie, sub_ie;
	int error = 0;

	if ((error = import_buffer(&op_ie, opp, &op, sizeof (op),
	    IE_IMPEXP)) != 0)
		return (error);

	/* No sub-buffer yet; export_buffer() ignores an empty descriptor. */
	sub_ie = null_ie;

	/*
	 * Check this first because our wrapper will forcibly overwrite it.
	 */
	if (op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) {
#ifdef DEBUG
		printf("domctl vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
		    op.cmd, op.interface_version, XEN_DOMCTL_INTERFACE_VERSION);
#endif
		error = -X_EACCES;
		export_buffer(&op_ie, &error);
		return (error);
	}

	/*
	 * Now handle any domctl ops with embedded pointers elsewhere
	 * in the user address space that also need to be tacked down
	 * while the hypervisor futzes with them.
	 */
	switch (op.cmd) {
	case XEN_DOMCTL_createdomain:
		DTRACE_XPV1(dom__create__start, xen_domctl_t *,
		    &op.u.createdomain);
		break;

	case XEN_DOMCTL_destroydomain:
		DTRACE_XPV1(dom__destroy__start, domid_t, op.domain);
		break;

	case XEN_DOMCTL_pausedomain:
		DTRACE_XPV1(dom__pause__start, domid_t, op.domain);
		break;

	case XEN_DOMCTL_unpausedomain:
		DTRACE_XPV1(dom__unpause__start, domid_t, op.domain);
		break;

	case XEN_DOMCTL_getmemlist: {
		error = import_handle(&sub_ie, &op.u.getmemlist.buffer,
		    op.u.getmemlist.max_pfns * sizeof (xen_pfn_t), IE_EXPORT);
		break;
	}

	case XEN_DOMCTL_getpageframeinfo2: {
		error = import_handle(&sub_ie, &op.u.getpageframeinfo2.array,
		    op.u.getpageframeinfo2.num * sizeof (ulong_t), IE_IMPEXP);
		break;
	}

	case XEN_DOMCTL_shadow_op: {
		size_t size;

		/* One bit per page, rounded up to whole ulong_t words. */
		size = roundup(howmany(op.u.shadow_op.pages, NBBY),
		    sizeof (ulong_t));
		error = import_handle(&sub_ie,
		    &op.u.shadow_op.dirty_bitmap, size, IE_IMPEXP);
		break;
	}

	case XEN_DOMCTL_setvcpucontext: {
		vcpu_guest_context_t *taddr;
		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
		    sizeof (vcpu_guest_context_t), IE_IMPORT);
		/*
		 * On a failed copyin the handle still holds the user
		 * address, so report that to the probe instead of the
		 * kernel buffer.
		 */
		if (error == -X_EFAULT)
			/*LINTED: constant in conditional context*/
			get_xen_guest_handle_u(taddr, op.u.vcpucontext.ctxt);
		else
			taddr = sub_ie.ie_kaddr;
		DTRACE_XPV2(setvcpucontext__start, domid_t, op.domain,
		    vcpu_guest_context_t *, taddr);
		break;
	}

	case XEN_DOMCTL_getvcpucontext: {
		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
		    sizeof (vcpu_guest_context_t), IE_EXPORT);
		break;
	}


	case XEN_DOMCTL_sethvmcontext: {
		error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
		    op.u.hvmcontext.size, IE_IMPORT);
		break;
	}

	case XEN_DOMCTL_gethvmcontext: {
		/* Only stage an output buffer when the handle is non-NULL. */
#if !defined(__GNUC__) && defined(__i386__)
		if (op.u.hvmcontext.buffer.u.p != NULL)
#else
		if (op.u.hvmcontext.buffer.p != NULL)
#endif
			error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
			    op.u.hvmcontext.size, IE_EXPORT);
		break;
	}

	/* Commands passed through without staging any sub-buffer. */
	case XEN_DOMCTL_getdomaininfo:
	case XEN_DOMCTL_getpageframeinfo:
	case XEN_DOMCTL_max_mem:
	case XEN_DOMCTL_resumedomain:
	case XEN_DOMCTL_getvcpuinfo:
	case XEN_DOMCTL_setvcpuaffinity:
	case XEN_DOMCTL_getvcpuaffinity:
	case XEN_DOMCTL_max_vcpus:
	case XEN_DOMCTL_scheduler_op:
	case XEN_DOMCTL_setdomainhandle:
	case XEN_DOMCTL_setdebugging:
	case XEN_DOMCTL_irq_permission:
	case XEN_DOMCTL_iomem_permission:
	case XEN_DOMCTL_ioport_permission:
	case XEN_DOMCTL_hypercall_init:
	case XEN_DOMCTL_arch_setup:
	case XEN_DOMCTL_settimeoffset:
	case XEN_DOMCTL_real_mode_area:
	case XEN_DOMCTL_sendtrigger:
	case XEN_DOMCTL_assign_device:
	case XEN_DOMCTL_bind_pt_irq:
	case XEN_DOMCTL_get_address_size:
	case XEN_DOMCTL_set_address_size:
	case XEN_DOMCTL_get_ext_vcpucontext:
	case XEN_DOMCTL_set_ext_vcpucontext:
	case XEN_DOMCTL_set_opt_feature:
	case XEN_DOMCTL_memory_mapping:
	case XEN_DOMCTL_ioport_mapping:
	case XEN_DOMCTL_pin_mem_cacheattr:
	case XEN_DOMCTL_test_assign_device:
	case XEN_DOMCTL_set_target:
	case XEN_DOMCTL_deassign_device:
	case XEN_DOMCTL_set_cpuid:
	case XEN_DOMCTL_get_device_group:
	case XEN_DOMCTL_get_machine_address_size:
	case XEN_DOMCTL_set_machine_address_size:
	case XEN_DOMCTL_suppress_spurious_page_faults:
		break;

	default:
#ifdef DEBUG
		printf("unrecognized HYPERVISOR_domctl %d\n", op.cmd);
#endif
		error = -X_EINVAL;
	}

	if (error == 0)
		error = HYPERVISOR_domctl(&op);

	/* Copy results back (only if error >= 0) and free staged memory. */
	export_buffer(&op_ie, &error);
	export_buffer(&sub_ie, &error);

	/* Fire the matching "end" probe for commands that have one. */
	switch (op.cmd) {
	case XEN_DOMCTL_createdomain:
		DTRACE_XPV1(dom__create__end, int, error);
		break;
	case XEN_DOMCTL_destroydomain:
		DTRACE_XPV1(dom__destroy__end, int, error);
		break;
	case XEN_DOMCTL_pausedomain:
		DTRACE_XPV1(dom__pause__end, int, error);
		break;
	case XEN_DOMCTL_unpausedomain:
		DTRACE_XPV1(dom__unpause__end, int, error);
		break;
	case XEN_DOMCTL_setvcpucontext:
		DTRACE_XPV1(setvcpucontext__end, int, error);
		break;
	default:
		;
	}

	return (error);
}
378 
379 static int
380 privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
381 {
382 	xen_sysctl_t op, dop;
383 	import_export_t op_ie, sub_ie, sub2_ie;
384 	int error = 0;
385 
386 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
387 		return (-X_EFAULT);
388 
389 	sub_ie = null_ie;
390 	sub2_ie = null_ie;
391 
392 	/*
393 	 * Check this first because our wrapper will forcibly overwrite it.
394 	 */
395 	if (op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) {
396 		error = -X_EACCES;
397 		export_buffer(&op_ie, &error);
398 		return (error);
399 	}
400 
401 	switch (op.cmd) {
402 	case XEN_SYSCTL_readconsole: {
403 		error = import_handle(&sub_ie, &op.u.readconsole.buffer,
404 		    op.u.readconsole.count, IE_EXPORT);
405 		break;
406 	}
407 
408 	case XEN_SYSCTL_debug_keys: {
409 		error = import_handle(&sub_ie, &op.u.debug_keys.keys,
410 		    op.u.debug_keys.nr_keys, IE_IMPORT);
411 		break;
412 	}
413 
414 	case XEN_SYSCTL_tbuf_op:
415 	case XEN_SYSCTL_physinfo: {
416 		if (uaddr_from_handle(&op.u.physinfo.cpu_to_node) != NULL &&
417 		    op.u.physinfo.max_cpu_id != 0) {
418 			error = import_handle(&sub_ie,
419 			    &op.u.physinfo.cpu_to_node,
420 			    op.u.physinfo.max_cpu_id * sizeof (uint32_t),
421 			    IE_EXPORT);
422 		}
423 		break;
424 	}
425 	case XEN_SYSCTL_sched_id:
426 	case XEN_SYSCTL_availheap:
427 	case XEN_SYSCTL_cpu_hotplug:
428 		break;
429 	case XEN_SYSCTL_get_pmstat: {
430 		unsigned int maxs;
431 
432 		switch (op.u.get_pmstat.type) {
433 		case PMSTAT_get_pxstat:
434 			/*
435 			 * This interface is broken. Xen always copies out
436 			 * all the state information, and the interface
437 			 * does not specify how much space the caller has
438 			 * reserved. So, the only thing to do is just mirror
439 			 * the hypervisor and libxc behavior, and use the
440 			 * maximum amount of data.
441 			 */
442 			dop.cmd = XEN_SYSCTL_get_pmstat;
443 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
444 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
445 			dop.u.get_pmstat.type = PMSTAT_get_max_px;
446 			error = HYPERVISOR_sysctl(&dop);
447 			if (error != 0)
448 				break;
449 
450 			maxs = dop.u.get_pmstat.u.getpx.total;
451 			if (maxs == 0) {
452 				error = -X_EINVAL;
453 				break;
454 			}
455 
456 			error = import_handle(&sub_ie,
457 			    &op.u.get_pmstat.u.getpx.trans_pt,
458 			    maxs * maxs * sizeof (uint64_t), IE_EXPORT);
459 			if (error != 0)
460 				break;
461 
462 			error = import_handle(&sub2_ie,
463 			    &op.u.get_pmstat.u.getpx.pt,
464 			    maxs * sizeof (pm_px_val_t), IE_EXPORT);
465 			break;
466 		case PMSTAT_get_cxstat:
467 			/* See above */
468 			dop.cmd = XEN_SYSCTL_get_pmstat;
469 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
470 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
471 			dop.u.get_pmstat.type = PMSTAT_get_max_cx;
472 			error = HYPERVISOR_sysctl(&dop);
473 			if (error != 0)
474 				break;
475 
476 			maxs = dop.u.get_pmstat.u.getcx.nr;
477 			if (maxs == 0) {
478 				error = -X_EINVAL;
479 				break;
480 			}
481 
482 			error = import_handle(&sub_ie,
483 			    &op.u.get_pmstat.u.getcx.triggers,
484 			    maxs * sizeof (uint64_t), IE_EXPORT);
485 			if (error != 0)
486 				break;
487 			error = import_handle(&sub2_ie,
488 			    &op.u.get_pmstat.u.getcx.residencies,
489 			    maxs * sizeof (uint64_t), IE_EXPORT);
490 			break;
491 
492 		case PMSTAT_get_max_px:
493 		case PMSTAT_reset_pxstat:
494 		case PMSTAT_get_max_cx:
495 		case PMSTAT_reset_cxstat:
496 			break;
497 		default:
498 			error = -X_EINVAL;
499 			break;
500 		}
501 		break;
502 	}
503 
504 	case XEN_SYSCTL_perfc_op: {
505 		xen_sysctl_perfc_desc_t *scdp;
506 		/*
507 		 * If 'desc' is NULL, then the caller is asking for
508 		 * the number of counters.  If 'desc' is non-NULL,
509 		 * then we need to know how many counters there are
510 		 * before wiring down the output buffer appropriately.
511 		 */
512 		/*LINTED: constant in conditional context*/
513 		get_xen_guest_handle_u(scdp, op.u.perfc_op.desc);
514 		if (scdp != NULL) {
515 			static int numcounters = -1;
516 			static int numvals = -1;
517 
518 			if (numcounters == -1) {
519 				dop.cmd = XEN_SYSCTL_perfc_op;
520 				dop.interface_version =
521 				    XEN_SYSCTL_INTERFACE_VERSION;
522 				dop.u.perfc_op.cmd = XEN_SYSCTL_PERFCOP_query;
523 				/*LINTED: constant in conditional context*/
524 				set_xen_guest_handle_u(dop.u.perfc_op.desc,
525 				    NULL);
526 				/*LINTED: constant in conditional context*/
527 				set_xen_guest_handle_u(dop.u.perfc_op.val,
528 				    NULL);
529 
530 				error = HYPERVISOR_sysctl(&dop);
531 				if (error != 0)
532 					break;
533 				numcounters = dop.u.perfc_op.nr_counters;
534 				numvals = dop.u.perfc_op.nr_vals;
535 			}
536 			ASSERT(numcounters != -1);
537 			ASSERT(numvals != -1);
538 			error = import_handle(&sub_ie, &op.u.perfc_op.desc,
539 			    (sizeof (xen_sysctl_perfc_desc_t) * numcounters),
540 			    IE_EXPORT);
541 			error = import_handle(&sub2_ie, &op.u.perfc_op.val,
542 			    (sizeof (xen_sysctl_perfc_val_t) * numvals),
543 			    IE_EXPORT);
544 		}
545 		break;
546 	}
547 
548 	case XEN_SYSCTL_getdomaininfolist: {
549 		error = import_handle(&sub_ie, &op.u.getdomaininfolist.buffer,
550 		    (op.u.getdomaininfolist.max_domains *
551 		    sizeof (xen_domctl_getdomaininfo_t)), IE_EXPORT);
552 		break;
553 	}
554 
555 	case XEN_SYSCTL_getcpuinfo:
556 		error = import_handle(&sub_ie, &op.u.getcpuinfo.info,
557 		    op.u.getcpuinfo.max_cpus *
558 		    sizeof (xen_sysctl_cpuinfo_t), IE_EXPORT);
559 		break;
560 	default:
561 #ifdef DEBUG
562 		printf("unrecognized HYPERVISOR_sysctl %d\n", op.cmd);
563 #endif
564 		error = -X_EINVAL;
565 	}
566 
567 	if (error == 0)
568 		error = HYPERVISOR_sysctl(&op);
569 
570 	export_buffer(&op_ie, &error);
571 	export_buffer(&sub_ie, &error);
572 	export_buffer(&sub2_ie, &error);
573 
574 	return (error);
575 }
576 
577 static int
578 privcmd_HYPERVISOR_platform_op(xen_platform_op_t *opp)
579 {
580 	import_export_t op_ie, sub_ie, sub2_ie;
581 	xen_platform_op_t op;
582 	int error;
583 
584 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
585 		return (-X_EFAULT);
586 
587 	sub_ie = null_ie;
588 	sub2_ie = null_ie;
589 
590 	/*
591 	 * Check this first because our wrapper will forcibly overwrite it.
592 	 */
593 	if (op.interface_version != XENPF_INTERFACE_VERSION) {
594 		error = -X_EACCES;
595 		export_buffer(&op_ie, &error);
596 		return (error);
597 	}
598 
599 	/*
600 	 * Now handle any platform ops with embedded pointers elsewhere
601 	 * in the user address space that also need to be tacked down
602 	 * while the hypervisor futzes with them.
603 	 */
604 	switch (op.cmd) {
605 	case XENPF_settime:
606 	case XENPF_add_memtype:
607 	case XENPF_del_memtype:
608 	case XENPF_read_memtype:
609 	case XENPF_platform_quirk:
610 	case XENPF_enter_acpi_sleep:
611 	case XENPF_change_freq:
612 	case XENPF_panic_init:
613 		break;
614 
615 	case XENPF_microcode_update:
616 		error = import_handle(&sub_ie, &op.u.microcode.data,
617 		    op.u.microcode.length, IE_IMPORT);
618 		break;
619 	case XENPF_getidletime:
620 		error = import_handle(&sub_ie, &op.u.getidletime.cpumap_bitmap,
621 		    op.u.getidletime.cpumap_nr_cpus, IE_IMPEXP);
622 		if (error != 0)
623 			break;
624 
625 		error = import_handle(&sub2_ie, &op.u.getidletime.idletime,
626 		    op.u.getidletime.cpumap_nr_cpus * sizeof (uint64_t),
627 		    IE_EXPORT);
628 		break;
629 
630 	case XENPF_set_processor_pminfo: {
631 		size_t s;
632 
633 		switch (op.u.set_pminfo.type) {
634 		case XEN_PM_PX:
635 			s = op.u.set_pminfo.u.perf.state_count *
636 			    sizeof (xen_processor_px_t);
637 			if (op.u.set_pminfo.u.perf.flags & XEN_PX_PSS) {
638 				error = import_handle(&sub_ie,
639 				    &op.u.set_pminfo.u.perf.states, s,
640 				    IE_IMPORT);
641 			}
642 			break;
643 		case XEN_PM_CX:
644 			s = op.u.set_pminfo.u.power.count *
645 			    sizeof (xen_processor_cx_t);
646 			error = import_handle(&sub_ie,
647 			    &op.u.set_pminfo.u.power.states, s, IE_IMPORT);
648 			break;
649 		case XEN_PM_TX:
650 			break;
651 		default:
652 			error = -X_EINVAL;
653 			break;
654 		}
655 		break;
656 	}
657 	case XENPF_firmware_info: {
658 		uint16_t len;
659 		void *uaddr;
660 
661 		switch (op.u.firmware_info.type) {
662 		case XEN_FW_DISK_INFO:
663 			/*
664 			 * Ugh.. another hokey interface. The first 16 bits
665 			 * of the buffer are also used as the (input) length.
666 			 */
667 			uaddr = uaddr_from_handle(
668 			    &op.u.firmware_info.u.disk_info.edd_params);
669 			error = ddi_copyin(uaddr, &len, sizeof (len), 0);
670 			if (error != 0)
671 				break;
672 			error = import_handle(&sub_ie,
673 			    &op.u.firmware_info.u.disk_info.edd_params, len,
674 			    IE_IMPEXP);
675 			break;
676 		case XEN_FW_VBEDDC_INFO:
677 			error = import_handle(&sub_ie,
678 			    &op.u.firmware_info.u.vbeddc_info.edid, 128,
679 			    IE_EXPORT);
680 			break;
681 		case XEN_FW_DISK_MBR_SIGNATURE:
682 		default:
683 			break;
684 		}
685 		break;
686 	}
687 	default:
688 		/* FIXME: see this with non-existed ID 38 ???? */
689 #ifdef DEBUG
690 		printf("unrecognized HYPERVISOR_platform_op %d pid %d\n",
691 		    op.cmd, curthread->t_procp->p_pid);
692 #endif
693 		return (-X_EINVAL);
694 	}
695 
696 	if (error == 0)
697 		error = HYPERVISOR_platform_op(&op);
698 
699 	export_buffer(&op_ie, &error);
700 	export_buffer(&sub_ie, &error);
701 	export_buffer(&sub2_ie, &error);
702 
703 	return (error);
704 }
705 
706 static int
707 privcmd_HYPERVISOR_memory_op(int cmd, void *arg)
708 {
709 	int error = 0;
710 	import_export_t op_ie, sub_ie, gpfn_ie, mfn_ie;
711 	union {
712 		domid_t domid;
713 		struct xen_memory_reservation resv;
714 		struct xen_machphys_mfn_list xmml;
715 		struct xen_add_to_physmap xatp;
716 		struct xen_translate_gpfn_list tgl;
717 		struct xen_memory_map mm;
718 		struct xen_foreign_memory_map fmm;
719 	} op_arg;
720 
721 	op_ie = sub_ie = gpfn_ie = mfn_ie = null_ie;
722 
723 	switch (cmd) {
724 	case XENMEM_increase_reservation:
725 	case XENMEM_decrease_reservation:
726 	case XENMEM_populate_physmap: {
727 		ulong_t *taddr;
728 
729 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.resv),
730 		    IE_IMPEXP) != 0)
731 			return (-X_EFAULT);
732 
733 		error = import_handle(&sub_ie, &op_arg.resv.extent_start,
734 		    (op_arg.resv.nr_extents * sizeof (ulong_t)), IE_IMPEXP);
735 
736 		if (error == -X_EFAULT)
737 			/*LINTED: constant in conditional context*/
738 			get_xen_guest_handle(taddr, op_arg.resv.extent_start);
739 		else
740 			taddr = sub_ie.ie_kaddr;
741 
742 		switch (cmd) {
743 		case XENMEM_increase_reservation:
744 			DTRACE_XPV4(increase__reservation__start,
745 			    domid_t, op_arg.resv.domid,
746 			    ulong_t, op_arg.resv.nr_extents,
747 			    uint_t, op_arg.resv.extent_order,
748 			    ulong_t *, taddr);
749 			break;
750 		case XENMEM_decrease_reservation:
751 			DTRACE_XPV4(decrease__reservation__start,
752 			    domid_t, op_arg.resv.domid,
753 			    ulong_t, op_arg.resv.nr_extents,
754 			    uint_t, op_arg.resv.extent_order,
755 			    ulong_t *, taddr);
756 			break;
757 		case XENMEM_populate_physmap:
758 			DTRACE_XPV3(populate__physmap__start,
759 			    domid_t, op_arg.resv.domid,
760 			    ulong_t, op_arg.resv.nr_extents,
761 			    ulong_t *, taddr);
762 			break;
763 		}
764 
765 		break;
766 	}
767 
768 	case XENMEM_maximum_ram_page:
769 		break;
770 
771 	case XENMEM_current_reservation:
772 	case XENMEM_maximum_reservation:
773 	case XENMEM_maximum_gpfn:
774 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid),
775 		    IE_IMPEXP) != 0)
776 			return (-X_EFAULT);
777 		break;
778 
779 	case XENMEM_machphys_mfn_list: {
780 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xmml),
781 		    IE_IMPEXP) != 0)
782 			return (-X_EFAULT);
783 
784 		error = import_handle(&sub_ie, &op_arg.xmml.extent_start,
785 		    (op_arg.xmml.max_extents * sizeof (ulong_t)), IE_IMPEXP);
786 		break;
787 	}
788 
789 	case XENMEM_add_to_physmap:
790 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xatp),
791 		    IE_IMPEXP) != 0)
792 			return (-X_EFAULT);
793 		DTRACE_XPV4(add__to__physmap__start, domid_t,
794 		    op_arg.xatp.domid, uint_t, op_arg.xatp.space, ulong_t,
795 		    op_arg.xatp.idx, ulong_t, op_arg.xatp.gpfn);
796 		break;
797 
798 	case XENMEM_translate_gpfn_list: {
799 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.tgl),
800 		    IE_IMPEXP) != 0)
801 			return (-X_EFAULT);
802 
803 		error = import_handle(&gpfn_ie, &op_arg.tgl.gpfn_list,
804 		    (op_arg.tgl.nr_gpfns * sizeof (long)), IE_IMPORT);
805 		if (error == 0)
806 			error = import_handle(&mfn_ie, &op_arg.tgl.mfn_list,
807 			    (op_arg.tgl.nr_gpfns * sizeof (long)), IE_EXPORT);
808 		break;
809 	}
810 
811 	case XENMEM_memory_map:
812 	case XENMEM_machine_memory_map: {
813 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.mm),
814 		    IE_EXPORT) != 0)
815 			return (-X_EFAULT);
816 
817 		/*
818 		 * XXPV: ugh. e820entry is packed, but not in the kernel, since
819 		 * we remove all attributes; seems like this is a nice way to
820 		 * break mysteriously.
821 		 */
822 		error = import_handle(&sub_ie, &op_arg.mm.buffer,
823 		    (op_arg.mm.nr_entries * 20), IE_IMPEXP);
824 		break;
825 	}
826 
827 	case XENMEM_set_memory_map: {
828 		struct xen_memory_map *taddr;
829 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.fmm),
830 		    IE_IMPORT) != 0)
831 			return (-X_EFAULT);
832 
833 		/*
834 		 * As above.
835 		 */
836 		error = import_handle(&sub_ie, &op_arg.fmm.map.buffer,
837 		    (op_arg.fmm.map.nr_entries * 20), IE_IMPEXP);
838 
839 		if (error == -X_EFAULT)
840 			/*LINTED: constant in conditional context*/
841 			get_xen_guest_handle(taddr, op_arg.fmm.map.buffer);
842 		else
843 			taddr = sub_ie.ie_kaddr;
844 		DTRACE_XPV3(set__memory__map__start, domid_t,
845 		    op_arg.fmm.domid, int, op_arg.fmm.map.nr_entries,
846 		    struct xen_memory_map *, taddr);
847 		break;
848 	}
849 
850 	default:
851 #ifdef DEBUG
852 		printf("unrecognized HYPERVISOR_memory_op %d\n", cmd);
853 #endif
854 		return (-X_EINVAL);
855 	}
856 
857 	if (error == 0)
858 		error = HYPERVISOR_memory_op(cmd,
859 		    (arg == NULL) ? NULL: &op_arg);
860 
861 	export_buffer(&op_ie, &error);
862 	export_buffer(&sub_ie, &error);
863 	export_buffer(&gpfn_ie, &error);
864 	export_buffer(&mfn_ie, &error);
865 
866 	switch (cmd) {
867 	case XENMEM_increase_reservation:
868 		DTRACE_XPV1(increase__reservation__end, int, error);
869 		break;
870 	case XENMEM_decrease_reservation:
871 		DTRACE_XPV1(decrease__reservation__end, int, error);
872 		break;
873 	case XENMEM_populate_physmap:
874 		DTRACE_XPV1(populate__physmap__end, int, error);
875 		break;
876 	case XENMEM_add_to_physmap:
877 		DTRACE_XPV1(add__to__physmap__end, int, error);
878 		break;
879 	case XENMEM_set_memory_map:
880 		DTRACE_XPV1(set__memory__map__end, int, error);
881 		break;
882 	}
883 	return (error);
884 }
885 
886 static int
887 privcmd_HYPERVISOR_event_channel_op(int cmd, void *arg)
888 {
889 	int error;
890 	size_t size;
891 	import_export_t op_ie;
892 	uint32_t flags;
893 
894 	switch (cmd) {
895 	case EVTCHNOP_alloc_unbound:
896 		size = sizeof (evtchn_alloc_unbound_t);
897 		flags = IE_IMPEXP;
898 		break;
899 	case EVTCHNOP_bind_interdomain:
900 		size = sizeof (evtchn_bind_interdomain_t);
901 		flags = IE_IMPEXP;
902 		break;
903 	case EVTCHNOP_bind_virq:
904 		size = sizeof (evtchn_bind_virq_t);
905 		flags = IE_IMPEXP;
906 		break;
907 	case EVTCHNOP_bind_pirq:
908 		size = sizeof (evtchn_bind_pirq_t);
909 		flags = IE_IMPEXP;
910 		break;
911 	case EVTCHNOP_bind_ipi:
912 		size = sizeof (evtchn_bind_ipi_t);
913 		flags = IE_IMPEXP;
914 		break;
915 	case EVTCHNOP_close:
916 		size = sizeof (evtchn_close_t);
917 		flags = IE_IMPORT;
918 		break;
919 	case EVTCHNOP_send:
920 		size = sizeof (evtchn_send_t);
921 		flags = IE_IMPORT;
922 		break;
923 	case EVTCHNOP_status:
924 		size = sizeof (evtchn_status_t);
925 		flags = IE_IMPEXP;
926 		break;
927 	case EVTCHNOP_bind_vcpu:
928 		size = sizeof (evtchn_bind_vcpu_t);
929 		flags = IE_IMPORT;
930 		break;
931 	case EVTCHNOP_unmask:
932 		size = sizeof (evtchn_unmask_t);
933 		flags = IE_IMPORT;
934 		break;
935 	case EVTCHNOP_reset:
936 		size = sizeof (evtchn_reset_t);
937 		flags = IE_IMPORT;
938 		break;
939 
940 	default:
941 #ifdef DEBUG
942 		printf("unrecognized HYPERVISOR_event_channel op %d\n", cmd);
943 #endif
944 		return (-X_EINVAL);
945 	}
946 
947 	error = import_buffer(&op_ie, arg, NULL, size, flags);
948 
949 	/*
950 	 * If there is sufficient demand, we can replace this void * with
951 	 * the proper op structure pointer.
952 	 */
953 	DTRACE_XPV2(evtchn__op__start, int, cmd, void *,
954 	    ((error == -X_EFAULT) ? arg : op_ie.ie_kaddr));
955 
956 	if (error == 0)
957 		error = HYPERVISOR_event_channel_op(cmd, op_ie.ie_kaddr);
958 	export_buffer(&op_ie, &error);
959 
960 	DTRACE_XPV1(evtchn__op__end, int, error);
961 
962 	return (error);
963 }
964 
965 static int
966 privcmd_HYPERVISOR_xen_version(int cmd, void *arg)
967 {
968 	int error;
969 	int size = 0;
970 	import_export_t op_ie;
971 	uint32_t flags = IE_EXPORT;
972 
973 	switch (cmd) {
974 	case XENVER_version:
975 		break;
976 	case XENVER_extraversion:
977 		size = sizeof (xen_extraversion_t);
978 		break;
979 	case XENVER_compile_info:
980 		size = sizeof (xen_compile_info_t);
981 		break;
982 	case XENVER_capabilities:
983 		size = sizeof (xen_capabilities_info_t);
984 		break;
985 	case XENVER_changeset:
986 		size = sizeof (xen_changeset_info_t);
987 		break;
988 	case XENVER_platform_parameters:
989 		size = sizeof (xen_platform_parameters_t);
990 		break;
991 	case XENVER_get_features:
992 		flags = IE_IMPEXP;
993 		size = sizeof (xen_feature_info_t);
994 		break;
995 	case XENVER_pagesize:
996 		break;
997 	case XENVER_guest_handle:
998 		size = sizeof (xen_domain_handle_t);
999 		break;
1000 
1001 	default:
1002 #ifdef DEBUG
1003 		printf("unrecognized HYPERVISOR_xen_version op %d\n", cmd);
1004 #endif
1005 		return (-X_EINVAL);
1006 	}
1007 
1008 	error = import_buffer(&op_ie, arg, NULL, size, flags);
1009 	if (error == 0)
1010 		error = HYPERVISOR_xen_version(cmd, op_ie.ie_kaddr);
1011 	export_buffer(&op_ie, &error);
1012 
1013 	return (error);
1014 }
1015 
1016 static int
1017 privcmd_HYPERVISOR_xsm_op(void *uacmctl)
1018 {
1019 	int error;
1020 	struct xen_acmctl *acmctl;
1021 	import_export_t op_ie;
1022 
1023 	error = import_buffer(&op_ie, uacmctl, NULL, sizeof (*acmctl),
1024 	    IE_IMPEXP);
1025 	if (error != 0)
1026 		return (error);
1027 
1028 	acmctl = op_ie.ie_kaddr;
1029 
1030 	if (acmctl->interface_version != ACM_INTERFACE_VERSION) {
1031 #ifdef DEBUG
1032 		printf("acm vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
1033 		    acmctl->cmd, acmctl->interface_version,
1034 		    ACM_INTERFACE_VERSION);
1035 #endif
1036 		error = -X_EACCES;
1037 		export_buffer(&op_ie, &error);
1038 		return (error);
1039 	}
1040 
1041 	/* FIXME: flask ops??? */
1042 
1043 	switch (acmctl->cmd) {
1044 	case ACMOP_setpolicy:
1045 	case ACMOP_getpolicy:
1046 	case ACMOP_dumpstats:
1047 	case ACMOP_getssid:
1048 	case ACMOP_getdecision:
1049 	case ACMOP_chgpolicy:
1050 	case ACMOP_relabeldoms:
1051 		/* flags = IE_IMPEXP; */
1052 		break;
1053 	default:
1054 #ifdef DEBUG
1055 		printf("unrecognized HYPERVISOR_xsm_op op %d\n", acmctl->cmd);
1056 #endif
1057 		return (-X_EINVAL);
1058 	}
1059 
1060 	if (error == 0)
1061 		error = HYPERVISOR_xsm_op(acmctl);
1062 	export_buffer(&op_ie, &error);
1063 
1064 	return (error);
1065 }
1066 
1067 static int
1068 privcmd_HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, uint_t *scount,
1069     domid_t domid)
1070 {
1071 	int error, bytes;
1072 	uint_t kscount;
1073 	struct mmuext_op *kop, single_kop;
1074 	import_export_t op_ie, scnt_ie;
1075 
1076 	op_ie = scnt_ie = null_ie;
1077 	error = 0;
1078 
1079 	if (count >= 1) {
1080 		bytes = count * sizeof (*kop);
1081 		kop = (count == 1) ? &single_kop : kmem_alloc(bytes, KM_SLEEP);
1082 		error = import_buffer(&op_ie, op, kop, bytes, IE_IMPORT);
1083 	}
1084 
1085 	DTRACE_XPV2(mmu__ext__op__start, int, count, struct mmuext_op *,
1086 	    ((error == -X_EFAULT) ? op : kop));
1087 
1088 	if (scount != NULL && error == 0)
1089 		error = import_buffer(&scnt_ie, scount, &kscount,
1090 		    sizeof (kscount), IE_EXPORT);
1091 
1092 	if (error == 0)
1093 		error = HYPERVISOR_mmuext_op(kop, count, &kscount, domid);
1094 	export_buffer(&op_ie, &error);
1095 	export_buffer(&scnt_ie, &error);
1096 
1097 	DTRACE_XPV1(mmu__ext__op__end, int, error);
1098 
1099 	if (count > 1)
1100 		kmem_free(kop, bytes);
1101 	return (error);
1102 }
1103 
1104 static int
1105 privcmd_HYPERVISOR_hvm_op(int cmd, void *arg)
1106 {
1107 	int error;
1108 	int size = 0;
1109 	import_export_t arg_ie;
1110 	uint32_t flags = IE_IMPORT;
1111 
1112 	switch (cmd) {
1113 	case HVMOP_set_param:
1114 	case HVMOP_get_param:
1115 		size = sizeof (struct xen_hvm_param);
1116 		flags = IE_IMPEXP;
1117 		break;
1118 	case HVMOP_set_pci_intx_level:
1119 		size = sizeof (struct xen_hvm_set_pci_intx_level);
1120 		break;
1121 	case HVMOP_set_isa_irq_level:
1122 		size = sizeof (struct xen_hvm_set_isa_irq_level);
1123 		break;
1124 	case HVMOP_set_pci_link_route:
1125 		size = sizeof (struct xen_hvm_set_pci_link_route);
1126 		break;
1127 	case HVMOP_track_dirty_vram:
1128 		size = sizeof (struct xen_hvm_track_dirty_vram);
1129 		break;
1130 	case HVMOP_modified_memory:
1131 		size = sizeof (struct xen_hvm_modified_memory);
1132 		break;
1133 	case HVMOP_set_mem_type:
1134 		size = sizeof (struct xen_hvm_set_mem_type);
1135 		break;
1136 
1137 	default:
1138 #ifdef DEBUG
1139 		printf("unrecognized HVM op 0x%x\n", cmd);
1140 #endif
1141 		return (-X_EINVAL);
1142 	}
1143 
1144 	error = import_buffer(&arg_ie, arg, NULL, size, flags);
1145 	if (error == 0)
1146 		error = HYPERVISOR_hvm_op(cmd, arg_ie.ie_kaddr);
1147 	export_buffer(&arg_ie, &error);
1148 
1149 	return (error);
1150 }
1151 
1152 static int
1153 privcmd_HYPERVISOR_sched_op(int cmd, void *arg)
1154 {
1155 	int error;
1156 	int size = 0;
1157 	import_export_t op_ie;
1158 	struct sched_remote_shutdown op;
1159 
1160 	switch (cmd) {
1161 	case SCHEDOP_remote_shutdown:
1162 		size = sizeof (struct sched_remote_shutdown);
1163 		break;
1164 	default:
1165 #ifdef DEBUG
1166 		printf("unrecognized sched op 0x%x\n", cmd);
1167 #endif
1168 		return (-X_EINVAL);
1169 	}
1170 
1171 	error = import_buffer(&op_ie, arg, &op, size, IE_IMPORT);
1172 	if (error == 0)
1173 		error = HYPERVISOR_sched_op(cmd, (arg == NULL) ? NULL : &op);
1174 	export_buffer(&op_ie, &error);
1175 
1176 	return (error);
1177 }
1178 
1179 int allow_all_hypercalls = 0;	/* nonzero: forward unlisted hypercall ops */
1180 int privcmd_efault_debug = 0;	/* NOTE(review): no user in this part of the file; confirm purpose */
1181 
1182 /*ARGSUSED*/
1183 int
1184 do_privcmd_hypercall(void *uarg, int mode, cred_t *cr, int *rval)
1185 {
1186 	privcmd_hypercall_t __hc, *hc = &__hc;
1187 	int error;
1188 
1189 	if (ddi_copyin(uarg, hc, sizeof (*hc), mode))
1190 		return (EFAULT);
1191 
1192 	switch (hc->op) {
1193 	case __HYPERVISOR_mmu_update:
1194 		error = privcmd_HYPERVISOR_mmu_update(
1195 		    (mmu_update_t *)hc->arg[0], (int)hc->arg[1],
1196 		    (int *)hc->arg[2], (domid_t)hc->arg[3]);
1197 		break;
1198 	case __HYPERVISOR_domctl:
1199 		error = privcmd_HYPERVISOR_domctl(
1200 		    (xen_domctl_t *)hc->arg[0]);
1201 		break;
1202 	case __HYPERVISOR_sysctl:
1203 		error = privcmd_HYPERVISOR_sysctl(
1204 		    (xen_sysctl_t *)hc->arg[0]);
1205 		break;
1206 	case __HYPERVISOR_platform_op:
1207 		error = privcmd_HYPERVISOR_platform_op(
1208 		    (xen_platform_op_t *)hc->arg[0]);
1209 		break;
1210 	case __HYPERVISOR_memory_op:
1211 		error = privcmd_HYPERVISOR_memory_op(
1212 		    (int)hc->arg[0], (void *)hc->arg[1]);
1213 		break;
1214 	case __HYPERVISOR_event_channel_op:
1215 		error = privcmd_HYPERVISOR_event_channel_op(
1216 		    (int)hc->arg[0], (void *)hc->arg[1]);
1217 		break;
1218 	case __HYPERVISOR_xen_version:
1219 		error = privcmd_HYPERVISOR_xen_version(
1220 		    (int)hc->arg[0], (void *)hc->arg[1]);
1221 		break;
1222 	case __HYPERVISOR_mmuext_op:
1223 		error = privcmd_HYPERVISOR_mmuext_op(
1224 		    (struct mmuext_op *)hc->arg[0], (int)hc->arg[1],
1225 		    (uint_t *)hc->arg[2], (domid_t)hc->arg[3]);
1226 		break;
1227 	case __HYPERVISOR_xsm_op:
1228 		error = privcmd_HYPERVISOR_xsm_op((void *)hc->arg[0]);
1229 		break;
1230 	case __HYPERVISOR_hvm_op:
1231 		error = privcmd_HYPERVISOR_hvm_op(
1232 		    (int)hc->arg[0], (void *)hc->arg[1]);
1233 		break;
1234 	case __HYPERVISOR_sched_op:
1235 		error = privcmd_HYPERVISOR_sched_op(
1236 		    (int)hc->arg[0], (void *)hc->arg[1]);
1237 		break;
1238 	default:
1239 		if (allow_all_hypercalls)
1240 			error = __hypercall5(hc->op, hc->arg[0], hc->arg[1],
1241 			    hc->arg[2], hc->arg[3], hc->arg[4]);
1242 		else {
1243 #ifdef DEBUG
1244 			printf("unrecognized hypercall %ld\n", hc->op);
1245 #endif
1246 			error = -X_EPERM;
1247 		}
1248 		break;
1249 	}
1250 
1251 	if (error > 0) {
1252 		*rval = error;
1253 		error = 0;
1254 	} else if (error != 0)
1255 		error = xen_xlate_errcode(error);
1256 
1257 	return (error);
1258 }
1259