xref: /titanic_41/usr/src/uts/i86xpv/io/privcmd_hcall.c (revision 07d06da50d310a325b457d6330165aebab1e0064)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/xpv_user.h>
27 
28 #include <sys/types.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/open.h>
32 #include <sys/cred.h>
33 #include <sys/conf.h>
34 #include <sys/stat.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/vmsystm.h>
39 #include <sys/hypervisor.h>
40 #include <sys/xen_errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/sdt.h>
43 
44 #include <xen/sys/privcmd.h>
45 #include <sys/privcmd_impl.h>
46 
/*
 * Bits stored in import_export_t.ie_flags (and accepted as the 'flags'
 * argument of import_buffer()).
 */
#define	IE_IMPORT	0x0001		/* Data needs to be copied in */
#define	IE_EXPORT	0x0002		/* Data needs to be copied out */
#define	IE_FREE		0x0004		/* ie_kaddr was allocated for us */
#define	IE_IMPEXP	(IE_IMPORT | IE_EXPORT)

/*
 * Tracks one user buffer that has been mirrored into the kernel for the
 * duration of a hypercall.
 */
typedef struct import_export {
	void		*ie_uaddr;	/* user address of the buffer */
	void		*ie_kaddr;	/* kernel copy of the buffer */
	size_t		ie_size;	/* length of the buffer in bytes */
	uint32_t	ie_flags;	/* IE_EXPORT and/or IE_FREE */
} import_export_t;

/* Descriptor meaning "no buffer"; export_buffer() ignores it. */
static import_export_t null_ie = {NULL, NULL, 0, 0};
60 
/*
 * Extract the raw pointer stored in a Xen guest handle.  'field' is the
 * address of the handle, which is viewed here as a structure wrapping a
 * single pointer.
 */
static void *
uaddr_from_handle(void *field)
{
	struct { void *p; } *handle = field;
	void *uaddr;

	/*LINTED: constant in conditional context*/
	get_xen_guest_handle(uaddr, (*handle));

	return (uaddr);
}
71 
72 
73 /*
74  * Import a buffer from user-space.  If the caller provides a kernel
75  * address, we import to that address.  If not, we kmem_alloc() the space
76  * ourselves.
77  */
78 static int
79 import_buffer(import_export_t *iep, void *uaddr, void *kaddr, size_t size,
80     uint32_t flags)
81 {
82 	iep->ie_uaddr = uaddr;
83 	iep->ie_size = size;
84 	iep->ie_flags = flags & IE_EXPORT;
85 
86 	if (size == 0 || uaddr == NULL) {
87 		*iep = null_ie;
88 		return (0);
89 	}
90 
91 	if (kaddr == NULL) {
92 		iep->ie_kaddr = kmem_alloc(size, KM_SLEEP);
93 		iep->ie_flags |= IE_FREE;
94 	} else {
95 		iep->ie_kaddr = kaddr;
96 		iep->ie_flags &= ~IE_FREE;
97 	}
98 
99 	if ((flags & IE_IMPORT) &&
100 	    (ddi_copyin(uaddr, iep->ie_kaddr, size, 0) != 0)) {
101 		if (iep->ie_flags & IE_FREE) {
102 			kmem_free(iep->ie_kaddr, iep->ie_size);
103 			iep->ie_kaddr = NULL;
104 			iep->ie_flags = 0;
105 		}
106 		return (-X_EFAULT);
107 	}
108 
109 	return (0);
110 }
111 
112 static void
113 export_buffer(import_export_t *iep, int *error)
114 {
115 	int copy_err = 0;
116 
117 	if (iep->ie_size == 0 || iep->ie_uaddr == NULL)
118 		return;
119 
120 	/*
121 	 * If the buffer was marked for export initially, and if the
122 	 * hypercall completed successfully, resync the user-space buffer
123 	 * with our in-kernel buffer.
124 	 */
125 	if ((iep->ie_flags & IE_EXPORT) && (*error >= 0) &&
126 	    (ddi_copyout(iep->ie_kaddr, iep->ie_uaddr, iep->ie_size, 0) != 0))
127 		copy_err = -X_EFAULT;
128 	if (iep->ie_flags & IE_FREE) {
129 		kmem_free(iep->ie_kaddr, iep->ie_size);
130 		iep->ie_kaddr = NULL;
131 		iep->ie_flags = 0;
132 	}
133 
134 	if (copy_err != 0 && *error >= 0)
135 		*error = copy_err;
136 }
137 
138 /*
139  * Xen 'op' structures often include pointers disguised as 'handles', which
140  * refer to addresses in user space.  This routine copies a buffer
141  * associated with an embedded pointer into kernel space, and replaces the
142  * pointer to userspace with a pointer to the new kernel buffer.
143  *
144  * Note: if Xen ever redefines the structure of a 'handle', this routine
145  * (specifically the definition of 'hdl') will need to be updated.
146  */
147 static int
148 import_handle(import_export_t *iep, void *field, size_t size, int flags)
149 {
150 	struct { void *p; } *hdl = field;
151 	void *ptr;
152 	int err;
153 
154 	ptr = uaddr_from_handle(field);
155 	err = import_buffer(iep, ptr, NULL, size, (flags));
156 	if (err == 0) {
157 		/*LINTED: constant in conditional context*/
158 		set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr));
159 	}
160 	return (err);
161 }
162 
163 static int
164 privcmd_HYPERVISOR_mmu_update(mmu_update_t *ureq, int count, int *scount,
165     domid_t domid)
166 {
167 	mmu_update_t *kreq, single_kreq;
168 	import_export_t cnt_ie, req_ie;
169 	int error, kscount, bytes;
170 
171 	bytes = count * sizeof (*kreq);
172 	kreq = (count == 1) ? &single_kreq : kmem_alloc(bytes, KM_SLEEP);
173 
174 	error = import_buffer(&cnt_ie, scount, &kscount, sizeof (kscount),
175 	    IE_IMPEXP);
176 	if (error != 0)
177 		req_ie = null_ie;
178 	else
179 		error = import_buffer(&req_ie, ureq, kreq, bytes, IE_IMPEXP);
180 
181 	DTRACE_XPV3(mmu__update__start, int, domid, int, count, mmu_update_t *,
182 	    ((error == -X_EFAULT) ? ureq : kreq));
183 
184 	if (error == 0)
185 		error = HYPERVISOR_mmu_update(kreq, count, &kscount, domid);
186 	export_buffer(&cnt_ie, &error);
187 	export_buffer(&req_ie, &error);
188 	if (count != 1)
189 		kmem_free(kreq, bytes);
190 
191 	DTRACE_XPV1(mmu__update__end, int, error);
192 	return (error);
193 }
194 
195 static int
196 privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
197 {
198 	xen_domctl_t op;
199 	import_export_t op_ie, sub_ie;
200 	int error = 0;
201 
202 	if ((error = import_buffer(&op_ie, opp, &op, sizeof (op),
203 	    IE_IMPEXP)) != 0)
204 		return (error);
205 
206 	sub_ie = null_ie;
207 
208 	/*
209 	 * Check this first because our wrapper will forcibly overwrite it.
210 	 */
211 	if (op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) {
212 #ifdef DEBUG
213 		printf("domctl vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
214 		    op.cmd, op.interface_version, XEN_DOMCTL_INTERFACE_VERSION);
215 #endif
216 		error = -X_EACCES;
217 		export_buffer(&op_ie, &error);
218 		return (error);
219 	}
220 
221 	/*
222 	 * Now handle any domctl ops with embedded pointers elsewhere
223 	 * in the user address space that also need to be tacked down
224 	 * while the hypervisor futzes with them.
225 	 */
226 	switch (op.cmd) {
227 	case XEN_DOMCTL_createdomain:
228 		DTRACE_XPV1(dom__create__start, xen_domctl_t *,
229 		    &op.u.createdomain);
230 		break;
231 
232 	case XEN_DOMCTL_destroydomain:
233 		DTRACE_XPV1(dom__destroy__start, domid_t, op.domain);
234 		break;
235 
236 	case XEN_DOMCTL_pausedomain:
237 		DTRACE_XPV1(dom__pause__start, domid_t, op.domain);
238 		break;
239 
240 	case XEN_DOMCTL_unpausedomain:
241 		DTRACE_XPV1(dom__unpause__start, domid_t, op.domain);
242 		break;
243 
244 	case XEN_DOMCTL_getmemlist: {
245 		error = import_handle(&sub_ie, &op.u.getmemlist.buffer,
246 		    op.u.getmemlist.max_pfns * sizeof (xen_pfn_t), IE_EXPORT);
247 		break;
248 	}
249 
250 	case XEN_DOMCTL_getpageframeinfo2: {
251 		error = import_handle(&sub_ie, &op.u.getpageframeinfo2.array,
252 		    op.u.getpageframeinfo2.num * sizeof (ulong_t), IE_IMPEXP);
253 		break;
254 	}
255 
256 	case XEN_DOMCTL_shadow_op: {
257 		size_t size;
258 
259 		size = roundup(howmany(op.u.shadow_op.pages, NBBY),
260 		    sizeof (ulong_t));
261 		error = import_handle(&sub_ie,
262 		    &op.u.shadow_op.dirty_bitmap, size, IE_IMPEXP);
263 		break;
264 	}
265 
266 	case XEN_DOMCTL_setvcpucontext: {
267 		vcpu_guest_context_t *taddr;
268 		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
269 		    sizeof (vcpu_guest_context_t), IE_IMPORT);
270 		if (error == -X_EFAULT)
271 			/*LINTED: constant in conditional context*/
272 			get_xen_guest_handle_u(taddr, op.u.vcpucontext.ctxt);
273 		else
274 			taddr = sub_ie.ie_kaddr;
275 		DTRACE_XPV2(setvcpucontext__start, domid_t, op.domain,
276 		    vcpu_guest_context_t *, taddr);
277 		break;
278 	}
279 
280 	case XEN_DOMCTL_getvcpucontext: {
281 		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
282 		    sizeof (vcpu_guest_context_t), IE_EXPORT);
283 		break;
284 	}
285 
286 
287 	case XEN_DOMCTL_sethvmcontext: {
288 		error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
289 		    op.u.hvmcontext.size, IE_IMPORT);
290 		break;
291 	}
292 
293 	case XEN_DOMCTL_gethvmcontext: {
294 #if !defined(__GNUC__) && defined(__i386__)
295 		if (op.u.hvmcontext.buffer.u.p != NULL)
296 #else
297 		if (op.u.hvmcontext.buffer.p != NULL)
298 #endif
299 			error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
300 			    op.u.hvmcontext.size, IE_EXPORT);
301 		break;
302 	}
303 
304 	case XEN_DOMCTL_getdomaininfo:
305 	case XEN_DOMCTL_getpageframeinfo:
306 	case XEN_DOMCTL_max_mem:
307 	case XEN_DOMCTL_resumedomain:
308 	case XEN_DOMCTL_getvcpuinfo:
309 	case XEN_DOMCTL_setvcpuaffinity:
310 	case XEN_DOMCTL_getvcpuaffinity:
311 	case XEN_DOMCTL_max_vcpus:
312 	case XEN_DOMCTL_scheduler_op:
313 	case XEN_DOMCTL_setdomainhandle:
314 	case XEN_DOMCTL_setdebugging:
315 	case XEN_DOMCTL_irq_permission:
316 	case XEN_DOMCTL_iomem_permission:
317 	case XEN_DOMCTL_ioport_permission:
318 	case XEN_DOMCTL_hypercall_init:
319 	case XEN_DOMCTL_arch_setup:
320 	case XEN_DOMCTL_settimeoffset:
321 	case XEN_DOMCTL_real_mode_area:
322 	case XEN_DOMCTL_sendtrigger:
323 	case XEN_DOMCTL_assign_device:
324 	case XEN_DOMCTL_bind_pt_irq:
325 	case XEN_DOMCTL_get_address_size:
326 	case XEN_DOMCTL_set_address_size:
327 	case XEN_DOMCTL_get_ext_vcpucontext:
328 	case XEN_DOMCTL_set_ext_vcpucontext:
329 	case XEN_DOMCTL_set_opt_feature:
330 	case XEN_DOMCTL_memory_mapping:
331 	case XEN_DOMCTL_ioport_mapping:
332 	case XEN_DOMCTL_pin_mem_cacheattr:
333 	case XEN_DOMCTL_test_assign_device:
334 	case XEN_DOMCTL_set_target:
335 	case XEN_DOMCTL_deassign_device:
336 	case XEN_DOMCTL_set_cpuid:
337 	case XEN_DOMCTL_get_device_group:
338 	case XEN_DOMCTL_get_machine_address_size:
339 	case XEN_DOMCTL_set_machine_address_size:
340 	case XEN_DOMCTL_suppress_spurious_page_faults:
341 		break;
342 
343 	default:
344 #ifdef DEBUG
345 		printf("unrecognized HYPERVISOR_domctl %d\n", op.cmd);
346 #endif
347 		error = -X_EINVAL;
348 	}
349 
350 	if (error == 0)
351 		error = HYPERVISOR_domctl(&op);
352 
353 	export_buffer(&op_ie, &error);
354 	export_buffer(&sub_ie, &error);
355 
356 	switch (op.cmd) {
357 	case XEN_DOMCTL_createdomain:
358 		DTRACE_XPV1(dom__create__end, int, error);
359 		break;
360 	case XEN_DOMCTL_destroydomain:
361 		DTRACE_XPV1(dom__destroy__end, int, error);
362 		break;
363 	case XEN_DOMCTL_pausedomain:
364 		DTRACE_XPV1(dom__pause__end, int, error);
365 		break;
366 	case XEN_DOMCTL_unpausedomain:
367 		DTRACE_XPV1(dom__unpause__end, int, error);
368 		break;
369 	case XEN_DOMCTL_setvcpucontext:
370 		DTRACE_XPV1(setvcpucontext__end, int, error);
371 		break;
372 	default:
373 		;
374 	}
375 
376 	return (error);
377 }
378 
379 static int
380 privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
381 {
382 	xen_sysctl_t op, dop;
383 	import_export_t op_ie, sub_ie, sub2_ie;
384 	int error = 0;
385 
386 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
387 		return (-X_EFAULT);
388 
389 	sub_ie = null_ie;
390 	sub2_ie = null_ie;
391 
392 	/*
393 	 * Check this first because our wrapper will forcibly overwrite it.
394 	 */
395 	if (op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) {
396 		error = -X_EACCES;
397 		export_buffer(&op_ie, &error);
398 		return (error);
399 	}
400 
401 	switch (op.cmd) {
402 	case XEN_SYSCTL_readconsole: {
403 		error = import_handle(&sub_ie, &op.u.readconsole.buffer,
404 		    op.u.readconsole.count, IE_EXPORT);
405 		break;
406 	}
407 
408 	case XEN_SYSCTL_debug_keys: {
409 		error = import_handle(&sub_ie, &op.u.debug_keys.keys,
410 		    op.u.debug_keys.nr_keys, IE_IMPORT);
411 		break;
412 	}
413 
414 	case XEN_SYSCTL_tbuf_op:
415 	case XEN_SYSCTL_physinfo: {
416 		if (uaddr_from_handle(&op.u.physinfo.cpu_to_node) != NULL &&
417 		    op.u.physinfo.max_cpu_id != 0) {
418 			error = import_handle(&sub_ie,
419 			    &op.u.physinfo.cpu_to_node,
420 			    op.u.physinfo.max_cpu_id * sizeof (uint32_t),
421 			    IE_EXPORT);
422 		}
423 		break;
424 	}
425 	case XEN_SYSCTL_sched_id:
426 	case XEN_SYSCTL_availheap:
427 	case XEN_SYSCTL_cpu_hotplug:
428 		break;
429 	case XEN_SYSCTL_get_pmstat: {
430 		unsigned int maxs;
431 
432 		switch (op.u.get_pmstat.type) {
433 		case PMSTAT_get_pxstat:
434 			/*
435 			 * This interface is broken. Xen always copies out
436 			 * all the state information, and the interface
437 			 * does not specify how much space the caller has
438 			 * reserved. So, the only thing to do is just mirror
439 			 * the hypervisor and libxc behavior, and use the
440 			 * maximum amount of data.
441 			 */
442 			dop.cmd = XEN_SYSCTL_get_pmstat;
443 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
444 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
445 			dop.u.get_pmstat.type = PMSTAT_get_max_px;
446 			error = HYPERVISOR_sysctl(&dop);
447 			if (error != 0)
448 				break;
449 
450 			maxs = dop.u.get_pmstat.u.getpx.total;
451 			if (maxs == 0) {
452 				error = -X_EINVAL;
453 				break;
454 			}
455 
456 			error = import_handle(&sub_ie,
457 			    &op.u.get_pmstat.u.getpx.trans_pt,
458 			    maxs * maxs * sizeof (uint64_t), IE_EXPORT);
459 			if (error != 0)
460 				break;
461 
462 			error = import_handle(&sub2_ie,
463 			    &op.u.get_pmstat.u.getpx.pt,
464 			    maxs * sizeof (pm_px_val_t), IE_EXPORT);
465 			break;
466 		case PMSTAT_get_cxstat:
467 			/* See above */
468 			dop.cmd = XEN_SYSCTL_get_pmstat;
469 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
470 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
471 			dop.u.get_pmstat.type = PMSTAT_get_max_cx;
472 			error = HYPERVISOR_sysctl(&dop);
473 			if (error != 0)
474 				break;
475 
476 			maxs = dop.u.get_pmstat.u.getcx.nr;
477 			if (maxs == 0) {
478 				error = -X_EINVAL;
479 				break;
480 			}
481 
482 			error = import_handle(&sub_ie,
483 			    &op.u.get_pmstat.u.getcx.triggers,
484 			    maxs * sizeof (uint64_t), IE_EXPORT);
485 			if (error != 0)
486 				break;
487 			error = import_handle(&sub2_ie,
488 			    &op.u.get_pmstat.u.getcx.residencies,
489 			    maxs * sizeof (uint64_t), IE_EXPORT);
490 			break;
491 
492 		case PMSTAT_get_max_px:
493 		case PMSTAT_reset_pxstat:
494 		case PMSTAT_get_max_cx:
495 		case PMSTAT_reset_cxstat:
496 			break;
497 		default:
498 			error = -X_EINVAL;
499 			break;
500 		}
501 		break;
502 	}
503 
504 	case XEN_SYSCTL_perfc_op: {
505 		xen_sysctl_perfc_desc_t *scdp;
506 		/*
507 		 * If 'desc' is NULL, then the caller is asking for
508 		 * the number of counters.  If 'desc' is non-NULL,
509 		 * then we need to know how many counters there are
510 		 * before wiring down the output buffer appropriately.
511 		 */
512 		/*LINTED: constant in conditional context*/
513 		get_xen_guest_handle_u(scdp, op.u.perfc_op.desc);
514 		if (scdp != NULL) {
515 			static int numcounters = -1;
516 			static int numvals = -1;
517 
518 			if (numcounters == -1) {
519 				dop.cmd = XEN_SYSCTL_perfc_op;
520 				dop.interface_version =
521 				    XEN_SYSCTL_INTERFACE_VERSION;
522 				dop.u.perfc_op.cmd = XEN_SYSCTL_PERFCOP_query;
523 				/*LINTED: constant in conditional context*/
524 				set_xen_guest_handle_u(dop.u.perfc_op.desc,
525 				    NULL);
526 				/*LINTED: constant in conditional context*/
527 				set_xen_guest_handle_u(dop.u.perfc_op.val,
528 				    NULL);
529 
530 				error = HYPERVISOR_sysctl(&dop);
531 				if (error != 0)
532 					break;
533 				numcounters = dop.u.perfc_op.nr_counters;
534 				numvals = dop.u.perfc_op.nr_vals;
535 			}
536 			ASSERT(numcounters != -1);
537 			ASSERT(numvals != -1);
538 			error = import_handle(&sub_ie, &op.u.perfc_op.desc,
539 			    (sizeof (xen_sysctl_perfc_desc_t) * numcounters),
540 			    IE_EXPORT);
541 			error = import_handle(&sub2_ie, &op.u.perfc_op.val,
542 			    (sizeof (xen_sysctl_perfc_val_t) * numvals),
543 			    IE_EXPORT);
544 		}
545 		break;
546 	}
547 
548 	case XEN_SYSCTL_getdomaininfolist: {
549 		error = import_handle(&sub_ie, &op.u.getdomaininfolist.buffer,
550 		    (op.u.getdomaininfolist.max_domains *
551 		    sizeof (xen_domctl_getdomaininfo_t)), IE_EXPORT);
552 		break;
553 	}
554 
555 	case XEN_SYSCTL_getcpuinfo:
556 		error = import_handle(&sub_ie, &op.u.getcpuinfo.info,
557 		    op.u.getcpuinfo.max_cpus *
558 		    sizeof (xen_sysctl_cpuinfo_t), IE_EXPORT);
559 		break;
560 	default:
561 #ifdef DEBUG
562 		printf("unrecognized HYPERVISOR_sysctl %d\n", op.cmd);
563 #endif
564 		error = -X_EINVAL;
565 	}
566 
567 	if (error == 0)
568 		error = HYPERVISOR_sysctl(&op);
569 
570 	export_buffer(&op_ie, &error);
571 	export_buffer(&sub_ie, &error);
572 	export_buffer(&sub2_ie, &error);
573 
574 	return (error);
575 }
576 
577 static int
578 privcmd_HYPERVISOR_platform_op(xen_platform_op_t *opp)
579 {
580 	import_export_t op_ie, sub_ie, sub2_ie;
581 	xen_platform_op_t op;
582 	int error;
583 
584 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
585 		return (-X_EFAULT);
586 
587 	sub_ie = null_ie;
588 	sub2_ie = null_ie;
589 
590 	/*
591 	 * Check this first because our wrapper will forcibly overwrite it.
592 	 */
593 	if (op.interface_version != XENPF_INTERFACE_VERSION) {
594 		error = -X_EACCES;
595 		export_buffer(&op_ie, &error);
596 		return (error);
597 	}
598 
599 	/*
600 	 * Now handle any platform ops with embedded pointers elsewhere
601 	 * in the user address space that also need to be tacked down
602 	 * while the hypervisor futzes with them.
603 	 */
604 	switch (op.cmd) {
605 	case XENPF_settime:
606 	case XENPF_add_memtype:
607 	case XENPF_del_memtype:
608 	case XENPF_read_memtype:
609 	case XENPF_platform_quirk:
610 	case XENPF_enter_acpi_sleep:
611 	case XENPF_change_freq:
612 	case XENPF_panic_init:
613 		break;
614 
615 	case XENPF_microcode_update:
616 		error = import_handle(&sub_ie, &op.u.microcode.data,
617 		    op.u.microcode.length, IE_IMPORT);
618 		break;
619 	case XENPF_getidletime:
620 		error = import_handle(&sub_ie, &op.u.getidletime.cpumap_bitmap,
621 		    op.u.getidletime.cpumap_nr_cpus, IE_IMPEXP);
622 		if (error != 0)
623 			break;
624 
625 		error = import_handle(&sub2_ie, &op.u.getidletime.idletime,
626 		    op.u.getidletime.cpumap_nr_cpus * sizeof (uint64_t),
627 		    IE_EXPORT);
628 		break;
629 
630 	case XENPF_set_processor_pminfo: {
631 		size_t s;
632 
633 		switch (op.u.set_pminfo.type) {
634 		case XEN_PM_PX:
635 			s = op.u.set_pminfo.u.perf.state_count *
636 			    sizeof (xen_processor_px_t);
637 			if (op.u.set_pminfo.u.perf.flags & XEN_PX_PSS) {
638 				error = import_handle(&sub_ie,
639 				    &op.u.set_pminfo.u.perf.states, s,
640 				    IE_IMPORT);
641 			}
642 			break;
643 		case XEN_PM_CX:
644 			s = op.u.set_pminfo.u.power.count *
645 			    sizeof (xen_processor_cx_t);
646 			error = import_handle(&sub_ie,
647 			    &op.u.set_pminfo.u.power.states, s, IE_IMPORT);
648 			break;
649 		case XEN_PM_TX:
650 			break;
651 		default:
652 			error = -X_EINVAL;
653 			break;
654 		}
655 		break;
656 	}
657 	case XENPF_firmware_info: {
658 		uint16_t len;
659 		void *uaddr;
660 
661 		switch (op.u.firmware_info.type) {
662 		case XEN_FW_DISK_INFO:
663 			/*
664 			 * Ugh.. another hokey interface. The first 16 bits
665 			 * of the buffer are also used as the (input) length.
666 			 */
667 			uaddr = uaddr_from_handle(
668 			    &op.u.firmware_info.u.disk_info.edd_params);
669 			error = ddi_copyin(uaddr, &len, sizeof (len), 0);
670 			if (error != 0)
671 				break;
672 			error = import_handle(&sub_ie,
673 			    &op.u.firmware_info.u.disk_info.edd_params, len,
674 			    IE_IMPEXP);
675 			break;
676 		case XEN_FW_VBEDDC_INFO:
677 			error = import_handle(&sub_ie,
678 			    &op.u.firmware_info.u.vbeddc_info.edid, 128,
679 			    IE_EXPORT);
680 			break;
681 		case XEN_FW_DISK_MBR_SIGNATURE:
682 		default:
683 			break;
684 		}
685 		break;
686 	}
687 	default:
688 		/* FIXME: see this with non-existed ID 38 ???? */
689 #ifdef DEBUG
690 		printf("unrecognized HYPERVISOR_platform_op %d pid %d\n",
691 		    op.cmd, curthread->t_procp->p_pid);
692 #endif
693 		return (-X_EINVAL);
694 	}
695 
696 	if (error == 0)
697 		error = HYPERVISOR_platform_op(&op);
698 
699 	export_buffer(&op_ie, &error);
700 	export_buffer(&sub_ie, &error);
701 	export_buffer(&sub2_ie, &error);
702 
703 	return (error);
704 }
705 
706 static int
707 privcmd_HYPERVISOR_memory_op(int cmd, void *arg)
708 {
709 	int error = 0;
710 	import_export_t op_ie, sub_ie, gpfn_ie, mfn_ie;
711 	union {
712 		domid_t domid;
713 		struct xen_memory_reservation resv;
714 		struct xen_machphys_mfn_list xmml;
715 		struct xen_add_to_physmap xatp;
716 		struct xen_memory_map mm;
717 		struct xen_foreign_memory_map fmm;
718 	} op_arg;
719 
720 	op_ie = sub_ie = gpfn_ie = mfn_ie = null_ie;
721 
722 	switch (cmd) {
723 	case XENMEM_increase_reservation:
724 	case XENMEM_decrease_reservation:
725 	case XENMEM_populate_physmap: {
726 		ulong_t *taddr;
727 
728 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.resv),
729 		    IE_IMPEXP) != 0)
730 			return (-X_EFAULT);
731 
732 		error = import_handle(&sub_ie, &op_arg.resv.extent_start,
733 		    (op_arg.resv.nr_extents * sizeof (ulong_t)), IE_IMPEXP);
734 
735 		if (error == -X_EFAULT)
736 			/*LINTED: constant in conditional context*/
737 			get_xen_guest_handle(taddr, op_arg.resv.extent_start);
738 		else
739 			taddr = sub_ie.ie_kaddr;
740 
741 		switch (cmd) {
742 		case XENMEM_increase_reservation:
743 			DTRACE_XPV4(increase__reservation__start,
744 			    domid_t, op_arg.resv.domid,
745 			    ulong_t, op_arg.resv.nr_extents,
746 			    uint_t, op_arg.resv.extent_order,
747 			    ulong_t *, taddr);
748 			break;
749 		case XENMEM_decrease_reservation:
750 			DTRACE_XPV4(decrease__reservation__start,
751 			    domid_t, op_arg.resv.domid,
752 			    ulong_t, op_arg.resv.nr_extents,
753 			    uint_t, op_arg.resv.extent_order,
754 			    ulong_t *, taddr);
755 			break;
756 		case XENMEM_populate_physmap:
757 			DTRACE_XPV3(populate__physmap__start,
758 			    domid_t, op_arg.resv.domid,
759 			    ulong_t, op_arg.resv.nr_extents,
760 			    ulong_t *, taddr);
761 			break;
762 		}
763 
764 		break;
765 	}
766 
767 	case XENMEM_maximum_ram_page:
768 		break;
769 
770 	case XENMEM_current_reservation:
771 	case XENMEM_maximum_reservation:
772 	case XENMEM_maximum_gpfn:
773 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid),
774 		    IE_IMPEXP) != 0)
775 			return (-X_EFAULT);
776 		break;
777 
778 	case XENMEM_machphys_mfn_list: {
779 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xmml),
780 		    IE_IMPEXP) != 0)
781 			return (-X_EFAULT);
782 
783 		error = import_handle(&sub_ie, &op_arg.xmml.extent_start,
784 		    (op_arg.xmml.max_extents * sizeof (ulong_t)), IE_IMPEXP);
785 		break;
786 	}
787 
788 	case XENMEM_add_to_physmap:
789 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xatp),
790 		    IE_IMPEXP) != 0)
791 			return (-X_EFAULT);
792 		DTRACE_XPV4(add__to__physmap__start, domid_t,
793 		    op_arg.xatp.domid, uint_t, op_arg.xatp.space, ulong_t,
794 		    op_arg.xatp.idx, ulong_t, op_arg.xatp.gpfn);
795 		break;
796 
797 	case XENMEM_memory_map:
798 	case XENMEM_machine_memory_map: {
799 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.mm),
800 		    IE_EXPORT) != 0)
801 			return (-X_EFAULT);
802 
803 		/*
804 		 * XXPV: ugh. e820entry is packed, but not in the kernel, since
805 		 * we remove all attributes; seems like this is a nice way to
806 		 * break mysteriously.
807 		 */
808 		error = import_handle(&sub_ie, &op_arg.mm.buffer,
809 		    (op_arg.mm.nr_entries * 20), IE_IMPEXP);
810 		break;
811 	}
812 
813 	case XENMEM_set_memory_map: {
814 		struct xen_memory_map *taddr;
815 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.fmm),
816 		    IE_IMPORT) != 0)
817 			return (-X_EFAULT);
818 
819 		/*
820 		 * As above.
821 		 */
822 		error = import_handle(&sub_ie, &op_arg.fmm.map.buffer,
823 		    (op_arg.fmm.map.nr_entries * 20), IE_IMPEXP);
824 
825 		if (error == -X_EFAULT)
826 			/*LINTED: constant in conditional context*/
827 			get_xen_guest_handle(taddr, op_arg.fmm.map.buffer);
828 		else
829 			taddr = sub_ie.ie_kaddr;
830 		DTRACE_XPV3(set__memory__map__start, domid_t,
831 		    op_arg.fmm.domid, int, op_arg.fmm.map.nr_entries,
832 		    struct xen_memory_map *, taddr);
833 		break;
834 	}
835 
836 	default:
837 #ifdef DEBUG
838 		printf("unrecognized HYPERVISOR_memory_op %d\n", cmd);
839 #endif
840 		return (-X_EINVAL);
841 	}
842 
843 	if (error == 0)
844 		error = HYPERVISOR_memory_op(cmd,
845 		    (arg == NULL) ? NULL: &op_arg);
846 
847 	export_buffer(&op_ie, &error);
848 	export_buffer(&sub_ie, &error);
849 	export_buffer(&gpfn_ie, &error);
850 	export_buffer(&mfn_ie, &error);
851 
852 	switch (cmd) {
853 	case XENMEM_increase_reservation:
854 		DTRACE_XPV1(increase__reservation__end, int, error);
855 		break;
856 	case XENMEM_decrease_reservation:
857 		DTRACE_XPV1(decrease__reservation__end, int, error);
858 		break;
859 	case XENMEM_populate_physmap:
860 		DTRACE_XPV1(populate__physmap__end, int, error);
861 		break;
862 	case XENMEM_add_to_physmap:
863 		DTRACE_XPV1(add__to__physmap__end, int, error);
864 		break;
865 	case XENMEM_set_memory_map:
866 		DTRACE_XPV1(set__memory__map__end, int, error);
867 		break;
868 	}
869 	return (error);
870 }
871 
872 static int
873 privcmd_HYPERVISOR_event_channel_op(int cmd, void *arg)
874 {
875 	int error;
876 	size_t size;
877 	import_export_t op_ie;
878 	uint32_t flags;
879 
880 	switch (cmd) {
881 	case EVTCHNOP_alloc_unbound:
882 		size = sizeof (evtchn_alloc_unbound_t);
883 		flags = IE_IMPEXP;
884 		break;
885 	case EVTCHNOP_bind_interdomain:
886 		size = sizeof (evtchn_bind_interdomain_t);
887 		flags = IE_IMPEXP;
888 		break;
889 	case EVTCHNOP_bind_virq:
890 		size = sizeof (evtchn_bind_virq_t);
891 		flags = IE_IMPEXP;
892 		break;
893 	case EVTCHNOP_bind_pirq:
894 		size = sizeof (evtchn_bind_pirq_t);
895 		flags = IE_IMPEXP;
896 		break;
897 	case EVTCHNOP_bind_ipi:
898 		size = sizeof (evtchn_bind_ipi_t);
899 		flags = IE_IMPEXP;
900 		break;
901 	case EVTCHNOP_close:
902 		size = sizeof (evtchn_close_t);
903 		flags = IE_IMPORT;
904 		break;
905 	case EVTCHNOP_send:
906 		size = sizeof (evtchn_send_t);
907 		flags = IE_IMPORT;
908 		break;
909 	case EVTCHNOP_status:
910 		size = sizeof (evtchn_status_t);
911 		flags = IE_IMPEXP;
912 		break;
913 	case EVTCHNOP_bind_vcpu:
914 		size = sizeof (evtchn_bind_vcpu_t);
915 		flags = IE_IMPORT;
916 		break;
917 	case EVTCHNOP_unmask:
918 		size = sizeof (evtchn_unmask_t);
919 		flags = IE_IMPORT;
920 		break;
921 	case EVTCHNOP_reset:
922 		size = sizeof (evtchn_reset_t);
923 		flags = IE_IMPORT;
924 		break;
925 
926 	default:
927 #ifdef DEBUG
928 		printf("unrecognized HYPERVISOR_event_channel op %d\n", cmd);
929 #endif
930 		return (-X_EINVAL);
931 	}
932 
933 	error = import_buffer(&op_ie, arg, NULL, size, flags);
934 
935 	/*
936 	 * If there is sufficient demand, we can replace this void * with
937 	 * the proper op structure pointer.
938 	 */
939 	DTRACE_XPV2(evtchn__op__start, int, cmd, void *,
940 	    ((error == -X_EFAULT) ? arg : op_ie.ie_kaddr));
941 
942 	if (error == 0)
943 		error = HYPERVISOR_event_channel_op(cmd, op_ie.ie_kaddr);
944 	export_buffer(&op_ie, &error);
945 
946 	DTRACE_XPV1(evtchn__op__end, int, error);
947 
948 	return (error);
949 }
950 
951 static int
952 privcmd_HYPERVISOR_xen_version(int cmd, void *arg)
953 {
954 	int error;
955 	int size = 0;
956 	import_export_t op_ie;
957 	uint32_t flags = IE_EXPORT;
958 
959 	switch (cmd) {
960 	case XENVER_version:
961 		break;
962 	case XENVER_extraversion:
963 		size = sizeof (xen_extraversion_t);
964 		break;
965 	case XENVER_compile_info:
966 		size = sizeof (xen_compile_info_t);
967 		break;
968 	case XENVER_capabilities:
969 		size = sizeof (xen_capabilities_info_t);
970 		break;
971 	case XENVER_changeset:
972 		size = sizeof (xen_changeset_info_t);
973 		break;
974 	case XENVER_platform_parameters:
975 		size = sizeof (xen_platform_parameters_t);
976 		break;
977 	case XENVER_get_features:
978 		flags = IE_IMPEXP;
979 		size = sizeof (xen_feature_info_t);
980 		break;
981 	case XENVER_pagesize:
982 		break;
983 	case XENVER_guest_handle:
984 		size = sizeof (xen_domain_handle_t);
985 		break;
986 
987 	default:
988 #ifdef DEBUG
989 		printf("unrecognized HYPERVISOR_xen_version op %d\n", cmd);
990 #endif
991 		return (-X_EINVAL);
992 	}
993 
994 	error = import_buffer(&op_ie, arg, NULL, size, flags);
995 	if (error == 0)
996 		error = HYPERVISOR_xen_version(cmd, op_ie.ie_kaddr);
997 	export_buffer(&op_ie, &error);
998 
999 	return (error);
1000 }
1001 
1002 static int
1003 privcmd_HYPERVISOR_xsm_op(void *uacmctl)
1004 {
1005 	int error;
1006 	struct xen_acmctl *acmctl;
1007 	import_export_t op_ie;
1008 
1009 	error = import_buffer(&op_ie, uacmctl, NULL, sizeof (*acmctl),
1010 	    IE_IMPEXP);
1011 	if (error != 0)
1012 		return (error);
1013 
1014 	acmctl = op_ie.ie_kaddr;
1015 
1016 	if (acmctl->interface_version != ACM_INTERFACE_VERSION) {
1017 #ifdef DEBUG
1018 		printf("acm vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
1019 		    acmctl->cmd, acmctl->interface_version,
1020 		    ACM_INTERFACE_VERSION);
1021 #endif
1022 		error = -X_EACCES;
1023 		export_buffer(&op_ie, &error);
1024 		return (error);
1025 	}
1026 
1027 	/* FIXME: flask ops??? */
1028 
1029 	switch (acmctl->cmd) {
1030 	case ACMOP_setpolicy:
1031 	case ACMOP_getpolicy:
1032 	case ACMOP_dumpstats:
1033 	case ACMOP_getssid:
1034 	case ACMOP_getdecision:
1035 	case ACMOP_chgpolicy:
1036 	case ACMOP_relabeldoms:
1037 		/* flags = IE_IMPEXP; */
1038 		break;
1039 	default:
1040 #ifdef DEBUG
1041 		printf("unrecognized HYPERVISOR_xsm_op op %d\n", acmctl->cmd);
1042 #endif
1043 		return (-X_EINVAL);
1044 	}
1045 
1046 	if (error == 0)
1047 		error = HYPERVISOR_xsm_op(acmctl);
1048 	export_buffer(&op_ie, &error);
1049 
1050 	return (error);
1051 }
1052 
1053 static int
1054 privcmd_HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, uint_t *scount,
1055     domid_t domid)
1056 {
1057 	int error, bytes;
1058 	uint_t kscount;
1059 	struct mmuext_op *kop, single_kop;
1060 	import_export_t op_ie, scnt_ie;
1061 
1062 	op_ie = scnt_ie = null_ie;
1063 	error = 0;
1064 
1065 	if (count >= 1) {
1066 		bytes = count * sizeof (*kop);
1067 		kop = (count == 1) ? &single_kop : kmem_alloc(bytes, KM_SLEEP);
1068 		error = import_buffer(&op_ie, op, kop, bytes, IE_IMPORT);
1069 	}
1070 
1071 	DTRACE_XPV2(mmu__ext__op__start, int, count, struct mmuext_op *,
1072 	    ((error == -X_EFAULT) ? op : kop));
1073 
1074 	if (scount != NULL && error == 0)
1075 		error = import_buffer(&scnt_ie, scount, &kscount,
1076 		    sizeof (kscount), IE_EXPORT);
1077 
1078 	if (error == 0)
1079 		error = HYPERVISOR_mmuext_op(kop, count, &kscount, domid);
1080 	export_buffer(&op_ie, &error);
1081 	export_buffer(&scnt_ie, &error);
1082 
1083 	DTRACE_XPV1(mmu__ext__op__end, int, error);
1084 
1085 	if (count > 1)
1086 		kmem_free(kop, bytes);
1087 	return (error);
1088 }
1089 
1090 static int
1091 privcmd_HYPERVISOR_hvm_op(int cmd, void *arg)
1092 {
1093 	int error;
1094 	int size = 0;
1095 	import_export_t arg_ie;
1096 	uint32_t flags = IE_IMPORT;
1097 
1098 	switch (cmd) {
1099 	case HVMOP_set_param:
1100 	case HVMOP_get_param:
1101 		size = sizeof (struct xen_hvm_param);
1102 		flags = IE_IMPEXP;
1103 		break;
1104 	case HVMOP_set_pci_intx_level:
1105 		size = sizeof (struct xen_hvm_set_pci_intx_level);
1106 		break;
1107 	case HVMOP_set_isa_irq_level:
1108 		size = sizeof (struct xen_hvm_set_isa_irq_level);
1109 		break;
1110 	case HVMOP_set_pci_link_route:
1111 		size = sizeof (struct xen_hvm_set_pci_link_route);
1112 		break;
1113 	case HVMOP_track_dirty_vram:
1114 		size = sizeof (struct xen_hvm_track_dirty_vram);
1115 		break;
1116 	case HVMOP_modified_memory:
1117 		size = sizeof (struct xen_hvm_modified_memory);
1118 		break;
1119 	case HVMOP_set_mem_type:
1120 		size = sizeof (struct xen_hvm_set_mem_type);
1121 		break;
1122 
1123 	default:
1124 #ifdef DEBUG
1125 		printf("unrecognized HVM op 0x%x\n", cmd);
1126 #endif
1127 		return (-X_EINVAL);
1128 	}
1129 
1130 	error = import_buffer(&arg_ie, arg, NULL, size, flags);
1131 	if (error == 0)
1132 		error = HYPERVISOR_hvm_op(cmd, arg_ie.ie_kaddr);
1133 	export_buffer(&arg_ie, &error);
1134 
1135 	return (error);
1136 }
1137 
1138 static int
1139 privcmd_HYPERVISOR_sched_op(int cmd, void *arg)
1140 {
1141 	int error;
1142 	int size = 0;
1143 	import_export_t op_ie;
1144 	struct sched_remote_shutdown op;
1145 
1146 	switch (cmd) {
1147 	case SCHEDOP_remote_shutdown:
1148 		size = sizeof (struct sched_remote_shutdown);
1149 		break;
1150 	default:
1151 #ifdef DEBUG
1152 		printf("unrecognized sched op 0x%x\n", cmd);
1153 #endif
1154 		return (-X_EINVAL);
1155 	}
1156 
1157 	error = import_buffer(&op_ie, arg, &op, size, IE_IMPORT);
1158 	if (error == 0)
1159 		error = HYPERVISOR_sched_op(cmd, (arg == NULL) ? NULL : &op);
1160 	export_buffer(&op_ie, &error);
1161 
1162 	return (error);
1163 }
1164 
/*
 * When non-zero, do_privcmd_hypercall() forwards any hypercall it does not
 * explicitly handle straight to the hypervisor via __hypercall5() instead of
 * failing it with -X_EPERM.  Off by default.
 */
int allow_all_hypercalls = 0;
/*
 * NOTE(review): not referenced in this part of the file; presumably a debug
 * switch for diagnosing EFAULT failures -- confirm against its users.
 */
int privcmd_efault_debug = 0;
1167 
1168 /*ARGSUSED*/
1169 int
1170 do_privcmd_hypercall(void *uarg, int mode, cred_t *cr, int *rval)
1171 {
1172 	privcmd_hypercall_t __hc, *hc = &__hc;
1173 	int error;
1174 
1175 	if (ddi_copyin(uarg, hc, sizeof (*hc), mode))
1176 		return (EFAULT);
1177 
1178 	switch (hc->op) {
1179 	case __HYPERVISOR_mmu_update:
1180 		error = privcmd_HYPERVISOR_mmu_update(
1181 		    (mmu_update_t *)hc->arg[0], (int)hc->arg[1],
1182 		    (int *)hc->arg[2], (domid_t)hc->arg[3]);
1183 		break;
1184 	case __HYPERVISOR_domctl:
1185 		error = privcmd_HYPERVISOR_domctl(
1186 		    (xen_domctl_t *)hc->arg[0]);
1187 		break;
1188 	case __HYPERVISOR_sysctl:
1189 		error = privcmd_HYPERVISOR_sysctl(
1190 		    (xen_sysctl_t *)hc->arg[0]);
1191 		break;
1192 	case __HYPERVISOR_platform_op:
1193 		error = privcmd_HYPERVISOR_platform_op(
1194 		    (xen_platform_op_t *)hc->arg[0]);
1195 		break;
1196 	case __HYPERVISOR_memory_op:
1197 		error = privcmd_HYPERVISOR_memory_op(
1198 		    (int)hc->arg[0], (void *)hc->arg[1]);
1199 		break;
1200 	case __HYPERVISOR_event_channel_op:
1201 		error = privcmd_HYPERVISOR_event_channel_op(
1202 		    (int)hc->arg[0], (void *)hc->arg[1]);
1203 		break;
1204 	case __HYPERVISOR_xen_version:
1205 		error = privcmd_HYPERVISOR_xen_version(
1206 		    (int)hc->arg[0], (void *)hc->arg[1]);
1207 		break;
1208 	case __HYPERVISOR_mmuext_op:
1209 		error = privcmd_HYPERVISOR_mmuext_op(
1210 		    (struct mmuext_op *)hc->arg[0], (int)hc->arg[1],
1211 		    (uint_t *)hc->arg[2], (domid_t)hc->arg[3]);
1212 		break;
1213 	case __HYPERVISOR_xsm_op:
1214 		error = privcmd_HYPERVISOR_xsm_op((void *)hc->arg[0]);
1215 		break;
1216 	case __HYPERVISOR_hvm_op:
1217 		error = privcmd_HYPERVISOR_hvm_op(
1218 		    (int)hc->arg[0], (void *)hc->arg[1]);
1219 		break;
1220 	case __HYPERVISOR_sched_op:
1221 		error = privcmd_HYPERVISOR_sched_op(
1222 		    (int)hc->arg[0], (void *)hc->arg[1]);
1223 		break;
1224 	default:
1225 		if (allow_all_hypercalls)
1226 			error = __hypercall5(hc->op, hc->arg[0], hc->arg[1],
1227 			    hc->arg[2], hc->arg[3], hc->arg[4]);
1228 		else {
1229 #ifdef DEBUG
1230 			printf("unrecognized hypercall %ld\n", hc->op);
1231 #endif
1232 			error = -X_EPERM;
1233 		}
1234 		break;
1235 	}
1236 
1237 	if (error > 0) {
1238 		*rval = error;
1239 		error = 0;
1240 	} else if (error != 0)
1241 		error = xen_xlate_errcode(error);
1242 
1243 	return (error);
1244 }
1245