xref: /illumos-gate/usr/src/uts/i86xpv/io/privcmd_hcall.c (revision 50e803017668ce124e5b7c37cae78c397813805a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/xpv_user.h>
27 
28 #include <sys/types.h>
29 #include <sys/file.h>
30 #include <sys/errno.h>
31 #include <sys/open.h>
32 #include <sys/cred.h>
33 #include <sys/conf.h>
34 #include <sys/stat.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/vmsystm.h>
39 #include <sys/hypervisor.h>
40 #include <sys/xen_errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/sdt.h>
43 
44 #include <xen/sys/privcmd.h>
45 #include <sys/privcmd_impl.h>
46 
/*
 * Bookkeeping for one user-space buffer that is shadowed by a kernel
 * buffer while a hypercall is in flight.
 */
typedef struct import_export {
	void		*ie_uaddr;	/* user-space address of the buffer */
	void		*ie_kaddr;	/* kernel-side copy of the buffer */
	size_t		ie_size;	/* size of the buffer, in bytes */
	uint32_t	ie_flags;	/* IE_* flags, defined below */
} import_export_t;

/* Describes "no buffer": every field is NULL/zero. */
static import_export_t null_ie = { NULL, NULL, 0, 0 };

#define	IE_IMPORT	0x0001		/* Data needs to be copied in */
#define	IE_EXPORT	0x0002		/* Data needs to be copied out */
#define	IE_FREE		0x0004		/* ie_kaddr must be kmem_free()d */
#define	IE_IMPEXP	(IE_IMPORT | IE_EXPORT)
60 
/*
 * Return the address stored inside a Xen guest handle.  'field' points at
 * the handle itself, which is viewed here as a structure holding a single
 * pointer.
 */
static void *
uaddr_from_handle(void *field)
{
	struct { void *p; } *handle = field;
	void *uaddr;

	/*LINTED: constant in conditional context*/
	get_xen_guest_handle(uaddr, (*handle));

	return (uaddr);
}
71 
72 
73 /*
74  * Import a buffer from user-space.  If the caller provides a kernel
75  * address, we import to that address.  If not, we kmem_alloc() the space
76  * ourselves.
77  */
78 static int
79 import_buffer(import_export_t *iep, void *uaddr, void *kaddr, size_t size,
80     uint32_t flags)
81 {
82 	iep->ie_uaddr = uaddr;
83 	iep->ie_size = size;
84 	iep->ie_flags = flags & IE_EXPORT;
85 
86 	if (size == 0 || uaddr == NULL) {
87 		*iep = null_ie;
88 		return (0);
89 	}
90 
91 	if (kaddr == NULL) {
92 		iep->ie_kaddr = kmem_alloc(size, KM_SLEEP);
93 		iep->ie_flags |= IE_FREE;
94 	} else {
95 		iep->ie_kaddr = kaddr;
96 		iep->ie_flags &= ~IE_FREE;
97 	}
98 
99 	if ((flags & IE_IMPORT) &&
100 	    (ddi_copyin(uaddr, iep->ie_kaddr, size, 0) != 0)) {
101 		if (iep->ie_flags & IE_FREE) {
102 			kmem_free(iep->ie_kaddr, iep->ie_size);
103 			iep->ie_kaddr = NULL;
104 			iep->ie_flags = 0;
105 		}
106 		return (-X_EFAULT);
107 	}
108 
109 	return (0);
110 }
111 
112 static void
113 export_buffer(import_export_t *iep, int *error)
114 {
115 	int copy_err = 0;
116 
117 	if (iep->ie_size == 0 || iep->ie_uaddr == NULL)
118 		return;
119 
120 	/*
121 	 * If the buffer was marked for export initially, and if the
122 	 * hypercall completed successfully, resync the user-space buffer
123 	 * with our in-kernel buffer.
124 	 */
125 	if ((iep->ie_flags & IE_EXPORT) && (*error >= 0) &&
126 	    (ddi_copyout(iep->ie_kaddr, iep->ie_uaddr, iep->ie_size, 0) != 0))
127 		copy_err = -X_EFAULT;
128 	if (iep->ie_flags & IE_FREE) {
129 		kmem_free(iep->ie_kaddr, iep->ie_size);
130 		iep->ie_kaddr = NULL;
131 		iep->ie_flags = 0;
132 	}
133 
134 	if (copy_err != 0 && *error >= 0)
135 		*error = copy_err;
136 }
137 
138 /*
139  * Xen 'op' structures often include pointers disguised as 'handles', which
140  * refer to addresses in user space.  This routine copies a buffer
141  * associated with an embedded pointer into kernel space, and replaces the
142  * pointer to userspace with a pointer to the new kernel buffer.
143  *
144  * Note: if Xen ever redefines the structure of a 'handle', this routine
145  * (specifically the definition of 'hdl') will need to be updated.
146  */
147 static int
148 import_handle(import_export_t *iep, void *field, size_t size, int flags)
149 {
150 	struct { void *p; } *hdl = field;
151 	void *ptr;
152 	int err;
153 
154 	ptr = uaddr_from_handle(field);
155 	err = import_buffer(iep, ptr, NULL, size, (flags));
156 	if (err == 0) {
157 		/*LINTED: constant in conditional context*/
158 		set_xen_guest_handle((*hdl), (void *)((iep)->ie_kaddr));
159 	}
160 	return (err);
161 }
162 
163 static int
164 privcmd_HYPERVISOR_mmu_update(mmu_update_t *ureq, int count, int *scount,
165     domid_t domid)
166 {
167 	mmu_update_t *kreq, single_kreq;
168 	import_export_t cnt_ie, req_ie;
169 	int error, kscount, bytes;
170 
171 	bytes = count * sizeof (*kreq);
172 	kreq = (count == 1) ? &single_kreq : kmem_alloc(bytes, KM_SLEEP);
173 
174 	error = import_buffer(&cnt_ie, scount, &kscount, sizeof (kscount),
175 	    IE_IMPEXP);
176 	if (error != 0)
177 		req_ie = null_ie;
178 	else
179 		error = import_buffer(&req_ie, ureq, kreq, bytes, IE_IMPEXP);
180 
181 	DTRACE_XPV3(mmu__update__start, int, domid, int, count, mmu_update_t *,
182 	    ((error == -X_EFAULT) ? ureq : kreq));
183 
184 	if (error == 0)
185 		error = HYPERVISOR_mmu_update(kreq, count, &kscount, domid);
186 	export_buffer(&cnt_ie, &error);
187 	export_buffer(&req_ie, &error);
188 	if (count != 1)
189 		kmem_free(kreq, bytes);
190 
191 	DTRACE_XPV1(mmu__update__end, int, error);
192 	return (error);
193 }
194 
195 static int
196 privcmd_HYPERVISOR_domctl(xen_domctl_t *opp)
197 {
198 	xen_domctl_t op;
199 	import_export_t op_ie, sub_ie;
200 	int error = 0;
201 
202 	if ((error = import_buffer(&op_ie, opp, &op, sizeof (op),
203 	    IE_IMPEXP)) != 0)
204 		return (error);
205 
206 	sub_ie = null_ie;
207 
208 	/*
209 	 * Check this first because our wrapper will forcibly overwrite it.
210 	 */
211 	if (op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) {
212 #ifdef DEBUG
213 		printf("domctl vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
214 		    op.cmd, op.interface_version, XEN_DOMCTL_INTERFACE_VERSION);
215 #endif
216 		error = -X_EACCES;
217 		export_buffer(&op_ie, &error);
218 		return (error);
219 	}
220 
221 	/*
222 	 * Now handle any domctl ops with embedded pointers elsewhere
223 	 * in the user address space that also need to be tacked down
224 	 * while the hypervisor futzes with them.
225 	 */
226 	switch (op.cmd) {
227 	case XEN_DOMCTL_createdomain:
228 		DTRACE_XPV1(dom__create__start, xen_domctl_t *,
229 		    &op.u.createdomain);
230 		break;
231 
232 	case XEN_DOMCTL_destroydomain:
233 		DTRACE_XPV1(dom__destroy__start, domid_t, op.domain);
234 		break;
235 
236 	case XEN_DOMCTL_pausedomain:
237 		DTRACE_XPV1(dom__pause__start, domid_t, op.domain);
238 		break;
239 
240 	case XEN_DOMCTL_unpausedomain:
241 		DTRACE_XPV1(dom__unpause__start, domid_t, op.domain);
242 		break;
243 
244 	case XEN_DOMCTL_getmemlist: {
245 		error = import_handle(&sub_ie, &op.u.getmemlist.buffer,
246 		    op.u.getmemlist.max_pfns * sizeof (xen_pfn_t), IE_EXPORT);
247 		break;
248 	}
249 
250 	case XEN_DOMCTL_getpageframeinfo2: {
251 		error = import_handle(&sub_ie, &op.u.getpageframeinfo2.array,
252 		    op.u.getpageframeinfo2.num * sizeof (ulong_t), IE_IMPEXP);
253 		break;
254 	}
255 
256 	case XEN_DOMCTL_shadow_op: {
257 		size_t size;
258 
259 		size = roundup(howmany(op.u.shadow_op.pages, NBBY),
260 		    sizeof (ulong_t));
261 		error = import_handle(&sub_ie,
262 		    &op.u.shadow_op.dirty_bitmap, size, IE_IMPEXP);
263 		break;
264 	}
265 
266 	case XEN_DOMCTL_setvcpucontext: {
267 		vcpu_guest_context_t *taddr;
268 		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
269 		    sizeof (vcpu_guest_context_t), IE_IMPORT);
270 		if (error == -X_EFAULT)
271 			/*LINTED: constant in conditional context*/
272 			get_xen_guest_handle_u(taddr, op.u.vcpucontext.ctxt);
273 		else
274 			taddr = sub_ie.ie_kaddr;
275 		DTRACE_XPV2(setvcpucontext__start, domid_t, op.domain,
276 		    vcpu_guest_context_t *, taddr);
277 		break;
278 	}
279 
280 	case XEN_DOMCTL_getvcpucontext: {
281 		error = import_handle(&sub_ie, &op.u.vcpucontext.ctxt,
282 		    sizeof (vcpu_guest_context_t), IE_EXPORT);
283 		break;
284 	}
285 
286 
287 	case XEN_DOMCTL_sethvmcontext: {
288 		error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
289 		    op.u.hvmcontext.size, IE_IMPORT);
290 		break;
291 	}
292 
293 	case XEN_DOMCTL_gethvmcontext: {
294 		if (op.u.hvmcontext.buffer.p != NULL)
295 			error = import_handle(&sub_ie, &op.u.hvmcontext.buffer,
296 			    op.u.hvmcontext.size, IE_EXPORT);
297 		break;
298 	}
299 
300 	case XEN_DOMCTL_getdomaininfo:
301 	case XEN_DOMCTL_getpageframeinfo:
302 	case XEN_DOMCTL_max_mem:
303 	case XEN_DOMCTL_resumedomain:
304 	case XEN_DOMCTL_getvcpuinfo:
305 	case XEN_DOMCTL_setvcpuaffinity:
306 	case XEN_DOMCTL_getvcpuaffinity:
307 	case XEN_DOMCTL_max_vcpus:
308 	case XEN_DOMCTL_scheduler_op:
309 	case XEN_DOMCTL_setdomainhandle:
310 	case XEN_DOMCTL_setdebugging:
311 	case XEN_DOMCTL_irq_permission:
312 	case XEN_DOMCTL_iomem_permission:
313 	case XEN_DOMCTL_ioport_permission:
314 	case XEN_DOMCTL_hypercall_init:
315 	case XEN_DOMCTL_arch_setup:
316 	case XEN_DOMCTL_settimeoffset:
317 	case XEN_DOMCTL_real_mode_area:
318 	case XEN_DOMCTL_sendtrigger:
319 	case XEN_DOMCTL_assign_device:
320 	case XEN_DOMCTL_bind_pt_irq:
321 	case XEN_DOMCTL_get_address_size:
322 	case XEN_DOMCTL_set_address_size:
323 	case XEN_DOMCTL_get_ext_vcpucontext:
324 	case XEN_DOMCTL_set_ext_vcpucontext:
325 	case XEN_DOMCTL_set_opt_feature:
326 	case XEN_DOMCTL_memory_mapping:
327 	case XEN_DOMCTL_ioport_mapping:
328 	case XEN_DOMCTL_pin_mem_cacheattr:
329 	case XEN_DOMCTL_test_assign_device:
330 	case XEN_DOMCTL_set_target:
331 	case XEN_DOMCTL_deassign_device:
332 	case XEN_DOMCTL_set_cpuid:
333 	case XEN_DOMCTL_get_device_group:
334 	case XEN_DOMCTL_get_machine_address_size:
335 	case XEN_DOMCTL_set_machine_address_size:
336 	case XEN_DOMCTL_suppress_spurious_page_faults:
337 		break;
338 
339 	default:
340 #ifdef DEBUG
341 		printf("unrecognized HYPERVISOR_domctl %d\n", op.cmd);
342 #endif
343 		error = -X_EINVAL;
344 	}
345 
346 	if (error == 0)
347 		error = HYPERVISOR_domctl(&op);
348 
349 	export_buffer(&op_ie, &error);
350 	export_buffer(&sub_ie, &error);
351 
352 	switch (op.cmd) {
353 	case XEN_DOMCTL_createdomain:
354 		DTRACE_XPV1(dom__create__end, int, error);
355 		break;
356 	case XEN_DOMCTL_destroydomain:
357 		DTRACE_XPV1(dom__destroy__end, int, error);
358 		break;
359 	case XEN_DOMCTL_pausedomain:
360 		DTRACE_XPV1(dom__pause__end, int, error);
361 		break;
362 	case XEN_DOMCTL_unpausedomain:
363 		DTRACE_XPV1(dom__unpause__end, int, error);
364 		break;
365 	case XEN_DOMCTL_setvcpucontext:
366 		DTRACE_XPV1(setvcpucontext__end, int, error);
367 		break;
368 	default:
369 		;
370 	}
371 
372 	return (error);
373 }
374 
375 static int
376 privcmd_HYPERVISOR_sysctl(xen_sysctl_t *opp)
377 {
378 	xen_sysctl_t op, dop;
379 	import_export_t op_ie, sub_ie, sub2_ie;
380 	int error = 0;
381 
382 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
383 		return (-X_EFAULT);
384 
385 	sub_ie = null_ie;
386 	sub2_ie = null_ie;
387 
388 	/*
389 	 * Check this first because our wrapper will forcibly overwrite it.
390 	 */
391 	if (op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) {
392 		error = -X_EACCES;
393 		export_buffer(&op_ie, &error);
394 		return (error);
395 	}
396 
397 	switch (op.cmd) {
398 	case XEN_SYSCTL_readconsole: {
399 		error = import_handle(&sub_ie, &op.u.readconsole.buffer,
400 		    op.u.readconsole.count, IE_EXPORT);
401 		break;
402 	}
403 
404 	case XEN_SYSCTL_debug_keys: {
405 		error = import_handle(&sub_ie, &op.u.debug_keys.keys,
406 		    op.u.debug_keys.nr_keys, IE_IMPORT);
407 		break;
408 	}
409 
410 	case XEN_SYSCTL_tbuf_op:
411 	case XEN_SYSCTL_physinfo: {
412 		if (uaddr_from_handle(&op.u.physinfo.cpu_to_node) != NULL &&
413 		    op.u.physinfo.max_cpu_id != 0) {
414 			error = import_handle(&sub_ie,
415 			    &op.u.physinfo.cpu_to_node,
416 			    op.u.physinfo.max_cpu_id * sizeof (uint32_t),
417 			    IE_EXPORT);
418 		}
419 		break;
420 	}
421 	case XEN_SYSCTL_sched_id:
422 	case XEN_SYSCTL_availheap:
423 	case XEN_SYSCTL_cpu_hotplug:
424 		break;
425 	case XEN_SYSCTL_get_pmstat: {
426 		unsigned int maxs;
427 
428 		switch (op.u.get_pmstat.type) {
429 		case PMSTAT_get_pxstat:
430 			/*
431 			 * This interface is broken. Xen always copies out
432 			 * all the state information, and the interface
433 			 * does not specify how much space the caller has
434 			 * reserved. So, the only thing to do is just mirror
435 			 * the hypervisor and libxc behavior, and use the
436 			 * maximum amount of data.
437 			 */
438 			dop.cmd = XEN_SYSCTL_get_pmstat;
439 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
440 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
441 			dop.u.get_pmstat.type = PMSTAT_get_max_px;
442 			error = HYPERVISOR_sysctl(&dop);
443 			if (error != 0)
444 				break;
445 
446 			maxs = dop.u.get_pmstat.u.getpx.total;
447 			if (maxs == 0) {
448 				error = -X_EINVAL;
449 				break;
450 			}
451 
452 			error = import_handle(&sub_ie,
453 			    &op.u.get_pmstat.u.getpx.trans_pt,
454 			    maxs * maxs * sizeof (uint64_t), IE_EXPORT);
455 			if (error != 0)
456 				break;
457 
458 			error = import_handle(&sub2_ie,
459 			    &op.u.get_pmstat.u.getpx.pt,
460 			    maxs * sizeof (pm_px_val_t), IE_EXPORT);
461 			break;
462 		case PMSTAT_get_cxstat:
463 			/* See above */
464 			dop.cmd = XEN_SYSCTL_get_pmstat;
465 			dop.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
466 			dop.u.get_pmstat.cpuid = op.u.get_pmstat.cpuid;
467 			dop.u.get_pmstat.type = PMSTAT_get_max_cx;
468 			error = HYPERVISOR_sysctl(&dop);
469 			if (error != 0)
470 				break;
471 
472 			maxs = dop.u.get_pmstat.u.getcx.nr;
473 			if (maxs == 0) {
474 				error = -X_EINVAL;
475 				break;
476 			}
477 
478 			error = import_handle(&sub_ie,
479 			    &op.u.get_pmstat.u.getcx.triggers,
480 			    maxs * sizeof (uint64_t), IE_EXPORT);
481 			if (error != 0)
482 				break;
483 			error = import_handle(&sub2_ie,
484 			    &op.u.get_pmstat.u.getcx.residencies,
485 			    maxs * sizeof (uint64_t), IE_EXPORT);
486 			break;
487 
488 		case PMSTAT_get_max_px:
489 		case PMSTAT_reset_pxstat:
490 		case PMSTAT_get_max_cx:
491 		case PMSTAT_reset_cxstat:
492 			break;
493 		default:
494 			error = -X_EINVAL;
495 			break;
496 		}
497 		break;
498 	}
499 
500 	case XEN_SYSCTL_perfc_op: {
501 		xen_sysctl_perfc_desc_t *scdp;
502 		/*
503 		 * If 'desc' is NULL, then the caller is asking for
504 		 * the number of counters.  If 'desc' is non-NULL,
505 		 * then we need to know how many counters there are
506 		 * before wiring down the output buffer appropriately.
507 		 */
508 		/*LINTED: constant in conditional context*/
509 		get_xen_guest_handle_u(scdp, op.u.perfc_op.desc);
510 		if (scdp != NULL) {
511 			static int numcounters = -1;
512 			static int numvals = -1;
513 
514 			if (numcounters == -1) {
515 				dop.cmd = XEN_SYSCTL_perfc_op;
516 				dop.interface_version =
517 				    XEN_SYSCTL_INTERFACE_VERSION;
518 				dop.u.perfc_op.cmd = XEN_SYSCTL_PERFCOP_query;
519 				/*LINTED: constant in conditional context*/
520 				set_xen_guest_handle_u(dop.u.perfc_op.desc,
521 				    NULL);
522 				/*LINTED: constant in conditional context*/
523 				set_xen_guest_handle_u(dop.u.perfc_op.val,
524 				    NULL);
525 
526 				error = HYPERVISOR_sysctl(&dop);
527 				if (error != 0)
528 					break;
529 				numcounters = dop.u.perfc_op.nr_counters;
530 				numvals = dop.u.perfc_op.nr_vals;
531 			}
532 			ASSERT(numcounters != -1);
533 			ASSERT(numvals != -1);
534 			error = import_handle(&sub_ie, &op.u.perfc_op.desc,
535 			    (sizeof (xen_sysctl_perfc_desc_t) * numcounters),
536 			    IE_EXPORT);
537 			error = import_handle(&sub2_ie, &op.u.perfc_op.val,
538 			    (sizeof (xen_sysctl_perfc_val_t) * numvals),
539 			    IE_EXPORT);
540 		}
541 		break;
542 	}
543 
544 	case XEN_SYSCTL_getdomaininfolist: {
545 		error = import_handle(&sub_ie, &op.u.getdomaininfolist.buffer,
546 		    (op.u.getdomaininfolist.max_domains *
547 		    sizeof (xen_domctl_getdomaininfo_t)), IE_EXPORT);
548 		break;
549 	}
550 
551 	case XEN_SYSCTL_getcpuinfo:
552 		error = import_handle(&sub_ie, &op.u.getcpuinfo.info,
553 		    op.u.getcpuinfo.max_cpus *
554 		    sizeof (xen_sysctl_cpuinfo_t), IE_EXPORT);
555 		break;
556 	default:
557 #ifdef DEBUG
558 		printf("unrecognized HYPERVISOR_sysctl %d\n", op.cmd);
559 #endif
560 		error = -X_EINVAL;
561 	}
562 
563 	if (error == 0)
564 		error = HYPERVISOR_sysctl(&op);
565 
566 	export_buffer(&op_ie, &error);
567 	export_buffer(&sub_ie, &error);
568 	export_buffer(&sub2_ie, &error);
569 
570 	return (error);
571 }
572 
573 static int
574 privcmd_HYPERVISOR_platform_op(xen_platform_op_t *opp)
575 {
576 	import_export_t op_ie, sub_ie, sub2_ie;
577 	xen_platform_op_t op;
578 	int error;
579 
580 	if (import_buffer(&op_ie, opp, &op, sizeof (op), IE_IMPEXP) != 0)
581 		return (-X_EFAULT);
582 
583 	sub_ie = null_ie;
584 	sub2_ie = null_ie;
585 
586 	/*
587 	 * Check this first because our wrapper will forcibly overwrite it.
588 	 */
589 	if (op.interface_version != XENPF_INTERFACE_VERSION) {
590 		error = -X_EACCES;
591 		export_buffer(&op_ie, &error);
592 		return (error);
593 	}
594 
595 	/*
596 	 * Now handle any platform ops with embedded pointers elsewhere
597 	 * in the user address space that also need to be tacked down
598 	 * while the hypervisor futzes with them.
599 	 */
600 	switch (op.cmd) {
601 	case XENPF_settime:
602 	case XENPF_add_memtype:
603 	case XENPF_del_memtype:
604 	case XENPF_read_memtype:
605 	case XENPF_platform_quirk:
606 	case XENPF_enter_acpi_sleep:
607 	case XENPF_change_freq:
608 	case XENPF_panic_init:
609 		break;
610 
611 	case XENPF_microcode_update:
612 		error = import_handle(&sub_ie, &op.u.microcode.data,
613 		    op.u.microcode.length, IE_IMPORT);
614 		break;
615 	case XENPF_getidletime:
616 		error = import_handle(&sub_ie, &op.u.getidletime.cpumap_bitmap,
617 		    op.u.getidletime.cpumap_nr_cpus, IE_IMPEXP);
618 		if (error != 0)
619 			break;
620 
621 		error = import_handle(&sub2_ie, &op.u.getidletime.idletime,
622 		    op.u.getidletime.cpumap_nr_cpus * sizeof (uint64_t),
623 		    IE_EXPORT);
624 		break;
625 
626 	case XENPF_set_processor_pminfo: {
627 		size_t s;
628 
629 		switch (op.u.set_pminfo.type) {
630 		case XEN_PM_PX:
631 			s = op.u.set_pminfo.u.perf.state_count *
632 			    sizeof (xen_processor_px_t);
633 			if (op.u.set_pminfo.u.perf.flags & XEN_PX_PSS) {
634 				error = import_handle(&sub_ie,
635 				    &op.u.set_pminfo.u.perf.states, s,
636 				    IE_IMPORT);
637 			}
638 			break;
639 		case XEN_PM_CX:
640 			s = op.u.set_pminfo.u.power.count *
641 			    sizeof (xen_processor_cx_t);
642 			error = import_handle(&sub_ie,
643 			    &op.u.set_pminfo.u.power.states, s, IE_IMPORT);
644 			break;
645 		case XEN_PM_TX:
646 			break;
647 		default:
648 			error = -X_EINVAL;
649 			break;
650 		}
651 		break;
652 	}
653 	case XENPF_firmware_info: {
654 		uint16_t len;
655 		void *uaddr;
656 
657 		switch (op.u.firmware_info.type) {
658 		case XEN_FW_DISK_INFO:
659 			/*
660 			 * Ugh.. another hokey interface. The first 16 bits
661 			 * of the buffer are also used as the (input) length.
662 			 */
663 			uaddr = uaddr_from_handle(
664 			    &op.u.firmware_info.u.disk_info.edd_params);
665 			error = ddi_copyin(uaddr, &len, sizeof (len), 0);
666 			if (error != 0)
667 				break;
668 			error = import_handle(&sub_ie,
669 			    &op.u.firmware_info.u.disk_info.edd_params, len,
670 			    IE_IMPEXP);
671 			break;
672 		case XEN_FW_VBEDDC_INFO:
673 			error = import_handle(&sub_ie,
674 			    &op.u.firmware_info.u.vbeddc_info.edid, 128,
675 			    IE_EXPORT);
676 			break;
677 		case XEN_FW_DISK_MBR_SIGNATURE:
678 		default:
679 			break;
680 		}
681 		break;
682 	}
683 	default:
684 		/* FIXME: see this with non-existed ID 38 ???? */
685 #ifdef DEBUG
686 		printf("unrecognized HYPERVISOR_platform_op %d pid %d\n",
687 		    op.cmd, curthread->t_procp->p_pid);
688 #endif
689 		return (-X_EINVAL);
690 	}
691 
692 	if (error == 0)
693 		error = HYPERVISOR_platform_op(&op);
694 
695 	export_buffer(&op_ie, &error);
696 	export_buffer(&sub_ie, &error);
697 	export_buffer(&sub2_ie, &error);
698 
699 	return (error);
700 }
701 
702 static int
703 privcmd_HYPERVISOR_memory_op(int cmd, void *arg)
704 {
705 	int error = 0;
706 	import_export_t op_ie, sub_ie, gpfn_ie, mfn_ie;
707 	union {
708 		domid_t domid;
709 		struct xen_memory_reservation resv;
710 		struct xen_machphys_mfn_list xmml;
711 		struct xen_add_to_physmap xatp;
712 		struct xen_memory_map mm;
713 		struct xen_foreign_memory_map fmm;
714 		struct xen_pod_target pd;
715 	} op_arg;
716 
717 	op_ie = sub_ie = gpfn_ie = mfn_ie = null_ie;
718 
719 	switch (cmd) {
720 	case XENMEM_increase_reservation:
721 	case XENMEM_decrease_reservation:
722 	case XENMEM_populate_physmap: {
723 		ulong_t *taddr;
724 
725 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.resv),
726 		    IE_IMPEXP) != 0)
727 			return (-X_EFAULT);
728 
729 		error = import_handle(&sub_ie, &op_arg.resv.extent_start,
730 		    (op_arg.resv.nr_extents * sizeof (ulong_t)), IE_IMPEXP);
731 
732 		if (error == -X_EFAULT)
733 			/*LINTED: constant in conditional context*/
734 			get_xen_guest_handle(taddr, op_arg.resv.extent_start);
735 		else
736 			taddr = sub_ie.ie_kaddr;
737 
738 		switch (cmd) {
739 		case XENMEM_increase_reservation:
740 			DTRACE_XPV4(increase__reservation__start,
741 			    domid_t, op_arg.resv.domid,
742 			    ulong_t, op_arg.resv.nr_extents,
743 			    uint_t, op_arg.resv.extent_order,
744 			    ulong_t *, taddr);
745 			break;
746 		case XENMEM_decrease_reservation:
747 			DTRACE_XPV4(decrease__reservation__start,
748 			    domid_t, op_arg.resv.domid,
749 			    ulong_t, op_arg.resv.nr_extents,
750 			    uint_t, op_arg.resv.extent_order,
751 			    ulong_t *, taddr);
752 			break;
753 		case XENMEM_populate_physmap:
754 			DTRACE_XPV3(populate__physmap__start,
755 			    domid_t, op_arg.resv.domid,
756 			    ulong_t, op_arg.resv.nr_extents,
757 			    ulong_t *, taddr);
758 			break;
759 		}
760 
761 		break;
762 	}
763 
764 	case XENMEM_maximum_ram_page:
765 		break;
766 
767 	case XENMEM_current_reservation:
768 	case XENMEM_maximum_reservation:
769 	case XENMEM_maximum_gpfn:
770 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.domid),
771 		    IE_IMPEXP) != 0)
772 			return (-X_EFAULT);
773 		break;
774 
775 	case XENMEM_machphys_mfn_list: {
776 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xmml),
777 		    IE_IMPEXP) != 0)
778 			return (-X_EFAULT);
779 
780 		error = import_handle(&sub_ie, &op_arg.xmml.extent_start,
781 		    (op_arg.xmml.max_extents * sizeof (ulong_t)), IE_IMPEXP);
782 		break;
783 	}
784 
785 	case XENMEM_add_to_physmap:
786 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.xatp),
787 		    IE_IMPEXP) != 0)
788 			return (-X_EFAULT);
789 		DTRACE_XPV4(add__to__physmap__start, domid_t,
790 		    op_arg.xatp.domid, uint_t, op_arg.xatp.space, ulong_t,
791 		    op_arg.xatp.idx, ulong_t, op_arg.xatp.gpfn);
792 		break;
793 
794 	case XENMEM_memory_map:
795 	case XENMEM_machine_memory_map: {
796 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.mm),
797 		    IE_EXPORT) != 0)
798 			return (-X_EFAULT);
799 
800 		/*
801 		 * XXPV: ugh. e820entry is packed, but not in the kernel, since
802 		 * we remove all attributes; seems like this is a nice way to
803 		 * break mysteriously.
804 		 */
805 		error = import_handle(&sub_ie, &op_arg.mm.buffer,
806 		    (op_arg.mm.nr_entries * 20), IE_IMPEXP);
807 		break;
808 	}
809 
810 	case XENMEM_set_memory_map: {
811 		struct xen_memory_map *taddr;
812 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.fmm),
813 		    IE_IMPORT) != 0)
814 			return (-X_EFAULT);
815 
816 		/*
817 		 * As above.
818 		 */
819 		error = import_handle(&sub_ie, &op_arg.fmm.map.buffer,
820 		    (op_arg.fmm.map.nr_entries * 20), IE_IMPEXP);
821 
822 		if (error == -X_EFAULT)
823 			/*LINTED: constant in conditional context*/
824 			get_xen_guest_handle(taddr, op_arg.fmm.map.buffer);
825 		else
826 			taddr = sub_ie.ie_kaddr;
827 		DTRACE_XPV3(set__memory__map__start, domid_t,
828 		    op_arg.fmm.domid, int, op_arg.fmm.map.nr_entries,
829 		    struct xen_memory_map *, taddr);
830 		break;
831 	}
832 
833 	case XENMEM_set_pod_target:
834 	case XENMEM_get_pod_target:
835 		if (import_buffer(&op_ie, arg, &op_arg, sizeof (op_arg.pd),
836 		    IE_IMPEXP) != 0)
837 			return (-X_EFAULT);
838 		break;
839 
840 	default:
841 #ifdef DEBUG
842 		printf("unrecognized HYPERVISOR_memory_op %d\n", cmd);
843 #endif
844 		return (-X_EINVAL);
845 	}
846 
847 	if (error == 0)
848 		error = HYPERVISOR_memory_op(cmd,
849 		    (arg == NULL) ? NULL: &op_arg);
850 
851 	export_buffer(&op_ie, &error);
852 	export_buffer(&sub_ie, &error);
853 	export_buffer(&gpfn_ie, &error);
854 	export_buffer(&mfn_ie, &error);
855 
856 	switch (cmd) {
857 	case XENMEM_increase_reservation:
858 		DTRACE_XPV1(increase__reservation__end, int, error);
859 		break;
860 	case XENMEM_decrease_reservation:
861 		DTRACE_XPV1(decrease__reservation__end, int, error);
862 		break;
863 	case XENMEM_populate_physmap:
864 		DTRACE_XPV1(populate__physmap__end, int, error);
865 		break;
866 	case XENMEM_add_to_physmap:
867 		DTRACE_XPV1(add__to__physmap__end, int, error);
868 		break;
869 	case XENMEM_set_memory_map:
870 		DTRACE_XPV1(set__memory__map__end, int, error);
871 		break;
872 	}
873 	return (error);
874 }
875 
876 static int
877 privcmd_HYPERVISOR_event_channel_op(int cmd, void *arg)
878 {
879 	int error;
880 	size_t size;
881 	import_export_t op_ie;
882 	uint32_t flags;
883 
884 	switch (cmd) {
885 	case EVTCHNOP_alloc_unbound:
886 		size = sizeof (evtchn_alloc_unbound_t);
887 		flags = IE_IMPEXP;
888 		break;
889 	case EVTCHNOP_bind_interdomain:
890 		size = sizeof (evtchn_bind_interdomain_t);
891 		flags = IE_IMPEXP;
892 		break;
893 	case EVTCHNOP_bind_virq:
894 		size = sizeof (evtchn_bind_virq_t);
895 		flags = IE_IMPEXP;
896 		break;
897 	case EVTCHNOP_bind_pirq:
898 		size = sizeof (evtchn_bind_pirq_t);
899 		flags = IE_IMPEXP;
900 		break;
901 	case EVTCHNOP_bind_ipi:
902 		size = sizeof (evtchn_bind_ipi_t);
903 		flags = IE_IMPEXP;
904 		break;
905 	case EVTCHNOP_close:
906 		size = sizeof (evtchn_close_t);
907 		flags = IE_IMPORT;
908 		break;
909 	case EVTCHNOP_send:
910 		size = sizeof (evtchn_send_t);
911 		flags = IE_IMPORT;
912 		break;
913 	case EVTCHNOP_status:
914 		size = sizeof (evtchn_status_t);
915 		flags = IE_IMPEXP;
916 		break;
917 	case EVTCHNOP_bind_vcpu:
918 		size = sizeof (evtchn_bind_vcpu_t);
919 		flags = IE_IMPORT;
920 		break;
921 	case EVTCHNOP_unmask:
922 		size = sizeof (evtchn_unmask_t);
923 		flags = IE_IMPORT;
924 		break;
925 	case EVTCHNOP_reset:
926 		size = sizeof (evtchn_reset_t);
927 		flags = IE_IMPORT;
928 		break;
929 
930 	default:
931 #ifdef DEBUG
932 		printf("unrecognized HYPERVISOR_event_channel op %d\n", cmd);
933 #endif
934 		return (-X_EINVAL);
935 	}
936 
937 	error = import_buffer(&op_ie, arg, NULL, size, flags);
938 
939 	/*
940 	 * If there is sufficient demand, we can replace this void * with
941 	 * the proper op structure pointer.
942 	 */
943 	DTRACE_XPV2(evtchn__op__start, int, cmd, void *,
944 	    ((error == -X_EFAULT) ? arg : op_ie.ie_kaddr));
945 
946 	if (error == 0)
947 		error = HYPERVISOR_event_channel_op(cmd, op_ie.ie_kaddr);
948 	export_buffer(&op_ie, &error);
949 
950 	DTRACE_XPV1(evtchn__op__end, int, error);
951 
952 	return (error);
953 }
954 
955 static int
956 privcmd_HYPERVISOR_xen_version(int cmd, void *arg)
957 {
958 	int error;
959 	int size = 0;
960 	import_export_t op_ie;
961 	uint32_t flags = IE_EXPORT;
962 
963 	switch (cmd) {
964 	case XENVER_version:
965 		break;
966 	case XENVER_extraversion:
967 		size = sizeof (xen_extraversion_t);
968 		break;
969 	case XENVER_compile_info:
970 		size = sizeof (xen_compile_info_t);
971 		break;
972 	case XENVER_capabilities:
973 		size = sizeof (xen_capabilities_info_t);
974 		break;
975 	case XENVER_changeset:
976 		size = sizeof (xen_changeset_info_t);
977 		break;
978 	case XENVER_platform_parameters:
979 		size = sizeof (xen_platform_parameters_t);
980 		break;
981 	case XENVER_get_features:
982 		flags = IE_IMPEXP;
983 		size = sizeof (xen_feature_info_t);
984 		break;
985 	case XENVER_pagesize:
986 		break;
987 	case XENVER_guest_handle:
988 		size = sizeof (xen_domain_handle_t);
989 		break;
990 
991 	default:
992 #ifdef DEBUG
993 		printf("unrecognized HYPERVISOR_xen_version op %d\n", cmd);
994 #endif
995 		return (-X_EINVAL);
996 	}
997 
998 	error = import_buffer(&op_ie, arg, NULL, size, flags);
999 	if (error == 0)
1000 		error = HYPERVISOR_xen_version(cmd, op_ie.ie_kaddr);
1001 	export_buffer(&op_ie, &error);
1002 
1003 	return (error);
1004 }
1005 
1006 static int
1007 privcmd_HYPERVISOR_xsm_op(void *uacmctl)
1008 {
1009 	int error;
1010 	struct xen_acmctl *acmctl;
1011 	import_export_t op_ie;
1012 
1013 	error = import_buffer(&op_ie, uacmctl, NULL, sizeof (*acmctl),
1014 	    IE_IMPEXP);
1015 	if (error != 0)
1016 		return (error);
1017 
1018 	acmctl = op_ie.ie_kaddr;
1019 
1020 	if (acmctl->interface_version != ACM_INTERFACE_VERSION) {
1021 #ifdef DEBUG
1022 		printf("acm vers mismatch (cmd %d, found 0x%x, need 0x%x\n",
1023 		    acmctl->cmd, acmctl->interface_version,
1024 		    ACM_INTERFACE_VERSION);
1025 #endif
1026 		error = -X_EACCES;
1027 		export_buffer(&op_ie, &error);
1028 		return (error);
1029 	}
1030 
1031 	/* FIXME: flask ops??? */
1032 
1033 	switch (acmctl->cmd) {
1034 	case ACMOP_setpolicy:
1035 	case ACMOP_getpolicy:
1036 	case ACMOP_dumpstats:
1037 	case ACMOP_getssid:
1038 	case ACMOP_getdecision:
1039 	case ACMOP_chgpolicy:
1040 	case ACMOP_relabeldoms:
1041 		/* flags = IE_IMPEXP; */
1042 		break;
1043 	default:
1044 #ifdef DEBUG
1045 		printf("unrecognized HYPERVISOR_xsm_op op %d\n", acmctl->cmd);
1046 #endif
1047 		return (-X_EINVAL);
1048 	}
1049 
1050 	if (error == 0)
1051 		error = HYPERVISOR_xsm_op(acmctl);
1052 	export_buffer(&op_ie, &error);
1053 
1054 	return (error);
1055 }
1056 
1057 static int
1058 privcmd_HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, uint_t *scount,
1059     domid_t domid)
1060 {
1061 	int error, bytes;
1062 	uint_t kscount;
1063 	struct mmuext_op *kop, single_kop;
1064 	import_export_t op_ie, scnt_ie;
1065 
1066 	kop = NULL;
1067 	op_ie = scnt_ie = null_ie;
1068 	error = 0;
1069 
1070 	if (count >= 1) {
1071 		bytes = count * sizeof (*kop);
1072 		kop = (count == 1) ? &single_kop : kmem_alloc(bytes, KM_SLEEP);
1073 		error = import_buffer(&op_ie, op, kop, bytes, IE_IMPORT);
1074 	}
1075 
1076 	DTRACE_XPV2(mmu__ext__op__start, int, count, struct mmuext_op *,
1077 	    ((error == -X_EFAULT) ? op : kop));
1078 
1079 	if (scount != NULL && error == 0)
1080 		error = import_buffer(&scnt_ie, scount, &kscount,
1081 		    sizeof (kscount), IE_EXPORT);
1082 
1083 	if (error == 0)
1084 		error = HYPERVISOR_mmuext_op(kop, count, &kscount, domid);
1085 	export_buffer(&op_ie, &error);
1086 	export_buffer(&scnt_ie, &error);
1087 
1088 	DTRACE_XPV1(mmu__ext__op__end, int, error);
1089 
1090 	if (count > 1)
1091 		kmem_free(kop, bytes);
1092 	return (error);
1093 }
1094 
1095 static int
1096 privcmd_HYPERVISOR_hvm_op(int cmd, void *arg)
1097 {
1098 	int error;
1099 	int size = 0;
1100 	import_export_t arg_ie;
1101 	uint32_t flags = IE_IMPORT;
1102 
1103 	switch (cmd) {
1104 	case HVMOP_set_param:
1105 	case HVMOP_get_param:
1106 		size = sizeof (struct xen_hvm_param);
1107 		flags = IE_IMPEXP;
1108 		break;
1109 	case HVMOP_set_pci_intx_level:
1110 		size = sizeof (struct xen_hvm_set_pci_intx_level);
1111 		break;
1112 	case HVMOP_set_isa_irq_level:
1113 		size = sizeof (struct xen_hvm_set_isa_irq_level);
1114 		break;
1115 	case HVMOP_set_pci_link_route:
1116 		size = sizeof (struct xen_hvm_set_pci_link_route);
1117 		break;
1118 	case HVMOP_track_dirty_vram:
1119 		size = sizeof (struct xen_hvm_track_dirty_vram);
1120 		break;
1121 	case HVMOP_modified_memory:
1122 		size = sizeof (struct xen_hvm_modified_memory);
1123 		break;
1124 	case HVMOP_set_mem_type:
1125 		size = sizeof (struct xen_hvm_set_mem_type);
1126 		break;
1127 
1128 	default:
1129 #ifdef DEBUG
1130 		printf("unrecognized HVM op 0x%x\n", cmd);
1131 #endif
1132 		return (-X_EINVAL);
1133 	}
1134 
1135 	error = import_buffer(&arg_ie, arg, NULL, size, flags);
1136 	if (error == 0)
1137 		error = HYPERVISOR_hvm_op(cmd, arg_ie.ie_kaddr);
1138 	export_buffer(&arg_ie, &error);
1139 
1140 	return (error);
1141 }
1142 
1143 static int
1144 privcmd_HYPERVISOR_sched_op(int cmd, void *arg)
1145 {
1146 	int error;
1147 	int size = 0;
1148 	import_export_t op_ie;
1149 	struct sched_remote_shutdown op;
1150 
1151 	switch (cmd) {
1152 	case SCHEDOP_remote_shutdown:
1153 		size = sizeof (struct sched_remote_shutdown);
1154 		break;
1155 	default:
1156 #ifdef DEBUG
1157 		printf("unrecognized sched op 0x%x\n", cmd);
1158 #endif
1159 		return (-X_EINVAL);
1160 	}
1161 
1162 	error = import_buffer(&op_ie, arg, &op, size, IE_IMPORT);
1163 	if (error == 0)
1164 		error = HYPERVISOR_sched_op(cmd, (arg == NULL) ? NULL : &op);
1165 	export_buffer(&op_ie, &error);
1166 
1167 	return (error);
1168 }
1169 
1170 int allow_all_hypercalls = 0;
1171 int privcmd_efault_debug = 0;
1172 
1173 /*ARGSUSED*/
1174 int
1175 do_privcmd_hypercall(void *uarg, int mode, cred_t *cr, int *rval)
1176 {
1177 	privcmd_hypercall_t __hc, *hc = &__hc;
1178 	int error;
1179 
1180 	if (ddi_copyin(uarg, hc, sizeof (*hc), mode))
1181 		return (EFAULT);
1182 
1183 	switch (hc->op) {
1184 	case __HYPERVISOR_mmu_update:
1185 		error = privcmd_HYPERVISOR_mmu_update(
1186 		    (mmu_update_t *)hc->arg[0], (int)hc->arg[1],
1187 		    (int *)hc->arg[2], (domid_t)hc->arg[3]);
1188 		break;
1189 	case __HYPERVISOR_domctl:
1190 		error = privcmd_HYPERVISOR_domctl(
1191 		    (xen_domctl_t *)hc->arg[0]);
1192 		break;
1193 	case __HYPERVISOR_sysctl:
1194 		error = privcmd_HYPERVISOR_sysctl(
1195 		    (xen_sysctl_t *)hc->arg[0]);
1196 		break;
1197 	case __HYPERVISOR_platform_op:
1198 		error = privcmd_HYPERVISOR_platform_op(
1199 		    (xen_platform_op_t *)hc->arg[0]);
1200 		break;
1201 	case __HYPERVISOR_memory_op:
1202 		error = privcmd_HYPERVISOR_memory_op(
1203 		    (int)hc->arg[0], (void *)hc->arg[1]);
1204 		break;
1205 	case __HYPERVISOR_event_channel_op:
1206 		error = privcmd_HYPERVISOR_event_channel_op(
1207 		    (int)hc->arg[0], (void *)hc->arg[1]);
1208 		break;
1209 	case __HYPERVISOR_xen_version:
1210 		error = privcmd_HYPERVISOR_xen_version(
1211 		    (int)hc->arg[0], (void *)hc->arg[1]);
1212 		break;
1213 	case __HYPERVISOR_mmuext_op:
1214 		error = privcmd_HYPERVISOR_mmuext_op(
1215 		    (struct mmuext_op *)hc->arg[0], (int)hc->arg[1],
1216 		    (uint_t *)hc->arg[2], (domid_t)hc->arg[3]);
1217 		break;
1218 	case __HYPERVISOR_xsm_op:
1219 		error = privcmd_HYPERVISOR_xsm_op((void *)hc->arg[0]);
1220 		break;
1221 	case __HYPERVISOR_hvm_op:
1222 		error = privcmd_HYPERVISOR_hvm_op(
1223 		    (int)hc->arg[0], (void *)hc->arg[1]);
1224 		break;
1225 	case __HYPERVISOR_sched_op:
1226 		error = privcmd_HYPERVISOR_sched_op(
1227 		    (int)hc->arg[0], (void *)hc->arg[1]);
1228 		break;
1229 	default:
1230 		if (allow_all_hypercalls)
1231 			error = __hypercall5(hc->op, hc->arg[0], hc->arg[1],
1232 			    hc->arg[2], hc->arg[3], hc->arg[4]);
1233 		else {
1234 #ifdef DEBUG
1235 			printf("unrecognized hypercall %ld\n", hc->op);
1236 #endif
1237 			error = -X_EPERM;
1238 		}
1239 		break;
1240 	}
1241 
1242 	if (error > 0) {
1243 		*rval = error;
1244 		error = 0;
1245 	} else if (error != 0)
1246 		error = xen_xlate_errcode(error);
1247 
1248 	return (error);
1249 }
1250