xref: /illumos-gate/usr/src/uts/i86pc/io/xsvc/xsvc.c (revision 12042ab213b3af68474f48555504db816a449211)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2016 Joyent, Inc.
27  */
28 
29 #include <sys/errno.h>
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/kmem.h>
33 #include <sys/ddi.h>
34 #include <sys/stat.h>
35 #include <sys/sunddi.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/modctl.h>
39 #include <sys/ddi_impldefs.h>
40 #include <vm/seg_kmem.h>
41 #include <sys/vmsystm.h>
42 #include <sys/sysmacros.h>
43 #include <sys/ddidevmap.h>
44 #include <sys/avl.h>
45 #ifdef __xpv
46 #include <sys/hypervisor.h>
47 #endif
48 
49 #include <sys/xsvc.h>
50 
51 /* total max memory which can be alloced with ioctl interface */
52 uint64_t xsvc_max_memory = 10 * 1024 * 1024;
53 
54 extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
55 
56 
57 static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
58 static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
59 static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
60     int *rval);
61 static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
62     size_t *maplen, uint_t model);
63 static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
64 static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
65 static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
66     void **result);
67 
68 static struct cb_ops xsvc_cb_ops = {
69 	xsvc_open,		/* cb_open */
70 	xsvc_close,		/* cb_close */
71 	nodev,			/* cb_strategy */
72 	nodev,			/* cb_print */
73 	nodev,			/* cb_dump */
74 	nodev,			/* cb_read */
75 	nodev,			/* cb_write */
76 	xsvc_ioctl,		/* cb_ioctl */
77 	xsvc_devmap,		/* cb_devmap */
78 	NULL,			/* cb_mmap */
79 	NULL,			/* cb_segmap */
80 	nochpoll,		/* cb_chpoll */
81 	ddi_prop_op,		/* cb_prop_op */
82 	NULL,			/* cb_stream */
83 	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
84 	CB_REV
85 };
86 
87 static struct dev_ops xsvc_dev_ops = {
88 	DEVO_REV,		/* devo_rev */
89 	0,			/* devo_refcnt */
90 	xsvc_getinfo,		/* devo_getinfo */
91 	nulldev,		/* devo_identify */
92 	nulldev,		/* devo_probe */
93 	xsvc_attach,		/* devo_attach */
94 	xsvc_detach,		/* devo_detach */
95 	nodev,			/* devo_reset */
96 	&xsvc_cb_ops,		/* devo_cb_ops */
97 	NULL,			/* devo_bus_ops */
98 	NULL,			/* power */
99 	ddi_quiesce_not_needed,		/* quiesce */
100 };
101 
102 static struct modldrv xsvc_modldrv = {
103 	&mod_driverops,		/* Type of module.  This one is a driver */
104 	"xsvc driver",		/* Name of the module. */
105 	&xsvc_dev_ops,		/* driver ops */
106 };
107 
108 static struct modlinkage xsvc_modlinkage = {
109 	MODREV_1,
110 	(void *) &xsvc_modldrv,
111 	NULL
112 };
113 
114 
115 static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
116 static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
117 static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
118 static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
119     xsvc_mem_t **mp);
120 static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
121 static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
122     uint64_t key);
123 static int xsvc_mnode_key_compare(const void *q, const void *e);
124 static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
125     ddi_umem_cookie_t *cookiep);
126 static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);
127 
128 
129 void *xsvc_statep;
130 
131 static ddi_device_acc_attr_t xsvc_device_attr = {
132 	DDI_DEVICE_ATTR_V0,
133 	DDI_NEVERSWAP_ACC,
134 	DDI_STRICTORDER_ACC
135 };
136 
137 static int xsvc_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
138     offset_t off, size_t len, void **pvtp);
139 static int xsvc_devmap_dup(devmap_cookie_t dhp, void *pvtp,
140     devmap_cookie_t new_dhp, void **new_pvtp);
141 static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
142     size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
143     devmap_cookie_t new_dhp2, void **new_pvtp2);
144 
145 
146 static struct devmap_callback_ctl xsvc_callbk = {
147 	DEVMAP_OPS_REV,
148 	xsvc_devmap_map,
149 	NULL,
150 	xsvc_devmap_dup,
151 	xsvc_devmap_unmap
152 };
153 
154 
155 /*
156  * _init()
157  *
158  */
159 int
160 _init(void)
161 {
162 	int err;
163 
164 	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
165 	if (err != 0) {
166 		return (err);
167 	}
168 
169 	err = mod_install(&xsvc_modlinkage);
170 	if (err != 0) {
171 		ddi_soft_state_fini(&xsvc_statep);
172 		return (err);
173 	}
174 
175 	return (0);
176 }
177 
178 /*
179  * _info()
180  *
181  */
182 int
183 _info(struct modinfo *modinfop)
184 {
185 	return (mod_info(&xsvc_modlinkage, modinfop));
186 }
187 
188 /*
189  * _fini()
190  *
191  */
192 int
193 _fini(void)
194 {
195 	int err;
196 
197 	err = mod_remove(&xsvc_modlinkage);
198 	if (err != 0) {
199 		return (err);
200 	}
201 
202 	ddi_soft_state_fini(&xsvc_statep);
203 
204 	return (0);
205 }
206 
207 /*
208  * xsvc_attach()
209  *
210  */
211 static int
212 xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
213 {
214 	xsvc_state_t *state;
215 	int maxallocmem;
216 	int instance;
217 	int err;
218 
219 
220 	switch (cmd) {
221 	case DDI_ATTACH:
222 		break;
223 
224 	case DDI_RESUME:
225 		return (DDI_SUCCESS);
226 
227 	default:
228 		return (DDI_FAILURE);
229 	}
230 
231 	instance = ddi_get_instance(dip);
232 	err = ddi_soft_state_zalloc(xsvc_statep, instance);
233 	if (err != DDI_SUCCESS) {
234 		return (DDI_FAILURE);
235 	}
236 	state = ddi_get_soft_state(xsvc_statep, instance);
237 	if (state == NULL) {
238 		goto attachfail_get_soft_state;
239 	}
240 
241 	state->xs_dip = dip;
242 	state->xs_instance = instance;
243 
244 	/* Initialize allocation count */
245 	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
246 	state->xs_currently_alloced = 0;
247 
248 	mutex_init(&state->xs_cookie_mutex, NULL, MUTEX_DRIVER, NULL);
249 
250 	/* create the minor node (for the ioctl) */
251 	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
252 	    0);
253 	if (err != DDI_SUCCESS) {
254 		goto attachfail_minor_node;
255 	}
256 
257 	/*
258 	 * the maxallocmem property will override the default (xsvc_max_memory).
259 	 * This is the maximum total memory the ioctl will allow to be alloced.
260 	 */
261 	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
262 	    DDI_PROP_DONTPASS, "maxallocmem", -1);
263 	if (maxallocmem >= 0) {
264 		xsvc_max_memory = maxallocmem * 1024;
265 	}
266 
267 	/* Initialize list of memory allocs */
268 	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
269 	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
270 	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));
271 
272 	/* Report that driver was loaded */
273 	ddi_report_dev(dip);
274 
275 	return (DDI_SUCCESS);
276 
277 attachfail_minor_node:
278 	mutex_destroy(&state->xs_cookie_mutex);
279 	mutex_destroy(&state->xs_mutex);
280 attachfail_get_soft_state:
281 	(void) ddi_soft_state_free(xsvc_statep, instance);
282 
283 	return (err);
284 }
285 
286 /*
287  * xsvc_detach()
288  *
289  */
290 static int
291 xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
292 {
293 	xsvc_state_t *state;
294 	xsvc_mnode_t *mnode;
295 	xsvc_mem_t *mp;
296 	int instance;
297 
298 
299 	instance = ddi_get_instance(dip);
300 	state = ddi_get_soft_state(xsvc_statep, instance);
301 	if (state == NULL) {
302 		return (DDI_FAILURE);
303 	}
304 
305 	switch (cmd) {
306 	case DDI_DETACH:
307 		break;
308 
309 	case DDI_SUSPEND:
310 		return (DDI_SUCCESS);
311 
312 	default:
313 		return (DDI_FAILURE);
314 	}
315 
316 	ddi_remove_minor_node(dip, NULL);
317 
318 	/* Free any memory on list */
319 	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
320 		mp = mnode->mn_home;
321 		xsvc_mem_free(state, mp);
322 	}
323 
324 	/* remove list */
325 	avl_destroy(&state->xs_mlist.ml_avl);
326 	mutex_destroy(&state->xs_mlist.ml_mutex);
327 
328 	mutex_destroy(&state->xs_cookie_mutex);
329 	mutex_destroy(&state->xs_mutex);
330 	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
331 	return (DDI_SUCCESS);
332 }
333 
334 /*
335  * xsvc_getinfo()
336  *
337  */
338 /*ARGSUSED*/
339 static int
340 xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
341 {
342 	xsvc_state_t *state;
343 	int instance;
344 	dev_t dev;
345 	int err;
346 
347 
348 	dev = (dev_t)arg;
349 	instance = getminor(dev);
350 
351 	switch (cmd) {
352 	case DDI_INFO_DEVT2DEVINFO:
353 		state = ddi_get_soft_state(xsvc_statep, instance);
354 		if (state == NULL) {
355 			return (DDI_FAILURE);
356 		}
357 		*result = (void *)state->xs_dip;
358 		err = DDI_SUCCESS;
359 		break;
360 
361 	case DDI_INFO_DEVT2INSTANCE:
362 		*result = (void *)(uintptr_t)instance;
363 		err = DDI_SUCCESS;
364 		break;
365 
366 	default:
367 		err = DDI_FAILURE;
368 		break;
369 	}
370 
371 	return (err);
372 }
373 
374 
375 /*
376  * xsvc_open()
377  *
378  */
379 /*ARGSUSED*/
380 static int
381 xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
382 {
383 	xsvc_state_t *state;
384 	int instance;
385 
386 	instance = getminor(*devp);
387 	state = ddi_get_soft_state(xsvc_statep, instance);
388 	if (state == NULL) {
389 		return (ENXIO);
390 	}
391 
392 	return (0);
393 }
394 
395 /*
396  * xsvc_close()
397  *
398  */
399 /*ARGSUSED*/
400 static int
401 xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
402 {
403 	return (0);
404 }
405 
406 /*
407  * xsvc_ioctl()
408  *
409  */
410 /*ARGSUSED*/
411 static int
412 xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
413 {
414 	xsvc_state_t *state;
415 	int instance;
416 	int err;
417 
418 
419 	err = drv_priv(cred);
420 	if (err != 0) {
421 		return (EPERM);
422 	}
423 	instance = getminor(dev);
424 	if (instance == -1) {
425 		return (EBADF);
426 	}
427 	state = ddi_get_soft_state(xsvc_statep, instance);
428 	if (state == NULL) {
429 		return (EBADF);
430 	}
431 
432 	switch (cmd) {
433 	case XSVC_ALLOC_MEM:
434 		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
435 		break;
436 
437 	case XSVC_FREE_MEM:
438 		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
439 		break;
440 
441 	case XSVC_FLUSH_MEM:
442 		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
443 		break;
444 
445 	default:
446 		err = ENXIO;
447 	}
448 
449 	return (err);
450 }
451 
452 /*
453  * xsvc_ioctl_alloc_memory()
454  *
455  */
456 static int
457 xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
458 {
459 	xsvc_mem_req_32 params32;
460 	xsvc_mloc_32 *usgl32;
461 	xsvc_mem_req params;
462 	xsvc_mloc_32 sgl32;
463 	xsvc_mloc *usgl;
464 	xsvc_mem_t *mp;
465 	xsvc_mloc sgl;
466 	uint64_t key;
467 	size_t size;
468 	int err;
469 	int i;
470 
471 	/* Copy in the params, then get the size and key */
472 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
473 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
474 		    mode);
475 		if (err != 0) {
476 			return (EFAULT);
477 		}
478 
479 		key = (uint64_t)params32.xsvc_mem_reqid;
480 		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
481 	} else {
482 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
483 		if (err != 0) {
484 			return (EFAULT);
485 		}
486 		key = (uint64_t)params.xsvc_mem_reqid;
487 		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
488 	}
489 
490 	/*
491 	 * make sure this doesn't put us over the maximum allowed to be
492 	 * allocated
493 	 */
494 	mutex_enter(&state->xs_mutex);
495 	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
496 		mutex_exit(&state->xs_mutex);
497 		return (EAGAIN);
498 	}
499 	state->xs_currently_alloced += size;
500 	mutex_exit(&state->xs_mutex);
501 
502 	/* get state to track this memory */
503 	err = xsvc_mem_alloc(state, key, &mp);
504 	if (err != 0) {
505 		return (err);
506 	}
507 	mp->xm_size = size;
508 
509 	/* allocate and bind the memory */
510 	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
511 	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
512 	mp->xm_dma_attr.dma_attr_burstsizes = 1;
513 	mp->xm_dma_attr.dma_attr_minxfer = 1;
514 	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
515 	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
516 	mp->xm_dma_attr.dma_attr_granular = 1;
517 	mp->xm_dma_attr.dma_attr_flags = 0;
518 
519 	/* Finish converting params */
520 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
521 		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
522 		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
523 		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
524 		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
525 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
526 		    params32.xsvc_mem_align, PAGESIZE);
527 		usgl = NULL;
528 	} else {
529 		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
530 		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
531 		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
532 		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
533 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
534 		    params.xsvc_mem_align, PAGESIZE);
535 		usgl32 = NULL;
536 	}
537 
538 	mp->xm_device_attr = xsvc_device_attr;
539 
540 	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
541 	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
542 	if (err != DDI_SUCCESS) {
543 		err = EINVAL;
544 		goto allocfail_alloc_handle;
545 	}
546 
547 	/* don't sleep here so we don't get stuck in contig alloc */
548 	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
549 	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
550 	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
551 	if (err != DDI_SUCCESS) {
552 		err = EINVAL;
553 		goto allocfail_alloc_mem;
554 	}
555 
556 	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
557 	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
558 	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
559 	if (err != DDI_DMA_MAPPED) {
560 		err = EFAULT;
561 		goto allocfail_bind;
562 	}
563 
564 	/* return sgl */
565 	for (i = 0; i < mp->xm_cookie_count; i++) {
566 		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
567 			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
568 			sgl32.mloc_size = mp->xm_cookie.dmac_size;
569 			err = ddi_copyout(&sgl32, &usgl32[i],
570 			    sizeof (xsvc_mloc_32), mode);
571 			if (err != 0) {
572 				err = EFAULT;
573 				goto allocfail_copyout;
574 			}
575 		} else {
576 			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
577 			sgl.mloc_size = mp->xm_cookie.dmac_size;
578 			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
579 			    mode);
580 			if (err != 0) {
581 				err = EFAULT;
582 				goto allocfail_copyout;
583 			}
584 		}
585 		ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
586 	}
587 
588 	/* set the last sgl entry to 0 to indicate cookie count */
589 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
590 		sgl32.mloc_addr = 0;
591 		sgl32.mloc_size = 0;
592 		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
593 		    mode);
594 		if (err != 0) {
595 			err = EFAULT;
596 			goto allocfail_copyout;
597 		}
598 	} else {
599 		sgl.mloc_addr = 0;
600 		sgl.mloc_size = 0;
601 		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
602 		if (err != 0) {
603 			err = EFAULT;
604 			goto allocfail_copyout;
605 		}
606 	}
607 
608 	return (0);
609 
610 allocfail_copyout:
611 	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
612 allocfail_bind:
613 	ddi_dma_mem_free(&mp->xm_mem_handle);
614 allocfail_alloc_mem:
615 	ddi_dma_free_handle(&mp->xm_dma_handle);
616 allocfail_alloc_handle:
617 	mp->xm_dma_handle = NULL;
618 	xsvc_mem_free(state, mp);
619 
620 	mutex_enter(&state->xs_mutex);
621 	state->xs_currently_alloced = state->xs_currently_alloced - size;
622 	mutex_exit(&state->xs_mutex);
623 
624 	return (err);
625 }
626 
627 /*
628  * xsvc_ioctl_flush_memory()
629  *
630  */
631 static int
632 xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
633 {
634 	xsvc_mem_req_32 params32;
635 	xsvc_mem_req params;
636 	xsvc_mem_t *mp;
637 	uint64_t key;
638 	int err;
639 
640 
641 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
642 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
643 		    mode);
644 		if (err != 0) {
645 			return (EFAULT);
646 		}
647 		key = (uint64_t)params32.xsvc_mem_reqid;
648 	} else {
649 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
650 		if (err != 0) {
651 			return (EFAULT);
652 		}
653 		key = (uint64_t)params.xsvc_mem_reqid;
654 	}
655 
656 	/* find the memory */
657 	mp = xsvc_mem_lookup(state, key);
658 	if (mp == NULL) {
659 		return (EINVAL);
660 	}
661 
662 	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);
663 
664 	return (0);
665 }
666 
667 
668 /*
669  * xsvc_ioctl_free_memory()
670  *
671  */
672 static int
673 xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
674 {
675 	xsvc_mem_req_32 params32;
676 	xsvc_mem_req params;
677 	xsvc_mem_t *mp;
678 	uint64_t key;
679 	int err;
680 
681 
682 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
683 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
684 		    mode);
685 		if (err != 0) {
686 			return (EFAULT);
687 		}
688 		key = (uint64_t)params32.xsvc_mem_reqid;
689 	} else {
690 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
691 		if (err != 0) {
692 			return (EFAULT);
693 		}
694 		key = (uint64_t)params.xsvc_mem_reqid;
695 	}
696 
697 	/* find the memory */
698 	mp = xsvc_mem_lookup(state, key);
699 	if (mp == NULL) {
700 		return (EINVAL);
701 	}
702 
703 	xsvc_mem_free(state, mp);
704 
705 	return (0);
706 }
707 
708 /*
709  * xsvc_mem_alloc()
710  *
711  */
712 static int
713 xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
714 {
715 	xsvc_mem_t *mem;
716 
717 	mem = xsvc_mem_lookup(state, key);
718 	if (mem != NULL) {
719 		xsvc_mem_free(state, mem);
720 	}
721 
722 	*mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
723 	(*mp)->xm_mnode.mn_home = *mp;
724 	(*mp)->xm_mnode.mn_key = key;
725 
726 	mutex_enter(&state->xs_mlist.ml_mutex);
727 	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
728 	mutex_exit(&state->xs_mlist.ml_mutex);
729 
730 	return (0);
731 }
732 
733 /*
734  * xsvc_mem_free()
735  *
736  */
737 static void
738 xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
739 {
740 	if (mp->xm_dma_handle != NULL) {
741 		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
742 		ddi_dma_mem_free(&mp->xm_mem_handle);
743 		ddi_dma_free_handle(&mp->xm_dma_handle);
744 
745 		mutex_enter(&state->xs_mutex);
746 		state->xs_currently_alloced = state->xs_currently_alloced -
747 		    mp->xm_size;
748 		mutex_exit(&state->xs_mutex);
749 	}
750 
751 	mutex_enter(&state->xs_mlist.ml_mutex);
752 	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
753 	mutex_exit(&state->xs_mlist.ml_mutex);
754 
755 	kmem_free(mp, sizeof (*mp));
756 }
757 
758 /*
759  * xsvc_mem_lookup()
760  *
761  */
762 static xsvc_mem_t *
763 xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
764 {
765 	xsvc_mnode_t mnode;
766 	xsvc_mnode_t *mnp;
767 	avl_index_t where;
768 	xsvc_mem_t *mp;
769 
770 	mnode.mn_key = key;
771 	mutex_enter(&state->xs_mlist.ml_mutex);
772 	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
773 	mutex_exit(&state->xs_mlist.ml_mutex);
774 
775 	if (mnp != NULL) {
776 		mp = mnp->mn_home;
777 	} else {
778 		mp = NULL;
779 	}
780 
781 	return (mp);
782 }
783 
784 /*
785  * xsvc_mnode_key_compare()
786  *
787  */
788 static int
789 xsvc_mnode_key_compare(const void *q, const void *e)
790 {
791 	xsvc_mnode_t *n1;
792 	xsvc_mnode_t *n2;
793 
794 	n1 = (xsvc_mnode_t *)q;
795 	n2 = (xsvc_mnode_t *)e;
796 
797 	if (n1->mn_key < n2->mn_key) {
798 		return (-1);
799 	} else if (n1->mn_key > n2->mn_key) {
800 		return (1);
801 	} else {
802 		return (0);
803 	}
804 }
805 
806 /*
807  * xsvc_devmap()
808  *
809  */
810 /*ARGSUSED*/
811 static int
812 xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
813     size_t *maplen, uint_t model)
814 {
815 	ddi_umem_cookie_t cookie;
816 	xsvc_state_t *state;
817 	offset_t off_align;
818 	size_t npages;
819 	caddr_t kvai;
820 	size_t psize;
821 	int instance;
822 	caddr_t kva;
823 	pfn_t pfn;
824 	int err;
825 	int i;
826 
827 
828 	instance = getminor(dev);
829 	state = ddi_get_soft_state(xsvc_statep, instance);
830 	if (state == NULL) {
831 		return (ENXIO);
832 	}
833 
834 	/*
835 	 * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
836 	 * smmap32 will sign extend the offset. We need to undo that since
837 	 * we are passed a physical address in off, not a offset.
838 	 */
839 #if defined(__amd64)
840 	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
841 	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
842 		off = off & 0xFFFFFFFF;
843 	}
844 #endif
845 
846 #ifdef __xpv
847 	/*
848 	 * we won't allow guest OSes to devmap mfn/pfns. Maybe we'll relax
849 	 * this some later when there is a good reason.
850 	 */
851 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
852 		return (-1);
853 	}
854 
855 	/* we will always treat this as a foreign MFN */
856 	pfn = xen_assign_pfn(btop(off));
857 #else
858 	pfn = btop(off);
859 #endif
860 	/* always work with whole pages */
861 
862 	off_align = P2ALIGN(off, PAGESIZE);
863 	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
864 
865 	/*
866 	 * if this is memory we're trying to map into user space, we first
867 	 * need to map the PFNs into KVA, then build up a umem cookie, and
868 	 * finally do a umem_setup to map it in.
869 	 */
870 	if (pf_is_memory(pfn)) {
871 		npages = btop(psize);
872 
873 		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
874 		if (kva == NULL) {
875 			return (-1);
876 		}
877 
878 		kvai = kva;
879 		for (i = 0; i < npages; i++) {
880 			page_t *pp = page_numtopp_nolock(pfn);
881 
882 			/*
883 			 * Preemptively check for panic conditions from
884 			 * hat_devload and error out instead.
885 			 */
886 			if (pp != NULL && (PP_ISFREE(pp) ||
887 			    (!PAGE_LOCKED(pp) && !PP_ISNORELOC(pp)))) {
888 				err = DDI_FAILURE;
889 				npages = i;
890 				goto devmapfail_cookie_alloc;
891 			}
892 
893 			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
894 			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
895 			pfn++;
896 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
897 		}
898 
899 		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
900 		if (err != 0) {
901 			goto devmapfail_cookie_alloc;
902 		}
903 
904 		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
905 		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
906 			goto devmapfail_umem_setup;
907 		}
908 		*maplen = psize;
909 
910 	/*
911 	 * If this is not memory (or a foreign MFN in i86xpv), go through
912 	 * devmem_setup.
913 	 */
914 	} else {
915 		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
916 		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
917 			return (err);
918 		}
919 		*maplen = psize;
920 	}
921 
922 	return (0);
923 
924 devmapfail_umem_setup:
925 	xsvc_umem_cookie_free(&cookie);
926 
927 devmapfail_cookie_alloc:
928 	kvai = kva;
929 	for (i = 0; i < npages; i++) {
930 		hat_unload(kas.a_hat, kvai, PAGESIZE,
931 		    HAT_UNLOAD_UNLOCK);
932 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
933 	}
934 	vmem_free(heap_arena, kva, psize);
935 
936 	return (err);
937 }
938 
939 /*
940  * xsvc_umem_cookie_alloc()
941  *
942  *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
943  *   allocated.
944  */
945 int
946 xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
947     ddi_umem_cookie_t *cookiep)
948 {
949 	struct ddi_umem_cookie *umem_cookiep;
950 
951 	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
952 	if (umem_cookiep == NULL) {
953 		*cookiep = NULL;
954 		return (-1);
955 	}
956 
957 	umem_cookiep->cvaddr = kva;
958 	umem_cookiep->type = KMEM_NON_PAGEABLE;
959 	umem_cookiep->size = size;
960 	*cookiep = (ddi_umem_cookie_t *)umem_cookiep;
961 
962 	return (0);
963 }
964 
965 /*
966  * xsvc_umem_cookie_free()
967  *
968  */
969 static void
970 xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
971 {
972 	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
973 	*cookiep = NULL;
974 }
975 
976 
977 /*
978  * xsvc_devmap_map()
979  *
980  */
981 /*ARGSUSED*/
982 static int
983 xsvc_devmap_map(devmap_cookie_t dhc, dev_t dev, uint_t flags, offset_t off,
984     size_t len, void **pvtp)
985 {
986 	struct ddi_umem_cookie *cp;
987 	devmap_handle_t *dhp;
988 	xsvc_state_t *state;
989 	int instance;
990 
991 
992 	instance = getminor(dev);
993 	state = ddi_get_soft_state(xsvc_statep, instance);
994 	if (state == NULL) {
995 		return (ENXIO);
996 	}
997 
998 	dhp = (devmap_handle_t *)dhc;
999 	/* This driver only supports MAP_SHARED, not MAP_PRIVATE */
1000 	if (flags & MAP_PRIVATE) {
1001 		cmn_err(CE_WARN, "!xsvc driver doesn't support MAP_PRIVATE");
1002 		return (EINVAL);
1003 	}
1004 
1005 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1006 	cp->cook_refcnt = 1;
1007 
1008 	*pvtp = state;
1009 	return (0);
1010 }
1011 
1012 
1013 /*
1014  * xsvc_devmap_dup()
1015  *
1016  *   keep a reference count for forks so we don't unmap if we have multiple
1017  *   mappings.
1018  */
1019 /*ARGSUSED*/
1020 static int
1021 xsvc_devmap_dup(devmap_cookie_t dhc, void *pvtp, devmap_cookie_t new_dhp,
1022     void **new_pvtp)
1023 {
1024 	struct ddi_umem_cookie *cp;
1025 	devmap_handle_t *dhp;
1026 	xsvc_state_t *state;
1027 
1028 
1029 	state = (xsvc_state_t *)pvtp;
1030 	dhp = (devmap_handle_t *)dhc;
1031 
1032 	mutex_enter(&state->xs_cookie_mutex);
1033 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1034 	if (cp == NULL) {
1035 		mutex_exit(&state->xs_cookie_mutex);
1036 		return (ENOMEM);
1037 	}
1038 
1039 	cp->cook_refcnt++;
1040 	mutex_exit(&state->xs_cookie_mutex);
1041 
1042 	*new_pvtp = state;
1043 	return (0);
1044 }
1045 
1046 
1047 /*
1048  * xsvc_devmap_unmap()
1049  *
1050  *   This routine is only call if we were mapping in memory in xsvc_devmap().
1051  *   i.e. we only pass in xsvc_callbk to devmap_umem_setup if pf_is_memory()
1052  *   was true. It would have been nice if devmap_callback_ctl had an args param.
1053  *   We wouldn't have had to look into the devmap_handle and into the umem
1054  *   cookie.
1055  */
1056 /*ARGSUSED*/
1057 static void
1058 xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
1059     devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
1060     void **new_pvtp2)
1061 {
1062 	struct ddi_umem_cookie *ncp;
1063 	struct ddi_umem_cookie *cp;
1064 	devmap_handle_t *ndhp;
1065 	devmap_handle_t *dhp;
1066 	xsvc_state_t *state;
1067 	size_t npages;
1068 	caddr_t kvai;
1069 	caddr_t kva;
1070 	size_t size;
1071 	int i;
1072 
1073 
1074 	state = (xsvc_state_t *)pvtp;
1075 	mutex_enter(&state->xs_cookie_mutex);
1076 
1077 	/* peek into the umem cookie to figure out what we need to free up */
1078 	dhp = (devmap_handle_t *)dhc;
1079 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1080 	ASSERT(cp != NULL);
1081 
1082 	if (new_dhp1 != NULL) {
1083 		ndhp = (devmap_handle_t *)new_dhp1;
1084 		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
1085 		ncp->cook_refcnt++;
1086 		*new_pvtp1 = state;
1087 	}
1088 	if (new_dhp2 != NULL) {
1089 		ndhp = (devmap_handle_t *)new_dhp2;
1090 		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
1091 		ncp->cook_refcnt++;
1092 		*new_pvtp2 = state;
1093 	}
1094 
1095 	cp->cook_refcnt--;
1096 	if (cp->cook_refcnt == 0) {
1097 		kva = cp->cvaddr;
1098 		size = cp->size;
1099 
1100 		/*
1101 		 * free up the umem cookie, then unmap all the pages what we
1102 		 * mapped in during devmap, then free up the kva space.
1103 		 */
1104 		npages = btop(size);
1105 		xsvc_umem_cookie_free(&dhp->dh_cookie);
1106 		kvai = kva;
1107 		for (i = 0; i < npages; i++) {
1108 			hat_unload(kas.a_hat, kvai, PAGESIZE,
1109 			    HAT_UNLOAD_UNLOCK);
1110 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
1111 		}
1112 		vmem_free(heap_arena, kva, size);
1113 	}
1114 
1115 	mutex_exit(&state->xs_cookie_mutex);
1116 }
1117