xref: /illumos-gate/usr/src/uts/i86pc/io/xsvc/xsvc.c (revision 62c8caf3fac65817982e780c1efa988846153bf0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/errno.h>
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/kmem.h>
31 #include <sys/ddi.h>
32 #include <sys/stat.h>
33 #include <sys/sunddi.h>
34 #include <sys/file.h>
35 #include <sys/open.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi_impldefs.h>
38 #include <vm/seg_kmem.h>
39 #include <sys/vmsystm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/ddidevmap.h>
42 #include <sys/avl.h>
43 #ifdef __xpv
44 #include <sys/hypervisor.h>
45 #endif
46 
47 #include <sys/xsvc.h>
48 
/*
 * Upper bound, in bytes, on the total memory that may be outstanding through
 * the XSVC_ALLOC_MEM ioctl.  Defaults to 10 MB; xsvc_attach() replaces it
 * when the "maxallocmem" property is present.
 */
uint64_t xsvc_max_memory = 10ULL * 1024 * 1024;
51 
52 extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
53 
54 
55 static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
56 static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
57 static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
58     int *rval);
59 static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
60     size_t *maplen, uint_t model);
61 static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
62 static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
63 static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
64     void **result);
65 
66 static 	struct cb_ops xsvc_cb_ops = {
67 	xsvc_open,		/* cb_open */
68 	xsvc_close,		/* cb_close */
69 	nodev,			/* cb_strategy */
70 	nodev,			/* cb_print */
71 	nodev,			/* cb_dump */
72 	nodev,			/* cb_read */
73 	nodev,			/* cb_write */
74 	xsvc_ioctl,		/* cb_ioctl */
75 	xsvc_devmap,		/* cb_devmap */
76 	NULL,			/* cb_mmap */
77 	NULL,			/* cb_segmap */
78 	nochpoll,		/* cb_chpoll */
79 	ddi_prop_op,		/* cb_prop_op */
80 	NULL,			/* cb_stream */
81 	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
82 	CB_REV
83 };
84 
85 static struct dev_ops xsvc_dev_ops = {
86 	DEVO_REV,		/* devo_rev */
87 	0,			/* devo_refcnt */
88 	xsvc_getinfo,		/* devo_getinfo */
89 	nulldev,		/* devo_identify */
90 	nulldev,		/* devo_probe */
91 	xsvc_attach,		/* devo_attach */
92 	xsvc_detach,		/* devo_detach */
93 	nodev,			/* devo_reset */
94 	&xsvc_cb_ops,		/* devo_cb_ops */
95 	NULL,			/* devo_bus_ops */
96 	NULL			/* power */
97 };
98 
99 static struct modldrv xsvc_modldrv = {
100 	&mod_driverops,		/* Type of module.  This one is a driver */
101 	"xsvc driver",		/* Name of the module. */
102 	&xsvc_dev_ops,		/* driver ops */
103 };
104 
105 static struct modlinkage xsvc_modlinkage = {
106 	MODREV_1,
107 	(void *) &xsvc_modldrv,
108 	NULL
109 };
110 
111 
112 static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
113 static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
114 static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
115 static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
116     xsvc_mem_t **mp);
117 static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
118 static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
119     uint64_t key);
120 static int xsvc_mnode_key_compare(const void *q, const void *e);
121 static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
122     ddi_umem_cookie_t *cookiep);
123 static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);
124 static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
125     size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
126     devmap_cookie_t new_dhp2, void **new_pvtp2);
127 
128 
/* Soft state anchor; set up in _init() and torn down in _fini(). */
void *xsvc_statep = NULL;
130 
131 static ddi_device_acc_attr_t xsvc_device_attr = {
132 	DDI_DEVICE_ATTR_V0,
133 	DDI_NEVERSWAP_ACC,
134 	DDI_STRICTORDER_ACC
135 };
136 
137 static struct devmap_callback_ctl xsvc_callbk = {
138 	DEVMAP_OPS_REV,
139 	NULL,
140 	NULL,
141 	NULL,
142 	xsvc_devmap_unmap
143 };
144 
145 
146 /*
147  * _init()
148  *
149  */
150 int
151 _init(void)
152 {
153 	int err;
154 
155 	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
156 	if (err != 0) {
157 		return (err);
158 	}
159 
160 	err = mod_install(&xsvc_modlinkage);
161 	if (err != 0) {
162 		ddi_soft_state_fini(&xsvc_statep);
163 		return (err);
164 	}
165 
166 	return (0);
167 }
168 
169 /*
170  * _info()
171  *
172  */
173 int
174 _info(struct modinfo *modinfop)
175 {
176 	return (mod_info(&xsvc_modlinkage, modinfop));
177 }
178 
179 /*
180  * _fini()
181  *
182  */
183 int
184 _fini(void)
185 {
186 	int err;
187 
188 	err = mod_remove(&xsvc_modlinkage);
189 	if (err != 0) {
190 		return (err);
191 	}
192 
193 	ddi_soft_state_fini(&xsvc_statep);
194 
195 	return (0);
196 }
197 
198 /*
199  * xsvc_attach()
200  *
201  */
202 static int
203 xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
204 {
205 	xsvc_state_t *state;
206 	int maxallocmem;
207 	int instance;
208 	int err;
209 
210 
211 	switch (cmd) {
212 	case DDI_ATTACH:
213 		break;
214 
215 	case DDI_RESUME:
216 		return (DDI_SUCCESS);
217 
218 	default:
219 		return (DDI_FAILURE);
220 	}
221 
222 	instance = ddi_get_instance(dip);
223 	err = ddi_soft_state_zalloc(xsvc_statep, instance);
224 	if (err != DDI_SUCCESS) {
225 		return (DDI_FAILURE);
226 	}
227 	state = ddi_get_soft_state(xsvc_statep, instance);
228 	if (state == NULL) {
229 		goto attachfail_get_soft_state;
230 	}
231 
232 	state->xs_dip = dip;
233 	state->xs_instance = instance;
234 
235 	/* Initialize allocation count */
236 	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
237 	state->xs_currently_alloced = 0;
238 
239 	/* create the minor node (for the ioctl) */
240 	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
241 	    0);
242 	if (err != DDI_SUCCESS) {
243 		goto attachfail_minor_node;
244 	}
245 
246 	/*
247 	 * the maxallocmem property will override the default (xsvc_max_memory).
248 	 * This is the maximum total memory the ioctl will allow to be alloced.
249 	 */
250 	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
251 	    DDI_PROP_DONTPASS, "maxallocmem", -1);
252 	if (maxallocmem >= 0) {
253 		xsvc_max_memory = maxallocmem * 1024;
254 	}
255 
256 	/* Initialize list of memory allocs */
257 	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
258 	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
259 	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));
260 
261 	/* Report that driver was loaded */
262 	ddi_report_dev(dip);
263 
264 	return (DDI_SUCCESS);
265 
266 attachfail_minor_node:
267 	mutex_destroy(&state->xs_mutex);
268 attachfail_get_soft_state:
269 	(void) ddi_soft_state_free(xsvc_statep, instance);
270 
271 	return (err);
272 }
273 
274 /*
275  * xsvc_detach()
276  *
277  */
278 static int
279 xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
280 {
281 	xsvc_state_t *state;
282 	xsvc_mnode_t *mnode;
283 	xsvc_mem_t *mp;
284 	int instance;
285 
286 
287 	instance = ddi_get_instance(dip);
288 	state = ddi_get_soft_state(xsvc_statep, instance);
289 	if (state == NULL) {
290 		return (DDI_FAILURE);
291 	}
292 
293 	switch (cmd) {
294 	case DDI_DETACH:
295 		break;
296 
297 	case DDI_SUSPEND:
298 		return (DDI_SUCCESS);
299 
300 	default:
301 		return (DDI_FAILURE);
302 	}
303 
304 	ddi_remove_minor_node(dip, NULL);
305 
306 	/* Free any memory on list */
307 	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
308 		mp = mnode->mn_home;
309 		xsvc_mem_free(state, mp);
310 	}
311 
312 	/* remove list */
313 	avl_destroy(&state->xs_mlist.ml_avl);
314 	mutex_destroy(&state->xs_mlist.ml_mutex);
315 
316 	mutex_destroy(&state->xs_mutex);
317 	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
318 	return (DDI_SUCCESS);
319 }
320 
321 /*
322  * xsvc_getinfo()
323  *
324  */
325 /*ARGSUSED*/
326 static int
327 xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
328 {
329 	xsvc_state_t *state;
330 	int instance;
331 	dev_t dev;
332 	int err;
333 
334 
335 	dev = (dev_t)arg;
336 	instance = getminor(dev);
337 
338 	switch (cmd) {
339 	case DDI_INFO_DEVT2DEVINFO:
340 		state = ddi_get_soft_state(xsvc_statep, instance);
341 		if (state == NULL) {
342 			return (DDI_FAILURE);
343 		}
344 		*result = (void *)state->xs_dip;
345 		err = DDI_SUCCESS;
346 		break;
347 
348 	case DDI_INFO_DEVT2INSTANCE:
349 		*result = (void *)(uintptr_t)instance;
350 		err = DDI_SUCCESS;
351 		break;
352 
353 	default:
354 		err = DDI_FAILURE;
355 		break;
356 	}
357 
358 	return (err);
359 }
360 
361 
362 /*
363  * xsvc_open()
364  *
365  */
366 /*ARGSUSED*/
367 static int
368 xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
369 {
370 	xsvc_state_t *state;
371 	int instance;
372 
373 	instance = getminor(*devp);
374 	state = ddi_get_soft_state(xsvc_statep, instance);
375 	if (state == NULL) {
376 		return (ENXIO);
377 	}
378 
379 	return (0);
380 }
381 
382 /*
383  * xsvc_close()
384  *
385  */
386 /*ARGSUSED*/
387 static int
388 xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
389 {
390 	return (0);
391 }
392 
393 /*
394  * xsvc_ioctl()
395  *
396  */
397 /*ARGSUSED*/
398 static int
399 xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
400 {
401 	xsvc_state_t *state;
402 	int instance;
403 	int err;
404 
405 
406 	err = drv_priv(cred);
407 	if (err != 0) {
408 		return (EPERM);
409 	}
410 	instance = getminor(dev);
411 	if (instance == -1) {
412 		return (EBADF);
413 	}
414 	state = ddi_get_soft_state(xsvc_statep, instance);
415 	if (state == NULL) {
416 		return (EBADF);
417 	}
418 
419 	switch (cmd) {
420 	case XSVC_ALLOC_MEM:
421 		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
422 		break;
423 
424 	case XSVC_FREE_MEM:
425 		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
426 		break;
427 
428 	case XSVC_FLUSH_MEM:
429 		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
430 		break;
431 
432 	default:
433 		err = ENXIO;
434 	}
435 
436 	return (err);
437 }
438 
439 /*
440  * xsvc_ioctl_alloc_memory()
441  *
442  */
443 static int
444 xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
445 {
446 	xsvc_mem_req_32 params32;
447 	xsvc_mloc_32 *usgl32;
448 	xsvc_mem_req params;
449 	xsvc_mloc_32 sgl32;
450 	xsvc_mloc *usgl;
451 	xsvc_mem_t *mp;
452 	xsvc_mloc sgl;
453 	uint64_t key;
454 	size_t size;
455 	int err;
456 	int i;
457 
458 
459 	/* Copy in the params, then get the size and key */
460 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
461 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
462 		    mode);
463 		if (err != 0) {
464 			return (EFAULT);
465 		}
466 
467 		key = (uint64_t)params32.xsvc_mem_reqid;
468 		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
469 	} else {
470 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
471 		if (err != 0) {
472 			return (EFAULT);
473 		}
474 		key = (uint64_t)params.xsvc_mem_reqid;
475 		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
476 	}
477 
478 	/*
479 	 * make sure this doesn't put us over the maximum allowed to be
480 	 * allocated
481 	 */
482 	mutex_enter(&state->xs_mutex);
483 	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
484 		mutex_exit(&state->xs_mutex);
485 		return (EAGAIN);
486 	}
487 	state->xs_currently_alloced += size;
488 	mutex_exit(&state->xs_mutex);
489 
490 	/* get state to track this memory */
491 	err = xsvc_mem_alloc(state, key, &mp);
492 	if (err != 0) {
493 		return (err);
494 	}
495 	mp->xm_size = size;
496 
497 	/* allocate and bind the memory */
498 	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
499 	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
500 	mp->xm_dma_attr.dma_attr_burstsizes = 1;
501 	mp->xm_dma_attr.dma_attr_minxfer = 1;
502 	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
503 	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
504 	mp->xm_dma_attr.dma_attr_granular = 1;
505 	mp->xm_dma_attr.dma_attr_flags = 0;
506 
507 	/* Finish converting params */
508 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
509 		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
510 		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
511 		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
512 		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
513 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
514 		    params32.xsvc_mem_align, PAGESIZE);
515 	} else {
516 		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
517 		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
518 		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
519 		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
520 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
521 		    params.xsvc_mem_align, PAGESIZE);
522 	}
523 
524 	mp->xm_device_attr = xsvc_device_attr;
525 
526 	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
527 	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
528 	if (err != DDI_SUCCESS) {
529 		err = EINVAL;
530 		goto allocfail_alloc_handle;
531 	}
532 
533 	/* don't sleep here so we don't get stuck in contig alloc */
534 	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
535 	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
536 	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
537 	if (err != DDI_SUCCESS) {
538 		err = EINVAL;
539 		goto allocfail_alloc_mem;
540 	}
541 
542 	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
543 	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
544 	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
545 	if (err != DDI_DMA_MAPPED) {
546 		err = EFAULT;
547 		goto allocfail_bind;
548 	}
549 
550 	/* return sgl */
551 	for (i = 0; i < mp->xm_cookie_count; i++) {
552 		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
553 			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
554 			sgl32.mloc_size = mp->xm_cookie.dmac_size;
555 			err = ddi_copyout(&sgl32, &usgl32[i],
556 			    sizeof (xsvc_mloc_32), mode);
557 			if (err != 0) {
558 				err = EFAULT;
559 				goto allocfail_copyout;
560 			}
561 		} else {
562 			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
563 			sgl.mloc_size = mp->xm_cookie.dmac_size;
564 			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
565 			    mode);
566 			if (err != 0) {
567 				err = EFAULT;
568 				goto allocfail_copyout;
569 			}
570 		}
571 		ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
572 	}
573 
574 	/* set the last sgl entry to 0 to indicate cookie count */
575 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
576 		sgl32.mloc_addr = 0;
577 		sgl32.mloc_size = 0;
578 		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
579 		    mode);
580 		if (err != 0) {
581 			err = EFAULT;
582 			goto allocfail_copyout;
583 		}
584 	} else {
585 		sgl.mloc_addr = 0;
586 		sgl.mloc_size = 0;
587 		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
588 		if (err != 0) {
589 			err = EFAULT;
590 			goto allocfail_copyout;
591 		}
592 	}
593 
594 	return (0);
595 
596 allocfail_copyout:
597 	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
598 allocfail_bind:
599 	ddi_dma_mem_free(&mp->xm_mem_handle);
600 allocfail_alloc_mem:
601 	ddi_dma_free_handle(&mp->xm_dma_handle);
602 allocfail_alloc_handle:
603 	mp->xm_dma_handle = NULL;
604 	xsvc_mem_free(state, mp);
605 
606 	mutex_enter(&state->xs_mutex);
607 	state->xs_currently_alloced = state->xs_currently_alloced - size;
608 	mutex_exit(&state->xs_mutex);
609 
610 	return (err);
611 }
612 
613 /*
614  * xsvc_ioctl_flush_memory()
615  *
616  */
617 static int
618 xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
619 {
620 	xsvc_mem_req_32 params32;
621 	xsvc_mem_req params;
622 	xsvc_mem_t *mp;
623 	uint64_t key;
624 	int err;
625 
626 
627 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
628 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
629 		    mode);
630 		if (err != 0) {
631 			return (EFAULT);
632 		}
633 		key = (uint64_t)params32.xsvc_mem_reqid;
634 	} else {
635 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
636 		if (err != 0) {
637 			return (EFAULT);
638 		}
639 		key = (uint64_t)params.xsvc_mem_reqid;
640 	}
641 
642 	/* find the memory */
643 	mp = xsvc_mem_lookup(state, key);
644 	if (mp == NULL) {
645 		return (EINVAL);
646 	}
647 
648 	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);
649 
650 	return (0);
651 }
652 
653 
654 /*
655  * xsvc_ioctl_free_memory()
656  *
657  */
658 static int
659 xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
660 {
661 	xsvc_mem_req_32 params32;
662 	xsvc_mem_req params;
663 	xsvc_mem_t *mp;
664 	uint64_t key;
665 	int err;
666 
667 
668 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
669 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
670 		    mode);
671 		if (err != 0) {
672 			return (EFAULT);
673 		}
674 		key = (uint64_t)params32.xsvc_mem_reqid;
675 	} else {
676 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
677 		if (err != 0) {
678 			return (EFAULT);
679 		}
680 		key = (uint64_t)params.xsvc_mem_reqid;
681 	}
682 
683 	/* find the memory */
684 	mp = xsvc_mem_lookup(state, key);
685 	if (mp == NULL) {
686 		return (EINVAL);
687 	}
688 
689 	xsvc_mem_free(state, mp);
690 
691 	return (0);
692 }
693 
694 /*
695  * xsvc_mem_alloc()
696  *
697  */
698 static int
699 xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
700 {
701 	xsvc_mem_t *mem;
702 
703 	mem = xsvc_mem_lookup(state, key);
704 	if (mem != NULL) {
705 		xsvc_mem_free(state, mem);
706 	}
707 
708 	*mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
709 	(*mp)->xm_mnode.mn_home = *mp;
710 	(*mp)->xm_mnode.mn_key = key;
711 
712 	mutex_enter(&state->xs_mlist.ml_mutex);
713 	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
714 	mutex_exit(&state->xs_mlist.ml_mutex);
715 
716 	return (0);
717 }
718 
719 /*
720  * xsvc_mem_free()
721  *
722  */
723 static void
724 xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
725 {
726 	if (mp->xm_dma_handle != NULL) {
727 		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
728 		ddi_dma_mem_free(&mp->xm_mem_handle);
729 		ddi_dma_free_handle(&mp->xm_dma_handle);
730 
731 		mutex_enter(&state->xs_mutex);
732 		state->xs_currently_alloced = state->xs_currently_alloced -
733 		    mp->xm_size;
734 		mutex_exit(&state->xs_mutex);
735 	}
736 
737 	mutex_enter(&state->xs_mlist.ml_mutex);
738 	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
739 	mutex_exit(&state->xs_mlist.ml_mutex);
740 
741 	kmem_free(mp, sizeof (*mp));
742 }
743 
744 /*
745  * xsvc_mem_lookup()
746  *
747  */
748 static xsvc_mem_t *
749 xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
750 {
751 	xsvc_mnode_t mnode;
752 	xsvc_mnode_t *mnp;
753 	avl_index_t where;
754 	xsvc_mem_t *mp;
755 
756 	mnode.mn_key = key;
757 	mutex_enter(&state->xs_mlist.ml_mutex);
758 	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
759 	mutex_exit(&state->xs_mlist.ml_mutex);
760 
761 	if (mnp != NULL) {
762 		mp = mnp->mn_home;
763 	} else {
764 		mp = NULL;
765 	}
766 
767 	return (mp);
768 }
769 
770 /*
771  * xsvc_mnode_key_compare()
772  *
773  */
774 static int
775 xsvc_mnode_key_compare(const void *q, const void *e)
776 {
777 	xsvc_mnode_t *n1;
778 	xsvc_mnode_t *n2;
779 
780 	n1 = (xsvc_mnode_t *)q;
781 	n2 = (xsvc_mnode_t *)e;
782 
783 	if (n1->mn_key < n2->mn_key) {
784 		return (-1);
785 	} else if (n1->mn_key > n2->mn_key) {
786 		return (1);
787 	} else {
788 		return (0);
789 	}
790 }
791 
792 /*
793  * xsvc_devmap()
794  *
795  */
796 /*ARGSUSED*/
797 static int
798 xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
799 		size_t *maplen, uint_t model)
800 {
801 	ddi_umem_cookie_t cookie;
802 	xsvc_state_t *state;
803 	offset_t off_align;
804 	size_t npages;
805 	caddr_t kvai;
806 	size_t psize;
807 	int instance;
808 	caddr_t kva;
809 	pfn_t pfn;
810 	int err;
811 	int i;
812 
813 
814 	instance = getminor(dev);
815 	state = ddi_get_soft_state(xsvc_statep, instance);
816 	if (state == NULL) {
817 		return (ENXIO);
818 	}
819 
820 	/*
821 	 * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
822 	 * smmap32 will sign extend the offset. We need to undo that since
823 	 * we are passed a physical address in off, not a offset.
824 	 */
825 #if defined(__amd64)
826 	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
827 	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
828 		off = off & 0xFFFFFFFF;
829 	}
830 #endif
831 
832 #ifdef __xpv
833 	/*
834 	 * we won't allow guest OSes to devmap mfn/pfns. Maybe we'll relax
835 	 * this some later when there is a good reason.
836 	 */
837 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
838 		return (-1);
839 	}
840 
841 	/* we will always treat this as a foreign MFN */
842 	pfn = xen_assign_pfn(btop(off));
843 #else
844 	pfn = btop(off);
845 #endif
846 	/* always work with whole pages */
847 
848 	off_align = P2ALIGN(off, PAGESIZE);
849 	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
850 
851 	/*
852 	 * if this is memory we're trying to map into user space, we first
853 	 * need to map the PFNs into KVA, then build up a umem cookie, and
854 	 * finally do a umem_setup to map it in.
855 	 */
856 	if (pf_is_memory(pfn)) {
857 		npages = btop(psize);
858 
859 		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
860 		if (kva == NULL) {
861 			return (-1);
862 		}
863 
864 		kvai = kva;
865 		for (i = 0; i < npages; i++) {
866 			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
867 			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
868 			pfn++;
869 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
870 		}
871 
872 		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
873 		if (err != 0) {
874 			goto devmapfail_cookie_alloc;
875 		}
876 
877 		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
878 		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
879 			goto devmapfail_umem_setup;
880 		}
881 		*maplen = psize;
882 
883 	/*
884 	 * If this is not memory (or a foreign MFN in i86xpv), go through
885 	 * devmem_setup.
886 	 */
887 	} else {
888 		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
889 		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
890 			return (err);
891 		}
892 		*maplen = psize;
893 	}
894 
895 	return (0);
896 
897 devmapfail_umem_setup:
898 	xsvc_umem_cookie_free(&cookie);
899 
900 devmapfail_cookie_alloc:
901 	kvai = kva;
902 	for (i = 0; i < npages; i++) {
903 		hat_unload(kas.a_hat, kvai, PAGESIZE,
904 		    HAT_UNLOAD_UNLOCK);
905 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
906 	}
907 	vmem_free(heap_arena, kva, psize);
908 
909 	return (err);
910 }
911 
912 /*
913  * xsvc_umem_cookie_alloc()
914  *
915  *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
916  *   allocated.
917  */
918 int
919 xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
920     ddi_umem_cookie_t *cookiep)
921 {
922 	struct ddi_umem_cookie *umem_cookiep;
923 
924 	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
925 	if (umem_cookiep == NULL) {
926 		*cookiep = NULL;
927 		return (-1);
928 	}
929 
930 	umem_cookiep->cvaddr = kva;
931 	umem_cookiep->type = KMEM_NON_PAGEABLE;
932 	umem_cookiep->size = size;
933 	*cookiep = (ddi_umem_cookie_t *)umem_cookiep;
934 
935 	return (0);
936 }
937 
938 /*
939  * xsvc_umem_cookie_free()
940  *
941  */
942 static void
943 xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
944 {
945 	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
946 	*cookiep = NULL;
947 }
948 
949 /*
950  * xsvc_devmap_unmap()
951  *
952  *   This routine is only call if we were mapping in memory in xsvc_devmap().
953  *   i.e. we only pass in xsvc_callbk to devmap_umem_setup if pf_is_memory()
954  *   was true. It would have been nice if devmap_callback_ctl had an args param.
955  *   We wouldn't have had to look into the devmap_handle and into the umem
956  *   cookie.
957  */
958 /*ARGSUSED*/
959 static void
960 xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
961     devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
962     void **new_pvtp2)
963 {
964 	struct ddi_umem_cookie *cp;
965 	devmap_handle_t *dhp;
966 	size_t npages;
967 	caddr_t kvai;
968 	caddr_t kva;
969 	size_t size;
970 	int i;
971 
972 
973 	/* peek into the umem cookie to figure out what we need to free up */
974 	dhp = (devmap_handle_t *)dhc;
975 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
976 	kva = cp->cvaddr;
977 	size = cp->size;
978 
979 	/*
980 	 * free up the umem cookie, then unmap all the pages what we mapped
981 	 * in during devmap, then free up the kva space.
982 	 */
983 	npages = btop(size);
984 	xsvc_umem_cookie_free(&dhp->dh_cookie);
985 	kvai = kva;
986 	for (i = 0; i < npages; i++) {
987 		hat_unload(kas.a_hat, kvai, PAGESIZE, HAT_UNLOAD_UNLOCK);
988 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
989 	}
990 	vmem_free(heap_arena, kva, size);
991 }
992