xref: /titanic_52/usr/src/uts/i86pc/io/xsvc/xsvc.c (revision b9bd317cda1afb3a01f4812de73e8cec888cbbd7)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <vm/seg_kmem.h>
#include <sys/vmsystm.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/avl.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif

#include <sys/xsvc.h>

/* maximum total memory that can be allocated through the ioctl interface */
uint64_t xsvc_max_memory = 10 * 1024 * 1024;
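
/*
 * Since xsvc_max_memory is a plain global tunable, it can also be overridden
 * at boot via an /etc/system entry, e.g. (example value only, 0x1400000
 * bytes = 20 MB):
 *	set xsvc:xsvc_max_memory = 0x1400000
 */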

extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);


static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xsvc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval);
static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model);
static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);

static struct cb_ops xsvc_cb_ops = {
	xsvc_open,		/* cb_open */
	xsvc_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	xsvc_ioctl,		/* cb_ioctl */
	xsvc_devmap,		/* cb_devmap */
	NULL,			/* cb_mmap */
	NULL,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
	CB_REV
};

static struct dev_ops xsvc_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xsvc_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xsvc_attach,		/* devo_attach */
	xsvc_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xsvc_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL			/* power */
};

static struct modldrv xsvc_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"xsvc driver v%I%",	/* Name of the module. */
	&xsvc_dev_ops,		/* driver ops */
};

static struct modlinkage xsvc_modlinkage = {
	MODREV_1,
	(void *) &xsvc_modldrv,
	NULL
};


static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
    xsvc_mem_t **mp);
static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
    uint64_t key);
static int xsvc_mnode_key_compare(const void *q, const void *e);
static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep);
static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);
static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
    size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
    devmap_cookie_t new_dhp2, void **new_pvtp2);


void *xsvc_statep;

static ddi_device_acc_attr_t xsvc_device_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

static struct devmap_callback_ctl xsvc_callbk = {
	DEVMAP_OPS_REV,
	NULL,
	NULL,
	NULL,
	xsvc_devmap_unmap
};


/*
 * _init()
 *
 */
int
_init(void)
{
	int err;

	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
	if (err != 0) {
		return (err);
	}

	err = mod_install(&xsvc_modlinkage);
	if (err != 0) {
		ddi_soft_state_fini(&xsvc_statep);
		return (err);
	}

	return (0);
}

/*
 * _info()
 *
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&xsvc_modlinkage, modinfop));
}

/*
 * _fini()
 *
 */
int
_fini(void)
{
	int err;

	err = mod_remove(&xsvc_modlinkage);
	if (err != 0) {
		return (err);
	}

	ddi_soft_state_fini(&xsvc_statep);

	return (0);
}

/*
 * xsvc_attach()
 *
 */
static int
xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	xsvc_state_t *state;
	int maxallocmem;
	int instance;
	int err;


	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	err = ddi_soft_state_zalloc(xsvc_statep, instance);
	if (err != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		err = DDI_FAILURE;
		goto attachfail_get_soft_state;
	}

	state->xs_dip = dip;
	state->xs_instance = instance;

	/* Initialize allocation count */
	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
	state->xs_currently_alloced = 0;

	/* create the minor node (for the ioctl) */
	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
	    0);
	if (err != DDI_SUCCESS) {
		goto attachfail_minor_node;
	}

	/*
	 * The "maxallocmem" property (in kilobytes) overrides the default
	 * (xsvc_max_memory). This is the maximum total memory the ioctl
	 * interface will allow to be allocated.
	 */
	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
	    DDI_PROP_DONTPASS, "maxallocmem", -1);
	if (maxallocmem >= 0) {
		xsvc_max_memory = (uint64_t)maxallocmem * 1024;
	}
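
	/*
	 * For example, a hypothetical xsvc.conf entry limiting the ioctl
	 * interface to 20 MB would be (the property value is in kilobytes):
	 *	maxallocmem=20480;
	 */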

	/* Initialize list of memory allocs */
	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));

	/* Report that driver was loaded */
	ddi_report_dev(dip);

	return (DDI_SUCCESS);

attachfail_minor_node:
	mutex_destroy(&state->xs_mutex);
attachfail_get_soft_state:
	(void) ddi_soft_state_free(xsvc_statep, instance);

	return (err);
}


/*
 * xsvc_detach()
 *
 */
static int
xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	xsvc_state_t *state;
	xsvc_mnode_t *mnode;
	xsvc_mem_t *mp;
	int instance;


	instance = ddi_get_instance(dip);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (DDI_FAILURE);
	}

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(dip, NULL);

	/* Free any memory on list */
	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
		mp = mnode->mn_home;
		xsvc_mem_free(state, mp);
	}

	/* remove list */
	avl_destroy(&state->xs_mlist.ml_avl);
	mutex_destroy(&state->xs_mlist.ml_mutex);

	mutex_destroy(&state->xs_mutex);
	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
	return (DDI_SUCCESS);
}

/*
 * xsvc_getinfo()
 *
 */
/*ARGSUSED*/
static int
xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
	xsvc_state_t *state;
	int instance;
	dev_t dev;
	int err;


	dev = (dev_t)arg;
	instance = getminor(dev);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		state = ddi_get_soft_state(xsvc_statep, instance);
		if (state == NULL) {
			return (DDI_FAILURE);
		}
		*result = (void *)state->xs_dip;
		err = DDI_SUCCESS;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)(uintptr_t)instance;
		err = DDI_SUCCESS;
		break;

	default:
		err = DDI_FAILURE;
		break;
	}

	return (err);
}


/*
 * xsvc_open()
 *
 */
/*ARGSUSED*/
static int
xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	xsvc_state_t *state;
	int instance;

	instance = getminor(*devp);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	return (0);
}

/*
 * xsvc_close()
 *
 */
/*ARGSUSED*/
static int
xsvc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	return (0);
}


/*
 * xsvc_ioctl()
 *
 */
/*ARGSUSED*/
static int
xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
{
	xsvc_state_t *state;
	int instance;
	int err;


	err = drv_priv(cred);
	if (err != 0) {
		return (EPERM);
	}
	instance = getminor(dev);
	if (instance == -1) {
		return (EBADF);
	}
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (EBADF);
	}

	switch (cmd) {
	case XSVC_ALLOC_MEM:
		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
		break;

	case XSVC_FREE_MEM:
		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
		break;

	case XSVC_FLUSH_MEM:
		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
		break;

	default:
		err = ENXIO;
	}

	return (err);
}
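
/*
 * Usage sketch (illustrative only, not part of this driver): a privileged
 * process could exercise the ioctl interface above roughly as follows,
 * assuming the xsvc_mem_req/xsvc_mloc definitions from <sys/xsvc.h> and a
 * hypothetical /dev/xsvc link to this minor node:
 *
 *	int fd = open("/dev/xsvc", O_RDWR);
 *	xsvc_mloc sgl[4];			sgl buffer (sgllen + 1 slots)
 *	xsvc_mem_req req;
 *
 *	req.xsvc_mem_reqid = 1;			caller-chosen key
 *	req.xsvc_mem_size = 2 * PAGESIZE;
 *	req.xsvc_mem_align = PAGESIZE;
 *	req.xsvc_mem_addr_lo = 0;
 *	req.xsvc_mem_addr_hi = 0xFFFFFFFF;	32-bit addressable
 *	req.xsvc_mem_sgllen = 3;
 *	req.xsvc_sg_list = (uintptr_t)sgl;
 *	if (ioctl(fd, XSVC_ALLOC_MEM, &req) == 0) {
 *		walk sgl[] until the zeroed terminator entry, then later
 *		(void) ioctl(fd, XSVC_FREE_MEM, &req);
 *	}
 */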

/*
 * xsvc_ioctl_alloc_memory()
 *
 */
static int
xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
{
	xsvc_mem_req_32 params32;
	xsvc_mloc_32 *usgl32;
	xsvc_mem_req params;
	xsvc_mloc_32 sgl32;
	xsvc_mloc *usgl;
	xsvc_mem_t *mp;
	xsvc_mloc sgl;
	uint64_t key;
	size_t size;
	int err;
	int i;


	/* Copy in the params, then get the size and key */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
		    mode);
		if (err != 0) {
			return (EFAULT);
		}

		key = (uint64_t)params32.xsvc_mem_reqid;
		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
	} else {
		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params.xsvc_mem_reqid;
		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
	}

	/*
	 * make sure this doesn't put us over the maximum allowed to be
	 * allocated
	 */
	mutex_enter(&state->xs_mutex);
	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
		mutex_exit(&state->xs_mutex);
		return (EAGAIN);
	}
	state->xs_currently_alloced += size;
	mutex_exit(&state->xs_mutex);

	/* get state to track this memory */
	err = xsvc_mem_alloc(state, key, &mp);
	if (err != 0) {
		return (err);
	}
	mp->xm_size = size;

	/* allocate and bind the memory */
	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
	mp->xm_dma_attr.dma_attr_burstsizes = 1;
	mp->xm_dma_attr.dma_attr_minxfer = 1;
	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
	mp->xm_dma_attr.dma_attr_granular = 1;
	mp->xm_dma_attr.dma_attr_flags = 0;

	/* Finish converting params */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
		    params32.xsvc_mem_align, PAGESIZE);
	} else {
		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
		    params.xsvc_mem_align, PAGESIZE);
	}

	mp->xm_device_attr = xsvc_device_attr;

	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
	if (err != DDI_SUCCESS) {
		err = EINVAL;
		goto allocfail_alloc_handle;
	}

	/* don't sleep here so we don't get stuck in contig alloc */
	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
	if (err != DDI_SUCCESS) {
		err = EINVAL;
		goto allocfail_alloc_mem;
	}

	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
	if (err != DDI_DMA_MAPPED) {
		err = EFAULT;
		goto allocfail_bind;
	}

	/* return sgl */
	for (i = 0; i < mp->xm_cookie_count; i++) {
		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
			sgl32.mloc_size = mp->xm_cookie.dmac_size;
			err = ddi_copyout(&sgl32, &usgl32[i],
			    sizeof (xsvc_mloc_32), mode);
			if (err != 0) {
				err = EFAULT;
				goto allocfail_copyout;
			}
		} else {
			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
			sgl.mloc_size = mp->xm_cookie.dmac_size;
			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
			    mode);
			if (err != 0) {
				err = EFAULT;
				goto allocfail_copyout;
			}
		}
		ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
	}

	/* terminate the sgl with a zeroed entry to indicate the cookie count */
	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		sgl32.mloc_addr = 0;
		sgl32.mloc_size = 0;
		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
		    mode);
		if (err != 0) {
			err = EFAULT;
			goto allocfail_copyout;
		}
	} else {
		sgl.mloc_addr = 0;
		sgl.mloc_size = 0;
		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
		if (err != 0) {
			err = EFAULT;
			goto allocfail_copyout;
		}
	}

	return (0);

allocfail_copyout:
	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
allocfail_bind:
	ddi_dma_mem_free(&mp->xm_mem_handle);
allocfail_alloc_mem:
	ddi_dma_free_handle(&mp->xm_dma_handle);
allocfail_alloc_handle:
	mp->xm_dma_handle = NULL;
	xsvc_mem_free(state, mp);

	mutex_enter(&state->xs_mutex);
	state->xs_currently_alloced = state->xs_currently_alloced - size;
	mutex_exit(&state->xs_mutex);

	return (err);
}
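
/*
 * Note on the sgl contract above: entries 0 .. xm_cookie_count - 1 are
 * written, followed by a zeroed terminator at index xm_cookie_count, so the
 * caller's xsvc_sg_list buffer must have room for up to xsvc_mem_sgllen + 1
 * entries (dma_attr_sgllen bounds the cookie count).
 */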

/*
 * xsvc_ioctl_flush_memory()
 *
 */
static int
xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
{
	xsvc_mem_req_32 params32;
	xsvc_mem_req params;
	xsvc_mem_t *mp;
	uint64_t key;
	int err;


	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
		    mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params32.xsvc_mem_reqid;
	} else {
		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params.xsvc_mem_reqid;
	}

	/* find the memory */
	mp = xsvc_mem_lookup(state, key);
	if (mp == NULL) {
		return (EINVAL);
	}

	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);

	return (0);
}


/*
 * xsvc_ioctl_free_memory()
 *
 */
static int
xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
{
	xsvc_mem_req_32 params32;
	xsvc_mem_req params;
	xsvc_mem_t *mp;
	uint64_t key;
	int err;


	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
		    mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params32.xsvc_mem_reqid;
	} else {
		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
		if (err != 0) {
			return (EFAULT);
		}
		key = (uint64_t)params.xsvc_mem_reqid;
	}

	/* find the memory */
	mp = xsvc_mem_lookup(state, key);
	if (mp == NULL) {
		return (EINVAL);
	}

	xsvc_mem_free(state, mp);

	return (0);
}

/*
 * xsvc_mem_alloc()
 *
 *   Allocate tracking state for an allocation and insert it into the AVL
 *   list, keyed by 'key'. If an allocation with the same key already exists,
 *   it is freed first.
 */
static int
xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
{
	xsvc_mem_t *mem;

	mem = xsvc_mem_lookup(state, key);
	if (mem != NULL) {
		xsvc_mem_free(state, mem);
	}

	/* zero so xm_dma_handle starts out NULL; xsvc_mem_free() tests it */
	*mp = kmem_zalloc(sizeof (xsvc_mem_t), KM_SLEEP);
	(*mp)->xm_mnode.mn_home = *mp;
	(*mp)->xm_mnode.mn_key = key;

	mutex_enter(&state->xs_mlist.ml_mutex);
	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
	mutex_exit(&state->xs_mlist.ml_mutex);

	return (0);
}

/*
 * xsvc_mem_free()
 *
 *   Tear down an allocation: unbind and free the DMA resources (if they were
 *   set up), credit xs_currently_alloced, remove the node from the AVL list,
 *   and free the tracking state.
 */
static void
xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
{
	if (mp->xm_dma_handle != NULL) {
		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
		ddi_dma_mem_free(&mp->xm_mem_handle);
		ddi_dma_free_handle(&mp->xm_dma_handle);

		mutex_enter(&state->xs_mutex);
		state->xs_currently_alloced = state->xs_currently_alloced -
		    mp->xm_size;
		mutex_exit(&state->xs_mutex);
	}

	mutex_enter(&state->xs_mlist.ml_mutex);
	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
	mutex_exit(&state->xs_mlist.ml_mutex);

	kmem_free(mp, sizeof (*mp));
}

/*
 * xsvc_mem_lookup()
 *
 *   Look up an allocation by key; returns NULL if no allocation with that
 *   key is on the list.
 */
static xsvc_mem_t *
xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
{
	xsvc_mnode_t mnode;
	xsvc_mnode_t *mnp;
	avl_index_t where;
	xsvc_mem_t *mp;

	mnode.mn_key = key;
	mutex_enter(&state->xs_mlist.ml_mutex);
	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
	mutex_exit(&state->xs_mlist.ml_mutex);

	if (mnp != NULL) {
		mp = mnp->mn_home;
	} else {
		mp = NULL;
	}

	return (mp);
}

/*
 * xsvc_mnode_key_compare()
 *
 *   AVL comparator; must return exactly -1, 0, or 1.
 */
static int
xsvc_mnode_key_compare(const void *q, const void *e)
{
	xsvc_mnode_t *n1;
	xsvc_mnode_t *n2;

	n1 = (xsvc_mnode_t *)q;
	n2 = (xsvc_mnode_t *)e;

	if (n1->mn_key < n2->mn_key) {
		return (-1);
	} else if (n1->mn_key > n2->mn_key) {
		return (1);
	} else {
		return (0);
	}
}

/*
 * xsvc_devmap()
 *
 */
/*ARGSUSED*/
static int
xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	ddi_umem_cookie_t cookie;
	xsvc_state_t *state;
	offset_t off_align;
	size_t npages;
	caddr_t kvai;
	size_t psize;
	int instance;
	caddr_t kva;
	pfn_t pfn;
	int err;
	int i;


	instance = getminor(dev);
	state = ddi_get_soft_state(xsvc_statep, instance);
	if (state == NULL) {
		return (ENXIO);
	}

	/*
	 * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
	 * smmap32 will sign extend the offset. We need to undo that since
	 * we are passed a physical address in off, not an offset.
	 */
#if defined(__amd64)
	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
		off = off & 0xFFFFFFFF;
	}
#endif

#ifdef __xpv
	/*
	 * We don't allow guest OSes to devmap MFNs/PFNs. This may be relaxed
	 * later if a good reason comes along.
	 */
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		return (-1);
	}

	/* we will always treat this as a foreign MFN */
	pfn = xen_assign_pfn(btop(off));
#else
	pfn = btop(off);
#endif

	/* always work with whole pages */
	off_align = P2ALIGN(off, PAGESIZE);
	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
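
	/*
	 * e.g. (illustrative numbers, 4K pages) off = 0x12345678 and
	 * len = 0xA00 give off_align = 0x12345000 and psize = 0x2000,
	 * covering the two pages touched by [off, off + len).
	 */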

	/*
	 * if this is memory we're trying to map into user space, we first
	 * need to map the PFNs into KVA, then build up a umem cookie, and
	 * finally do a umem_setup to map it in.
	 */
	if (pf_is_memory(pfn)) {
		npages = btop(psize);

		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
		if (kva == NULL) {
			return (-1);
		}

		kvai = kva;
		for (i = 0; i < npages; i++) {
			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
			pfn++;
			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
		}

		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
		if (err != 0) {
			goto devmapfail_cookie_alloc;
		}

		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
			goto devmapfail_umem_setup;
		}
		*maplen = psize;

	/*
	 * If this is not memory (or a foreign MFN in i86xpv), go through
	 * devmem_setup.
	 */
	} else {
		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
			return (err);
		}
		*maplen = psize;
	}

	return (0);

devmapfail_umem_setup:
	xsvc_umem_cookie_free(&cookie);

devmapfail_cookie_alloc:
	kvai = kva;
	for (i = 0; i < npages; i++) {
		hat_unload(kas.a_hat, kvai, PAGESIZE,
		    HAT_UNLOAD_UNLOCK);
		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
	}
	vmem_free(heap_arena, kva, psize);

	return (err);
}

/*
 * xsvc_umem_cookie_alloc()
 *
 *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
 *   allocated.
 */
static int
xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
    ddi_umem_cookie_t *cookiep)
{
	struct ddi_umem_cookie *umem_cookiep;

	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
	if (umem_cookiep == NULL) {
		*cookiep = NULL;
		return (-1);
	}

	umem_cookiep->cvaddr = kva;
	umem_cookiep->type = KMEM_NON_PAGEABLE;
	umem_cookiep->size = size;
	*cookiep = (ddi_umem_cookie_t)umem_cookiep;

	return (0);
}
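
/*
 * Design note: the cookie is built by hand here (rather than coming from
 * ddi_umem_alloc()) because the KVA was carved out of heap_arena and loaded
 * with specific PFNs in xsvc_devmap(); xsvc_devmap_unmap() later peeks back
 * into this cookie (cvaddr, size) to undo that work.
 */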

/*
 * xsvc_umem_cookie_free()
 *
 */
static void
xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
{
	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
	*cookiep = NULL;
}

/*
 * xsvc_devmap_unmap()
 *
 *   This routine is only called if we were mapping in memory in xsvc_devmap(),
 *   i.e. we only pass xsvc_callbk to devmap_umem_setup() if pf_is_memory()
 *   was true. It would have been nice if devmap_callback_ctl had an args
 *   param; then we wouldn't have to dig into the devmap_handle and the umem
 *   cookie.
 */
/*ARGSUSED*/
static void
xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
    void **new_pvtp2)
{
	struct ddi_umem_cookie *cp;
	devmap_handle_t *dhp;
	size_t npages;
	caddr_t kvai;
	caddr_t kva;
	size_t size;
	int i;


	/* peek into the umem cookie to figure out what we need to free up */
	dhp = (devmap_handle_t *)dhc;
	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
	kva = cp->cvaddr;
	size = cp->size;

	/*
	 * free up the umem cookie, then unmap all the pages that we mapped
	 * in during devmap, then free up the kva space.
	 */
	npages = btop(size);
	xsvc_umem_cookie_free(&dhp->dh_cookie);
	kvai = kva;
	for (i = 0; i < npages; i++) {
		hat_unload(kas.a_hat, kvai, PAGESIZE, HAT_UNLOAD_UNLOCK);
		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
	}
	vmem_free(heap_arena, kva, size);
}