xref: /titanic_50/usr/src/uts/i86pc/io/xsvc/xsvc.c (revision 3fbe3e2827948b5ff8ffec94d18c232af100ea3c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/errno.h>
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/kmem.h>
31 #include <sys/ddi.h>
32 #include <sys/stat.h>
33 #include <sys/sunddi.h>
34 #include <sys/file.h>
35 #include <sys/open.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi_impldefs.h>
38 #include <vm/seg_kmem.h>
39 #include <sys/vmsystm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/ddidevmap.h>
42 #include <sys/avl.h>
43 #ifdef __xpv
44 #include <sys/hypervisor.h>
45 #endif
46 
47 #include <sys/xsvc.h>
48 
49 /* total max memory which can be alloced with ioctl interface */
50 uint64_t xsvc_max_memory = 10 * 1024 * 1024;
51 
52 extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
53 
54 
55 static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
56 static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
57 static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
58     int *rval);
59 static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
60     size_t *maplen, uint_t model);
61 static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
62 static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
63 static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
64     void **result);
65 
66 static 	struct cb_ops xsvc_cb_ops = {
67 	xsvc_open,		/* cb_open */
68 	xsvc_close,		/* cb_close */
69 	nodev,			/* cb_strategy */
70 	nodev,			/* cb_print */
71 	nodev,			/* cb_dump */
72 	nodev,			/* cb_read */
73 	nodev,			/* cb_write */
74 	xsvc_ioctl,		/* cb_ioctl */
75 	xsvc_devmap,		/* cb_devmap */
76 	NULL,			/* cb_mmap */
77 	NULL,			/* cb_segmap */
78 	nochpoll,		/* cb_chpoll */
79 	ddi_prop_op,		/* cb_prop_op */
80 	NULL,			/* cb_stream */
81 	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
82 	CB_REV
83 };
84 
85 static struct dev_ops xsvc_dev_ops = {
86 	DEVO_REV,		/* devo_rev */
87 	0,			/* devo_refcnt */
88 	xsvc_getinfo,		/* devo_getinfo */
89 	nulldev,		/* devo_identify */
90 	nulldev,		/* devo_probe */
91 	xsvc_attach,		/* devo_attach */
92 	xsvc_detach,		/* devo_detach */
93 	nodev,			/* devo_reset */
94 	&xsvc_cb_ops,		/* devo_cb_ops */
95 	NULL,			/* devo_bus_ops */
96 	NULL,			/* power */
97 	ddi_quiesce_not_needed,		/* quiesce */
98 };
99 
100 static struct modldrv xsvc_modldrv = {
101 	&mod_driverops,		/* Type of module.  This one is a driver */
102 	"xsvc driver",		/* Name of the module. */
103 	&xsvc_dev_ops,		/* driver ops */
104 };
105 
106 static struct modlinkage xsvc_modlinkage = {
107 	MODREV_1,
108 	(void *) &xsvc_modldrv,
109 	NULL
110 };
111 
112 
113 static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
114 static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
115 static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
116 static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
117     xsvc_mem_t **mp);
118 static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
119 static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
120     uint64_t key);
121 static int xsvc_mnode_key_compare(const void *q, const void *e);
122 static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
123     ddi_umem_cookie_t *cookiep);
124 static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);
125 static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
126     size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
127     devmap_cookie_t new_dhp2, void **new_pvtp2);
128 
129 
/* Anchor for the per-instance soft state (see ddi_soft_state_init in _init) */
void *xsvc_statep;
131 
132 static ddi_device_acc_attr_t xsvc_device_attr = {
133 	DDI_DEVICE_ATTR_V0,
134 	DDI_NEVERSWAP_ACC,
135 	DDI_STRICTORDER_ACC
136 };
137 
138 static struct devmap_callback_ctl xsvc_callbk = {
139 	DEVMAP_OPS_REV,
140 	NULL,
141 	NULL,
142 	NULL,
143 	xsvc_devmap_unmap
144 };
145 
146 
147 /*
148  * _init()
149  *
150  */
151 int
152 _init(void)
153 {
154 	int err;
155 
156 	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
157 	if (err != 0) {
158 		return (err);
159 	}
160 
161 	err = mod_install(&xsvc_modlinkage);
162 	if (err != 0) {
163 		ddi_soft_state_fini(&xsvc_statep);
164 		return (err);
165 	}
166 
167 	return (0);
168 }
169 
170 /*
171  * _info()
172  *
173  */
174 int
175 _info(struct modinfo *modinfop)
176 {
177 	return (mod_info(&xsvc_modlinkage, modinfop));
178 }
179 
180 /*
181  * _fini()
182  *
183  */
184 int
185 _fini(void)
186 {
187 	int err;
188 
189 	err = mod_remove(&xsvc_modlinkage);
190 	if (err != 0) {
191 		return (err);
192 	}
193 
194 	ddi_soft_state_fini(&xsvc_statep);
195 
196 	return (0);
197 }
198 
199 /*
200  * xsvc_attach()
201  *
202  */
203 static int
204 xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
205 {
206 	xsvc_state_t *state;
207 	int maxallocmem;
208 	int instance;
209 	int err;
210 
211 
212 	switch (cmd) {
213 	case DDI_ATTACH:
214 		break;
215 
216 	case DDI_RESUME:
217 		return (DDI_SUCCESS);
218 
219 	default:
220 		return (DDI_FAILURE);
221 	}
222 
223 	instance = ddi_get_instance(dip);
224 	err = ddi_soft_state_zalloc(xsvc_statep, instance);
225 	if (err != DDI_SUCCESS) {
226 		return (DDI_FAILURE);
227 	}
228 	state = ddi_get_soft_state(xsvc_statep, instance);
229 	if (state == NULL) {
230 		goto attachfail_get_soft_state;
231 	}
232 
233 	state->xs_dip = dip;
234 	state->xs_instance = instance;
235 
236 	/* Initialize allocation count */
237 	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
238 	state->xs_currently_alloced = 0;
239 
240 	/* create the minor node (for the ioctl) */
241 	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
242 	    0);
243 	if (err != DDI_SUCCESS) {
244 		goto attachfail_minor_node;
245 	}
246 
247 	/*
248 	 * the maxallocmem property will override the default (xsvc_max_memory).
249 	 * This is the maximum total memory the ioctl will allow to be alloced.
250 	 */
251 	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
252 	    DDI_PROP_DONTPASS, "maxallocmem", -1);
253 	if (maxallocmem >= 0) {
254 		xsvc_max_memory = maxallocmem * 1024;
255 	}
256 
257 	/* Initialize list of memory allocs */
258 	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
259 	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
260 	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));
261 
262 	/* Report that driver was loaded */
263 	ddi_report_dev(dip);
264 
265 	return (DDI_SUCCESS);
266 
267 attachfail_minor_node:
268 	mutex_destroy(&state->xs_mutex);
269 attachfail_get_soft_state:
270 	(void) ddi_soft_state_free(xsvc_statep, instance);
271 
272 	return (err);
273 }
274 
275 /*
276  * xsvc_detach()
277  *
278  */
279 static int
280 xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
281 {
282 	xsvc_state_t *state;
283 	xsvc_mnode_t *mnode;
284 	xsvc_mem_t *mp;
285 	int instance;
286 
287 
288 	instance = ddi_get_instance(dip);
289 	state = ddi_get_soft_state(xsvc_statep, instance);
290 	if (state == NULL) {
291 		return (DDI_FAILURE);
292 	}
293 
294 	switch (cmd) {
295 	case DDI_DETACH:
296 		break;
297 
298 	case DDI_SUSPEND:
299 		return (DDI_SUCCESS);
300 
301 	default:
302 		return (DDI_FAILURE);
303 	}
304 
305 	ddi_remove_minor_node(dip, NULL);
306 
307 	/* Free any memory on list */
308 	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
309 		mp = mnode->mn_home;
310 		xsvc_mem_free(state, mp);
311 	}
312 
313 	/* remove list */
314 	avl_destroy(&state->xs_mlist.ml_avl);
315 	mutex_destroy(&state->xs_mlist.ml_mutex);
316 
317 	mutex_destroy(&state->xs_mutex);
318 	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
319 	return (DDI_SUCCESS);
320 }
321 
322 /*
323  * xsvc_getinfo()
324  *
325  */
326 /*ARGSUSED*/
327 static int
328 xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
329 {
330 	xsvc_state_t *state;
331 	int instance;
332 	dev_t dev;
333 	int err;
334 
335 
336 	dev = (dev_t)arg;
337 	instance = getminor(dev);
338 
339 	switch (cmd) {
340 	case DDI_INFO_DEVT2DEVINFO:
341 		state = ddi_get_soft_state(xsvc_statep, instance);
342 		if (state == NULL) {
343 			return (DDI_FAILURE);
344 		}
345 		*result = (void *)state->xs_dip;
346 		err = DDI_SUCCESS;
347 		break;
348 
349 	case DDI_INFO_DEVT2INSTANCE:
350 		*result = (void *)(uintptr_t)instance;
351 		err = DDI_SUCCESS;
352 		break;
353 
354 	default:
355 		err = DDI_FAILURE;
356 		break;
357 	}
358 
359 	return (err);
360 }
361 
362 
363 /*
364  * xsvc_open()
365  *
366  */
367 /*ARGSUSED*/
368 static int
369 xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
370 {
371 	xsvc_state_t *state;
372 	int instance;
373 
374 	instance = getminor(*devp);
375 	state = ddi_get_soft_state(xsvc_statep, instance);
376 	if (state == NULL) {
377 		return (ENXIO);
378 	}
379 
380 	return (0);
381 }
382 
383 /*
384  * xsvc_close()
385  *
386  */
387 /*ARGSUSED*/
388 static int
389 xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
390 {
391 	return (0);
392 }
393 
394 /*
395  * xsvc_ioctl()
396  *
397  */
398 /*ARGSUSED*/
399 static int
400 xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
401 {
402 	xsvc_state_t *state;
403 	int instance;
404 	int err;
405 
406 
407 	err = drv_priv(cred);
408 	if (err != 0) {
409 		return (EPERM);
410 	}
411 	instance = getminor(dev);
412 	if (instance == -1) {
413 		return (EBADF);
414 	}
415 	state = ddi_get_soft_state(xsvc_statep, instance);
416 	if (state == NULL) {
417 		return (EBADF);
418 	}
419 
420 	switch (cmd) {
421 	case XSVC_ALLOC_MEM:
422 		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
423 		break;
424 
425 	case XSVC_FREE_MEM:
426 		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
427 		break;
428 
429 	case XSVC_FLUSH_MEM:
430 		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
431 		break;
432 
433 	default:
434 		err = ENXIO;
435 	}
436 
437 	return (err);
438 }
439 
440 /*
441  * xsvc_ioctl_alloc_memory()
442  *
443  */
444 static int
445 xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
446 {
447 	xsvc_mem_req_32 params32;
448 	xsvc_mloc_32 *usgl32;
449 	xsvc_mem_req params;
450 	xsvc_mloc_32 sgl32;
451 	xsvc_mloc *usgl;
452 	xsvc_mem_t *mp;
453 	xsvc_mloc sgl;
454 	uint64_t key;
455 	size_t size;
456 	int err;
457 	int i;
458 
459 
460 	/* Copy in the params, then get the size and key */
461 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
462 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
463 		    mode);
464 		if (err != 0) {
465 			return (EFAULT);
466 		}
467 
468 		key = (uint64_t)params32.xsvc_mem_reqid;
469 		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
470 	} else {
471 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
472 		if (err != 0) {
473 			return (EFAULT);
474 		}
475 		key = (uint64_t)params.xsvc_mem_reqid;
476 		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
477 	}
478 
479 	/*
480 	 * make sure this doesn't put us over the maximum allowed to be
481 	 * allocated
482 	 */
483 	mutex_enter(&state->xs_mutex);
484 	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
485 		mutex_exit(&state->xs_mutex);
486 		return (EAGAIN);
487 	}
488 	state->xs_currently_alloced += size;
489 	mutex_exit(&state->xs_mutex);
490 
491 	/* get state to track this memory */
492 	err = xsvc_mem_alloc(state, key, &mp);
493 	if (err != 0) {
494 		return (err);
495 	}
496 	mp->xm_size = size;
497 
498 	/* allocate and bind the memory */
499 	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
500 	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
501 	mp->xm_dma_attr.dma_attr_burstsizes = 1;
502 	mp->xm_dma_attr.dma_attr_minxfer = 1;
503 	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
504 	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
505 	mp->xm_dma_attr.dma_attr_granular = 1;
506 	mp->xm_dma_attr.dma_attr_flags = 0;
507 
508 	/* Finish converting params */
509 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
510 		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
511 		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
512 		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
513 		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
514 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
515 		    params32.xsvc_mem_align, PAGESIZE);
516 	} else {
517 		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
518 		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
519 		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
520 		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
521 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
522 		    params.xsvc_mem_align, PAGESIZE);
523 	}
524 
525 	mp->xm_device_attr = xsvc_device_attr;
526 
527 	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
528 	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
529 	if (err != DDI_SUCCESS) {
530 		err = EINVAL;
531 		goto allocfail_alloc_handle;
532 	}
533 
534 	/* don't sleep here so we don't get stuck in contig alloc */
535 	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
536 	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
537 	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
538 	if (err != DDI_SUCCESS) {
539 		err = EINVAL;
540 		goto allocfail_alloc_mem;
541 	}
542 
543 	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
544 	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
545 	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
546 	if (err != DDI_DMA_MAPPED) {
547 		err = EFAULT;
548 		goto allocfail_bind;
549 	}
550 
551 	/* return sgl */
552 	for (i = 0; i < mp->xm_cookie_count; i++) {
553 		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
554 			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
555 			sgl32.mloc_size = mp->xm_cookie.dmac_size;
556 			err = ddi_copyout(&sgl32, &usgl32[i],
557 			    sizeof (xsvc_mloc_32), mode);
558 			if (err != 0) {
559 				err = EFAULT;
560 				goto allocfail_copyout;
561 			}
562 		} else {
563 			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
564 			sgl.mloc_size = mp->xm_cookie.dmac_size;
565 			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
566 			    mode);
567 			if (err != 0) {
568 				err = EFAULT;
569 				goto allocfail_copyout;
570 			}
571 		}
572 		ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
573 	}
574 
575 	/* set the last sgl entry to 0 to indicate cookie count */
576 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
577 		sgl32.mloc_addr = 0;
578 		sgl32.mloc_size = 0;
579 		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
580 		    mode);
581 		if (err != 0) {
582 			err = EFAULT;
583 			goto allocfail_copyout;
584 		}
585 	} else {
586 		sgl.mloc_addr = 0;
587 		sgl.mloc_size = 0;
588 		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
589 		if (err != 0) {
590 			err = EFAULT;
591 			goto allocfail_copyout;
592 		}
593 	}
594 
595 	return (0);
596 
597 allocfail_copyout:
598 	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
599 allocfail_bind:
600 	ddi_dma_mem_free(&mp->xm_mem_handle);
601 allocfail_alloc_mem:
602 	ddi_dma_free_handle(&mp->xm_dma_handle);
603 allocfail_alloc_handle:
604 	mp->xm_dma_handle = NULL;
605 	xsvc_mem_free(state, mp);
606 
607 	mutex_enter(&state->xs_mutex);
608 	state->xs_currently_alloced = state->xs_currently_alloced - size;
609 	mutex_exit(&state->xs_mutex);
610 
611 	return (err);
612 }
613 
614 /*
615  * xsvc_ioctl_flush_memory()
616  *
617  */
618 static int
619 xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
620 {
621 	xsvc_mem_req_32 params32;
622 	xsvc_mem_req params;
623 	xsvc_mem_t *mp;
624 	uint64_t key;
625 	int err;
626 
627 
628 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
629 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
630 		    mode);
631 		if (err != 0) {
632 			return (EFAULT);
633 		}
634 		key = (uint64_t)params32.xsvc_mem_reqid;
635 	} else {
636 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
637 		if (err != 0) {
638 			return (EFAULT);
639 		}
640 		key = (uint64_t)params.xsvc_mem_reqid;
641 	}
642 
643 	/* find the memory */
644 	mp = xsvc_mem_lookup(state, key);
645 	if (mp == NULL) {
646 		return (EINVAL);
647 	}
648 
649 	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);
650 
651 	return (0);
652 }
653 
654 
655 /*
656  * xsvc_ioctl_free_memory()
657  *
658  */
659 static int
660 xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
661 {
662 	xsvc_mem_req_32 params32;
663 	xsvc_mem_req params;
664 	xsvc_mem_t *mp;
665 	uint64_t key;
666 	int err;
667 
668 
669 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
670 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
671 		    mode);
672 		if (err != 0) {
673 			return (EFAULT);
674 		}
675 		key = (uint64_t)params32.xsvc_mem_reqid;
676 	} else {
677 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
678 		if (err != 0) {
679 			return (EFAULT);
680 		}
681 		key = (uint64_t)params.xsvc_mem_reqid;
682 	}
683 
684 	/* find the memory */
685 	mp = xsvc_mem_lookup(state, key);
686 	if (mp == NULL) {
687 		return (EINVAL);
688 	}
689 
690 	xsvc_mem_free(state, mp);
691 
692 	return (0);
693 }
694 
695 /*
696  * xsvc_mem_alloc()
697  *
698  */
699 static int
700 xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
701 {
702 	xsvc_mem_t *mem;
703 
704 	mem = xsvc_mem_lookup(state, key);
705 	if (mem != NULL) {
706 		xsvc_mem_free(state, mem);
707 	}
708 
709 	*mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
710 	(*mp)->xm_mnode.mn_home = *mp;
711 	(*mp)->xm_mnode.mn_key = key;
712 
713 	mutex_enter(&state->xs_mlist.ml_mutex);
714 	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
715 	mutex_exit(&state->xs_mlist.ml_mutex);
716 
717 	return (0);
718 }
719 
720 /*
721  * xsvc_mem_free()
722  *
723  */
724 static void
725 xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
726 {
727 	if (mp->xm_dma_handle != NULL) {
728 		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
729 		ddi_dma_mem_free(&mp->xm_mem_handle);
730 		ddi_dma_free_handle(&mp->xm_dma_handle);
731 
732 		mutex_enter(&state->xs_mutex);
733 		state->xs_currently_alloced = state->xs_currently_alloced -
734 		    mp->xm_size;
735 		mutex_exit(&state->xs_mutex);
736 	}
737 
738 	mutex_enter(&state->xs_mlist.ml_mutex);
739 	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
740 	mutex_exit(&state->xs_mlist.ml_mutex);
741 
742 	kmem_free(mp, sizeof (*mp));
743 }
744 
745 /*
746  * xsvc_mem_lookup()
747  *
748  */
749 static xsvc_mem_t *
750 xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
751 {
752 	xsvc_mnode_t mnode;
753 	xsvc_mnode_t *mnp;
754 	avl_index_t where;
755 	xsvc_mem_t *mp;
756 
757 	mnode.mn_key = key;
758 	mutex_enter(&state->xs_mlist.ml_mutex);
759 	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
760 	mutex_exit(&state->xs_mlist.ml_mutex);
761 
762 	if (mnp != NULL) {
763 		mp = mnp->mn_home;
764 	} else {
765 		mp = NULL;
766 	}
767 
768 	return (mp);
769 }
770 
771 /*
772  * xsvc_mnode_key_compare()
773  *
774  */
775 static int
776 xsvc_mnode_key_compare(const void *q, const void *e)
777 {
778 	xsvc_mnode_t *n1;
779 	xsvc_mnode_t *n2;
780 
781 	n1 = (xsvc_mnode_t *)q;
782 	n2 = (xsvc_mnode_t *)e;
783 
784 	if (n1->mn_key < n2->mn_key) {
785 		return (-1);
786 	} else if (n1->mn_key > n2->mn_key) {
787 		return (1);
788 	} else {
789 		return (0);
790 	}
791 }
792 
793 /*
794  * xsvc_devmap()
795  *
796  */
797 /*ARGSUSED*/
798 static int
799 xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
800 		size_t *maplen, uint_t model)
801 {
802 	ddi_umem_cookie_t cookie;
803 	xsvc_state_t *state;
804 	offset_t off_align;
805 	size_t npages;
806 	caddr_t kvai;
807 	size_t psize;
808 	int instance;
809 	caddr_t kva;
810 	pfn_t pfn;
811 	int err;
812 	int i;
813 
814 
815 	instance = getminor(dev);
816 	state = ddi_get_soft_state(xsvc_statep, instance);
817 	if (state == NULL) {
818 		return (ENXIO);
819 	}
820 
821 	/*
822 	 * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
823 	 * smmap32 will sign extend the offset. We need to undo that since
824 	 * we are passed a physical address in off, not a offset.
825 	 */
826 #if defined(__amd64)
827 	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
828 	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
829 		off = off & 0xFFFFFFFF;
830 	}
831 #endif
832 
833 #ifdef __xpv
834 	/*
835 	 * we won't allow guest OSes to devmap mfn/pfns. Maybe we'll relax
836 	 * this some later when there is a good reason.
837 	 */
838 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
839 		return (-1);
840 	}
841 
842 	/* we will always treat this as a foreign MFN */
843 	pfn = xen_assign_pfn(btop(off));
844 #else
845 	pfn = btop(off);
846 #endif
847 	/* always work with whole pages */
848 
849 	off_align = P2ALIGN(off, PAGESIZE);
850 	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
851 
852 	/*
853 	 * if this is memory we're trying to map into user space, we first
854 	 * need to map the PFNs into KVA, then build up a umem cookie, and
855 	 * finally do a umem_setup to map it in.
856 	 */
857 	if (pf_is_memory(pfn)) {
858 		npages = btop(psize);
859 
860 		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
861 		if (kva == NULL) {
862 			return (-1);
863 		}
864 
865 		kvai = kva;
866 		for (i = 0; i < npages; i++) {
867 			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
868 			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
869 			pfn++;
870 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
871 		}
872 
873 		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
874 		if (err != 0) {
875 			goto devmapfail_cookie_alloc;
876 		}
877 
878 		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
879 		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
880 			goto devmapfail_umem_setup;
881 		}
882 		*maplen = psize;
883 
884 	/*
885 	 * If this is not memory (or a foreign MFN in i86xpv), go through
886 	 * devmem_setup.
887 	 */
888 	} else {
889 		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
890 		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
891 			return (err);
892 		}
893 		*maplen = psize;
894 	}
895 
896 	return (0);
897 
898 devmapfail_umem_setup:
899 	xsvc_umem_cookie_free(&cookie);
900 
901 devmapfail_cookie_alloc:
902 	kvai = kva;
903 	for (i = 0; i < npages; i++) {
904 		hat_unload(kas.a_hat, kvai, PAGESIZE,
905 		    HAT_UNLOAD_UNLOCK);
906 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
907 	}
908 	vmem_free(heap_arena, kva, psize);
909 
910 	return (err);
911 }
912 
913 /*
914  * xsvc_umem_cookie_alloc()
915  *
916  *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
917  *   allocated.
918  */
919 int
920 xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
921     ddi_umem_cookie_t *cookiep)
922 {
923 	struct ddi_umem_cookie *umem_cookiep;
924 
925 	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
926 	if (umem_cookiep == NULL) {
927 		*cookiep = NULL;
928 		return (-1);
929 	}
930 
931 	umem_cookiep->cvaddr = kva;
932 	umem_cookiep->type = KMEM_NON_PAGEABLE;
933 	umem_cookiep->size = size;
934 	*cookiep = (ddi_umem_cookie_t *)umem_cookiep;
935 
936 	return (0);
937 }
938 
939 /*
940  * xsvc_umem_cookie_free()
941  *
942  */
943 static void
944 xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
945 {
946 	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
947 	*cookiep = NULL;
948 }
949 
950 /*
951  * xsvc_devmap_unmap()
952  *
953  *   This routine is only call if we were mapping in memory in xsvc_devmap().
954  *   i.e. we only pass in xsvc_callbk to devmap_umem_setup if pf_is_memory()
955  *   was true. It would have been nice if devmap_callback_ctl had an args param.
956  *   We wouldn't have had to look into the devmap_handle and into the umem
957  *   cookie.
958  */
959 /*ARGSUSED*/
960 static void
961 xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
962     devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
963     void **new_pvtp2)
964 {
965 	struct ddi_umem_cookie *cp;
966 	devmap_handle_t *dhp;
967 	size_t npages;
968 	caddr_t kvai;
969 	caddr_t kva;
970 	size_t size;
971 	int i;
972 
973 
974 	/* peek into the umem cookie to figure out what we need to free up */
975 	dhp = (devmap_handle_t *)dhc;
976 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
977 	kva = cp->cvaddr;
978 	size = cp->size;
979 
980 	/*
981 	 * free up the umem cookie, then unmap all the pages what we mapped
982 	 * in during devmap, then free up the kva space.
983 	 */
984 	npages = btop(size);
985 	xsvc_umem_cookie_free(&dhp->dh_cookie);
986 	kvai = kva;
987 	for (i = 0; i < npages; i++) {
988 		hat_unload(kas.a_hat, kvai, PAGESIZE, HAT_UNLOAD_UNLOCK);
989 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
990 	}
991 	vmem_free(heap_arena, kva, size);
992 }
993