xref: /illumos-gate/usr/src/uts/i86pc/io/xsvc/xsvc.c (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2016 Joyent, Inc.
27  */
28 
29 #include <sys/errno.h>
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/kmem.h>
33 #include <sys/ddi.h>
34 #include <sys/stat.h>
35 #include <sys/sunddi.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/modctl.h>
39 #include <sys/ddi_impldefs.h>
40 #include <vm/seg_kmem.h>
41 #include <sys/vmsystm.h>
42 #include <sys/sysmacros.h>
43 #include <sys/ddidevmap.h>
44 #include <sys/avl.h>
45 #ifdef __xpv
46 #include <sys/hypervisor.h>
47 #endif
48 
49 #include <sys/xsvc.h>
50 
51 /* total max memory which can be alloced with ioctl interface */
52 uint64_t xsvc_max_memory = 10 * 1024 * 1024;
53 
54 extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
55 
56 
57 static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
58 static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
59 static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
60     int *rval);
61 static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
62     size_t *maplen, uint_t model);
63 static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
64 static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
65 static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
66     void **result);
67 
68 static struct cb_ops xsvc_cb_ops = {
69 	xsvc_open,		/* cb_open */
70 	xsvc_close,		/* cb_close */
71 	nodev,			/* cb_strategy */
72 	nodev,			/* cb_print */
73 	nodev,			/* cb_dump */
74 	nodev,			/* cb_read */
75 	nodev,			/* cb_write */
76 	xsvc_ioctl,		/* cb_ioctl */
77 	xsvc_devmap,		/* cb_devmap */
78 	NULL,			/* cb_mmap */
79 	NULL,			/* cb_segmap */
80 	nochpoll,		/* cb_chpoll */
81 	ddi_prop_op,		/* cb_prop_op */
82 	NULL,			/* cb_stream */
83 	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
84 	CB_REV
85 };
86 
87 static struct dev_ops xsvc_dev_ops = {
88 	DEVO_REV,		/* devo_rev */
89 	0,			/* devo_refcnt */
90 	xsvc_getinfo,		/* devo_getinfo */
91 	nulldev,		/* devo_identify */
92 	nulldev,		/* devo_probe */
93 	xsvc_attach,		/* devo_attach */
94 	xsvc_detach,		/* devo_detach */
95 	nodev,			/* devo_reset */
96 	&xsvc_cb_ops,		/* devo_cb_ops */
97 	NULL,			/* devo_bus_ops */
98 	NULL,			/* power */
99 	ddi_quiesce_not_needed,		/* quiesce */
100 };
101 
102 static struct modldrv xsvc_modldrv = {
103 	&mod_driverops,		/* Type of module.  This one is a driver */
104 	"xsvc driver",		/* Name of the module. */
105 	&xsvc_dev_ops,		/* driver ops */
106 };
107 
108 static struct modlinkage xsvc_modlinkage = {
109 	MODREV_1,
110 	(void *) &xsvc_modldrv,
111 	NULL
112 };
113 
114 
115 static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
116 static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
117 static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
118 static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
119     xsvc_mem_t **mp);
120 static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
121 static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
122     uint64_t key);
123 static int xsvc_mnode_key_compare(const void *q, const void *e);
124 static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
125     ddi_umem_cookie_t *cookiep);
126 static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);
127 
128 
129 void *xsvc_statep;
130 
131 static ddi_device_acc_attr_t xsvc_device_attr = {
132 	DDI_DEVICE_ATTR_V0,
133 	DDI_NEVERSWAP_ACC,
134 	DDI_STRICTORDER_ACC
135 };
136 
137 static int xsvc_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
138     offset_t off, size_t len, void **pvtp);
139 static int xsvc_devmap_dup(devmap_cookie_t dhp, void *pvtp,
140     devmap_cookie_t new_dhp, void **new_pvtp);
141 static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
142     size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
143     devmap_cookie_t new_dhp2, void **new_pvtp2);
144 
145 
146 static struct devmap_callback_ctl xsvc_callbk = {
147 	DEVMAP_OPS_REV,
148 	xsvc_devmap_map,
149 	NULL,
150 	xsvc_devmap_dup,
151 	xsvc_devmap_unmap
152 };
153 
154 
155 /*
156  * _init()
157  *
158  */
159 int
160 _init(void)
161 {
162 	int err;
163 
164 	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
165 	if (err != 0) {
166 		return (err);
167 	}
168 
169 	err = mod_install(&xsvc_modlinkage);
170 	if (err != 0) {
171 		ddi_soft_state_fini(&xsvc_statep);
172 		return (err);
173 	}
174 
175 	return (0);
176 }
177 
178 /*
179  * _info()
180  *
181  */
182 int
183 _info(struct modinfo *modinfop)
184 {
185 	return (mod_info(&xsvc_modlinkage, modinfop));
186 }
187 
188 /*
189  * _fini()
190  *
191  */
192 int
193 _fini(void)
194 {
195 	int err;
196 
197 	err = mod_remove(&xsvc_modlinkage);
198 	if (err != 0) {
199 		return (err);
200 	}
201 
202 	ddi_soft_state_fini(&xsvc_statep);
203 
204 	return (0);
205 }
206 
207 /*
208  * xsvc_attach()
209  *
210  */
211 static int
212 xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
213 {
214 	xsvc_state_t *state;
215 	int maxallocmem;
216 	int instance;
217 	int err;
218 
219 
220 	switch (cmd) {
221 	case DDI_ATTACH:
222 		break;
223 
224 	case DDI_RESUME:
225 		return (DDI_SUCCESS);
226 
227 	default:
228 		return (DDI_FAILURE);
229 	}
230 
231 	instance = ddi_get_instance(dip);
232 	err = ddi_soft_state_zalloc(xsvc_statep, instance);
233 	if (err != DDI_SUCCESS) {
234 		return (DDI_FAILURE);
235 	}
236 	state = ddi_get_soft_state(xsvc_statep, instance);
237 	if (state == NULL) {
238 		goto attachfail_get_soft_state;
239 	}
240 
241 	state->xs_dip = dip;
242 	state->xs_instance = instance;
243 
244 	/* Initialize allocation count */
245 	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
246 	state->xs_currently_alloced = 0;
247 
248 	mutex_init(&state->xs_cookie_mutex, NULL, MUTEX_DRIVER, NULL);
249 
250 	/* create the minor node (for the ioctl) */
251 	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
252 	    0);
253 	if (err != DDI_SUCCESS) {
254 		goto attachfail_minor_node;
255 	}
256 
257 	/*
258 	 * the maxallocmem property will override the default (xsvc_max_memory).
259 	 * This is the maximum total memory the ioctl will allow to be alloced.
260 	 */
261 	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
262 	    DDI_PROP_DONTPASS, "maxallocmem", -1);
263 	if (maxallocmem >= 0) {
264 		xsvc_max_memory = maxallocmem * 1024;
265 	}
266 
267 	/* Initialize list of memory allocs */
268 	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
269 	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
270 	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));
271 
272 	/* Report that driver was loaded */
273 	ddi_report_dev(dip);
274 
275 	return (DDI_SUCCESS);
276 
277 attachfail_minor_node:
278 	mutex_destroy(&state->xs_cookie_mutex);
279 	mutex_destroy(&state->xs_mutex);
280 attachfail_get_soft_state:
281 	(void) ddi_soft_state_free(xsvc_statep, instance);
282 
283 	return (err);
284 }
285 
286 /*
287  * xsvc_detach()
288  *
289  */
290 static int
291 xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
292 {
293 	xsvc_state_t *state;
294 	xsvc_mnode_t *mnode;
295 	xsvc_mem_t *mp;
296 	int instance;
297 
298 
299 	instance = ddi_get_instance(dip);
300 	state = ddi_get_soft_state(xsvc_statep, instance);
301 	if (state == NULL) {
302 		return (DDI_FAILURE);
303 	}
304 
305 	switch (cmd) {
306 	case DDI_DETACH:
307 		break;
308 
309 	case DDI_SUSPEND:
310 		return (DDI_SUCCESS);
311 
312 	default:
313 		return (DDI_FAILURE);
314 	}
315 
316 	ddi_remove_minor_node(dip, NULL);
317 
318 	/* Free any memory on list */
319 	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
320 		mp = mnode->mn_home;
321 		xsvc_mem_free(state, mp);
322 	}
323 
324 	/* remove list */
325 	avl_destroy(&state->xs_mlist.ml_avl);
326 	mutex_destroy(&state->xs_mlist.ml_mutex);
327 
328 	mutex_destroy(&state->xs_cookie_mutex);
329 	mutex_destroy(&state->xs_mutex);
330 	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
331 	return (DDI_SUCCESS);
332 }
333 
334 /*
335  * xsvc_getinfo()
336  *
337  */
338 /*ARGSUSED*/
339 static int
340 xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
341 {
342 	xsvc_state_t *state;
343 	int instance;
344 	dev_t dev;
345 	int err;
346 
347 
348 	dev = (dev_t)arg;
349 	instance = getminor(dev);
350 
351 	switch (cmd) {
352 	case DDI_INFO_DEVT2DEVINFO:
353 		state = ddi_get_soft_state(xsvc_statep, instance);
354 		if (state == NULL) {
355 			return (DDI_FAILURE);
356 		}
357 		*result = (void *)state->xs_dip;
358 		err = DDI_SUCCESS;
359 		break;
360 
361 	case DDI_INFO_DEVT2INSTANCE:
362 		*result = (void *)(uintptr_t)instance;
363 		err = DDI_SUCCESS;
364 		break;
365 
366 	default:
367 		err = DDI_FAILURE;
368 		break;
369 	}
370 
371 	return (err);
372 }
373 
374 
375 /*
376  * xsvc_open()
377  *
378  */
379 /*ARGSUSED*/
380 static int
381 xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
382 {
383 	xsvc_state_t *state;
384 	int instance;
385 
386 	instance = getminor(*devp);
387 	state = ddi_get_soft_state(xsvc_statep, instance);
388 	if (state == NULL) {
389 		return (ENXIO);
390 	}
391 
392 	return (0);
393 }
394 
395 /*
396  * xsvc_close()
397  *
398  */
399 /*ARGSUSED*/
400 static int
401 xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
402 {
403 	return (0);
404 }
405 
406 /*
407  * xsvc_ioctl()
408  *
409  */
410 /*ARGSUSED*/
411 static int
412 xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
413 {
414 	xsvc_state_t *state;
415 	int instance;
416 	int err;
417 
418 
419 	err = drv_priv(cred);
420 	if (err != 0) {
421 		return (EPERM);
422 	}
423 	instance = getminor(dev);
424 	if (instance == -1) {
425 		return (EBADF);
426 	}
427 	state = ddi_get_soft_state(xsvc_statep, instance);
428 	if (state == NULL) {
429 		return (EBADF);
430 	}
431 
432 	switch (cmd) {
433 	case XSVC_ALLOC_MEM:
434 		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
435 		break;
436 
437 	case XSVC_FREE_MEM:
438 		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
439 		break;
440 
441 	case XSVC_FLUSH_MEM:
442 		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
443 		break;
444 
445 	default:
446 		err = ENXIO;
447 	}
448 
449 	return (err);
450 }
451 
452 /*
453  * xsvc_ioctl_alloc_memory()
454  *
455  */
456 static int
457 xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
458 {
459 	xsvc_mem_req_32 params32;
460 	xsvc_mloc_32 *usgl32;
461 	xsvc_mem_req params;
462 	xsvc_mloc_32 sgl32;
463 	xsvc_mloc *usgl;
464 	xsvc_mem_t *mp;
465 	xsvc_mloc sgl;
466 	uint64_t key;
467 	size_t size;
468 	int err;
469 	int i;
470 
471 	/* Copy in the params, then get the size and key */
472 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
473 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
474 		    mode);
475 		if (err != 0) {
476 			return (EFAULT);
477 		}
478 
479 		key = (uint64_t)params32.xsvc_mem_reqid;
480 		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
481 	} else {
482 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
483 		if (err != 0) {
484 			return (EFAULT);
485 		}
486 		key = (uint64_t)params.xsvc_mem_reqid;
487 		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
488 	}
489 
490 	/*
491 	 * make sure this doesn't put us over the maximum allowed to be
492 	 * allocated
493 	 */
494 	mutex_enter(&state->xs_mutex);
495 	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
496 		mutex_exit(&state->xs_mutex);
497 		return (EAGAIN);
498 	}
499 	state->xs_currently_alloced += size;
500 	mutex_exit(&state->xs_mutex);
501 
502 	/* get state to track this memory */
503 	err = xsvc_mem_alloc(state, key, &mp);
504 	if (err != 0) {
505 		return (err);
506 	}
507 	mp->xm_size = size;
508 
509 	/* allocate and bind the memory */
510 	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
511 	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
512 	mp->xm_dma_attr.dma_attr_burstsizes = 1;
513 	mp->xm_dma_attr.dma_attr_minxfer = 1;
514 	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
515 	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
516 	mp->xm_dma_attr.dma_attr_granular = 1;
517 	mp->xm_dma_attr.dma_attr_flags = 0;
518 
519 	/* Finish converting params */
520 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
521 		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
522 		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
523 		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
524 		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
525 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
526 		    params32.xsvc_mem_align, PAGESIZE);
527 		usgl = NULL;
528 	} else {
529 		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
530 		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
531 		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
532 		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
533 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
534 		    params.xsvc_mem_align, PAGESIZE);
535 		usgl32 = NULL;
536 	}
537 
538 	mp->xm_device_attr = xsvc_device_attr;
539 
540 	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
541 	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
542 	if (err != DDI_SUCCESS) {
543 		err = EINVAL;
544 		goto allocfail_alloc_handle;
545 	}
546 
547 	/* don't sleep here so we don't get stuck in contig alloc */
548 	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
549 	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
550 	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
551 	if (err != DDI_SUCCESS) {
552 		err = EINVAL;
553 		goto allocfail_alloc_mem;
554 	}
555 
556 	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
557 	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
558 	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
559 	if (err != DDI_DMA_MAPPED) {
560 		err = EFAULT;
561 		goto allocfail_bind;
562 	}
563 
564 	/* return sgl */
565 	for (i = 0; i < mp->xm_cookie_count; i++) {
566 		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
567 			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
568 			sgl32.mloc_size = mp->xm_cookie.dmac_size;
569 			err = ddi_copyout(&sgl32, &usgl32[i],
570 			    sizeof (xsvc_mloc_32), mode);
571 			if (err != 0) {
572 				err = EFAULT;
573 				goto allocfail_copyout;
574 			}
575 		} else {
576 			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
577 			sgl.mloc_size = mp->xm_cookie.dmac_size;
578 			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
579 			    mode);
580 			if (err != 0) {
581 				err = EFAULT;
582 				goto allocfail_copyout;
583 			}
584 		}
585 		ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
586 	}
587 
588 	/* set the last sgl entry to 0 to indicate cookie count */
589 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
590 		sgl32.mloc_addr = 0;
591 		sgl32.mloc_size = 0;
592 		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
593 		    mode);
594 		if (err != 0) {
595 			err = EFAULT;
596 			goto allocfail_copyout;
597 		}
598 	} else {
599 		sgl.mloc_addr = 0;
600 		sgl.mloc_size = 0;
601 		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
602 		if (err != 0) {
603 			err = EFAULT;
604 			goto allocfail_copyout;
605 		}
606 	}
607 
608 	return (0);
609 
610 allocfail_copyout:
611 	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
612 allocfail_bind:
613 	ddi_dma_mem_free(&mp->xm_mem_handle);
614 allocfail_alloc_mem:
615 	ddi_dma_free_handle(&mp->xm_dma_handle);
616 allocfail_alloc_handle:
617 	mp->xm_dma_handle = NULL;
618 	xsvc_mem_free(state, mp);
619 
620 	mutex_enter(&state->xs_mutex);
621 	state->xs_currently_alloced = state->xs_currently_alloced - size;
622 	mutex_exit(&state->xs_mutex);
623 
624 	return (err);
625 }
626 
627 /*
628  * xsvc_ioctl_flush_memory()
629  *
630  */
631 static int
632 xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
633 {
634 	xsvc_mem_req_32 params32;
635 	xsvc_mem_req params;
636 	xsvc_mem_t *mp;
637 	uint64_t key;
638 	int err;
639 
640 
641 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
642 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
643 		    mode);
644 		if (err != 0) {
645 			return (EFAULT);
646 		}
647 		key = (uint64_t)params32.xsvc_mem_reqid;
648 	} else {
649 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
650 		if (err != 0) {
651 			return (EFAULT);
652 		}
653 		key = (uint64_t)params.xsvc_mem_reqid;
654 	}
655 
656 	/* find the memory */
657 	mp = xsvc_mem_lookup(state, key);
658 	if (mp == NULL) {
659 		return (EINVAL);
660 	}
661 
662 	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);
663 
664 	return (0);
665 }
666 
667 
668 /*
669  * xsvc_ioctl_free_memory()
670  *
671  */
672 static int
673 xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
674 {
675 	xsvc_mem_req_32 params32;
676 	xsvc_mem_req params;
677 	xsvc_mem_t *mp;
678 	uint64_t key;
679 	int err;
680 
681 
682 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
683 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
684 		    mode);
685 		if (err != 0) {
686 			return (EFAULT);
687 		}
688 		key = (uint64_t)params32.xsvc_mem_reqid;
689 	} else {
690 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
691 		if (err != 0) {
692 			return (EFAULT);
693 		}
694 		key = (uint64_t)params.xsvc_mem_reqid;
695 	}
696 
697 	/* find the memory */
698 	mp = xsvc_mem_lookup(state, key);
699 	if (mp == NULL) {
700 		return (EINVAL);
701 	}
702 
703 	xsvc_mem_free(state, mp);
704 
705 	return (0);
706 }
707 
708 /*
709  * xsvc_mem_alloc()
710  *
711  */
712 static int
713 xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
714 {
715 	xsvc_mem_t *mem;
716 
717 	mem = xsvc_mem_lookup(state, key);
718 	if (mem != NULL) {
719 		xsvc_mem_free(state, mem);
720 	}
721 
722 	*mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
723 	(*mp)->xm_mnode.mn_home = *mp;
724 	(*mp)->xm_mnode.mn_key = key;
725 
726 	mutex_enter(&state->xs_mlist.ml_mutex);
727 	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
728 	mutex_exit(&state->xs_mlist.ml_mutex);
729 
730 	return (0);
731 }
732 
733 /*
734  * xsvc_mem_free()
735  *
736  */
737 static void
738 xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
739 {
740 	if (mp->xm_dma_handle != NULL) {
741 		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
742 		ddi_dma_mem_free(&mp->xm_mem_handle);
743 		ddi_dma_free_handle(&mp->xm_dma_handle);
744 
745 		mutex_enter(&state->xs_mutex);
746 		state->xs_currently_alloced = state->xs_currently_alloced -
747 		    mp->xm_size;
748 		mutex_exit(&state->xs_mutex);
749 	}
750 
751 	mutex_enter(&state->xs_mlist.ml_mutex);
752 	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
753 	mutex_exit(&state->xs_mlist.ml_mutex);
754 
755 	kmem_free(mp, sizeof (*mp));
756 }
757 
758 /*
759  * xsvc_mem_lookup()
760  *
761  */
762 static xsvc_mem_t *
763 xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
764 {
765 	xsvc_mnode_t mnode;
766 	xsvc_mnode_t *mnp;
767 	avl_index_t where;
768 	xsvc_mem_t *mp;
769 
770 	mnode.mn_key = key;
771 	mutex_enter(&state->xs_mlist.ml_mutex);
772 	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
773 	mutex_exit(&state->xs_mlist.ml_mutex);
774 
775 	if (mnp != NULL) {
776 		mp = mnp->mn_home;
777 	} else {
778 		mp = NULL;
779 	}
780 
781 	return (mp);
782 }
783 
784 /*
785  * xsvc_mnode_key_compare()
786  *
787  */
788 static int
789 xsvc_mnode_key_compare(const void *q, const void *e)
790 {
791 	xsvc_mnode_t *n1;
792 	xsvc_mnode_t *n2;
793 
794 	n1 = (xsvc_mnode_t *)q;
795 	n2 = (xsvc_mnode_t *)e;
796 
797 	if (n1->mn_key < n2->mn_key) {
798 		return (-1);
799 	} else if (n1->mn_key > n2->mn_key) {
800 		return (1);
801 	} else {
802 		return (0);
803 	}
804 }
805 
806 /*
807  * xsvc_devmap()
808  *
809  */
810 /*ARGSUSED*/
811 static int
812 xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
813     size_t *maplen, uint_t model)
814 {
815 	ddi_umem_cookie_t cookie;
816 	xsvc_state_t *state;
817 	offset_t off_align;
818 	size_t npages;
819 	caddr_t kvai;
820 	size_t psize;
821 	int instance;
822 	caddr_t kva;
823 	pfn_t pfn;
824 	int err;
825 	int i;
826 
827 
828 	instance = getminor(dev);
829 	state = ddi_get_soft_state(xsvc_statep, instance);
830 	if (state == NULL) {
831 		return (ENXIO);
832 	}
833 
834 	/*
835 	 * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
836 	 * smmap32 will sign extend the offset. We need to undo that since
837 	 * we are passed a physical address in off, not a offset.
838 	 */
839 	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
840 	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
841 		off = off & 0xFFFFFFFF;
842 	}
843 
844 #ifdef __xpv
845 	/*
846 	 * we won't allow guest OSes to devmap mfn/pfns. Maybe we'll relax
847 	 * this some later when there is a good reason.
848 	 */
849 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
850 		return (-1);
851 	}
852 
853 	/* we will always treat this as a foreign MFN */
854 	pfn = xen_assign_pfn(btop(off));
855 #else
856 	pfn = btop(off);
857 #endif
858 	/* always work with whole pages */
859 
860 	off_align = P2ALIGN(off, PAGESIZE);
861 	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
862 
863 	/*
864 	 * if this is memory we're trying to map into user space, we first
865 	 * need to map the PFNs into KVA, then build up a umem cookie, and
866 	 * finally do a umem_setup to map it in.
867 	 */
868 	if (pf_is_memory(pfn)) {
869 		npages = btop(psize);
870 
871 		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
872 		if (kva == NULL) {
873 			return (-1);
874 		}
875 
876 		kvai = kva;
877 		for (i = 0; i < npages; i++) {
878 			page_t *pp = page_numtopp_nolock(pfn);
879 
880 			/*
881 			 * Preemptively check for panic conditions from
882 			 * hat_devload and error out instead.
883 			 */
884 			if (pp != NULL && (PP_ISFREE(pp) ||
885 			    (!PAGE_LOCKED(pp) && !PP_ISNORELOC(pp)))) {
886 				err = DDI_FAILURE;
887 				npages = i;
888 				goto devmapfail_cookie_alloc;
889 			}
890 
891 			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
892 			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
893 			pfn++;
894 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
895 		}
896 
897 		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
898 		if (err != 0) {
899 			goto devmapfail_cookie_alloc;
900 		}
901 
902 		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
903 		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
904 			goto devmapfail_umem_setup;
905 		}
906 		*maplen = psize;
907 
908 	/*
909 	 * If this is not memory (or a foreign MFN in i86xpv), go through
910 	 * devmem_setup.
911 	 */
912 	} else {
913 		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
914 		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
915 			return (err);
916 		}
917 		*maplen = psize;
918 	}
919 
920 	return (0);
921 
922 devmapfail_umem_setup:
923 	xsvc_umem_cookie_free(&cookie);
924 
925 devmapfail_cookie_alloc:
926 	kvai = kva;
927 	for (i = 0; i < npages; i++) {
928 		hat_unload(kas.a_hat, kvai, PAGESIZE,
929 		    HAT_UNLOAD_UNLOCK);
930 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
931 	}
932 	vmem_free(heap_arena, kva, psize);
933 
934 	return (err);
935 }
936 
937 /*
938  * xsvc_umem_cookie_alloc()
939  *
940  *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
941  *   allocated.
942  */
943 int
944 xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
945     ddi_umem_cookie_t *cookiep)
946 {
947 	struct ddi_umem_cookie *umem_cookiep;
948 
949 	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
950 	if (umem_cookiep == NULL) {
951 		*cookiep = NULL;
952 		return (-1);
953 	}
954 
955 	umem_cookiep->cvaddr = kva;
956 	umem_cookiep->type = KMEM_NON_PAGEABLE;
957 	umem_cookiep->size = size;
958 	*cookiep = (ddi_umem_cookie_t *)umem_cookiep;
959 
960 	return (0);
961 }
962 
963 /*
964  * xsvc_umem_cookie_free()
965  *
966  */
967 static void
968 xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
969 {
970 	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
971 	*cookiep = NULL;
972 }
973 
974 
975 /*
976  * xsvc_devmap_map()
977  *
978  */
979 /*ARGSUSED*/
980 static int
981 xsvc_devmap_map(devmap_cookie_t dhc, dev_t dev, uint_t flags, offset_t off,
982     size_t len, void **pvtp)
983 {
984 	struct ddi_umem_cookie *cp;
985 	devmap_handle_t *dhp;
986 	xsvc_state_t *state;
987 	int instance;
988 
989 
990 	instance = getminor(dev);
991 	state = ddi_get_soft_state(xsvc_statep, instance);
992 	if (state == NULL) {
993 		return (ENXIO);
994 	}
995 
996 	dhp = (devmap_handle_t *)dhc;
997 	/* This driver only supports MAP_SHARED, not MAP_PRIVATE */
998 	if (flags & MAP_PRIVATE) {
999 		cmn_err(CE_WARN, "!xsvc driver doesn't support MAP_PRIVATE");
1000 		return (EINVAL);
1001 	}
1002 
1003 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1004 	cp->cook_refcnt = 1;
1005 
1006 	*pvtp = state;
1007 	return (0);
1008 }
1009 
1010 
1011 /*
1012  * xsvc_devmap_dup()
1013  *
1014  *   keep a reference count for forks so we don't unmap if we have multiple
1015  *   mappings.
1016  */
1017 /*ARGSUSED*/
1018 static int
1019 xsvc_devmap_dup(devmap_cookie_t dhc, void *pvtp, devmap_cookie_t new_dhp,
1020     void **new_pvtp)
1021 {
1022 	struct ddi_umem_cookie *cp;
1023 	devmap_handle_t *dhp;
1024 	xsvc_state_t *state;
1025 
1026 
1027 	state = (xsvc_state_t *)pvtp;
1028 	dhp = (devmap_handle_t *)dhc;
1029 
1030 	mutex_enter(&state->xs_cookie_mutex);
1031 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1032 	if (cp == NULL) {
1033 		mutex_exit(&state->xs_cookie_mutex);
1034 		return (ENOMEM);
1035 	}
1036 
1037 	cp->cook_refcnt++;
1038 	mutex_exit(&state->xs_cookie_mutex);
1039 
1040 	*new_pvtp = state;
1041 	return (0);
1042 }
1043 
1044 
1045 /*
1046  * xsvc_devmap_unmap()
1047  *
1048  *   This routine is only call if we were mapping in memory in xsvc_devmap().
1049  *   i.e. we only pass in xsvc_callbk to devmap_umem_setup if pf_is_memory()
1050  *   was true. It would have been nice if devmap_callback_ctl had an args param.
1051  *   We wouldn't have had to look into the devmap_handle and into the umem
1052  *   cookie.
1053  */
1054 /*ARGSUSED*/
1055 static void
1056 xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
1057     devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
1058     void **new_pvtp2)
1059 {
1060 	struct ddi_umem_cookie *ncp;
1061 	struct ddi_umem_cookie *cp;
1062 	devmap_handle_t *ndhp;
1063 	devmap_handle_t *dhp;
1064 	xsvc_state_t *state;
1065 	size_t npages;
1066 	caddr_t kvai;
1067 	caddr_t kva;
1068 	size_t size;
1069 	int i;
1070 
1071 
1072 	state = (xsvc_state_t *)pvtp;
1073 	mutex_enter(&state->xs_cookie_mutex);
1074 
1075 	/* peek into the umem cookie to figure out what we need to free up */
1076 	dhp = (devmap_handle_t *)dhc;
1077 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1078 	ASSERT(cp != NULL);
1079 
1080 	if (new_dhp1 != NULL) {
1081 		ndhp = (devmap_handle_t *)new_dhp1;
1082 		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
1083 		ncp->cook_refcnt++;
1084 		*new_pvtp1 = state;
1085 	}
1086 	if (new_dhp2 != NULL) {
1087 		ndhp = (devmap_handle_t *)new_dhp2;
1088 		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
1089 		ncp->cook_refcnt++;
1090 		*new_pvtp2 = state;
1091 	}
1092 
1093 	cp->cook_refcnt--;
1094 	if (cp->cook_refcnt == 0) {
1095 		kva = cp->cvaddr;
1096 		size = cp->size;
1097 
1098 		/*
1099 		 * free up the umem cookie, then unmap all the pages what we
1100 		 * mapped in during devmap, then free up the kva space.
1101 		 */
1102 		npages = btop(size);
1103 		xsvc_umem_cookie_free(&dhp->dh_cookie);
1104 		kvai = kva;
1105 		for (i = 0; i < npages; i++) {
1106 			hat_unload(kas.a_hat, kvai, PAGESIZE,
1107 			    HAT_UNLOAD_UNLOCK);
1108 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
1109 		}
1110 		vmem_free(heap_arena, kva, size);
1111 	}
1112 
1113 	mutex_exit(&state->xs_cookie_mutex);
1114 }
1115