xref: /illumos-gate/usr/src/uts/i86pc/io/xsvc/xsvc.c (revision 1f0a297725d06da62d0c701916f52e7f403ee0fc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2016 Joyent, Inc.
27  */
28 
29 #include <sys/errno.h>
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/kmem.h>
33 #include <sys/ddi.h>
34 #include <sys/stat.h>
35 #include <sys/sunddi.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/modctl.h>
39 #include <sys/ddi_impldefs.h>
40 #include <vm/seg_kmem.h>
41 #include <sys/vmsystm.h>
42 #include <sys/sysmacros.h>
43 #include <sys/ddidevmap.h>
44 #include <sys/avl.h>
45 #ifdef __xpv
46 #include <sys/hypervisor.h>
47 #endif
48 
49 #include <sys/xsvc.h>
50 
51 /* total max memory which can be alloced with ioctl interface */
52 uint64_t xsvc_max_memory = 10 * 1024 * 1024;
53 
54 extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
55 
56 
57 static int xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred);
58 static int xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred);
59 static int xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
60     int *rval);
61 static int xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
62     size_t *maplen, uint_t model);
63 static int xsvc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
64 static int xsvc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
65 static int xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
66     void **result);
67 
68 static 	struct cb_ops xsvc_cb_ops = {
69 	xsvc_open,		/* cb_open */
70 	xsvc_close,		/* cb_close */
71 	nodev,			/* cb_strategy */
72 	nodev,			/* cb_print */
73 	nodev,			/* cb_dump */
74 	nodev,			/* cb_read */
75 	nodev,			/* cb_write */
76 	xsvc_ioctl,		/* cb_ioctl */
77 	xsvc_devmap,		/* cb_devmap */
78 	NULL,			/* cb_mmap */
79 	NULL,			/* cb_segmap */
80 	nochpoll,		/* cb_chpoll */
81 	ddi_prop_op,		/* cb_prop_op */
82 	NULL,			/* cb_stream */
83 	D_NEW | D_MP | D_64BIT | D_DEVMAP,	/* cb_flag */
84 	CB_REV
85 };
86 
87 static struct dev_ops xsvc_dev_ops = {
88 	DEVO_REV,		/* devo_rev */
89 	0,			/* devo_refcnt */
90 	xsvc_getinfo,		/* devo_getinfo */
91 	nulldev,		/* devo_identify */
92 	nulldev,		/* devo_probe */
93 	xsvc_attach,		/* devo_attach */
94 	xsvc_detach,		/* devo_detach */
95 	nodev,			/* devo_reset */
96 	&xsvc_cb_ops,		/* devo_cb_ops */
97 	NULL,			/* devo_bus_ops */
98 	NULL,			/* power */
99 	ddi_quiesce_not_needed,		/* quiesce */
100 };
101 
102 static struct modldrv xsvc_modldrv = {
103 	&mod_driverops,		/* Type of module.  This one is a driver */
104 	"xsvc driver",		/* Name of the module. */
105 	&xsvc_dev_ops,		/* driver ops */
106 };
107 
108 static struct modlinkage xsvc_modlinkage = {
109 	MODREV_1,
110 	(void *) &xsvc_modldrv,
111 	NULL
112 };
113 
114 
115 static int xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode);
116 static int xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode);
117 static int xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode);
118 static int xsvc_mem_alloc(xsvc_state_t *state, uint64_t key,
119     xsvc_mem_t **mp);
120 static void xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp);
121 static xsvc_mem_t *xsvc_mem_lookup(xsvc_state_t *state,
122     uint64_t key);
123 static int xsvc_mnode_key_compare(const void *q, const void *e);
124 static int xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
125     ddi_umem_cookie_t *cookiep);
126 static void xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep);
127 
128 
129 void *xsvc_statep;
130 
131 static ddi_device_acc_attr_t xsvc_device_attr = {
132 	DDI_DEVICE_ATTR_V0,
133 	DDI_NEVERSWAP_ACC,
134 	DDI_STRICTORDER_ACC
135 };
136 
137 static int xsvc_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
138     offset_t off, size_t len, void **pvtp);
139 static int xsvc_devmap_dup(devmap_cookie_t dhp, void *pvtp,
140     devmap_cookie_t new_dhp, void **new_pvtp);
141 static void xsvc_devmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
142     size_t len, devmap_cookie_t new_dhp1, void **new_pvtp1,
143     devmap_cookie_t new_dhp2, void **new_pvtp2);
144 
145 
146 static struct devmap_callback_ctl xsvc_callbk = {
147 	DEVMAP_OPS_REV,
148 	xsvc_devmap_map,
149 	NULL,
150 	xsvc_devmap_dup,
151 	xsvc_devmap_unmap
152 };
153 
154 
155 /*
156  * _init()
157  *
158  */
159 int
160 _init(void)
161 {
162 	int err;
163 
164 	err = ddi_soft_state_init(&xsvc_statep, sizeof (xsvc_state_t), 1);
165 	if (err != 0) {
166 		return (err);
167 	}
168 
169 	err = mod_install(&xsvc_modlinkage);
170 	if (err != 0) {
171 		ddi_soft_state_fini(&xsvc_statep);
172 		return (err);
173 	}
174 
175 	return (0);
176 }
177 
178 /*
179  * _info()
180  *
181  */
182 int
183 _info(struct modinfo *modinfop)
184 {
185 	return (mod_info(&xsvc_modlinkage, modinfop));
186 }
187 
188 /*
189  * _fini()
190  *
191  */
192 int
193 _fini(void)
194 {
195 	int err;
196 
197 	err = mod_remove(&xsvc_modlinkage);
198 	if (err != 0) {
199 		return (err);
200 	}
201 
202 	ddi_soft_state_fini(&xsvc_statep);
203 
204 	return (0);
205 }
206 
207 /*
208  * xsvc_attach()
209  *
210  */
211 static int
212 xsvc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
213 {
214 	xsvc_state_t *state;
215 	int maxallocmem;
216 	int instance;
217 	int err;
218 
219 
220 	switch (cmd) {
221 	case DDI_ATTACH:
222 		break;
223 
224 	case DDI_RESUME:
225 		return (DDI_SUCCESS);
226 
227 	default:
228 		return (DDI_FAILURE);
229 	}
230 
231 	instance = ddi_get_instance(dip);
232 	err = ddi_soft_state_zalloc(xsvc_statep, instance);
233 	if (err != DDI_SUCCESS) {
234 		return (DDI_FAILURE);
235 	}
236 	state = ddi_get_soft_state(xsvc_statep, instance);
237 	if (state == NULL) {
238 		goto attachfail_get_soft_state;
239 	}
240 
241 	state->xs_dip = dip;
242 	state->xs_instance = instance;
243 
244 	/* Initialize allocation count */
245 	mutex_init(&state->xs_mutex, NULL, MUTEX_DRIVER, NULL);
246 	state->xs_currently_alloced = 0;
247 
248 	mutex_init(&state->xs_cookie_mutex, NULL, MUTEX_DRIVER, NULL);
249 
250 	/* create the minor node (for the ioctl) */
251 	err = ddi_create_minor_node(dip, "xsvc", S_IFCHR, instance, DDI_PSEUDO,
252 	    0);
253 	if (err != DDI_SUCCESS) {
254 		goto attachfail_minor_node;
255 	}
256 
257 	/*
258 	 * the maxallocmem property will override the default (xsvc_max_memory).
259 	 * This is the maximum total memory the ioctl will allow to be alloced.
260 	 */
261 	maxallocmem = ddi_prop_get_int(DDI_DEV_T_ANY, state->xs_dip,
262 	    DDI_PROP_DONTPASS, "maxallocmem", -1);
263 	if (maxallocmem >= 0) {
264 		xsvc_max_memory = maxallocmem * 1024;
265 	}
266 
267 	/* Initialize list of memory allocs */
268 	mutex_init(&state->xs_mlist.ml_mutex, NULL, MUTEX_DRIVER, NULL);
269 	avl_create(&state->xs_mlist.ml_avl, xsvc_mnode_key_compare,
270 	    sizeof (xsvc_mnode_t), offsetof(xsvc_mnode_t, mn_link));
271 
272 	/* Report that driver was loaded */
273 	ddi_report_dev(dip);
274 
275 	return (DDI_SUCCESS);
276 
277 attachfail_minor_node:
278 	mutex_destroy(&state->xs_cookie_mutex);
279 	mutex_destroy(&state->xs_mutex);
280 attachfail_get_soft_state:
281 	(void) ddi_soft_state_free(xsvc_statep, instance);
282 
283 	return (err);
284 }
285 
286 /*
287  * xsvc_detach()
288  *
289  */
290 static int
291 xsvc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
292 {
293 	xsvc_state_t *state;
294 	xsvc_mnode_t *mnode;
295 	xsvc_mem_t *mp;
296 	int instance;
297 
298 
299 	instance = ddi_get_instance(dip);
300 	state = ddi_get_soft_state(xsvc_statep, instance);
301 	if (state == NULL) {
302 		return (DDI_FAILURE);
303 	}
304 
305 	switch (cmd) {
306 	case DDI_DETACH:
307 		break;
308 
309 	case DDI_SUSPEND:
310 		return (DDI_SUCCESS);
311 
312 	default:
313 		return (DDI_FAILURE);
314 	}
315 
316 	ddi_remove_minor_node(dip, NULL);
317 
318 	/* Free any memory on list */
319 	while ((mnode = avl_first(&state->xs_mlist.ml_avl)) != NULL) {
320 		mp = mnode->mn_home;
321 		xsvc_mem_free(state, mp);
322 	}
323 
324 	/* remove list */
325 	avl_destroy(&state->xs_mlist.ml_avl);
326 	mutex_destroy(&state->xs_mlist.ml_mutex);
327 
328 	mutex_destroy(&state->xs_cookie_mutex);
329 	mutex_destroy(&state->xs_mutex);
330 	(void) ddi_soft_state_free(xsvc_statep, state->xs_instance);
331 	return (DDI_SUCCESS);
332 }
333 
334 /*
335  * xsvc_getinfo()
336  *
337  */
338 /*ARGSUSED*/
339 static int
340 xsvc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
341 {
342 	xsvc_state_t *state;
343 	int instance;
344 	dev_t dev;
345 	int err;
346 
347 
348 	dev = (dev_t)arg;
349 	instance = getminor(dev);
350 
351 	switch (cmd) {
352 	case DDI_INFO_DEVT2DEVINFO:
353 		state = ddi_get_soft_state(xsvc_statep, instance);
354 		if (state == NULL) {
355 			return (DDI_FAILURE);
356 		}
357 		*result = (void *)state->xs_dip;
358 		err = DDI_SUCCESS;
359 		break;
360 
361 	case DDI_INFO_DEVT2INSTANCE:
362 		*result = (void *)(uintptr_t)instance;
363 		err = DDI_SUCCESS;
364 		break;
365 
366 	default:
367 		err = DDI_FAILURE;
368 		break;
369 	}
370 
371 	return (err);
372 }
373 
374 
375 /*
376  * xsvc_open()
377  *
378  */
379 /*ARGSUSED*/
380 static int
381 xsvc_open(dev_t *devp, int flag, int otyp, cred_t *cred)
382 {
383 	xsvc_state_t *state;
384 	int instance;
385 
386 	instance = getminor(*devp);
387 	state = ddi_get_soft_state(xsvc_statep, instance);
388 	if (state == NULL) {
389 		return (ENXIO);
390 	}
391 
392 	return (0);
393 }
394 
395 /*
396  * xsvc_close()
397  *
398  */
399 /*ARGSUSED*/
400 static int
401 xsvc_close(dev_t devp, int flag, int otyp, cred_t *cred)
402 {
403 	return (0);
404 }
405 
406 /*
407  * xsvc_ioctl()
408  *
409  */
410 /*ARGSUSED*/
411 static int
412 xsvc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, int *rval)
413 {
414 	xsvc_state_t *state;
415 	int instance;
416 	int err;
417 
418 
419 	err = drv_priv(cred);
420 	if (err != 0) {
421 		return (EPERM);
422 	}
423 	instance = getminor(dev);
424 	if (instance == -1) {
425 		return (EBADF);
426 	}
427 	state = ddi_get_soft_state(xsvc_statep, instance);
428 	if (state == NULL) {
429 		return (EBADF);
430 	}
431 
432 	switch (cmd) {
433 	case XSVC_ALLOC_MEM:
434 		err = xsvc_ioctl_alloc_memory(state, (void *)arg, mode);
435 		break;
436 
437 	case XSVC_FREE_MEM:
438 		err = xsvc_ioctl_free_memory(state, (void *)arg, mode);
439 		break;
440 
441 	case XSVC_FLUSH_MEM:
442 		err = xsvc_ioctl_flush_memory(state, (void *)arg, mode);
443 		break;
444 
445 	default:
446 		err = ENXIO;
447 	}
448 
449 	return (err);
450 }
451 
452 /*
453  * xsvc_ioctl_alloc_memory()
454  *
455  */
456 static int
457 xsvc_ioctl_alloc_memory(xsvc_state_t *state, void *arg, int mode)
458 {
459 	xsvc_mem_req_32 params32;
460 	xsvc_mloc_32 *usgl32;
461 	xsvc_mem_req params;
462 	xsvc_mloc_32 sgl32;
463 	xsvc_mloc *usgl;
464 	xsvc_mem_t *mp;
465 	xsvc_mloc sgl;
466 	uint64_t key;
467 	size_t size;
468 	int err;
469 	int i;
470 
471 
472 	/* Copy in the params, then get the size and key */
473 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
474 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
475 		    mode);
476 		if (err != 0) {
477 			return (EFAULT);
478 		}
479 
480 		key = (uint64_t)params32.xsvc_mem_reqid;
481 		size = P2ROUNDUP((size_t)params32.xsvc_mem_size, PAGESIZE);
482 	} else {
483 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
484 		if (err != 0) {
485 			return (EFAULT);
486 		}
487 		key = (uint64_t)params.xsvc_mem_reqid;
488 		size = P2ROUNDUP(params.xsvc_mem_size, PAGESIZE);
489 	}
490 
491 	/*
492 	 * make sure this doesn't put us over the maximum allowed to be
493 	 * allocated
494 	 */
495 	mutex_enter(&state->xs_mutex);
496 	if ((state->xs_currently_alloced + size) > xsvc_max_memory) {
497 		mutex_exit(&state->xs_mutex);
498 		return (EAGAIN);
499 	}
500 	state->xs_currently_alloced += size;
501 	mutex_exit(&state->xs_mutex);
502 
503 	/* get state to track this memory */
504 	err = xsvc_mem_alloc(state, key, &mp);
505 	if (err != 0) {
506 		return (err);
507 	}
508 	mp->xm_size = size;
509 
510 	/* allocate and bind the memory */
511 	mp->xm_dma_attr.dma_attr_version = DMA_ATTR_V0;
512 	mp->xm_dma_attr.dma_attr_count_max = (uint64_t)0xFFFFFFFF;
513 	mp->xm_dma_attr.dma_attr_burstsizes = 1;
514 	mp->xm_dma_attr.dma_attr_minxfer = 1;
515 	mp->xm_dma_attr.dma_attr_maxxfer = (uint64_t)0xFFFFFFFF;
516 	mp->xm_dma_attr.dma_attr_seg = (uint64_t)0xFFFFFFFF;
517 	mp->xm_dma_attr.dma_attr_granular = 1;
518 	mp->xm_dma_attr.dma_attr_flags = 0;
519 
520 	/* Finish converting params */
521 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
522 		mp->xm_dma_attr.dma_attr_addr_lo = params32.xsvc_mem_addr_lo;
523 		mp->xm_dma_attr.dma_attr_addr_hi = params32.xsvc_mem_addr_hi;
524 		mp->xm_dma_attr.dma_attr_sgllen = params32.xsvc_mem_sgllen;
525 		usgl32 = (xsvc_mloc_32 *)(uintptr_t)params32.xsvc_sg_list;
526 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
527 		    params32.xsvc_mem_align, PAGESIZE);
528 	} else {
529 		mp->xm_dma_attr.dma_attr_addr_lo = params.xsvc_mem_addr_lo;
530 		mp->xm_dma_attr.dma_attr_addr_hi = params.xsvc_mem_addr_hi;
531 		mp->xm_dma_attr.dma_attr_sgllen = params.xsvc_mem_sgllen;
532 		usgl = (xsvc_mloc *)(uintptr_t)params.xsvc_sg_list;
533 		mp->xm_dma_attr.dma_attr_align = P2ROUNDUP(
534 		    params.xsvc_mem_align, PAGESIZE);
535 	}
536 
537 	mp->xm_device_attr = xsvc_device_attr;
538 
539 	err = ddi_dma_alloc_handle(state->xs_dip, &mp->xm_dma_attr,
540 	    DDI_DMA_SLEEP, NULL, &mp->xm_dma_handle);
541 	if (err != DDI_SUCCESS) {
542 		err = EINVAL;
543 		goto allocfail_alloc_handle;
544 	}
545 
546 	/* don't sleep here so we don't get stuck in contig alloc */
547 	err = ddi_dma_mem_alloc(mp->xm_dma_handle, mp->xm_size,
548 	    &mp->xm_device_attr, DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
549 	    &mp->xm_addr, &mp->xm_real_length, &mp->xm_mem_handle);
550 	if (err != DDI_SUCCESS) {
551 		err = EINVAL;
552 		goto allocfail_alloc_mem;
553 	}
554 
555 	err = ddi_dma_addr_bind_handle(mp->xm_dma_handle, NULL, mp->xm_addr,
556 	    mp->xm_size, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
557 	    NULL, &mp->xm_cookie, &mp->xm_cookie_count);
558 	if (err != DDI_DMA_MAPPED) {
559 		err = EFAULT;
560 		goto allocfail_bind;
561 	}
562 
563 	/* return sgl */
564 	for (i = 0; i < mp->xm_cookie_count; i++) {
565 		if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
566 			sgl32.mloc_addr = mp->xm_cookie.dmac_laddress;
567 			sgl32.mloc_size = mp->xm_cookie.dmac_size;
568 			err = ddi_copyout(&sgl32, &usgl32[i],
569 			    sizeof (xsvc_mloc_32), mode);
570 			if (err != 0) {
571 				err = EFAULT;
572 				goto allocfail_copyout;
573 			}
574 		} else {
575 			sgl.mloc_addr = mp->xm_cookie.dmac_laddress;
576 			sgl.mloc_size = mp->xm_cookie.dmac_size;
577 			err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc),
578 			    mode);
579 			if (err != 0) {
580 				err = EFAULT;
581 				goto allocfail_copyout;
582 			}
583 		}
584 		ddi_dma_nextcookie(mp->xm_dma_handle, &mp->xm_cookie);
585 	}
586 
587 	/* set the last sgl entry to 0 to indicate cookie count */
588 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
589 		sgl32.mloc_addr = 0;
590 		sgl32.mloc_size = 0;
591 		err = ddi_copyout(&sgl32, &usgl32[i], sizeof (xsvc_mloc_32),
592 		    mode);
593 		if (err != 0) {
594 			err = EFAULT;
595 			goto allocfail_copyout;
596 		}
597 	} else {
598 		sgl.mloc_addr = 0;
599 		sgl.mloc_size = 0;
600 		err = ddi_copyout(&sgl, &usgl[i], sizeof (xsvc_mloc), mode);
601 		if (err != 0) {
602 			err = EFAULT;
603 			goto allocfail_copyout;
604 		}
605 	}
606 
607 	return (0);
608 
609 allocfail_copyout:
610 	(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
611 allocfail_bind:
612 	ddi_dma_mem_free(&mp->xm_mem_handle);
613 allocfail_alloc_mem:
614 	ddi_dma_free_handle(&mp->xm_dma_handle);
615 allocfail_alloc_handle:
616 	mp->xm_dma_handle = NULL;
617 	xsvc_mem_free(state, mp);
618 
619 	mutex_enter(&state->xs_mutex);
620 	state->xs_currently_alloced = state->xs_currently_alloced - size;
621 	mutex_exit(&state->xs_mutex);
622 
623 	return (err);
624 }
625 
626 /*
627  * xsvc_ioctl_flush_memory()
628  *
629  */
630 static int
631 xsvc_ioctl_flush_memory(xsvc_state_t *state, void *arg, int mode)
632 {
633 	xsvc_mem_req_32 params32;
634 	xsvc_mem_req params;
635 	xsvc_mem_t *mp;
636 	uint64_t key;
637 	int err;
638 
639 
640 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
641 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
642 		    mode);
643 		if (err != 0) {
644 			return (EFAULT);
645 		}
646 		key = (uint64_t)params32.xsvc_mem_reqid;
647 	} else {
648 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
649 		if (err != 0) {
650 			return (EFAULT);
651 		}
652 		key = (uint64_t)params.xsvc_mem_reqid;
653 	}
654 
655 	/* find the memory */
656 	mp = xsvc_mem_lookup(state, key);
657 	if (mp == NULL) {
658 		return (EINVAL);
659 	}
660 
661 	(void) ddi_dma_sync(mp->xm_dma_handle, 0, 0, DDI_DMA_SYNC_FORCPU);
662 
663 	return (0);
664 }
665 
666 
667 /*
668  * xsvc_ioctl_free_memory()
669  *
670  */
671 static int
672 xsvc_ioctl_free_memory(xsvc_state_t *state, void *arg, int mode)
673 {
674 	xsvc_mem_req_32 params32;
675 	xsvc_mem_req params;
676 	xsvc_mem_t *mp;
677 	uint64_t key;
678 	int err;
679 
680 
681 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
682 		err = ddi_copyin(arg, &params32, sizeof (xsvc_mem_req_32),
683 		    mode);
684 		if (err != 0) {
685 			return (EFAULT);
686 		}
687 		key = (uint64_t)params32.xsvc_mem_reqid;
688 	} else {
689 		err = ddi_copyin(arg, &params, sizeof (xsvc_mem_req), mode);
690 		if (err != 0) {
691 			return (EFAULT);
692 		}
693 		key = (uint64_t)params.xsvc_mem_reqid;
694 	}
695 
696 	/* find the memory */
697 	mp = xsvc_mem_lookup(state, key);
698 	if (mp == NULL) {
699 		return (EINVAL);
700 	}
701 
702 	xsvc_mem_free(state, mp);
703 
704 	return (0);
705 }
706 
707 /*
708  * xsvc_mem_alloc()
709  *
710  */
711 static int
712 xsvc_mem_alloc(xsvc_state_t *state, uint64_t key, xsvc_mem_t **mp)
713 {
714 	xsvc_mem_t *mem;
715 
716 	mem = xsvc_mem_lookup(state, key);
717 	if (mem != NULL) {
718 		xsvc_mem_free(state, mem);
719 	}
720 
721 	*mp = kmem_alloc(sizeof (xsvc_mem_t), KM_SLEEP);
722 	(*mp)->xm_mnode.mn_home = *mp;
723 	(*mp)->xm_mnode.mn_key = key;
724 
725 	mutex_enter(&state->xs_mlist.ml_mutex);
726 	avl_add(&state->xs_mlist.ml_avl, &(*mp)->xm_mnode);
727 	mutex_exit(&state->xs_mlist.ml_mutex);
728 
729 	return (0);
730 }
731 
732 /*
733  * xsvc_mem_free()
734  *
735  */
736 static void
737 xsvc_mem_free(xsvc_state_t *state, xsvc_mem_t *mp)
738 {
739 	if (mp->xm_dma_handle != NULL) {
740 		(void) ddi_dma_unbind_handle(mp->xm_dma_handle);
741 		ddi_dma_mem_free(&mp->xm_mem_handle);
742 		ddi_dma_free_handle(&mp->xm_dma_handle);
743 
744 		mutex_enter(&state->xs_mutex);
745 		state->xs_currently_alloced = state->xs_currently_alloced -
746 		    mp->xm_size;
747 		mutex_exit(&state->xs_mutex);
748 	}
749 
750 	mutex_enter(&state->xs_mlist.ml_mutex);
751 	avl_remove(&state->xs_mlist.ml_avl, &mp->xm_mnode);
752 	mutex_exit(&state->xs_mlist.ml_mutex);
753 
754 	kmem_free(mp, sizeof (*mp));
755 }
756 
757 /*
758  * xsvc_mem_lookup()
759  *
760  */
761 static xsvc_mem_t *
762 xsvc_mem_lookup(xsvc_state_t *state, uint64_t key)
763 {
764 	xsvc_mnode_t mnode;
765 	xsvc_mnode_t *mnp;
766 	avl_index_t where;
767 	xsvc_mem_t *mp;
768 
769 	mnode.mn_key = key;
770 	mutex_enter(&state->xs_mlist.ml_mutex);
771 	mnp = avl_find(&state->xs_mlist.ml_avl, &mnode, &where);
772 	mutex_exit(&state->xs_mlist.ml_mutex);
773 
774 	if (mnp != NULL) {
775 		mp = mnp->mn_home;
776 	} else {
777 		mp = NULL;
778 	}
779 
780 	return (mp);
781 }
782 
783 /*
784  * xsvc_mnode_key_compare()
785  *
786  */
787 static int
788 xsvc_mnode_key_compare(const void *q, const void *e)
789 {
790 	xsvc_mnode_t *n1;
791 	xsvc_mnode_t *n2;
792 
793 	n1 = (xsvc_mnode_t *)q;
794 	n2 = (xsvc_mnode_t *)e;
795 
796 	if (n1->mn_key < n2->mn_key) {
797 		return (-1);
798 	} else if (n1->mn_key > n2->mn_key) {
799 		return (1);
800 	} else {
801 		return (0);
802 	}
803 }
804 
805 /*
806  * xsvc_devmap()
807  *
808  */
809 /*ARGSUSED*/
810 static int
811 xsvc_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
812     size_t *maplen, uint_t model)
813 {
814 	ddi_umem_cookie_t cookie;
815 	xsvc_state_t *state;
816 	offset_t off_align;
817 	size_t npages;
818 	caddr_t kvai;
819 	size_t psize;
820 	int instance;
821 	caddr_t kva;
822 	pfn_t pfn;
823 	int err;
824 	int i;
825 
826 
827 	instance = getminor(dev);
828 	state = ddi_get_soft_state(xsvc_statep, instance);
829 	if (state == NULL) {
830 		return (ENXIO);
831 	}
832 
833 	/*
834 	 * On 64-bit kernels, if we have a 32-bit application doing a mmap(),
835 	 * smmap32 will sign extend the offset. We need to undo that since
836 	 * we are passed a physical address in off, not a offset.
837 	 */
838 #if defined(__amd64)
839 	if (((model & DDI_MODEL_MASK) == DDI_MODEL_ILP32) &&
840 	    ((off & ~0xFFFFFFFFll) == ~0xFFFFFFFFll)) {
841 		off = off & 0xFFFFFFFF;
842 	}
843 #endif
844 
845 #ifdef __xpv
846 	/*
847 	 * we won't allow guest OSes to devmap mfn/pfns. Maybe we'll relax
848 	 * this some later when there is a good reason.
849 	 */
850 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
851 		return (-1);
852 	}
853 
854 	/* we will always treat this as a foreign MFN */
855 	pfn = xen_assign_pfn(btop(off));
856 #else
857 	pfn = btop(off);
858 #endif
859 	/* always work with whole pages */
860 
861 	off_align = P2ALIGN(off, PAGESIZE);
862 	psize = P2ROUNDUP(off + len, PAGESIZE) - off_align;
863 
864 	/*
865 	 * if this is memory we're trying to map into user space, we first
866 	 * need to map the PFNs into KVA, then build up a umem cookie, and
867 	 * finally do a umem_setup to map it in.
868 	 */
869 	if (pf_is_memory(pfn)) {
870 		npages = btop(psize);
871 
872 		kva = vmem_alloc(heap_arena, psize, VM_SLEEP);
873 		if (kva == NULL) {
874 			return (-1);
875 		}
876 
877 		kvai = kva;
878 		for (i = 0; i < npages; i++) {
879 			page_t *pp = page_numtopp_nolock(pfn);
880 
881 			/*
882 			 * Preemptively check for panic conditions from
883 			 * hat_devload and error out instead.
884 			 */
885 			if (pp != NULL && (PP_ISFREE(pp) ||
886 			    (!PAGE_LOCKED(pp) && !PP_ISNORELOC(pp)))) {
887 				err = DDI_FAILURE;
888 				npages = i;
889 				goto devmapfail_cookie_alloc;
890 			}
891 
892 			hat_devload(kas.a_hat, kvai, PAGESIZE, pfn,
893 			    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK);
894 			pfn++;
895 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
896 		}
897 
898 		err = xsvc_umem_cookie_alloc(kva, psize, KM_SLEEP, &cookie);
899 		if (err != 0) {
900 			goto devmapfail_cookie_alloc;
901 		}
902 
903 		if ((err = devmap_umem_setup(dhp, state->xs_dip, &xsvc_callbk,
904 		    cookie, 0, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
905 			goto devmapfail_umem_setup;
906 		}
907 		*maplen = psize;
908 
909 	/*
910 	 * If this is not memory (or a foreign MFN in i86xpv), go through
911 	 * devmem_setup.
912 	 */
913 	} else {
914 		if ((err = devmap_devmem_setup(dhp, state->xs_dip, NULL, 0,
915 		    off_align, psize, PROT_ALL, 0, &xsvc_device_attr)) < 0) {
916 			return (err);
917 		}
918 		*maplen = psize;
919 	}
920 
921 	return (0);
922 
923 devmapfail_umem_setup:
924 	xsvc_umem_cookie_free(&cookie);
925 
926 devmapfail_cookie_alloc:
927 	kvai = kva;
928 	for (i = 0; i < npages; i++) {
929 		hat_unload(kas.a_hat, kvai, PAGESIZE,
930 		    HAT_UNLOAD_UNLOCK);
931 		kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
932 	}
933 	vmem_free(heap_arena, kva, psize);
934 
935 	return (err);
936 }
937 
938 /*
939  * xsvc_umem_cookie_alloc()
940  *
941  *   allocate a umem cookie to be used in devmap_umem_setup using KVA already
942  *   allocated.
943  */
944 int
945 xsvc_umem_cookie_alloc(caddr_t kva, size_t size, int flags,
946     ddi_umem_cookie_t *cookiep)
947 {
948 	struct ddi_umem_cookie *umem_cookiep;
949 
950 	umem_cookiep = kmem_zalloc(sizeof (struct ddi_umem_cookie), flags);
951 	if (umem_cookiep == NULL) {
952 		*cookiep = NULL;
953 		return (-1);
954 	}
955 
956 	umem_cookiep->cvaddr = kva;
957 	umem_cookiep->type = KMEM_NON_PAGEABLE;
958 	umem_cookiep->size = size;
959 	*cookiep = (ddi_umem_cookie_t *)umem_cookiep;
960 
961 	return (0);
962 }
963 
964 /*
965  * xsvc_umem_cookie_free()
966  *
967  */
968 static void
969 xsvc_umem_cookie_free(ddi_umem_cookie_t *cookiep)
970 {
971 	kmem_free(*cookiep, sizeof (struct ddi_umem_cookie));
972 	*cookiep = NULL;
973 }
974 
975 
976 /*
977  * xsvc_devmap_map()
978  *
979  */
980 /*ARGSUSED*/
981 static int
982 xsvc_devmap_map(devmap_cookie_t dhc, dev_t dev, uint_t flags, offset_t off,
983     size_t len, void **pvtp)
984 {
985 	struct ddi_umem_cookie *cp;
986 	devmap_handle_t *dhp;
987 	xsvc_state_t *state;
988 	int instance;
989 
990 
991 	instance = getminor(dev);
992 	state = ddi_get_soft_state(xsvc_statep, instance);
993 	if (state == NULL) {
994 		return (ENXIO);
995 	}
996 
997 	dhp = (devmap_handle_t *)dhc;
998 	/* This driver only supports MAP_SHARED, not MAP_PRIVATE */
999 	if (flags & MAP_PRIVATE) {
1000 		cmn_err(CE_WARN, "!xsvc driver doesn't support MAP_PRIVATE");
1001 		return (EINVAL);
1002 	}
1003 
1004 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1005 	cp->cook_refcnt = 1;
1006 
1007 	*pvtp = state;
1008 	return (0);
1009 }
1010 
1011 
1012 /*
1013  * xsvc_devmap_dup()
1014  *
1015  *   keep a reference count for forks so we don't unmap if we have multiple
1016  *   mappings.
1017  */
1018 /*ARGSUSED*/
1019 static int
1020 xsvc_devmap_dup(devmap_cookie_t dhc, void *pvtp, devmap_cookie_t new_dhp,
1021     void **new_pvtp)
1022 {
1023 	struct ddi_umem_cookie *cp;
1024 	devmap_handle_t *dhp;
1025 	xsvc_state_t *state;
1026 
1027 
1028 	state = (xsvc_state_t *)pvtp;
1029 	dhp = (devmap_handle_t *)dhc;
1030 
1031 	mutex_enter(&state->xs_cookie_mutex);
1032 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1033 	if (cp == NULL) {
1034 		mutex_exit(&state->xs_cookie_mutex);
1035 		return (ENOMEM);
1036 	}
1037 
1038 	cp->cook_refcnt++;
1039 	mutex_exit(&state->xs_cookie_mutex);
1040 
1041 	*new_pvtp = state;
1042 	return (0);
1043 }
1044 
1045 
1046 /*
1047  * xsvc_devmap_unmap()
1048  *
1049  *   This routine is only call if we were mapping in memory in xsvc_devmap().
1050  *   i.e. we only pass in xsvc_callbk to devmap_umem_setup if pf_is_memory()
1051  *   was true. It would have been nice if devmap_callback_ctl had an args param.
1052  *   We wouldn't have had to look into the devmap_handle and into the umem
1053  *   cookie.
1054  */
1055 /*ARGSUSED*/
1056 static void
1057 xsvc_devmap_unmap(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
1058     devmap_cookie_t new_dhp1, void **new_pvtp1, devmap_cookie_t new_dhp2,
1059     void **new_pvtp2)
1060 {
1061 	struct ddi_umem_cookie *ncp;
1062 	struct ddi_umem_cookie *cp;
1063 	devmap_handle_t *ndhp;
1064 	devmap_handle_t *dhp;
1065 	xsvc_state_t *state;
1066 	size_t npages;
1067 	caddr_t kvai;
1068 	caddr_t kva;
1069 	size_t size;
1070 	int i;
1071 
1072 
1073 	state = (xsvc_state_t *)pvtp;
1074 	mutex_enter(&state->xs_cookie_mutex);
1075 
1076 	/* peek into the umem cookie to figure out what we need to free up */
1077 	dhp = (devmap_handle_t *)dhc;
1078 	cp = (struct ddi_umem_cookie *)dhp->dh_cookie;
1079 	ASSERT(cp != NULL);
1080 
1081 	if (new_dhp1 != NULL) {
1082 		ndhp = (devmap_handle_t *)new_dhp1;
1083 		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
1084 		ncp->cook_refcnt++;
1085 		*new_pvtp1 = state;
1086 	}
1087 	if (new_dhp2 != NULL) {
1088 		ndhp = (devmap_handle_t *)new_dhp2;
1089 		ncp = (struct ddi_umem_cookie *)ndhp->dh_cookie;
1090 		ncp->cook_refcnt++;
1091 		*new_pvtp2 = state;
1092 	}
1093 
1094 	cp->cook_refcnt--;
1095 	if (cp->cook_refcnt == 0) {
1096 		kva = cp->cvaddr;
1097 		size = cp->size;
1098 
1099 		/*
1100 		 * free up the umem cookie, then unmap all the pages what we
1101 		 * mapped in during devmap, then free up the kva space.
1102 		 */
1103 		npages = btop(size);
1104 		xsvc_umem_cookie_free(&dhp->dh_cookie);
1105 		kvai = kva;
1106 		for (i = 0; i < npages; i++) {
1107 			hat_unload(kas.a_hat, kvai, PAGESIZE,
1108 			    HAT_UNLOAD_UNLOCK);
1109 			kvai = (caddr_t)((uintptr_t)kvai + PAGESIZE);
1110 		}
1111 		vmem_free(heap_arena, kva, size);
1112 	}
1113 
1114 	mutex_exit(&state->xs_cookie_mutex);
1115 }
1116