xref: /titanic_51/usr/src/uts/i86pc/io/rootnex.c (revision 25351652d920ae27c5a56c199da581033ce763f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * x86 root nexus driver
28  */
29 
30 #include <sys/sysmacros.h>
31 #include <sys/conf.h>
32 #include <sys/autoconf.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/psw.h>
36 #include <sys/ddidmareq.h>
37 #include <sys/promif.h>
38 #include <sys/devops.h>
39 #include <sys/kmem.h>
40 #include <sys/cmn_err.h>
41 #include <vm/seg.h>
42 #include <vm/seg_kmem.h>
43 #include <vm/seg_dev.h>
44 #include <sys/vmem.h>
45 #include <sys/mman.h>
46 #include <vm/hat.h>
47 #include <vm/as.h>
48 #include <vm/page.h>
49 #include <sys/avintr.h>
50 #include <sys/errno.h>
51 #include <sys/modctl.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/sunddi.h>
54 #include <sys/sunndi.h>
55 #include <sys/mach_intr.h>
56 #include <sys/psm.h>
57 #include <sys/ontrap.h>
58 #include <sys/atomic.h>
59 #include <sys/sdt.h>
60 #include <sys/rootnex.h>
61 #include <vm/hat_i86.h>
62 #include <sys/ddifm.h>
63 #include <sys/ddi_isa.h>
64 
65 #ifdef __xpv
66 #include <sys/bootinfo.h>
67 #include <sys/hypervisor.h>
68 #include <sys/bootconf.h>
69 #include <vm/kboot_mmu.h>
70 #else
71 #include <sys/intel_iommu.h>
72 #endif
73 
74 
/*
 * Patchable switches for extra checking of function parameters, which is
 * useful when debugging drivers. The parameter and in-use checks default to
 * on in DEBUG kernels and to off otherwise; verification of the buffer at
 * unbind time defaults to off in both.
 */
#ifdef	DEBUG
#define	ROOTNEX_CHECK_DEFAULT	1
#else
#define	ROOTNEX_CHECK_DEFAULT	0
#endif
int rootnex_alloc_check_parms = ROOTNEX_CHECK_DEFAULT;
int rootnex_bind_check_parms = ROOTNEX_CHECK_DEFAULT;
int rootnex_bind_check_inuse = ROOTNEX_CHECK_DEFAULT;
int rootnex_unbind_verify_buffer = 0;
int rootnex_sync_check_parms = ROOTNEX_CHECK_DEFAULT;
92 
93 /* Master Abort and Target Abort panic flag */
94 int rootnex_fm_ma_ta_panic_flag = 0;
95 
96 /* Semi-temporary patchables to phase in bug fixes, test drivers, etc. */
97 int rootnex_bind_fail = 1;
98 int rootnex_bind_warn = 1;
99 uint8_t *rootnex_warn_list;
100 /* bitmasks for rootnex_warn_list. Up to 8 different warnings with uint8_t */
101 #define	ROOTNEX_BIND_WARNING	(0x1 << 0)
102 
103 /*
 * Revert to the old, broken behavior of always sync'ing the entire copy
 * buffer. This is useful if we have a buggy driver which doesn't correctly
 * pass the offset and size into ddi_dma_sync().
107  */
108 int rootnex_sync_ignore_params = 0;
109 
/*
 * State pre-allocated with every DMA handle so that most binds need no
 * further allocation:
 *
 *   cookies - 65 on the 64-bit kernel (enough for a 256K buffer plus 1 page
 *	       for alignment, assuming 4K pages); 33 on the 32-bit kernel,
 *	       which has less kernel address space to spare.
 *   windows - 4 in both kernels.
 *   copybuf - 2 pages worth of copy buffer state in both kernels. The copy
 *	       buffer itself is still allocated during bind (if one is
 *	       needed).
 *
 * These can only be modified in /etc/system before rootnex attach.
 */
#if defined(__amd64)
int rootnex_prealloc_cookies = 65;
#else
int rootnex_prealloc_cookies = 33;
#endif
int rootnex_prealloc_windows = 4;
int rootnex_prealloc_copybuf = 2;
129 
130 /* driver global state */
131 static rootnex_state_t *rootnex_state;
132 
133 /* shortcut to rootnex counters */
134 static uint64_t *rootnex_cnt;
135 
136 /*
137  * XXX - does x86 even need these or are they left over from the SPARC days?
138  */
139 /* statically defined integer/boolean properties for the root node */
140 static rootnex_intprop_t rootnex_intprp[] = {
141 	{ "PAGESIZE",			PAGESIZE },
142 	{ "MMU_PAGESIZE",		MMU_PAGESIZE },
143 	{ "MMU_PAGEOFFSET",		MMU_PAGEOFFSET },
144 	{ DDI_RELATIVE_ADDRESSING,	1 },
145 };
146 #define	NROOT_INTPROPS	(sizeof (rootnex_intprp) / sizeof (rootnex_intprop_t))
147 
148 #ifdef __xpv
149 typedef maddr_t rootnex_addr_t;
150 #define	ROOTNEX_PADDR_TO_RBASE(xinfo, pa)	\
151 	(DOMAIN_IS_INITDOMAIN(xinfo) ? pa_to_ma(pa) : (pa))
152 #else
153 typedef paddr_t rootnex_addr_t;
154 #endif
155 
156 #if !defined(__xpv)
157 char _depends_on[] = "mach/pcplusmp misc/iommulib";
158 #endif
159 
160 static struct cb_ops rootnex_cb_ops = {
161 	nodev,		/* open */
162 	nodev,		/* close */
163 	nodev,		/* strategy */
164 	nodev,		/* print */
165 	nodev,		/* dump */
166 	nodev,		/* read */
167 	nodev,		/* write */
168 	nodev,		/* ioctl */
169 	nodev,		/* devmap */
170 	nodev,		/* mmap */
171 	nodev,		/* segmap */
172 	nochpoll,	/* chpoll */
173 	ddi_prop_op,	/* cb_prop_op */
174 	NULL,		/* struct streamtab */
175 	D_NEW | D_MP | D_HOTPLUG, /* compatibility flags */
176 	CB_REV,		/* Rev */
177 	nodev,		/* cb_aread */
178 	nodev		/* cb_awrite */
179 };
180 
181 static int rootnex_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
182     off_t offset, off_t len, caddr_t *vaddrp);
183 static int rootnex_map_fault(dev_info_t *dip, dev_info_t *rdip,
184     struct hat *hat, struct seg *seg, caddr_t addr,
185     struct devpage *dp, pfn_t pfn, uint_t prot, uint_t lock);
186 static int rootnex_dma_map(dev_info_t *dip, dev_info_t *rdip,
187     struct ddi_dma_req *dmareq, ddi_dma_handle_t *handlep);
188 static int rootnex_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
189     ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg,
190     ddi_dma_handle_t *handlep);
191 static int rootnex_dma_freehdl(dev_info_t *dip, dev_info_t *rdip,
192     ddi_dma_handle_t handle);
193 static int rootnex_dma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
194     ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
195     ddi_dma_cookie_t *cookiep, uint_t *ccountp);
196 static int rootnex_dma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
197     ddi_dma_handle_t handle);
198 static int rootnex_dma_sync(dev_info_t *dip, dev_info_t *rdip,
199     ddi_dma_handle_t handle, off_t off, size_t len, uint_t cache_flags);
200 static int rootnex_dma_win(dev_info_t *dip, dev_info_t *rdip,
201     ddi_dma_handle_t handle, uint_t win, off_t *offp, size_t *lenp,
202     ddi_dma_cookie_t *cookiep, uint_t *ccountp);
203 static int rootnex_dma_mctl(dev_info_t *dip, dev_info_t *rdip,
204     ddi_dma_handle_t handle, enum ddi_dma_ctlops request,
205     off_t *offp, size_t *lenp, caddr_t *objp, uint_t cache_flags);
206 static int rootnex_ctlops(dev_info_t *dip, dev_info_t *rdip,
207     ddi_ctl_enum_t ctlop, void *arg, void *result);
208 static int rootnex_fm_init(dev_info_t *dip, dev_info_t *tdip, int tcap,
209     ddi_iblock_cookie_t *ibc);
210 static int rootnex_intr_ops(dev_info_t *pdip, dev_info_t *rdip,
211     ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);
212 
213 static int rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
214     ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg,
215     ddi_dma_handle_t *handlep);
216 static int rootnex_coredma_freehdl(dev_info_t *dip, dev_info_t *rdip,
217     ddi_dma_handle_t handle);
218 static int rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
219     ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
220     ddi_dma_cookie_t *cookiep, uint_t *ccountp);
221 static int rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
222     ddi_dma_handle_t handle);
223 #if !defined(__xpv)
224 static void rootnex_coredma_reset_cookies(dev_info_t *dip,
225     ddi_dma_handle_t handle);
226 static int rootnex_coredma_get_cookies(dev_info_t *dip, ddi_dma_handle_t handle,
227     ddi_dma_cookie_t *cookiep, uint_t *ccountp);
228 #endif
229 static int rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip,
230     ddi_dma_handle_t handle, off_t off, size_t len, uint_t cache_flags);
231 static int rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip,
232     ddi_dma_handle_t handle, uint_t win, off_t *offp, size_t *lenp,
233     ddi_dma_cookie_t *cookiep, uint_t *ccountp);
234 static int rootnex_coredma_map(dev_info_t *dip, dev_info_t *rdip,
235     struct ddi_dma_req *dmareq, ddi_dma_handle_t *handlep);
236 static int rootnex_coredma_mctl(dev_info_t *dip, dev_info_t *rdip,
237     ddi_dma_handle_t handle, enum ddi_dma_ctlops request, off_t *offp,
238     size_t *lenp, caddr_t *objpp, uint_t cache_flags);
239 
240 static struct bus_ops rootnex_bus_ops = {
241 	BUSO_REV,
242 	rootnex_map,
243 	NULL,
244 	NULL,
245 	NULL,
246 	rootnex_map_fault,
247 	rootnex_dma_map,
248 	rootnex_dma_allochdl,
249 	rootnex_dma_freehdl,
250 	rootnex_dma_bindhdl,
251 	rootnex_dma_unbindhdl,
252 	rootnex_dma_sync,
253 	rootnex_dma_win,
254 	rootnex_dma_mctl,
255 	rootnex_ctlops,
256 	ddi_bus_prop_op,
257 	i_ddi_rootnex_get_eventcookie,
258 	i_ddi_rootnex_add_eventcall,
259 	i_ddi_rootnex_remove_eventcall,
260 	i_ddi_rootnex_post_event,
261 	0,			/* bus_intr_ctl */
262 	0,			/* bus_config */
263 	0,			/* bus_unconfig */
264 	rootnex_fm_init,	/* bus_fm_init */
265 	NULL,			/* bus_fm_fini */
266 	NULL,			/* bus_fm_access_enter */
267 	NULL,			/* bus_fm_access_exit */
268 	NULL,			/* bus_powr */
269 	rootnex_intr_ops	/* bus_intr_op */
270 };
271 
272 static int rootnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
273 static int rootnex_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
274 
275 static struct dev_ops rootnex_ops = {
276 	DEVO_REV,
277 	0,
278 	ddi_no_info,
279 	nulldev,
280 	nulldev,
281 	rootnex_attach,
282 	rootnex_detach,
283 	nulldev,
284 	&rootnex_cb_ops,
285 	&rootnex_bus_ops,
286 	NULL,
287 	ddi_quiesce_not_needed,		/* quiesce */
288 };
289 
290 static struct modldrv rootnex_modldrv = {
291 	&mod_driverops,
292 	"i86pc root nexus",
293 	&rootnex_ops
294 };
295 
296 static struct modlinkage rootnex_modlinkage = {
297 	MODREV_1,
298 	(void *)&rootnex_modldrv,
299 	NULL
300 };
301 
302 #if !defined(__xpv)
303 static iommulib_nexops_t iommulib_nexops = {
304 	IOMMU_NEXOPS_VERSION,
305 	"Rootnex IOMMU ops Vers 1.1",
306 	NULL,
307 	rootnex_coredma_allochdl,
308 	rootnex_coredma_freehdl,
309 	rootnex_coredma_bindhdl,
310 	rootnex_coredma_unbindhdl,
311 	rootnex_coredma_reset_cookies,
312 	rootnex_coredma_get_cookies,
313 	rootnex_coredma_sync,
314 	rootnex_coredma_win,
315 	rootnex_coredma_map,
316 	rootnex_coredma_mctl
317 };
318 #endif
319 
320 /*
321  *  extern hacks
322  */
323 extern struct seg_ops segdev_ops;
324 extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
325 #ifdef	DDI_MAP_DEBUG
326 extern int ddi_map_debug_flag;
327 #define	ddi_map_debug	if (ddi_map_debug_flag) prom_printf
328 #endif
329 extern void i86_pp_map(page_t *pp, caddr_t kaddr);
330 extern void i86_va_map(caddr_t vaddr, struct as *asp, caddr_t kaddr);
331 extern int (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
332     psm_intr_op_t, int *);
333 extern int impl_ddi_sunbus_initchild(dev_info_t *dip);
334 extern void impl_ddi_sunbus_removechild(dev_info_t *dip);
335 
336 /*
 * Use the device arena for device control register mappings.
338  * Various kernel memory walkers (debugger, dtrace) need to know
339  * to avoid this address range to prevent undesired device activity.
340  */
341 extern void *device_arena_alloc(size_t size, int vm_flag);
342 extern void device_arena_free(void * vaddr, size_t size);
343 
344 
345 /*
346  *  Internal functions
347  */
348 static int rootnex_dma_init();
349 static void rootnex_add_props(dev_info_t *);
350 static int rootnex_ctl_reportdev(dev_info_t *dip);
351 static struct intrspec *rootnex_get_ispec(dev_info_t *rdip, int inum);
352 static int rootnex_map_regspec(ddi_map_req_t *mp, caddr_t *vaddrp);
353 static int rootnex_unmap_regspec(ddi_map_req_t *mp, caddr_t *vaddrp);
354 static int rootnex_map_handle(ddi_map_req_t *mp);
355 static void rootnex_clean_dmahdl(ddi_dma_impl_t *hp);
356 static int rootnex_valid_alloc_parms(ddi_dma_attr_t *attr, uint_t maxsegsize);
357 static int rootnex_valid_bind_parms(ddi_dma_req_t *dmareq,
358     ddi_dma_attr_t *attr);
359 static void rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
360     rootnex_sglinfo_t *sglinfo);
361 static int rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
362     rootnex_dma_t *dma, ddi_dma_attr_t *attr, int kmflag);
363 static int rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
364     rootnex_dma_t *dma, ddi_dma_attr_t *attr);
365 static void rootnex_teardown_copybuf(rootnex_dma_t *dma);
366 static int rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
367     ddi_dma_attr_t *attr, int kmflag);
368 static void rootnex_teardown_windows(rootnex_dma_t *dma);
369 static void rootnex_init_win(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
370     rootnex_window_t *window, ddi_dma_cookie_t *cookie, off_t cur_offset);
371 static void rootnex_setup_cookie(ddi_dma_obj_t *dmar_object,
372     rootnex_dma_t *dma, ddi_dma_cookie_t *cookie, off_t cur_offset,
373     size_t *copybuf_used, page_t **cur_pp);
374 static int rootnex_sgllen_window_boundary(ddi_dma_impl_t *hp,
375     rootnex_dma_t *dma, rootnex_window_t **windowp, ddi_dma_cookie_t *cookie,
376     ddi_dma_attr_t *attr, off_t cur_offset);
377 static int rootnex_copybuf_window_boundary(ddi_dma_impl_t *hp,
378     rootnex_dma_t *dma, rootnex_window_t **windowp,
379     ddi_dma_cookie_t *cookie, off_t cur_offset, size_t *copybuf_used);
380 static int rootnex_maxxfer_window_boundary(ddi_dma_impl_t *hp,
381     rootnex_dma_t *dma, rootnex_window_t **windowp, ddi_dma_cookie_t *cookie);
382 static int rootnex_valid_sync_parms(ddi_dma_impl_t *hp, rootnex_window_t *win,
383     off_t offset, size_t size, uint_t cache_flags);
384 static int rootnex_verify_buffer(rootnex_dma_t *dma);
385 static int rootnex_dma_check(dev_info_t *dip, const void *handle,
386     const void *comp_addr, const void *not_used);
387 
388 /*
389  * _init()
390  *
391  */
392 int
393 _init(void)
394 {
395 
396 	rootnex_state = NULL;
397 	return (mod_install(&rootnex_modlinkage));
398 }
399 
400 
401 /*
402  * _info()
403  *
404  */
405 int
406 _info(struct modinfo *modinfop)
407 {
408 	return (mod_info(&rootnex_modlinkage, modinfop));
409 }
410 
411 
412 /*
413  * _fini()
414  *
415  */
416 int
417 _fini(void)
418 {
419 	return (EBUSY);
420 }
421 
422 
423 /*
424  * rootnex_attach()
425  *
426  */
427 static int
428 rootnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
429 {
430 	int fmcap;
431 	int e;
432 
433 	switch (cmd) {
434 	case DDI_ATTACH:
435 		break;
436 	case DDI_RESUME:
437 		return (DDI_SUCCESS);
438 	default:
439 		return (DDI_FAILURE);
440 	}
441 
442 	/*
443 	 * We should only have one instance of rootnex. Save it away since we
444 	 * don't have an easy way to get it back later.
445 	 */
446 	ASSERT(rootnex_state == NULL);
447 	rootnex_state = kmem_zalloc(sizeof (rootnex_state_t), KM_SLEEP);
448 
449 	rootnex_state->r_dip = dip;
450 	rootnex_state->r_err_ibc = (ddi_iblock_cookie_t)ipltospl(15);
451 	rootnex_state->r_reserved_msg_printed = B_FALSE;
452 	rootnex_cnt = &rootnex_state->r_counters[0];
453 	rootnex_state->r_intel_iommu_enabled = B_FALSE;
454 
455 	/*
456 	 * Set minimum fm capability level for i86pc platforms and then
457 	 * initialize error handling. Since we're the rootnex, we don't
458 	 * care what's returned in the fmcap field.
459 	 */
460 	ddi_system_fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE |
461 	    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE;
462 	fmcap = ddi_system_fmcap;
463 	ddi_fm_init(dip, &fmcap, &rootnex_state->r_err_ibc);
464 
465 	/* initialize DMA related state */
466 	e = rootnex_dma_init();
467 	if (e != DDI_SUCCESS) {
468 		kmem_free(rootnex_state, sizeof (rootnex_state_t));
469 		return (DDI_FAILURE);
470 	}
471 
472 	/* Add static root node properties */
473 	rootnex_add_props(dip);
474 
475 	/* since we can't call ddi_report_dev() */
476 	cmn_err(CE_CONT, "?root nexus = %s\n", ddi_get_name(dip));
477 
478 	/* Initialize rootnex event handle */
479 	i_ddi_rootnex_init_events(dip);
480 
481 #if !defined(__xpv)
482 #if defined(__amd64)
483 	/* probe intel iommu */
484 	intel_iommu_probe_and_parse();
485 
486 	/* attach the iommu nodes */
487 	if (intel_iommu_support) {
488 		if (intel_iommu_attach_dmar_nodes() == DDI_SUCCESS) {
489 			rootnex_state->r_intel_iommu_enabled = B_TRUE;
490 		} else {
491 			intel_iommu_release_dmar_info();
492 		}
493 	}
494 #endif
495 
496 	e = iommulib_nexus_register(dip, &iommulib_nexops,
497 	    &rootnex_state->r_iommulib_handle);
498 
499 	ASSERT(e == DDI_SUCCESS);
500 #endif
501 
502 	return (DDI_SUCCESS);
503 }
504 
505 
506 /*
507  * rootnex_detach()
508  *
509  */
510 /*ARGSUSED*/
511 static int
512 rootnex_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
513 {
514 	switch (cmd) {
515 	case DDI_SUSPEND:
516 		break;
517 	default:
518 		return (DDI_FAILURE);
519 	}
520 
521 	return (DDI_SUCCESS);
522 }
523 
524 
525 /*
526  * rootnex_dma_init()
527  *
528  */
529 /*ARGSUSED*/
530 static int
531 rootnex_dma_init()
532 {
533 	size_t bufsize;
534 
535 
536 	/*
537 	 * size of our cookie/window/copybuf state needed in dma bind that we
538 	 * pre-alloc in dma_alloc_handle
539 	 */
540 	rootnex_state->r_prealloc_cookies = rootnex_prealloc_cookies;
541 	rootnex_state->r_prealloc_size =
542 	    (rootnex_state->r_prealloc_cookies * sizeof (ddi_dma_cookie_t)) +
543 	    (rootnex_prealloc_windows * sizeof (rootnex_window_t)) +
544 	    (rootnex_prealloc_copybuf * sizeof (rootnex_pgmap_t));
545 
546 	/*
547 	 * setup DDI DMA handle kmem cache, align each handle on 64 bytes,
548 	 * allocate 16 extra bytes for struct pointer alignment
549 	 * (p->dmai_private & dma->dp_prealloc_buffer)
550 	 */
551 	bufsize = sizeof (ddi_dma_impl_t) + sizeof (rootnex_dma_t) +
552 	    rootnex_state->r_prealloc_size + 0x10;
553 	rootnex_state->r_dmahdl_cache = kmem_cache_create("rootnex_dmahdl",
554 	    bufsize, 64, NULL, NULL, NULL, NULL, NULL, 0);
555 	if (rootnex_state->r_dmahdl_cache == NULL) {
556 		return (DDI_FAILURE);
557 	}
558 
559 	/*
560 	 * allocate array to track which major numbers we have printed warnings
561 	 * for.
562 	 */
563 	rootnex_warn_list = kmem_zalloc(devcnt * sizeof (*rootnex_warn_list),
564 	    KM_SLEEP);
565 
566 	return (DDI_SUCCESS);
567 }
568 
569 
570 /*
571  * rootnex_add_props()
572  *
573  */
574 static void
575 rootnex_add_props(dev_info_t *dip)
576 {
577 	rootnex_intprop_t *rpp;
578 	int i;
579 
580 	/* Add static integer/boolean properties to the root node */
581 	rpp = rootnex_intprp;
582 	for (i = 0; i < NROOT_INTPROPS; i++) {
583 		(void) e_ddi_prop_update_int(DDI_DEV_T_NONE, dip,
584 		    rpp[i].prop_name, rpp[i].prop_value);
585 	}
586 }
587 
588 
589 
590 /*
591  * *************************
592  *  ctlops related routines
593  * *************************
594  */
595 
596 /*
597  * rootnex_ctlops()
598  *
599  */
600 /*ARGSUSED*/
601 static int
602 rootnex_ctlops(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
603     void *arg, void *result)
604 {
605 	int n, *ptr;
606 	struct ddi_parent_private_data *pdp;
607 
608 	switch (ctlop) {
609 	case DDI_CTLOPS_DMAPMAPC:
610 		/*
611 		 * Return 'partial' to indicate that dma mapping
612 		 * has to be done in the main MMU.
613 		 */
614 		return (DDI_DMA_PARTIAL);
615 
616 	case DDI_CTLOPS_BTOP:
617 		/*
618 		 * Convert byte count input to physical page units.
619 		 * (byte counts that are not a page-size multiple
620 		 * are rounded down)
621 		 */
622 		*(ulong_t *)result = btop(*(ulong_t *)arg);
623 		return (DDI_SUCCESS);
624 
625 	case DDI_CTLOPS_PTOB:
626 		/*
627 		 * Convert size in physical pages to bytes
628 		 */
629 		*(ulong_t *)result = ptob(*(ulong_t *)arg);
630 		return (DDI_SUCCESS);
631 
632 	case DDI_CTLOPS_BTOPR:
633 		/*
634 		 * Convert byte count input to physical page units
635 		 * (byte counts that are not a page-size multiple
636 		 * are rounded up)
637 		 */
638 		*(ulong_t *)result = btopr(*(ulong_t *)arg);
639 		return (DDI_SUCCESS);
640 
641 	case DDI_CTLOPS_INITCHILD:
642 		return (impl_ddi_sunbus_initchild(arg));
643 
644 	case DDI_CTLOPS_UNINITCHILD:
645 		impl_ddi_sunbus_removechild(arg);
646 		return (DDI_SUCCESS);
647 
648 	case DDI_CTLOPS_REPORTDEV:
649 		return (rootnex_ctl_reportdev(rdip));
650 
651 	case DDI_CTLOPS_IOMIN:
652 		/*
653 		 * Nothing to do here but reflect back..
654 		 */
655 		return (DDI_SUCCESS);
656 
657 	case DDI_CTLOPS_REGSIZE:
658 	case DDI_CTLOPS_NREGS:
659 		break;
660 
661 	case DDI_CTLOPS_SIDDEV:
662 		if (ndi_dev_is_prom_node(rdip))
663 			return (DDI_SUCCESS);
664 		if (ndi_dev_is_persistent_node(rdip))
665 			return (DDI_SUCCESS);
666 		return (DDI_FAILURE);
667 
668 	case DDI_CTLOPS_POWER:
669 		return ((*pm_platform_power)((power_req_t *)arg));
670 
671 	case DDI_CTLOPS_RESERVED0: /* Was DDI_CTLOPS_NINTRS, obsolete */
672 	case DDI_CTLOPS_RESERVED1: /* Was DDI_CTLOPS_POKE_INIT, obsolete */
673 	case DDI_CTLOPS_RESERVED2: /* Was DDI_CTLOPS_POKE_FLUSH, obsolete */
674 	case DDI_CTLOPS_RESERVED3: /* Was DDI_CTLOPS_POKE_FINI, obsolete */
675 	case DDI_CTLOPS_RESERVED4: /* Was DDI_CTLOPS_INTR_HILEVEL, obsolete */
676 	case DDI_CTLOPS_RESERVED5: /* Was DDI_CTLOPS_XLATE_INTRS, obsolete */
677 		if (!rootnex_state->r_reserved_msg_printed) {
678 			rootnex_state->r_reserved_msg_printed = B_TRUE;
679 			cmn_err(CE_WARN, "Failing ddi_ctlops call(s) for "
680 			    "1 or more reserved/obsolete operations.");
681 		}
682 		return (DDI_FAILURE);
683 
684 	default:
685 		return (DDI_FAILURE);
686 	}
687 	/*
688 	 * The rest are for "hardware" properties
689 	 */
690 	if ((pdp = ddi_get_parent_data(rdip)) == NULL)
691 		return (DDI_FAILURE);
692 
693 	if (ctlop == DDI_CTLOPS_NREGS) {
694 		ptr = (int *)result;
695 		*ptr = pdp->par_nreg;
696 	} else {
697 		off_t *size = (off_t *)result;
698 
699 		ptr = (int *)arg;
700 		n = *ptr;
701 		if (n >= pdp->par_nreg) {
702 			return (DDI_FAILURE);
703 		}
704 		*size = (off_t)pdp->par_reg[n].regspec_size;
705 	}
706 	return (DDI_SUCCESS);
707 }
708 
709 
710 /*
711  * rootnex_ctl_reportdev()
712  *
713  */
714 static int
715 rootnex_ctl_reportdev(dev_info_t *dev)
716 {
717 	int i, n, len, f_len = 0;
718 	char *buf;
719 
720 	buf = kmem_alloc(REPORTDEV_BUFSIZE, KM_SLEEP);
721 	f_len += snprintf(buf, REPORTDEV_BUFSIZE,
722 	    "%s%d at root", ddi_driver_name(dev), ddi_get_instance(dev));
723 	len = strlen(buf);
724 
725 	for (i = 0; i < sparc_pd_getnreg(dev); i++) {
726 
727 		struct regspec *rp = sparc_pd_getreg(dev, i);
728 
729 		if (i == 0)
730 			f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
731 			    ": ");
732 		else
733 			f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
734 			    " and ");
735 		len = strlen(buf);
736 
737 		switch (rp->regspec_bustype) {
738 
739 		case BTEISA:
740 			f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
741 			    "%s 0x%x", DEVI_EISA_NEXNAME, rp->regspec_addr);
742 			break;
743 
744 		case BTISA:
745 			f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
746 			    "%s 0x%x", DEVI_ISA_NEXNAME, rp->regspec_addr);
747 			break;
748 
749 		default:
750 			f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
751 			    "space %x offset %x",
752 			    rp->regspec_bustype, rp->regspec_addr);
753 			break;
754 		}
755 		len = strlen(buf);
756 	}
757 	for (i = 0, n = sparc_pd_getnintr(dev); i < n; i++) {
758 		int pri;
759 
760 		if (i != 0) {
761 			f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
762 			    ",");
763 			len = strlen(buf);
764 		}
765 		pri = INT_IPL(sparc_pd_getintr(dev, i)->intrspec_pri);
766 		f_len += snprintf(buf + len, REPORTDEV_BUFSIZE - len,
767 		    " sparc ipl %d", pri);
768 		len = strlen(buf);
769 	}
770 #ifdef DEBUG
771 	if (f_len + 1 >= REPORTDEV_BUFSIZE) {
772 		cmn_err(CE_NOTE, "next message is truncated: "
773 		    "printed length 1024, real length %d", f_len);
774 	}
775 #endif /* DEBUG */
776 	cmn_err(CE_CONT, "?%s\n", buf);
777 	kmem_free(buf, REPORTDEV_BUFSIZE);
778 	return (DDI_SUCCESS);
779 }
780 
781 
782 /*
783  * ******************
784  *  map related code
785  * ******************
786  */
787 
788 /*
789  * rootnex_map()
790  *
791  */
792 static int
793 rootnex_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp, off_t offset,
794     off_t len, caddr_t *vaddrp)
795 {
796 	struct regspec *rp, tmp_reg;
797 	ddi_map_req_t mr = *mp;		/* Get private copy of request */
798 	int error;
799 
800 	mp = &mr;
801 
802 	switch (mp->map_op)  {
803 	case DDI_MO_MAP_LOCKED:
804 	case DDI_MO_UNMAP:
805 	case DDI_MO_MAP_HANDLE:
806 		break;
807 	default:
808 #ifdef	DDI_MAP_DEBUG
809 		cmn_err(CE_WARN, "rootnex_map: unimplemented map op %d.",
810 		    mp->map_op);
811 #endif	/* DDI_MAP_DEBUG */
812 		return (DDI_ME_UNIMPLEMENTED);
813 	}
814 
815 	if (mp->map_flags & DDI_MF_USER_MAPPING)  {
816 #ifdef	DDI_MAP_DEBUG
817 		cmn_err(CE_WARN, "rootnex_map: unimplemented map type: user.");
818 #endif	/* DDI_MAP_DEBUG */
819 		return (DDI_ME_UNIMPLEMENTED);
820 	}
821 
822 	/*
823 	 * First, if given an rnumber, convert it to a regspec...
824 	 * (Presumably, this is on behalf of a child of the root node?)
825 	 */
826 
827 	if (mp->map_type == DDI_MT_RNUMBER)  {
828 
829 		int rnumber = mp->map_obj.rnumber;
830 #ifdef	DDI_MAP_DEBUG
831 		static char *out_of_range =
832 		    "rootnex_map: Out of range rnumber <%d>, device <%s>";
833 #endif	/* DDI_MAP_DEBUG */
834 
835 		rp = i_ddi_rnumber_to_regspec(rdip, rnumber);
836 		if (rp == NULL)  {
837 #ifdef	DDI_MAP_DEBUG
838 			cmn_err(CE_WARN, out_of_range, rnumber,
839 			    ddi_get_name(rdip));
840 #endif	/* DDI_MAP_DEBUG */
841 			return (DDI_ME_RNUMBER_RANGE);
842 		}
843 
844 		/*
845 		 * Convert the given ddi_map_req_t from rnumber to regspec...
846 		 */
847 
848 		mp->map_type = DDI_MT_REGSPEC;
849 		mp->map_obj.rp = rp;
850 	}
851 
852 	/*
853 	 * Adjust offset and length correspnding to called values...
854 	 * XXX: A non-zero length means override the one in the regspec
855 	 * XXX: (regardless of what's in the parent's range?)
856 	 */
857 
858 	tmp_reg = *(mp->map_obj.rp);		/* Preserve underlying data */
859 	rp = mp->map_obj.rp = &tmp_reg;		/* Use tmp_reg in request */
860 
861 #ifdef	DDI_MAP_DEBUG
862 	cmn_err(CE_CONT, "rootnex: <%s,%s> <0x%x, 0x%x, 0x%d> offset %d len %d "
863 	    "handle 0x%x\n", ddi_get_name(dip), ddi_get_name(rdip),
864 	    rp->regspec_bustype, rp->regspec_addr, rp->regspec_size, offset,
865 	    len, mp->map_handlep);
866 #endif	/* DDI_MAP_DEBUG */
867 
868 	/*
869 	 * I/O or memory mapping:
870 	 *
871 	 *	<bustype=0, addr=x, len=x>: memory
872 	 *	<bustype=1, addr=x, len=x>: i/o
873 	 *	<bustype>1, addr=0, len=x>: x86-compatibility i/o
874 	 */
875 
876 	if (rp->regspec_bustype > 1 && rp->regspec_addr != 0) {
877 		cmn_err(CE_WARN, "<%s,%s> invalid register spec"
878 		    " <0x%x, 0x%x, 0x%x>", ddi_get_name(dip),
879 		    ddi_get_name(rdip), rp->regspec_bustype,
880 		    rp->regspec_addr, rp->regspec_size);
881 		return (DDI_ME_INVAL);
882 	}
883 
884 	if (rp->regspec_bustype > 1 && rp->regspec_addr == 0) {
885 		/*
886 		 * compatibility i/o mapping
887 		 */
888 		rp->regspec_bustype += (uint_t)offset;
889 	} else {
890 		/*
891 		 * Normal memory or i/o mapping
892 		 */
893 		rp->regspec_addr += (uint_t)offset;
894 	}
895 
896 	if (len != 0)
897 		rp->regspec_size = (uint_t)len;
898 
899 #ifdef	DDI_MAP_DEBUG
900 	cmn_err(CE_CONT, "             <%s,%s> <0x%x, 0x%x, 0x%d> offset %d "
901 	    "len %d handle 0x%x\n", ddi_get_name(dip), ddi_get_name(rdip),
902 	    rp->regspec_bustype, rp->regspec_addr, rp->regspec_size,
903 	    offset, len, mp->map_handlep);
904 #endif	/* DDI_MAP_DEBUG */
905 
906 	/*
907 	 * Apply any parent ranges at this level, if applicable.
908 	 * (This is where nexus specific regspec translation takes place.
909 	 * Use of this function is implicit agreement that translation is
910 	 * provided via ddi_apply_range.)
911 	 */
912 
913 #ifdef	DDI_MAP_DEBUG
914 	ddi_map_debug("applying range of parent <%s> to child <%s>...\n",
915 	    ddi_get_name(dip), ddi_get_name(rdip));
916 #endif	/* DDI_MAP_DEBUG */
917 
918 	if ((error = i_ddi_apply_range(dip, rdip, mp->map_obj.rp)) != 0)
919 		return (error);
920 
921 	switch (mp->map_op)  {
922 	case DDI_MO_MAP_LOCKED:
923 
924 		/*
925 		 * Set up the locked down kernel mapping to the regspec...
926 		 */
927 
928 		return (rootnex_map_regspec(mp, vaddrp));
929 
930 	case DDI_MO_UNMAP:
931 
932 		/*
933 		 * Release mapping...
934 		 */
935 
936 		return (rootnex_unmap_regspec(mp, vaddrp));
937 
938 	case DDI_MO_MAP_HANDLE:
939 
940 		return (rootnex_map_handle(mp));
941 
942 	default:
943 		return (DDI_ME_UNIMPLEMENTED);
944 	}
945 }
946 
947 
948 /*
949  * rootnex_map_fault()
950  *
951  *	fault in mappings for requestors
952  */
953 /*ARGSUSED*/
954 static int
955 rootnex_map_fault(dev_info_t *dip, dev_info_t *rdip, struct hat *hat,
956     struct seg *seg, caddr_t addr, struct devpage *dp, pfn_t pfn, uint_t prot,
957     uint_t lock)
958 {
959 
960 #ifdef	DDI_MAP_DEBUG
961 	ddi_map_debug("rootnex_map_fault: address <%x> pfn <%x>", addr, pfn);
962 	ddi_map_debug(" Seg <%s>\n",
963 	    seg->s_ops == &segdev_ops ? "segdev" :
964 	    seg == &kvseg ? "segkmem" : "NONE!");
965 #endif	/* DDI_MAP_DEBUG */
966 
967 	/*
968 	 * This is all terribly broken, but it is a start
969 	 *
970 	 * XXX	Note that this test means that segdev_ops
971 	 *	must be exported from seg_dev.c.
972 	 * XXX	What about devices with their own segment drivers?
973 	 */
974 	if (seg->s_ops == &segdev_ops) {
975 		struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
976 
977 		if (hat == NULL) {
978 			/*
979 			 * This is one plausible interpretation of
980 			 * a null hat i.e. use the first hat on the
981 			 * address space hat list which by convention is
982 			 * the hat of the system MMU.  At alternative
983 			 * would be to panic .. this might well be better ..
984 			 */
985 			ASSERT(AS_READ_HELD(seg->s_as, &seg->s_as->a_lock));
986 			hat = seg->s_as->a_hat;
987 			cmn_err(CE_NOTE, "rootnex_map_fault: nil hat");
988 		}
989 		hat_devload(hat, addr, MMU_PAGESIZE, pfn, prot | sdp->hat_attr,
990 		    (lock ? HAT_LOAD_LOCK : HAT_LOAD));
991 	} else if (seg == &kvseg && dp == NULL) {
992 		hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pfn, prot,
993 		    HAT_LOAD_LOCK);
994 	} else
995 		return (DDI_FAILURE);
996 	return (DDI_SUCCESS);
997 }
998 
999 
1000 /*
1001  * rootnex_map_regspec()
1002  *     we don't support mapping of I/O cards above 4Gb
1003  */
1004 static int
1005 rootnex_map_regspec(ddi_map_req_t *mp, caddr_t *vaddrp)
1006 {
1007 	rootnex_addr_t rbase;
1008 	void *cvaddr;
1009 	uint_t npages, pgoffset;
1010 	struct regspec *rp;
1011 	ddi_acc_hdl_t *hp;
1012 	ddi_acc_impl_t *ap;
1013 	uint_t	hat_acc_flags;
1014 	paddr_t pbase;
1015 
1016 	rp = mp->map_obj.rp;
1017 	hp = mp->map_handlep;
1018 
1019 #ifdef	DDI_MAP_DEBUG
1020 	ddi_map_debug(
1021 	    "rootnex_map_regspec: <0x%x 0x%x 0x%x> handle 0x%x\n",
1022 	    rp->regspec_bustype, rp->regspec_addr,
1023 	    rp->regspec_size, mp->map_handlep);
1024 #endif	/* DDI_MAP_DEBUG */
1025 
1026 	/*
1027 	 * I/O or memory mapping
1028 	 *
1029 	 *	<bustype=0, addr=x, len=x>: memory
1030 	 *	<bustype=1, addr=x, len=x>: i/o
1031 	 *	<bustype>1, addr=0, len=x>: x86-compatibility i/o
1032 	 */
1033 
1034 	if (rp->regspec_bustype > 1 && rp->regspec_addr != 0) {
1035 		cmn_err(CE_WARN, "rootnex: invalid register spec"
1036 		    " <0x%x, 0x%x, 0x%x>", rp->regspec_bustype,
1037 		    rp->regspec_addr, rp->regspec_size);
1038 		return (DDI_FAILURE);
1039 	}
1040 
1041 	if (rp->regspec_bustype != 0) {
1042 		/*
1043 		 * I/O space - needs a handle.
1044 		 */
1045 		if (hp == NULL) {
1046 			return (DDI_FAILURE);
1047 		}
1048 		ap = (ddi_acc_impl_t *)hp->ah_platform_private;
1049 		ap->ahi_acc_attr |= DDI_ACCATTR_IO_SPACE;
1050 		impl_acc_hdl_init(hp);
1051 
1052 		if (mp->map_flags & DDI_MF_DEVICE_MAPPING) {
1053 #ifdef  DDI_MAP_DEBUG
1054 			ddi_map_debug("rootnex_map_regspec: mmap() "
1055 			    "to I/O space is not supported.\n");
1056 #endif  /* DDI_MAP_DEBUG */
1057 			return (DDI_ME_INVAL);
1058 		} else {
1059 			/*
1060 			 * 1275-compliant vs. compatibility i/o mapping
1061 			 */
1062 			*vaddrp =
1063 			    (rp->regspec_bustype > 1 && rp->regspec_addr == 0) ?
1064 			    ((caddr_t)(uintptr_t)rp->regspec_bustype) :
1065 			    ((caddr_t)(uintptr_t)rp->regspec_addr);
1066 #ifdef __xpv
1067 			if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1068 				hp->ah_pfn = xen_assign_pfn(
1069 				    mmu_btop((ulong_t)rp->regspec_addr &
1070 				    MMU_PAGEMASK));
1071 			} else {
1072 				hp->ah_pfn = mmu_btop(
1073 				    (ulong_t)rp->regspec_addr & MMU_PAGEMASK);
1074 			}
1075 #else
1076 			hp->ah_pfn = mmu_btop((ulong_t)rp->regspec_addr &
1077 			    MMU_PAGEMASK);
1078 #endif
1079 			hp->ah_pnum = mmu_btopr(rp->regspec_size +
1080 			    (ulong_t)rp->regspec_addr & MMU_PAGEOFFSET);
1081 		}
1082 
1083 #ifdef	DDI_MAP_DEBUG
1084 		ddi_map_debug(
1085 	    "rootnex_map_regspec: \"Mapping\" %d bytes I/O space at 0x%x\n",
1086 		    rp->regspec_size, *vaddrp);
1087 #endif	/* DDI_MAP_DEBUG */
1088 		return (DDI_SUCCESS);
1089 	}
1090 
1091 	/*
1092 	 * Memory space
1093 	 */
1094 
1095 	if (hp != NULL) {
1096 		/*
1097 		 * hat layer ignores
1098 		 * hp->ah_acc.devacc_attr_endian_flags.
1099 		 */
1100 		switch (hp->ah_acc.devacc_attr_dataorder) {
1101 		case DDI_STRICTORDER_ACC:
1102 			hat_acc_flags = HAT_STRICTORDER;
1103 			break;
1104 		case DDI_UNORDERED_OK_ACC:
1105 			hat_acc_flags = HAT_UNORDERED_OK;
1106 			break;
1107 		case DDI_MERGING_OK_ACC:
1108 			hat_acc_flags = HAT_MERGING_OK;
1109 			break;
1110 		case DDI_LOADCACHING_OK_ACC:
1111 			hat_acc_flags = HAT_LOADCACHING_OK;
1112 			break;
1113 		case DDI_STORECACHING_OK_ACC:
1114 			hat_acc_flags = HAT_STORECACHING_OK;
1115 			break;
1116 		}
1117 		ap = (ddi_acc_impl_t *)hp->ah_platform_private;
1118 		ap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1119 		impl_acc_hdl_init(hp);
1120 		hp->ah_hat_flags = hat_acc_flags;
1121 	} else {
1122 		hat_acc_flags = HAT_STRICTORDER;
1123 	}
1124 
1125 	rbase = (rootnex_addr_t)(rp->regspec_addr & MMU_PAGEMASK);
1126 #ifdef __xpv
1127 	/*
1128 	 * If we're dom0, we're using a real device so we need to translate
1129 	 * the MA to a PA.
1130 	 */
1131 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1132 		pbase = pfn_to_pa(xen_assign_pfn(mmu_btop(rbase)));
1133 	} else {
1134 		pbase = rbase;
1135 	}
1136 #else
1137 	pbase = rbase;
1138 #endif
1139 	pgoffset = (ulong_t)rp->regspec_addr & MMU_PAGEOFFSET;
1140 
1141 	if (rp->regspec_size == 0) {
1142 #ifdef  DDI_MAP_DEBUG
1143 		ddi_map_debug("rootnex_map_regspec: zero regspec_size\n");
1144 #endif  /* DDI_MAP_DEBUG */
1145 		return (DDI_ME_INVAL);
1146 	}
1147 
1148 	if (mp->map_flags & DDI_MF_DEVICE_MAPPING) {
1149 		/* extra cast to make gcc happy */
1150 		*vaddrp = (caddr_t)((uintptr_t)mmu_btop(pbase));
1151 	} else {
1152 		npages = mmu_btopr(rp->regspec_size + pgoffset);
1153 
1154 #ifdef	DDI_MAP_DEBUG
1155 		ddi_map_debug("rootnex_map_regspec: Mapping %d pages "
1156 		    "physical %llx", npages, pbase);
1157 #endif	/* DDI_MAP_DEBUG */
1158 
1159 		cvaddr = device_arena_alloc(ptob(npages), VM_NOSLEEP);
1160 		if (cvaddr == NULL)
1161 			return (DDI_ME_NORESOURCES);
1162 
1163 		/*
1164 		 * Now map in the pages we've allocated...
1165 		 */
1166 		hat_devload(kas.a_hat, cvaddr, mmu_ptob(npages),
1167 		    mmu_btop(pbase), mp->map_prot | hat_acc_flags,
1168 		    HAT_LOAD_LOCK);
1169 		*vaddrp = (caddr_t)cvaddr + pgoffset;
1170 
1171 		/* save away pfn and npages for FMA */
1172 		hp = mp->map_handlep;
1173 		if (hp) {
1174 			hp->ah_pfn = mmu_btop(pbase);
1175 			hp->ah_pnum = npages;
1176 		}
1177 	}
1178 
1179 #ifdef	DDI_MAP_DEBUG
1180 	ddi_map_debug("at virtual 0x%x\n", *vaddrp);
1181 #endif	/* DDI_MAP_DEBUG */
1182 	return (DDI_SUCCESS);
1183 }
1184 
1185 
1186 /*
1187  * rootnex_unmap_regspec()
1188  *
1189  */
1190 static int
1191 rootnex_unmap_regspec(ddi_map_req_t *mp, caddr_t *vaddrp)
1192 {
1193 	caddr_t addr = (caddr_t)*vaddrp;
1194 	uint_t npages, pgoffset;
1195 	struct regspec *rp;
1196 
1197 	if (mp->map_flags & DDI_MF_DEVICE_MAPPING)
1198 		return (0);
1199 
1200 	rp = mp->map_obj.rp;
1201 
1202 	if (rp->regspec_size == 0) {
1203 #ifdef  DDI_MAP_DEBUG
1204 		ddi_map_debug("rootnex_unmap_regspec: zero regspec_size\n");
1205 #endif  /* DDI_MAP_DEBUG */
1206 		return (DDI_ME_INVAL);
1207 	}
1208 
1209 	/*
1210 	 * I/O or memory mapping:
1211 	 *
1212 	 *	<bustype=0, addr=x, len=x>: memory
1213 	 *	<bustype=1, addr=x, len=x>: i/o
1214 	 *	<bustype>1, addr=0, len=x>: x86-compatibility i/o
1215 	 */
1216 	if (rp->regspec_bustype != 0) {
1217 		/*
1218 		 * This is I/O space, which requires no particular
1219 		 * processing on unmap since it isn't mapped in the
1220 		 * first place.
1221 		 */
1222 		return (DDI_SUCCESS);
1223 	}
1224 
1225 	/*
1226 	 * Memory space
1227 	 */
1228 	pgoffset = (uintptr_t)addr & MMU_PAGEOFFSET;
1229 	npages = mmu_btopr(rp->regspec_size + pgoffset);
1230 	hat_unload(kas.a_hat, addr - pgoffset, ptob(npages), HAT_UNLOAD_UNLOCK);
1231 	device_arena_free(addr - pgoffset, ptob(npages));
1232 
1233 	/*
1234 	 * Destroy the pointer - the mapping has logically gone
1235 	 */
1236 	*vaddrp = NULL;
1237 
1238 	return (DDI_SUCCESS);
1239 }
1240 
1241 
1242 /*
1243  * rootnex_map_handle()
1244  *
1245  */
1246 static int
1247 rootnex_map_handle(ddi_map_req_t *mp)
1248 {
1249 	rootnex_addr_t rbase;
1250 	ddi_acc_hdl_t *hp;
1251 	uint_t pgoffset;
1252 	struct regspec *rp;
1253 	paddr_t pbase;
1254 
1255 	rp = mp->map_obj.rp;
1256 
1257 #ifdef	DDI_MAP_DEBUG
1258 	ddi_map_debug(
1259 	    "rootnex_map_handle: <0x%x 0x%x 0x%x> handle 0x%x\n",
1260 	    rp->regspec_bustype, rp->regspec_addr,
1261 	    rp->regspec_size, mp->map_handlep);
1262 #endif	/* DDI_MAP_DEBUG */
1263 
1264 	/*
1265 	 * I/O or memory mapping:
1266 	 *
1267 	 *	<bustype=0, addr=x, len=x>: memory
1268 	 *	<bustype=1, addr=x, len=x>: i/o
1269 	 *	<bustype>1, addr=0, len=x>: x86-compatibility i/o
1270 	 */
1271 	if (rp->regspec_bustype != 0) {
1272 		/*
1273 		 * This refers to I/O space, and we don't support "mapping"
1274 		 * I/O space to a user.
1275 		 */
1276 		return (DDI_FAILURE);
1277 	}
1278 
1279 	/*
1280 	 * Set up the hat_flags for the mapping.
1281 	 */
1282 	hp = mp->map_handlep;
1283 
1284 	switch (hp->ah_acc.devacc_attr_endian_flags) {
1285 	case DDI_NEVERSWAP_ACC:
1286 		hp->ah_hat_flags = HAT_NEVERSWAP | HAT_STRICTORDER;
1287 		break;
1288 	case DDI_STRUCTURE_LE_ACC:
1289 		hp->ah_hat_flags = HAT_STRUCTURE_LE;
1290 		break;
1291 	case DDI_STRUCTURE_BE_ACC:
1292 		return (DDI_FAILURE);
1293 	default:
1294 		return (DDI_REGS_ACC_CONFLICT);
1295 	}
1296 
1297 	switch (hp->ah_acc.devacc_attr_dataorder) {
1298 	case DDI_STRICTORDER_ACC:
1299 		break;
1300 	case DDI_UNORDERED_OK_ACC:
1301 		hp->ah_hat_flags |= HAT_UNORDERED_OK;
1302 		break;
1303 	case DDI_MERGING_OK_ACC:
1304 		hp->ah_hat_flags |= HAT_MERGING_OK;
1305 		break;
1306 	case DDI_LOADCACHING_OK_ACC:
1307 		hp->ah_hat_flags |= HAT_LOADCACHING_OK;
1308 		break;
1309 	case DDI_STORECACHING_OK_ACC:
1310 		hp->ah_hat_flags |= HAT_STORECACHING_OK;
1311 		break;
1312 	default:
1313 		return (DDI_FAILURE);
1314 	}
1315 
1316 	rbase = (rootnex_addr_t)rp->regspec_addr &
1317 	    (~(rootnex_addr_t)MMU_PAGEOFFSET);
1318 	pgoffset = (ulong_t)rp->regspec_addr & MMU_PAGEOFFSET;
1319 
1320 	if (rp->regspec_size == 0)
1321 		return (DDI_ME_INVAL);
1322 
1323 #ifdef __xpv
1324 	/*
1325 	 * If we're dom0, we're using a real device so we need to translate
1326 	 * the MA to a PA.
1327 	 */
1328 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1329 		pbase = pfn_to_pa(xen_assign_pfn(mmu_btop(rbase))) |
1330 		    (rbase & MMU_PAGEOFFSET);
1331 	} else {
1332 		pbase = rbase;
1333 	}
1334 #else
1335 	pbase = rbase;
1336 #endif
1337 
1338 	hp->ah_pfn = mmu_btop(pbase);
1339 	hp->ah_pnum = mmu_btopr(rp->regspec_size + pgoffset);
1340 
1341 	return (DDI_SUCCESS);
1342 }
1343 
1344 
1345 
1346 /*
1347  * ************************
1348  *  interrupt related code
1349  * ************************
1350  */
1351 
1352 /*
1353  * rootnex_intr_ops()
1354  *	bus_intr_op() function for interrupt support
1355  */
1356 /* ARGSUSED */
1357 static int
1358 rootnex_intr_ops(dev_info_t *pdip, dev_info_t *rdip, ddi_intr_op_t intr_op,
1359     ddi_intr_handle_impl_t *hdlp, void *result)
1360 {
1361 	struct intrspec			*ispec;
1362 	struct ddi_parent_private_data	*pdp;
1363 
1364 	DDI_INTR_NEXDBG((CE_CONT,
1365 	    "rootnex_intr_ops: pdip = %p, rdip = %p, intr_op = %x, hdlp = %p\n",
1366 	    (void *)pdip, (void *)rdip, intr_op, (void *)hdlp));
1367 
1368 	/* Process the interrupt operation */
1369 	switch (intr_op) {
1370 	case DDI_INTROP_GETCAP:
1371 		/* First check with pcplusmp */
1372 		if (psm_intr_ops == NULL)
1373 			return (DDI_FAILURE);
1374 
1375 		if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_GET_CAP, result)) {
1376 			*(int *)result = 0;
1377 			return (DDI_FAILURE);
1378 		}
1379 		break;
1380 	case DDI_INTROP_SETCAP:
1381 		if (psm_intr_ops == NULL)
1382 			return (DDI_FAILURE);
1383 
1384 		if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_SET_CAP, result))
1385 			return (DDI_FAILURE);
1386 		break;
1387 	case DDI_INTROP_ALLOC:
1388 		if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1389 			return (DDI_FAILURE);
1390 		hdlp->ih_pri = ispec->intrspec_pri;
1391 		*(int *)result = hdlp->ih_scratch1;
1392 		break;
1393 	case DDI_INTROP_FREE:
1394 		pdp = ddi_get_parent_data(rdip);
1395 		/*
1396 		 * Special case for 'pcic' driver' only.
1397 		 * If an intrspec was created for it, clean it up here
1398 		 * See detailed comments on this in the function
1399 		 * rootnex_get_ispec().
1400 		 */
1401 		if (pdp->par_intr && strcmp(ddi_get_name(rdip), "pcic") == 0) {
1402 			kmem_free(pdp->par_intr, sizeof (struct intrspec) *
1403 			    pdp->par_nintr);
1404 			/*
1405 			 * Set it to zero; so that
1406 			 * DDI framework doesn't free it again
1407 			 */
1408 			pdp->par_intr = NULL;
1409 			pdp->par_nintr = 0;
1410 		}
1411 		break;
1412 	case DDI_INTROP_GETPRI:
1413 		if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1414 			return (DDI_FAILURE);
1415 		*(int *)result = ispec->intrspec_pri;
1416 		break;
1417 	case DDI_INTROP_SETPRI:
1418 		/* Validate the interrupt priority passed to us */
1419 		if (*(int *)result > LOCK_LEVEL)
1420 			return (DDI_FAILURE);
1421 
1422 		/* Ensure that PSM is all initialized and ispec is ok */
1423 		if ((psm_intr_ops == NULL) ||
1424 		    ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL))
1425 			return (DDI_FAILURE);
1426 
1427 		/* Change the priority */
1428 		if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_SET_PRI, result) ==
1429 		    PSM_FAILURE)
1430 			return (DDI_FAILURE);
1431 
1432 		/* update the ispec with the new priority */
1433 		ispec->intrspec_pri =  *(int *)result;
1434 		break;
1435 	case DDI_INTROP_ADDISR:
1436 		if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1437 			return (DDI_FAILURE);
1438 		ispec->intrspec_func = hdlp->ih_cb_func;
1439 		break;
1440 	case DDI_INTROP_REMISR:
1441 		if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1442 			return (DDI_FAILURE);
1443 		ispec->intrspec_func = (uint_t (*)()) 0;
1444 		break;
1445 	case DDI_INTROP_ENABLE:
1446 		if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1447 			return (DDI_FAILURE);
1448 
1449 		/* Call psmi to translate irq with the dip */
1450 		if (psm_intr_ops == NULL)
1451 			return (DDI_FAILURE);
1452 
1453 		((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp = ispec;
1454 		(void) (*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_XLATE_VECTOR,
1455 		    (int *)&hdlp->ih_vector);
1456 
1457 		/* Add the interrupt handler */
1458 		if (!add_avintr((void *)hdlp, ispec->intrspec_pri,
1459 		    hdlp->ih_cb_func, DEVI(rdip)->devi_name, hdlp->ih_vector,
1460 		    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, NULL, rdip))
1461 			return (DDI_FAILURE);
1462 		break;
1463 	case DDI_INTROP_DISABLE:
1464 		if ((ispec = rootnex_get_ispec(rdip, hdlp->ih_inum)) == NULL)
1465 			return (DDI_FAILURE);
1466 
1467 		/* Call psm_ops() to translate irq with the dip */
1468 		if (psm_intr_ops == NULL)
1469 			return (DDI_FAILURE);
1470 
1471 		((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp = ispec;
1472 		(void) (*psm_intr_ops)(rdip, hdlp,
1473 		    PSM_INTR_OP_XLATE_VECTOR, (int *)&hdlp->ih_vector);
1474 
1475 		/* Remove the interrupt handler */
1476 		rem_avintr((void *)hdlp, ispec->intrspec_pri,
1477 		    hdlp->ih_cb_func, hdlp->ih_vector);
1478 		break;
1479 	case DDI_INTROP_SETMASK:
1480 		if (psm_intr_ops == NULL)
1481 			return (DDI_FAILURE);
1482 
1483 		if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_SET_MASK, NULL))
1484 			return (DDI_FAILURE);
1485 		break;
1486 	case DDI_INTROP_CLRMASK:
1487 		if (psm_intr_ops == NULL)
1488 			return (DDI_FAILURE);
1489 
1490 		if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_CLEAR_MASK, NULL))
1491 			return (DDI_FAILURE);
1492 		break;
1493 	case DDI_INTROP_GETPENDING:
1494 		if (psm_intr_ops == NULL)
1495 			return (DDI_FAILURE);
1496 
1497 		if ((*psm_intr_ops)(rdip, hdlp, PSM_INTR_OP_GET_PENDING,
1498 		    result)) {
1499 			*(int *)result = 0;
1500 			return (DDI_FAILURE);
1501 		}
1502 		break;
1503 	case DDI_INTROP_NAVAIL:
1504 	case DDI_INTROP_NINTRS:
1505 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
1506 		if (*(int *)result == 0) {
1507 			/*
1508 			 * Special case for 'pcic' driver' only. This driver
1509 			 * driver is a child of 'isa' and 'rootnex' drivers.
1510 			 *
1511 			 * See detailed comments on this in the function
1512 			 * rootnex_get_ispec().
1513 			 *
1514 			 * Children of 'pcic' send 'NINITR' request all the
1515 			 * way to rootnex driver. But, the 'pdp->par_nintr'
1516 			 * field may not initialized. So, we fake it here
1517 			 * to return 1 (a la what PCMCIA nexus does).
1518 			 */
1519 			if (strcmp(ddi_get_name(rdip), "pcic") == 0)
1520 				*(int *)result = 1;
1521 			else
1522 				return (DDI_FAILURE);
1523 		}
1524 		break;
1525 	case DDI_INTROP_SUPPORTED_TYPES:
1526 		*(int *)result = DDI_INTR_TYPE_FIXED;	/* Always ... */
1527 		break;
1528 	default:
1529 		return (DDI_FAILURE);
1530 	}
1531 
1532 	return (DDI_SUCCESS);
1533 }
1534 
1535 
1536 /*
1537  * rootnex_get_ispec()
1538  *	convert an interrupt number to an interrupt specification.
1539  *	The interrupt number determines which interrupt spec will be
1540  *	returned if more than one exists.
1541  *
1542  *	Look into the parent private data area of the 'rdip' to find out
1543  *	the interrupt specification.  First check to make sure there is
1544  *	one that matchs "inumber" and then return a pointer to it.
1545  *
1546  *	Return NULL if one could not be found.
1547  *
1548  *	NOTE: This is needed for rootnex_intr_ops()
1549  */
1550 static struct intrspec *
1551 rootnex_get_ispec(dev_info_t *rdip, int inum)
1552 {
1553 	struct ddi_parent_private_data *pdp = ddi_get_parent_data(rdip);
1554 
1555 	/*
1556 	 * Special case handling for drivers that provide their own
1557 	 * intrspec structures instead of relying on the DDI framework.
1558 	 *
1559 	 * A broken hardware driver in ON could potentially provide its
1560 	 * own intrspec structure, instead of relying on the hardware.
1561 	 * If these drivers are children of 'rootnex' then we need to
1562 	 * continue to provide backward compatibility to them here.
1563 	 *
1564 	 * Following check is a special case for 'pcic' driver which
1565 	 * was found to have broken hardwre andby provides its own intrspec.
1566 	 *
1567 	 * Verbatim comments from this driver are shown here:
1568 	 * "Don't use the ddi_add_intr since we don't have a
1569 	 * default intrspec in all cases."
1570 	 *
1571 	 * Since an 'ispec' may not be always created for it,
1572 	 * check for that and create one if so.
1573 	 *
1574 	 * NOTE: Currently 'pcic' is the only driver found to do this.
1575 	 */
1576 	if (!pdp->par_intr && strcmp(ddi_get_name(rdip), "pcic") == 0) {
1577 		pdp->par_nintr = 1;
1578 		pdp->par_intr = kmem_zalloc(sizeof (struct intrspec) *
1579 		    pdp->par_nintr, KM_SLEEP);
1580 	}
1581 
1582 	/* Validate the interrupt number */
1583 	if (inum >= pdp->par_nintr)
1584 		return (NULL);
1585 
1586 	/* Get the interrupt structure pointer and return that */
1587 	return ((struct intrspec *)&pdp->par_intr[inum]);
1588 }
1589 
1590 
1591 /*
1592  * ******************
1593  *  dma related code
1594  * ******************
1595  */
1596 
1597 /*ARGSUSED*/
1598 static int
1599 rootnex_coredma_allochdl(dev_info_t *dip, dev_info_t *rdip,
1600     ddi_dma_attr_t *attr, int (*waitfp)(caddr_t), caddr_t arg,
1601     ddi_dma_handle_t *handlep)
1602 {
1603 	uint64_t maxsegmentsize_ll;
1604 	uint_t maxsegmentsize;
1605 	ddi_dma_impl_t *hp;
1606 	rootnex_dma_t *dma;
1607 	uint64_t count_max;
1608 	uint64_t seg;
1609 	int kmflag;
1610 	int e;
1611 
1612 
1613 	/* convert our sleep flags */
1614 	if (waitfp == DDI_DMA_SLEEP) {
1615 		kmflag = KM_SLEEP;
1616 	} else {
1617 		kmflag = KM_NOSLEEP;
1618 	}
1619 
1620 	/*
1621 	 * We try to do only one memory allocation here. We'll do a little
1622 	 * pointer manipulation later. If the bind ends up taking more than
1623 	 * our prealloc's space, we'll have to allocate more memory in the
1624 	 * bind operation. Not great, but much better than before and the
1625 	 * best we can do with the current bind interfaces.
1626 	 */
1627 	hp = kmem_cache_alloc(rootnex_state->r_dmahdl_cache, kmflag);
1628 	if (hp == NULL) {
1629 		if (waitfp != DDI_DMA_DONTWAIT) {
1630 			ddi_set_callback(waitfp, arg,
1631 			    &rootnex_state->r_dvma_call_list_id);
1632 		}
1633 		return (DDI_DMA_NORESOURCES);
1634 	}
1635 
1636 	/* Do our pointer manipulation now, align the structures */
1637 	hp->dmai_private = (void *)(((uintptr_t)hp +
1638 	    (uintptr_t)sizeof (ddi_dma_impl_t) + 0x7) & ~0x7);
1639 	dma = (rootnex_dma_t *)hp->dmai_private;
1640 	dma->dp_prealloc_buffer = (uchar_t *)(((uintptr_t)dma +
1641 	    sizeof (rootnex_dma_t) + 0x7) & ~0x7);
1642 
1643 	/* setup the handle */
1644 	rootnex_clean_dmahdl(hp);
1645 	dma->dp_dip = rdip;
1646 	dma->dp_sglinfo.si_min_addr = attr->dma_attr_addr_lo;
1647 	dma->dp_sglinfo.si_max_addr = attr->dma_attr_addr_hi;
1648 	hp->dmai_minxfer = attr->dma_attr_minxfer;
1649 	hp->dmai_burstsizes = attr->dma_attr_burstsizes;
1650 	hp->dmai_rdip = rdip;
1651 	hp->dmai_attr = *attr;
1652 
1653 	/* we don't need to worry about the SPL since we do a tryenter */
1654 	mutex_init(&dma->dp_mutex, NULL, MUTEX_DRIVER, NULL);
1655 
1656 	/*
1657 	 * Figure out our maximum segment size. If the segment size is greater
1658 	 * than 4G, we will limit it to (4G - 1) since the max size of a dma
1659 	 * object (ddi_dma_obj_t.dmao_size) is 32 bits. dma_attr_seg and
1660 	 * dma_attr_count_max are size-1 type values.
1661 	 *
1662 	 * Maximum segment size is the largest physically contiguous chunk of
1663 	 * memory that we can return from a bind (i.e. the maximum size of a
1664 	 * single cookie).
1665 	 */
1666 
1667 	/* handle the rollover cases */
1668 	seg = attr->dma_attr_seg + 1;
1669 	if (seg < attr->dma_attr_seg) {
1670 		seg = attr->dma_attr_seg;
1671 	}
1672 	count_max = attr->dma_attr_count_max + 1;
1673 	if (count_max < attr->dma_attr_count_max) {
1674 		count_max = attr->dma_attr_count_max;
1675 	}
1676 
1677 	/*
1678 	 * granularity may or may not be a power of two. If it isn't, we can't
1679 	 * use a simple mask.
1680 	 */
1681 	if (attr->dma_attr_granular & (attr->dma_attr_granular - 1)) {
1682 		dma->dp_granularity_power_2 = B_FALSE;
1683 	} else {
1684 		dma->dp_granularity_power_2 = B_TRUE;
1685 	}
1686 
1687 	/*
1688 	 * maxxfer should be a whole multiple of granularity. If we're going to
1689 	 * break up a window because we're greater than maxxfer, we might as
1690 	 * well make sure it's maxxfer is a whole multiple so we don't have to
1691 	 * worry about triming the window later on for this case.
1692 	 */
1693 	if (attr->dma_attr_granular > 1) {
1694 		if (dma->dp_granularity_power_2) {
1695 			dma->dp_maxxfer = attr->dma_attr_maxxfer -
1696 			    (attr->dma_attr_maxxfer &
1697 			    (attr->dma_attr_granular - 1));
1698 		} else {
1699 			dma->dp_maxxfer = attr->dma_attr_maxxfer -
1700 			    (attr->dma_attr_maxxfer % attr->dma_attr_granular);
1701 		}
1702 	} else {
1703 		dma->dp_maxxfer = attr->dma_attr_maxxfer;
1704 	}
1705 
1706 	maxsegmentsize_ll = MIN(seg, dma->dp_maxxfer);
1707 	maxsegmentsize_ll = MIN(maxsegmentsize_ll, count_max);
1708 	if (maxsegmentsize_ll == 0 || (maxsegmentsize_ll > 0xFFFFFFFF)) {
1709 		maxsegmentsize = 0xFFFFFFFF;
1710 	} else {
1711 		maxsegmentsize = maxsegmentsize_ll;
1712 	}
1713 	dma->dp_sglinfo.si_max_cookie_size = maxsegmentsize;
1714 	dma->dp_sglinfo.si_segmask = attr->dma_attr_seg;
1715 
1716 	/* check the ddi_dma_attr arg to make sure it makes a little sense */
1717 	if (rootnex_alloc_check_parms) {
1718 		e = rootnex_valid_alloc_parms(attr, maxsegmentsize);
1719 		if (e != DDI_SUCCESS) {
1720 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ALLOC_FAIL]);
1721 			(void) rootnex_dma_freehdl(dip, rdip,
1722 			    (ddi_dma_handle_t)hp);
1723 			return (e);
1724 		}
1725 	}
1726 
1727 	*handlep = (ddi_dma_handle_t)hp;
1728 
1729 	ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1730 	DTRACE_PROBE1(rootnex__alloc__handle, uint64_t,
1731 	    rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1732 
1733 	return (DDI_SUCCESS);
1734 }
1735 
1736 
1737 /*
1738  * rootnex_dma_allochdl()
1739  *    called from ddi_dma_alloc_handle().
1740  */
1741 static int
1742 rootnex_dma_allochdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
1743     int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *handlep)
1744 {
1745 #if !defined(__xpv)
1746 	uint_t error = ENOTSUP;
1747 	int retval;
1748 
1749 	retval = iommulib_nex_open(rdip, &error);
1750 
1751 	if (retval != DDI_SUCCESS && error == ENOTSUP) {
1752 		/* No IOMMU */
1753 		return (rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
1754 		    handlep));
1755 	} else if (retval != DDI_SUCCESS) {
1756 		return (DDI_FAILURE);
1757 	}
1758 
1759 	ASSERT(IOMMU_USED(rdip));
1760 
1761 	/* has an IOMMU */
1762 	return (iommulib_nexdma_allochdl(dip, rdip, attr,
1763 	    waitfp, arg, handlep));
1764 #else
1765 	return (rootnex_coredma_allochdl(dip, rdip, attr, waitfp, arg,
1766 	    handlep));
1767 #endif
1768 }
1769 
1770 /*ARGSUSED*/
1771 static int
1772 rootnex_coredma_freehdl(dev_info_t *dip, dev_info_t *rdip,
1773     ddi_dma_handle_t handle)
1774 {
1775 	ddi_dma_impl_t *hp;
1776 	rootnex_dma_t *dma;
1777 
1778 
1779 	hp = (ddi_dma_impl_t *)handle;
1780 	dma = (rootnex_dma_t *)hp->dmai_private;
1781 
1782 	/* unbind should have been called first */
1783 	ASSERT(!dma->dp_inuse);
1784 
1785 	mutex_destroy(&dma->dp_mutex);
1786 	kmem_cache_free(rootnex_state->r_dmahdl_cache, hp);
1787 
1788 	ROOTNEX_PROF_DEC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1789 	DTRACE_PROBE1(rootnex__free__handle, uint64_t,
1790 	    rootnex_cnt[ROOTNEX_CNT_ACTIVE_HDLS]);
1791 
1792 	if (rootnex_state->r_dvma_call_list_id)
1793 		ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
1794 
1795 	return (DDI_SUCCESS);
1796 }
1797 
1798 /*
1799  * rootnex_dma_freehdl()
1800  *    called from ddi_dma_free_handle().
1801  */
1802 static int
1803 rootnex_dma_freehdl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle)
1804 {
1805 #if !defined(__xpv)
1806 	if (IOMMU_USED(rdip)) {
1807 		return (iommulib_nexdma_freehdl(dip, rdip, handle));
1808 	}
1809 #endif
1810 	return (rootnex_coredma_freehdl(dip, rdip, handle));
1811 }
1812 
1813 
1814 /*ARGSUSED*/
1815 static int
1816 rootnex_coredma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
1817     ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
1818     ddi_dma_cookie_t *cookiep, uint_t *ccountp)
1819 {
1820 	rootnex_sglinfo_t *sinfo;
1821 	ddi_dma_attr_t *attr;
1822 	ddi_dma_impl_t *hp;
1823 	rootnex_dma_t *dma;
1824 	int kmflag;
1825 	int e;
1826 
1827 
1828 	hp = (ddi_dma_impl_t *)handle;
1829 	dma = (rootnex_dma_t *)hp->dmai_private;
1830 	sinfo = &dma->dp_sglinfo;
1831 	attr = &hp->dmai_attr;
1832 
1833 	hp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
1834 
1835 	/*
1836 	 * This is useful for debugging a driver. Not as useful in a production
1837 	 * system. The only time this will fail is if you have a driver bug.
1838 	 */
1839 	if (rootnex_bind_check_inuse) {
1840 		/*
1841 		 * No one else should ever have this lock unless someone else
1842 		 * is trying to use this handle. So contention on the lock
1843 		 * is the same as inuse being set.
1844 		 */
1845 		e = mutex_tryenter(&dma->dp_mutex);
1846 		if (e == 0) {
1847 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1848 			return (DDI_DMA_INUSE);
1849 		}
1850 		if (dma->dp_inuse) {
1851 			mutex_exit(&dma->dp_mutex);
1852 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1853 			return (DDI_DMA_INUSE);
1854 		}
1855 		dma->dp_inuse = B_TRUE;
1856 		mutex_exit(&dma->dp_mutex);
1857 	}
1858 
1859 	/* check the ddi_dma_attr arg to make sure it makes a little sense */
1860 	if (rootnex_bind_check_parms) {
1861 		e = rootnex_valid_bind_parms(dmareq, attr);
1862 		if (e != DDI_SUCCESS) {
1863 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1864 			rootnex_clean_dmahdl(hp);
1865 			return (e);
1866 		}
1867 	}
1868 
1869 	/* save away the original bind info */
1870 	dma->dp_dma = dmareq->dmar_object;
1871 
1872 #if !defined(__xpv)
1873 	if (rootnex_state->r_intel_iommu_enabled) {
1874 		e = intel_iommu_map_sgl(handle, dmareq,
1875 		    rootnex_state->r_prealloc_cookies);
1876 
1877 		switch (e) {
1878 		case IOMMU_SGL_SUCCESS:
1879 			goto rootnex_sgl_end;
1880 
1881 		case IOMMU_SGL_DISABLE:
1882 			goto rootnex_sgl_start;
1883 
1884 		case IOMMU_SGL_NORESOURCES:
1885 			cmn_err(CE_WARN, "iommu map sgl failed for %s",
1886 			    ddi_node_name(dma->dp_dip));
1887 			rootnex_clean_dmahdl(hp);
1888 			return (DDI_DMA_NORESOURCES);
1889 
1890 		default:
1891 			cmn_err(CE_WARN,
1892 			    "undefined value returned from"
1893 			    " intel_iommu_map_sgl: %d",
1894 			    e);
1895 			rootnex_clean_dmahdl(hp);
1896 			return (DDI_DMA_NORESOURCES);
1897 		}
1898 	}
1899 #endif
1900 
1901 rootnex_sgl_start:
1902 	/*
1903 	 * Figure out a rough estimate of what maximum number of pages this
1904 	 * buffer could use (a high estimate of course).
1905 	 */
1906 	sinfo->si_max_pages = mmu_btopr(dma->dp_dma.dmao_size) + 1;
1907 
1908 	/*
1909 	 * We'll use the pre-allocated cookies for any bind that will *always*
1910 	 * fit (more important to be consistent, we don't want to create
1911 	 * additional degenerate cases).
1912 	 */
1913 	if (sinfo->si_max_pages <= rootnex_state->r_prealloc_cookies) {
1914 		dma->dp_cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer;
1915 		dma->dp_need_to_free_cookie = B_FALSE;
1916 		DTRACE_PROBE2(rootnex__bind__prealloc, dev_info_t *, rdip,
1917 		    uint_t, sinfo->si_max_pages);
1918 
1919 	/*
1920 	 * For anything larger than that, we'll go ahead and allocate the
1921 	 * maximum number of pages we expect to see. Hopefuly, we won't be
1922 	 * seeing this path in the fast path for high performance devices very
1923 	 * frequently.
1924 	 *
1925 	 * a ddi bind interface that allowed the driver to provide storage to
1926 	 * the bind interface would speed this case up.
1927 	 */
1928 	} else {
1929 		/* convert the sleep flags */
1930 		if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
1931 			kmflag =  KM_SLEEP;
1932 		} else {
1933 			kmflag =  KM_NOSLEEP;
1934 		}
1935 
1936 		/*
1937 		 * Save away how much memory we allocated. If we're doing a
1938 		 * nosleep, the alloc could fail...
1939 		 */
1940 		dma->dp_cookie_size = sinfo->si_max_pages *
1941 		    sizeof (ddi_dma_cookie_t);
1942 		dma->dp_cookies = kmem_alloc(dma->dp_cookie_size, kmflag);
1943 		if (dma->dp_cookies == NULL) {
1944 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
1945 			rootnex_clean_dmahdl(hp);
1946 			return (DDI_DMA_NORESOURCES);
1947 		}
1948 		dma->dp_need_to_free_cookie = B_TRUE;
1949 		DTRACE_PROBE2(rootnex__bind__alloc, dev_info_t *, rdip, uint_t,
1950 		    sinfo->si_max_pages);
1951 	}
1952 	hp->dmai_cookie = dma->dp_cookies;
1953 
1954 	/*
1955 	 * Get the real sgl. rootnex_get_sgl will fill in cookie array while
1956 	 * looking at the contraints in the dma structure. It will then put some
1957 	 * additional state about the sgl in the dma struct (i.e. is the sgl
1958 	 * clean, or do we need to do some munging; how many pages need to be
1959 	 * copied, etc.)
1960 	 */
1961 	rootnex_get_sgl(&dmareq->dmar_object, dma->dp_cookies,
1962 	    &dma->dp_sglinfo);
1963 
1964 rootnex_sgl_end:
1965 	ASSERT(sinfo->si_sgl_size <= sinfo->si_max_pages);
1966 	/* if we don't need a copy buffer, we don't need to sync */
1967 	if (sinfo->si_copybuf_req == 0) {
1968 		hp->dmai_rflags |= DMP_NOSYNC;
1969 	}
1970 
1971 	/*
1972 	 * if we don't need the copybuf and we don't need to do a partial,  we
1973 	 * hit the fast path. All the high performance devices should be trying
1974 	 * to hit this path. To hit this path, a device should be able to reach
1975 	 * all of memory, shouldn't try to bind more than it can transfer, and
1976 	 * the buffer shouldn't require more cookies than the driver/device can
1977 	 * handle [sgllen]).
1978 	 */
1979 	if ((sinfo->si_copybuf_req == 0) &&
1980 	    (sinfo->si_sgl_size <= attr->dma_attr_sgllen) &&
1981 	    (dma->dp_dma.dmao_size < dma->dp_maxxfer)) {
1982 		/*
1983 		 * If the driver supports FMA, insert the handle in the FMA DMA
1984 		 * handle cache.
1985 		 */
1986 		if (attr->dma_attr_flags & DDI_DMA_FLAGERR) {
1987 			hp->dmai_error.err_cf = rootnex_dma_check;
1988 			(void) ndi_fmc_insert(rdip, DMA_HANDLE, hp, NULL);
1989 		}
1990 
1991 		/*
1992 		 * copy out the first cookie and ccountp, set the cookie
1993 		 * pointer to the second cookie. The first cookie is passed
1994 		 * back on the stack. Additional cookies are accessed via
1995 		 * ddi_dma_nextcookie()
1996 		 */
1997 		*cookiep = dma->dp_cookies[0];
1998 		*ccountp = sinfo->si_sgl_size;
1999 		hp->dmai_cookie++;
2000 		hp->dmai_rflags &= ~DDI_DMA_PARTIAL;
2001 		hp->dmai_nwin = 1;
2002 		ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2003 		DTRACE_PROBE3(rootnex__bind__fast, dev_info_t *, rdip, uint64_t,
2004 		    rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS], uint_t,
2005 		    dma->dp_dma.dmao_size);
2006 		return (DDI_DMA_MAPPED);
2007 	}
2008 
2009 	/*
2010 	 * go to the slow path, we may need to alloc more memory, create
2011 	 * multiple windows, and munge up a sgl to make the device happy.
2012 	 */
2013 	e = rootnex_bind_slowpath(hp, dmareq, dma, attr, kmflag);
2014 	if ((e != DDI_DMA_MAPPED) && (e != DDI_DMA_PARTIAL_MAP)) {
2015 		if (dma->dp_need_to_free_cookie) {
2016 			kmem_free(dma->dp_cookies, dma->dp_cookie_size);
2017 		}
2018 		ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_BIND_FAIL]);
2019 		rootnex_clean_dmahdl(hp); /* must be after free cookie */
2020 		return (e);
2021 	}
2022 
2023 	/*
2024 	 * If the driver supports FMA, insert the handle in the FMA DMA handle
2025 	 * cache.
2026 	 */
2027 	if (attr->dma_attr_flags & DDI_DMA_FLAGERR) {
2028 		hp->dmai_error.err_cf = rootnex_dma_check;
2029 		(void) ndi_fmc_insert(rdip, DMA_HANDLE, hp, NULL);
2030 	}
2031 
2032 	/* if the first window uses the copy buffer, sync it for the device */
2033 	if ((dma->dp_window[dma->dp_current_win].wd_dosync) &&
2034 	    (hp->dmai_rflags & DDI_DMA_WRITE)) {
2035 		(void) rootnex_dma_sync(dip, rdip, handle, 0, 0,
2036 		    DDI_DMA_SYNC_FORDEV);
2037 	}
2038 
2039 	/*
2040 	 * copy out the first cookie and ccountp, set the cookie pointer to the
2041 	 * second cookie. Make sure the partial flag is set/cleared correctly.
2042 	 * If we have a partial map (i.e. multiple windows), the number of
2043 	 * cookies we return is the number of cookies in the first window.
2044 	 */
2045 	if (e == DDI_DMA_MAPPED) {
2046 		hp->dmai_rflags &= ~DDI_DMA_PARTIAL;
2047 		*ccountp = sinfo->si_sgl_size;
2048 	} else {
2049 		hp->dmai_rflags |= DDI_DMA_PARTIAL;
2050 		*ccountp = dma->dp_window[dma->dp_current_win].wd_cookie_cnt;
2051 		ASSERT(hp->dmai_nwin <= dma->dp_max_win);
2052 	}
2053 	*cookiep = dma->dp_cookies[0];
2054 	hp->dmai_cookie++;
2055 
2056 	ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2057 	DTRACE_PROBE3(rootnex__bind__slow, dev_info_t *, rdip, uint64_t,
2058 	    rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS], uint_t,
2059 	    dma->dp_dma.dmao_size);
2060 	return (e);
2061 }
2062 
2063 
2064 /*
2065  * rootnex_dma_bindhdl()
2066  *    called from ddi_dma_addr_bind_handle() and ddi_dma_buf_bind_handle().
2067  */
2068 static int
2069 rootnex_dma_bindhdl(dev_info_t *dip, dev_info_t *rdip,
2070     ddi_dma_handle_t handle, struct ddi_dma_req *dmareq,
2071     ddi_dma_cookie_t *cookiep, uint_t *ccountp)
2072 {
2073 #if !defined(__xpv)
2074 	if (IOMMU_USED(rdip)) {
2075 		return (iommulib_nexdma_bindhdl(dip, rdip, handle, dmareq,
2076 		    cookiep, ccountp));
2077 	}
2078 #endif
2079 	return (rootnex_coredma_bindhdl(dip, rdip, handle, dmareq,
2080 	    cookiep, ccountp));
2081 }
2082 
2083 /*ARGSUSED*/
2084 static int
2085 rootnex_coredma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
2086     ddi_dma_handle_t handle)
2087 {
2088 	ddi_dma_impl_t *hp;
2089 	rootnex_dma_t *dma;
2090 	int e;
2091 
2092 	hp = (ddi_dma_impl_t *)handle;
2093 	dma = (rootnex_dma_t *)hp->dmai_private;
2094 
2095 	/* make sure the buffer wasn't free'd before calling unbind */
2096 	if (rootnex_unbind_verify_buffer) {
2097 		e = rootnex_verify_buffer(dma);
2098 		if (e != DDI_SUCCESS) {
2099 			ASSERT(0);
2100 			return (DDI_FAILURE);
2101 		}
2102 	}
2103 
2104 	/* sync the current window before unbinding the buffer */
2105 	if (dma->dp_window && dma->dp_window[dma->dp_current_win].wd_dosync &&
2106 	    (hp->dmai_rflags & DDI_DMA_READ)) {
2107 		(void) rootnex_dma_sync(dip, rdip, handle, 0, 0,
2108 		    DDI_DMA_SYNC_FORCPU);
2109 	}
2110 
2111 	/*
2112 	 * If the driver supports FMA, remove the handle in the FMA DMA handle
2113 	 * cache.
2114 	 */
2115 	if (hp->dmai_attr.dma_attr_flags & DDI_DMA_FLAGERR) {
2116 		if ((DEVI(rdip)->devi_fmhdl != NULL) &&
2117 		    (DDI_FM_DMA_ERR_CAP(DEVI(rdip)->devi_fmhdl->fh_cap))) {
2118 			(void) ndi_fmc_remove(rdip, DMA_HANDLE, hp);
2119 		}
2120 	}
2121 
2122 	/*
2123 	 * cleanup and copy buffer or window state. if we didn't use the copy
2124 	 * buffer or windows, there won't be much to do :-)
2125 	 */
2126 	rootnex_teardown_copybuf(dma);
2127 	rootnex_teardown_windows(dma);
2128 
2129 #if !defined(__xpv)
2130 	/*
2131 	 * If intel iommu enabled, clean up the page tables and free the dvma
2132 	 */
2133 	if (rootnex_state->r_intel_iommu_enabled) {
2134 		intel_iommu_unmap_sgl(handle);
2135 	}
2136 #endif
2137 
2138 	/*
2139 	 * If we had to allocate space to for the worse case sgl (it didn't
2140 	 * fit into our pre-allocate buffer), free that up now
2141 	 */
2142 	if (dma->dp_need_to_free_cookie) {
2143 		kmem_free(dma->dp_cookies, dma->dp_cookie_size);
2144 	}
2145 
2146 	/*
2147 	 * clean up the handle so it's ready for the next bind (i.e. if the
2148 	 * handle is reused).
2149 	 */
2150 	rootnex_clean_dmahdl(hp);
2151 
2152 	if (rootnex_state->r_dvma_call_list_id)
2153 		ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
2154 
2155 	ROOTNEX_PROF_DEC(&rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2156 	DTRACE_PROBE1(rootnex__unbind, uint64_t,
2157 	    rootnex_cnt[ROOTNEX_CNT_ACTIVE_BINDS]);
2158 
2159 	return (DDI_SUCCESS);
2160 }
2161 
2162 /*
2163  * rootnex_dma_unbindhdl()
2164  *    called from ddi_dma_unbind_handle()
2165  */
2166 /*ARGSUSED*/
2167 static int
2168 rootnex_dma_unbindhdl(dev_info_t *dip, dev_info_t *rdip,
2169     ddi_dma_handle_t handle)
2170 {
2171 #if !defined(__xpv)
2172 	if (IOMMU_USED(rdip)) {
2173 		return (iommulib_nexdma_unbindhdl(dip, rdip, handle));
2174 	}
2175 #endif
2176 	return (rootnex_coredma_unbindhdl(dip, rdip, handle));
2177 }
2178 
2179 #if !defined(__xpv)
2180 /*ARGSUSED*/
2181 static void
2182 rootnex_coredma_reset_cookies(dev_info_t *dip, ddi_dma_handle_t handle)
2183 {
2184 	ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2185 	rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2186 
2187 	hp->dmai_cookie = &dma->dp_cookies[0];
2188 	hp->dmai_cookie++;
2189 }
2190 
2191 /*ARGSUSED*/
2192 static int
2193 rootnex_coredma_get_cookies(dev_info_t *dip, ddi_dma_handle_t handle,
2194     ddi_dma_cookie_t *cookiep, uint_t *ccountp)
2195 {
2196 	ddi_dma_impl_t *hp = (ddi_dma_impl_t *)handle;
2197 	rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private;
2198 
2199 
2200 	if (hp->dmai_rflags & DDI_DMA_PARTIAL) {
2201 		*ccountp = dma->dp_window[dma->dp_current_win].wd_cookie_cnt;
2202 	} else {
2203 		*ccountp = dma->dp_sglinfo.si_sgl_size;
2204 	}
2205 	*cookiep = dma->dp_cookies[0];
2206 
2207 	/* reset the cookies */
2208 	hp->dmai_cookie = &dma->dp_cookies[0];
2209 	hp->dmai_cookie++;
2210 
2211 	return (DDI_SUCCESS);
2212 }
2213 #endif
2214 
2215 /*
2216  * rootnex_verify_buffer()
2217  *   verify buffer wasn't free'd
2218  */
2219 static int
2220 rootnex_verify_buffer(rootnex_dma_t *dma)
2221 {
2222 	page_t **pplist;
2223 	caddr_t vaddr;
2224 	uint_t pcnt;
2225 	uint_t poff;
2226 	page_t *pp;
2227 	char b;
2228 	int i;
2229 
2230 	/* Figure out how many pages this buffer occupies */
2231 	if (dma->dp_dma.dmao_type == DMA_OTYP_PAGES) {
2232 		poff = dma->dp_dma.dmao_obj.pp_obj.pp_offset & MMU_PAGEOFFSET;
2233 	} else {
2234 		vaddr = dma->dp_dma.dmao_obj.virt_obj.v_addr;
2235 		poff = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2236 	}
2237 	pcnt = mmu_btopr(dma->dp_dma.dmao_size + poff);
2238 
2239 	switch (dma->dp_dma.dmao_type) {
2240 	case DMA_OTYP_PAGES:
2241 		/*
2242 		 * for a linked list of pp's walk through them to make sure
2243 		 * they're locked and not free.
2244 		 */
2245 		pp = dma->dp_dma.dmao_obj.pp_obj.pp_pp;
2246 		for (i = 0; i < pcnt; i++) {
2247 			if (PP_ISFREE(pp) || !PAGE_LOCKED(pp)) {
2248 				return (DDI_FAILURE);
2249 			}
2250 			pp = pp->p_next;
2251 		}
2252 		break;
2253 
2254 	case DMA_OTYP_VADDR:
2255 	case DMA_OTYP_BUFVADDR:
2256 		pplist = dma->dp_dma.dmao_obj.virt_obj.v_priv;
2257 		/*
2258 		 * for an array of pp's walk through them to make sure they're
2259 		 * not free. It's possible that they may not be locked.
2260 		 */
2261 		if (pplist) {
2262 			for (i = 0; i < pcnt; i++) {
2263 				if (PP_ISFREE(pplist[i])) {
2264 					return (DDI_FAILURE);
2265 				}
2266 			}
2267 
2268 		/* For a virtual address, try to peek at each page */
2269 		} else {
2270 			if (dma->dp_sglinfo.si_asp == &kas) {
2271 				for (i = 0; i < pcnt; i++) {
2272 					if (ddi_peek8(NULL, vaddr, &b) ==
2273 					    DDI_FAILURE)
2274 						return (DDI_FAILURE);
2275 					vaddr += MMU_PAGESIZE;
2276 				}
2277 			}
2278 		}
2279 		break;
2280 
2281 	default:
2282 		ASSERT(0);
2283 		break;
2284 	}
2285 
2286 	return (DDI_SUCCESS);
2287 }
2288 
2289 
2290 /*
2291  * rootnex_clean_dmahdl()
2292  *    Clean the dma handle. This should be called on a handle alloc and an
2293  *    unbind handle. Set the handle state to the default settings.
2294  */
2295 static void
2296 rootnex_clean_dmahdl(ddi_dma_impl_t *hp)
2297 {
2298 	rootnex_dma_t *dma;
2299 
2300 
2301 	dma = (rootnex_dma_t *)hp->dmai_private;
2302 
2303 	hp->dmai_nwin = 0;
2304 	dma->dp_current_cookie = 0;
2305 	dma->dp_copybuf_size = 0;
2306 	dma->dp_window = NULL;
2307 	dma->dp_cbaddr = NULL;
2308 	dma->dp_inuse = B_FALSE;
2309 	dma->dp_need_to_free_cookie = B_FALSE;
2310 	dma->dp_need_to_free_window = B_FALSE;
2311 	dma->dp_partial_required = B_FALSE;
2312 	dma->dp_trim_required = B_FALSE;
2313 	dma->dp_sglinfo.si_copybuf_req = 0;
2314 #if !defined(__amd64)
2315 	dma->dp_cb_remaping = B_FALSE;
2316 	dma->dp_kva = NULL;
2317 #endif
2318 
2319 	/* FMA related initialization */
2320 	hp->dmai_fault = 0;
2321 	hp->dmai_fault_check = NULL;
2322 	hp->dmai_fault_notify = NULL;
2323 	hp->dmai_error.err_ena = 0;
2324 	hp->dmai_error.err_status = DDI_FM_OK;
2325 	hp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
2326 	hp->dmai_error.err_ontrap = NULL;
2327 	hp->dmai_error.err_fep = NULL;
2328 	hp->dmai_error.err_cf = NULL;
2329 }
2330 
2331 
2332 /*
2333  * rootnex_valid_alloc_parms()
2334  *    Called in ddi_dma_alloc_handle path to validate its parameters.
2335  */
2336 static int
2337 rootnex_valid_alloc_parms(ddi_dma_attr_t *attr, uint_t maxsegmentsize)
2338 {
2339 	if ((attr->dma_attr_seg < MMU_PAGEOFFSET) ||
2340 	    (attr->dma_attr_count_max < MMU_PAGEOFFSET) ||
2341 	    (attr->dma_attr_granular > MMU_PAGESIZE) ||
2342 	    (attr->dma_attr_maxxfer < MMU_PAGESIZE)) {
2343 		return (DDI_DMA_BADATTR);
2344 	}
2345 
2346 	if (attr->dma_attr_addr_hi <= attr->dma_attr_addr_lo) {
2347 		return (DDI_DMA_BADATTR);
2348 	}
2349 
2350 	if ((attr->dma_attr_seg & MMU_PAGEOFFSET) != MMU_PAGEOFFSET ||
2351 	    MMU_PAGESIZE & (attr->dma_attr_granular - 1) ||
2352 	    attr->dma_attr_sgllen <= 0) {
2353 		return (DDI_DMA_BADATTR);
2354 	}
2355 
2356 	/* We should be able to DMA into every byte offset in a page */
2357 	if (maxsegmentsize < MMU_PAGESIZE) {
2358 		return (DDI_DMA_BADATTR);
2359 	}
2360 
2361 	return (DDI_SUCCESS);
2362 }
2363 
2364 
2365 /*
2366  * rootnex_valid_bind_parms()
2367  *    Called in ddi_dma_*_bind_handle path to validate its parameters.
2368  */
2369 /* ARGSUSED */
2370 static int
2371 rootnex_valid_bind_parms(ddi_dma_req_t *dmareq, ddi_dma_attr_t *attr)
2372 {
2373 #if !defined(__amd64)
2374 	/*
2375 	 * we only support up to a 2G-1 transfer size on 32-bit kernels so
2376 	 * we can track the offset for the obsoleted interfaces.
2377 	 */
2378 	if (dmareq->dmar_object.dmao_size > 0x7FFFFFFF) {
2379 		return (DDI_DMA_TOOBIG);
2380 	}
2381 #endif
2382 
2383 	return (DDI_SUCCESS);
2384 }
2385 
2386 
/*
 * rootnex_get_sgl()
 *    Called in bind fastpath to get the sgl. Most of this will be replaced
 *    with a call to the vm layer when vm2.0 comes around...
 *
 *    Walks the buffer described by dmar_object one MMU page at a time,
 *    looks up the physical address of each page and builds the scatter
 *    gather list in sgl[]. Physically contiguous pages are merged into one
 *    cookie unless that would cross a segment boundary or exceed the
 *    maximum cookie size. A page the device cannot address gets a cookie
 *    of its own, flagged ROOTNEX_USES_COPYBUF.
 *
 *    dmar_object - the buffer: a linked list of page_t's (DMA_OTYP_PAGES),
 *                  or a virtual address with or without an array of
 *                  page_t's in v_priv.
 *    sgl         - cookie array to fill in. Entries are written without a
 *                  bounds check in several places below, so this
 *                  presumably has to be sized for the worst case
 *                  (si_max_pages entries) by the caller - TODO confirm.
 *    sglinfo     - read:    si_max_cookie_size, si_max_addr, si_min_addr,
 *                           si_segmask, si_max_pages
 *                  written: si_asp, si_buf_offset, si_sgl_size
 *                  added to: si_copybuf_req (MMU_PAGESIZE for every page
 *                           needing the copy buffer; not reset here)
 */
static void
rootnex_get_sgl(ddi_dma_obj_t *dmar_object, ddi_dma_cookie_t *sgl,
    rootnex_sglinfo_t *sglinfo)
{
	ddi_dma_atyp_t buftype;
	rootnex_addr_t raddr;
	uint64_t last_page;
	uint64_t offset;
	uint64_t addrhi;
	uint64_t addrlo;
	uint64_t maxseg;
	page_t **pplist;
	uint64_t paddr;
	uint32_t psize;
	uint32_t size;
	caddr_t vaddr;
	uint_t pcnt;
	page_t *pp;
	uint_t cnt;


	/* shortcuts */
	pplist = dmar_object->dmao_obj.virt_obj.v_priv;
	vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
	maxseg = sglinfo->si_max_cookie_size;
	buftype = dmar_object->dmao_type;
	addrhi = sglinfo->si_max_addr;
	addrlo = sglinfo->si_min_addr;
	size = dmar_object->dmao_size;

	/* pcnt indexes pplist[], cnt indexes the cookie being built in sgl[] */
	pcnt = 0;
	cnt = 0;

	/*
	 * if we were passed down a linked list of pages, i.e. pointer to
	 * page_t, use this to get our physical address and buf offset.
	 */
	if (buftype == DMA_OTYP_PAGES) {
		pp = dmar_object->dmao_obj.pp_obj.pp_pp;
		ASSERT(!PP_ISFREE(pp) && PAGE_LOCKED(pp));
		offset =  dmar_object->dmao_obj.pp_obj.pp_offset &
		    MMU_PAGEOFFSET;
		paddr = pfn_to_pa(pp->p_pagenum) + offset;
		psize = MIN(size, (MMU_PAGESIZE - offset));
		pp = pp->p_next;
		sglinfo->si_asp = NULL;

	/*
	 * We weren't passed down a linked list of pages, but if we were passed
	 * down an array of pages, use this to get our physical address and buf
	 * offset.
	 */
	} else if (pplist != NULL) {
		ASSERT((buftype == DMA_OTYP_VADDR) ||
		    (buftype == DMA_OTYP_BUFVADDR));

		offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
		/* a NULL address space means the kernel's (kas) */
		sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
		if (sglinfo->si_asp == NULL) {
			sglinfo->si_asp = &kas;
		}

		ASSERT(!PP_ISFREE(pplist[pcnt]));
		paddr = pfn_to_pa(pplist[pcnt]->p_pagenum);
		paddr += offset;
		psize = MIN(size, (MMU_PAGESIZE - offset));
		pcnt++;

	/*
	 * All we have is a virtual address, we'll need to call into the VM
	 * to get the physical address.
	 */
	} else {
		ASSERT((buftype == DMA_OTYP_VADDR) ||
		    (buftype == DMA_OTYP_BUFVADDR));

		offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
		/* a NULL address space means the kernel's (kas) */
		sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as;
		if (sglinfo->si_asp == NULL) {
			sglinfo->si_asp = &kas;
		}

		paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, vaddr));
		paddr += offset;
		psize = MIN(size, (MMU_PAGESIZE - offset));
		/* vaddr now points at the start of the second page */
		vaddr += psize;
	}

#ifdef __xpv
	/*
	 * If we're dom0, we're using a real device so we need to load
	 * the cookies with MFNs instead of PFNs.
	 */
	raddr = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
#else
	raddr = paddr;
#endif

	/*
	 * Setup the first cookie with the physical address of the page and the
	 * size of the page (which takes into account the initial offset into
	 * the page).
	 */
	sgl[cnt].dmac_laddress = raddr;
	sgl[cnt].dmac_size = psize;
	sgl[cnt].dmac_type = 0;

	/*
	 * Save away the buffer offset into the page. We'll need this later in
	 * the copy buffer code to help figure out the page index within the
	 * buffer and the offset into the current page.
	 */
	sglinfo->si_buf_offset = offset;

	/*
	 * If the DMA engine can't reach the physical address, increase how
	 * much copy buffer we need. We always increase by pagesize so we don't
	 * have to worry about converting offsets. Set a flag in the cookies
	 * dmac_type to indicate that it uses the copy buffer. If this isn't the
	 * last cookie, go to the next cookie (since we separate each page which
	 * uses the copy buffer in case the copy buffer is not physically
	 * contiguous).
	 *
	 * NOTE(review): (raddr + psize) is one past the last byte of this
	 * piece. If si_max_addr is the highest address the device can reach
	 * (inclusive), a piece ending exactly at that address is sent to the
	 * copy buffer as well - confirm whether that is intended. The same
	 * test is used for every later page in the loop below.
	 */
	if ((raddr < addrlo) || ((raddr + psize) > addrhi)) {
		sglinfo->si_copybuf_req += MMU_PAGESIZE;
		sgl[cnt].dmac_type = ROOTNEX_USES_COPYBUF;
		if ((cnt + 1) < sglinfo->si_max_pages) {
			/* open a new, empty cookie (size 0 marks it empty) */
			cnt++;
			sgl[cnt].dmac_laddress = 0;
			sgl[cnt].dmac_size = 0;
			sgl[cnt].dmac_type = 0;
		}
	}

	/*
	 * save this page's physical address so we can figure out if the next
	 * page is physically contiguous. Keep decrementing size until we are
	 * done with the buffer.
	 */
	last_page = raddr & MMU_PAGEMASK;
	size -= psize;

	while (size > 0) {
		/* Get the size for this page (i.e. partial or full page) */
		psize = MIN(size, MMU_PAGESIZE);

		if (buftype == DMA_OTYP_PAGES) {
			/* get the paddr from the page_t */
			ASSERT(!PP_ISFREE(pp) && PAGE_LOCKED(pp));
			paddr = pfn_to_pa(pp->p_pagenum);
			pp = pp->p_next;
		} else if (pplist != NULL) {
			/* index into the array of page_t's to get the paddr */
			ASSERT(!PP_ISFREE(pplist[pcnt]));
			paddr = pfn_to_pa(pplist[pcnt]->p_pagenum);
			pcnt++;
		} else {
			/* call into the VM to get the paddr */
			paddr =  pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat,
			    vaddr));
			vaddr += psize;
		}

#ifdef __xpv
		/*
		 * If we're dom0, we're using a real device so we need to load
		 * the cookies with MFNs instead of PFNs.
		 */
		raddr = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
#else
		raddr = paddr;
#endif

		/* check to see if this page needs the copy buffer */
		if ((raddr < addrlo) || ((raddr + psize) > addrhi)) {
			sglinfo->si_copybuf_req += MMU_PAGESIZE;

			/*
			 * if there is something in the current cookie, go to
			 * the next one. We only want one page in a cookie which
			 * uses the copybuf since the copybuf doesn't have to
			 * be physically contiguous.
			 */
			if (sgl[cnt].dmac_size != 0) {
				cnt++;
			}
			sgl[cnt].dmac_laddress = raddr;
			sgl[cnt].dmac_size = psize;
#if defined(__amd64)
			sgl[cnt].dmac_type = ROOTNEX_USES_COPYBUF;
#else
			/*
			 * save the buf offset for 32-bit kernel. used in the
			 * obsoleted interfaces.
			 */
			sgl[cnt].dmac_type = ROOTNEX_USES_COPYBUF |
			    (dmar_object->dmao_size - size);
#endif
			/* if this isn't the last cookie, go to the next one */
			if ((cnt + 1) < sglinfo->si_max_pages) {
				cnt++;
				sgl[cnt].dmac_laddress = 0;
				sgl[cnt].dmac_size = 0;
				sgl[cnt].dmac_type = 0;
			}

		/*
		 * this page didn't need the copy buffer, if it's not physically
		 * contiguous, or it would put us over a segment boundary, or it
		 * puts us over the max cookie size, or the current sgl doesn't
		 * have anything in it.
		 */
		} else if (((last_page + MMU_PAGESIZE) != raddr) ||
		    !(raddr & sglinfo->si_segmask) ||
		    ((sgl[cnt].dmac_size + psize) > maxseg) ||
		    (sgl[cnt].dmac_size == 0)) {
			/*
			 * if we're not already in a new cookie, go to the next
			 * cookie.
			 */
			if (sgl[cnt].dmac_size != 0) {
				cnt++;
			}

			/* save the cookie information */
			sgl[cnt].dmac_laddress = raddr;
			sgl[cnt].dmac_size = psize;
#if defined(__amd64)
			sgl[cnt].dmac_type = 0;
#else
			/*
			 * save the buf offset for 32-bit kernel. used in the
			 * obsoleted interfaces.
			 */
			sgl[cnt].dmac_type = dmar_object->dmao_size - size;
#endif

		/*
		 * this page didn't need the copy buffer, it is physically
		 * contiguous with the last page, and it's <= the max cookie
		 * size.
		 */
		} else {
			sgl[cnt].dmac_size += psize;

			/*
			 * if this exactly ==  the maximum cookie size, and
			 * it isn't the last cookie, go to the next cookie.
			 *
			 * NOTE(review): psize has already been added to
			 * dmac_size just above, so this test fires when the
			 * cookie is one psize short of maxseg rather than
			 * exactly at it. The cookie still never exceeds
			 * maxseg; it is only closed earlier than the comment
			 * describes. Confirm before changing.
			 */
			if (((sgl[cnt].dmac_size + psize) == maxseg) &&
			    ((cnt + 1) < sglinfo->si_max_pages)) {
				cnt++;
				sgl[cnt].dmac_laddress = 0;
				sgl[cnt].dmac_size = 0;
				sgl[cnt].dmac_type = 0;
			}
		}

		/*
		 * save this page's physical address so we can figure out if the
		 * next page is physically contiguous. Keep decrementing size
		 * until we are done with the buffer.
		 */
		last_page = raddr;
		size -= psize;
	}

	/*
	 * we're done, save away how many cookies the sgl has. If the cookie
	 * we stopped on is still empty (size 0) it is not counted.
	 */
	if (sgl[cnt].dmac_size == 0) {
		ASSERT(cnt < sglinfo->si_max_pages);
		sglinfo->si_sgl_size = cnt;
	} else {
		sglinfo->si_sgl_size = cnt + 1;
	}
}
2667 
2668 
2669 /*
2670  * rootnex_bind_slowpath()
2671  *    Call in the bind path if the calling driver can't use the sgl without
2672  *    modifying it. We either need to use the copy buffer and/or we will end up
2673  *    with a partial bind.
2674  */
2675 static int
2676 rootnex_bind_slowpath(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
2677     rootnex_dma_t *dma, ddi_dma_attr_t *attr, int kmflag)
2678 {
2679 	rootnex_sglinfo_t *sinfo;
2680 	rootnex_window_t *window;
2681 	ddi_dma_cookie_t *cookie;
2682 	size_t copybuf_used;
2683 	size_t dmac_size;
2684 	boolean_t partial;
2685 	off_t cur_offset;
2686 	page_t *cur_pp;
2687 	major_t mnum;
2688 	int e;
2689 	int i;
2690 
2691 
2692 	sinfo = &dma->dp_sglinfo;
2693 	copybuf_used = 0;
2694 	partial = B_FALSE;
2695 
2696 	/*
2697 	 * If we're using the copybuf, set the copybuf state in dma struct.
2698 	 * Needs to be first since it sets the copy buffer size.
2699 	 */
2700 	if (sinfo->si_copybuf_req != 0) {
2701 		e = rootnex_setup_copybuf(hp, dmareq, dma, attr);
2702 		if (e != DDI_SUCCESS) {
2703 			return (e);
2704 		}
2705 	} else {
2706 		dma->dp_copybuf_size = 0;
2707 	}
2708 
2709 	/*
2710 	 * Figure out if we need to do a partial mapping. If so, figure out
2711 	 * if we need to trim the buffers when we munge the sgl.
2712 	 */
2713 	if ((dma->dp_copybuf_size < sinfo->si_copybuf_req) ||
2714 	    (dma->dp_dma.dmao_size > dma->dp_maxxfer) ||
2715 	    (attr->dma_attr_sgllen < sinfo->si_sgl_size)) {
2716 		dma->dp_partial_required = B_TRUE;
2717 		if (attr->dma_attr_granular != 1) {
2718 			dma->dp_trim_required = B_TRUE;
2719 		}
2720 	} else {
2721 		dma->dp_partial_required = B_FALSE;
2722 		dma->dp_trim_required = B_FALSE;
2723 	}
2724 
2725 	/* If we need to do a partial bind, make sure the driver supports it */
2726 	if (dma->dp_partial_required &&
2727 	    !(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
2728 
2729 		mnum = ddi_driver_major(dma->dp_dip);
2730 		/*
2731 		 * patchable which allows us to print one warning per major
2732 		 * number.
2733 		 */
2734 		if ((rootnex_bind_warn) &&
2735 		    ((rootnex_warn_list[mnum] & ROOTNEX_BIND_WARNING) == 0)) {
2736 			rootnex_warn_list[mnum] |= ROOTNEX_BIND_WARNING;
2737 			cmn_err(CE_WARN, "!%s: coding error detected, the "
2738 			    "driver is using ddi_dma_attr(9S) incorrectly. "
2739 			    "There is a small risk of data corruption in "
2740 			    "particular with large I/Os. The driver should be "
2741 			    "replaced with a corrected version for proper "
2742 			    "system operation. To disable this warning, add "
2743 			    "'set rootnex:rootnex_bind_warn=0' to "
2744 			    "/etc/system(4).", ddi_driver_name(dma->dp_dip));
2745 		}
2746 		return (DDI_DMA_TOOBIG);
2747 	}
2748 
2749 	/*
2750 	 * we might need multiple windows, setup state to handle them. In this
2751 	 * code path, we will have at least one window.
2752 	 */
2753 	e = rootnex_setup_windows(hp, dma, attr, kmflag);
2754 	if (e != DDI_SUCCESS) {
2755 		rootnex_teardown_copybuf(dma);
2756 		return (e);
2757 	}
2758 
2759 	window = &dma->dp_window[0];
2760 	cookie = &dma->dp_cookies[0];
2761 	cur_offset = 0;
2762 	rootnex_init_win(hp, dma, window, cookie, cur_offset);
2763 	if (dmareq->dmar_object.dmao_type == DMA_OTYP_PAGES) {
2764 		cur_pp = dmareq->dmar_object.dmao_obj.pp_obj.pp_pp;
2765 	}
2766 
2767 	/* loop though all the cookies we got back from get_sgl() */
2768 	for (i = 0; i < sinfo->si_sgl_size; i++) {
2769 		/*
2770 		 * If we're using the copy buffer, check this cookie and setup
2771 		 * its associated copy buffer state. If this cookie uses the
2772 		 * copy buffer, make sure we sync this window during dma_sync.
2773 		 */
2774 		if (dma->dp_copybuf_size > 0) {
2775 			rootnex_setup_cookie(&dmareq->dmar_object, dma, cookie,
2776 			    cur_offset, &copybuf_used, &cur_pp);
2777 			if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
2778 				window->wd_dosync = B_TRUE;
2779 			}
2780 		}
2781 
2782 		/*
2783 		 * save away the cookie size, since it could be modified in
2784 		 * the windowing code.
2785 		 */
2786 		dmac_size = cookie->dmac_size;
2787 
2788 		/* if we went over max copybuf size */
2789 		if (dma->dp_copybuf_size &&
2790 		    (copybuf_used > dma->dp_copybuf_size)) {
2791 			partial = B_TRUE;
2792 			e = rootnex_copybuf_window_boundary(hp, dma, &window,
2793 			    cookie, cur_offset, &copybuf_used);
2794 			if (e != DDI_SUCCESS) {
2795 				rootnex_teardown_copybuf(dma);
2796 				rootnex_teardown_windows(dma);
2797 				return (e);
2798 			}
2799 
2800 			/*
2801 			 * if the coookie uses the copy buffer, make sure the
2802 			 * new window we just moved to is set to sync.
2803 			 */
2804 			if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
2805 				window->wd_dosync = B_TRUE;
2806 			}
2807 			DTRACE_PROBE1(rootnex__copybuf__window, dev_info_t *,
2808 			    dma->dp_dip);
2809 
2810 		/* if the cookie cnt == max sgllen, move to the next window */
2811 		} else if (window->wd_cookie_cnt >= attr->dma_attr_sgllen) {
2812 			partial = B_TRUE;
2813 			ASSERT(window->wd_cookie_cnt == attr->dma_attr_sgllen);
2814 			e = rootnex_sgllen_window_boundary(hp, dma, &window,
2815 			    cookie, attr, cur_offset);
2816 			if (e != DDI_SUCCESS) {
2817 				rootnex_teardown_copybuf(dma);
2818 				rootnex_teardown_windows(dma);
2819 				return (e);
2820 			}
2821 
2822 			/*
2823 			 * if the coookie uses the copy buffer, make sure the
2824 			 * new window we just moved to is set to sync.
2825 			 */
2826 			if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
2827 				window->wd_dosync = B_TRUE;
2828 			}
2829 			DTRACE_PROBE1(rootnex__sgllen__window, dev_info_t *,
2830 			    dma->dp_dip);
2831 
2832 		/* else if we will be over maxxfer */
2833 		} else if ((window->wd_size + dmac_size) >
2834 		    dma->dp_maxxfer) {
2835 			partial = B_TRUE;
2836 			e = rootnex_maxxfer_window_boundary(hp, dma, &window,
2837 			    cookie);
2838 			if (e != DDI_SUCCESS) {
2839 				rootnex_teardown_copybuf(dma);
2840 				rootnex_teardown_windows(dma);
2841 				return (e);
2842 			}
2843 
2844 			/*
2845 			 * if the coookie uses the copy buffer, make sure the
2846 			 * new window we just moved to is set to sync.
2847 			 */
2848 			if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
2849 				window->wd_dosync = B_TRUE;
2850 			}
2851 			DTRACE_PROBE1(rootnex__maxxfer__window, dev_info_t *,
2852 			    dma->dp_dip);
2853 
2854 		/* else this cookie fits in the current window */
2855 		} else {
2856 			window->wd_cookie_cnt++;
2857 			window->wd_size += dmac_size;
2858 		}
2859 
2860 		/* track our offset into the buffer, go to the next cookie */
2861 		ASSERT(dmac_size <= dma->dp_dma.dmao_size);
2862 		ASSERT(cookie->dmac_size <= dmac_size);
2863 		cur_offset += dmac_size;
2864 		cookie++;
2865 	}
2866 
2867 	/* if we ended up with a zero sized window in the end, clean it up */
2868 	if (window->wd_size == 0) {
2869 		hp->dmai_nwin--;
2870 		window--;
2871 	}
2872 
2873 	ASSERT(window->wd_trim.tr_trim_last == B_FALSE);
2874 
2875 	if (!partial) {
2876 		return (DDI_DMA_MAPPED);
2877 	}
2878 
2879 	ASSERT(dma->dp_partial_required);
2880 	return (DDI_DMA_PARTIAL_MAP);
2881 }
2882 
2883 
2884 /*
2885  * rootnex_setup_copybuf()
2886  *    Called in bind slowpath. Figures out if we're going to use the copy
2887  *    buffer, and if we do, sets up the basic state to handle it.
2888  */
2889 static int
2890 rootnex_setup_copybuf(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
2891     rootnex_dma_t *dma, ddi_dma_attr_t *attr)
2892 {
2893 	rootnex_sglinfo_t *sinfo;
2894 	ddi_dma_attr_t lattr;
2895 	size_t max_copybuf;
2896 	int cansleep;
2897 	int e;
2898 #if !defined(__amd64)
2899 	int vmflag;
2900 #endif
2901 
2902 
2903 	sinfo = &dma->dp_sglinfo;
2904 
2905 	/* read this first so it's consistent through the routine  */
2906 	max_copybuf = i_ddi_copybuf_size() & MMU_PAGEMASK;
2907 
2908 	/* We need to call into the rootnex on ddi_dma_sync() */
2909 	hp->dmai_rflags &= ~DMP_NOSYNC;
2910 
2911 	/* make sure the copybuf size <= the max size */
2912 	dma->dp_copybuf_size = MIN(sinfo->si_copybuf_req, max_copybuf);
2913 	ASSERT((dma->dp_copybuf_size & MMU_PAGEOFFSET) == 0);
2914 
2915 #if !defined(__amd64)
2916 	/*
2917 	 * if we don't have kva space to copy to/from, allocate the KVA space
2918 	 * now. We only do this for the 32-bit kernel. We use seg kpm space for
2919 	 * the 64-bit kernel.
2920 	 */
2921 	if ((dmareq->dmar_object.dmao_type == DMA_OTYP_PAGES) ||
2922 	    (dmareq->dmar_object.dmao_obj.virt_obj.v_as != NULL)) {
2923 
2924 		/* convert the sleep flags */
2925 		if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
2926 			vmflag = VM_SLEEP;
2927 		} else {
2928 			vmflag = VM_NOSLEEP;
2929 		}
2930 
2931 		/* allocate Kernel VA space that we can bcopy to/from */
2932 		dma->dp_kva = vmem_alloc(heap_arena, dma->dp_copybuf_size,
2933 		    vmflag);
2934 		if (dma->dp_kva == NULL) {
2935 			return (DDI_DMA_NORESOURCES);
2936 		}
2937 	}
2938 #endif
2939 
2940 	/* convert the sleep flags */
2941 	if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
2942 		cansleep = 1;
2943 	} else {
2944 		cansleep = 0;
2945 	}
2946 
2947 	/*
2948 	 * Allocate the actual copy buffer. This needs to fit within the DMA
2949 	 * engine limits, so we can't use kmem_alloc... We don't need
2950 	 * contiguous memory (sgllen) since we will be forcing windows on
2951 	 * sgllen anyway.
2952 	 */
2953 	lattr = *attr;
2954 	lattr.dma_attr_align = MMU_PAGESIZE;
2955 	/*
2956 	 * this should be < 0 to indicate no limit, but due to a bug in
2957 	 * the rootnex, we'll set it to the maximum positive int.
2958 	 */
2959 	lattr.dma_attr_sgllen = 0x7fffffff;
2960 	e = i_ddi_mem_alloc(dma->dp_dip, &lattr, dma->dp_copybuf_size, cansleep,
2961 	    0, NULL, &dma->dp_cbaddr, &dma->dp_cbsize, NULL);
2962 	if (e != DDI_SUCCESS) {
2963 #if !defined(__amd64)
2964 		if (dma->dp_kva != NULL) {
2965 			vmem_free(heap_arena, dma->dp_kva,
2966 			    dma->dp_copybuf_size);
2967 		}
2968 #endif
2969 		return (DDI_DMA_NORESOURCES);
2970 	}
2971 
2972 	DTRACE_PROBE2(rootnex__alloc__copybuf, dev_info_t *, dma->dp_dip,
2973 	    size_t, dma->dp_copybuf_size);
2974 
2975 	return (DDI_SUCCESS);
2976 }
2977 
2978 
2979 /*
2980  * rootnex_setup_windows()
2981  *    Called in bind slowpath to setup the window state. We always have windows
2982  *    in the slowpath. Even if the window count = 1.
2983  */
2984 static int
2985 rootnex_setup_windows(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
2986     ddi_dma_attr_t *attr, int kmflag)
2987 {
2988 	rootnex_window_t *windowp;
2989 	rootnex_sglinfo_t *sinfo;
2990 	size_t copy_state_size;
2991 	size_t win_state_size;
2992 	size_t state_available;
2993 	size_t space_needed;
2994 	uint_t copybuf_win;
2995 	uint_t maxxfer_win;
2996 	size_t space_used;
2997 	uint_t sglwin;
2998 
2999 
3000 	sinfo = &dma->dp_sglinfo;
3001 
3002 	dma->dp_current_win = 0;
3003 	hp->dmai_nwin = 0;
3004 
3005 	/* If we don't need to do a partial, we only have one window */
3006 	if (!dma->dp_partial_required) {
3007 		dma->dp_max_win = 1;
3008 
3009 	/*
3010 	 * we need multiple windows, need to figure out the worse case number
3011 	 * of windows.
3012 	 */
3013 	} else {
3014 		/*
3015 		 * if we need windows because we need more copy buffer that
3016 		 * we allow, the worse case number of windows we could need
3017 		 * here would be (copybuf space required / copybuf space that
3018 		 * we have) plus one for remainder, and plus 2 to handle the
3019 		 * extra pages on the trim for the first and last pages of the
3020 		 * buffer (a page is the minimum window size so under the right
3021 		 * attr settings, you could have a window for each page).
3022 		 * The last page will only be hit here if the size is not a
3023 		 * multiple of the granularity (which theoretically shouldn't
3024 		 * be the case but never has been enforced, so we could have
3025 		 * broken things without it).
3026 		 */
3027 		if (sinfo->si_copybuf_req > dma->dp_copybuf_size) {
3028 			ASSERT(dma->dp_copybuf_size > 0);
3029 			copybuf_win = (sinfo->si_copybuf_req /
3030 			    dma->dp_copybuf_size) + 1 + 2;
3031 		} else {
3032 			copybuf_win = 0;
3033 		}
3034 
3035 		/*
3036 		 * if we need windows because we have more cookies than the H/W
3037 		 * can handle, the number of windows we would need here would
3038 		 * be (cookie count / cookies count H/W supports) plus one for
3039 		 * remainder, and plus 2 to handle the extra pages on the trim
3040 		 * (see above comment about trim)
3041 		 */
3042 		if (attr->dma_attr_sgllen < sinfo->si_sgl_size) {
3043 			sglwin = ((sinfo->si_sgl_size / attr->dma_attr_sgllen)
3044 			    + 1) + 2;
3045 		} else {
3046 			sglwin = 0;
3047 		}
3048 
3049 		/*
3050 		 * if we need windows because we're binding more memory than the
3051 		 * H/W can transfer at once, the number of windows we would need
3052 		 * here would be (xfer count / max xfer H/W supports) plus one
3053 		 * for remainder, and plus 2 to handle the extra pages on the
3054 		 * trim (see above comment about trim)
3055 		 */
3056 		if (dma->dp_dma.dmao_size > dma->dp_maxxfer) {
3057 			maxxfer_win = (dma->dp_dma.dmao_size /
3058 			    dma->dp_maxxfer) + 1 + 2;
3059 		} else {
3060 			maxxfer_win = 0;
3061 		}
3062 		dma->dp_max_win =  copybuf_win + sglwin + maxxfer_win;
3063 		ASSERT(dma->dp_max_win > 0);
3064 	}
3065 	win_state_size = dma->dp_max_win * sizeof (rootnex_window_t);
3066 
3067 	/*
3068 	 * Get space for window and potential copy buffer state. Before we
3069 	 * go and allocate memory, see if we can get away with using what's
3070 	 * left in the pre-allocted state or the dynamically allocated sgl.
3071 	 */
3072 	space_used = (uintptr_t)(sinfo->si_sgl_size *
3073 	    sizeof (ddi_dma_cookie_t));
3074 
3075 	/* if we dynamically allocated space for the cookies */
3076 	if (dma->dp_need_to_free_cookie) {
3077 		/* if we have more space in the pre-allocted buffer, use it */
3078 		ASSERT(space_used <= dma->dp_cookie_size);
3079 		if ((dma->dp_cookie_size - space_used) <=
3080 		    rootnex_state->r_prealloc_size) {
3081 			state_available = rootnex_state->r_prealloc_size;
3082 			windowp = (rootnex_window_t *)dma->dp_prealloc_buffer;
3083 
3084 		/*
3085 		 * else, we have more free space in the dynamically allocated
3086 		 * buffer, i.e. the buffer wasn't worse case fragmented so we
3087 		 * didn't need a lot of cookies.
3088 		 */
3089 		} else {
3090 			state_available = dma->dp_cookie_size - space_used;
3091 			windowp = (rootnex_window_t *)
3092 			    &dma->dp_cookies[sinfo->si_sgl_size];
3093 		}
3094 
3095 	/* we used the pre-alloced buffer */
3096 	} else {
3097 		ASSERT(space_used <= rootnex_state->r_prealloc_size);
3098 		state_available = rootnex_state->r_prealloc_size - space_used;
3099 		windowp = (rootnex_window_t *)
3100 		    &dma->dp_cookies[sinfo->si_sgl_size];
3101 	}
3102 
3103 	/*
3104 	 * figure out how much state we need to track the copy buffer. Add an
3105 	 * addition 8 bytes for pointer alignemnt later.
3106 	 */
3107 	if (dma->dp_copybuf_size > 0) {
3108 		copy_state_size = sinfo->si_max_pages *
3109 		    sizeof (rootnex_pgmap_t);
3110 	} else {
3111 		copy_state_size = 0;
3112 	}
3113 	/* add an additional 8 bytes for pointer alignment */
3114 	space_needed = win_state_size + copy_state_size + 0x8;
3115 
3116 	/* if we have enough space already, use it */
3117 	if (state_available >= space_needed) {
3118 		dma->dp_window = windowp;
3119 		dma->dp_need_to_free_window = B_FALSE;
3120 
3121 	/* not enough space, need to allocate more. */
3122 	} else {
3123 		dma->dp_window = kmem_alloc(space_needed, kmflag);
3124 		if (dma->dp_window == NULL) {
3125 			return (DDI_DMA_NORESOURCES);
3126 		}
3127 		dma->dp_need_to_free_window = B_TRUE;
3128 		dma->dp_window_size = space_needed;
3129 		DTRACE_PROBE2(rootnex__bind__sp__alloc, dev_info_t *,
3130 		    dma->dp_dip, size_t, space_needed);
3131 	}
3132 
3133 	/*
3134 	 * we allocate copy buffer state and window state at the same time.
3135 	 * setup our copy buffer state pointers. Make sure it's aligned.
3136 	 */
3137 	if (dma->dp_copybuf_size > 0) {
3138 		dma->dp_pgmap = (rootnex_pgmap_t *)(((uintptr_t)
3139 		    &dma->dp_window[dma->dp_max_win] + 0x7) & ~0x7);
3140 
3141 #if !defined(__amd64)
3142 		/*
3143 		 * make sure all pm_mapped, pm_vaddr, and pm_pp are set to
3144 		 * false/NULL. Should be quicker to bzero vs loop and set.
3145 		 */
3146 		bzero(dma->dp_pgmap, copy_state_size);
3147 #endif
3148 	} else {
3149 		dma->dp_pgmap = NULL;
3150 	}
3151 
3152 	return (DDI_SUCCESS);
3153 }
3154 
3155 
3156 /*
3157  * rootnex_teardown_copybuf()
3158  *    cleans up after rootnex_setup_copybuf()
3159  */
3160 static void
3161 rootnex_teardown_copybuf(rootnex_dma_t *dma)
3162 {
3163 #if !defined(__amd64)
3164 	int i;
3165 
3166 	/*
3167 	 * if we allocated kernel heap VMEM space, go through all the pages and
3168 	 * map out any of the ones that we're mapped into the kernel heap VMEM
3169 	 * arena. Then free the VMEM space.
3170 	 */
3171 	if (dma->dp_kva != NULL) {
3172 		for (i = 0; i < dma->dp_sglinfo.si_max_pages; i++) {
3173 			if (dma->dp_pgmap[i].pm_mapped) {
3174 				hat_unload(kas.a_hat, dma->dp_pgmap[i].pm_kaddr,
3175 				    MMU_PAGESIZE, HAT_UNLOAD);
3176 				dma->dp_pgmap[i].pm_mapped = B_FALSE;
3177 			}
3178 		}
3179 
3180 		vmem_free(heap_arena, dma->dp_kva, dma->dp_copybuf_size);
3181 	}
3182 
3183 #endif
3184 
3185 	/* if we allocated a copy buffer, free it */
3186 	if (dma->dp_cbaddr != NULL) {
3187 		i_ddi_mem_free(dma->dp_cbaddr, NULL);
3188 	}
3189 }
3190 
3191 
3192 /*
3193  * rootnex_teardown_windows()
3194  *    cleans up after rootnex_setup_windows()
3195  */
3196 static void
3197 rootnex_teardown_windows(rootnex_dma_t *dma)
3198 {
3199 	/*
3200 	 * if we had to allocate window state on the last bind (because we
3201 	 * didn't have enough pre-allocated space in the handle), free it.
3202 	 */
3203 	if (dma->dp_need_to_free_window) {
3204 		kmem_free(dma->dp_window, dma->dp_window_size);
3205 	}
3206 }
3207 
3208 
3209 /*
3210  * rootnex_init_win()
3211  *    Called in bind slow path during creation of a new window. Initializes
3212  *    window state to default values.
3213  */
3214 /*ARGSUSED*/
3215 static void
3216 rootnex_init_win(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
3217     rootnex_window_t *window, ddi_dma_cookie_t *cookie, off_t cur_offset)
3218 {
3219 	hp->dmai_nwin++;
3220 	window->wd_dosync = B_FALSE;
3221 	window->wd_offset = cur_offset;
3222 	window->wd_size = 0;
3223 	window->wd_first_cookie = cookie;
3224 	window->wd_cookie_cnt = 0;
3225 	window->wd_trim.tr_trim_first = B_FALSE;
3226 	window->wd_trim.tr_trim_last = B_FALSE;
3227 	window->wd_trim.tr_first_copybuf_win = B_FALSE;
3228 	window->wd_trim.tr_last_copybuf_win = B_FALSE;
3229 #if !defined(__amd64)
3230 	window->wd_remap_copybuf = dma->dp_cb_remaping;
3231 #endif
3232 }
3233 
3234 
3235 /*
3236  * rootnex_setup_cookie()
3237  *    Called in the bind slow path when the sgl uses the copy buffer. If any of
3238  *    the sgl uses the copy buffer, we need to go through each cookie, figure
3239  *    out if it uses the copy buffer, and if it does, save away everything we'll
3240  *    need during sync.
3241  */
3242 static void
3243 rootnex_setup_cookie(ddi_dma_obj_t *dmar_object, rootnex_dma_t *dma,
3244     ddi_dma_cookie_t *cookie, off_t cur_offset, size_t *copybuf_used,
3245     page_t **cur_pp)
3246 {
3247 	boolean_t copybuf_sz_power_2;
3248 	rootnex_sglinfo_t *sinfo;
3249 	paddr_t paddr;
3250 	uint_t pidx;
3251 	uint_t pcnt;
3252 	off_t poff;
3253 #if defined(__amd64)
3254 	pfn_t pfn;
3255 #else
3256 	page_t **pplist;
3257 #endif
3258 
3259 	sinfo = &dma->dp_sglinfo;
3260 
3261 	/*
3262 	 * Calculate the page index relative to the start of the buffer. The
3263 	 * index to the current page for our buffer is the offset into the
3264 	 * first page of the buffer plus our current offset into the buffer
3265 	 * itself, shifted of course...
3266 	 */
3267 	pidx = (sinfo->si_buf_offset + cur_offset) >> MMU_PAGESHIFT;
3268 	ASSERT(pidx < sinfo->si_max_pages);
3269 
3270 	/* if this cookie uses the copy buffer */
3271 	if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3272 		/*
3273 		 * NOTE: we know that since this cookie uses the copy buffer, it
3274 		 * is <= MMU_PAGESIZE.
3275 		 */
3276 
3277 		/*
3278 		 * get the offset into the page. For the 64-bit kernel, get the
3279 		 * pfn which we'll use with seg kpm.
3280 		 */
3281 		poff = cookie->dmac_laddress & MMU_PAGEOFFSET;
3282 #if defined(__amd64)
3283 		/* mfn_to_pfn() is a NOP on i86pc */
3284 		pfn = mfn_to_pfn(cookie->dmac_laddress >> MMU_PAGESHIFT);
3285 #endif /* __amd64 */
3286 
3287 		/* figure out if the copybuf size is a power of 2 */
3288 		if (dma->dp_copybuf_size & (dma->dp_copybuf_size - 1)) {
3289 			copybuf_sz_power_2 = B_FALSE;
3290 		} else {
3291 			copybuf_sz_power_2 = B_TRUE;
3292 		}
3293 
3294 		/* This page uses the copy buffer */
3295 		dma->dp_pgmap[pidx].pm_uses_copybuf = B_TRUE;
3296 
3297 		/*
3298 		 * save the copy buffer KVA that we'll use with this page.
3299 		 * if we still fit within the copybuf, it's a simple add.
3300 		 * otherwise, we need to wrap over using & or % accordingly.
3301 		 */
3302 		if ((*copybuf_used + MMU_PAGESIZE) <= dma->dp_copybuf_size) {
3303 			dma->dp_pgmap[pidx].pm_cbaddr = dma->dp_cbaddr +
3304 			    *copybuf_used;
3305 		} else {
3306 			if (copybuf_sz_power_2) {
3307 				dma->dp_pgmap[pidx].pm_cbaddr = (caddr_t)(
3308 				    (uintptr_t)dma->dp_cbaddr +
3309 				    (*copybuf_used &
3310 				    (dma->dp_copybuf_size - 1)));
3311 			} else {
3312 				dma->dp_pgmap[pidx].pm_cbaddr = (caddr_t)(
3313 				    (uintptr_t)dma->dp_cbaddr +
3314 				    (*copybuf_used % dma->dp_copybuf_size));
3315 			}
3316 		}
3317 
3318 		/*
3319 		 * over write the cookie physical address with the address of
3320 		 * the physical address of the copy buffer page that we will
3321 		 * use.
3322 		 */
3323 		paddr = pfn_to_pa(hat_getpfnum(kas.a_hat,
3324 		    dma->dp_pgmap[pidx].pm_cbaddr)) + poff;
3325 
3326 #ifdef __xpv
3327 		/*
3328 		 * If we're dom0, we're using a real device so we need to load
3329 		 * the cookies with MAs instead of PAs.
3330 		 */
3331 		cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
3332 #else
3333 		cookie->dmac_laddress = paddr;
3334 #endif
3335 
3336 		/* if we have a kernel VA, it's easy, just save that address */
3337 		if ((dmar_object->dmao_type != DMA_OTYP_PAGES) &&
3338 		    (sinfo->si_asp == &kas)) {
3339 			/*
3340 			 * save away the page aligned virtual address of the
3341 			 * driver buffer. Offsets are handled in the sync code.
3342 			 */
3343 			dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)(((uintptr_t)
3344 			    dmar_object->dmao_obj.virt_obj.v_addr + cur_offset)
3345 			    & MMU_PAGEMASK);
3346 #if !defined(__amd64)
3347 			/*
3348 			 * we didn't need to, and will never need to map this
3349 			 * page.
3350 			 */
3351 			dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
3352 #endif
3353 
3354 		/* we don't have a kernel VA. We need one for the bcopy. */
3355 		} else {
3356 #if defined(__amd64)
3357 			/*
3358 			 * for the 64-bit kernel, it's easy. We use seg kpm to
3359 			 * get a Kernel VA for the corresponding pfn.
3360 			 */
3361 			dma->dp_pgmap[pidx].pm_kaddr = hat_kpm_pfn2va(pfn);
3362 #else
3363 			/*
3364 			 * for the 32-bit kernel, this is a pain. First we'll
3365 			 * save away the page_t or user VA for this page. This
3366 			 * is needed in rootnex_dma_win() when we switch to a
3367 			 * new window which requires us to re-map the copy
3368 			 * buffer.
3369 			 */
3370 			pplist = dmar_object->dmao_obj.virt_obj.v_priv;
3371 			if (dmar_object->dmao_type == DMA_OTYP_PAGES) {
3372 				dma->dp_pgmap[pidx].pm_pp = *cur_pp;
3373 				dma->dp_pgmap[pidx].pm_vaddr = NULL;
3374 			} else if (pplist != NULL) {
3375 				dma->dp_pgmap[pidx].pm_pp = pplist[pidx];
3376 				dma->dp_pgmap[pidx].pm_vaddr = NULL;
3377 			} else {
3378 				dma->dp_pgmap[pidx].pm_pp = NULL;
3379 				dma->dp_pgmap[pidx].pm_vaddr = (caddr_t)
3380 				    (((uintptr_t)
3381 				    dmar_object->dmao_obj.virt_obj.v_addr +
3382 				    cur_offset) & MMU_PAGEMASK);
3383 			}
3384 
3385 			/*
3386 			 * save away the page aligned virtual address which was
3387 			 * allocated from the kernel heap arena (taking into
3388 			 * account if we need more copy buffer than we alloced
3389 			 * and use multiple windows to handle this, i.e. &,%).
3390 			 * NOTE: there isn't and physical memory backing up this
3391 			 * virtual address space currently.
3392 			 */
3393 			if ((*copybuf_used + MMU_PAGESIZE) <=
3394 			    dma->dp_copybuf_size) {
3395 				dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)
3396 				    (((uintptr_t)dma->dp_kva + *copybuf_used) &
3397 				    MMU_PAGEMASK);
3398 			} else {
3399 				if (copybuf_sz_power_2) {
3400 					dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)
3401 					    (((uintptr_t)dma->dp_kva +
3402 					    (*copybuf_used &
3403 					    (dma->dp_copybuf_size - 1))) &
3404 					    MMU_PAGEMASK);
3405 				} else {
3406 					dma->dp_pgmap[pidx].pm_kaddr = (caddr_t)
3407 					    (((uintptr_t)dma->dp_kva +
3408 					    (*copybuf_used %
3409 					    dma->dp_copybuf_size)) &
3410 					    MMU_PAGEMASK);
3411 				}
3412 			}
3413 
3414 			/*
3415 			 * if we haven't used up the available copy buffer yet,
3416 			 * map the kva to the physical page.
3417 			 */
3418 			if (!dma->dp_cb_remaping && ((*copybuf_used +
3419 			    MMU_PAGESIZE) <= dma->dp_copybuf_size)) {
3420 				dma->dp_pgmap[pidx].pm_mapped = B_TRUE;
3421 				if (dma->dp_pgmap[pidx].pm_pp != NULL) {
3422 					i86_pp_map(dma->dp_pgmap[pidx].pm_pp,
3423 					    dma->dp_pgmap[pidx].pm_kaddr);
3424 				} else {
3425 					i86_va_map(dma->dp_pgmap[pidx].pm_vaddr,
3426 					    sinfo->si_asp,
3427 					    dma->dp_pgmap[pidx].pm_kaddr);
3428 				}
3429 
3430 			/*
3431 			 * we've used up the available copy buffer, this page
3432 			 * will have to be mapped during rootnex_dma_win() when
3433 			 * we switch to a new window which requires a re-map
3434 			 * the copy buffer. (32-bit kernel only)
3435 			 */
3436 			} else {
3437 				dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
3438 			}
3439 #endif
3440 			/* go to the next page_t */
3441 			if (dmar_object->dmao_type == DMA_OTYP_PAGES) {
3442 				*cur_pp = (*cur_pp)->p_next;
3443 			}
3444 		}
3445 
3446 		/* add to the copy buffer count */
3447 		*copybuf_used += MMU_PAGESIZE;
3448 
3449 	/*
3450 	 * This cookie doesn't use the copy buffer. Walk through the pages this
3451 	 * cookie occupies to reflect this.
3452 	 */
3453 	} else {
3454 		/*
3455 		 * figure out how many pages the cookie occupies. We need to
3456 		 * use the original page offset of the buffer and the cookies
3457 		 * offset in the buffer to do this.
3458 		 */
3459 		poff = (sinfo->si_buf_offset + cur_offset) & MMU_PAGEOFFSET;
3460 		pcnt = mmu_btopr(cookie->dmac_size + poff);
3461 
3462 		while (pcnt > 0) {
3463 #if !defined(__amd64)
3464 			/*
3465 			 * the 32-bit kernel doesn't have seg kpm, so we need
3466 			 * to map in the driver buffer (if it didn't come down
3467 			 * with a kernel VA) on the fly. Since this page doesn't
3468 			 * use the copy buffer, it's not, or will it ever, have
3469 			 * to be mapped in.
3470 			 */
3471 			dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
3472 #endif
3473 			dma->dp_pgmap[pidx].pm_uses_copybuf = B_FALSE;
3474 
3475 			/*
3476 			 * we need to update pidx and cur_pp or we'll loose
3477 			 * track of where we are.
3478 			 */
3479 			if (dmar_object->dmao_type == DMA_OTYP_PAGES) {
3480 				*cur_pp = (*cur_pp)->p_next;
3481 			}
3482 			pidx++;
3483 			pcnt--;
3484 		}
3485 	}
3486 }
3487 
3488 
3489 /*
3490  * rootnex_sgllen_window_boundary()
3491  *    Called in the bind slow path when the next cookie causes us to exceed (in
3492  *    this case == since we start at 0 and sgllen starts at 1) the maximum sgl
3493  *    length supported by the DMA H/W.
3494  */
3495 static int
3496 rootnex_sgllen_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
3497     rootnex_window_t **windowp, ddi_dma_cookie_t *cookie, ddi_dma_attr_t *attr,
3498     off_t cur_offset)
3499 {
3500 	off_t new_offset;
3501 	size_t trim_sz;
3502 	off_t coffset;
3503 
3504 
3505 	/*
3506 	 * if we know we'll never have to trim, it's pretty easy. Just move to
3507 	 * the next window and init it. We're done.
3508 	 */
3509 	if (!dma->dp_trim_required) {
3510 		(*windowp)++;
3511 		rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
3512 		(*windowp)->wd_cookie_cnt++;
3513 		(*windowp)->wd_size = cookie->dmac_size;
3514 		return (DDI_SUCCESS);
3515 	}
3516 
3517 	/* figure out how much we need to trim from the window */
3518 	ASSERT(attr->dma_attr_granular != 0);
3519 	if (dma->dp_granularity_power_2) {
3520 		trim_sz = (*windowp)->wd_size & (attr->dma_attr_granular - 1);
3521 	} else {
3522 		trim_sz = (*windowp)->wd_size % attr->dma_attr_granular;
3523 	}
3524 
3525 	/* The window's a whole multiple of granularity. We're done */
3526 	if (trim_sz == 0) {
3527 		(*windowp)++;
3528 		rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
3529 		(*windowp)->wd_cookie_cnt++;
3530 		(*windowp)->wd_size = cookie->dmac_size;
3531 		return (DDI_SUCCESS);
3532 	}
3533 
3534 	/*
3535 	 * The window's not a whole multiple of granularity, since we know this
3536 	 * is due to the sgllen, we need to go back to the last cookie and trim
3537 	 * that one, add the left over part of the old cookie into the new
3538 	 * window, and then add in the new cookie into the new window.
3539 	 */
3540 
3541 	/*
3542 	 * make sure the driver isn't making us do something bad... Trimming and
3543 	 * sgllen == 1 don't go together.
3544 	 */
3545 	if (attr->dma_attr_sgllen == 1) {
3546 		return (DDI_DMA_NOMAPPING);
3547 	}
3548 
3549 	/*
3550 	 * first, setup the current window to account for the trim. Need to go
3551 	 * back to the last cookie for this.
3552 	 */
3553 	cookie--;
3554 	(*windowp)->wd_trim.tr_trim_last = B_TRUE;
3555 	(*windowp)->wd_trim.tr_last_cookie = cookie;
3556 	(*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
3557 	ASSERT(cookie->dmac_size > trim_sz);
3558 	(*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
3559 	(*windowp)->wd_size -= trim_sz;
3560 
3561 	/* save the buffer offsets for the next window */
3562 	coffset = cookie->dmac_size - trim_sz;
3563 	new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
3564 
3565 	/*
3566 	 * set this now in case this is the first window. all other cases are
3567 	 * set in dma_win()
3568 	 */
3569 	cookie->dmac_size = (*windowp)->wd_trim.tr_last_size;
3570 
3571 	/*
3572 	 * initialize the next window using what's left over in the previous
3573 	 * cookie.
3574 	 */
3575 	(*windowp)++;
3576 	rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
3577 	(*windowp)->wd_cookie_cnt++;
3578 	(*windowp)->wd_trim.tr_trim_first = B_TRUE;
3579 	(*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress + coffset;
3580 	(*windowp)->wd_trim.tr_first_size = trim_sz;
3581 	if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3582 		(*windowp)->wd_dosync = B_TRUE;
3583 	}
3584 
3585 	/*
3586 	 * now go back to the current cookie and add it to the new window. set
3587 	 * the new window size to the what was left over from the previous
3588 	 * cookie and what's in the current cookie.
3589 	 */
3590 	cookie++;
3591 	(*windowp)->wd_cookie_cnt++;
3592 	(*windowp)->wd_size = trim_sz + cookie->dmac_size;
3593 
3594 	/*
3595 	 * trim plus the next cookie could put us over maxxfer (a cookie can be
3596 	 * a max size of maxxfer). Handle that case.
3597 	 */
3598 	if ((*windowp)->wd_size > dma->dp_maxxfer) {
3599 		/*
3600 		 * maxxfer is already a whole multiple of granularity, and this
3601 		 * trim will be <= the previous trim (since a cookie can't be
3602 		 * larger than maxxfer). Make things simple here.
3603 		 */
3604 		trim_sz = (*windowp)->wd_size - dma->dp_maxxfer;
3605 		(*windowp)->wd_trim.tr_trim_last = B_TRUE;
3606 		(*windowp)->wd_trim.tr_last_cookie = cookie;
3607 		(*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
3608 		(*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
3609 		(*windowp)->wd_size -= trim_sz;
3610 		ASSERT((*windowp)->wd_size == dma->dp_maxxfer);
3611 
3612 		/* save the buffer offsets for the next window */
3613 		coffset = cookie->dmac_size - trim_sz;
3614 		new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
3615 
3616 		/* setup the next window */
3617 		(*windowp)++;
3618 		rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
3619 		(*windowp)->wd_cookie_cnt++;
3620 		(*windowp)->wd_trim.tr_trim_first = B_TRUE;
3621 		(*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress +
3622 		    coffset;
3623 		(*windowp)->wd_trim.tr_first_size = trim_sz;
3624 	}
3625 
3626 	return (DDI_SUCCESS);
3627 }
3628 
3629 
3630 /*
3631  * rootnex_copybuf_window_boundary()
3632  *    Called in bind slowpath when we get to a window boundary because we used
3633  *    up all the copy buffer that we have.
3634  */
3635 static int
3636 rootnex_copybuf_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
3637     rootnex_window_t **windowp, ddi_dma_cookie_t *cookie, off_t cur_offset,
3638     size_t *copybuf_used)
3639 {
3640 	rootnex_sglinfo_t *sinfo;
3641 	off_t new_offset;
3642 	size_t trim_sz;
3643 	paddr_t paddr;
3644 	off_t coffset;
3645 	uint_t pidx;
3646 	off_t poff;
3647 
3648 
3649 	sinfo = &dma->dp_sglinfo;
3650 
3651 	/*
3652 	 * the copy buffer should be a whole multiple of page size. We know that
3653 	 * this cookie is <= MMU_PAGESIZE.
3654 	 */
3655 	ASSERT(cookie->dmac_size <= MMU_PAGESIZE);
3656 
3657 	/*
3658 	 * from now on, all new windows in this bind need to be re-mapped during
3659 	 * ddi_dma_getwin() (32-bit kernel only). i.e. we ran out out copybuf
3660 	 * space...
3661 	 */
3662 #if !defined(__amd64)
3663 	dma->dp_cb_remaping = B_TRUE;
3664 #endif
3665 
3666 	/* reset copybuf used */
3667 	*copybuf_used = 0;
3668 
3669 	/*
3670 	 * if we don't have to trim (since granularity is set to 1), go to the
3671 	 * next window and add the current cookie to it. We know the current
3672 	 * cookie uses the copy buffer since we're in this code path.
3673 	 */
3674 	if (!dma->dp_trim_required) {
3675 		(*windowp)++;
3676 		rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
3677 
3678 		/* Add this cookie to the new window */
3679 		(*windowp)->wd_cookie_cnt++;
3680 		(*windowp)->wd_size += cookie->dmac_size;
3681 		*copybuf_used += MMU_PAGESIZE;
3682 		return (DDI_SUCCESS);
3683 	}
3684 
3685 	/*
3686 	 * *** may need to trim, figure it out.
3687 	 */
3688 
3689 	/* figure out how much we need to trim from the window */
3690 	if (dma->dp_granularity_power_2) {
3691 		trim_sz = (*windowp)->wd_size &
3692 		    (hp->dmai_attr.dma_attr_granular - 1);
3693 	} else {
3694 		trim_sz = (*windowp)->wd_size % hp->dmai_attr.dma_attr_granular;
3695 	}
3696 
3697 	/*
3698 	 * if the window's a whole multiple of granularity, go to the next
3699 	 * window, init it, then add in the current cookie. We know the current
3700 	 * cookie uses the copy buffer since we're in this code path.
3701 	 */
3702 	if (trim_sz == 0) {
3703 		(*windowp)++;
3704 		rootnex_init_win(hp, dma, *windowp, cookie, cur_offset);
3705 
3706 		/* Add this cookie to the new window */
3707 		(*windowp)->wd_cookie_cnt++;
3708 		(*windowp)->wd_size += cookie->dmac_size;
3709 		*copybuf_used += MMU_PAGESIZE;
3710 		return (DDI_SUCCESS);
3711 	}
3712 
3713 	/*
3714 	 * *** We figured it out, we definitly need to trim
3715 	 */
3716 
3717 	/*
3718 	 * make sure the driver isn't making us do something bad...
3719 	 * Trimming and sgllen == 1 don't go together.
3720 	 */
3721 	if (hp->dmai_attr.dma_attr_sgllen == 1) {
3722 		return (DDI_DMA_NOMAPPING);
3723 	}
3724 
3725 	/*
3726 	 * first, setup the current window to account for the trim. Need to go
3727 	 * back to the last cookie for this. Some of the last cookie will be in
3728 	 * the current window, and some of the last cookie will be in the new
3729 	 * window. All of the current cookie will be in the new window.
3730 	 */
3731 	cookie--;
3732 	(*windowp)->wd_trim.tr_trim_last = B_TRUE;
3733 	(*windowp)->wd_trim.tr_last_cookie = cookie;
3734 	(*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
3735 	ASSERT(cookie->dmac_size > trim_sz);
3736 	(*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
3737 	(*windowp)->wd_size -= trim_sz;
3738 
3739 	/*
3740 	 * we're trimming the last cookie (not the current cookie). So that
3741 	 * last cookie may have or may not have been using the copy buffer (
3742 	 * we know the cookie passed in uses the copy buffer since we're in
3743 	 * this code path).
3744 	 *
3745 	 * If the last cookie doesn't use the copy buffer, nothing special to
3746 	 * do. However, if it does uses the copy buffer, it will be both the
3747 	 * last page in the current window and the first page in the next
3748 	 * window. Since we are reusing the copy buffer (and KVA space on the
3749 	 * 32-bit kernel), this page will use the end of the copy buffer in the
3750 	 * current window, and the start of the copy buffer in the next window.
3751 	 * Track that info... The cookie physical address was already set to
3752 	 * the copy buffer physical address in setup_cookie..
3753 	 */
3754 	if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3755 		pidx = (sinfo->si_buf_offset + (*windowp)->wd_offset +
3756 		    (*windowp)->wd_size) >> MMU_PAGESHIFT;
3757 		(*windowp)->wd_trim.tr_last_copybuf_win = B_TRUE;
3758 		(*windowp)->wd_trim.tr_last_pidx = pidx;
3759 		(*windowp)->wd_trim.tr_last_cbaddr =
3760 		    dma->dp_pgmap[pidx].pm_cbaddr;
3761 #if !defined(__amd64)
3762 		(*windowp)->wd_trim.tr_last_kaddr =
3763 		    dma->dp_pgmap[pidx].pm_kaddr;
3764 #endif
3765 	}
3766 
3767 	/* save the buffer offsets for the next window */
3768 	coffset = cookie->dmac_size - trim_sz;
3769 	new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
3770 
3771 	/*
3772 	 * set this now in case this is the first window. all other cases are
3773 	 * set in dma_win()
3774 	 */
3775 	cookie->dmac_size = (*windowp)->wd_trim.tr_last_size;
3776 
3777 	/*
3778 	 * initialize the next window using what's left over in the previous
3779 	 * cookie.
3780 	 */
3781 	(*windowp)++;
3782 	rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
3783 	(*windowp)->wd_cookie_cnt++;
3784 	(*windowp)->wd_trim.tr_trim_first = B_TRUE;
3785 	(*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress + coffset;
3786 	(*windowp)->wd_trim.tr_first_size = trim_sz;
3787 
3788 	/*
3789 	 * again, we're tracking if the last cookie uses the copy buffer.
3790 	 * read the comment above for more info on why we need to track
3791 	 * additional state.
3792 	 *
3793 	 * For the first cookie in the new window, we need reset the physical
3794 	 * address to DMA into to the start of the copy buffer plus any
3795 	 * initial page offset which may be present.
3796 	 */
3797 	if (cookie->dmac_type & ROOTNEX_USES_COPYBUF) {
3798 		(*windowp)->wd_dosync = B_TRUE;
3799 		(*windowp)->wd_trim.tr_first_copybuf_win = B_TRUE;
3800 		(*windowp)->wd_trim.tr_first_pidx = pidx;
3801 		(*windowp)->wd_trim.tr_first_cbaddr = dma->dp_cbaddr;
3802 		poff = (*windowp)->wd_trim.tr_first_paddr & MMU_PAGEOFFSET;
3803 
3804 		paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, dma->dp_cbaddr)) +
3805 		    poff;
3806 #ifdef __xpv
3807 		/*
3808 		 * If we're dom0, we're using a real device so we need to load
3809 		 * the cookies with MAs instead of PAs.
3810 		 */
3811 		(*windowp)->wd_trim.tr_first_paddr =
3812 		    ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
3813 #else
3814 		(*windowp)->wd_trim.tr_first_paddr = paddr;
3815 #endif
3816 
3817 #if !defined(__amd64)
3818 		(*windowp)->wd_trim.tr_first_kaddr = dma->dp_kva;
3819 #endif
3820 		/* account for the cookie copybuf usage in the new window */
3821 		*copybuf_used += MMU_PAGESIZE;
3822 
3823 		/*
3824 		 * every piece of code has to have a hack, and here is this
3825 		 * ones :-)
3826 		 *
3827 		 * There is a complex interaction between setup_cookie and the
3828 		 * copybuf window boundary. The complexity had to be in either
3829 		 * the maxxfer window, or the copybuf window, and I chose the
3830 		 * copybuf code.
3831 		 *
3832 		 * So in this code path, we have taken the last cookie,
3833 		 * virtually broken it in half due to the trim, and it happens
3834 		 * to use the copybuf which further complicates life. At the
3835 		 * same time, we have already setup the current cookie, which
3836 		 * is now wrong. More background info: the current cookie uses
3837 		 * the copybuf, so it is only a page long max. So we need to
3838 		 * fix the current cookies copy buffer address, physical
3839 		 * address, and kva for the 32-bit kernel. We due this by
3840 		 * bumping them by page size (of course, we can't due this on
3841 		 * the physical address since the copy buffer may not be
3842 		 * physically contiguous).
3843 		 */
3844 		cookie++;
3845 		dma->dp_pgmap[pidx + 1].pm_cbaddr += MMU_PAGESIZE;
3846 		poff = cookie->dmac_laddress & MMU_PAGEOFFSET;
3847 
3848 		paddr = pfn_to_pa(hat_getpfnum(kas.a_hat,
3849 		    dma->dp_pgmap[pidx + 1].pm_cbaddr)) + poff;
3850 #ifdef __xpv
3851 		/*
3852 		 * If we're dom0, we're using a real device so we need to load
3853 		 * the cookies with MAs instead of PAs.
3854 		 */
3855 		cookie->dmac_laddress = ROOTNEX_PADDR_TO_RBASE(xen_info, paddr);
3856 #else
3857 		cookie->dmac_laddress = paddr;
3858 #endif
3859 
3860 #if !defined(__amd64)
3861 		ASSERT(dma->dp_pgmap[pidx + 1].pm_mapped == B_FALSE);
3862 		dma->dp_pgmap[pidx + 1].pm_kaddr += MMU_PAGESIZE;
3863 #endif
3864 	} else {
3865 		/* go back to the current cookie */
3866 		cookie++;
3867 	}
3868 
3869 	/*
3870 	 * add the current cookie to the new window. set the new window size to
3871 	 * the what was left over from the previous cookie and what's in the
3872 	 * current cookie.
3873 	 */
3874 	(*windowp)->wd_cookie_cnt++;
3875 	(*windowp)->wd_size = trim_sz + cookie->dmac_size;
3876 	ASSERT((*windowp)->wd_size < dma->dp_maxxfer);
3877 
3878 	/*
3879 	 * we know that the cookie passed in always uses the copy buffer. We
3880 	 * wouldn't be here if it didn't.
3881 	 */
3882 	*copybuf_used += MMU_PAGESIZE;
3883 
3884 	return (DDI_SUCCESS);
3885 }
3886 
3887 
3888 /*
3889  * rootnex_maxxfer_window_boundary()
3890  *    Called in bind slowpath when we get to a window boundary because we will
3891  *    go over maxxfer.
3892  */
3893 static int
3894 rootnex_maxxfer_window_boundary(ddi_dma_impl_t *hp, rootnex_dma_t *dma,
3895     rootnex_window_t **windowp, ddi_dma_cookie_t *cookie)
3896 {
3897 	size_t dmac_size;
3898 	off_t new_offset;
3899 	size_t trim_sz;
3900 	off_t coffset;
3901 
3902 
3903 	/*
3904 	 * calculate how much we have to trim off of the current cookie to equal
3905 	 * maxxfer. We don't have to account for granularity here since our
3906 	 * maxxfer already takes that into account.
3907 	 */
3908 	trim_sz = ((*windowp)->wd_size + cookie->dmac_size) - dma->dp_maxxfer;
3909 	ASSERT(trim_sz <= cookie->dmac_size);
3910 	ASSERT(trim_sz <= dma->dp_maxxfer);
3911 
3912 	/* save cookie size since we need it later and we might change it */
3913 	dmac_size = cookie->dmac_size;
3914 
3915 	/*
3916 	 * if we're not trimming the entire cookie, setup the current window to
3917 	 * account for the trim.
3918 	 */
3919 	if (trim_sz < cookie->dmac_size) {
3920 		(*windowp)->wd_cookie_cnt++;
3921 		(*windowp)->wd_trim.tr_trim_last = B_TRUE;
3922 		(*windowp)->wd_trim.tr_last_cookie = cookie;
3923 		(*windowp)->wd_trim.tr_last_paddr = cookie->dmac_laddress;
3924 		(*windowp)->wd_trim.tr_last_size = cookie->dmac_size - trim_sz;
3925 		(*windowp)->wd_size = dma->dp_maxxfer;
3926 
3927 		/*
3928 		 * set the adjusted cookie size now in case this is the first
3929 		 * window. All other windows are taken care of in get win
3930 		 */
3931 		cookie->dmac_size = (*windowp)->wd_trim.tr_last_size;
3932 	}
3933 
3934 	/*
3935 	 * coffset is the current offset within the cookie, new_offset is the
3936 	 * current offset with the entire buffer.
3937 	 */
3938 	coffset = dmac_size - trim_sz;
3939 	new_offset = (*windowp)->wd_offset + (*windowp)->wd_size;
3940 
3941 	/* initialize the next window */
3942 	(*windowp)++;
3943 	rootnex_init_win(hp, dma, *windowp, cookie, new_offset);
3944 	(*windowp)->wd_cookie_cnt++;
3945 	(*windowp)->wd_size = trim_sz;
3946 	if (trim_sz < dmac_size) {
3947 		(*windowp)->wd_trim.tr_trim_first = B_TRUE;
3948 		(*windowp)->wd_trim.tr_first_paddr = cookie->dmac_laddress +
3949 		    coffset;
3950 		(*windowp)->wd_trim.tr_first_size = trim_sz;
3951 	}
3952 
3953 	return (DDI_SUCCESS);
3954 }
3955 
3956 
3957 /*ARGSUSED*/
3958 static int
3959 rootnex_coredma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
3960     off_t off, size_t len, uint_t cache_flags)
3961 {
3962 	rootnex_sglinfo_t *sinfo;
3963 	rootnex_pgmap_t *cbpage;
3964 	rootnex_window_t *win;
3965 	ddi_dma_impl_t *hp;
3966 	rootnex_dma_t *dma;
3967 	caddr_t fromaddr;
3968 	caddr_t toaddr;
3969 	uint_t psize;
3970 	off_t offset;
3971 	uint_t pidx;
3972 	size_t size;
3973 	off_t poff;
3974 	int e;
3975 
3976 
3977 	hp = (ddi_dma_impl_t *)handle;
3978 	dma = (rootnex_dma_t *)hp->dmai_private;
3979 	sinfo = &dma->dp_sglinfo;
3980 
3981 	/*
3982 	 * if we don't have any windows, we don't need to sync. A copybuf
3983 	 * will cause us to have at least one window.
3984 	 */
3985 	if (dma->dp_window == NULL) {
3986 		return (DDI_SUCCESS);
3987 	}
3988 
3989 	/* This window may not need to be sync'd */
3990 	win = &dma->dp_window[dma->dp_current_win];
3991 	if (!win->wd_dosync) {
3992 		return (DDI_SUCCESS);
3993 	}
3994 
3995 	/* handle off and len special cases */
3996 	if ((off == 0) || (rootnex_sync_ignore_params)) {
3997 		offset = win->wd_offset;
3998 	} else {
3999 		offset = off;
4000 	}
4001 	if ((len == 0) || (rootnex_sync_ignore_params)) {
4002 		size = win->wd_size;
4003 	} else {
4004 		size = len;
4005 	}
4006 
4007 	/* check the sync args to make sure they make a little sense */
4008 	if (rootnex_sync_check_parms) {
4009 		e = rootnex_valid_sync_parms(hp, win, offset, size,
4010 		    cache_flags);
4011 		if (e != DDI_SUCCESS) {
4012 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_SYNC_FAIL]);
4013 			return (DDI_FAILURE);
4014 		}
4015 	}
4016 
4017 	/*
4018 	 * special case the first page to handle the offset into the page. The
4019 	 * offset to the current page for our buffer is the offset into the
4020 	 * first page of the buffer plus our current offset into the buffer
4021 	 * itself, masked of course.
4022 	 */
4023 	poff = (sinfo->si_buf_offset + offset) & MMU_PAGEOFFSET;
4024 	psize = MIN((MMU_PAGESIZE - poff), size);
4025 
4026 	/* go through all the pages that we want to sync */
4027 	while (size > 0) {
4028 		/*
4029 		 * Calculate the page index relative to the start of the buffer.
4030 		 * The index to the current page for our buffer is the offset
4031 		 * into the first page of the buffer plus our current offset
4032 		 * into the buffer itself, shifted of course...
4033 		 */
4034 		pidx = (sinfo->si_buf_offset + offset) >> MMU_PAGESHIFT;
4035 		ASSERT(pidx < sinfo->si_max_pages);
4036 
4037 		/*
4038 		 * if this page uses the copy buffer, we need to sync it,
4039 		 * otherwise, go on to the next page.
4040 		 */
4041 		cbpage = &dma->dp_pgmap[pidx];
4042 		ASSERT((cbpage->pm_uses_copybuf == B_TRUE) ||
4043 		    (cbpage->pm_uses_copybuf == B_FALSE));
4044 		if (cbpage->pm_uses_copybuf) {
4045 			/* cbaddr and kaddr should be page aligned */
4046 			ASSERT(((uintptr_t)cbpage->pm_cbaddr &
4047 			    MMU_PAGEOFFSET) == 0);
4048 			ASSERT(((uintptr_t)cbpage->pm_kaddr &
4049 			    MMU_PAGEOFFSET) == 0);
4050 
4051 			/*
4052 			 * if we're copying for the device, we are going to
4053 			 * copy from the drivers buffer and to the rootnex
4054 			 * allocated copy buffer.
4055 			 */
4056 			if (cache_flags == DDI_DMA_SYNC_FORDEV) {
4057 				fromaddr = cbpage->pm_kaddr + poff;
4058 				toaddr = cbpage->pm_cbaddr + poff;
4059 				DTRACE_PROBE2(rootnex__sync__dev,
4060 				    dev_info_t *, dma->dp_dip, size_t, psize);
4061 
4062 			/*
4063 			 * if we're copying for the cpu/kernel, we are going to
4064 			 * copy from the rootnex allocated copy buffer to the
4065 			 * drivers buffer.
4066 			 */
4067 			} else {
4068 				fromaddr = cbpage->pm_cbaddr + poff;
4069 				toaddr = cbpage->pm_kaddr + poff;
4070 				DTRACE_PROBE2(rootnex__sync__cpu,
4071 				    dev_info_t *, dma->dp_dip, size_t, psize);
4072 			}
4073 
4074 			bcopy(fromaddr, toaddr, psize);
4075 		}
4076 
4077 		/*
4078 		 * decrement size until we're done, update our offset into the
4079 		 * buffer, and get the next page size.
4080 		 */
4081 		size -= psize;
4082 		offset += psize;
4083 		psize = MIN(MMU_PAGESIZE, size);
4084 
4085 		/* page offset is zero for the rest of this loop */
4086 		poff = 0;
4087 	}
4088 
4089 	return (DDI_SUCCESS);
4090 }
4091 
4092 /*
4093  * rootnex_dma_sync()
4094  *    called from ddi_dma_sync() if DMP_NOSYNC is not set in hp->dmai_rflags.
4095  *    We set DMP_NOSYNC if we're not using the copy buffer. If DMP_NOSYNC
4096  *    is set, ddi_dma_sync() returns immediately passing back success.
4097  */
4098 /*ARGSUSED*/
4099 static int
4100 rootnex_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4101     off_t off, size_t len, uint_t cache_flags)
4102 {
4103 #if !defined(__xpv)
4104 	if (IOMMU_USED(rdip)) {
4105 		return (iommulib_nexdma_sync(dip, rdip, handle, off, len,
4106 		    cache_flags));
4107 	}
4108 #endif
4109 	return (rootnex_coredma_sync(dip, rdip, handle, off, len,
4110 	    cache_flags));
4111 }
4112 
4113 /*
4114  * rootnex_valid_sync_parms()
4115  *    checks the parameters passed to sync to verify they are correct.
4116  */
4117 static int
4118 rootnex_valid_sync_parms(ddi_dma_impl_t *hp, rootnex_window_t *win,
4119     off_t offset, size_t size, uint_t cache_flags)
4120 {
4121 	off_t woffset;
4122 
4123 
4124 	/*
4125 	 * the first part of the test to make sure the offset passed in is
4126 	 * within the window.
4127 	 */
4128 	if (offset < win->wd_offset) {
4129 		return (DDI_FAILURE);
4130 	}
4131 
4132 	/*
4133 	 * second and last part of the test to make sure the offset and length
4134 	 * passed in is within the window.
4135 	 */
4136 	woffset = offset - win->wd_offset;
4137 	if ((woffset + size) > win->wd_size) {
4138 		return (DDI_FAILURE);
4139 	}
4140 
4141 	/*
4142 	 * if we are sync'ing for the device, the DDI_DMA_WRITE flag should
4143 	 * be set too.
4144 	 */
4145 	if ((cache_flags == DDI_DMA_SYNC_FORDEV) &&
4146 	    (hp->dmai_rflags & DDI_DMA_WRITE)) {
4147 		return (DDI_SUCCESS);
4148 	}
4149 
4150 	/*
4151 	 * at this point, either DDI_DMA_SYNC_FORCPU or DDI_DMA_SYNC_FORKERNEL
4152 	 * should be set. Also DDI_DMA_READ should be set in the flags.
4153 	 */
4154 	if (((cache_flags == DDI_DMA_SYNC_FORCPU) ||
4155 	    (cache_flags == DDI_DMA_SYNC_FORKERNEL)) &&
4156 	    (hp->dmai_rflags & DDI_DMA_READ)) {
4157 		return (DDI_SUCCESS);
4158 	}
4159 
4160 	return (DDI_FAILURE);
4161 }
4162 
4163 
4164 /*ARGSUSED*/
4165 static int
4166 rootnex_coredma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4167     uint_t win, off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
4168     uint_t *ccountp)
4169 {
4170 	rootnex_window_t *window;
4171 	rootnex_trim_t *trim;
4172 	ddi_dma_impl_t *hp;
4173 	rootnex_dma_t *dma;
4174 #if !defined(__amd64)
4175 	rootnex_sglinfo_t *sinfo;
4176 	rootnex_pgmap_t *pmap;
4177 	uint_t pidx;
4178 	uint_t pcnt;
4179 	off_t poff;
4180 	int i;
4181 #endif
4182 
4183 
4184 	hp = (ddi_dma_impl_t *)handle;
4185 	dma = (rootnex_dma_t *)hp->dmai_private;
4186 #if !defined(__amd64)
4187 	sinfo = &dma->dp_sglinfo;
4188 #endif
4189 
4190 	/* If we try and get a window which doesn't exist, return failure */
4191 	if (win >= hp->dmai_nwin) {
4192 		ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
4193 		return (DDI_FAILURE);
4194 	}
4195 
4196 	/*
4197 	 * if we don't have any windows, and they're asking for the first
4198 	 * window, setup the cookie pointer to the first cookie in the bind.
4199 	 * setup our return values, then increment the cookie since we return
4200 	 * the first cookie on the stack.
4201 	 */
4202 	if (dma->dp_window == NULL) {
4203 		if (win != 0) {
4204 			ROOTNEX_PROF_INC(&rootnex_cnt[ROOTNEX_CNT_GETWIN_FAIL]);
4205 			return (DDI_FAILURE);
4206 		}
4207 		hp->dmai_cookie = dma->dp_cookies;
4208 		*offp = 0;
4209 		*lenp = dma->dp_dma.dmao_size;
4210 		*ccountp = dma->dp_sglinfo.si_sgl_size;
4211 		*cookiep = hp->dmai_cookie[0];
4212 		hp->dmai_cookie++;
4213 		return (DDI_SUCCESS);
4214 	}
4215 
4216 	/* sync the old window before moving on to the new one */
4217 	window = &dma->dp_window[dma->dp_current_win];
4218 	if ((window->wd_dosync) && (hp->dmai_rflags & DDI_DMA_READ)) {
4219 		(void) rootnex_dma_sync(dip, rdip, handle, 0, 0,
4220 		    DDI_DMA_SYNC_FORCPU);
4221 	}
4222 
4223 #if !defined(__amd64)
4224 	/*
4225 	 * before we move to the next window, if we need to re-map, unmap all
4226 	 * the pages in this window.
4227 	 */
4228 	if (dma->dp_cb_remaping) {
4229 		/*
4230 		 * If we switch to this window again, we'll need to map in
4231 		 * on the fly next time.
4232 		 */
4233 		window->wd_remap_copybuf = B_TRUE;
4234 
4235 		/*
4236 		 * calculate the page index into the buffer where this window
4237 		 * starts, and the number of pages this window takes up.
4238 		 */
4239 		pidx = (sinfo->si_buf_offset + window->wd_offset) >>
4240 		    MMU_PAGESHIFT;
4241 		poff = (sinfo->si_buf_offset + window->wd_offset) &
4242 		    MMU_PAGEOFFSET;
4243 		pcnt = mmu_btopr(window->wd_size + poff);
4244 		ASSERT((pidx + pcnt) <= sinfo->si_max_pages);
4245 
4246 		/* unmap pages which are currently mapped in this window */
4247 		for (i = 0; i < pcnt; i++) {
4248 			if (dma->dp_pgmap[pidx].pm_mapped) {
4249 				hat_unload(kas.a_hat,
4250 				    dma->dp_pgmap[pidx].pm_kaddr, MMU_PAGESIZE,
4251 				    HAT_UNLOAD);
4252 				dma->dp_pgmap[pidx].pm_mapped = B_FALSE;
4253 			}
4254 			pidx++;
4255 		}
4256 	}
4257 #endif
4258 
4259 	/*
4260 	 * Move to the new window.
4261 	 * NOTE: current_win must be set for sync to work right
4262 	 */
4263 	dma->dp_current_win = win;
4264 	window = &dma->dp_window[win];
4265 
4266 	/* if needed, adjust the first and/or last cookies for trim */
4267 	trim = &window->wd_trim;
4268 	if (trim->tr_trim_first) {
4269 		window->wd_first_cookie->dmac_laddress = trim->tr_first_paddr;
4270 		window->wd_first_cookie->dmac_size = trim->tr_first_size;
4271 #if !defined(__amd64)
4272 		window->wd_first_cookie->dmac_type =
4273 		    (window->wd_first_cookie->dmac_type &
4274 		    ROOTNEX_USES_COPYBUF) + window->wd_offset;
4275 #endif
4276 		if (trim->tr_first_copybuf_win) {
4277 			dma->dp_pgmap[trim->tr_first_pidx].pm_cbaddr =
4278 			    trim->tr_first_cbaddr;
4279 #if !defined(__amd64)
4280 			dma->dp_pgmap[trim->tr_first_pidx].pm_kaddr =
4281 			    trim->tr_first_kaddr;
4282 #endif
4283 		}
4284 	}
4285 	if (trim->tr_trim_last) {
4286 		trim->tr_last_cookie->dmac_laddress = trim->tr_last_paddr;
4287 		trim->tr_last_cookie->dmac_size = trim->tr_last_size;
4288 		if (trim->tr_last_copybuf_win) {
4289 			dma->dp_pgmap[trim->tr_last_pidx].pm_cbaddr =
4290 			    trim->tr_last_cbaddr;
4291 #if !defined(__amd64)
4292 			dma->dp_pgmap[trim->tr_last_pidx].pm_kaddr =
4293 			    trim->tr_last_kaddr;
4294 #endif
4295 		}
4296 	}
4297 
4298 	/*
4299 	 * setup the cookie pointer to the first cookie in the window. setup
4300 	 * our return values, then increment the cookie since we return the
4301 	 * first cookie on the stack.
4302 	 */
4303 	hp->dmai_cookie = window->wd_first_cookie;
4304 	*offp = window->wd_offset;
4305 	*lenp = window->wd_size;
4306 	*ccountp = window->wd_cookie_cnt;
4307 	*cookiep = hp->dmai_cookie[0];
4308 	hp->dmai_cookie++;
4309 
4310 #if !defined(__amd64)
4311 	/* re-map copybuf if required for this window */
4312 	if (dma->dp_cb_remaping) {
4313 		/*
4314 		 * calculate the page index into the buffer where this
4315 		 * window starts.
4316 		 */
4317 		pidx = (sinfo->si_buf_offset + window->wd_offset) >>
4318 		    MMU_PAGESHIFT;
4319 		ASSERT(pidx < sinfo->si_max_pages);
4320 
4321 		/*
4322 		 * the first page can get unmapped if it's shared with the
4323 		 * previous window. Even if the rest of this window is already
4324 		 * mapped in, we need to still check this one.
4325 		 */
4326 		pmap = &dma->dp_pgmap[pidx];
4327 		if ((pmap->pm_uses_copybuf) && (pmap->pm_mapped == B_FALSE)) {
4328 			if (pmap->pm_pp != NULL) {
4329 				pmap->pm_mapped = B_TRUE;
4330 				i86_pp_map(pmap->pm_pp, pmap->pm_kaddr);
4331 			} else if (pmap->pm_vaddr != NULL) {
4332 				pmap->pm_mapped = B_TRUE;
4333 				i86_va_map(pmap->pm_vaddr, sinfo->si_asp,
4334 				    pmap->pm_kaddr);
4335 			}
4336 		}
4337 		pidx++;
4338 
4339 		/* map in the rest of the pages if required */
4340 		if (window->wd_remap_copybuf) {
4341 			window->wd_remap_copybuf = B_FALSE;
4342 
4343 			/* figure out many pages this window takes up */
4344 			poff = (sinfo->si_buf_offset + window->wd_offset) &
4345 			    MMU_PAGEOFFSET;
4346 			pcnt = mmu_btopr(window->wd_size + poff);
4347 			ASSERT(((pidx - 1) + pcnt) <= sinfo->si_max_pages);
4348 
4349 			/* map pages which require it */
4350 			for (i = 1; i < pcnt; i++) {
4351 				pmap = &dma->dp_pgmap[pidx];
4352 				if (pmap->pm_uses_copybuf) {
4353 					ASSERT(pmap->pm_mapped == B_FALSE);
4354 					if (pmap->pm_pp != NULL) {
4355 						pmap->pm_mapped = B_TRUE;
4356 						i86_pp_map(pmap->pm_pp,
4357 						    pmap->pm_kaddr);
4358 					} else if (pmap->pm_vaddr != NULL) {
4359 						pmap->pm_mapped = B_TRUE;
4360 						i86_va_map(pmap->pm_vaddr,
4361 						    sinfo->si_asp,
4362 						    pmap->pm_kaddr);
4363 					}
4364 				}
4365 				pidx++;
4366 			}
4367 		}
4368 	}
4369 #endif
4370 
4371 	/* if the new window uses the copy buffer, sync it for the device */
4372 	if ((window->wd_dosync) && (hp->dmai_rflags & DDI_DMA_WRITE)) {
4373 		(void) rootnex_dma_sync(dip, rdip, handle, 0, 0,
4374 		    DDI_DMA_SYNC_FORDEV);
4375 	}
4376 
4377 	return (DDI_SUCCESS);
4378 }
4379 
4380 /*
4381  * rootnex_dma_win()
4382  *    called from ddi_dma_getwin()
4383  */
4384 /*ARGSUSED*/
4385 static int
4386 rootnex_dma_win(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4387     uint_t win, off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
4388     uint_t *ccountp)
4389 {
4390 #if !defined(__xpv)
4391 	if (IOMMU_USED(rdip)) {
4392 		return (iommulib_nexdma_win(dip, rdip, handle, win, offp, lenp,
4393 		    cookiep, ccountp));
4394 	}
4395 #endif
4396 
4397 	return (rootnex_coredma_win(dip, rdip, handle, win, offp, lenp,
4398 	    cookiep, ccountp));
4399 }
4400 
4401 /*
4402  * ************************
4403  *  obsoleted dma routines
4404  * ************************
4405  */
4406 
4407 /* ARGSUSED */
4408 static int
4409 rootnex_coredma_map(dev_info_t *dip, dev_info_t *rdip,
4410     struct ddi_dma_req *dmareq, ddi_dma_handle_t *handlep)
4411 {
4412 #if defined(__amd64)
4413 	/*
4414 	 * this interface is not supported in 64-bit x86 kernel. See comment in
4415 	 * rootnex_dma_mctl()
4416 	 */
4417 	return (DDI_DMA_NORESOURCES);
4418 
4419 #else /* 32-bit x86 kernel */
4420 	ddi_dma_handle_t *lhandlep;
4421 	ddi_dma_handle_t lhandle;
4422 	ddi_dma_cookie_t cookie;
4423 	ddi_dma_attr_t dma_attr;
4424 	ddi_dma_lim_t *dma_lim;
4425 	uint_t ccnt;
4426 	int e;
4427 
4428 
4429 	/*
4430 	 * if the driver is just testing to see if it's possible to do the bind,
4431 	 * we'll use local state. Otherwise, use the handle pointer passed in.
4432 	 */
4433 	if (handlep == NULL) {
4434 		lhandlep = &lhandle;
4435 	} else {
4436 		lhandlep = handlep;
4437 	}
4438 
4439 	/* convert the limit structure to a dma_attr one */
4440 	dma_lim = dmareq->dmar_limits;
4441 	dma_attr.dma_attr_version = DMA_ATTR_V0;
4442 	dma_attr.dma_attr_addr_lo = dma_lim->dlim_addr_lo;
4443 	dma_attr.dma_attr_addr_hi = dma_lim->dlim_addr_hi;
4444 	dma_attr.dma_attr_minxfer = dma_lim->dlim_minxfer;
4445 	dma_attr.dma_attr_seg = dma_lim->dlim_adreg_max;
4446 	dma_attr.dma_attr_count_max = dma_lim->dlim_ctreg_max;
4447 	dma_attr.dma_attr_granular = dma_lim->dlim_granular;
4448 	dma_attr.dma_attr_sgllen = dma_lim->dlim_sgllen;
4449 	dma_attr.dma_attr_maxxfer = dma_lim->dlim_reqsize;
4450 	dma_attr.dma_attr_burstsizes = dma_lim->dlim_burstsizes;
4451 	dma_attr.dma_attr_align = MMU_PAGESIZE;
4452 	dma_attr.dma_attr_flags = 0;
4453 
4454 	e = rootnex_dma_allochdl(dip, rdip, &dma_attr, dmareq->dmar_fp,
4455 	    dmareq->dmar_arg, lhandlep);
4456 	if (e != DDI_SUCCESS) {
4457 		return (e);
4458 	}
4459 
4460 	e = rootnex_dma_bindhdl(dip, rdip, *lhandlep, dmareq, &cookie, &ccnt);
4461 	if ((e != DDI_DMA_MAPPED) && (e != DDI_DMA_PARTIAL_MAP)) {
4462 		(void) rootnex_dma_freehdl(dip, rdip, *lhandlep);
4463 		return (e);
4464 	}
4465 
4466 	/*
4467 	 * if the driver is just testing to see if it's possible to do the bind,
4468 	 * free up the local state and return the result.
4469 	 */
4470 	if (handlep == NULL) {
4471 		(void) rootnex_dma_unbindhdl(dip, rdip, *lhandlep);
4472 		(void) rootnex_dma_freehdl(dip, rdip, *lhandlep);
4473 		if (e == DDI_DMA_MAPPED) {
4474 			return (DDI_DMA_MAPOK);
4475 		} else {
4476 			return (DDI_DMA_NOMAPPING);
4477 		}
4478 	}
4479 
4480 	return (e);
4481 #endif /* defined(__amd64) */
4482 }
4483 
4484 /*
4485  * rootnex_dma_map()
4486  *    called from ddi_dma_setup()
4487  */
4488 /* ARGSUSED */
4489 static int
4490 rootnex_dma_map(dev_info_t *dip, dev_info_t *rdip,
4491     struct ddi_dma_req *dmareq, ddi_dma_handle_t *handlep)
4492 {
4493 #if !defined(__xpv)
4494 	if (IOMMU_USED(rdip)) {
4495 		return (iommulib_nexdma_map(dip, rdip, dmareq, handlep));
4496 	}
4497 #endif
4498 	return (rootnex_coredma_map(dip, rdip, dmareq, handlep));
4499 }
4500 
4501 /*
4502  * rootnex_dma_mctl()
4503  *
4504  */
4505 /* ARGSUSED */
4506 static int
4507 rootnex_coredma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4508     enum ddi_dma_ctlops request, off_t *offp, size_t *lenp, caddr_t *objpp,
4509     uint_t cache_flags)
4510 {
4511 #if defined(__amd64)
4512 	/*
4513 	 * DDI_DMA_SMEM_ALLOC & DDI_DMA_IOPB_ALLOC we're changed to have a
4514 	 * common implementation in genunix, so they no longer have x86
4515 	 * specific functionality which called into dma_ctl.
4516 	 *
4517 	 * The rest of the obsoleted interfaces were never supported in the
4518 	 * 64-bit x86 kernel. For s10, the obsoleted DDI_DMA_SEGTOC interface
4519 	 * was not ported to the x86 64-bit kernel do to serious x86 rootnex
4520 	 * implementation issues.
4521 	 *
4522 	 * If you can't use DDI_DMA_SEGTOC; DDI_DMA_NEXTSEG, DDI_DMA_FREE, and
4523 	 * DDI_DMA_NEXTWIN are useless since you can get to the cookie, so we
4524 	 * reflect that now too...
4525 	 *
4526 	 * Even though we fixed the pointer problem in DDI_DMA_SEGTOC, we are
4527 	 * not going to put this functionality into the 64-bit x86 kernel now.
4528 	 * It wasn't ported to the 64-bit kernel for s10, no reason to change
4529 	 * that in a future release.
4530 	 */
4531 	return (DDI_FAILURE);
4532 
4533 #else /* 32-bit x86 kernel */
4534 	ddi_dma_cookie_t lcookie;
4535 	ddi_dma_cookie_t *cookie;
4536 	rootnex_window_t *window;
4537 	ddi_dma_impl_t *hp;
4538 	rootnex_dma_t *dma;
4539 	uint_t nwin;
4540 	uint_t ccnt;
4541 	size_t len;
4542 	off_t off;
4543 	int e;
4544 
4545 
4546 	/*
4547 	 * DDI_DMA_SEGTOC, DDI_DMA_NEXTSEG, and DDI_DMA_NEXTWIN are a little
4548 	 * hacky since were optimizing for the current interfaces and so we can
4549 	 * cleanup the mess in genunix. Hopefully we will remove the this
4550 	 * obsoleted routines someday soon.
4551 	 */
4552 
4553 	switch (request) {
4554 
4555 	case DDI_DMA_SEGTOC: /* ddi_dma_segtocookie() */
4556 		hp = (ddi_dma_impl_t *)handle;
4557 		cookie = (ddi_dma_cookie_t *)objpp;
4558 
4559 		/*
4560 		 * convert segment to cookie. We don't distinguish between the
4561 		 * two :-)
4562 		 */
4563 		*cookie = *hp->dmai_cookie;
4564 		*lenp = cookie->dmac_size;
4565 		*offp = cookie->dmac_type & ~ROOTNEX_USES_COPYBUF;
4566 		return (DDI_SUCCESS);
4567 
4568 	case DDI_DMA_NEXTSEG: /* ddi_dma_nextseg() */
4569 		hp = (ddi_dma_impl_t *)handle;
4570 		dma = (rootnex_dma_t *)hp->dmai_private;
4571 
4572 		if ((*lenp != NULL) && ((uintptr_t)*lenp != (uintptr_t)hp)) {
4573 			return (DDI_DMA_STALE);
4574 		}
4575 
4576 		/* handle the case where we don't have any windows */
4577 		if (dma->dp_window == NULL) {
4578 			/*
4579 			 * if seg == NULL, and we don't have any windows,
4580 			 * return the first cookie in the sgl.
4581 			 */
4582 			if (*lenp == NULL) {
4583 				dma->dp_current_cookie = 0;
4584 				hp->dmai_cookie = dma->dp_cookies;
4585 				*objpp = (caddr_t)handle;
4586 				return (DDI_SUCCESS);
4587 
4588 			/* if we have more cookies, go to the next cookie */
4589 			} else {
4590 				if ((dma->dp_current_cookie + 1) >=
4591 				    dma->dp_sglinfo.si_sgl_size) {
4592 					return (DDI_DMA_DONE);
4593 				}
4594 				dma->dp_current_cookie++;
4595 				hp->dmai_cookie++;
4596 				return (DDI_SUCCESS);
4597 			}
4598 		}
4599 
4600 		/* We have one or more windows */
4601 		window = &dma->dp_window[dma->dp_current_win];
4602 
4603 		/*
4604 		 * if seg == NULL, return the first cookie in the current
4605 		 * window
4606 		 */
4607 		if (*lenp == NULL) {
4608 			dma->dp_current_cookie = 0;
4609 			hp->dmai_cookie = window->wd_first_cookie;
4610 
4611 		/*
4612 		 * go to the next cookie in the window then see if we done with
4613 		 * this window.
4614 		 */
4615 		} else {
4616 			if ((dma->dp_current_cookie + 1) >=
4617 			    window->wd_cookie_cnt) {
4618 				return (DDI_DMA_DONE);
4619 			}
4620 			dma->dp_current_cookie++;
4621 			hp->dmai_cookie++;
4622 		}
4623 		*objpp = (caddr_t)handle;
4624 		return (DDI_SUCCESS);
4625 
4626 	case DDI_DMA_NEXTWIN: /* ddi_dma_nextwin() */
4627 		hp = (ddi_dma_impl_t *)handle;
4628 		dma = (rootnex_dma_t *)hp->dmai_private;
4629 
4630 		if ((*offp != NULL) && ((uintptr_t)*offp != (uintptr_t)hp)) {
4631 			return (DDI_DMA_STALE);
4632 		}
4633 
4634 		/* if win == NULL, return the first window in the bind */
4635 		if (*offp == NULL) {
4636 			nwin = 0;
4637 
4638 		/*
4639 		 * else, go to the next window then see if we're done with all
4640 		 * the windows.
4641 		 */
4642 		} else {
4643 			nwin = dma->dp_current_win + 1;
4644 			if (nwin >= hp->dmai_nwin) {
4645 				return (DDI_DMA_DONE);
4646 			}
4647 		}
4648 
4649 		/* switch to the next window */
4650 		e = rootnex_dma_win(dip, rdip, handle, nwin, &off, &len,
4651 		    &lcookie, &ccnt);
4652 		ASSERT(e == DDI_SUCCESS);
4653 		if (e != DDI_SUCCESS) {
4654 			return (DDI_DMA_STALE);
4655 		}
4656 
4657 		/* reset the cookie back to the first cookie in the window */
4658 		if (dma->dp_window != NULL) {
4659 			window = &dma->dp_window[dma->dp_current_win];
4660 			hp->dmai_cookie = window->wd_first_cookie;
4661 		} else {
4662 			hp->dmai_cookie = dma->dp_cookies;
4663 		}
4664 
4665 		*objpp = (caddr_t)handle;
4666 		return (DDI_SUCCESS);
4667 
4668 	case DDI_DMA_FREE: /* ddi_dma_free() */
4669 		(void) rootnex_dma_unbindhdl(dip, rdip, handle);
4670 		(void) rootnex_dma_freehdl(dip, rdip, handle);
4671 		if (rootnex_state->r_dvma_call_list_id) {
4672 			ddi_run_callback(&rootnex_state->r_dvma_call_list_id);
4673 		}
4674 		return (DDI_SUCCESS);
4675 
4676 	case DDI_DMA_IOPB_ALLOC:	/* get contiguous DMA-able memory */
4677 	case DDI_DMA_SMEM_ALLOC:	/* get contiguous DMA-able memory */
4678 		/* should never get here, handled in genunix */
4679 		ASSERT(0);
4680 		return (DDI_FAILURE);
4681 
4682 	case DDI_DMA_KVADDR:
4683 	case DDI_DMA_GETERR:
4684 	case DDI_DMA_COFF:
4685 		return (DDI_FAILURE);
4686 	}
4687 
4688 	return (DDI_FAILURE);
4689 #endif /* defined(__amd64) */
4690 }
4691 
4692 /*
4693  * rootnex_dma_mctl()
4694  *
4695  */
4696 /* ARGSUSED */
4697 static int
4698 rootnex_dma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
4699     enum ddi_dma_ctlops request, off_t *offp, size_t *lenp, caddr_t *objpp,
4700     uint_t cache_flags)
4701 {
4702 #if !defined(__xpv)
4703 	if (IOMMU_USED(rdip)) {
4704 		return (iommulib_nexdma_mctl(dip, rdip, handle, request, offp,
4705 		    lenp, objpp, cache_flags));
4706 	}
4707 #endif
4708 
4709 	return (rootnex_coredma_mctl(dip, rdip, handle, request, offp,
4710 	    lenp, objpp, cache_flags));
4711 }
4712 
4713 /*
4714  * *********
4715  *  FMA Code
4716  * *********
4717  */
4718 
4719 /*
4720  * rootnex_fm_init()
4721  *    FMA init busop
4722  */
4723 /* ARGSUSED */
4724 static int
4725 rootnex_fm_init(dev_info_t *dip, dev_info_t *tdip, int tcap,
4726     ddi_iblock_cookie_t *ibc)
4727 {
4728 	*ibc = rootnex_state->r_err_ibc;
4729 
4730 	return (ddi_system_fmcap);
4731 }
4732 
4733 /*
4734  * rootnex_dma_check()
4735  *    Function called after a dma fault occurred to find out whether the
4736  *    fault address is associated with a driver that is able to handle faults
4737  *    and recover from faults.
4738  */
4739 /* ARGSUSED */
4740 static int
4741 rootnex_dma_check(dev_info_t *dip, const void *handle, const void *addr,
4742     const void *not_used)
4743 {
4744 	rootnex_window_t *window;
4745 	uint64_t start_addr;
4746 	uint64_t fault_addr;
4747 	ddi_dma_impl_t *hp;
4748 	rootnex_dma_t *dma;
4749 	uint64_t end_addr;
4750 	size_t csize;
4751 	int i;
4752 	int j;
4753 
4754 
4755 	/* The driver has to set DDI_DMA_FLAGERR to recover from dma faults */
4756 	hp = (ddi_dma_impl_t *)handle;
4757 	ASSERT(hp);
4758 
4759 	dma = (rootnex_dma_t *)hp->dmai_private;
4760 
4761 	/* Get the address that we need to search for */
4762 	fault_addr = *(uint64_t *)addr;
4763 
4764 	/*
4765 	 * if we don't have any windows, we can just walk through all the
4766 	 * cookies.
4767 	 */
4768 	if (dma->dp_window == NULL) {
4769 		/* for each cookie */
4770 		for (i = 0; i < dma->dp_sglinfo.si_sgl_size; i++) {
4771 			/*
4772 			 * if the faulted address is within the physical address
4773 			 * range of the cookie, return DDI_FM_NONFATAL.
4774 			 */
4775 			if ((fault_addr >= dma->dp_cookies[i].dmac_laddress) &&
4776 			    (fault_addr <= (dma->dp_cookies[i].dmac_laddress +
4777 			    dma->dp_cookies[i].dmac_size))) {
4778 				return (DDI_FM_NONFATAL);
4779 			}
4780 		}
4781 
4782 		/* fault_addr not within this DMA handle */
4783 		return (DDI_FM_UNKNOWN);
4784 	}
4785 
4786 	/* we have mutiple windows, walk through each window */
4787 	for (i = 0; i < hp->dmai_nwin; i++) {
4788 		window = &dma->dp_window[i];
4789 
4790 		/* Go through all the cookies in the window */
4791 		for (j = 0; j < window->wd_cookie_cnt; j++) {
4792 
4793 			start_addr = window->wd_first_cookie[j].dmac_laddress;
4794 			csize = window->wd_first_cookie[j].dmac_size;
4795 
4796 			/*
4797 			 * if we are trimming the first cookie in the window,
4798 			 * and this is the first cookie, adjust the start
4799 			 * address and size of the cookie to account for the
4800 			 * trim.
4801 			 */
4802 			if (window->wd_trim.tr_trim_first && (j == 0)) {
4803 				start_addr = window->wd_trim.tr_first_paddr;
4804 				csize = window->wd_trim.tr_first_size;
4805 			}
4806 
4807 			/*
4808 			 * if we are trimming the last cookie in the window,
4809 			 * and this is the last cookie, adjust the start
4810 			 * address and size of the cookie to account for the
4811 			 * trim.
4812 			 */
4813 			if (window->wd_trim.tr_trim_last &&
4814 			    (j == (window->wd_cookie_cnt - 1))) {
4815 				start_addr = window->wd_trim.tr_last_paddr;
4816 				csize = window->wd_trim.tr_last_size;
4817 			}
4818 
4819 			end_addr = start_addr + csize;
4820 
4821 			/*
4822 			 * if the faulted address is within the physical address
4823 			 * range of the cookie, return DDI_FM_NONFATAL.
4824 			 */
4825 			if ((fault_addr >= start_addr) &&
4826 			    (fault_addr <= end_addr)) {
4827 				return (DDI_FM_NONFATAL);
4828 			}
4829 		}
4830 	}
4831 
4832 	/* fault_addr not within this DMA handle */
4833 	return (DDI_FM_UNKNOWN);
4834 }
4835