xref: /illumos-gate/usr/src/uts/sun4u/io/pci/pci_dma.c (revision 3ce33fb052b375020ea4249290d33b834d9f9e75)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
27  */
28 
29 /*
30  * PCI nexus DVMA and DMA core routines:
31  *	dma_map/dma_bind_handle implementation
32  *	bypass and peer-to-peer support
33  *	fast track DVMA space allocation
34  *	runtime DVMA debug
35  */
36 #include <sys/types.h>
37 #include <sys/kmem.h>
38 #include <sys/async.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sunddi.h>
41 #include <sys/machsystm.h>	/* lddphys() */
42 #include <sys/ddi_impldefs.h>
43 #include <vm/hat.h>
44 #include <sys/pci/pci_obj.h>
45 
46 /*LINTLIBRARY*/
47 
48 static void
49 pci_sc_pg_inv(dev_info_t *dip, sc_t *sc_p, ddi_dma_impl_t *mp, off_t off,
50 	size_t len)
51 {
52 	dvma_addr_t dvma_addr, pg_off;
53 	volatile uint64_t *invl_va = sc_p->sc_invl_reg;
54 
55 	if (!len)
56 		len = mp->dmai_size;
57 
58 	pg_off = mp->dmai_offset;			/* start min */
59 	dvma_addr = MAX(off, pg_off);			/* lo */
60 	pg_off += mp->dmai_size;			/* end max */
61 	pg_off = MIN(off + len, pg_off);		/* hi */
62 	if (dvma_addr >= pg_off) {			/* lo >= hi ? */
63 		DEBUG4(DBG_SC, dip, "%x+%x out of window [%x,%x)\n",
64 		    off, len, mp->dmai_offset,
65 		    mp->dmai_offset + mp->dmai_size);
66 		return;
67 	}
68 
69 	len = pg_off - dvma_addr;			/* sz = hi - lo */
70 	dvma_addr += mp->dmai_mapping;			/* start addr */
71 	pg_off = dvma_addr & IOMMU_PAGE_OFFSET;		/* offset in 1st pg */
72 	len = IOMMU_BTOPR(len + pg_off);		/* # of pages */
73 	dvma_addr ^= pg_off;
74 
75 	DEBUG2(DBG_SC, dip, "addr=%x+%x pages: \n", dvma_addr, len);
76 	for (; len; len--, dvma_addr += IOMMU_PAGE_SIZE) {
77 		DEBUG1(DBG_SC|DBG_CONT, dip, " %x", dvma_addr);
78 		*invl_va = (uint64_t)dvma_addr;
79 	}
80 	DEBUG0(DBG_SC|DBG_CONT, dip, "\n");
81 }
82 
/*
 * pci_dma_sync_flag_wait - start a sync operation and spin until its
 * completion flag becomes non-zero.
 *
 * The physical address of the flag is chosen in this order:
 *	1. the handle specific flag, SYNC_BUF_PA(mp), when it is non-zero;
 *	2. a spot inside the local stack_buf when `onstack' is non-zero;
 *	3. the shared flag sc_p->sc_sync_flag_pa, used while holding
 *	   sc_p->sc_sync_mutex.
 *
 * The flag is cleared, its physical address is written to the sync
 * register (sc_p->sc_sync_reg_pa), and the flag is then polled until it
 * reads non-zero or pci_sync_buf_timeout has elapsed.  A flag that is
 * still zero after the timeout results in a CE_PANIC.
 *
 * stack_buf[0] doubles as a status byte carrying PCI_SYNC_FLAG_LOCKED and
 * PCI_SYNC_FLAG_FAILED.
 */
static void
pci_dma_sync_flag_wait(ddi_dma_impl_t *mp, sc_t *sc_p, uint32_t onstack)
{
	hrtime_t start_time;
	uint64_t loops = 0;
	uint64_t sync_flag_pa = SYNC_BUF_PA(mp);
	uint64_t sync_reg_pa = sc_p->sc_sync_reg_pa;
	uint8_t stack_buf[128];

	stack_buf[0] = DDI_SUCCESS;

	/* check for handle specific sync flag */
	if (sync_flag_pa)
		goto start;

	sync_flag_pa = sc_p->sc_sync_flag_pa;

	if (onstack) {
		/*
		 * Use the first PCI_SYNC_FLAG_SIZE aligned address that lies
		 * strictly above the start of stack_buf, so the status byte
		 * in stack_buf[0] is never part of the flag.
		 * NOTE(review): assumes PCI_SYNC_FLAG_SIZE equals
		 * 1 << PCI_SYNC_FLAG_SZSHIFT and that two flag sizes fit in
		 * stack_buf - confirm against the header.
		 */
		sync_flag_pa = va_to_pa(stack_buf);
		sync_flag_pa += PCI_SYNC_FLAG_SIZE;
		sync_flag_pa >>= PCI_SYNC_FLAG_SZSHIFT;
		sync_flag_pa <<= PCI_SYNC_FLAG_SZSHIFT;
		goto start;
	}
	/* shared flag: remember that the mutex must be dropped at the end */
	stack_buf[0] |= PCI_SYNC_FLAG_LOCKED;
	mutex_enter(&sc_p->sc_sync_mutex);
start:
	ASSERT(!(sync_flag_pa & PCI_SYNC_FLAG_SIZE - 1));
	stdphys(sync_flag_pa, 0);	/* reset sync flag to 0 */
					/* membar  #LoadStore|#StoreStore */
	stdphysio(sync_reg_pa, sync_flag_pa);
	start_time = gethrtime();

	/* poll the flag until it is set or the timeout expires */
	for (; gethrtime() - start_time < pci_sync_buf_timeout; loops++)
		if (lddphys(sync_flag_pa))
			goto done;

	/* one last look after the timeout before declaring failure */
	if (!lddphys(sync_flag_pa))
		stack_buf[0] |= PCI_SYNC_FLAG_FAILED;
done:
	DEBUG3(DBG_SC|DBG_CONT, 0, "flag wait loops=%lu ticks=%lu status=%x\n",
	    loops, gethrtime() - start_time, stack_buf[0]);

	if (stack_buf[0] & PCI_SYNC_FLAG_LOCKED)
		mutex_exit(&sc_p->sc_sync_mutex);

	if (stack_buf[0] & PCI_SYNC_FLAG_FAILED)
		cmn_err(CE_PANIC, "%p pci dma sync %lx %lx timeout!",
		    mp, sync_flag_pa, loops);
}
133 
/*
 * pci_dma_sync - synchronize a bound DMA handle.
 *
 * Returns DDI_FAILURE (after a warning) when the handle is not marked
 * DMAI_FLAGS_INUSE; every other path returns DDI_SUCCESS.  Handles marked
 * DMAI_FLAGS_NOSYNC return immediately.
 *
 * Consistent mappings (DDI_DMA_CONSISTENT set in dmai_rflags) only ever
 * call pci_pbm_dma_sync(); streaming mappings invalidate by context
 * and/or by page and may then wait on the sync flag.
 *
 * sync_flag either carries plain DDI sync flags (compared through
 * PCI_DMA_SYNC_DDI_FLAGS) or, when PCI_DMA_SYNC_EXT is set, the extended
 * PCI_DMA_SYNC_* bits tested below.
 *
 * Original summary table, kept as written:
 *
 * Cache	RW	Before	During		After
 *
 * STREAMING	read	no/no	pg/no		ctx,pg/no
 * STREAMING	write	no/no	pg/yes		ctx,pg/yes
 * CONSISTENT	read	no/no	yes,no/no	yes,no/no
 * CONSISTENT	write	no/no	yes,yes/yes	yes,yes/yes
 *
 * STREAMING	read	ctx,pg/no
 * STREAMING	write	ctx,pg/yes
 * CONSISTENT	read	yes,no/no
 * CONSISTENT	write	yes,yes/yes
 */
int
pci_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
	off_t off, size_t len, uint32_t sync_flag)
{
	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
	int ret = ddi_get_instance(dip);	/* instance first, status later */
	pci_t *pci_p = get_pci_soft_state(ret);
	pbm_t *pbm_p = pci_p->pci_pbm_p;
	uint32_t dev_flag = mp->dmai_rflags;
	sc_t *sc_p;

	DEBUG4(DBG_DMA_SYNC, dip, "%s%d flags=%x,%x\n", ddi_driver_name(rdip),
	    ddi_get_instance(rdip), dev_flag, sync_flag);
	DEBUG4(DBG_SC, dip, "dmai_mapping=%x, dmai_sz=%x off=%x len=%x\n",
	    mp->dmai_mapping, mp->dmai_size, off, len);
	DEBUG2(DBG_SC, dip, "mp=%p, ctx=%x\n", mp, MP2CTX(mp));

	if (!(mp->dmai_flags & DMAI_FLAGS_INUSE)) {
		cmn_err(CE_WARN, "Unbound dma handle %p from %s%d", mp,
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_FAILURE);
	}

	if (mp->dmai_flags & DMAI_FLAGS_NOSYNC)
		return (DDI_SUCCESS);

	if (!(dev_flag & DDI_DMA_CONSISTENT))
		goto streaming;

	/*
	 * Consistent mapping.  With extended flags, only a request that has
	 * PCI_DMA_SYNC_WRITE set and neither PCI_DMA_SYNC_BEFORE nor
	 * PCI_DMA_SYNC_POST set reaches pci_pbm_dma_sync().  With plain DDI
	 * flags, the handle must have DDI_DMA_READ set and the request must
	 * not be DDI_DMA_SYNC_FORDEV.
	 */
	if (sync_flag & PCI_DMA_SYNC_EXT) {
		if (sync_flag & (PCI_DMA_SYNC_BEFORE | PCI_DMA_SYNC_POST) ||
		    !(sync_flag & PCI_DMA_SYNC_WRITE))
			return (DDI_SUCCESS);
	} else {
		if (!(dev_flag & DDI_DMA_READ) ||
		    ((sync_flag & PCI_DMA_SYNC_DDI_FLAGS) ==
		    DDI_DMA_SYNC_FORDEV))
			return (DDI_SUCCESS);
	}

	pci_pbm_dma_sync(pbm_p, pbm_p->pbm_sync_ino);
	return (DDI_SUCCESS);

streaming:
	/* ret still holds the instance number here */
	ASSERT(pci_stream_buf_exists && (pci_stream_buf_enable & 1 << ret));
	sc_p = pci_p->pci_sc_p;
	/*
	 * From here on ret is non-zero until a context invalidation has
	 * returned 0; a non-zero ret triggers the page-by-page invalidation.
	 */
	ret = DDI_FAILURE;

	if (sync_flag & PCI_DMA_SYNC_EXT)
		goto ext;

	if (mp->dmai_flags & DMAI_FLAGS_CONTEXT && pci_sc_use_contexts)
		ret = pci_sc_ctx_inv(dip, sc_p, mp);
	if (ret)
		pci_sc_pg_inv(dip, sc_p, mp, off, len);

	/* wait for the flag only for DDI_DMA_READ handles not synced FORDEV */
	if ((dev_flag & DDI_DMA_READ) &&
	    ((sync_flag & PCI_DMA_SYNC_DDI_FLAGS) != DDI_DMA_SYNC_FORDEV))
		goto wait;

	return (DDI_SUCCESS);
ext:
	if (sync_flag & PCI_DMA_SYNC_BEFORE)
		return (DDI_SUCCESS);
	/* PCI_DMA_SYNC_BAR skips the invalidation step entirely */
	if (sync_flag & PCI_DMA_SYNC_BAR)
		goto wait_check;
	/* context invalidation is attempted only for PCI_DMA_SYNC_AFTER */
	if (sync_flag & PCI_DMA_SYNC_AFTER &&
	    mp->dmai_flags & DMAI_FLAGS_CONTEXT && pci_sc_use_contexts)
		ret = pci_sc_ctx_inv(dip, sc_p, mp);
	if (ret)
		pci_sc_pg_inv(dip, sc_p, mp, off, len);
wait_check:
	/* no flag wait for PCI_DMA_SYNC_POST or without PCI_DMA_SYNC_WRITE */
	if (sync_flag & PCI_DMA_SYNC_POST || !(sync_flag & PCI_DMA_SYNC_WRITE))
		return (DDI_SUCCESS);
wait:
	pci_dma_sync_flag_wait(mp, sc_p, sync_flag & PCI_DMA_SYNC_PRIVATE);
	return (DDI_SUCCESS);
}
225 
226 int
227 pci_dma_handle_clean(dev_info_t *rdip, ddi_dma_handle_t h)
228 {
229 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)h;
230 	if ((mp->dmai_flags & DMAI_FLAGS_INUSE) == 0)
231 		return (DDI_FAILURE);
232 	mp->dmai_rflags |= DMP_NOSYNC;
233 	mp->dmai_flags |= DMAI_FLAGS_NOSYNC;
234 	return (DDI_SUCCESS);
235 }
236 
237 /*
238  * pci_dma_allocmp - Allocate a pci dma implementation structure
239  *
240  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
241  * to hold unmodified device limits. The ddi_dma_attr inside the
242  * ddi_dma_impl structure is augumented with system limits to enhance
243  * DVMA performance at runtime. The unaugumented device limits saved
244  * right after (accessed through the DEV_ATTR macro) is used
245  * strictly for peer-to-peer transfers which do not obey system limits.
246  *
247  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
248  */
249 ddi_dma_impl_t *
250 pci_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
251 	caddr_t arg)
252 {
253 	ddi_dma_impl_t *mp;
254 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
255 
256 	/* Caution: we don't use zalloc to enhance performance! */
257 	if ((mp = kmem_alloc(sizeof (pci_dma_hdl_t), sleep)) == 0) {
258 		DEBUG0(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
259 		if (waitfp != DDI_DMA_DONTWAIT) {
260 			DEBUG0(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
261 			ddi_set_callback(waitfp, arg, &pci_kmem_clid);
262 		}
263 		return (mp);
264 	}
265 
266 	mp->dmai_rdip = rdip;
267 	mp->dmai_flags = 0;
268 	mp->dmai_pfnlst = NULL;
269 	mp->dmai_winlst = NULL;
270 
271 	/*
272 	 * kmem_alloc debug: the following fields are not zero-ed
273 	 * mp->dmai_mapping = 0;
274 	 * mp->dmai_size = 0;
275 	 * mp->dmai_offset = 0;
276 	 * mp->dmai_minxfer = 0;
277 	 * mp->dmai_burstsizes = 0;
278 	 * mp->dmai_ndvmapages = 0;
279 	 * mp->dmai_pool/roffset = 0;
280 	 * mp->dmai_rflags = 0;
281 	 * mp->dmai_inuse/flags
282 	 * mp->dmai_nwin = 0;
283 	 * mp->dmai_winsize = 0;
284 	 * mp->dmai_nexus_private/tte = 0;
285 	 * mp->dmai_iopte/pfnlst
286 	 * mp->dmai_sbi/pfn0 = 0;
287 	 * mp->dmai_minfo/winlst/fdvma
288 	 * mp->dmai_rdip
289 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
290 	 * mp->dmai_cookie = 0;
291 	 */
292 
293 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
294 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
295 	mp->dmai_fault = 0;
296 	mp->dmai_fault_check = NULL;
297 	mp->dmai_fault_notify = NULL;
298 
299 	mp->dmai_error.err_ena = 0;
300 	mp->dmai_error.err_status = DDI_FM_OK;
301 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
302 	mp->dmai_error.err_ontrap = NULL;
303 	mp->dmai_error.err_fep = NULL;
304 	mp->dmai_error.err_cf = NULL;
305 	ndi_fmc_insert(rdip, DMA_HANDLE, mp, NULL);
306 
307 	SYNC_BUF_PA(mp) = 0ull;
308 	return (mp);
309 }
310 
311 void
312 pci_dma_freemp(ddi_dma_impl_t *mp)
313 {
314 	ndi_fmc_remove(mp->dmai_rdip, DMA_HANDLE, mp);
315 	if (mp->dmai_ndvmapages > 1)
316 		pci_dma_freepfn(mp);
317 	if (mp->dmai_winlst)
318 		pci_dma_freewin(mp);
319 	kmem_free(mp, sizeof (pci_dma_hdl_t));
320 }
321 
322 void
323 pci_dma_freepfn(ddi_dma_impl_t *mp)
324 {
325 	void *addr = mp->dmai_pfnlst;
326 	ASSERT(!PCI_DMA_CANRELOC(mp));
327 	if (addr) {
328 		size_t npages = mp->dmai_ndvmapages;
329 		if (npages > 1)
330 			kmem_free(addr, npages * sizeof (iopfn_t));
331 		mp->dmai_pfnlst = NULL;
332 	}
333 	mp->dmai_ndvmapages = 0;
334 }
335 
336 /*
337  * pci_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
338  *			and convert dmareq->dmar_limits to mp->dmai_attr
339  *
340  * ddi_dma_impl_t member modified     input
341  * ------------------------------------------------------------------------
342  * mp->dmai_minxfer		    - dev
343  * mp->dmai_burstsizes		    - dev
344  * mp->dmai_flags		    - no limit? peer-to-peer only?
345  *
346  * ddi_dma_attr member modified       input
347  * ------------------------------------------------------------------------
348  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
349  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
350  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
351  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
352  * mp->dmai_attr.dma_attr_align     - 1		(no alignment restriction)
353  *
354  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
355  */
356 ddi_dma_impl_t *
357 pci_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, iommu_t *iommu_p,
358 	ddi_dma_req_t *dmareq)
359 {
360 	ddi_dma_impl_t *mp;
361 	ddi_dma_attr_t *attr_p;
362 	uint64_t syslo		= iommu_p->iommu_dvma_base;
363 	uint64_t syshi		= iommu_p->iommu_dvma_end;
364 	uint64_t fasthi		= iommu_p->iommu_dvma_fast_end;
365 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
366 	uint32_t count_max	= lim_p->dlim_cntr_max;
367 	uint64_t lo		= lim_p->dlim_addr_lo;
368 	uint64_t hi		= lim_p->dlim_addr_hi;
369 	if (hi <= lo) {
370 		DEBUG0(DBG_DMA_MAP, dip, "Bad limits\n");
371 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
372 	}
373 	if (!count_max)
374 		count_max--;
375 
376 	if (!(mp = pci_dma_allocmp(dip, rdip, dmareq->dmar_fp,
377 	    dmareq->dmar_arg)))
378 		return (NULL);
379 
380 	/* store original dev input at the 2nd ddi_dma_attr */
381 	attr_p = DEV_ATTR(mp);
382 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
383 	SET_DMAALIGN(attr_p, 1);
384 
385 	lo = MAX(lo, syslo);
386 	hi = MIN(hi, syshi);
387 	if (hi <= lo)
388 		mp->dmai_flags |= DMAI_FLAGS_PEER_ONLY;
389 	count_max = MIN(count_max, hi - lo);
390 
391 	if (DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
392 		mp->dmai_flags |= DMAI_FLAGS_NOFASTLIMIT |
393 		    DMAI_FLAGS_NOSYSLIMIT;
394 	else {
395 		if (DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
396 			mp->dmai_flags |= DMAI_FLAGS_NOFASTLIMIT;
397 	}
398 	if (PCI_DMA_NOCTX(rdip))
399 		mp->dmai_flags |= DMAI_FLAGS_NOCTX;
400 
401 	/* store augumented dev input to mp->dmai_attr */
402 	mp->dmai_minxfer	= lim_p->dlim_minxfer;
403 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
404 	attr_p = &mp->dmai_attr;
405 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
406 	SET_DMAALIGN(attr_p, 1);
407 	return (mp);
408 }
409 
410 /*
411  * pci_dma_attr2hdl
412  *
413  * This routine is called from the alloc handle entry point to sanity check the
414  * dma attribute structure.
415  *
416  * use by: pci_dma_allochdl()
417  *
418  * return value:
419  *
420  *	DDI_SUCCESS		- on success
421  *	DDI_DMA_BADATTR		- attribute has invalid version number
422  *				  or address limits exclude dvma space
423  */
424 int
425 pci_dma_attr2hdl(pci_t *pci_p, ddi_dma_impl_t *mp)
426 {
427 	iommu_t *iommu_p = pci_p->pci_iommu_p;
428 	uint64_t syslo, syshi;
429 	ddi_dma_attr_t *attrp		= DEV_ATTR(mp);
430 	uint64_t hi		= attrp->dma_attr_addr_hi;
431 	uint64_t lo		= attrp->dma_attr_addr_lo;
432 	uint64_t align		= attrp->dma_attr_align;
433 	uint64_t nocross	= attrp->dma_attr_seg;
434 	uint64_t count_max	= attrp->dma_attr_count_max;
435 
436 	DEBUG3(DBG_DMA_ALLOCH, pci_p->pci_dip, "attrp=%p cntr_max=%x.%08x\n",
437 	    attrp, HI32(count_max), LO32(count_max));
438 	DEBUG4(DBG_DMA_ALLOCH, pci_p->pci_dip, "hi=%x.%08x lo=%x.%08x\n",
439 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
440 	DEBUG4(DBG_DMA_ALLOCH, pci_p->pci_dip, "seg=%x.%08x align=%x.%08x\n",
441 	    HI32(nocross), LO32(nocross), HI32(align), LO32(align));
442 
443 	if (!nocross)
444 		nocross--;
445 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
446 
447 		DEBUG0(DBG_DMA_ALLOCH, pci_p->pci_dip, "bypass mode\n");
448 		/* if tomatillo ver <= 2.3 don't allow bypass */
449 		if (tomatillo_disallow_bypass)
450 			return (DDI_DMA_BADATTR);
451 
452 		mp->dmai_flags |= DMAI_FLAGS_BYPASSREQ;
453 		if (nocross != UINT64_MAX)
454 			return (DDI_DMA_BADATTR);
455 		if (align && (align > IOMMU_PAGE_SIZE))
456 			return (DDI_DMA_BADATTR);
457 		align = 1; /* align on 1 page boundary */
458 		syslo = iommu_p->iommu_dma_bypass_base;
459 		syshi = iommu_p->iommu_dma_bypass_end;
460 
461 	} else { /* IOMMU_XLATE or PEER_TO_PEER */
462 		align = MAX(align, IOMMU_PAGE_SIZE) - 1;
463 		if ((align & nocross) != align) {
464 			dev_info_t *rdip = mp->dmai_rdip;
465 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
466 			    NAMEINST(rdip));
467 			return (DDI_DMA_BADATTR);
468 		}
469 		align = IOMMU_BTOP(align + 1);
470 		syslo = iommu_p->iommu_dvma_base;
471 		syshi = iommu_p->iommu_dvma_end;
472 	}
473 	if (hi <= lo) {
474 		dev_info_t *rdip = mp->dmai_rdip;
475 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
476 		return (DDI_DMA_BADATTR);
477 	}
478 	lo = MAX(lo, syslo);
479 	hi = MIN(hi, syshi);
480 	if (!count_max)
481 		count_max--;
482 
483 	DEBUG4(DBG_DMA_ALLOCH, pci_p->pci_dip, "hi=%x.%08x, lo=%x.%08x\n",
484 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
485 	if (hi <= lo) { /* peer transfers cannot have alignment & nocross */
486 		dev_info_t *rdip = mp->dmai_rdip;
487 		cmn_err(CE_WARN, "%s%d peer only dev %p", NAMEINST(rdip), mp);
488 		if ((nocross < UINT32_MAX) || (align > 1)) {
489 			cmn_err(CE_WARN, "%s%d peer only device bad attr",
490 			    NAMEINST(rdip));
491 			return (DDI_DMA_BADATTR);
492 		}
493 		mp->dmai_flags |= DMAI_FLAGS_PEER_ONLY;
494 	} else /* set practical counter_max value */
495 		count_max = MIN(count_max, hi - lo);
496 
497 	if (DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
498 		mp->dmai_flags |= DMAI_FLAGS_NOSYSLIMIT |
499 		    DMAI_FLAGS_NOFASTLIMIT;
500 	else {
501 		syshi = iommu_p->iommu_dvma_fast_end;
502 		if (DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
503 			mp->dmai_flags |= DMAI_FLAGS_NOFASTLIMIT;
504 	}
505 	if (PCI_DMA_NOCTX(mp->dmai_rdip))
506 		mp->dmai_flags |= DMAI_FLAGS_NOCTX;
507 
508 	mp->dmai_minxfer	= attrp->dma_attr_minxfer;
509 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
510 	attrp = &mp->dmai_attr;
511 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
512 	return (DDI_SUCCESS);
513 }
514 
515 /*
516  * set up consistent dma flags according to hardware capability
517  */
518 uint32_t
519 pci_dma_consist_check(uint32_t req_flags, pbm_t *pbm_p)
520 {
521 	if (!pci_stream_buf_enable || !pci_stream_buf_exists)
522 		req_flags |= DDI_DMA_CONSISTENT;
523 	if (req_flags & DDI_DMA_CONSISTENT && !pbm_p->pbm_sync_reg_pa)
524 		req_flags |= DMP_NOSYNC;
525 	return (req_flags);
526 }
527 
/*
 * TGT_PFN_INBETWEEN - true when bgn <= pfn <= end (both ends inclusive).
 * Every argument is parenthesized so that expression arguments keep
 * their own precedence; pfn is evaluated twice.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
529 
530 /*
531  * pci_dma_type - determine which of the three types DMA (peer-to-peer,
532  *		iommu bypass, or iommu translate) we are asked to do.
533  *		Also checks pfn0 and rejects any non-peer-to-peer
534  *		requests for peer-only devices.
535  *
536  *	return values:
537  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
538  *		DDI_SUCCESS
539  *
540  *	dma handle members affected (set on exit):
541  *	mp->dmai_object		- dmareq->dmar_object
542  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
543  *	mp->dmai_flags   	- DMA type
544  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
545  *	mp->dmai_roffset 	- initialized to starting IOMMU page offset
546  *	mp->dmai_ndvmapages	- # of total IOMMU pages of entire object
547  *	mp->pdh_sync_buf_pa	- dma sync buffer PA is DMA flow is supported
548  */
549 int
550 pci_dma_type(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
551 {
552 	dev_info_t *dip = pci_p->pci_dip;
553 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
554 	pbm_t *pbm_p = pci_p->pci_pbm_p;
555 	page_t **pplist;
556 	struct as *as_p;
557 	uint32_t offset;
558 	caddr_t vaddr;
559 	pfn_t pfn0;
560 
561 	mp->dmai_rflags = pci_dma_consist_check(dmareq->dmar_flags, pbm_p);
562 	mp->dmai_flags |= mp->dmai_rflags & DMP_NOSYNC ? DMAI_FLAGS_NOSYNC : 0;
563 
564 	switch (dobj_p->dmao_type) {
565 	case DMA_OTYP_BUFVADDR:
566 	case DMA_OTYP_VADDR: {
567 		vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
568 		pplist = dobj_p->dmao_obj.virt_obj.v_priv;
569 		as_p = dobj_p->dmao_obj.virt_obj.v_as;
570 		if (as_p == NULL)
571 			as_p = &kas;
572 
573 		DEBUG2(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
574 		offset = (ulong_t)vaddr & IOMMU_PAGE_OFFSET;
575 
576 		if (pplist) {				/* shadow list */
577 			mp->dmai_flags |= DMAI_FLAGS_PGPFN;
578 			ASSERT(PAGE_LOCKED(*pplist));
579 			pfn0 = page_pptonum(*pplist);
580 		} else if (pci_dvma_remap_enabled && as_p == &kas &&
581 		    dobj_p->dmao_type != DMA_OTYP_BUFVADDR) {
582 			int (*waitfp)(caddr_t) = dmareq->dmar_fp;
583 			uint_t flags = ((waitfp == DDI_DMA_SLEEP)?
584 			    HAC_SLEEP : HAC_NOSLEEP) | HAC_PAGELOCK;
585 			int ret;
586 
587 			ret = hat_add_callback(pci_dvma_cbid, vaddr,
588 			    IOMMU_PAGE_SIZE - offset, flags, mp, &pfn0,
589 			    MP_HAT_CB_COOKIE_PTR(mp, 0));
590 
591 			if (pfn0 == PFN_INVALID && ret == ENOMEM) {
592 				ASSERT(waitfp != DDI_DMA_SLEEP);
593 				if (waitfp != DDI_DMA_DONTWAIT) {
594 					ddi_set_callback(waitfp,
595 					    dmareq->dmar_arg,
596 					    &pci_kmem_clid);
597 					return (DDI_DMA_NORESOURCES);
598 					}
599 			}
600 			mp->dmai_flags |= DMAI_FLAGS_RELOC;
601 		} else
602 			pfn0 = hat_getpfnum(as_p->a_hat, vaddr);
603 		}
604 		break;
605 
606 	case DMA_OTYP_PAGES:
607 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
608 		mp->dmai_flags |= DMAI_FLAGS_PGPFN;
609 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
610 		ASSERT(PAGE_LOCKED(dobj_p->dmao_obj.pp_obj.pp_pp));
611 		break;
612 
613 	case DMA_OTYP_PADDR:
614 	default:
615 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
616 		    NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
617 		return (DDI_DMA_NOMAPPING);
618 	}
619 	if (pfn0 == PFN_INVALID) {
620 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
621 		    NAMEINST(dip), dobj_p);
622 		return (DDI_DMA_NOMAPPING);
623 	}
624 	if (TGT_PFN_INBETWEEN(pfn0, pbm_p->pbm_base_pfn, pbm_p->pbm_last_pfn)) {
625 		mp->dmai_flags |= DMAI_FLAGS_PEER_TO_PEER;
626 		goto done;	/* leave bypass and dvma flag as 0 */
627 	}
628 	if (PCI_DMA_ISPEERONLY(mp)) {
629 		dev_info_t *rdip = mp->dmai_rdip;
630 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
631 		return (DDI_DMA_NOMAPPING);
632 	}
633 	mp->dmai_flags |= (mp->dmai_flags & DMAI_FLAGS_BYPASSREQ) ?
634 	    DMAI_FLAGS_BYPASS : DMAI_FLAGS_DVMA;
635 done:
636 	mp->dmai_object	 = *dobj_p;			/* whole object    */
637 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
638 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
639 	mp->dmai_ndvmapages = IOMMU_BTOPR(offset + mp->dmai_object.dmao_size);
640 
641 	return (DDI_SUCCESS);
642 }
643 
644 /*
645  * pci_dma_pgpfn - set up pfnlst array according to pages
646  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
647  */
648 /*ARGSUSED*/
649 static int
650 pci_dma_pgpfn(pci_t *pci_p, ddi_dma_impl_t *mp, uint_t npages)
651 {
652 	int i;
653 #ifdef DEBUG
654 	dev_info_t *dip = pci_p->pci_dip;
655 #endif
656 	switch (mp->dmai_object.dmao_type) {
657 	case DMA_OTYP_BUFVADDR:
658 	case DMA_OTYP_VADDR: {
659 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
660 		DEBUG2(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
661 		    pplist, npages);
662 		for (i = 1; i < npages; i++) {
663 			iopfn_t pfn = page_pptonum(pplist[i]);
664 			ASSERT(PAGE_LOCKED(pplist[i]));
665 			PCI_SET_MP_PFN1(mp, i, pfn);
666 			DEBUG1(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
667 		}
668 		DEBUG0(DBG_DMA_MAP|DBG_CONT, dip, "\n");
669 		}
670 		break;
671 
672 	case DMA_OTYP_PAGES: {
673 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
674 		DEBUG1(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
675 		for (i = 1; i < npages; i++, pp = pp->p_next) {
676 			iopfn_t pfn = page_pptonum(pp);
677 			ASSERT(PAGE_LOCKED(pp));
678 			PCI_SET_MP_PFN1(mp, i, pfn);
679 			DEBUG1(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
680 		}
681 		DEBUG0(DBG_DMA_MAP|DBG_CONT, dip, "\n");
682 		}
683 		break;
684 
685 	default:	/* check is already done by pci_dma_type */
686 		ASSERT(0);
687 		break;
688 	}
689 	return (DDI_SUCCESS);
690 }
691 
692 /*
693  * pci_dma_vapfn - set up pfnlst array according to VA
694  *	VA/size pair: <normal, bypass, peer-to-peer>
695  *	pfn0 is skipped as it is already done.
696  *	In this case, the cached pfn0 is used to fill pfnlst[0]
697  */
698 static int
699 pci_dma_vapfn(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
700 	uint_t npages)
701 {
702 	dev_info_t *dip = pci_p->pci_dip;
703 	int i;
704 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
705 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
706 	caddr_t sva;
707 	int needcb = 0;
708 
709 	sva = (caddr_t)(((uintptr_t)mp->dmai_object.dmao_obj.virt_obj.v_addr +
710 	    IOMMU_PAGE_SIZE) & IOMMU_PAGE_MASK);
711 
712 	if (pci_dvma_remap_enabled && hat_p == kas.a_hat &&
713 	    mp->dmai_object.dmao_type != DMA_OTYP_BUFVADDR)
714 		needcb = 1;
715 
716 	for (vaddr = sva, i = 1; i < npages; i++, vaddr += IOMMU_PAGE_SIZE) {
717 		pfn_t pfn;
718 
719 		if (needcb) {
720 			int (*waitfp)(caddr_t) = dmareq->dmar_fp;
721 			uint_t flags = ((waitfp == DDI_DMA_SLEEP)?
722 			    HAC_SLEEP : HAC_NOSLEEP) | HAC_PAGELOCK;
723 			int ret;
724 
725 			ret = hat_add_callback(pci_dvma_cbid, vaddr,
726 			    IOMMU_PAGE_SIZE, flags, mp, &pfn,
727 			    MP_HAT_CB_COOKIE_PTR(mp, i));
728 
729 			if (pfn == PFN_INVALID && ret == ENOMEM) {
730 				ASSERT(waitfp != DDI_DMA_SLEEP);
731 				if (waitfp != DDI_DMA_DONTWAIT)
732 					ddi_set_callback(waitfp,
733 					    dmareq->dmar_arg, &pci_kmem_clid);
734 				return (DDI_DMA_NORESOURCES);
735 			}
736 		} else
737 			pfn = hat_getpfnum(hat_p, vaddr);
738 		if (pfn == PFN_INVALID)
739 			goto err_badpfn;
740 		PCI_SET_MP_PFN1(mp, i, (iopfn_t)pfn);
741 		DEBUG3(DBG_DMA_MAP, dip, "pci_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
742 		    mp, i, (iopfn_t)pfn);
743 	}
744 	return (DDI_SUCCESS);
745 err_badpfn:
746 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
747 	return (DDI_DMA_NOMAPPING);
748 }
749 
750 /*
751  * pci_dma_pfn - Fills pfn list for all pages being DMA-ed.
752  *
753  * dependencies:
754  *	mp->dmai_ndvmapages	- set to total # of dma pages
755  *
756  * return value:
757  *	DDI_SUCCESS
758  *	DDI_DMA_NOMAPPING
759  */
760 int
761 pci_dma_pfn(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
762 {
763 	uint32_t npages = mp->dmai_ndvmapages;
764 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
765 	int i, ret, peer = PCI_DMA_ISPTP(mp);
766 
767 	pbm_t *pbm_p = pci_p->pci_pbm_p;
768 	iopfn_t pfn_base = pbm_p->pbm_base_pfn;
769 	iopfn_t pfn_last = pbm_p->pbm_last_pfn;
770 	iopfn_t pfn_adj = peer ? pfn_base : 0;
771 
772 	DEBUG2(DBG_DMA_MAP, pci_p->pci_dip, "pci_dma_pfn: mp=%p pfn0=%x\n",
773 	    mp, MP_PFN0(mp) - pfn_adj);
774 	/* 1 page: no array alloc/fill, no mixed mode check */
775 	if (npages == 1) {
776 		PCI_SET_MP_PFN(mp, 0, MP_PFN0(mp) - pfn_adj);
777 		return (DDI_SUCCESS);
778 	}
779 	/* allocate pfn array */
780 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (iopfn_t),
781 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
782 		if (waitfp != DDI_DMA_DONTWAIT)
783 			ddi_set_callback(waitfp, dmareq->dmar_arg,
784 			    &pci_kmem_clid);
785 		return (DDI_DMA_NORESOURCES);
786 	}
787 	/* fill pfn array */
788 	PCI_SET_MP_PFN(mp, 0, MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
789 	if ((ret = PCI_DMA_ISPGPFN(mp) ? pci_dma_pgpfn(pci_p, mp, npages) :
790 	    pci_dma_vapfn(pci_p, dmareq, mp, npages)) != DDI_SUCCESS)
791 		goto err;
792 
793 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
794 	for (i = 1; i < npages; i++) {
795 		iopfn_t pfn = PCI_GET_MP_PFN1(mp, i);
796 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
797 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
798 			    NAMEINST(mp->dmai_rdip), MP_PFN0(mp), pfn);
799 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
800 			goto err;
801 		}
802 		DEBUG3(DBG_DMA_MAP, pci_p->pci_dip,
803 		    "pci_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
804 		if (pfn_adj)
805 			PCI_SET_MP_PFN1(mp, i, pfn - pfn_adj);
806 	}
807 	return (DDI_SUCCESS);
808 err:
809 	pci_dvma_unregister_callbacks(pci_p, mp);
810 	pci_dma_freepfn(mp);
811 	return (ret);
812 }
813 
814 /*
815  * pci_dvma_win() - trim requested DVMA size down to window size
816  *	The 1st window starts from offset and ends at page-aligned boundary.
817  *	From the 2nd window on, each window starts and ends at page-aligned
818  *	boundary except the last window ends at wherever requested.
819  *
820  *	accesses the following mp-> members:
821  *	mp->dmai_attr.dma_attr_count_max
822  *	mp->dmai_attr.dma_attr_seg
823  *	mp->dmai_roffset   - start offset of 1st window
824  *	mp->dmai_rflags (redzone)
825  *	mp->dmai_ndvmapages (for 1 page fast path)
826  *
827  *	sets the following mp-> members:
828  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
829  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
830  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
831  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
832  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
833  *
834  *	fixed - not changed across different DMA windows
835  */
836 /*ARGSUSED*/
837 int
838 pci_dvma_win(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
839 {
840 	uint32_t redzone_sz	= HAS_REDZONE(mp) ? IOMMU_PAGE_SIZE : 0;
841 	size_t obj_sz	= mp->dmai_object.dmao_size;
842 	size_t xfer_sz;
843 	ulong_t pg_off;
844 
845 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
846 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
847 		mp->dmai_size = obj_sz;
848 		mp->dmai_winsize = IOMMU_PAGE_SIZE;
849 		mp->dmai_nwin = 1;
850 		goto done;
851 	}
852 
853 	pg_off	= mp->dmai_roffset;
854 	xfer_sz	= obj_sz + redzone_sz;
855 
856 	/* include redzone in nocross check */
857 	{
858 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
859 		if (xfer_sz + pg_off - 1 > nocross)
860 			xfer_sz = nocross - pg_off + 1;
861 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
862 			DEBUG5(DBG_DMA_MAP, pci_p->pci_dip,
863 			    "nocross too small %lx(%lx)+%lx+%x < %" PRIx64 "\n",
864 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
865 			return (DDI_DMA_TOOBIG);
866 		}
867 	}
868 	xfer_sz -= redzone_sz;	/* restore transfer size  */
869 	/* check counter max */
870 	{
871 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
872 		if (xfer_sz - 1 > count_max)
873 			xfer_sz = count_max + 1;
874 	}
875 	if (xfer_sz >= obj_sz) {
876 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
877 		mp->dmai_size = xfer_sz;
878 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, IOMMU_PAGE_SIZE);
879 		mp->dmai_nwin = 1;
880 		goto done;
881 	}
882 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
883 		DEBUG4(DBG_DMA_MAP, pci_p->pci_dip,
884 		    "too big: %lx+%lx+%x > %lx\n",
885 		    obj_sz, pg_off, redzone_sz, xfer_sz);
886 		return (DDI_DMA_TOOBIG);
887 	}
888 
889 	xfer_sz = IOMMU_PTOB(IOMMU_BTOP(xfer_sz + pg_off)); /* page align */
890 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
891 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
892 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
893 done:
894 	mp->dmai_winlst = NULL;
895 	dump_dma_handle(DBG_DMA_MAP, pci_p->pci_dip, mp);
896 	return (DDI_SUCCESS);
897 }
898 
/*
 * fast track cache entry to iommu context: the entry is shifted left by
 * 3, bits 3-5 of the result are forced to 1 (0x38) and the lower 3 bits
 * of the entry are kept in bits 0-2.  For a 9-bit cache entry this places
 * three 1 bits between its upper 6 bits and its lower 3 bits.
 */
#define	IOMMU_FCE_TO_CTX(i)	\
	(((i) << 3) | 0x38 | ((i) & 0x7))
904 
/*
 * pci_dvma_map_fast - attempts to map fast trackable DVMA
 *
 * Claims one entry of the DVMA page cache by scanning the lock bytes in
 * iommu_p->iommu_dvma_cache_locks with ldstub(), starting at
 * iommu_dvma_addr_scan_start and, if nothing was found up to the last
 * entry, once more from entry 0.  The claimed entry selects a cluster of
 * pci_dvma_page_cache_clustsz TTEs in the TSB; the pages of the current
 * window are mapped there with the TTE template of the handle.
 *
 * On success the handle gets dmai_mapping (page offset | DVMA address),
 * dmai_offset = 0, DMAI_FLAGS_FASTTRACK (and DMAI_FLAGS_CONTEXT when a
 * context is used), and the TTE template is saved for unmapping.
 *
 * return values:
 *	DDI_SUCCESS
 *	DDI_DMA_NORESOURCES	- every cache entry is locked
 */
int
pci_dvma_map_fast(iommu_t *iommu_p, ddi_dma_impl_t *mp)
{
	uint_t clustsz = pci_dvma_page_cache_clustsz;
	uint_t entries = pci_dvma_page_cache_entries;
	uint64_t *tte_addr;
	uint64_t tte = GET_TTE_TEMPLATE(mp);
	int i = iommu_p->iommu_dvma_addr_scan_start;
	uint8_t *lock_addr = iommu_p->iommu_dvma_cache_locks + i;
	iopfn_t *pfn_addr;
	dvma_addr_t dvma_pg;
	size_t npages = IOMMU_BTOP(mp->dmai_winsize);
#ifdef DEBUG
	dev_info_t *dip = mp->dmai_rdip;
#endif
	extern uint8_t ldstub(uint8_t *);
	ASSERT(IOMMU_PTOB(npages) == mp->dmai_winsize);
	ASSERT(npages + HAS_REDZONE(mp) <= clustsz);

	/*
	 * Scan for a free lock byte.
	 * NOTE(review): presumably ldstub() atomically sets the byte and
	 * returns its previous value, non-zero meaning "already taken" -
	 * confirm against its definition.
	 */
	for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
		;
	if (i >= entries) {
		/* nothing free up to the end: retry from the first entry */
		lock_addr = iommu_p->iommu_dvma_cache_locks;
		i = 0;
		for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
			;
		if (i >= entries) {
#ifdef PCI_DMA_PROF
			pci_dvmaft_exhaust++;
#endif
			return (DDI_DMA_NORESOURCES);
		}
	}
	/* next scan starts after this entry; the mask assumes a power of 2 */
	iommu_p->iommu_dvma_addr_scan_start = (i + 1) & (entries - 1);
	if (PCI_DMA_USECTX(mp)) {
		/* derive the context from the cache entry index */
		dvma_context_t ctx = IOMMU_FCE_TO_CTX(i);
		tte |= IOMMU_CTX2TTE(ctx);
		mp->dmai_flags |= DMAI_FLAGS_CONTEXT;
		DEBUG1(DBG_DMA_MAP, dip, "fast: ctx=0x%x\n", ctx);
	}
	/* from here on i is the index of the first page of the cluster */
	i *= clustsz;
	tte_addr = iommu_p->iommu_tsb_vaddr + i;
	dvma_pg = iommu_p->dvma_base_pg + i;
#ifdef DEBUG
	for (i = 0; i < clustsz; i++)
		ASSERT(TTE_IS_INVALID(tte_addr[i]));
#endif
	*tte_addr = tte | IOMMU_PTOB(MP_PFN0(mp)); /* map page 0 */
	DEBUG5(DBG_DMA_MAP, dip, "fast %p:dvma_pg=%x tte0(%p)=%08x.%08x\n", mp,
	    dvma_pg, tte_addr, HI32(*tte_addr), LO32(*tte_addr));
	if (npages == 1)
		goto tte_done;
	/* map the remaining pages from the pfn list, starting at entry 1 */
	pfn_addr = PCI_GET_MP_PFN1_ADDR(mp); /* short iommu_map_pages() */
	for (tte_addr++, i = 1; i < npages; i++, tte_addr++, pfn_addr++) {
		*tte_addr = tte | IOMMU_PTOB(*pfn_addr);
		DEBUG5(DBG_DMA_MAP, dip, "fast %p:tte(%p, %p)=%08x.%08x\n", mp,
		    tte_addr, pfn_addr, HI32(*tte_addr), LO32(*tte_addr));
	}
tte_done:
#ifdef PCI_DMA_PROF
	pci_dvmaft_success++;
#endif
	mp->dmai_mapping = mp->dmai_roffset | IOMMU_PTOB(dvma_pg);
	mp->dmai_offset = 0;
	mp->dmai_flags |= DMAI_FLAGS_FASTTRACK;
	PCI_SAVE_MP_TTE(mp, tte);	/* save TTE template for unmapping */
	if (DVMA_DBG_ON(iommu_p))
		pci_dvma_alloc_debug(iommu_p, (char *)mp->dmai_mapping,
		    mp->dmai_size, mp);
	return (DDI_SUCCESS);
}
979 
980 /*
981  * pci_dvma_map: map non-fasttrack DMA
982  *		Use quantum cache if single page DMA.
983  */
984 int
985 pci_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, iommu_t *iommu_p)
986 {
987 	uint_t npages = PCI_DMA_WINNPGS(mp);
988 	dvma_addr_t dvma_pg, dvma_pg_index;
989 	void *dvma_addr;
990 	uint64_t tte = GET_TTE_TEMPLATE(mp);
991 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
992 #ifdef DEBUG
993 	dev_info_t *dip = mp->dmai_rdip;
994 #endif
995 	/*
996 	 * allocate dvma space resource and map in the first window.
997 	 * (vmem_t *vmp, size_t size,
998 	 *	size_t align, size_t phase, size_t nocross,
999 	 *	void *minaddr, void *maxaddr, int vmflag)
1000 	 */
1001 	if ((npages == 1) && !HAS_REDZONE(mp) && HAS_NOSYSLIMIT(mp)) {
1002 		dvma_addr = vmem_alloc(iommu_p->iommu_dvma_map,
1003 		    IOMMU_PAGE_SIZE, sleep);
1004 		mp->dmai_flags |= DMAI_FLAGS_VMEMCACHE;
1005 #ifdef PCI_DMA_PROF
1006 		pci_dvma_vmem_alloc++;
1007 #endif
1008 	} else {
1009 		dvma_addr = vmem_xalloc(iommu_p->iommu_dvma_map,
1010 		    IOMMU_PTOB(npages + HAS_REDZONE(mp)),
1011 		    MAX(mp->dmai_attr.dma_attr_align, IOMMU_PAGE_SIZE),
1012 		    0,
1013 		    mp->dmai_attr.dma_attr_seg + 1,
1014 		    (void *)mp->dmai_attr.dma_attr_addr_lo,
1015 		    (void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
1016 		    sleep);
1017 #ifdef PCI_DMA_PROF
1018 		pci_dvma_vmem_xalloc++;
1019 #endif
1020 	}
1021 	dvma_pg = IOMMU_BTOP((ulong_t)dvma_addr);
1022 	dvma_pg_index = dvma_pg - iommu_p->dvma_base_pg;
1023 	DEBUG2(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
1024 	    dvma_pg, dvma_pg_index);
1025 	if (dvma_pg == 0)
1026 		goto noresource;
1027 
1028 	/* allocate DVMA context */
1029 	if ((npages >= pci_context_minpages) && PCI_DMA_USECTX(mp)) {
1030 		dvma_context_t ctx;
1031 		if (ctx = pci_iommu_get_dvma_context(iommu_p, dvma_pg_index)) {
1032 			tte |= IOMMU_CTX2TTE(ctx);
1033 			mp->dmai_flags |= DMAI_FLAGS_CONTEXT;
1034 		}
1035 	}
1036 	mp->dmai_mapping = mp->dmai_roffset | IOMMU_PTOB(dvma_pg);
1037 	mp->dmai_offset = 0;
1038 	PCI_SAVE_MP_TTE(mp, tte);	/* mp->dmai_tte = tte */
1039 	iommu_map_pages(iommu_p, mp, dvma_pg, npages, 0);
1040 	return (DDI_SUCCESS);
1041 noresource:
1042 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
1043 		DEBUG0(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
1044 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
1045 		    &iommu_p->iommu_dvma_clid);
1046 	}
1047 	DEBUG0(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
1048 	return (DDI_DMA_NORESOURCES);
1049 }
1050 
1051 void
1052 pci_dvma_unmap(iommu_t *iommu_p, ddi_dma_impl_t *mp)
1053 {
1054 	size_t npages;
1055 	dvma_addr_t dvma_addr = (dvma_addr_t)mp->dmai_mapping;
1056 	dvma_addr_t dvma_pg = IOMMU_BTOP(dvma_addr);
1057 	dvma_addr = IOMMU_PTOB(dvma_pg);
1058 
1059 	if (mp->dmai_flags & DMAI_FLAGS_FASTTRACK) {
1060 		iopfn_t index = dvma_pg - iommu_p->dvma_base_pg;
1061 		ASSERT(index % pci_dvma_page_cache_clustsz == 0);
1062 		index /= pci_dvma_page_cache_clustsz;
1063 		ASSERT(index < pci_dvma_page_cache_entries);
1064 		iommu_p->iommu_dvma_cache_locks[index] = 0;
1065 #ifdef PCI_DMA_PROF
1066 		pci_dvmaft_free++;
1067 #endif
1068 		return;
1069 	}
1070 	npages = IOMMU_BTOP(mp->dmai_winsize) + HAS_REDZONE(mp);
1071 	pci_vmem_free(iommu_p, mp, (void *)dvma_addr, npages);
1072 
1073 	if (mp->dmai_flags & DMAI_FLAGS_CONTEXT)
1074 		pci_iommu_free_dvma_context(iommu_p, MP2CTX(mp));
1075 }
1076 
1077 void
1078 pci_dma_sync_unmap(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp)
1079 {
1080 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
1081 	iommu_t *iommu_p = pci_p->pci_iommu_p;
1082 	uint64_t sync_buf_save = SYNC_BUF_PA(mp);
1083 	uint32_t fast_track = mp->dmai_flags & DMAI_FLAGS_FASTTRACK;
1084 
1085 	if (fast_track) {
1086 		dvma_addr_t dvma_pg = IOMMU_BTOP(mp->dmai_mapping);
1087 
1088 		SYNC_BUF_PA(mp) = IOMMU_PAGE_TTEPA(iommu_p, dvma_pg);
1089 		ASSERT(!(SYNC_BUF_PA(mp) & PCI_SYNC_FLAG_SIZE - 1));
1090 	}
1091 
1092 	if (pci_dvma_sync_before_unmap) {
1093 		pci_dma_sync(dip, rdip, (ddi_dma_handle_t)mp, 0, 0,
1094 		    DDI_DMA_SYNC_FORCPU);
1095 		iommu_unmap_window(iommu_p, mp);
1096 	} else {
1097 		iommu_unmap_window(iommu_p, mp);
1098 		pci_dma_sync(dip, rdip, (ddi_dma_handle_t)mp, 0, 0,
1099 		    DDI_DMA_SYNC_FORCPU);
1100 	}
1101 
1102 	if (fast_track)
1103 		SYNC_BUF_PA(mp) = sync_buf_save;
1104 }
1105 
1106 /*
1107  * DVMA mappings may have multiple windows, but each window always have
1108  * one segment.
1109  */
1110 int
1111 pci_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1112 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1113 	uint_t cache_flags)
1114 {
1115 	switch (cmd) {
1116 
1117 	case DDI_DMA_REMAP:
1118 		if (pci_dvma_remap_enabled)
1119 			return (pci_dvma_remap(dip, rdip, mp, *offp, *lenp));
1120 		return (DDI_FAILURE);
1121 
1122 	default:
1123 		DEBUG3(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1124 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1125 		break;
1126 	}
1127 	return (DDI_FAILURE);
1128 }
1129 
1130 void
1131 pci_dma_freewin(ddi_dma_impl_t *mp)
1132 {
1133 	pci_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
1134 	for (win2_p = win_p; win_p; win2_p = win_p) {
1135 		win_p = win2_p->win_next;
1136 		kmem_free(win2_p, sizeof (pci_dma_win_t) +
1137 		    sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
1138 	}
1139 	mp->dmai_nwin = 0;
1140 	mp->dmai_winlst = NULL;
1141 }
1142 
1143 /*
1144  * pci_dma_newwin - create a dma window object and cookies
1145  *
1146  *	After the initial scan in pci_dma_physwin(), which identifies
1147  *	a portion of the pfn array that belongs to a dma window,
1148  *	we are called to allocate and initialize representing memory
1149  *	resources. We know from the 1st scan the number of cookies
1150  *	or dma segment in this window so we can allocate a contiguous
1151  *	memory array for the dma cookies (The implementation of
1152  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
1153  *
1154  *	A second round scan is done on the pfn array to identify
1155  *	each dma segment and initialize its corresponding dma cookie.
1156  *	We don't need to do all the safety checking and we know they
1157  *	all belong to the same dma window.
1158  *
1159  *	Input:	cookie_no - # of cookies identified by the 1st scan
1160  *		start_idx - subscript of the pfn array for the starting pfn
1161  *		end_idx   - subscript of the last pfn in dma window
1162  *		win_pp    - pointer to win_next member of previous window
1163  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
1164  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
1165  *	Note:	Each cookie and window size are all initialized on page
1166  *		boundary. This is not true for the 1st cookie of the 1st
1167  *		window and the last cookie of the last window.
1168  *		We fix that later in upper layer which has access to size
1169  *		and offset info.
1170  *
1171  */
1172 static int
1173 pci_dma_newwin(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, uint32_t cookie_no,
1174 	uint32_t start_idx, uint32_t end_idx, pci_dma_win_t **win_pp,
1175 	uint64_t count_max, uint64_t bypass_prefix)
1176 {
1177 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
1178 	ddi_dma_cookie_t *cookie_p;
1179 	uint32_t pfn_no = 1;
1180 	iopfn_t pfn = PCI_GET_MP_PFN(mp, start_idx);
1181 	iopfn_t prev_pfn = pfn;
1182 	uint64_t seg_pfn0 = pfn;
1183 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
1184 	pci_dma_win_t *win_p = kmem_alloc(sizeof (pci_dma_win_t) + sz,
1185 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1186 	if (!win_p)
1187 		goto noresource;
1188 
1189 	win_p->win_next = NULL;
1190 	win_p->win_ncookies = cookie_no;
1191 	win_p->win_curseg = 0;	/* start from segment 0 */
1192 	win_p->win_size = IOMMU_PTOB(end_idx - start_idx + 1);
1193 	/* win_p->win_offset is left uninitialized */
1194 
1195 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1196 	start_idx++;
1197 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
1198 		pfn = PCI_GET_MP_PFN1(mp, start_idx);
1199 		if ((pfn == prev_pfn + 1) &&
1200 		    (IOMMU_PTOB(pfn_no + 1) - 1 <= count_max))
1201 			continue;
1202 
1203 		/* close up the cookie up to (including) prev_pfn */
1204 		MAKE_DMA_COOKIE(cookie_p, IOMMU_PTOB(seg_pfn0) | bypass_prefix,
1205 		    IOMMU_PTOB(pfn_no));
1206 		DEBUG2(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
1207 		    IOMMU_PTOB(seg_pfn0) | bypass_prefix, pfn_no);
1208 
1209 		cookie_p++;	/* advance to next available cookie cell */
1210 		pfn_no = 0;
1211 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
1212 	}
1213 	MAKE_DMA_COOKIE(cookie_p, IOMMU_PTOB(seg_pfn0) | bypass_prefix,
1214 	    IOMMU_PTOB(pfn_no));
1215 	DEBUG3(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1216 	    IOMMU_PTOB(seg_pfn0) | bypass_prefix, pfn_no, cookie_no);
1217 #ifdef DEBUG
1218 	cookie_p++;
1219 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1220 #endif
1221 	*win_pp = win_p;
1222 	return (DDI_SUCCESS);
1223 noresource:
1224 	if (waitfp != DDI_DMA_DONTWAIT)
1225 		ddi_set_callback(waitfp, dmareq->dmar_arg, &pci_kmem_clid);
1226 	return (DDI_DMA_NORESOURCES);
1227 }
1228 
1229 /*
1230  * pci_dma_adjust - adjust 1st and last cookie and window sizes
1231  *	remove initial dma page offset from 1st cookie and window size
1232  *	remove last dma page remainder from last cookie and window size
1233  *	fill win_offset of each dma window according to just fixed up
1234  *		each window sizes
1235  *	pci_dma_win_t members modified:
1236  *	win_p->win_offset - this window's offset within entire DMA object
1237  *	win_p->win_size	  - xferrable size (in bytes) for this window
1238  *
1239  *	ddi_dma_impl_t members modified:
1240  *	mp->dmai_size	  - 1st window xferrable size
1241  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1242  *
1243  *	ddi_dma_cookie_t members modified:
1244  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1245  *	cookie_p->dmac_laddress - 1st cookie add page offset
1246  */
1247 static void
1248 pci_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, pci_dma_win_t *win_p)
1249 {
1250 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1251 	size_t pg_offset = mp->dmai_roffset;
1252 	size_t win_offset = 0;
1253 
1254 	cookie_p->dmac_size -= pg_offset;
1255 	cookie_p->dmac_laddress |= pg_offset;
1256 	win_p->win_size -= pg_offset;
1257 	DEBUG1(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1258 
1259 	mp->dmai_size = win_p->win_size;
1260 	mp->dmai_offset = 0;
1261 
1262 	pg_offset += mp->dmai_object.dmao_size;
1263 	pg_offset &= IOMMU_PAGE_OFFSET;
1264 	if (pg_offset)
1265 		pg_offset = IOMMU_PAGE_SIZE - pg_offset;
1266 	DEBUG1(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1267 
1268 	for (; win_p->win_next; win_p = win_p->win_next) {
1269 		DEBUG1(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1270 		win_p->win_offset = win_offset;
1271 		win_offset += win_p->win_size;
1272 	}
1273 	/* last window */
1274 	win_p->win_offset = win_offset;
1275 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1276 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1277 	win_p->win_size -= pg_offset;
1278 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1279 }
1280 
/*
 * pci_dma_physwin() - carve up dma windows using physical addresses.
 *	Called to handle iommu bypass and pci peer-to-peer transfers.
 *	Calls pci_dma_newwin() to allocate window objects.
 *
 * Dependency: mp->dmai_pfnlst points to an array of pfns
 *
 * 1. Each dma window is represented by a pci_dma_win_t object.
 *	The object will be cast to ddi_dma_win_t and returned
 *	to leaf driver through the DDI interface.
 * 2. Each dma window can have several dma segments with each
 *	segment representing a physically contiguous either memory
 *	space (if we are doing an iommu bypass transfer) or pci address
 *	space (if we are doing a peer-to-peer transfer).
 * 3. Each segment has a DMA cookie to program the DMA engine.
 *	The cookies within each DMA window must be located in a
 *	contiguous array per ddi_dma_nextcookie(9f).
 * 4. The number of DMA segments within each DMA window cannot exceed
 *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
 *	too large to fit in the sgllen, the rest needs to be
 *	relocated to the next dma window.
 * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
 *	and nocross restrictions while bypass DMA follows the set of
 *	restrictions with system limits factored in.
 *
 * Return:
 *	DDI_DMA_MAPPED on success, with the following filled in:
 *	mp->dmai_winlst	 - points to a link list of pci_dma_win_t objects.
 *		Each pci_dma_win_t object on the link list contains
 *		information such as its window size (# of pages),
 *		starting offset (also see Restriction), an array of
 *		DMA cookies, and # of cookies in the array.
 *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
 *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
 *	mp->dmai_mapping - starting cookie address
 *	mp->dmai_rflags	 - consistent, nosync, no redzone
 *	mp->dmai_cookie	 - 2nd cookie of the 1st DMA window, or 0 if the
 *		1st window has only one cookie
 *
 *	DDI_DMA_NOMAPPING if an attribute or address range check fails, or
 *	the error returned by pci_dma_newwin(); windows already created
 *	are freed in both cases.
 *
 * Restriction:
 *	Each pci_dma_win_t object can theoretically start from any offset
 *	since the iommu is not involved. However, this implementation
 *	always make windows start from page aligned offset (except
 *	the 1st window, which follows the requested offset) due to the
 *	fact that we are handed a pfn list. This does require device's
 *	count_max and attr_seg to be at least IOMMU_PAGE_SIZE aligned.
 */
int
pci_dma_physwin(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
{
	uint_t npages = mp->dmai_ndvmapages;
	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
	iopfn_t pfn_lo, pfn_hi, prev_pfn, bypass_pfn;
	iopfn_t pfn = PCI_GET_MP_PFN(mp, 0);
	/*
	 * pfn_no counts the pages in the segment being built, cookie_no the
	 * segments closed so far in the window being built, and
	 * win_pfn0_index is the pfn array subscript where that window starts.
	 */
	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
	uint64_t count_max, bypass = PCI_DMA_BYPASS_PREFIX(mp, pfn);
	/* where the next window object gets linked in */
	pci_dma_win_t **win_pp = (pci_dma_win_t **)&mp->dmai_winlst;
	ddi_dma_cookie_t *cookie0_p;

	if (PCI_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
		ddi_dma_attr_t *dev_attr_p = DEV_ATTR(mp);
		iopfn_t pfn_base = pci_p->pci_pbm_p->pbm_base_pfn;
		iopfn_t pfn_last = pci_p->pci_pbm_p->pbm_last_pfn - pfn_base;
		uint64_t nocross = dev_attr_p->dma_attr_seg;
		/* a non-zero segment boundary below UINT32_MAX is refused */
		if (nocross && (nocross < UINT32_MAX))
			return (DDI_DMA_NOMAPPING);
		/* so is an alignment requirement larger than one page */
		if (dev_attr_p->dma_attr_align > IOMMU_PAGE_SIZE)
			return (DDI_DMA_NOMAPPING);
		pfn_lo = IOMMU_BTOP(dev_attr_p->dma_attr_addr_lo);
		pfn_hi = IOMMU_BTOP(dev_attr_p->dma_attr_addr_hi);
		pfn_hi = MIN(pfn_hi, pfn_last);
		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
			return (DDI_DMA_NOMAPPING);
		count_max = dev_attr_p->dma_attr_count_max;
		count_max = MIN(count_max, nocross);
		/*
		 * the following count_max trim is not done because we are
		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
		 * count_max=MIN(count_max, IOMMU_PTOB(pfn_hi - pfn_lo + 1)-1);
		 */
	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
		count_max = mp->dmai_attr.dma_attr_count_max;
		pfn_lo = IOMMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
		pfn_hi = IOMMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
	}

	bypass_pfn = IOMMU_BTOP(bypass);

	/*
	 * 1st scan: walk the pfn list from the 2nd page on, closing a
	 * segment whenever contiguity or count_max is broken, and closing a
	 * window whenever it has collected sgllen segments.
	 */
	for (prev_pfn = (bypass_pfn | pfn), i = 1; i < npages;
	    i++, prev_pfn = pfn, pfn_no++) {
		pfn = bypass_pfn | PCI_GET_MP_PFN1(mp, i);
		/* still contiguous and within count_max: extend the segment */
		if ((pfn == prev_pfn + 1) &&
		    (IOMMU_PTOB(pfn_no + 1) - 1 <= count_max))
			continue;
		/*
		 * segment boundary: the first pfn of the new segment is
		 * checked against the low limit and the last pfn of the
		 * segment just ended against the high limit
		 */
		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
			ret = DDI_DMA_NOMAPPING;
			goto err;
		}
		cookie_no++;
		pfn_no = 0;	/* the loop increment brings this back to 1 */
		if (cookie_no < sgllen)
			continue;

		/* window is full: pfns [win_pfn0_index, i - 1] belong to it */
		DEBUG3(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
		    win_pfn0_index, i - 1, cookie_no);
		if (ret = pci_dma_newwin(dmareq, mp, cookie_no,
		    win_pfn0_index, i - 1, win_pp, count_max, bypass))
			goto err;

		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
		win_no++;
		win_pfn0_index = i;
		cookie_no = 0;
	}
	/* the last pfn seen never went through the high limit check above */
	if (pfn > pfn_hi) {
		ret = DDI_DMA_NOMAPPING;
		goto err;
	}
	/* account for the segment still open and emit the final window */
	cookie_no++;
	DEBUG3(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
	    win_pfn0_index, i - 1, cookie_no);
	if (ret = pci_dma_newwin(dmareq, mp, cookie_no, win_pfn0_index,
	    i - 1, win_pp, count_max, bypass))
		goto err;
	win_no++;
	/* fix up the page-granular sizes and fill in the window offsets */
	pci_dma_adjust(dmareq, mp, mp->dmai_winlst);
	mp->dmai_nwin = win_no;
	mp->dmai_rflags |= DDI_DMA_CONSISTENT;
	/* no sync register PA recorded for this pbm: mark the handle nosync */
	if (!pci_p->pci_pbm_p->pbm_sync_reg_pa) {
		mp->dmai_rflags |= DMP_NOSYNC;
		mp->dmai_flags |= DMAI_FLAGS_NOSYNC;
	}
	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
	cookie0_p = (ddi_dma_cookie_t *)(WINLST(mp) + 1);
	/* dmai_cookie is left pointing at the 2nd cookie, if there is one */
	mp->dmai_cookie = WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
	mp->dmai_mapping = cookie0_p->dmac_laddress;

	pci_dma_freepfn(mp);
	return (DDI_DMA_MAPPED);
err:
	/* drop whatever windows were linked onto the handle so far */
	pci_dma_freewin(mp);
	return (ret);
}
1422 
1423 /*ARGSUSED*/
1424 int
1425 pci_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1426 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1427 	uint_t cache_flags)
1428 {
1429 	switch (cmd) {
1430 
1431 	case DDI_DMA_HTOC: {
1432 		off_t off = *offp;
1433 		ddi_dma_cookie_t *loop_cp, *cp;
1434 		pci_dma_win_t *win_p = mp->dmai_winlst;
1435 
1436 		if (off >= mp->dmai_object.dmao_size)
1437 			return (DDI_FAILURE);
1438 
1439 		/* locate window */
1440 		while (win_p->win_offset + win_p->win_size <= off)
1441 			win_p = win_p->win_next;
1442 
1443 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
1444 		mp->dmai_offset = win_p->win_offset;
1445 		mp->dmai_size   = win_p->win_size;
1446 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
1447 
1448 		/* adjust cookie addr/len if we are not on cookie boundary */
1449 		off -= win_p->win_offset;	   /* offset within window */
1450 		for (; off >= loop_cp->dmac_size; loop_cp++)
1451 			off -= loop_cp->dmac_size; /* offset within cookie */
1452 
1453 		mp->dmai_cookie = loop_cp + 1;
1454 		win_p->win_curseg = loop_cp - cp;
1455 		cp = (ddi_dma_cookie_t *)objp;
1456 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
1457 		    loop_cp->dmac_size - off);
1458 
1459 		DEBUG2(DBG_DMA_CTL, dip,
1460 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1461 		    cp->dmac_laddress, cp->dmac_size);
1462 		}
1463 		return (DDI_SUCCESS);
1464 
1465 	case DDI_DMA_COFF: {
1466 		pci_dma_win_t *win_p;
1467 		ddi_dma_cookie_t *cp;
1468 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
1469 		size_t win_off;
1470 
1471 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
1472 			int i;
1473 			win_off = 0;
1474 			cp = (ddi_dma_cookie_t *)(win_p + 1);
1475 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
1476 				size_t sz = cp->dmac_size;
1477 
1478 				addr = cp->dmac_laddress;
1479 				if ((addr <= key) && (addr + sz >= key))
1480 					goto found;
1481 				win_off += sz;
1482 			}
1483 		}
1484 		return (DDI_FAILURE);
1485 found:
1486 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
1487 		return (DDI_SUCCESS);
1488 		}
1489 
1490 	case DDI_DMA_REMAP:
1491 		return (DDI_FAILURE);
1492 
1493 	default:
1494 		DEBUG3(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1495 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1496 		break;
1497 	}
1498 	return (DDI_FAILURE);
1499 }
1500 
1501 static void
1502 pci_dvma_debug_init(iommu_t *iommu_p)
1503 {
1504 	size_t sz = sizeof (struct dvma_rec) * pci_dvma_debug_rec;
1505 	ASSERT(MUTEX_HELD(&iommu_p->dvma_debug_lock));
1506 	cmn_err(CE_NOTE, "PCI DVMA %p stat ON", iommu_p);
1507 
1508 	iommu_p->dvma_alloc_rec = kmem_zalloc(sz, KM_SLEEP);
1509 	iommu_p->dvma_free_rec = kmem_zalloc(sz, KM_SLEEP);
1510 
1511 	iommu_p->dvma_active_list = NULL;
1512 	iommu_p->dvma_alloc_rec_index = 0;
1513 	iommu_p->dvma_free_rec_index = 0;
1514 	iommu_p->dvma_active_count = 0;
1515 }
1516 
1517 void
1518 pci_dvma_debug_fini(iommu_t *iommu_p)
1519 {
1520 	struct dvma_rec *prev, *ptr;
1521 	size_t sz = sizeof (struct dvma_rec) * pci_dvma_debug_rec;
1522 	uint64_t mask = ~(1ull << iommu_p->iommu_inst);
1523 	cmn_err(CE_NOTE, "PCI DVMA %p stat OFF", iommu_p);
1524 
1525 	kmem_free(iommu_p->dvma_alloc_rec, sz);
1526 	kmem_free(iommu_p->dvma_free_rec, sz);
1527 	iommu_p->dvma_alloc_rec = iommu_p->dvma_free_rec = NULL;
1528 
1529 	prev = iommu_p->dvma_active_list;
1530 	if (!prev)
1531 		return;
1532 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1533 		kmem_free(prev, sizeof (struct dvma_rec));
1534 	kmem_free(prev, sizeof (struct dvma_rec));
1535 
1536 	iommu_p->dvma_active_list = NULL;
1537 	iommu_p->dvma_alloc_rec_index = 0;
1538 	iommu_p->dvma_free_rec_index = 0;
1539 	iommu_p->dvma_active_count = 0;
1540 
1541 	pci_dvma_debug_on  &= mask;
1542 	pci_dvma_debug_off &= mask;
1543 }
1544 
1545 void
1546 pci_dvma_alloc_debug(iommu_t *iommu_p, char *address, uint_t len,
1547 	ddi_dma_impl_t *mp)
1548 {
1549 	struct dvma_rec *ptr;
1550 	mutex_enter(&iommu_p->dvma_debug_lock);
1551 
1552 	if (!iommu_p->dvma_alloc_rec)
1553 		pci_dvma_debug_init(iommu_p);
1554 	if (DVMA_DBG_OFF(iommu_p)) {
1555 		pci_dvma_debug_fini(iommu_p);
1556 		goto done;
1557 	}
1558 
1559 	ptr = &iommu_p->dvma_alloc_rec[iommu_p->dvma_alloc_rec_index];
1560 	ptr->dvma_addr = address;
1561 	ptr->len = len;
1562 	ptr->mp = mp;
1563 	if (++iommu_p->dvma_alloc_rec_index == pci_dvma_debug_rec)
1564 		iommu_p->dvma_alloc_rec_index = 0;
1565 
1566 	ptr = kmem_alloc(sizeof (struct dvma_rec), KM_SLEEP);
1567 	ptr->dvma_addr = address;
1568 	ptr->len = len;
1569 	ptr->mp = mp;
1570 
1571 	ptr->next = iommu_p->dvma_active_list;
1572 	iommu_p->dvma_active_list = ptr;
1573 	iommu_p->dvma_active_count++;
1574 done:
1575 	mutex_exit(&iommu_p->dvma_debug_lock);
1576 }
1577 
1578 void
1579 pci_dvma_free_debug(iommu_t *iommu_p, char *address, uint_t len,
1580 	ddi_dma_impl_t *mp)
1581 {
1582 	struct dvma_rec *ptr, *ptr_save;
1583 	mutex_enter(&iommu_p->dvma_debug_lock);
1584 
1585 	if (!iommu_p->dvma_alloc_rec)
1586 		pci_dvma_debug_init(iommu_p);
1587 	if (DVMA_DBG_OFF(iommu_p)) {
1588 		pci_dvma_debug_fini(iommu_p);
1589 		goto done;
1590 	}
1591 
1592 	ptr = &iommu_p->dvma_free_rec[iommu_p->dvma_free_rec_index];
1593 	ptr->dvma_addr = address;
1594 	ptr->len = len;
1595 	ptr->mp = mp;
1596 	if (++iommu_p->dvma_free_rec_index == pci_dvma_debug_rec)
1597 		iommu_p->dvma_free_rec_index = 0;
1598 
1599 	ptr_save = iommu_p->dvma_active_list;
1600 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1601 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1602 			break;
1603 		ptr_save = ptr;
1604 	}
1605 	if (!ptr) {
1606 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1607 		    (long)address, len);
1608 		goto done;
1609 	}
1610 	if (ptr == iommu_p->dvma_active_list)
1611 		iommu_p->dvma_active_list = ptr->next;
1612 	else
1613 		ptr_save->next = ptr->next;
1614 	kmem_free(ptr, sizeof (struct dvma_rec));
1615 	iommu_p->dvma_active_count--;
1616 done:
1617 	mutex_exit(&iommu_p->dvma_debug_lock);
1618 }
1619 
#ifdef DEBUG
/*
 * dump_dma_handle - print the main fields of a dma handle
 *
 *	Only compiled when DEBUG is defined.  Output goes through the
 *	DEBUG4 macro under the given debug flag; the 2nd to 4th lines are
 *	emitted with DBG_CONT or'ed into the flag.
 *
 *	dmai_winlst is a pointer and is therefore printed with %p like
 *	the other pointer fields.
 */
void
dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
{
	DEBUG4(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
	    hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
	DEBUG4(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
	    hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
	    hp->dmai_nwin);
	DEBUG4(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
	    hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
	DEBUG4(flag|DBG_CONT, dip, "\twinlst=%p obj=%p attr=%p ckp=%p\n",
	    hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
	    hp->dmai_cookie);
}
#endif
1636 
1637 void
1638 pci_vmem_do_free(iommu_t *iommu_p, void *base_addr, size_t npages,
1639     int vmemcache)
1640 {
1641 	vmem_t *map_p = iommu_p->iommu_dvma_map;
1642 
1643 	if (vmemcache) {
1644 		vmem_free(map_p, base_addr, IOMMU_PAGE_SIZE);
1645 #ifdef PCI_DMA_PROF
1646 		pci_dvma_vmem_free++;
1647 #endif
1648 		return;
1649 	}
1650 
1651 	vmem_xfree(map_p, base_addr, IOMMU_PTOB(npages));
1652 #ifdef PCI_DMA_PROF
1653 		pci_dvma_vmem_xfree++;
1654 #endif
1655 }
1656