xref: /titanic_50/usr/src/uts/sun4u/io/pci/pci_dma.c (revision 8c74a1f9477c04aa8539a84a49aa2bf629c7a14d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PCI nexus DVMA and DMA core routines:
30  *	dma_map/dma_bind_handle implementation
31  *	bypass and peer-to-peer support
32  *	fast track DVMA space allocation
33  *	runtime DVMA debug
34  */
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/sysmacros.h>
39 #include <sys/sunddi.h>
40 #include <sys/machsystm.h>	/* lddphys() */
41 #include <sys/ddi_impldefs.h>
42 #include <vm/hat.h>
43 #include <sys/pci/pci_obj.h>
44 
45 /*LINTLIBRARY*/
46 
47 static void
48 pci_sc_pg_inv(dev_info_t *dip, sc_t *sc_p, ddi_dma_impl_t *mp, off_t off,
49 	size_t len)
50 {
51 	dvma_addr_t dvma_addr, pg_off;
52 	volatile uint64_t *invl_va = sc_p->sc_invl_reg;
53 
54 	if (!len)
55 		len = mp->dmai_size;
56 
57 	pg_off = mp->dmai_offset;			/* start min */
58 	dvma_addr = MAX(off, pg_off);			/* lo */
59 	pg_off += mp->dmai_size;			/* end max */
60 	pg_off = MIN(off + len, pg_off);		/* hi */
61 	if (dvma_addr >= pg_off) {			/* lo >= hi ? */
62 		DEBUG4(DBG_SC, dip, "%x+%x out of window [%x,%x)\n",
63 			off, len, mp->dmai_offset,
64 			mp->dmai_offset + mp->dmai_size);
65 		return;
66 	}
67 
68 	len = pg_off - dvma_addr;			/* sz = hi - lo */
69 	dvma_addr += mp->dmai_mapping;			/* start addr */
70 	pg_off = dvma_addr & IOMMU_PAGE_OFFSET;		/* offset in 1st pg */
71 	len = IOMMU_BTOPR(len + pg_off);		/* # of pages */
72 	dvma_addr ^= pg_off;
73 
74 	DEBUG2(DBG_SC, dip, "addr=%x+%x pages: \n", dvma_addr, len);
75 	for (; len; len--, dvma_addr += IOMMU_PAGE_SIZE) {
76 		DEBUG1(DBG_SC|DBG_CONT, dip, " %x", dvma_addr);
77 		*invl_va = (uint64_t)dvma_addr;
78 	}
79 	DEBUG0(DBG_SC|DBG_CONT, dip, "\n");
80 }
81 
82 static void
83 pci_dma_sync_flag_wait(ddi_dma_impl_t *mp, sc_t *sc_p, uint32_t onstack)
84 {
85 	hrtime_t start_time;
86 	uint64_t loops = 0;
87 	uint64_t sync_flag_pa = SYNC_BUF_PA(mp);
88 	uint64_t sync_reg_pa = sc_p->sc_sync_reg_pa;
89 	uint8_t stack_buf[128];
90 
91 	stack_buf[0] = DDI_SUCCESS;
92 
93 	/* check for handle specific sync flag */
94 	if (sync_flag_pa)
95 		goto start;
96 
97 	sync_flag_pa = sc_p->sc_sync_flag_pa;
98 
99 	if (onstack) {
100 		sync_flag_pa = va_to_pa(stack_buf);
101 		sync_flag_pa += PCI_SYNC_FLAG_SIZE;
102 		sync_flag_pa >>= PCI_SYNC_FLAG_SZSHIFT;
103 		sync_flag_pa <<= PCI_SYNC_FLAG_SZSHIFT;
104 		goto start;
105 	}
106 	stack_buf[0] |= PCI_SYNC_FLAG_LOCKED;
107 	mutex_enter(&sc_p->sc_sync_mutex);
108 start:
109 	ASSERT(!(sync_flag_pa & PCI_SYNC_FLAG_SIZE - 1));
110 	stdphys(sync_flag_pa, 0);	/* reset sync flag to 0 */
111 					/* membar  #LoadStore|#StoreStore */
112 	stdphysio(sync_reg_pa, sync_flag_pa);
113 	start_time = gethrtime();
114 
115 	for (; gethrtime() - start_time < pci_sync_buf_timeout; loops++)
116 		if (lddphys(sync_flag_pa))
117 			goto done;
118 
119 	if (!lddphys(sync_flag_pa))
120 		stack_buf[0] |= PCI_SYNC_FLAG_FAILED;
121 done:
122 	DEBUG3(DBG_SC|DBG_CONT, 0, "flag wait loops=%lu ticks=%lu status=%x\n",
123 		loops, gethrtime() - start_time, stack_buf[0]);
124 
125 	if (stack_buf[0] & PCI_SYNC_FLAG_LOCKED)
126 		mutex_exit(&sc_p->sc_sync_mutex);
127 
128 	if (stack_buf[0] & PCI_SYNC_FLAG_FAILED)
129 		cmn_err(CE_PANIC, "%p pci dma sync %lx %lx timeout!",
130 		    mp, sync_flag_pa, loops);
131 }
132 
133 /*
134  * Cache	RW	Before	During		After
135  *
136  * STREAMING	read	no/no	pg/no		ctx,pg/no
137  * STREAMING	write	no/no	pg/yes		ctx,pg/yes
138  * CONSISTENT	read	no/no	yes,no/no	yes,no/no
139  * CONSISTENT	write	no/no	yes,yes/yes	yes,yes/yes
140  *
141  * STREAMING	read	ctx,pg/no
142  * STREAMING	write	ctx,pg/yes
143  * CONSISTENT	read	yes,no/no
144  * CONSISTENT	write	yes,yes/yes
145  */
146 int
147 pci_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
148 	off_t off, size_t len, uint32_t sync_flag)
149 {
150 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
151 	int ret = ddi_get_instance(dip);
152 	pci_t *pci_p = get_pci_soft_state(ret);
153 	pbm_t *pbm_p = pci_p->pci_pbm_p;
154 	uint32_t dev_flag = mp->dmai_rflags;
155 	sc_t *sc_p;
156 
157 	DEBUG4(DBG_DMA_SYNC, dip, "%s%d flags=%x,%x\n", ddi_driver_name(rdip),
158 		ddi_get_instance(rdip), dev_flag, sync_flag);
159 	DEBUG4(DBG_SC, dip, "dmai_mapping=%x, dmai_sz=%x off=%x len=%x\n",
160 		mp->dmai_mapping, mp->dmai_size, off, len);
161 	DEBUG2(DBG_SC, dip, "mp=%p, ctx=%x\n", mp, MP2CTX(mp));
162 
163 	if (!(mp->dmai_flags & DMAI_FLAGS_INUSE)) {
164 		cmn_err(CE_WARN, "Unbound dma handle %p from %s%d", mp,
165 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
166 		return (DDI_FAILURE);
167 	}
168 
169 	if (mp->dmai_flags & DMAI_FLAGS_NOSYNC)
170 		return (DDI_SUCCESS);
171 
172 	if (!(dev_flag & DDI_DMA_CONSISTENT))
173 		goto streaming;
174 
175 	if (sync_flag & PCI_DMA_SYNC_EXT) {
176 		if (sync_flag & (PCI_DMA_SYNC_BEFORE | PCI_DMA_SYNC_POST) ||
177 		    !(sync_flag & PCI_DMA_SYNC_WRITE))
178 			return (DDI_SUCCESS);
179 	} else {
180 		if (!(dev_flag & DDI_DMA_READ) ||
181 		    ((sync_flag & PCI_DMA_SYNC_DDI_FLAGS) ==
182 		    DDI_DMA_SYNC_FORDEV))
183 			return (DDI_SUCCESS);
184 	}
185 
186 	pci_pbm_dma_sync(pbm_p, pbm_p->pbm_sync_ino);
187 	return (DDI_SUCCESS);
188 
189 streaming:
190 	ASSERT(pci_stream_buf_exists && (pci_stream_buf_enable & 1 << ret));
191 	sc_p = pci_p->pci_sc_p;
192 	ret = DDI_FAILURE;
193 
194 	if (sync_flag & PCI_DMA_SYNC_EXT)
195 		goto ext;
196 
197 	if (mp->dmai_flags & DMAI_FLAGS_CONTEXT && pci_sc_use_contexts)
198 		ret = pci_sc_ctx_inv(dip, sc_p, mp);
199 	if (ret)
200 		pci_sc_pg_inv(dip, sc_p, mp, off, len);
201 
202 	if ((dev_flag & DDI_DMA_READ) &&
203 	    ((sync_flag & PCI_DMA_SYNC_DDI_FLAGS) != DDI_DMA_SYNC_FORDEV))
204 		goto wait;
205 
206 	return (DDI_SUCCESS);
207 ext:
208 	if (sync_flag & PCI_DMA_SYNC_BEFORE)
209 		return (DDI_SUCCESS);
210 	if (sync_flag & PCI_DMA_SYNC_BAR)
211 		goto wait_check;
212 	if (sync_flag & PCI_DMA_SYNC_AFTER &&
213 		mp->dmai_flags & DMAI_FLAGS_CONTEXT && pci_sc_use_contexts)
214 		ret = pci_sc_ctx_inv(dip, sc_p, mp);
215 	if (ret)
216 		pci_sc_pg_inv(dip, sc_p, mp, off, len);
217 wait_check:
218 	if (sync_flag & PCI_DMA_SYNC_POST || !(sync_flag & PCI_DMA_SYNC_WRITE))
219 		return (DDI_SUCCESS);
220 wait:
221 	pci_dma_sync_flag_wait(mp, sc_p, sync_flag & PCI_DMA_SYNC_PRIVATE);
222 	return (DDI_SUCCESS);
223 }
224 
225 int
226 pci_dma_handle_clean(dev_info_t *rdip, ddi_dma_handle_t h)
227 {
228 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)h;
229 	if ((mp->dmai_flags & DMAI_FLAGS_INUSE) == 0)
230 		return (DDI_FAILURE);
231 	mp->dmai_rflags |= DMP_NOSYNC;
232 	mp->dmai_flags |= DMAI_FLAGS_NOSYNC;
233 	return (DDI_SUCCESS);
234 }
235 
236 /*
237  * pci_dma_allocmp - Allocate a pci dma implementation structure
238  *
239  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
240  * to hold unmodified device limits. The ddi_dma_attr inside the
241  * ddi_dma_impl structure is augumented with system limits to enhance
242  * DVMA performance at runtime. The unaugumented device limits saved
243  * right after (accessed through the DEV_ATTR macro) is used
244  * strictly for peer-to-peer transfers which do not obey system limits.
245  *
246  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
247  */
248 ddi_dma_impl_t *
249 pci_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
250 	caddr_t arg)
251 {
252 	ddi_dma_impl_t *mp;
253 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
254 
255 	/* Caution: we don't use zalloc to enhance performance! */
256 	if ((mp = kmem_alloc(sizeof (pci_dma_hdl_t), sleep)) == 0) {
257 		DEBUG0(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
258 		if (waitfp != DDI_DMA_DONTWAIT) {
259 			DEBUG0(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
260 			ddi_set_callback(waitfp, arg, &pci_kmem_clid);
261 		}
262 		return (mp);
263 	}
264 
265 	mp->dmai_rdip = rdip;
266 	mp->dmai_flags = 0;
267 	mp->dmai_pfnlst = NULL;
268 	mp->dmai_winlst = NULL;
269 
270 	/*
271 	 * kmem_alloc debug: the following fields are not zero-ed
272 	 * mp->dmai_mapping = 0;
273 	 * mp->dmai_size = 0;
274 	 * mp->dmai_offset = 0;
275 	 * mp->dmai_minxfer = 0;
276 	 * mp->dmai_burstsizes = 0;
277 	 * mp->dmai_ndvmapages = 0;
278 	 * mp->dmai_pool/roffset = 0;
279 	 * mp->dmai_rflags = 0;
280 	 * mp->dmai_inuse/flags
281 	 * mp->dmai_nwin = 0;
282 	 * mp->dmai_winsize = 0;
283 	 * mp->dmai_nexus_private/tte = 0;
284 	 * mp->dmai_iopte/pfnlst
285 	 * mp->dmai_sbi/pfn0 = 0;
286 	 * mp->dmai_minfo/winlst/fdvma
287 	 * mp->dmai_rdip
288 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
289 	 * mp->dmai_cookie = 0;
290 	 */
291 
292 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
293 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
294 	mp->dmai_fault = 0;
295 	mp->dmai_fault_check = NULL;
296 	mp->dmai_fault_notify = NULL;
297 
298 	mp->dmai_error.err_ena = 0;
299 	mp->dmai_error.err_status = DDI_FM_OK;
300 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
301 	mp->dmai_error.err_ontrap = NULL;
302 	mp->dmai_error.err_fep = NULL;
303 	mp->dmai_error.err_cf = NULL;
304 
305 	SYNC_BUF_PA(mp) = 0ull;
306 	return (mp);
307 }
308 
309 void
310 pci_dma_freemp(ddi_dma_impl_t *mp)
311 {
312 	if (mp->dmai_ndvmapages > 1)
313 		pci_dma_freepfn(mp);
314 	if (mp->dmai_winlst)
315 		pci_dma_freewin(mp);
316 	kmem_free(mp, sizeof (pci_dma_hdl_t));
317 }
318 
319 void
320 pci_dma_freepfn(ddi_dma_impl_t *mp)
321 {
322 	void *addr = mp->dmai_pfnlst;
323 	ASSERT(!PCI_DMA_CANRELOC(mp));
324 	if (addr) {
325 		size_t npages = mp->dmai_ndvmapages;
326 		if (npages > 1)
327 			kmem_free(addr, npages * sizeof (iopfn_t));
328 		mp->dmai_pfnlst = NULL;
329 	}
330 	mp->dmai_ndvmapages = 0;
331 }
332 
333 /*
334  * pci_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
335  *			and convert dmareq->dmar_limits to mp->dmai_attr
336  *
337  * ddi_dma_impl_t member modified     input
338  * ------------------------------------------------------------------------
339  * mp->dmai_minxfer		    - dev
340  * mp->dmai_burstsizes		    - dev
341  * mp->dmai_flags		    - no limit? peer-to-peer only?
342  *
343  * ddi_dma_attr member modified       input
344  * ------------------------------------------------------------------------
345  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
346  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
347  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
348  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
349  * mp->dmai_attr.dma_attr_align     - 1		(no alignment restriction)
350  *
351  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
352  */
353 ddi_dma_impl_t *
354 pci_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, iommu_t *iommu_p,
355 	ddi_dma_req_t *dmareq)
356 {
357 	ddi_dma_impl_t *mp;
358 	ddi_dma_attr_t *attr_p;
359 	uint64_t syslo		= iommu_p->iommu_dvma_base;
360 	uint64_t syshi		= iommu_p->iommu_dvma_end;
361 	uint64_t fasthi		= iommu_p->iommu_dvma_fast_end;
362 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
363 	uint32_t count_max	= lim_p->dlim_cntr_max;
364 	uint64_t lo		= lim_p->dlim_addr_lo;
365 	uint64_t hi		= lim_p->dlim_addr_hi;
366 	if (hi <= lo) {
367 		DEBUG0(DBG_DMA_MAP, dip, "Bad limits\n");
368 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
369 	}
370 	if (!count_max)
371 		count_max--;
372 
373 	if (!(mp = pci_dma_allocmp(dip, rdip, dmareq->dmar_fp,
374 		dmareq->dmar_arg)))
375 		return (NULL);
376 
377 	/* store original dev input at the 2nd ddi_dma_attr */
378 	attr_p = DEV_ATTR(mp);
379 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
380 	SET_DMAALIGN(attr_p, 1);
381 
382 	lo = MAX(lo, syslo);
383 	hi = MIN(hi, syshi);
384 	if (hi <= lo)
385 		mp->dmai_flags |= DMAI_FLAGS_PEER_ONLY;
386 	count_max = MIN(count_max, hi - lo);
387 
388 	if (DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
389 		mp->dmai_flags |= DMAI_FLAGS_NOFASTLIMIT |
390 			DMAI_FLAGS_NOSYSLIMIT;
391 	else {
392 		if (DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
393 			mp->dmai_flags |= DMAI_FLAGS_NOFASTLIMIT;
394 	}
395 	if (PCI_DMA_NOCTX(rdip))
396 		mp->dmai_flags |= DMAI_FLAGS_NOCTX;
397 
398 	/* store augumented dev input to mp->dmai_attr */
399 	mp->dmai_minxfer	= lim_p->dlim_minxfer;
400 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
401 	attr_p = &mp->dmai_attr;
402 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
403 	SET_DMAALIGN(attr_p, 1);
404 	return (mp);
405 }
406 
407 /*
408  * pci_dma_attr2hdl
409  *
410  * This routine is called from the alloc handle entry point to sanity check the
411  * dma attribute structure.
412  *
413  * use by: pci_dma_allochdl()
414  *
415  * return value:
416  *
417  *	DDI_SUCCESS		- on success
418  *	DDI_DMA_BADATTR		- attribute has invalid version number
419  *				  or address limits exclude dvma space
420  */
421 int
422 pci_dma_attr2hdl(pci_t *pci_p, ddi_dma_impl_t *mp)
423 {
424 	iommu_t *iommu_p = pci_p->pci_iommu_p;
425 	uint64_t syslo, syshi;
426 	ddi_dma_attr_t *attrp		= DEV_ATTR(mp);
427 	uint64_t hi		= attrp->dma_attr_addr_hi;
428 	uint64_t lo		= attrp->dma_attr_addr_lo;
429 	uint64_t align		= attrp->dma_attr_align;
430 	uint64_t nocross	= attrp->dma_attr_seg;
431 	uint64_t count_max	= attrp->dma_attr_count_max;
432 
433 	DEBUG3(DBG_DMA_ALLOCH, pci_p->pci_dip, "attrp=%p cntr_max=%x.%08x\n",
434 		attrp, HI32(count_max), LO32(count_max));
435 	DEBUG4(DBG_DMA_ALLOCH, pci_p->pci_dip, "hi=%x.%08x lo=%x.%08x\n",
436 		HI32(hi), LO32(hi), HI32(lo), LO32(lo));
437 	DEBUG4(DBG_DMA_ALLOCH, pci_p->pci_dip, "seg=%x.%08x align=%x.%08x\n",
438 		HI32(nocross), LO32(nocross), HI32(align), LO32(align));
439 
440 	if (!nocross)
441 		nocross--;
442 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
443 
444 		DEBUG0(DBG_DMA_ALLOCH, pci_p->pci_dip, "bypass mode\n");
445 		/* if tomatillo ver <= 2.3 don't allow bypass */
446 		if (tomatillo_disallow_bypass)
447 			return (DDI_DMA_BADATTR);
448 
449 		mp->dmai_flags |= DMAI_FLAGS_BYPASSREQ;
450 		if (nocross != UINT64_MAX)
451 			return (DDI_DMA_BADATTR);
452 		if (align && (align > IOMMU_PAGE_SIZE))
453 			return (DDI_DMA_BADATTR);
454 		align = 1; /* align on 1 page boundary */
455 		syslo = iommu_p->iommu_dma_bypass_base;
456 		syshi = iommu_p->iommu_dma_bypass_end;
457 
458 	} else { /* IOMMU_XLATE or PEER_TO_PEER */
459 		align = MAX(align, IOMMU_PAGE_SIZE) - 1;
460 		if ((align & nocross) != align) {
461 			dev_info_t *rdip = mp->dmai_rdip;
462 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
463 				NAMEINST(rdip));
464 			return (DDI_DMA_BADATTR);
465 		}
466 		align = IOMMU_BTOP(align + 1);
467 		syslo = iommu_p->iommu_dvma_base;
468 		syshi = iommu_p->iommu_dvma_end;
469 	}
470 	if (hi <= lo) {
471 		dev_info_t *rdip = mp->dmai_rdip;
472 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
473 		return (DDI_DMA_BADATTR);
474 	}
475 	lo = MAX(lo, syslo);
476 	hi = MIN(hi, syshi);
477 	if (!count_max)
478 		count_max--;
479 
480 	DEBUG4(DBG_DMA_ALLOCH, pci_p->pci_dip, "hi=%x.%08x, lo=%x.%08x\n",
481 		HI32(hi), LO32(hi), HI32(lo), LO32(lo));
482 	if (hi <= lo) { /* peer transfers cannot have alignment & nocross */
483 		dev_info_t *rdip = mp->dmai_rdip;
484 		cmn_err(CE_WARN, "%s%d peer only dev %p", NAMEINST(rdip), mp);
485 		if ((nocross < UINT32_MAX) || (align > 1)) {
486 			cmn_err(CE_WARN, "%s%d peer only device bad attr",
487 				NAMEINST(rdip));
488 			return (DDI_DMA_BADATTR);
489 		}
490 		mp->dmai_flags |= DMAI_FLAGS_PEER_ONLY;
491 	} else /* set practical counter_max value */
492 		count_max = MIN(count_max, hi - lo);
493 
494 	if (DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
495 		mp->dmai_flags |= DMAI_FLAGS_NOSYSLIMIT |
496 			DMAI_FLAGS_NOFASTLIMIT;
497 	else {
498 		syshi = iommu_p->iommu_dvma_fast_end;
499 		if (DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
500 			mp->dmai_flags |= DMAI_FLAGS_NOFASTLIMIT;
501 	}
502 	if (PCI_DMA_NOCTX(mp->dmai_rdip))
503 		mp->dmai_flags |= DMAI_FLAGS_NOCTX;
504 
505 	mp->dmai_minxfer	= attrp->dma_attr_minxfer;
506 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
507 	attrp = &mp->dmai_attr;
508 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
509 	return (DDI_SUCCESS);
510 }
511 
512 /*
513  * set up consistent dma flags according to hardware capability
514  */
515 uint32_t
516 pci_dma_consist_check(uint32_t req_flags, pbm_t *pbm_p)
517 {
518 	if (!pci_stream_buf_enable || !pci_stream_buf_exists)
519 		req_flags |= DDI_DMA_CONSISTENT;
520 	if (req_flags & DDI_DMA_CONSISTENT && !pbm_p->pbm_sync_reg_pa)
521 		req_flags |= DMP_NOSYNC;
522 	return (req_flags);
523 }
524 
/* non-zero iff pfn lies in the inclusive range [bgn, end] */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
526 
527 /*
528  * pci_dma_type - determine which of the three types DMA (peer-to-peer,
529  *		iommu bypass, or iommu translate) we are asked to do.
530  *		Also checks pfn0 and rejects any non-peer-to-peer
531  *		requests for peer-only devices.
532  *
533  *	return values:
534  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
535  *		DDI_SUCCESS
536  *
537  *	dma handle members affected (set on exit):
538  *	mp->dmai_object		- dmareq->dmar_object
539  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
540  *	mp->dmai_flags   	- DMA type
541  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
542  *	mp->dmai_roffset 	- initialized to starting IOMMU page offset
543  *	mp->dmai_ndvmapages	- # of total IOMMU pages of entire object
544  *	mp->pdh_sync_buf_pa	- dma sync buffer PA is DMA flow is supported
545  */
546 int
547 pci_dma_type(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
548 {
549 	dev_info_t *dip = pci_p->pci_dip;
550 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
551 	pbm_t *pbm_p = pci_p->pci_pbm_p;
552 	page_t **pplist;
553 	struct as *as_p;
554 	uint32_t offset;
555 	caddr_t vaddr;
556 	pfn_t pfn0;
557 
558 	mp->dmai_rflags = pci_dma_consist_check(dmareq->dmar_flags, pbm_p);
559 	mp->dmai_flags |= mp->dmai_rflags & DMP_NOSYNC ? DMAI_FLAGS_NOSYNC : 0;
560 
561 	switch (dobj_p->dmao_type) {
562 	case DMA_OTYP_BUFVADDR:
563 	case DMA_OTYP_VADDR: {
564 		vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
565 		pplist = dobj_p->dmao_obj.virt_obj.v_priv;
566 		as_p = dobj_p->dmao_obj.virt_obj.v_as;
567 		if (as_p == NULL)
568 			as_p = &kas;
569 
570 		DEBUG2(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
571 		offset = (ulong_t)vaddr & IOMMU_PAGE_OFFSET;
572 
573 		if (pplist) {				/* shadow list */
574 			mp->dmai_flags |= DMAI_FLAGS_PGPFN;
575 			ASSERT(PAGE_LOCKED(*pplist));
576 			pfn0 = page_pptonum(*pplist);
577 		} else if (pci_dvma_remap_enabled && as_p == &kas &&
578 			dobj_p->dmao_type != DMA_OTYP_BUFVADDR) {
579 			int (*waitfp)(caddr_t) = dmareq->dmar_fp;
580 			uint_t flags = ((waitfp == DDI_DMA_SLEEP)?
581 				    HAC_SLEEP : HAC_NOSLEEP) | HAC_PAGELOCK;
582 			int ret;
583 
584 			ret = hat_add_callback(pci_dvma_cbid, vaddr,
585 			    IOMMU_PAGE_SIZE - offset, flags, mp, &pfn0,
586 			    MP_HAT_CB_COOKIE_PTR(mp, 0));
587 
588 			if (pfn0 == PFN_INVALID && ret == ENOMEM) {
589 				ASSERT(waitfp != DDI_DMA_SLEEP);
590 				if (waitfp != DDI_DMA_DONTWAIT) {
591 					ddi_set_callback(waitfp,
592 					    dmareq->dmar_arg,
593 					    &pci_kmem_clid);
594 					return (DDI_DMA_NORESOURCES);
595 					}
596 			}
597 			mp->dmai_flags |= DMAI_FLAGS_RELOC;
598 		} else
599 			pfn0 = hat_getpfnum(as_p->a_hat, vaddr);
600 		}
601 		break;
602 
603 	case DMA_OTYP_PAGES:
604 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
605 		mp->dmai_flags |= DMAI_FLAGS_PGPFN;
606 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
607 		ASSERT(PAGE_LOCKED(dobj_p->dmao_obj.pp_obj.pp_pp));
608 		break;
609 
610 	case DMA_OTYP_PADDR:
611 	default:
612 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
613 			NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
614 		return (DDI_DMA_NOMAPPING);
615 	}
616 	if (pfn0 == PFN_INVALID) {
617 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
618 			NAMEINST(dip), dobj_p);
619 		return (DDI_DMA_NOMAPPING);
620 	}
621 	if (TGT_PFN_INBETWEEN(pfn0, pbm_p->pbm_base_pfn, pbm_p->pbm_last_pfn)) {
622 		mp->dmai_flags |= DMAI_FLAGS_PEER_TO_PEER;
623 		goto done;	/* leave bypass and dvma flag as 0 */
624 	}
625 	if (PCI_DMA_ISPEERONLY(mp)) {
626 		dev_info_t *rdip = mp->dmai_rdip;
627 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
628 		return (DDI_DMA_NOMAPPING);
629 	}
630 	mp->dmai_flags |= (mp->dmai_flags & DMAI_FLAGS_BYPASSREQ) ?
631 		DMAI_FLAGS_BYPASS : DMAI_FLAGS_DVMA;
632 done:
633 	mp->dmai_object	 = *dobj_p;			/* whole object    */
634 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
635 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
636 	mp->dmai_ndvmapages = IOMMU_BTOPR(offset + mp->dmai_object.dmao_size);
637 
638 	return (DDI_SUCCESS);
639 }
640 
641 /*
642  * pci_dma_pgpfn - set up pfnlst array according to pages
643  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
644  */
645 /*ARGSUSED*/
646 static int
647 pci_dma_pgpfn(pci_t *pci_p, ddi_dma_impl_t *mp, uint_t npages)
648 {
649 	int i;
650 #ifdef DEBUG
651 	dev_info_t *dip = pci_p->pci_dip;
652 #endif
653 	switch (mp->dmai_object.dmao_type) {
654 	case DMA_OTYP_BUFVADDR:
655 	case DMA_OTYP_VADDR: {
656 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
657 		DEBUG2(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
658 			pplist, npages);
659 		for (i = 1; i < npages; i++) {
660 			iopfn_t pfn = page_pptonum(pplist[i]);
661 			ASSERT(PAGE_LOCKED(pplist[i]));
662 			PCI_SET_MP_PFN1(mp, i, pfn);
663 			DEBUG1(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
664 		}
665 		DEBUG0(DBG_DMA_MAP|DBG_CONT, dip, "\n");
666 		}
667 		break;
668 
669 	case DMA_OTYP_PAGES: {
670 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
671 		DEBUG1(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
672 		for (i = 1; i < npages; i++, pp = pp->p_next) {
673 			iopfn_t pfn = page_pptonum(pp);
674 			ASSERT(PAGE_LOCKED(pp));
675 			PCI_SET_MP_PFN1(mp, i, pfn);
676 			DEBUG1(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
677 		}
678 		DEBUG0(DBG_DMA_MAP|DBG_CONT, dip, "\n");
679 		}
680 		break;
681 
682 	default:	/* check is already done by pci_dma_type */
683 		ASSERT(0);
684 		break;
685 	}
686 	return (DDI_SUCCESS);
687 }
688 
689 /*
690  * pci_dma_vapfn - set up pfnlst array according to VA
691  *	VA/size pair: <normal, bypass, peer-to-peer>
692  *	pfn0 is skipped as it is already done.
693  *	In this case, the cached pfn0 is used to fill pfnlst[0]
694  */
695 static int
696 pci_dma_vapfn(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
697 	uint_t npages)
698 {
699 	dev_info_t *dip = pci_p->pci_dip;
700 	int i;
701 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
702 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
703 	caddr_t sva;
704 	int needcb = 0;
705 
706 	sva = (caddr_t)(((uintptr_t)mp->dmai_object.dmao_obj.virt_obj.v_addr +
707 	    IOMMU_PAGE_SIZE) & IOMMU_PAGE_MASK);
708 
709 	if (pci_dvma_remap_enabled && hat_p == kas.a_hat &&
710 		mp->dmai_object.dmao_type != DMA_OTYP_BUFVADDR)
711 		needcb = 1;
712 
713 	for (vaddr = sva, i = 1; i < npages; i++, vaddr += IOMMU_PAGE_SIZE) {
714 		pfn_t pfn;
715 
716 		if (needcb) {
717 			int (*waitfp)(caddr_t) = dmareq->dmar_fp;
718 			uint_t flags = ((waitfp == DDI_DMA_SLEEP)?
719 			    HAC_SLEEP : HAC_NOSLEEP) | HAC_PAGELOCK;
720 			int ret;
721 
722 			ret = hat_add_callback(pci_dvma_cbid, vaddr,
723 			    IOMMU_PAGE_SIZE, flags, mp, &pfn,
724 			    MP_HAT_CB_COOKIE_PTR(mp, i));
725 
726 			if (pfn == PFN_INVALID && ret == ENOMEM) {
727 				ASSERT(waitfp != DDI_DMA_SLEEP);
728 				if (waitfp != DDI_DMA_DONTWAIT)
729 					ddi_set_callback(waitfp,
730 					    dmareq->dmar_arg, &pci_kmem_clid);
731 				return (DDI_DMA_NORESOURCES);
732 			}
733 		} else
734 			pfn = hat_getpfnum(hat_p, vaddr);
735 		if (pfn == PFN_INVALID)
736 			goto err_badpfn;
737 		PCI_SET_MP_PFN1(mp, i, (iopfn_t)pfn);
738 		DEBUG3(DBG_DMA_MAP, dip, "pci_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
739 			mp, i, (iopfn_t)pfn);
740 	}
741 	return (DDI_SUCCESS);
742 err_badpfn:
743 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
744 	return (DDI_DMA_NOMAPPING);
745 }
746 
747 /*
748  * pci_dma_pfn - Fills pfn list for all pages being DMA-ed.
749  *
750  * dependencies:
751  *	mp->dmai_ndvmapages	- set to total # of dma pages
752  *
753  * return value:
754  *	DDI_SUCCESS
755  *	DDI_DMA_NOMAPPING
756  */
757 int
758 pci_dma_pfn(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
759 {
760 	uint32_t npages = mp->dmai_ndvmapages;
761 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
762 	int i, ret, peer = PCI_DMA_ISPTP(mp);
763 
764 	pbm_t *pbm_p = pci_p->pci_pbm_p;
765 	iopfn_t pfn_base = pbm_p->pbm_base_pfn;
766 	iopfn_t pfn_last = pbm_p->pbm_last_pfn;
767 	iopfn_t pfn_adj = peer ? pfn_base : 0;
768 
769 	DEBUG2(DBG_DMA_MAP, pci_p->pci_dip, "pci_dma_pfn: mp=%p pfn0=%x\n",
770 		mp, MP_PFN0(mp) - pfn_adj);
771 	/* 1 page: no array alloc/fill, no mixed mode check */
772 	if (npages == 1) {
773 		PCI_SET_MP_PFN(mp, 0, MP_PFN0(mp) - pfn_adj);
774 		return (DDI_SUCCESS);
775 	}
776 	/* allocate pfn array */
777 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (iopfn_t),
778 		waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
779 		if (waitfp != DDI_DMA_DONTWAIT)
780 			ddi_set_callback(waitfp, dmareq->dmar_arg,
781 				&pci_kmem_clid);
782 		return (DDI_DMA_NORESOURCES);
783 	}
784 	/* fill pfn array */
785 	PCI_SET_MP_PFN(mp, 0, MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
786 	if ((ret = PCI_DMA_ISPGPFN(mp) ? pci_dma_pgpfn(pci_p, mp, npages) :
787 		pci_dma_vapfn(pci_p, dmareq, mp, npages)) != DDI_SUCCESS)
788 		goto err;
789 
790 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
791 	for (i = 1; i < npages; i++) {
792 		iopfn_t pfn = PCI_GET_MP_PFN1(mp, i);
793 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
794 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
795 				NAMEINST(mp->dmai_rdip), MP_PFN0(mp), pfn);
796 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
797 			goto err;
798 		}
799 		DEBUG3(DBG_DMA_MAP, pci_p->pci_dip,
800 			"pci_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
801 		if (pfn_adj)
802 			PCI_SET_MP_PFN1(mp, i, pfn - pfn_adj);
803 	}
804 	return (DDI_SUCCESS);
805 err:
806 	pci_dvma_unregister_callbacks(pci_p, mp);
807 	pci_dma_freepfn(mp);
808 	return (ret);
809 }
810 
811 /*
812  * pci_dvma_win() - trim requested DVMA size down to window size
813  *	The 1st window starts from offset and ends at page-aligned boundary.
814  *	From the 2nd window on, each window starts and ends at page-aligned
815  *	boundary except the last window ends at wherever requested.
816  *
817  *	accesses the following mp-> members:
818  *	mp->dmai_attr.dma_attr_count_max
819  *	mp->dmai_attr.dma_attr_seg
820  *	mp->dmai_roffset   - start offset of 1st window
821  *	mp->dmai_rflags (redzone)
822  *	mp->dmai_ndvmapages (for 1 page fast path)
823  *
824  *	sets the following mp-> members:
825  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
826  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
827  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
828  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
829  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
830  *
831  *	fixed - not changed across different DMA windows
832  */
833 /*ARGSUSED*/
834 int
835 pci_dvma_win(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
836 {
837 	uint32_t redzone_sz	= HAS_REDZONE(mp) ? IOMMU_PAGE_SIZE : 0;
838 	size_t obj_sz	= mp->dmai_object.dmao_size;
839 	size_t xfer_sz;
840 	ulong_t pg_off;
841 
842 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
843 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
844 		mp->dmai_size = obj_sz;
845 		mp->dmai_winsize = IOMMU_PAGE_SIZE;
846 		mp->dmai_nwin = 1;
847 		goto done;
848 	}
849 
850 	pg_off	= mp->dmai_roffset;
851 	xfer_sz	= obj_sz + redzone_sz;
852 
853 	/* include redzone in nocross check */ {
854 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
855 		if (xfer_sz + pg_off - 1 > nocross)
856 			xfer_sz = nocross - pg_off + 1;
857 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
858 			DEBUG5(DBG_DMA_MAP, pci_p->pci_dip,
859 			    "nocross too small %lx(%lx)+%lx+%x < %" PRIx64 "\n",
860 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
861 			return (DDI_DMA_TOOBIG);
862 		}
863 	}
864 	xfer_sz -= redzone_sz;		/* restore transfer size  */
865 	/* check counter max */ {
866 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
867 		if (xfer_sz - 1 > count_max)
868 			xfer_sz = count_max + 1;
869 	}
870 	if (xfer_sz >= obj_sz) {
871 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
872 		mp->dmai_size = xfer_sz;
873 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, IOMMU_PAGE_SIZE);
874 		mp->dmai_nwin = 1;
875 		goto done;
876 	}
877 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
878 		DEBUG4(DBG_DMA_MAP, pci_p->pci_dip,
879 		    "too big: %lx+%lx+%x > %lx\n",
880 		    obj_sz, pg_off, redzone_sz, xfer_sz);
881 		return (DDI_DMA_TOOBIG);
882 	}
883 
884 	xfer_sz = IOMMU_PTOB(IOMMU_BTOP(xfer_sz + pg_off)); /* page align */
885 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
886 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
887 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
888 done:
889 	mp->dmai_winlst = NULL;
890 	dump_dma_handle(DBG_DMA_MAP, pci_p->pci_dip, mp);
891 	return (DDI_SUCCESS);
892 }
893 
/*
 * fast track cache entry to iommu context: the bits of the cache entry
 * index above the low 3 are moved up by 3 positions, the low 3 bits stay
 * in place, and the 3 bits in between (mask 0x38) are all set.
 */
#define	IOMMU_FCE_TO_CTX(i)	((((i) & ~0x7) << 3) | 0x38 | ((i) & 0x7))
899 
900 /*
901  * pci_dvma_map_fast - attempts to map fast trackable DVMA
902  */
903 int
904 pci_dvma_map_fast(iommu_t *iommu_p, ddi_dma_impl_t *mp)
905 {
906 	uint_t clustsz = pci_dvma_page_cache_clustsz;
907 	uint_t entries = pci_dvma_page_cache_entries;
908 	uint64_t *tte_addr;
909 	uint64_t tte = GET_TTE_TEMPLATE(mp);
910 	int i = iommu_p->iommu_dvma_addr_scan_start;
911 	uint8_t *lock_addr = iommu_p->iommu_dvma_cache_locks + i;
912 	iopfn_t *pfn_addr;
913 	dvma_addr_t dvma_pg;
914 	size_t npages = IOMMU_BTOP(mp->dmai_winsize);
915 #ifdef DEBUG
916 	dev_info_t *dip = mp->dmai_rdip;
917 #endif
918 	extern uint8_t ldstub(uint8_t *);
919 	ASSERT(IOMMU_PTOB(npages) == mp->dmai_winsize);
920 	ASSERT(npages + HAS_REDZONE(mp) <= clustsz);
921 
922 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++);
923 	if (i >= entries) {
924 		lock_addr = iommu_p->iommu_dvma_cache_locks;
925 		i = 0;
926 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++);
927 		if (i >= entries) {
928 #ifdef PCI_DMA_PROF
929 			pci_dvmaft_exhaust++;
930 #endif
931 			return (DDI_DMA_NORESOURCES);
932 		}
933 	}
934 	iommu_p->iommu_dvma_addr_scan_start = (i + 1) & (entries - 1);
935 	if (PCI_DMA_USECTX(mp)) {
936 		dvma_context_t ctx = IOMMU_FCE_TO_CTX(i);
937 		tte |= IOMMU_CTX2TTE(ctx);
938 		mp->dmai_flags |= DMAI_FLAGS_CONTEXT;
939 		DEBUG1(DBG_DMA_MAP, dip, "fast: ctx=0x%x\n", ctx);
940 	}
941 	i *= clustsz;
942 	tte_addr = iommu_p->iommu_tsb_vaddr + i;
943 	dvma_pg = iommu_p->dvma_base_pg + i;
944 #ifdef DEBUG
945 	for (i = 0; i < clustsz; i++)
946 		ASSERT(TTE_IS_INVALID(tte_addr[i]));
947 #endif
948 	*tte_addr = tte | IOMMU_PTOB(MP_PFN0(mp)); /* map page 0 */
949 	DEBUG5(DBG_DMA_MAP, dip, "fast %p:dvma_pg=%x tte0(%p)=%08x.%08x\n", mp,
950 		dvma_pg, tte_addr, HI32(*tte_addr), LO32(*tte_addr));
951 	if (npages == 1)
952 		goto tte_done;
953 	pfn_addr = PCI_GET_MP_PFN1_ADDR(mp); /* short iommu_map_pages() */
954 	for (tte_addr++, i = 1; i < npages; i++, tte_addr++, pfn_addr++) {
955 		*tte_addr = tte | IOMMU_PTOB(*pfn_addr);
956 		DEBUG5(DBG_DMA_MAP, dip, "fast %p:tte(%p, %p)=%08x.%08x\n", mp,
957 			tte_addr, pfn_addr, HI32(*tte_addr), LO32(*tte_addr));
958 	}
959 tte_done:
960 #ifdef PCI_DMA_PROF
961 	pci_dvmaft_success++;
962 #endif
963 	mp->dmai_mapping = mp->dmai_roffset | IOMMU_PTOB(dvma_pg);
964 	mp->dmai_offset = 0;
965 	mp->dmai_flags |= DMAI_FLAGS_FASTTRACK;
966 	PCI_SAVE_MP_TTE(mp, tte);	/* save TTE template for unmapping */
967 	if (DVMA_DBG_ON(iommu_p))
968 		pci_dvma_alloc_debug(iommu_p, (char *)mp->dmai_mapping,
969 			mp->dmai_size, mp);
970 	return (DDI_SUCCESS);
971 }
972 
973 /*
974  * pci_dvma_map: map non-fasttrack DMA
975  *		Use quantum cache if single page DMA.
976  */
977 int
978 pci_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, iommu_t *iommu_p)
979 {
980 	uint_t npages = PCI_DMA_WINNPGS(mp);
981 	dvma_addr_t dvma_pg, dvma_pg_index;
982 	void *dvma_addr;
983 	uint64_t tte = GET_TTE_TEMPLATE(mp);
984 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
985 #ifdef DEBUG
986 	dev_info_t *dip = mp->dmai_rdip;
987 #endif
988 	/*
989 	 * allocate dvma space resource and map in the first window.
990 	 * (vmem_t *vmp, size_t size,
991 	 *	size_t align, size_t phase, size_t nocross,
992 	 *	void *minaddr, void *maxaddr, int vmflag)
993 	 */
994 	if ((npages == 1) && !HAS_REDZONE(mp) && HAS_NOSYSLIMIT(mp)) {
995 		dvma_addr = vmem_alloc(iommu_p->iommu_dvma_map,
996 			IOMMU_PAGE_SIZE, sleep);
997 		mp->dmai_flags |= DMAI_FLAGS_VMEMCACHE;
998 #ifdef PCI_DMA_PROF
999 		pci_dvma_vmem_alloc++;
1000 #endif
1001 	} else {
1002 		dvma_addr = vmem_xalloc(iommu_p->iommu_dvma_map,
1003 			IOMMU_PTOB(npages + HAS_REDZONE(mp)),
1004 			MAX(mp->dmai_attr.dma_attr_align, IOMMU_PAGE_SIZE),
1005 			0,
1006 			mp->dmai_attr.dma_attr_seg + 1,
1007 			(void *)mp->dmai_attr.dma_attr_addr_lo,
1008 			(void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
1009 			sleep);
1010 #ifdef PCI_DMA_PROF
1011 		pci_dvma_vmem_xalloc++;
1012 #endif
1013 	}
1014 	dvma_pg = IOMMU_BTOP((ulong_t)dvma_addr);
1015 	dvma_pg_index = dvma_pg - iommu_p->dvma_base_pg;
1016 	DEBUG2(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
1017 		dvma_pg, dvma_pg_index);
1018 	if (dvma_pg == 0)
1019 		goto noresource;
1020 
1021 	/* allocate DVMA context */
1022 	if ((npages >= pci_context_minpages) && PCI_DMA_USECTX(mp)) {
1023 		dvma_context_t ctx;
1024 		if (ctx = pci_iommu_get_dvma_context(iommu_p, dvma_pg_index)) {
1025 			tte |= IOMMU_CTX2TTE(ctx);
1026 			mp->dmai_flags |= DMAI_FLAGS_CONTEXT;
1027 		}
1028 	}
1029 	mp->dmai_mapping = mp->dmai_roffset | IOMMU_PTOB(dvma_pg);
1030 	mp->dmai_offset = 0;
1031 	PCI_SAVE_MP_TTE(mp, tte);	/* mp->dmai_tte = tte */
1032 	iommu_map_pages(iommu_p, mp, dvma_pg, npages, 0);
1033 	return (DDI_SUCCESS);
1034 noresource:
1035 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
1036 		DEBUG0(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
1037 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
1038 			&iommu_p->iommu_dvma_clid);
1039 	}
1040 	DEBUG0(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
1041 	return (DDI_DMA_NORESOURCES);
1042 }
1043 
1044 void
1045 pci_dvma_unmap(iommu_t *iommu_p, ddi_dma_impl_t *mp)
1046 {
1047 	size_t npages;
1048 	dvma_addr_t dvma_addr = (dvma_addr_t)mp->dmai_mapping;
1049 	dvma_addr_t dvma_pg = IOMMU_BTOP(dvma_addr);
1050 	dvma_addr = IOMMU_PTOB(dvma_pg);
1051 
1052 	if (mp->dmai_flags & DMAI_FLAGS_FASTTRACK) {
1053 		iopfn_t index = dvma_pg - iommu_p->dvma_base_pg;
1054 		ASSERT(index % pci_dvma_page_cache_clustsz == 0);
1055 		index /= pci_dvma_page_cache_clustsz;
1056 		ASSERT(index < pci_dvma_page_cache_entries);
1057 		iommu_p->iommu_dvma_cache_locks[index] = 0;
1058 #ifdef PCI_DMA_PROF
1059 		pci_dvmaft_free++;
1060 #endif
1061 		return;
1062 	}
1063 	npages = IOMMU_BTOP(mp->dmai_winsize) + HAS_REDZONE(mp);
1064 	pci_vmem_free(iommu_p, mp, (void *)dvma_addr, npages);
1065 
1066 	if (mp->dmai_flags & DMAI_FLAGS_CONTEXT)
1067 		pci_iommu_free_dvma_context(iommu_p, MP2CTX(mp));
1068 }
1069 
1070 void
1071 pci_dma_sync_unmap(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp)
1072 {
1073 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
1074 	iommu_t *iommu_p = pci_p->pci_iommu_p;
1075 	uint64_t sync_buf_save = SYNC_BUF_PA(mp);
1076 	uint32_t fast_track = mp->dmai_flags & DMAI_FLAGS_FASTTRACK;
1077 
1078 	if (fast_track) {
1079 		dvma_addr_t dvma_pg = IOMMU_BTOP(mp->dmai_mapping);
1080 
1081 		SYNC_BUF_PA(mp) = IOMMU_PAGE_TTEPA(iommu_p, dvma_pg);
1082 		ASSERT(!(SYNC_BUF_PA(mp) & PCI_SYNC_FLAG_SIZE - 1));
1083 	}
1084 
1085 	if (pci_dvma_sync_before_unmap) {
1086 		pci_dma_sync(dip, rdip, (ddi_dma_handle_t)mp, 0, 0, 0);
1087 		iommu_unmap_window(iommu_p, mp);
1088 	} else {
1089 		iommu_unmap_window(iommu_p, mp);
1090 		pci_dma_sync(dip, rdip, (ddi_dma_handle_t)mp, 0, 0, 0);
1091 	}
1092 
1093 	if (fast_track)
1094 		SYNC_BUF_PA(mp) = sync_buf_save;
1095 }
1096 
/*
 * pci_dvma_ctl - DMA control operations for DVMA mapped handles.
 *
 * DVMA mappings may have multiple windows, but each window always has
 * one segment.
 *
 *	cmd selects the operation; offp, lenp and objp are interpreted per
 *	command as shown in each case below.  Unknown commands, and
 *	DDI_DMA_REMAP when pci_dvma_remap_enabled is clear, return
 *	DDI_FAILURE.
 */
int
pci_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
	uint_t cache_flags)
{
	switch (cmd) {
	case DDI_DMA_SYNC:
		return (pci_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
		    *offp, *lenp, cache_flags));

	/* handle + object offset -> cookie; objp is used as the cookie */
	case DDI_DMA_HTOC: {
		int ret;
		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
		uint_t win_size = mp->dmai_winsize;
		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;

		if (off >= mp->dmai_object.dmao_size) {
			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
				NAMEINST(mp->dmai_rdip), off);
			return (DDI_FAILURE);
		}
		/* window number is computed from the offset plus dmai_roffset */
		off += mp->dmai_roffset;
		ret = pci_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
		if (ret)
			return (ret);
		DEBUG4(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
			cp->dmac_address, cp->dmac_size, off, *offp);

		/* adjust cookie addr/len if we are not on window boundary */
		ASSERT((off % win_size) == (off -
			(PCI_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
		/* in window 0 the caller's offset is used unchanged */
		off = PCI_DMA_CURWIN(mp) ? off % win_size : *offp;
		ASSERT(cp->dmac_size > off);
		cp->dmac_laddress += off;
		cp->dmac_size -= off;
		DEBUG5(DBG_DMA_CTL, dip,
			"HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
			mp, cp->dmac_address, cp->dmac_size, off, wo_off);
		}
		return (DDI_SUCCESS);

	/* report offset and size of the current window */
	case DDI_DMA_REPWIN:
		*offp = mp->dmai_offset;
		*lenp = mp->dmai_size;
		return (DDI_SUCCESS);

	/* switch to the window containing object offset *offp */
	case DDI_DMA_MOVWIN: {
		off_t off = *offp;
		if (off >= mp->dmai_object.dmao_size)
			return (DDI_FAILURE);
		off += mp->dmai_roffset;
		return (pci_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
		    off / mp->dmai_winsize, offp, lenp,
		    (ddi_dma_cookie_t *)objp, NULL));
		}

	/*
	 * offp, when non-NULL, points at the window number the caller
	 * believes is current; objp receives a window number on every
	 * return path that writes it.
	 */
	case DDI_DMA_NEXTWIN: {
		window_t win = PCI_DMA_CURWIN(mp);
		if (offp) {
			if (*(window_t *)offp != win) {  /* window not active */
				*(window_t *)objp = win; /* return cur win */
				return (DDI_DMA_STALE);
			}
			win++;
		} else	/* map win 0 */
			win = 0;
		if (win >= mp->dmai_nwin) {
			*(window_t *)objp = win - 1;
			return (DDI_DMA_DONE);
		}
		if (pci_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
		    win, 0, 0, 0, 0)) {
			*(window_t *)objp = win - 1;
			return (DDI_FAILURE);
		}
		*(window_t *)objp = win;
		}
		return (DDI_SUCCESS);

	/* a non-NULL lenp means a segment was already handed out */
	case DDI_DMA_NEXTSEG:
		if (*(window_t *)offp != PCI_DMA_CURWIN(mp))
			return (DDI_DMA_STALE);
		if (lenp)				/* only 1 seg allowed */
			return (DDI_DMA_DONE);
							/* return mp as seg 0 */
		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
		return (DDI_SUCCESS);

	/* the single segment's cookie spans the whole current window */
	case DDI_DMA_SEGTOC:
		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
			mp->dmai_size);
		*offp = mp->dmai_offset;
		*lenp = mp->dmai_size;
		return (DDI_SUCCESS);

	/* cookie (passed through offp) -> object offset, stored in *objp */
	case DDI_DMA_COFF: {
		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
		/* the cookie must lie entirely inside the current window */
		if (cp->dmac_address < mp->dmai_mapping ||
		    (cp->dmac_address + cp->dmac_size) >
		    (mp->dmai_mapping + mp->dmai_size))
			return (DDI_FAILURE);
		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
			mp->dmai_offset);
		}
		return (DDI_SUCCESS);

	case DDI_DMA_REMAP:
		if (pci_dvma_remap_enabled)
			return (pci_dvma_remap(dip, rdip, mp, *offp, *lenp));
		return (DDI_FAILURE);

	default:
		DEBUG3(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
			cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
		break;
	}
	return (DDI_FAILURE);
}
1220 
1221 void
1222 pci_dma_freewin(ddi_dma_impl_t *mp)
1223 {
1224 	pci_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
1225 	for (win2_p = win_p; win_p; win2_p = win_p) {
1226 		win_p = win2_p->win_next;
1227 		kmem_free(win2_p, sizeof (pci_dma_win_t) +
1228 			sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
1229 	}
1230 	mp->dmai_nwin = 0;
1231 	mp->dmai_winlst = NULL;
1232 }
1233 
/*
 * pci_dma_newwin - create a dma window object and cookies
 *
 *	After the initial scan in pci_dma_physwin(), which identifies
 *	a portion of the pfn array that belongs to a dma window,
 *	we are called to allocate and initialize representing memory
 *	resources. We know from the 1st scan the number of cookies
 *	or dma segment in this window so we can allocate a contiguous
 *	memory array for the dma cookies (The implementation of
 *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
 *
 *	A second round scan is done on the pfn array to identify
 *	each dma segment and initialize its corresponding dma cookie.
 *	We don't need to do all the safety checking and we know they
 *	all belong to the same dma window.
 *
 *	Input:	cookie_no - # of cookies identified by the 1st scan
 *		start_idx - subscript of the pfn array for the starting pfn
 *		end_idx   - subscript of the last pfn in dma window
 *		win_pp    - pointer to win_next member of previous window
 *		count_max - a segment is closed once growing it by one
 *			    more page would make its byte size minus 1
 *			    exceed this value
 *		bypass_prefix - OR-ed into the address of every cookie
 *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
 *		DDI_DMA_NORESOURCES - caller frees all previous window objs;
 *			a callback is registered on pci_kmem_clid unless
 *			dmar_fp is DDI_DMA_DONTWAIT
 *	Note:	Each cookie and window size are all initialized on page
 *		boundary. This is not true for the 1st cookie of the 1st
 *		window and the last cookie of the last window.
 *		We fix that later in upper layer which has access to size
 *		and offset info.
 *
 */
static int
pci_dma_newwin(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, uint32_t cookie_no,
	uint32_t start_idx, uint32_t end_idx, pci_dma_win_t **win_pp,
	uint64_t count_max, uint64_t bypass_prefix)
{
	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
	ddi_dma_cookie_t *cookie_p;
	uint32_t pfn_no = 1;		/* pages in the segment being built */
	iopfn_t pfn = PCI_GET_MP_PFN(mp, start_idx);
	iopfn_t prev_pfn = pfn;
	uint64_t seg_pfn0 = pfn;	/* first pfn of the current segment */
	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
	/* window header and its cookie array share one allocation */
	pci_dma_win_t *win_p = kmem_alloc(sizeof (pci_dma_win_t) + sz,
		waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
	if (!win_p)
		goto noresource;

	win_p->win_next = NULL;
	win_p->win_ncookies = cookie_no;
	win_p->win_curseg = 0;	/* start from segment 0 */
	win_p->win_size = IOMMU_PTOB(end_idx - start_idx + 1);
	/* win_p->win_offset is left uninitialized */

	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
	start_idx++;
	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
		pfn = PCI_GET_MP_PFN1(mp, start_idx);
		/* same segment: pfn is contiguous and count_max still fits */
		if ((pfn == prev_pfn + 1) &&
			(IOMMU_PTOB(pfn_no + 1) - 1 <= count_max))
			continue;

		/* close up the cookie up to (including) prev_pfn */
		MAKE_DMA_COOKIE(cookie_p, IOMMU_PTOB(seg_pfn0) | bypass_prefix,
			IOMMU_PTOB(pfn_no));
		DEBUG2(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
			IOMMU_PTOB(seg_pfn0) | bypass_prefix, pfn_no);

		cookie_p++;	/* advance to next available cookie cell */
		pfn_no = 0;	/* the loop increment brings this back to 1 */
		seg_pfn0 = pfn;	/* start a new segment from current pfn */
	}
	/* close the segment that was still open when the scan ended */
	MAKE_DMA_COOKIE(cookie_p, IOMMU_PTOB(seg_pfn0) | bypass_prefix,
		IOMMU_PTOB(pfn_no));
	DEBUG3(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
		IOMMU_PTOB(seg_pfn0) | bypass_prefix, pfn_no, cookie_no);
#ifdef DEBUG
	/* the 2nd scan must produce exactly the cookie count of the 1st */
	cookie_p++;
	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
#endif
	*win_pp = win_p;
	return (DDI_SUCCESS);
noresource:
	if (waitfp != DDI_DMA_DONTWAIT)
		ddi_set_callback(waitfp, dmareq->dmar_arg, &pci_kmem_clid);
	return (DDI_DMA_NORESOURCES);
}
1319 
1320 /*
1321  * pci_dma_adjust - adjust 1st and last cookie and window sizes
1322  *	remove initial dma page offset from 1st cookie and window size
1323  *	remove last dma page remainder from last cookie and window size
1324  *	fill win_offset of each dma window according to just fixed up
1325  *		each window sizes
1326  *	pci_dma_win_t members modified:
1327  *	win_p->win_offset - this window's offset within entire DMA object
1328  *	win_p->win_size	  - xferrable size (in bytes) for this window
1329  *
1330  *	ddi_dma_impl_t members modified:
1331  *	mp->dmai_size	  - 1st window xferrable size
1332  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1333  *
1334  *	ddi_dma_cookie_t members modified:
1335  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1336  *	cookie_p->dmac_laddress - 1st cookie add page offset
1337  */
1338 static void
1339 pci_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, pci_dma_win_t *win_p)
1340 {
1341 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1342 	size_t pg_offset = mp->dmai_roffset;
1343 	size_t win_offset = 0;
1344 
1345 	cookie_p->dmac_size -= pg_offset;
1346 	cookie_p->dmac_laddress |= pg_offset;
1347 	win_p->win_size -= pg_offset;
1348 	DEBUG1(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1349 
1350 	mp->dmai_size = win_p->win_size;
1351 	mp->dmai_offset = 0;
1352 
1353 	pg_offset += mp->dmai_object.dmao_size;
1354 	pg_offset &= IOMMU_PAGE_OFFSET;
1355 	if (pg_offset)
1356 		pg_offset = IOMMU_PAGE_SIZE - pg_offset;
1357 	DEBUG1(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1358 
1359 	for (; win_p->win_next; win_p = win_p->win_next) {
1360 		DEBUG1(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1361 		win_p->win_offset = win_offset;
1362 		win_offset += win_p->win_size;
1363 	}
1364 	/* last window */
1365 	win_p->win_offset = win_offset;
1366 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1367 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1368 	win_p->win_size -= pg_offset;
1369 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1370 }
1371 
/*
 * pci_dma_physwin() - carve up dma windows using physical addresses.
 *	Called to handle iommu bypass and pci peer-to-peer transfers.
 *	Calls pci_dma_newwin() to allocate window objects.
 *
 * Dependency: mp->dmai_pfnlst points to an array of pfns
 *
 * 1. Each dma window is represented by a pci_dma_win_t object.
 *	The object will be cast to ddi_dma_win_t and returned
 *	to leaf driver through the DDI interface.
 * 2. Each dma window can have several dma segments with each
 *	segment representing a physically contiguous either memory
 *	space (if we are doing an iommu bypass transfer) or pci address
 *	space (if we are doing a peer-to-peer transfer).
 * 3. Each segment has a DMA cookie to program the DMA engine.
 *	The cookies within each DMA window must be located in a
 *	contiguous array per ddi_dma_nextcookie(9f).
 * 4. The number of DMA segments within each DMA window cannot exceed
 *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
 *	too large to fit in the sgllen, the rest needs to be
 *	relocated to the next dma window.
 * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
 *	and nocross restrictions while bypass DMA follows the set of
 *	restrictions with system limits factored in.
 *
 * Return:
 *	DDI_DMA_MAPPED on success, with the handle updated as follows:
 *	mp->dmai_winlst	 - points to a link list of pci_dma_win_t objects.
 *		Each pci_dma_win_t object on the link list contains
 *		information such as its window size (# of pages),
 *		starting offset (also see Restriction), an array of
 *		DMA cookies, and # of cookies in the array.
 *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
 *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
 *	mp->dmai_mapping - starting cookie address
 *	mp->dmai_rflags	 - consistent, nosync, no redzone
 *	mp->dmai_cookie	 - 2nd cookie of the 1st DMA window, or 0 when that
 *		window has a single cookie
 *
 *	DDI_DMA_NOMAPPING when an attribute or pfn range check fails, or
 *	the error returned by pci_dma_newwin(); on the "err" path all
 *	windows created so far are freed with pci_dma_freewin().
 *
 * Restriction:
 *	Each pci_dma_win_t object can theoretically start from any offset
 *	since the iommu is not involved. However, this implementation
 *	always make windows start from page aligned offset (except
 *	the 1st window, which follows the requested offset) due to the
 *	fact that we are handed a pfn list. This does require device's
 *	count_max and attr_seg to be at least IOMMU_PAGE_SIZE aligned.
 */
int
pci_dma_physwin(pci_t *pci_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
{
	uint_t npages = mp->dmai_ndvmapages;
	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
	iopfn_t pfn_lo, pfn_hi, prev_pfn, bypass_pfn;
	iopfn_t pfn = PCI_GET_MP_PFN(mp, 0);
	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
	uint64_t count_max, bypass = PCI_DMA_BYPASS_PREFIX(mp, pfn);
	pci_dma_win_t **win_pp = (pci_dma_win_t **)&mp->dmai_winlst;
	ddi_dma_cookie_t *cookie0_p;

	if (PCI_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
		ddi_dma_attr_t *dev_attr_p = DEV_ATTR(mp);
		iopfn_t pfn_base = pci_p->pci_pbm_p->pbm_base_pfn;
		iopfn_t pfn_last = pci_p->pci_pbm_p->pbm_last_pfn - pfn_base;
		uint64_t nocross = dev_attr_p->dma_attr_seg;
		/* a non-zero segment boundary below UINT32_MAX is rejected */
		if (nocross && (nocross < UINT32_MAX))
			return (DDI_DMA_NOMAPPING);
		if (dev_attr_p->dma_attr_align > IOMMU_PAGE_SIZE)
			return (DDI_DMA_NOMAPPING);
		pfn_lo = IOMMU_BTOP(dev_attr_p->dma_attr_addr_lo);
		pfn_hi = IOMMU_BTOP(dev_attr_p->dma_attr_addr_hi);
		pfn_hi = MIN(pfn_hi, pfn_last);
		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
			return (DDI_DMA_NOMAPPING);
		count_max = dev_attr_p->dma_attr_count_max;
		/*
		 * NOTE(review): a dma_attr_seg of 0 passes the nocross check
		 * above and clamps count_max to 0 here - confirm whether 0
		 * can reach this point.
		 */
		count_max = MIN(count_max, nocross);
		/*
		 * the following count_max trim is not done because we are
		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
		 * count_max=MIN(count_max, IOMMU_PTOB(pfn_hi - pfn_lo + 1)-1);
		 */
	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
		count_max = mp->dmai_attr.dma_attr_count_max;
		pfn_lo = IOMMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
		pfn_hi = IOMMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
	}

	bypass_pfn = IOMMU_BTOP(bypass);

	/*
	 * 1st scan: count segments (cookie_no) and emit a window each time
	 * the count reaches sgllen.  pfn_no counts pages in the open segment.
	 */
	for (prev_pfn = (bypass_pfn | pfn), i = 1; i < npages;
	    i++, prev_pfn = pfn, pfn_no++) {
		pfn = bypass_pfn | PCI_GET_MP_PFN1(mp, i);
		/* still the same segment */
		if ((pfn == prev_pfn + 1) &&
			(IOMMU_PTOB(pfn_no + 1) - 1 <= count_max))
			continue;
		/* range check is done only where a segment ends */
		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
			ret = DDI_DMA_NOMAPPING;
			goto err;
		}
		cookie_no++;
		pfn_no = 0;
		if (cookie_no < sgllen)
			continue;

		/* sgllen segments collected: pages up to i - 1 form a window */
		DEBUG3(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
			win_pfn0_index, i - 1, cookie_no);
		if (ret = pci_dma_newwin(dmareq, mp, cookie_no,
			win_pfn0_index, i - 1, win_pp, count_max, bypass))
			goto err;

		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
		win_no++;
		win_pfn0_index = i;
		cookie_no = 0;
	}
	/* upper bound check for the last pfn scanned */
	if (pfn > pfn_hi) {
		ret = DDI_DMA_NOMAPPING;
		goto err;
	}
	/* account for the segment still open and build the final window */
	cookie_no++;
	DEBUG3(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
		win_pfn0_index, i - 1, cookie_no);
	if (ret = pci_dma_newwin(dmareq, mp, cookie_no, win_pfn0_index,
		i - 1, win_pp, count_max, bypass))
		goto err;
	win_no++;
	pci_dma_adjust(dmareq, mp, mp->dmai_winlst);
	mp->dmai_nwin = win_no;
	mp->dmai_rflags |= DDI_DMA_CONSISTENT;
	/* without a pbm sync register the handle is marked as needing no sync */
	if (!pci_p->pci_pbm_p->pbm_sync_reg_pa) {
		mp->dmai_rflags |= DMP_NOSYNC;
		mp->dmai_flags |= DMAI_FLAGS_NOSYNC;
	}
	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
	cookie0_p = (ddi_dma_cookie_t *)(WINLST(mp) + 1);
	mp->dmai_cookie = WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
	mp->dmai_mapping = cookie0_p->dmac_laddress;

	pci_dma_freepfn(mp);
	return (DDI_DMA_MAPPED);
err:
	pci_dma_freewin(mp);
	return (ret);
}
1513 
/*
 * pci_dma_ctl - DMA control operations for handles whose windows are kept
 *	as a pci_dma_win_t list on mp->dmai_winlst (built by
 *	pci_dma_physwin()).
 *
 *	The "current window" is tracked in the handle through dmai_offset,
 *	dmai_size, dmai_mapping and dmai_cookie; the current segment of a
 *	window is tracked in its win_curseg.  offp, lenp and objp are
 *	interpreted per command as shown in each case below.
 *	DDI_DMA_SYNC is a no-op that returns DDI_SUCCESS; DDI_DMA_REMAP and
 *	unknown commands return DDI_FAILURE.
 */
/*ARGSUSED*/
int
pci_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
	uint_t cache_flags)
{
	switch (cmd) {
	case DDI_DMA_SYNC: /* XXX */
		return (DDI_SUCCESS);

	/* object offset -> cookie (written through objp) */
	case DDI_DMA_HTOC: {
		off_t off = *offp;
		ddi_dma_cookie_t *loop_cp, *cp;
		pci_dma_win_t *win_p = mp->dmai_winlst;

		if (off >= mp->dmai_object.dmao_size)
			return (DDI_FAILURE);

		/* locate window */
		while (win_p->win_offset + win_p->win_size <= off)
			win_p = win_p->win_next;

		/* make the located window the current one */
		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
		mp->dmai_offset = win_p->win_offset;
		mp->dmai_size   = win_p->win_size;
		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */

		/* adjust cookie addr/len if we are not on cookie boundary */
		off -= win_p->win_offset;	   /* offset within window */
		for (; off >= loop_cp->dmac_size; loop_cp++)
			off -= loop_cp->dmac_size; /* offset within cookie */

		mp->dmai_cookie = loop_cp + 1;
		win_p->win_curseg = loop_cp - cp;
		cp = (ddi_dma_cookie_t *)objp;
		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
			loop_cp->dmac_size - off);

		DEBUG2(DBG_DMA_CTL, dip,
			"HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
			cp->dmac_laddress, cp->dmac_size);
		}
		return (DDI_SUCCESS);

	/* report offset and size of the current window */
	case DDI_DMA_REPWIN:
		*offp = mp->dmai_offset;
		*lenp = mp->dmai_size;
		return (DDI_SUCCESS);

	/*
	 * make the window containing object offset *offp current and
	 * return its offset, size and first cookie
	 */
	case DDI_DMA_MOVWIN: {
		off_t off = *offp;
		ddi_dma_cookie_t *cp;
		pci_dma_win_t *win_p = mp->dmai_winlst;

		if (off >= mp->dmai_object.dmao_size)
			return (DDI_FAILURE);

		/* locate window */
		while (win_p->win_offset + win_p->win_size <= off)
			win_p = win_p->win_next;

		cp = (ddi_dma_cookie_t *)(win_p + 1);
		mp->dmai_offset = win_p->win_offset;
		mp->dmai_size   = win_p->win_size;
		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 start addr */
		mp->dmai_cookie = cp + 1;
		win_p->win_curseg = 0;

		*(ddi_dma_cookie_t *)objp = *cp;
		*offp = win_p->win_offset;
		*lenp = win_p->win_size;
		/*
		 * NOTE(review): the trace label below says "HTOC" although
		 * this is the MOVWIN case - looks like a copy/paste leftover.
		 */
		DEBUG2(DBG_DMA_CTL, dip,
			"HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
			cp->dmac_laddress, cp->dmac_size);
		}
		return (DDI_SUCCESS);

	/*
	 * *offp is the caller's current window (NULL asks for the first
	 * window); the next window is returned through objp
	 */
	case DDI_DMA_NEXTWIN: {
		pci_dma_win_t *win_p = *(pci_dma_win_t **)offp;
		pci_dma_win_t **nw_pp = (pci_dma_win_t **)objp;
		ddi_dma_cookie_t *cp;
		if (!win_p) {
			*nw_pp = mp->dmai_winlst;
			return (DDI_SUCCESS);
		}

		/* the caller's window is stale if it is not the current one */
		if (win_p->win_offset != mp->dmai_offset)
			return (DDI_DMA_STALE);
		if (!win_p->win_next)
			return (DDI_DMA_DONE);
		win_p = win_p->win_next;
		cp = (ddi_dma_cookie_t *)(win_p + 1);
		mp->dmai_offset = win_p->win_offset;
		mp->dmai_size   = win_p->win_size;
		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 start addr */
		mp->dmai_cookie = cp + 1;
		win_p->win_curseg = 0;
		*nw_pp = win_p;
		}
		return (DDI_SUCCESS);

	/* advance win_curseg of the window passed through *offp */
	case DDI_DMA_NEXTSEG: {
		pci_dma_win_t *w_p = *(pci_dma_win_t **)offp;
		if (w_p->win_offset != mp->dmai_offset)
			return (DDI_DMA_STALE);
		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
			return (DDI_DMA_DONE);
		w_p->win_curseg++;
		}
		/* the handle itself is returned as the segment */
		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
		return (DDI_SUCCESS);

	/* current segment -> its cookie plus offset and length */
	case DDI_DMA_SEGTOC: {
		pci_dma_win_t *win_p = mp->dmai_winlst;
		off_t off = mp->dmai_offset;
		ddi_dma_cookie_t *cp;
		int i;

		/* locate active window */
		for (; win_p->win_offset != off; win_p = win_p->win_next);
		cp = (ddi_dma_cookie_t *)(win_p + 1);
		/* add up the sizes of the cookies in front of win_curseg */
		for (i = 0; i < win_p->win_curseg; i++, cp++)
			off += cp->dmac_size;
		*offp = off;
		*lenp = cp->dmac_size;
		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
		}
		return (DDI_SUCCESS);

	/* cookie address (passed through offp) -> object offset in *objp */
	case DDI_DMA_COFF: {
		pci_dma_win_t *win_p;
		ddi_dma_cookie_t *cp;
		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
		size_t win_off;

		/* search every cookie of every window for the key address */
		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
			int i;
			win_off = 0;
			cp = (ddi_dma_cookie_t *)(win_p + 1);
			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
				size_t sz = cp->dmac_size;

				addr = cp->dmac_laddress;
				/*
				 * NOTE(review): the upper bound is inclusive,
				 * so key == addr + sz (one past the cookie)
				 * also matches - confirm this is intended.
				 */
				if ((addr <= key) && (addr + sz >= key))
					goto found;
				win_off += sz;
			}
		}
		return (DDI_FAILURE);
found:
		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
		return (DDI_SUCCESS);
		}

	case DDI_DMA_REMAP:
		return (DDI_FAILURE);

	default:
		DEBUG3(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
			cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
		break;
	}
	return (DDI_FAILURE);
}
1678 
1679 static void
1680 pci_dvma_debug_init(iommu_t *iommu_p)
1681 {
1682 	size_t sz = sizeof (struct dvma_rec) * pci_dvma_debug_rec;
1683 	ASSERT(MUTEX_HELD(&iommu_p->dvma_debug_lock));
1684 	cmn_err(CE_NOTE, "PCI DVMA %p stat ON", iommu_p);
1685 
1686 	iommu_p->dvma_alloc_rec = kmem_zalloc(sz, KM_SLEEP);
1687 	iommu_p->dvma_free_rec = kmem_zalloc(sz, KM_SLEEP);
1688 
1689 	iommu_p->dvma_active_list = NULL;
1690 	iommu_p->dvma_alloc_rec_index = 0;
1691 	iommu_p->dvma_free_rec_index = 0;
1692 	iommu_p->dvma_active_count = 0;
1693 }
1694 
1695 void
1696 pci_dvma_debug_fini(iommu_t *iommu_p)
1697 {
1698 	struct dvma_rec *prev, *ptr;
1699 	size_t sz = sizeof (struct dvma_rec) * pci_dvma_debug_rec;
1700 	uint64_t mask = ~(1ull << iommu_p->iommu_inst);
1701 	cmn_err(CE_NOTE, "PCI DVMA %p stat OFF", iommu_p);
1702 
1703 	kmem_free(iommu_p->dvma_alloc_rec, sz);
1704 	kmem_free(iommu_p->dvma_free_rec, sz);
1705 	iommu_p->dvma_alloc_rec = iommu_p->dvma_free_rec = NULL;
1706 
1707 	prev = iommu_p->dvma_active_list;
1708 	if (!prev)
1709 		return;
1710 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1711 		kmem_free(prev, sizeof (struct dvma_rec));
1712 	kmem_free(prev, sizeof (struct dvma_rec));
1713 
1714 	iommu_p->dvma_active_list = NULL;
1715 	iommu_p->dvma_alloc_rec_index = 0;
1716 	iommu_p->dvma_free_rec_index = 0;
1717 	iommu_p->dvma_active_count = 0;
1718 
1719 	pci_dvma_debug_on  &= mask;
1720 	pci_dvma_debug_off &= mask;
1721 }
1722 
1723 void
1724 pci_dvma_alloc_debug(iommu_t *iommu_p, char *address, uint_t len,
1725 	ddi_dma_impl_t *mp)
1726 {
1727 	struct dvma_rec *ptr;
1728 	mutex_enter(&iommu_p->dvma_debug_lock);
1729 
1730 	if (!iommu_p->dvma_alloc_rec)
1731 		pci_dvma_debug_init(iommu_p);
1732 	if (DVMA_DBG_OFF(iommu_p)) {
1733 		pci_dvma_debug_fini(iommu_p);
1734 		goto done;
1735 	}
1736 
1737 	ptr = &iommu_p->dvma_alloc_rec[iommu_p->dvma_alloc_rec_index];
1738 	ptr->dvma_addr = address;
1739 	ptr->len = len;
1740 	ptr->mp = mp;
1741 	if (++iommu_p->dvma_alloc_rec_index == pci_dvma_debug_rec)
1742 		iommu_p->dvma_alloc_rec_index = 0;
1743 
1744 	ptr = kmem_alloc(sizeof (struct dvma_rec), KM_SLEEP);
1745 	ptr->dvma_addr = address;
1746 	ptr->len = len;
1747 	ptr->mp = mp;
1748 
1749 	ptr->next = iommu_p->dvma_active_list;
1750 	iommu_p->dvma_active_list = ptr;
1751 	iommu_p->dvma_active_count++;
1752 done:
1753 	mutex_exit(&iommu_p->dvma_debug_lock);
1754 }
1755 
1756 void
1757 pci_dvma_free_debug(iommu_t *iommu_p, char *address, uint_t len,
1758 	ddi_dma_impl_t *mp)
1759 {
1760 	struct dvma_rec *ptr, *ptr_save;
1761 	mutex_enter(&iommu_p->dvma_debug_lock);
1762 
1763 	if (!iommu_p->dvma_alloc_rec)
1764 		pci_dvma_debug_init(iommu_p);
1765 	if (DVMA_DBG_OFF(iommu_p)) {
1766 		pci_dvma_debug_fini(iommu_p);
1767 		goto done;
1768 	}
1769 
1770 	ptr = &iommu_p->dvma_free_rec[iommu_p->dvma_free_rec_index];
1771 	ptr->dvma_addr = address;
1772 	ptr->len = len;
1773 	ptr->mp = mp;
1774 	if (++iommu_p->dvma_free_rec_index == pci_dvma_debug_rec)
1775 		iommu_p->dvma_free_rec_index = 0;
1776 
1777 	ptr_save = iommu_p->dvma_active_list;
1778 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1779 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1780 			break;
1781 		ptr_save = ptr;
1782 	}
1783 	if (!ptr) {
1784 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1785 			(long)address, len);
1786 		goto done;
1787 	}
1788 	if (ptr == iommu_p->dvma_active_list)
1789 		iommu_p->dvma_active_list = ptr->next;
1790 	else
1791 		ptr_save->next = ptr->next;
1792 	kmem_free(ptr, sizeof (struct dvma_rec));
1793 	iommu_p->dvma_active_count--;
1794 done:
1795 	mutex_exit(&iommu_p->dvma_debug_lock);
1796 }
1797 
#ifdef DEBUG
/*
 * dump_dma_handle - trace the fields of a DMA handle (DEBUG builds only).
 *
 *	flag - debug flag handed to DEBUG4 for the first line; the three
 *	       following lines are issued with DBG_CONT OR-ed in
 *	dip  - devinfo node handed to DEBUG4
 *	hp   - DMA handle whose fields are printed
 */
void
dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
{
	/* note: the value printed as "flags" is hp->dmai_inuse */
	DEBUG4(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
		hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
	DEBUG4(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
		hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
		hp->dmai_nwin);
	DEBUG4(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
		hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
	/* obj and attr are printed as the addresses of the embedded members */
	DEBUG4(flag|DBG_CONT, dip, "\twinlst=%x obj=%p attr=%p ckp=%p\n",
		hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
		hp->dmai_cookie);
}
#endif
1814 
1815 void
1816 pci_vmem_do_free(iommu_t *iommu_p, void *base_addr, size_t npages,
1817     int vmemcache)
1818 {
1819 	vmem_t *map_p = iommu_p->iommu_dvma_map;
1820 
1821 	if (vmemcache) {
1822 		vmem_free(map_p, base_addr, IOMMU_PAGE_SIZE);
1823 #ifdef PCI_DMA_PROF
1824 		pci_dvma_vmem_free++;
1825 #endif
1826 		return;
1827 	}
1828 
1829 	vmem_xfree(map_p, base_addr, IOMMU_PTOB(npages));
1830 #ifdef PCI_DMA_PROF
1831 		pci_dvma_vmem_xfree++;
1832 #endif
1833 }
1834