xref: /titanic_50/usr/src/uts/sun4/io/px/px_dma.c (revision 0917b783fd655a0c943e0b8fb848db2301774947)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PCI Express nexus DVMA and DMA core routines:
30  *	dma_map/dma_bind_handle implementation
31  *	bypass and peer-to-peer support
32  *	fast track DVMA space allocation
33  *	runtime DVMA debug
34  */
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/sysmacros.h>
39 #include <sys/sunddi.h>
40 #include <sys/ddi_impldefs.h>
41 #include "px_obj.h"
42 
43 /*LINTLIBRARY*/
44 
45 /*
46  * px_dma_allocmp - Allocate a pci dma implementation structure
47  *
48  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
49  * to hold unmodified device limits. The ddi_dma_attr inside the
50  * ddi_dma_impl structure is augumented with system limits to enhance
51  * DVMA performance at runtime. The unaugumented device limits saved
52  * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
53  * strictly for peer-to-peer transfers which do not obey system limits.
54  *
55  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
56  */
57 ddi_dma_impl_t *
58 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
59 	caddr_t arg)
60 {
61 	register ddi_dma_impl_t *mp;
62 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
63 
64 	/* Caution: we don't use zalloc to enhance performance! */
65 	if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
66 		DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
67 		if (waitfp != DDI_DMA_DONTWAIT) {
68 			DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
69 			ddi_set_callback(waitfp, arg, &px_kmem_clid);
70 		}
71 		return (mp);
72 	}
73 
74 	mp->dmai_rdip = rdip;
75 	mp->dmai_flags = 0;
76 	mp->dmai_pfnlst = NULL;
77 	mp->dmai_winlst = NULL;
78 
79 	/*
80 	 * kmem_alloc debug: the following fields are not zero-ed
81 	 * mp->dmai_mapping = 0;
82 	 * mp->dmai_size = 0;
83 	 * mp->dmai_offset = 0;
84 	 * mp->dmai_minxfer = 0;
85 	 * mp->dmai_burstsizes = 0;
86 	 * mp->dmai_ndvmapages = 0;
87 	 * mp->dmai_pool/roffset = 0;
88 	 * mp->dmai_rflags = 0;
89 	 * mp->dmai_inuse/flags
90 	 * mp->dmai_nwin = 0;
91 	 * mp->dmai_winsize = 0;
92 	 * mp->dmai_nexus_private/tte = 0;
93 	 * mp->dmai_iopte/pfnlst
94 	 * mp->dmai_sbi/pfn0 = 0;
95 	 * mp->dmai_minfo/winlst/fdvma
96 	 * mp->dmai_rdip
97 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
98 	 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
99 	 * mp->dmai_cookie = 0;
100 	 */
101 
102 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
103 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
104 	mp->dmai_fault = 0;
105 	mp->dmai_fault_check = NULL;
106 	mp->dmai_fault_notify = NULL;
107 
108 	mp->dmai_error.err_ena = 0;
109 	mp->dmai_error.err_status = DDI_FM_OK;
110 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
111 	mp->dmai_error.err_ontrap = NULL;
112 	mp->dmai_error.err_fep = NULL;
113 
114 	if (px_child_prefetch(mp->dmai_rdip))
115 		mp->dmai_flags |= (PX_DMAI_FLAGS_MAP_BUFZONE |
116 		    PX_DMAI_FLAGS_REDZONE);
117 
118 	return (mp);
119 }
120 
121 void
122 px_dma_freemp(ddi_dma_impl_t *mp)
123 {
124 	if (mp->dmai_ndvmapages > 1)
125 		px_dma_freepfn(mp);
126 	if (mp->dmai_winlst)
127 		px_dma_freewin(mp);
128 	kmem_free(mp, sizeof (px_dma_hdl_t));
129 }
130 
131 void
132 px_dma_freepfn(ddi_dma_impl_t *mp)
133 {
134 	void *addr = mp->dmai_pfnlst;
135 	if (addr) {
136 		size_t npages = mp->dmai_ndvmapages;
137 		if (npages > 1)
138 			kmem_free(addr, npages * sizeof (px_iopfn_t));
139 		mp->dmai_pfnlst = NULL;
140 	}
141 	mp->dmai_ndvmapages = 0;
142 }
143 
144 /*
145  * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
146  *			and convert dmareq->dmar_limits to mp->dmai_attr
147  *
148  * ddi_dma_impl_t member modified     input
149  * ------------------------------------------------------------------------
150  * mp->dmai_minxfer		    - dev
151  * mp->dmai_burstsizes		    - dev
152  * mp->dmai_flags		    - no limit? peer-to-peer only?
153  *
154  * ddi_dma_attr member modified       input
155  * ------------------------------------------------------------------------
156  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
157  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
158  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
159  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
160  * mp->dmai_attr.dma_attr_align     - 1         (no alignment restriction)
161  *
162  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
163  */
164 ddi_dma_impl_t *
165 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
166 	ddi_dma_req_t *dmareq)
167 {
168 	ddi_dma_impl_t *mp;
169 	ddi_dma_attr_t *attr_p;
170 	uint64_t syslo		= mmu_p->mmu_dvma_base;
171 	uint64_t syshi		= mmu_p->mmu_dvma_end;
172 	uint64_t fasthi		= mmu_p->mmu_dvma_fast_end;
173 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
174 	uint32_t count_max	= lim_p->dlim_cntr_max;
175 	uint64_t lo		= lim_p->dlim_addr_lo;
176 	uint64_t hi		= lim_p->dlim_addr_hi;
177 	if (hi <= lo) {
178 		DBG(DBG_DMA_MAP, dip, "Bad limits\n");
179 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
180 	}
181 	if (!count_max)
182 		count_max--;
183 
184 	if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
185 		dmareq->dmar_arg)))
186 		return (NULL);
187 
188 	/* store original dev input at the 2nd ddi_dma_attr */
189 	attr_p = PX_DEV_ATTR(mp);
190 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
191 	SET_DMAALIGN(attr_p, 1);
192 
193 	lo = MAX(lo, syslo);
194 	hi = MIN(hi, syshi);
195 	if (hi <= lo)
196 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
197 	count_max = MIN(count_max, hi - lo);
198 
199 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
200 		mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
201 			PX_DMAI_FLAGS_NOSYSLIMIT;
202 	else {
203 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
204 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
205 	}
206 	if (PX_DMA_NOCTX(rdip))
207 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
208 
209 	/* store augumented dev input to mp->dmai_attr */
210 	mp->dmai_minxfer	= lim_p->dlim_minxfer;
211 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
212 	attr_p = &mp->dmai_attr;
213 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
214 	SET_DMAALIGN(attr_p, 1);
215 	return (mp);
216 }
217 
218 /*
219  * Called from px_attach to check for bypass dma support and set
220  * flags accordingly.
221  */
222 int
223 px_dma_attach(px_t *px_p)
224 {
225 	uint64_t baddr;
226 
227 	if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
228 			PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
229 			&baddr) != DDI_ENOTSUP)
230 		/* ignore all other errors */
231 		px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
232 
233 	return (DDI_SUCCESS);
234 }
235 
236 /*
237  * px_dma_attr2hdl
238  *
239  * This routine is called from the alloc handle entry point to sanity check the
240  * dma attribute structure.
241  *
242  * use by: px_dma_allochdl()
243  *
244  * return value:
245  *
246  *	DDI_SUCCESS		- on success
247  *	DDI_DMA_BADATTR		- attribute has invalid version number
248  *				  or address limits exclude dvma space
249  */
250 int
251 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
252 {
253 	px_mmu_t *mmu_p = px_p->px_mmu_p;
254 	uint64_t syslo, syshi;
255 	int	ret;
256 	ddi_dma_attr_t *attrp		= PX_DEV_ATTR(mp);
257 	uint64_t hi			= attrp->dma_attr_addr_hi;
258 	uint64_t lo			= attrp->dma_attr_addr_lo;
259 	uint64_t align			= attrp->dma_attr_align;
260 	uint64_t nocross		= attrp->dma_attr_seg;
261 	uint64_t count_max		= attrp->dma_attr_count_max;
262 
263 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
264 		attrp, HI32(count_max), LO32(count_max));
265 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
266 		HI32(hi), LO32(hi), HI32(lo), LO32(lo));
267 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
268 		HI32(nocross), LO32(nocross), HI32(align), LO32(align));
269 
270 	if (!nocross)
271 		nocross--;
272 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
273 
274 		DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
275 		/*
276 		 * If Bypass DMA is not supported, return error so that
277 		 * target driver can fall back to dvma mode of operation
278 		 */
279 		if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
280 			return (DDI_DMA_BADATTR);
281 		mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
282 		if (nocross != UINT64_MAX)
283 			return (DDI_DMA_BADATTR);
284 		if (align && (align > MMU_PAGE_SIZE))
285 			return (DDI_DMA_BADATTR);
286 		align = 1; /* align on 1 page boundary */
287 
288 		/* do a range check and get the limits */
289 		ret = px_lib_dma_bypass_rngchk(attrp, &syslo, &syshi);
290 		if (ret != DDI_SUCCESS)
291 			return (ret);
292 	} else { /* MMU_XLATE or PEER_TO_PEER */
293 		align = MAX(align, MMU_PAGE_SIZE) - 1;
294 		if ((align & nocross) != align) {
295 			dev_info_t *rdip = mp->dmai_rdip;
296 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
297 				NAMEINST(rdip));
298 			return (DDI_DMA_BADATTR);
299 		}
300 		align = MMU_BTOP(align + 1);
301 		syslo = mmu_p->mmu_dvma_base;
302 		syshi = mmu_p->mmu_dvma_end;
303 	}
304 	if (hi <= lo) {
305 		dev_info_t *rdip = mp->dmai_rdip;
306 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
307 		return (DDI_DMA_BADATTR);
308 	}
309 	lo = MAX(lo, syslo);
310 	hi = MIN(hi, syshi);
311 	if (!count_max)
312 		count_max--;
313 
314 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
315 		HI32(hi), LO32(hi), HI32(lo), LO32(lo));
316 	if (hi <= lo) { /* peer transfers cannot have alignment & nocross */
317 		dev_info_t *rdip = mp->dmai_rdip;
318 		cmn_err(CE_WARN, "%s%d peer only dev %p", NAMEINST(rdip), mp);
319 		if ((nocross < UINT32_MAX) || (align > 1)) {
320 			cmn_err(CE_WARN, "%s%d peer only device bad attr",
321 				NAMEINST(rdip));
322 			return (DDI_DMA_BADATTR);
323 		}
324 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
325 	} else /* set practical counter_max value */
326 		count_max = MIN(count_max, hi - lo);
327 
328 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
329 		mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
330 			PX_DMAI_FLAGS_NOFASTLIMIT;
331 	else {
332 		syshi = mmu_p->mmu_dvma_fast_end;
333 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
334 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
335 	}
336 	if (PX_DMA_NOCTX(mp->dmai_rdip))
337 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
338 
339 	mp->dmai_minxfer	= attrp->dma_attr_minxfer;
340 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
341 	attrp = &mp->dmai_attr;
342 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
343 	return (DDI_SUCCESS);
344 }
345 
/*
 * True when pfn lies in the inclusive range [bgn, end].
 * Every argument is parenthesized so expression arguments (e.g. "a & b")
 * are compared as a whole. Note that pfn is evaluated twice.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
347 
348 /*
349  * px_dma_type - determine which of the three types DMA (peer-to-peer,
350  *		mmu bypass, or mmu translate) we are asked to do.
351  *		Also checks pfn0 and rejects any non-peer-to-peer
352  *		requests for peer-only devices.
353  *
354  *	return values:
355  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
356  *		DDI_SUCCESS
357  *
358  *	dma handle members affected (set on exit):
359  *	mp->dmai_object		- dmareq->dmar_object
360  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
361  *	mp->dmai_flags   	- DMA type
362  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
363  *	mp->dmai_roffset 	- initialized to starting MMU page offset
364  *	mp->dmai_ndvmapages	- # of total MMU pages of entire object
365  */
366 int
367 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
368 {
369 	dev_info_t *dip = px_p->px_dip;
370 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
371 	px_pec_t *pec_p = px_p->px_pec_p;
372 	uint32_t offset;
373 	pfn_t pfn0;
374 
375 	mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS | DMP_NOSYNC;
376 
377 	switch (dobj_p->dmao_type) {
378 	case DMA_OTYP_BUFVADDR:
379 	case DMA_OTYP_VADDR: {
380 		page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
381 		caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
382 
383 		DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
384 		offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
385 		if (pplist) {				/* shadow list */
386 			mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
387 			pfn0 = page_pptonum(*pplist);
388 		} else {
389 			struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
390 			struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
391 			pfn0 = hat_getpfnum(hat_p, vaddr);
392 		}
393 		}
394 		break;
395 
396 	case DMA_OTYP_PAGES:
397 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
398 		mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
399 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
400 		break;
401 
402 	case DMA_OTYP_PADDR:
403 	default:
404 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
405 			NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
406 		return (DDI_DMA_NOMAPPING);
407 	}
408 	if (pfn0 == PFN_INVALID) {
409 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
410 			NAMEINST(dip), dobj_p);
411 		return (DDI_DMA_NOMAPPING);
412 	}
413 	if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
414 			pec_p->pec_last32_pfn)) {
415 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
416 		goto done;	/* leave bypass and dvma flag as 0 */
417 	} else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
418 			pec_p->pec_last64_pfn)) {
419 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
420 		goto done;	/* leave bypass and dvma flag as 0 */
421 	}
422 	if (PX_DMA_ISPEERONLY(mp)) {
423 		dev_info_t *rdip = mp->dmai_rdip;
424 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
425 		return (DDI_DMA_NOMAPPING);
426 	}
427 	mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
428 	    PX_DMAI_FLAGS_BYPASS : PX_DMAI_FLAGS_DVMA |
429 	    (mp->dmai_rflags & DDI_DMA_REDZONE ? PX_DMAI_FLAGS_REDZONE : 0);
430 done:
431 	mp->dmai_object	 = *dobj_p;			/* whole object    */
432 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
433 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
434 	mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
435 	return (DDI_SUCCESS);
436 }
437 
438 /*
439  * px_dma_pgpfn - set up pfnlst array according to pages
440  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
441  */
442 /*ARGSUSED*/
443 static int
444 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
445 {
446 	int i;
447 	dev_info_t *dip = px_p->px_dip;
448 
449 	switch (mp->dmai_object.dmao_type) {
450 	case DMA_OTYP_BUFVADDR:
451 	case DMA_OTYP_VADDR: {
452 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
453 		DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
454 			pplist, npages);
455 		for (i = 1; i < npages; i++) {
456 			px_iopfn_t pfn = page_pptonum(pplist[i]);
457 			PX_SET_MP_PFN1(mp, i, pfn);
458 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
459 		}
460 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
461 		}
462 		break;
463 
464 	case DMA_OTYP_PAGES: {
465 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
466 		DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
467 		for (i = 1; i < npages; i++, pp = pp->p_next) {
468 			px_iopfn_t pfn = page_pptonum(pp);
469 			PX_SET_MP_PFN1(mp, i, pfn);
470 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
471 		}
472 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
473 		}
474 		break;
475 
476 	default:	/* check is already done by px_dma_type */
477 		ASSERT(0);
478 		break;
479 	}
480 	return (DDI_SUCCESS);
481 }
482 
483 /*
484  * px_dma_vapfn - set up pfnlst array according to VA
485  *	VA/size pair: <normal, bypass, peer-to-peer>
486  *	pfn0 is skipped as it is already done.
487  *	In this case, the cached pfn0 is used to fill pfnlst[0]
488  */
489 static int
490 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
491 {
492 	dev_info_t *dip = px_p->px_dip;
493 	int i;
494 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
495 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
496 
497 	vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
498 	for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
499 		px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
500 		if (pfn == PFN_INVALID)
501 			goto err_badpfn;
502 		PX_SET_MP_PFN1(mp, i, pfn);
503 		DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
504 			mp, i, pfn);
505 	}
506 	return (DDI_SUCCESS);
507 err_badpfn:
508 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
509 	return (DDI_DMA_NOMAPPING);
510 }
511 
512 /*
513  * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
514  *
515  * dependencies:
516  *	mp->dmai_ndvmapages	- set to total # of dma pages
517  *
518  * return value:
519  *	DDI_SUCCESS
520  *	DDI_DMA_NOMAPPING
521  */
522 int
523 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
524 {
525 	uint32_t npages = mp->dmai_ndvmapages;
526 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
527 	int i, ret, peer = PX_DMA_ISPTP(mp);
528 	int peer32 = PX_DMA_ISPTP32(mp);
529 	dev_info_t *dip = px_p->px_dip;
530 
531 	px_pec_t *pec_p = px_p->px_pec_p;
532 	px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
533 					pec_p->pec_base64_pfn;
534 	px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
535 					pec_p->pec_last64_pfn;
536 	px_iopfn_t pfn_adj = peer ? pfn_base : 0;
537 
538 	DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
539 		mp, PX_MP_PFN0(mp) - pfn_adj);
540 	/* 1 page: no array alloc/fill, no mixed mode check */
541 	if (npages == 1) {
542 		PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
543 		return (DDI_SUCCESS);
544 	}
545 	/* allocate pfn array */
546 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
547 		waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
548 		if (waitfp != DDI_DMA_DONTWAIT)
549 			ddi_set_callback(waitfp, dmareq->dmar_arg,
550 				&px_kmem_clid);
551 		return (DDI_DMA_NORESOURCES);
552 	}
553 	/* fill pfn array */
554 	PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
555 	if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
556 		px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
557 		goto err;
558 
559 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
560 	for (i = 1; i < npages; i++) {
561 		px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
562 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
563 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
564 				NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
565 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
566 			goto err;
567 		}
568 		DBG(DBG_DMA_MAP, dip,
569 			"px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
570 		if (pfn_adj)
571 			PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
572 	}
573 	return (DDI_SUCCESS);
574 err:
575 	px_dma_freepfn(mp);
576 	return (ret);
577 }
578 
579 /*
580  * px_dvma_win() - trim requested DVMA size down to window size
581  *	The 1st window starts from offset and ends at page-aligned boundary.
582  *	From the 2nd window on, each window starts and ends at page-aligned
583  *	boundary except the last window ends at wherever requested.
584  *
585  *	accesses the following mp-> members:
586  *	mp->dmai_attr.dma_attr_count_max
587  *	mp->dmai_attr.dma_attr_seg
588  *	mp->dmai_roffset   - start offset of 1st window
589  *	mp->dmai_rflags (redzone)
590  *	mp->dmai_ndvmapages (for 1 page fast path)
591  *
592  *	sets the following mp-> members:
593  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
594  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
595  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
596  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
597  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
598  *
599  *	fixed - not changed across different DMA windows
600  */
601 /*ARGSUSED*/
602 int
603 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
604 {
605 	uint32_t redzone_sz	= PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
606 	size_t obj_sz		= mp->dmai_object.dmao_size;
607 	size_t xfer_sz;
608 	ulong_t pg_off;
609 
610 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
611 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
612 		mp->dmai_size = obj_sz;
613 		mp->dmai_winsize = MMU_PAGE_SIZE;
614 		mp->dmai_nwin = 1;
615 		goto done;
616 	}
617 
618 	pg_off	= mp->dmai_roffset;
619 	xfer_sz	= obj_sz + redzone_sz;
620 
621 	/* include redzone in nocross check */ {
622 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
623 		if (xfer_sz + pg_off - 1 > nocross)
624 			xfer_sz = nocross - pg_off + 1;
625 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
626 			DBG(DBG_DMA_MAP, px_p->px_dip,
627 			    "nocross too small: "
628 			    "%lx(%lx)+%lx+%lx < %llx\n",
629 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
630 			return (DDI_DMA_TOOBIG);
631 		}
632 	}
633 	xfer_sz -= redzone_sz;		/* restore transfer size  */
634 	/* check counter max */ {
635 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
636 		if (xfer_sz - 1 > count_max)
637 			xfer_sz = count_max + 1;
638 	}
639 	if (xfer_sz >= obj_sz) {
640 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
641 		mp->dmai_size = xfer_sz;
642 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
643 		mp->dmai_nwin = 1;
644 		goto done;
645 	}
646 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
647 		DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
648 			obj_sz, pg_off, redzone_sz, xfer_sz);
649 		return (DDI_DMA_TOOBIG);
650 	}
651 
652 	xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
653 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
654 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
655 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
656 done:
657 	mp->dmai_winlst = NULL;
658 	px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
659 	return (DDI_SUCCESS);
660 }
661 
/*
 * fast track cache entry to mmu context: the entry is shifted left by 3,
 * its lower 3 bits are kept in place, and bits 3-5 are forced to 1 (0x38).
 * For a 9-bit cache entry this puts three 1 bits between the upper 6 bits
 * and the lower 3 bits.
 * NOTE(review): this comment previously said three 0 bits are inserted;
 * the expression sets them to 1 - confirm which one is intended.
 */
#define	MMU_FCE_TO_CTX(fce)	(((fce) << 3) | 0x38 | ((fce) & 0x7))
667 
668 /*
669  * px_dvma_map_fast - attempts to map fast trackable DVMA
670  */
671 /*ARGSUSED*/
672 int
673 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
674 {
675 	uint_t clustsz = px_dvma_page_cache_clustsz;
676 	uint_t entries = px_dvma_page_cache_entries;
677 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags);
678 	int i = mmu_p->mmu_dvma_addr_scan_start;
679 	uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
680 	px_dvma_addr_t dvma_pg;
681 	size_t npages = MMU_BTOP(mp->dmai_winsize);
682 	dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
683 
684 	extern uint8_t ldstub(uint8_t *);
685 	ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
686 	ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
687 
688 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++);
689 	if (i >= entries) {
690 		lock_addr = mmu_p->mmu_dvma_cache_locks;
691 		i = 0;
692 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++);
693 		if (i >= entries) {
694 #ifdef	PX_DMA_PROF
695 			px_dvmaft_exhaust++;
696 #endif	/* PX_DMA_PROF */
697 			return (DDI_DMA_NORESOURCES);
698 		}
699 	}
700 	mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
701 
702 	i *= clustsz;
703 	dvma_pg = mmu_p->dvma_base_pg + i;
704 
705 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages, attr,
706 	    (void *)mp, 0, MMU_MAP_MP) != DDI_SUCCESS) {
707 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
708 		    "px_lib_iommu_map failed\n");
709 
710 		return (DDI_FAILURE);
711 	}
712 
713 	if (!PX_MAP_BUFZONE(mp))
714 		goto done;
715 
716 	DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
717 
718 	ASSERT(PX_HAS_REDZONE(mp));
719 
720 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1, attr,
721 	    (void *)mp, npages - 1, MMU_MAP_MP) != DDI_SUCCESS) {
722 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
723 		    "mapping REDZONE page failed\n");
724 
725 		(void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
726 		return (DDI_FAILURE);
727 	}
728 
729 done:
730 #ifdef PX_DMA_PROF
731 	px_dvmaft_success++;
732 #endif
733 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
734 	mp->dmai_offset = 0;
735 	mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
736 	PX_SAVE_MP_TTE(mp, attr);	/* save TTE template for unmapping */
737 	if (PX_DVMA_DBG_ON(mmu_p))
738 		px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
739 			mp->dmai_size, mp);
740 	return (DDI_SUCCESS);
741 }
742 
743 /*
744  * px_dvma_map: map non-fasttrack DMA
745  *		Use quantum cache if single page DMA.
746  */
747 int
748 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
749 {
750 	uint_t npages = PX_DMA_WINNPGS(mp);
751 	px_dvma_addr_t dvma_pg, dvma_pg_index;
752 	void *dvma_addr;
753 	uint64_t tte = PX_GET_TTE_ATTR(mp->dmai_rflags);
754 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
755 	dev_info_t *dip = mp->dmai_rdip;
756 	int	ret = DDI_SUCCESS;
757 
758 	/*
759 	 * allocate dvma space resource and map in the first window.
760 	 * (vmem_t *vmp, size_t size,
761 	 *	size_t align, size_t phase, size_t nocross,
762 	 *	void *minaddr, void *maxaddr, int vmflag)
763 	 */
764 	if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
765 		dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
766 			MMU_PAGE_SIZE, sleep);
767 		mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
768 #ifdef	PX_DMA_PROF
769 		px_dvma_vmem_alloc++;
770 #endif	/* PX_DMA_PROF */
771 	} else {
772 		dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
773 			MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
774 			MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
775 			0,
776 			mp->dmai_attr.dma_attr_seg + 1,
777 			(void *)mp->dmai_attr.dma_attr_addr_lo,
778 			(void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
779 			sleep);
780 #ifdef	PX_DMA_PROF
781 		px_dvma_vmem_xalloc++;
782 #endif	/* PX_DMA_PROF */
783 	}
784 	dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
785 	dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
786 	DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
787 		dvma_pg, dvma_pg_index);
788 	if (dvma_pg == 0)
789 		goto noresource;
790 
791 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
792 	mp->dmai_offset = 0;
793 	PX_SAVE_MP_TTE(mp, tte);	/* mp->dmai_tte = tte */
794 
795 	if ((ret = px_mmu_map_pages(mmu_p,
796 	    mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
797 		if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
798 			vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
799 			    MMU_PAGE_SIZE);
800 #ifdef PX_DMA_PROF
801 			px_dvma_vmem_free++;
802 #endif /* PX_DMA_PROF */
803 		} else {
804 			vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
805 			    MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
806 #ifdef PX_DMA_PROF
807 			px_dvma_vmem_xfree++;
808 #endif /* PX_DMA_PROF */
809 		}
810 	}
811 
812 	return (ret);
813 noresource:
814 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
815 		DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
816 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
817 			&mmu_p->mmu_dvma_clid);
818 	}
819 	DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
820 	return (DDI_DMA_NORESOURCES);
821 }
822 
823 void
824 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
825 {
826 	px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
827 	px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
828 	dvma_addr = MMU_PTOB(dvma_pg);
829 
830 	if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
831 		px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
832 		ASSERT(index % px_dvma_page_cache_clustsz == 0);
833 		index /= px_dvma_page_cache_clustsz;
834 		ASSERT(index < px_dvma_page_cache_entries);
835 		mmu_p->mmu_dvma_cache_locks[index] = 0;
836 #ifdef	PX_DMA_PROF
837 		px_dvmaft_free++;
838 #endif	/* PX_DMA_PROF */
839 		return;
840 	}
841 
842 	if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
843 		vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
844 			MMU_PAGE_SIZE);
845 #ifdef PX_DMA_PROF
846 		px_dvma_vmem_free++;
847 #endif /* PX_DMA_PROF */
848 	} else {
849 		size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
850 		vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
851 			MMU_PTOB(npages));
852 #ifdef PX_DMA_PROF
853 		px_dvma_vmem_xfree++;
854 #endif /* PX_DMA_PROF */
855 	}
856 }
857 
858 /*
859  * DVMA mappings may have multiple windows, but each window always have
860  * one segment.
861  */
862 int
863 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
864 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
865 	uint_t cache_flags)
866 {
867 	switch (cmd) {
868 	case DDI_DMA_SYNC:
869 		return (px_lib_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
870 		    *offp, *lenp, cache_flags));
871 
872 	case DDI_DMA_HTOC: {
873 		int ret;
874 		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
875 		uint_t win_size = mp->dmai_winsize;
876 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;
877 
878 		if (off >= mp->dmai_object.dmao_size) {
879 			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
880 				NAMEINST(mp->dmai_rdip), off);
881 			return (DDI_FAILURE);
882 		}
883 		off += mp->dmai_roffset;
884 		ret = px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
885 		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
886 		if (ret)
887 			return (ret);
888 		DBG(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
889 			cp->dmac_address, cp->dmac_size, off, *offp);
890 
891 		/* adjust cookie addr/len if we are not on window boundary */
892 		ASSERT((off % win_size) == (off -
893 			(PX_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
894 		off = PX_DMA_CURWIN(mp) ? off % win_size : *offp;
895 		ASSERT(cp->dmac_size > off);
896 		cp->dmac_laddress += off;
897 		cp->dmac_size -= off;
898 		DBG(DBG_DMA_CTL, dip, "HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
899 			mp, cp->dmac_address, cp->dmac_size, off, wo_off);
900 		}
901 		return (DDI_SUCCESS);
902 
903 	case DDI_DMA_REPWIN:
904 		*offp = mp->dmai_offset;
905 		*lenp = mp->dmai_size;
906 		return (DDI_SUCCESS);
907 
908 	case DDI_DMA_MOVWIN: {
909 		off_t off = *offp;
910 		if (off >= mp->dmai_object.dmao_size)
911 			return (DDI_FAILURE);
912 		off += mp->dmai_roffset;
913 		return (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
914 		    off / mp->dmai_winsize, offp, lenp,
915 		    (ddi_dma_cookie_t *)objp, NULL));
916 		}
917 
918 	case DDI_DMA_NEXTWIN: {
919 		px_window_t win = PX_DMA_CURWIN(mp);
920 		if (offp) {
921 			if (*(px_window_t *)offp != win) {
922 				/* window not active */
923 				*(px_window_t *)objp = win; /* return cur win */
924 				return (DDI_DMA_STALE);
925 			}
926 			win++;
927 		} else	/* map win 0 */
928 			win = 0;
929 		if (win >= mp->dmai_nwin) {
930 			*(px_window_t *)objp = win - 1;
931 			return (DDI_DMA_DONE);
932 		}
933 		if (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
934 		    win, 0, 0, 0, 0)) {
935 			*(px_window_t *)objp = win - 1;
936 			return (DDI_FAILURE);
937 		}
938 		*(px_window_t *)objp = win;
939 		}
940 		return (DDI_SUCCESS);
941 
942 	case DDI_DMA_NEXTSEG:
943 		if (*(px_window_t *)offp != PX_DMA_CURWIN(mp))
944 			return (DDI_DMA_STALE);
945 		if (lenp)				/* only 1 seg allowed */
946 			return (DDI_DMA_DONE);
947 
948 		/* return mp as seg 0 */
949 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
950 		return (DDI_SUCCESS);
951 
952 	case DDI_DMA_SEGTOC:
953 		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
954 			mp->dmai_size);
955 		*offp = mp->dmai_offset;
956 		*lenp = mp->dmai_size;
957 		return (DDI_SUCCESS);
958 
959 	case DDI_DMA_COFF: {
960 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
961 		if (cp->dmac_address < mp->dmai_mapping ||
962 			(cp->dmac_address + cp->dmac_size) >
963 			(mp->dmai_mapping + mp->dmai_size))
964 			return (DDI_FAILURE);
965 		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
966 			mp->dmai_offset);
967 		}
968 		return (DDI_SUCCESS);
969 	default:
970 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
971 			cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
972 		break;
973 	}
974 	return (DDI_FAILURE);
975 }
976 
977 void
978 px_dma_freewin(ddi_dma_impl_t *mp)
979 {
980 	px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
981 	for (win2_p = win_p; win_p; win2_p = win_p) {
982 		win_p = win2_p->win_next;
983 		kmem_free(win2_p, sizeof (px_dma_win_t) +
984 			sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
985 	}
986 	mp->dmai_nwin = 0;
987 	mp->dmai_winlst = NULL;
988 }
989 
990 /*
991  * px_dma_newwin - create a dma window object and cookies
992  *
993  *	After the initial scan in px_dma_physwin(), which identifies
994  *	a portion of the pfn array that belongs to a dma window,
995  *	we are called to allocate and initialize representing memory
996  *	resources. We know from the 1st scan the number of cookies
997  *	or dma segment in this window so we can allocate a contiguous
998  *	memory array for the dma cookies (The implementation of
999  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
1000  *
1001  *	A second round scan is done on the pfn array to identify
1002  *	each dma segment and initialize its corresponding dma cookie.
1003  *	We don't need to do all the safety checking and we know they
1004  *	all belong to the same dma window.
1005  *
1006  *	Input:	cookie_no - # of cookies identified by the 1st scan
1007  *		start_idx - subscript of the pfn array for the starting pfn
1008  *		end_idx   - subscript of the last pfn in dma window
1009  *		win_pp    - pointer to win_next member of previous window
1010  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
1011  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
1012  *	Note:	Each cookie and window size are all initialized on page
1013  *		boundary. This is not true for the 1st cookie of the 1st
1014  *		window and the last cookie of the last window.
1015  *		We fix that later in upper layer which has access to size
1016  *		and offset info.
1017  *
1018  */
1019 /*ARGSUSED*/
1020 static int
1021 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
1022 	uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
1023 	px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
1024 {
1025 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
1026 	ddi_dma_cookie_t *cookie_p;
1027 	uint32_t pfn_no = 1;
1028 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
1029 	px_iopfn_t prev_pfn = pfn;
1030 	uint64_t baddr, seg_pfn0 = pfn;
1031 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
1032 	px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
1033 		waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1034 	io_attributes_t	attr = PX_GET_TTE_ATTR(mp->dmai_rflags);
1035 
1036 	if (!win_p)
1037 		goto noresource;
1038 
1039 	win_p->win_next = NULL;
1040 	win_p->win_ncookies = cookie_no;
1041 	win_p->win_curseg = 0;	/* start from segment 0 */
1042 	win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
1043 	/* win_p->win_offset is left uninitialized */
1044 
1045 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1046 	start_idx++;
1047 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
1048 		pfn = PX_GET_MP_PFN1(mp, start_idx);
1049 		if ((pfn == prev_pfn + 1) &&
1050 			(MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1051 			continue;
1052 
1053 		/* close up the cookie up to (including) prev_pfn */
1054 		baddr = MMU_PTOB(seg_pfn0);
1055 		if (bypass && (px_lib_iommu_getbypass(dip,
1056 				baddr, attr, &baddr) != DDI_SUCCESS))
1057 			return (DDI_FAILURE);
1058 
1059 		MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1060 		DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
1061 			MMU_PTOB(seg_pfn0), pfn_no);
1062 
1063 		cookie_p++;	/* advance to next available cookie cell */
1064 		pfn_no = 0;
1065 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
1066 	}
1067 
1068 	baddr = MMU_PTOB(seg_pfn0);
1069 	if (bypass && (px_lib_iommu_getbypass(dip,
1070 			baddr, attr, &baddr) != DDI_SUCCESS))
1071 		return (DDI_FAILURE);
1072 
1073 	MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1074 	DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1075 		MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
1076 #ifdef	DEBUG
1077 	cookie_p++;
1078 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1079 #endif	/* DEBUG */
1080 	*win_pp = win_p;
1081 	return (DDI_SUCCESS);
1082 noresource:
1083 	if (waitfp != DDI_DMA_DONTWAIT)
1084 		ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
1085 	return (DDI_DMA_NORESOURCES);
1086 }
1087 
1088 /*
1089  * px_dma_adjust - adjust 1st and last cookie and window sizes
1090  *	remove initial dma page offset from 1st cookie and window size
1091  *	remove last dma page remainder from last cookie and window size
1092  *	fill win_offset of each dma window according to just fixed up
1093  *		each window sizes
1094  *	px_dma_win_t members modified:
1095  *	win_p->win_offset - this window's offset within entire DMA object
1096  *	win_p->win_size	  - xferrable size (in bytes) for this window
1097  *
1098  *	ddi_dma_impl_t members modified:
1099  *	mp->dmai_size	  - 1st window xferrable size
1100  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1101  *
1102  *	ddi_dma_cookie_t members modified:
1103  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1104  *	cookie_p->dmac_laddress - 1st cookie add page offset
1105  */
1106 static void
1107 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
1108 {
1109 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1110 	size_t pg_offset = mp->dmai_roffset;
1111 	size_t win_offset = 0;
1112 
1113 	cookie_p->dmac_size -= pg_offset;
1114 	cookie_p->dmac_laddress |= pg_offset;
1115 	win_p->win_size -= pg_offset;
1116 	DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1117 
1118 	mp->dmai_size = win_p->win_size;
1119 	mp->dmai_offset = 0;
1120 
1121 	pg_offset += mp->dmai_object.dmao_size;
1122 	pg_offset &= MMU_PAGE_OFFSET;
1123 	if (pg_offset)
1124 		pg_offset = MMU_PAGE_SIZE - pg_offset;
1125 	DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1126 
1127 	for (; win_p->win_next; win_p = win_p->win_next) {
1128 		DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1129 		win_p->win_offset = win_offset;
1130 		win_offset += win_p->win_size;
1131 	}
1132 	/* last window */
1133 	win_p->win_offset = win_offset;
1134 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1135 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1136 	win_p->win_size -= pg_offset;
1137 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1138 }
1139 
1140 /*
1141  * px_dma_physwin() - carve up dma windows using physical addresses.
1142  *	Called to handle mmu bypass and pci peer-to-peer transfers.
1143  *	Calls px_dma_newwin() to allocate window objects.
1144  *
1145  * Dependency: mp->dmai_pfnlst points to an array of pfns
1146  *
1147  * 1. Each dma window is represented by a px_dma_win_t object.
1148  *	The object will be casted to ddi_dma_win_t and returned
1149  *	to leaf driver through the DDI interface.
1150  * 2. Each dma window can have several dma segments with each
1151  *	segment representing a physically contiguous either memory
1152  *	space (if we are doing an mmu bypass transfer) or pci address
1153  *	space (if we are doing a peer-to-peer transfer).
1154  * 3. Each segment has a DMA cookie to program the DMA engine.
1155  *	The cookies within each DMA window must be located in a
1156  *	contiguous array per ddi_dma_nextcookie(9f).
1157  * 4. The number of DMA segments within each DMA window cannot exceed
1158  *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
1159  *	too large to fit in the sgllen, the rest needs to be
1160  *	relocated to the next dma window.
1161  * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
1162  *	and nocross restrictions while bypass DMA follows the set of
1163  *	restrictions with system limits factored in.
1164  *
1165  * Return:
1166  *	mp->dmai_winlst	 - points to a link list of px_dma_win_t objects.
1167  *		Each px_dma_win_t object on the link list contains
1168  *		infomation such as its window size (# of pages),
1169  *		starting offset (also see Restriction), an array of
1170  *		DMA cookies, and # of cookies in the array.
1171  *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
1172  *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
1173  *	mp->dmai_mapping - starting cookie address
1174  *	mp->dmai_rflags	 - consistent, nosync, no redzone
1175  *	mp->dmai_cookie	 - start of cookie table of the 1st DMA window
1176  *
1177  * Restriction:
1178  *	Each px_dma_win_t object can theoratically start from any offset
1179  *	since the mmu is not involved. However, this implementation
1180  *	always make windows start from page aligned offset (except
1181  *	the 1st window, which follows the requested offset) due to the
1182  *	fact that we are handed a pfn list. This does require device's
1183  *	count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
1184  */
1185 int
1186 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
1187 {
1188 	uint_t npages = mp->dmai_ndvmapages;
1189 	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
1190 	px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
1191 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
1192 	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
1193 	uint64_t count_max, bypass_addr = 0;
1194 	px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
1195 	ddi_dma_cookie_t *cookie0_p;
1196 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags);
1197 	dev_info_t *dip = px_p->px_dip;
1198 
1199 	ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
1200 	if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
1201 		ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
1202 		uint64_t nocross = dev_attr_p->dma_attr_seg;
1203 		px_pec_t *pec_p = px_p->px_pec_p;
1204 		px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
1205 				pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
1206 				pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
1207 
1208 		if (nocross && (nocross < UINT32_MAX))
1209 			return (DDI_DMA_NOMAPPING);
1210 		if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
1211 			return (DDI_DMA_NOMAPPING);
1212 		pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
1213 		pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
1214 		pfn_hi = MIN(pfn_hi, pfn_last);
1215 		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
1216 			return (DDI_DMA_NOMAPPING);
1217 
1218 		count_max = dev_attr_p->dma_attr_count_max;
1219 		count_max = MIN(count_max, nocross);
1220 		/*
1221 		 * the following count_max trim is not done because we are
1222 		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
1223 		 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
1224 		 */
1225 	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
1226 		count_max = mp->dmai_attr.dma_attr_count_max;
1227 		pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
1228 		pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
1229 
1230 		if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
1231 				attr, &bypass_addr) != DDI_SUCCESS) {
1232 			cmn_err(CE_WARN, "bypass cookie failure %lx\n", pfn);
1233 			return (DDI_DMA_NOMAPPING);
1234 		}
1235 		pfn = MMU_BTOP(bypass_addr);
1236 	}
1237 
1238 	/* pfn: absolute (bypass mode) or relative (p2p mode) */
1239 	for (prev_pfn = pfn, i = 1; i < npages;
1240 	    i++, prev_pfn = pfn, pfn_no++) {
1241 		pfn = PX_GET_MP_PFN1(mp, i);
1242 		if (bypass_addr) {
1243 			if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
1244 					&bypass_addr) != DDI_SUCCESS) {
1245 				ret = DDI_DMA_NOMAPPING;
1246 				goto err;
1247 			}
1248 			pfn = MMU_BTOP(bypass_addr);
1249 		}
1250 		if ((pfn == prev_pfn + 1) &&
1251 				(MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1252 			continue;
1253 		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
1254 			ret = DDI_DMA_NOMAPPING;
1255 			goto err;
1256 		}
1257 		cookie_no++;
1258 		pfn_no = 0;
1259 		if (cookie_no < sgllen)
1260 			continue;
1261 
1262 		DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1263 			win_pfn0_index, i - 1, cookie_no);
1264 		if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
1265 			win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
1266 			goto err;
1267 
1268 		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
1269 		win_no++;
1270 		win_pfn0_index = i;
1271 		cookie_no = 0;
1272 	}
1273 	if (pfn > pfn_hi) {
1274 		ret = DDI_DMA_NOMAPPING;
1275 		goto err;
1276 	}
1277 	cookie_no++;
1278 	DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1279 		win_pfn0_index, i - 1, cookie_no);
1280 	if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
1281 		i - 1, win_pp, count_max, bypass_addr))
1282 		goto err;
1283 	win_no++;
1284 	px_dma_adjust(dmareq, mp, mp->dmai_winlst);
1285 	mp->dmai_nwin = win_no;
1286 	mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
1287 	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
1288 	mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
1289 	cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
1290 	mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
1291 	mp->dmai_mapping = cookie0_p->dmac_laddress;
1292 
1293 	px_dma_freepfn(mp);
1294 	return (DDI_DMA_MAPPED);
1295 err:
1296 	px_dma_freewin(mp);
1297 	return (ret);
1298 }
1299 
1300 int
1301 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1302 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1303 	uint_t cache_flags)
1304 {
1305 	switch (cmd) {
1306 	case DDI_DMA_SYNC:
1307 		return (DDI_SUCCESS);
1308 
1309 	case DDI_DMA_HTOC: {
1310 		off_t off = *offp;
1311 		ddi_dma_cookie_t *loop_cp, *cp;
1312 		px_dma_win_t *win_p = mp->dmai_winlst;
1313 
1314 		if (off >= mp->dmai_object.dmao_size)
1315 			return (DDI_FAILURE);
1316 
1317 		/* locate window */
1318 		while (win_p->win_offset + win_p->win_size <= off)
1319 			win_p = win_p->win_next;
1320 
1321 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
1322 		mp->dmai_offset = win_p->win_offset;
1323 		mp->dmai_size   = win_p->win_size;
1324 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
1325 
1326 		/* adjust cookie addr/len if we are not on cookie boundary */
1327 		off -= win_p->win_offset;	   /* offset within window */
1328 		for (; off >= loop_cp->dmac_size; loop_cp++)
1329 			off -= loop_cp->dmac_size; /* offset within cookie */
1330 
1331 		mp->dmai_cookie = loop_cp + 1;
1332 		win_p->win_curseg = loop_cp - cp;
1333 		cp = (ddi_dma_cookie_t *)objp;
1334 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
1335 			loop_cp->dmac_size - off);
1336 
1337 		DBG(DBG_DMA_CTL, dip,
1338 			"HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1339 			cp->dmac_laddress, cp->dmac_size);
1340 		}
1341 		return (DDI_SUCCESS);
1342 
1343 	case DDI_DMA_REPWIN:
1344 		*offp = mp->dmai_offset;
1345 		*lenp = mp->dmai_size;
1346 		return (DDI_SUCCESS);
1347 
1348 	case DDI_DMA_MOVWIN: {
1349 		off_t off = *offp;
1350 		ddi_dma_cookie_t *cp;
1351 		px_dma_win_t *win_p = mp->dmai_winlst;
1352 
1353 		if (off >= mp->dmai_object.dmao_size)
1354 			return (DDI_FAILURE);
1355 
1356 		/* locate window */
1357 		while (win_p->win_offset + win_p->win_size <= off)
1358 			win_p = win_p->win_next;
1359 
1360 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1361 		mp->dmai_offset = win_p->win_offset;
1362 		mp->dmai_size   = win_p->win_size;
1363 		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 star addr */
1364 		mp->dmai_cookie = cp + 1;
1365 		win_p->win_curseg = 0;
1366 
1367 		*(ddi_dma_cookie_t *)objp = *cp;
1368 		*offp = win_p->win_offset;
1369 		*lenp = win_p->win_size;
1370 		DBG(DBG_DMA_CTL, dip,
1371 			"HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1372 			cp->dmac_laddress, cp->dmac_size);
1373 		}
1374 		return (DDI_SUCCESS);
1375 
1376 	case DDI_DMA_NEXTWIN: {
1377 		px_dma_win_t *win_p = *(px_dma_win_t **)offp;
1378 		px_dma_win_t **nw_pp = (px_dma_win_t **)objp;
1379 		ddi_dma_cookie_t *cp;
1380 		if (!win_p) {
1381 			*nw_pp = mp->dmai_winlst;
1382 			return (DDI_SUCCESS);
1383 		}
1384 
1385 		if (win_p->win_offset != mp->dmai_offset)
1386 			return (DDI_DMA_STALE);
1387 		if (!win_p->win_next)
1388 			return (DDI_DMA_DONE);
1389 		win_p = win_p->win_next;
1390 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1391 		mp->dmai_offset = win_p->win_offset;
1392 		mp->dmai_size   = win_p->win_size;
1393 		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 star addr */
1394 		mp->dmai_cookie = cp + 1;
1395 		win_p->win_curseg = 0;
1396 		*nw_pp = win_p;
1397 		}
1398 		return (DDI_SUCCESS);
1399 
1400 	case DDI_DMA_NEXTSEG: {
1401 		px_dma_win_t *w_p = *(px_dma_win_t **)offp;
1402 		if (w_p->win_offset != mp->dmai_offset)
1403 			return (DDI_DMA_STALE);
1404 		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
1405 			return (DDI_DMA_DONE);
1406 		w_p->win_curseg++;
1407 		}
1408 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
1409 		return (DDI_SUCCESS);
1410 
1411 	case DDI_DMA_SEGTOC: {
1412 		px_dma_win_t *win_p = mp->dmai_winlst;
1413 		off_t off = mp->dmai_offset;
1414 		ddi_dma_cookie_t *cp;
1415 		int i;
1416 
1417 		/* locate active window */
1418 		for (; win_p->win_offset != off; win_p = win_p->win_next);
1419 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1420 		for (i = 0; i < win_p->win_curseg; i++, cp++)
1421 			off += cp->dmac_size;
1422 		*offp = off;
1423 		*lenp = cp->dmac_size;
1424 		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
1425 		}
1426 		return (DDI_SUCCESS);
1427 
1428 	case DDI_DMA_COFF: {
1429 		px_dma_win_t *win_p;
1430 		ddi_dma_cookie_t *cp;
1431 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
1432 		size_t win_off;
1433 
1434 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
1435 			int i;
1436 			win_off = 0;
1437 			cp = (ddi_dma_cookie_t *)(win_p + 1);
1438 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
1439 				size_t sz = cp->dmac_size;
1440 
1441 				addr = cp->dmac_laddress;
1442 				if ((addr <= key) && (addr + sz >= key))
1443 					goto found;
1444 				win_off += sz;
1445 			}
1446 		}
1447 		return (DDI_FAILURE);
1448 found:
1449 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
1450 		return (DDI_SUCCESS);
1451 		}
1452 	default:
1453 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1454 			cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1455 		break;
1456 	}
1457 	return (DDI_FAILURE);
1458 }
1459 
1460 static void
1461 px_dvma_debug_init(px_mmu_t *mmu_p)
1462 {
1463 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1464 	ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
1465 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
1466 
1467 	mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
1468 	mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
1469 
1470 	mmu_p->dvma_active_list = NULL;
1471 	mmu_p->dvma_alloc_rec_index = 0;
1472 	mmu_p->dvma_free_rec_index = 0;
1473 	mmu_p->dvma_active_count = 0;
1474 }
1475 
1476 void
1477 px_dvma_debug_fini(px_mmu_t *mmu_p)
1478 {
1479 	struct px_dvma_rec *prev, *ptr;
1480 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1481 	uint64_t mask = ~(1ull << mmu_p->mmu_inst);
1482 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
1483 
1484 	kmem_free(mmu_p->dvma_alloc_rec, sz);
1485 	kmem_free(mmu_p->dvma_free_rec, sz);
1486 	mmu_p->dvma_alloc_rec = mmu_p->dvma_free_rec = NULL;
1487 
1488 	prev = mmu_p->dvma_active_list;
1489 	if (!prev)
1490 		return;
1491 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1492 		kmem_free(prev, sizeof (struct px_dvma_rec));
1493 	kmem_free(prev, sizeof (struct px_dvma_rec));
1494 
1495 	mmu_p->dvma_active_list = NULL;
1496 	mmu_p->dvma_alloc_rec_index = 0;
1497 	mmu_p->dvma_free_rec_index = 0;
1498 	mmu_p->dvma_active_count = 0;
1499 
1500 	px_dvma_debug_off &= mask;
1501 	px_dvma_debug_on &= mask;
1502 }
1503 
1504 void
1505 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1506 	ddi_dma_impl_t *mp)
1507 {
1508 	struct px_dvma_rec *ptr;
1509 	mutex_enter(&mmu_p->dvma_debug_lock);
1510 
1511 	if (!mmu_p->dvma_alloc_rec)
1512 		px_dvma_debug_init(mmu_p);
1513 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1514 		px_dvma_debug_fini(mmu_p);
1515 		goto done;
1516 	}
1517 
1518 	ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
1519 	ptr->dvma_addr = address;
1520 	ptr->len = len;
1521 	ptr->mp = mp;
1522 	if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
1523 		mmu_p->dvma_alloc_rec_index = 0;
1524 
1525 	ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
1526 	ptr->dvma_addr = address;
1527 	ptr->len = len;
1528 	ptr->mp = mp;
1529 
1530 	ptr->next = mmu_p->dvma_active_list;
1531 	mmu_p->dvma_active_list = ptr;
1532 	mmu_p->dvma_active_count++;
1533 done:
1534 	mutex_exit(&mmu_p->dvma_debug_lock);
1535 }
1536 
1537 void
1538 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1539     ddi_dma_impl_t *mp)
1540 {
1541 	struct px_dvma_rec *ptr, *ptr_save;
1542 	mutex_enter(&mmu_p->dvma_debug_lock);
1543 
1544 	if (!mmu_p->dvma_alloc_rec)
1545 		px_dvma_debug_init(mmu_p);
1546 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1547 		px_dvma_debug_fini(mmu_p);
1548 		goto done;
1549 	}
1550 
1551 	ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
1552 	ptr->dvma_addr = address;
1553 	ptr->len = len;
1554 	ptr->mp = mp;
1555 	if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
1556 		mmu_p->dvma_free_rec_index = 0;
1557 
1558 	ptr_save = mmu_p->dvma_active_list;
1559 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1560 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1561 			break;
1562 		ptr_save = ptr;
1563 	}
1564 	if (!ptr) {
1565 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1566 			(long)address, len);
1567 		goto done;
1568 	}
1569 	if (ptr == mmu_p->dvma_active_list)
1570 		mmu_p->dvma_active_list = ptr->next;
1571 	else
1572 		ptr_save->next = ptr->next;
1573 	kmem_free(ptr, sizeof (struct px_dvma_rec));
1574 	mmu_p->dvma_active_count--;
1575 done:
1576 	mutex_exit(&mmu_p->dvma_debug_lock);
1577 }
1578 
#ifdef	DEBUG
/*
 * px_dump_dma_handle - dump the interesting members of a dma handle
 *
 *	Emits four DBG lines under the given debug flag; the lines after
 *	the first are issued with DBG_CONT or'ed into the flag.
 */
void
px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
{
	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
		hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
		hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
		hp->dmai_nwin);
	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
		hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
	/* dmai_winlst is a pointer, so print it with %p like its peers */
	DBG(flag|DBG_CONT, dip, "\twinlst=%p obj=%p attr=%p ckp=%p\n",
		hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
		hp->dmai_cookie);
}
#endif	/* DEBUG */
1595