xref: /titanic_51/usr/src/uts/sun4/io/px/px_dma.c (revision f4a94ada79e5d2be49a574fa7fba9364c57b05d9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PCI Express nexus DVMA and DMA core routines:
28  *	dma_map/dma_bind_handle implementation
29  *	bypass and peer-to-peer support
30  *	fast track DVMA space allocation
31  *	runtime DVMA debug
32  */
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sunddi.h>
38 #include <sys/ddi_impldefs.h>
39 #include "px_obj.h"
40 
41 /*LINTLIBRARY*/
42 
43 /*
44  * px_dma_allocmp - Allocate a pci dma implementation structure
45  *
46  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
47  * to hold unmodified device limits. The ddi_dma_attr inside the
48  * ddi_dma_impl structure is augumented with system limits to enhance
49  * DVMA performance at runtime. The unaugumented device limits saved
50  * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
51  * strictly for peer-to-peer transfers which do not obey system limits.
52  *
53  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
54  */
55 ddi_dma_impl_t *
56 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
57 	caddr_t arg)
58 {
59 	register ddi_dma_impl_t *mp;
60 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
61 
62 	/* Caution: we don't use zalloc to enhance performance! */
63 	if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
64 		DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
65 		if (waitfp != DDI_DMA_DONTWAIT) {
66 			DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
67 			ddi_set_callback(waitfp, arg, &px_kmem_clid);
68 		}
69 		return (mp);
70 	}
71 
72 	mp->dmai_rdip = rdip;
73 	mp->dmai_flags = 0;
74 	mp->dmai_pfnlst = NULL;
75 	mp->dmai_winlst = NULL;
76 
77 	/*
78 	 * kmem_alloc debug: the following fields are not zero-ed
79 	 * mp->dmai_mapping = 0;
80 	 * mp->dmai_size = 0;
81 	 * mp->dmai_offset = 0;
82 	 * mp->dmai_minxfer = 0;
83 	 * mp->dmai_burstsizes = 0;
84 	 * mp->dmai_ndvmapages = 0;
85 	 * mp->dmai_pool/roffset = 0;
86 	 * mp->dmai_rflags = 0;
87 	 * mp->dmai_inuse/flags
88 	 * mp->dmai_nwin = 0;
89 	 * mp->dmai_winsize = 0;
90 	 * mp->dmai_nexus_private/tte = 0;
91 	 * mp->dmai_iopte/pfnlst
92 	 * mp->dmai_sbi/pfn0 = 0;
93 	 * mp->dmai_minfo/winlst/fdvma
94 	 * mp->dmai_rdip
95 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
96 	 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
97 	 * mp->dmai_cookie = 0;
98 	 */
99 
100 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
101 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
102 	mp->dmai_fault = 0;
103 	mp->dmai_fault_check = NULL;
104 	mp->dmai_fault_notify = NULL;
105 
106 	mp->dmai_error.err_ena = 0;
107 	mp->dmai_error.err_status = DDI_FM_OK;
108 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
109 	mp->dmai_error.err_ontrap = NULL;
110 	mp->dmai_error.err_fep = NULL;
111 	mp->dmai_error.err_cf = NULL;
112 
113 	/*
114 	 * The bdf protection value is set to immediate child
115 	 * at first. It gets modified by switch/bridge drivers
116 	 * as the code traverses down the fabric topology.
117 	 *
118 	 * XXX No IOMMU protection for broken devices.
119 	 */
120 	ASSERT((intptr_t)ddi_get_parent_data(rdip) >> 1 == 0);
121 	mp->dmai_bdf = ((intptr_t)ddi_get_parent_data(rdip) == 1) ? 0 :
122 	    pcie_get_bdf_for_dma_xfer(dip, rdip);
123 
124 	return (mp);
125 }
126 
127 void
128 px_dma_freemp(ddi_dma_impl_t *mp)
129 {
130 	if (mp->dmai_ndvmapages > 1)
131 		px_dma_freepfn(mp);
132 	if (mp->dmai_winlst)
133 		px_dma_freewin(mp);
134 	kmem_free(mp, sizeof (px_dma_hdl_t));
135 }
136 
137 void
138 px_dma_freepfn(ddi_dma_impl_t *mp)
139 {
140 	void *addr = mp->dmai_pfnlst;
141 	if (addr) {
142 		size_t npages = mp->dmai_ndvmapages;
143 		if (npages > 1)
144 			kmem_free(addr, npages * sizeof (px_iopfn_t));
145 		mp->dmai_pfnlst = NULL;
146 	}
147 	mp->dmai_ndvmapages = 0;
148 }
149 
150 /*
151  * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
152  *			and convert dmareq->dmar_limits to mp->dmai_attr
153  *
154  * ddi_dma_impl_t member modified     input
155  * ------------------------------------------------------------------------
156  * mp->dmai_minxfer		    - dev
157  * mp->dmai_burstsizes		    - dev
158  * mp->dmai_flags		    - no limit? peer-to-peer only?
159  *
160  * ddi_dma_attr member modified       input
161  * ------------------------------------------------------------------------
162  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
163  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
164  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
165  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
166  * mp->dmai_attr.dma_attr_align     - 1         (no alignment restriction)
167  *
168  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
169  */
170 ddi_dma_impl_t *
171 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
172 	ddi_dma_req_t *dmareq)
173 {
174 	ddi_dma_impl_t *mp;
175 	ddi_dma_attr_t *attr_p;
176 	uint64_t syslo		= mmu_p->mmu_dvma_base;
177 	uint64_t syshi		= mmu_p->mmu_dvma_end;
178 	uint64_t fasthi		= mmu_p->mmu_dvma_fast_end;
179 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
180 	uint32_t count_max	= lim_p->dlim_cntr_max;
181 	uint64_t lo		= lim_p->dlim_addr_lo;
182 	uint64_t hi		= lim_p->dlim_addr_hi;
183 	if (hi <= lo) {
184 		DBG(DBG_DMA_MAP, dip, "Bad limits\n");
185 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
186 	}
187 	if (!count_max)
188 		count_max--;
189 
190 	if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
191 	    dmareq->dmar_arg)))
192 		return (NULL);
193 
194 	/* store original dev input at the 2nd ddi_dma_attr */
195 	attr_p = PX_DEV_ATTR(mp);
196 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
197 	SET_DMAALIGN(attr_p, 1);
198 
199 	lo = MAX(lo, syslo);
200 	hi = MIN(hi, syshi);
201 	if (hi <= lo)
202 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
203 	count_max = MIN(count_max, hi - lo);
204 
205 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
206 		mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
207 		    PX_DMAI_FLAGS_NOSYSLIMIT;
208 	else {
209 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
210 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
211 	}
212 	if (PX_DMA_NOCTX(rdip))
213 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
214 
215 	/* store augumented dev input to mp->dmai_attr */
216 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
217 	attr_p = &mp->dmai_attr;
218 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
219 	SET_DMAALIGN(attr_p, 1);
220 	return (mp);
221 }
222 
223 /*
224  * Called from px_attach to check for bypass dma support and set
225  * flags accordingly.
226  */
227 int
228 px_dma_attach(px_t *px_p)
229 {
230 	uint64_t baddr;
231 
232 	if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
233 	    PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
234 	    &baddr) != DDI_ENOTSUP)
235 		/* ignore all other errors */
236 		px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
237 
238 	px_p->px_dma_sync_opt = ddi_prop_get_int(DDI_DEV_T_ANY,
239 	    px_p->px_dip, DDI_PROP_DONTPASS, "dma-sync-options", 0);
240 
241 	if (px_p->px_dma_sync_opt != 0)
242 		px_p->px_dev_caps |= PX_DMA_SYNC_REQUIRED;
243 
244 	return (DDI_SUCCESS);
245 }
246 
247 /*
248  * px_dma_attr2hdl
249  *
250  * This routine is called from the alloc handle entry point to sanity check the
251  * dma attribute structure.
252  *
253  * use by: px_dma_allochdl()
254  *
255  * return value:
256  *
257  *	DDI_SUCCESS		- on success
258  *	DDI_DMA_BADATTR		- attribute has invalid version number
259  *				  or address limits exclude dvma space
260  */
261 int
262 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
263 {
264 	px_mmu_t *mmu_p = px_p->px_mmu_p;
265 	uint64_t syslo, syshi;
266 	int	ret;
267 	ddi_dma_attr_t *attrp		= PX_DEV_ATTR(mp);
268 	uint64_t hi			= attrp->dma_attr_addr_hi;
269 	uint64_t lo			= attrp->dma_attr_addr_lo;
270 	uint64_t align			= attrp->dma_attr_align;
271 	uint64_t nocross		= attrp->dma_attr_seg;
272 	uint64_t count_max		= attrp->dma_attr_count_max;
273 
274 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
275 	    attrp, HI32(count_max), LO32(count_max));
276 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
277 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
278 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
279 	    HI32(nocross), LO32(nocross), HI32(align), LO32(align));
280 
281 	if (!nocross)
282 		nocross--;
283 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
284 
285 		DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
286 		/*
287 		 * If Bypass DMA is not supported, return error so that
288 		 * target driver can fall back to dvma mode of operation
289 		 */
290 		if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
291 			return (DDI_DMA_BADATTR);
292 		mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
293 		if (nocross != UINT64_MAX)
294 			return (DDI_DMA_BADATTR);
295 		if (align && (align > MMU_PAGE_SIZE))
296 			return (DDI_DMA_BADATTR);
297 		align = 1; /* align on 1 page boundary */
298 
299 		/* do a range check and get the limits */
300 		ret = px_lib_dma_bypass_rngchk(px_p->px_dip, attrp,
301 		    &syslo, &syshi);
302 		if (ret != DDI_SUCCESS)
303 			return (ret);
304 	} else { /* MMU_XLATE or PEER_TO_PEER */
305 		align = MAX(align, MMU_PAGE_SIZE) - 1;
306 		if ((align & nocross) != align) {
307 			dev_info_t *rdip = mp->dmai_rdip;
308 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
309 			    NAMEINST(rdip));
310 			return (DDI_DMA_BADATTR);
311 		}
312 		align = MMU_BTOP(align + 1);
313 		syslo = mmu_p->mmu_dvma_base;
314 		syshi = mmu_p->mmu_dvma_end;
315 	}
316 	if (hi <= lo) {
317 		dev_info_t *rdip = mp->dmai_rdip;
318 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
319 		return (DDI_DMA_BADATTR);
320 	}
321 	lo = MAX(lo, syslo);
322 	hi = MIN(hi, syshi);
323 	if (!count_max)
324 		count_max--;
325 
326 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
327 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
328 	if (hi <= lo) {
329 		/*
330 		 * If this is an IOMMU bypass access, the caller can't use
331 		 * the required addresses, so fail it.  Otherwise, it's
332 		 * peer-to-peer; ensure that the caller has no alignment or
333 		 * segment size restrictions.
334 		 */
335 		if ((mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ||
336 		    (nocross < UINT32_MAX) || (align > 1))
337 			return (DDI_DMA_BADATTR);
338 
339 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
340 	} else /* set practical counter_max value */
341 		count_max = MIN(count_max, hi - lo);
342 
343 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
344 		mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
345 		    PX_DMAI_FLAGS_NOFASTLIMIT;
346 	else {
347 		syshi = mmu_p->mmu_dvma_fast_end;
348 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
349 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
350 	}
351 	if (PX_DMA_NOCTX(mp->dmai_rdip))
352 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
353 
354 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
355 	attrp = &mp->dmai_attr;
356 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
357 	return (DDI_SUCCESS);
358 }
359 
/*
 * TGT_PFN_INBETWEEN - non-zero iff bgn <= pfn <= end (bounds inclusive).
 * Every argument is parenthesized so that operator expressions can be
 * passed safely; note that pfn is still evaluated twice.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
361 
362 /*
363  * px_dma_type - determine which of the three types DMA (peer-to-peer,
364  *		mmu bypass, or mmu translate) we are asked to do.
365  *		Also checks pfn0 and rejects any non-peer-to-peer
366  *		requests for peer-only devices.
367  *
368  *	return values:
369  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
370  *		DDI_SUCCESS
371  *
372  *	dma handle members affected (set on exit):
373  *	mp->dmai_object		- dmareq->dmar_object
374  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
375  *	mp->dmai_flags   	- DMA type
376  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
377  *	mp->dmai_roffset 	- initialized to starting MMU page offset
378  *	mp->dmai_ndvmapages	- # of total MMU pages of entire object
379  */
380 int
381 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
382 {
383 	dev_info_t *dip = px_p->px_dip;
384 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
385 	px_pec_t *pec_p = px_p->px_pec_p;
386 	uint32_t offset;
387 	pfn_t pfn0;
388 	uint_t redzone;
389 
390 	mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
391 
392 	if (!(px_p->px_dev_caps & PX_DMA_SYNC_REQUIRED))
393 		mp->dmai_rflags |= DMP_NOSYNC;
394 
395 	switch (dobj_p->dmao_type) {
396 	case DMA_OTYP_BUFVADDR:
397 	case DMA_OTYP_VADDR: {
398 		page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
399 		caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
400 
401 		DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
402 		offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
403 		if (pplist) {				/* shadow list */
404 			mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
405 			pfn0 = page_pptonum(*pplist);
406 		} else {
407 			struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
408 			struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
409 			pfn0 = hat_getpfnum(hat_p, vaddr);
410 		}
411 		}
412 		break;
413 
414 	case DMA_OTYP_PAGES:
415 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
416 		mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
417 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
418 		break;
419 
420 	case DMA_OTYP_PADDR:
421 	default:
422 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
423 		    NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
424 		return (DDI_DMA_NOMAPPING);
425 	}
426 	if (pfn0 == PFN_INVALID) {
427 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
428 		    NAMEINST(dip), dobj_p);
429 		return (DDI_DMA_NOMAPPING);
430 	}
431 	if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
432 	    pec_p->pec_last32_pfn)) {
433 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
434 		goto done;	/* leave bypass and dvma flag as 0 */
435 	} else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
436 	    pec_p->pec_last64_pfn)) {
437 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
438 		goto done;	/* leave bypass and dvma flag as 0 */
439 	}
440 	if (PX_DMA_ISPEERONLY(mp)) {
441 		dev_info_t *rdip = mp->dmai_rdip;
442 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
443 		return (DDI_DMA_NOMAPPING);
444 	}
445 
446 	redzone = (mp->dmai_rflags & DDI_DMA_REDZONE) ||
447 	    (mp->dmai_flags & PX_DMAI_FLAGS_MAP_BUFZONE) ?
448 	    PX_DMAI_FLAGS_REDZONE : 0;
449 
450 	mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
451 	    PX_DMAI_FLAGS_BYPASS : (PX_DMAI_FLAGS_DVMA | redzone);
452 done:
453 	mp->dmai_object	 = *dobj_p;			/* whole object    */
454 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
455 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
456 	mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
457 	return (DDI_SUCCESS);
458 }
459 
460 /*
461  * px_dma_pgpfn - set up pfnlst array according to pages
462  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
463  */
464 /*ARGSUSED*/
465 static int
466 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
467 {
468 	int i;
469 	dev_info_t *dip = px_p->px_dip;
470 
471 	switch (mp->dmai_object.dmao_type) {
472 	case DMA_OTYP_BUFVADDR:
473 	case DMA_OTYP_VADDR: {
474 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
475 		DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
476 		    pplist, npages);
477 		for (i = 1; i < npages; i++) {
478 			px_iopfn_t pfn = page_pptonum(pplist[i]);
479 			PX_SET_MP_PFN1(mp, i, pfn);
480 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
481 		}
482 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
483 		}
484 		break;
485 
486 	case DMA_OTYP_PAGES: {
487 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
488 		DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
489 		for (i = 1; i < npages; i++, pp = pp->p_next) {
490 			px_iopfn_t pfn = page_pptonum(pp);
491 			PX_SET_MP_PFN1(mp, i, pfn);
492 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
493 		}
494 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
495 		}
496 		break;
497 
498 	default:	/* check is already done by px_dma_type */
499 		ASSERT(0);
500 		break;
501 	}
502 	return (DDI_SUCCESS);
503 }
504 
505 /*
506  * px_dma_vapfn - set up pfnlst array according to VA
507  *	VA/size pair: <normal, bypass, peer-to-peer>
508  *	pfn0 is skipped as it is already done.
509  *	In this case, the cached pfn0 is used to fill pfnlst[0]
510  */
511 static int
512 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
513 {
514 	dev_info_t *dip = px_p->px_dip;
515 	int i;
516 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
517 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
518 
519 	vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
520 	for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
521 		px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
522 		if (pfn == PFN_INVALID)
523 			goto err_badpfn;
524 		PX_SET_MP_PFN1(mp, i, pfn);
525 		DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
526 		    mp, i, pfn);
527 	}
528 	return (DDI_SUCCESS);
529 err_badpfn:
530 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
531 	return (DDI_DMA_NOMAPPING);
532 }
533 
534 /*
535  * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
536  *
537  * dependencies:
538  *	mp->dmai_ndvmapages	- set to total # of dma pages
539  *
540  * return value:
541  *	DDI_SUCCESS
542  *	DDI_DMA_NOMAPPING
543  */
544 int
545 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
546 {
547 	uint32_t npages = mp->dmai_ndvmapages;
548 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
549 	int i, ret, peer = PX_DMA_ISPTP(mp);
550 	int peer32 = PX_DMA_ISPTP32(mp);
551 	dev_info_t *dip = px_p->px_dip;
552 
553 	px_pec_t *pec_p = px_p->px_pec_p;
554 	px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
555 	    pec_p->pec_base64_pfn;
556 	px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
557 	    pec_p->pec_last64_pfn;
558 	px_iopfn_t pfn_adj = peer ? pfn_base : 0;
559 
560 	DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
561 	    mp, PX_MP_PFN0(mp) - pfn_adj);
562 	/* 1 page: no array alloc/fill, no mixed mode check */
563 	if (npages == 1) {
564 		PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
565 		return (DDI_SUCCESS);
566 	}
567 	/* allocate pfn array */
568 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
569 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
570 		if (waitfp != DDI_DMA_DONTWAIT)
571 			ddi_set_callback(waitfp, dmareq->dmar_arg,
572 			    &px_kmem_clid);
573 		return (DDI_DMA_NORESOURCES);
574 	}
575 	/* fill pfn array */
576 	PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
577 	if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
578 	    px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
579 		goto err;
580 
581 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
582 	for (i = 1; i < npages; i++) {
583 		px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
584 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
585 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
586 			    NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
587 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
588 			goto err;
589 		}
590 		DBG(DBG_DMA_MAP, dip,
591 		    "px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
592 		if (pfn_adj)
593 			PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
594 	}
595 	return (DDI_SUCCESS);
596 err:
597 	px_dma_freepfn(mp);
598 	return (ret);
599 }
600 
601 /*
602  * px_dvma_win() - trim requested DVMA size down to window size
603  *	The 1st window starts from offset and ends at page-aligned boundary.
604  *	From the 2nd window on, each window starts and ends at page-aligned
605  *	boundary except the last window ends at wherever requested.
606  *
607  *	accesses the following mp-> members:
608  *	mp->dmai_attr.dma_attr_count_max
609  *	mp->dmai_attr.dma_attr_seg
610  *	mp->dmai_roffset   - start offset of 1st window
611  *	mp->dmai_rflags (redzone)
612  *	mp->dmai_ndvmapages (for 1 page fast path)
613  *
614  *	sets the following mp-> members:
615  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
616  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
617  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
618  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
619  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
620  *
621  *	fixed - not changed across different DMA windows
622  */
623 /*ARGSUSED*/
624 int
625 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
626 {
627 	uint32_t redzone_sz	= PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
628 	size_t obj_sz		= mp->dmai_object.dmao_size;
629 	size_t xfer_sz;
630 	ulong_t pg_off;
631 
632 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
633 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
634 		mp->dmai_size = obj_sz;
635 		mp->dmai_winsize = MMU_PAGE_SIZE;
636 		mp->dmai_nwin = 1;
637 		goto done;
638 	}
639 
640 	pg_off	= mp->dmai_roffset;
641 	xfer_sz	= obj_sz + redzone_sz;
642 
643 	/* include redzone in nocross check */	{
644 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
645 		if (xfer_sz + pg_off - 1 > nocross)
646 			xfer_sz = nocross - pg_off + 1;
647 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
648 			DBG(DBG_DMA_MAP, px_p->px_dip,
649 			    "nocross too small: "
650 			    "%lx(%lx)+%lx+%lx < %llx\n",
651 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
652 			return (DDI_DMA_TOOBIG);
653 		}
654 	}
655 	xfer_sz -= redzone_sz;		/* restore transfer size  */
656 	/* check counter max */	{
657 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
658 		if (xfer_sz - 1 > count_max)
659 			xfer_sz = count_max + 1;
660 	}
661 	if (xfer_sz >= obj_sz) {
662 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
663 		mp->dmai_size = xfer_sz;
664 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
665 		mp->dmai_nwin = 1;
666 		goto done;
667 	}
668 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
669 		DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
670 		    obj_sz, pg_off, redzone_sz, xfer_sz);
671 		return (DDI_DMA_TOOBIG);
672 	}
673 
674 	xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
675 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
676 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
677 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
678 done:
679 	mp->dmai_winlst = NULL;
680 	px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
681 	return (DDI_SUCCESS);
682 }
683 
/*
 * fast track cache entry to mmu context: the bits of the cache entry
 * above its lower 3 bits are moved up by 3 positions, the lower 3 bits
 * stay in place, and the 3 bits in between (mask 0x38) are all set to 1.
 */
#define	MMU_FCE_TO_CTX(i)	((((i) & ~0x7) << 3) | 0x38 | ((i) & 0x7))
689 
690 /*
691  * px_dvma_map_fast - attempts to map fast trackable DVMA
692  */
693 /*ARGSUSED*/
694 int
695 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
696 {
697 	uint_t clustsz = px_dvma_page_cache_clustsz;
698 	uint_t entries = px_dvma_page_cache_entries;
699 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
700 	    mp->dmai_attr.dma_attr_flags);
701 	int i = mmu_p->mmu_dvma_addr_scan_start;
702 	uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
703 	px_dvma_addr_t dvma_pg;
704 	size_t npages = MMU_BTOP(mp->dmai_winsize);
705 	dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
706 
707 	extern uint8_t ldstub(uint8_t *);
708 	ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
709 	ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
710 
711 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
712 		;
713 	if (i >= entries) {
714 		lock_addr = mmu_p->mmu_dvma_cache_locks;
715 		i = 0;
716 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
717 			;
718 		if (i >= entries) {
719 #ifdef	PX_DMA_PROF
720 			px_dvmaft_exhaust++;
721 #endif	/* PX_DMA_PROF */
722 			return (DDI_DMA_NORESOURCES);
723 		}
724 	}
725 	mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
726 
727 	i *= clustsz;
728 	dvma_pg = mmu_p->dvma_base_pg + i;
729 
730 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages,
731 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, 0,
732 	    MMU_MAP_PFN) != DDI_SUCCESS) {
733 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
734 		    "px_lib_iommu_map failed\n");
735 		return (DDI_FAILURE);
736 	}
737 
738 	if (!PX_MAP_BUFZONE(mp))
739 		goto done;
740 
741 	DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
742 
743 	ASSERT(PX_HAS_REDZONE(mp));
744 
745 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1,
746 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, npages - 1,
747 	    MMU_MAP_PFN) != DDI_SUCCESS) {
748 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
749 		    "mapping REDZONE page failed\n");
750 
751 		(void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
752 		return (DDI_FAILURE);
753 	}
754 
755 done:
756 #ifdef PX_DMA_PROF
757 	px_dvmaft_success++;
758 #endif
759 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
760 	mp->dmai_offset = 0;
761 	mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
762 	PX_SAVE_MP_TTE(mp, attr);	/* save TTE template for unmapping */
763 	if (PX_DVMA_DBG_ON(mmu_p))
764 		px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
765 		    mp->dmai_size, mp);
766 	return (DDI_SUCCESS);
767 }
768 
769 /*
770  * px_dvma_map: map non-fasttrack DMA
771  *		Use quantum cache if single page DMA.
772  */
773 int
774 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
775 {
776 	uint_t npages = PX_DMA_WINNPGS(mp);
777 	px_dvma_addr_t dvma_pg, dvma_pg_index;
778 	void *dvma_addr;
779 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
780 	    mp->dmai_attr.dma_attr_flags);
781 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
782 	dev_info_t *dip = mp->dmai_rdip;
783 	int	ret = DDI_SUCCESS;
784 
785 	/*
786 	 * allocate dvma space resource and map in the first window.
787 	 * (vmem_t *vmp, size_t size,
788 	 *	size_t align, size_t phase, size_t nocross,
789 	 *	void *minaddr, void *maxaddr, int vmflag)
790 	 */
791 	if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
792 		dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
793 		    MMU_PAGE_SIZE, sleep);
794 		mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
795 #ifdef	PX_DMA_PROF
796 		px_dvma_vmem_alloc++;
797 #endif	/* PX_DMA_PROF */
798 	} else {
799 		dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
800 		    MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
801 		    MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
802 		    0,
803 		    mp->dmai_attr.dma_attr_seg + 1,
804 		    (void *)mp->dmai_attr.dma_attr_addr_lo,
805 		    (void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
806 		    sleep);
807 #ifdef	PX_DMA_PROF
808 		px_dvma_vmem_xalloc++;
809 #endif	/* PX_DMA_PROF */
810 	}
811 	dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
812 	dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
813 	DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
814 	    dvma_pg, dvma_pg_index);
815 	if (dvma_pg == 0)
816 		goto noresource;
817 
818 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
819 	mp->dmai_offset = 0;
820 	PX_SAVE_MP_TTE(mp, attr);	/* mp->dmai_tte = tte */
821 
822 	if ((ret = px_mmu_map_pages(mmu_p,
823 	    mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
824 		if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
825 			vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
826 			    MMU_PAGE_SIZE);
827 #ifdef PX_DMA_PROF
828 			px_dvma_vmem_free++;
829 #endif /* PX_DMA_PROF */
830 		} else {
831 			vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
832 			    MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
833 #ifdef PX_DMA_PROF
834 			px_dvma_vmem_xfree++;
835 #endif /* PX_DMA_PROF */
836 		}
837 	}
838 
839 	return (ret);
840 noresource:
841 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
842 		DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
843 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
844 		    &mmu_p->mmu_dvma_clid);
845 	}
846 	DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
847 	return (DDI_DMA_NORESOURCES);
848 }
849 
850 void
851 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
852 {
853 	px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
854 	px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
855 	dvma_addr = MMU_PTOB(dvma_pg);
856 
857 	if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
858 		px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
859 		ASSERT(index % px_dvma_page_cache_clustsz == 0);
860 		index /= px_dvma_page_cache_clustsz;
861 		ASSERT(index < px_dvma_page_cache_entries);
862 		mmu_p->mmu_dvma_cache_locks[index] = 0;
863 #ifdef	PX_DMA_PROF
864 		px_dvmaft_free++;
865 #endif	/* PX_DMA_PROF */
866 		return;
867 	}
868 
869 	if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
870 		vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
871 		    MMU_PAGE_SIZE);
872 #ifdef PX_DMA_PROF
873 		px_dvma_vmem_free++;
874 #endif /* PX_DMA_PROF */
875 	} else {
876 		size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
877 		vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
878 		    MMU_PTOB(npages));
879 #ifdef PX_DMA_PROF
880 		px_dvma_vmem_xfree++;
881 #endif /* PX_DMA_PROF */
882 	}
883 }
884 
885 /*
886  * DVMA mappings may have multiple windows, but each window always have
887  * one segment.
888  */
889 int
890 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
891 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
892 	uint_t cache_flags)
893 {
894 	switch (cmd) {
895 	case DDI_DMA_SYNC:
896 		return (px_lib_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
897 		    *offp, *lenp, cache_flags));
898 
899 	case DDI_DMA_HTOC: {
900 		int ret;
901 		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
902 		uint_t win_size = mp->dmai_winsize;
903 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;
904 
905 		if (off >= mp->dmai_object.dmao_size) {
906 			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
907 			    NAMEINST(mp->dmai_rdip), off);
908 			return (DDI_FAILURE);
909 		}
910 		off += mp->dmai_roffset;
911 		ret = px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
912 		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
913 		if (ret)
914 			return (ret);
915 		DBG(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
916 		    cp->dmac_address, cp->dmac_size, off, *offp);
917 
918 		/* adjust cookie addr/len if we are not on window boundary */
919 		ASSERT((off % win_size) == (off -
920 		    (PX_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
921 		off = PX_DMA_CURWIN(mp) ? off % win_size : *offp;
922 		ASSERT(cp->dmac_size > off);
923 		cp->dmac_laddress += off;
924 		cp->dmac_size -= off;
925 		DBG(DBG_DMA_CTL, dip, "HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
926 		    mp, cp->dmac_address, cp->dmac_size, off, wo_off);
927 		}
928 		return (DDI_SUCCESS);
929 
930 	case DDI_DMA_REPWIN:
931 		*offp = mp->dmai_offset;
932 		*lenp = mp->dmai_size;
933 		return (DDI_SUCCESS);
934 
935 	case DDI_DMA_MOVWIN: {
936 		off_t off = *offp;
937 		if (off >= mp->dmai_object.dmao_size)
938 			return (DDI_FAILURE);
939 		off += mp->dmai_roffset;
940 		return (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
941 		    off / mp->dmai_winsize, offp, lenp,
942 		    (ddi_dma_cookie_t *)objp, NULL));
943 		}
944 
945 	case DDI_DMA_NEXTWIN: {
946 		px_window_t win = PX_DMA_CURWIN(mp);
947 		if (offp) {
948 			if (*(px_window_t *)offp != win) {
949 				/* window not active */
950 				*(px_window_t *)objp = win; /* return cur win */
951 				return (DDI_DMA_STALE);
952 			}
953 			win++;
954 		} else	/* map win 0 */
955 			win = 0;
956 		if (win >= mp->dmai_nwin) {
957 			*(px_window_t *)objp = win - 1;
958 			return (DDI_DMA_DONE);
959 		}
960 		if (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
961 		    win, 0, 0, 0, 0)) {
962 			*(px_window_t *)objp = win - 1;
963 			return (DDI_FAILURE);
964 		}
965 		*(px_window_t *)objp = win;
966 		}
967 		return (DDI_SUCCESS);
968 
969 	case DDI_DMA_NEXTSEG:
970 		if (*(px_window_t *)offp != PX_DMA_CURWIN(mp))
971 			return (DDI_DMA_STALE);
972 		if (lenp)				/* only 1 seg allowed */
973 			return (DDI_DMA_DONE);
974 
975 		/* return mp as seg 0 */
976 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
977 		return (DDI_SUCCESS);
978 
979 	case DDI_DMA_SEGTOC:
980 		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
981 		    mp->dmai_size);
982 		*offp = mp->dmai_offset;
983 		*lenp = mp->dmai_size;
984 		return (DDI_SUCCESS);
985 
986 	case DDI_DMA_COFF: {
987 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
988 		if (cp->dmac_address < mp->dmai_mapping ||
989 		    (cp->dmac_address + cp->dmac_size) >
990 		    (mp->dmai_mapping + mp->dmai_size))
991 			return (DDI_FAILURE);
992 		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
993 		    mp->dmai_offset);
994 		}
995 		return (DDI_SUCCESS);
996 	default:
997 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
998 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
999 		break;
1000 	}
1001 	return (DDI_FAILURE);
1002 }
1003 
1004 void
1005 px_dma_freewin(ddi_dma_impl_t *mp)
1006 {
1007 	px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
1008 	for (win2_p = win_p; win_p; win2_p = win_p) {
1009 		win_p = win2_p->win_next;
1010 		kmem_free(win2_p, sizeof (px_dma_win_t) +
1011 		    sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
1012 	}
1013 	mp->dmai_nwin = 0;
1014 	mp->dmai_winlst = NULL;
1015 }
1016 
1017 /*
1018  * px_dma_newwin - create a dma window object and cookies
1019  *
1020  *	After the initial scan in px_dma_physwin(), which identifies
1021  *	a portion of the pfn array that belongs to a dma window,
1022  *	we are called to allocate and initialize representing memory
1023  *	resources. We know from the 1st scan the number of cookies
1024  *	or dma segment in this window so we can allocate a contiguous
1025  *	memory array for the dma cookies (The implementation of
1026  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
1027  *
1028  *	A second round scan is done on the pfn array to identify
1029  *	each dma segment and initialize its corresponding dma cookie.
1030  *	We don't need to do all the safety checking and we know they
1031  *	all belong to the same dma window.
1032  *
1033  *	Input:	cookie_no - # of cookies identified by the 1st scan
1034  *		start_idx - subscript of the pfn array for the starting pfn
1035  *		end_idx   - subscript of the last pfn in dma window
1036  *		win_pp    - pointer to win_next member of previous window
1037  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
1038  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
1039  *	Note:	Each cookie and window size are all initialized on page
1040  *		boundary. This is not true for the 1st cookie of the 1st
1041  *		window and the last cookie of the last window.
1042  *		We fix that later in upper layer which has access to size
1043  *		and offset info.
1044  *
1045  */
1046 /*ARGSUSED*/
1047 static int
1048 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
1049 	uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
1050 	px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
1051 {
1052 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
1053 	ddi_dma_cookie_t *cookie_p;
1054 	uint32_t pfn_no = 1;
1055 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
1056 	px_iopfn_t prev_pfn = pfn;
1057 	uint64_t baddr, seg_pfn0 = pfn;
1058 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
1059 	px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
1060 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1061 	io_attributes_t	attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1062 	    mp->dmai_attr.dma_attr_flags);
1063 
1064 	if (!win_p)
1065 		goto noresource;
1066 
1067 	win_p->win_next = NULL;
1068 	win_p->win_ncookies = cookie_no;
1069 	win_p->win_curseg = 0;	/* start from segment 0 */
1070 	win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
1071 	/* win_p->win_offset is left uninitialized */
1072 
1073 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1074 	start_idx++;
1075 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
1076 		pfn = PX_GET_MP_PFN1(mp, start_idx);
1077 		if ((pfn == prev_pfn + 1) &&
1078 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1079 			continue;
1080 
1081 		/* close up the cookie up to (including) prev_pfn */
1082 		baddr = MMU_PTOB(seg_pfn0);
1083 		if (bypass) {
1084 			if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
1085 			    == DDI_SUCCESS)
1086 				baddr = px_lib_ro_bypass(dip, attr, baddr);
1087 			else
1088 				return (DDI_FAILURE);
1089 		}
1090 
1091 		MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1092 		DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
1093 		    MMU_PTOB(seg_pfn0), pfn_no);
1094 
1095 		cookie_p++;	/* advance to next available cookie cell */
1096 		pfn_no = 0;
1097 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
1098 	}
1099 
1100 	baddr = MMU_PTOB(seg_pfn0);
1101 	if (bypass) {
1102 		if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
1103 		    == DDI_SUCCESS)
1104 			baddr = px_lib_ro_bypass(dip, attr, baddr);
1105 		else
1106 			return (DDI_FAILURE);
1107 	}
1108 
1109 	MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1110 	DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1111 	    MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
1112 #ifdef	DEBUG
1113 	cookie_p++;
1114 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1115 #endif	/* DEBUG */
1116 	*win_pp = win_p;
1117 	return (DDI_SUCCESS);
1118 noresource:
1119 	if (waitfp != DDI_DMA_DONTWAIT)
1120 		ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
1121 	return (DDI_DMA_NORESOURCES);
1122 }
1123 
1124 /*
1125  * px_dma_adjust - adjust 1st and last cookie and window sizes
1126  *	remove initial dma page offset from 1st cookie and window size
1127  *	remove last dma page remainder from last cookie and window size
1128  *	fill win_offset of each dma window according to just fixed up
1129  *		each window sizes
1130  *	px_dma_win_t members modified:
1131  *	win_p->win_offset - this window's offset within entire DMA object
1132  *	win_p->win_size	  - xferrable size (in bytes) for this window
1133  *
1134  *	ddi_dma_impl_t members modified:
1135  *	mp->dmai_size	  - 1st window xferrable size
1136  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1137  *
1138  *	ddi_dma_cookie_t members modified:
1139  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1140  *	cookie_p->dmac_laddress - 1st cookie add page offset
1141  */
1142 static void
1143 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
1144 {
1145 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1146 	size_t pg_offset = mp->dmai_roffset;
1147 	size_t win_offset = 0;
1148 
1149 	cookie_p->dmac_size -= pg_offset;
1150 	cookie_p->dmac_laddress |= pg_offset;
1151 	win_p->win_size -= pg_offset;
1152 	DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1153 
1154 	mp->dmai_size = win_p->win_size;
1155 	mp->dmai_offset = 0;
1156 
1157 	pg_offset += mp->dmai_object.dmao_size;
1158 	pg_offset &= MMU_PAGE_OFFSET;
1159 	if (pg_offset)
1160 		pg_offset = MMU_PAGE_SIZE - pg_offset;
1161 	DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1162 
1163 	for (; win_p->win_next; win_p = win_p->win_next) {
1164 		DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1165 		win_p->win_offset = win_offset;
1166 		win_offset += win_p->win_size;
1167 	}
1168 	/* last window */
1169 	win_p->win_offset = win_offset;
1170 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1171 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1172 	win_p->win_size -= pg_offset;
1173 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1174 }
1175 
1176 /*
1177  * px_dma_physwin() - carve up dma windows using physical addresses.
1178  *	Called to handle mmu bypass and pci peer-to-peer transfers.
1179  *	Calls px_dma_newwin() to allocate window objects.
1180  *
1181  * Dependency: mp->dmai_pfnlst points to an array of pfns
1182  *
1183  * 1. Each dma window is represented by a px_dma_win_t object.
1184  *	The object will be casted to ddi_dma_win_t and returned
1185  *	to leaf driver through the DDI interface.
1186  * 2. Each dma window can have several dma segments with each
1187  *	segment representing a physically contiguous either memory
1188  *	space (if we are doing an mmu bypass transfer) or pci address
1189  *	space (if we are doing a peer-to-peer transfer).
1190  * 3. Each segment has a DMA cookie to program the DMA engine.
1191  *	The cookies within each DMA window must be located in a
1192  *	contiguous array per ddi_dma_nextcookie(9f).
1193  * 4. The number of DMA segments within each DMA window cannot exceed
1194  *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
1195  *	too large to fit in the sgllen, the rest needs to be
1196  *	relocated to the next dma window.
1197  * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
1198  *	and nocross restrictions while bypass DMA follows the set of
1199  *	restrictions with system limits factored in.
1200  *
1201  * Return:
1202  *	mp->dmai_winlst	 - points to a link list of px_dma_win_t objects.
1203  *		Each px_dma_win_t object on the link list contains
1204  *		infomation such as its window size (# of pages),
1205  *		starting offset (also see Restriction), an array of
1206  *		DMA cookies, and # of cookies in the array.
1207  *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
1208  *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
1209  *	mp->dmai_mapping - starting cookie address
1210  *	mp->dmai_rflags	 - consistent, nosync, no redzone
1211  *	mp->dmai_cookie	 - start of cookie table of the 1st DMA window
1212  *
1213  * Restriction:
1214  *	Each px_dma_win_t object can theoratically start from any offset
1215  *	since the mmu is not involved. However, this implementation
1216  *	always make windows start from page aligned offset (except
1217  *	the 1st window, which follows the requested offset) due to the
1218  *	fact that we are handed a pfn list. This does require device's
1219  *	count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
1220  */
1221 int
1222 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
1223 {
1224 	uint_t npages = mp->dmai_ndvmapages;
1225 	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
1226 	px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
1227 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
1228 	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
1229 	uint64_t count_max, bypass_addr = 0;
1230 	px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
1231 	ddi_dma_cookie_t *cookie0_p;
1232 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1233 	    mp->dmai_attr.dma_attr_flags);
1234 	dev_info_t *dip = px_p->px_dip;
1235 
1236 	ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
1237 	if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
1238 		ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
1239 		uint64_t nocross = dev_attr_p->dma_attr_seg;
1240 		px_pec_t *pec_p = px_p->px_pec_p;
1241 		px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
1242 		    pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
1243 		    pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
1244 
1245 		if (nocross && (nocross < UINT32_MAX))
1246 			return (DDI_DMA_NOMAPPING);
1247 		if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
1248 			return (DDI_DMA_NOMAPPING);
1249 		pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
1250 		pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
1251 		pfn_hi = MIN(pfn_hi, pfn_last);
1252 		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
1253 			return (DDI_DMA_NOMAPPING);
1254 
1255 		count_max = dev_attr_p->dma_attr_count_max;
1256 		count_max = MIN(count_max, nocross);
1257 		/*
1258 		 * the following count_max trim is not done because we are
1259 		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
1260 		 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
1261 		 */
1262 	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
1263 		count_max = mp->dmai_attr.dma_attr_count_max;
1264 		pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
1265 		pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
1266 
1267 		if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
1268 		    attr, &bypass_addr) != DDI_SUCCESS) {
1269 			DBG(DBG_BYPASS, mp->dmai_rdip,
1270 			    "bypass cookie failure %lx\n", pfn);
1271 			return (DDI_DMA_NOMAPPING);
1272 		}
1273 		pfn = MMU_BTOP(bypass_addr);
1274 	}
1275 
1276 	/* pfn: absolute (bypass mode) or relative (p2p mode) */
1277 	for (prev_pfn = pfn, i = 1; i < npages;
1278 	    i++, prev_pfn = pfn, pfn_no++) {
1279 		pfn = PX_GET_MP_PFN1(mp, i);
1280 		if (bypass_addr) {
1281 			if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
1282 			    &bypass_addr) != DDI_SUCCESS) {
1283 				ret = DDI_DMA_NOMAPPING;
1284 				goto err;
1285 			}
1286 			pfn = MMU_BTOP(bypass_addr);
1287 		}
1288 		if ((pfn == prev_pfn + 1) &&
1289 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1290 			continue;
1291 		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
1292 			ret = DDI_DMA_NOMAPPING;
1293 			goto err;
1294 		}
1295 		cookie_no++;
1296 		pfn_no = 0;
1297 		if (cookie_no < sgllen)
1298 			continue;
1299 
1300 		DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1301 		    win_pfn0_index, i - 1, cookie_no);
1302 		if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
1303 		    win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
1304 			goto err;
1305 
1306 		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
1307 		win_no++;
1308 		win_pfn0_index = i;
1309 		cookie_no = 0;
1310 	}
1311 	if (pfn > pfn_hi) {
1312 		ret = DDI_DMA_NOMAPPING;
1313 		goto err;
1314 	}
1315 	cookie_no++;
1316 	DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1317 	    win_pfn0_index, i - 1, cookie_no);
1318 	if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
1319 	    i - 1, win_pp, count_max, bypass_addr))
1320 		goto err;
1321 	win_no++;
1322 	px_dma_adjust(dmareq, mp, mp->dmai_winlst);
1323 	mp->dmai_nwin = win_no;
1324 	mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
1325 	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
1326 	mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
1327 	cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
1328 	mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
1329 	mp->dmai_mapping = cookie0_p->dmac_laddress;
1330 
1331 	px_dma_freepfn(mp);
1332 	return (DDI_DMA_MAPPED);
1333 err:
1334 	px_dma_freewin(mp);
1335 	return (ret);
1336 }
1337 
1338 int
1339 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1340 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1341 	uint_t cache_flags)
1342 {
1343 	switch (cmd) {
1344 	case DDI_DMA_SYNC:
1345 		return (DDI_SUCCESS);
1346 
1347 	case DDI_DMA_HTOC: {
1348 		off_t off = *offp;
1349 		ddi_dma_cookie_t *loop_cp, *cp;
1350 		px_dma_win_t *win_p = mp->dmai_winlst;
1351 
1352 		if (off >= mp->dmai_object.dmao_size)
1353 			return (DDI_FAILURE);
1354 
1355 		/* locate window */
1356 		while (win_p->win_offset + win_p->win_size <= off)
1357 			win_p = win_p->win_next;
1358 
1359 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
1360 		mp->dmai_offset = win_p->win_offset;
1361 		mp->dmai_size   = win_p->win_size;
1362 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
1363 
1364 		/* adjust cookie addr/len if we are not on cookie boundary */
1365 		off -= win_p->win_offset;	   /* offset within window */
1366 		for (; off >= loop_cp->dmac_size; loop_cp++)
1367 			off -= loop_cp->dmac_size; /* offset within cookie */
1368 
1369 		mp->dmai_cookie = loop_cp + 1;
1370 		win_p->win_curseg = loop_cp - cp;
1371 		cp = (ddi_dma_cookie_t *)objp;
1372 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
1373 		    loop_cp->dmac_size - off);
1374 
1375 		DBG(DBG_DMA_CTL, dip,
1376 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1377 		    cp->dmac_laddress, cp->dmac_size);
1378 		}
1379 		return (DDI_SUCCESS);
1380 
1381 	case DDI_DMA_REPWIN:
1382 		*offp = mp->dmai_offset;
1383 		*lenp = mp->dmai_size;
1384 		return (DDI_SUCCESS);
1385 
1386 	case DDI_DMA_MOVWIN: {
1387 		off_t off = *offp;
1388 		ddi_dma_cookie_t *cp;
1389 		px_dma_win_t *win_p = mp->dmai_winlst;
1390 
1391 		if (off >= mp->dmai_object.dmao_size)
1392 			return (DDI_FAILURE);
1393 
1394 		/* locate window */
1395 		while (win_p->win_offset + win_p->win_size <= off)
1396 			win_p = win_p->win_next;
1397 
1398 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1399 		mp->dmai_offset = win_p->win_offset;
1400 		mp->dmai_size   = win_p->win_size;
1401 		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 star addr */
1402 		mp->dmai_cookie = cp + 1;
1403 		win_p->win_curseg = 0;
1404 
1405 		*(ddi_dma_cookie_t *)objp = *cp;
1406 		*offp = win_p->win_offset;
1407 		*lenp = win_p->win_size;
1408 		DBG(DBG_DMA_CTL, dip,
1409 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1410 		    cp->dmac_laddress, cp->dmac_size);
1411 		}
1412 		return (DDI_SUCCESS);
1413 
1414 	case DDI_DMA_NEXTWIN: {
1415 		px_dma_win_t *win_p = *(px_dma_win_t **)offp;
1416 		px_dma_win_t **nw_pp = (px_dma_win_t **)objp;
1417 		ddi_dma_cookie_t *cp;
1418 		if (!win_p) {
1419 			*nw_pp = mp->dmai_winlst;
1420 			return (DDI_SUCCESS);
1421 		}
1422 
1423 		if (win_p->win_offset != mp->dmai_offset)
1424 			return (DDI_DMA_STALE);
1425 		if (!win_p->win_next)
1426 			return (DDI_DMA_DONE);
1427 		win_p = win_p->win_next;
1428 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1429 		mp->dmai_offset = win_p->win_offset;
1430 		mp->dmai_size   = win_p->win_size;
1431 		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 star addr */
1432 		mp->dmai_cookie = cp + 1;
1433 		win_p->win_curseg = 0;
1434 		*nw_pp = win_p;
1435 		}
1436 		return (DDI_SUCCESS);
1437 
1438 	case DDI_DMA_NEXTSEG: {
1439 		px_dma_win_t *w_p = *(px_dma_win_t **)offp;
1440 		if (w_p->win_offset != mp->dmai_offset)
1441 			return (DDI_DMA_STALE);
1442 		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
1443 			return (DDI_DMA_DONE);
1444 		w_p->win_curseg++;
1445 		}
1446 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
1447 		return (DDI_SUCCESS);
1448 
1449 	case DDI_DMA_SEGTOC: {
1450 		px_dma_win_t *win_p = mp->dmai_winlst;
1451 		off_t off = mp->dmai_offset;
1452 		ddi_dma_cookie_t *cp;
1453 		int i;
1454 
1455 		/* locate active window */
1456 		for (; win_p->win_offset != off; win_p = win_p->win_next)
1457 			;
1458 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1459 		for (i = 0; i < win_p->win_curseg; i++, cp++)
1460 			off += cp->dmac_size;
1461 		*offp = off;
1462 		*lenp = cp->dmac_size;
1463 		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
1464 		}
1465 		return (DDI_SUCCESS);
1466 
1467 	case DDI_DMA_COFF: {
1468 		px_dma_win_t *win_p;
1469 		ddi_dma_cookie_t *cp;
1470 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
1471 		size_t win_off;
1472 
1473 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
1474 			int i;
1475 			win_off = 0;
1476 			cp = (ddi_dma_cookie_t *)(win_p + 1);
1477 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
1478 				size_t sz = cp->dmac_size;
1479 
1480 				addr = cp->dmac_laddress;
1481 				if ((addr <= key) && (addr + sz >= key))
1482 					goto found;
1483 				win_off += sz;
1484 			}
1485 		}
1486 		return (DDI_FAILURE);
1487 found:
1488 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
1489 		return (DDI_SUCCESS);
1490 		}
1491 	default:
1492 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1493 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1494 		break;
1495 	}
1496 	return (DDI_FAILURE);
1497 }
1498 
1499 static void
1500 px_dvma_debug_init(px_mmu_t *mmu_p)
1501 {
1502 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1503 	ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
1504 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
1505 
1506 	mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
1507 	mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
1508 
1509 	mmu_p->dvma_active_list = NULL;
1510 	mmu_p->dvma_alloc_rec_index = 0;
1511 	mmu_p->dvma_free_rec_index = 0;
1512 	mmu_p->dvma_active_count = 0;
1513 }
1514 
1515 void
1516 px_dvma_debug_fini(px_mmu_t *mmu_p)
1517 {
1518 	struct px_dvma_rec *prev, *ptr;
1519 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1520 	uint64_t mask = ~(1ull << mmu_p->mmu_inst);
1521 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
1522 
1523 	if (mmu_p->dvma_alloc_rec) {
1524 		kmem_free(mmu_p->dvma_alloc_rec, sz);
1525 		mmu_p->dvma_alloc_rec = NULL;
1526 	}
1527 	if (mmu_p->dvma_free_rec) {
1528 		kmem_free(mmu_p->dvma_free_rec, sz);
1529 		mmu_p->dvma_free_rec = NULL;
1530 	}
1531 
1532 	prev = mmu_p->dvma_active_list;
1533 	if (!prev)
1534 		return;
1535 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1536 		kmem_free(prev, sizeof (struct px_dvma_rec));
1537 	kmem_free(prev, sizeof (struct px_dvma_rec));
1538 
1539 	mmu_p->dvma_active_list = NULL;
1540 	mmu_p->dvma_alloc_rec_index = 0;
1541 	mmu_p->dvma_free_rec_index = 0;
1542 	mmu_p->dvma_active_count = 0;
1543 
1544 	px_dvma_debug_off &= mask;
1545 	px_dvma_debug_on &= mask;
1546 }
1547 
1548 void
1549 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1550 	ddi_dma_impl_t *mp)
1551 {
1552 	struct px_dvma_rec *ptr;
1553 	mutex_enter(&mmu_p->dvma_debug_lock);
1554 
1555 	if (!mmu_p->dvma_alloc_rec)
1556 		px_dvma_debug_init(mmu_p);
1557 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1558 		px_dvma_debug_fini(mmu_p);
1559 		goto done;
1560 	}
1561 
1562 	ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
1563 	ptr->dvma_addr = address;
1564 	ptr->len = len;
1565 	ptr->mp = mp;
1566 	if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
1567 		mmu_p->dvma_alloc_rec_index = 0;
1568 
1569 	ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
1570 	ptr->dvma_addr = address;
1571 	ptr->len = len;
1572 	ptr->mp = mp;
1573 
1574 	ptr->next = mmu_p->dvma_active_list;
1575 	mmu_p->dvma_active_list = ptr;
1576 	mmu_p->dvma_active_count++;
1577 done:
1578 	mutex_exit(&mmu_p->dvma_debug_lock);
1579 }
1580 
1581 void
1582 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1583     ddi_dma_impl_t *mp)
1584 {
1585 	struct px_dvma_rec *ptr, *ptr_save;
1586 	mutex_enter(&mmu_p->dvma_debug_lock);
1587 
1588 	if (!mmu_p->dvma_alloc_rec)
1589 		px_dvma_debug_init(mmu_p);
1590 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1591 		px_dvma_debug_fini(mmu_p);
1592 		goto done;
1593 	}
1594 
1595 	ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
1596 	ptr->dvma_addr = address;
1597 	ptr->len = len;
1598 	ptr->mp = mp;
1599 	if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
1600 		mmu_p->dvma_free_rec_index = 0;
1601 
1602 	ptr_save = mmu_p->dvma_active_list;
1603 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1604 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1605 			break;
1606 		ptr_save = ptr;
1607 	}
1608 	if (!ptr) {
1609 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1610 		    (long)address, len);
1611 		goto done;
1612 	}
1613 	if (ptr == mmu_p->dvma_active_list)
1614 		mmu_p->dvma_active_list = ptr->next;
1615 	else
1616 		ptr_save->next = ptr->next;
1617 	kmem_free(ptr, sizeof (struct px_dvma_rec));
1618 	mmu_p->dvma_active_count--;
1619 done:
1620 	mutex_exit(&mmu_p->dvma_debug_lock);
1621 }
1622 
#ifdef	DEBUG
/*
 * px_dump_dma_handle - print the fields of a dma handle through DBG()
 *
 * flag selects the debug category; the continuation lines are emitted
 * with DBG_CONT or'ed in. Only compiled into DEBUG kernels.
 */
void
px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
{
	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
	    hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
	    hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
	    hp->dmai_nwin);
	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
	    hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
	/* dmai_winlst is a pointer, so print it with %p like the others */
	DBG(flag|DBG_CONT, dip, "\twinlst=%p obj=%p attr=%p ckp=%p\n",
	    hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
	    hp->dmai_cookie);
}
#endif	/* DEBUG */
1639