xref: /titanic_51/usr/src/uts/sun4/io/px/px_dma.c (revision 0f509175c7fa701d6edf3f65789303587905b1bd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PCI Express nexus DVMA and DMA core routines:
28  *	dma_map/dma_bind_handle implementation
29  *	bypass and peer-to-peer support
30  *	fast track DVMA space allocation
31  *	runtime DVMA debug
32  */
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sunddi.h>
38 #include <sys/ddi_impldefs.h>
39 #include "px_obj.h"
40 
41 /*LINTLIBRARY*/
42 
43 /*
44  * px_dma_allocmp - Allocate a pci dma implementation structure
45  *
46  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
47  * to hold unmodified device limits. The ddi_dma_attr inside the
48  * ddi_dma_impl structure is augumented with system limits to enhance
49  * DVMA performance at runtime. The unaugumented device limits saved
50  * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
51  * strictly for peer-to-peer transfers which do not obey system limits.
52  *
53  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
54  */
55 ddi_dma_impl_t *
56 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
57 	caddr_t arg)
58 {
59 	register ddi_dma_impl_t *mp;
60 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
61 
62 	/* Caution: we don't use zalloc to enhance performance! */
63 	if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
64 		DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
65 		if (waitfp != DDI_DMA_DONTWAIT) {
66 			DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
67 			ddi_set_callback(waitfp, arg, &px_kmem_clid);
68 		}
69 		return (mp);
70 	}
71 
72 	mp->dmai_rdip = rdip;
73 	mp->dmai_flags = 0;
74 	mp->dmai_pfnlst = NULL;
75 	mp->dmai_winlst = NULL;
76 
77 	/*
78 	 * kmem_alloc debug: the following fields are not zero-ed
79 	 * mp->dmai_mapping = 0;
80 	 * mp->dmai_size = 0;
81 	 * mp->dmai_offset = 0;
82 	 * mp->dmai_minxfer = 0;
83 	 * mp->dmai_burstsizes = 0;
84 	 * mp->dmai_ndvmapages = 0;
85 	 * mp->dmai_pool/roffset = 0;
86 	 * mp->dmai_rflags = 0;
87 	 * mp->dmai_inuse/flags
88 	 * mp->dmai_nwin = 0;
89 	 * mp->dmai_winsize = 0;
90 	 * mp->dmai_nexus_private/tte = 0;
91 	 * mp->dmai_iopte/pfnlst
92 	 * mp->dmai_sbi/pfn0 = 0;
93 	 * mp->dmai_minfo/winlst/fdvma
94 	 * mp->dmai_rdip
95 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
96 	 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
97 	 * mp->dmai_cookie = 0;
98 	 */
99 
100 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
101 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
102 	mp->dmai_fault = 0;
103 	mp->dmai_fault_check = NULL;
104 	mp->dmai_fault_notify = NULL;
105 
106 	mp->dmai_error.err_ena = 0;
107 	mp->dmai_error.err_status = DDI_FM_OK;
108 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
109 	mp->dmai_error.err_ontrap = NULL;
110 	mp->dmai_error.err_fep = NULL;
111 	mp->dmai_error.err_cf = NULL;
112 
113 	/*
114 	 * The bdf protection value is set to immediate child
115 	 * at first. It gets modified by switch/bridge drivers
116 	 * as the code traverses down the fabric topology.
117 	 *
118 	 * XXX No IOMMU protection for broken devices.
119 	 */
120 	ASSERT((intptr_t)ddi_get_parent_data(rdip) >> 1 == 0);
121 	mp->dmai_bdf = ((intptr_t)ddi_get_parent_data(rdip) == 1) ? 0 :
122 	    pcie_get_bdf_for_dma_xfer(dip, rdip);
123 
124 	return (mp);
125 }
126 
127 void
128 px_dma_freemp(ddi_dma_impl_t *mp)
129 {
130 	if (mp->dmai_ndvmapages > 1)
131 		px_dma_freepfn(mp);
132 	if (mp->dmai_winlst)
133 		px_dma_freewin(mp);
134 	kmem_free(mp, sizeof (px_dma_hdl_t));
135 }
136 
137 void
138 px_dma_freepfn(ddi_dma_impl_t *mp)
139 {
140 	void *addr = mp->dmai_pfnlst;
141 	if (addr) {
142 		size_t npages = mp->dmai_ndvmapages;
143 		if (npages > 1)
144 			kmem_free(addr, npages * sizeof (px_iopfn_t));
145 		mp->dmai_pfnlst = NULL;
146 	}
147 	mp->dmai_ndvmapages = 0;
148 }
149 
150 /*
151  * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
152  *			and convert dmareq->dmar_limits to mp->dmai_attr
153  *
154  * ddi_dma_impl_t member modified     input
155  * ------------------------------------------------------------------------
156  * mp->dmai_minxfer		    - dev
157  * mp->dmai_burstsizes		    - dev
158  * mp->dmai_flags		    - no limit? peer-to-peer only?
159  *
160  * ddi_dma_attr member modified       input
161  * ------------------------------------------------------------------------
162  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
163  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
164  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
165  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
166  * mp->dmai_attr.dma_attr_align     - 1         (no alignment restriction)
167  *
168  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
169  */
170 ddi_dma_impl_t *
171 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
172 	ddi_dma_req_t *dmareq)
173 {
174 	ddi_dma_impl_t *mp;
175 	ddi_dma_attr_t *attr_p;
176 	uint64_t syslo		= mmu_p->mmu_dvma_base;
177 	uint64_t syshi		= mmu_p->mmu_dvma_end;
178 	uint64_t fasthi		= mmu_p->mmu_dvma_fast_end;
179 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
180 	uint32_t count_max	= lim_p->dlim_cntr_max;
181 	uint64_t lo		= lim_p->dlim_addr_lo;
182 	uint64_t hi		= lim_p->dlim_addr_hi;
183 	if (hi <= lo) {
184 		DBG(DBG_DMA_MAP, dip, "Bad limits\n");
185 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
186 	}
187 	if (!count_max)
188 		count_max--;
189 
190 	if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
191 	    dmareq->dmar_arg)))
192 		return (NULL);
193 
194 	/* store original dev input at the 2nd ddi_dma_attr */
195 	attr_p = PX_DEV_ATTR(mp);
196 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
197 	SET_DMAALIGN(attr_p, 1);
198 
199 	lo = MAX(lo, syslo);
200 	hi = MIN(hi, syshi);
201 	if (hi <= lo)
202 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
203 	count_max = MIN(count_max, hi - lo);
204 
205 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
206 		mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
207 		    PX_DMAI_FLAGS_NOSYSLIMIT;
208 	else {
209 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
210 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
211 	}
212 	if (PX_DMA_NOCTX(rdip))
213 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
214 
215 	/* store augumented dev input to mp->dmai_attr */
216 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
217 	attr_p = &mp->dmai_attr;
218 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
219 	SET_DMAALIGN(attr_p, 1);
220 	return (mp);
221 }
222 
223 /*
224  * Called from px_attach to check for bypass dma support and set
225  * flags accordingly.
226  */
227 int
228 px_dma_attach(px_t *px_p)
229 {
230 	uint64_t baddr;
231 
232 	if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
233 	    PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
234 	    &baddr) != DDI_ENOTSUP)
235 		/* ignore all other errors */
236 		px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
237 
238 	px_p->px_dma_sync_opt = ddi_prop_get_int(DDI_DEV_T_ANY,
239 	    px_p->px_dip, DDI_PROP_DONTPASS, "dma-sync-options", 0);
240 
241 	if (px_p->px_dma_sync_opt != 0)
242 		px_p->px_dev_caps |= PX_DMA_SYNC_REQUIRED;
243 
244 	return (DDI_SUCCESS);
245 }
246 
247 /*
248  * px_dma_attr2hdl
249  *
250  * This routine is called from the alloc handle entry point to sanity check the
251  * dma attribute structure.
252  *
253  * use by: px_dma_allochdl()
254  *
255  * return value:
256  *
257  *	DDI_SUCCESS		- on success
258  *	DDI_DMA_BADATTR		- attribute has invalid version number
259  *				  or address limits exclude dvma space
260  */
261 int
262 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
263 {
264 	px_mmu_t *mmu_p = px_p->px_mmu_p;
265 	uint64_t syslo, syshi;
266 	int	ret;
267 	ddi_dma_attr_t *attrp		= PX_DEV_ATTR(mp);
268 	uint64_t hi			= attrp->dma_attr_addr_hi;
269 	uint64_t lo			= attrp->dma_attr_addr_lo;
270 	uint64_t align			= attrp->dma_attr_align;
271 	uint64_t nocross		= attrp->dma_attr_seg;
272 	uint64_t count_max		= attrp->dma_attr_count_max;
273 
274 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
275 	    attrp, HI32(count_max), LO32(count_max));
276 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
277 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
278 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
279 	    HI32(nocross), LO32(nocross), HI32(align), LO32(align));
280 
281 	if (!nocross)
282 		nocross--;
283 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
284 
285 		DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
286 		/*
287 		 * If Bypass DMA is not supported, return error so that
288 		 * target driver can fall back to dvma mode of operation
289 		 */
290 		if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
291 			return (DDI_DMA_BADATTR);
292 		mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
293 		if (nocross != UINT64_MAX)
294 			return (DDI_DMA_BADATTR);
295 		if (align && (align > MMU_PAGE_SIZE))
296 			return (DDI_DMA_BADATTR);
297 		align = 1; /* align on 1 page boundary */
298 
299 		/* do a range check and get the limits */
300 		ret = px_lib_dma_bypass_rngchk(px_p->px_dip, attrp,
301 		    &syslo, &syshi);
302 		if (ret != DDI_SUCCESS)
303 			return (ret);
304 	} else { /* MMU_XLATE or PEER_TO_PEER */
305 		align = MAX(align, MMU_PAGE_SIZE) - 1;
306 		if ((align & nocross) != align) {
307 			dev_info_t *rdip = mp->dmai_rdip;
308 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
309 			    NAMEINST(rdip));
310 			return (DDI_DMA_BADATTR);
311 		}
312 		align = MMU_BTOP(align + 1);
313 		syslo = mmu_p->mmu_dvma_base;
314 		syshi = mmu_p->mmu_dvma_end;
315 	}
316 	if (hi <= lo) {
317 		dev_info_t *rdip = mp->dmai_rdip;
318 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
319 		return (DDI_DMA_BADATTR);
320 	}
321 	lo = MAX(lo, syslo);
322 	hi = MIN(hi, syshi);
323 	if (!count_max)
324 		count_max--;
325 
326 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
327 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
328 	if (hi <= lo) {
329 		/*
330 		 * If this is an IOMMU bypass access, the caller can't use
331 		 * the required addresses, so fail it.  Otherwise, it's
332 		 * peer-to-peer; ensure that the caller has no alignment or
333 		 * segment size restrictions.
334 		 */
335 		if ((mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ||
336 		    (nocross < UINT32_MAX) || (align > 1))
337 			return (DDI_DMA_BADATTR);
338 
339 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
340 	} else /* set practical counter_max value */
341 		count_max = MIN(count_max, hi - lo);
342 
343 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
344 		mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
345 		    PX_DMAI_FLAGS_NOFASTLIMIT;
346 	else {
347 		syshi = mmu_p->mmu_dvma_fast_end;
348 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
349 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
350 	}
351 	if (PX_DMA_NOCTX(mp->dmai_rdip))
352 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
353 
354 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
355 	attrp = &mp->dmai_attr;
356 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
357 	return (DDI_SUCCESS);
358 }
359 
/*
 * TGT_PFN_INBETWEEN - non-zero when pfn lies in the inclusive range
 * [bgn, end].  Every argument is parenthesized so that an expression
 * argument (e.g. "x & mask") is compared as a whole.  pfn is evaluated
 * twice, so it must not have side effects.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
361 
362 /*
363  * px_dma_type - determine which of the three types DMA (peer-to-peer,
364  *		mmu bypass, or mmu translate) we are asked to do.
365  *		Also checks pfn0 and rejects any non-peer-to-peer
366  *		requests for peer-only devices.
367  *
368  *	return values:
369  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
370  *		DDI_SUCCESS
371  *
372  *	dma handle members affected (set on exit):
373  *	mp->dmai_object		- dmareq->dmar_object
374  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
375  *	mp->dmai_flags   	- DMA type
376  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
377  *	mp->dmai_roffset 	- initialized to starting MMU page offset
378  *	mp->dmai_ndvmapages	- # of total MMU pages of entire object
379  */
380 int
381 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
382 {
383 	dev_info_t *dip = px_p->px_dip;
384 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
385 	px_pec_t *pec_p = px_p->px_pec_p;
386 	uint32_t offset;
387 	pfn_t pfn0;
388 	uint_t redzone;
389 
390 	mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
391 
392 	if (!(px_p->px_dev_caps & PX_DMA_SYNC_REQUIRED))
393 		mp->dmai_rflags |= DMP_NOSYNC;
394 
395 	switch (dobj_p->dmao_type) {
396 	case DMA_OTYP_BUFVADDR:
397 	case DMA_OTYP_VADDR: {
398 		page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
399 		caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
400 
401 		DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
402 		offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
403 		if (pplist) {				/* shadow list */
404 			mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
405 			pfn0 = page_pptonum(*pplist);
406 		} else {
407 			struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
408 			struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
409 			pfn0 = hat_getpfnum(hat_p, vaddr);
410 		}
411 		}
412 		break;
413 
414 	case DMA_OTYP_PAGES:
415 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
416 		mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
417 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
418 		break;
419 
420 	case DMA_OTYP_PADDR:
421 	default:
422 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
423 		    NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
424 		return (DDI_DMA_NOMAPPING);
425 	}
426 	if (pfn0 == PFN_INVALID) {
427 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
428 		    NAMEINST(dip), dobj_p);
429 		return (DDI_DMA_NOMAPPING);
430 	}
431 	if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
432 	    pec_p->pec_last32_pfn)) {
433 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
434 		goto done;	/* leave bypass and dvma flag as 0 */
435 	} else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
436 	    pec_p->pec_last64_pfn)) {
437 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
438 		goto done;	/* leave bypass and dvma flag as 0 */
439 	}
440 	if (PX_DMA_ISPEERONLY(mp)) {
441 		dev_info_t *rdip = mp->dmai_rdip;
442 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
443 		return (DDI_DMA_NOMAPPING);
444 	}
445 
446 	redzone = (mp->dmai_rflags & DDI_DMA_REDZONE) ||
447 	    (mp->dmai_flags & PX_DMAI_FLAGS_MAP_BUFZONE) ?
448 	    PX_DMAI_FLAGS_REDZONE : 0;
449 
450 	mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
451 	    PX_DMAI_FLAGS_BYPASS : (PX_DMAI_FLAGS_DVMA | redzone);
452 done:
453 	mp->dmai_object	 = *dobj_p;			/* whole object    */
454 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
455 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
456 	mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
457 	return (DDI_SUCCESS);
458 }
459 
460 /*
461  * px_dma_pgpfn - set up pfnlst array according to pages
462  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
463  */
464 /*ARGSUSED*/
465 static int
466 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
467 {
468 	int i;
469 	dev_info_t *dip = px_p->px_dip;
470 
471 	switch (mp->dmai_object.dmao_type) {
472 	case DMA_OTYP_BUFVADDR:
473 	case DMA_OTYP_VADDR: {
474 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
475 		DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
476 		    pplist, npages);
477 		for (i = 1; i < npages; i++) {
478 			px_iopfn_t pfn = page_pptonum(pplist[i]);
479 			PX_SET_MP_PFN1(mp, i, pfn);
480 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
481 		}
482 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
483 		}
484 		break;
485 
486 	case DMA_OTYP_PAGES: {
487 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
488 		DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
489 		for (i = 1; i < npages; i++, pp = pp->p_next) {
490 			px_iopfn_t pfn = page_pptonum(pp);
491 			PX_SET_MP_PFN1(mp, i, pfn);
492 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
493 		}
494 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
495 		}
496 		break;
497 
498 	default:	/* check is already done by px_dma_type */
499 		ASSERT(0);
500 		break;
501 	}
502 	return (DDI_SUCCESS);
503 }
504 
505 /*
506  * px_dma_vapfn - set up pfnlst array according to VA
507  *	VA/size pair: <normal, bypass, peer-to-peer>
508  *	pfn0 is skipped as it is already done.
509  *	In this case, the cached pfn0 is used to fill pfnlst[0]
510  */
511 static int
512 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
513 {
514 	dev_info_t *dip = px_p->px_dip;
515 	int i;
516 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
517 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
518 
519 	vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
520 	for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
521 		px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
522 		if (pfn == PFN_INVALID)
523 			goto err_badpfn;
524 		PX_SET_MP_PFN1(mp, i, pfn);
525 		DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
526 		    mp, i, pfn);
527 	}
528 	return (DDI_SUCCESS);
529 err_badpfn:
530 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
531 	return (DDI_DMA_NOMAPPING);
532 }
533 
534 /*
535  * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
536  *
537  * dependencies:
538  *	mp->dmai_ndvmapages	- set to total # of dma pages
539  *
540  * return value:
541  *	DDI_SUCCESS
542  *	DDI_DMA_NOMAPPING
543  */
544 int
545 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
546 {
547 	uint32_t npages = mp->dmai_ndvmapages;
548 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
549 	int i, ret, peer = PX_DMA_ISPTP(mp);
550 	int peer32 = PX_DMA_ISPTP32(mp);
551 	dev_info_t *dip = px_p->px_dip;
552 
553 	px_pec_t *pec_p = px_p->px_pec_p;
554 	px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
555 	    pec_p->pec_base64_pfn;
556 	px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
557 	    pec_p->pec_last64_pfn;
558 	px_iopfn_t pfn_adj = peer ? pfn_base : 0;
559 
560 	DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
561 	    mp, PX_MP_PFN0(mp) - pfn_adj);
562 	/* 1 page: no array alloc/fill, no mixed mode check */
563 	if (npages == 1) {
564 		PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
565 		return (DDI_SUCCESS);
566 	}
567 	/* allocate pfn array */
568 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
569 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
570 		if (waitfp != DDI_DMA_DONTWAIT)
571 			ddi_set_callback(waitfp, dmareq->dmar_arg,
572 			    &px_kmem_clid);
573 		return (DDI_DMA_NORESOURCES);
574 	}
575 	/* fill pfn array */
576 	PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
577 	if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
578 	    px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
579 		goto err;
580 
581 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
582 	for (i = 1; i < npages; i++) {
583 		px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
584 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
585 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
586 			    NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
587 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
588 			goto err;
589 		}
590 		DBG(DBG_DMA_MAP, dip,
591 		    "px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
592 		if (pfn_adj)
593 			PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
594 	}
595 	return (DDI_SUCCESS);
596 err:
597 	px_dma_freepfn(mp);
598 	return (ret);
599 }
600 
601 /*
602  * px_dvma_win() - trim requested DVMA size down to window size
603  *	The 1st window starts from offset and ends at page-aligned boundary.
604  *	From the 2nd window on, each window starts and ends at page-aligned
605  *	boundary except the last window ends at wherever requested.
606  *
607  *	accesses the following mp-> members:
608  *	mp->dmai_attr.dma_attr_count_max
609  *	mp->dmai_attr.dma_attr_seg
610  *	mp->dmai_roffset   - start offset of 1st window
611  *	mp->dmai_rflags (redzone)
612  *	mp->dmai_ndvmapages (for 1 page fast path)
613  *
614  *	sets the following mp-> members:
615  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
616  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
617  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
618  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
619  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
620  *
621  *	fixed - not changed across different DMA windows
622  */
623 /*ARGSUSED*/
624 int
625 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
626 {
627 	uint32_t redzone_sz	= PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
628 	size_t obj_sz		= mp->dmai_object.dmao_size;
629 	size_t xfer_sz;
630 	ulong_t pg_off;
631 
632 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
633 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
634 		mp->dmai_size = obj_sz;
635 		mp->dmai_winsize = MMU_PAGE_SIZE;
636 		mp->dmai_nwin = 1;
637 		goto done;
638 	}
639 
640 	pg_off	= mp->dmai_roffset;
641 	xfer_sz	= obj_sz + redzone_sz;
642 
643 	/* include redzone in nocross check */	{
644 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
645 		if (xfer_sz + pg_off - 1 > nocross)
646 			xfer_sz = nocross - pg_off + 1;
647 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
648 			DBG(DBG_DMA_MAP, px_p->px_dip,
649 			    "nocross too small: "
650 			    "%lx(%lx)+%lx+%lx < %llx\n",
651 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
652 			return (DDI_DMA_TOOBIG);
653 		}
654 	}
655 	xfer_sz -= redzone_sz;		/* restore transfer size  */
656 	/* check counter max */	{
657 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
658 		if (xfer_sz - 1 > count_max)
659 			xfer_sz = count_max + 1;
660 	}
661 	if (xfer_sz >= obj_sz) {
662 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
663 		mp->dmai_size = xfer_sz;
664 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
665 		mp->dmai_nwin = 1;
666 		goto done;
667 	}
668 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
669 		DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
670 		    obj_sz, pg_off, redzone_sz, xfer_sz);
671 		return (DDI_DMA_TOOBIG);
672 	}
673 
674 	xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
675 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
676 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
677 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
678 done:
679 	mp->dmai_winlst = NULL;
680 	px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
681 	return (DDI_SUCCESS);
682 }
683 
/*
 * fast track cache entry to mmu context: the low 3 bits of the entry
 * stay in bits 0-2, the remaining upper bits move up to bit 6 and above
 * (the upper 6 bits of a 9-bit cache entry land in bits 6-11), and the
 * three bits in between (mask 0x38) are always set.
 */
#define	MMU_FCE_TO_CTX(i)	((((i) >> 3) << 6) | 0x38 | ((i) & 0x7))
689 
690 /*
691  * px_dvma_map_fast - attempts to map fast trackable DVMA
692  */
693 /*ARGSUSED*/
694 int
695 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
696 {
697 	uint_t clustsz = px_dvma_page_cache_clustsz;
698 	uint_t entries = px_dvma_page_cache_entries;
699 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
700 	    mp->dmai_attr.dma_attr_flags);
701 	int i = mmu_p->mmu_dvma_addr_scan_start;
702 	uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
703 	px_dvma_addr_t dvma_pg;
704 	size_t npages = MMU_BTOP(mp->dmai_winsize);
705 	dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
706 
707 	extern uint8_t ldstub(uint8_t *);
708 	ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
709 	ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
710 
711 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
712 		;
713 	if (i >= entries) {
714 		lock_addr = mmu_p->mmu_dvma_cache_locks;
715 		i = 0;
716 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
717 			;
718 		if (i >= entries) {
719 #ifdef	PX_DMA_PROF
720 			px_dvmaft_exhaust++;
721 #endif	/* PX_DMA_PROF */
722 			return (DDI_DMA_NORESOURCES);
723 		}
724 	}
725 	mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
726 
727 	i *= clustsz;
728 	dvma_pg = mmu_p->dvma_base_pg + i;
729 
730 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages,
731 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, 0,
732 	    MMU_MAP_PFN) != DDI_SUCCESS) {
733 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
734 		    "px_lib_iommu_map failed\n");
735 		return (DDI_FAILURE);
736 	}
737 
738 	if (!PX_MAP_BUFZONE(mp))
739 		goto done;
740 
741 	DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
742 
743 	ASSERT(PX_HAS_REDZONE(mp));
744 
745 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1,
746 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, npages - 1,
747 	    MMU_MAP_PFN) != DDI_SUCCESS) {
748 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
749 		    "mapping REDZONE page failed\n");
750 
751 		(void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
752 		return (DDI_FAILURE);
753 	}
754 
755 done:
756 #ifdef PX_DMA_PROF
757 	px_dvmaft_success++;
758 #endif
759 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
760 	mp->dmai_offset = 0;
761 	mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
762 	PX_SAVE_MP_TTE(mp, attr);	/* save TTE template for unmapping */
763 	if (PX_DVMA_DBG_ON(mmu_p))
764 		px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
765 		    mp->dmai_size, mp);
766 	return (DDI_SUCCESS);
767 }
768 
769 /*
770  * px_dvma_map: map non-fasttrack DMA
771  *		Use quantum cache if single page DMA.
772  */
773 int
774 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
775 {
776 	uint_t npages = PX_DMA_WINNPGS(mp);
777 	px_dvma_addr_t dvma_pg, dvma_pg_index;
778 	void *dvma_addr;
779 	uint64_t tte = PX_GET_TTE_ATTR(mp->dmai_rflags,
780 	    mp->dmai_attr.dma_attr_flags);
781 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
782 	dev_info_t *dip = mp->dmai_rdip;
783 	int	ret = DDI_SUCCESS;
784 
785 	/*
786 	 * allocate dvma space resource and map in the first window.
787 	 * (vmem_t *vmp, size_t size,
788 	 *	size_t align, size_t phase, size_t nocross,
789 	 *	void *minaddr, void *maxaddr, int vmflag)
790 	 */
791 	if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
792 		dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
793 		    MMU_PAGE_SIZE, sleep);
794 		mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
795 #ifdef	PX_DMA_PROF
796 		px_dvma_vmem_alloc++;
797 #endif	/* PX_DMA_PROF */
798 	} else {
799 		dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
800 		    MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
801 		    MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
802 		    0,
803 		    mp->dmai_attr.dma_attr_seg + 1,
804 		    (void *)mp->dmai_attr.dma_attr_addr_lo,
805 		    (void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
806 		    sleep);
807 #ifdef	PX_DMA_PROF
808 		px_dvma_vmem_xalloc++;
809 #endif	/* PX_DMA_PROF */
810 	}
811 	dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
812 	dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
813 	DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
814 	    dvma_pg, dvma_pg_index);
815 	if (dvma_pg == 0)
816 		goto noresource;
817 
818 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
819 	mp->dmai_offset = 0;
820 	PX_SAVE_MP_TTE(mp, tte);	/* mp->dmai_tte = tte */
821 
822 	if ((ret = px_mmu_map_pages(mmu_p,
823 	    mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
824 		if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
825 			vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
826 			    MMU_PAGE_SIZE);
827 #ifdef PX_DMA_PROF
828 			px_dvma_vmem_free++;
829 #endif /* PX_DMA_PROF */
830 		} else {
831 			vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
832 			    MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
833 #ifdef PX_DMA_PROF
834 			px_dvma_vmem_xfree++;
835 #endif /* PX_DMA_PROF */
836 		}
837 	}
838 
839 	return (ret);
840 noresource:
841 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
842 		DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
843 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
844 		    &mmu_p->mmu_dvma_clid);
845 	}
846 	DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
847 	return (DDI_DMA_NORESOURCES);
848 }
849 
850 void
851 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
852 {
853 	px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
854 	px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
855 	dvma_addr = MMU_PTOB(dvma_pg);
856 
857 	if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
858 		px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
859 		ASSERT(index % px_dvma_page_cache_clustsz == 0);
860 		index /= px_dvma_page_cache_clustsz;
861 		ASSERT(index < px_dvma_page_cache_entries);
862 		mmu_p->mmu_dvma_cache_locks[index] = 0;
863 #ifdef	PX_DMA_PROF
864 		px_dvmaft_free++;
865 #endif	/* PX_DMA_PROF */
866 		return;
867 	}
868 
869 	if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
870 		vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
871 		    MMU_PAGE_SIZE);
872 #ifdef PX_DMA_PROF
873 		px_dvma_vmem_free++;
874 #endif /* PX_DMA_PROF */
875 	} else {
876 		size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
877 		vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
878 		    MMU_PTOB(npages));
879 #ifdef PX_DMA_PROF
880 		px_dvma_vmem_xfree++;
881 #endif /* PX_DMA_PROF */
882 	}
883 }
884 
885 /*
886  * DVMA mappings may have multiple windows, but each window always have
887  * one segment.
888  */
889 int
890 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
891 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
892 	uint_t cache_flags)
893 {
894 	switch (cmd) {
895 	case DDI_DMA_SYNC:
896 		return (px_lib_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
897 		    *offp, *lenp, cache_flags));
898 
899 	case DDI_DMA_HTOC: {
900 		int ret;
901 		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
902 		uint_t win_size = mp->dmai_winsize;
903 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;
904 
905 		if (off >= mp->dmai_object.dmao_size) {
906 			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
907 			    NAMEINST(mp->dmai_rdip), off);
908 			return (DDI_FAILURE);
909 		}
910 		off += mp->dmai_roffset;
911 		ret = px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
912 		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
913 		if (ret)
914 			return (ret);
915 		DBG(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
916 		    cp->dmac_address, cp->dmac_size, off, *offp);
917 
918 		/* adjust cookie addr/len if we are not on window boundary */
919 		ASSERT((off % win_size) == (off -
920 		    (PX_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
921 		off = PX_DMA_CURWIN(mp) ? off % win_size : *offp;
922 		ASSERT(cp->dmac_size > off);
923 		cp->dmac_laddress += off;
924 		cp->dmac_size -= off;
925 		DBG(DBG_DMA_CTL, dip, "HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
926 		    mp, cp->dmac_address, cp->dmac_size, off, wo_off);
927 		}
928 		return (DDI_SUCCESS);
929 
930 	case DDI_DMA_REPWIN:
931 		*offp = mp->dmai_offset;
932 		*lenp = mp->dmai_size;
933 		return (DDI_SUCCESS);
934 
935 	case DDI_DMA_MOVWIN: {
936 		off_t off = *offp;
937 		if (off >= mp->dmai_object.dmao_size)
938 			return (DDI_FAILURE);
939 		off += mp->dmai_roffset;
940 		return (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
941 		    off / mp->dmai_winsize, offp, lenp,
942 		    (ddi_dma_cookie_t *)objp, NULL));
943 		}
944 
945 	case DDI_DMA_NEXTWIN: {
946 		px_window_t win = PX_DMA_CURWIN(mp);
947 		if (offp) {
948 			if (*(px_window_t *)offp != win) {
949 				/* window not active */
950 				*(px_window_t *)objp = win; /* return cur win */
951 				return (DDI_DMA_STALE);
952 			}
953 			win++;
954 		} else	/* map win 0 */
955 			win = 0;
956 		if (win >= mp->dmai_nwin) {
957 			*(px_window_t *)objp = win - 1;
958 			return (DDI_DMA_DONE);
959 		}
960 		if (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
961 		    win, 0, 0, 0, 0)) {
962 			*(px_window_t *)objp = win - 1;
963 			return (DDI_FAILURE);
964 		}
965 		*(px_window_t *)objp = win;
966 		}
967 		return (DDI_SUCCESS);
968 
969 	case DDI_DMA_NEXTSEG:
970 		if (*(px_window_t *)offp != PX_DMA_CURWIN(mp))
971 			return (DDI_DMA_STALE);
972 		if (lenp)				/* only 1 seg allowed */
973 			return (DDI_DMA_DONE);
974 
975 		/* return mp as seg 0 */
976 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
977 		return (DDI_SUCCESS);
978 
979 	case DDI_DMA_SEGTOC:
980 		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
981 		    mp->dmai_size);
982 		*offp = mp->dmai_offset;
983 		*lenp = mp->dmai_size;
984 		return (DDI_SUCCESS);
985 
986 	case DDI_DMA_COFF: {
987 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
988 		if (cp->dmac_address < mp->dmai_mapping ||
989 		    (cp->dmac_address + cp->dmac_size) >
990 		    (mp->dmai_mapping + mp->dmai_size))
991 			return (DDI_FAILURE);
992 		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
993 		    mp->dmai_offset);
994 		}
995 		return (DDI_SUCCESS);
996 	default:
997 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
998 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
999 		break;
1000 	}
1001 	return (DDI_FAILURE);
1002 }
1003 
1004 void
1005 px_dma_freewin(ddi_dma_impl_t *mp)
1006 {
1007 	px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
1008 	for (win2_p = win_p; win_p; win2_p = win_p) {
1009 		win_p = win2_p->win_next;
1010 		kmem_free(win2_p, sizeof (px_dma_win_t) +
1011 		    sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
1012 	}
1013 	mp->dmai_nwin = 0;
1014 	mp->dmai_winlst = NULL;
1015 }
1016 
1017 /*
1018  * px_dma_newwin - create a dma window object and cookies
1019  *
1020  *	After the initial scan in px_dma_physwin(), which identifies
1021  *	a portion of the pfn array that belongs to a dma window,
1022  *	we are called to allocate and initialize representing memory
1023  *	resources. We know from the 1st scan the number of cookies
1024  *	or dma segment in this window so we can allocate a contiguous
1025  *	memory array for the dma cookies (The implementation of
1026  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
1027  *
1028  *	A second round scan is done on the pfn array to identify
1029  *	each dma segment and initialize its corresponding dma cookie.
1030  *	We don't need to do all the safety checking and we know they
1031  *	all belong to the same dma window.
1032  *
1033  *	Input:	cookie_no - # of cookies identified by the 1st scan
1034  *		start_idx - subscript of the pfn array for the starting pfn
1035  *		end_idx   - subscript of the last pfn in dma window
1036  *		win_pp    - pointer to win_next member of previous window
1037  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
1038  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
1039  *	Note:	Each cookie and window size are all initialized on page
1040  *		boundary. This is not true for the 1st cookie of the 1st
1041  *		window and the last cookie of the last window.
1042  *		We fix that later in upper layer which has access to size
1043  *		and offset info.
1044  *
1045  */
1046 /*ARGSUSED*/
1047 static int
1048 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
1049 	uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
1050 	px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
1051 {
1052 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
1053 	ddi_dma_cookie_t *cookie_p;
1054 	uint32_t pfn_no = 1;
1055 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
1056 	px_iopfn_t prev_pfn = pfn;
1057 	uint64_t baddr, seg_pfn0 = pfn;
1058 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
1059 	px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
1060 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1061 	io_attributes_t	attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1062 	    mp->dmai_attr.dma_attr_flags);
1063 
1064 	if (!win_p)
1065 		goto noresource;
1066 
1067 	win_p->win_next = NULL;
1068 	win_p->win_ncookies = cookie_no;
1069 	win_p->win_curseg = 0;	/* start from segment 0 */
1070 	win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
1071 	/* win_p->win_offset is left uninitialized */
1072 
1073 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1074 	start_idx++;
1075 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
1076 		pfn = PX_GET_MP_PFN1(mp, start_idx);
1077 		if ((pfn == prev_pfn + 1) &&
1078 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1079 			continue;
1080 
1081 		/* close up the cookie up to (including) prev_pfn */
1082 		baddr = MMU_PTOB(seg_pfn0);
1083 		if (bypass && (px_lib_iommu_getbypass(dip,
1084 		    baddr, attr, &baddr) != DDI_SUCCESS))
1085 			return (DDI_FAILURE);
1086 
1087 		MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1088 		DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
1089 		    MMU_PTOB(seg_pfn0), pfn_no);
1090 
1091 		cookie_p++;	/* advance to next available cookie cell */
1092 		pfn_no = 0;
1093 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
1094 	}
1095 
1096 	baddr = MMU_PTOB(seg_pfn0);
1097 	if (bypass && (px_lib_iommu_getbypass(dip,
1098 	    baddr, attr, &baddr) != DDI_SUCCESS))
1099 		return (DDI_FAILURE);
1100 
1101 	MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1102 	DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1103 	    MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
1104 #ifdef	DEBUG
1105 	cookie_p++;
1106 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1107 #endif	/* DEBUG */
1108 	*win_pp = win_p;
1109 	return (DDI_SUCCESS);
1110 noresource:
1111 	if (waitfp != DDI_DMA_DONTWAIT)
1112 		ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
1113 	return (DDI_DMA_NORESOURCES);
1114 }
1115 
1116 /*
1117  * px_dma_adjust - adjust 1st and last cookie and window sizes
1118  *	remove initial dma page offset from 1st cookie and window size
1119  *	remove last dma page remainder from last cookie and window size
1120  *	fill win_offset of each dma window according to just fixed up
1121  *		each window sizes
1122  *	px_dma_win_t members modified:
1123  *	win_p->win_offset - this window's offset within entire DMA object
1124  *	win_p->win_size	  - xferrable size (in bytes) for this window
1125  *
1126  *	ddi_dma_impl_t members modified:
1127  *	mp->dmai_size	  - 1st window xferrable size
1128  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1129  *
1130  *	ddi_dma_cookie_t members modified:
1131  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1132  *	cookie_p->dmac_laddress - 1st cookie add page offset
1133  */
1134 static void
1135 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
1136 {
1137 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1138 	size_t pg_offset = mp->dmai_roffset;
1139 	size_t win_offset = 0;
1140 
1141 	cookie_p->dmac_size -= pg_offset;
1142 	cookie_p->dmac_laddress |= pg_offset;
1143 	win_p->win_size -= pg_offset;
1144 	DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1145 
1146 	mp->dmai_size = win_p->win_size;
1147 	mp->dmai_offset = 0;
1148 
1149 	pg_offset += mp->dmai_object.dmao_size;
1150 	pg_offset &= MMU_PAGE_OFFSET;
1151 	if (pg_offset)
1152 		pg_offset = MMU_PAGE_SIZE - pg_offset;
1153 	DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1154 
1155 	for (; win_p->win_next; win_p = win_p->win_next) {
1156 		DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1157 		win_p->win_offset = win_offset;
1158 		win_offset += win_p->win_size;
1159 	}
1160 	/* last window */
1161 	win_p->win_offset = win_offset;
1162 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1163 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1164 	win_p->win_size -= pg_offset;
1165 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1166 }
1167 
1168 /*
1169  * px_dma_physwin() - carve up dma windows using physical addresses.
1170  *	Called to handle mmu bypass and pci peer-to-peer transfers.
1171  *	Calls px_dma_newwin() to allocate window objects.
1172  *
1173  * Dependency: mp->dmai_pfnlst points to an array of pfns
1174  *
1175  * 1. Each dma window is represented by a px_dma_win_t object.
1176  *	The object will be casted to ddi_dma_win_t and returned
1177  *	to leaf driver through the DDI interface.
1178  * 2. Each dma window can have several dma segments with each
1179  *	segment representing a physically contiguous either memory
1180  *	space (if we are doing an mmu bypass transfer) or pci address
1181  *	space (if we are doing a peer-to-peer transfer).
1182  * 3. Each segment has a DMA cookie to program the DMA engine.
1183  *	The cookies within each DMA window must be located in a
1184  *	contiguous array per ddi_dma_nextcookie(9f).
1185  * 4. The number of DMA segments within each DMA window cannot exceed
1186  *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
1187  *	too large to fit in the sgllen, the rest needs to be
1188  *	relocated to the next dma window.
1189  * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
1190  *	and nocross restrictions while bypass DMA follows the set of
1191  *	restrictions with system limits factored in.
1192  *
1193  * Return:
1194  *	mp->dmai_winlst	 - points to a link list of px_dma_win_t objects.
1195  *		Each px_dma_win_t object on the link list contains
1196  *		infomation such as its window size (# of pages),
1197  *		starting offset (also see Restriction), an array of
1198  *		DMA cookies, and # of cookies in the array.
1199  *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
1200  *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
1201  *	mp->dmai_mapping - starting cookie address
1202  *	mp->dmai_rflags	 - consistent, nosync, no redzone
1203  *	mp->dmai_cookie	 - start of cookie table of the 1st DMA window
1204  *
1205  * Restriction:
1206  *	Each px_dma_win_t object can theoratically start from any offset
1207  *	since the mmu is not involved. However, this implementation
1208  *	always make windows start from page aligned offset (except
1209  *	the 1st window, which follows the requested offset) due to the
1210  *	fact that we are handed a pfn list. This does require device's
1211  *	count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
1212  */
1213 int
1214 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
1215 {
1216 	uint_t npages = mp->dmai_ndvmapages;
1217 	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
1218 	px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
1219 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
1220 	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
1221 	uint64_t count_max, bypass_addr = 0;
1222 	px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
1223 	ddi_dma_cookie_t *cookie0_p;
1224 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1225 	    mp->dmai_attr.dma_attr_flags);
1226 	dev_info_t *dip = px_p->px_dip;
1227 
1228 	ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
1229 	if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
1230 		ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
1231 		uint64_t nocross = dev_attr_p->dma_attr_seg;
1232 		px_pec_t *pec_p = px_p->px_pec_p;
1233 		px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
1234 		    pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
1235 		    pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
1236 
1237 		if (nocross && (nocross < UINT32_MAX))
1238 			return (DDI_DMA_NOMAPPING);
1239 		if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
1240 			return (DDI_DMA_NOMAPPING);
1241 		pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
1242 		pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
1243 		pfn_hi = MIN(pfn_hi, pfn_last);
1244 		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
1245 			return (DDI_DMA_NOMAPPING);
1246 
1247 		count_max = dev_attr_p->dma_attr_count_max;
1248 		count_max = MIN(count_max, nocross);
1249 		/*
1250 		 * the following count_max trim is not done because we are
1251 		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
1252 		 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
1253 		 */
1254 	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
1255 		count_max = mp->dmai_attr.dma_attr_count_max;
1256 		pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
1257 		pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
1258 
1259 		if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
1260 		    attr, &bypass_addr) != DDI_SUCCESS) {
1261 			cmn_err(CE_WARN, "bypass cookie failure %lx\n", pfn);
1262 			return (DDI_DMA_NOMAPPING);
1263 		}
1264 		pfn = MMU_BTOP(bypass_addr);
1265 	}
1266 
1267 	/* pfn: absolute (bypass mode) or relative (p2p mode) */
1268 	for (prev_pfn = pfn, i = 1; i < npages;
1269 	    i++, prev_pfn = pfn, pfn_no++) {
1270 		pfn = PX_GET_MP_PFN1(mp, i);
1271 		if (bypass_addr) {
1272 			if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
1273 			    &bypass_addr) != DDI_SUCCESS) {
1274 				ret = DDI_DMA_NOMAPPING;
1275 				goto err;
1276 			}
1277 			pfn = MMU_BTOP(bypass_addr);
1278 		}
1279 		if ((pfn == prev_pfn + 1) &&
1280 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1281 			continue;
1282 		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
1283 			ret = DDI_DMA_NOMAPPING;
1284 			goto err;
1285 		}
1286 		cookie_no++;
1287 		pfn_no = 0;
1288 		if (cookie_no < sgllen)
1289 			continue;
1290 
1291 		DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1292 		    win_pfn0_index, i - 1, cookie_no);
1293 		if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
1294 		    win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
1295 			goto err;
1296 
1297 		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
1298 		win_no++;
1299 		win_pfn0_index = i;
1300 		cookie_no = 0;
1301 	}
1302 	if (pfn > pfn_hi) {
1303 		ret = DDI_DMA_NOMAPPING;
1304 		goto err;
1305 	}
1306 	cookie_no++;
1307 	DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1308 	    win_pfn0_index, i - 1, cookie_no);
1309 	if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
1310 	    i - 1, win_pp, count_max, bypass_addr))
1311 		goto err;
1312 	win_no++;
1313 	px_dma_adjust(dmareq, mp, mp->dmai_winlst);
1314 	mp->dmai_nwin = win_no;
1315 	mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
1316 	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
1317 	mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
1318 	cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
1319 	mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
1320 	mp->dmai_mapping = cookie0_p->dmac_laddress;
1321 
1322 	px_dma_freepfn(mp);
1323 	return (DDI_DMA_MAPPED);
1324 err:
1325 	px_dma_freewin(mp);
1326 	return (ret);
1327 }
1328 
1329 int
1330 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1331 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1332 	uint_t cache_flags)
1333 {
1334 	switch (cmd) {
1335 	case DDI_DMA_SYNC:
1336 		return (DDI_SUCCESS);
1337 
1338 	case DDI_DMA_HTOC: {
1339 		off_t off = *offp;
1340 		ddi_dma_cookie_t *loop_cp, *cp;
1341 		px_dma_win_t *win_p = mp->dmai_winlst;
1342 
1343 		if (off >= mp->dmai_object.dmao_size)
1344 			return (DDI_FAILURE);
1345 
1346 		/* locate window */
1347 		while (win_p->win_offset + win_p->win_size <= off)
1348 			win_p = win_p->win_next;
1349 
1350 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
1351 		mp->dmai_offset = win_p->win_offset;
1352 		mp->dmai_size   = win_p->win_size;
1353 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
1354 
1355 		/* adjust cookie addr/len if we are not on cookie boundary */
1356 		off -= win_p->win_offset;	   /* offset within window */
1357 		for (; off >= loop_cp->dmac_size; loop_cp++)
1358 			off -= loop_cp->dmac_size; /* offset within cookie */
1359 
1360 		mp->dmai_cookie = loop_cp + 1;
1361 		win_p->win_curseg = loop_cp - cp;
1362 		cp = (ddi_dma_cookie_t *)objp;
1363 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
1364 		    loop_cp->dmac_size - off);
1365 
1366 		DBG(DBG_DMA_CTL, dip,
1367 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1368 		    cp->dmac_laddress, cp->dmac_size);
1369 		}
1370 		return (DDI_SUCCESS);
1371 
1372 	case DDI_DMA_REPWIN:
1373 		*offp = mp->dmai_offset;
1374 		*lenp = mp->dmai_size;
1375 		return (DDI_SUCCESS);
1376 
1377 	case DDI_DMA_MOVWIN: {
1378 		off_t off = *offp;
1379 		ddi_dma_cookie_t *cp;
1380 		px_dma_win_t *win_p = mp->dmai_winlst;
1381 
1382 		if (off >= mp->dmai_object.dmao_size)
1383 			return (DDI_FAILURE);
1384 
1385 		/* locate window */
1386 		while (win_p->win_offset + win_p->win_size <= off)
1387 			win_p = win_p->win_next;
1388 
1389 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1390 		mp->dmai_offset = win_p->win_offset;
1391 		mp->dmai_size   = win_p->win_size;
1392 		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 star addr */
1393 		mp->dmai_cookie = cp + 1;
1394 		win_p->win_curseg = 0;
1395 
1396 		*(ddi_dma_cookie_t *)objp = *cp;
1397 		*offp = win_p->win_offset;
1398 		*lenp = win_p->win_size;
1399 		DBG(DBG_DMA_CTL, dip,
1400 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1401 		    cp->dmac_laddress, cp->dmac_size);
1402 		}
1403 		return (DDI_SUCCESS);
1404 
1405 	case DDI_DMA_NEXTWIN: {
1406 		px_dma_win_t *win_p = *(px_dma_win_t **)offp;
1407 		px_dma_win_t **nw_pp = (px_dma_win_t **)objp;
1408 		ddi_dma_cookie_t *cp;
1409 		if (!win_p) {
1410 			*nw_pp = mp->dmai_winlst;
1411 			return (DDI_SUCCESS);
1412 		}
1413 
1414 		if (win_p->win_offset != mp->dmai_offset)
1415 			return (DDI_DMA_STALE);
1416 		if (!win_p->win_next)
1417 			return (DDI_DMA_DONE);
1418 		win_p = win_p->win_next;
1419 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1420 		mp->dmai_offset = win_p->win_offset;
1421 		mp->dmai_size   = win_p->win_size;
1422 		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 star addr */
1423 		mp->dmai_cookie = cp + 1;
1424 		win_p->win_curseg = 0;
1425 		*nw_pp = win_p;
1426 		}
1427 		return (DDI_SUCCESS);
1428 
1429 	case DDI_DMA_NEXTSEG: {
1430 		px_dma_win_t *w_p = *(px_dma_win_t **)offp;
1431 		if (w_p->win_offset != mp->dmai_offset)
1432 			return (DDI_DMA_STALE);
1433 		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
1434 			return (DDI_DMA_DONE);
1435 		w_p->win_curseg++;
1436 		}
1437 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
1438 		return (DDI_SUCCESS);
1439 
1440 	case DDI_DMA_SEGTOC: {
1441 		px_dma_win_t *win_p = mp->dmai_winlst;
1442 		off_t off = mp->dmai_offset;
1443 		ddi_dma_cookie_t *cp;
1444 		int i;
1445 
1446 		/* locate active window */
1447 		for (; win_p->win_offset != off; win_p = win_p->win_next)
1448 			;
1449 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1450 		for (i = 0; i < win_p->win_curseg; i++, cp++)
1451 			off += cp->dmac_size;
1452 		*offp = off;
1453 		*lenp = cp->dmac_size;
1454 		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
1455 		}
1456 		return (DDI_SUCCESS);
1457 
1458 	case DDI_DMA_COFF: {
1459 		px_dma_win_t *win_p;
1460 		ddi_dma_cookie_t *cp;
1461 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
1462 		size_t win_off;
1463 
1464 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
1465 			int i;
1466 			win_off = 0;
1467 			cp = (ddi_dma_cookie_t *)(win_p + 1);
1468 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
1469 				size_t sz = cp->dmac_size;
1470 
1471 				addr = cp->dmac_laddress;
1472 				if ((addr <= key) && (addr + sz >= key))
1473 					goto found;
1474 				win_off += sz;
1475 			}
1476 		}
1477 		return (DDI_FAILURE);
1478 found:
1479 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
1480 		return (DDI_SUCCESS);
1481 		}
1482 	default:
1483 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1484 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1485 		break;
1486 	}
1487 	return (DDI_FAILURE);
1488 }
1489 
1490 static void
1491 px_dvma_debug_init(px_mmu_t *mmu_p)
1492 {
1493 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1494 	ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
1495 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
1496 
1497 	mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
1498 	mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
1499 
1500 	mmu_p->dvma_active_list = NULL;
1501 	mmu_p->dvma_alloc_rec_index = 0;
1502 	mmu_p->dvma_free_rec_index = 0;
1503 	mmu_p->dvma_active_count = 0;
1504 }
1505 
1506 void
1507 px_dvma_debug_fini(px_mmu_t *mmu_p)
1508 {
1509 	struct px_dvma_rec *prev, *ptr;
1510 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1511 	uint64_t mask = ~(1ull << mmu_p->mmu_inst);
1512 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
1513 
1514 	if (mmu_p->dvma_alloc_rec) {
1515 		kmem_free(mmu_p->dvma_alloc_rec, sz);
1516 		mmu_p->dvma_alloc_rec = NULL;
1517 	}
1518 	if (mmu_p->dvma_free_rec) {
1519 		kmem_free(mmu_p->dvma_free_rec, sz);
1520 		mmu_p->dvma_free_rec = NULL;
1521 	}
1522 
1523 	prev = mmu_p->dvma_active_list;
1524 	if (!prev)
1525 		return;
1526 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1527 		kmem_free(prev, sizeof (struct px_dvma_rec));
1528 	kmem_free(prev, sizeof (struct px_dvma_rec));
1529 
1530 	mmu_p->dvma_active_list = NULL;
1531 	mmu_p->dvma_alloc_rec_index = 0;
1532 	mmu_p->dvma_free_rec_index = 0;
1533 	mmu_p->dvma_active_count = 0;
1534 
1535 	px_dvma_debug_off &= mask;
1536 	px_dvma_debug_on &= mask;
1537 }
1538 
1539 void
1540 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1541 	ddi_dma_impl_t *mp)
1542 {
1543 	struct px_dvma_rec *ptr;
1544 	mutex_enter(&mmu_p->dvma_debug_lock);
1545 
1546 	if (!mmu_p->dvma_alloc_rec)
1547 		px_dvma_debug_init(mmu_p);
1548 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1549 		px_dvma_debug_fini(mmu_p);
1550 		goto done;
1551 	}
1552 
1553 	ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
1554 	ptr->dvma_addr = address;
1555 	ptr->len = len;
1556 	ptr->mp = mp;
1557 	if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
1558 		mmu_p->dvma_alloc_rec_index = 0;
1559 
1560 	ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
1561 	ptr->dvma_addr = address;
1562 	ptr->len = len;
1563 	ptr->mp = mp;
1564 
1565 	ptr->next = mmu_p->dvma_active_list;
1566 	mmu_p->dvma_active_list = ptr;
1567 	mmu_p->dvma_active_count++;
1568 done:
1569 	mutex_exit(&mmu_p->dvma_debug_lock);
1570 }
1571 
1572 void
1573 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1574     ddi_dma_impl_t *mp)
1575 {
1576 	struct px_dvma_rec *ptr, *ptr_save;
1577 	mutex_enter(&mmu_p->dvma_debug_lock);
1578 
1579 	if (!mmu_p->dvma_alloc_rec)
1580 		px_dvma_debug_init(mmu_p);
1581 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1582 		px_dvma_debug_fini(mmu_p);
1583 		goto done;
1584 	}
1585 
1586 	ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
1587 	ptr->dvma_addr = address;
1588 	ptr->len = len;
1589 	ptr->mp = mp;
1590 	if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
1591 		mmu_p->dvma_free_rec_index = 0;
1592 
1593 	ptr_save = mmu_p->dvma_active_list;
1594 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1595 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1596 			break;
1597 		ptr_save = ptr;
1598 	}
1599 	if (!ptr) {
1600 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1601 		    (long)address, len);
1602 		goto done;
1603 	}
1604 	if (ptr == mmu_p->dvma_active_list)
1605 		mmu_p->dvma_active_list = ptr->next;
1606 	else
1607 		ptr_save->next = ptr->next;
1608 	kmem_free(ptr, sizeof (struct px_dvma_rec));
1609 	mmu_p->dvma_active_count--;
1610 done:
1611 	mutex_exit(&mmu_p->dvma_debug_lock);
1612 }
1613 
1614 #ifdef	DEBUG
1615 void
1616 px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
1617 {
1618 	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
1619 	    hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
1620 	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
1621 	    hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
1622 	    hp->dmai_nwin);
1623 	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
1624 	    hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
1625 	DBG(flag|DBG_CONT, dip, "\twinlst=%x obj=%p attr=%p ckp=%p\n",
1626 	    hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
1627 	    hp->dmai_cookie);
1628 }
1629 #endif	/* DEBUG */
1630