xref: /titanic_51/usr/src/uts/sun4/io/px/px_dma.c (revision 3db30c357c20c1eb09687fd0194e0ca62d6358cb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PCI Express nexus DVMA and DMA core routines:
30  *	dma_map/dma_bind_handle implementation
31  *	bypass and peer-to-peer support
32  *	fast track DVMA space allocation
33  *	runtime DVMA debug
34  */
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/sysmacros.h>
39 #include <sys/sunddi.h>
40 #include <sys/ddi_impldefs.h>
41 #include "px_obj.h"
42 
43 /*LINTLIBRARY*/
44 
45 /*
46  * px_dma_allocmp - Allocate a pci dma implementation structure
47  *
48  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
49  * to hold unmodified device limits. The ddi_dma_attr inside the
50  * ddi_dma_impl structure is augumented with system limits to enhance
51  * DVMA performance at runtime. The unaugumented device limits saved
52  * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
53  * strictly for peer-to-peer transfers which do not obey system limits.
54  *
55  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
56  */
57 ddi_dma_impl_t *
58 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
59 	caddr_t arg)
60 {
61 	register ddi_dma_impl_t *mp;
62 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
63 
64 	/* Caution: we don't use zalloc to enhance performance! */
65 	if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
66 		DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
67 		if (waitfp != DDI_DMA_DONTWAIT) {
68 			DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
69 			ddi_set_callback(waitfp, arg, &px_kmem_clid);
70 		}
71 		return (mp);
72 	}
73 
74 	mp->dmai_rdip = rdip;
75 	mp->dmai_flags = 0;
76 	mp->dmai_pfnlst = NULL;
77 	mp->dmai_winlst = NULL;
78 
79 	/*
80 	 * kmem_alloc debug: the following fields are not zero-ed
81 	 * mp->dmai_mapping = 0;
82 	 * mp->dmai_size = 0;
83 	 * mp->dmai_offset = 0;
84 	 * mp->dmai_minxfer = 0;
85 	 * mp->dmai_burstsizes = 0;
86 	 * mp->dmai_ndvmapages = 0;
87 	 * mp->dmai_pool/roffset = 0;
88 	 * mp->dmai_rflags = 0;
89 	 * mp->dmai_inuse/flags
90 	 * mp->dmai_nwin = 0;
91 	 * mp->dmai_winsize = 0;
92 	 * mp->dmai_nexus_private/tte = 0;
93 	 * mp->dmai_iopte/pfnlst
94 	 * mp->dmai_sbi/pfn0 = 0;
95 	 * mp->dmai_minfo/winlst/fdvma
96 	 * mp->dmai_rdip
97 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
98 	 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
99 	 * mp->dmai_cookie = 0;
100 	 */
101 
102 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
103 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
104 	mp->dmai_fault = 0;
105 	mp->dmai_fault_check = NULL;
106 	mp->dmai_fault_notify = NULL;
107 
108 	mp->dmai_error.err_ena = 0;
109 	mp->dmai_error.err_status = DDI_FM_OK;
110 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
111 	mp->dmai_error.err_ontrap = NULL;
112 	mp->dmai_error.err_fep = NULL;
113 	mp->dmai_error.err_cf = NULL;
114 
115 	/*
116 	 * For a given rdip, set mp->dmai_bdf with the bdf value of px's
117 	 * immediate child. As we move down the PCIe fabric, this field
118 	 * may be modified by switch and bridge drivers.
119 	 */
120 	mp->dmai_bdf = pcie_get_bdf_for_dma_xfer(dip, rdip);
121 
122 	return (mp);
123 }
124 
125 void
126 px_dma_freemp(ddi_dma_impl_t *mp)
127 {
128 	if (mp->dmai_ndvmapages > 1)
129 		px_dma_freepfn(mp);
130 	if (mp->dmai_winlst)
131 		px_dma_freewin(mp);
132 	kmem_free(mp, sizeof (px_dma_hdl_t));
133 }
134 
135 void
136 px_dma_freepfn(ddi_dma_impl_t *mp)
137 {
138 	void *addr = mp->dmai_pfnlst;
139 	if (addr) {
140 		size_t npages = mp->dmai_ndvmapages;
141 		if (npages > 1)
142 			kmem_free(addr, npages * sizeof (px_iopfn_t));
143 		mp->dmai_pfnlst = NULL;
144 	}
145 	mp->dmai_ndvmapages = 0;
146 }
147 
148 /*
149  * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
150  *			and convert dmareq->dmar_limits to mp->dmai_attr
151  *
152  * ddi_dma_impl_t member modified     input
153  * ------------------------------------------------------------------------
154  * mp->dmai_minxfer		    - dev
155  * mp->dmai_burstsizes		    - dev
156  * mp->dmai_flags		    - no limit? peer-to-peer only?
157  *
158  * ddi_dma_attr member modified       input
159  * ------------------------------------------------------------------------
160  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
161  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
162  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
163  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
164  * mp->dmai_attr.dma_attr_align     - 1         (no alignment restriction)
165  *
166  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
167  */
168 ddi_dma_impl_t *
169 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
170 	ddi_dma_req_t *dmareq)
171 {
172 	ddi_dma_impl_t *mp;
173 	ddi_dma_attr_t *attr_p;
174 	uint64_t syslo		= mmu_p->mmu_dvma_base;
175 	uint64_t syshi		= mmu_p->mmu_dvma_end;
176 	uint64_t fasthi		= mmu_p->mmu_dvma_fast_end;
177 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
178 	uint32_t count_max	= lim_p->dlim_cntr_max;
179 	uint64_t lo		= lim_p->dlim_addr_lo;
180 	uint64_t hi		= lim_p->dlim_addr_hi;
181 	if (hi <= lo) {
182 		DBG(DBG_DMA_MAP, dip, "Bad limits\n");
183 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
184 	}
185 	if (!count_max)
186 		count_max--;
187 
188 	if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
189 		dmareq->dmar_arg)))
190 		return (NULL);
191 
192 	/* store original dev input at the 2nd ddi_dma_attr */
193 	attr_p = PX_DEV_ATTR(mp);
194 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
195 	SET_DMAALIGN(attr_p, 1);
196 
197 	lo = MAX(lo, syslo);
198 	hi = MIN(hi, syshi);
199 	if (hi <= lo)
200 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
201 	count_max = MIN(count_max, hi - lo);
202 
203 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
204 		mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
205 			PX_DMAI_FLAGS_NOSYSLIMIT;
206 	else {
207 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
208 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
209 	}
210 	if (PX_DMA_NOCTX(rdip))
211 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
212 
213 	/* store augumented dev input to mp->dmai_attr */
214 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
215 	attr_p = &mp->dmai_attr;
216 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
217 	SET_DMAALIGN(attr_p, 1);
218 	return (mp);
219 }
220 
221 /*
222  * Called from px_attach to check for bypass dma support and set
223  * flags accordingly.
224  */
225 int
226 px_dma_attach(px_t *px_p)
227 {
228 	uint64_t baddr;
229 
230 	if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
231 			PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
232 			&baddr) != DDI_ENOTSUP)
233 		/* ignore all other errors */
234 		px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
235 
236 	px_p->px_dma_sync_opt = ddi_prop_get_int(DDI_DEV_T_ANY,
237 	    px_p->px_dip, DDI_PROP_DONTPASS, "dma-sync-options", 0);
238 
239 	if (px_p->px_dma_sync_opt != 0)
240 		px_p->px_dev_caps |= PX_DMA_SYNC_REQUIRED;
241 
242 	return (DDI_SUCCESS);
243 }
244 
245 /*
246  * px_dma_attr2hdl
247  *
248  * This routine is called from the alloc handle entry point to sanity check the
249  * dma attribute structure.
250  *
251  * use by: px_dma_allochdl()
252  *
253  * return value:
254  *
255  *	DDI_SUCCESS		- on success
256  *	DDI_DMA_BADATTR		- attribute has invalid version number
257  *				  or address limits exclude dvma space
258  */
259 int
260 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
261 {
262 	px_mmu_t *mmu_p = px_p->px_mmu_p;
263 	uint64_t syslo, syshi;
264 	int	ret;
265 	ddi_dma_attr_t *attrp		= PX_DEV_ATTR(mp);
266 	uint64_t hi			= attrp->dma_attr_addr_hi;
267 	uint64_t lo			= attrp->dma_attr_addr_lo;
268 	uint64_t align			= attrp->dma_attr_align;
269 	uint64_t nocross		= attrp->dma_attr_seg;
270 	uint64_t count_max		= attrp->dma_attr_count_max;
271 
272 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
273 		attrp, HI32(count_max), LO32(count_max));
274 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
275 		HI32(hi), LO32(hi), HI32(lo), LO32(lo));
276 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
277 		HI32(nocross), LO32(nocross), HI32(align), LO32(align));
278 
279 	if (!nocross)
280 		nocross--;
281 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
282 
283 		DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
284 		/*
285 		 * If Bypass DMA is not supported, return error so that
286 		 * target driver can fall back to dvma mode of operation
287 		 */
288 		if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
289 			return (DDI_DMA_BADATTR);
290 		mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
291 		if (nocross != UINT64_MAX)
292 			return (DDI_DMA_BADATTR);
293 		if (align && (align > MMU_PAGE_SIZE))
294 			return (DDI_DMA_BADATTR);
295 		align = 1; /* align on 1 page boundary */
296 
297 		/* do a range check and get the limits */
298 		ret = px_lib_dma_bypass_rngchk(px_p->px_dip, attrp,
299 				&syslo, &syshi);
300 		if (ret != DDI_SUCCESS)
301 			return (ret);
302 	} else { /* MMU_XLATE or PEER_TO_PEER */
303 		align = MAX(align, MMU_PAGE_SIZE) - 1;
304 		if ((align & nocross) != align) {
305 			dev_info_t *rdip = mp->dmai_rdip;
306 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
307 				NAMEINST(rdip));
308 			return (DDI_DMA_BADATTR);
309 		}
310 		align = MMU_BTOP(align + 1);
311 		syslo = mmu_p->mmu_dvma_base;
312 		syshi = mmu_p->mmu_dvma_end;
313 	}
314 	if (hi <= lo) {
315 		dev_info_t *rdip = mp->dmai_rdip;
316 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
317 		return (DDI_DMA_BADATTR);
318 	}
319 	lo = MAX(lo, syslo);
320 	hi = MIN(hi, syshi);
321 	if (!count_max)
322 		count_max--;
323 
324 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
325 		HI32(hi), LO32(hi), HI32(lo), LO32(lo));
326 	if (hi <= lo) {
327 		/*
328 		 * If this is an IOMMU bypass access, the caller can't use
329 		 * the required addresses, so fail it.  Otherwise, it's
330 		 * peer-to-peer; ensure that the caller has no alignment or
331 		 * segment size restrictions.
332 		 */
333 		if ((mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ||
334 		    (nocross < UINT32_MAX) || (align > 1))
335 			return (DDI_DMA_BADATTR);
336 
337 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
338 	} else /* set practical counter_max value */
339 		count_max = MIN(count_max, hi - lo);
340 
341 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
342 		mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
343 			PX_DMAI_FLAGS_NOFASTLIMIT;
344 	else {
345 		syshi = mmu_p->mmu_dvma_fast_end;
346 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
347 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
348 	}
349 	if (PX_DMA_NOCTX(mp->dmai_rdip))
350 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
351 
352 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
353 	attrp = &mp->dmai_attr;
354 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
355 	return (DDI_SUCCESS);
356 }
357 
/*
 * True when pfn lies in the inclusive range [bgn, end]. Every argument is
 * parenthesized so that expression arguments (e.g. "a & b") keep their
 * meaning; note that pfn is evaluated twice.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
359 
360 /*
361  * px_dma_type - determine which of the three types DMA (peer-to-peer,
362  *		mmu bypass, or mmu translate) we are asked to do.
363  *		Also checks pfn0 and rejects any non-peer-to-peer
364  *		requests for peer-only devices.
365  *
366  *	return values:
367  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
368  *		DDI_SUCCESS
369  *
370  *	dma handle members affected (set on exit):
371  *	mp->dmai_object		- dmareq->dmar_object
372  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
373  *	mp->dmai_flags   	- DMA type
374  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
375  *	mp->dmai_roffset 	- initialized to starting MMU page offset
376  *	mp->dmai_ndvmapages	- # of total MMU pages of entire object
377  */
378 int
379 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
380 {
381 	dev_info_t *dip = px_p->px_dip;
382 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
383 	px_pec_t *pec_p = px_p->px_pec_p;
384 	uint32_t offset;
385 	pfn_t pfn0;
386 	uint_t redzone;
387 
388 	mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
389 
390 	if (!(px_p->px_dev_caps & PX_DMA_SYNC_REQUIRED))
391 		mp->dmai_rflags |= DMP_NOSYNC;
392 
393 	switch (dobj_p->dmao_type) {
394 	case DMA_OTYP_BUFVADDR:
395 	case DMA_OTYP_VADDR: {
396 		page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
397 		caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
398 
399 		DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
400 		offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
401 		if (pplist) {				/* shadow list */
402 			mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
403 			pfn0 = page_pptonum(*pplist);
404 		} else {
405 			struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
406 			struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
407 			pfn0 = hat_getpfnum(hat_p, vaddr);
408 		}
409 		}
410 		break;
411 
412 	case DMA_OTYP_PAGES:
413 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
414 		mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
415 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
416 		break;
417 
418 	case DMA_OTYP_PADDR:
419 	default:
420 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
421 			NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
422 		return (DDI_DMA_NOMAPPING);
423 	}
424 	if (pfn0 == PFN_INVALID) {
425 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
426 			NAMEINST(dip), dobj_p);
427 		return (DDI_DMA_NOMAPPING);
428 	}
429 	if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
430 			pec_p->pec_last32_pfn)) {
431 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
432 		goto done;	/* leave bypass and dvma flag as 0 */
433 	} else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
434 			pec_p->pec_last64_pfn)) {
435 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
436 		goto done;	/* leave bypass and dvma flag as 0 */
437 	}
438 	if (PX_DMA_ISPEERONLY(mp)) {
439 		dev_info_t *rdip = mp->dmai_rdip;
440 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
441 		return (DDI_DMA_NOMAPPING);
442 	}
443 
444 	redzone = (mp->dmai_rflags & DDI_DMA_REDZONE) ||
445 	    (mp->dmai_flags & PX_DMAI_FLAGS_MAP_BUFZONE) ?
446 	    PX_DMAI_FLAGS_REDZONE : 0;
447 
448 	mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
449 	    PX_DMAI_FLAGS_BYPASS : (PX_DMAI_FLAGS_DVMA | redzone);
450 done:
451 	mp->dmai_object	 = *dobj_p;			/* whole object    */
452 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
453 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
454 	mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
455 	return (DDI_SUCCESS);
456 }
457 
458 /*
459  * px_dma_pgpfn - set up pfnlst array according to pages
460  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
461  */
462 /*ARGSUSED*/
463 static int
464 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
465 {
466 	int i;
467 	dev_info_t *dip = px_p->px_dip;
468 
469 	switch (mp->dmai_object.dmao_type) {
470 	case DMA_OTYP_BUFVADDR:
471 	case DMA_OTYP_VADDR: {
472 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
473 		DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
474 			pplist, npages);
475 		for (i = 1; i < npages; i++) {
476 			px_iopfn_t pfn = page_pptonum(pplist[i]);
477 			PX_SET_MP_PFN1(mp, i, pfn);
478 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
479 		}
480 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
481 		}
482 		break;
483 
484 	case DMA_OTYP_PAGES: {
485 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
486 		DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
487 		for (i = 1; i < npages; i++, pp = pp->p_next) {
488 			px_iopfn_t pfn = page_pptonum(pp);
489 			PX_SET_MP_PFN1(mp, i, pfn);
490 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
491 		}
492 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
493 		}
494 		break;
495 
496 	default:	/* check is already done by px_dma_type */
497 		ASSERT(0);
498 		break;
499 	}
500 	return (DDI_SUCCESS);
501 }
502 
503 /*
504  * px_dma_vapfn - set up pfnlst array according to VA
505  *	VA/size pair: <normal, bypass, peer-to-peer>
506  *	pfn0 is skipped as it is already done.
507  *	In this case, the cached pfn0 is used to fill pfnlst[0]
508  */
509 static int
510 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
511 {
512 	dev_info_t *dip = px_p->px_dip;
513 	int i;
514 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
515 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
516 
517 	vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
518 	for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
519 		px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
520 		if (pfn == PFN_INVALID)
521 			goto err_badpfn;
522 		PX_SET_MP_PFN1(mp, i, pfn);
523 		DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
524 			mp, i, pfn);
525 	}
526 	return (DDI_SUCCESS);
527 err_badpfn:
528 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
529 	return (DDI_DMA_NOMAPPING);
530 }
531 
532 /*
533  * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
534  *
535  * dependencies:
536  *	mp->dmai_ndvmapages	- set to total # of dma pages
537  *
538  * return value:
539  *	DDI_SUCCESS
540  *	DDI_DMA_NOMAPPING
541  */
542 int
543 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
544 {
545 	uint32_t npages = mp->dmai_ndvmapages;
546 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
547 	int i, ret, peer = PX_DMA_ISPTP(mp);
548 	int peer32 = PX_DMA_ISPTP32(mp);
549 	dev_info_t *dip = px_p->px_dip;
550 
551 	px_pec_t *pec_p = px_p->px_pec_p;
552 	px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
553 					pec_p->pec_base64_pfn;
554 	px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
555 					pec_p->pec_last64_pfn;
556 	px_iopfn_t pfn_adj = peer ? pfn_base : 0;
557 
558 	DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
559 		mp, PX_MP_PFN0(mp) - pfn_adj);
560 	/* 1 page: no array alloc/fill, no mixed mode check */
561 	if (npages == 1) {
562 		PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
563 		return (DDI_SUCCESS);
564 	}
565 	/* allocate pfn array */
566 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
567 		waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
568 		if (waitfp != DDI_DMA_DONTWAIT)
569 			ddi_set_callback(waitfp, dmareq->dmar_arg,
570 				&px_kmem_clid);
571 		return (DDI_DMA_NORESOURCES);
572 	}
573 	/* fill pfn array */
574 	PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
575 	if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
576 		px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
577 		goto err;
578 
579 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
580 	for (i = 1; i < npages; i++) {
581 		px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
582 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
583 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
584 				NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
585 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
586 			goto err;
587 		}
588 		DBG(DBG_DMA_MAP, dip,
589 			"px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
590 		if (pfn_adj)
591 			PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
592 	}
593 	return (DDI_SUCCESS);
594 err:
595 	px_dma_freepfn(mp);
596 	return (ret);
597 }
598 
599 /*
600  * px_dvma_win() - trim requested DVMA size down to window size
601  *	The 1st window starts from offset and ends at page-aligned boundary.
602  *	From the 2nd window on, each window starts and ends at page-aligned
603  *	boundary except the last window ends at wherever requested.
604  *
605  *	accesses the following mp-> members:
606  *	mp->dmai_attr.dma_attr_count_max
607  *	mp->dmai_attr.dma_attr_seg
608  *	mp->dmai_roffset   - start offset of 1st window
609  *	mp->dmai_rflags (redzone)
610  *	mp->dmai_ndvmapages (for 1 page fast path)
611  *
612  *	sets the following mp-> members:
613  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
614  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
615  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
616  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
617  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
618  *
619  *	fixed - not changed across different DMA windows
620  */
621 /*ARGSUSED*/
622 int
623 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
624 {
625 	uint32_t redzone_sz	= PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
626 	size_t obj_sz		= mp->dmai_object.dmao_size;
627 	size_t xfer_sz;
628 	ulong_t pg_off;
629 
630 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
631 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
632 		mp->dmai_size = obj_sz;
633 		mp->dmai_winsize = MMU_PAGE_SIZE;
634 		mp->dmai_nwin = 1;
635 		goto done;
636 	}
637 
638 	pg_off	= mp->dmai_roffset;
639 	xfer_sz	= obj_sz + redzone_sz;
640 
641 	/* include redzone in nocross check */ {
642 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
643 		if (xfer_sz + pg_off - 1 > nocross)
644 			xfer_sz = nocross - pg_off + 1;
645 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
646 			DBG(DBG_DMA_MAP, px_p->px_dip,
647 			    "nocross too small: "
648 			    "%lx(%lx)+%lx+%lx < %llx\n",
649 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
650 			return (DDI_DMA_TOOBIG);
651 		}
652 	}
653 	xfer_sz -= redzone_sz;		/* restore transfer size  */
654 	/* check counter max */ {
655 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
656 		if (xfer_sz - 1 > count_max)
657 			xfer_sz = count_max + 1;
658 	}
659 	if (xfer_sz >= obj_sz) {
660 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
661 		mp->dmai_size = xfer_sz;
662 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
663 		mp->dmai_nwin = 1;
664 		goto done;
665 	}
666 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
667 		DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
668 			obj_sz, pg_off, redzone_sz, xfer_sz);
669 		return (DDI_DMA_TOOBIG);
670 	}
671 
672 	xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
673 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
674 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
675 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
676 done:
677 	mp->dmai_winlst = NULL;
678 	px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
679 	return (DDI_SUCCESS);
680 }
681 
/*
 * fast track cache entry to mmu context: the entry is shifted left by
 * three bits, its low three bits are copied back into bits 0-2, and
 * bits 3-5 are forced to ones (0x38)
 */
#define	MMU_FCE_TO_CTX(i)	((((i) << 3) | 0x38) | ((i) & 0x7))
687 
688 /*
689  * px_dvma_map_fast - attempts to map fast trackable DVMA
690  */
691 /*ARGSUSED*/
692 int
693 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
694 {
695 	uint_t clustsz = px_dvma_page_cache_clustsz;
696 	uint_t entries = px_dvma_page_cache_entries;
697 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
698 	    mp->dmai_attr.dma_attr_flags);
699 	int i = mmu_p->mmu_dvma_addr_scan_start;
700 	uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
701 	px_dvma_addr_t dvma_pg;
702 	size_t npages = MMU_BTOP(mp->dmai_winsize);
703 	dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
704 
705 	extern uint8_t ldstub(uint8_t *);
706 	ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
707 	ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
708 
709 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++);
710 	if (i >= entries) {
711 		lock_addr = mmu_p->mmu_dvma_cache_locks;
712 		i = 0;
713 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++);
714 		if (i >= entries) {
715 #ifdef	PX_DMA_PROF
716 			px_dvmaft_exhaust++;
717 #endif	/* PX_DMA_PROF */
718 			return (DDI_DMA_NORESOURCES);
719 		}
720 	}
721 	mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
722 
723 	i *= clustsz;
724 	dvma_pg = mmu_p->dvma_base_pg + i;
725 
726 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages,
727 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, 0,
728 	    MMU_MAP_PFN) != DDI_SUCCESS) {
729 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
730 		    "px_lib_iommu_map failed\n");
731 		return (DDI_FAILURE);
732 	}
733 
734 	if (!PX_MAP_BUFZONE(mp))
735 		goto done;
736 
737 	DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
738 
739 	ASSERT(PX_HAS_REDZONE(mp));
740 
741 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1,
742 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, npages - 1,
743 	    MMU_MAP_PFN) != DDI_SUCCESS) {
744 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
745 		    "mapping REDZONE page failed\n");
746 
747 		(void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
748 		return (DDI_FAILURE);
749 	}
750 
751 done:
752 #ifdef PX_DMA_PROF
753 	px_dvmaft_success++;
754 #endif
755 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
756 	mp->dmai_offset = 0;
757 	mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
758 	PX_SAVE_MP_TTE(mp, attr);	/* save TTE template for unmapping */
759 	if (PX_DVMA_DBG_ON(mmu_p))
760 		px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
761 			mp->dmai_size, mp);
762 	return (DDI_SUCCESS);
763 }
764 
765 /*
766  * px_dvma_map: map non-fasttrack DMA
767  *		Use quantum cache if single page DMA.
768  */
769 int
770 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
771 {
772 	uint_t npages = PX_DMA_WINNPGS(mp);
773 	px_dvma_addr_t dvma_pg, dvma_pg_index;
774 	void *dvma_addr;
775 	uint64_t tte = PX_GET_TTE_ATTR(mp->dmai_rflags,
776 	    mp->dmai_attr.dma_attr_flags);
777 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
778 	dev_info_t *dip = mp->dmai_rdip;
779 	int	ret = DDI_SUCCESS;
780 
781 	/*
782 	 * allocate dvma space resource and map in the first window.
783 	 * (vmem_t *vmp, size_t size,
784 	 *	size_t align, size_t phase, size_t nocross,
785 	 *	void *minaddr, void *maxaddr, int vmflag)
786 	 */
787 	if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
788 		dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
789 			MMU_PAGE_SIZE, sleep);
790 		mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
791 #ifdef	PX_DMA_PROF
792 		px_dvma_vmem_alloc++;
793 #endif	/* PX_DMA_PROF */
794 	} else {
795 		dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
796 			MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
797 			MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
798 			0,
799 			mp->dmai_attr.dma_attr_seg + 1,
800 			(void *)mp->dmai_attr.dma_attr_addr_lo,
801 			(void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
802 			sleep);
803 #ifdef	PX_DMA_PROF
804 		px_dvma_vmem_xalloc++;
805 #endif	/* PX_DMA_PROF */
806 	}
807 	dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
808 	dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
809 	DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
810 		dvma_pg, dvma_pg_index);
811 	if (dvma_pg == 0)
812 		goto noresource;
813 
814 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
815 	mp->dmai_offset = 0;
816 	PX_SAVE_MP_TTE(mp, tte);	/* mp->dmai_tte = tte */
817 
818 	if ((ret = px_mmu_map_pages(mmu_p,
819 	    mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
820 		if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
821 			vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
822 			    MMU_PAGE_SIZE);
823 #ifdef PX_DMA_PROF
824 			px_dvma_vmem_free++;
825 #endif /* PX_DMA_PROF */
826 		} else {
827 			vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
828 			    MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
829 #ifdef PX_DMA_PROF
830 			px_dvma_vmem_xfree++;
831 #endif /* PX_DMA_PROF */
832 		}
833 	}
834 
835 	return (ret);
836 noresource:
837 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
838 		DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
839 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
840 			&mmu_p->mmu_dvma_clid);
841 	}
842 	DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
843 	return (DDI_DMA_NORESOURCES);
844 }
845 
846 void
847 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
848 {
849 	px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
850 	px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
851 	dvma_addr = MMU_PTOB(dvma_pg);
852 
853 	if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
854 		px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
855 		ASSERT(index % px_dvma_page_cache_clustsz == 0);
856 		index /= px_dvma_page_cache_clustsz;
857 		ASSERT(index < px_dvma_page_cache_entries);
858 		mmu_p->mmu_dvma_cache_locks[index] = 0;
859 #ifdef	PX_DMA_PROF
860 		px_dvmaft_free++;
861 #endif	/* PX_DMA_PROF */
862 		return;
863 	}
864 
865 	if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
866 		vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
867 			MMU_PAGE_SIZE);
868 #ifdef PX_DMA_PROF
869 		px_dvma_vmem_free++;
870 #endif /* PX_DMA_PROF */
871 	} else {
872 		size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
873 		vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
874 			MMU_PTOB(npages));
875 #ifdef PX_DMA_PROF
876 		px_dvma_vmem_xfree++;
877 #endif /* PX_DMA_PROF */
878 	}
879 }
880 
881 /*
882  * DVMA mappings may have multiple windows, but each window always have
883  * one segment.
884  */
885 int
886 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
887 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
888 	uint_t cache_flags)
889 {
890 	switch (cmd) {
891 	case DDI_DMA_SYNC:
892 		return (px_lib_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
893 		    *offp, *lenp, cache_flags));
894 
895 	case DDI_DMA_HTOC: {
896 		int ret;
897 		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
898 		uint_t win_size = mp->dmai_winsize;
899 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;
900 
901 		if (off >= mp->dmai_object.dmao_size) {
902 			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
903 				NAMEINST(mp->dmai_rdip), off);
904 			return (DDI_FAILURE);
905 		}
906 		off += mp->dmai_roffset;
907 		ret = px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
908 		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
909 		if (ret)
910 			return (ret);
911 		DBG(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
912 			cp->dmac_address, cp->dmac_size, off, *offp);
913 
914 		/* adjust cookie addr/len if we are not on window boundary */
915 		ASSERT((off % win_size) == (off -
916 			(PX_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
917 		off = PX_DMA_CURWIN(mp) ? off % win_size : *offp;
918 		ASSERT(cp->dmac_size > off);
919 		cp->dmac_laddress += off;
920 		cp->dmac_size -= off;
921 		DBG(DBG_DMA_CTL, dip, "HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
922 			mp, cp->dmac_address, cp->dmac_size, off, wo_off);
923 		}
924 		return (DDI_SUCCESS);
925 
926 	case DDI_DMA_REPWIN:
927 		*offp = mp->dmai_offset;
928 		*lenp = mp->dmai_size;
929 		return (DDI_SUCCESS);
930 
931 	case DDI_DMA_MOVWIN: {
932 		off_t off = *offp;
933 		if (off >= mp->dmai_object.dmao_size)
934 			return (DDI_FAILURE);
935 		off += mp->dmai_roffset;
936 		return (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
937 		    off / mp->dmai_winsize, offp, lenp,
938 		    (ddi_dma_cookie_t *)objp, NULL));
939 		}
940 
941 	case DDI_DMA_NEXTWIN: {
942 		px_window_t win = PX_DMA_CURWIN(mp);
943 		if (offp) {
944 			if (*(px_window_t *)offp != win) {
945 				/* window not active */
946 				*(px_window_t *)objp = win; /* return cur win */
947 				return (DDI_DMA_STALE);
948 			}
949 			win++;
950 		} else	/* map win 0 */
951 			win = 0;
952 		if (win >= mp->dmai_nwin) {
953 			*(px_window_t *)objp = win - 1;
954 			return (DDI_DMA_DONE);
955 		}
956 		if (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
957 		    win, 0, 0, 0, 0)) {
958 			*(px_window_t *)objp = win - 1;
959 			return (DDI_FAILURE);
960 		}
961 		*(px_window_t *)objp = win;
962 		}
963 		return (DDI_SUCCESS);
964 
965 	case DDI_DMA_NEXTSEG:
966 		if (*(px_window_t *)offp != PX_DMA_CURWIN(mp))
967 			return (DDI_DMA_STALE);
968 		if (lenp)				/* only 1 seg allowed */
969 			return (DDI_DMA_DONE);
970 
971 		/* return mp as seg 0 */
972 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
973 		return (DDI_SUCCESS);
974 
975 	case DDI_DMA_SEGTOC:
976 		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
977 			mp->dmai_size);
978 		*offp = mp->dmai_offset;
979 		*lenp = mp->dmai_size;
980 		return (DDI_SUCCESS);
981 
982 	case DDI_DMA_COFF: {
983 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
984 		if (cp->dmac_address < mp->dmai_mapping ||
985 			(cp->dmac_address + cp->dmac_size) >
986 			(mp->dmai_mapping + mp->dmai_size))
987 			return (DDI_FAILURE);
988 		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
989 			mp->dmai_offset);
990 		}
991 		return (DDI_SUCCESS);
992 	default:
993 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
994 			cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
995 		break;
996 	}
997 	return (DDI_FAILURE);
998 }
999 
1000 void
1001 px_dma_freewin(ddi_dma_impl_t *mp)
1002 {
1003 	px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
1004 	for (win2_p = win_p; win_p; win2_p = win_p) {
1005 		win_p = win2_p->win_next;
1006 		kmem_free(win2_p, sizeof (px_dma_win_t) +
1007 			sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
1008 	}
1009 	mp->dmai_nwin = 0;
1010 	mp->dmai_winlst = NULL;
1011 }
1012 
1013 /*
1014  * px_dma_newwin - create a dma window object and cookies
1015  *
1016  *	After the initial scan in px_dma_physwin(), which identifies
1017  *	a portion of the pfn array that belongs to a dma window,
1018  *	we are called to allocate and initialize representing memory
1019  *	resources. We know from the 1st scan the number of cookies
1020  *	or dma segment in this window so we can allocate a contiguous
1021  *	memory array for the dma cookies (The implementation of
1022  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
1023  *
1024  *	A second round scan is done on the pfn array to identify
1025  *	each dma segment and initialize its corresponding dma cookie.
1026  *	We don't need to do all the safety checking and we know they
1027  *	all belong to the same dma window.
1028  *
1029  *	Input:	cookie_no - # of cookies identified by the 1st scan
1030  *		start_idx - subscript of the pfn array for the starting pfn
1031  *		end_idx   - subscript of the last pfn in dma window
1032  *		win_pp    - pointer to win_next member of previous window
1033  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
1034  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
1035  *	Note:	Each cookie and window size are all initialized on page
1036  *		boundary. This is not true for the 1st cookie of the 1st
1037  *		window and the last cookie of the last window.
1038  *		We fix that later in upper layer which has access to size
1039  *		and offset info.
1040  *
1041  */
1042 /*ARGSUSED*/
1043 static int
1044 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
1045 	uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
1046 	px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
1047 {
1048 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
1049 	ddi_dma_cookie_t *cookie_p;
1050 	uint32_t pfn_no = 1;
1051 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
1052 	px_iopfn_t prev_pfn = pfn;
1053 	uint64_t baddr, seg_pfn0 = pfn;
1054 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
1055 	px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
1056 		waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1057 	io_attributes_t	attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1058 	    mp->dmai_attr.dma_attr_flags);
1059 
1060 	if (!win_p)
1061 		goto noresource;
1062 
1063 	win_p->win_next = NULL;
1064 	win_p->win_ncookies = cookie_no;
1065 	win_p->win_curseg = 0;	/* start from segment 0 */
1066 	win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
1067 	/* win_p->win_offset is left uninitialized */
1068 
1069 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1070 	start_idx++;
1071 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
1072 		pfn = PX_GET_MP_PFN1(mp, start_idx);
1073 		if ((pfn == prev_pfn + 1) &&
1074 			(MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1075 			continue;
1076 
1077 		/* close up the cookie up to (including) prev_pfn */
1078 		baddr = MMU_PTOB(seg_pfn0);
1079 		if (bypass && (px_lib_iommu_getbypass(dip,
1080 				baddr, attr, &baddr) != DDI_SUCCESS))
1081 			return (DDI_FAILURE);
1082 
1083 		MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1084 		DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
1085 			MMU_PTOB(seg_pfn0), pfn_no);
1086 
1087 		cookie_p++;	/* advance to next available cookie cell */
1088 		pfn_no = 0;
1089 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
1090 	}
1091 
1092 	baddr = MMU_PTOB(seg_pfn0);
1093 	if (bypass && (px_lib_iommu_getbypass(dip,
1094 			baddr, attr, &baddr) != DDI_SUCCESS))
1095 		return (DDI_FAILURE);
1096 
1097 	MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1098 	DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1099 		MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
1100 #ifdef	DEBUG
1101 	cookie_p++;
1102 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1103 #endif	/* DEBUG */
1104 	*win_pp = win_p;
1105 	return (DDI_SUCCESS);
1106 noresource:
1107 	if (waitfp != DDI_DMA_DONTWAIT)
1108 		ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
1109 	return (DDI_DMA_NORESOURCES);
1110 }
1111 
1112 /*
1113  * px_dma_adjust - adjust 1st and last cookie and window sizes
1114  *	remove initial dma page offset from 1st cookie and window size
1115  *	remove last dma page remainder from last cookie and window size
1116  *	fill win_offset of each dma window according to just fixed up
1117  *		each window sizes
1118  *	px_dma_win_t members modified:
1119  *	win_p->win_offset - this window's offset within entire DMA object
1120  *	win_p->win_size	  - xferrable size (in bytes) for this window
1121  *
1122  *	ddi_dma_impl_t members modified:
1123  *	mp->dmai_size	  - 1st window xferrable size
1124  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1125  *
1126  *	ddi_dma_cookie_t members modified:
1127  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1128  *	cookie_p->dmac_laddress - 1st cookie add page offset
1129  */
1130 static void
1131 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
1132 {
1133 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1134 	size_t pg_offset = mp->dmai_roffset;
1135 	size_t win_offset = 0;
1136 
1137 	cookie_p->dmac_size -= pg_offset;
1138 	cookie_p->dmac_laddress |= pg_offset;
1139 	win_p->win_size -= pg_offset;
1140 	DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1141 
1142 	mp->dmai_size = win_p->win_size;
1143 	mp->dmai_offset = 0;
1144 
1145 	pg_offset += mp->dmai_object.dmao_size;
1146 	pg_offset &= MMU_PAGE_OFFSET;
1147 	if (pg_offset)
1148 		pg_offset = MMU_PAGE_SIZE - pg_offset;
1149 	DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1150 
1151 	for (; win_p->win_next; win_p = win_p->win_next) {
1152 		DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1153 		win_p->win_offset = win_offset;
1154 		win_offset += win_p->win_size;
1155 	}
1156 	/* last window */
1157 	win_p->win_offset = win_offset;
1158 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1159 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1160 	win_p->win_size -= pg_offset;
1161 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1162 }
1163 
1164 /*
1165  * px_dma_physwin() - carve up dma windows using physical addresses.
1166  *	Called to handle mmu bypass and pci peer-to-peer transfers.
1167  *	Calls px_dma_newwin() to allocate window objects.
1168  *
1169  * Dependency: mp->dmai_pfnlst points to an array of pfns
1170  *
1171  * 1. Each dma window is represented by a px_dma_win_t object.
1172  *	The object will be casted to ddi_dma_win_t and returned
1173  *	to leaf driver through the DDI interface.
1174  * 2. Each dma window can have several dma segments with each
1175  *	segment representing a physically contiguous either memory
1176  *	space (if we are doing an mmu bypass transfer) or pci address
1177  *	space (if we are doing a peer-to-peer transfer).
1178  * 3. Each segment has a DMA cookie to program the DMA engine.
1179  *	The cookies within each DMA window must be located in a
1180  *	contiguous array per ddi_dma_nextcookie(9f).
1181  * 4. The number of DMA segments within each DMA window cannot exceed
1182  *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
1183  *	too large to fit in the sgllen, the rest needs to be
1184  *	relocated to the next dma window.
1185  * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
1186  *	and nocross restrictions while bypass DMA follows the set of
1187  *	restrictions with system limits factored in.
1188  *
1189  * Return:
1190  *	mp->dmai_winlst	 - points to a link list of px_dma_win_t objects.
1191  *		Each px_dma_win_t object on the link list contains
1192  *		infomation such as its window size (# of pages),
1193  *		starting offset (also see Restriction), an array of
1194  *		DMA cookies, and # of cookies in the array.
1195  *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
1196  *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
1197  *	mp->dmai_mapping - starting cookie address
1198  *	mp->dmai_rflags	 - consistent, nosync, no redzone
1199  *	mp->dmai_cookie	 - start of cookie table of the 1st DMA window
1200  *
1201  * Restriction:
1202  *	Each px_dma_win_t object can theoratically start from any offset
1203  *	since the mmu is not involved. However, this implementation
1204  *	always make windows start from page aligned offset (except
1205  *	the 1st window, which follows the requested offset) due to the
1206  *	fact that we are handed a pfn list. This does require device's
1207  *	count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
1208  */
1209 int
1210 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
1211 {
1212 	uint_t npages = mp->dmai_ndvmapages;
1213 	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
1214 	px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
1215 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
1216 	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
1217 	uint64_t count_max, bypass_addr = 0;
1218 	px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
1219 	ddi_dma_cookie_t *cookie0_p;
1220 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1221 	    mp->dmai_attr.dma_attr_flags);
1222 	dev_info_t *dip = px_p->px_dip;
1223 
1224 	ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
1225 	if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
1226 		ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
1227 		uint64_t nocross = dev_attr_p->dma_attr_seg;
1228 		px_pec_t *pec_p = px_p->px_pec_p;
1229 		px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
1230 				pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
1231 				pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
1232 
1233 		if (nocross && (nocross < UINT32_MAX))
1234 			return (DDI_DMA_NOMAPPING);
1235 		if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
1236 			return (DDI_DMA_NOMAPPING);
1237 		pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
1238 		pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
1239 		pfn_hi = MIN(pfn_hi, pfn_last);
1240 		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
1241 			return (DDI_DMA_NOMAPPING);
1242 
1243 		count_max = dev_attr_p->dma_attr_count_max;
1244 		count_max = MIN(count_max, nocross);
1245 		/*
1246 		 * the following count_max trim is not done because we are
1247 		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
1248 		 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
1249 		 */
1250 	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
1251 		count_max = mp->dmai_attr.dma_attr_count_max;
1252 		pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
1253 		pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
1254 
1255 		if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
1256 				attr, &bypass_addr) != DDI_SUCCESS) {
1257 			cmn_err(CE_WARN, "bypass cookie failure %lx\n", pfn);
1258 			return (DDI_DMA_NOMAPPING);
1259 		}
1260 		pfn = MMU_BTOP(bypass_addr);
1261 	}
1262 
1263 	/* pfn: absolute (bypass mode) or relative (p2p mode) */
1264 	for (prev_pfn = pfn, i = 1; i < npages;
1265 	    i++, prev_pfn = pfn, pfn_no++) {
1266 		pfn = PX_GET_MP_PFN1(mp, i);
1267 		if (bypass_addr) {
1268 			if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
1269 					&bypass_addr) != DDI_SUCCESS) {
1270 				ret = DDI_DMA_NOMAPPING;
1271 				goto err;
1272 			}
1273 			pfn = MMU_BTOP(bypass_addr);
1274 		}
1275 		if ((pfn == prev_pfn + 1) &&
1276 				(MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1277 			continue;
1278 		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
1279 			ret = DDI_DMA_NOMAPPING;
1280 			goto err;
1281 		}
1282 		cookie_no++;
1283 		pfn_no = 0;
1284 		if (cookie_no < sgllen)
1285 			continue;
1286 
1287 		DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1288 			win_pfn0_index, i - 1, cookie_no);
1289 		if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
1290 			win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
1291 			goto err;
1292 
1293 		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
1294 		win_no++;
1295 		win_pfn0_index = i;
1296 		cookie_no = 0;
1297 	}
1298 	if (pfn > pfn_hi) {
1299 		ret = DDI_DMA_NOMAPPING;
1300 		goto err;
1301 	}
1302 	cookie_no++;
1303 	DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1304 		win_pfn0_index, i - 1, cookie_no);
1305 	if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
1306 		i - 1, win_pp, count_max, bypass_addr))
1307 		goto err;
1308 	win_no++;
1309 	px_dma_adjust(dmareq, mp, mp->dmai_winlst);
1310 	mp->dmai_nwin = win_no;
1311 	mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
1312 	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
1313 	mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
1314 	cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
1315 	mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
1316 	mp->dmai_mapping = cookie0_p->dmac_laddress;
1317 
1318 	px_dma_freepfn(mp);
1319 	return (DDI_DMA_MAPPED);
1320 err:
1321 	px_dma_freewin(mp);
1322 	return (ret);
1323 }
1324 
1325 int
1326 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1327 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1328 	uint_t cache_flags)
1329 {
1330 	switch (cmd) {
1331 	case DDI_DMA_SYNC:
1332 		return (DDI_SUCCESS);
1333 
1334 	case DDI_DMA_HTOC: {
1335 		off_t off = *offp;
1336 		ddi_dma_cookie_t *loop_cp, *cp;
1337 		px_dma_win_t *win_p = mp->dmai_winlst;
1338 
1339 		if (off >= mp->dmai_object.dmao_size)
1340 			return (DDI_FAILURE);
1341 
1342 		/* locate window */
1343 		while (win_p->win_offset + win_p->win_size <= off)
1344 			win_p = win_p->win_next;
1345 
1346 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
1347 		mp->dmai_offset = win_p->win_offset;
1348 		mp->dmai_size   = win_p->win_size;
1349 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
1350 
1351 		/* adjust cookie addr/len if we are not on cookie boundary */
1352 		off -= win_p->win_offset;	   /* offset within window */
1353 		for (; off >= loop_cp->dmac_size; loop_cp++)
1354 			off -= loop_cp->dmac_size; /* offset within cookie */
1355 
1356 		mp->dmai_cookie = loop_cp + 1;
1357 		win_p->win_curseg = loop_cp - cp;
1358 		cp = (ddi_dma_cookie_t *)objp;
1359 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
1360 			loop_cp->dmac_size - off);
1361 
1362 		DBG(DBG_DMA_CTL, dip,
1363 			"HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1364 			cp->dmac_laddress, cp->dmac_size);
1365 		}
1366 		return (DDI_SUCCESS);
1367 
1368 	case DDI_DMA_REPWIN:
1369 		*offp = mp->dmai_offset;
1370 		*lenp = mp->dmai_size;
1371 		return (DDI_SUCCESS);
1372 
1373 	case DDI_DMA_MOVWIN: {
1374 		off_t off = *offp;
1375 		ddi_dma_cookie_t *cp;
1376 		px_dma_win_t *win_p = mp->dmai_winlst;
1377 
1378 		if (off >= mp->dmai_object.dmao_size)
1379 			return (DDI_FAILURE);
1380 
1381 		/* locate window */
1382 		while (win_p->win_offset + win_p->win_size <= off)
1383 			win_p = win_p->win_next;
1384 
1385 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1386 		mp->dmai_offset = win_p->win_offset;
1387 		mp->dmai_size   = win_p->win_size;
1388 		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 star addr */
1389 		mp->dmai_cookie = cp + 1;
1390 		win_p->win_curseg = 0;
1391 
1392 		*(ddi_dma_cookie_t *)objp = *cp;
1393 		*offp = win_p->win_offset;
1394 		*lenp = win_p->win_size;
1395 		DBG(DBG_DMA_CTL, dip,
1396 			"HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1397 			cp->dmac_laddress, cp->dmac_size);
1398 		}
1399 		return (DDI_SUCCESS);
1400 
1401 	case DDI_DMA_NEXTWIN: {
1402 		px_dma_win_t *win_p = *(px_dma_win_t **)offp;
1403 		px_dma_win_t **nw_pp = (px_dma_win_t **)objp;
1404 		ddi_dma_cookie_t *cp;
1405 		if (!win_p) {
1406 			*nw_pp = mp->dmai_winlst;
1407 			return (DDI_SUCCESS);
1408 		}
1409 
1410 		if (win_p->win_offset != mp->dmai_offset)
1411 			return (DDI_DMA_STALE);
1412 		if (!win_p->win_next)
1413 			return (DDI_DMA_DONE);
1414 		win_p = win_p->win_next;
1415 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1416 		mp->dmai_offset = win_p->win_offset;
1417 		mp->dmai_size   = win_p->win_size;
1418 		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 star addr */
1419 		mp->dmai_cookie = cp + 1;
1420 		win_p->win_curseg = 0;
1421 		*nw_pp = win_p;
1422 		}
1423 		return (DDI_SUCCESS);
1424 
1425 	case DDI_DMA_NEXTSEG: {
1426 		px_dma_win_t *w_p = *(px_dma_win_t **)offp;
1427 		if (w_p->win_offset != mp->dmai_offset)
1428 			return (DDI_DMA_STALE);
1429 		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
1430 			return (DDI_DMA_DONE);
1431 		w_p->win_curseg++;
1432 		}
1433 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
1434 		return (DDI_SUCCESS);
1435 
1436 	case DDI_DMA_SEGTOC: {
1437 		px_dma_win_t *win_p = mp->dmai_winlst;
1438 		off_t off = mp->dmai_offset;
1439 		ddi_dma_cookie_t *cp;
1440 		int i;
1441 
1442 		/* locate active window */
1443 		for (; win_p->win_offset != off; win_p = win_p->win_next);
1444 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1445 		for (i = 0; i < win_p->win_curseg; i++, cp++)
1446 			off += cp->dmac_size;
1447 		*offp = off;
1448 		*lenp = cp->dmac_size;
1449 		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
1450 		}
1451 		return (DDI_SUCCESS);
1452 
1453 	case DDI_DMA_COFF: {
1454 		px_dma_win_t *win_p;
1455 		ddi_dma_cookie_t *cp;
1456 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
1457 		size_t win_off;
1458 
1459 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
1460 			int i;
1461 			win_off = 0;
1462 			cp = (ddi_dma_cookie_t *)(win_p + 1);
1463 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
1464 				size_t sz = cp->dmac_size;
1465 
1466 				addr = cp->dmac_laddress;
1467 				if ((addr <= key) && (addr + sz >= key))
1468 					goto found;
1469 				win_off += sz;
1470 			}
1471 		}
1472 		return (DDI_FAILURE);
1473 found:
1474 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
1475 		return (DDI_SUCCESS);
1476 		}
1477 	default:
1478 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1479 			cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1480 		break;
1481 	}
1482 	return (DDI_FAILURE);
1483 }
1484 
1485 static void
1486 px_dvma_debug_init(px_mmu_t *mmu_p)
1487 {
1488 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1489 	ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
1490 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
1491 
1492 	mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
1493 	mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
1494 
1495 	mmu_p->dvma_active_list = NULL;
1496 	mmu_p->dvma_alloc_rec_index = 0;
1497 	mmu_p->dvma_free_rec_index = 0;
1498 	mmu_p->dvma_active_count = 0;
1499 }
1500 
1501 void
1502 px_dvma_debug_fini(px_mmu_t *mmu_p)
1503 {
1504 	struct px_dvma_rec *prev, *ptr;
1505 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1506 	uint64_t mask = ~(1ull << mmu_p->mmu_inst);
1507 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
1508 
1509 	if (mmu_p->dvma_alloc_rec) {
1510 		kmem_free(mmu_p->dvma_alloc_rec, sz);
1511 		mmu_p->dvma_alloc_rec = NULL;
1512 	}
1513 	if (mmu_p->dvma_free_rec) {
1514 		kmem_free(mmu_p->dvma_free_rec, sz);
1515 		mmu_p->dvma_free_rec = NULL;
1516 	}
1517 
1518 	prev = mmu_p->dvma_active_list;
1519 	if (!prev)
1520 		return;
1521 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1522 		kmem_free(prev, sizeof (struct px_dvma_rec));
1523 	kmem_free(prev, sizeof (struct px_dvma_rec));
1524 
1525 	mmu_p->dvma_active_list = NULL;
1526 	mmu_p->dvma_alloc_rec_index = 0;
1527 	mmu_p->dvma_free_rec_index = 0;
1528 	mmu_p->dvma_active_count = 0;
1529 
1530 	px_dvma_debug_off &= mask;
1531 	px_dvma_debug_on &= mask;
1532 }
1533 
1534 void
1535 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1536 	ddi_dma_impl_t *mp)
1537 {
1538 	struct px_dvma_rec *ptr;
1539 	mutex_enter(&mmu_p->dvma_debug_lock);
1540 
1541 	if (!mmu_p->dvma_alloc_rec)
1542 		px_dvma_debug_init(mmu_p);
1543 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1544 		px_dvma_debug_fini(mmu_p);
1545 		goto done;
1546 	}
1547 
1548 	ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
1549 	ptr->dvma_addr = address;
1550 	ptr->len = len;
1551 	ptr->mp = mp;
1552 	if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
1553 		mmu_p->dvma_alloc_rec_index = 0;
1554 
1555 	ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
1556 	ptr->dvma_addr = address;
1557 	ptr->len = len;
1558 	ptr->mp = mp;
1559 
1560 	ptr->next = mmu_p->dvma_active_list;
1561 	mmu_p->dvma_active_list = ptr;
1562 	mmu_p->dvma_active_count++;
1563 done:
1564 	mutex_exit(&mmu_p->dvma_debug_lock);
1565 }
1566 
1567 void
1568 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1569     ddi_dma_impl_t *mp)
1570 {
1571 	struct px_dvma_rec *ptr, *ptr_save;
1572 	mutex_enter(&mmu_p->dvma_debug_lock);
1573 
1574 	if (!mmu_p->dvma_alloc_rec)
1575 		px_dvma_debug_init(mmu_p);
1576 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1577 		px_dvma_debug_fini(mmu_p);
1578 		goto done;
1579 	}
1580 
1581 	ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
1582 	ptr->dvma_addr = address;
1583 	ptr->len = len;
1584 	ptr->mp = mp;
1585 	if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
1586 		mmu_p->dvma_free_rec_index = 0;
1587 
1588 	ptr_save = mmu_p->dvma_active_list;
1589 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1590 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1591 			break;
1592 		ptr_save = ptr;
1593 	}
1594 	if (!ptr) {
1595 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1596 			(long)address, len);
1597 		goto done;
1598 	}
1599 	if (ptr == mmu_p->dvma_active_list)
1600 		mmu_p->dvma_active_list = ptr->next;
1601 	else
1602 		ptr_save->next = ptr->next;
1603 	kmem_free(ptr, sizeof (struct px_dvma_rec));
1604 	mmu_p->dvma_active_count--;
1605 done:
1606 	mutex_exit(&mmu_p->dvma_debug_lock);
1607 }
1608 
1609 #ifdef	DEBUG
1610 void
1611 px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
1612 {
1613 	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
1614 		hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
1615 	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
1616 		hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
1617 		hp->dmai_nwin);
1618 	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
1619 		hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
1620 	DBG(flag|DBG_CONT, dip, "\twinlst=%x obj=%p attr=%p ckp=%p\n",
1621 		hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
1622 		hp->dmai_cookie);
1623 }
1624 #endif	/* DEBUG */
1625