xref: /titanic_44/usr/src/uts/sun4/io/px/px_dma.c (revision bf994817a71d4ac680198e25fe79d13c247306e0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PCI Express nexus DVMA and DMA core routines:
28  *	dma_map/dma_bind_handle implementation
29  *	bypass and peer-to-peer support
30  *	fast track DVMA space allocation
31  *	runtime DVMA debug
32  */
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sunddi.h>
38 #include <sys/ddi_impldefs.h>
39 #include "px_obj.h"
40 
41 /*LINTLIBRARY*/
42 
43 /*
44  * px_dma_allocmp - Allocate a pci dma implementation structure
45  *
46  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
47  * to hold unmodified device limits. The ddi_dma_attr inside the
48  * ddi_dma_impl structure is augumented with system limits to enhance
49  * DVMA performance at runtime. The unaugumented device limits saved
50  * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
51  * strictly for peer-to-peer transfers which do not obey system limits.
52  *
53  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
54  */
55 ddi_dma_impl_t *
56 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
57 	caddr_t arg)
58 {
59 	register ddi_dma_impl_t *mp;
60 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
61 
62 	/* Caution: we don't use zalloc to enhance performance! */
63 	if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
64 		DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
65 		if (waitfp != DDI_DMA_DONTWAIT) {
66 			DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
67 			ddi_set_callback(waitfp, arg, &px_kmem_clid);
68 		}
69 		return (mp);
70 	}
71 
72 	mp->dmai_rdip = rdip;
73 	mp->dmai_flags = 0;
74 	mp->dmai_pfnlst = NULL;
75 	mp->dmai_winlst = NULL;
76 
77 	/*
78 	 * kmem_alloc debug: the following fields are not zero-ed
79 	 * mp->dmai_mapping = 0;
80 	 * mp->dmai_size = 0;
81 	 * mp->dmai_offset = 0;
82 	 * mp->dmai_minxfer = 0;
83 	 * mp->dmai_burstsizes = 0;
84 	 * mp->dmai_ndvmapages = 0;
85 	 * mp->dmai_pool/roffset = 0;
86 	 * mp->dmai_rflags = 0;
87 	 * mp->dmai_inuse/flags
88 	 * mp->dmai_nwin = 0;
89 	 * mp->dmai_winsize = 0;
90 	 * mp->dmai_nexus_private/tte = 0;
91 	 * mp->dmai_iopte/pfnlst
92 	 * mp->dmai_sbi/pfn0 = 0;
93 	 * mp->dmai_minfo/winlst/fdvma
94 	 * mp->dmai_rdip
95 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
96 	 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
97 	 * mp->dmai_cookie = 0;
98 	 */
99 
100 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
101 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
102 	mp->dmai_fault = 0;
103 	mp->dmai_fault_check = NULL;
104 	mp->dmai_fault_notify = NULL;
105 
106 	mp->dmai_error.err_ena = 0;
107 	mp->dmai_error.err_status = DDI_FM_OK;
108 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
109 	mp->dmai_error.err_ontrap = NULL;
110 	mp->dmai_error.err_fep = NULL;
111 	mp->dmai_error.err_cf = NULL;
112 
113 	/*
114 	 * The bdf protection value is set to immediate child
115 	 * at first. It gets modified by switch/bridge drivers
116 	 * as the code traverses down the fabric topology.
117 	 *
118 	 * XXX No IOMMU protection for broken devices.
119 	 */
120 	ASSERT((intptr_t)ddi_get_parent_data(rdip) >> 1 == 0);
121 	mp->dmai_bdf = ((intptr_t)ddi_get_parent_data(rdip) == 1) ?
122 	    PCIE_INVALID_BDF : pcie_get_bdf_for_dma_xfer(dip, rdip);
123 
124 	ndi_fmc_insert(rdip, DMA_HANDLE, mp, NULL);
125 	return (mp);
126 }
127 
128 void
129 px_dma_freemp(ddi_dma_impl_t *mp)
130 {
131 	ndi_fmc_remove(mp->dmai_rdip, DMA_HANDLE, mp);
132 	if (mp->dmai_ndvmapages > 1)
133 		px_dma_freepfn(mp);
134 	if (mp->dmai_winlst)
135 		px_dma_freewin(mp);
136 	kmem_free(mp, sizeof (px_dma_hdl_t));
137 }
138 
139 void
140 px_dma_freepfn(ddi_dma_impl_t *mp)
141 {
142 	void *addr = mp->dmai_pfnlst;
143 	if (addr) {
144 		size_t npages = mp->dmai_ndvmapages;
145 		if (npages > 1)
146 			kmem_free(addr, npages * sizeof (px_iopfn_t));
147 		mp->dmai_pfnlst = NULL;
148 	}
149 	mp->dmai_ndvmapages = 0;
150 }
151 
152 /*
153  * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
154  *			and convert dmareq->dmar_limits to mp->dmai_attr
155  *
156  * ddi_dma_impl_t member modified     input
157  * ------------------------------------------------------------------------
158  * mp->dmai_minxfer		    - dev
159  * mp->dmai_burstsizes		    - dev
160  * mp->dmai_flags		    - no limit? peer-to-peer only?
161  *
162  * ddi_dma_attr member modified       input
163  * ------------------------------------------------------------------------
164  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
165  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
166  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
167  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
168  * mp->dmai_attr.dma_attr_align     - 1         (no alignment restriction)
169  *
170  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
171  */
172 ddi_dma_impl_t *
173 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
174 	ddi_dma_req_t *dmareq)
175 {
176 	ddi_dma_impl_t *mp;
177 	ddi_dma_attr_t *attr_p;
178 	uint64_t syslo		= mmu_p->mmu_dvma_base;
179 	uint64_t syshi		= mmu_p->mmu_dvma_end;
180 	uint64_t fasthi		= mmu_p->mmu_dvma_fast_end;
181 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
182 	uint32_t count_max	= lim_p->dlim_cntr_max;
183 	uint64_t lo		= lim_p->dlim_addr_lo;
184 	uint64_t hi		= lim_p->dlim_addr_hi;
185 	if (hi <= lo) {
186 		DBG(DBG_DMA_MAP, dip, "Bad limits\n");
187 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
188 	}
189 	if (!count_max)
190 		count_max--;
191 
192 	if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
193 	    dmareq->dmar_arg)))
194 		return (NULL);
195 
196 	/* store original dev input at the 2nd ddi_dma_attr */
197 	attr_p = PX_DEV_ATTR(mp);
198 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
199 	SET_DMAALIGN(attr_p, 1);
200 
201 	lo = MAX(lo, syslo);
202 	hi = MIN(hi, syshi);
203 	if (hi <= lo)
204 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
205 	count_max = MIN(count_max, hi - lo);
206 
207 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
208 		mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
209 		    PX_DMAI_FLAGS_NOSYSLIMIT;
210 	else {
211 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
212 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
213 	}
214 	if (PX_DMA_NOCTX(rdip))
215 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
216 
217 	/* store augumented dev input to mp->dmai_attr */
218 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
219 	attr_p = &mp->dmai_attr;
220 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
221 	SET_DMAALIGN(attr_p, 1);
222 	return (mp);
223 }
224 
225 /*
226  * Called from px_attach to check for bypass dma support and set
227  * flags accordingly.
228  */
229 int
230 px_dma_attach(px_t *px_p)
231 {
232 	uint64_t baddr;
233 
234 	if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
235 	    PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
236 	    &baddr) != DDI_ENOTSUP)
237 		/* ignore all other errors */
238 		px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
239 
240 	px_p->px_dma_sync_opt = ddi_prop_get_int(DDI_DEV_T_ANY,
241 	    px_p->px_dip, DDI_PROP_DONTPASS, "dma-sync-options", 0);
242 
243 	if (px_p->px_dma_sync_opt != 0)
244 		px_p->px_dev_caps |= PX_DMA_SYNC_REQUIRED;
245 
246 	return (DDI_SUCCESS);
247 }
248 
249 /*
250  * px_dma_attr2hdl
251  *
252  * This routine is called from the alloc handle entry point to sanity check the
253  * dma attribute structure.
254  *
255  * use by: px_dma_allochdl()
256  *
257  * return value:
258  *
259  *	DDI_SUCCESS		- on success
260  *	DDI_DMA_BADATTR		- attribute has invalid version number
261  *				  or address limits exclude dvma space
262  */
263 int
264 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
265 {
266 	px_mmu_t *mmu_p = px_p->px_mmu_p;
267 	uint64_t syslo, syshi;
268 	int	ret;
269 	ddi_dma_attr_t *attrp		= PX_DEV_ATTR(mp);
270 	uint64_t hi			= attrp->dma_attr_addr_hi;
271 	uint64_t lo			= attrp->dma_attr_addr_lo;
272 	uint64_t align			= attrp->dma_attr_align;
273 	uint64_t nocross		= attrp->dma_attr_seg;
274 	uint64_t count_max		= attrp->dma_attr_count_max;
275 
276 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
277 	    attrp, HI32(count_max), LO32(count_max));
278 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
279 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
280 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
281 	    HI32(nocross), LO32(nocross), HI32(align), LO32(align));
282 
283 	if (!nocross)
284 		nocross--;
285 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
286 
287 		DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
288 		/*
289 		 * If Bypass DMA is not supported, return error so that
290 		 * target driver can fall back to dvma mode of operation
291 		 */
292 		if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
293 			return (DDI_DMA_BADATTR);
294 		mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
295 		if (nocross != UINT64_MAX)
296 			return (DDI_DMA_BADATTR);
297 		if (align && (align > MMU_PAGE_SIZE))
298 			return (DDI_DMA_BADATTR);
299 		align = 1; /* align on 1 page boundary */
300 
301 		/* do a range check and get the limits */
302 		ret = px_lib_dma_bypass_rngchk(px_p->px_dip, attrp,
303 		    &syslo, &syshi);
304 		if (ret != DDI_SUCCESS)
305 			return (ret);
306 	} else { /* MMU_XLATE or PEER_TO_PEER */
307 		align = MAX(align, MMU_PAGE_SIZE) - 1;
308 		if ((align & nocross) != align) {
309 			dev_info_t *rdip = mp->dmai_rdip;
310 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
311 			    NAMEINST(rdip));
312 			return (DDI_DMA_BADATTR);
313 		}
314 		align = MMU_BTOP(align + 1);
315 		syslo = mmu_p->mmu_dvma_base;
316 		syshi = mmu_p->mmu_dvma_end;
317 	}
318 	if (hi <= lo) {
319 		dev_info_t *rdip = mp->dmai_rdip;
320 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
321 		return (DDI_DMA_BADATTR);
322 	}
323 	lo = MAX(lo, syslo);
324 	hi = MIN(hi, syshi);
325 	if (!count_max)
326 		count_max--;
327 
328 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
329 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
330 	if (hi <= lo) {
331 		/*
332 		 * If this is an IOMMU bypass access, the caller can't use
333 		 * the required addresses, so fail it.  Otherwise, it's
334 		 * peer-to-peer; ensure that the caller has no alignment or
335 		 * segment size restrictions.
336 		 */
337 		if ((mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ||
338 		    (nocross < UINT32_MAX) || (align > 1))
339 			return (DDI_DMA_BADATTR);
340 
341 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
342 	} else /* set practical counter_max value */
343 		count_max = MIN(count_max, hi - lo);
344 
345 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
346 		mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
347 		    PX_DMAI_FLAGS_NOFASTLIMIT;
348 	else {
349 		syshi = mmu_p->mmu_dvma_fast_end;
350 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
351 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
352 	}
353 	if (PX_DMA_NOCTX(mp->dmai_rdip))
354 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
355 
356 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
357 	attrp = &mp->dmai_attr;
358 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
359 	return (DDI_SUCCESS);
360 }
361 
/*
 * True when pfn lies in the inclusive range [bgn, end].  Every argument is
 * parenthesized so that expression arguments (e.g. "a & b") are evaluated
 * as a unit; pfn is evaluated twice, so it must be free of side effects.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
363 
364 /*
365  * px_dma_type - determine which of the three types DMA (peer-to-peer,
366  *		mmu bypass, or mmu translate) we are asked to do.
367  *		Also checks pfn0 and rejects any non-peer-to-peer
368  *		requests for peer-only devices.
369  *
370  *	return values:
371  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
372  *		DDI_SUCCESS
373  *
374  *	dma handle members affected (set on exit):
375  *	mp->dmai_object		- dmareq->dmar_object
376  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
377  *	mp->dmai_flags   	- DMA type
378  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
379  *	mp->dmai_roffset 	- initialized to starting MMU page offset
380  *	mp->dmai_ndvmapages	- # of total MMU pages of entire object
381  */
382 int
383 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
384 {
385 	dev_info_t *dip = px_p->px_dip;
386 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
387 	px_pec_t *pec_p = px_p->px_pec_p;
388 	uint32_t offset;
389 	pfn_t pfn0;
390 	uint_t redzone;
391 
392 	mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
393 
394 	if (!(px_p->px_dev_caps & PX_DMA_SYNC_REQUIRED))
395 		mp->dmai_rflags |= DMP_NOSYNC;
396 
397 	switch (dobj_p->dmao_type) {
398 	case DMA_OTYP_BUFVADDR:
399 	case DMA_OTYP_VADDR: {
400 		page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
401 		caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
402 
403 		DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
404 		offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
405 		if (pplist) {				/* shadow list */
406 			mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
407 			pfn0 = page_pptonum(*pplist);
408 		} else {
409 			struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
410 			struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
411 			pfn0 = hat_getpfnum(hat_p, vaddr);
412 		}
413 		}
414 		break;
415 
416 	case DMA_OTYP_PAGES:
417 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
418 		mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
419 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
420 		break;
421 
422 	case DMA_OTYP_PADDR:
423 	default:
424 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
425 		    NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
426 		return (DDI_DMA_NOMAPPING);
427 	}
428 	if (pfn0 == PFN_INVALID) {
429 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
430 		    NAMEINST(dip), dobj_p);
431 		return (DDI_DMA_NOMAPPING);
432 	}
433 	if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
434 	    pec_p->pec_last32_pfn)) {
435 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
436 		goto done;	/* leave bypass and dvma flag as 0 */
437 	} else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
438 	    pec_p->pec_last64_pfn)) {
439 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
440 		goto done;	/* leave bypass and dvma flag as 0 */
441 	}
442 	if (PX_DMA_ISPEERONLY(mp)) {
443 		dev_info_t *rdip = mp->dmai_rdip;
444 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
445 		return (DDI_DMA_NOMAPPING);
446 	}
447 
448 	redzone = (mp->dmai_rflags & DDI_DMA_REDZONE) ||
449 	    (mp->dmai_flags & PX_DMAI_FLAGS_MAP_BUFZONE) ?
450 	    PX_DMAI_FLAGS_REDZONE : 0;
451 
452 	mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
453 	    PX_DMAI_FLAGS_BYPASS : (PX_DMAI_FLAGS_DVMA | redzone);
454 done:
455 	mp->dmai_object	 = *dobj_p;			/* whole object    */
456 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
457 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
458 	mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
459 	return (DDI_SUCCESS);
460 }
461 
462 /*
463  * px_dma_pgpfn - set up pfnlst array according to pages
464  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
465  */
466 /*ARGSUSED*/
467 static int
468 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
469 {
470 	int i;
471 	dev_info_t *dip = px_p->px_dip;
472 
473 	switch (mp->dmai_object.dmao_type) {
474 	case DMA_OTYP_BUFVADDR:
475 	case DMA_OTYP_VADDR: {
476 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
477 		DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
478 		    pplist, npages);
479 		for (i = 1; i < npages; i++) {
480 			px_iopfn_t pfn = page_pptonum(pplist[i]);
481 			PX_SET_MP_PFN1(mp, i, pfn);
482 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
483 		}
484 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
485 		}
486 		break;
487 
488 	case DMA_OTYP_PAGES: {
489 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
490 		DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
491 		for (i = 1; i < npages; i++, pp = pp->p_next) {
492 			px_iopfn_t pfn = page_pptonum(pp);
493 			PX_SET_MP_PFN1(mp, i, pfn);
494 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
495 		}
496 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
497 		}
498 		break;
499 
500 	default:	/* check is already done by px_dma_type */
501 		ASSERT(0);
502 		break;
503 	}
504 	return (DDI_SUCCESS);
505 }
506 
507 /*
508  * px_dma_vapfn - set up pfnlst array according to VA
509  *	VA/size pair: <normal, bypass, peer-to-peer>
510  *	pfn0 is skipped as it is already done.
511  *	In this case, the cached pfn0 is used to fill pfnlst[0]
512  */
513 static int
514 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
515 {
516 	dev_info_t *dip = px_p->px_dip;
517 	int i;
518 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
519 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
520 
521 	vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
522 	for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
523 		px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
524 		if (pfn == PFN_INVALID)
525 			goto err_badpfn;
526 		PX_SET_MP_PFN1(mp, i, pfn);
527 		DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
528 		    mp, i, pfn);
529 	}
530 	return (DDI_SUCCESS);
531 err_badpfn:
532 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
533 	return (DDI_DMA_NOMAPPING);
534 }
535 
536 /*
537  * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
538  *
539  * dependencies:
540  *	mp->dmai_ndvmapages	- set to total # of dma pages
541  *
542  * return value:
543  *	DDI_SUCCESS
544  *	DDI_DMA_NOMAPPING
545  */
546 int
547 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
548 {
549 	uint32_t npages = mp->dmai_ndvmapages;
550 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
551 	int i, ret, peer = PX_DMA_ISPTP(mp);
552 	int peer32 = PX_DMA_ISPTP32(mp);
553 	dev_info_t *dip = px_p->px_dip;
554 
555 	px_pec_t *pec_p = px_p->px_pec_p;
556 	px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
557 	    pec_p->pec_base64_pfn;
558 	px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
559 	    pec_p->pec_last64_pfn;
560 	px_iopfn_t pfn_adj = peer ? pfn_base : 0;
561 
562 	DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
563 	    mp, PX_MP_PFN0(mp) - pfn_adj);
564 	/* 1 page: no array alloc/fill, no mixed mode check */
565 	if (npages == 1) {
566 		PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
567 		return (DDI_SUCCESS);
568 	}
569 	/* allocate pfn array */
570 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
571 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
572 		if (waitfp != DDI_DMA_DONTWAIT)
573 			ddi_set_callback(waitfp, dmareq->dmar_arg,
574 			    &px_kmem_clid);
575 		return (DDI_DMA_NORESOURCES);
576 	}
577 	/* fill pfn array */
578 	PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
579 	if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
580 	    px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
581 		goto err;
582 
583 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
584 	for (i = 1; i < npages; i++) {
585 		px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
586 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
587 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
588 			    NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
589 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
590 			goto err;
591 		}
592 		DBG(DBG_DMA_MAP, dip,
593 		    "px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
594 		if (pfn_adj)
595 			PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
596 	}
597 	return (DDI_SUCCESS);
598 err:
599 	px_dma_freepfn(mp);
600 	return (ret);
601 }
602 
603 /*
604  * px_dvma_win() - trim requested DVMA size down to window size
605  *	The 1st window starts from offset and ends at page-aligned boundary.
606  *	From the 2nd window on, each window starts and ends at page-aligned
607  *	boundary except the last window ends at wherever requested.
608  *
609  *	accesses the following mp-> members:
610  *	mp->dmai_attr.dma_attr_count_max
611  *	mp->dmai_attr.dma_attr_seg
612  *	mp->dmai_roffset   - start offset of 1st window
613  *	mp->dmai_rflags (redzone)
614  *	mp->dmai_ndvmapages (for 1 page fast path)
615  *
616  *	sets the following mp-> members:
617  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
618  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
619  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
620  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
621  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
622  *
623  *	fixed - not changed across different DMA windows
624  */
625 /*ARGSUSED*/
626 int
627 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
628 {
629 	uint32_t redzone_sz	= PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
630 	size_t obj_sz		= mp->dmai_object.dmao_size;
631 	size_t xfer_sz;
632 	ulong_t pg_off;
633 
634 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
635 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
636 		mp->dmai_size = obj_sz;
637 		mp->dmai_winsize = MMU_PAGE_SIZE;
638 		mp->dmai_nwin = 1;
639 		goto done;
640 	}
641 
642 	pg_off	= mp->dmai_roffset;
643 	xfer_sz	= obj_sz + redzone_sz;
644 
645 	/* include redzone in nocross check */	{
646 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
647 		if (xfer_sz + pg_off - 1 > nocross)
648 			xfer_sz = nocross - pg_off + 1;
649 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
650 			DBG(DBG_DMA_MAP, px_p->px_dip,
651 			    "nocross too small: "
652 			    "%lx(%lx)+%lx+%lx < %llx\n",
653 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
654 			return (DDI_DMA_TOOBIG);
655 		}
656 	}
657 	xfer_sz -= redzone_sz;		/* restore transfer size  */
658 	/* check counter max */	{
659 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
660 		if (xfer_sz - 1 > count_max)
661 			xfer_sz = count_max + 1;
662 	}
663 	if (xfer_sz >= obj_sz) {
664 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
665 		mp->dmai_size = xfer_sz;
666 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
667 		mp->dmai_nwin = 1;
668 		goto done;
669 	}
670 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
671 		DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
672 		    obj_sz, pg_off, redzone_sz, xfer_sz);
673 		return (DDI_DMA_TOOBIG);
674 	}
675 
676 	xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
677 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
678 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
679 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
680 done:
681 	mp->dmai_winlst = NULL;
682 	px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
683 	return (DDI_SUCCESS);
684 }
685 
/*
 * fast track cache entry to mmu context: the entry is shifted left by
 * three, its low three bits are kept in place, and 0x38 is OR-ed in, so
 * bits 3-5 of the result are always set.
 *
 * NOTE(review): this was previously described as inserting three 0 bits
 * between the upper 6 bits and lower 3 bits of the 9-bit cache entry; the
 * expression sets those three bits instead - confirm which is intended.
 */
#define	MMU_FCE_TO_CTX(i)	((((i) << 3) | 0x38) | ((i) & 0x7))
691 
692 /*
693  * px_dvma_map_fast - attempts to map fast trackable DVMA
694  */
695 /*ARGSUSED*/
696 int
697 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
698 {
699 	uint_t clustsz = px_dvma_page_cache_clustsz;
700 	uint_t entries = px_dvma_page_cache_entries;
701 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
702 	    mp->dmai_attr.dma_attr_flags);
703 	int i = mmu_p->mmu_dvma_addr_scan_start;
704 	uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
705 	px_dvma_addr_t dvma_pg;
706 	size_t npages = MMU_BTOP(mp->dmai_winsize);
707 	dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
708 
709 	extern uint8_t ldstub(uint8_t *);
710 	ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
711 	ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
712 
713 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
714 		;
715 	if (i >= entries) {
716 		lock_addr = mmu_p->mmu_dvma_cache_locks;
717 		i = 0;
718 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
719 			;
720 		if (i >= entries) {
721 #ifdef	PX_DMA_PROF
722 			px_dvmaft_exhaust++;
723 #endif	/* PX_DMA_PROF */
724 			return (DDI_DMA_NORESOURCES);
725 		}
726 	}
727 	mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
728 
729 	i *= clustsz;
730 	dvma_pg = mmu_p->dvma_base_pg + i;
731 
732 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages,
733 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, 0,
734 	    MMU_MAP_PFN) != DDI_SUCCESS) {
735 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
736 		    "px_lib_iommu_map failed\n");
737 		return (DDI_FAILURE);
738 	}
739 
740 	if (!PX_MAP_BUFZONE(mp))
741 		goto done;
742 
743 	DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
744 
745 	ASSERT(PX_HAS_REDZONE(mp));
746 
747 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1,
748 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, npages - 1,
749 	    MMU_MAP_PFN) != DDI_SUCCESS) {
750 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
751 		    "mapping REDZONE page failed\n");
752 
753 		(void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
754 		return (DDI_FAILURE);
755 	}
756 
757 done:
758 #ifdef PX_DMA_PROF
759 	px_dvmaft_success++;
760 #endif
761 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
762 	mp->dmai_offset = 0;
763 	mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
764 	PX_SAVE_MP_TTE(mp, attr);	/* save TTE template for unmapping */
765 	if (PX_DVMA_DBG_ON(mmu_p))
766 		px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
767 		    mp->dmai_size, mp);
768 	return (DDI_SUCCESS);
769 }
770 
771 /*
772  * px_dvma_map: map non-fasttrack DMA
773  *		Use quantum cache if single page DMA.
774  */
775 int
776 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
777 {
778 	uint_t npages = PX_DMA_WINNPGS(mp);
779 	px_dvma_addr_t dvma_pg, dvma_pg_index;
780 	void *dvma_addr;
781 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
782 	    mp->dmai_attr.dma_attr_flags);
783 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
784 	dev_info_t *dip = mp->dmai_rdip;
785 	int	ret = DDI_SUCCESS;
786 
787 	/*
788 	 * allocate dvma space resource and map in the first window.
789 	 * (vmem_t *vmp, size_t size,
790 	 *	size_t align, size_t phase, size_t nocross,
791 	 *	void *minaddr, void *maxaddr, int vmflag)
792 	 */
793 	if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
794 		dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
795 		    MMU_PAGE_SIZE, sleep);
796 		mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
797 #ifdef	PX_DMA_PROF
798 		px_dvma_vmem_alloc++;
799 #endif	/* PX_DMA_PROF */
800 	} else {
801 		dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
802 		    MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
803 		    MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
804 		    0,
805 		    mp->dmai_attr.dma_attr_seg + 1,
806 		    (void *)mp->dmai_attr.dma_attr_addr_lo,
807 		    (void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
808 		    sleep);
809 #ifdef	PX_DMA_PROF
810 		px_dvma_vmem_xalloc++;
811 #endif	/* PX_DMA_PROF */
812 	}
813 	dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
814 	dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
815 	DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
816 	    dvma_pg, dvma_pg_index);
817 	if (dvma_pg == 0)
818 		goto noresource;
819 
820 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
821 	mp->dmai_offset = 0;
822 	PX_SAVE_MP_TTE(mp, attr);	/* mp->dmai_tte = tte */
823 
824 	if ((ret = px_mmu_map_pages(mmu_p,
825 	    mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
826 		if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
827 			vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
828 			    MMU_PAGE_SIZE);
829 #ifdef PX_DMA_PROF
830 			px_dvma_vmem_free++;
831 #endif /* PX_DMA_PROF */
832 		} else {
833 			vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
834 			    MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
835 #ifdef PX_DMA_PROF
836 			px_dvma_vmem_xfree++;
837 #endif /* PX_DMA_PROF */
838 		}
839 	}
840 
841 	return (ret);
842 noresource:
843 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
844 		DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
845 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
846 		    &mmu_p->mmu_dvma_clid);
847 	}
848 	DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
849 	return (DDI_DMA_NORESOURCES);
850 }
851 
852 void
853 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
854 {
855 	px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
856 	px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
857 	dvma_addr = MMU_PTOB(dvma_pg);
858 
859 	if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
860 		px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
861 		ASSERT(index % px_dvma_page_cache_clustsz == 0);
862 		index /= px_dvma_page_cache_clustsz;
863 		ASSERT(index < px_dvma_page_cache_entries);
864 		mmu_p->mmu_dvma_cache_locks[index] = 0;
865 #ifdef	PX_DMA_PROF
866 		px_dvmaft_free++;
867 #endif	/* PX_DMA_PROF */
868 		return;
869 	}
870 
871 	if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
872 		vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
873 		    MMU_PAGE_SIZE);
874 #ifdef PX_DMA_PROF
875 		px_dvma_vmem_free++;
876 #endif /* PX_DMA_PROF */
877 	} else {
878 		size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
879 		vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
880 		    MMU_PTOB(npages));
881 #ifdef PX_DMA_PROF
882 		px_dvma_vmem_xfree++;
883 #endif /* PX_DMA_PROF */
884 	}
885 }
886 
/*
 * DVMA mappings may have multiple windows, but each window always has
 * exactly one segment.
 */
891 int
892 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
893 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
894 	uint_t cache_flags)
895 {
896 	switch (cmd) {
897 	case DDI_DMA_SYNC:
898 		return (px_lib_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
899 		    *offp, *lenp, cache_flags));
900 
901 	case DDI_DMA_HTOC: {
902 		int ret;
903 		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
904 		uint_t win_size = mp->dmai_winsize;
905 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;
906 
907 		if (off >= mp->dmai_object.dmao_size) {
908 			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
909 			    NAMEINST(mp->dmai_rdip), off);
910 			return (DDI_FAILURE);
911 		}
912 		off += mp->dmai_roffset;
913 		ret = px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
914 		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
915 		if (ret)
916 			return (ret);
917 		DBG(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
918 		    cp->dmac_address, cp->dmac_size, off, *offp);
919 
920 		/* adjust cookie addr/len if we are not on window boundary */
921 		ASSERT((off % win_size) == (off -
922 		    (PX_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
923 		off = PX_DMA_CURWIN(mp) ? off % win_size : *offp;
924 		ASSERT(cp->dmac_size > off);
925 		cp->dmac_laddress += off;
926 		cp->dmac_size -= off;
927 		DBG(DBG_DMA_CTL, dip, "HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
928 		    mp, cp->dmac_address, cp->dmac_size, off, wo_off);
929 		}
930 		return (DDI_SUCCESS);
931 
932 	case DDI_DMA_REPWIN:
933 		*offp = mp->dmai_offset;
934 		*lenp = mp->dmai_size;
935 		return (DDI_SUCCESS);
936 
937 	case DDI_DMA_MOVWIN: {
938 		off_t off = *offp;
939 		if (off >= mp->dmai_object.dmao_size)
940 			return (DDI_FAILURE);
941 		off += mp->dmai_roffset;
942 		return (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
943 		    off / mp->dmai_winsize, offp, lenp,
944 		    (ddi_dma_cookie_t *)objp, NULL));
945 		}
946 
947 	case DDI_DMA_NEXTWIN: {
948 		px_window_t win = PX_DMA_CURWIN(mp);
949 		if (offp) {
950 			if (*(px_window_t *)offp != win) {
951 				/* window not active */
952 				*(px_window_t *)objp = win; /* return cur win */
953 				return (DDI_DMA_STALE);
954 			}
955 			win++;
956 		} else	/* map win 0 */
957 			win = 0;
958 		if (win >= mp->dmai_nwin) {
959 			*(px_window_t *)objp = win - 1;
960 			return (DDI_DMA_DONE);
961 		}
962 		if (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
963 		    win, 0, 0, 0, 0)) {
964 			*(px_window_t *)objp = win - 1;
965 			return (DDI_FAILURE);
966 		}
967 		*(px_window_t *)objp = win;
968 		}
969 		return (DDI_SUCCESS);
970 
971 	case DDI_DMA_NEXTSEG:
972 		if (*(px_window_t *)offp != PX_DMA_CURWIN(mp))
973 			return (DDI_DMA_STALE);
974 		if (lenp)				/* only 1 seg allowed */
975 			return (DDI_DMA_DONE);
976 
977 		/* return mp as seg 0 */
978 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
979 		return (DDI_SUCCESS);
980 
981 	case DDI_DMA_SEGTOC:
982 		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
983 		    mp->dmai_size);
984 		*offp = mp->dmai_offset;
985 		*lenp = mp->dmai_size;
986 		return (DDI_SUCCESS);
987 
988 	case DDI_DMA_COFF: {
989 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
990 		if (cp->dmac_address < mp->dmai_mapping ||
991 		    (cp->dmac_address + cp->dmac_size) >
992 		    (mp->dmai_mapping + mp->dmai_size))
993 			return (DDI_FAILURE);
994 		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
995 		    mp->dmai_offset);
996 		}
997 		return (DDI_SUCCESS);
998 	default:
999 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1000 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1001 		break;
1002 	}
1003 	return (DDI_FAILURE);
1004 }
1005 
1006 void
1007 px_dma_freewin(ddi_dma_impl_t *mp)
1008 {
1009 	px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
1010 	for (win2_p = win_p; win_p; win2_p = win_p) {
1011 		win_p = win2_p->win_next;
1012 		kmem_free(win2_p, sizeof (px_dma_win_t) +
1013 		    sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
1014 	}
1015 	mp->dmai_nwin = 0;
1016 	mp->dmai_winlst = NULL;
1017 }
1018 
1019 /*
1020  * px_dma_newwin - create a dma window object and cookies
1021  *
1022  *	After the initial scan in px_dma_physwin(), which identifies
1023  *	a portion of the pfn array that belongs to a dma window,
1024  *	we are called to allocate and initialize representing memory
1025  *	resources. We know from the 1st scan the number of cookies
1026  *	or dma segment in this window so we can allocate a contiguous
1027  *	memory array for the dma cookies (The implementation of
1028  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
1029  *
1030  *	A second round scan is done on the pfn array to identify
1031  *	each dma segment and initialize its corresponding dma cookie.
1032  *	We don't need to do all the safety checking and we know they
1033  *	all belong to the same dma window.
1034  *
1035  *	Input:	cookie_no - # of cookies identified by the 1st scan
1036  *		start_idx - subscript of the pfn array for the starting pfn
1037  *		end_idx   - subscript of the last pfn in dma window
1038  *		win_pp    - pointer to win_next member of previous window
1039  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
1040  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
1041  *	Note:	Each cookie and window size are all initialized on page
1042  *		boundary. This is not true for the 1st cookie of the 1st
1043  *		window and the last cookie of the last window.
1044  *		We fix that later in upper layer which has access to size
1045  *		and offset info.
1046  *
1047  */
1048 /*ARGSUSED*/
1049 static int
1050 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
1051 	uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
1052 	px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
1053 {
1054 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
1055 	ddi_dma_cookie_t *cookie_p;
1056 	uint32_t pfn_no = 1;
1057 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
1058 	px_iopfn_t prev_pfn = pfn;
1059 	uint64_t baddr, seg_pfn0 = pfn;
1060 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
1061 	px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
1062 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
1063 	io_attributes_t	attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1064 	    mp->dmai_attr.dma_attr_flags);
1065 
1066 	if (!win_p)
1067 		goto noresource;
1068 
1069 	win_p->win_next = NULL;
1070 	win_p->win_ncookies = cookie_no;
1071 	win_p->win_curseg = 0;	/* start from segment 0 */
1072 	win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
1073 	/* win_p->win_offset is left uninitialized */
1074 
1075 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1076 	start_idx++;
1077 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
1078 		pfn = PX_GET_MP_PFN1(mp, start_idx);
1079 		if ((pfn == prev_pfn + 1) &&
1080 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1081 			continue;
1082 
1083 		/* close up the cookie up to (including) prev_pfn */
1084 		baddr = MMU_PTOB(seg_pfn0);
1085 		if (bypass) {
1086 			if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
1087 			    == DDI_SUCCESS)
1088 				baddr = px_lib_ro_bypass(dip, attr, baddr);
1089 			else
1090 				return (DDI_FAILURE);
1091 		}
1092 
1093 		MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1094 		DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
1095 		    MMU_PTOB(seg_pfn0), pfn_no);
1096 
1097 		cookie_p++;	/* advance to next available cookie cell */
1098 		pfn_no = 0;
1099 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
1100 	}
1101 
1102 	baddr = MMU_PTOB(seg_pfn0);
1103 	if (bypass) {
1104 		if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
1105 		    == DDI_SUCCESS)
1106 			baddr = px_lib_ro_bypass(dip, attr, baddr);
1107 		else
1108 			return (DDI_FAILURE);
1109 	}
1110 
1111 	MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1112 	DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1113 	    MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
1114 #ifdef	DEBUG
1115 	cookie_p++;
1116 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1117 #endif	/* DEBUG */
1118 	*win_pp = win_p;
1119 	return (DDI_SUCCESS);
1120 noresource:
1121 	if (waitfp != DDI_DMA_DONTWAIT)
1122 		ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
1123 	return (DDI_DMA_NORESOURCES);
1124 }
1125 
1126 /*
1127  * px_dma_adjust - adjust 1st and last cookie and window sizes
1128  *	remove initial dma page offset from 1st cookie and window size
1129  *	remove last dma page remainder from last cookie and window size
1130  *	fill win_offset of each dma window according to just fixed up
1131  *		each window sizes
1132  *	px_dma_win_t members modified:
1133  *	win_p->win_offset - this window's offset within entire DMA object
1134  *	win_p->win_size	  - xferrable size (in bytes) for this window
1135  *
1136  *	ddi_dma_impl_t members modified:
1137  *	mp->dmai_size	  - 1st window xferrable size
1138  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
1139  *
1140  *	ddi_dma_cookie_t members modified:
1141  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1142  *	cookie_p->dmac_laddress - 1st cookie add page offset
1143  */
1144 static void
1145 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
1146 {
1147 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1148 	size_t pg_offset = mp->dmai_roffset;
1149 	size_t win_offset = 0;
1150 
1151 	cookie_p->dmac_size -= pg_offset;
1152 	cookie_p->dmac_laddress |= pg_offset;
1153 	win_p->win_size -= pg_offset;
1154 	DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1155 
1156 	mp->dmai_size = win_p->win_size;
1157 	mp->dmai_offset = 0;
1158 
1159 	pg_offset += mp->dmai_object.dmao_size;
1160 	pg_offset &= MMU_PAGE_OFFSET;
1161 	if (pg_offset)
1162 		pg_offset = MMU_PAGE_SIZE - pg_offset;
1163 	DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1164 
1165 	for (; win_p->win_next; win_p = win_p->win_next) {
1166 		DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1167 		win_p->win_offset = win_offset;
1168 		win_offset += win_p->win_size;
1169 	}
1170 	/* last window */
1171 	win_p->win_offset = win_offset;
1172 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1173 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1174 	win_p->win_size -= pg_offset;
1175 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1176 }
1177 
1178 /*
1179  * px_dma_physwin() - carve up dma windows using physical addresses.
1180  *	Called to handle mmu bypass and pci peer-to-peer transfers.
1181  *	Calls px_dma_newwin() to allocate window objects.
1182  *
1183  * Dependency: mp->dmai_pfnlst points to an array of pfns
1184  *
1185  * 1. Each dma window is represented by a px_dma_win_t object.
1186  *	The object will be casted to ddi_dma_win_t and returned
1187  *	to leaf driver through the DDI interface.
1188  * 2. Each dma window can have several dma segments with each
1189  *	segment representing a physically contiguous either memory
1190  *	space (if we are doing an mmu bypass transfer) or pci address
1191  *	space (if we are doing a peer-to-peer transfer).
1192  * 3. Each segment has a DMA cookie to program the DMA engine.
1193  *	The cookies within each DMA window must be located in a
1194  *	contiguous array per ddi_dma_nextcookie(9f).
1195  * 4. The number of DMA segments within each DMA window cannot exceed
1196  *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
1197  *	too large to fit in the sgllen, the rest needs to be
1198  *	relocated to the next dma window.
1199  * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
1200  *	and nocross restrictions while bypass DMA follows the set of
1201  *	restrictions with system limits factored in.
1202  *
1203  * Return:
1204  *	mp->dmai_winlst	 - points to a link list of px_dma_win_t objects.
1205  *		Each px_dma_win_t object on the link list contains
1206  *		infomation such as its window size (# of pages),
1207  *		starting offset (also see Restriction), an array of
1208  *		DMA cookies, and # of cookies in the array.
1209  *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
1210  *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
1211  *	mp->dmai_mapping - starting cookie address
1212  *	mp->dmai_rflags	 - consistent, nosync, no redzone
1213  *	mp->dmai_cookie	 - start of cookie table of the 1st DMA window
1214  *
1215  * Restriction:
1216  *	Each px_dma_win_t object can theoratically start from any offset
1217  *	since the mmu is not involved. However, this implementation
1218  *	always make windows start from page aligned offset (except
1219  *	the 1st window, which follows the requested offset) due to the
1220  *	fact that we are handed a pfn list. This does require device's
1221  *	count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
1222  */
1223 int
1224 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
1225 {
1226 	uint_t npages = mp->dmai_ndvmapages;
1227 	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
1228 	px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
1229 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
1230 	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
1231 	uint64_t count_max, bypass_addr = 0;
1232 	px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
1233 	ddi_dma_cookie_t *cookie0_p;
1234 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1235 	    mp->dmai_attr.dma_attr_flags);
1236 	dev_info_t *dip = px_p->px_dip;
1237 
1238 	ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
1239 	if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
1240 		ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
1241 		uint64_t nocross = dev_attr_p->dma_attr_seg;
1242 		px_pec_t *pec_p = px_p->px_pec_p;
1243 		px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
1244 		    pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
1245 		    pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
1246 
1247 		if (nocross && (nocross < UINT32_MAX))
1248 			return (DDI_DMA_NOMAPPING);
1249 		if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
1250 			return (DDI_DMA_NOMAPPING);
1251 		pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
1252 		pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
1253 		pfn_hi = MIN(pfn_hi, pfn_last);
1254 		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
1255 			return (DDI_DMA_NOMAPPING);
1256 
1257 		count_max = dev_attr_p->dma_attr_count_max;
1258 		count_max = MIN(count_max, nocross);
1259 		/*
1260 		 * the following count_max trim is not done because we are
1261 		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
1262 		 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
1263 		 */
1264 	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
1265 		count_max = mp->dmai_attr.dma_attr_count_max;
1266 		pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
1267 		pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
1268 
1269 		if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
1270 		    attr, &bypass_addr) != DDI_SUCCESS) {
1271 			DBG(DBG_BYPASS, mp->dmai_rdip,
1272 			    "bypass cookie failure %lx\n", pfn);
1273 			return (DDI_DMA_NOMAPPING);
1274 		}
1275 		pfn = MMU_BTOP(bypass_addr);
1276 	}
1277 
1278 	/* pfn: absolute (bypass mode) or relative (p2p mode) */
1279 	for (prev_pfn = pfn, i = 1; i < npages;
1280 	    i++, prev_pfn = pfn, pfn_no++) {
1281 		pfn = PX_GET_MP_PFN1(mp, i);
1282 		if (bypass_addr) {
1283 			if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
1284 			    &bypass_addr) != DDI_SUCCESS) {
1285 				ret = DDI_DMA_NOMAPPING;
1286 				goto err;
1287 			}
1288 			pfn = MMU_BTOP(bypass_addr);
1289 		}
1290 		if ((pfn == prev_pfn + 1) &&
1291 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1292 			continue;
1293 		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
1294 			ret = DDI_DMA_NOMAPPING;
1295 			goto err;
1296 		}
1297 		cookie_no++;
1298 		pfn_no = 0;
1299 		if (cookie_no < sgllen)
1300 			continue;
1301 
1302 		DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1303 		    win_pfn0_index, i - 1, cookie_no);
1304 		if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
1305 		    win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
1306 			goto err;
1307 
1308 		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
1309 		win_no++;
1310 		win_pfn0_index = i;
1311 		cookie_no = 0;
1312 	}
1313 	if (pfn > pfn_hi) {
1314 		ret = DDI_DMA_NOMAPPING;
1315 		goto err;
1316 	}
1317 	cookie_no++;
1318 	DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1319 	    win_pfn0_index, i - 1, cookie_no);
1320 	if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
1321 	    i - 1, win_pp, count_max, bypass_addr))
1322 		goto err;
1323 	win_no++;
1324 	px_dma_adjust(dmareq, mp, mp->dmai_winlst);
1325 	mp->dmai_nwin = win_no;
1326 	mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
1327 	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
1328 	mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
1329 	cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
1330 	mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
1331 	mp->dmai_mapping = cookie0_p->dmac_laddress;
1332 
1333 	px_dma_freepfn(mp);
1334 	return (DDI_DMA_MAPPED);
1335 err:
1336 	px_dma_freewin(mp);
1337 	return (ret);
1338 }
1339 
1340 int
1341 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1342 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1343 	uint_t cache_flags)
1344 {
1345 	switch (cmd) {
1346 	case DDI_DMA_SYNC:
1347 		return (DDI_SUCCESS);
1348 
1349 	case DDI_DMA_HTOC: {
1350 		off_t off = *offp;
1351 		ddi_dma_cookie_t *loop_cp, *cp;
1352 		px_dma_win_t *win_p = mp->dmai_winlst;
1353 
1354 		if (off >= mp->dmai_object.dmao_size)
1355 			return (DDI_FAILURE);
1356 
1357 		/* locate window */
1358 		while (win_p->win_offset + win_p->win_size <= off)
1359 			win_p = win_p->win_next;
1360 
1361 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
1362 		mp->dmai_offset = win_p->win_offset;
1363 		mp->dmai_size   = win_p->win_size;
1364 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
1365 
1366 		/* adjust cookie addr/len if we are not on cookie boundary */
1367 		off -= win_p->win_offset;	   /* offset within window */
1368 		for (; off >= loop_cp->dmac_size; loop_cp++)
1369 			off -= loop_cp->dmac_size; /* offset within cookie */
1370 
1371 		mp->dmai_cookie = loop_cp + 1;
1372 		win_p->win_curseg = loop_cp - cp;
1373 		cp = (ddi_dma_cookie_t *)objp;
1374 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
1375 		    loop_cp->dmac_size - off);
1376 
1377 		DBG(DBG_DMA_CTL, dip,
1378 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1379 		    cp->dmac_laddress, cp->dmac_size);
1380 		}
1381 		return (DDI_SUCCESS);
1382 
1383 	case DDI_DMA_REPWIN:
1384 		*offp = mp->dmai_offset;
1385 		*lenp = mp->dmai_size;
1386 		return (DDI_SUCCESS);
1387 
1388 	case DDI_DMA_MOVWIN: {
1389 		off_t off = *offp;
1390 		ddi_dma_cookie_t *cp;
1391 		px_dma_win_t *win_p = mp->dmai_winlst;
1392 
1393 		if (off >= mp->dmai_object.dmao_size)
1394 			return (DDI_FAILURE);
1395 
1396 		/* locate window */
1397 		while (win_p->win_offset + win_p->win_size <= off)
1398 			win_p = win_p->win_next;
1399 
1400 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1401 		mp->dmai_offset = win_p->win_offset;
1402 		mp->dmai_size   = win_p->win_size;
1403 		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 star addr */
1404 		mp->dmai_cookie = cp + 1;
1405 		win_p->win_curseg = 0;
1406 
1407 		*(ddi_dma_cookie_t *)objp = *cp;
1408 		*offp = win_p->win_offset;
1409 		*lenp = win_p->win_size;
1410 		DBG(DBG_DMA_CTL, dip,
1411 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
1412 		    cp->dmac_laddress, cp->dmac_size);
1413 		}
1414 		return (DDI_SUCCESS);
1415 
1416 	case DDI_DMA_NEXTWIN: {
1417 		px_dma_win_t *win_p = *(px_dma_win_t **)offp;
1418 		px_dma_win_t **nw_pp = (px_dma_win_t **)objp;
1419 		ddi_dma_cookie_t *cp;
1420 		if (!win_p) {
1421 			*nw_pp = mp->dmai_winlst;
1422 			return (DDI_SUCCESS);
1423 		}
1424 
1425 		if (win_p->win_offset != mp->dmai_offset)
1426 			return (DDI_DMA_STALE);
1427 		if (!win_p->win_next)
1428 			return (DDI_DMA_DONE);
1429 		win_p = win_p->win_next;
1430 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1431 		mp->dmai_offset = win_p->win_offset;
1432 		mp->dmai_size   = win_p->win_size;
1433 		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 star addr */
1434 		mp->dmai_cookie = cp + 1;
1435 		win_p->win_curseg = 0;
1436 		*nw_pp = win_p;
1437 		}
1438 		return (DDI_SUCCESS);
1439 
1440 	case DDI_DMA_NEXTSEG: {
1441 		px_dma_win_t *w_p = *(px_dma_win_t **)offp;
1442 		if (w_p->win_offset != mp->dmai_offset)
1443 			return (DDI_DMA_STALE);
1444 		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
1445 			return (DDI_DMA_DONE);
1446 		w_p->win_curseg++;
1447 		}
1448 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
1449 		return (DDI_SUCCESS);
1450 
1451 	case DDI_DMA_SEGTOC: {
1452 		px_dma_win_t *win_p = mp->dmai_winlst;
1453 		off_t off = mp->dmai_offset;
1454 		ddi_dma_cookie_t *cp;
1455 		int i;
1456 
1457 		/* locate active window */
1458 		for (; win_p->win_offset != off; win_p = win_p->win_next)
1459 			;
1460 		cp = (ddi_dma_cookie_t *)(win_p + 1);
1461 		for (i = 0; i < win_p->win_curseg; i++, cp++)
1462 			off += cp->dmac_size;
1463 		*offp = off;
1464 		*lenp = cp->dmac_size;
1465 		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
1466 		}
1467 		return (DDI_SUCCESS);
1468 
1469 	case DDI_DMA_COFF: {
1470 		px_dma_win_t *win_p;
1471 		ddi_dma_cookie_t *cp;
1472 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
1473 		size_t win_off;
1474 
1475 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
1476 			int i;
1477 			win_off = 0;
1478 			cp = (ddi_dma_cookie_t *)(win_p + 1);
1479 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
1480 				size_t sz = cp->dmac_size;
1481 
1482 				addr = cp->dmac_laddress;
1483 				if ((addr <= key) && (addr + sz >= key))
1484 					goto found;
1485 				win_off += sz;
1486 			}
1487 		}
1488 		return (DDI_FAILURE);
1489 found:
1490 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
1491 		return (DDI_SUCCESS);
1492 		}
1493 	default:
1494 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1495 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1496 		break;
1497 	}
1498 	return (DDI_FAILURE);
1499 }
1500 
1501 static void
1502 px_dvma_debug_init(px_mmu_t *mmu_p)
1503 {
1504 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1505 	ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
1506 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
1507 
1508 	mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
1509 	mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
1510 
1511 	mmu_p->dvma_active_list = NULL;
1512 	mmu_p->dvma_alloc_rec_index = 0;
1513 	mmu_p->dvma_free_rec_index = 0;
1514 	mmu_p->dvma_active_count = 0;
1515 }
1516 
1517 void
1518 px_dvma_debug_fini(px_mmu_t *mmu_p)
1519 {
1520 	struct px_dvma_rec *prev, *ptr;
1521 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1522 	uint64_t mask = ~(1ull << mmu_p->mmu_inst);
1523 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
1524 
1525 	if (mmu_p->dvma_alloc_rec) {
1526 		kmem_free(mmu_p->dvma_alloc_rec, sz);
1527 		mmu_p->dvma_alloc_rec = NULL;
1528 	}
1529 	if (mmu_p->dvma_free_rec) {
1530 		kmem_free(mmu_p->dvma_free_rec, sz);
1531 		mmu_p->dvma_free_rec = NULL;
1532 	}
1533 
1534 	prev = mmu_p->dvma_active_list;
1535 	if (!prev)
1536 		return;
1537 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1538 		kmem_free(prev, sizeof (struct px_dvma_rec));
1539 	kmem_free(prev, sizeof (struct px_dvma_rec));
1540 
1541 	mmu_p->dvma_active_list = NULL;
1542 	mmu_p->dvma_alloc_rec_index = 0;
1543 	mmu_p->dvma_free_rec_index = 0;
1544 	mmu_p->dvma_active_count = 0;
1545 
1546 	px_dvma_debug_off &= mask;
1547 	px_dvma_debug_on &= mask;
1548 }
1549 
1550 void
1551 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1552 	ddi_dma_impl_t *mp)
1553 {
1554 	struct px_dvma_rec *ptr;
1555 	mutex_enter(&mmu_p->dvma_debug_lock);
1556 
1557 	if (!mmu_p->dvma_alloc_rec)
1558 		px_dvma_debug_init(mmu_p);
1559 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1560 		px_dvma_debug_fini(mmu_p);
1561 		goto done;
1562 	}
1563 
1564 	ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
1565 	ptr->dvma_addr = address;
1566 	ptr->len = len;
1567 	ptr->mp = mp;
1568 	if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
1569 		mmu_p->dvma_alloc_rec_index = 0;
1570 
1571 	ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
1572 	ptr->dvma_addr = address;
1573 	ptr->len = len;
1574 	ptr->mp = mp;
1575 
1576 	ptr->next = mmu_p->dvma_active_list;
1577 	mmu_p->dvma_active_list = ptr;
1578 	mmu_p->dvma_active_count++;
1579 done:
1580 	mutex_exit(&mmu_p->dvma_debug_lock);
1581 }
1582 
1583 void
1584 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1585     ddi_dma_impl_t *mp)
1586 {
1587 	struct px_dvma_rec *ptr, *ptr_save;
1588 	mutex_enter(&mmu_p->dvma_debug_lock);
1589 
1590 	if (!mmu_p->dvma_alloc_rec)
1591 		px_dvma_debug_init(mmu_p);
1592 	if (PX_DVMA_DBG_OFF(mmu_p)) {
1593 		px_dvma_debug_fini(mmu_p);
1594 		goto done;
1595 	}
1596 
1597 	ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
1598 	ptr->dvma_addr = address;
1599 	ptr->len = len;
1600 	ptr->mp = mp;
1601 	if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
1602 		mmu_p->dvma_free_rec_index = 0;
1603 
1604 	ptr_save = mmu_p->dvma_active_list;
1605 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
1606 		if ((ptr->dvma_addr == address) && (ptr->len = len))
1607 			break;
1608 		ptr_save = ptr;
1609 	}
1610 	if (!ptr) {
1611 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1612 		    (long)address, len);
1613 		goto done;
1614 	}
1615 	if (ptr == mmu_p->dvma_active_list)
1616 		mmu_p->dvma_active_list = ptr->next;
1617 	else
1618 		ptr_save->next = ptr->next;
1619 	kmem_free(ptr, sizeof (struct px_dvma_rec));
1620 	mmu_p->dvma_active_count--;
1621 done:
1622 	mutex_exit(&mmu_p->dvma_debug_lock);
1623 }
1624 
1625 #ifdef	DEBUG
1626 void
1627 px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
1628 {
1629 	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
1630 	    hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
1631 	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
1632 	    hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
1633 	    hp->dmai_nwin);
1634 	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
1635 	    hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
1636 	DBG(flag|DBG_CONT, dip, "\twinlst=%x obj=%p attr=%p ckp=%p\n",
1637 	    hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
1638 	    hp->dmai_cookie);
1639 }
1640 #endif	/* DEBUG */
1641