1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
27 */
28
29 /*
30 * PCI Express nexus DVMA and DMA core routines:
31 * dma_map/dma_bind_handle implementation
32 * bypass and peer-to-peer support
33 * fast track DVMA space allocation
34 * runtime DVMA debug
35 */
36 #include <sys/types.h>
37 #include <sys/kmem.h>
38 #include <sys/async.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sunddi.h>
41 #include <sys/ddi_impldefs.h>
42 #include "px_obj.h"
43
44 /*LINTLIBRARY*/
45
46 /*
47 * px_dma_allocmp - Allocate a pci dma implementation structure
48 *
49 * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
50 * to hold unmodified device limits. The ddi_dma_attr inside the
51 * ddi_dma_impl structure is augumented with system limits to enhance
52 * DVMA performance at runtime. The unaugumented device limits saved
53 * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
54 * strictly for peer-to-peer transfers which do not obey system limits.
55 *
56 * return: DDI_SUCCESS DDI_DMA_NORESOURCES
57 */
58 ddi_dma_impl_t *
px_dma_allocmp(dev_info_t * dip,dev_info_t * rdip,int (* waitfp)(caddr_t),caddr_t arg)59 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
60 caddr_t arg)
61 {
62 register ddi_dma_impl_t *mp;
63 int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
64
65 /* Caution: we don't use zalloc to enhance performance! */
66 if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
67 DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
68 if (waitfp != DDI_DMA_DONTWAIT) {
69 DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
70 ddi_set_callback(waitfp, arg, &px_kmem_clid);
71 }
72 return (mp);
73 }
74
75 mp->dmai_rdip = rdip;
76 mp->dmai_flags = 0;
77 mp->dmai_pfnlst = NULL;
78 mp->dmai_winlst = NULL;
79
80 /*
81 * kmem_alloc debug: the following fields are not zero-ed
82 * mp->dmai_mapping = 0;
83 * mp->dmai_size = 0;
84 * mp->dmai_offset = 0;
85 * mp->dmai_minxfer = 0;
86 * mp->dmai_burstsizes = 0;
87 * mp->dmai_ndvmapages = 0;
88 * mp->dmai_pool/roffset = 0;
89 * mp->dmai_rflags = 0;
90 * mp->dmai_inuse/flags
91 * mp->dmai_nwin = 0;
92 * mp->dmai_winsize = 0;
93 * mp->dmai_nexus_private/tte = 0;
94 * mp->dmai_iopte/pfnlst
95 * mp->dmai_sbi/pfn0 = 0;
96 * mp->dmai_minfo/winlst/fdvma
97 * mp->dmai_rdip
98 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
99 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
100 * mp->dmai_cookie = 0;
101 */
102
103 mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
104 mp->dmai_attr.dma_attr_flags = (uint_t)0;
105 mp->dmai_fault = 0;
106 mp->dmai_fault_check = NULL;
107 mp->dmai_fault_notify = NULL;
108
109 mp->dmai_error.err_ena = 0;
110 mp->dmai_error.err_status = DDI_FM_OK;
111 mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
112 mp->dmai_error.err_ontrap = NULL;
113 mp->dmai_error.err_fep = NULL;
114 mp->dmai_error.err_cf = NULL;
115
116 /*
117 * The bdf protection value is set to immediate child
118 * at first. It gets modified by switch/bridge drivers
119 * as the code traverses down the fabric topology.
120 *
121 * XXX No IOMMU protection for broken devices.
122 */
123 ASSERT((intptr_t)ddi_get_parent_data(rdip) >> 1 == 0);
124 mp->dmai_bdf = ((intptr_t)ddi_get_parent_data(rdip) == 1) ?
125 PCIE_INVALID_BDF : pcie_get_bdf_for_dma_xfer(dip, rdip);
126
127 ndi_fmc_insert(rdip, DMA_HANDLE, mp, NULL);
128 return (mp);
129 }
130
131 void
px_dma_freemp(ddi_dma_impl_t * mp)132 px_dma_freemp(ddi_dma_impl_t *mp)
133 {
134 ndi_fmc_remove(mp->dmai_rdip, DMA_HANDLE, mp);
135 if (mp->dmai_ndvmapages > 1)
136 px_dma_freepfn(mp);
137 if (mp->dmai_winlst)
138 px_dma_freewin(mp);
139 kmem_free(mp, sizeof (px_dma_hdl_t));
140 }
141
142 void
px_dma_freepfn(ddi_dma_impl_t * mp)143 px_dma_freepfn(ddi_dma_impl_t *mp)
144 {
145 void *addr = mp->dmai_pfnlst;
146 if (addr) {
147 size_t npages = mp->dmai_ndvmapages;
148 if (npages > 1)
149 kmem_free(addr, npages * sizeof (px_iopfn_t));
150 mp->dmai_pfnlst = NULL;
151 }
152 mp->dmai_ndvmapages = 0;
153 }
154
155 /*
156 * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
157 * and convert dmareq->dmar_limits to mp->dmai_attr
158 *
159 * ddi_dma_impl_t member modified input
160 * ------------------------------------------------------------------------
161 * mp->dmai_minxfer - dev
162 * mp->dmai_burstsizes - dev
163 * mp->dmai_flags - no limit? peer-to-peer only?
164 *
165 * ddi_dma_attr member modified input
166 * ------------------------------------------------------------------------
167 * mp->dmai_attr.dma_attr_addr_lo - dev lo, sys lo
168 * mp->dmai_attr.dma_attr_addr_hi - dev hi, sys hi
169 * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
170 * mp->dmai_attr.dma_attr_seg - 0 (no nocross restriction)
171 * mp->dmai_attr.dma_attr_align - 1 (no alignment restriction)
172 *
173 * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
174 */
175 ddi_dma_impl_t *
px_dma_lmts2hdl(dev_info_t * dip,dev_info_t * rdip,px_mmu_t * mmu_p,ddi_dma_req_t * dmareq)176 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
177 ddi_dma_req_t *dmareq)
178 {
179 ddi_dma_impl_t *mp;
180 ddi_dma_attr_t *attr_p;
181 uint64_t syslo = mmu_p->mmu_dvma_base;
182 uint64_t syshi = mmu_p->mmu_dvma_end;
183 uint64_t fasthi = mmu_p->mmu_dvma_fast_end;
184 ddi_dma_lim_t *lim_p = dmareq->dmar_limits;
185 uint32_t count_max = lim_p->dlim_cntr_max;
186 uint64_t lo = lim_p->dlim_addr_lo;
187 uint64_t hi = lim_p->dlim_addr_hi;
188 if (hi <= lo) {
189 DBG(DBG_DMA_MAP, dip, "Bad limits\n");
190 return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
191 }
192 if (!count_max)
193 count_max--;
194
195 if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
196 dmareq->dmar_arg)))
197 return (NULL);
198
199 /* store original dev input at the 2nd ddi_dma_attr */
200 attr_p = PX_DEV_ATTR(mp);
201 SET_DMAATTR(attr_p, lo, hi, -1, count_max);
202 SET_DMAALIGN(attr_p, 1);
203
204 lo = MAX(lo, syslo);
205 hi = MIN(hi, syshi);
206 if (hi <= lo)
207 mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
208 count_max = MIN(count_max, hi - lo);
209
210 if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
211 mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
212 PX_DMAI_FLAGS_NOSYSLIMIT;
213 else {
214 if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
215 mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
216 }
217 if (PX_DMA_NOCTX(rdip))
218 mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
219
220 /* store augumented dev input to mp->dmai_attr */
221 mp->dmai_burstsizes = lim_p->dlim_burstsizes;
222 attr_p = &mp->dmai_attr;
223 SET_DMAATTR(attr_p, lo, hi, -1, count_max);
224 SET_DMAALIGN(attr_p, 1);
225 return (mp);
226 }
227
228 /*
229 * Called from px_attach to check for bypass dma support and set
230 * flags accordingly.
231 */
232 int
px_dma_attach(px_t * px_p)233 px_dma_attach(px_t *px_p)
234 {
235 uint64_t baddr;
236
237 if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
238 PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
239 &baddr) != DDI_ENOTSUP)
240 /* ignore all other errors */
241 px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
242
243 px_p->px_dma_sync_opt = ddi_prop_get_int(DDI_DEV_T_ANY,
244 px_p->px_dip, DDI_PROP_DONTPASS, "dma-sync-options", 0);
245
246 if (px_p->px_dma_sync_opt != 0)
247 px_p->px_dev_caps |= PX_DMA_SYNC_REQUIRED;
248
249 return (DDI_SUCCESS);
250 }
251
252 /*
253 * px_dma_attr2hdl
254 *
255 * This routine is called from the alloc handle entry point to sanity check the
256 * dma attribute structure.
257 *
258 * use by: px_dma_allochdl()
259 *
260 * return value:
261 *
262 * DDI_SUCCESS - on success
263 * DDI_DMA_BADATTR - attribute has invalid version number
264 * or address limits exclude dvma space
265 */
266 int
px_dma_attr2hdl(px_t * px_p,ddi_dma_impl_t * mp)267 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
268 {
269 px_mmu_t *mmu_p = px_p->px_mmu_p;
270 uint64_t syslo, syshi;
271 int ret;
272 ddi_dma_attr_t *attrp = PX_DEV_ATTR(mp);
273 uint64_t hi = attrp->dma_attr_addr_hi;
274 uint64_t lo = attrp->dma_attr_addr_lo;
275 uint64_t align = attrp->dma_attr_align;
276 uint64_t nocross = attrp->dma_attr_seg;
277 uint64_t count_max = attrp->dma_attr_count_max;
278
279 DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
280 attrp, HI32(count_max), LO32(count_max));
281 DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
282 HI32(hi), LO32(hi), HI32(lo), LO32(lo));
283 DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
284 HI32(nocross), LO32(nocross), HI32(align), LO32(align));
285
286 if (!nocross)
287 nocross--;
288 if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
289
290 DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
291 /*
292 * If Bypass DMA is not supported, return error so that
293 * target driver can fall back to dvma mode of operation
294 */
295 if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
296 return (DDI_DMA_BADATTR);
297 mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
298 if (nocross != UINT64_MAX)
299 return (DDI_DMA_BADATTR);
300 if (align && (align > MMU_PAGE_SIZE))
301 return (DDI_DMA_BADATTR);
302 align = 1; /* align on 1 page boundary */
303
304 /* do a range check and get the limits */
305 ret = px_lib_dma_bypass_rngchk(px_p->px_dip, attrp,
306 &syslo, &syshi);
307 if (ret != DDI_SUCCESS)
308 return (ret);
309 } else { /* MMU_XLATE or PEER_TO_PEER */
310 align = MAX(align, MMU_PAGE_SIZE) - 1;
311 if ((align & nocross) != align) {
312 dev_info_t *rdip = mp->dmai_rdip;
313 cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
314 NAMEINST(rdip));
315 return (DDI_DMA_BADATTR);
316 }
317 align = MMU_BTOP(align + 1);
318 syslo = mmu_p->mmu_dvma_base;
319 syshi = mmu_p->mmu_dvma_end;
320 }
321 if (hi <= lo) {
322 dev_info_t *rdip = mp->dmai_rdip;
323 cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
324 return (DDI_DMA_BADATTR);
325 }
326 lo = MAX(lo, syslo);
327 hi = MIN(hi, syshi);
328 if (!count_max)
329 count_max--;
330
331 DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
332 HI32(hi), LO32(hi), HI32(lo), LO32(lo));
333 if (hi <= lo) {
334 /*
335 * If this is an IOMMU bypass access, the caller can't use
336 * the required addresses, so fail it. Otherwise, it's
337 * peer-to-peer; ensure that the caller has no alignment or
338 * segment size restrictions.
339 */
340 if ((mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ||
341 (nocross < UINT32_MAX) || (align > 1))
342 return (DDI_DMA_BADATTR);
343
344 mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
345 } else /* set practical counter_max value */
346 count_max = MIN(count_max, hi - lo);
347
348 if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
349 mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
350 PX_DMAI_FLAGS_NOFASTLIMIT;
351 else {
352 syshi = mmu_p->mmu_dvma_fast_end;
353 if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
354 mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
355 }
356 if (PX_DMA_NOCTX(mp->dmai_rdip))
357 mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
358
359 mp->dmai_burstsizes = attrp->dma_attr_burstsizes;
360 attrp = &mp->dmai_attr;
361 SET_DMAATTR(attrp, lo, hi, nocross, count_max);
362 return (DDI_SUCCESS);
363 }
364
/*
 * TGT_PFN_INBETWEEN - non-zero when pfn lies in the inclusive range
 * [bgn, end].  Every argument is parenthesized so that expressions can be
 * passed safely; note that pfn is still evaluated twice.
 */
#define	TGT_PFN_INBETWEEN(pfn, bgn, end) \
	(((pfn) >= (bgn)) && ((pfn) <= (end)))
366
367 /*
368 * px_dma_type - determine which of the three types DMA (peer-to-peer,
369 * mmu bypass, or mmu translate) we are asked to do.
370 * Also checks pfn0 and rejects any non-peer-to-peer
371 * requests for peer-only devices.
372 *
373 * return values:
374 * DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
375 * DDI_SUCCESS
376 *
377 * dma handle members affected (set on exit):
378 * mp->dmai_object - dmareq->dmar_object
379 * mp->dmai_rflags - consistent?, nosync?, dmareq->dmar_flags
380 * mp->dmai_flags - DMA type
381 * mp->dmai_pfn0 - 1st page pfn (if va/size pair and not shadow)
382 * mp->dmai_roffset - initialized to starting MMU page offset
383 * mp->dmai_ndvmapages - # of total MMU pages of entire object
384 */
385 int
px_dma_type(px_t * px_p,ddi_dma_req_t * dmareq,ddi_dma_impl_t * mp)386 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
387 {
388 dev_info_t *dip = px_p->px_dip;
389 ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
390 px_pec_t *pec_p = px_p->px_pec_p;
391 uint32_t offset;
392 pfn_t pfn0;
393 uint_t redzone;
394
395 mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
396
397 if (!(px_p->px_dev_caps & PX_DMA_SYNC_REQUIRED))
398 mp->dmai_rflags |= DMP_NOSYNC;
399
400 switch (dobj_p->dmao_type) {
401 case DMA_OTYP_BUFVADDR:
402 case DMA_OTYP_VADDR: {
403 page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
404 caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
405
406 DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
407 offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
408 if (pplist) { /* shadow list */
409 mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
410 pfn0 = page_pptonum(*pplist);
411 } else {
412 struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
413 struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
414 pfn0 = hat_getpfnum(hat_p, vaddr);
415 }
416 }
417 break;
418
419 case DMA_OTYP_PAGES:
420 offset = dobj_p->dmao_obj.pp_obj.pp_offset;
421 mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
422 pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
423 break;
424
425 case DMA_OTYP_PADDR:
426 default:
427 cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
428 NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
429 return (DDI_DMA_NOMAPPING);
430 }
431 if (pfn0 == PFN_INVALID) {
432 cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
433 NAMEINST(dip), dobj_p);
434 return (DDI_DMA_NOMAPPING);
435 }
436 if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
437 pec_p->pec_last32_pfn)) {
438 mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
439 goto done; /* leave bypass and dvma flag as 0 */
440 } else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
441 pec_p->pec_last64_pfn)) {
442 mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
443 goto done; /* leave bypass and dvma flag as 0 */
444 }
445 if (PX_DMA_ISPEERONLY(mp)) {
446 dev_info_t *rdip = mp->dmai_rdip;
447 cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
448 return (DDI_DMA_NOMAPPING);
449 }
450
451 redzone = (mp->dmai_rflags & DDI_DMA_REDZONE) ||
452 (mp->dmai_flags & PX_DMAI_FLAGS_MAP_BUFZONE) ?
453 PX_DMAI_FLAGS_REDZONE : 0;
454
455 mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
456 PX_DMAI_FLAGS_BYPASS : (PX_DMAI_FLAGS_DVMA | redzone);
457 done:
458 mp->dmai_object = *dobj_p; /* whole object */
459 mp->dmai_pfn0 = (void *)pfn0; /* cache pfn0 */
460 mp->dmai_roffset = offset; /* win0 pg0 offset */
461 mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
462 return (DDI_SUCCESS);
463 }
464
465 /*
466 * px_dma_pgpfn - set up pfnlst array according to pages
467 * VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
468 */
469 /*ARGSUSED*/
470 static int
px_dma_pgpfn(px_t * px_p,ddi_dma_impl_t * mp,uint_t npages)471 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
472 {
473 int i;
474 dev_info_t *dip = px_p->px_dip;
475
476 switch (mp->dmai_object.dmao_type) {
477 case DMA_OTYP_BUFVADDR:
478 case DMA_OTYP_VADDR: {
479 page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
480 DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
481 pplist, npages);
482 for (i = 1; i < npages; i++) {
483 px_iopfn_t pfn = page_pptonum(pplist[i]);
484 PX_SET_MP_PFN1(mp, i, pfn);
485 DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
486 }
487 DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
488 }
489 break;
490
491 case DMA_OTYP_PAGES: {
492 page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
493 DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
494 for (i = 1; i < npages; i++, pp = pp->p_next) {
495 px_iopfn_t pfn = page_pptonum(pp);
496 PX_SET_MP_PFN1(mp, i, pfn);
497 DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
498 }
499 DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
500 }
501 break;
502
503 default: /* check is already done by px_dma_type */
504 ASSERT(0);
505 break;
506 }
507 return (DDI_SUCCESS);
508 }
509
510 /*
511 * px_dma_vapfn - set up pfnlst array according to VA
512 * VA/size pair: <normal, bypass, peer-to-peer>
513 * pfn0 is skipped as it is already done.
514 * In this case, the cached pfn0 is used to fill pfnlst[0]
515 */
516 static int
px_dma_vapfn(px_t * px_p,ddi_dma_impl_t * mp,uint_t npages)517 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
518 {
519 dev_info_t *dip = px_p->px_dip;
520 int i;
521 caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
522 struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
523
524 vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
525 for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
526 px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
527 if (pfn == PFN_INVALID)
528 goto err_badpfn;
529 PX_SET_MP_PFN1(mp, i, pfn);
530 DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
531 mp, i, pfn);
532 }
533 return (DDI_SUCCESS);
534 err_badpfn:
535 cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
536 return (DDI_DMA_NOMAPPING);
537 }
538
539 /*
540 * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
541 *
542 * dependencies:
543 * mp->dmai_ndvmapages - set to total # of dma pages
544 *
545 * return value:
546 * DDI_SUCCESS
547 * DDI_DMA_NOMAPPING
548 */
549 int
px_dma_pfn(px_t * px_p,ddi_dma_req_t * dmareq,ddi_dma_impl_t * mp)550 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
551 {
552 uint32_t npages = mp->dmai_ndvmapages;
553 int (*waitfp)(caddr_t) = dmareq->dmar_fp;
554 int i, ret, peer = PX_DMA_ISPTP(mp);
555 int peer32 = PX_DMA_ISPTP32(mp);
556 dev_info_t *dip = px_p->px_dip;
557
558 px_pec_t *pec_p = px_p->px_pec_p;
559 px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
560 pec_p->pec_base64_pfn;
561 px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
562 pec_p->pec_last64_pfn;
563 px_iopfn_t pfn_adj = peer ? pfn_base : 0;
564
565 DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
566 mp, PX_MP_PFN0(mp) - pfn_adj);
567 /* 1 page: no array alloc/fill, no mixed mode check */
568 if (npages == 1) {
569 PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
570 return (DDI_SUCCESS);
571 }
572 /* allocate pfn array */
573 if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
574 waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
575 if (waitfp != DDI_DMA_DONTWAIT)
576 ddi_set_callback(waitfp, dmareq->dmar_arg,
577 &px_kmem_clid);
578 return (DDI_DMA_NORESOURCES);
579 }
580 /* fill pfn array */
581 PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj); /* pfnlst[0] */
582 if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
583 px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
584 goto err;
585
586 /* skip pfn0, check mixed mode and adjust peer to peer pfn */
587 for (i = 1; i < npages; i++) {
588 px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
589 if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
590 cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
591 NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
592 ret = DDI_DMA_NOMAPPING; /* mixed mode */
593 goto err;
594 }
595 DBG(DBG_DMA_MAP, dip,
596 "px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
597 if (pfn_adj)
598 PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
599 }
600 return (DDI_SUCCESS);
601 err:
602 px_dma_freepfn(mp);
603 return (ret);
604 }
605
606 /*
607 * px_dvma_win() - trim requested DVMA size down to window size
608 * The 1st window starts from offset and ends at page-aligned boundary.
609 * From the 2nd window on, each window starts and ends at page-aligned
610 * boundary except the last window ends at wherever requested.
611 *
612 * accesses the following mp-> members:
613 * mp->dmai_attr.dma_attr_count_max
614 * mp->dmai_attr.dma_attr_seg
615 * mp->dmai_roffset - start offset of 1st window
616 * mp->dmai_rflags (redzone)
617 * mp->dmai_ndvmapages (for 1 page fast path)
618 *
619 * sets the following mp-> members:
620 * mp->dmai_size - xfer size, != winsize if 1st/last win (not fixed)
621 * mp->dmai_winsize - window size (no redzone), n * page size (fixed)
622 * mp->dmai_nwin - # of DMA windows of entire object (fixed)
623 * mp->dmai_rflags - remove partial flag if nwin == 1 (fixed)
624 * mp->dmai_winlst - NULL, window objects not used for DVMA (fixed)
625 *
626 * fixed - not changed across different DMA windows
627 */
628 /*ARGSUSED*/
629 int
px_dvma_win(px_t * px_p,ddi_dma_req_t * dmareq,ddi_dma_impl_t * mp)630 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
631 {
632 uint32_t redzone_sz = PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
633 size_t obj_sz = mp->dmai_object.dmao_size;
634 size_t xfer_sz;
635 ulong_t pg_off;
636
637 if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
638 mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
639 mp->dmai_size = obj_sz;
640 mp->dmai_winsize = MMU_PAGE_SIZE;
641 mp->dmai_nwin = 1;
642 goto done;
643 }
644
645 pg_off = mp->dmai_roffset;
646 xfer_sz = obj_sz + redzone_sz;
647
648 /* include redzone in nocross check */ {
649 uint64_t nocross = mp->dmai_attr.dma_attr_seg;
650 if (xfer_sz + pg_off - 1 > nocross)
651 xfer_sz = nocross - pg_off + 1;
652 if (redzone_sz && (xfer_sz <= redzone_sz)) {
653 DBG(DBG_DMA_MAP, px_p->px_dip,
654 "nocross too small: "
655 "%lx(%lx)+%lx+%lx < %llx\n",
656 xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
657 return (DDI_DMA_TOOBIG);
658 }
659 }
660 xfer_sz -= redzone_sz; /* restore transfer size */
661 /* check counter max */ {
662 uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
663 if (xfer_sz - 1 > count_max)
664 xfer_sz = count_max + 1;
665 }
666 if (xfer_sz >= obj_sz) {
667 mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
668 mp->dmai_size = xfer_sz;
669 mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
670 mp->dmai_nwin = 1;
671 goto done;
672 }
673 if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
674 DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
675 obj_sz, pg_off, redzone_sz, xfer_sz);
676 return (DDI_DMA_TOOBIG);
677 }
678
679 xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
680 mp->dmai_size = xfer_sz - pg_off; /* 1st window xferrable size */
681 mp->dmai_winsize = xfer_sz; /* redzone not in winsize */
682 mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
683 done:
684 mp->dmai_winlst = NULL;
685 px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
686 return (DDI_SUCCESS);
687 }
688
/*
 * fast track cache entry to mmu context: the entry index is shifted left
 * by three bits, its low three bits are repeated in bits 0-2, and 0x38 is
 * ORed in, so bits 3-5 of the result are always set.
 *
 * NOTE(review): bits 3-5 are forced to one, not zero; confirm against
 * the hardware context format that this is what is intended.
 */
#define	MMU_FCE_TO_CTX(i)	(((i) << 3) | ((i) & 0x7) | 0x38)
694
695 /*
696 * px_dvma_map_fast - attempts to map fast trackable DVMA
697 */
698 /*ARGSUSED*/
699 int
px_dvma_map_fast(px_mmu_t * mmu_p,ddi_dma_impl_t * mp)700 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
701 {
702 uint_t clustsz = px_dvma_page_cache_clustsz;
703 uint_t entries = px_dvma_page_cache_entries;
704 io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
705 mp->dmai_attr.dma_attr_flags);
706 int i = mmu_p->mmu_dvma_addr_scan_start;
707 uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
708 px_dvma_addr_t dvma_pg;
709 size_t npages = MMU_BTOP(mp->dmai_winsize);
710 dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
711
712 extern uint8_t ldstub(uint8_t *);
713 ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
714 ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
715
716 for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
717 ;
718 if (i >= entries) {
719 lock_addr = mmu_p->mmu_dvma_cache_locks;
720 i = 0;
721 for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
722 ;
723 if (i >= entries) {
724 #ifdef PX_DMA_PROF
725 px_dvmaft_exhaust++;
726 #endif /* PX_DMA_PROF */
727 return (DDI_DMA_NORESOURCES);
728 }
729 }
730 mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
731
732 i *= clustsz;
733 dvma_pg = mmu_p->dvma_base_pg + i;
734
735 if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages,
736 PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, 0,
737 MMU_MAP_PFN) != DDI_SUCCESS) {
738 DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
739 "px_lib_iommu_map failed\n");
740 return (DDI_FAILURE);
741 }
742
743 if (!PX_MAP_BUFZONE(mp))
744 goto done;
745
746 DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
747
748 ASSERT(PX_HAS_REDZONE(mp));
749
750 if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1,
751 PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, npages - 1,
752 MMU_MAP_PFN) != DDI_SUCCESS) {
753 DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
754 "mapping REDZONE page failed\n");
755
756 (void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
757 return (DDI_FAILURE);
758 }
759
760 done:
761 #ifdef PX_DMA_PROF
762 px_dvmaft_success++;
763 #endif
764 mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
765 mp->dmai_offset = 0;
766 mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
767 PX_SAVE_MP_TTE(mp, attr); /* save TTE template for unmapping */
768 if (PX_DVMA_DBG_ON(mmu_p))
769 px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
770 mp->dmai_size, mp);
771 return (DDI_SUCCESS);
772 }
773
774 /*
775 * px_dvma_map: map non-fasttrack DMA
776 * Use quantum cache if single page DMA.
777 */
778 int
px_dvma_map(ddi_dma_impl_t * mp,ddi_dma_req_t * dmareq,px_mmu_t * mmu_p)779 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
780 {
781 uint_t npages = PX_DMA_WINNPGS(mp);
782 px_dvma_addr_t dvma_pg, dvma_pg_index;
783 void *dvma_addr;
784 io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
785 mp->dmai_attr.dma_attr_flags);
786 int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
787 dev_info_t *dip = mp->dmai_rdip;
788 int ret = DDI_SUCCESS;
789
790 /*
791 * allocate dvma space resource and map in the first window.
792 * (vmem_t *vmp, size_t size,
793 * size_t align, size_t phase, size_t nocross,
794 * void *minaddr, void *maxaddr, int vmflag)
795 */
796 if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
797 dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
798 MMU_PAGE_SIZE, sleep);
799 mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
800 #ifdef PX_DMA_PROF
801 px_dvma_vmem_alloc++;
802 #endif /* PX_DMA_PROF */
803 } else {
804 dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
805 MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
806 MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
807 0,
808 mp->dmai_attr.dma_attr_seg + 1,
809 (void *)mp->dmai_attr.dma_attr_addr_lo,
810 (void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
811 sleep);
812 #ifdef PX_DMA_PROF
813 px_dvma_vmem_xalloc++;
814 #endif /* PX_DMA_PROF */
815 }
816 dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
817 dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
818 DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
819 dvma_pg, dvma_pg_index);
820 if (dvma_pg == 0)
821 goto noresource;
822
823 mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
824 mp->dmai_offset = 0;
825 PX_SAVE_MP_TTE(mp, attr); /* mp->dmai_tte = tte */
826
827 if ((ret = px_mmu_map_pages(mmu_p,
828 mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
829 if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
830 vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
831 MMU_PAGE_SIZE);
832 #ifdef PX_DMA_PROF
833 px_dvma_vmem_free++;
834 #endif /* PX_DMA_PROF */
835 } else {
836 vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
837 MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
838 #ifdef PX_DMA_PROF
839 px_dvma_vmem_xfree++;
840 #endif /* PX_DMA_PROF */
841 }
842 }
843
844 return (ret);
845 noresource:
846 if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
847 DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
848 ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
849 &mmu_p->mmu_dvma_clid);
850 }
851 DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
852 return (DDI_DMA_NORESOURCES);
853 }
854
855 void
px_dvma_unmap(px_mmu_t * mmu_p,ddi_dma_impl_t * mp)856 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
857 {
858 px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
859 px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
860 dvma_addr = MMU_PTOB(dvma_pg);
861
862 if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
863 px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
864 ASSERT(index % px_dvma_page_cache_clustsz == 0);
865 index /= px_dvma_page_cache_clustsz;
866 ASSERT(index < px_dvma_page_cache_entries);
867 mmu_p->mmu_dvma_cache_locks[index] = 0;
868 #ifdef PX_DMA_PROF
869 px_dvmaft_free++;
870 #endif /* PX_DMA_PROF */
871 return;
872 }
873
874 if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
875 vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
876 MMU_PAGE_SIZE);
877 #ifdef PX_DMA_PROF
878 px_dvma_vmem_free++;
879 #endif /* PX_DMA_PROF */
880 } else {
881 size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
882 vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
883 MMU_PTOB(npages));
884 #ifdef PX_DMA_PROF
885 px_dvma_vmem_xfree++;
886 #endif /* PX_DMA_PROF */
887 }
888 }
889
890 /*
891 * DVMA mappings may have multiple windows, but each window always have
892 * one segment.
893 */
894 int
px_dvma_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_dma_impl_t * mp,enum ddi_dma_ctlops cmd,off_t * offp,size_t * lenp,caddr_t * objp,uint_t cache_flags)895 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
896 enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
897 uint_t cache_flags)
898 {
899 switch (cmd) {
900 default:
901 DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
902 cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
903 break;
904 }
905 return (DDI_FAILURE);
906 }
907
908 void
px_dma_freewin(ddi_dma_impl_t * mp)909 px_dma_freewin(ddi_dma_impl_t *mp)
910 {
911 px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
912 for (win2_p = win_p; win_p; win2_p = win_p) {
913 win_p = win2_p->win_next;
914 kmem_free(win2_p, sizeof (px_dma_win_t) +
915 sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
916 }
917 mp->dmai_nwin = 0;
918 mp->dmai_winlst = NULL;
919 }
920
921 /*
922 * px_dma_newwin - create a dma window object and cookies
923 *
924 * After the initial scan in px_dma_physwin(), which identifies
925 * a portion of the pfn array that belongs to a dma window,
926 * we are called to allocate and initialize representing memory
927 * resources. We know from the 1st scan the number of cookies
928 * or dma segment in this window so we can allocate a contiguous
929 * memory array for the dma cookies (The implementation of
930 * ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
931 *
932 * A second round scan is done on the pfn array to identify
933 * each dma segment and initialize its corresponding dma cookie.
934 * We don't need to do all the safety checking and we know they
935 * all belong to the same dma window.
936 *
937 * Input: cookie_no - # of cookies identified by the 1st scan
938 * start_idx - subscript of the pfn array for the starting pfn
939 * end_idx - subscript of the last pfn in dma window
940 * win_pp - pointer to win_next member of previous window
941 * Return: DDI_SUCCESS - with **win_pp as newly created window object
942 * DDI_DMA_NORESROUCE - caller frees all previous window objs
943 * Note: Each cookie and window size are all initialized on page
944 * boundary. This is not true for the 1st cookie of the 1st
945 * window and the last cookie of the last window.
946 * We fix that later in upper layer which has access to size
947 * and offset info.
948 *
949 */
950 /*ARGSUSED*/
951 static int
px_dma_newwin(dev_info_t * dip,ddi_dma_req_t * dmareq,ddi_dma_impl_t * mp,uint32_t cookie_no,uint32_t start_idx,uint32_t end_idx,px_dma_win_t ** win_pp,uint64_t count_max,uint64_t bypass)952 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
953 uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
954 px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
955 {
956 int (*waitfp)(caddr_t) = dmareq->dmar_fp;
957 ddi_dma_cookie_t *cookie_p;
958 uint32_t pfn_no = 1;
959 px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
960 px_iopfn_t prev_pfn = pfn;
961 uint64_t baddr, seg_pfn0 = pfn;
962 size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
963 px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
964 waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
965 io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
966 mp->dmai_attr.dma_attr_flags);
967
968 if (!win_p)
969 goto noresource;
970
971 win_p->win_next = NULL;
972 win_p->win_ncookies = cookie_no;
973 win_p->win_curseg = 0; /* start from segment 0 */
974 win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
975 /* win_p->win_offset is left uninitialized */
976
977 cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
978 start_idx++;
979 for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
980 pfn = PX_GET_MP_PFN1(mp, start_idx);
981 if ((pfn == prev_pfn + 1) &&
982 (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
983 continue;
984
985 /* close up the cookie up to (including) prev_pfn */
986 baddr = MMU_PTOB(seg_pfn0);
987 if (bypass) {
988 if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
989 == DDI_SUCCESS)
990 baddr = px_lib_ro_bypass(dip, attr, baddr);
991 else
992 return (DDI_FAILURE);
993 }
994
995 MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
996 DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
997 MMU_PTOB(seg_pfn0), pfn_no);
998
999 cookie_p++; /* advance to next available cookie cell */
1000 pfn_no = 0;
1001 seg_pfn0 = pfn; /* start a new segment from current pfn */
1002 }
1003
1004 baddr = MMU_PTOB(seg_pfn0);
1005 if (bypass) {
1006 if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
1007 == DDI_SUCCESS)
1008 baddr = px_lib_ro_bypass(dip, attr, baddr);
1009 else
1010 return (DDI_FAILURE);
1011 }
1012
1013 MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
1014 DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
1015 MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
1016 #ifdef DEBUG
1017 cookie_p++;
1018 ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
1019 #endif /* DEBUG */
1020 *win_pp = win_p;
1021 return (DDI_SUCCESS);
1022 noresource:
1023 if (waitfp != DDI_DMA_DONTWAIT)
1024 ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
1025 return (DDI_DMA_NORESOURCES);
1026 }
1027
1028 /*
1029 * px_dma_adjust - adjust 1st and last cookie and window sizes
1030 * remove initial dma page offset from 1st cookie and window size
1031 * remove last dma page remainder from last cookie and window size
1032 * fill win_offset of each dma window according to just fixed up
1033 * each window sizes
1034 * px_dma_win_t members modified:
1035 * win_p->win_offset - this window's offset within entire DMA object
1036 * win_p->win_size - xferrable size (in bytes) for this window
1037 *
1038 * ddi_dma_impl_t members modified:
1039 * mp->dmai_size - 1st window xferrable size
1040 * mp->dmai_offset - 0, which is the dma offset of the 1st window
1041 *
1042 * ddi_dma_cookie_t members modified:
1043 * cookie_p->dmac_size - 1st and last cookie remove offset or remainder
1044 * cookie_p->dmac_laddress - 1st cookie add page offset
1045 */
1046 static void
px_dma_adjust(ddi_dma_req_t * dmareq,ddi_dma_impl_t * mp,px_dma_win_t * win_p)1047 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
1048 {
1049 ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1050 size_t pg_offset = mp->dmai_roffset;
1051 size_t win_offset = 0;
1052
1053 cookie_p->dmac_size -= pg_offset;
1054 cookie_p->dmac_laddress |= pg_offset;
1055 win_p->win_size -= pg_offset;
1056 DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
1057
1058 mp->dmai_size = win_p->win_size;
1059 mp->dmai_offset = 0;
1060
1061 pg_offset += mp->dmai_object.dmao_size;
1062 pg_offset &= MMU_PAGE_OFFSET;
1063 if (pg_offset)
1064 pg_offset = MMU_PAGE_SIZE - pg_offset;
1065 DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
1066
1067 for (; win_p->win_next; win_p = win_p->win_next) {
1068 DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
1069 win_p->win_offset = win_offset;
1070 win_offset += win_p->win_size;
1071 }
1072 /* last window */
1073 win_p->win_offset = win_offset;
1074 cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
1075 cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
1076 win_p->win_size -= pg_offset;
1077 ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
1078 }
1079
1080 /*
1081 * px_dma_physwin() - carve up dma windows using physical addresses.
1082 * Called to handle mmu bypass and pci peer-to-peer transfers.
1083 * Calls px_dma_newwin() to allocate window objects.
1084 *
1085 * Dependency: mp->dmai_pfnlst points to an array of pfns
1086 *
1087 * 1. Each dma window is represented by a px_dma_win_t object.
1088 * The object will be casted to ddi_dma_win_t and returned
1089 * to leaf driver through the DDI interface.
1090 * 2. Each dma window can have several dma segments with each
1091 * segment representing a physically contiguous either memory
1092 * space (if we are doing an mmu bypass transfer) or pci address
1093 * space (if we are doing a peer-to-peer transfer).
1094 * 3. Each segment has a DMA cookie to program the DMA engine.
1095 * The cookies within each DMA window must be located in a
1096 * contiguous array per ddi_dma_nextcookie(9f).
1097 * 4. The number of DMA segments within each DMA window cannot exceed
1098 * mp->dmai_attr.dma_attr_sgllen. If the transfer size is
1099 * too large to fit in the sgllen, the rest needs to be
1100 * relocated to the next dma window.
1101 * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
1102 * and nocross restrictions while bypass DMA follows the set of
1103 * restrictions with system limits factored in.
1104 *
1105 * Return:
1106 * mp->dmai_winlst - points to a link list of px_dma_win_t objects.
1107 * Each px_dma_win_t object on the link list contains
1108 * infomation such as its window size (# of pages),
1109 * starting offset (also see Restriction), an array of
1110 * DMA cookies, and # of cookies in the array.
1111 * mp->dmai_pfnlst - NULL, the pfn list is freed to conserve memory.
1112 * mp->dmai_nwin - # of total DMA windows on mp->dmai_winlst.
1113 * mp->dmai_mapping - starting cookie address
1114 * mp->dmai_rflags - consistent, nosync, no redzone
1115 * mp->dmai_cookie - start of cookie table of the 1st DMA window
1116 *
1117 * Restriction:
1118 * Each px_dma_win_t object can theoratically start from any offset
1119 * since the mmu is not involved. However, this implementation
1120 * always make windows start from page aligned offset (except
1121 * the 1st window, which follows the requested offset) due to the
1122 * fact that we are handed a pfn list. This does require device's
1123 * count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
1124 */
1125 int
px_dma_physwin(px_t * px_p,ddi_dma_req_t * dmareq,ddi_dma_impl_t * mp)1126 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
1127 {
1128 uint_t npages = mp->dmai_ndvmapages;
1129 int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
1130 px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
1131 px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
1132 uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
1133 uint64_t count_max, bypass_addr = 0;
1134 px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
1135 ddi_dma_cookie_t *cookie0_p;
1136 io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
1137 mp->dmai_attr.dma_attr_flags);
1138 dev_info_t *dip = px_p->px_dip;
1139
1140 ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
1141 if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
1142 ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
1143 uint64_t nocross = dev_attr_p->dma_attr_seg;
1144 px_pec_t *pec_p = px_p->px_pec_p;
1145 px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
1146 pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
1147 pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
1148
1149 if (nocross && (nocross < UINT32_MAX))
1150 return (DDI_DMA_NOMAPPING);
1151 if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
1152 return (DDI_DMA_NOMAPPING);
1153 pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
1154 pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
1155 pfn_hi = MIN(pfn_hi, pfn_last);
1156 if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
1157 return (DDI_DMA_NOMAPPING);
1158
1159 count_max = dev_attr_p->dma_attr_count_max;
1160 count_max = MIN(count_max, nocross);
1161 /*
1162 * the following count_max trim is not done because we are
1163 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
1164 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
1165 */
1166 } else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
1167 count_max = mp->dmai_attr.dma_attr_count_max;
1168 pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
1169 pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
1170
1171 if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
1172 attr, &bypass_addr) != DDI_SUCCESS) {
1173 DBG(DBG_BYPASS, mp->dmai_rdip,
1174 "bypass cookie failure %lx\n", pfn);
1175 return (DDI_DMA_NOMAPPING);
1176 }
1177 pfn = MMU_BTOP(bypass_addr);
1178 }
1179
1180 /* pfn: absolute (bypass mode) or relative (p2p mode) */
1181 for (prev_pfn = pfn, i = 1; i < npages;
1182 i++, prev_pfn = pfn, pfn_no++) {
1183 pfn = PX_GET_MP_PFN1(mp, i);
1184 if (bypass_addr) {
1185 if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
1186 &bypass_addr) != DDI_SUCCESS) {
1187 ret = DDI_DMA_NOMAPPING;
1188 goto err;
1189 }
1190 pfn = MMU_BTOP(bypass_addr);
1191 }
1192 if ((pfn == prev_pfn + 1) &&
1193 (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
1194 continue;
1195 if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
1196 ret = DDI_DMA_NOMAPPING;
1197 goto err;
1198 }
1199 cookie_no++;
1200 pfn_no = 0;
1201 if (cookie_no < sgllen)
1202 continue;
1203
1204 DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1205 win_pfn0_index, i - 1, cookie_no);
1206 if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
1207 win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
1208 goto err;
1209
1210 win_pp = &(*win_pp)->win_next; /* win_pp = *(win_pp) */
1211 win_no++;
1212 win_pfn0_index = i;
1213 cookie_no = 0;
1214 }
1215 if (pfn > pfn_hi) {
1216 ret = DDI_DMA_NOMAPPING;
1217 goto err;
1218 }
1219 cookie_no++;
1220 DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
1221 win_pfn0_index, i - 1, cookie_no);
1222 if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
1223 i - 1, win_pp, count_max, bypass_addr))
1224 goto err;
1225 win_no++;
1226 px_dma_adjust(dmareq, mp, mp->dmai_winlst);
1227 mp->dmai_nwin = win_no;
1228 mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
1229 mp->dmai_rflags &= ~DDI_DMA_REDZONE;
1230 mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
1231 cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
1232 mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
1233 mp->dmai_mapping = cookie0_p->dmac_laddress;
1234
1235 px_dma_freepfn(mp);
1236 return (DDI_DMA_MAPPED);
1237 err:
1238 px_dma_freewin(mp);
1239 return (ret);
1240 }
1241
1242 int
px_dma_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_dma_impl_t * mp,enum ddi_dma_ctlops cmd,off_t * offp,size_t * lenp,caddr_t * objp,uint_t cache_flags)1243 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
1244 enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1245 uint_t cache_flags)
1246 {
1247 switch (cmd) {
1248 default:
1249 DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
1250 cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
1251 break;
1252 }
1253 return (DDI_FAILURE);
1254 }
1255
1256 static void
px_dvma_debug_init(px_mmu_t * mmu_p)1257 px_dvma_debug_init(px_mmu_t *mmu_p)
1258 {
1259 size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1260 ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
1261 cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
1262
1263 mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
1264 mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
1265
1266 mmu_p->dvma_active_list = NULL;
1267 mmu_p->dvma_alloc_rec_index = 0;
1268 mmu_p->dvma_free_rec_index = 0;
1269 mmu_p->dvma_active_count = 0;
1270 }
1271
1272 void
px_dvma_debug_fini(px_mmu_t * mmu_p)1273 px_dvma_debug_fini(px_mmu_t *mmu_p)
1274 {
1275 struct px_dvma_rec *prev, *ptr;
1276 size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
1277 uint64_t mask = ~(1ull << mmu_p->mmu_inst);
1278 cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
1279
1280 if (mmu_p->dvma_alloc_rec) {
1281 kmem_free(mmu_p->dvma_alloc_rec, sz);
1282 mmu_p->dvma_alloc_rec = NULL;
1283 }
1284 if (mmu_p->dvma_free_rec) {
1285 kmem_free(mmu_p->dvma_free_rec, sz);
1286 mmu_p->dvma_free_rec = NULL;
1287 }
1288
1289 prev = mmu_p->dvma_active_list;
1290 if (!prev)
1291 return;
1292 for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
1293 kmem_free(prev, sizeof (struct px_dvma_rec));
1294 kmem_free(prev, sizeof (struct px_dvma_rec));
1295
1296 mmu_p->dvma_active_list = NULL;
1297 mmu_p->dvma_alloc_rec_index = 0;
1298 mmu_p->dvma_free_rec_index = 0;
1299 mmu_p->dvma_active_count = 0;
1300
1301 px_dvma_debug_off &= mask;
1302 px_dvma_debug_on &= mask;
1303 }
1304
1305 void
px_dvma_alloc_debug(px_mmu_t * mmu_p,char * address,uint_t len,ddi_dma_impl_t * mp)1306 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1307 ddi_dma_impl_t *mp)
1308 {
1309 struct px_dvma_rec *ptr;
1310 mutex_enter(&mmu_p->dvma_debug_lock);
1311
1312 if (!mmu_p->dvma_alloc_rec)
1313 px_dvma_debug_init(mmu_p);
1314 if (PX_DVMA_DBG_OFF(mmu_p)) {
1315 px_dvma_debug_fini(mmu_p);
1316 goto done;
1317 }
1318
1319 ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
1320 ptr->dvma_addr = address;
1321 ptr->len = len;
1322 ptr->mp = mp;
1323 if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
1324 mmu_p->dvma_alloc_rec_index = 0;
1325
1326 ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
1327 ptr->dvma_addr = address;
1328 ptr->len = len;
1329 ptr->mp = mp;
1330
1331 ptr->next = mmu_p->dvma_active_list;
1332 mmu_p->dvma_active_list = ptr;
1333 mmu_p->dvma_active_count++;
1334 done:
1335 mutex_exit(&mmu_p->dvma_debug_lock);
1336 }
1337
1338 void
px_dvma_free_debug(px_mmu_t * mmu_p,char * address,uint_t len,ddi_dma_impl_t * mp)1339 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
1340 ddi_dma_impl_t *mp)
1341 {
1342 struct px_dvma_rec *ptr, *ptr_save;
1343 mutex_enter(&mmu_p->dvma_debug_lock);
1344
1345 if (!mmu_p->dvma_alloc_rec)
1346 px_dvma_debug_init(mmu_p);
1347 if (PX_DVMA_DBG_OFF(mmu_p)) {
1348 px_dvma_debug_fini(mmu_p);
1349 goto done;
1350 }
1351
1352 ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
1353 ptr->dvma_addr = address;
1354 ptr->len = len;
1355 ptr->mp = mp;
1356 if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
1357 mmu_p->dvma_free_rec_index = 0;
1358
1359 ptr_save = mmu_p->dvma_active_list;
1360 for (ptr = ptr_save; ptr; ptr = ptr->next) {
1361 if ((ptr->dvma_addr == address) && (ptr->len = len))
1362 break;
1363 ptr_save = ptr;
1364 }
1365 if (!ptr) {
1366 cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
1367 (long)address, len);
1368 goto done;
1369 }
1370 if (ptr == mmu_p->dvma_active_list)
1371 mmu_p->dvma_active_list = ptr->next;
1372 else
1373 ptr_save->next = ptr->next;
1374 kmem_free(ptr, sizeof (struct px_dvma_rec));
1375 mmu_p->dvma_active_count--;
1376 done:
1377 mutex_exit(&mmu_p->dvma_debug_lock);
1378 }
1379
#ifdef DEBUG
/*
 * px_dump_dma_handle - print the contents of a dma handle (DEBUG only)
 *
 * Emits the main fields of hp through DBG() under the given debug flag;
 * the continuation lines are tagged with DBG_CONT.
 * The window list is a pointer and is therefore printed with %p.
 */
void
px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
{
	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
	    hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
	    hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
	    hp->dmai_nwin);
	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
	    hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
	DBG(flag|DBG_CONT, dip, "\twinlst=%p obj=%p attr=%p ckp=%p\n",
	    hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
	    hp->dmai_cookie);
}
#endif	/* DEBUG */
1396