1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23 * All rights reserved.
24 */
25 /*
26 * Copyright (c) 2009, Intel Corporation.
27 * All rights reserved.
28 */
29 /*
30 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
31 * Copyright 2017 Joyent, Inc.
32 */
33
34 /*
35 * DVMA code
36 * This file contains Intel IOMMU code that deals with DVMA
37 * i.e. DMA remapping.
38 */
39
40 #include <sys/sysmacros.h>
41 #include <sys/pcie.h>
42 #include <sys/pci_cfgspace.h>
43 #include <vm/hat_i86.h>
44 #include <sys/memlist.h>
45 #include <sys/acpi/acpi.h>
46 #include <sys/acpica.h>
47 #include <sys/modhash.h>
48 #include <sys/immu.h>
49 #include <sys/x86_archext.h>
50 #include <sys/archsystm.h>
51
52 #undef TEST
53
/*
 * Macros based on PCI spec
 */
/* classcode is the upper 24 bits of the REVID config register */
#define	IMMU_PCI_REV2CLASS(r)	((r) >> 8)	/* classcode from revid */
#define	IMMU_PCI_CLASS2BASE(c)	((c) >> 16)	/* baseclass from classcode */
#define	IMMU_PCI_CLASS2SUB(c)	(((c) >> 8) & 0xff)	/* subclass */

/*
 * True if physical address p immediately follows the run of pages
 * described by dcookie d (i.e. the cookie can be extended to cover p).
 */
#define	IMMU_CONTIG_PADDR(d, p) \
	((d).dck_paddr && ((d).dck_paddr + (d).dck_npages * IMMU_PAGESIZE) \
	== (p))
64
/*
 * Argument bundle threaded through the devinfo ancestor-walk callbacks
 * (match_lpc(), get_branch_domain()) while resolving the IOMMU unit,
 * domain and domain-owner dip for a device.
 */
typedef struct dvma_arg {
	immu_t *dva_immu;	/* IOMMU unit (not read by visible callbacks) */
	dev_info_t *dva_rdip;	/* requesting device's dip */
	dev_info_t *dva_ddip;	/* domain-owner dip, set during the walk */
	domain_t *dva_domain;	/* domain resolved for the device */
	int dva_level;		/* walk depth (unused in this file's walks) */
	immu_flags_t dva_flags;	/* sleep/permission flags for the operation */
	list_t *dva_list;	/* list searched by the callback */
	int dva_error;		/* DDI_SUCCESS when the walk found a match */
} dvma_arg_t;
75
/* Forward declarations of routines local to this file */
static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
    dev_info_t *rdip, immu_flags_t immu_flags);
static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
    int dev, int func, immu_flags_t immu_flags);
static void destroy_immu_devi(immu_devi_t *immu_devi);
static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
    uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
    immu_flags_t immu_flags);

/* Extern globals */
extern struct memlist *phys_install;	/* installed physical memory list */
87
/*
 * iommulib interface functions.
 * These implement the DMA ops vector that iommulib dispatches to for
 * devices behind an Intel IOMMU (registered via immulib_ops in this file).
 */
static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
static int immu_allochdl(iommulib_handle_t handle,
    dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
    int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
static int immu_freehdl(iommulib_handle_t handle,
    dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
    ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int immu_unbindhdl(iommulib_handle_t handle,
    dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
    uint_t cachefl);
static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
    off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle,
    struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
113
/* static Globals */

/*
 * Used to setup DMA objects (memory regions)
 * for DMA reads by IOMMU units
 */
static ddi_dma_attr_t immu_dma_attr = {
	DMA_ATTR_V0,
	0U,			/* dma_attr_addr_lo */
	0xffffffffffffffffULL,	/* dma_attr_addr_hi */
	0xffffffffU,		/* dma_attr_count_max */
	MMU_PAGESIZE, /* MMU page aligned */
	0x1,			/* dma_attr_burstsizes */
	0x1,			/* dma_attr_minxfer */
	0xffffffffU,		/* dma_attr_maxxfer */
	0xffffffffffffffffULL,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen: single segment */
	4,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};
134
/* Access attributes for the HW pagetable pages: no swapping, strict order */
static ddi_device_acc_attr_t immu_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};
140
/* Ops vector registered with iommulib; see prototypes above */
struct iommulib_ops immulib_ops = {
	IOMMU_OPS_VERSION,
	INTEL_IOMMU,		/* vendor */
	"Intel IOMMU",		/* human-readable name */
	NULL,			/* private data */
	immu_probe,
	immu_allochdl,
	immu_freehdl,
	immu_bindhdl,
	immu_unbindhdl,
	immu_sync,
	immu_win,
	immu_mapobject,
	immu_unmapobject,
};
156
/*
 * Fake physical address range used to set up initial prealloc mappings.
 * This memory is never actually accessed. It is mapped read-only,
 * and is overwritten as soon as the first DMA bind operation is
 * performed. Since 0 is a special case, just start at the 2nd
 * physical page.
 */

/* dck_paddr = MMU_PAGESIZE (2nd page), dck_npages = IMMU_NPREPTES */
static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };
166
/* globals private to this file */
/* presumably protects the two domain lists below — confirm at use sites */
static kmutex_t immu_domain_lock;
static list_t immu_unity_domain_list;	/* unity (1:1 mapped) domains */
static list_t immu_xlate_domain_list;	/* translating (XLATE) domains */

/* structure used to store idx into each level of the page tables */
typedef struct xlate {
	int xlt_level;		/* pagetable level this entry refers to */
	uint_t xlt_idx;		/* index within that level's table */
	pgtable_t *xlt_pgtable;	/* the pagetable at that level */
} xlate_t;

/* 0 is reserved by Vt-d spec. Solaris reserves 1 */
#define	IMMU_UNITY_DID	1

/* maps seg:bus:devfunc -> domain_t (see bdf_domain_lookup/insert) */
static mod_hash_t *bdf_domain_hash;

/* tunables; not referenced in this part of the file — see other chunks */
int immu_use_alh;
int immu_use_tm;
186
187 static domain_t *
bdf_domain_lookup(immu_devi_t * immu_devi)188 bdf_domain_lookup(immu_devi_t *immu_devi)
189 {
190 domain_t *domain;
191 int16_t seg = immu_devi->imd_seg;
192 int16_t bus = immu_devi->imd_bus;
193 int16_t devfunc = immu_devi->imd_devfunc;
194 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
195
196 if (seg < 0 || bus < 0 || devfunc < 0) {
197 return (NULL);
198 }
199
200 domain = NULL;
201 if (mod_hash_find(bdf_domain_hash,
202 (void *)bdf, (void *)&domain) == 0) {
203 ASSERT(domain);
204 ASSERT(domain->dom_did > 0);
205 return (domain);
206 } else {
207 return (NULL);
208 }
209 }
210
211 static void
bdf_domain_insert(immu_devi_t * immu_devi,domain_t * domain)212 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
213 {
214 int16_t seg = immu_devi->imd_seg;
215 int16_t bus = immu_devi->imd_bus;
216 int16_t devfunc = immu_devi->imd_devfunc;
217 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
218
219 if (seg < 0 || bus < 0 || devfunc < 0) {
220 return;
221 }
222
223 (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
224 }
225
226 static int
match_lpc(dev_info_t * pdip,void * arg)227 match_lpc(dev_info_t *pdip, void *arg)
228 {
229 immu_devi_t *immu_devi;
230 dvma_arg_t *dvap = (dvma_arg_t *)arg;
231
232 if (list_is_empty(dvap->dva_list)) {
233 return (DDI_WALK_TERMINATE);
234 }
235
236 immu_devi = list_head(dvap->dva_list);
237 for (; immu_devi; immu_devi = list_next(dvap->dva_list,
238 immu_devi)) {
239 if (immu_devi->imd_dip == pdip) {
240 dvap->dva_ddip = pdip;
241 dvap->dva_error = DDI_SUCCESS;
242 return (DDI_WALK_TERMINATE);
243 }
244 }
245
246 return (DDI_WALK_CONTINUE);
247 }
248
249 static void
immu_devi_set_spclist(dev_info_t * dip,immu_t * immu)250 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
251 {
252 list_t *spclist = NULL;
253 immu_devi_t *immu_devi;
254
255 immu_devi = IMMU_DEVI(dip);
256 if (immu_devi->imd_display == B_TRUE) {
257 spclist = &(immu->immu_dvma_gfx_list);
258 } else if (immu_devi->imd_lpc == B_TRUE) {
259 spclist = &(immu->immu_dvma_lpc_list);
260 }
261
262 if (spclist) {
263 mutex_enter(&(immu->immu_lock));
264 list_insert_head(spclist, immu_devi);
265 mutex_exit(&(immu->immu_lock));
266 }
267 }
268
269 /*
270 * Set the immu_devi struct in the immu_devi field of a devinfo node
271 */
272 int
immu_devi_set(dev_info_t * dip,immu_flags_t immu_flags)273 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
274 {
275 int bus, dev, func;
276 immu_devi_t *new_imd;
277 immu_devi_t *immu_devi;
278
279 immu_devi = immu_devi_get(dip);
280 if (immu_devi != NULL) {
281 return (DDI_SUCCESS);
282 }
283
284 bus = dev = func = -1;
285
286 /*
287 * Assume a new immu_devi struct is needed
288 */
289 if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
290 /*
291 * No BDF. Set bus = -1 to indicate this.
292 * We still need to create a immu_devi struct
293 * though
294 */
295 bus = -1;
296 dev = 0;
297 func = 0;
298 }
299
300 new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
301 if (new_imd == NULL) {
302 ddi_err(DER_WARN, dip, "Failed to create immu_devi "
303 "structure");
304 return (DDI_FAILURE);
305 }
306
307 /*
308 * Check if some other thread allocated a immu_devi while we
309 * didn't own the lock.
310 */
311 mutex_enter(&(DEVI(dip)->devi_lock));
312 if (IMMU_DEVI(dip) == NULL) {
313 IMMU_DEVI_SET(dip, new_imd);
314 } else {
315 destroy_immu_devi(new_imd);
316 }
317 mutex_exit(&(DEVI(dip)->devi_lock));
318
319 return (DDI_SUCCESS);
320 }
321
322 static dev_info_t *
get_lpc_devinfo(immu_t * immu,dev_info_t * rdip,immu_flags_t immu_flags)323 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
324 {
325 dvma_arg_t dvarg = {0};
326 dvarg.dva_list = &(immu->immu_dvma_lpc_list);
327 dvarg.dva_rdip = rdip;
328 dvarg.dva_error = DDI_FAILURE;
329
330 if (immu_walk_ancestor(rdip, NULL, match_lpc,
331 &dvarg, NULL, immu_flags) != DDI_SUCCESS) {
332 ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
333 "find lpc_devinfo for ISA device");
334 return (NULL);
335 }
336
337 if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
338 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
339 "ISA device");
340 return (NULL);
341 }
342
343 return (dvarg.dva_ddip);
344 }
345
346 static dev_info_t *
get_gfx_devinfo(dev_info_t * rdip)347 get_gfx_devinfo(dev_info_t *rdip)
348 {
349 immu_t *immu;
350 immu_devi_t *immu_devi;
351 list_t *list_gfx;
352
353 /*
354 * The GFX device may not be on the same iommu unit as "agpgart"
355 * so search globally
356 */
357 immu_devi = NULL;
358 immu = list_head(&immu_list);
359 for (; immu; immu = list_next(&immu_list, immu)) {
360 list_gfx = &(immu->immu_dvma_gfx_list);
361 if (!list_is_empty(list_gfx)) {
362 immu_devi = list_head(list_gfx);
363 break;
364 }
365 }
366
367 if (immu_devi == NULL) {
368 ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
369 "Cannot redirect agpgart");
370 return (NULL);
371 }
372
373 ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
374 ddi_node_name(immu_devi->imd_dip));
375
376 return (immu_devi->imd_dip);
377 }
378
379 static immu_flags_t
dma_to_immu_flags(struct ddi_dma_req * dmareq)380 dma_to_immu_flags(struct ddi_dma_req *dmareq)
381 {
382 immu_flags_t flags = 0;
383
384 if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
385 flags |= IMMU_FLAGS_SLEEP;
386 } else {
387 flags |= IMMU_FLAGS_NOSLEEP;
388 }
389
390 #ifdef BUGGY_DRIVERS
391
392 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
393
394 #else
395 /*
396 * Read and write flags need to be reversed.
397 * DMA_READ means read from device and write
398 * to memory. So DMA read means DVMA write.
399 */
400 if (dmareq->dmar_flags & DDI_DMA_READ)
401 flags |= IMMU_FLAGS_WRITE;
402
403 if (dmareq->dmar_flags & DDI_DMA_WRITE)
404 flags |= IMMU_FLAGS_READ;
405
406 /*
407 * Some buggy drivers specify neither READ or WRITE
408 * For such drivers set both read and write permissions
409 */
410 if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
411 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
412 }
413 #endif
414
415 return (flags);
416 }
417
418 /*ARGSUSED*/
419 int
pgtable_ctor(void * buf,void * arg,int kmflag)420 pgtable_ctor(void *buf, void *arg, int kmflag)
421 {
422 size_t actual_size = 0;
423 pgtable_t *pgtable;
424 int (*dmafp)(caddr_t);
425 caddr_t vaddr;
426 void *next;
427 uint_t flags;
428 immu_t *immu = arg;
429
430 pgtable = (pgtable_t *)buf;
431
432 dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
433
434 next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
435 if (next == NULL) {
436 return (-1);
437 }
438
439 if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
440 dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
441 kmem_free(next, IMMU_PAGESIZE);
442 return (-1);
443 }
444
445 flags = DDI_DMA_CONSISTENT;
446 if (!immu->immu_dvma_coherent)
447 flags |= IOMEM_DATA_UC_WR_COMBINE;
448
449 if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
450 &immu_acc_attr, flags,
451 dmafp, NULL, &vaddr, &actual_size,
452 &pgtable->hwpg_memhdl) != DDI_SUCCESS) {
453 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
454 kmem_free(next, IMMU_PAGESIZE);
455 return (-1);
456 }
457
458 /*
459 * Memory allocation failure. Maybe a temporary condition
460 * so return error rather than panic, so we can try again
461 */
462 if (actual_size < IMMU_PAGESIZE) {
463 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
464 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
465 kmem_free(next, IMMU_PAGESIZE);
466 return (-1);
467 }
468
469 pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
470 pgtable->hwpg_vaddr = vaddr;
471 pgtable->swpg_next_array = next;
472
473 rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
474
475 return (0);
476 }
477
/*ARGSUSED*/
/*
 * kmem cache destructor for pgtable_t: releases the lock, the HW DMA
 * page/handle and the SW bookkeeping page allocated by pgtable_ctor().
 */
void
pgtable_dtor(void *buf, void *arg)
{
	pgtable_t *pgtable;

	pgtable = (pgtable_t *)buf;

	/* destroy will panic if lock is held. */
	rw_destroy(&(pgtable->swpg_rwlock));

	ddi_dma_mem_free(&pgtable->hwpg_memhdl);
	ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
	kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
}
493
/*
 * pgtable_alloc()
 *	alloc a IOMMU pgtable structure.
 *	This same struct is used for root and context tables as well.
 *	This routine allocs the following:
 *	- a pgtable_t struct
 *	- a HW page which holds PTEs/entries which is accessed by HW
 *	  so we set up DMA for this page
 *	- a SW page which is only for our bookkeeping
 *	  (for example to hold pointers to the next level pgtable).
 *	  So a simple kmem_alloc suffices
 *	(the page allocations happen in pgtable_ctor(), run by the kmem
 *	cache when it constructs a new object)
 */
static pgtable_t *
pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
{
	pgtable_t *pgtable;
	int kmflags;

	kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

	pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
	if (pgtable == NULL) {
		return (NULL);
	}
	return (pgtable);
}
520
/* Zero both the HW PTE page and the SW next-level bookkeeping page */
static void
pgtable_zero(pgtable_t *pgtable)
{
	bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
	bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
}
527
/* Return a pgtable to the per-unit kmem cache (dtor runs on reap) */
static void
pgtable_free(immu_t *immu, pgtable_t *pgtable)
{
	kmem_cache_free(immu->immu_pgtable_cache, pgtable);
}
533
534 /*
535 * Function to identify a display device from the PCI class code
536 */
537 static boolean_t
device_is_display(uint_t classcode)538 device_is_display(uint_t classcode)
539 {
540 static uint_t disp_classes[] = {
541 0x000100,
542 0x030000,
543 0x030001
544 };
545 int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
546
547 for (i = 0; i < nclasses; i++) {
548 if (classcode == disp_classes[i])
549 return (B_TRUE);
550 }
551 return (B_FALSE);
552 }
553
/*
 * Function that determines if device is PCIEX and/or PCIEX bridge
 * by walking the device's PCI capability list.  *is_pcib is set to
 * B_TRUE only for a PCIe-to-PCI bridge.
 */
static boolean_t
device_is_pciex(
	uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
{
	ushort_t cap;
	ushort_t capsp;
	ushort_t cap_count = PCI_CAP_MAX_PTR;	/* bound on list walk */
	ushort_t status;
	boolean_t is_pciex = B_FALSE;

	*is_pcib = B_FALSE;

	/* no capability list at all => plain PCI device */
	status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
	if (!(status & PCI_STAT_CAP))
		return (B_FALSE);

	capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
	while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
		capsp &= PCI_CAP_PTR_MASK;
		cap = pci_getb_func(bus, dev, func, capsp);

		if (cap == PCI_CAP_ID_PCI_E) {
			/* PCIe Capabilities register is at offset 2 */
			status = pci_getw_func(bus, dev, func, capsp + 2);
			/*
			 * See section 7.8.2 of PCI-Express Base Spec v1.0a
			 * for Device/Port Type.
			 * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
			 * device is a PCIE2PCI bridge
			 */
			*is_pcib =
			    ((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
			    PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
			is_pciex = B_TRUE;
		}

		capsp = (*pci_getb_func)(bus, dev, func,
		    capsp + PCI_CAP_NEXT_PTR);
	}

	return (is_pciex);
}
598
599 static boolean_t
device_use_premap(uint_t classcode)600 device_use_premap(uint_t classcode)
601 {
602 if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
603 return (B_TRUE);
604 return (B_FALSE);
605 }
606
607
/*
 * immu_dvma_get_immu()
 *	get the immu unit structure for a dev_info node
 *
 * The result is cached in the node's immu_devi (imd_immu) under
 * devi_lock; subsequent calls take the fast path.  Panics via
 * ddi_err(DER_PANIC, ...) when no unit can be resolved.
 */
immu_t *
immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
{
	immu_devi_t *immu_devi;
	immu_t *immu;

	/*
	 * check if immu unit was already found earlier.
	 * If yes, then it will be stashed in immu_devi struct.
	 */
	immu_devi = immu_devi_get(dip);
	if (immu_devi == NULL) {
		if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
			/*
			 * May fail because of low memory. Return error rather
			 * than panic as we want driver to retry again later.
			 * NOTE(review): the call below passes DER_PANIC,
			 * which contradicts this comment — confirm the
			 * intended severity.
			 */
			ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
			    "No immu_devi structure");
			/*NOTREACHED*/
		}
		immu_devi = immu_devi_get(dip);
	}

	/* fast path: unit already cached on this devinfo node */
	mutex_enter(&(DEVI(dip)->devi_lock));
	if (immu_devi->imd_immu) {
		immu = immu_devi->imd_immu;
		mutex_exit(&(DEVI(dip)->devi_lock));
		return (immu);
	}
	mutex_exit(&(DEVI(dip)->devi_lock));

	/* slow path: resolve the covering unit from the DMAR tables */
	immu = immu_dmar_get_immu(dip);
	if (immu == NULL) {
		ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
		    "Cannot find immu_t for device");
		/*NOTREACHED*/
	}

	/*
	 * Check if some other thread found immu
	 * while lock was not held
	 */
	immu_devi = immu_devi_get(dip);
	/* immu_devi should be present as we found it earlier */
	if (immu_devi == NULL) {
		ddi_err(DER_PANIC, dip,
		    "immu_dvma_get_immu: No immu_devi structure");
		/*NOTREACHED*/
	}

	mutex_enter(&(DEVI(dip)->devi_lock));
	if (immu_devi->imd_immu == NULL) {
		/* nobody else set it, so we should do it */
		immu_devi->imd_immu = immu;
		immu_devi_set_spclist(dip, immu);
	} else {
		/*
		 * if some other thread got immu before
		 * us, it should get the same results
		 */
		if (immu_devi->imd_immu != immu) {
			ddi_err(DER_PANIC, dip, "Multiple "
			    "immu units found for device. Expected (%p), "
			    "actual (%p)", (void *)immu,
			    (void *)immu_devi->imd_immu);
			mutex_exit(&(DEVI(dip)->devi_lock));
			/*NOTREACHED*/
		}
	}
	mutex_exit(&(DEVI(dip)->devi_lock));

	return (immu);
}
686
687
688 /* ############################# IMMU_DEVI code ############################ */
689
/*
 * Allocate a immu_devi structure and initialize it
 *
 * rdip:	devinfo node the struct describes
 * bus:		PCI bus, or -1 for a device with no BDF (non-PCI)
 * dev, func:	PCI device/function (ignored when bus == -1)
 * immu_flags:	IMMU_FLAGS_NOSLEEP makes the allocation non-blocking
 *
 * Classifies the device (bridge type, display, LPC, premap use) from
 * its PCI config space.  Returns NULL on allocation failure.
 */
static immu_devi_t *
create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
    immu_flags_t immu_flags)
{
	uchar_t baseclass, subclass;
	uint_t classcode, revclass;
	immu_devi_t *immu_devi;
	boolean_t pciex = B_FALSE;
	int kmflags;
	boolean_t is_pcib = B_FALSE;

	/* bus == -1 indicate non-PCI device (no BDF) */
	ASSERT(bus == -1 || bus >= 0);
	ASSERT(dev >= 0);
	ASSERT(func >= 0);

	kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
	if (immu_devi == NULL) {
		ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
		    "Intel IOMMU immu_devi structure");
		return (NULL);
	}
	immu_devi->imd_dip = rdip;
	immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
	immu_devi->imd_bus = bus;
	immu_devi->imd_pcib_type = IMMU_PCIB_BAD;

	/* non-PCI devices need no config-space classification */
	if (bus == -1) {
		immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
		return (immu_devi);
	}

	immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
	immu_devi->imd_sec = 0;
	immu_devi->imd_sub = 0;

	/* classcode lives in the upper 24 bits of the REVID register */
	revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);

	classcode = IMMU_PCI_REV2CLASS(revclass);
	baseclass = IMMU_PCI_CLASS2BASE(classcode);
	subclass = IMMU_PCI_CLASS2SUB(classcode);

	if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {

		/* record the bridge's secondary/subordinate bus range */
		immu_devi->imd_sec = pci_getb_func(bus, dev, func,
		    PCI_BCNF_SECBUS);
		immu_devi->imd_sub = pci_getb_func(bus, dev, func,
		    PCI_BCNF_SUBBUS);

		pciex = device_is_pciex(bus, dev, func, &is_pcib);
		if (pciex == B_TRUE && is_pcib == B_TRUE) {
			immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
		} else if (pciex == B_TRUE) {
			immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
		} else {
			immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
		}
	} else {
		immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
	}

	/* check for certain special devices */
	immu_devi->imd_display = device_is_display(classcode);
	immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
	    (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
	immu_devi->imd_use_premap = device_use_premap(classcode);

	immu_devi->imd_domain = NULL;

	immu_devi->imd_dvma_flags = immu_global_dvma_flags;

	return (immu_devi);
}
767
/* Free an immu_devi struct allocated by create_immu_devi() */
static void
destroy_immu_devi(immu_devi_t *immu_devi)
{
	kmem_free(immu_devi, sizeof (immu_devi_t));
}
773
774 static domain_t *
immu_devi_domain(dev_info_t * rdip,dev_info_t ** ddipp)775 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
776 {
777 immu_devi_t *immu_devi;
778 domain_t *domain;
779 dev_info_t *ddip;
780
781 *ddipp = NULL;
782
783 immu_devi = immu_devi_get(rdip);
784 if (immu_devi == NULL) {
785 return (NULL);
786 }
787
788 mutex_enter(&(DEVI(rdip)->devi_lock));
789 domain = immu_devi->imd_domain;
790 ddip = immu_devi->imd_ddip;
791 mutex_exit(&(DEVI(rdip)->devi_lock));
792
793 if (domain)
794 *ddipp = ddip;
795
796 return (domain);
797
798 }
799
800 /* ############################# END IMMU_DEVI code ######################## */
801 /* ############################# DOMAIN code ############################### */
802
/*
 * This routine always succeeds
 *
 * Allocate a domain-id from the unit's did arena; on arena exhaustion
 * it falls back to the unit's unity-domain id (after logging a
 * warning) rather than failing.
 */
static int
did_alloc(immu_t *immu, dev_info_t *rdip,
    dev_info_t *ddip, immu_flags_t immu_flags)
{
	int did;

	/* the arena hands out small integers encoded as pointers */
	did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
	    (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);

	if (did == 0) {
		ddi_err(DER_WARN, rdip, "device domain-id alloc error"
		    " domain-device: %s%d. immu unit is %s. Using "
		    "unity domain with domain-id (%d)",
		    ddi_driver_name(ddip), ddi_get_instance(ddip),
		    immu->immu_name, immu->immu_unity_domain->dom_did);
		did = immu->immu_unity_domain->dom_did;
	}

	return (did);
}
826
827 static int
get_branch_domain(dev_info_t * pdip,void * arg)828 get_branch_domain(dev_info_t *pdip, void *arg)
829 {
830 immu_devi_t *immu_devi;
831 domain_t *domain;
832 dev_info_t *ddip;
833 immu_t *immu;
834 dvma_arg_t *dvp = (dvma_arg_t *)arg;
835
836 /*
837 * The field dvp->dva_rdip is a work-in-progress
838 * and gets updated as we walk up the ancestor
839 * tree. The final ddip is set only when we reach
840 * the top of the tree. So the dvp->dva_ddip field cannot
841 * be relied on until we reach the top of the field.
842 */
843
844 /* immu_devi may not be set. */
845 immu_devi = immu_devi_get(pdip);
846 if (immu_devi == NULL) {
847 if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
848 dvp->dva_error = DDI_FAILURE;
849 return (DDI_WALK_TERMINATE);
850 }
851 }
852
853 immu_devi = immu_devi_get(pdip);
854 immu = immu_devi->imd_immu;
855 if (immu == NULL)
856 immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
857
858 /*
859 * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
860 * terminate the walk (since the device under the PCIE bridge
861 * is a PCIE device and has an independent entry in the
862 * root/context table)
863 */
864 if (dvp->dva_rdip != pdip &&
865 immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
866 return (DDI_WALK_TERMINATE);
867 }
868
869 /*
870 * In order to be a domain-dim, it must be a PCI device i.e.
871 * must have valid BDF. This also eliminates the root complex.
872 */
873 if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
874 immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
875 ASSERT(immu_devi->imd_bus >= 0);
876 ASSERT(immu_devi->imd_devfunc >= 0);
877 dvp->dva_ddip = pdip;
878 }
879
880 if (immu_devi->imd_display == B_TRUE ||
881 (dvp->dva_flags & IMMU_FLAGS_UNITY)) {
882 dvp->dva_domain = immu->immu_unity_domain;
883 /* continue walking to find ddip */
884 return (DDI_WALK_CONTINUE);
885 }
886
887 mutex_enter(&(DEVI(pdip)->devi_lock));
888 domain = immu_devi->imd_domain;
889 ddip = immu_devi->imd_ddip;
890 mutex_exit(&(DEVI(pdip)->devi_lock));
891
892 if (domain && ddip) {
893 /* if domain is set, it must be the same */
894 if (dvp->dva_domain) {
895 ASSERT(domain == dvp->dva_domain);
896 }
897 dvp->dva_domain = domain;
898 dvp->dva_ddip = ddip;
899 return (DDI_WALK_TERMINATE);
900 }
901
902 /* Domain may already be set, continue walking so that ddip gets set */
903 if (dvp->dva_domain) {
904 return (DDI_WALK_CONTINUE);
905 }
906
907 /* domain is not set in either immu_devi or dvp */
908 domain = bdf_domain_lookup(immu_devi);
909 if (domain == NULL) {
910 return (DDI_WALK_CONTINUE);
911 }
912
913 /* ok, the BDF hash had a domain for this BDF. */
914
915 /* Grab lock again to check if something else set immu_devi fields */
916 mutex_enter(&(DEVI(pdip)->devi_lock));
917 if (immu_devi->imd_domain != NULL) {
918 dvp->dva_domain = domain;
919 } else {
920 dvp->dva_domain = domain;
921 }
922 mutex_exit(&(DEVI(pdip)->devi_lock));
923
924 /*
925 * walk upwards until the topmost PCI bridge is found
926 */
927 return (DDI_WALK_CONTINUE);
928
929 }
930
/*
 * Build the identity (1:1) mappings for a unity domain: every span of
 * installed physical memory (phys_install) plus every BIOS-reserved
 * span (bios_rsvd) is mapped DVMA == PA, read/write.
 */
static void
map_unity_domain(domain_t *domain)
{
	struct memlist *mp;
	uint64_t start;
	uint64_t npages;
	immu_dcookie_t dcookies[1] = {0};
	int dcount = 0;

	/*
	 * UNITY arenas are a mirror of the physical memory
	 * installed on the system.
	 */

#ifdef BUGGY_DRIVERS
	/*
	 * Dont skip page0. Some broken HW/FW access it.
	 */
	dcookies[0].dck_paddr = 0;
	dcookies[0].dck_npages = 1;
	dcount = 1;
	(void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
	    IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif

	memlist_read_lock();

	mp = phys_install;

	if (mp->ml_address == 0) {
		/* since we already mapped page1 above */
		start = IMMU_PAGESIZE;
	} else {
		start = mp->ml_address;
	}
	/*
	 * NOTE(review): "+ 1" rounds the page count up but also maps one
	 * extra page for exactly page-aligned spans — presumably
	 * intentional over-mapping; confirm.
	 */
	npages = mp->ml_size/IMMU_PAGESIZE + 1;

	dcookies[0].dck_paddr = start;
	dcookies[0].dck_npages = npages;
	dcount = 1;
	(void) dvma_map(domain, start, npages, dcookies,
	    dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);

	ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
	    " - 0x%" PRIx64 "]", start, start + mp->ml_size);

	/* remaining installed-memory spans */
	mp = mp->ml_next;
	while (mp) {
		ddi_err(DER_LOG, domain->dom_dip,
		    "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
		    mp->ml_address, mp->ml_address + mp->ml_size);

		start = mp->ml_address;
		npages = mp->ml_size/IMMU_PAGESIZE + 1;

		dcookies[0].dck_paddr = start;
		dcookies[0].dck_npages = npages;
		dcount = 1;
		(void) dvma_map(domain, start, npages,
		    dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
		mp = mp->ml_next;
	}

	/* BIOS-reserved ranges must be mapped too */
	mp = bios_rsvd;
	while (mp) {
		ddi_err(DER_LOG, domain->dom_dip,
		    "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
		    mp->ml_address, mp->ml_address + mp->ml_size);

		start = mp->ml_address;
		npages = mp->ml_size/IMMU_PAGESIZE + 1;

		dcookies[0].dck_paddr = start;
		dcookies[0].dck_npages = npages;
		dcount = 1;
		(void) dvma_map(domain, start, npages,
		    dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);

		mp = mp->ml_next;
	}

	memlist_read_unlock();
}
1014
/*
 * create_xlate_arena()
 *	Create the dvma arena for a domain with translation
 *	mapping
 *
 * The arena's spans mirror phys_install (clipped to the unit's MGAW)
 * so allocated DVMA never collides with ioapic/PCI MMIO ranges.
 * Panics on arena creation failure.
 */
static void
create_xlate_arena(immu_t *immu, domain_t *domain,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	char *arena_name;
	struct memlist *mp;
	int vmem_flags;
	uint64_t start;
	uint_t mgaw;
	uint64_t size;
	uint64_t maxaddr;
	void *vmem_ret;

	arena_name = domain->dom_dvma_arena_name;

	/* Note, don't do sizeof (arena_name) - it is just a pointer */
	(void) snprintf(arena_name,
	    sizeof (domain->dom_dvma_arena_name),
	    "%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
	    domain->dom_did);

	vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;

	/* Restrict mgaddr (max guest addr) to MGAW */
	mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);

	/*
	 * To ensure we avoid ioapic and PCI MMIO ranges we just
	 * use the physical memory address range of the system as the
	 * range
	 */
	maxaddr = ((uint64_t)1 << mgaw);

	memlist_read_lock();

	mp = phys_install;

	/* skip page 0; it is a special case */
	if (mp->ml_address == 0)
		start = MMU_PAGESIZE;
	else
		start = mp->ml_address;

	/*
	 * NOTE(review): if start >= maxaddr, "maxaddr - start" underflows
	 * (unsigned) — presumably phys_install always begins below
	 * 1 << MGAW; confirm.
	 */
	if (start + mp->ml_size > maxaddr)
		size = maxaddr - start;
	else
		size = mp->ml_size;

	ddi_err(DER_VERB, rdip,
	    "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
	    " - 0x%" PRIx64 "]", arena_name, start, start + size);

	/*
	 * We always allocate in quanta of IMMU_PAGESIZE
	 */
	domain->dom_dvma_arena = vmem_create(arena_name,
	    (void *)(uintptr_t)start,	/* start addr */
	    size,			/* size */
	    IMMU_PAGESIZE,		/* quantum */
	    NULL,			/* afunc */
	    NULL,			/* ffunc */
	    NULL,			/* source */
	    0,				/* qcache_max */
	    vmem_flags);

	if (domain->dom_dvma_arena == NULL) {
		ddi_err(DER_PANIC, rdip,
		    "Failed to allocate DVMA arena(%s) "
		    "for domain ID (%d)", arena_name, domain->dom_did);
		/*NOTREACHED*/
	}

	/* add the remaining installed-memory spans to the arena */
	mp = mp->ml_next;
	while (mp) {

		if (mp->ml_address == 0)
			start = MMU_PAGESIZE;
		else
			start = mp->ml_address;

		if (start + mp->ml_size > maxaddr)
			size = maxaddr - start;
		else
			size = mp->ml_size;

		ddi_err(DER_VERB, rdip,
		    "iommu: %s: Adding dvma vmem span [0x%" PRIx64
		    " - 0x%" PRIx64 "]", arena_name, start,
		    start + size);

		vmem_ret = vmem_add(domain->dom_dvma_arena,
		    (void *)(uintptr_t)start, size, vmem_flags);

		if (vmem_ret == NULL) {
			ddi_err(DER_PANIC, rdip,
			    "Failed to allocate DVMA arena(%s) "
			    "for domain ID (%d)",
			    arena_name, domain->dom_did);
			/*NOTREACHED*/
		}
		mp = mp->ml_next;
	}
	memlist_read_unlock();
}
1123
1124 /* ################################### DOMAIN CODE ######################### */
1125
1126 /*
1127 * Set the domain and domain-dip for a dip
1128 */
1129 static void
set_domain(dev_info_t * dip,dev_info_t * ddip,domain_t * domain)1130 set_domain(
1131 dev_info_t *dip,
1132 dev_info_t *ddip,
1133 domain_t *domain)
1134 {
1135 immu_devi_t *immu_devi;
1136 domain_t *fdomain;
1137 dev_info_t *fddip;
1138
1139 immu_devi = immu_devi_get(dip);
1140
1141 mutex_enter(&(DEVI(dip)->devi_lock));
1142 fddip = immu_devi->imd_ddip;
1143 fdomain = immu_devi->imd_domain;
1144
1145 if (fddip) {
1146 ASSERT(fddip == ddip);
1147 } else {
1148 immu_devi->imd_ddip = ddip;
1149 }
1150
1151 if (fdomain) {
1152 ASSERT(fdomain == domain);
1153 } else {
1154 immu_devi->imd_domain = domain;
1155 }
1156 mutex_exit(&(DEVI(dip)->devi_lock));
1157 }
1158
1159 /*
1160 * device_domain()
1161 * Get domain for a device. The domain may be global in which case it
1162 * is shared between all IOMMU units. Due to potential AGAW differences
1163 * between IOMMU units, such global domains *have to be* UNITY mapping
1164 * domains. Alternatively, the domain may be local to a IOMMU unit.
1165 * Local domains may be shared or immu_devi, although the
1166 * scope of sharing
1167 * is restricted to devices controlled by the IOMMU unit to
1168 * which the domain
1169 * belongs. If shared, they (currently) have to be UNITY domains. If
1170 * immu_devi a domain may be either UNITY or translation (XLATE) domain.
1171 */
static domain_t *
device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
{
	dev_info_t *ddip;	/* topmost dip in domain i.e. domain owner */
	immu_t *immu;
	domain_t *domain;
	dvma_arg_t dvarg = {0};
	int level;

	*ddipp = NULL;

	/*
	 * Check if the domain is already set. This is usually true
	 * if this is not the first DVMA transaction.
	 */
	ddip = NULL;
	domain = immu_devi_domain(rdip, &ddip);
	if (domain) {
		*ddipp = ddip;
		return (domain);
	}

	immu = immu_dvma_get_immu(rdip, immu_flags);
	if (immu == NULL) {
		/*
		 * possible that there is no IOMMU unit for this device
		 * - BIOS bugs are one example.
		 */
		ddi_err(DER_WARN, rdip, "No iommu unit found for device");
		return (NULL);
	}

	/* merge in the DVMA flags previously recorded for this device */
	immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;

	/*
	 * Walk up the ancestor tree to find the domain owner (and
	 * possibly an already-created domain) for this device.
	 */
	dvarg.dva_rdip = rdip;
	dvarg.dva_ddip = NULL;
	dvarg.dva_domain = NULL;
	dvarg.dva_flags = immu_flags;
	level = 0;
	if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
	    &dvarg, &level, immu_flags) != DDI_SUCCESS) {
		/*
		 * maybe low memory. return error,
		 * so driver tries again later
		 */
		return (NULL);
	}

	/* should have walked at least 1 dip (i.e. edip) */
	ASSERT(level > 0);

	ddip = dvarg.dva_ddip;		/* must be present */
	domain = dvarg.dva_domain;	/* may be NULL */

	/*
	 * We may find the domain during our ancestor walk on any one of our
	 * ancestor dips, If the domain is found then the domain-dip
	 * (i.e. ddip) will also be found in the same immu_devi struct.
	 * The domain-dip is the highest ancestor dip which shares the
	 * same domain with edip.
	 * The domain may or may not be found, but the domain dip must
	 * be found.
	 */
	if (ddip == NULL) {
		ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
		return (NULL);
	}

	/*
	 * Did we find a domain ?
	 */
	if (domain) {
		goto found;
	}

	/* nope, so allocate */
	domain = domain_create(immu, ddip, rdip, immu_flags);
	if (domain == NULL) {
		return (NULL);
	}

	/*FALLTHROUGH*/
found:
	/*
	 * We know *domain *is* the right domain, so panic if
	 * another domain is set for either the request-dip or
	 * effective dip.
	 */
	set_domain(ddip, ddip, domain);
	set_domain(rdip, ddip, domain);

	*ddipp = ddip;
	return (domain);
}
1266
1267 static void
create_unity_domain(immu_t * immu)1268 create_unity_domain(immu_t *immu)
1269 {
1270 domain_t *domain;
1271
1272 /* domain created during boot and always use sleep flag */
1273 domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
1274
1275 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1276
1277 domain->dom_did = IMMU_UNITY_DID;
1278 domain->dom_maptype = IMMU_MAPTYPE_UNITY;
1279
1280 domain->dom_immu = immu;
1281 immu->immu_unity_domain = domain;
1282
1283 /*
1284 * Setup the domain's initial page table
1285 * should never fail.
1286 */
1287 domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1288 pgtable_zero(domain->dom_pgtable_root);
1289
1290 /*
1291 * Only map all physical memory in to the unity domain
1292 * if passthrough is not supported. If it is supported,
1293 * passthrough is set in the context entry instead.
1294 */
1295 if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1296 map_unity_domain(domain);
1297
1298
1299 /*
1300 * put it on the system-wide UNITY domain list
1301 */
1302 mutex_enter(&(immu_domain_lock));
1303 list_insert_tail(&immu_unity_domain_list, domain);
1304 mutex_exit(&(immu_domain_lock));
1305 }
1306
1307 /*
1308 * ddip is the domain-dip - the topmost dip in a domain
1309 * rdip is the requesting-dip - the device which is
1310 * requesting DVMA setup
1311 * if domain is a non-shared domain rdip == ddip
1312 */
static domain_t *
domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
    immu_flags_t immu_flags)
{
	int kmflags;
	domain_t *domain;
	char mod_hash_name[128];
	immu_devi_t *immu_devi;
	int did;
	immu_dcookie_t dcookies[1] = {0};
	int dcount = 0;

	immu_devi = immu_devi_get(rdip);

	/*
	 * First allocate a domainid.
	 * This routine will never fail, since if we run out
	 * of domains the unity domain will be allocated.
	 */
	did = did_alloc(immu, rdip, ddip, immu_flags);
	if (did == IMMU_UNITY_DID) {
		/* domain overflow: fall back to the shared unity domain */
		ASSERT(immu->immu_unity_domain);
		return (immu->immu_unity_domain);
	}

	kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	domain = kmem_zalloc(sizeof (domain_t), kmflags);
	if (domain == NULL) {
		ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
		    "structure for device. IOMMU unit: %s", immu->immu_name);
		/*NOTREACHED*/
	}

	rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);

	(void) snprintf(mod_hash_name, sizeof (mod_hash_name),
	    "immu%s-domain%d-pava-hash", immu->immu_name, did);

	domain->dom_did = did;
	domain->dom_immu = immu;
	domain->dom_maptype = IMMU_MAPTYPE_XLATE;
	domain->dom_dip = ddip;

	/*
	 * Create xlate DVMA arena for this domain.
	 */
	create_xlate_arena(immu, domain, rdip, immu_flags);

	/*
	 * Setup the domain's initial page table
	 */
	domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
	if (domain->dom_pgtable_root == NULL) {
		ddi_err(DER_PANIC, rdip, "Failed to alloc root "
		    "pgtable for domain (%d). IOMMU unit: %s",
		    domain->dom_did, immu->immu_name);
		/*NOTREACHED*/
	}
	pgtable_zero(domain->dom_pgtable_root);

	/*
	 * Since this is a immu unit-specific domain, put it on
	 * the per-immu domain list.
	 */
	mutex_enter(&(immu->immu_lock));
	list_insert_head(&immu->immu_domain_list, domain);
	mutex_exit(&(immu->immu_lock));

	/*
	 * Also put it on the system-wide xlate domain list
	 */
	mutex_enter(&(immu_domain_lock));
	list_insert_head(&immu_xlate_domain_list, domain);
	mutex_exit(&(immu_domain_lock));

	/* make the domain findable by source-id (bus/dev/func) */
	bdf_domain_insert(immu_devi, domain);

#ifdef BUGGY_DRIVERS
	/*
	 * Map page0. Some broken HW/FW access it.
	 */
	dcookies[0].dck_paddr = 0;
	dcookies[0].dck_npages = 1;
	dcount = 1;
	(void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
	    IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
	return (domain);
}
1403
1404 /*
1405 * Create domainid arena.
1406 * Domainid 0 is reserved by Vt-d spec and cannot be used by
1407 * system software.
1408 * Domainid 1 is reserved by solaris and used for *all* of the following:
1409 * as the "uninitialized" domain - For devices not yet controlled
1410 * by Solaris
1411 * as the "unity" domain - For devices that will always belong
1412 * to the unity domain
1413 * as the "overflow" domain - Used for any new device after we
1414 * run out of domains
1415 * All of the above domains map into a single domain with
1416 * domainid 1 and UNITY DVMA mapping
1417 * Each IMMU unity has its own unity/uninit/overflow domain
1418 */
static void
did_init(immu_t *immu)
{
	/* arena name is per IOMMU unit, e.g. "immu0_domainid_arena" */
	(void) snprintf(immu->immu_did_arena_name,
	    sizeof (immu->immu_did_arena_name),
	    "%s_domainid_arena", immu->immu_name);

	ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
	    immu->immu_did_arena_name);

	/*
	 * Domainid 0 (reserved by the VT-d spec) and IMMU_UNITY_DID
	 * are never handed out; the arena starts just above the
	 * unity domainid.
	 */
	immu->immu_did_arena = vmem_create(
	    immu->immu_did_arena_name,
	    (void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */
	    immu->immu_max_domains - IMMU_UNITY_DID, /* size */
	    1, /* quantum */
	    NULL, /* afunc */
	    NULL, /* ffunc */
	    NULL, /* source */
	    0, /* qcache_max */
	    VM_SLEEP);

	/* Even with SLEEP flag, vmem_create() can fail */
	if (immu->immu_did_arena == NULL) {
		ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
		    "IOMMU domainid allocator: %s", immu->immu_name,
		    immu->immu_did_arena_name);
	}
}
1447
1448 /* ######################### CONTEXT CODE ################################# */
1449
/*
 * context_set()
 *    Program the root and context entries for (bus, devfunc) so the
 *    device translates DMA through the given domain's page tables.
 *    Idempotent: returns early if the entry is already initialized.
 */
static void
context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
    int bus, int devfunc)
{
	pgtable_t *context;
	pgtable_t *pgtable_root;
	hw_rce_t *hw_rent;
	hw_rce_t *hw_cent;
	hw_rce_t *ctxp;
	int sid;
	krw_t rwtype;
	boolean_t fill_root;
	boolean_t fill_ctx;

	pgtable_root = domain->dom_pgtable_root;

	/* software shadow array of the root table holds the context pgtable */
	ctxp = (hw_rce_t *)(root_table->swpg_next_array);
	context = *(pgtable_t **)(ctxp + bus);
	hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;

	fill_root = B_FALSE;
	fill_ctx = B_FALSE;

	/* Check the most common case first with reader lock */
	rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
	rwtype = RW_READER;
again:
	if (ROOT_GET_P(hw_rent)) {
		hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
		if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
			/* already programmed, nothing to do */
			rw_exit(&(immu->immu_ctx_rwlock));
			return;
		} else {
			fill_ctx = B_TRUE;
		}
	} else {
		fill_root = B_TRUE;
		fill_ctx = B_TRUE;
	}

	/*
	 * Writing is needed. Try to upgrade to the writer lock; if the
	 * upgrade fails, drop and reacquire as writer and re-check,
	 * since another thread may have programmed the entry meanwhile.
	 */
	if (rwtype == RW_READER &&
	    rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
		rw_exit(&(immu->immu_ctx_rwlock));
		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
		rwtype = RW_WRITER;
		goto again;
	}
	rwtype = RW_WRITER;

	if (fill_root == B_TRUE) {
		ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
		ROOT_SET_P(hw_rent);
		immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
	}

	if (fill_ctx == B_TRUE) {
		hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
		/* need to disable context entry before reprogramming it */
		bzero(hw_cent, sizeof (hw_rce_t));

		/* flush caches */
		immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));

		/* invalidate the old context entry for this source-id */
		sid = ((bus << 8) | devfunc);
		immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
		    &immu->immu_ctx_inv_wait);

		CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
		CONT_SET_DID(hw_cent, domain->dom_did);
		CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
		CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
		if (domain->dom_did == IMMU_UNITY_DID &&
		    IMMU_ECAP_GET_PT(immu->immu_regs_excap))
			CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
		else
			/*LINTED*/
			CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
		CONT_SET_P(hw_cent);
		if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
			/* hardware supports coherency hints */
			CONT_SET_EH(hw_cent);
			if (immu_use_alh)
				CONT_SET_ALH(hw_cent);
		}
		immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
	}
	rw_exit(&(immu->immu_ctx_rwlock));
}
1537
/*
 * context_create()
 *    Allocate and initialize the root table and one context table per
 *    possible bus. All context entries initially point every device at
 *    this unit's unity domain (passthrough if the HW supports it).
 *    Returns the root table; called with sleeping allocations only.
 */
static pgtable_t *
context_create(immu_t *immu)
{
	int bus;
	int devfunc;
	pgtable_t *root_table;
	pgtable_t *context;
	pgtable_t *pgtable_root;
	hw_rce_t *ctxp;
	hw_rce_t *hw_rent;
	hw_rce_t *hw_cent;

	/* Allocate a zeroed root table (4K 256b entries) */
	root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
	pgtable_zero(root_table);

	/*
	 * Setup context tables for all possible root table entries.
	 * Start out with unity domains for all entries.
	 */
	ctxp = (hw_rce_t *)(root_table->swpg_next_array);
	hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
	for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
		context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
		pgtable_zero(context);
		ROOT_SET_P(hw_rent);
		ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
		hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
		for (devfunc = 0; devfunc < IMMU_CONT_NUM;
		    devfunc++, hw_cent++) {
			pgtable_root =
			    immu->immu_unity_domain->dom_pgtable_root;
			CONT_SET_DID(hw_cent,
			    immu->immu_unity_domain->dom_did);
			CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
			CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
			if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
				CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
			else
				/*LINTED*/
				CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
			/* marked UNINITED so context_set() reprograms it */
			CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
			CONT_SET_P(hw_cent);
		}
		/* make the new context table visible to the hardware */
		immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
		/* remember the context pgtable in the software shadow */
		*((pgtable_t **)ctxp) = context;
	}

	return (root_table);
}
1588
1589 /*
1590 * Called during rootnex attach, so no locks needed
1591 */
/*
 * context_init()
 *    One-time setup of this unit's context tables: create the root
 *    table, point the hardware at it, then globally invalidate the
 *    context cache and IOTLB so stale translations are dropped.
 */
static void
context_init(immu_t *immu)
{
	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);

	immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);

	/* drain the hardware write buffer before installing tables */
	immu_regs_wbf_flush(immu);

	immu->immu_ctx_root = context_create(immu);

	immu_regs_set_root_table(immu);

	rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
	immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
	immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
	rw_exit(&(immu->immu_ctx_rwlock));
}
1610
1611
1612 /*
1613 * Find top pcib
1614 */
1615 static int
find_top_pcib(dev_info_t * dip,void * arg)1616 find_top_pcib(dev_info_t *dip, void *arg)
1617 {
1618 immu_devi_t *immu_devi;
1619 dev_info_t **pcibdipp = (dev_info_t **)arg;
1620
1621 immu_devi = immu_devi_get(dip);
1622
1623 if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
1624 *pcibdipp = dip;
1625 }
1626
1627 return (DDI_WALK_CONTINUE);
1628 }
1629
/*
 * immu_context_update()
 *    Program context entries for the request-dip (rdip) and its
 *    domain-dip (ddip), choosing which (bus, devfunc) entries to set
 *    based on the PCI/PCIe bridge topology between them.
 *    Returns DDI_SUCCESS or DDI_FAILURE; panics on unknown topology.
 */
static int
immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	immu_devi_t *r_immu_devi;
	immu_devi_t *d_immu_devi;
	int r_bus;
	int d_bus;
	int r_devfunc;
	int d_devfunc;
	immu_pcib_t d_pcib_type;
	dev_info_t *pcibdip;

	if (ddip == NULL || rdip == NULL ||
	    ddip == root_devinfo || rdip == root_devinfo) {
		ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
		    "request-dip are NULL or are root devinfo");
		return (DDI_FAILURE);
	}

	/*
	 * We need to set the context fields
	 * based on what type of device rdip and ddip are.
	 * To do that we need the immu_devi field.
	 * Set the immu_devi field (if not already set)
	 */
	if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
		ddi_err(DER_MODE, rdip,
		    "immu_context_update: failed to set immu_devi for ddip");
		return (DDI_FAILURE);
	}

	if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
		ddi_err(DER_MODE, rdip,
		    "immu_context_update: failed to set immu_devi for rdip");
		return (DDI_FAILURE);
	}

	d_immu_devi = immu_devi_get(ddip);
	r_immu_devi = immu_devi_get(rdip);

	d_bus = d_immu_devi->imd_bus;
	d_devfunc = d_immu_devi->imd_devfunc;
	d_pcib_type = d_immu_devi->imd_pcib_type;
	r_bus = r_immu_devi->imd_bus;
	r_devfunc = r_immu_devi->imd_devfunc;

	if (rdip == ddip) {
		/* rdip is a PCIE device. set context for it only */
		context_set(immu, domain, immu->immu_ctx_root, r_bus,
		    r_devfunc);
#ifdef BUGGY_DRIVERS
	} else if (r_immu_devi == d_immu_devi) {
#ifdef TEST
		ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
		    "0x%lx are identical", rdip, ddip);
#endif
		/* rdip is a PCIE device. set context for it only */
		context_set(immu, domain, immu->immu_ctx_root, r_bus,
		    r_devfunc);
#endif
	} else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
		/*
		 * ddip is a PCIE_PCI bridge. Set context for ddip's
		 * secondary bus. If rdip is on ddip's secondary
		 * bus, set context for rdip. Else, set context
		 * for rdip's PCI bridge on ddip's secondary bus.
		 */
		context_set(immu, domain, immu->immu_ctx_root,
		    d_immu_devi->imd_sec, 0);
		if (d_immu_devi->imd_sec == r_bus) {
			context_set(immu, domain, immu->immu_ctx_root,
			    r_bus, r_devfunc);
		} else {
			/* find the topmost PCI-PCI bridge above rdip */
			pcibdip = NULL;
			if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
			    &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
			    pcibdip != NULL) {
				r_immu_devi = immu_devi_get(pcibdip);
				r_bus = r_immu_devi->imd_bus;
				r_devfunc = r_immu_devi->imd_devfunc;
				context_set(immu, domain, immu->immu_ctx_root,
				    r_bus, r_devfunc);
			} else {
				ddi_err(DER_PANIC, rdip, "Failed to find PCI "
				    " bridge for PCI device");
				/*NOTREACHED*/
			}
		}
	} else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
		/* conventional PCI bridge: all requests carry its source-id */
		context_set(immu, domain, immu->immu_ctx_root, d_bus,
		    d_devfunc);
	} else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
		/*
		 * ddip is a PCIE device which has a non-PCI device under it
		 * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
		 */
		context_set(immu, domain, immu->immu_ctx_root, d_bus,
		    d_devfunc);
	} else {
		ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
		    "set iommu context.");
		/*NOTREACHED*/
	}

	/* XXX do we need a membar_producer() here */
	return (DDI_SUCCESS);
}
1738
1739 /* ##################### END CONTEXT CODE ################################## */
1740 /* ##################### MAPPING CODE ################################## */
1741
1742
1743 #ifdef DEBUG
1744 static boolean_t
PDTE_check(immu_t * immu,hw_pdte_t pdte,pgtable_t * next,paddr_t paddr,dev_info_t * rdip,immu_flags_t immu_flags)1745 PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
1746 dev_info_t *rdip, immu_flags_t immu_flags)
1747 {
1748 /* The PDTE must be set i.e. present bit is set */
1749 if (!PDTE_P(pdte)) {
1750 ddi_err(DER_MODE, rdip, "No present flag");
1751 return (B_FALSE);
1752 }
1753
1754 /*
1755 * Just assert to check most significant system software field
1756 * (PDTE_SW4) as it is same as present bit and we
1757 * checked that above
1758 */
1759 ASSERT(PDTE_SW4(pdte));
1760
1761 /*
1762 * TM field should be clear if not reserved.
1763 * non-leaf is always reserved
1764 */
1765 if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
1766 if (PDTE_TM(pdte)) {
1767 ddi_err(DER_MODE, rdip, "TM flag set");
1768 return (B_FALSE);
1769 }
1770 }
1771
1772 /*
1773 * The SW3 field is not used and must be clear
1774 */
1775 if (PDTE_SW3(pdte)) {
1776 ddi_err(DER_MODE, rdip, "SW3 set");
1777 return (B_FALSE);
1778 }
1779
1780 /*
1781 * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set
1782 */
1783 if (next == NULL) {
1784 ASSERT(paddr % IMMU_PAGESIZE == 0);
1785 if (PDTE_PADDR(pdte) != paddr) {
1786 ddi_err(DER_MODE, rdip,
1787 "PTE paddr mismatch: %lx != %lx",
1788 PDTE_PADDR(pdte), paddr);
1789 return (B_FALSE);
1790 }
1791 } else {
1792 if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
1793 ddi_err(DER_MODE, rdip,
1794 "PDE paddr mismatch: %lx != %lx",
1795 PDTE_PADDR(pdte), next->hwpg_paddr);
1796 return (B_FALSE);
1797 }
1798 }
1799
1800 /*
1801 * SNP field should be clear if not reserved.
1802 * non-leaf is always reserved
1803 */
1804 if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
1805 if (PDTE_SNP(pdte)) {
1806 ddi_err(DER_MODE, rdip, "SNP set");
1807 return (B_FALSE);
1808 }
1809 }
1810
1811 /* second field available for system software should be clear */
1812 if (PDTE_SW2(pdte)) {
1813 ddi_err(DER_MODE, rdip, "SW2 set");
1814 return (B_FALSE);
1815 }
1816
1817 /* Super pages field should be clear */
1818 if (PDTE_SP(pdte)) {
1819 ddi_err(DER_MODE, rdip, "SP set");
1820 return (B_FALSE);
1821 }
1822
1823 /*
1824 * least significant field available for
1825 * system software should be clear
1826 */
1827 if (PDTE_SW1(pdte)) {
1828 ddi_err(DER_MODE, rdip, "SW1 set");
1829 return (B_FALSE);
1830 }
1831
1832 if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
1833 ddi_err(DER_MODE, rdip, "READ not set");
1834 return (B_FALSE);
1835 }
1836
1837 if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
1838 ddi_err(DER_MODE, rdip, "WRITE not set");
1839 return (B_FALSE);
1840 }
1841
1842 return (B_TRUE);
1843 }
1844 #endif
1845
1846 /*ARGSUSED*/
1847 static void
PTE_clear_all(immu_t * immu,domain_t * domain,xlate_t * xlate,uint64_t * dvma_ptr,uint64_t * npages_ptr,dev_info_t * rdip)1848 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
1849 uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
1850 {
1851 uint64_t npages;
1852 uint64_t dvma;
1853 pgtable_t *pgtable;
1854 hw_pdte_t *hwp;
1855 hw_pdte_t *shwp;
1856 int idx;
1857
1858 pgtable = xlate->xlt_pgtable;
1859 idx = xlate->xlt_idx;
1860
1861 dvma = *dvma_ptr;
1862 npages = *npages_ptr;
1863
1864 /*
1865 * since a caller gets a unique dvma for a physical address,
1866 * no other concurrent thread will be writing to the same
1867 * PTE even if it has the same paddr. So no locks needed.
1868 */
1869 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
1870
1871 hwp = shwp;
1872 for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
1873 PDTE_CLEAR_P(*hwp);
1874 dvma += IMMU_PAGESIZE;
1875 npages--;
1876 }
1877
1878 *dvma_ptr = dvma;
1879 *npages_ptr = npages;
1880
1881 xlate->xlt_idx = idx;
1882 }
1883
1884 static void
xlate_setup(uint64_t dvma,xlate_t * xlate,int nlevels)1885 xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
1886 {
1887 int level;
1888 uint64_t offbits;
1889
1890 /*
1891 * Skip the first 12 bits which is the offset into
1892 * 4K PFN (phys page frame based on IMMU_PAGESIZE)
1893 */
1894 offbits = dvma >> IMMU_PAGESHIFT;
1895
1896 /* skip to level 1 i.e. leaf PTE */
1897 for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
1898 xlate->xlt_level = level;
1899 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
1900 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
1901 xlate->xlt_pgtable = NULL;
1902 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
1903 }
1904 }
1905
1906 /*
1907 * Read the pgtables
1908 */
/*
 * PDE_lookup()
 *    Walk down the pgtables from the root, filling in the pgtable
 *    pointer at each xlate[] level for the indices previously set up
 *    by xlate_setup(). Returns B_FALSE if a level has no pgtable
 *    (i.e. the address was never mapped), B_TRUE otherwise.
 */
static boolean_t
PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
{
	pgtable_t *pgtable;
	pgtable_t *next;
	uint_t idx;

	/* start with highest level pgtable i.e. root */
	xlate += nlevels;

	if (xlate->xlt_pgtable == NULL) {
		xlate->xlt_pgtable = domain->dom_pgtable_root;
	}

	for (; xlate->xlt_level > 1; xlate--) {
		idx = xlate->xlt_idx;
		pgtable = xlate->xlt_pgtable;

		/* level below already resolved (from a prior call) */
		if ((xlate - 1)->xlt_pgtable) {
			continue;
		}

		/* Lock the pgtable in read mode */
		rw_enter(&(pgtable->swpg_rwlock), RW_READER);

		/*
		 * since we are unmapping, the pgtable should
		 * already point to a leafier pgtable.
		 */
		next = *(pgtable->swpg_next_array + idx);
		(xlate - 1)->xlt_pgtable = next;
		rw_exit(&(pgtable->swpg_rwlock));
		if (next == NULL)
			return (B_FALSE);
	}

	return (B_TRUE);
}
1947
1948 static void
immu_fault_walk(void * arg,void * base,size_t len)1949 immu_fault_walk(void *arg, void *base, size_t len)
1950 {
1951 uint64_t dvma, start;
1952
1953 dvma = *(uint64_t *)arg;
1954 start = (uint64_t)(uintptr_t)base;
1955
1956 if (dvma >= start && dvma < (start + len)) {
1957 ddi_err(DER_WARN, NULL,
1958 "faulting DVMA address is in vmem arena "
1959 "(%" PRIx64 "-%" PRIx64 ")",
1960 start, start + len);
1961 *(uint64_t *)arg = ~0ULL;
1962 }
1963 }
1964
1965 void
immu_print_fault_info(uint_t sid,uint64_t dvma)1966 immu_print_fault_info(uint_t sid, uint64_t dvma)
1967 {
1968 int nlevels;
1969 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
1970 xlate_t *xlatep;
1971 hw_pdte_t pte;
1972 domain_t *domain;
1973 immu_t *immu;
1974 uint64_t dvma_arg;
1975
1976 if (mod_hash_find(bdf_domain_hash,
1977 (void *)(uintptr_t)sid, (void *)&domain) != 0) {
1978 ddi_err(DER_WARN, NULL,
1979 "no domain for faulting SID %08x", sid);
1980 return;
1981 }
1982
1983 immu = domain->dom_immu;
1984
1985 dvma_arg = dvma;
1986 vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
1987 (void *)&dvma_arg);
1988 if (dvma_arg != ~0ULL)
1989 ddi_err(DER_WARN, domain->dom_dip,
1990 "faulting DVMA address is not in vmem arena");
1991
1992 nlevels = immu->immu_dvma_nlevels;
1993 xlate_setup(dvma, xlate, nlevels);
1994
1995 if (!PDE_lookup(domain, xlate, nlevels)) {
1996 ddi_err(DER_WARN, domain->dom_dip,
1997 "pte not found in domid %d for faulting addr %" PRIx64,
1998 domain->dom_did, dvma);
1999 return;
2000 }
2001
2002 xlatep = &xlate[1];
2003 pte = *((hw_pdte_t *)
2004 (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
2005
2006 ddi_err(DER_WARN, domain->dom_dip,
2007 "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
2008 (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
2009 }
2010
/*
 * PTE_set_one()
 *    Write a single leaf PTE mapping 'paddr' with the permissions in
 *    'immu_flags'. In non-DEBUG builds the entry is built from scratch
 *    off immu_ptemask; in DEBUG builds the existing entry is read back
 *    and each field validated/cleared individually.
 */
/*ARGSUSED*/
static void
PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	hw_pdte_t pte;

#ifndef DEBUG
	/* fast path: start from the precomputed mask and add the PFN */
	pte = immu->immu_ptemask;
	PDTE_SET_PADDR(pte, paddr);
#else
	pte = *hwp;

	/* entry already present: it must map the same paddr */
	if (PDTE_P(pte)) {
		if (PDTE_PADDR(pte) != paddr) {
			ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
			    PDTE_PADDR(pte), paddr);
		}
#ifdef BUGGY_DRIVERS
		return;
#else
		goto out;
#endif
	}

	/* clear TM field if not reserved */
	if (immu->immu_TM_reserved == B_FALSE) {
		PDTE_CLEAR_TM(pte);
	}

	/* Clear 3rd field for system software - not used */
	PDTE_CLEAR_SW3(pte);

	/* Set paddr */
	ASSERT(paddr % IMMU_PAGESIZE == 0);
	PDTE_CLEAR_PADDR(pte);
	PDTE_SET_PADDR(pte, paddr);

	/* clear SNP field if not reserved. */
	if (immu->immu_SNP_reserved == B_FALSE) {
		PDTE_CLEAR_SNP(pte);
	}

	/* Clear SW2 field available for software */
	PDTE_CLEAR_SW2(pte);


	/* SP is don't care for PTEs. Clear it for cleanliness */
	PDTE_CLEAR_SP(pte);

	/* Clear SW1 field available for software */
	PDTE_CLEAR_SW1(pte);

	/*
	 * Now that we are done writing the PTE
	 * set the "present" flag. Note this present
	 * flag is a bit in the PDE/PTE that the
	 * spec says is available for system software.
	 * This is an implementation detail of Solaris
	 * bare-metal Intel IOMMU.
	 * The present field in a PDE/PTE is not defined
	 * by the Vt-d spec
	 */

	PDTE_SET_P(pte);

	pte |= immu->immu_ptemask;

out:
#endif /* DEBUG */
#ifdef BUGGY_DRIVERS
	/* buggy drivers DMA to buffers they didn't map: allow both */
	PDTE_SET_READ(pte);
	PDTE_SET_WRITE(pte);
#else
	if (immu_flags & IMMU_FLAGS_READ)
		PDTE_SET_READ(pte);
	if (immu_flags & IMMU_FLAGS_WRITE)
		PDTE_SET_WRITE(pte);
#endif /* BUGGY_DRIVERS */

	*hwp = pte;
}
2093
/*
 * PTE_set_all()
 *    Write consecutive leaf PTEs in one pgtable, mapping pages drawn
 *    from the dcookies[] list (each cookie is a contiguous physical
 *    run). Stops at the end of this pgtable or when all pages/cookies
 *    are consumed; remaining work is reported via dvma_ptr/nvpages_ptr.
 */
/*ARGSUSED*/
static void
PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
    uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
    int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
	paddr_t paddr;
	uint64_t nvpages;
	uint64_t nppages;
	uint64_t dvma;
	pgtable_t *pgtable;
	hw_pdte_t *hwp;
	hw_pdte_t *shwp;
	int idx, nset;
	int j;

	pgtable = xlate->xlt_pgtable;
	idx = xlate->xlt_idx;

	dvma = *dvma_ptr;
	nvpages = *nvpages_ptr;

	/*
	 * since a caller gets a unique dvma for a physical address,
	 * no other concurrent thread will be writing to the same
	 * PTE even if it has the same paddr. So no locks needed.
	 */
	shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;

	hwp = shwp;
	/*
	 * The nvpages still to map are the trailing pages of the cookie
	 * list; walk backwards from the last cookie subtracting page
	 * counts until the remainder fits inside cookie j.
	 */
	for (j = dcount - 1; j >= 0; j--) {
		if (nvpages <= dcookies[j].dck_npages)
			break;
		nvpages -= dcookies[j].dck_npages;
	}

	VERIFY(j >= 0);
	/* nppages = pages to take from the tail end of cookie j */
	nppages = nvpages;
	paddr = dcookies[j].dck_paddr +
	    (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;

	nvpages = *nvpages_ptr;
	nset = 0;
	for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
		PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
		nset++;

		ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
		    == B_TRUE);
		nppages--;
		nvpages--;
		paddr += IMMU_PAGESIZE;
		dvma += IMMU_PAGESIZE;

		/* cookie j exhausted: advance to the next cookie */
		if (nppages == 0) {
			j++;
		}

		if (j == dcount)
			break;

		if (nppages == 0) {
			nppages = dcookies[j].dck_npages;
			paddr = dcookies[j].dck_paddr;
		}
	}

	/* report back the remaining work (zeroed when fully done) */
	if (nvpages) {
		*dvma_ptr = dvma;
		*nvpages_ptr = nvpages;
	} else {
		*dvma_ptr = 0;
		*nvpages_ptr = 0;
	}

	xlate->xlt_idx = idx;
}
2171
/*
 * PDE_set_one()
 *    Write a single PDE pointing at the next-level pgtable 'next',
 *    with the permissions requested in 'immu_flags'. If the PDE is
 *    already present it must already point at 'next'; only the R/W
 *    permission bits are (re)applied in that case.
 */
/*ARGSUSED*/
static void
PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	hw_pdte_t pde;

	pde = *hwp;

	/* if PDE is already set, make sure it is correct */
	if (PDTE_P(pde)) {
		ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
#ifdef BUGGY_DRIVERS
		return;
#else
		goto out;
#endif
	}

	/* Dont touch SW4, it is the present bit */

	/* don't touch TM field it is reserved for PDEs */

	/* 3rd field available for system software is not used */
	PDTE_CLEAR_SW3(pde);

	/* Set next level pgtable-paddr for PDE */
	PDTE_CLEAR_PADDR(pde);
	PDTE_SET_PADDR(pde, next->hwpg_paddr);

	/* don't touch SNP field it is reserved for PDEs */

	/* Clear second field available for system software */
	PDTE_CLEAR_SW2(pde);

	/* No super pages for PDEs */
	PDTE_CLEAR_SP(pde);

	/* Clear SW1 for software */
	PDTE_CLEAR_SW1(pde);

	/*
	 * Now that we are done writing the PDE
	 * set the "present" flag. Note this present
	 * flag is a bit in the PDE/PTE that the
	 * spec says is available for system software.
	 * This is an implementation detail of Solaris
	 * bare-metal Intel IOMMU.
	 * The present field in a PDE/PTE is not defined
	 * by the Vt-d spec
	 */

out:
#ifdef BUGGY_DRIVERS
	/* buggy drivers DMA to buffers they didn't map: allow both */
	PDTE_SET_READ(pde);
	PDTE_SET_WRITE(pde);
#else
	if (immu_flags & IMMU_FLAGS_READ)
		PDTE_SET_READ(pde);
	if (immu_flags & IMMU_FLAGS_WRITE)
		PDTE_SET_WRITE(pde);
#endif

	PDTE_SET_P(pde);

	*hwp = pde;
}
2239
/*
 * Used to set PDEs
 *
 * Walk the page-table hierarchy from the root down to (but not
 * including) the leaf level for the address described by the xlate
 * array, allocating intermediate pgtables where missing and wiring
 * each PDE to the next level via PDE_set_one().  On return,
 * xlt_pgtable is valid at every level walked so the caller can fill
 * in leaf PTEs.  Returns B_TRUE if any PDE was newly installed (the
 * caller then needs a non-leaf IOTLB invalidation).
 */
static boolean_t
PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	pgtable_t *pgtable;
	pgtable_t *new;	/* preallocated pgtable, not yet linked in */
	pgtable_t *next;
	hw_pdte_t *hwp;
	int level;
	uint_t idx;
	krw_t rwtype;
	boolean_t set = B_FALSE;

	/* start with highest level pgtable i.e. root */
	xlate += nlevels;

	new = NULL;
	xlate->xlt_pgtable = domain->dom_pgtable_root;
	for (level = nlevels; level > 1; level--, xlate--) {
		idx = xlate->xlt_idx;
		pgtable = xlate->xlt_pgtable;

		/* Lock the pgtable in READ mode first */
		rw_enter(&(pgtable->swpg_rwlock), RW_READER);
		rwtype = RW_READER;
again:
		/*
		 * (Re-)read the PDE and the shadow next-level pointer;
		 * both may have changed while the lock was dropped for
		 * the reader->writer upgrade below.
		 */
		hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
		next = (pgtable->swpg_next_array)[idx];

		/*
		 * check if leafier level already has a pgtable
		 * if yes, verify
		 */
		if (next == NULL) {
			/* Allocate at most one spare pgtable per pass. */
			if (new == NULL) {

				IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
				    rdip, int, level);

				new = pgtable_alloc(immu, immu_flags);
				if (new == NULL) {
					ddi_err(DER_PANIC, rdip,
					    "pgtable alloc err");
				}
				pgtable_zero(new);
			}

			/* Change to a write lock */
			if (rwtype == RW_READER &&
			    rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
				/*
				 * Upgrade failed: drop and re-acquire as
				 * writer, then re-check, since another
				 * thread may have installed the pgtable
				 * in the window.
				 */
				rw_exit(&(pgtable->swpg_rwlock));
				rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
				rwtype = RW_WRITER;
				goto again;
			}
			rwtype = RW_WRITER;
			/* Link in the preallocated pgtable and its PDE. */
			next = new;
			(pgtable->swpg_next_array)[idx] = next;
			new = NULL;
			PDE_set_one(immu, hwp, next, rdip, immu_flags);
			set = B_TRUE;
			rw_downgrade(&(pgtable->swpg_rwlock));
			rwtype = RW_READER;
		}
#ifndef BUGGY_DRIVERS
		else {
			hw_pdte_t pde = *hwp;

			/*
			 * If buggy driver we already set permission
			 * READ+WRITE so nothing to do for that case
			 * XXX Check that read writer perms change before
			 * actually setting perms. Also need to hold lock
			 */
			if (immu_flags & IMMU_FLAGS_READ)
				PDTE_SET_READ(pde);
			if (immu_flags & IMMU_FLAGS_WRITE)
				PDTE_SET_WRITE(pde);

			*hwp = pde;
		}
#endif

		ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
		    == B_TRUE);

		/* Descend: the next iteration works on this pgtable. */
		(xlate - 1)->xlt_pgtable = next;
		rw_exit(&(pgtable->swpg_rwlock));
	}

	/* Lost the race at some level; free the unused preallocation. */
	if (new) {
		pgtable_free(immu, new);
	}

	return (set);
}
2339
2340 /*
2341 * dvma_map()
2342 * map a contiguous range of DVMA pages
2343 *
2344 * immu: IOMMU unit for which we are generating DVMA cookies
2345 * domain: domain
2346 * sdvma: Starting dvma
2347 * spaddr: Starting paddr
2348 * npages: Number of pages
2349 * rdip: requesting device
2350 * immu_flags: flags
2351 */
2352 static boolean_t
dvma_map(domain_t * domain,uint64_t sdvma,uint64_t snvpages,immu_dcookie_t * dcookies,int dcount,dev_info_t * rdip,immu_flags_t immu_flags)2353 dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
2354 immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
2355 immu_flags_t immu_flags)
2356 {
2357 uint64_t dvma;
2358 uint64_t n;
2359 immu_t *immu = domain->dom_immu;
2360 int nlevels = immu->immu_dvma_nlevels;
2361 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2362 boolean_t pde_set = B_FALSE;
2363
2364 n = snvpages;
2365 dvma = sdvma;
2366
2367 while (n > 0) {
2368 xlate_setup(dvma, xlate, nlevels);
2369
2370 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */
2371 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
2372 == B_TRUE) {
2373 pde_set = B_TRUE;
2374 }
2375
2376 /* set all matching ptes that fit into this leaf pgtable */
2377 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
2378 dcount, rdip, immu_flags);
2379 }
2380
2381 return (pde_set);
2382 }
2383
2384 /*
2385 * dvma_unmap()
2386 * unmap a range of DVMAs
2387 *
2388 * immu: IOMMU unit state
2389 * domain: domain for requesting device
2390 * ddip: domain-dip
2391 * dvma: starting DVMA
2392 * npages: Number of IMMU pages to be unmapped
2393 * rdip: requesting device
2394 */
2395 static void
dvma_unmap(domain_t * domain,uint64_t sdvma,uint64_t snpages,dev_info_t * rdip)2396 dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
2397 dev_info_t *rdip)
2398 {
2399 immu_t *immu = domain->dom_immu;
2400 int nlevels = immu->immu_dvma_nlevels;
2401 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2402 uint64_t n;
2403 uint64_t dvma;
2404
2405 dvma = sdvma;
2406 n = snpages;
2407
2408 while (n > 0) {
2409 /* setup the xlate array */
2410 xlate_setup(dvma, xlate, nlevels);
2411
2412 /* just lookup existing pgtables. Should never fail */
2413 if (!PDE_lookup(domain, xlate, nlevels))
2414 ddi_err(DER_PANIC, rdip,
2415 "PTE not found for addr %" PRIx64,
2416 (unsigned long long)dvma);
2417
2418 /* clear all matching ptes that fit into this leaf pgtable */
2419 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
2420 }
2421
2422 /* No need to flush IOTLB after unmap */
2423 }
2424
2425 static uint64_t
dvma_alloc(domain_t * domain,ddi_dma_attr_t * dma_attr,uint_t npages,int kmf)2426 dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
2427 {
2428 uint64_t dvma;
2429 size_t xsize, align;
2430 uint64_t minaddr, maxaddr;
2431
2432 /* parameters */
2433 xsize = npages * IMMU_PAGESIZE;
2434 align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2435 minaddr = dma_attr->dma_attr_addr_lo;
2436 maxaddr = dma_attr->dma_attr_addr_hi + 1;
2437
2438 /* handle the rollover cases */
2439 if (maxaddr < dma_attr->dma_attr_addr_hi) {
2440 maxaddr = dma_attr->dma_attr_addr_hi;
2441 }
2442
2443 /*
2444 * allocate from vmem arena.
2445 */
2446 dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2447 xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
2448 (void *)(uintptr_t)maxaddr, kmf);
2449
2450 return (dvma);
2451 }
2452
/*
 * Preallocate a small DVMA range (IMMU_NPREPTES pages) for this DMA
 * handle and remember the leaf PTE pointers in ihp->ihp_preptes[], so
 * that later small mappings can be made by writing the PTEs directly
 * without walking or locking the page tables.  On allocation failure
 * ihp_predvma is left 0 and the handle falls back to the normal path.
 */
static void
dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
{
	int nlevels;
	xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
	uint64_t dvma, n;
	size_t xsize, align;
	uint64_t minaddr, maxaddr, dmamax;
	int on, npte, pindex;
	hw_pdte_t *shwp;
	immu_t *immu;
	domain_t *domain;

	/* parameters */
	domain = IMMU_DEVI(rdip)->imd_domain;
	immu = domain->dom_immu;
	nlevels = immu->immu_dvma_nlevels;
	xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
	align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
	minaddr = dma_attr->dma_attr_addr_lo;
	/* Bounce-on-seg devices are bounded by the segment, not addr_hi. */
	if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
		dmamax = dma_attr->dma_attr_seg;
	else
		dmamax = dma_attr->dma_attr_addr_hi;
	maxaddr = dmamax + 1;

	/* The +1 may wrap when dmamax is all-ones; clamp back. */
	if (maxaddr < dmamax)
		maxaddr = dmamax;

	/* nocross = seg + 1 keeps the range within one DMA segment. */
	dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
	    xsize, align, 0, dma_attr->dma_attr_seg + 1,
	    (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);

	ihp->ihp_predvma = dvma;
	ihp->ihp_npremapped = 0;
	if (dvma == 0)
		return;

	n = IMMU_NPREPTES;
	pindex = 0;

	/*
	 * Set up a mapping at address 0, just so that all PDPs get allocated
	 * now. Although this initial mapping should never be used,
	 * explicitly set it to read-only, just to be safe.
	 * (immu_precookie presumably describes paddr 0 — confirm in the
	 * declaring file.)
	 */
	while (n > 0) {
		xlate_setup(dvma, xlate, nlevels);

		(void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
		    IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);

		/* First leaf PTE covered by this iteration. */
		xlp = &xlate[1];
		shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
		    + xlp->xlt_idx;
		on = n;

		PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
		    1, rdip, IMMU_FLAGS_READ);

		/* Number of PTEs PTE_set_all consumed this pass. */
		npte = on - n;

		/* Record each leaf PTE pointer for direct reuse later. */
		while (npte > 0) {
			ihp->ihp_preptes[pindex++] = shwp;
#ifdef BUGGY_DRIVERS
			PDTE_CLEAR_WRITE(*shwp);
#endif
			shwp++;
			npte--;
		}
	}
}
2525
2526 static void
dvma_prefree(dev_info_t * rdip,immu_hdl_priv_t * ihp)2527 dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
2528 {
2529 domain_t *domain;
2530
2531 domain = IMMU_DEVI(rdip)->imd_domain;
2532
2533 if (ihp->ihp_predvma != 0) {
2534 dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
2535 vmem_free(domain->dom_dvma_arena,
2536 (void *)(uintptr_t)ihp->ihp_predvma,
2537 IMMU_NPREPTES * IMMU_PAGESIZE);
2538 }
2539 }
2540
2541 static void
dvma_free(domain_t * domain,uint64_t dvma,uint64_t npages)2542 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
2543 {
2544 uint64_t size = npages * IMMU_PAGESIZE;
2545
2546 if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
2547 return;
2548
2549 vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
2550 }
2551
2552 static int
immu_map_dvmaseg(dev_info_t * rdip,ddi_dma_handle_t handle,immu_hdl_priv_t * ihp,struct ddi_dma_req * dmareq,ddi_dma_obj_t * dma_out)2553 immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
2554 immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
2555 ddi_dma_obj_t *dma_out)
2556 {
2557 domain_t *domain;
2558 immu_t *immu;
2559 immu_flags_t immu_flags;
2560 ddi_dma_atyp_t buftype;
2561 ddi_dma_obj_t *dmar_object;
2562 ddi_dma_attr_t *attrp;
2563 uint64_t offset, paddr, dvma, sdvma, rwmask;
2564 size_t npages, npgalloc;
2565 uint_t psize, size, pcnt, dmax;
2566 page_t **pparray;
2567 caddr_t vaddr;
2568 page_t *page;
2569 struct as *vas;
2570 immu_dcookie_t *dcookies;
2571 int pde_set;
2572
2573 rwmask = 0;
2574 page = NULL;
2575 domain = IMMU_DEVI(rdip)->imd_domain;
2576 immu = domain->dom_immu;
2577 immu_flags = dma_to_immu_flags(dmareq);
2578
2579 attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
2580
2581 dmar_object = &dmareq->dmar_object;
2582 pparray = dmar_object->dmao_obj.virt_obj.v_priv;
2583 vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2584 buftype = dmar_object->dmao_type;
2585 size = dmar_object->dmao_size;
2586
2587 IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
2588 buftype, uint_t, size);
2589
2590 dcookies = &ihp->ihp_dcookies[0];
2591
2592 pcnt = dmax = 0;
2593
2594 /* retrieve paddr, psize, offset from dmareq */
2595 if (buftype == DMA_OTYP_PAGES) {
2596 page = dmar_object->dmao_obj.pp_obj.pp_pp;
2597 offset = dmar_object->dmao_obj.pp_obj.pp_offset &
2598 MMU_PAGEOFFSET;
2599 paddr = pfn_to_pa(page->p_pagenum) + offset;
2600 psize = MIN((MMU_PAGESIZE - offset), size);
2601 page = page->p_next;
2602 vas = dmar_object->dmao_obj.virt_obj.v_as;
2603 } else {
2604 if (vas == NULL) {
2605 vas = &kas;
2606 }
2607 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2608 if (pparray != NULL) {
2609 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
2610 psize = MIN((MMU_PAGESIZE - offset), size);
2611 pcnt++;
2612 } else {
2613 paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
2614 vaddr)) + offset;
2615 psize = MIN(size, (MMU_PAGESIZE - offset));
2616 vaddr += psize;
2617 }
2618 }
2619
2620 npgalloc = IMMU_BTOPR(size + offset);
2621
2622 if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
2623 #ifdef BUGGY_DRIVERS
2624 rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
2625 #else
2626 rwmask = immu->immu_ptemask;
2627 if (immu_flags & IMMU_FLAGS_READ)
2628 rwmask |= PDTE_MASK_R;
2629 if (immu_flags & IMMU_FLAGS_WRITE)
2630 rwmask |= PDTE_MASK_W;
2631 #endif
2632 #ifdef DEBUG
2633 rwmask |= PDTE_MASK_P;
2634 #endif
2635 sdvma = ihp->ihp_predvma;
2636 ihp->ihp_npremapped = npgalloc;
2637 *ihp->ihp_preptes[0] =
2638 PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
2639 } else {
2640 ihp->ihp_npremapped = 0;
2641 sdvma = dvma_alloc(domain, attrp, npgalloc,
2642 dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
2643 if (sdvma == 0)
2644 return (DDI_DMA_NORESOURCES);
2645
2646 dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
2647 dcookies[0].dck_npages = 1;
2648 }
2649
2650 IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
2651 uint64_t, sdvma);
2652
2653 dvma = sdvma;
2654 pde_set = 0;
2655 npages = 1;
2656 size -= psize;
2657 while (size > 0) {
2658 /* get the size for this page (i.e. partial or full page) */
2659 psize = MIN(size, MMU_PAGESIZE);
2660 if (buftype == DMA_OTYP_PAGES) {
2661 /* get the paddr from the page_t */
2662 paddr = pfn_to_pa(page->p_pagenum);
2663 page = page->p_next;
2664 } else if (pparray != NULL) {
2665 /* index into the array of page_t's to get the paddr */
2666 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
2667 pcnt++;
2668 } else {
2669 /* call into the VM to get the paddr */
2670 paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
2671 vaddr += psize;
2672 }
2673
2674 if (ihp->ihp_npremapped > 0) {
2675 *ihp->ihp_preptes[npages] =
2676 PDTE_PADDR(paddr) | rwmask;
2677 } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
2678 dcookies[dmax].dck_npages++;
2679 } else {
2680 /* No, we need a new dcookie */
2681 if (dmax == (IMMU_NDCK - 1)) {
2682 /*
2683 * Ran out of dcookies. Map them now.
2684 */
2685 if (dvma_map(domain, dvma,
2686 npages, dcookies, dmax + 1, rdip,
2687 immu_flags))
2688 pde_set++;
2689
2690 IMMU_DPROBE4(immu__dvmamap__early,
2691 dev_info_t *, rdip, uint64_t, dvma,
2692 uint_t, npages, uint_t, dmax+1);
2693
2694 dvma += (npages << IMMU_PAGESHIFT);
2695 npages = 0;
2696 dmax = 0;
2697 } else {
2698 dmax++;
2699 }
2700 dcookies[dmax].dck_paddr = paddr;
2701 dcookies[dmax].dck_npages = 1;
2702 }
2703 size -= psize;
2704 if (npages != 0)
2705 npages++;
2706 }
2707
2708 /*
2709 * Finish up, mapping all, or all of the remaining,
2710 * physical memory ranges.
2711 */
2712 if (ihp->ihp_npremapped == 0 && npages > 0) {
2713 IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip, \
2714 uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
2715
2716 if (dvma_map(domain, dvma, npages, dcookies,
2717 dmax + 1, rdip, immu_flags))
2718 pde_set++;
2719 }
2720
2721 /* Invalidate the IOTLB */
2722 immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
2723 pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
2724 &ihp->ihp_inv_wait);
2725
2726 ihp->ihp_ndvseg = 1;
2727 ihp->ihp_dvseg[0].dvs_start = sdvma;
2728 ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
2729
2730 dma_out->dmao_size = dmar_object->dmao_size;
2731 dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
2732 dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
2733 dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
2734 dma_out->dmao_type = DMA_OTYP_DVADDR;
2735
2736 return (DDI_DMA_MAPPED);
2737 }
2738
2739 static int
immu_unmap_dvmaseg(dev_info_t * rdip,ddi_dma_obj_t * dmao)2740 immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
2741 {
2742 uint64_t dvma, npages;
2743 domain_t *domain;
2744 struct dvmaseg *dvs;
2745
2746 domain = IMMU_DEVI(rdip)->imd_domain;
2747 dvs = dmao->dmao_obj.dvma_obj.dv_seg;
2748
2749 dvma = dvs[0].dvs_start;
2750 npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
2751
2752 #ifdef DEBUG
2753 /* Unmap only in DEBUG mode */
2754 dvma_unmap(domain, dvma, npages, rdip);
2755 #endif
2756 dvma_free(domain, dvma, npages);
2757
2758 IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
2759 uint64_t, dvma);
2760
2761 #ifdef DEBUG
2762 /*
2763 * In the DEBUG case, the unmap was actually done,
2764 * but an IOTLB flush was not done. So, an explicit
2765 * write back flush is needed.
2766 */
2767 immu_regs_wbf_flush(domain->dom_immu);
2768 #endif
2769
2770 return (DDI_SUCCESS);
2771 }
2772
2773 /* ############################# Functions exported ######################## */
2774
2775 /*
2776 * setup the DVMA subsystem
2777 * this code runs only for the first IOMMU unit
2778 */
2779 void
immu_dvma_setup(list_t * listp)2780 immu_dvma_setup(list_t *listp)
2781 {
2782 immu_t *immu;
2783 uint_t kval;
2784 size_t nchains;
2785
2786 /* locks */
2787 mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);
2788
2789 /* Create lists */
2790 list_create(&immu_unity_domain_list, sizeof (domain_t),
2791 offsetof(domain_t, dom_maptype_node));
2792 list_create(&immu_xlate_domain_list, sizeof (domain_t),
2793 offsetof(domain_t, dom_maptype_node));
2794
2795 /* Setup BDF domain hash */
2796 nchains = 0xff;
2797 kval = mod_hash_iddata_gen(nchains);
2798
2799 bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
2800 nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
2801 mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
2802 KM_NOSLEEP);
2803
2804 immu = list_head(listp);
2805 for (; immu; immu = list_next(listp, immu)) {
2806 create_unity_domain(immu);
2807 did_init(immu);
2808 context_init(immu);
2809 immu->immu_dvma_setup = B_TRUE;
2810 }
2811 }
2812
2813 /*
2814 * Startup up one DVMA unit
2815 */
2816 void
immu_dvma_startup(immu_t * immu)2817 immu_dvma_startup(immu_t *immu)
2818 {
2819 if (immu_gfxdvma_enable == B_FALSE &&
2820 immu->immu_dvma_gfx_only == B_TRUE) {
2821 return;
2822 }
2823
2824 /*
2825 * DVMA will start once IOMMU is "running"
2826 */
2827 immu->immu_dvma_running = B_TRUE;
2828 }
2829
2830 /*
2831 * immu_dvma_physmem_update()
2832 * called when the installed memory on a
2833 * system increases, to expand domain DVMA
2834 * for domains with UNITY mapping
2835 */
2836 void
immu_dvma_physmem_update(uint64_t addr,uint64_t size)2837 immu_dvma_physmem_update(uint64_t addr, uint64_t size)
2838 {
2839 uint64_t start;
2840 uint64_t npages;
2841 int dcount;
2842 immu_dcookie_t dcookies[1] = {0};
2843 domain_t *domain;
2844
2845 /*
2846 * Just walk the system-wide list of domains with
2847 * UNITY mapping. Both the list of *all* domains
2848 * and *UNITY* domains is protected by the same
2849 * single lock
2850 */
2851 mutex_enter(&immu_domain_lock);
2852 domain = list_head(&immu_unity_domain_list);
2853 for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
2854 /*
2855 * Nothing to do if the IOMMU supports passthrough.
2856 */
2857 if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
2858 continue;
2859
2860 /* There is no vmem_arena for unity domains. Just map it */
2861 ddi_err(DER_LOG, domain->dom_dip,
2862 "iommu: unity-domain: Adding map "
2863 "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
2864
2865 start = IMMU_ROUNDOWN(addr);
2866 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
2867
2868 dcookies[0].dck_paddr = start;
2869 dcookies[0].dck_npages = npages;
2870 dcount = 1;
2871 (void) dvma_map(domain, start, npages,
2872 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2873
2874 }
2875 mutex_exit(&immu_domain_lock);
2876 }
2877
2878 int
immu_dvma_device_setup(dev_info_t * rdip,immu_flags_t immu_flags)2879 immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
2880 {
2881 dev_info_t *ddip, *odip;
2882 immu_t *immu;
2883 domain_t *domain;
2884
2885 odip = rdip;
2886
2887 immu = immu_dvma_get_immu(rdip, immu_flags);
2888 if (immu == NULL) {
2889 /*
2890 * possible that there is no IOMMU unit for this device
2891 * - BIOS bugs are one example.
2892 */
2893 ddi_err(DER_WARN, rdip, "No iommu unit found for device");
2894 return (DDI_DMA_NORESOURCES);
2895 }
2896
2897 /*
2898 * redirect isa devices attached under lpc to lpc dip
2899 */
2900 if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
2901 rdip = get_lpc_devinfo(immu, rdip, immu_flags);
2902 if (rdip == NULL) {
2903 ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2904 /*NOTREACHED*/
2905 }
2906 }
2907
2908 /* Reset immu, as redirection can change IMMU */
2909 immu = NULL;
2910
2911 /*
2912 * for gart, redirect to the real graphic devinfo
2913 */
2914 if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
2915 rdip = get_gfx_devinfo(rdip);
2916 if (rdip == NULL) {
2917 ddi_err(DER_PANIC, rdip, "iommu redirect failed");
2918 /*NOTREACHED*/
2919 }
2920 }
2921
2922 /*
2923 * Setup DVMA domain for the device. This does
2924 * work only the first time we do DVMA for a
2925 * device.
2926 */
2927 ddip = NULL;
2928 domain = device_domain(rdip, &ddip, immu_flags);
2929 if (domain == NULL) {
2930 ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
2931 return (DDI_DMA_NORESOURCES);
2932 }
2933
2934 immu = domain->dom_immu;
2935
2936 /*
2937 * If a domain is found, we must also have a domain dip
2938 * which is the topmost ancestor dip of rdip that shares
2939 * the same domain with rdip.
2940 */
2941 if (domain->dom_did == 0 || ddip == NULL) {
2942 ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
2943 domain->dom_did, ddip);
2944 return (DDI_DMA_NORESOURCES);
2945 }
2946
2947 if (odip != rdip)
2948 set_domain(odip, ddip, domain);
2949
2950 /*
2951 * Update the root and context entries
2952 */
2953 if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
2954 != DDI_SUCCESS) {
2955 ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
2956 return (DDI_DMA_NORESOURCES);
2957 }
2958
2959 return (DDI_SUCCESS);
2960 }
2961
2962 int
immu_map_memrange(dev_info_t * rdip,memrng_t * mrng)2963 immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
2964 {
2965 immu_dcookie_t dcookies[1] = {0};
2966 boolean_t pde_set;
2967 immu_t *immu;
2968 domain_t *domain;
2969 immu_inv_wait_t iw;
2970
2971 dcookies[0].dck_paddr = mrng->mrng_start;
2972 dcookies[0].dck_npages = mrng->mrng_npages;
2973
2974 domain = IMMU_DEVI(rdip)->imd_domain;
2975 immu = domain->dom_immu;
2976
2977 pde_set = dvma_map(domain, mrng->mrng_start,
2978 mrng->mrng_npages, dcookies, 1, rdip,
2979 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2980
2981 immu_init_inv_wait(&iw, "memrange", B_TRUE);
2982
2983 immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
2984 mrng->mrng_npages, pde_set == B_TRUE ?
2985 TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
2986
2987 return (DDI_SUCCESS);
2988 }
2989
2990 immu_devi_t *
immu_devi_get(dev_info_t * rdip)2991 immu_devi_get(dev_info_t *rdip)
2992 {
2993 immu_devi_t *immu_devi;
2994 volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);
2995
2996 /* Just want atomic reads. No need for lock */
2997 immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
2998 0);
2999 return (immu_devi);
3000 }
3001
3002 /*ARGSUSED*/
3003 int
immu_hdl_priv_ctor(void * buf,void * arg,int kmf)3004 immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
3005 {
3006 immu_hdl_priv_t *ihp;
3007
3008 ihp = buf;
3009 immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
3010
3011 return (0);
3012 }
3013
3014 /*
3015 * iommulib interface functions
3016 */
3017 static int
immu_probe(iommulib_handle_t handle,dev_info_t * dip)3018 immu_probe(iommulib_handle_t handle, dev_info_t *dip)
3019 {
3020 immu_devi_t *immu_devi;
3021 int ret;
3022
3023 if (!immu_enable)
3024 return (DDI_FAILURE);
3025
3026 /*
3027 * Make sure the device has all the IOMMU structures
3028 * initialized. If this device goes through an IOMMU
3029 * unit (e.g. this probe function returns success),
3030 * this will be called at most N times, with N being
3031 * the number of IOMMUs in the system.
3032 *
3033 * After that, when iommulib_nex_open succeeds,
3034 * we can always assume that this device has all
3035 * the structures initialized. IOMMU_USED(dip) will
3036 * be true. There is no need to find the controlling
3037 * IOMMU/domain again.
3038 */
3039 ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
3040 if (ret != DDI_SUCCESS)
3041 return (ret);
3042
3043 immu_devi = IMMU_DEVI(dip);
3044
3045 /*
3046 * For unity domains, there is no need to call in to
3047 * the IOMMU code.
3048 */
3049 if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
3050 return (DDI_FAILURE);
3051
3052 if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
3053 return (DDI_SUCCESS);
3054
3055 return (DDI_FAILURE);
3056 }
3057
3058 /*ARGSUSED*/
3059 static int
immu_allochdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_attr_t * attr,int (* waitfp)(caddr_t),caddr_t arg,ddi_dma_handle_t * dma_handlep)3060 immu_allochdl(iommulib_handle_t handle,
3061 dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
3062 int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
3063 {
3064 int ret;
3065 immu_hdl_priv_t *ihp;
3066 immu_t *immu;
3067
3068 ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
3069 arg, dma_handlep);
3070 if (ret == DDI_SUCCESS) {
3071 immu = IMMU_DEVI(rdip)->imd_immu;
3072
3073 ihp = kmem_cache_alloc(immu->immu_hdl_cache,
3074 waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
3075 if (ihp == NULL) {
3076 (void) iommulib_iommu_dma_freehdl(dip, rdip,
3077 *dma_handlep);
3078 return (DDI_DMA_NORESOURCES);
3079 }
3080
3081 if (IMMU_DEVI(rdip)->imd_use_premap)
3082 dvma_prealloc(rdip, ihp, attr);
3083 else {
3084 ihp->ihp_npremapped = 0;
3085 ihp->ihp_predvma = 0;
3086 }
3087 ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
3088 ihp);
3089 }
3090 return (ret);
3091 }
3092
3093 /*ARGSUSED*/
3094 static int
immu_freehdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle)3095 immu_freehdl(iommulib_handle_t handle,
3096 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3097 {
3098 immu_hdl_priv_t *ihp;
3099
3100 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3101 if (ihp != NULL) {
3102 if (IMMU_DEVI(rdip)->imd_use_premap)
3103 dvma_prefree(rdip, ihp);
3104 kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
3105 }
3106
3107 return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
3108 }
3109
3110
3111 /*ARGSUSED*/
3112 static int
immu_bindhdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,struct ddi_dma_req * dma_req,ddi_dma_cookie_t * cookiep,uint_t * ccountp)3113 immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
3114 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3115 struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
3116 uint_t *ccountp)
3117 {
3118 int ret;
3119 immu_hdl_priv_t *ihp;
3120
3121 ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
3122 dma_req, cookiep, ccountp);
3123
3124 if (ret == DDI_DMA_MAPPED) {
3125 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3126 immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
3127 }
3128
3129 return (ret);
3130 }
3131
3132 /*ARGSUSED*/
3133 static int
immu_unbindhdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle)3134 immu_unbindhdl(iommulib_handle_t handle,
3135 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3136 {
3137 return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
3138 }
3139
3140 /*ARGSUSED*/
3141 static int
immu_sync(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,off_t off,size_t len,uint_t cachefl)3142 immu_sync(iommulib_handle_t handle, dev_info_t *dip,
3143 dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
3144 size_t len, uint_t cachefl)
3145 {
3146 return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
3147 cachefl));
3148 }
3149
3150 /*ARGSUSED*/
3151 static int
immu_win(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,uint_t win,off_t * offp,size_t * lenp,ddi_dma_cookie_t * cookiep,uint_t * ccountp)3152 immu_win(iommulib_handle_t handle, dev_info_t *dip,
3153 dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
3154 off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
3155 uint_t *ccountp)
3156 {
3157 return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
3158 lenp, cookiep, ccountp));
3159 }
3160
3161 /*ARGSUSED*/
3162 static int
immu_mapobject(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,struct ddi_dma_req * dmareq,ddi_dma_obj_t * dmao)3163 immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
3164 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3165 struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
3166 {
3167 immu_hdl_priv_t *ihp;
3168
3169 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3170
3171 return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
3172 }
3173
3174 /*ARGSUSED*/
3175 static int
immu_unmapobject(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,ddi_dma_obj_t * dmao)3176 immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
3177 dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
3178 {
3179 immu_hdl_priv_t *ihp;
3180
3181 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3182 if (ihp->ihp_npremapped > 0)
3183 return (DDI_SUCCESS);
3184 return (immu_unmap_dvmaseg(rdip, dmao));
3185 }
3186