1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Portions Copyright (c) 2010, Oracle and/or its affiliates. 23 * All rights reserved. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 /* 31 * DVMA code 32 * This file contains Intel IOMMU code that deals with DVMA 33 * i.e. DMA remapping. 34 */ 35 36 #include <sys/sysmacros.h> 37 #include <sys/pcie.h> 38 #include <sys/pci_cfgspace.h> 39 #include <vm/hat_i86.h> 40 #include <sys/memlist.h> 41 #include <sys/acpi/acpi.h> 42 #include <sys/acpica.h> 43 #include <sys/modhash.h> 44 #include <sys/immu.h> 45 46 #undef TEST 47 48 /* 49 * Macros based on PCI spec 50 */ 51 #define IMMU_PCI_REV2CLASS(r) ((r) >> 8) /* classcode from revid */ 52 #define IMMU_PCI_CLASS2BASE(c) ((c) >> 16) /* baseclass from classcode */ 53 #define IMMU_PCI_CLASS2SUB(c) (((c) >> 8) & 0xff); /* classcode */ 54 55 #define IMMU_CONTIG_PADDR(d, p) \ 56 ((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p)) 57 58 typedef struct dvma_arg { 59 immu_t *dva_immu; 60 dev_info_t *dva_rdip; 61 dev_info_t *dva_ddip; 62 domain_t *dva_domain; 63 int dva_level; 64 immu_flags_t dva_flags; 65 list_t *dva_list; 66 int dva_error; 67 } dvma_arg_t; 68 69 static domain_t *domain_create(immu_t *immu, dev_info_t *ddip, 70 dev_info_t *rdip, immu_flags_t immu_flags); 71 static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus, 72 int dev, int func, immu_flags_t immu_flags); 73 static void destroy_immu_devi(immu_devi_t *immu_devi); 74 static boolean_t dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, 75 uint64_t nvpages, dcookie_t *dcookies, int dcount, dev_info_t *rdip, 76 immu_flags_t immu_flags); 77 78 /* Extern globals */ 79 extern struct memlist *phys_install; 80 81 82 /* static Globals */ 83 84 /* 85 * Used to setup DMA objects (memory regions) 86 * for DMA reads by IOMMU units 87 */ 88 static ddi_dma_attr_t immu_dma_attr = { 89 DMA_ATTR_V0, 90 0U, 91 0xffffffffffffffffULL, 92 0xffffffffU, 93 MMU_PAGESIZE, /* MMU page aligned */ 94 0x1, 95 0x1, 96 0xffffffffU, 97 0xffffffffffffffffULL, 98 1, 99 4, 100 0 101 }; 102 103 static ddi_device_acc_attr_t immu_acc_attr = { 104 DDI_DEVICE_ATTR_V0, 105 DDI_NEVERSWAP_ACC, 106 DDI_STRICTORDER_ACC 107 }; 108 109 110 /* globals private to this file */ 111 static kmutex_t immu_domain_lock; 112 static list_t immu_unity_domain_list; 113 static list_t immu_xlate_domain_list; 114 115 /* structure used to store idx into each level of the page tables */ 116 typedef struct xlate { 117 int xlt_level; 118 uint_t xlt_idx; 119 pgtable_t *xlt_pgtable; 120 } xlate_t; 121 122 /* 0 is reserved by Vt-d spec. Solaris reserves 1 */ 123 #define IMMU_UNITY_DID 1 124 125 static mod_hash_t *bdf_domain_hash; 126 127 static domain_t * 128 bdf_domain_lookup(immu_devi_t *immu_devi) 129 { 130 domain_t *domain; 131 int16_t seg = immu_devi->imd_seg; 132 int16_t bus = immu_devi->imd_bus; 133 int16_t devfunc = immu_devi->imd_devfunc; 134 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc); 135 136 if (seg < 0 || bus < 0 || devfunc < 0) { 137 return (NULL); 138 } 139 140 domain = NULL; 141 if (mod_hash_find(bdf_domain_hash, 142 (void *)bdf, (void *)&domain) == 0) { 143 ASSERT(domain); 144 ASSERT(domain->dom_did > 0); 145 return (domain); 146 } else { 147 return (NULL); 148 } 149 } 150 151 static void 152 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain) 153 { 154 int16_t seg = immu_devi->imd_seg; 155 int16_t bus = immu_devi->imd_bus; 156 int16_t devfunc = immu_devi->imd_devfunc; 157 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc); 158 int r; 159 160 if (seg < 0 || bus < 0 || devfunc < 0) { 161 return; 162 } 163 164 r = mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain); 165 ASSERT(r != MH_ERR_DUPLICATE); 166 ASSERT(r == 0); 167 } 168 169 static int 170 match_lpc(dev_info_t *pdip, void *arg) 171 { 172 immu_devi_t *immu_devi; 173 dvma_arg_t *dvap = (dvma_arg_t *)arg; 174 175 ASSERT(dvap->dva_error == DDI_FAILURE); 176 ASSERT(dvap->dva_ddip == NULL); 177 ASSERT(dvap->dva_list); 178 179 if (list_is_empty(dvap->dva_list)) { 180 return (DDI_WALK_TERMINATE); 181 } 182 183 immu_devi = list_head(dvap->dva_list); 184 for (; immu_devi; immu_devi = list_next(dvap->dva_list, 185 immu_devi)) { 186 ASSERT(immu_devi->imd_dip); 187 if (immu_devi->imd_dip == pdip) { 188 dvap->dva_ddip = pdip; 189 dvap->dva_error = DDI_SUCCESS; 190 return (DDI_WALK_TERMINATE); 191 } 192 } 193 194 return (DDI_WALK_CONTINUE); 195 } 196 197 static void 198 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu) 199 { 200 list_t *spclist = NULL; 201 immu_devi_t *immu_devi; 202 203 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_lock))); 204 205 immu_devi = IMMU_DEVI(dip); 206 if (immu_devi->imd_display == B_TRUE) { 207 spclist = &(immu->immu_dvma_gfx_list); 208 } else if (immu_devi->imd_lpc == B_TRUE) { 209 spclist = &(immu->immu_dvma_lpc_list); 210 } 211 212 if (spclist) { 213 mutex_enter(&(immu->immu_lock)); 214 list_insert_head(spclist, immu_devi); 215 mutex_exit(&(immu->immu_lock)); 216 } 217 } 218 219 /* 220 * Set the immu_devi struct in the immu_devi field of a devinfo node 221 */ 222 int 223 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags) 224 { 225 int bus, dev, func; 226 immu_devi_t *new_imd; 227 immu_devi_t *immu_devi; 228 229 ASSERT(root_devinfo); 230 ASSERT(dip); 231 ASSERT(dip != root_devinfo); 232 233 immu_devi = immu_devi_get(dip); 234 if (immu_devi != NULL) { 235 return (DDI_SUCCESS); 236 } 237 238 bus = dev = func = -1; 239 240 /* 241 * Assume a new immu_devi struct is needed 242 */ 243 if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) { 244 /* 245 * No BDF. Set bus = -1 to indicate this. 246 * We still need to create a immu_devi struct 247 * though 248 */ 249 bus = -1; 250 dev = 0; 251 func = 0; 252 } 253 254 new_imd = create_immu_devi(dip, bus, dev, func, immu_flags); 255 if (new_imd == NULL) { 256 ddi_err(DER_WARN, dip, "Failed to create immu_devi " 257 "structure"); 258 return (DDI_FAILURE); 259 } 260 261 /* 262 * Check if some other thread allocated a immu_devi while we 263 * didn't own the lock. 264 */ 265 mutex_enter(&(DEVI(dip)->devi_lock)); 266 if (IMMU_DEVI(dip) == NULL) { 267 IMMU_DEVI_SET(dip, new_imd); 268 } else { 269 destroy_immu_devi(new_imd); 270 } 271 mutex_exit(&(DEVI(dip)->devi_lock)); 272 273 return (DDI_SUCCESS); 274 } 275 276 static dev_info_t * 277 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags) 278 { 279 dvma_arg_t dvarg = {0}; 280 dvarg.dva_list = &(immu->immu_dvma_lpc_list); 281 dvarg.dva_rdip = rdip; 282 dvarg.dva_error = DDI_FAILURE; 283 284 if (immu_walk_ancestor(rdip, NULL, match_lpc, 285 &dvarg, NULL, immu_flags) != DDI_SUCCESS) { 286 ddi_err(DER_MODE, rdip, "Could not walk ancestors to " 287 "find lpc_devinfo for ISA device"); 288 return (NULL); 289 } 290 291 if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) { 292 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for " 293 "ISA device"); 294 return (NULL); 295 } 296 297 return (dvarg.dva_ddip); 298 } 299 300 static dev_info_t * 301 get_gfx_devinfo(dev_info_t *rdip) 302 { 303 immu_t *immu; 304 immu_devi_t *immu_devi; 305 list_t *list_gfx; 306 307 /* 308 * The GFX device may not be on the same IMMU unit as "agpgart" 309 * so search globally 310 */ 311 immu_devi = NULL; 312 immu = list_head(&immu_list); 313 for (; immu; immu = list_next(&immu_list, immu)) { 314 list_gfx = &(immu->immu_dvma_gfx_list); 315 if (!list_is_empty(list_gfx)) { 316 immu_devi = list_head(list_gfx); 317 break; 318 } 319 } 320 321 if (immu_devi == NULL) { 322 ddi_err(DER_WARN, rdip, "IMMU: No GFX device. " 323 "Cannot redirect agpgart"); 324 return (NULL); 325 } 326 327 /* list is not empty we checked above */ 328 ASSERT(immu_devi); 329 ASSERT(immu_devi->imd_dip); 330 331 ddi_err(DER_LOG, rdip, "IMMU: GFX redirect to %s", 332 ddi_node_name(immu_devi->imd_dip)); 333 334 return (immu_devi->imd_dip); 335 } 336 337 static immu_flags_t 338 dma_to_immu_flags(struct ddi_dma_req *dmareq) 339 { 340 immu_flags_t flags = 0; 341 342 if (dmareq->dmar_fp == DDI_DMA_SLEEP) { 343 flags |= IMMU_FLAGS_SLEEP; 344 } else { 345 flags |= IMMU_FLAGS_NOSLEEP; 346 } 347 348 #ifdef BUGGY_DRIVERS 349 350 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 351 352 #else 353 /* 354 * Read and write flags need to be reversed. 355 * DMA_READ means read from device and write 356 * to memory. So DMA read means DVMA write. 357 */ 358 if (dmareq->dmar_flags & DDI_DMA_READ) 359 flags |= IMMU_FLAGS_WRITE; 360 361 if (dmareq->dmar_flags & DDI_DMA_WRITE) 362 flags |= IMMU_FLAGS_READ; 363 364 /* 365 * Some buggy drivers specify neither READ or WRITE 366 * For such drivers set both read and write permissions 367 */ 368 if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) { 369 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 370 } 371 #endif 372 373 return (flags); 374 } 375 376 int 377 pgtable_ctor(void *buf, void *arg, int kmflag) 378 { 379 size_t actual_size = 0; 380 pgtable_t *pgtable; 381 int (*dmafp)(caddr_t); 382 caddr_t vaddr; 383 void *next; 384 385 ASSERT(buf); 386 ASSERT(arg == NULL); 387 388 pgtable = (pgtable_t *)buf; 389 390 dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP; 391 392 next = kmem_zalloc(IMMU_PAGESIZE, kmflag); 393 if (next == NULL) { 394 return (-1); 395 } 396 397 ASSERT(root_devinfo); 398 if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr, 399 dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) { 400 kmem_free(next, IMMU_PAGESIZE); 401 return (-1); 402 } 403 404 if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE, 405 &immu_acc_attr, DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, 406 dmafp, NULL, &vaddr, &actual_size, 407 &pgtable->hwpg_memhdl) != DDI_SUCCESS) { 408 ddi_dma_free_handle(&pgtable->hwpg_dmahdl); 409 kmem_free(next, IMMU_PAGESIZE); 410 return (-1); 411 } 412 413 /* 414 * Memory allocation failure. Maybe a temporary condition 415 * so return error rather than panic, so we can try again 416 */ 417 if (actual_size < IMMU_PAGESIZE) { 418 ddi_dma_mem_free(&pgtable->hwpg_memhdl); 419 ddi_dma_free_handle(&pgtable->hwpg_dmahdl); 420 kmem_free(next, IMMU_PAGESIZE); 421 return (-1); 422 } 423 424 pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr)); 425 pgtable->hwpg_vaddr = vaddr; 426 pgtable->swpg_next_array = next; 427 428 rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL); 429 430 return (0); 431 } 432 433 void 434 pgtable_dtor(void *buf, void *arg) 435 { 436 pgtable_t *pgtable; 437 438 ASSERT(buf); 439 ASSERT(arg == NULL); 440 441 pgtable = (pgtable_t *)buf; 442 ASSERT(pgtable->swpg_next_array); 443 444 /* destroy will panic if lock is held. */ 445 rw_destroy(&(pgtable->swpg_rwlock)); 446 447 ddi_dma_mem_free(&pgtable->hwpg_memhdl); 448 ddi_dma_free_handle(&pgtable->hwpg_dmahdl); 449 kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE); 450 451 /* don't zero out hwpg_vaddr and swpg_next_array for debugging */ 452 } 453 454 /* 455 * pgtable_alloc() 456 * alloc a IOMMU pgtable structure. 457 * This same struct is used for root and context tables as well. 458 * This routine allocs the f/ollowing: 459 * - a pgtable_t struct 460 * - a HW page which holds PTEs/entries which is accesssed by HW 461 * so we set up DMA for this page 462 * - a SW page which is only for our bookeeping 463 * (for example to hold pointers to the next level pgtable). 464 * So a simple kmem_alloc suffices 465 */ 466 static pgtable_t * 467 pgtable_alloc(immu_t *immu, immu_flags_t immu_flags) 468 { 469 pgtable_t *pgtable; 470 int kmflags; 471 472 ASSERT(immu); 473 474 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 475 476 pgtable = kmem_cache_alloc(immu_pgtable_cache, kmflags); 477 if (pgtable == NULL) { 478 return (NULL); 479 } 480 return (pgtable); 481 } 482 483 static void 484 pgtable_zero(immu_t *immu, pgtable_t *pgtable) 485 { 486 bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE); 487 bzero(pgtable->swpg_next_array, IMMU_PAGESIZE); 488 489 /* Dont need to flush the write we will flush when we use the entry */ 490 immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE); 491 } 492 493 static void 494 pgtable_free(immu_t *immu, pgtable_t *pgtable) 495 { 496 ASSERT(immu); 497 ASSERT(pgtable); 498 499 kmem_cache_free(immu_pgtable_cache, pgtable); 500 } 501 502 /* 503 * Function to identify a display device from the PCI class code 504 */ 505 static boolean_t 506 device_is_display(uint_t classcode) 507 { 508 static uint_t disp_classes[] = { 509 0x000100, 510 0x030000, 511 0x030001 512 }; 513 int i, nclasses = sizeof (disp_classes) / sizeof (uint_t); 514 515 for (i = 0; i < nclasses; i++) { 516 if (classcode == disp_classes[i]) 517 return (B_TRUE); 518 } 519 return (B_FALSE); 520 } 521 522 /* 523 * Function that determines if device is PCIEX and/or PCIEX bridge 524 */ 525 static boolean_t 526 device_is_pciex( 527 uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib) 528 { 529 ushort_t cap; 530 ushort_t capsp; 531 ushort_t cap_count = PCI_CAP_MAX_PTR; 532 ushort_t status; 533 boolean_t is_pciex = B_FALSE; 534 535 *is_pcib = B_FALSE; 536 537 status = pci_getw_func(bus, dev, func, PCI_CONF_STAT); 538 if (!(status & PCI_STAT_CAP)) 539 return (B_FALSE); 540 541 capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR); 542 while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) { 543 capsp &= PCI_CAP_PTR_MASK; 544 cap = pci_getb_func(bus, dev, func, capsp); 545 546 if (cap == PCI_CAP_ID_PCI_E) { 547 status = pci_getw_func(bus, dev, func, capsp + 2); 548 /* 549 * See section 7.8.2 of PCI-Express Base Spec v1.0a 550 * for Device/Port Type. 551 * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the 552 * device is a PCIE2PCI bridge 553 */ 554 *is_pcib = 555 ((status & PCIE_PCIECAP_DEV_TYPE_MASK) == 556 PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE; 557 is_pciex = B_TRUE; 558 } 559 560 capsp = (*pci_getb_func)(bus, dev, func, 561 capsp + PCI_CAP_NEXT_PTR); 562 } 563 564 return (is_pciex); 565 } 566 567 568 /* 569 * immu_dvma_get_immu() 570 * get the immu unit structure for a dev_info node 571 */ 572 immu_t * 573 immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags) 574 { 575 immu_devi_t *immu_devi; 576 immu_t *immu; 577 578 /* 579 * check if immu unit was already found earlier. 580 * If yes, then it will be stashed in immu_devi struct. 581 */ 582 immu_devi = immu_devi_get(dip); 583 if (immu_devi == NULL) { 584 if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) { 585 /* 586 * May fail because of low memory. Return error rather 587 * than panic as we want driver to rey again later 588 */ 589 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: " 590 "No immu_devi structure"); 591 /*NOTREACHED*/ 592 } 593 immu_devi = immu_devi_get(dip); 594 ASSERT(immu_devi); 595 } 596 597 mutex_enter(&(DEVI(dip)->devi_lock)); 598 if (immu_devi->imd_immu) { 599 immu = immu_devi->imd_immu; 600 mutex_exit(&(DEVI(dip)->devi_lock)); 601 return (immu); 602 } 603 mutex_exit(&(DEVI(dip)->devi_lock)); 604 605 immu = immu_dmar_get_immu(dip); 606 if (immu == NULL) { 607 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: " 608 "Cannot find immu_t for device"); 609 /*NOTREACHED*/ 610 } 611 612 /* 613 * Check if some other thread found immu 614 * while lock was not held 615 */ 616 immu_devi = immu_devi_get(dip); 617 /* immu_devi should be present as we found it earlier */ 618 if (immu_devi == NULL) { 619 ddi_err(DER_PANIC, dip, 620 "immu_dvma_get_immu: No immu_devi structure"); 621 /*NOTREACHED*/ 622 } 623 624 mutex_enter(&(DEVI(dip)->devi_lock)); 625 if (immu_devi->imd_immu == NULL) { 626 /* nobody else set it, so we should do it */ 627 immu_devi->imd_immu = immu; 628 immu_devi_set_spclist(dip, immu); 629 } else { 630 /* 631 * if some other thread got immu before 632 * us, it should get the same results 633 */ 634 if (immu_devi->imd_immu != immu) { 635 ddi_err(DER_PANIC, dip, "Multiple " 636 "immu units found for device. Expected (%p), " 637 "actual (%p)", (void *)immu, 638 (void *)immu_devi->imd_immu); 639 mutex_exit(&(DEVI(dip)->devi_lock)); 640 /*NOTREACHED*/ 641 } 642 } 643 mutex_exit(&(DEVI(dip)->devi_lock)); 644 645 return (immu); 646 } 647 648 649 /* ############################# IMMU_DEVI code ############################ */ 650 651 /* 652 * Allocate a immu_devi structure and initialize it 653 */ 654 static immu_devi_t * 655 create_immu_devi(dev_info_t *rdip, int bus, int dev, int func, 656 immu_flags_t immu_flags) 657 { 658 uchar_t baseclass, subclass; 659 uint_t classcode, revclass; 660 immu_devi_t *immu_devi; 661 boolean_t pciex = B_FALSE; 662 int kmflags; 663 boolean_t is_pcib = B_FALSE; 664 665 /* bus == -1 indicate non-PCI device (no BDF) */ 666 ASSERT(bus == -1 || bus >= 0); 667 ASSERT(dev >= 0); 668 ASSERT(func >= 0); 669 670 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 671 immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags); 672 if (immu_devi == NULL) { 673 ddi_err(DER_WARN, rdip, "Failed to allocate memory for " 674 "Intel IOMMU immu_devi structure"); 675 return (NULL); 676 } 677 immu_devi->imd_dip = rdip; 678 immu_devi->imd_seg = 0; /* Currently seg can only be 0 */ 679 immu_devi->imd_bus = bus; 680 immu_devi->imd_pcib_type = IMMU_PCIB_BAD; 681 682 if (bus == -1) { 683 immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF; 684 return (immu_devi); 685 } 686 687 immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func); 688 immu_devi->imd_sec = 0; 689 immu_devi->imd_sub = 0; 690 691 revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID); 692 693 classcode = IMMU_PCI_REV2CLASS(revclass); 694 baseclass = IMMU_PCI_CLASS2BASE(classcode); 695 subclass = IMMU_PCI_CLASS2SUB(classcode); 696 697 if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) { 698 699 immu_devi->imd_sec = pci_getb_func(bus, dev, func, 700 PCI_BCNF_SECBUS); 701 immu_devi->imd_sub = pci_getb_func(bus, dev, func, 702 PCI_BCNF_SUBBUS); 703 704 pciex = device_is_pciex(bus, dev, func, &is_pcib); 705 if (pciex == B_TRUE && is_pcib == B_TRUE) { 706 immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI; 707 } else if (pciex == B_TRUE) { 708 immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE; 709 } else { 710 immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI; 711 } 712 } else { 713 immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT; 714 } 715 716 /* check for certain special devices */ 717 immu_devi->imd_display = device_is_display(classcode); 718 719 immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) && 720 (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE; 721 722 immu_devi->imd_domain = NULL; 723 724 immu_devi->imd_dvma_flags = immu_global_dvma_flags; 725 726 return (immu_devi); 727 } 728 729 static void 730 destroy_immu_devi(immu_devi_t *immu_devi) 731 { 732 kmem_free(immu_devi, sizeof (immu_devi_t)); 733 } 734 735 static domain_t * 736 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp) 737 { 738 immu_devi_t *immu_devi; 739 domain_t *domain; 740 dev_info_t *ddip; 741 742 ASSERT(rdip); 743 ASSERT(ddipp); 744 745 *ddipp = NULL; 746 747 immu_devi = immu_devi_get(rdip); 748 if (immu_devi == NULL) { 749 return (NULL); 750 } 751 752 mutex_enter(&(DEVI(rdip)->devi_lock)); 753 domain = immu_devi->imd_domain; 754 ddip = immu_devi->imd_ddip; 755 mutex_exit(&(DEVI(rdip)->devi_lock)); 756 757 if (domain) { 758 ASSERT(domain->dom_did > 0); 759 ASSERT(ddip); 760 *ddipp = ddip; 761 } 762 763 return (domain); 764 765 } 766 767 /* ############################# END IMMU_DEVI code ######################## */ 768 /* ############################# DOMAIN code ############################### */ 769 770 /* 771 * This routine always succeeds 772 */ 773 static int 774 did_alloc(immu_t *immu, dev_info_t *rdip, 775 dev_info_t *ddip, immu_flags_t immu_flags) 776 { 777 int did; 778 779 ASSERT(immu); 780 ASSERT(rdip); 781 ASSERT(rdip != root_devinfo); 782 783 did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1, 784 (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP); 785 786 if (did == 0) { 787 ASSERT(immu->immu_unity_domain); 788 ASSERT(immu->immu_unity_domain->dom_did > 0); 789 ddi_err(DER_WARN, rdip, "device domain-id alloc error" 790 " domain-device: %s%d. immu unit is %s. Using " 791 "unity domain with domain-id (%d)", 792 ddi_driver_name(ddip), ddi_get_instance(ddip), 793 immu->immu_name, immu->immu_unity_domain->dom_did); 794 did = immu->immu_unity_domain->dom_did; 795 } 796 797 return (did); 798 } 799 800 static int 801 get_branch_domain(dev_info_t *pdip, void *arg) 802 { 803 immu_devi_t *immu_devi; 804 domain_t *domain; 805 dev_info_t *ddip; 806 immu_t *immu; 807 dvma_arg_t *dvp = (dvma_arg_t *)arg; 808 809 ASSERT(pdip); 810 ASSERT(dvp); 811 ASSERT(dvp->dva_rdip); 812 813 /* 814 * The field dvp->dva_rdip is a work-in-progress 815 * and gets updated as we walk up the ancestor 816 * tree. The final ddip is set only when we reach 817 * the top of the tree. So the dvp->dva_ddip field cannot 818 * be relied on until we reach the top of the field. 819 */ 820 821 /* immu_devi may not be set. */ 822 immu_devi = immu_devi_get(pdip); 823 if (immu_devi == NULL) { 824 if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) { 825 dvp->dva_error = DDI_FAILURE; 826 return (DDI_WALK_TERMINATE); 827 } 828 } 829 830 immu_devi = immu_devi_get(pdip); 831 ASSERT(immu_devi); 832 immu = immu_devi->imd_immu; 833 if (immu == NULL) { 834 immu = immu_dvma_get_immu(pdip, dvp->dva_flags); 835 ASSERT(immu); 836 } 837 838 /* 839 * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to 840 * terminate the walk (since the device under the PCIE bridge 841 * is a PCIE device and has an independent entry in the 842 * root/context table) 843 */ 844 if (dvp->dva_rdip != pdip && 845 immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) { 846 return (DDI_WALK_TERMINATE); 847 } 848 849 /* 850 * In order to be a domain-dim, it must be a PCI device i.e. 851 * must have valid BDF. This also eliminates the root complex. 852 */ 853 if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD && 854 immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) { 855 ASSERT(immu_devi->imd_bus >= 0); 856 ASSERT(immu_devi->imd_devfunc >= 0); 857 dvp->dva_ddip = pdip; 858 } 859 860 if (immu_devi->imd_display == B_TRUE || 861 (dvp->dva_flags & IMMU_FLAGS_UNITY)) { 862 dvp->dva_domain = immu->immu_unity_domain; 863 /* continue walking to find ddip */ 864 return (DDI_WALK_CONTINUE); 865 } 866 867 mutex_enter(&(DEVI(pdip)->devi_lock)); 868 domain = immu_devi->imd_domain; 869 ddip = immu_devi->imd_ddip; 870 mutex_exit(&(DEVI(pdip)->devi_lock)); 871 872 if (domain && ddip) { 873 /* if domain is set, it must be the same */ 874 if (dvp->dva_domain) { 875 ASSERT(domain == dvp->dva_domain); 876 } 877 dvp->dva_domain = domain; 878 dvp->dva_ddip = ddip; 879 return (DDI_WALK_TERMINATE); 880 } 881 882 /* immu_devi either has both set or both clear */ 883 ASSERT(domain == NULL); 884 ASSERT(ddip == NULL); 885 886 /* Domain may already be set, continue walking so that ddip gets set */ 887 if (dvp->dva_domain) { 888 return (DDI_WALK_CONTINUE); 889 } 890 891 /* domain is not set in either immu_devi or dvp */ 892 domain = bdf_domain_lookup(immu_devi); 893 if (domain == NULL) { 894 return (DDI_WALK_CONTINUE); 895 } 896 897 /* ok, the BDF hash had a domain for this BDF. */ 898 899 /* Grab lock again to check if something else set immu_devi fields */ 900 mutex_enter(&(DEVI(pdip)->devi_lock)); 901 if (immu_devi->imd_domain != NULL) { 902 ASSERT(immu_devi->imd_domain == domain); 903 dvp->dva_domain = domain; 904 } else { 905 dvp->dva_domain = domain; 906 } 907 mutex_exit(&(DEVI(pdip)->devi_lock)); 908 909 /* 910 * walk upwards until the topmost PCI bridge is found 911 */ 912 return (DDI_WALK_CONTINUE); 913 914 } 915 916 static void 917 map_unity_domain(domain_t *domain) 918 { 919 struct memlist *mp; 920 uint64_t start; 921 uint64_t npages; 922 dcookie_t dcookies[1] = {0}; 923 int dcount = 0; 924 925 ASSERT(domain); 926 ASSERT(domain->dom_did == IMMU_UNITY_DID); 927 928 /* 929 * We call into routines that grab the lock so we should 930 * not be called with the lock held. This does not matter 931 * much since, no else has a reference to this domain 932 */ 933 ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock))); 934 935 /* 936 * UNITY arenas are a mirror of the physical memory 937 * installed on the system. 938 */ 939 940 #ifdef BUGGY_DRIVERS 941 /* 942 * Dont skip page0. Some broken HW/FW access it. 943 */ 944 dcookies[0].dck_paddr = 0; 945 dcookies[0].dck_npages = 1; 946 dcount = 1; 947 (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL, 948 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1); 949 #endif 950 951 memlist_read_lock(); 952 953 mp = phys_install; 954 955 if (mp->ml_address == 0) { 956 /* since we already mapped page1 above */ 957 start = IMMU_PAGESIZE; 958 } else { 959 start = mp->ml_address; 960 } 961 npages = mp->ml_size/IMMU_PAGESIZE + 1; 962 963 dcookies[0].dck_paddr = start; 964 dcookies[0].dck_npages = npages; 965 dcount = 1; 966 (void) dvma_map(domain->dom_immu, domain, start, npages, dcookies, 967 dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 968 969 ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 970 " - 0x%" PRIx64 "]", start, start + mp->ml_size); 971 972 mp = mp->ml_next; 973 while (mp) { 974 ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 975 " - 0x%" PRIx64 "]", mp->ml_address, 976 mp->ml_address + mp->ml_size); 977 978 start = mp->ml_address; 979 npages = mp->ml_size/IMMU_PAGESIZE + 1; 980 981 dcookies[0].dck_paddr = start; 982 dcookies[0].dck_npages = npages; 983 dcount = 1; 984 (void) dvma_map(domain->dom_immu, domain, start, npages, 985 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 986 mp = mp->ml_next; 987 } 988 989 mp = bios_rsvd; 990 while (mp) { 991 ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 992 " - 0x%" PRIx64 "]", mp->ml_address, 993 mp->ml_address + mp->ml_size); 994 995 start = mp->ml_address; 996 npages = mp->ml_size/IMMU_PAGESIZE + 1; 997 998 dcookies[0].dck_paddr = start; 999 dcookies[0].dck_npages = npages; 1000 dcount = 1; 1001 (void) dvma_map(domain->dom_immu, domain, start, npages, 1002 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 1003 1004 mp = mp->ml_next; 1005 } 1006 1007 memlist_read_unlock(); 1008 } 1009 1010 /* 1011 * create_xlate_arena() 1012 * Create the dvma arena for a domain with translation 1013 * mapping 1014 */ 1015 static void 1016 create_xlate_arena(immu_t *immu, domain_t *domain, 1017 dev_info_t *rdip, immu_flags_t immu_flags) 1018 { 1019 char *arena_name; 1020 struct memlist *mp; 1021 int vmem_flags; 1022 uint64_t start; 1023 uint_t mgaw; 1024 uint64_t size; 1025 uint64_t maxaddr; 1026 void *vmem_ret; 1027 1028 arena_name = domain->dom_dvma_arena_name; 1029 1030 /* Note, don't do sizeof (arena_name) - it is just a pointer */ 1031 (void) snprintf(arena_name, 1032 sizeof (domain->dom_dvma_arena_name), 1033 "%s-domain-%d-xlate-DVMA-arena", immu->immu_name, 1034 domain->dom_did); 1035 1036 vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP; 1037 1038 /* 1039 * No one else has access to this domain. 1040 * So no domain locks needed 1041 */ 1042 ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock))); 1043 1044 /* Restrict mgaddr (max guest addr) to MGAW */ 1045 mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap); 1046 1047 /* 1048 * To ensure we avoid ioapic and PCI MMIO ranges we just 1049 * use the physical memory address range of the system as the 1050 * range 1051 */ 1052 maxaddr = ((uint64_t)1 << mgaw); 1053 1054 memlist_read_lock(); 1055 1056 mp = phys_install; 1057 1058 if (mp->ml_address == 0) 1059 start = MMU_PAGESIZE; 1060 else 1061 start = mp->ml_address; 1062 1063 if (start + mp->ml_size > maxaddr) 1064 size = maxaddr - start; 1065 else 1066 size = mp->ml_size; 1067 1068 ddi_err(DER_VERB, rdip, 1069 "%s: Creating dvma vmem arena [0x%" PRIx64 1070 " - 0x%" PRIx64 "]", arena_name, start, start + size); 1071 1072 ASSERT(domain->dom_dvma_arena == NULL); 1073 1074 /* 1075 * We always allocate in quanta of IMMU_PAGESIZE 1076 */ 1077 domain->dom_dvma_arena = vmem_create(arena_name, 1078 (void *)(uintptr_t)start, /* start addr */ 1079 size, /* size */ 1080 IMMU_PAGESIZE, /* quantum */ 1081 NULL, /* afunc */ 1082 NULL, /* ffunc */ 1083 NULL, /* source */ 1084 0, /* qcache_max */ 1085 vmem_flags); 1086 1087 if (domain->dom_dvma_arena == NULL) { 1088 ddi_err(DER_PANIC, rdip, 1089 "Failed to allocate DVMA arena(%s) " 1090 "for domain ID (%d)", arena_name, domain->dom_did); 1091 /*NOTREACHED*/ 1092 } 1093 1094 mp = mp->ml_next; 1095 while (mp) { 1096 1097 if (mp->ml_address == 0) 1098 start = MMU_PAGESIZE; 1099 else 1100 start = mp->ml_address; 1101 1102 if (start + mp->ml_size > maxaddr) 1103 size = maxaddr - start; 1104 else 1105 size = mp->ml_size; 1106 1107 ddi_err(DER_VERB, rdip, 1108 "%s: Adding dvma vmem span [0x%" PRIx64 1109 " - 0x%" PRIx64 "]", arena_name, start, 1110 start + size); 1111 1112 vmem_ret = vmem_add(domain->dom_dvma_arena, 1113 (void *)(uintptr_t)start, size, vmem_flags); 1114 1115 if (vmem_ret == NULL) { 1116 ddi_err(DER_PANIC, rdip, 1117 "Failed to allocate DVMA arena(%s) " 1118 "for domain ID (%d)", 1119 arena_name, domain->dom_did); 1120 /*NOTREACHED*/ 1121 } 1122 mp = mp->ml_next; 1123 } 1124 memlist_read_unlock(); 1125 } 1126 1127 /* ################################### DOMAIN CODE ######################### */ 1128 1129 /* 1130 * Set the domain and domain-dip for a dip 1131 */ 1132 static void 1133 set_domain( 1134 dev_info_t *dip, 1135 dev_info_t *ddip, 1136 domain_t *domain) 1137 { 1138 immu_devi_t *immu_devi; 1139 domain_t *fdomain; 1140 dev_info_t *fddip; 1141 1142 ASSERT(dip); 1143 ASSERT(ddip); 1144 ASSERT(domain); 1145 ASSERT(domain->dom_did > 0); /* must be an initialized domain */ 1146 1147 immu_devi = immu_devi_get(dip); 1148 ASSERT(immu_devi); 1149 1150 mutex_enter(&(DEVI(dip)->devi_lock)); 1151 fddip = immu_devi->imd_ddip; 1152 fdomain = immu_devi->imd_domain; 1153 1154 if (fddip) { 1155 ASSERT(fddip == ddip); 1156 } else { 1157 immu_devi->imd_ddip = ddip; 1158 } 1159 1160 if (fdomain) { 1161 ASSERT(fdomain == domain); 1162 } else { 1163 immu_devi->imd_domain = domain; 1164 } 1165 mutex_exit(&(DEVI(dip)->devi_lock)); 1166 } 1167 1168 /* 1169 * device_domain() 1170 * Get domain for a device. The domain may be global in which case it 1171 * is shared between all IOMMU units. Due to potential AGAW differences 1172 * between IOMMU units, such global domains *have to be* UNITY mapping 1173 * domains. Alternatively, the domain may be local to a IOMMU unit. 1174 * Local domains may be shared or immu_devi, although the 1175 * scope of sharing 1176 * is restricted to devices controlled by the IOMMU unit to 1177 * which the domain 1178 * belongs. If shared, they (currently) have to be UNITY domains. If 1179 * immu_devi a domain may be either UNITY or translation (XLATE) domain. 1180 */ 1181 static domain_t * 1182 device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags) 1183 { 1184 dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */ 1185 immu_t *immu; 1186 domain_t *domain; 1187 dvma_arg_t dvarg = {0}; 1188 int level; 1189 1190 ASSERT(rdip); 1191 1192 *ddipp = NULL; 1193 1194 /* 1195 * Check if the domain is already set. This is usually true 1196 * if this is not the first DVMA transaction. 1197 */ 1198 ddip = NULL; 1199 domain = immu_devi_domain(rdip, &ddip); 1200 if (domain) { 1201 ASSERT(domain->dom_did > 0); 1202 ASSERT(ddip); 1203 *ddipp = ddip; 1204 return (domain); 1205 } 1206 1207 immu = immu_dvma_get_immu(rdip, immu_flags); 1208 if (immu == NULL) { 1209 /* 1210 * possible that there is no IOMMU unit for this device 1211 * - BIOS bugs are one example. 1212 */ 1213 ddi_err(DER_WARN, rdip, "No IMMU unit found for device"); 1214 return (NULL); 1215 } 1216 1217 immu_flags |= immu_devi_get(rdip)->imd_dvma_flags; 1218 1219 dvarg.dva_rdip = rdip; 1220 dvarg.dva_ddip = NULL; 1221 dvarg.dva_domain = NULL; 1222 dvarg.dva_flags = immu_flags; 1223 level = 0; 1224 if (immu_walk_ancestor(rdip, NULL, get_branch_domain, 1225 &dvarg, &level, immu_flags) != DDI_SUCCESS) { 1226 /* 1227 * maybe low memory. return error, 1228 * so driver tries again later 1229 */ 1230 return (NULL); 1231 } 1232 1233 /* should have walked at least 1 dip (i.e. edip) */ 1234 ASSERT(level > 0); 1235 1236 ddip = dvarg.dva_ddip; /* must be present */ 1237 domain = dvarg.dva_domain; /* may be NULL */ 1238 1239 /* 1240 * We may find the domain during our ancestor walk on any one of our 1241 * ancestor dips, If the domain is found then the domain-dip 1242 * (i.e. ddip) will also be found in the same immu_devi struct. 1243 * The domain-dip is the highest ancestor dip which shares the 1244 * same domain with edip. 1245 * The domain may or may not be found, but the domain dip must 1246 * be found. 1247 */ 1248 if (ddip == NULL) { 1249 ddi_err(DER_MODE, rdip, "Cannot find domain dip for device."); 1250 return (NULL); 1251 } 1252 1253 /* 1254 * Did we find a domain ? 1255 */ 1256 if (domain) { 1257 goto found; 1258 } 1259 1260 /* nope, so allocate */ 1261 domain = domain_create(immu, ddip, rdip, immu_flags); 1262 if (domain == NULL) { 1263 return (NULL); 1264 } 1265 ASSERT(domain->dom_did > 0); 1266 1267 /*FALLTHROUGH*/ 1268 found: 1269 /* 1270 * We know *domain *is* the right domain, so panic if 1271 * another domain is set for either the request-dip or 1272 * effective dip. 1273 */ 1274 set_domain(ddip, ddip, domain); 1275 set_domain(rdip, ddip, domain); 1276 1277 *ddipp = ddip; 1278 return (domain); 1279 } 1280 1281 static void 1282 create_unity_domain(immu_t *immu) 1283 { 1284 domain_t *domain; 1285 1286 /* 0 is reserved by Vt-d */ 1287 /*LINTED*/ 1288 ASSERT(IMMU_UNITY_DID > 0); 1289 1290 /* domain created during boot and always use sleep flag */ 1291 domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP); 1292 1293 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL); 1294 1295 domain->dom_did = IMMU_UNITY_DID; 1296 domain->dom_maptype = IMMU_MAPTYPE_UNITY; 1297 1298 domain->dom_immu = immu; 1299 immu->immu_unity_domain = domain; 1300 1301 /* 1302 * Setup the domain's initial page table 1303 * should never fail. 1304 */ 1305 domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP); 1306 ASSERT(domain->dom_pgtable_root); 1307 pgtable_zero(immu, domain->dom_pgtable_root); 1308 1309 /* 1310 * Only map all physical memory in to the unity domain 1311 * if passthrough is not supported. If it is supported, 1312 * passthrough is set in the context entry instead. 1313 */ 1314 if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap)) 1315 map_unity_domain(domain); 1316 1317 1318 /* 1319 * put it on the system-wide UNITY domain list 1320 */ 1321 mutex_enter(&(immu_domain_lock)); 1322 list_insert_tail(&immu_unity_domain_list, domain); 1323 mutex_exit(&(immu_domain_lock)); 1324 } 1325 1326 /* 1327 * ddip is the domain-dip - the topmost dip in a domain 1328 * rdip is the requesting-dip - the device which is 1329 * requesting DVMA setup 1330 * if domain is a non-shared domain rdip == ddip 1331 */ 1332 static domain_t * 1333 domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip, 1334 immu_flags_t immu_flags) 1335 { 1336 int kmflags; 1337 domain_t *domain; 1338 char mod_hash_name[128]; 1339 immu_devi_t *immu_devi; 1340 int did; 1341 dcookie_t dcookies[1] = {0}; 1342 int dcount = 0; 1343 1344 ASSERT(immu); 1345 ASSERT(ddip); 1346 1347 immu_devi = immu_devi_get(rdip); 1348 1349 ASSERT(immu_devi); 1350 1351 /* 1352 * First allocate a domainid. 1353 * This routine will never fail, since if we run out 1354 * of domains the unity domain will be allocated. 1355 */ 1356 did = did_alloc(immu, rdip, ddip, immu_flags); 1357 ASSERT(did > 0); 1358 if (did == IMMU_UNITY_DID) { 1359 /* domain overflow */ 1360 ASSERT(immu->immu_unity_domain); 1361 return (immu->immu_unity_domain); 1362 } 1363 1364 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 1365 domain = kmem_zalloc(sizeof (domain_t), kmflags); 1366 if (domain == NULL) { 1367 ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain " 1368 "structure for device. IOMMU unit: %s", immu->immu_name); 1369 /*NOTREACHED*/ 1370 } 1371 1372 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL); 1373 1374 (void) snprintf(mod_hash_name, sizeof (mod_hash_name), 1375 "immu%s-domain%d-pava-hash", immu->immu_name, did); 1376 1377 domain->dom_did = did; 1378 domain->dom_immu = immu; 1379 domain->dom_maptype = IMMU_MAPTYPE_XLATE; 1380 1381 /* 1382 * Create xlate DVMA arena for this domain. 1383 */ 1384 create_xlate_arena(immu, domain, rdip, immu_flags); 1385 1386 /* 1387 * Setup the domain's initial page table 1388 */ 1389 domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags); 1390 if (domain->dom_pgtable_root == NULL) { 1391 ddi_err(DER_PANIC, rdip, "Failed to alloc root " 1392 "pgtable for domain (%d). IOMMU unit: %s", 1393 domain->dom_did, immu->immu_name); 1394 /*NOTREACHED*/ 1395 } 1396 pgtable_zero(immu, domain->dom_pgtable_root); 1397 1398 /* 1399 * Since this is a immu unit-specific domain, put it on 1400 * the per-immu domain list. 1401 */ 1402 mutex_enter(&(immu->immu_lock)); 1403 list_insert_head(&immu->immu_domain_list, domain); 1404 mutex_exit(&(immu->immu_lock)); 1405 1406 /* 1407 * Also put it on the system-wide xlate domain list 1408 */ 1409 mutex_enter(&(immu_domain_lock)); 1410 list_insert_head(&immu_xlate_domain_list, domain); 1411 mutex_exit(&(immu_domain_lock)); 1412 1413 bdf_domain_insert(immu_devi, domain); 1414 1415 #ifdef BUGGY_DRIVERS 1416 /* 1417 * Map page0. Some broken HW/FW access it. 1418 */ 1419 dcookies[0].dck_paddr = 0; 1420 dcookies[0].dck_npages = 1; 1421 dcount = 1; 1422 (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL, 1423 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1); 1424 #endif 1425 return (domain); 1426 } 1427 1428 /* 1429 * Create domainid arena. 1430 * Domainid 0 is reserved by Vt-d spec and cannot be used by 1431 * system software. 1432 * Domainid 1 is reserved by solaris and used for *all* of the following: 1433 * as the "uninitialized" domain - For devices not yet controlled 1434 * by Solaris 1435 * as the "unity" domain - For devices that will always belong 1436 * to the unity domain 1437 * as the "overflow" domain - Used for any new device after we 1438 * run out of domains 1439 * All of the above domains map into a single domain with 1440 * domainid 1 and UNITY DVMA mapping 1441 * Each IMMU unity has its own unity/uninit/overflow domain 1442 */ 1443 static void 1444 did_init(immu_t *immu) 1445 { 1446 (void) snprintf(immu->immu_did_arena_name, 1447 sizeof (immu->immu_did_arena_name), 1448 "%s_domainid_arena", immu->immu_name); 1449 1450 ddi_err(DER_VERB, NULL, "%s: Creating domainid arena %s", 1451 immu->immu_name, immu->immu_did_arena_name); 1452 1453 immu->immu_did_arena = vmem_create( 1454 immu->immu_did_arena_name, 1455 (void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */ 1456 immu->immu_max_domains - IMMU_UNITY_DID, 1457 1, /* quantum */ 1458 NULL, /* afunc */ 1459 NULL, /* ffunc */ 1460 NULL, /* source */ 1461 0, /* qcache_max */ 1462 VM_SLEEP); 1463 1464 /* Even with SLEEP flag, vmem_create() can fail */ 1465 if (immu->immu_did_arena == NULL) { 1466 ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel " 1467 "IOMMU domainid allocator: %s", immu->immu_name, 1468 immu->immu_did_arena_name); 1469 } 1470 } 1471 1472 /* ######################### CONTEXT CODE ################################# */ 1473 1474 static void 1475 context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table, 1476 int bus, int devfunc) 1477 { 1478 pgtable_t *context; 1479 pgtable_t *pgtable_root; 1480 pgtable_t *unity_pgtable_root; 1481 hw_rce_t *hw_rent; 1482 hw_rce_t *hw_cent; 1483 hw_rce_t *ctxp; 1484 int sid; 1485 krw_t rwtype; 1486 boolean_t fill_root; 1487 boolean_t fill_ctx; 1488 1489 ASSERT(immu); 1490 ASSERT(domain); 1491 ASSERT(root_table); 1492 ASSERT(bus >= 0); 1493 ASSERT(devfunc >= 0); 1494 ASSERT(domain->dom_pgtable_root); 1495 1496 pgtable_root = domain->dom_pgtable_root; 1497 1498 ctxp = (hw_rce_t *)(root_table->swpg_next_array); 1499 context = *(pgtable_t **)(ctxp + bus); 1500 hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus; 1501 1502 fill_root = B_FALSE; 1503 fill_ctx = B_FALSE; 1504 1505 /* Check the most common case first with reader lock */ 1506 rw_enter(&(immu->immu_ctx_rwlock), RW_READER); 1507 rwtype = RW_READER; 1508 again: 1509 if (ROOT_GET_P(hw_rent)) { 1510 ASSERT(ROOT_GET_CONT(hw_rent) == context->hwpg_paddr); 1511 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc; 1512 if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) { 1513 ASSERT(CONT_GET_P(hw_cent)); 1514 ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did); 1515 ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw); 1516 ASSERT(CONT_GET_ASR(hw_cent) == 1517 pgtable_root->hwpg_paddr); 1518 rw_exit(&(immu->immu_ctx_rwlock)); 1519 return; 1520 } else { 1521 fill_ctx = B_TRUE; 1522 } 1523 } else { 1524 fill_root = B_TRUE; 1525 fill_ctx = B_TRUE; 1526 } 1527 1528 if (rwtype == RW_READER && 1529 rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) { 1530 rw_exit(&(immu->immu_ctx_rwlock)); 1531 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); 1532 rwtype = RW_WRITER; 1533 goto again; 1534 } 1535 rwtype = RW_WRITER; 1536 1537 if (fill_root == B_TRUE) { 1538 ROOT_SET_CONT(hw_rent, context->hwpg_paddr); 1539 ROOT_SET_P(hw_rent); 1540 immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t)); 1541 } 1542 1543 if (fill_ctx == B_TRUE) { 1544 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc; 1545 unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root; 1546 ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED); 1547 ASSERT(CONT_GET_P(hw_cent)); 1548 ASSERT(CONT_GET_DID(hw_cent) == 1549 immu->immu_unity_domain->dom_did); 1550 ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw); 1551 ASSERT(CONT_GET_ASR(hw_cent) == 1552 unity_pgtable_root->hwpg_paddr); 1553 1554 /* need to disable context entry before reprogramming it */ 1555 bzero(hw_cent, sizeof (hw_rce_t)); 1556 1557 /* flush caches */ 1558 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t)); 1559 ASSERT(rw_write_held(&(immu->immu_ctx_rwlock))); 1560 1561 sid = ((bus << 8) | devfunc); 1562 immu_flush_context_fsi(immu, 0, sid, domain->dom_did); 1563 1564 immu_regs_wbf_flush(immu); 1565 1566 CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED); 1567 CONT_SET_DID(hw_cent, domain->dom_did); 1568 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw); 1569 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr); 1570 if (domain->dom_did == IMMU_UNITY_DID && 1571 IMMU_ECAP_GET_PT(immu->immu_regs_excap)) 1572 CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU); 1573 else 1574 /*LINTED*/ 1575 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY); 1576 CONT_SET_P(hw_cent); 1577 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t)); 1578 } 1579 rw_exit(&(immu->immu_ctx_rwlock)); 1580 } 1581 1582 static pgtable_t * 1583 context_create(immu_t *immu) 1584 { 1585 int bus; 1586 int devfunc; 1587 pgtable_t *root_table; 1588 pgtable_t *context; 1589 pgtable_t *pgtable_root; 1590 hw_rce_t *ctxp; 1591 hw_rce_t *hw_rent; 1592 hw_rce_t *hw_cent; 1593 1594 /* Allocate a zeroed root table (4K 256b entries) */ 1595 root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP); 1596 pgtable_zero(immu, root_table); 1597 1598 /* 1599 * Setup context tables for all possible root table entries. 1600 * Start out with unity domains for all entries. 1601 */ 1602 ctxp = (hw_rce_t *)(root_table->swpg_next_array); 1603 hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr); 1604 for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) { 1605 context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP); 1606 pgtable_zero(immu, context); 1607 ASSERT(ROOT_GET_P(hw_rent) == 0); 1608 ROOT_SET_P(hw_rent); 1609 ROOT_SET_CONT(hw_rent, context->hwpg_paddr); 1610 hw_cent = (hw_rce_t *)(context->hwpg_vaddr); 1611 for (devfunc = 0; devfunc < IMMU_CONT_NUM; 1612 devfunc++, hw_cent++) { 1613 ASSERT(CONT_GET_P(hw_cent) == 0); 1614 pgtable_root = 1615 immu->immu_unity_domain->dom_pgtable_root; 1616 CONT_SET_DID(hw_cent, 1617 immu->immu_unity_domain->dom_did); 1618 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw); 1619 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr); 1620 if (IMMU_ECAP_GET_PT(immu->immu_regs_excap)) 1621 CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU); 1622 else 1623 /*LINTED*/ 1624 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY); 1625 CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED); 1626 CONT_SET_P(hw_cent); 1627 } 1628 immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE); 1629 *((pgtable_t **)ctxp) = context; 1630 } 1631 immu_regs_cpu_flush(immu, root_table->hwpg_vaddr, IMMU_PAGESIZE); 1632 1633 return (root_table); 1634 } 1635 1636 /* 1637 * Called during rootnex attach, so no locks needed 1638 */ 1639 static void 1640 context_init(immu_t *immu) 1641 { 1642 ASSERT(immu); 1643 ASSERT(immu->immu_ctx_root == NULL); 1644 1645 rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL); 1646 1647 immu_regs_wbf_flush(immu); 1648 1649 immu->immu_ctx_root = context_create(immu); 1650 1651 immu_regs_set_root_table(immu); 1652 1653 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); 1654 immu_flush_context_gbl(immu); 1655 rw_exit(&(immu->immu_ctx_rwlock)); 1656 immu_flush_iotlb_gbl(immu); 1657 immu_regs_wbf_flush(immu); 1658 } 1659 1660 1661 /* 1662 * Find top pcib 1663 */ 1664 static int 1665 find_top_pcib(dev_info_t *dip, void *arg) 1666 { 1667 immu_devi_t *immu_devi; 1668 dev_info_t **pcibdipp = (dev_info_t **)arg; 1669 1670 ASSERT(dip); 1671 1672 immu_devi = immu_devi_get(dip); 1673 ASSERT(immu_devi); 1674 1675 if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) { 1676 *pcibdipp = dip; 1677 } 1678 1679 return (DDI_WALK_CONTINUE); 1680 } 1681 1682 static int 1683 immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip, 1684 dev_info_t *rdip, immu_flags_t immu_flags) 1685 { 1686 immu_devi_t *r_immu_devi; 1687 immu_devi_t *d_immu_devi; 1688 int r_bus; 1689 int d_bus; 1690 int r_devfunc; 1691 int d_devfunc; 1692 immu_pcib_t d_pcib_type; 1693 immu_pcib_t r_pcib_type; 1694 dev_info_t *pcibdip; 1695 1696 if (ddip == NULL || rdip == NULL || 1697 ddip == root_devinfo || rdip == root_devinfo) { 1698 ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or " 1699 "request-dip are NULL or are root devinfo"); 1700 return (DDI_FAILURE); 1701 } 1702 1703 /* 1704 * We need to set the context fields 1705 * based on what type of device rdip and ddip are. 1706 * To do that we need the immu_devi field. 1707 * Set the immu_devi field (if not already set) 1708 */ 1709 if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) { 1710 ddi_err(DER_MODE, rdip, 1711 "immu_context_update: failed to set immu_devi for ddip"); 1712 return (DDI_FAILURE); 1713 } 1714 1715 if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) { 1716 ddi_err(DER_MODE, rdip, 1717 "immu_context_update: failed to set immu_devi for rdip"); 1718 return (DDI_FAILURE); 1719 } 1720 1721 d_immu_devi = immu_devi_get(ddip); 1722 r_immu_devi = immu_devi_get(rdip); 1723 ASSERT(r_immu_devi); 1724 ASSERT(d_immu_devi); 1725 1726 d_bus = d_immu_devi->imd_bus; 1727 d_devfunc = d_immu_devi->imd_devfunc; 1728 d_pcib_type = d_immu_devi->imd_pcib_type; 1729 r_bus = r_immu_devi->imd_bus; 1730 r_devfunc = r_immu_devi->imd_devfunc; 1731 r_pcib_type = r_immu_devi->imd_pcib_type; 1732 1733 ASSERT(d_bus >= 0); 1734 1735 if (rdip == ddip) { 1736 ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT || 1737 d_pcib_type == IMMU_PCIB_PCIE_PCIE); 1738 ASSERT(r_bus >= 0); 1739 ASSERT(r_devfunc >= 0); 1740 /* rdip is a PCIE device. set context for it only */ 1741 context_set(immu, domain, immu->immu_ctx_root, r_bus, 1742 r_devfunc); 1743 #ifdef BUGGY_DRIVERS 1744 } else if (r_immu_devi == d_immu_devi) { 1745 #ifdef TEST 1746 ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and " 1747 "0x%lx are identical", rdip, ddip); 1748 #endif 1749 ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT); 1750 ASSERT(r_bus >= 0); 1751 ASSERT(r_devfunc >= 0); 1752 /* rdip is a PCIE device. set context for it only */ 1753 context_set(immu, domain, immu->immu_ctx_root, r_bus, 1754 r_devfunc); 1755 #endif 1756 } else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) { 1757 /* 1758 * ddip is a PCIE_PCI bridge. Set context for ddip's 1759 * secondary bus. If rdip is on ddip's secondary 1760 * bus, set context for rdip. Else, set context 1761 * for rdip's PCI bridge on ddip's secondary bus. 1762 */ 1763 context_set(immu, domain, immu->immu_ctx_root, 1764 d_immu_devi->imd_sec, 0); 1765 if (d_immu_devi->imd_sec == r_bus) { 1766 context_set(immu, domain, immu->immu_ctx_root, 1767 r_bus, r_devfunc); 1768 } else { 1769 pcibdip = NULL; 1770 if (immu_walk_ancestor(rdip, ddip, find_top_pcib, 1771 &pcibdip, NULL, immu_flags) == DDI_SUCCESS && 1772 pcibdip != NULL) { 1773 ASSERT(pcibdip); 1774 r_immu_devi = immu_devi_get(pcibdip); 1775 ASSERT(d_immu_devi); 1776 ASSERT(d_immu_devi->imd_pcib_type == 1777 IMMU_PCIB_PCI_PCI); 1778 r_bus = r_immu_devi->imd_bus; 1779 r_devfunc = r_immu_devi->imd_devfunc; 1780 context_set(immu, domain, immu->immu_ctx_root, 1781 r_bus, r_devfunc); 1782 } else { 1783 ddi_err(DER_PANIC, rdip, "Failed to find PCI " 1784 " bridge for PCI device"); 1785 /*NOTREACHED*/ 1786 } 1787 } 1788 } else if (d_pcib_type == IMMU_PCIB_PCI_PCI) { 1789 context_set(immu, domain, immu->immu_ctx_root, d_bus, 1790 d_devfunc); 1791 } else if (d_pcib_type == IMMU_PCIB_ENDPOINT) { 1792 ASSERT(r_pcib_type == IMMU_PCIB_NOBDF); 1793 /* 1794 * ddip is a PCIE device which has a non-PCI device under it 1795 * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata 1796 */ 1797 context_set(immu, domain, immu->immu_ctx_root, d_bus, 1798 d_devfunc); 1799 } else { 1800 ddi_err(DER_PANIC, rdip, "unknown device type. Cannot " 1801 "set IMMU context."); 1802 /*NOTREACHED*/ 1803 } 1804 1805 /* XXX do we need a membar_producer() here */ 1806 return (DDI_SUCCESS); 1807 } 1808 1809 /* ##################### END CONTEXT CODE ################################## */ 1810 /* ##################### MAPPING CODE ################################## */ 1811 1812 1813 static boolean_t 1814 PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr, 1815 dev_info_t *rdip, immu_flags_t immu_flags) 1816 { 1817 if (immu_flags & IMMU_FLAGS_PAGE1) { 1818 ASSERT(paddr == 0); 1819 } else { 1820 ASSERT((next == NULL) ^ (paddr == 0)); 1821 } 1822 1823 /* The PDTE must be set i.e. present bit is set */ 1824 if (!PDTE_P(pdte)) { 1825 ddi_err(DER_MODE, rdip, "No present flag"); 1826 return (B_FALSE); 1827 } 1828 1829 /* 1830 * Just assert to check most significant system software field 1831 * (PDTE_SW4) as it is same as present bit and we 1832 * checked that above 1833 */ 1834 ASSERT(PDTE_SW4(pdte)); 1835 1836 /* 1837 * TM field should be clear if not reserved. 1838 * non-leaf is always reserved 1839 */ 1840 if (next == NULL && immu->immu_TM_reserved == B_FALSE) { 1841 if (PDTE_TM(pdte)) { 1842 ddi_err(DER_MODE, rdip, "TM flag set"); 1843 return (B_FALSE); 1844 } 1845 } 1846 1847 /* 1848 * The SW3 field is not used and must be clear 1849 */ 1850 if (PDTE_SW3(pdte)) { 1851 ddi_err(DER_MODE, rdip, "SW3 set"); 1852 return (B_FALSE); 1853 } 1854 1855 /* 1856 * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set 1857 */ 1858 if (next == NULL) { 1859 ASSERT(paddr % IMMU_PAGESIZE == 0); 1860 if (PDTE_PADDR(pdte) != paddr) { 1861 ddi_err(DER_MODE, rdip, 1862 "PTE paddr mismatch: %lx != %lx", 1863 PDTE_PADDR(pdte), paddr); 1864 return (B_FALSE); 1865 } 1866 } else { 1867 if (PDTE_PADDR(pdte) != next->hwpg_paddr) { 1868 ddi_err(DER_MODE, rdip, 1869 "PDE paddr mismatch: %lx != %lx", 1870 PDTE_PADDR(pdte), next->hwpg_paddr); 1871 return (B_FALSE); 1872 } 1873 } 1874 1875 /* 1876 * SNP field should be clear if not reserved. 1877 * non-leaf is always reserved 1878 */ 1879 if (next == NULL && immu->immu_SNP_reserved == B_FALSE) { 1880 if (PDTE_SNP(pdte)) { 1881 ddi_err(DER_MODE, rdip, "SNP set"); 1882 return (B_FALSE); 1883 } 1884 } 1885 1886 /* second field available for system software should be clear */ 1887 if (PDTE_SW2(pdte)) { 1888 ddi_err(DER_MODE, rdip, "SW2 set"); 1889 return (B_FALSE); 1890 } 1891 1892 /* Super pages field should be clear */ 1893 if (PDTE_SP(pdte)) { 1894 ddi_err(DER_MODE, rdip, "SP set"); 1895 return (B_FALSE); 1896 } 1897 1898 /* 1899 * least significant field available for 1900 * system software should be clear 1901 */ 1902 if (PDTE_SW1(pdte)) { 1903 ddi_err(DER_MODE, rdip, "SW1 set"); 1904 return (B_FALSE); 1905 } 1906 1907 if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) { 1908 ddi_err(DER_MODE, rdip, "READ not set"); 1909 return (B_FALSE); 1910 } 1911 1912 if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) { 1913 ddi_err(DER_MODE, rdip, "WRITE not set"); 1914 return (B_FALSE); 1915 } 1916 1917 return (B_TRUE); 1918 } 1919 /*ARGSUSED*/ 1920 static void 1921 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate, 1922 uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip) 1923 { 1924 uint64_t npages; 1925 uint64_t dvma; 1926 pgtable_t *pgtable; 1927 hw_pdte_t *hwp; 1928 hw_pdte_t *shwp; 1929 int idx; 1930 hw_pdte_t pte; 1931 1932 ASSERT(xlate->xlt_level == 1); 1933 1934 pgtable = xlate->xlt_pgtable; 1935 idx = xlate->xlt_idx; 1936 1937 ASSERT(pgtable); 1938 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 1939 1940 dvma = *dvma_ptr; 1941 npages = *npages_ptr; 1942 1943 ASSERT(dvma); 1944 ASSERT(dvma % IMMU_PAGESIZE == 0); 1945 ASSERT(npages); 1946 1947 /* 1948 * since a caller gets a unique dvma for a physical address, 1949 * no other concurrent thread will be writing to the same 1950 * PTE even if it has the same paddr. So no locks needed. 1951 */ 1952 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; 1953 1954 hwp = shwp; 1955 for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) { 1956 1957 pte = *hwp; 1958 1959 /* Cannot clear a HW PTE that is aleady clear */ 1960 ASSERT(PDTE_P(pte)); 1961 PDTE_CLEAR_P(pte); 1962 *hwp = pte; 1963 1964 dvma += IMMU_PAGESIZE; 1965 npages--; 1966 } 1967 1968 1969 #ifdef TEST 1970 /* dont need to flush write during unmap */ 1971 immu_regs_cpu_flush(immu, (caddr_t)shwp, 1972 (hwp - shwp) * sizeof (hw_pdte_t)); 1973 #endif 1974 1975 *dvma_ptr = dvma; 1976 *npages_ptr = npages; 1977 1978 xlate->xlt_idx = idx; 1979 } 1980 1981 /*ARGSUSED*/ 1982 static void 1983 xlate_setup(immu_t *immu, uint64_t dvma, xlate_t *xlate, 1984 int nlevels, dev_info_t *rdip) 1985 { 1986 int level; 1987 uint64_t offbits; 1988 1989 /* level 0 is never used. Sanity check */ 1990 ASSERT(xlate->xlt_level == 0); 1991 ASSERT(xlate->xlt_idx == 0); 1992 ASSERT(xlate->xlt_pgtable == NULL); 1993 ASSERT(dvma % IMMU_PAGESIZE == 0); 1994 1995 /* 1996 * Skip the first 12 bits which is the offset into 1997 * 4K PFN (phys page frame based on IMMU_PAGESIZE) 1998 */ 1999 offbits = dvma >> IMMU_PAGESHIFT; 2000 2001 /* skip to level 1 i.e. leaf PTE */ 2002 for (level = 1, xlate++; level <= nlevels; level++, xlate++) { 2003 xlate->xlt_level = level; 2004 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK); 2005 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX); 2006 xlate->xlt_pgtable = NULL; 2007 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE; 2008 } 2009 } 2010 2011 /* 2012 * Read the pgtables 2013 */ 2014 static void 2015 PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels, 2016 dev_info_t *rdip) 2017 { 2018 pgtable_t *pgtable; 2019 pgtable_t *next; 2020 hw_pdte_t pde; 2021 uint_t idx; 2022 2023 /* xlate should be at level 0 */ 2024 ASSERT(xlate->xlt_level == 0); 2025 ASSERT(xlate->xlt_idx == 0); 2026 2027 /* start with highest level pgtable i.e. root */ 2028 xlate += nlevels; 2029 ASSERT(xlate->xlt_level == nlevels); 2030 2031 if (xlate->xlt_pgtable == NULL) { 2032 xlate->xlt_pgtable = domain->dom_pgtable_root; 2033 } 2034 2035 for (; xlate->xlt_level > 1; xlate--) { 2036 2037 idx = xlate->xlt_idx; 2038 pgtable = xlate->xlt_pgtable; 2039 2040 ASSERT(pgtable); 2041 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 2042 2043 if ((xlate - 1)->xlt_pgtable) { 2044 continue; 2045 } 2046 2047 /* xlate's leafier level is not set, set it now */ 2048 2049 /* Lock the pgtable in read mode */ 2050 rw_enter(&(pgtable->swpg_rwlock), RW_READER); 2051 2052 /* 2053 * since we are unmapping, the pgtable should 2054 * already point to a leafier pgtable. 2055 */ 2056 next = *(pgtable->swpg_next_array + idx); 2057 ASSERT(next); 2058 2059 pde = *((hw_pdte_t *)(pgtable->hwpg_vaddr) + idx); 2060 2061 ASSERT(PDTE_check(immu, pde, next, 0, rdip, 0) == B_TRUE); 2062 2063 (xlate - 1)->xlt_pgtable = next; 2064 2065 rw_exit(&(pgtable->swpg_rwlock)); 2066 } 2067 } 2068 2069 /*ARGSUSED*/ 2070 static void 2071 PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr, 2072 dev_info_t *rdip, immu_flags_t immu_flags) 2073 { 2074 hw_pdte_t pte; 2075 2076 pte = *hwp; 2077 2078 #ifndef DEBUG 2079 /* Set paddr */ 2080 ASSERT(paddr % IMMU_PAGESIZE == 0); 2081 pte = 0; 2082 PDTE_SET_PADDR(pte, paddr); 2083 PDTE_SET_READ(pte); 2084 PDTE_SET_WRITE(pte); 2085 *hwp = pte; 2086 #else 2087 2088 if (PDTE_P(pte)) { 2089 if (PDTE_PADDR(pte) != paddr) { 2090 ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx", 2091 PDTE_PADDR(pte), paddr); 2092 } 2093 #ifdef BUGGY_DRIVERS 2094 return; 2095 #else 2096 goto out; 2097 #endif 2098 } 2099 2100 /* Don't touch SW4. It is the present field */ 2101 2102 /* clear TM field if not reserved */ 2103 if (immu->immu_TM_reserved == B_FALSE) { 2104 PDTE_CLEAR_TM(pte); 2105 } 2106 2107 #ifdef DEBUG 2108 /* Clear 3rd field for system software - not used */ 2109 PDTE_CLEAR_SW3(pte); 2110 #endif 2111 2112 /* Set paddr */ 2113 ASSERT(paddr % IMMU_PAGESIZE == 0); 2114 PDTE_CLEAR_PADDR(pte); 2115 PDTE_SET_PADDR(pte, paddr); 2116 2117 /* clear SNP field if not reserved. */ 2118 if (immu->immu_SNP_reserved == B_FALSE) { 2119 PDTE_CLEAR_SNP(pte); 2120 } 2121 2122 #ifdef DEBUG 2123 /* Clear SW2 field available for software */ 2124 PDTE_CLEAR_SW2(pte); 2125 #endif 2126 2127 2128 #ifdef DEBUG 2129 /* SP is don't care for PTEs. Clear it for cleanliness */ 2130 PDTE_CLEAR_SP(pte); 2131 #endif 2132 2133 #ifdef DEBUG 2134 /* Clear SW1 field available for software */ 2135 PDTE_CLEAR_SW1(pte); 2136 #endif 2137 2138 /* 2139 * Now that we are done writing the PTE 2140 * set the "present" flag. Note this present 2141 * flag is a bit in the PDE/PTE that the 2142 * spec says is available for system software. 2143 * This is an implementation detail of Solaris 2144 * bare-metal Intel IOMMU. 2145 * The present field in a PDE/PTE is not defined 2146 * by the Vt-d spec 2147 */ 2148 2149 PDTE_SET_P(pte); 2150 2151 out: 2152 #ifdef BUGGY_DRIVERS 2153 PDTE_SET_READ(pte); 2154 PDTE_SET_WRITE(pte); 2155 #else 2156 if (immu_flags & IMMU_FLAGS_READ) 2157 PDTE_SET_READ(pte); 2158 if (immu_flags & IMMU_FLAGS_WRITE) 2159 PDTE_SET_WRITE(pte); 2160 #endif 2161 2162 *hwp = pte; 2163 #endif 2164 } 2165 2166 /*ARGSUSED*/ 2167 static void 2168 PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, 2169 uint64_t *dvma_ptr, uint64_t *nvpages_ptr, dcookie_t *dcookies, 2170 int dcount, dev_info_t *rdip, immu_flags_t immu_flags) 2171 { 2172 paddr_t paddr; 2173 uint64_t nvpages; 2174 uint64_t nppages; 2175 uint64_t dvma; 2176 pgtable_t *pgtable; 2177 hw_pdte_t *hwp; 2178 hw_pdte_t *shwp; 2179 int idx; 2180 int j; 2181 2182 ASSERT(xlate->xlt_level == 1); 2183 2184 pgtable = xlate->xlt_pgtable; 2185 idx = xlate->xlt_idx; 2186 2187 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 2188 ASSERT(pgtable); 2189 2190 dvma = *dvma_ptr; 2191 nvpages = *nvpages_ptr; 2192 2193 ASSERT(dvma || (immu_flags & IMMU_FLAGS_PAGE1)); 2194 ASSERT(nvpages); 2195 2196 /* 2197 * since a caller gets a unique dvma for a physical address, 2198 * no other concurrent thread will be writing to the same 2199 * PTE even if it has the same paddr. So no locks needed. 2200 */ 2201 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; 2202 2203 hwp = shwp; 2204 for (j = dcount - 1; j >= 0; j--) { 2205 if (nvpages <= dcookies[j].dck_npages) 2206 break; 2207 nvpages -= dcookies[j].dck_npages; 2208 } 2209 2210 ASSERT(j >= 0); 2211 ASSERT(nvpages); 2212 ASSERT(nvpages <= dcookies[j].dck_npages); 2213 nppages = nvpages; 2214 paddr = dcookies[j].dck_paddr + 2215 (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE; 2216 2217 nvpages = *nvpages_ptr; 2218 for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) { 2219 2220 ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1)); 2221 2222 PTE_set_one(immu, hwp, paddr, rdip, immu_flags); 2223 2224 ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags) 2225 == B_TRUE); 2226 nppages--; 2227 nvpages--; 2228 paddr += IMMU_PAGESIZE; 2229 dvma += IMMU_PAGESIZE; 2230 2231 if (nppages == 0) { 2232 j++; 2233 } 2234 2235 if (j == dcount) { 2236 ASSERT(nvpages == 0); 2237 break; 2238 } 2239 2240 ASSERT(nvpages); 2241 if (nppages == 0) { 2242 nppages = dcookies[j].dck_npages; 2243 paddr = dcookies[j].dck_paddr; 2244 } 2245 } 2246 2247 /* flush writes to HW PTE table */ 2248 immu_regs_cpu_flush(immu, (caddr_t)shwp, (hwp - shwp) * 2249 sizeof (hw_pdte_t)); 2250 2251 if (nvpages) { 2252 *dvma_ptr = dvma; 2253 *nvpages_ptr = nvpages; 2254 } else { 2255 *dvma_ptr = 0; 2256 *nvpages_ptr = 0; 2257 } 2258 2259 xlate->xlt_idx = idx; 2260 } 2261 2262 /*ARGSUSED*/ 2263 static void 2264 PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next, 2265 dev_info_t *rdip, immu_flags_t immu_flags) 2266 { 2267 hw_pdte_t pde; 2268 2269 pde = *hwp; 2270 2271 /* if PDE is already set, make sure it is correct */ 2272 if (PDTE_P(pde)) { 2273 ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr); 2274 #ifdef BUGGY_DRIVERS 2275 return; 2276 #else 2277 goto out; 2278 #endif 2279 } 2280 2281 /* Dont touch SW4, it is the present bit */ 2282 2283 /* don't touch TM field it is reserved for PDEs */ 2284 2285 /* 3rd field available for system software is not used */ 2286 PDTE_CLEAR_SW3(pde); 2287 2288 /* Set next level pgtable-paddr for PDE */ 2289 ASSERT(next->hwpg_paddr % IMMU_PAGESIZE == 0); 2290 PDTE_CLEAR_PADDR(pde); 2291 PDTE_SET_PADDR(pde, next->hwpg_paddr); 2292 2293 /* don't touch SNP field it is reserved for PDEs */ 2294 2295 /* Clear second field available for system software */ 2296 PDTE_CLEAR_SW2(pde); 2297 2298 /* No super pages for PDEs */ 2299 PDTE_CLEAR_SP(pde); 2300 2301 /* Clear SW1 for software */ 2302 PDTE_CLEAR_SW1(pde); 2303 2304 /* 2305 * Now that we are done writing the PDE 2306 * set the "present" flag. Note this present 2307 * flag is a bit in the PDE/PTE that the 2308 * spec says is available for system software. 2309 * This is an implementation detail of Solaris 2310 * base-metal Intel IOMMU. 2311 * The present field in a PDE/PTE is not defined 2312 * by the Vt-d spec 2313 */ 2314 2315 out: 2316 #ifdef BUGGY_DRIVERS 2317 PDTE_SET_READ(pde); 2318 PDTE_SET_WRITE(pde); 2319 #else 2320 if (immu_flags & IMMU_FLAGS_READ) 2321 PDTE_SET_READ(pde); 2322 if (immu_flags & IMMU_FLAGS_WRITE) 2323 PDTE_SET_WRITE(pde); 2324 #endif 2325 2326 PDTE_SET_P(pde); 2327 2328 *hwp = pde; 2329 2330 immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t)); 2331 } 2332 2333 /* 2334 * Used to set PDEs 2335 */ 2336 static boolean_t 2337 PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels, 2338 dev_info_t *rdip, immu_flags_t immu_flags) 2339 { 2340 pgtable_t *pgtable; 2341 pgtable_t *new; 2342 pgtable_t *next; 2343 hw_pdte_t *hwp; 2344 int level; 2345 uint_t idx; 2346 krw_t rwtype; 2347 boolean_t set = B_FALSE; 2348 2349 /* xlate should be at level 0 */ 2350 ASSERT(xlate->xlt_level == 0); 2351 ASSERT(xlate->xlt_idx == 0); 2352 2353 /* start with highest level pgtable i.e. root */ 2354 xlate += nlevels; 2355 ASSERT(xlate->xlt_level == nlevels); 2356 2357 new = NULL; 2358 xlate->xlt_pgtable = domain->dom_pgtable_root; 2359 for (level = nlevels; level > 1; level--, xlate--) { 2360 2361 ASSERT(xlate->xlt_level == level); 2362 2363 idx = xlate->xlt_idx; 2364 pgtable = xlate->xlt_pgtable; 2365 2366 ASSERT(pgtable); 2367 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 2368 2369 /* speculative alloc */ 2370 if (new == NULL) { 2371 new = pgtable_alloc(immu, immu_flags); 2372 if (new == NULL) { 2373 ddi_err(DER_PANIC, rdip, "pgtable alloc err"); 2374 } 2375 } 2376 2377 /* Lock the pgtable in READ mode first */ 2378 rw_enter(&(pgtable->swpg_rwlock), RW_READER); 2379 rwtype = RW_READER; 2380 again: 2381 hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; 2382 2383 ASSERT(pgtable->swpg_next_array); 2384 2385 next = (pgtable->swpg_next_array)[idx]; 2386 2387 /* 2388 * check if leafier level already has a pgtable 2389 * if yes, verify 2390 */ 2391 if (next == NULL) { 2392 /* Change to a write lock */ 2393 if (rwtype == RW_READER && 2394 rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) { 2395 rw_exit(&(pgtable->swpg_rwlock)); 2396 rw_enter(&(pgtable->swpg_rwlock), RW_WRITER); 2397 rwtype = RW_WRITER; 2398 goto again; 2399 } 2400 rwtype = RW_WRITER; 2401 pgtable_zero(immu, new); 2402 next = new; 2403 new = NULL; 2404 (pgtable->swpg_next_array)[idx] = next; 2405 PDE_set_one(immu, hwp, next, rdip, immu_flags); 2406 set = B_TRUE; 2407 rw_downgrade(&(pgtable->swpg_rwlock)); 2408 rwtype = RW_READER; 2409 } else { 2410 hw_pdte_t pde = *hwp; 2411 2412 #ifndef BUGGY_DRIVERS 2413 /* 2414 * If buggy driver we already set permission 2415 * READ+WRITE so nothing to do for that case 2416 * XXX Check that read writer perms change before 2417 * actually setting perms. Also need to hold lock 2418 */ 2419 if (immu_flags & IMMU_FLAGS_READ) 2420 PDTE_SET_READ(pde); 2421 if (immu_flags & IMMU_FLAGS_WRITE) 2422 PDTE_SET_WRITE(pde); 2423 2424 #endif 2425 2426 *hwp = pde; 2427 } 2428 2429 ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags) 2430 == B_TRUE); 2431 2432 (xlate - 1)->xlt_pgtable = next; 2433 ASSERT(rwtype == RW_READER); 2434 rw_exit(&(pgtable->swpg_rwlock)); 2435 } 2436 2437 if (new) { 2438 pgtable_free(immu, new); 2439 } 2440 2441 return (set); 2442 } 2443 2444 /* 2445 * dvma_map() 2446 * map a contiguous range of DVMA pages 2447 * 2448 * immu: IOMMU unit for which we are generating DVMA cookies 2449 * domain: domain 2450 * sdvma: Starting dvma 2451 * spaddr: Starting paddr 2452 * npages: Number of pages 2453 * rdip: requesting device 2454 * immu_flags: flags 2455 */ 2456 static boolean_t 2457 dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snvpages, 2458 dcookie_t *dcookies, int dcount, dev_info_t *rdip, immu_flags_t immu_flags) 2459 { 2460 uint64_t dvma; 2461 uint64_t n; 2462 int nlevels = immu->immu_dvma_nlevels; 2463 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}; 2464 boolean_t pde_set = B_FALSE; 2465 2466 ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS); 2467 ASSERT(sdvma % IMMU_PAGESIZE == 0); 2468 ASSERT(snvpages); 2469 2470 n = snvpages; 2471 dvma = sdvma; 2472 2473 while (n > 0) { 2474 xlate_setup(immu, dvma, xlate, nlevels, rdip); 2475 2476 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */ 2477 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags) 2478 == B_TRUE) { 2479 pde_set = B_TRUE; 2480 } 2481 2482 /* set all matching ptes that fit into this leaf pgtable */ 2483 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies, 2484 dcount, rdip, immu_flags); 2485 } 2486 2487 return (pde_set); 2488 } 2489 2490 /* 2491 * dvma_unmap() 2492 * unmap a range of DVMAs 2493 * 2494 * immu: IOMMU unit state 2495 * domain: domain for requesting device 2496 * ddip: domain-dip 2497 * dvma: starting DVMA 2498 * npages: Number of IMMU pages to be unmapped 2499 * rdip: requesting device 2500 */ 2501 static void 2502 dvma_unmap(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snpages, 2503 dev_info_t *rdip) 2504 { 2505 int nlevels = immu->immu_dvma_nlevels; 2506 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}; 2507 uint64_t n; 2508 uint64_t dvma; 2509 2510 ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS); 2511 ASSERT(sdvma != 0); 2512 ASSERT(sdvma % IMMU_PAGESIZE == 0); 2513 ASSERT(snpages); 2514 2515 dvma = sdvma; 2516 n = snpages; 2517 2518 while (n > 0) { 2519 /* setup the xlate array */ 2520 xlate_setup(immu, dvma, xlate, nlevels, rdip); 2521 2522 /* just lookup existing pgtables. Should never fail */ 2523 PDE_lookup(immu, domain, xlate, nlevels, rdip); 2524 2525 /* clear all matching ptes that fit into this leaf pgtable */ 2526 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip); 2527 } 2528 2529 /* No need to flush IOTLB after unmap */ 2530 } 2531 2532 static uint64_t 2533 dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages) 2534 { 2535 ddi_dma_attr_t *dma_attr; 2536 uint64_t dvma; 2537 size_t xsize, align; 2538 uint64_t minaddr, maxaddr; 2539 2540 ASSERT(domain->dom_maptype != IMMU_MAPTYPE_UNITY); 2541 2542 /* shotcuts */ 2543 dma_attr = &(hp->dmai_attr); 2544 2545 /* parameters */ 2546 xsize = npages * IMMU_PAGESIZE; 2547 align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE); 2548 minaddr = dma_attr->dma_attr_addr_lo; 2549 maxaddr = dma_attr->dma_attr_addr_hi + 1; 2550 /* nocross is checked in cookie_update() */ 2551 2552 /* handle the rollover cases */ 2553 if (maxaddr < dma_attr->dma_attr_addr_hi) { 2554 maxaddr = dma_attr->dma_attr_addr_hi; 2555 } 2556 2557 /* 2558 * allocate from vmem arena. 2559 */ 2560 dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena, 2561 xsize, align, 0, 0, (void *)(uintptr_t)minaddr, 2562 (void *)(uintptr_t)maxaddr, VM_NOSLEEP); 2563 2564 ASSERT(dvma); 2565 ASSERT(dvma >= minaddr); 2566 ASSERT(dvma + xsize - 1 < maxaddr); 2567 2568 return (dvma); 2569 } 2570 2571 static void 2572 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages) 2573 { 2574 uint64_t size = npages * IMMU_PAGESIZE; 2575 2576 ASSERT(domain); 2577 ASSERT(domain->dom_did > 0); 2578 ASSERT(dvma); 2579 ASSERT(npages); 2580 2581 if (domain->dom_maptype != IMMU_MAPTYPE_XLATE) { 2582 ASSERT(domain->dom_maptype == IMMU_MAPTYPE_UNITY); 2583 return; 2584 } 2585 2586 vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size); 2587 } 2588 /*ARGSUSED*/ 2589 static void 2590 cookie_free(rootnex_dma_t *dma, immu_t *immu, domain_t *domain, 2591 dev_info_t *rdip) 2592 { 2593 int i; 2594 uint64_t dvma; 2595 uint64_t npages; 2596 dvcookie_t *dvcookies = dma->dp_dvcookies; 2597 2598 ASSERT(dma->dp_max_cookies); 2599 ASSERT(dma->dp_max_dcookies); 2600 ASSERT(dma->dp_dvmax < dma->dp_max_cookies); 2601 ASSERT(dma->dp_dmax < dma->dp_max_dcookies); 2602 2603 /* 2604 * we allocated DVMA in a single chunk. Calculate total number 2605 * of pages 2606 */ 2607 for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) { 2608 npages += dvcookies[i].dvck_npages; 2609 } 2610 dvma = dvcookies[0].dvck_dvma; 2611 #ifdef DEBUG 2612 /* Unmap only in DEBUG mode */ 2613 dvma_unmap(immu, domain, dvma, npages, rdip); 2614 #endif 2615 dvma_free(domain, dvma, npages); 2616 2617 kmem_free(dma->dp_dvcookies, sizeof (dvcookie_t) * dma->dp_max_cookies); 2618 dma->dp_dvcookies = NULL; 2619 kmem_free(dma->dp_dcookies, sizeof (dcookie_t) * dma->dp_max_dcookies); 2620 dma->dp_dcookies = NULL; 2621 if (dma->dp_need_to_free_cookie == B_TRUE) { 2622 kmem_free(dma->dp_cookies, sizeof (ddi_dma_cookie_t) * 2623 dma->dp_max_cookies); 2624 dma->dp_dcookies = NULL; 2625 dma->dp_need_to_free_cookie = B_FALSE; 2626 } 2627 2628 dma->dp_max_cookies = 0; 2629 dma->dp_max_dcookies = 0; 2630 dma->dp_cookie_size = 0; 2631 dma->dp_dvmax = 0; 2632 dma->dp_dmax = 0; 2633 } 2634 2635 /* 2636 * cookie_alloc() 2637 */ 2638 static int 2639 cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq, 2640 ddi_dma_attr_t *attr, uint_t prealloc) 2641 { 2642 int kmflag; 2643 rootnex_sglinfo_t *sinfo = &(dma->dp_sglinfo); 2644 dvcookie_t *dvcookies = dma->dp_dvcookies; 2645 dcookie_t *dcookies = dma->dp_dcookies; 2646 ddi_dma_cookie_t *cookies = dma->dp_cookies; 2647 uint64_t max_cookies; 2648 uint64_t max_dcookies; 2649 uint64_t cookie_size; 2650 2651 /* we need to allocate new array */ 2652 if (dmareq->dmar_fp == DDI_DMA_SLEEP) { 2653 kmflag = KM_SLEEP; 2654 } else { 2655 kmflag = KM_NOSLEEP; 2656 } 2657 2658 /* 2659 * XXX make sure cookies size doen't exceed sinfo->si_max_cookie_size; 2660 */ 2661 2662 /* 2663 * figure out the rough estimate of array size 2664 * At a minimum, each cookie must hold 1 page. 2665 * At a maximum, it cannot exceed dma_attr_sgllen 2666 */ 2667 max_dcookies = dmareq->dmar_object.dmao_size + IMMU_PAGEOFFSET; 2668 max_dcookies /= IMMU_PAGESIZE; 2669 max_dcookies++; 2670 max_cookies = MIN(max_dcookies, attr->dma_attr_sgllen); 2671 2672 /* allocate the dvma cookie array */ 2673 dvcookies = kmem_zalloc(sizeof (dvcookie_t) * max_cookies, kmflag); 2674 if (dvcookies == NULL) { 2675 return (DDI_FAILURE); 2676 } 2677 2678 /* allocate the "phys" cookie array */ 2679 dcookies = kmem_zalloc(sizeof (dcookie_t) * max_dcookies, kmflag); 2680 if (dcookies == NULL) { 2681 kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies); 2682 dvcookies = NULL; 2683 return (DDI_FAILURE); 2684 } 2685 2686 /* allocate the "real" cookie array - the one given to users */ 2687 cookie_size = sizeof (ddi_dma_cookie_t) * max_cookies; 2688 if (max_cookies > prealloc) { 2689 cookies = kmem_zalloc(cookie_size, kmflag); 2690 if (cookies == NULL) { 2691 kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies); 2692 kmem_free(dcookies, sizeof (dcookie_t) * max_dcookies); 2693 goto fail; 2694 } 2695 dma->dp_need_to_free_cookie = B_TRUE; 2696 } else { 2697 /* the preallocated buffer fits this size */ 2698 cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer; 2699 bzero(cookies, sizeof (ddi_dma_cookie_t)* max_cookies); 2700 dma->dp_need_to_free_cookie = B_FALSE; 2701 } 2702 2703 dma->dp_dvcookies = dvcookies; 2704 dma->dp_dcookies = dcookies; 2705 dma->dp_cookies = cookies; 2706 dma->dp_cookie_size = cookie_size; 2707 dma->dp_max_cookies = max_cookies; 2708 dma->dp_max_dcookies = max_dcookies; 2709 dma->dp_dvmax = 0; 2710 dma->dp_dmax = 0; 2711 sinfo->si_max_pages = dma->dp_max_cookies; 2712 2713 return (DDI_SUCCESS); 2714 2715 fail: 2716 dma->dp_dvcookies = NULL; 2717 dma->dp_dcookies = NULL; 2718 dma->dp_cookies = NULL; 2719 dma->dp_cookie_size = 0; 2720 dma->dp_max_cookies = 0; 2721 dma->dp_max_dcookies = 0; 2722 dma->dp_dvmax = 0; 2723 dma->dp_dmax = 0; 2724 dma->dp_need_to_free_cookie = B_FALSE; 2725 sinfo->si_max_pages = 0; 2726 2727 return (DDI_FAILURE); 2728 } 2729 2730 /*ARGSUSED*/ 2731 static void 2732 cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr, 2733 int64_t psize, uint64_t maxseg, size_t nocross) 2734 { 2735 dvcookie_t *dvcookies = dma->dp_dvcookies; 2736 dcookie_t *dcookies = dma->dp_dcookies; 2737 ddi_dma_cookie_t *cookies = dma->dp_cookies; 2738 uint64_t dvmax = dma->dp_dvmax; 2739 uint64_t dmax = dma->dp_dmax; 2740 2741 ASSERT(dvmax < dma->dp_max_cookies); 2742 ASSERT(dmax < dma->dp_max_dcookies); 2743 2744 paddr &= IMMU_PAGEMASK; 2745 2746 ASSERT(paddr); 2747 ASSERT(psize); 2748 ASSERT(maxseg); 2749 2750 /* 2751 * check to see if this page would put us 2752 * over the max cookie size. 2753 */ 2754 if (cookies[dvmax].dmac_size + psize > maxseg) { 2755 dvmax++; /* use the next dvcookie */ 2756 dmax++; /* also means we use the next dcookie */ 2757 ASSERT(dvmax < dma->dp_max_cookies); 2758 ASSERT(dmax < dma->dp_max_dcookies); 2759 } 2760 2761 /* 2762 * check to see if this page would make us larger than 2763 * the nocross boundary. If yes, create a new cookie 2764 * otherwise we will fail later with vmem_xalloc() 2765 * due to overconstrained alloc requests 2766 * nocross == 0 implies no nocross constraint. 2767 */ 2768 if (nocross > 0) { 2769 ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE 2770 <= nocross); 2771 if ((dvcookies[dvmax].dvck_npages + 1) * IMMU_PAGESIZE 2772 > nocross) { 2773 dvmax++; /* use the next dvcookie */ 2774 dmax++; /* also means we use the next dcookie */ 2775 ASSERT(dvmax < dma->dp_max_cookies); 2776 ASSERT(dmax < dma->dp_max_dcookies); 2777 } 2778 ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE 2779 <= nocross); 2780 } 2781 2782 /* 2783 * If the cookie is empty 2784 */ 2785 if (dvcookies[dvmax].dvck_npages == 0) { 2786 ASSERT(cookies[dvmax].dmac_size == 0); 2787 ASSERT(dvcookies[dvmax].dvck_dvma == 0); 2788 ASSERT(dvcookies[dvmax].dvck_npages 2789 == 0); 2790 ASSERT(dcookies[dmax].dck_paddr == 0); 2791 ASSERT(dcookies[dmax].dck_npages == 0); 2792 2793 dvcookies[dvmax].dvck_dvma = 0; 2794 dvcookies[dvmax].dvck_npages = 1; 2795 dcookies[dmax].dck_paddr = paddr; 2796 dcookies[dmax].dck_npages = 1; 2797 cookies[dvmax].dmac_size = psize; 2798 } else { 2799 /* Cookie not empty. Add to it */ 2800 cookies[dma->dp_dvmax].dmac_size += psize; 2801 ASSERT(dvcookies[dma->dp_dvmax].dvck_dvma == 0); 2802 dvcookies[dma->dp_dvmax].dvck_npages++; 2803 ASSERT(dcookies[dmax].dck_paddr != 0); 2804 ASSERT(dcookies[dmax].dck_npages != 0); 2805 2806 /* Check if this paddr is contiguous */ 2807 if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) { 2808 dcookies[dmax].dck_npages++; 2809 } else { 2810 /* No, we need a new dcookie */ 2811 dmax++; 2812 ASSERT(dcookies[dmax].dck_paddr == 0); 2813 ASSERT(dcookies[dmax].dck_npages == 0); 2814 dcookies[dmax].dck_paddr = paddr; 2815 dcookies[dmax].dck_npages = 1; 2816 } 2817 } 2818 2819 dma->dp_dvmax = dvmax; 2820 dma->dp_dmax = dmax; 2821 } 2822 2823 static void 2824 cookie_finalize(ddi_dma_impl_t *hp, immu_t *immu, domain_t *domain, 2825 dev_info_t *rdip, immu_flags_t immu_flags) 2826 { 2827 int i; 2828 rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private; 2829 dvcookie_t *dvcookies = dma->dp_dvcookies; 2830 dcookie_t *dcookies = dma->dp_dcookies; 2831 ddi_dma_cookie_t *cookies = dma->dp_cookies; 2832 uint64_t npages; 2833 uint64_t dvma; 2834 boolean_t pde_set; 2835 2836 /* First calculate the total number of pages required */ 2837 for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) { 2838 npages += dvcookies[i].dvck_npages; 2839 } 2840 2841 /* Now allocate dvma */ 2842 dvma = dvma_alloc(hp, domain, npages); 2843 2844 /* Now map the dvma */ 2845 pde_set = dvma_map(immu, domain, dvma, npages, dcookies, 2846 dma->dp_dmax + 1, rdip, immu_flags); 2847 2848 /* Invalidate the IOTLB */ 2849 immu_flush_iotlb_psi(immu, domain->dom_did, dvma, npages, 2850 pde_set == B_TRUE ? TLB_IVA_WHOLE : TLB_IVA_LEAF); 2851 2852 /* Now setup dvcookies and real cookie addresses */ 2853 for (i = 0; i <= dma->dp_dvmax; i++) { 2854 dvcookies[i].dvck_dvma = dvma; 2855 cookies[i].dmac_laddress = dvma; 2856 ASSERT(cookies[i].dmac_size != 0); 2857 cookies[i].dmac_type = 0; 2858 dvma += (dvcookies[i].dvck_npages * IMMU_PAGESIZE); 2859 } 2860 2861 #ifdef TEST 2862 immu_flush_iotlb_dsi(immu, domain->dom_did); 2863 #endif 2864 } 2865 2866 /* 2867 * cookie_create() 2868 */ 2869 static int 2870 cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, 2871 ddi_dma_attr_t *a, immu_t *immu, domain_t *domain, dev_info_t *rdip, 2872 uint_t prealloc_count, immu_flags_t immu_flags) 2873 { 2874 ddi_dma_atyp_t buftype; 2875 uint64_t offset; 2876 page_t **pparray; 2877 uint64_t paddr; 2878 uint_t psize; 2879 uint_t size; 2880 uint64_t maxseg; 2881 caddr_t vaddr; 2882 uint_t pcnt; 2883 page_t *page; 2884 rootnex_sglinfo_t *sglinfo; 2885 ddi_dma_obj_t *dmar_object; 2886 rootnex_dma_t *dma; 2887 size_t nocross; 2888 2889 dma = (rootnex_dma_t *)hp->dmai_private; 2890 sglinfo = &(dma->dp_sglinfo); 2891 dmar_object = &(dmareq->dmar_object); 2892 maxseg = sglinfo->si_max_cookie_size; 2893 pparray = dmar_object->dmao_obj.virt_obj.v_priv; 2894 vaddr = dmar_object->dmao_obj.virt_obj.v_addr; 2895 buftype = dmar_object->dmao_type; 2896 size = dmar_object->dmao_size; 2897 nocross = (size_t)(a->dma_attr_seg + 1); 2898 2899 /* 2900 * Allocate cookie, dvcookie and dcookie 2901 */ 2902 if (cookie_alloc(dma, dmareq, a, prealloc_count) != DDI_SUCCESS) { 2903 return (DDI_FAILURE); 2904 } 2905 hp->dmai_cookie = dma->dp_cookies; 2906 2907 pcnt = 0; 2908 2909 /* retrieve paddr, psize, offset from dmareq */ 2910 if (buftype == DMA_OTYP_PAGES) { 2911 page = dmar_object->dmao_obj.pp_obj.pp_pp; 2912 ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); 2913 offset = dmar_object->dmao_obj.pp_obj.pp_offset & 2914 MMU_PAGEOFFSET; 2915 paddr = pfn_to_pa(page->p_pagenum) + offset; 2916 psize = MIN((MMU_PAGESIZE - offset), size); 2917 sglinfo->si_asp = NULL; 2918 page = page->p_next; 2919 } else { 2920 ASSERT((buftype == DMA_OTYP_VADDR) || 2921 (buftype == DMA_OTYP_BUFVADDR)); 2922 sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as; 2923 if (sglinfo->si_asp == NULL) { 2924 sglinfo->si_asp = &kas; 2925 } 2926 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET; 2927 if (pparray != NULL) { 2928 ASSERT(!PP_ISFREE(pparray[pcnt])); 2929 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset; 2930 psize = MIN((MMU_PAGESIZE - offset), size); 2931 pcnt++; 2932 } else { 2933 paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, 2934 vaddr)) + offset; 2935 psize = MIN(size, (MMU_PAGESIZE - offset)); 2936 vaddr += psize; 2937 } 2938 } 2939 2940 /* save the iommu page offset */ 2941 sglinfo->si_buf_offset = offset & IMMU_PAGEOFFSET; 2942 2943 /* 2944 * setup dvcookie and dcookie for [paddr, paddr+psize) 2945 */ 2946 cookie_update(domain, dma, paddr, psize, maxseg, nocross); 2947 2948 size -= psize; 2949 while (size > 0) { 2950 /* get the size for this page (i.e. partial or full page) */ 2951 psize = MIN(size, MMU_PAGESIZE); 2952 if (buftype == DMA_OTYP_PAGES) { 2953 /* get the paddr from the page_t */ 2954 ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); 2955 paddr = pfn_to_pa(page->p_pagenum); 2956 page = page->p_next; 2957 } else if (pparray != NULL) { 2958 /* index into the array of page_t's to get the paddr */ 2959 ASSERT(!PP_ISFREE(pparray[pcnt])); 2960 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum); 2961 pcnt++; 2962 } else { 2963 /* call into the VM to get the paddr */ 2964 paddr = pfn_to_pa(hat_getpfnum 2965 (sglinfo->si_asp->a_hat, vaddr)); 2966 vaddr += psize; 2967 } 2968 /* 2969 * set dvcookie and dcookie for [paddr, paddr+psize) 2970 */ 2971 cookie_update(domain, dma, paddr, psize, maxseg, nocross); 2972 size -= psize; 2973 } 2974 2975 cookie_finalize(hp, immu, domain, rdip, immu_flags); 2976 2977 /* take account in the offset into the first page */ 2978 dma->dp_cookies[0].dmac_laddress += sglinfo->si_buf_offset; 2979 2980 /* save away how many cookies we have */ 2981 sglinfo->si_sgl_size = dma->dp_dvmax + 1; 2982 2983 return (DDI_SUCCESS); 2984 } 2985 2986 /* ############################# Functions exported ######################## */ 2987 2988 /* 2989 * setup the DVMA subsystem 2990 * this code runs only for the first IOMMU unit 2991 */ 2992 void 2993 immu_dvma_setup(list_t *listp) 2994 { 2995 immu_t *immu; 2996 uint_t kval; 2997 size_t nchains; 2998 2999 /* locks */ 3000 mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL); 3001 3002 /* Create lists */ 3003 list_create(&immu_unity_domain_list, sizeof (domain_t), 3004 offsetof(domain_t, dom_maptype_node)); 3005 list_create(&immu_xlate_domain_list, sizeof (domain_t), 3006 offsetof(domain_t, dom_maptype_node)); 3007 3008 /* Setup BDF domain hash */ 3009 nchains = 0xff; 3010 kval = mod_hash_iddata_gen(nchains); 3011 3012 bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH", 3013 nchains, mod_hash_null_keydtor, mod_hash_null_valdtor, 3014 mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp, 3015 KM_NOSLEEP); 3016 ASSERT(bdf_domain_hash); 3017 3018 immu = list_head(listp); 3019 for (; immu; immu = list_next(listp, immu)) { 3020 create_unity_domain(immu); 3021 did_init(immu); 3022 context_init(immu); 3023 immu->immu_dvma_setup = B_TRUE; 3024 } 3025 } 3026 3027 /* 3028 * Startup up one DVMA unit 3029 */ 3030 void 3031 immu_dvma_startup(immu_t *immu) 3032 { 3033 ASSERT(immu); 3034 ASSERT(immu->immu_dvma_running == B_FALSE); 3035 3036 if (immu_gfxdvma_enable == B_FALSE && 3037 immu->immu_dvma_gfx_only == B_TRUE) { 3038 return; 3039 } 3040 3041 /* 3042 * DVMA will start once IOMMU is "running" 3043 */ 3044 ASSERT(immu->immu_dvma_running == B_FALSE); 3045 immu->immu_dvma_running = B_TRUE; 3046 } 3047 3048 /* 3049 * immu_dvma_physmem_update() 3050 * called when the installed memory on a 3051 * system increases, to expand domain DVMA 3052 * for domains with UNITY mapping 3053 */ 3054 void 3055 immu_dvma_physmem_update(uint64_t addr, uint64_t size) 3056 { 3057 uint64_t start; 3058 uint64_t npages; 3059 int dcount; 3060 dcookie_t dcookies[1] = {0}; 3061 domain_t *domain; 3062 3063 /* 3064 * Just walk the system-wide list of domains with 3065 * UNITY mapping. Both the list of *all* domains 3066 * and *UNITY* domains is protected by the same 3067 * single lock 3068 */ 3069 mutex_enter(&immu_domain_lock); 3070 domain = list_head(&immu_unity_domain_list); 3071 for (; domain; domain = list_next(&immu_unity_domain_list, domain)) { 3072 /* 3073 * Nothing to do if the IOMMU supports passthrough. 3074 */ 3075 if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap)) 3076 continue; 3077 3078 /* There is no vmem_arena for unity domains. Just map it */ 3079 ddi_err(DER_LOG, NULL, "IMMU: unity-domain: Adding map " 3080 "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size); 3081 3082 start = IMMU_ROUNDOWN(addr); 3083 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1; 3084 3085 dcookies[0].dck_paddr = start; 3086 dcookies[0].dck_npages = npages; 3087 dcount = 1; 3088 (void) dvma_map(domain->dom_immu, domain, start, npages, 3089 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 3090 3091 } 3092 mutex_exit(&immu_domain_lock); 3093 } 3094 3095 3096 int 3097 immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng, 3098 uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags) 3099 { 3100 ddi_dma_attr_t *attr; 3101 dev_info_t *ddip; 3102 domain_t *domain; 3103 immu_t *immu; 3104 dcookie_t dcookies[1] = {0}; 3105 int dcount = 0; 3106 boolean_t pde_set = B_TRUE; 3107 int r = DDI_FAILURE; 3108 3109 ASSERT(immu_enable == B_TRUE); 3110 ASSERT(immu_running == B_TRUE || !(immu_flags & IMMU_FLAGS_DMAHDL)); 3111 ASSERT(hp || !(immu_flags & IMMU_FLAGS_DMAHDL)); 3112 3113 /* 3114 * Intel IOMMU will only be turned on if IOMMU 3115 * page size is a multiple of IOMMU page size 3116 */ 3117 3118 /*LINTED*/ 3119 ASSERT(MMU_PAGESIZE % IMMU_PAGESIZE == 0); 3120 3121 /* Can only do DVMA if dip is attached */ 3122 if (rdip == NULL) { 3123 ddi_err(DER_PANIC, rdip, "DVMA map: No device specified"); 3124 /*NOTREACHED*/ 3125 } 3126 3127 immu_flags |= dma_to_immu_flags(dmareq); 3128 3129 immu = immu_dvma_get_immu(rdip, immu_flags); 3130 if (immu == NULL) { 3131 /* 3132 * possible that there is no IOMMU unit for this device 3133 * - BIOS bugs are one example. 3134 */ 3135 ddi_err(DER_WARN, rdip, "No IMMU unit found for device"); 3136 return (DDI_DMA_NORESOURCES); 3137 } 3138 3139 /* 3140 * redirect isa devices attached under lpc to lpc dip 3141 */ 3142 if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) { 3143 rdip = get_lpc_devinfo(immu, rdip, immu_flags); 3144 if (rdip == NULL) { 3145 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3146 /*NOTREACHED*/ 3147 } 3148 } 3149 3150 /* Reset immu, as redirection can change IMMU */ 3151 immu = NULL; 3152 3153 /* 3154 * for gart, redirect to the real graphic devinfo 3155 */ 3156 if (strcmp(ddi_node_name(rdip), "agpgart") == 0) { 3157 rdip = get_gfx_devinfo(rdip); 3158 if (rdip == NULL) { 3159 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3160 /*NOTREACHED*/ 3161 } 3162 } 3163 3164 /* 3165 * Setup DVMA domain for the device. This does 3166 * work only the first time we do DVMA for a 3167 * device. 3168 */ 3169 ddip = NULL; 3170 domain = device_domain(rdip, &ddip, immu_flags); 3171 if (domain == NULL) { 3172 ASSERT(ddip == NULL); 3173 ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device"); 3174 return (DDI_DMA_NORESOURCES); 3175 } 3176 3177 /* 3178 * If a domain is found, we must also have a domain dip 3179 * which is the topmost ancestor dip of rdip that shares 3180 * the same domain with rdip. 3181 */ 3182 if (domain->dom_did == 0 || ddip == NULL) { 3183 ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)", 3184 domain->dom_did, ddip); 3185 return (DDI_DMA_NORESOURCES); 3186 } 3187 3188 immu = domain->dom_immu; 3189 ASSERT(immu); 3190 if (domain->dom_did == IMMU_UNITY_DID) { 3191 ASSERT(domain == immu->immu_unity_domain); 3192 /* mapping already done. Let rootnex create cookies */ 3193 r = DDI_DMA_USE_PHYSICAL; 3194 } else if (immu_flags & IMMU_FLAGS_DMAHDL) { 3195 3196 /* if we have a DMA handle, the IOMMUs must be running */ 3197 ASSERT(immu->immu_regs_running == B_TRUE); 3198 ASSERT(immu->immu_dvma_running == B_TRUE); 3199 3200 attr = &hp->dmai_attr; 3201 if (attr == NULL) { 3202 ddi_err(DER_PANIC, rdip, 3203 "DMA handle (%p): NULL attr", hp); 3204 /*NOTREACHED*/ 3205 } 3206 3207 if (cookie_create(hp, dmareq, attr, immu, domain, rdip, 3208 prealloc_count, immu_flags) != DDI_SUCCESS) { 3209 ddi_err(DER_MODE, rdip, "dvcookie_alloc: failed"); 3210 return (DDI_DMA_NORESOURCES); 3211 } 3212 r = DDI_DMA_MAPPED; 3213 } else if (immu_flags & IMMU_FLAGS_MEMRNG) { 3214 dcookies[0].dck_paddr = mrng->mrng_start; 3215 dcookies[0].dck_npages = mrng->mrng_npages; 3216 dcount = 1; 3217 pde_set = dvma_map(immu, domain, mrng->mrng_start, 3218 mrng->mrng_npages, dcookies, dcount, rdip, immu_flags); 3219 immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start, 3220 mrng->mrng_npages, pde_set == B_TRUE ? 3221 TLB_IVA_WHOLE : TLB_IVA_LEAF); 3222 r = DDI_DMA_MAPPED; 3223 } else { 3224 ddi_err(DER_PANIC, rdip, "invalid flags for immu_dvma_map()"); 3225 /*NOTREACHED*/ 3226 } 3227 3228 /* 3229 * Update the root and context entries 3230 */ 3231 if (immu_context_update(immu, domain, ddip, rdip, immu_flags) 3232 != DDI_SUCCESS) { 3233 ddi_err(DER_MODE, rdip, "DVMA map: context update failed"); 3234 return (DDI_DMA_NORESOURCES); 3235 } 3236 3237 immu_regs_wbf_flush(immu); 3238 3239 return (r); 3240 } 3241 3242 int 3243 immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip) 3244 { 3245 ddi_dma_attr_t *attr; 3246 rootnex_dma_t *dma; 3247 domain_t *domain; 3248 immu_t *immu; 3249 dev_info_t *ddip; 3250 immu_flags_t immu_flags; 3251 3252 ASSERT(immu_enable == B_TRUE); 3253 ASSERT(immu_running == B_TRUE); 3254 ASSERT(hp); 3255 3256 /* 3257 * Intel IOMMU will only be turned on if IOMMU 3258 * page size is same as MMU page size 3259 */ 3260 /*LINTED*/ 3261 ASSERT(MMU_PAGESIZE == IMMU_PAGESIZE); 3262 3263 /* rdip need not be attached */ 3264 if (rdip == NULL) { 3265 ddi_err(DER_PANIC, rdip, "DVMA unmap: No device specified"); 3266 return (DDI_DMA_NORESOURCES); 3267 } 3268 3269 /* 3270 * Get the device domain, this should always 3271 * succeed since there had to be a domain to 3272 * setup DVMA. 3273 */ 3274 dma = (rootnex_dma_t *)hp->dmai_private; 3275 attr = &hp->dmai_attr; 3276 if (attr == NULL) { 3277 ddi_err(DER_PANIC, rdip, "DMA handle (%p) has NULL attr", hp); 3278 /*NOTREACHED*/ 3279 } 3280 immu_flags = dma->dp_sleep_flags; 3281 3282 immu = immu_dvma_get_immu(rdip, immu_flags); 3283 if (immu == NULL) { 3284 /* 3285 * possible that there is no IOMMU unit for this device 3286 * - BIOS bugs are one example. 3287 */ 3288 ddi_err(DER_WARN, rdip, "No IMMU unit found for device"); 3289 return (DDI_DMA_NORESOURCES); 3290 } 3291 3292 3293 /* 3294 * redirect isa devices attached under lpc to lpc dip 3295 */ 3296 if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) { 3297 rdip = get_lpc_devinfo(immu, rdip, immu_flags); 3298 if (rdip == NULL) { 3299 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3300 /*NOTREACHED*/ 3301 } 3302 } 3303 3304 /* Reset immu, as redirection can change IMMU */ 3305 immu = NULL; 3306 3307 /* 3308 * for gart, redirect to the real graphic devinfo 3309 */ 3310 if (strcmp(ddi_node_name(rdip), "agpgart") == 0) { 3311 rdip = get_gfx_devinfo(rdip); 3312 if (rdip == NULL) { 3313 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3314 /*NOTREACHED*/ 3315 } 3316 } 3317 3318 ddip = NULL; 3319 domain = device_domain(rdip, &ddip, immu_flags); 3320 if (domain == NULL || domain->dom_did == 0 || ddip == NULL) { 3321 ddi_err(DER_MODE, rdip, "Attempt to unmap DVMA for " 3322 "a device without domain or with an uninitialized " 3323 "domain"); 3324 return (DDI_DMA_NORESOURCES); 3325 } 3326 3327 /* 3328 * immu must be set in the domain. 3329 */ 3330 immu = domain->dom_immu; 3331 ASSERT(immu); 3332 if (domain->dom_did == IMMU_UNITY_DID) { 3333 ASSERT(domain == immu->immu_unity_domain); 3334 /* 3335 * domain is unity, nothing to do here, let the rootnex 3336 * code free the cookies. 3337 */ 3338 return (DDI_DMA_USE_PHYSICAL); 3339 } 3340 3341 dma = hp->dmai_private; 3342 if (dma == NULL) { 3343 ddi_err(DER_PANIC, rdip, "DVMA unmap: DMA handle (%p) has " 3344 "no private dma structure", hp); 3345 /*NOTREACHED*/ 3346 } 3347 3348 cookie_free(dma, immu, domain, rdip); 3349 3350 /* No invalidation needed for unmap */ 3351 immu_regs_wbf_flush(immu); 3352 3353 return (DDI_SUCCESS); 3354 } 3355 3356 immu_devi_t * 3357 immu_devi_get(dev_info_t *rdip) 3358 { 3359 immu_devi_t *immu_devi; 3360 volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu); 3361 3362 /* Just want atomic reads. No need for lock */ 3363 immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr, 3364 0); 3365 return (immu_devi); 3366 } 3367