1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Portions Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2009, Intel Corporation. 27 * All rights reserved. 28 */ 29 30 /* 31 * DVMA code 32 * This file contains Intel IOMMU code that deals with DVMA 33 * i.e. DMA remapping. 34 */ 35 36 #include <sys/sysmacros.h> 37 #include <sys/pcie.h> 38 #include <sys/pci_cfgspace.h> 39 #include <vm/hat_i86.h> 40 #include <sys/memlist.h> 41 #include <sys/acpi/acpi.h> 42 #include <sys/acpica.h> 43 #include <sys/modhash.h> 44 #include <sys/immu.h> 45 46 #undef TEST 47 48 /* 49 * Macros based on PCI spec 50 */ 51 #define IMMU_PCI_REV2CLASS(r) ((r) >> 8) /* classcode from revid */ 52 #define IMMU_PCI_CLASS2BASE(c) ((c) >> 16) /* baseclass from classcode */ 53 #define IMMU_PCI_CLASS2SUB(c) (((c) >> 8) & 0xff); /* classcode */ 54 55 #define IMMU_CONTIG_PADDR(d, p) \ 56 ((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p)) 57 58 typedef struct dvma_arg { 59 immu_t *dva_immu; 60 dev_info_t *dva_rdip; 61 dev_info_t *dva_ddip; 62 domain_t *dva_domain; 63 int dva_level; 64 immu_flags_t dva_flags; 65 list_t *dva_list; 66 int dva_error; 67 } dvma_arg_t; 68 69 static domain_t *domain_create(immu_t *immu, dev_info_t *ddip, 70 dev_info_t *rdip, immu_flags_t immu_flags); 71 static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus, 72 int dev, int func, immu_flags_t immu_flags); 73 static void destroy_immu_devi(immu_devi_t *immu_devi); 74 static boolean_t dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, 75 uint64_t nvpages, dcookie_t *dcookies, int dcount, dev_info_t *rdip, 76 immu_flags_t immu_flags); 77 78 /* Extern globals */ 79 extern struct memlist *phys_install; 80 81 82 /* static Globals */ 83 84 /* 85 * Used to setup DMA objects (memory regions) 86 * for DMA reads by IOMMU units 87 */ 88 static ddi_dma_attr_t immu_dma_attr = { 89 DMA_ATTR_V0, 90 0U, 91 0xffffffffU, 92 0xffffffffU, 93 MMU_PAGESIZE, /* MMU page aligned */ 94 0x1, 95 0x1, 96 0xffffffffU, 97 0xffffffffU, 98 1, 99 4, 100 0 101 }; 102 103 static ddi_device_acc_attr_t immu_acc_attr = { 104 DDI_DEVICE_ATTR_V0, 105 DDI_NEVERSWAP_ACC, 106 DDI_STRICTORDER_ACC 107 }; 108 109 110 /* globals private to this file */ 111 static kmutex_t immu_domain_lock; 112 static list_t immu_unity_domain_list; 113 static list_t immu_xlate_domain_list; 114 115 /* structure used to store idx into each level of the page tables */ 116 typedef struct xlate { 117 int xlt_level; 118 uint_t xlt_idx; 119 pgtable_t *xlt_pgtable; 120 } xlate_t; 121 122 /* 0 is reserved by Vt-d spec. 
Solaris reserves 1 */ 123 #define IMMU_UNITY_DID 1 124 125 static mod_hash_t *bdf_domain_hash; 126 127 static domain_t * 128 bdf_domain_lookup(immu_devi_t *immu_devi) 129 { 130 domain_t *domain; 131 int16_t seg = immu_devi->imd_seg; 132 int16_t bus = immu_devi->imd_bus; 133 int16_t devfunc = immu_devi->imd_devfunc; 134 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc); 135 136 if (seg < 0 || bus < 0 || devfunc < 0) { 137 return (NULL); 138 } 139 140 domain = NULL; 141 if (mod_hash_find(bdf_domain_hash, 142 (void *)bdf, (void *)&domain) == 0) { 143 ASSERT(domain); 144 ASSERT(domain->dom_did > 0); 145 return (domain); 146 } else { 147 return (NULL); 148 } 149 } 150 151 static void 152 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain) 153 { 154 int16_t seg = immu_devi->imd_seg; 155 int16_t bus = immu_devi->imd_bus; 156 int16_t devfunc = immu_devi->imd_devfunc; 157 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc); 158 int r; 159 160 if (seg < 0 || bus < 0 || devfunc < 0) { 161 return; 162 } 163 164 r = mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain); 165 ASSERT(r != MH_ERR_DUPLICATE); 166 ASSERT(r == 0); 167 } 168 169 static int 170 match_lpc(dev_info_t *pdip, void *arg) 171 { 172 immu_devi_t *immu_devi; 173 dvma_arg_t *dvap = (dvma_arg_t *)arg; 174 175 ASSERT(dvap->dva_error == DDI_FAILURE); 176 ASSERT(dvap->dva_ddip == NULL); 177 ASSERT(dvap->dva_list); 178 179 if (list_is_empty(dvap->dva_list)) { 180 return (DDI_WALK_TERMINATE); 181 } 182 183 immu_devi = list_head(dvap->dva_list); 184 for (; immu_devi; immu_devi = list_next(dvap->dva_list, 185 immu_devi)) { 186 ASSERT(immu_devi->imd_dip); 187 if (immu_devi->imd_dip == pdip) { 188 dvap->dva_ddip = pdip; 189 dvap->dva_error = DDI_SUCCESS; 190 return (DDI_WALK_TERMINATE); 191 } 192 } 193 194 return (DDI_WALK_CONTINUE); 195 } 196 197 static void 198 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu) 199 { 200 list_t *spclist = NULL; 201 immu_devi_t *immu_devi; 202 203 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_lock))); 204 205 immu_devi = IMMU_DEVI(dip); 206 if (immu_devi->imd_display == B_TRUE) { 207 spclist = &(immu->immu_dvma_gfx_list); 208 } else if (immu_devi->imd_lpc == B_TRUE) { 209 spclist = &(immu->immu_dvma_lpc_list); 210 } 211 212 if (spclist) { 213 mutex_enter(&(immu->immu_lock)); 214 list_insert_head(spclist, immu_devi); 215 mutex_exit(&(immu->immu_lock)); 216 } 217 } 218 219 /* 220 * Set the immu_devi struct in the immu_devi field of a devinfo node 221 */ 222 int 223 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags) 224 { 225 int bus, dev, func; 226 immu_devi_t *new_imd; 227 immu_devi_t *immu_devi; 228 229 ASSERT(root_devinfo); 230 ASSERT(dip); 231 ASSERT(dip != root_devinfo); 232 233 immu_devi = immu_devi_get(dip); 234 if (immu_devi != NULL) { 235 return (DDI_SUCCESS); 236 } 237 238 bus = dev = func = -1; 239 240 /* 241 * Assume a new immu_devi struct is needed 242 */ 243 if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) { 244 /* 245 * No BDF. Set bus = -1 to indicate this. 246 * We still need to create a immu_devi struct 247 * though 248 */ 249 bus = -1; 250 dev = 0; 251 func = 0; 252 } 253 254 new_imd = create_immu_devi(dip, bus, dev, func, immu_flags); 255 if (new_imd == NULL) { 256 ddi_err(DER_WARN, dip, "Failed to create immu_devi " 257 "structure"); 258 return (DDI_FAILURE); 259 } 260 261 /* 262 * Check if some other thread allocated a immu_devi while we 263 * didn't own the lock. 
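 * If it did, keep the pointer that is already installed in the dip and
 * free the one we just created; only the copy published under
 * devi_lock is ever used.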
264 */ 265 mutex_enter(&(DEVI(dip)->devi_lock)); 266 if (IMMU_DEVI(dip) == NULL) { 267 IMMU_DEVI_SET(dip, new_imd); 268 } else { 269 destroy_immu_devi(new_imd); 270 } 271 mutex_exit(&(DEVI(dip)->devi_lock)); 272 273 return (DDI_SUCCESS); 274 } 275 276 static dev_info_t * 277 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags) 278 { 279 dvma_arg_t dvarg = {0}; 280 dvarg.dva_list = &(immu->immu_dvma_lpc_list); 281 dvarg.dva_rdip = rdip; 282 dvarg.dva_error = DDI_FAILURE; 283 284 if (immu_walk_ancestor(rdip, NULL, match_lpc, 285 &dvarg, NULL, immu_flags) != DDI_SUCCESS) { 286 ddi_err(DER_MODE, rdip, "Could not walk ancestors to " 287 "find lpc_devinfo for ISA device"); 288 return (NULL); 289 } 290 291 if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) { 292 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for " 293 "ISA device"); 294 return (NULL); 295 } 296 297 return (dvarg.dva_ddip); 298 } 299 300 static dev_info_t * 301 get_gfx_devinfo(dev_info_t *rdip) 302 { 303 immu_t *immu; 304 immu_devi_t *immu_devi; 305 list_t *list_gfx; 306 307 /* 308 * The GFX device may not be on the same IMMU unit as "agpgart" 309 * so search globally 310 */ 311 immu_devi = NULL; 312 immu = list_head(&immu_list); 313 for (; immu; immu = list_next(&immu_list, immu)) { 314 list_gfx = &(immu->immu_dvma_gfx_list); 315 if (!list_is_empty(list_gfx)) { 316 immu_devi = list_head(list_gfx); 317 break; 318 } 319 } 320 321 if (immu_devi == NULL) { 322 ddi_err(DER_WARN, rdip, "IMMU: No GFX device. " 323 "Cannot redirect agpgart"); 324 return (NULL); 325 } 326 327 /* list is not empty we checked above */ 328 ASSERT(immu_devi); 329 ASSERT(immu_devi->imd_dip); 330 331 ddi_err(DER_LOG, rdip, "IMMU: GFX redirect to %s", 332 ddi_node_name(immu_devi->imd_dip)); 333 334 return (immu_devi->imd_dip); 335 } 336 337 static immu_flags_t 338 dma_to_immu_flags(struct ddi_dma_req *dmareq) 339 { 340 immu_flags_t flags = 0; 341 342 if (dmareq->dmar_fp == DDI_DMA_SLEEP) { 343 flags |= IMMU_FLAGS_SLEEP; 344 } else { 345 flags |= IMMU_FLAGS_NOSLEEP; 346 } 347 348 #ifdef BUGGY_DRIVERS 349 350 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 351 352 #else 353 /* 354 * Read and write flags need to be reversed. 355 * DMA_READ means read from device and write 356 * to memory. So DMA read means DVMA write. 357 */ 358 if (dmareq->dmar_flags & DDI_DMA_READ) 359 flags |= IMMU_FLAGS_WRITE; 360 361 if (dmareq->dmar_flags & DDI_DMA_WRITE) 362 flags |= IMMU_FLAGS_READ; 363 364 /* 365 * Some buggy drivers specify neither READ or WRITE 366 * For such drivers set both read and write permissions 367 */ 368 if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) { 369 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 370 } 371 #endif 372 373 return (flags); 374 } 375 376 int 377 pgtable_ctor(void *buf, void *arg, int kmflag) 378 { 379 size_t actual_size = 0; 380 pgtable_t *pgtable; 381 int (*dmafp)(caddr_t); 382 caddr_t vaddr; 383 void *next; 384 385 ASSERT(buf); 386 ASSERT(arg == NULL); 387 388 pgtable = (pgtable_t *)buf; 389 390 dmafp = (kmflag & KM_NOSLEEP) ? 
DDI_DMA_DONTWAIT : DDI_DMA_SLEEP; 391 392 next = kmem_zalloc(IMMU_PAGESIZE, kmflag); 393 if (next == NULL) { 394 return (-1); 395 } 396 397 ASSERT(root_devinfo); 398 if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr, 399 dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) { 400 kmem_free(next, IMMU_PAGESIZE); 401 return (-1); 402 } 403 404 if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE, 405 &immu_acc_attr, DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED, 406 dmafp, NULL, &vaddr, &actual_size, 407 &pgtable->hwpg_memhdl) != DDI_SUCCESS) { 408 ddi_dma_free_handle(&pgtable->hwpg_dmahdl); 409 kmem_free(next, IMMU_PAGESIZE); 410 return (-1); 411 } 412 413 /* 414 * Memory allocation failure. Maybe a temporary condition 415 * so return error rather than panic, so we can try again 416 */ 417 if (actual_size < IMMU_PAGESIZE) { 418 ddi_dma_mem_free(&pgtable->hwpg_memhdl); 419 ddi_dma_free_handle(&pgtable->hwpg_dmahdl); 420 kmem_free(next, IMMU_PAGESIZE); 421 return (-1); 422 } 423 424 pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr)); 425 pgtable->hwpg_vaddr = vaddr; 426 pgtable->swpg_next_array = next; 427 428 rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL); 429 430 return (0); 431 } 432 433 void 434 pgtable_dtor(void *buf, void *arg) 435 { 436 pgtable_t *pgtable; 437 438 ASSERT(buf); 439 ASSERT(arg == NULL); 440 441 pgtable = (pgtable_t *)buf; 442 ASSERT(pgtable->swpg_next_array); 443 444 /* destroy will panic if lock is held. */ 445 rw_destroy(&(pgtable->swpg_rwlock)); 446 447 ddi_dma_mem_free(&pgtable->hwpg_memhdl); 448 ddi_dma_free_handle(&pgtable->hwpg_dmahdl); 449 kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE); 450 451 /* don't zero out hwpg_vaddr and swpg_next_array for debugging */ 452 } 453 454 /* 455 * pgtable_alloc() 456 * alloc an IOMMU pgtable structure. 457 * This same struct is used for root and context tables as well. 458 * This routine allocs the following: 459 * - a pgtable_t struct 460 * - a HW page which holds PTEs/entries which is accessed by HW 461 * so we set up DMA for this page 462 * - a SW page which is only for our bookkeeping 463 * (for example to hold pointers to the next level pgtable). 464 * So a simple kmem_alloc suffices 465 */ 466 static pgtable_t * 467 pgtable_alloc(immu_t *immu, immu_flags_t immu_flags) 468 { 469 pgtable_t *pgtable; 470 int kmflags; 471 472 ASSERT(immu); 473 474 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ?
KM_NOSLEEP : KM_SLEEP; 475 476 pgtable = kmem_cache_alloc(immu_pgtable_cache, kmflags); 477 if (pgtable == NULL) { 478 return (NULL); 479 } 480 return (pgtable); 481 } 482 483 static void 484 pgtable_zero(immu_t *immu, pgtable_t *pgtable) 485 { 486 bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE); 487 bzero(pgtable->swpg_next_array, IMMU_PAGESIZE); 488 489 /* Dont need to flush the write we will flush when we use the entry */ 490 immu_regs_cpu_flush(immu, pgtable->hwpg_vaddr, IMMU_PAGESIZE); 491 } 492 493 static void 494 pgtable_free(immu_t *immu, pgtable_t *pgtable) 495 { 496 ASSERT(immu); 497 ASSERT(pgtable); 498 499 kmem_cache_free(immu_pgtable_cache, pgtable); 500 } 501 502 /* 503 * Function to identify a display device from the PCI class code 504 */ 505 static boolean_t 506 device_is_display(uint_t classcode) 507 { 508 static uint_t disp_classes[] = { 509 0x000100, 510 0x030000, 511 0x030001 512 }; 513 int i, nclasses = sizeof (disp_classes) / sizeof (uint_t); 514 515 for (i = 0; i < nclasses; i++) { 516 if (classcode == disp_classes[i]) 517 return (B_TRUE); 518 } 519 return (B_FALSE); 520 } 521 522 /* 523 * Function that determines if device is PCIEX and/or PCIEX bridge 524 */ 525 static boolean_t 526 device_is_pciex( 527 uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib) 528 { 529 ushort_t cap; 530 ushort_t capsp; 531 ushort_t cap_count = PCI_CAP_MAX_PTR; 532 ushort_t status; 533 boolean_t is_pciex = B_FALSE; 534 535 *is_pcib = B_FALSE; 536 537 status = pci_getw_func(bus, dev, func, PCI_CONF_STAT); 538 if (!(status & PCI_STAT_CAP)) 539 return (B_FALSE); 540 541 capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR); 542 while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) { 543 capsp &= PCI_CAP_PTR_MASK; 544 cap = pci_getb_func(bus, dev, func, capsp); 545 546 if (cap == PCI_CAP_ID_PCI_E) { 547 status = pci_getw_func(bus, dev, func, capsp + 2); 548 /* 549 * See section 7.8.2 of PCI-Express Base Spec v1.0a 550 * for Device/Port Type. 551 * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the 552 * device is a PCIE2PCI bridge 553 */ 554 *is_pcib = 555 ((status & PCIE_PCIECAP_DEV_TYPE_MASK) == 556 PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE; 557 is_pciex = B_TRUE; 558 } 559 560 capsp = (*pci_getb_func)(bus, dev, func, 561 capsp + PCI_CAP_NEXT_PTR); 562 } 563 564 return (is_pciex); 565 } 566 567 568 /* 569 * immu_dvma_get_immu() 570 * get the immu unit structure for a dev_info node 571 */ 572 immu_t * 573 immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags) 574 { 575 immu_devi_t *immu_devi; 576 immu_t *immu; 577 578 /* 579 * check if immu unit was already found earlier. 580 * If yes, then it will be stashed in immu_devi struct. 581 */ 582 immu_devi = immu_devi_get(dip); 583 if (immu_devi == NULL) { 584 if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) { 585 /* 586 * May fail because of low memory. 
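 * (create_immu_devi() allocates the per-dip bookkeeping with
 * KM_NOSLEEP when IMMU_FLAGS_NOSLEEP is passed, so an allocation
 * failure here is possible under memory pressure.)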
Return error rather 587 * than panic as we want driver to rey again later 588 */ 589 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: " 590 "No immu_devi structure"); 591 /*NOTREACHED*/ 592 } 593 immu_devi = immu_devi_get(dip); 594 ASSERT(immu_devi); 595 } 596 597 mutex_enter(&(DEVI(dip)->devi_lock)); 598 if (immu_devi->imd_immu) { 599 immu = immu_devi->imd_immu; 600 mutex_exit(&(DEVI(dip)->devi_lock)); 601 return (immu); 602 } 603 mutex_exit(&(DEVI(dip)->devi_lock)); 604 605 immu = immu_dmar_get_immu(dip); 606 if (immu == NULL) { 607 ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: " 608 "Cannot find immu_t for device"); 609 /*NOTREACHED*/ 610 } 611 612 /* 613 * Check if some other thread found immu 614 * while lock was not held 615 */ 616 immu_devi = immu_devi_get(dip); 617 /* immu_devi should be present as we found it earlier */ 618 if (immu_devi == NULL) { 619 ddi_err(DER_PANIC, dip, 620 "immu_dvma_get_immu: No immu_devi structure"); 621 /*NOTREACHED*/ 622 } 623 624 mutex_enter(&(DEVI(dip)->devi_lock)); 625 if (immu_devi->imd_immu == NULL) { 626 /* nobody else set it, so we should do it */ 627 immu_devi->imd_immu = immu; 628 immu_devi_set_spclist(dip, immu); 629 } else { 630 /* 631 * if some other thread got immu before 632 * us, it should get the same results 633 */ 634 if (immu_devi->imd_immu != immu) { 635 ddi_err(DER_PANIC, dip, "Multiple " 636 "immu units found for device. Expected (%p), " 637 "actual (%p)", (void *)immu, 638 (void *)immu_devi->imd_immu); 639 mutex_exit(&(DEVI(dip)->devi_lock)); 640 /*NOTREACHED*/ 641 } 642 } 643 mutex_exit(&(DEVI(dip)->devi_lock)); 644 645 return (immu); 646 } 647 648 649 /* ############################# IMMU_DEVI code ############################ */ 650 651 /* 652 * Allocate a immu_devi structure and initialize it 653 */ 654 static immu_devi_t * 655 create_immu_devi(dev_info_t *rdip, int bus, int dev, int func, 656 immu_flags_t immu_flags) 657 { 658 uchar_t baseclass, subclass; 659 uint_t classcode, revclass; 660 immu_devi_t *immu_devi; 661 boolean_t pciex = B_FALSE; 662 int kmflags; 663 boolean_t is_pcib = B_FALSE; 664 665 /* bus == -1 indicate non-PCI device (no BDF) */ 666 ASSERT(bus == -1 || bus >= 0); 667 ASSERT(dev >= 0); 668 ASSERT(func >= 0); 669 670 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? 
KM_NOSLEEP : KM_SLEEP; 671 immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags); 672 if (immu_devi == NULL) { 673 ddi_err(DER_WARN, rdip, "Failed to allocate memory for " 674 "Intel IOMMU immu_devi structure"); 675 return (NULL); 676 } 677 immu_devi->imd_dip = rdip; 678 immu_devi->imd_seg = 0; /* Currently seg can only be 0 */ 679 immu_devi->imd_bus = bus; 680 immu_devi->imd_pcib_type = IMMU_PCIB_BAD; 681 682 if (bus == -1) { 683 immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF; 684 return (immu_devi); 685 } 686 687 immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func); 688 immu_devi->imd_sec = 0; 689 immu_devi->imd_sub = 0; 690 691 revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID); 692 693 classcode = IMMU_PCI_REV2CLASS(revclass); 694 baseclass = IMMU_PCI_CLASS2BASE(classcode); 695 subclass = IMMU_PCI_CLASS2SUB(classcode); 696 697 if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) { 698 699 immu_devi->imd_sec = pci_getb_func(bus, dev, func, 700 PCI_BCNF_SECBUS); 701 immu_devi->imd_sub = pci_getb_func(bus, dev, func, 702 PCI_BCNF_SUBBUS); 703 704 pciex = device_is_pciex(bus, dev, func, &is_pcib); 705 if (pciex == B_TRUE && is_pcib == B_TRUE) { 706 immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI; 707 } else if (pciex == B_TRUE) { 708 immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE; 709 } else { 710 immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI; 711 } 712 } else { 713 immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT; 714 } 715 716 /* check for certain special devices */ 717 immu_devi->imd_display = device_is_display(classcode); 718 719 immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) && 720 (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE; 721 722 immu_devi->imd_domain = NULL; 723 724 return (immu_devi); 725 } 726 727 static void 728 destroy_immu_devi(immu_devi_t *immu_devi) 729 { 730 kmem_free(immu_devi, sizeof (immu_devi_t)); 731 } 732 733 static domain_t * 734 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp) 735 { 736 immu_devi_t *immu_devi; 737 domain_t *domain; 738 dev_info_t *ddip; 739 740 ASSERT(rdip); 741 ASSERT(ddipp); 742 743 *ddipp = NULL; 744 745 immu_devi = immu_devi_get(rdip); 746 if (immu_devi == NULL) { 747 return (NULL); 748 } 749 750 mutex_enter(&(DEVI(rdip)->devi_lock)); 751 domain = immu_devi->imd_domain; 752 ddip = immu_devi->imd_ddip; 753 mutex_exit(&(DEVI(rdip)->devi_lock)); 754 755 if (domain) { 756 ASSERT(domain->dom_did > 0); 757 ASSERT(ddip); 758 *ddipp = ddip; 759 } 760 761 return (domain); 762 763 } 764 765 /* ############################# END IMMU_DEVI code ######################## */ 766 /* ############################# DOMAIN code ############################### */ 767 768 /* 769 * This routine always succeeds 770 */ 771 static int 772 did_alloc(immu_t *immu, dev_info_t *rdip, 773 dev_info_t *ddip, immu_flags_t immu_flags) 774 { 775 int did; 776 777 ASSERT(immu); 778 ASSERT(rdip); 779 ASSERT(rdip != root_devinfo); 780 781 did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1, 782 (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP); 783 784 if (did == 0) { 785 ASSERT(immu->immu_unity_domain); 786 ASSERT(immu->immu_unity_domain->dom_did > 0); 787 ddi_err(DER_WARN, rdip, "device domain-id alloc error" 788 " domain-device: %s%d. immu unit is %s. 
Using " 789 "unity domain with domain-id (%d)", 790 ddi_driver_name(ddip), ddi_get_instance(ddip), 791 immu->immu_name, immu->immu_unity_domain->dom_did); 792 did = immu->immu_unity_domain->dom_did; 793 } 794 795 return (did); 796 } 797 798 static int 799 get_branch_domain(dev_info_t *pdip, void *arg) 800 { 801 immu_devi_t *immu_devi; 802 domain_t *domain; 803 dev_info_t *ddip; 804 immu_t *immu; 805 dvma_arg_t *dvp = (dvma_arg_t *)arg; 806 807 ASSERT(pdip); 808 ASSERT(dvp); 809 ASSERT(dvp->dva_rdip); 810 811 /* 812 * The field dvp->dva_rdip is a work-in-progress 813 * and gets updated as we walk up the ancestor 814 * tree. The final ddip is set only when we reach 815 * the top of the tree. So the dvp->dva_ddip field cannot 816 * be relied on until we reach the top of the field. 817 */ 818 819 /* immu_devi may not be set. */ 820 immu_devi = immu_devi_get(pdip); 821 if (immu_devi == NULL) { 822 if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) { 823 dvp->dva_error = DDI_FAILURE; 824 return (DDI_WALK_TERMINATE); 825 } 826 } 827 828 immu_devi = immu_devi_get(pdip); 829 ASSERT(immu_devi); 830 immu = immu_devi->imd_immu; 831 if (immu == NULL) { 832 immu = immu_dvma_get_immu(pdip, dvp->dva_flags); 833 ASSERT(immu); 834 } 835 836 /* 837 * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to 838 * terminate the walk (since the device under the PCIE bridge 839 * is a PCIE device and has an independent entry in the 840 * root/context table) 841 */ 842 if (dvp->dva_rdip != pdip && 843 immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) { 844 return (DDI_WALK_TERMINATE); 845 } 846 847 /* 848 * In order to be a domain-dim, it must be a PCI device i.e. 849 * must have valid BDF. This also eliminates the root complex. 850 */ 851 if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD && 852 immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) { 853 ASSERT(immu_devi->imd_bus >= 0); 854 ASSERT(immu_devi->imd_devfunc >= 0); 855 dvp->dva_ddip = pdip; 856 } 857 858 if (immu_devi->imd_display == B_TRUE || 859 (dvp->dva_flags & IMMU_FLAGS_UNITY)) { 860 dvp->dva_domain = immu->immu_unity_domain; 861 /* continue walking to find ddip */ 862 return (DDI_WALK_CONTINUE); 863 } 864 865 mutex_enter(&(DEVI(pdip)->devi_lock)); 866 domain = immu_devi->imd_domain; 867 ddip = immu_devi->imd_ddip; 868 mutex_exit(&(DEVI(pdip)->devi_lock)); 869 870 if (domain && ddip) { 871 /* if domain is set, it must be the same */ 872 if (dvp->dva_domain) { 873 ASSERT(domain == dvp->dva_domain); 874 } 875 dvp->dva_domain = domain; 876 dvp->dva_ddip = ddip; 877 return (DDI_WALK_TERMINATE); 878 } 879 880 /* immu_devi either has both set or both clear */ 881 ASSERT(domain == NULL); 882 ASSERT(ddip == NULL); 883 884 /* Domain may already be set, continue walking so that ddip gets set */ 885 if (dvp->dva_domain) { 886 return (DDI_WALK_CONTINUE); 887 } 888 889 /* domain is not set in either immu_devi or dvp */ 890 domain = bdf_domain_lookup(immu_devi); 891 if (domain == NULL) { 892 return (DDI_WALK_CONTINUE); 893 } 894 895 /* ok, the BDF hash had a domain for this BDF. 
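 * The hash key is the packed source-id, built the same way as in
 * bdf_domain_lookup():
 *
 *     bdf = (seg << 16) | (bus << 8) | devfunc
 *
 * so this walk finds a domain that an earlier domain_create() recorded
 * for the same seg, bus and dev/func via bdf_domain_insert().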
*/ 896 897 /* Grab lock again to check if something else set immu_devi fields */ 898 mutex_enter(&(DEVI(pdip)->devi_lock)); 899 if (immu_devi->imd_domain != NULL) { 900 ASSERT(immu_devi->imd_domain == domain); 901 dvp->dva_domain = domain; 902 } else { 903 dvp->dva_domain = domain; 904 } 905 mutex_exit(&(DEVI(pdip)->devi_lock)); 906 907 /* 908 * walk upwards until the topmost PCI bridge is found 909 */ 910 return (DDI_WALK_CONTINUE); 911 912 } 913 914 static void 915 map_unity_domain(domain_t *domain) 916 { 917 struct memlist *mp; 918 uint64_t start; 919 uint64_t npages; 920 dcookie_t dcookies[1] = {0}; 921 int dcount = 0; 922 923 ASSERT(domain); 924 ASSERT(domain->dom_did == IMMU_UNITY_DID); 925 926 /* 927 * We call into routines that grab the lock so we should 928 * not be called with the lock held. This does not matter 929 * much since, no else has a reference to this domain 930 */ 931 ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock))); 932 933 /* 934 * UNITY arenas are a mirror of the physical memory 935 * installed on the system. 936 */ 937 938 #ifdef BUGGY_DRIVERS 939 /* 940 * Dont skip page0. Some broken HW/FW access it. 941 */ 942 dcookies[0].dck_paddr = 0; 943 dcookies[0].dck_npages = 1; 944 dcount = 1; 945 (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL, 946 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1); 947 #endif 948 949 memlist_read_lock(); 950 951 mp = phys_install; 952 953 if (mp->ml_address == 0) { 954 /* since we already mapped page1 above */ 955 start = IMMU_PAGESIZE; 956 } else { 957 start = mp->ml_address; 958 } 959 npages = mp->ml_size/IMMU_PAGESIZE + 1; 960 961 dcookies[0].dck_paddr = start; 962 dcookies[0].dck_npages = npages; 963 dcount = 1; 964 (void) dvma_map(domain->dom_immu, domain, start, npages, dcookies, 965 dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 966 967 ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 968 " - 0x%" PRIx64 "]", start, start + mp->ml_size); 969 970 mp = mp->ml_next; 971 while (mp) { 972 ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 973 " - 0x%" PRIx64 "]", mp->ml_address, 974 mp->ml_address + mp->ml_size); 975 976 start = mp->ml_address; 977 npages = mp->ml_size/IMMU_PAGESIZE + 1; 978 979 dcookies[0].dck_paddr = start; 980 dcookies[0].dck_npages = npages; 981 dcount = 1; 982 (void) dvma_map(domain->dom_immu, domain, start, npages, 983 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 984 mp = mp->ml_next; 985 } 986 987 mp = bios_rsvd; 988 while (mp) { 989 ddi_err(DER_LOG, NULL, "IMMU: mapping PHYS span [0x%" PRIx64 990 " - 0x%" PRIx64 "]", mp->ml_address, 991 mp->ml_address + mp->ml_size); 992 993 start = mp->ml_address; 994 npages = mp->ml_size/IMMU_PAGESIZE + 1; 995 996 dcookies[0].dck_paddr = start; 997 dcookies[0].dck_npages = npages; 998 dcount = 1; 999 (void) dvma_map(domain->dom_immu, domain, start, npages, 1000 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 1001 1002 mp = mp->ml_next; 1003 } 1004 1005 memlist_read_unlock(); 1006 } 1007 1008 /* 1009 * create_xlate_arena() 1010 * Create the dvma arena for a domain with translation 1011 * mapping 1012 */ 1013 static void 1014 create_xlate_arena(immu_t *immu, domain_t *domain, 1015 dev_info_t *rdip, immu_flags_t immu_flags) 1016 { 1017 char *arena_name; 1018 struct memlist *mp; 1019 int vmem_flags; 1020 uint64_t start; 1021 uint_t mgaw; 1022 uint64_t size; 1023 uint64_t maxaddr; 1024 void *vmem_ret; 1025 1026 arena_name = domain->dom_dvma_arena_name; 1027 1028 /* Note, don't do sizeof 
(arena_name) - it is just a pointer */ 1029 (void) snprintf(arena_name, 1030 sizeof (domain->dom_dvma_arena_name), 1031 "%s-domain-%d-xlate-DVMA-arena", immu->immu_name, 1032 domain->dom_did); 1033 1034 vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP; 1035 1036 /* 1037 * No one else has access to this domain. 1038 * So no domain locks needed 1039 */ 1040 ASSERT(!rw_lock_held(&(domain->dom_pgtable_rwlock))); 1041 1042 /* Restrict mgaddr (max guest addr) to MGAW */ 1043 mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap); 1044 1045 /* 1046 * To ensure we avoid ioapic and PCI MMIO ranges we just 1047 * use the physical memory address range of the system as the 1048 * range 1049 */ 1050 maxaddr = ((uint64_t)1 << mgaw); 1051 1052 memlist_read_lock(); 1053 1054 mp = phys_install; 1055 1056 if (mp->ml_address == 0) 1057 start = MMU_PAGESIZE; 1058 else 1059 start = mp->ml_address; 1060 1061 if (start + mp->ml_size > maxaddr) 1062 size = maxaddr - start; 1063 else 1064 size = mp->ml_size; 1065 1066 ddi_err(DER_VERB, rdip, 1067 "%s: Creating dvma vmem arena [0x%" PRIx64 1068 " - 0x%" PRIx64 "]", arena_name, start, start + size); 1069 1070 ASSERT(domain->dom_dvma_arena == NULL); 1071 1072 /* 1073 * We always allocate in quanta of IMMU_PAGESIZE 1074 */ 1075 domain->dom_dvma_arena = vmem_create(arena_name, 1076 (void *)(uintptr_t)start, /* start addr */ 1077 size, /* size */ 1078 IMMU_PAGESIZE, /* quantum */ 1079 NULL, /* afunc */ 1080 NULL, /* ffunc */ 1081 NULL, /* source */ 1082 0, /* qcache_max */ 1083 vmem_flags); 1084 1085 if (domain->dom_dvma_arena == NULL) { 1086 ddi_err(DER_PANIC, rdip, 1087 "Failed to allocate DVMA arena(%s) " 1088 "for domain ID (%d)", arena_name, domain->dom_did); 1089 /*NOTREACHED*/ 1090 } 1091 1092 mp = mp->ml_next; 1093 while (mp) { 1094 1095 if (mp->ml_address == 0) 1096 start = MMU_PAGESIZE; 1097 else 1098 start = mp->ml_address; 1099 1100 if (start + mp->ml_size > maxaddr) 1101 size = maxaddr - start; 1102 else 1103 size = mp->ml_size; 1104 1105 ddi_err(DER_VERB, rdip, 1106 "%s: Adding dvma vmem span [0x%" PRIx64 1107 " - 0x%" PRIx64 "]", arena_name, start, 1108 start + size); 1109 1110 vmem_ret = vmem_add(domain->dom_dvma_arena, 1111 (void *)(uintptr_t)start, size, vmem_flags); 1112 1113 if (vmem_ret == NULL) { 1114 ddi_err(DER_PANIC, rdip, 1115 "Failed to allocate DVMA arena(%s) " 1116 "for domain ID (%d)", 1117 arena_name, domain->dom_did); 1118 /*NOTREACHED*/ 1119 } 1120 mp = mp->ml_next; 1121 } 1122 memlist_read_unlock(); 1123 } 1124 1125 /* ################################### DOMAIN CODE ######################### */ 1126 1127 /* 1128 * Set the domain and domain-dip for a dip 1129 */ 1130 static void 1131 set_domain( 1132 dev_info_t *dip, 1133 dev_info_t *ddip, 1134 domain_t *domain) 1135 { 1136 immu_devi_t *immu_devi; 1137 domain_t *fdomain; 1138 dev_info_t *fddip; 1139 1140 ASSERT(dip); 1141 ASSERT(ddip); 1142 ASSERT(domain); 1143 ASSERT(domain->dom_did > 0); /* must be an initialized domain */ 1144 1145 immu_devi = immu_devi_get(dip); 1146 ASSERT(immu_devi); 1147 1148 mutex_enter(&(DEVI(dip)->devi_lock)); 1149 fddip = immu_devi->imd_ddip; 1150 fdomain = immu_devi->imd_domain; 1151 1152 if (fddip) { 1153 ASSERT(fddip == ddip); 1154 } else { 1155 immu_devi->imd_ddip = ddip; 1156 } 1157 1158 if (fdomain) { 1159 ASSERT(fdomain == domain); 1160 } else { 1161 immu_devi->imd_domain = domain; 1162 } 1163 mutex_exit(&(DEVI(dip)->devi_lock)); 1164 } 1165 1166 /* 1167 * device_domain() 1168 * Get domain for a device. 
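 * (Lookup order: the domain cached in the device's immu_devi first,
 * then an ancestor walk using get_branch_domain(), and finally
 * domain_create() if no existing domain was found on the walk.)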
The domain may be global in which case it 1169 * is shared between all IOMMU units. Due to potential AGAW differences 1170 * between IOMMU units, such global domains *have to be* UNITY mapping 1171 * domains. Alternatively, the domain may be local to a IOMMU unit. 1172 * Local domains may be shared or immu_devi, although the 1173 * scope of sharing 1174 * is restricted to devices controlled by the IOMMU unit to 1175 * which the domain 1176 * belongs. If shared, they (currently) have to be UNITY domains. If 1177 * immu_devi a domain may be either UNITY or translation (XLATE) domain. 1178 */ 1179 static domain_t * 1180 device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags) 1181 { 1182 dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */ 1183 immu_t *immu; 1184 domain_t *domain; 1185 dvma_arg_t dvarg = {0}; 1186 int level; 1187 1188 ASSERT(rdip); 1189 1190 *ddipp = NULL; 1191 1192 /* 1193 * Check if the domain is already set. This is usually true 1194 * if this is not the first DVMA transaction. 1195 */ 1196 ddip = NULL; 1197 domain = immu_devi_domain(rdip, &ddip); 1198 if (domain) { 1199 ASSERT(domain->dom_did > 0); 1200 ASSERT(ddip); 1201 *ddipp = ddip; 1202 return (domain); 1203 } 1204 1205 immu = immu_dvma_get_immu(rdip, immu_flags); 1206 if (immu == NULL) { 1207 /* 1208 * possible that there is no IOMMU unit for this device 1209 * - BIOS bugs are one example. 1210 */ 1211 ddi_err(DER_WARN, rdip, "No IMMU unit found for device"); 1212 return (NULL); 1213 } 1214 1215 dvarg.dva_rdip = rdip; 1216 dvarg.dva_ddip = NULL; 1217 dvarg.dva_domain = NULL; 1218 dvarg.dva_flags = immu_flags; 1219 level = 0; 1220 if (immu_walk_ancestor(rdip, NULL, get_branch_domain, 1221 &dvarg, &level, immu_flags) != DDI_SUCCESS) { 1222 /* 1223 * maybe low memory. return error, 1224 * so driver tries again later 1225 */ 1226 return (NULL); 1227 } 1228 1229 /* should have walked at least 1 dip (i.e. edip) */ 1230 ASSERT(level > 0); 1231 1232 ddip = dvarg.dva_ddip; /* must be present */ 1233 domain = dvarg.dva_domain; /* may be NULL */ 1234 1235 /* 1236 * We may find the domain during our ancestor walk on any one of our 1237 * ancestor dips, If the domain is found then the domain-dip 1238 * (i.e. ddip) will also be found in the same immu_devi struct. 1239 * The domain-dip is the highest ancestor dip which shares the 1240 * same domain with edip. 1241 * The domain may or may not be found, but the domain dip must 1242 * be found. 1243 */ 1244 if (ddip == NULL) { 1245 ddi_err(DER_MODE, rdip, "Cannot find domain dip for device."); 1246 return (NULL); 1247 } 1248 1249 /* 1250 * Did we find a domain ? 1251 */ 1252 if (domain) { 1253 goto found; 1254 } 1255 1256 /* nope, so allocate */ 1257 domain = domain_create(immu, ddip, rdip, immu_flags); 1258 if (domain == NULL) { 1259 return (NULL); 1260 } 1261 ASSERT(domain->dom_did > 0); 1262 1263 /*FALLTHROUGH*/ 1264 found: 1265 /* 1266 * We know *domain *is* the right domain, so panic if 1267 * another domain is set for either the request-dip or 1268 * effective dip. 
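 * (set_domain() enforces this under devi_lock with ASSERTs: an
 * imd_domain or imd_ddip that is already set must match the one
 * being installed here.)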
1269 */ 1270 set_domain(ddip, ddip, domain); 1271 set_domain(rdip, ddip, domain); 1272 1273 *ddipp = ddip; 1274 return (domain); 1275 } 1276 1277 static void 1278 create_unity_domain(immu_t *immu) 1279 { 1280 domain_t *domain; 1281 1282 /* 0 is reserved by Vt-d */ 1283 /*LINTED*/ 1284 ASSERT(IMMU_UNITY_DID > 0); 1285 1286 /* domain created during boot and always use sleep flag */ 1287 domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP); 1288 1289 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL); 1290 1291 domain->dom_did = IMMU_UNITY_DID; 1292 domain->dom_maptype = IMMU_MAPTYPE_UNITY; 1293 1294 domain->dom_immu = immu; 1295 immu->immu_unity_domain = domain; 1296 1297 /* 1298 * Setup the domain's initial page table 1299 * should never fail. 1300 */ 1301 domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP); 1302 ASSERT(domain->dom_pgtable_root); 1303 pgtable_zero(immu, domain->dom_pgtable_root); 1304 1305 map_unity_domain(domain); 1306 1307 /* 1308 * put it on the system-wide UNITY domain list 1309 */ 1310 mutex_enter(&(immu_domain_lock)); 1311 list_insert_tail(&immu_unity_domain_list, domain); 1312 mutex_exit(&(immu_domain_lock)); 1313 } 1314 1315 /* 1316 * ddip is the domain-dip - the topmost dip in a domain 1317 * rdip is the requesting-dip - the device which is 1318 * requesting DVMA setup 1319 * if domain is a non-shared domain rdip == ddip 1320 */ 1321 static domain_t * 1322 domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip, 1323 immu_flags_t immu_flags) 1324 { 1325 int kmflags; 1326 domain_t *domain; 1327 char mod_hash_name[128]; 1328 immu_devi_t *immu_devi; 1329 int did; 1330 dcookie_t dcookies[1] = {0}; 1331 int dcount = 0; 1332 1333 ASSERT(immu); 1334 ASSERT(ddip); 1335 1336 immu_devi = immu_devi_get(rdip); 1337 1338 ASSERT(immu_devi); 1339 1340 /* 1341 * First allocate a domainid. 1342 * This routine will never fail, since if we run out 1343 * of domains the unity domain will be allocated. 1344 */ 1345 did = did_alloc(immu, rdip, ddip, immu_flags); 1346 ASSERT(did > 0); 1347 if (did == IMMU_UNITY_DID) { 1348 /* domain overflow */ 1349 ASSERT(immu->immu_unity_domain); 1350 return (immu->immu_unity_domain); 1351 } 1352 1353 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 1354 domain = kmem_zalloc(sizeof (domain_t), kmflags); 1355 if (domain == NULL) { 1356 ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain " 1357 "structure for device. IOMMU unit: %s", immu->immu_name); 1358 /*NOTREACHED*/ 1359 } 1360 1361 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL); 1362 1363 (void) snprintf(mod_hash_name, sizeof (mod_hash_name), 1364 "immu%s-domain%d-pava-hash", immu->immu_name, did); 1365 1366 domain->dom_did = did; 1367 domain->dom_immu = immu; 1368 domain->dom_maptype = IMMU_MAPTYPE_XLATE; 1369 1370 /* 1371 * Create xlate DVMA arena for this domain. 1372 */ 1373 create_xlate_arena(immu, domain, rdip, immu_flags); 1374 1375 /* 1376 * Setup the domain's initial page table 1377 */ 1378 domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags); 1379 if (domain->dom_pgtable_root == NULL) { 1380 ddi_err(DER_PANIC, rdip, "Failed to alloc root " 1381 "pgtable for domain (%d). IOMMU unit: %s", 1382 domain->dom_did, immu->immu_name); 1383 /*NOTREACHED*/ 1384 } 1385 pgtable_zero(immu, domain->dom_pgtable_root); 1386 1387 /* 1388 * Since this is a immu unit-specific domain, put it on 1389 * the per-immu domain list. 
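 * The domain is also recorded in the BDF hash below via
 * bdf_domain_insert(), so get_branch_domain() can find it again by
 * source-id the next time a device with this BDF sets up DVMA.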
1390 */ 1391 mutex_enter(&(immu->immu_lock)); 1392 list_insert_head(&immu->immu_domain_list, domain); 1393 mutex_exit(&(immu->immu_lock)); 1394 1395 /* 1396 * Also put it on the system-wide xlate domain list 1397 */ 1398 mutex_enter(&(immu_domain_lock)); 1399 list_insert_head(&immu_xlate_domain_list, domain); 1400 mutex_exit(&(immu_domain_lock)); 1401 1402 bdf_domain_insert(immu_devi, domain); 1403 1404 #ifdef BUGGY_DRIVERS 1405 /* 1406 * Map page0. Some broken HW/FW access it. 1407 */ 1408 dcookies[0].dck_paddr = 0; 1409 dcookies[0].dck_npages = 1; 1410 dcount = 1; 1411 (void) dvma_map(domain->dom_immu, domain, 0, 1, dcookies, dcount, NULL, 1412 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1); 1413 #endif 1414 return (domain); 1415 } 1416 1417 /* 1418 * Create domainid arena. 1419 * Domainid 0 is reserved by Vt-d spec and cannot be used by 1420 * system software. 1421 * Domainid 1 is reserved by solaris and used for *all* of the following: 1422 * as the "uninitialized" domain - For devices not yet controlled 1423 * by Solaris 1424 * as the "unity" domain - For devices that will always belong 1425 * to the unity domain 1426 * as the "overflow" domain - Used for any new device after we 1427 * run out of domains 1428 * All of the above domains map into a single domain with 1429 * domainid 1 and UNITY DVMA mapping 1430 * Each IMMU unity has its own unity/uninit/overflow domain 1431 */ 1432 static void 1433 did_init(immu_t *immu) 1434 { 1435 (void) snprintf(immu->immu_did_arena_name, 1436 sizeof (immu->immu_did_arena_name), 1437 "%s_domainid_arena", immu->immu_name); 1438 1439 ddi_err(DER_VERB, NULL, "%s: Creating domainid arena %s", 1440 immu->immu_name, immu->immu_did_arena_name); 1441 1442 immu->immu_did_arena = vmem_create( 1443 immu->immu_did_arena_name, 1444 (void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */ 1445 immu->immu_max_domains - IMMU_UNITY_DID, 1446 1, /* quantum */ 1447 NULL, /* afunc */ 1448 NULL, /* ffunc */ 1449 NULL, /* source */ 1450 0, /* qcache_max */ 1451 VM_SLEEP); 1452 1453 /* Even with SLEEP flag, vmem_create() can fail */ 1454 if (immu->immu_did_arena == NULL) { 1455 ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel " 1456 "IOMMU domainid allocator: %s", immu->immu_name, 1457 immu->immu_did_arena_name); 1458 } 1459 } 1460 1461 /* ######################### CONTEXT CODE ################################# */ 1462 1463 static void 1464 context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table, 1465 int bus, int devfunc) 1466 { 1467 pgtable_t *context; 1468 pgtable_t *pgtable_root; 1469 pgtable_t *unity_pgtable_root; 1470 hw_rce_t *hw_rent; 1471 hw_rce_t *hw_cent; 1472 hw_rce_t *ctxp; 1473 int sid; 1474 krw_t rwtype; 1475 boolean_t fill_root; 1476 boolean_t fill_ctx; 1477 1478 ASSERT(immu); 1479 ASSERT(domain); 1480 ASSERT(root_table); 1481 ASSERT(bus >= 0); 1482 ASSERT(devfunc >= 0); 1483 ASSERT(domain->dom_pgtable_root); 1484 1485 pgtable_root = domain->dom_pgtable_root; 1486 1487 ctxp = (hw_rce_t *)(root_table->swpg_next_array); 1488 context = *(pgtable_t **)(ctxp + bus); 1489 hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus; 1490 1491 fill_root = B_FALSE; 1492 fill_ctx = B_FALSE; 1493 1494 /* Check the most common case first with reader lock */ 1495 rw_enter(&(immu->immu_ctx_rwlock), RW_READER); 1496 rwtype = RW_READER; 1497 again: 1498 if (ROOT_GET_P(hw_rent)) { 1499 ASSERT(ROOT_GET_CONT(hw_rent) == context->hwpg_paddr); 1500 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc; 1501 if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) { 
1502 ASSERT(CONT_GET_P(hw_cent)); 1503 ASSERT(CONT_GET_DID(hw_cent) == domain->dom_did); 1504 ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw); 1505 ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY); 1506 ASSERT(CONT_GET_ASR(hw_cent) == 1507 pgtable_root->hwpg_paddr); 1508 rw_exit(&(immu->immu_ctx_rwlock)); 1509 return; 1510 } else { 1511 fill_ctx = B_TRUE; 1512 } 1513 } else { 1514 fill_root = B_TRUE; 1515 fill_ctx = B_TRUE; 1516 } 1517 1518 if (rwtype == RW_READER && 1519 rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) { 1520 rw_exit(&(immu->immu_ctx_rwlock)); 1521 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); 1522 rwtype = RW_WRITER; 1523 goto again; 1524 } 1525 rwtype = RW_WRITER; 1526 1527 if (fill_root == B_TRUE) { 1528 ROOT_SET_CONT(hw_rent, context->hwpg_paddr); 1529 ROOT_SET_P(hw_rent); 1530 immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t)); 1531 } 1532 1533 if (fill_ctx == B_TRUE) { 1534 hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc; 1535 unity_pgtable_root = immu->immu_unity_domain->dom_pgtable_root; 1536 ASSERT(CONT_GET_AVAIL(hw_cent) == IMMU_CONT_UNINITED); 1537 ASSERT(CONT_GET_P(hw_cent)); 1538 ASSERT(CONT_GET_DID(hw_cent) == 1539 immu->immu_unity_domain->dom_did); 1540 ASSERT(CONT_GET_AW(hw_cent) == immu->immu_dvma_agaw); 1541 ASSERT(CONT_GET_TTYPE(hw_cent) == TTYPE_XLATE_ONLY); 1542 ASSERT(CONT_GET_ASR(hw_cent) == 1543 unity_pgtable_root->hwpg_paddr); 1544 1545 /* need to disable context entry before reprogramming it */ 1546 bzero(hw_cent, sizeof (hw_rce_t)); 1547 1548 /* flush caches */ 1549 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t)); 1550 ASSERT(rw_write_held(&(immu->immu_ctx_rwlock))); 1551 1552 sid = ((bus << 8) | devfunc); 1553 immu_regs_context_flush(immu, 0, sid, domain->dom_did, 1554 CONTEXT_FSI); 1555 1556 immu_regs_wbf_flush(immu); 1557 1558 CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED); 1559 CONT_SET_DID(hw_cent, domain->dom_did); 1560 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw); 1561 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr); 1562 /*LINTED*/ 1563 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY); 1564 CONT_SET_P(hw_cent); 1565 immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t)); 1566 } 1567 rw_exit(&(immu->immu_ctx_rwlock)); 1568 } 1569 1570 static pgtable_t * 1571 context_create(immu_t *immu) 1572 { 1573 int bus; 1574 int devfunc; 1575 pgtable_t *root_table; 1576 pgtable_t *context; 1577 pgtable_t *pgtable_root; 1578 hw_rce_t *ctxp; 1579 hw_rce_t *hw_rent; 1580 hw_rce_t *hw_cent; 1581 1582 /* Allocate a zeroed root table (4K 256b entries) */ 1583 root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP); 1584 pgtable_zero(immu, root_table); 1585 1586 /* 1587 * Setup context tables for all possible root table entries. 1588 * Start out with unity domains for all entries. 
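 * Every root entry gets its own context table and every dev/func
 * entry initially points at the unity domain's page-table root,
 * marked IMMU_CONT_UNINITED so that context_set() knows it may later
 * reprogram the entry for a translation domain. When it does, the old
 * entry is cleared and flushed by source-id, computed as in
 * context_set():
 *
 *     sid = (bus << 8) | devfunc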
1589 */ 1590 ctxp = (hw_rce_t *)(root_table->swpg_next_array); 1591 hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr); 1592 for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) { 1593 context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP); 1594 pgtable_zero(immu, context); 1595 ASSERT(ROOT_GET_P(hw_rent) == 0); 1596 ROOT_SET_P(hw_rent); 1597 ROOT_SET_CONT(hw_rent, context->hwpg_paddr); 1598 hw_cent = (hw_rce_t *)(context->hwpg_vaddr); 1599 for (devfunc = 0; devfunc < IMMU_CONT_NUM; 1600 devfunc++, hw_cent++) { 1601 ASSERT(CONT_GET_P(hw_cent) == 0); 1602 pgtable_root = 1603 immu->immu_unity_domain->dom_pgtable_root; 1604 CONT_SET_DID(hw_cent, 1605 immu->immu_unity_domain->dom_did); 1606 CONT_SET_AW(hw_cent, immu->immu_dvma_agaw); 1607 CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr); 1608 /*LINTED*/ 1609 CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY); 1610 CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED); 1611 CONT_SET_P(hw_cent); 1612 } 1613 immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE); 1614 *((pgtable_t **)ctxp) = context; 1615 } 1616 immu_regs_cpu_flush(immu, root_table->hwpg_vaddr, IMMU_PAGESIZE); 1617 1618 return (root_table); 1619 } 1620 1621 /* 1622 * Called during rootnex attach, so no locks needed 1623 */ 1624 static void 1625 context_init(immu_t *immu) 1626 { 1627 ASSERT(immu); 1628 ASSERT(immu->immu_ctx_root == NULL); 1629 1630 rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL); 1631 1632 immu_regs_wbf_flush(immu); 1633 1634 immu->immu_ctx_root = context_create(immu); 1635 1636 immu_regs_set_root_table(immu); 1637 1638 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); 1639 immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL); 1640 rw_exit(&(immu->immu_ctx_rwlock)); 1641 immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL); 1642 immu_regs_wbf_flush(immu); 1643 } 1644 1645 1646 /* 1647 * Find top pcib 1648 */ 1649 static int 1650 find_top_pcib(dev_info_t *dip, void *arg) 1651 { 1652 immu_devi_t *immu_devi; 1653 dev_info_t **pcibdipp = (dev_info_t **)arg; 1654 1655 ASSERT(dip); 1656 1657 immu_devi = immu_devi_get(dip); 1658 ASSERT(immu_devi); 1659 1660 if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) { 1661 *pcibdipp = dip; 1662 } 1663 1664 return (DDI_WALK_CONTINUE); 1665 } 1666 1667 static int 1668 immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip, 1669 dev_info_t *rdip, immu_flags_t immu_flags) 1670 { 1671 immu_devi_t *r_immu_devi; 1672 immu_devi_t *d_immu_devi; 1673 int r_bus; 1674 int d_bus; 1675 int r_devfunc; 1676 int d_devfunc; 1677 immu_pcib_t d_pcib_type; 1678 immu_pcib_t r_pcib_type; 1679 dev_info_t *pcibdip; 1680 1681 if (ddip == NULL || rdip == NULL || 1682 ddip == root_devinfo || rdip == root_devinfo) { 1683 ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or " 1684 "request-dip are NULL or are root devinfo"); 1685 return (DDI_FAILURE); 1686 } 1687 1688 /* 1689 * We need to set the context fields 1690 * based on what type of device rdip and ddip are. 1691 * To do that we need the immu_devi field. 
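 * (The key field is imd_pcib_type, computed in create_immu_devi(),
 * which tells us whether each dip is an endpoint, a PCIE-PCIE bridge,
 * a PCIE-PCI bridge or a PCI-PCI bridge.)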
1692 * Set the immu_devi field (if not already set) 1693 */ 1694 if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) { 1695 ddi_err(DER_MODE, rdip, 1696 "immu_context_update: failed to set immu_devi for ddip"); 1697 return (DDI_FAILURE); 1698 } 1699 1700 if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) { 1701 ddi_err(DER_MODE, rdip, 1702 "immu_context_update: failed to set immu_devi for rdip"); 1703 return (DDI_FAILURE); 1704 } 1705 1706 d_immu_devi = immu_devi_get(ddip); 1707 r_immu_devi = immu_devi_get(rdip); 1708 ASSERT(r_immu_devi); 1709 ASSERT(d_immu_devi); 1710 1711 d_bus = d_immu_devi->imd_bus; 1712 d_devfunc = d_immu_devi->imd_devfunc; 1713 d_pcib_type = d_immu_devi->imd_pcib_type; 1714 r_bus = r_immu_devi->imd_bus; 1715 r_devfunc = r_immu_devi->imd_devfunc; 1716 r_pcib_type = r_immu_devi->imd_pcib_type; 1717 1718 ASSERT(d_bus >= 0); 1719 1720 if (rdip == ddip) { 1721 ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT || 1722 d_pcib_type == IMMU_PCIB_PCIE_PCIE); 1723 ASSERT(r_bus >= 0); 1724 ASSERT(r_devfunc >= 0); 1725 /* rdip is a PCIE device. set context for it only */ 1726 context_set(immu, domain, immu->immu_ctx_root, r_bus, 1727 r_devfunc); 1728 #ifdef BUGGY_DRIVERS 1729 } else if (r_immu_devi == d_immu_devi) { 1730 #ifdef TEST 1731 ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and " 1732 "0x%lx are identical", rdip, ddip); 1733 #endif 1734 ASSERT(d_pcib_type == IMMU_PCIB_ENDPOINT); 1735 ASSERT(r_bus >= 0); 1736 ASSERT(r_devfunc >= 0); 1737 /* rdip is a PCIE device. set context for it only */ 1738 context_set(immu, domain, immu->immu_ctx_root, r_bus, 1739 r_devfunc); 1740 #endif 1741 } else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) { 1742 /* 1743 * ddip is a PCIE_PCI bridge. Set context for ddip's 1744 * secondary bus. If rdip is on ddip's secondary 1745 * bus, set context for rdip. Else, set context 1746 * for rdip's PCI bridge on ddip's secondary bus. 1747 */ 1748 context_set(immu, domain, immu->immu_ctx_root, 1749 d_immu_devi->imd_sec, 0); 1750 if (d_immu_devi->imd_sec == r_bus) { 1751 context_set(immu, domain, immu->immu_ctx_root, 1752 r_bus, r_devfunc); 1753 } else { 1754 pcibdip = NULL; 1755 if (immu_walk_ancestor(rdip, ddip, find_top_pcib, 1756 &pcibdip, NULL, immu_flags) == DDI_SUCCESS && 1757 pcibdip != NULL) { 1758 ASSERT(pcibdip); 1759 r_immu_devi = immu_devi_get(pcibdip); 1760 ASSERT(d_immu_devi); 1761 ASSERT(d_immu_devi->imd_pcib_type == 1762 IMMU_PCIB_PCI_PCI); 1763 r_bus = r_immu_devi->imd_bus; 1764 r_devfunc = r_immu_devi->imd_devfunc; 1765 context_set(immu, domain, immu->immu_ctx_root, 1766 r_bus, r_devfunc); 1767 } else { 1768 ddi_err(DER_PANIC, rdip, "Failed to find PCI " 1769 " bridge for PCI device"); 1770 /*NOTREACHED*/ 1771 } 1772 } 1773 } else if (d_pcib_type == IMMU_PCIB_PCI_PCI) { 1774 context_set(immu, domain, immu->immu_ctx_root, d_bus, 1775 d_devfunc); 1776 } else if (d_pcib_type == IMMU_PCIB_ENDPOINT) { 1777 ASSERT(r_pcib_type == IMMU_PCIB_NOBDF); 1778 /* 1779 * ddip is a PCIE device which has a non-PCI device under it 1780 * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata 1781 */ 1782 context_set(immu, domain, immu->immu_ctx_root, d_bus, 1783 d_devfunc); 1784 } else { 1785 ddi_err(DER_PANIC, rdip, "unknown device type. 
Cannot " 1786 "set IMMU context."); 1787 /*NOTREACHED*/ 1788 } 1789 1790 /* XXX do we need a membar_producer() here */ 1791 return (DDI_SUCCESS); 1792 } 1793 1794 /* ##################### END CONTEXT CODE ################################## */ 1795 /* ##################### MAPPING CODE ################################## */ 1796 1797 1798 static boolean_t 1799 PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr, 1800 dev_info_t *rdip, immu_flags_t immu_flags) 1801 { 1802 if (immu_flags & IMMU_FLAGS_PAGE1) { 1803 ASSERT(paddr == 0); 1804 } else { 1805 ASSERT((next == NULL) ^ (paddr == 0)); 1806 } 1807 1808 /* The PDTE must be set i.e. present bit is set */ 1809 if (!PDTE_P(pdte)) { 1810 ddi_err(DER_MODE, rdip, "No present flag"); 1811 return (B_FALSE); 1812 } 1813 1814 /* 1815 * Just assert to check most significant system software field 1816 * (PDTE_SW4) as it is same as present bit and we 1817 * checked that above 1818 */ 1819 ASSERT(PDTE_SW4(pdte)); 1820 1821 /* 1822 * TM field should be clear if not reserved. 1823 * non-leaf is always reserved 1824 */ 1825 if (next == NULL && immu->immu_TM_reserved == B_FALSE) { 1826 if (PDTE_TM(pdte)) { 1827 ddi_err(DER_MODE, rdip, "TM flag set"); 1828 return (B_FALSE); 1829 } 1830 } 1831 1832 /* 1833 * The SW3 field is not used and must be clear 1834 */ 1835 if (PDTE_SW3(pdte)) { 1836 ddi_err(DER_MODE, rdip, "SW3 set"); 1837 return (B_FALSE); 1838 } 1839 1840 /* 1841 * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set 1842 */ 1843 if (next == NULL) { 1844 ASSERT(paddr % IMMU_PAGESIZE == 0); 1845 if (PDTE_PADDR(pdte) != paddr) { 1846 ddi_err(DER_MODE, rdip, 1847 "PTE paddr mismatch: %lx != %lx", 1848 PDTE_PADDR(pdte), paddr); 1849 return (B_FALSE); 1850 } 1851 } else { 1852 if (PDTE_PADDR(pdte) != next->hwpg_paddr) { 1853 ddi_err(DER_MODE, rdip, 1854 "PDE paddr mismatch: %lx != %lx", 1855 PDTE_PADDR(pdte), next->hwpg_paddr); 1856 return (B_FALSE); 1857 } 1858 } 1859 1860 /* 1861 * SNP field should be clear if not reserved. 
1862 * non-leaf is always reserved 1863 */ 1864 if (next == NULL && immu->immu_SNP_reserved == B_FALSE) { 1865 if (PDTE_SNP(pdte)) { 1866 ddi_err(DER_MODE, rdip, "SNP set"); 1867 return (B_FALSE); 1868 } 1869 } 1870 1871 /* second field available for system software should be clear */ 1872 if (PDTE_SW2(pdte)) { 1873 ddi_err(DER_MODE, rdip, "SW2 set"); 1874 return (B_FALSE); 1875 } 1876 1877 /* Super pages field should be clear */ 1878 if (PDTE_SP(pdte)) { 1879 ddi_err(DER_MODE, rdip, "SP set"); 1880 return (B_FALSE); 1881 } 1882 1883 /* 1884 * least significant field available for 1885 * system software should be clear 1886 */ 1887 if (PDTE_SW1(pdte)) { 1888 ddi_err(DER_MODE, rdip, "SW1 set"); 1889 return (B_FALSE); 1890 } 1891 1892 if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) { 1893 ddi_err(DER_MODE, rdip, "READ not set"); 1894 return (B_FALSE); 1895 } 1896 1897 if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) { 1898 ddi_err(DER_MODE, rdip, "WRITE not set"); 1899 return (B_FALSE); 1900 } 1901 1902 return (B_TRUE); 1903 } 1904 /*ARGSUSED*/ 1905 static void 1906 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate, 1907 uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip) 1908 { 1909 uint64_t npages; 1910 uint64_t dvma; 1911 pgtable_t *pgtable; 1912 hw_pdte_t *hwp; 1913 hw_pdte_t *shwp; 1914 int idx; 1915 hw_pdte_t pte; 1916 1917 ASSERT(xlate->xlt_level == 1); 1918 1919 pgtable = xlate->xlt_pgtable; 1920 idx = xlate->xlt_idx; 1921 1922 ASSERT(pgtable); 1923 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 1924 1925 dvma = *dvma_ptr; 1926 npages = *npages_ptr; 1927 1928 ASSERT(dvma); 1929 ASSERT(dvma % IMMU_PAGESIZE == 0); 1930 ASSERT(npages); 1931 1932 /* 1933 * since a caller gets a unique dvma for a physical address, 1934 * no other concurrent thread will be writing to the same 1935 * PTE even if it has the same paddr. So no locks needed. 1936 */ 1937 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; 1938 1939 hwp = shwp; 1940 for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) { 1941 1942 pte = *hwp; 1943 1944 /* Cannot clear a HW PTE that is aleady clear */ 1945 ASSERT(PDTE_P(pte)); 1946 PDTE_CLEAR_P(pte); 1947 *hwp = pte; 1948 1949 dvma += IMMU_PAGESIZE; 1950 npages--; 1951 } 1952 1953 1954 #ifdef TEST 1955 /* dont need to flush write during unmap */ 1956 immu_regs_cpu_flush(immu, (caddr_t)shwp, 1957 (hwp - shwp) * sizeof (hw_pdte_t)); 1958 #endif 1959 1960 *dvma_ptr = dvma; 1961 *npages_ptr = npages; 1962 1963 xlate->xlt_idx = idx; 1964 } 1965 1966 /*ARGSUSED*/ 1967 static void 1968 xlate_setup(immu_t *immu, uint64_t dvma, xlate_t *xlate, 1969 int nlevels, dev_info_t *rdip) 1970 { 1971 int level; 1972 uint64_t offbits; 1973 1974 /* level 0 is never used. Sanity check */ 1975 ASSERT(xlate->xlt_level == 0); 1976 ASSERT(xlate->xlt_idx == 0); 1977 ASSERT(xlate->xlt_pgtable == NULL); 1978 ASSERT(dvma % IMMU_PAGESIZE == 0); 1979 1980 /* 1981 * Skip the first 12 bits which is the offset into 1982 * 4K PFN (phys page frame based on IMMU_PAGESIZE) 1983 */ 1984 offbits = dvma >> IMMU_PAGESHIFT; 1985 1986 /* skip to level 1 i.e. 
leaf PTE */ 1987 for (level = 1, xlate++; level <= nlevels; level++, xlate++) { 1988 xlate->xlt_level = level; 1989 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK); 1990 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX); 1991 xlate->xlt_pgtable = NULL; 1992 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE; 1993 } 1994 } 1995 1996 /* 1997 * Read the pgtables 1998 */ 1999 static void 2000 PDE_lookup(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels, 2001 dev_info_t *rdip) 2002 { 2003 pgtable_t *pgtable; 2004 pgtable_t *next; 2005 hw_pdte_t pde; 2006 uint_t idx; 2007 2008 /* xlate should be at level 0 */ 2009 ASSERT(xlate->xlt_level == 0); 2010 ASSERT(xlate->xlt_idx == 0); 2011 2012 /* start with highest level pgtable i.e. root */ 2013 xlate += nlevels; 2014 ASSERT(xlate->xlt_level == nlevels); 2015 2016 if (xlate->xlt_pgtable == NULL) { 2017 xlate->xlt_pgtable = domain->dom_pgtable_root; 2018 } 2019 2020 for (; xlate->xlt_level > 1; xlate--) { 2021 2022 idx = xlate->xlt_idx; 2023 pgtable = xlate->xlt_pgtable; 2024 2025 ASSERT(pgtable); 2026 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 2027 2028 if ((xlate - 1)->xlt_pgtable) { 2029 continue; 2030 } 2031 2032 /* xlate's leafier level is not set, set it now */ 2033 2034 /* Lock the pgtable in read mode */ 2035 rw_enter(&(pgtable->swpg_rwlock), RW_READER); 2036 2037 /* 2038 * since we are unmapping, the pgtable should 2039 * already point to a leafier pgtable. 2040 */ 2041 next = *(pgtable->swpg_next_array + idx); 2042 ASSERT(next); 2043 2044 pde = *((hw_pdte_t *)(pgtable->hwpg_vaddr) + idx); 2045 2046 ASSERT(PDTE_check(immu, pde, next, 0, rdip, 0) == B_TRUE); 2047 2048 (xlate - 1)->xlt_pgtable = next; 2049 2050 rw_exit(&(pgtable->swpg_rwlock)); 2051 } 2052 } 2053 2054 /*ARGSUSED*/ 2055 static void 2056 PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr, 2057 dev_info_t *rdip, immu_flags_t immu_flags) 2058 { 2059 hw_pdte_t pte; 2060 2061 pte = *hwp; 2062 2063 #ifndef DEBUG 2064 /* Set paddr */ 2065 ASSERT(paddr % IMMU_PAGESIZE == 0); 2066 pte = 0; 2067 PDTE_SET_PADDR(pte, paddr); 2068 PDTE_SET_READ(pte); 2069 PDTE_SET_WRITE(pte); 2070 *hwp = pte; 2071 #else 2072 2073 if (PDTE_P(pte)) { 2074 if (PDTE_PADDR(pte) != paddr) { 2075 ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx", 2076 PDTE_PADDR(pte), paddr); 2077 } 2078 #ifdef BUGGY_DRIVERS 2079 return; 2080 #else 2081 goto out; 2082 #endif 2083 } 2084 2085 /* Don't touch SW4. It is the present field */ 2086 2087 /* clear TM field if not reserved */ 2088 if (immu->immu_TM_reserved == B_FALSE) { 2089 PDTE_CLEAR_TM(pte); 2090 } 2091 2092 #ifdef DEBUG 2093 /* Clear 3rd field for system software - not used */ 2094 PDTE_CLEAR_SW3(pte); 2095 #endif 2096 2097 /* Set paddr */ 2098 ASSERT(paddr % IMMU_PAGESIZE == 0); 2099 PDTE_CLEAR_PADDR(pte); 2100 PDTE_SET_PADDR(pte, paddr); 2101 2102 /* clear SNP field if not reserved. */ 2103 if (immu->immu_SNP_reserved == B_FALSE) { 2104 PDTE_CLEAR_SNP(pte); 2105 } 2106 2107 #ifdef DEBUG 2108 /* Clear SW2 field available for software */ 2109 PDTE_CLEAR_SW2(pte); 2110 #endif 2111 2112 2113 #ifdef DEBUG 2114 /* SP is don't care for PTEs. Clear it for cleanliness */ 2115 PDTE_CLEAR_SP(pte); 2116 #endif 2117 2118 #ifdef DEBUG 2119 /* Clear SW1 field available for software */ 2120 PDTE_CLEAR_SW1(pte); 2121 #endif 2122 2123 /* 2124 * Now that we are done writing the PTE 2125 * set the "present" flag. Note this present 2126 * flag is a bit in the PDE/PTE that the 2127 * spec says is available for system software. 
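 * (PDTE_SW4, the most significant system-software field, doubles as
 * this present bit; see the comment near the top of PDTE_check().)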
2128 * This is an implementation detail of Solaris 2129 * bare-metal Intel IOMMU. 2130 * The present field in a PDE/PTE is not defined 2131 * by the Vt-d spec 2132 */ 2133 2134 PDTE_SET_P(pte); 2135 2136 out: 2137 #ifdef BUGGY_DRIVERS 2138 PDTE_SET_READ(pte); 2139 PDTE_SET_WRITE(pte); 2140 #else 2141 if (immu_flags & IMMU_FLAGS_READ) 2142 PDTE_SET_READ(pte); 2143 if (immu_flags & IMMU_FLAGS_WRITE) 2144 PDTE_SET_WRITE(pte); 2145 #endif 2146 2147 *hwp = pte; 2148 #endif 2149 } 2150 2151 /*ARGSUSED*/ 2152 static void 2153 PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, 2154 uint64_t *dvma_ptr, uint64_t *nvpages_ptr, dcookie_t *dcookies, 2155 int dcount, dev_info_t *rdip, immu_flags_t immu_flags) 2156 { 2157 paddr_t paddr; 2158 uint64_t nvpages; 2159 uint64_t nppages; 2160 uint64_t dvma; 2161 pgtable_t *pgtable; 2162 hw_pdte_t *hwp; 2163 hw_pdte_t *shwp; 2164 int idx; 2165 int j; 2166 2167 ASSERT(xlate->xlt_level == 1); 2168 2169 pgtable = xlate->xlt_pgtable; 2170 idx = xlate->xlt_idx; 2171 2172 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 2173 ASSERT(pgtable); 2174 2175 dvma = *dvma_ptr; 2176 nvpages = *nvpages_ptr; 2177 2178 ASSERT(dvma || (immu_flags & IMMU_FLAGS_PAGE1)); 2179 ASSERT(nvpages); 2180 2181 /* 2182 * since a caller gets a unique dvma for a physical address, 2183 * no other concurrent thread will be writing to the same 2184 * PTE even if it has the same paddr. So no locks needed. 2185 */ 2186 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; 2187 2188 hwp = shwp; 2189 for (j = dcount - 1; j >= 0; j--) { 2190 if (nvpages <= dcookies[j].dck_npages) 2191 break; 2192 nvpages -= dcookies[j].dck_npages; 2193 } 2194 2195 ASSERT(j >= 0); 2196 ASSERT(nvpages); 2197 ASSERT(nvpages <= dcookies[j].dck_npages); 2198 nppages = nvpages; 2199 paddr = dcookies[j].dck_paddr + 2200 (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE; 2201 2202 nvpages = *nvpages_ptr; 2203 for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) { 2204 2205 ASSERT(paddr || (immu_flags & IMMU_FLAGS_PAGE1)); 2206 2207 PTE_set_one(immu, hwp, paddr, rdip, immu_flags); 2208 2209 ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags) 2210 == B_TRUE); 2211 nppages--; 2212 nvpages--; 2213 paddr += IMMU_PAGESIZE; 2214 dvma += IMMU_PAGESIZE; 2215 2216 if (nppages == 0) { 2217 j++; 2218 } 2219 2220 if (j == dcount) { 2221 ASSERT(nvpages == 0); 2222 break; 2223 } 2224 2225 ASSERT(nvpages); 2226 if (nppages == 0) { 2227 nppages = dcookies[j].dck_npages; 2228 paddr = dcookies[j].dck_paddr; 2229 } 2230 } 2231 2232 /* flush writes to HW PTE table */ 2233 immu_regs_cpu_flush(immu, (caddr_t)shwp, (hwp - shwp) * 2234 sizeof (hw_pdte_t)); 2235 2236 if (nvpages) { 2237 *dvma_ptr = dvma; 2238 *nvpages_ptr = nvpages; 2239 } else { 2240 *dvma_ptr = 0; 2241 *nvpages_ptr = 0; 2242 } 2243 2244 xlate->xlt_idx = idx; 2245 } 2246 2247 /*ARGSUSED*/ 2248 static void 2249 PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next, 2250 dev_info_t *rdip, immu_flags_t immu_flags) 2251 { 2252 hw_pdte_t pde; 2253 2254 pde = *hwp; 2255 2256 /* if PDE is already set, make sure it is correct */ 2257 if (PDTE_P(pde)) { 2258 ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr); 2259 #ifdef BUGGY_DRIVERS 2260 return; 2261 #else 2262 goto out; 2263 #endif 2264 } 2265 2266 /* Dont touch SW4, it is the present bit */ 2267 2268 /* don't touch TM field it is reserved for PDEs */ 2269 2270 /* 3rd field available for system software is not used */ 2271 PDTE_CLEAR_SW3(pde); 2272 2273 /* Set next level pgtable-paddr for PDE */ 2274 ASSERT(next->hwpg_paddr % 
IMMU_PAGESIZE == 0); 2275 PDTE_CLEAR_PADDR(pde); 2276 PDTE_SET_PADDR(pde, next->hwpg_paddr); 2277 2278 /* don't touch SNP field it is reserved for PDEs */ 2279 2280 /* Clear second field available for system software */ 2281 PDTE_CLEAR_SW2(pde); 2282 2283 /* No super pages for PDEs */ 2284 PDTE_CLEAR_SP(pde); 2285 2286 /* Clear SW1 for software */ 2287 PDTE_CLEAR_SW1(pde); 2288 2289 /* 2290 * Now that we are done writing the PDE 2291 * set the "present" flag. Note this present 2292 * flag is a bit in the PDE/PTE that the 2293 * spec says is available for system software. 2294 * This is an implementation detail of Solaris 2295 * base-metal Intel IOMMU. 2296 * The present field in a PDE/PTE is not defined 2297 * by the Vt-d spec 2298 */ 2299 2300 out: 2301 #ifdef BUGGY_DRIVERS 2302 PDTE_SET_READ(pde); 2303 PDTE_SET_WRITE(pde); 2304 #else 2305 if (immu_flags & IMMU_FLAGS_READ) 2306 PDTE_SET_READ(pde); 2307 if (immu_flags & IMMU_FLAGS_WRITE) 2308 PDTE_SET_WRITE(pde); 2309 #endif 2310 2311 PDTE_SET_P(pde); 2312 2313 *hwp = pde; 2314 2315 immu_regs_cpu_flush(immu, (caddr_t)hwp, sizeof (hw_pdte_t)); 2316 } 2317 2318 /* 2319 * Used to set PDEs 2320 */ 2321 static boolean_t 2322 PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels, 2323 dev_info_t *rdip, immu_flags_t immu_flags) 2324 { 2325 pgtable_t *pgtable; 2326 pgtable_t *new; 2327 pgtable_t *next; 2328 hw_pdte_t *hwp; 2329 int level; 2330 uint_t idx; 2331 krw_t rwtype; 2332 boolean_t set = B_FALSE; 2333 2334 /* xlate should be at level 0 */ 2335 ASSERT(xlate->xlt_level == 0); 2336 ASSERT(xlate->xlt_idx == 0); 2337 2338 /* start with highest level pgtable i.e. root */ 2339 xlate += nlevels; 2340 ASSERT(xlate->xlt_level == nlevels); 2341 2342 new = NULL; 2343 xlate->xlt_pgtable = domain->dom_pgtable_root; 2344 for (level = nlevels; level > 1; level--, xlate--) { 2345 2346 ASSERT(xlate->xlt_level == level); 2347 2348 idx = xlate->xlt_idx; 2349 pgtable = xlate->xlt_pgtable; 2350 2351 ASSERT(pgtable); 2352 ASSERT(idx <= IMMU_PGTABLE_MAXIDX); 2353 2354 /* speculative alloc */ 2355 if (new == NULL) { 2356 new = pgtable_alloc(immu, immu_flags); 2357 if (new == NULL) { 2358 ddi_err(DER_PANIC, rdip, "pgtable alloc err"); 2359 } 2360 } 2361 2362 /* Lock the pgtable in READ mode first */ 2363 rw_enter(&(pgtable->swpg_rwlock), RW_READER); 2364 rwtype = RW_READER; 2365 again: 2366 hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx; 2367 2368 ASSERT(pgtable->swpg_next_array); 2369 2370 next = (pgtable->swpg_next_array)[idx]; 2371 2372 /* 2373 * check if leafier level already has a pgtable 2374 * if yes, verify 2375 */ 2376 if (next == NULL) { 2377 /* Change to a write lock */ 2378 if (rwtype == RW_READER && 2379 rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) { 2380 rw_exit(&(pgtable->swpg_rwlock)); 2381 rw_enter(&(pgtable->swpg_rwlock), RW_WRITER); 2382 rwtype = RW_WRITER; 2383 goto again; 2384 } 2385 rwtype = RW_WRITER; 2386 pgtable_zero(immu, new); 2387 next = new; 2388 new = NULL; 2389 (pgtable->swpg_next_array)[idx] = next; 2390 PDE_set_one(immu, hwp, next, rdip, immu_flags); 2391 set = B_TRUE; 2392 rw_downgrade(&(pgtable->swpg_rwlock)); 2393 rwtype = RW_READER; 2394 } else { 2395 hw_pdte_t pde = *hwp; 2396 2397 #ifndef BUGGY_DRIVERS 2398 /* 2399 * If buggy driver we already set permission 2400 * READ+WRITE so nothing to do for that case 2401 * XXX Check that read writer perms change before 2402 * actually setting perms. 
Also need to hold lock 2403 */ 2404 if (immu_flags & IMMU_FLAGS_READ) 2405 PDTE_SET_READ(pde); 2406 if (immu_flags & IMMU_FLAGS_WRITE) 2407 PDTE_SET_WRITE(pde); 2408 2409 #endif 2410 2411 *hwp = pde; 2412 } 2413 2414 ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags) 2415 == B_TRUE); 2416 2417 (xlate - 1)->xlt_pgtable = next; 2418 ASSERT(rwtype == RW_READER); 2419 rw_exit(&(pgtable->swpg_rwlock)); 2420 } 2421 2422 if (new) { 2423 pgtable_free(immu, new); 2424 } 2425 2426 return (set); 2427 } 2428 2429 /* 2430 * dvma_map() 2431 * map a contiguous range of DVMA pages 2432 * 2433 * immu: IOMMU unit for which we are generating DVMA cookies 2434 * domain: domain to map into 2435 * sdvma: Starting DVMA 2436 * snvpages: Number of IMMU pages to map 2437 * dcookies, dcount: physical address cookies backing the range (see the illustrative walk sketched at the end of this file) 2438 * rdip: requesting device 2439 * immu_flags: flags 2440 */ 2441 static boolean_t 2442 dvma_map(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snvpages, 2443 dcookie_t *dcookies, int dcount, dev_info_t *rdip, immu_flags_t immu_flags) 2444 { 2445 uint64_t dvma; 2446 uint64_t n; 2447 int nlevels = immu->immu_dvma_nlevels; 2448 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}; 2449 boolean_t pde_set = B_FALSE; 2450 2451 ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS); 2452 ASSERT(sdvma % IMMU_PAGESIZE == 0); 2453 ASSERT(snvpages); 2454 2455 n = snvpages; 2456 dvma = sdvma; 2457 2458 while (n > 0) { 2459 xlate_setup(immu, dvma, xlate, nlevels, rdip); 2460 2461 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */ 2462 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags) 2463 == B_TRUE) { 2464 pde_set = B_TRUE; 2465 } 2466 2467 /* set all matching ptes that fit into this leaf pgtable */ 2468 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies, 2469 dcount, rdip, immu_flags); 2470 } 2471 2472 return (pde_set); 2473 } 2474 2475 /* 2476 * dvma_unmap() 2477 * unmap a range of DVMAs 2478 * 2479 * immu: IOMMU unit state 2480 * domain: domain for requesting device 2481 * sdvma: starting DVMA 2482 * snpages: Number of IMMU pages to be unmapped 2483 * rdip: requesting device 2484 * 2485 */ 2486 static void 2487 dvma_unmap(immu_t *immu, domain_t *domain, uint64_t sdvma, uint64_t snpages, 2488 dev_info_t *rdip) 2489 { 2490 int nlevels = immu->immu_dvma_nlevels; 2491 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}; 2492 uint64_t n; 2493 uint64_t dvma; 2494 2495 ASSERT(nlevels <= IMMU_PGTABLE_MAX_LEVELS); 2496 ASSERT(sdvma != 0); 2497 ASSERT(sdvma % IMMU_PAGESIZE == 0); 2498 ASSERT(snpages); 2499 2500 dvma = sdvma; 2501 n = snpages; 2502 2503 while (n > 0) { 2504 /* setup the xlate array */ 2505 xlate_setup(immu, dvma, xlate, nlevels, rdip); 2506 2507 /* just lookup existing pgtables.
Should never fail */ 2508 PDE_lookup(immu, domain, xlate, nlevels, rdip); 2509 2510 /* clear all matching ptes that fit into this leaf pgtable */ 2511 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip); 2512 } 2513 2514 /* No need to flush IOTLB after unmap */ 2515 } 2516 2517 static uint64_t 2518 dvma_alloc(ddi_dma_impl_t *hp, domain_t *domain, uint_t npages) 2519 { 2520 ddi_dma_attr_t *dma_attr; 2521 uint64_t dvma; 2522 size_t xsize, align; 2523 uint64_t minaddr, maxaddr; 2524 2525 ASSERT(domain->dom_maptype != IMMU_MAPTYPE_UNITY); 2526 2527 /* shotcuts */ 2528 dma_attr = &(hp->dmai_attr); 2529 2530 /* parameters */ 2531 xsize = npages * IMMU_PAGESIZE; 2532 align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE); 2533 minaddr = dma_attr->dma_attr_addr_lo; 2534 maxaddr = dma_attr->dma_attr_addr_hi + 1; 2535 /* nocross is checked in cookie_update() */ 2536 2537 /* handle the rollover cases */ 2538 if (maxaddr < dma_attr->dma_attr_addr_hi) { 2539 maxaddr = dma_attr->dma_attr_addr_hi; 2540 } 2541 2542 /* 2543 * allocate from vmem arena. 2544 */ 2545 dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena, 2546 xsize, align, 0, 0, (void *)(uintptr_t)minaddr, 2547 (void *)(uintptr_t)maxaddr, VM_NOSLEEP); 2548 2549 ASSERT(dvma); 2550 ASSERT(dvma >= minaddr); 2551 ASSERT(dvma + xsize - 1 < maxaddr); 2552 2553 return (dvma); 2554 } 2555 2556 static void 2557 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages) 2558 { 2559 uint64_t size = npages * IMMU_PAGESIZE; 2560 2561 ASSERT(domain); 2562 ASSERT(domain->dom_did > 0); 2563 ASSERT(dvma); 2564 ASSERT(npages); 2565 2566 if (domain->dom_maptype != IMMU_MAPTYPE_XLATE) { 2567 ASSERT(domain->dom_maptype == IMMU_MAPTYPE_UNITY); 2568 return; 2569 } 2570 2571 vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size); 2572 } 2573 /*ARGSUSED*/ 2574 static void 2575 cookie_free(rootnex_dma_t *dma, immu_t *immu, domain_t *domain, 2576 dev_info_t *rdip) 2577 { 2578 int i; 2579 uint64_t dvma; 2580 uint64_t npages; 2581 dvcookie_t *dvcookies = dma->dp_dvcookies; 2582 2583 ASSERT(dma->dp_max_cookies); 2584 ASSERT(dma->dp_max_dcookies); 2585 ASSERT(dma->dp_dvmax < dma->dp_max_cookies); 2586 ASSERT(dma->dp_dmax < dma->dp_max_dcookies); 2587 2588 /* 2589 * we allocated DVMA in a single chunk. 
Calculate total number 2590 * of pages 2591 */ 2592 for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) { 2593 npages += dvcookies[i].dvck_npages; 2594 } 2595 dvma = dvcookies[0].dvck_dvma; 2596 #ifdef DEBUG 2597 /* Unmap only in DEBUG mode */ 2598 dvma_unmap(immu, domain, dvma, npages, rdip); 2599 #endif 2600 dvma_free(domain, dvma, npages); 2601 2602 kmem_free(dma->dp_dvcookies, sizeof (dvcookie_t) * dma->dp_max_cookies); 2603 dma->dp_dvcookies = NULL; 2604 kmem_free(dma->dp_dcookies, sizeof (dcookie_t) * dma->dp_max_dcookies); 2605 dma->dp_dcookies = NULL; 2606 if (dma->dp_need_to_free_cookie == B_TRUE) { 2607 kmem_free(dma->dp_cookies, sizeof (ddi_dma_cookie_t) * 2608 dma->dp_max_cookies); 2609 dma->dp_cookies = NULL; 2610 dma->dp_need_to_free_cookie = B_FALSE; 2611 } 2612 2613 dma->dp_max_cookies = 0; 2614 dma->dp_max_dcookies = 0; 2615 dma->dp_cookie_size = 0; 2616 dma->dp_dvmax = 0; 2617 dma->dp_dmax = 0; 2618 } 2619 2620 /* 2621 * cookie_alloc() 2622 */ 2623 static int 2624 cookie_alloc(rootnex_dma_t *dma, struct ddi_dma_req *dmareq, 2625 ddi_dma_attr_t *attr, uint_t prealloc) 2626 { 2627 int kmflag; 2628 rootnex_sglinfo_t *sinfo = &(dma->dp_sglinfo); 2629 dvcookie_t *dvcookies = dma->dp_dvcookies; 2630 dcookie_t *dcookies = dma->dp_dcookies; 2631 ddi_dma_cookie_t *cookies = dma->dp_cookies; 2632 uint64_t max_cookies; 2633 uint64_t max_dcookies; 2634 uint64_t cookie_size; 2635 2636 /* we need to allocate new arrays */ 2637 if (dmareq->dmar_fp == DDI_DMA_SLEEP) { 2638 kmflag = KM_SLEEP; 2639 } else { 2640 kmflag = KM_NOSLEEP; 2641 } 2642 2643 /* 2644 * XXX make sure cookies size doesn't exceed sinfo->si_max_cookie_size; 2645 */ 2646 2647 /* 2648 * figure out the rough estimate of array size 2649 * At a minimum, each cookie must hold 1 page. 2650 * At a maximum, it cannot exceed dma_attr_sgllen 2651 */ 2652 max_dcookies = dmareq->dmar_object.dmao_size + IMMU_PAGEOFFSET; 2653 max_dcookies /= IMMU_PAGESIZE; 2654 max_dcookies++; 2655 max_cookies = MIN(max_dcookies, attr->dma_attr_sgllen); 2656 2657 /* allocate the dvma cookie array */ 2658 dvcookies = kmem_zalloc(sizeof (dvcookie_t) * max_cookies, kmflag); 2659 if (dvcookies == NULL) { 2660 return (DDI_FAILURE); 2661 } 2662 2663 /* allocate the "phys" cookie array */ 2664 dcookies = kmem_zalloc(sizeof (dcookie_t) * max_dcookies, kmflag); 2665 if (dcookies == NULL) { 2666 kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies); 2667 dvcookies = NULL; 2668 return (DDI_FAILURE); 2669 } 2670 2671 /* allocate the "real" cookie array - the one given to users */ 2672 cookie_size = sizeof (ddi_dma_cookie_t) * max_cookies; 2673 if (max_cookies > prealloc) { 2674 cookies = kmem_zalloc(cookie_size, kmflag); 2675 if (cookies == NULL) { 2676 kmem_free(dvcookies, sizeof (dvcookie_t) * max_cookies); 2677 kmem_free(dcookies, sizeof (dcookie_t) * max_dcookies); 2678 goto fail; 2679 } 2680 dma->dp_need_to_free_cookie = B_TRUE; 2681 } else { 2682 /* the preallocated buffer fits this size */ 2683 cookies = (ddi_dma_cookie_t *)dma->dp_prealloc_buffer; 2684 bzero(cookies, sizeof (ddi_dma_cookie_t) * max_cookies); 2685 dma->dp_need_to_free_cookie = B_FALSE; 2686 } 2687 2688 dma->dp_dvcookies = dvcookies; 2689 dma->dp_dcookies = dcookies; 2690 dma->dp_cookies = cookies; 2691 dma->dp_cookie_size = cookie_size; 2692 dma->dp_max_cookies = max_cookies; 2693 dma->dp_max_dcookies = max_dcookies; 2694 dma->dp_dvmax = 0; 2695 dma->dp_dmax = 0; 2696 sinfo->si_max_pages = dma->dp_max_cookies; 2697 2698 return (DDI_SUCCESS); 2699 2700 fail: 2701 dma->dp_dvcookies = NULL; 2702
dma->dp_dcookies = NULL; 2703 dma->dp_cookies = NULL; 2704 dma->dp_cookie_size = 0; 2705 dma->dp_max_cookies = 0; 2706 dma->dp_max_dcookies = 0; 2707 dma->dp_dvmax = 0; 2708 dma->dp_dmax = 0; 2709 dma->dp_need_to_free_cookie = B_FALSE; 2710 sinfo->si_max_pages = 0; 2711 2712 return (DDI_FAILURE); 2713 } 2714 2715 /*ARGSUSED*/ 2716 static void 2717 cookie_update(domain_t *domain, rootnex_dma_t *dma, paddr_t paddr, 2718 int64_t psize, uint64_t maxseg, size_t nocross) 2719 { 2720 dvcookie_t *dvcookies = dma->dp_dvcookies; 2721 dcookie_t *dcookies = dma->dp_dcookies; 2722 ddi_dma_cookie_t *cookies = dma->dp_cookies; 2723 uint64_t dvmax = dma->dp_dvmax; 2724 uint64_t dmax = dma->dp_dmax; 2725 2726 ASSERT(dvmax < dma->dp_max_cookies); 2727 ASSERT(dmax < dma->dp_max_dcookies); 2728 2729 paddr &= IMMU_PAGEMASK; 2730 2731 ASSERT(paddr); 2732 ASSERT(psize); 2733 ASSERT(maxseg); 2734 2735 /* 2736 * check to see if this page would put us 2737 * over the max cookie size. 2738 */ 2739 if (cookies[dvmax].dmac_size + psize > maxseg) { 2740 dvmax++; /* use the next dvcookie */ 2741 dmax++; /* also means we use the next dcookie */ 2742 ASSERT(dvmax < dma->dp_max_cookies); 2743 ASSERT(dmax < dma->dp_max_dcookies); 2744 } 2745 2746 /* 2747 * check to see if this page would make us larger than 2748 * the nocross boundary. If yes, create a new cookie 2749 * otherwise we will fail later with vmem_xalloc() 2750 * due to overconstrained alloc requests 2751 * nocross == 0 implies no nocross constraint. 2752 */ 2753 if (nocross > 0) { 2754 ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE 2755 <= nocross); 2756 if ((dvcookies[dvmax].dvck_npages + 1) * IMMU_PAGESIZE 2757 > nocross) { 2758 dvmax++; /* use the next dvcookie */ 2759 dmax++; /* also means we use the next dcookie */ 2760 ASSERT(dvmax < dma->dp_max_cookies); 2761 ASSERT(dmax < dma->dp_max_dcookies); 2762 } 2763 ASSERT((dvcookies[dvmax].dvck_npages) * IMMU_PAGESIZE 2764 <= nocross); 2765 } 2766 2767 /* 2768 * If the cookie is empty 2769 */ 2770 if (dvcookies[dvmax].dvck_npages == 0) { 2771 ASSERT(cookies[dvmax].dmac_size == 0); 2772 ASSERT(dvcookies[dvmax].dvck_dvma == 0); 2773 ASSERT(dvcookies[dvmax].dvck_npages 2774 == 0); 2775 ASSERT(dcookies[dmax].dck_paddr == 0); 2776 ASSERT(dcookies[dmax].dck_npages == 0); 2777 2778 dvcookies[dvmax].dvck_dvma = 0; 2779 dvcookies[dvmax].dvck_npages = 1; 2780 dcookies[dmax].dck_paddr = paddr; 2781 dcookies[dmax].dck_npages = 1; 2782 cookies[dvmax].dmac_size = psize; 2783 } else { 2784 /* Cookie not empty. 
Add to it */ 2785 cookies[dma->dp_dvmax].dmac_size += psize; 2786 ASSERT(dvcookies[dma->dp_dvmax].dvck_dvma == 0); 2787 dvcookies[dma->dp_dvmax].dvck_npages++; 2788 ASSERT(dcookies[dmax].dck_paddr != 0); 2789 ASSERT(dcookies[dmax].dck_npages != 0); 2790 2791 /* Check if this paddr is contiguous */ 2792 if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) { 2793 dcookies[dmax].dck_npages++; 2794 } else { 2795 /* No, we need a new dcookie */ 2796 dmax++; 2797 ASSERT(dcookies[dmax].dck_paddr == 0); 2798 ASSERT(dcookies[dmax].dck_npages == 0); 2799 dcookies[dmax].dck_paddr = paddr; 2800 dcookies[dmax].dck_npages = 1; 2801 } 2802 } 2803 2804 dma->dp_dvmax = dvmax; 2805 dma->dp_dmax = dmax; 2806 } 2807 2808 static void 2809 cookie_finalize(ddi_dma_impl_t *hp, immu_t *immu, domain_t *domain, 2810 dev_info_t *rdip, immu_flags_t immu_flags) 2811 { 2812 int i; 2813 rootnex_dma_t *dma = (rootnex_dma_t *)hp->dmai_private; 2814 dvcookie_t *dvcookies = dma->dp_dvcookies; 2815 dcookie_t *dcookies = dma->dp_dcookies; 2816 ddi_dma_cookie_t *cookies = dma->dp_cookies; 2817 uint64_t npages; 2818 uint64_t dvma; 2819 boolean_t pde_set; 2820 2821 /* First calculate the total number of pages required */ 2822 for (i = 0, npages = 0; i <= dma->dp_dvmax; i++) { 2823 npages += dvcookies[i].dvck_npages; 2824 } 2825 2826 /* Now allocate dvma */ 2827 dvma = dvma_alloc(hp, domain, npages); 2828 2829 /* Now map the dvma */ 2830 pde_set = dvma_map(immu, domain, dvma, npages, dcookies, 2831 dma->dp_dmax + 1, rdip, immu_flags); 2832 2833 /* Invalidate the IOTLB */ 2834 immu_regs_iotlb_flush(immu, domain->dom_did, dvma, npages, 2835 pde_set == B_TRUE ? TLB_IVA_WHOLE : TLB_IVA_LEAF, IOTLB_PSI); 2836 2837 /* Now setup dvcookies and real cookie addresses */ 2838 for (i = 0; i <= dma->dp_dvmax; i++) { 2839 dvcookies[i].dvck_dvma = dvma; 2840 cookies[i].dmac_laddress = dvma; 2841 ASSERT(cookies[i].dmac_size != 0); 2842 cookies[i].dmac_type = 0; 2843 dvma += (dvcookies[i].dvck_npages * IMMU_PAGESIZE); 2844 } 2845 2846 #ifdef TEST 2847 immu_regs_iotlb_flush(immu, domain->dom_did, 0, 0, 0, IOTLB_DSI); 2848 #endif 2849 } 2850 2851 /* 2852 * cookie_create() 2853 */ 2854 static int 2855 cookie_create(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, 2856 ddi_dma_attr_t *a, immu_t *immu, domain_t *domain, dev_info_t *rdip, 2857 uint_t prealloc_count, immu_flags_t immu_flags) 2858 { 2859 ddi_dma_atyp_t buftype; 2860 uint64_t offset; 2861 page_t **pparray; 2862 uint64_t paddr; 2863 uint_t psize; 2864 uint_t size; 2865 uint64_t maxseg; 2866 caddr_t vaddr; 2867 uint_t pcnt; 2868 page_t *page; 2869 rootnex_sglinfo_t *sglinfo; 2870 ddi_dma_obj_t *dmar_object; 2871 rootnex_dma_t *dma; 2872 size_t nocross; 2873 2874 dma = (rootnex_dma_t *)hp->dmai_private; 2875 sglinfo = &(dma->dp_sglinfo); 2876 dmar_object = &(dmareq->dmar_object); 2877 maxseg = sglinfo->si_max_cookie_size; 2878 pparray = dmar_object->dmao_obj.virt_obj.v_priv; 2879 vaddr = dmar_object->dmao_obj.virt_obj.v_addr; 2880 buftype = dmar_object->dmao_type; 2881 size = dmar_object->dmao_size; 2882 nocross = (size_t)(a->dma_attr_seg + 1); 2883 2884 /* 2885 * Allocate cookie, dvcookie and dcookie 2886 */ 2887 if (cookie_alloc(dma, dmareq, a, prealloc_count) != DDI_SUCCESS) { 2888 return (DDI_FAILURE); 2889 } 2890 hp->dmai_cookie = dma->dp_cookies; 2891 2892 pcnt = 0; 2893 2894 /* retrieve paddr, psize, offset from dmareq */ 2895 if (buftype == DMA_OTYP_PAGES) { 2896 page = dmar_object->dmao_obj.pp_obj.pp_pp; 2897 ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); 2898 offset = 
dmar_object->dmao_obj.pp_obj.pp_offset & 2899 MMU_PAGEOFFSET; 2900 paddr = pfn_to_pa(page->p_pagenum) + offset; 2901 psize = MIN((MMU_PAGESIZE - offset), size); 2902 sglinfo->si_asp = NULL; 2903 page = page->p_next; 2904 } else { 2905 ASSERT((buftype == DMA_OTYP_VADDR) || 2906 (buftype == DMA_OTYP_BUFVADDR)); 2907 sglinfo->si_asp = dmar_object->dmao_obj.virt_obj.v_as; 2908 if (sglinfo->si_asp == NULL) { 2909 sglinfo->si_asp = &kas; 2910 } 2911 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET; 2912 if (pparray != NULL) { 2913 ASSERT(!PP_ISFREE(pparray[pcnt])); 2914 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset; 2915 psize = MIN((MMU_PAGESIZE - offset), size); 2916 pcnt++; 2917 } else { 2918 paddr = pfn_to_pa(hat_getpfnum(sglinfo->si_asp->a_hat, 2919 vaddr)) + offset; 2920 psize = MIN(size, (MMU_PAGESIZE - offset)); 2921 vaddr += psize; 2922 } 2923 } 2924 2925 /* save the iommu page offset */ 2926 sglinfo->si_buf_offset = offset & IMMU_PAGEOFFSET; 2927 2928 /* 2929 * setup dvcookie and dcookie for [paddr, paddr+psize) 2930 */ 2931 cookie_update(domain, dma, paddr, psize, maxseg, nocross); 2932 2933 size -= psize; 2934 while (size > 0) { 2935 /* get the size for this page (i.e. partial or full page) */ 2936 psize = MIN(size, MMU_PAGESIZE); 2937 if (buftype == DMA_OTYP_PAGES) { 2938 /* get the paddr from the page_t */ 2939 ASSERT(!PP_ISFREE(page) && PAGE_LOCKED(page)); 2940 paddr = pfn_to_pa(page->p_pagenum); 2941 page = page->p_next; 2942 } else if (pparray != NULL) { 2943 /* index into the array of page_t's to get the paddr */ 2944 ASSERT(!PP_ISFREE(pparray[pcnt])); 2945 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum); 2946 pcnt++; 2947 } else { 2948 /* call into the VM to get the paddr */ 2949 paddr = pfn_to_pa(hat_getpfnum 2950 (sglinfo->si_asp->a_hat, vaddr)); 2951 vaddr += psize; 2952 } 2953 /* 2954 * set dvcookie and dcookie for [paddr, paddr+psize) 2955 */ 2956 cookie_update(domain, dma, paddr, psize, maxseg, nocross); 2957 size -= psize; 2958 } 2959 2960 cookie_finalize(hp, immu, domain, rdip, immu_flags); 2961 2962 /* take account in the offset into the first page */ 2963 dma->dp_cookies[0].dmac_laddress += sglinfo->si_buf_offset; 2964 2965 /* save away how many cookies we have */ 2966 sglinfo->si_sgl_size = dma->dp_dvmax + 1; 2967 2968 return (DDI_SUCCESS); 2969 } 2970 2971 /* ############################# Functions exported ######################## */ 2972 2973 /* 2974 * setup the DVMA subsystem 2975 * this code runs only for the first IOMMU unit 2976 */ 2977 void 2978 immu_dvma_setup(list_t *listp) 2979 { 2980 immu_t *immu; 2981 uint_t kval; 2982 size_t nchains; 2983 2984 /* locks */ 2985 mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL); 2986 2987 /* Create lists */ 2988 list_create(&immu_unity_domain_list, sizeof (domain_t), 2989 offsetof(domain_t, dom_maptype_node)); 2990 list_create(&immu_xlate_domain_list, sizeof (domain_t), 2991 offsetof(domain_t, dom_maptype_node)); 2992 2993 /* Setup BDF domain hash */ 2994 nchains = 0xff; 2995 kval = mod_hash_iddata_gen(nchains); 2996 2997 bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH", 2998 nchains, mod_hash_null_keydtor, mod_hash_null_valdtor, 2999 mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp, 3000 KM_NOSLEEP); 3001 ASSERT(bdf_domain_hash); 3002 3003 immu = list_head(listp); 3004 for (; immu; immu = list_next(listp, immu)) { 3005 create_unity_domain(immu); 3006 did_init(immu); 3007 context_init(immu); 3008 immu->immu_dvma_setup = B_TRUE; 3009 } 3010 } 3011 3012 /* 3013 * Startup up one DVMA unit 3014 */ 
3015 void 3016 immu_dvma_startup(immu_t *immu) 3017 { 3018 ASSERT(immu); 3019 ASSERT(immu->immu_dvma_running == B_FALSE); 3020 3021 if (immu_gfxdvma_enable == B_FALSE && 3022 immu->immu_dvma_gfx_only == B_TRUE) { 3023 return; 3024 } 3025 3026 /* 3027 * DVMA will start once IOMMU is "running" 3028 */ 3029 ASSERT(immu->immu_dvma_running == B_FALSE); 3030 immu->immu_dvma_running = B_TRUE; 3031 } 3032 3033 /* 3034 * immu_dvma_physmem_update() 3035 * called when the installed memory on a 3036 * system increases, to expand domain DVMA 3037 * for domains with UNITY mapping 3038 */ 3039 void 3040 immu_dvma_physmem_update(uint64_t addr, uint64_t size) 3041 { 3042 uint64_t start; 3043 uint64_t npages; 3044 int dcount; 3045 dcookie_t dcookies[1] = {0}; 3046 domain_t *domain; 3047 3048 /* 3049 * Just walk the system-wide list of domains with 3050 * UNITY mapping. Both the list of *all* domains 3051 * and the list of *UNITY* domains are protected by 3052 * the same single lock 3053 */ 3054 mutex_enter(&immu_domain_lock); 3055 domain = list_head(&immu_unity_domain_list); 3056 for (; domain; domain = list_next(&immu_unity_domain_list, domain)) { 3057 3058 /* There is no vmem_arena for unity domains. Just map it */ 3059 ddi_err(DER_LOG, NULL, "IMMU: unity-domain: Adding map " 3060 "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size); 3061 3062 start = IMMU_ROUNDOWN(addr); 3063 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1; 3064 3065 dcookies[0].dck_paddr = start; 3066 dcookies[0].dck_npages = npages; 3067 dcount = 1; 3068 (void) dvma_map(domain->dom_immu, domain, start, npages, 3069 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE); 3070 3071 } 3072 mutex_exit(&immu_domain_lock); 3073 } 3074 3075 3076 int 3077 immu_dvma_map(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq, memrng_t *mrng, 3078 uint_t prealloc_count, dev_info_t *rdip, immu_flags_t immu_flags) 3079 { 3080 ddi_dma_attr_t *attr; 3081 dev_info_t *ddip; 3082 domain_t *domain; 3083 immu_t *immu; 3084 dcookie_t dcookies[1] = {0}; 3085 int dcount = 0; 3086 boolean_t pde_set = B_TRUE; 3087 int r = DDI_FAILURE; 3088 3089 ASSERT(immu_enable == B_TRUE); 3090 ASSERT(immu_running == B_TRUE || !(immu_flags & IMMU_FLAGS_DMAHDL)); 3091 ASSERT(hp || !(immu_flags & IMMU_FLAGS_DMAHDL)); 3092 3093 /* 3094 * Intel IOMMU will only be turned on if the MMU 3095 * page size is a multiple of the IOMMU page size 3096 */ 3097 3098 /*LINTED*/ 3099 ASSERT(MMU_PAGESIZE % IMMU_PAGESIZE == 0); 3100 3101 /* Can only do DVMA if dip is attached */ 3102 if (rdip == NULL) { 3103 ddi_err(DER_PANIC, rdip, "DVMA map: No device specified"); 3104 /*NOTREACHED*/ 3105 } 3106 3107 immu_flags |= dma_to_immu_flags(dmareq); 3108 3109 3110 immu = immu_dvma_get_immu(rdip, immu_flags); 3111 if (immu == NULL) { 3112 /* 3113 * possible that there is no IOMMU unit for this device 3114 * - BIOS bugs are one example.
3115 */ 3116 ddi_err(DER_WARN, rdip, "No IMMU unit found for device"); 3117 return (DDI_DMA_NORESOURCES); 3118 } 3119 3120 3121 /* 3122 * redirect isa devices attached under lpc to lpc dip 3123 */ 3124 if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) { 3125 rdip = get_lpc_devinfo(immu, rdip, immu_flags); 3126 if (rdip == NULL) { 3127 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3128 /*NOTREACHED*/ 3129 } 3130 } 3131 3132 /* Reset immu, as redirection can change IMMU */ 3133 immu = NULL; 3134 3135 /* 3136 * for gart, redirect to the real graphic devinfo 3137 */ 3138 if (strcmp(ddi_node_name(rdip), "agpgart") == 0) { 3139 rdip = get_gfx_devinfo(rdip); 3140 if (rdip == NULL) { 3141 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3142 /*NOTREACHED*/ 3143 } 3144 } 3145 3146 /* 3147 * Setup DVMA domain for the device. This does 3148 * work only the first time we do DVMA for a 3149 * device. 3150 */ 3151 ddip = NULL; 3152 domain = device_domain(rdip, &ddip, immu_flags); 3153 if (domain == NULL) { 3154 ASSERT(ddip == NULL); 3155 ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device"); 3156 return (DDI_DMA_NORESOURCES); 3157 } 3158 3159 /* 3160 * If a domain is found, we must also have a domain dip 3161 * which is the topmost ancestor dip of rdip that shares 3162 * the same domain with rdip. 3163 */ 3164 if (domain->dom_did == 0 || ddip == NULL) { 3165 ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)", 3166 domain->dom_did, ddip); 3167 return (DDI_DMA_NORESOURCES); 3168 } 3169 3170 immu = domain->dom_immu; 3171 ASSERT(immu); 3172 if (domain->dom_did == IMMU_UNITY_DID) { 3173 ASSERT(domain == immu->immu_unity_domain); 3174 /* mapping already done. Let rootnex create cookies */ 3175 r = DDI_DMA_USE_PHYSICAL; 3176 } else if (immu_flags & IMMU_FLAGS_DMAHDL) { 3177 3178 /* if we have a DMA handle, the IOMMUs must be running */ 3179 ASSERT(immu->immu_regs_running == B_TRUE); 3180 ASSERT(immu->immu_dvma_running == B_TRUE); 3181 3182 attr = &hp->dmai_attr; 3183 if (attr == NULL) { 3184 ddi_err(DER_PANIC, rdip, 3185 "DMA handle (%p): NULL attr", hp); 3186 /*NOTREACHED*/ 3187 } 3188 3189 if (cookie_create(hp, dmareq, attr, immu, domain, rdip, 3190 prealloc_count, immu_flags) != DDI_SUCCESS) { 3191 ddi_err(DER_MODE, rdip, "dvcookie_alloc: failed"); 3192 return (DDI_DMA_NORESOURCES); 3193 } 3194 r = DDI_DMA_MAPPED; 3195 } else if (immu_flags & IMMU_FLAGS_MEMRNG) { 3196 dcookies[0].dck_paddr = mrng->mrng_start; 3197 dcookies[0].dck_npages = mrng->mrng_npages; 3198 dcount = 1; 3199 pde_set = dvma_map(immu, domain, mrng->mrng_start, 3200 mrng->mrng_npages, dcookies, dcount, rdip, immu_flags); 3201 immu_regs_iotlb_flush(immu, domain->dom_did, mrng->mrng_start, 3202 mrng->mrng_npages, pde_set == B_TRUE ? 
3203 TLB_IVA_WHOLE : TLB_IVA_LEAF, IOTLB_PSI); 3204 r = DDI_DMA_MAPPED; 3205 } else { 3206 ddi_err(DER_PANIC, rdip, "invalid flags for immu_dvma_map()"); 3207 /*NOTREACHED*/ 3208 } 3209 3210 /* 3211 * Update the root and context entries 3212 */ 3213 if (immu_context_update(immu, domain, ddip, rdip, immu_flags) 3214 != DDI_SUCCESS) { 3215 ddi_err(DER_MODE, rdip, "DVMA map: context update failed"); 3216 return (DDI_DMA_NORESOURCES); 3217 } 3218 3219 immu_regs_wbf_flush(immu); 3220 3221 return (r); 3222 } 3223 3224 int 3225 immu_dvma_unmap(ddi_dma_impl_t *hp, dev_info_t *rdip) 3226 { 3227 ddi_dma_attr_t *attr; 3228 rootnex_dma_t *dma; 3229 domain_t *domain; 3230 immu_t *immu; 3231 dev_info_t *ddip; 3232 immu_flags_t immu_flags; 3233 3234 ASSERT(immu_enable == B_TRUE); 3235 ASSERT(immu_running == B_TRUE); 3236 ASSERT(hp); 3237 3238 /* 3239 * Intel IOMMU will only be turned on if IOMMU 3240 * page size is same as MMU page size 3241 */ 3242 /*LINTED*/ 3243 ASSERT(MMU_PAGESIZE == IMMU_PAGESIZE); 3244 3245 /* rdip need not be attached */ 3246 if (rdip == NULL) { 3247 ddi_err(DER_PANIC, rdip, "DVMA unmap: No device specified"); 3248 return (DDI_DMA_NORESOURCES); 3249 } 3250 3251 /* 3252 * Get the device domain, this should always 3253 * succeed since there had to be a domain to 3254 * setup DVMA. 3255 */ 3256 dma = (rootnex_dma_t *)hp->dmai_private; 3257 attr = &hp->dmai_attr; 3258 if (attr == NULL) { 3259 ddi_err(DER_PANIC, rdip, "DMA handle (%p) has NULL attr", hp); 3260 /*NOTREACHED*/ 3261 } 3262 immu_flags = dma->dp_sleep_flags; 3263 3264 immu = immu_dvma_get_immu(rdip, immu_flags); 3265 if (immu == NULL) { 3266 /* 3267 * possible that there is no IOMMU unit for this device 3268 * - BIOS bugs are one example. 3269 */ 3270 ddi_err(DER_WARN, rdip, "No IMMU unit found for device"); 3271 return (DDI_DMA_NORESOURCES); 3272 } 3273 3274 3275 /* 3276 * redirect isa devices attached under lpc to lpc dip 3277 */ 3278 if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) { 3279 rdip = get_lpc_devinfo(immu, rdip, immu_flags); 3280 if (rdip == NULL) { 3281 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3282 /*NOTREACHED*/ 3283 } 3284 } 3285 3286 /* Reset immu, as redirection can change IMMU */ 3287 immu = NULL; 3288 3289 /* 3290 * for gart, redirect to the real graphic devinfo 3291 */ 3292 if (strcmp(ddi_node_name(rdip), "agpgart") == 0) { 3293 rdip = get_gfx_devinfo(rdip); 3294 if (rdip == NULL) { 3295 ddi_err(DER_PANIC, rdip, "IMMU redirect failed"); 3296 /*NOTREACHED*/ 3297 } 3298 } 3299 3300 ddip = NULL; 3301 domain = device_domain(rdip, &ddip, immu_flags); 3302 if (domain == NULL || domain->dom_did == 0 || ddip == NULL) { 3303 ddi_err(DER_MODE, rdip, "Attempt to unmap DVMA for " 3304 "a device without domain or with an uninitialized " 3305 "domain"); 3306 return (DDI_DMA_NORESOURCES); 3307 } 3308 3309 /* 3310 * immu must be set in the domain. 3311 */ 3312 immu = domain->dom_immu; 3313 ASSERT(immu); 3314 if (domain->dom_did == IMMU_UNITY_DID) { 3315 ASSERT(domain == immu->immu_unity_domain); 3316 /* 3317 * domain is unity, nothing to do here, let the rootnex 3318 * code free the cookies. 
3319 */ 3320 return (DDI_DMA_USE_PHYSICAL); 3321 } 3322 3323 dma = hp->dmai_private; 3324 if (dma == NULL) { 3325 ddi_err(DER_PANIC, rdip, "DVMA unmap: DMA handle (%p) has " 3326 "no private dma structure", hp); 3327 /*NOTREACHED*/ 3328 } 3329 3330 cookie_free(dma, immu, domain, rdip); 3331 3332 /* No invalidation needed for unmap */ 3333 immu_regs_wbf_flush(immu); 3334 3335 return (DDI_SUCCESS); 3336 } 3337 3338 immu_devi_t * 3339 immu_devi_get(dev_info_t *rdip) 3340 { 3341 immu_devi_t *immu_devi; 3342 volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu); 3343 3344 /* Just want atomic reads. No need for lock */ 3345 immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr, 3346 0); 3347 return (immu_devi); 3348 } 3349
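/*
 * Illustrative sketch only -- not part of the original driver and compiled
 * out, since TEST is #undef'd at the top of this file. It shows how a DVMA
 * decomposes into one index per page-table level, mirroring the shift/mask
 * walk that xlate_setup() performs above. The function name
 * dvma_level_index_example is hypothetical.
 */
#ifdef TEST
static void
dvma_level_index_example(uint64_t dvma, int nlevels)
{
	/* drop the offset into the 4K IMMU page */
	uint64_t offbits = dvma >> IMMU_PAGESHIFT;
	int level;

	/* level 1 is the leaf PTE; level == nlevels is the root pgtable */
	for (level = 1; level <= nlevels; level++) {
		uint_t idx = offbits & IMMU_PGTABLE_LEVEL_MASK;

		ASSERT(idx <= IMMU_PGTABLE_MAXIDX);
		ddi_err(DER_LOG, NULL, "dvma 0x%" PRIx64
		    ": level %d idx %d", dvma, level, (int)idx);
		offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
	}
}
#endif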
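/*
 * Illustrative sketch only (also compiled out under TEST): a straight
 * forward walk of a dcookie array, pairing each IMMU page of a DVMA range
 * with the paddr that backs it. PTE_set_all() above establishes the same
 * dvma-to-paddr pairing, but restartably -- it resumes mid-cookie whenever
 * a leaf pgtable fills up and dvma_map() calls it again. The function name
 * dcookie_walk_example is hypothetical.
 */
#ifdef TEST
static void
dcookie_walk_example(uint64_t sdvma, dcookie_t *dcookies, int dcount)
{
	uint64_t dvma = sdvma;
	int j;

	for (j = 0; j < dcount; j++) {
		paddr_t paddr = dcookies[j].dck_paddr;
		uint64_t n;

		for (n = 0; n < dcookies[j].dck_npages; n++) {
			ddi_err(DER_LOG, NULL, "dvma 0x%" PRIx64
			    " -> paddr 0x%" PRIx64, dvma, (uint64_t)paddr);
			dvma += IMMU_PAGESIZE;
			paddr += IMMU_PAGESIZE;
		}
	}
}
#endif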