// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping.
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/numa.h>
#include <linux/limits.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>
#include <trace/events/intel_iommu.h>

#include "../irq_remapping.h"
#include "perf.h"

typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
	void			*arg[ACPI_DMAR_TYPE_RESERVED];
	bool			ignore_unhandled;
	bool			print_entry;
};

/*
 * Assumptions:
 * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
 *    before the IO devices managed by that unit.
 * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
 *    after the IO devices managed by that unit.
 * 3) Hotplug events are rare.
 *
 * Locking rules for DMA and interrupt remapping related global data structures:
 * 1) Use dmar_global_lock in process context
 * 2) Use RCU in interrupt context
 */
DECLARE_RWSEM(dmar_global_lock);
LIST_HEAD(dmar_drhd_units);

struct acpi_table_header * __initdata dmar_tbl;
static int dmar_dev_scope_status = 1;
static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];

static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);

static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
	/*
	 * Add INCLUDE_ALL at the tail, so scanning the list will find it at
	 * the very end.
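	 *
	 * For illustration only (an assumed mix of units, not from a real
	 * table): after registering two dedicated DRHDs and one INCLUDE_ALL
	 * DRHD in any order, the list reads
	 *
	 *   dmar_drhd_units -> dedicated -> dedicated -> INCLUDE_ALL
	 *
	 * so lookups such as dmar_find_matched_drhd_unit() only fall back to
	 * the catch-all unit after every explicit device scope has been tried.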
74 */ 75 if (drhd->include_all) 76 list_add_tail_rcu(&drhd->list, &dmar_drhd_units); 77 else 78 list_add_rcu(&drhd->list, &dmar_drhd_units); 79 } 80 81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) 82 { 83 struct acpi_dmar_device_scope *scope; 84 85 *cnt = 0; 86 while (start < end) { 87 scope = start; 88 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE || 89 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || 90 scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) 91 (*cnt)++; 92 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && 93 scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) { 94 pr_warn("Unsupported device scope\n"); 95 } 96 start += scope->length; 97 } 98 if (*cnt == 0) 99 return NULL; 100 101 return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL); 102 } 103 104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt) 105 { 106 int i; 107 struct device *tmp_dev; 108 109 if (*devices && *cnt) { 110 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) 111 put_device(tmp_dev); 112 kfree(*devices); 113 } 114 115 *devices = NULL; 116 *cnt = 0; 117 } 118 119 /* Optimize out kzalloc()/kfree() for normal cases */ 120 static char dmar_pci_notify_info_buf[64]; 121 122 static struct dmar_pci_notify_info * 123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) 124 { 125 int level = 0; 126 size_t size; 127 struct pci_dev *tmp; 128 struct dmar_pci_notify_info *info; 129 130 BUG_ON(dev->is_virtfn); 131 132 /* 133 * Ignore devices that have a domain number higher than what can 134 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 135 */ 136 if (pci_domain_nr(dev->bus) > U16_MAX) 137 return NULL; 138 139 /* Only generate path[] for device addition event */ 140 if (event == BUS_NOTIFY_ADD_DEVICE) 141 for (tmp = dev; tmp; tmp = tmp->bus->self) 142 level++; 143 144 size = struct_size(info, path, level); 145 if (size <= sizeof(dmar_pci_notify_info_buf)) { 146 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; 147 } else { 148 info = kzalloc(size, GFP_KERNEL); 149 if (!info) { 150 if (dmar_dev_scope_status == 0) 151 dmar_dev_scope_status = -ENOMEM; 152 return NULL; 153 } 154 } 155 156 info->event = event; 157 info->dev = dev; 158 info->seg = pci_domain_nr(dev->bus); 159 info->level = level; 160 if (event == BUS_NOTIFY_ADD_DEVICE) { 161 for (tmp = dev; tmp; tmp = tmp->bus->self) { 162 level--; 163 info->path[level].bus = tmp->bus->number; 164 info->path[level].device = PCI_SLOT(tmp->devfn); 165 info->path[level].function = PCI_FUNC(tmp->devfn); 166 if (pci_is_root_bus(tmp->bus)) 167 info->bus = tmp->bus->number; 168 } 169 } 170 171 return info; 172 } 173 174 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info) 175 { 176 if ((void *)info != dmar_pci_notify_info_buf) 177 kfree(info); 178 } 179 180 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, 181 struct acpi_dmar_pci_path *path, int count) 182 { 183 int i; 184 185 if (info->bus != bus) 186 goto fallback; 187 if (info->level != count) 188 goto fallback; 189 190 for (i = 0; i < count; i++) { 191 if (path[i].device != info->path[i].device || 192 path[i].function != info->path[i].function) 193 goto fallback; 194 } 195 196 return true; 197 198 fallback: 199 200 if (count != 1) 201 return false; 202 203 i = info->level - 1; 204 if (bus == info->path[i].bus && 205 path[0].device == info->path[i].device && 206 path[0].function == info->path[i].function) { 207 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is 
broken - applying workaround\n",
			bus, path[0].device, path[0].function);
		return true;
	}

	return false;
}

/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
			  void *start, void *end, u16 segment,
			  struct dmar_dev_scope *devices,
			  int devices_cnt)
{
	int i, level;
	struct device *tmp, *dev = &info->dev->dev;
	struct acpi_dmar_device_scope *scope;
	struct acpi_dmar_pci_path *path;

	if (segment != info->seg)
		return 0;

	for (; start < end; start += scope->length) {
		scope = start;
		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			continue;

		path = (struct acpi_dmar_pci_path *)(scope + 1);
		level = (scope->length - sizeof(*scope)) / sizeof(*path);
		if (!dmar_match_pci_path(info, scope->bus, path, level))
			continue;

		/*
		 * We expect devices with endpoint scope to have normal PCI
		 * headers, and devices with bridge scope to have bridge PCI
		 * headers.  However PCI NTB devices may be listed in the
		 * DMAR table with bridge scope, even though they have a
		 * normal PCI header.  NTB devices are identified by class
		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
		 * for this special case.
		 */
		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
			pr_warn("Device scope type does not match for %s\n",
				pci_name(info->dev));
			return -EINVAL;
		}

		for_each_dev_scope(devices, devices_cnt, i, tmp)
			if (tmp == NULL) {
				devices[i].bus = info->dev->bus->number;
				devices[i].devfn = info->dev->devfn;
				rcu_assign_pointer(devices[i].dev,
						   get_device(dev));
				return 1;
			}
		BUG_ON(i >= devices_cnt);
	}

	return 0;
}

int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
			  struct dmar_dev_scope *devices, int count)
{
	int index;
	struct device *tmp;

	if (info->seg != segment)
		return 0;

	for_each_active_dev_scope(devices, count, index, tmp)
		if (tmp == &info->dev->dev) {
			RCU_INIT_POINTER(devices[index].dev, NULL);
			synchronize_rcu();
			put_device(tmp);
			return 1;
		}

	return 0;
}

static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
{
	int ret = 0;
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;

	for_each_drhd_unit(dmaru) {
		if (dmaru->include_all)
			continue;

		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit, header);
		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
				((void *)drhd) + drhd->header.length,
				dmaru->segment,
				dmaru->devices, dmaru->devices_cnt);
		if (ret)
			break;
	}
	if (ret >= 0)
		ret = dmar_iommu_notify_scope_dev(info);
	if (ret < 0 && dmar_dev_scope_status == 0)
		dmar_dev_scope_status = ret;

	if (ret >= 0)
		intel_irq_remap_add_device(info);

	return ret;
}

static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
{
	struct dmar_drhd_unit *dmaru;

	for_each_drhd_unit(dmaru)
		if (dmar_remove_dev_scope(info, dmaru->segment,
					  dmaru->devices, dmaru->devices_cnt))
break; 331 dmar_iommu_notify_scope_dev(info); 332 } 333 334 static inline void vf_inherit_msi_domain(struct pci_dev *pdev) 335 { 336 struct pci_dev *physfn = pci_physfn(pdev); 337 338 dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev)); 339 } 340 341 static int dmar_pci_bus_notifier(struct notifier_block *nb, 342 unsigned long action, void *data) 343 { 344 struct pci_dev *pdev = to_pci_dev(data); 345 struct dmar_pci_notify_info *info; 346 347 /* Only care about add/remove events for physical functions. 348 * For VFs we actually do the lookup based on the corresponding 349 * PF in device_to_iommu() anyway. */ 350 if (pdev->is_virtfn) { 351 /* 352 * Ensure that the VF device inherits the irq domain of the 353 * PF device. Ideally the device would inherit the domain 354 * from the bus, but DMAR can have multiple units per bus 355 * which makes this impossible. The VF 'bus' could inherit 356 * from the PF device, but that's yet another x86'sism to 357 * inflict on everybody else. 358 */ 359 if (action == BUS_NOTIFY_ADD_DEVICE) 360 vf_inherit_msi_domain(pdev); 361 return NOTIFY_DONE; 362 } 363 364 if (action != BUS_NOTIFY_ADD_DEVICE && 365 action != BUS_NOTIFY_REMOVED_DEVICE) 366 return NOTIFY_DONE; 367 368 info = dmar_alloc_pci_notify_info(pdev, action); 369 if (!info) 370 return NOTIFY_DONE; 371 372 down_write(&dmar_global_lock); 373 if (action == BUS_NOTIFY_ADD_DEVICE) 374 dmar_pci_bus_add_dev(info); 375 else if (action == BUS_NOTIFY_REMOVED_DEVICE) 376 dmar_pci_bus_del_dev(info); 377 up_write(&dmar_global_lock); 378 379 dmar_free_pci_notify_info(info); 380 381 return NOTIFY_OK; 382 } 383 384 static struct notifier_block dmar_pci_bus_nb = { 385 .notifier_call = dmar_pci_bus_notifier, 386 .priority = INT_MIN, 387 }; 388 389 static struct dmar_drhd_unit * 390 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd) 391 { 392 struct dmar_drhd_unit *dmaru; 393 394 list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list, 395 dmar_rcu_check()) 396 if (dmaru->segment == drhd->segment && 397 dmaru->reg_base_addr == drhd->address) 398 return dmaru; 399 400 return NULL; 401 } 402 403 /* 404 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition 405 * structure which uniquely represent one DMA remapping hardware unit 406 * present in the platform 407 */ 408 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) 409 { 410 struct acpi_dmar_hardware_unit *drhd; 411 struct dmar_drhd_unit *dmaru; 412 int ret; 413 414 drhd = (struct acpi_dmar_hardware_unit *)header; 415 dmaru = dmar_find_dmaru(drhd); 416 if (dmaru) 417 goto out; 418 419 dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL); 420 if (!dmaru) 421 return -ENOMEM; 422 423 /* 424 * If header is allocated from slab by ACPI _DSM method, we need to 425 * copy the content because the memory buffer will be freed on return. 
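	 *
	 * A sketch of the resulting single allocation (layout shown for
	 * illustration only):
	 *
	 *   kzalloc(sizeof(*dmaru) + header->length)
	 *   +------------------------+----------------------------------+
	 *   | struct dmar_drhd_unit  | private copy of the ACPI DRHD     |
	 *   +------------------------+----------------------------------+
	 *   ^ dmaru                  ^ dmaru->hdr == (void *)(dmaru + 1)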
426 */ 427 dmaru->hdr = (void *)(dmaru + 1); 428 memcpy(dmaru->hdr, header, header->length); 429 dmaru->reg_base_addr = drhd->address; 430 dmaru->segment = drhd->segment; 431 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ 432 dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), 433 ((void *)drhd) + drhd->header.length, 434 &dmaru->devices_cnt); 435 if (dmaru->devices_cnt && dmaru->devices == NULL) { 436 kfree(dmaru); 437 return -ENOMEM; 438 } 439 440 ret = alloc_iommu(dmaru); 441 if (ret) { 442 dmar_free_dev_scope(&dmaru->devices, 443 &dmaru->devices_cnt); 444 kfree(dmaru); 445 return ret; 446 } 447 dmar_register_drhd_unit(dmaru); 448 449 out: 450 if (arg) 451 (*(int *)arg)++; 452 453 return 0; 454 } 455 456 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru) 457 { 458 if (dmaru->devices && dmaru->devices_cnt) 459 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt); 460 if (dmaru->iommu) 461 free_iommu(dmaru->iommu); 462 kfree(dmaru); 463 } 464 465 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, 466 void *arg) 467 { 468 struct acpi_dmar_andd *andd = (void *)header; 469 470 /* Check for NUL termination within the designated length */ 471 if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { 472 pr_warn(FW_BUG 473 "Your BIOS is broken; ANDD object name is not NUL-terminated\n" 474 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 475 dmi_get_system_info(DMI_BIOS_VENDOR), 476 dmi_get_system_info(DMI_BIOS_VERSION), 477 dmi_get_system_info(DMI_PRODUCT_VERSION)); 478 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 479 return -EINVAL; 480 } 481 pr_info("ANDD device: %x name: %s\n", andd->device_number, 482 andd->device_name); 483 484 return 0; 485 } 486 487 #ifdef CONFIG_ACPI_NUMA 488 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) 489 { 490 struct acpi_dmar_rhsa *rhsa; 491 struct dmar_drhd_unit *drhd; 492 493 rhsa = (struct acpi_dmar_rhsa *)header; 494 for_each_drhd_unit(drhd) { 495 if (drhd->reg_base_addr == rhsa->base_address) { 496 int node = pxm_to_node(rhsa->proximity_domain); 497 498 if (!node_online(node)) 499 node = NUMA_NO_NODE; 500 drhd->iommu->node = node; 501 return 0; 502 } 503 } 504 pr_warn(FW_BUG 505 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" 506 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 507 rhsa->base_address, 508 dmi_get_system_info(DMI_BIOS_VENDOR), 509 dmi_get_system_info(DMI_BIOS_VERSION), 510 dmi_get_system_info(DMI_PRODUCT_VERSION)); 511 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 512 513 return 0; 514 } 515 #else 516 #define dmar_parse_one_rhsa dmar_res_noop 517 #endif 518 519 static void 520 dmar_table_print_dmar_entry(struct acpi_dmar_header *header) 521 { 522 struct acpi_dmar_hardware_unit *drhd; 523 struct acpi_dmar_reserved_memory *rmrr; 524 struct acpi_dmar_atsr *atsr; 525 struct acpi_dmar_rhsa *rhsa; 526 struct acpi_dmar_satc *satc; 527 528 switch (header->type) { 529 case ACPI_DMAR_TYPE_HARDWARE_UNIT: 530 drhd = container_of(header, struct acpi_dmar_hardware_unit, 531 header); 532 pr_info("DRHD base: %#016Lx flags: %#x\n", 533 (unsigned long long)drhd->address, drhd->flags); 534 break; 535 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 536 rmrr = container_of(header, struct acpi_dmar_reserved_memory, 537 header); 538 pr_info("RMRR base: %#016Lx end: %#016Lx\n", 539 (unsigned long long)rmrr->base_address, 540 (unsigned long long)rmrr->end_address); 541 break; 542 case ACPI_DMAR_TYPE_ROOT_ATS: 543 atsr = 
container_of(header, struct acpi_dmar_atsr, header); 544 pr_info("ATSR flags: %#x\n", atsr->flags); 545 break; 546 case ACPI_DMAR_TYPE_HARDWARE_AFFINITY: 547 rhsa = container_of(header, struct acpi_dmar_rhsa, header); 548 pr_info("RHSA base: %#016Lx proximity domain: %#x\n", 549 (unsigned long long)rhsa->base_address, 550 rhsa->proximity_domain); 551 break; 552 case ACPI_DMAR_TYPE_NAMESPACE: 553 /* We don't print this here because we need to sanity-check 554 it first. So print it in dmar_parse_one_andd() instead. */ 555 break; 556 case ACPI_DMAR_TYPE_SATC: 557 satc = container_of(header, struct acpi_dmar_satc, header); 558 pr_info("SATC flags: 0x%x\n", satc->flags); 559 break; 560 } 561 } 562 563 /** 564 * dmar_table_detect - checks to see if the platform supports DMAR devices 565 */ 566 static int __init dmar_table_detect(void) 567 { 568 acpi_status status = AE_OK; 569 570 /* if we could find DMAR table, then there are DMAR devices */ 571 status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); 572 573 if (ACPI_SUCCESS(status) && !dmar_tbl) { 574 pr_warn("Unable to map DMAR\n"); 575 status = AE_NOT_FOUND; 576 } 577 578 return ACPI_SUCCESS(status) ? 0 : -ENOENT; 579 } 580 581 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, 582 size_t len, struct dmar_res_callback *cb) 583 { 584 struct acpi_dmar_header *iter, *next; 585 struct acpi_dmar_header *end = ((void *)start) + len; 586 587 for (iter = start; iter < end; iter = next) { 588 next = (void *)iter + iter->length; 589 if (iter->length == 0) { 590 /* Avoid looping forever on bad ACPI tables */ 591 pr_debug(FW_BUG "Invalid 0-length structure\n"); 592 break; 593 } else if (next > end) { 594 /* Avoid passing table end */ 595 pr_warn(FW_BUG "Record passes table end\n"); 596 return -EINVAL; 597 } 598 599 if (cb->print_entry) 600 dmar_table_print_dmar_entry(iter); 601 602 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) { 603 /* continue for forward compatibility */ 604 pr_debug("Unknown DMAR structure type %d\n", 605 iter->type); 606 } else if (cb->cb[iter->type]) { 607 int ret; 608 609 ret = cb->cb[iter->type](iter, cb->arg[iter->type]); 610 if (ret) 611 return ret; 612 } else if (!cb->ignore_unhandled) { 613 pr_warn("No handler for DMAR structure type %d\n", 614 iter->type); 615 return -EINVAL; 616 } 617 } 618 619 return 0; 620 } 621 622 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar, 623 struct dmar_res_callback *cb) 624 { 625 return dmar_walk_remapping_entries((void *)(dmar + 1), 626 dmar->header.length - sizeof(*dmar), cb); 627 } 628 629 /** 630 * parse_dmar_table - parses the DMA reporting table 631 */ 632 static int __init 633 parse_dmar_table(void) 634 { 635 struct acpi_table_dmar *dmar; 636 int drhd_count = 0; 637 int ret; 638 struct dmar_res_callback cb = { 639 .print_entry = true, 640 .ignore_unhandled = true, 641 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count, 642 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd, 643 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr, 644 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr, 645 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa, 646 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd, 647 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc, 648 }; 649 650 /* 651 * Do it again, earlier dmar_tbl mapping could be mapped with 652 * fixed map. 
653 */ 654 dmar_table_detect(); 655 656 /* 657 * ACPI tables may not be DMA protected by tboot, so use DMAR copy 658 * SINIT saved in SinitMleData in TXT heap (which is DMA protected) 659 */ 660 dmar_tbl = tboot_get_dmar_table(dmar_tbl); 661 662 dmar = (struct acpi_table_dmar *)dmar_tbl; 663 if (!dmar) 664 return -ENODEV; 665 666 if (dmar->width < PAGE_SHIFT - 1) { 667 pr_warn("Invalid DMAR haw\n"); 668 return -EINVAL; 669 } 670 671 pr_info("Host address width %d\n", dmar->width + 1); 672 ret = dmar_walk_dmar_table(dmar, &cb); 673 if (ret == 0 && drhd_count == 0) 674 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n"); 675 676 return ret; 677 } 678 679 static int dmar_pci_device_match(struct dmar_dev_scope devices[], 680 int cnt, struct pci_dev *dev) 681 { 682 int index; 683 struct device *tmp; 684 685 while (dev) { 686 for_each_active_dev_scope(devices, cnt, index, tmp) 687 if (dev_is_pci(tmp) && dev == to_pci_dev(tmp)) 688 return 1; 689 690 /* Check our parent */ 691 dev = dev->bus->self; 692 } 693 694 return 0; 695 } 696 697 struct dmar_drhd_unit * 698 dmar_find_matched_drhd_unit(struct pci_dev *dev) 699 { 700 struct dmar_drhd_unit *dmaru; 701 struct acpi_dmar_hardware_unit *drhd; 702 703 dev = pci_physfn(dev); 704 705 rcu_read_lock(); 706 for_each_drhd_unit(dmaru) { 707 drhd = container_of(dmaru->hdr, 708 struct acpi_dmar_hardware_unit, 709 header); 710 711 if (dmaru->include_all && 712 drhd->segment == pci_domain_nr(dev->bus)) 713 goto out; 714 715 if (dmar_pci_device_match(dmaru->devices, 716 dmaru->devices_cnt, dev)) 717 goto out; 718 } 719 dmaru = NULL; 720 out: 721 rcu_read_unlock(); 722 723 return dmaru; 724 } 725 726 static void __init dmar_acpi_insert_dev_scope(u8 device_number, 727 struct acpi_device *adev) 728 { 729 struct dmar_drhd_unit *dmaru; 730 struct acpi_dmar_hardware_unit *drhd; 731 struct acpi_dmar_device_scope *scope; 732 struct device *tmp; 733 int i; 734 struct acpi_dmar_pci_path *path; 735 736 for_each_drhd_unit(dmaru) { 737 drhd = container_of(dmaru->hdr, 738 struct acpi_dmar_hardware_unit, 739 header); 740 741 for (scope = (void *)(drhd + 1); 742 (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length; 743 scope = ((void *)scope) + scope->length) { 744 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE) 745 continue; 746 if (scope->enumeration_id != device_number) 747 continue; 748 749 path = (void *)(scope + 1); 750 pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n", 751 dev_name(&adev->dev), dmaru->reg_base_addr, 752 scope->bus, path->device, path->function); 753 for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp) 754 if (tmp == NULL) { 755 dmaru->devices[i].bus = scope->bus; 756 dmaru->devices[i].devfn = PCI_DEVFN(path->device, 757 path->function); 758 rcu_assign_pointer(dmaru->devices[i].dev, 759 get_device(&adev->dev)); 760 return; 761 } 762 BUG_ON(i >= dmaru->devices_cnt); 763 } 764 } 765 pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n", 766 device_number, dev_name(&adev->dev)); 767 } 768 769 static int __init dmar_acpi_dev_scope_init(void) 770 { 771 struct acpi_dmar_andd *andd; 772 773 if (dmar_tbl == NULL) 774 return -ENODEV; 775 776 for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); 777 ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length; 778 andd = ((void *)andd) + andd->header.length) { 779 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) { 780 acpi_handle h; 781 struct acpi_device *adev; 782 783 if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT, 784 
andd->device_name, 785 &h))) { 786 pr_err("Failed to find handle for ACPI object %s\n", 787 andd->device_name); 788 continue; 789 } 790 adev = acpi_fetch_acpi_dev(h); 791 if (!adev) { 792 pr_err("Failed to get device for ACPI object %s\n", 793 andd->device_name); 794 continue; 795 } 796 dmar_acpi_insert_dev_scope(andd->device_number, adev); 797 } 798 } 799 return 0; 800 } 801 802 int __init dmar_dev_scope_init(void) 803 { 804 struct pci_dev *dev = NULL; 805 struct dmar_pci_notify_info *info; 806 807 if (dmar_dev_scope_status != 1) 808 return dmar_dev_scope_status; 809 810 if (list_empty(&dmar_drhd_units)) { 811 dmar_dev_scope_status = -ENODEV; 812 } else { 813 dmar_dev_scope_status = 0; 814 815 dmar_acpi_dev_scope_init(); 816 817 for_each_pci_dev(dev) { 818 if (dev->is_virtfn) 819 continue; 820 821 info = dmar_alloc_pci_notify_info(dev, 822 BUS_NOTIFY_ADD_DEVICE); 823 if (!info) { 824 return dmar_dev_scope_status; 825 } else { 826 dmar_pci_bus_add_dev(info); 827 dmar_free_pci_notify_info(info); 828 } 829 } 830 } 831 832 return dmar_dev_scope_status; 833 } 834 835 void __init dmar_register_bus_notifier(void) 836 { 837 bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); 838 } 839 840 841 int __init dmar_table_init(void) 842 { 843 static int dmar_table_initialized; 844 int ret; 845 846 if (dmar_table_initialized == 0) { 847 ret = parse_dmar_table(); 848 if (ret < 0) { 849 if (ret != -ENODEV) 850 pr_info("Parse DMAR table failure.\n"); 851 } else if (list_empty(&dmar_drhd_units)) { 852 pr_info("No DMAR devices found\n"); 853 ret = -ENODEV; 854 } 855 856 if (ret < 0) 857 dmar_table_initialized = ret; 858 else 859 dmar_table_initialized = 1; 860 } 861 862 return dmar_table_initialized < 0 ? dmar_table_initialized : 0; 863 } 864 865 static void warn_invalid_dmar(u64 addr, const char *message) 866 { 867 pr_warn_once(FW_BUG 868 "Your BIOS is broken; DMAR reported at address %llx%s!\n" 869 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 870 addr, message, 871 dmi_get_system_info(DMI_BIOS_VENDOR), 872 dmi_get_system_info(DMI_BIOS_VERSION), 873 dmi_get_system_info(DMI_PRODUCT_VERSION)); 874 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); 875 } 876 877 static int __ref 878 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg) 879 { 880 struct acpi_dmar_hardware_unit *drhd; 881 void __iomem *addr; 882 u64 cap, ecap; 883 884 drhd = (void *)entry; 885 if (!drhd->address) { 886 warn_invalid_dmar(0, ""); 887 return -EINVAL; 888 } 889 890 if (arg) 891 addr = ioremap(drhd->address, VTD_PAGE_SIZE); 892 else 893 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE); 894 if (!addr) { 895 pr_warn("Can't validate DRHD address: %llx\n", drhd->address); 896 return -EINVAL; 897 } 898 899 cap = dmar_readq(addr + DMAR_CAP_REG); 900 ecap = dmar_readq(addr + DMAR_ECAP_REG); 901 902 if (arg) 903 iounmap(addr); 904 else 905 early_iounmap(addr, VTD_PAGE_SIZE); 906 907 if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) { 908 warn_invalid_dmar(drhd->address, " returns all ones"); 909 return -EINVAL; 910 } 911 912 return 0; 913 } 914 915 int __init detect_intel_iommu(void) 916 { 917 int ret; 918 struct dmar_res_callback validate_drhd_cb = { 919 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd, 920 .ignore_unhandled = true, 921 }; 922 923 down_write(&dmar_global_lock); 924 ret = dmar_table_detect(); 925 if (!ret) 926 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, 927 &validate_drhd_cb); 928 if (!ret && !no_iommu && !iommu_detected && 929 (!dmar_disabled || dmar_platform_optin())) 
{
		iommu_detected = 1;
		/* Make sure ACS will be enabled */
		pci_request_acs();
	}

#ifdef CONFIG_X86
	if (!ret) {
		x86_init.iommu.iommu_init = intel_iommu_init;
		x86_platform.iommu_shutdown = intel_iommu_shutdown;
	}
#endif

	if (dmar_tbl) {
		acpi_put_table(dmar_tbl);
		dmar_tbl = NULL;
	}
	up_write(&dmar_global_lock);

	return ret ? ret : 1;
}

static void unmap_iommu(struct intel_iommu *iommu)
{
	iounmap(iommu->reg);
	release_mem_region(iommu->reg_phys, iommu->reg_size);
}

/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers.  Start with a single page, and
 * possibly expand if that turns out to be insufficient.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
	int map_size, err = 0;

	iommu->reg_phys = phys_addr;
	iommu->reg_size = VTD_PAGE_SIZE;

	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
		pr_err("Can't reserve memory\n");
		err = -EBUSY;
		goto out;
	}

	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
	if (!iommu->reg) {
		pr_err("Can't map the region\n");
		err = -ENOMEM;
		goto release;
	}

	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
		err = -EINVAL;
		warn_invalid_dmar(phys_addr, " returns all ones");
		goto unmap;
	}
	if (ecap_vcs(iommu->ecap))
		iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);

	/* the registers might be more than one page */
	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
			 cap_max_fault_reg_offset(iommu->cap));
	map_size = VTD_PAGE_ALIGN(map_size);
	if (map_size > iommu->reg_size) {
		iounmap(iommu->reg);
		release_mem_region(iommu->reg_phys, iommu->reg_size);
		iommu->reg_size = map_size;
		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
					iommu->name)) {
			pr_err("Can't reserve memory\n");
			err = -EBUSY;
			goto out;
		}
		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
		if (!iommu->reg) {
			pr_err("Can't map the region\n");
			err = -ENOMEM;
			goto release;
		}
	}
	err = 0;
	goto out;

unmap:
	iounmap(iommu->reg);
release:
	release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
	return err;
}

static int dmar_alloc_seq_id(struct intel_iommu *iommu)
{
	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
					    DMAR_UNITS_SUPPORTED);
	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
		iommu->seq_id = -1;
	} else {
		set_bit(iommu->seq_id, dmar_seq_ids);
		sprintf(iommu->name, "dmar%d", iommu->seq_id);
	}

	return iommu->seq_id;
}

static void dmar_free_seq_id(struct intel_iommu *iommu)
{
	if (iommu->seq_id >= 0) {
		clear_bit(iommu->seq_id, dmar_seq_ids);
		iommu->seq_id = -1;
	}
}

static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
	struct intel_iommu *iommu;
	u32 ver, sts;
	int agaw = -1;
	int msagaw = -1;
	int err;

	if (!drhd->reg_base_addr) {
		warn_invalid_dmar(0, "");
		return -EINVAL;
	}

	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return -ENOMEM;
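	/*
	 * For illustration: dmar_alloc_seq_id() below hands out the first
	 * clear bit in dmar_seq_ids, so units probed at boot typically
	 * become "dmar0", "dmar1", ... in discovery order, and a seq_id
	 * released by dmar_free_seq_id() on hot-remove can be reused by a
	 * later hot-added unit.
	 */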
1067 1068 if (dmar_alloc_seq_id(iommu) < 0) { 1069 pr_err("Failed to allocate seq_id\n"); 1070 err = -ENOSPC; 1071 goto error; 1072 } 1073 1074 err = map_iommu(iommu, drhd->reg_base_addr); 1075 if (err) { 1076 pr_err("Failed to map %s\n", iommu->name); 1077 goto error_free_seq_id; 1078 } 1079 1080 err = -EINVAL; 1081 if (cap_sagaw(iommu->cap) == 0) { 1082 pr_info("%s: No supported address widths. Not attempting DMA translation.\n", 1083 iommu->name); 1084 drhd->ignored = 1; 1085 } 1086 1087 if (!drhd->ignored) { 1088 agaw = iommu_calculate_agaw(iommu); 1089 if (agaw < 0) { 1090 pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", 1091 iommu->seq_id); 1092 drhd->ignored = 1; 1093 } 1094 } 1095 if (!drhd->ignored) { 1096 msagaw = iommu_calculate_max_sagaw(iommu); 1097 if (msagaw < 0) { 1098 pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", 1099 iommu->seq_id); 1100 drhd->ignored = 1; 1101 agaw = -1; 1102 } 1103 } 1104 iommu->agaw = agaw; 1105 iommu->msagaw = msagaw; 1106 iommu->segment = drhd->segment; 1107 1108 iommu->node = NUMA_NO_NODE; 1109 1110 ver = readl(iommu->reg + DMAR_VER_REG); 1111 pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", 1112 iommu->name, 1113 (unsigned long long)drhd->reg_base_addr, 1114 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), 1115 (unsigned long long)iommu->cap, 1116 (unsigned long long)iommu->ecap); 1117 1118 /* Reflect status in gcmd */ 1119 sts = readl(iommu->reg + DMAR_GSTS_REG); 1120 if (sts & DMA_GSTS_IRES) 1121 iommu->gcmd |= DMA_GCMD_IRE; 1122 if (sts & DMA_GSTS_TES) 1123 iommu->gcmd |= DMA_GCMD_TE; 1124 if (sts & DMA_GSTS_QIES) 1125 iommu->gcmd |= DMA_GCMD_QIE; 1126 1127 raw_spin_lock_init(&iommu->register_lock); 1128 1129 /* 1130 * This is only for hotplug; at boot time intel_iommu_enabled won't 1131 * be set yet. When intel_iommu_init() runs, it registers the units 1132 * present at boot time, then sets intel_iommu_enabled. 1133 */ 1134 if (intel_iommu_enabled && !drhd->ignored) { 1135 err = iommu_device_sysfs_add(&iommu->iommu, NULL, 1136 intel_iommu_groups, 1137 "%s", iommu->name); 1138 if (err) 1139 goto err_unmap; 1140 1141 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); 1142 if (err) 1143 goto err_sysfs; 1144 } 1145 1146 drhd->iommu = iommu; 1147 iommu->drhd = drhd; 1148 1149 return 0; 1150 1151 err_sysfs: 1152 iommu_device_sysfs_remove(&iommu->iommu); 1153 err_unmap: 1154 unmap_iommu(iommu); 1155 error_free_seq_id: 1156 dmar_free_seq_id(iommu); 1157 error: 1158 kfree(iommu); 1159 return err; 1160 } 1161 1162 static void free_iommu(struct intel_iommu *iommu) 1163 { 1164 if (intel_iommu_enabled && !iommu->drhd->ignored) { 1165 iommu_device_unregister(&iommu->iommu); 1166 iommu_device_sysfs_remove(&iommu->iommu); 1167 } 1168 1169 if (iommu->irq) { 1170 if (iommu->pr_irq) { 1171 free_irq(iommu->pr_irq, iommu); 1172 dmar_free_hwirq(iommu->pr_irq); 1173 iommu->pr_irq = 0; 1174 } 1175 free_irq(iommu->irq, iommu); 1176 dmar_free_hwirq(iommu->irq); 1177 iommu->irq = 0; 1178 } 1179 1180 if (iommu->qi) { 1181 free_page((unsigned long)iommu->qi->desc); 1182 kfree(iommu->qi->desc_status); 1183 kfree(iommu->qi); 1184 } 1185 1186 if (iommu->reg) 1187 unmap_iommu(iommu); 1188 1189 dmar_free_seq_id(iommu); 1190 kfree(iommu); 1191 } 1192 1193 /* 1194 * Reclaim all the submitted descriptors which have completed its work. 
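 *
 * A worked example (slot values assumed for illustration): if free_tail
 * is 3 and the status array holds [3] = QI_DONE, [4] = QI_ABORT and
 * [5] = QI_IN_USE, the loop below frees slots 3 and 4, bumps free_cnt
 * by two, and leaves free_tail at 5, the first slot still in use.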
1195 */ 1196 static inline void reclaim_free_desc(struct q_inval *qi) 1197 { 1198 while (qi->desc_status[qi->free_tail] == QI_DONE || 1199 qi->desc_status[qi->free_tail] == QI_ABORT) { 1200 qi->desc_status[qi->free_tail] = QI_FREE; 1201 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; 1202 qi->free_cnt++; 1203 } 1204 } 1205 1206 static const char *qi_type_string(u8 type) 1207 { 1208 switch (type) { 1209 case QI_CC_TYPE: 1210 return "Context-cache Invalidation"; 1211 case QI_IOTLB_TYPE: 1212 return "IOTLB Invalidation"; 1213 case QI_DIOTLB_TYPE: 1214 return "Device-TLB Invalidation"; 1215 case QI_IEC_TYPE: 1216 return "Interrupt Entry Cache Invalidation"; 1217 case QI_IWD_TYPE: 1218 return "Invalidation Wait"; 1219 case QI_EIOTLB_TYPE: 1220 return "PASID-based IOTLB Invalidation"; 1221 case QI_PC_TYPE: 1222 return "PASID-cache Invalidation"; 1223 case QI_DEIOTLB_TYPE: 1224 return "PASID-based Device-TLB Invalidation"; 1225 case QI_PGRP_RESP_TYPE: 1226 return "Page Group Response"; 1227 default: 1228 return "UNKNOWN"; 1229 } 1230 } 1231 1232 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault) 1233 { 1234 unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG); 1235 u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); 1236 struct qi_desc *desc = iommu->qi->desc + head; 1237 1238 if (fault & DMA_FSTS_IQE) 1239 pr_err("VT-d detected Invalidation Queue Error: Reason %llx", 1240 DMAR_IQER_REG_IQEI(iqe_err)); 1241 if (fault & DMA_FSTS_ITE) 1242 pr_err("VT-d detected Invalidation Time-out Error: SID %llx", 1243 DMAR_IQER_REG_ITESID(iqe_err)); 1244 if (fault & DMA_FSTS_ICE) 1245 pr_err("VT-d detected Invalidation Completion Error: SID %llx", 1246 DMAR_IQER_REG_ICESID(iqe_err)); 1247 1248 pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n", 1249 qi_type_string(desc->qw0 & 0xf), 1250 (unsigned long long)desc->qw0, 1251 (unsigned long long)desc->qw1); 1252 1253 head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH; 1254 head <<= qi_shift(iommu); 1255 desc = iommu->qi->desc + head; 1256 1257 pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n", 1258 qi_type_string(desc->qw0 & 0xf), 1259 (unsigned long long)desc->qw0, 1260 (unsigned long long)desc->qw1); 1261 } 1262 1263 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) 1264 { 1265 u32 fault; 1266 int head, tail; 1267 struct q_inval *qi = iommu->qi; 1268 int shift = qi_shift(iommu); 1269 1270 if (qi->desc_status[wait_index] == QI_ABORT) 1271 return -EAGAIN; 1272 1273 fault = readl(iommu->reg + DMAR_FSTS_REG); 1274 if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE)) 1275 qi_dump_fault(iommu, fault); 1276 1277 /* 1278 * If IQE happens, the head points to the descriptor associated 1279 * with the error. No new descriptors are fetched until the IQE 1280 * is cleared. 1281 */ 1282 if (fault & DMA_FSTS_IQE) { 1283 head = readl(iommu->reg + DMAR_IQH_REG); 1284 if ((head >> shift) == index) { 1285 struct qi_desc *desc = qi->desc + head; 1286 1287 /* 1288 * desc->qw2 and desc->qw3 are either reserved or 1289 * used by software as private data. We won't print 1290 * out these two qw's for security consideration. 1291 */ 1292 memcpy(desc, qi->desc + (wait_index << shift), 1293 1 << shift); 1294 writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); 1295 pr_info("Invalidation Queue Error (IQE) cleared\n"); 1296 return -EINVAL; 1297 } 1298 } 1299 1300 /* 1301 * If ITE happens, all pending wait_desc commands are aborted. 1302 * No new descriptors are fetched until the ITE is cleared. 
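	 *
	 * (The QI_ABORT status set in the walk below is what qi_submit_sync()
	 * later sees on its wait descriptor; this function then returns
	 * -EAGAIN and the caller resubmits the whole batch, see the restart
	 * label there.)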
1303 */ 1304 if (fault & DMA_FSTS_ITE) { 1305 head = readl(iommu->reg + DMAR_IQH_REG); 1306 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH; 1307 head |= 1; 1308 tail = readl(iommu->reg + DMAR_IQT_REG); 1309 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; 1310 1311 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); 1312 pr_info("Invalidation Time-out Error (ITE) cleared\n"); 1313 1314 do { 1315 if (qi->desc_status[head] == QI_IN_USE) 1316 qi->desc_status[head] = QI_ABORT; 1317 head = (head - 2 + QI_LENGTH) % QI_LENGTH; 1318 } while (head != tail); 1319 1320 if (qi->desc_status[wait_index] == QI_ABORT) 1321 return -EAGAIN; 1322 } 1323 1324 if (fault & DMA_FSTS_ICE) { 1325 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); 1326 pr_info("Invalidation Completion Error (ICE) cleared\n"); 1327 } 1328 1329 return 0; 1330 } 1331 1332 /* 1333 * Function to submit invalidation descriptors of all types to the queued 1334 * invalidation interface(QI). Multiple descriptors can be submitted at a 1335 * time, a wait descriptor will be appended to each submission to ensure 1336 * hardware has completed the invalidation before return. Wait descriptors 1337 * can be part of the submission but it will not be polled for completion. 1338 */ 1339 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, 1340 unsigned int count, unsigned long options) 1341 { 1342 struct q_inval *qi = iommu->qi; 1343 s64 devtlb_start_ktime = 0; 1344 s64 iotlb_start_ktime = 0; 1345 s64 iec_start_ktime = 0; 1346 struct qi_desc wait_desc; 1347 int wait_index, index; 1348 unsigned long flags; 1349 int offset, shift; 1350 int rc, i; 1351 u64 type; 1352 1353 if (!qi) 1354 return 0; 1355 1356 type = desc->qw0 & GENMASK_ULL(3, 0); 1357 1358 if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) && 1359 dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB)) 1360 iotlb_start_ktime = ktime_to_ns(ktime_get()); 1361 1362 if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) && 1363 dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB)) 1364 devtlb_start_ktime = ktime_to_ns(ktime_get()); 1365 1366 if (type == QI_IEC_TYPE && 1367 dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC)) 1368 iec_start_ktime = ktime_to_ns(ktime_get()); 1369 1370 restart: 1371 rc = 0; 1372 1373 raw_spin_lock_irqsave(&qi->q_lock, flags); 1374 /* 1375 * Check if we have enough empty slots in the queue to submit, 1376 * the calculation is based on: 1377 * # of desc + 1 wait desc + 1 space between head and tail 1378 */ 1379 while (qi->free_cnt < count + 2) { 1380 raw_spin_unlock_irqrestore(&qi->q_lock, flags); 1381 cpu_relax(); 1382 raw_spin_lock_irqsave(&qi->q_lock, flags); 1383 } 1384 1385 index = qi->free_head; 1386 wait_index = (index + count) % QI_LENGTH; 1387 shift = qi_shift(iommu); 1388 1389 for (i = 0; i < count; i++) { 1390 offset = ((index + i) % QI_LENGTH) << shift; 1391 memcpy(qi->desc + offset, &desc[i], 1 << shift); 1392 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; 1393 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1, 1394 desc[i].qw2, desc[i].qw3); 1395 } 1396 qi->desc_status[wait_index] = QI_IN_USE; 1397 1398 wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) | 1399 QI_IWD_STATUS_WRITE | QI_IWD_TYPE; 1400 if (options & QI_OPT_WAIT_DRAIN) 1401 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN; 1402 wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]); 1403 wait_desc.qw2 = 0; 1404 wait_desc.qw3 = 0; 1405 1406 offset = wait_index << shift; 1407 memcpy(qi->desc + offset, &wait_desc, 1 << shift); 1408 1409 qi->free_head = (qi->free_head + count + 1) % 
QI_LENGTH; 1410 qi->free_cnt -= count + 1; 1411 1412 /* 1413 * update the HW tail register indicating the presence of 1414 * new descriptors. 1415 */ 1416 writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); 1417 1418 while (qi->desc_status[wait_index] != QI_DONE) { 1419 /* 1420 * We will leave the interrupts disabled, to prevent interrupt 1421 * context to queue another cmd while a cmd is already submitted 1422 * and waiting for completion on this cpu. This is to avoid 1423 * a deadlock where the interrupt context can wait indefinitely 1424 * for free slots in the queue. 1425 */ 1426 rc = qi_check_fault(iommu, index, wait_index); 1427 if (rc) 1428 break; 1429 1430 raw_spin_unlock(&qi->q_lock); 1431 cpu_relax(); 1432 raw_spin_lock(&qi->q_lock); 1433 } 1434 1435 for (i = 0; i < count; i++) 1436 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE; 1437 1438 reclaim_free_desc(qi); 1439 raw_spin_unlock_irqrestore(&qi->q_lock, flags); 1440 1441 if (rc == -EAGAIN) 1442 goto restart; 1443 1444 if (iotlb_start_ktime) 1445 dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB, 1446 ktime_to_ns(ktime_get()) - iotlb_start_ktime); 1447 1448 if (devtlb_start_ktime) 1449 dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB, 1450 ktime_to_ns(ktime_get()) - devtlb_start_ktime); 1451 1452 if (iec_start_ktime) 1453 dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC, 1454 ktime_to_ns(ktime_get()) - iec_start_ktime); 1455 1456 return rc; 1457 } 1458 1459 /* 1460 * Flush the global interrupt entry cache. 1461 */ 1462 void qi_global_iec(struct intel_iommu *iommu) 1463 { 1464 struct qi_desc desc; 1465 1466 desc.qw0 = QI_IEC_TYPE; 1467 desc.qw1 = 0; 1468 desc.qw2 = 0; 1469 desc.qw3 = 0; 1470 1471 /* should never fail */ 1472 qi_submit_sync(iommu, &desc, 1, 0); 1473 } 1474 1475 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, 1476 u64 type) 1477 { 1478 struct qi_desc desc; 1479 1480 desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did) 1481 | QI_CC_GRAN(type) | QI_CC_TYPE; 1482 desc.qw1 = 0; 1483 desc.qw2 = 0; 1484 desc.qw3 = 0; 1485 1486 qi_submit_sync(iommu, &desc, 1, 0); 1487 } 1488 1489 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 1490 unsigned int size_order, u64 type) 1491 { 1492 u8 dw = 0, dr = 0; 1493 1494 struct qi_desc desc; 1495 int ih = 0; 1496 1497 if (cap_write_drain(iommu->cap)) 1498 dw = 1; 1499 1500 if (cap_read_drain(iommu->cap)) 1501 dr = 1; 1502 1503 desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) 1504 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; 1505 desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) 1506 | QI_IOTLB_AM(size_order); 1507 desc.qw2 = 0; 1508 desc.qw3 = 0; 1509 1510 qi_submit_sync(iommu, &desc, 1, 0); 1511 } 1512 1513 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 1514 u16 qdep, u64 addr, unsigned mask) 1515 { 1516 struct qi_desc desc; 1517 1518 if (mask) { 1519 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; 1520 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; 1521 } else 1522 desc.qw1 = QI_DEV_IOTLB_ADDR(addr); 1523 1524 if (qdep >= QI_DEV_IOTLB_MAX_INVS) 1525 qdep = 0; 1526 1527 desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | 1528 QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); 1529 desc.qw2 = 0; 1530 desc.qw3 = 0; 1531 1532 qi_submit_sync(iommu, &desc, 1, 0); 1533 } 1534 1535 /* PASID-based IOTLB invalidation */ 1536 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, 1537 unsigned long npages, bool ih) 1538 { 1539 struct qi_desc desc = {.qw2 = 0, .qw3 = 
0}; 1540 1541 /* 1542 * npages == -1 means a PASID-selective invalidation, otherwise, 1543 * a positive value for Page-selective-within-PASID invalidation. 1544 * 0 is not a valid input. 1545 */ 1546 if (WARN_ON(!npages)) { 1547 pr_err("Invalid input npages = %ld\n", npages); 1548 return; 1549 } 1550 1551 if (npages == -1) { 1552 desc.qw0 = QI_EIOTLB_PASID(pasid) | 1553 QI_EIOTLB_DID(did) | 1554 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | 1555 QI_EIOTLB_TYPE; 1556 desc.qw1 = 0; 1557 } else { 1558 int mask = ilog2(__roundup_pow_of_two(npages)); 1559 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); 1560 1561 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) 1562 addr = ALIGN_DOWN(addr, align); 1563 1564 desc.qw0 = QI_EIOTLB_PASID(pasid) | 1565 QI_EIOTLB_DID(did) | 1566 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | 1567 QI_EIOTLB_TYPE; 1568 desc.qw1 = QI_EIOTLB_ADDR(addr) | 1569 QI_EIOTLB_IH(ih) | 1570 QI_EIOTLB_AM(mask); 1571 } 1572 1573 qi_submit_sync(iommu, &desc, 1, 0); 1574 } 1575 1576 /* PASID-based device IOTLB Invalidate */ 1577 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, 1578 u32 pasid, u16 qdep, u64 addr, unsigned int size_order) 1579 { 1580 unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); 1581 struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; 1582 1583 desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | 1584 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | 1585 QI_DEV_IOTLB_PFSID(pfsid); 1586 1587 /* 1588 * If S bit is 0, we only flush a single page. If S bit is set, 1589 * The least significant zero bit indicates the invalidation address 1590 * range. VT-d spec 6.5.2.6. 1591 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. 1592 * size order = 0 is PAGE_SIZE 4KB 1593 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in 1594 * ECAP. 1595 */ 1596 if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) 1597 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", 1598 addr, size_order); 1599 1600 /* Take page address */ 1601 desc.qw1 = QI_DEV_EIOTLB_ADDR(addr); 1602 1603 if (size_order) { 1604 /* 1605 * Existing 0s in address below size_order may be the least 1606 * significant bit, we must set them to 1s to avoid having 1607 * smaller size than desired. 1608 */ 1609 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, 1610 VTD_PAGE_SHIFT); 1611 /* Clear size_order bit to indicate size */ 1612 desc.qw1 &= ~mask; 1613 /* Set the S bit to indicate flushing more than 1 page */ 1614 desc.qw1 |= QI_DEV_EIOTLB_SIZE; 1615 } 1616 1617 qi_submit_sync(iommu, &desc, 1, 0); 1618 } 1619 1620 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, 1621 u64 granu, u32 pasid) 1622 { 1623 struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; 1624 1625 desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) | 1626 QI_PC_GRAN(granu) | QI_PC_TYPE; 1627 qi_submit_sync(iommu, &desc, 1, 0); 1628 } 1629 1630 /* 1631 * Disable Queued Invalidation interface. 1632 */ 1633 void dmar_disable_qi(struct intel_iommu *iommu) 1634 { 1635 unsigned long flags; 1636 u32 sts; 1637 cycles_t start_time = get_cycles(); 1638 1639 if (!ecap_qis(iommu->ecap)) 1640 return; 1641 1642 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1643 1644 sts = readl(iommu->reg + DMAR_GSTS_REG); 1645 if (!(sts & DMA_GSTS_QIES)) 1646 goto end; 1647 1648 /* 1649 * Give a chance to HW to complete the pending invalidation requests. 
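	 *
	 * (That is, spin until the hardware head, DMAR_IQH_REG, catches up
	 * with the software tail, DMAR_IQT_REG, or give up once the loop
	 * below has waited longer than DMAR_OPERATION_TIMEOUT.)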
1650 */ 1651 while ((readl(iommu->reg + DMAR_IQT_REG) != 1652 readl(iommu->reg + DMAR_IQH_REG)) && 1653 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time))) 1654 cpu_relax(); 1655 1656 iommu->gcmd &= ~DMA_GCMD_QIE; 1657 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1658 1659 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, 1660 !(sts & DMA_GSTS_QIES), sts); 1661 end: 1662 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 1663 } 1664 1665 /* 1666 * Enable queued invalidation. 1667 */ 1668 static void __dmar_enable_qi(struct intel_iommu *iommu) 1669 { 1670 u32 sts; 1671 unsigned long flags; 1672 struct q_inval *qi = iommu->qi; 1673 u64 val = virt_to_phys(qi->desc); 1674 1675 qi->free_head = qi->free_tail = 0; 1676 qi->free_cnt = QI_LENGTH; 1677 1678 /* 1679 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability 1680 * is present. 1681 */ 1682 if (ecap_smts(iommu->ecap)) 1683 val |= (1 << 11) | 1; 1684 1685 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1686 1687 /* write zero to the tail reg */ 1688 writel(0, iommu->reg + DMAR_IQT_REG); 1689 1690 dmar_writeq(iommu->reg + DMAR_IQA_REG, val); 1691 1692 iommu->gcmd |= DMA_GCMD_QIE; 1693 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1694 1695 /* Make sure hardware complete it */ 1696 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); 1697 1698 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 1699 } 1700 1701 /* 1702 * Enable Queued Invalidation interface. This is a must to support 1703 * interrupt-remapping. Also used by DMA-remapping, which replaces 1704 * register based IOTLB invalidation. 1705 */ 1706 int dmar_enable_qi(struct intel_iommu *iommu) 1707 { 1708 struct q_inval *qi; 1709 struct page *desc_page; 1710 1711 if (!ecap_qis(iommu->ecap)) 1712 return -ENOENT; 1713 1714 /* 1715 * queued invalidation is already setup and enabled. 1716 */ 1717 if (iommu->qi) 1718 return 0; 1719 1720 iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC); 1721 if (!iommu->qi) 1722 return -ENOMEM; 1723 1724 qi = iommu->qi; 1725 1726 /* 1727 * Need two pages to accommodate 256 descriptors of 256 bits each 1728 * if the remapping hardware supports scalable mode translation. 1729 */ 1730 desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 1731 !!ecap_smts(iommu->ecap)); 1732 if (!desc_page) { 1733 kfree(qi); 1734 iommu->qi = NULL; 1735 return -ENOMEM; 1736 } 1737 1738 qi->desc = page_address(desc_page); 1739 1740 qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); 1741 if (!qi->desc_status) { 1742 free_page((unsigned long) qi->desc); 1743 kfree(qi); 1744 iommu->qi = NULL; 1745 return -ENOMEM; 1746 } 1747 1748 raw_spin_lock_init(&qi->q_lock); 1749 1750 __dmar_enable_qi(iommu); 1751 1752 return 0; 1753 } 1754 1755 /* iommu interrupt handling. Most stuff are MSI-like. 
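 *
 * Fault reasons reported through these interrupts are decoded by
 * dmar_get_fault_reason() below.  For illustration, reason code 0x21
 * maps to irq_remap_fault_reasons[1] ("Interrupt index exceeded the
 * interrupt-remapping table size") and 0x38 maps to
 * dma_remap_sm_fault_reasons[8] ("SM: Error attempting to access Root
 * Entry"), while smaller codes index dma_remap_fault_reasons[] directly
 * (anything else is reported as "Unknown").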
*/ 1756 1757 enum faulttype { 1758 DMA_REMAP, 1759 INTR_REMAP, 1760 UNKNOWN, 1761 }; 1762 1763 static const char *dma_remap_fault_reasons[] = 1764 { 1765 "Software", 1766 "Present bit in root entry is clear", 1767 "Present bit in context entry is clear", 1768 "Invalid context entry", 1769 "Access beyond MGAW", 1770 "PTE Write access is not set", 1771 "PTE Read access is not set", 1772 "Next page table ptr is invalid", 1773 "Root table address invalid", 1774 "Context table ptr is invalid", 1775 "non-zero reserved fields in RTP", 1776 "non-zero reserved fields in CTP", 1777 "non-zero reserved fields in PTE", 1778 "PCE for translation request specifies blocking", 1779 }; 1780 1781 static const char * const dma_remap_sm_fault_reasons[] = { 1782 "SM: Invalid Root Table Address", 1783 "SM: TTM 0 for request with PASID", 1784 "SM: TTM 0 for page group request", 1785 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */ 1786 "SM: Error attempting to access Root Entry", 1787 "SM: Present bit in Root Entry is clear", 1788 "SM: Non-zero reserved field set in Root Entry", 1789 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */ 1790 "SM: Error attempting to access Context Entry", 1791 "SM: Present bit in Context Entry is clear", 1792 "SM: Non-zero reserved field set in the Context Entry", 1793 "SM: Invalid Context Entry", 1794 "SM: DTE field in Context Entry is clear", 1795 "SM: PASID Enable field in Context Entry is clear", 1796 "SM: PASID is larger than the max in Context Entry", 1797 "SM: PRE field in Context-Entry is clear", 1798 "SM: RID_PASID field error in Context-Entry", 1799 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */ 1800 "SM: Error attempting to access the PASID Directory Entry", 1801 "SM: Present bit in Directory Entry is clear", 1802 "SM: Non-zero reserved field set in PASID Directory Entry", 1803 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */ 1804 "SM: Error attempting to access PASID Table Entry", 1805 "SM: Present bit in PASID Table Entry is clear", 1806 "SM: Non-zero reserved field set in PASID Table Entry", 1807 "SM: Invalid Scalable-Mode PASID Table Entry", 1808 "SM: ERE field is clear in PASID Table Entry", 1809 "SM: SRE field is clear in PASID Table Entry", 1810 "Unknown", "Unknown",/* 0x5E-0x5F */ 1811 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */ 1812 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */ 1813 "SM: Error attempting to access first-level paging entry", 1814 "SM: Present bit in first-level paging entry is clear", 1815 "SM: Non-zero reserved field set in first-level paging entry", 1816 "SM: Error attempting to access FL-PML4 entry", 1817 "SM: First-level entry address beyond MGAW in Nested translation", 1818 "SM: Read permission error in FL-PML4 entry in Nested translation", 1819 "SM: Read permission error in first-level paging entry in Nested translation", 1820 "SM: Write permission error in first-level paging entry in Nested translation", 1821 "SM: Error attempting to access second-level paging entry", 1822 "SM: Read/Write permission error in second-level paging entry", 1823 "SM: Non-zero reserved field set in second-level paging entry", 1824 "SM: Invalid second-level page table pointer", 1825 "SM: A/D bit update needed in second-level entry when set up in no snoop", 1826 "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */ 1827 "SM: Address in 
first-level translation is not canonical", 1828 "SM: U/S set 0 for first-level translation with user privilege", 1829 "SM: No execute permission for request with PASID and ER=1", 1830 "SM: Address beyond the DMA hardware max", 1831 "SM: Second-level entry address beyond the max", 1832 "SM: No write permission for Write/AtomicOp request", 1833 "SM: No read permission for Read/AtomicOp request", 1834 "SM: Invalid address-interrupt address", 1835 "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */ 1836 "SM: A/D bit update needed in first-level entry when set up in no snoop", 1837 }; 1838 1839 static const char *irq_remap_fault_reasons[] = 1840 { 1841 "Detected reserved fields in the decoded interrupt-remapped request", 1842 "Interrupt index exceeded the interrupt-remapping table size", 1843 "Present field in the IRTE entry is clear", 1844 "Error accessing interrupt-remapping table pointed by IRTA_REG", 1845 "Detected reserved fields in the IRTE entry", 1846 "Blocked a compatibility format interrupt request", 1847 "Blocked an interrupt request due to source-id verification failure", 1848 }; 1849 1850 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) 1851 { 1852 if (fault_reason >= 0x20 && (fault_reason - 0x20 < 1853 ARRAY_SIZE(irq_remap_fault_reasons))) { 1854 *fault_type = INTR_REMAP; 1855 return irq_remap_fault_reasons[fault_reason - 0x20]; 1856 } else if (fault_reason >= 0x30 && (fault_reason - 0x30 < 1857 ARRAY_SIZE(dma_remap_sm_fault_reasons))) { 1858 *fault_type = DMA_REMAP; 1859 return dma_remap_sm_fault_reasons[fault_reason - 0x30]; 1860 } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { 1861 *fault_type = DMA_REMAP; 1862 return dma_remap_fault_reasons[fault_reason]; 1863 } else { 1864 *fault_type = UNKNOWN; 1865 return "Unknown"; 1866 } 1867 } 1868 1869 1870 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq) 1871 { 1872 if (iommu->irq == irq) 1873 return DMAR_FECTL_REG; 1874 else if (iommu->pr_irq == irq) 1875 return DMAR_PECTL_REG; 1876 else 1877 BUG(); 1878 } 1879 1880 void dmar_msi_unmask(struct irq_data *data) 1881 { 1882 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); 1883 int reg = dmar_msi_reg(iommu, data->irq); 1884 unsigned long flag; 1885 1886 /* unmask it */ 1887 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1888 writel(0, iommu->reg + reg); 1889 /* Read a reg to force flush the post write */ 1890 readl(iommu->reg + reg); 1891 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1892 } 1893 1894 void dmar_msi_mask(struct irq_data *data) 1895 { 1896 struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); 1897 int reg = dmar_msi_reg(iommu, data->irq); 1898 unsigned long flag; 1899 1900 /* mask it */ 1901 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1902 writel(DMA_FECTL_IM, iommu->reg + reg); 1903 /* Read a reg to force flush the post write */ 1904 readl(iommu->reg + reg); 1905 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1906 } 1907 1908 void dmar_msi_write(int irq, struct msi_msg *msg) 1909 { 1910 struct intel_iommu *iommu = irq_get_handler_data(irq); 1911 int reg = dmar_msi_reg(iommu, irq); 1912 unsigned long flag; 1913 1914 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1915 writel(msg->data, iommu->reg + reg + 4); 1916 writel(msg->address_lo, iommu->reg + reg + 8); 1917 writel(msg->address_hi, iommu->reg + reg + 12); 1918 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1919 } 1920 1921 void 
void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	int reg = dmar_msi_reg(iommu, irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + reg + 4);
	msg->address_lo = readl(iommu->reg + reg + 8);
	msg->address_hi = readl(iommu->reg + reg + 12);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
			     u8 fault_reason, u32 pasid, u16 source_id,
			     unsigned long long addr)
{
	const char *reason;
	int fault_type;

	reason = dmar_get_fault_reason(fault_reason, &fault_type);

	if (fault_type == INTR_REMAP) {
		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
		       PCI_FUNC(source_id & 0xFF), addr >> 48,
		       fault_reason, reason);

		return 0;
	}

	if (pasid == INVALID_IOASID)
		pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
		       type ? "DMA Read" : "DMA Write",
		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
		       PCI_FUNC(source_id & 0xFF), addr,
		       fault_reason, reason);
	else
		pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
		       type ? "DMA Read" : "DMA Write", pasid,
		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
		       PCI_FUNC(source_id & 0xFF), addr,
		       fault_reason, reason);

	dmar_fault_dump_ptes(iommu, source_id, addr, pasid);

	return 0;
}
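/*
 * Each primary fault record is 16 bytes wide.  dmar_fault() below walks
 * the fault recording registers starting at the index reported in
 * DMAR_FSTS_REG, logs every record whose fault (F) bit is set in the
 * highest dword, and then clears that record before moving on.
 */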
#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;
	static DEFINE_RATELIMIT_STATE(rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status && __ratelimit(&rs))
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto unlock_exit;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		/* Disable printing, simply clear the fault when ratelimited */
		bool ratelimited = !__ratelimit(&rs);
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		u32 pasid;
		int type;
		u32 data;
		bool pasid_present;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
			     fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		if (!ratelimited) {
			fault_reason = dma_frcd_fault_reason(data);
			type = dma_frcd_type(data);

			pasid = dma_frcd_pasid_value(data);
			data = readl(iommu->reg + reg +
				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
			source_id = dma_frcd_source_id(data);

			pasid_present = dma_frcd_pasid_present(data);
			guest_addr = dmar_readq(iommu->reg + reg +
					fault_index * PRIMARY_FAULT_REG_LEN);
			guest_addr = dma_frcd_page_addr(guest_addr);
		}

		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
		       fault_index * PRIMARY_FAULT_REG_LEN + 12);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

		if (!ratelimited)
			/* Using pasid -1 (INVALID_IOASID) if pasid is not present */
			dmar_fault_do_one(iommu, type, fault_reason,
					  pasid_present ? pasid : INVALID_IOASID,
					  source_id, guest_addr);

		fault_index++;
		if (fault_index >= cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		raw_spin_lock_irqsave(&iommu->register_lock, flag);
	}

	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
	       iommu->reg + DMAR_FSTS_REG);

unlock_exit:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	/*
	 * Check if the fault interrupt is already initialized.
	 */
	if (iommu->irq)
		return 0;

	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
	if (irq > 0) {
		iommu->irq = irq;
	} else {
		pr_err("No free IRQ vectors\n");
		return -EINVAL;
	}

	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
	if (ret)
		pr_err("Can't request irq\n");
	return ret;
}

int __init enable_drhd_fault_handling(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Enable fault control interrupt.
	 */
	for_each_iommu(iommu, drhd) {
		u32 fault_status;
		int ret = dmar_set_interrupt(iommu);

		if (ret) {
			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
			       (unsigned long long)drhd->reg_base_addr, ret);
			return -1;
		}

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(iommu->irq, iommu);
		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
	}

	return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
	if (!ecap_qis(iommu->ecap))
		return -ENOENT;

	if (!iommu->qi)
		return -ENOENT;

	/*
	 * First disable queued invalidation.
	 */
	dmar_disable_qi(iommu);
	/*
	 * Then enable queued invalidation again. Since there are no pending
	 * invalidation requests now, it's safe to re-enable queued
	 * invalidation.
	 */
	__dmar_enable_qi(iommu);

	return 0;
}
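/*
 * Note: the check below relies on the early-parsed dmar_tbl; bit 0 of the
 * DMAR table flags field is the INTR_REMAP flag, i.e. whether the
 * firmware advertises interrupt remapping support.
 */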
/*
 * Check interrupt remapping support in DMAR table description.
 */
int __init dmar_ir_support(void)
{
	struct acpi_table_dmar *dmar;

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return 0;
	return dmar->flags & 0x1;
}

/* Check whether DMAR units are in use */
static inline bool dmar_in_use(void)
{
	return irq_remapping_enabled || intel_iommu_enabled;
}

static int __init dmar_free_unused_resources(void)
{
	struct dmar_drhd_unit *dmaru, *dmaru_n;

	if (dmar_in_use())
		return 0;

	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);

	down_write(&dmar_global_lock);
	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
		list_del(&dmaru->list);
		dmar_free_drhd(dmaru);
	}
	up_write(&dmar_global_lock);

	return 0;
}

late_initcall(dmar_free_unused_resources);
IOMMU_INIT_POST(detect_intel_iommu);

/*
 * DMAR Hotplug Support
 * For more details, please refer to Intel(R) Virtualization Technology
 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
 * "Remapping Hardware Unit Hot Plug".
 */
static guid_t dmar_hp_guid =
	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);

/*
 * Currently there's only one revision and BIOS will not check the revision id,
 * so use 0 for safety.
 */
#define	DMAR_DSM_REV_ID		0
#define	DMAR_DSM_FUNC_DRHD	1
#define	DMAR_DSM_FUNC_ATSR	2
#define	DMAR_DSM_FUNC_RHSA	3
#define	DMAR_DSM_FUNC_SATC	4

static inline bool dmar_detect_dsm(acpi_handle handle, int func)
{
	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
}

static int dmar_walk_dsm_resource(acpi_handle handle, int func,
				  dmar_res_handler_t handler, void *arg)
{
	int ret = -ENODEV;
	union acpi_object *obj;
	struct acpi_dmar_header *start;
	struct dmar_res_callback callback;
	static int res_type[] = {
		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
		[DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
	};

	if (!dmar_detect_dsm(handle, func))
		return 0;

	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
				      func, NULL, ACPI_TYPE_BUFFER);
	if (!obj)
		return -ENODEV;

	memset(&callback, 0, sizeof(callback));
	callback.cb[res_type[func]] = handler;
	callback.arg[res_type[func]] = arg;
	start = (struct acpi_dmar_header *)obj->buffer.pointer;
	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);

	ACPI_FREE(obj);

	return ret;
}
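/*
 * The three helpers below implement the per-DRHD steps used by
 * dmar_hotplug_insert() and dmar_hotplug_remove(): hot-add a unit,
 * hot-remove it (refusing with -EBUSY while devices in its scope are
 * still present), and finally release its data structures.
 */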
static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
{
	int ret;
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (!dmaru)
		return -ENODEV;

	ret = dmar_ir_hotplug(dmaru, true);
	if (ret == 0)
		ret = dmar_iommu_hotplug(dmaru, true);

	return ret;
}

static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
{
	int i, ret;
	struct device *dev;
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (!dmaru)
		return 0;

	/*
	 * All PCI devices managed by this unit should have been destroyed.
	 */
	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
		for_each_active_dev_scope(dmaru->devices,
					  dmaru->devices_cnt, i, dev)
			return -EBUSY;
	}

	ret = dmar_ir_hotplug(dmaru, false);
	if (ret == 0)
		ret = dmar_iommu_hotplug(dmaru, false);

	return ret;
}

static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
{
	struct dmar_drhd_unit *dmaru;

	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
	if (dmaru) {
		list_del_rcu(&dmaru->list);
		synchronize_rcu();
		dmar_free_drhd(dmaru);
	}

	return 0;
}

static int dmar_hotplug_insert(acpi_handle handle)
{
	int ret;
	int drhd_count = 0;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_validate_one_drhd, (void *)1);
	if (ret)
		goto out;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_parse_one_drhd, (void *)&drhd_count);
	if (ret == 0 && drhd_count == 0) {
		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
		goto out;
	} else if (ret) {
		goto release_drhd;
	}

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
				     &dmar_parse_one_rhsa, NULL);
	if (ret)
		goto release_drhd;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
				     &dmar_parse_one_atsr, NULL);
	if (ret)
		goto release_atsr;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_hp_add_drhd, NULL);
	if (!ret)
		return 0;

	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
			       &dmar_hp_remove_drhd, NULL);
release_atsr:
	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
			       &dmar_release_one_atsr, NULL);
release_drhd:
	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
			       &dmar_hp_release_drhd, NULL);
out:
	return ret;
}

static int dmar_hotplug_remove(acpi_handle handle)
{
	int ret;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
				     &dmar_check_one_atsr, NULL);
	if (ret)
		return ret;

	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				     &dmar_hp_remove_drhd, NULL);
	if (ret == 0) {
		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
					       &dmar_release_one_atsr, NULL));
		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
					       &dmar_hp_release_drhd, NULL));
	} else {
		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
				       &dmar_hp_add_drhd, NULL);
	}

	return ret;
}

static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
				       void *context, void **retval)
{
	acpi_handle *phdl = retval;

	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
		*phdl = handle;
		return AE_CTRL_TERMINATE;
	}

	return AE_OK;
}
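/*
 * dmar_device_hotplug() below first looks for an ACPI handle that carries
 * the DMAR _DSM, either the handle passed in or one found by walking the
 * namespace underneath it, and then performs the insert or remove under
 * dmar_global_lock.
 */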
static int dmar_device_hotplug(acpi_handle handle, bool insert)
{
	int ret;
	acpi_handle tmp = NULL;
	acpi_status status;

	if (!dmar_in_use())
		return 0;

	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
		tmp = handle;
	} else {
		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
					     ACPI_UINT32_MAX,
					     dmar_get_dsm_handle,
					     NULL, NULL, &tmp);
		if (ACPI_FAILURE(status)) {
			pr_warn("Failed to locate _DSM method.\n");
			return -ENXIO;
		}
	}
	if (tmp == NULL)
		return 0;

	down_write(&dmar_global_lock);
	if (insert)
		ret = dmar_hotplug_insert(tmp);
	else
		ret = dmar_hotplug_remove(tmp);
	up_write(&dmar_global_lock);

	return ret;
}

int dmar_device_add(acpi_handle handle)
{
	return dmar_device_hotplug(handle, true);
}

int dmar_device_remove(acpi_handle handle)
{
	return dmar_device_hotplug(handle, false);
}

/*
 * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
 *
 * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
 * the ACPI DMAR table. This means that the platform boot firmware has made
 * sure no device can issue DMA outside of RMRR regions.
 */
bool dmar_platform_optin(void)
{
	struct acpi_table_dmar *dmar;
	acpi_status status;
	bool ret;

	status = acpi_get_table(ACPI_SIG_DMAR, 0,
				(struct acpi_table_header **)&dmar);
	if (ACPI_FAILURE(status))
		return false;

	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
	acpi_put_table((struct acpi_table_header *)dmar);

	return ret;
}
EXPORT_SYMBOL_GPL(dmar_platform_optin);
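/*
 * Usage sketch (illustrative only, not a caller in this file): code that
 * wants to honour the platform opt-in, for example when deciding whether
 * to keep DMA remapping enabled, can simply test the flag:
 *
 *	if (dmar_platform_optin())
 *		pr_info("Platform opted in for kernel DMA protection\n");
 */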