1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * cache.c - Intel VT-d cache invalidation 4 * 5 * Copyright (C) 2024 Intel Corporation 6 * 7 * Author: Lu Baolu <baolu.lu@linux.intel.com> 8 */ 9 10 #define pr_fmt(fmt) "DMAR: " fmt 11 12 #include <linux/dmar.h> 13 #include <linux/iommu.h> 14 #include <linux/memory.h> 15 #include <linux/pci.h> 16 #include <linux/spinlock.h> 17 18 #include "iommu.h" 19 #include "pasid.h" 20 #include "trace.h" 21 22 /* Check if an existing cache tag can be reused for a new association. */ 23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, 24 struct intel_iommu *iommu, struct device *dev, 25 ioasid_t pasid, enum cache_tag_type type) 26 { 27 if (tag->type != type) 28 return false; 29 30 if (tag->domain_id != domain_id || tag->pasid != pasid) 31 return false; 32 33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB) 34 return tag->iommu == iommu; 35 36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) 37 return tag->dev == dev; 38 39 return false; 40 } 41 42 /* Assign a cache tag with specified type to domain. 
*/
/*
 * A candidate tag is allocated up front, outside the lock. If a matching tag
 * is already on domain->cache_tags its user count is bumped and the candidate
 * is freed; otherwise the candidate is linked into the list.
 *
 * Returns 0 on success or -ENOMEM if the candidate cannot be allocated.
 */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
			    struct device *dev, ioasid_t pasid,
			    enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *new_tag, *cur, *reused = NULL;
	struct list_head *insert_after;
	unsigned long flags;
	bool is_devtlb;

	new_tag = kzalloc(sizeof(*new_tag), GFP_KERNEL);
	if (!new_tag)
		return -ENOMEM;

	is_devtlb = type == CACHE_TAG_DEVTLB ||
		    type == CACHE_TAG_NESTING_DEVTLB;

	new_tag->type = type;
	new_tag->iommu = iommu;
	new_tag->domain_id = did;
	new_tag->pasid = pasid;
	new_tag->users = 1;
	/* Device-TLB tags record the endpoint; IOTLB tags the IOMMU's device. */
	new_tag->dev = is_devtlb ? dev : iommu->iommu.dev;

	spin_lock_irqsave(&domain->cache_lock, flags);
	insert_after = &domain->cache_tags;
	list_for_each_entry(cur, &domain->cache_tags, node) {
		if (cache_tage_match(cur, did, iommu, dev, pasid, type)) {
			cur->users++;
			reused = cur;
			break;
		}
		/* Remember the last tag seen that belongs to the same iommu. */
		if (cur->iommu == iommu)
			insert_after = &cur->node;
	}

	/*
	 * Link cache tags of same iommu unit together, so corresponding
	 * flush ops can be batched for iommu unit.
	 */
	if (!reused)
		list_add(&new_tag->node, insert_after);
	spin_unlock_irqrestore(&domain->cache_lock, flags);

	if (reused) {
		/* An equivalent tag already existed; drop the candidate. */
		kfree(new_tag);
		trace_cache_tag_assign(reused);
		return 0;
	}

	trace_cache_tag_assign(new_tag);

	return 0;
}

/* Unassign a cache tag with specified type from domain.
*/ 94 static void cache_tag_unassign(struct dmar_domain *domain, u16 did, 95 struct device *dev, ioasid_t pasid, 96 enum cache_tag_type type) 97 { 98 struct device_domain_info *info = dev_iommu_priv_get(dev); 99 struct intel_iommu *iommu = info->iommu; 100 struct cache_tag *tag; 101 unsigned long flags; 102 103 spin_lock_irqsave(&domain->cache_lock, flags); 104 list_for_each_entry(tag, &domain->cache_tags, node) { 105 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) { 106 trace_cache_tag_unassign(tag); 107 if (--tag->users == 0) { 108 list_del(&tag->node); 109 kfree(tag); 110 } 111 break; 112 } 113 } 114 spin_unlock_irqrestore(&domain->cache_lock, flags); 115 } 116 117 /* domain->qi_batch will be freed in iommu_free_domain() path. */ 118 static int domain_qi_batch_alloc(struct dmar_domain *domain) 119 { 120 unsigned long flags; 121 int ret = 0; 122 123 spin_lock_irqsave(&domain->cache_lock, flags); 124 if (domain->qi_batch) 125 goto out_unlock; 126 127 domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC); 128 if (!domain->qi_batch) 129 ret = -ENOMEM; 130 out_unlock: 131 spin_unlock_irqrestore(&domain->cache_lock, flags); 132 133 return ret; 134 } 135 136 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did, 137 struct device *dev, ioasid_t pasid) 138 { 139 struct device_domain_info *info = dev_iommu_priv_get(dev); 140 int ret; 141 142 ret = domain_qi_batch_alloc(domain); 143 if (ret) 144 return ret; 145 146 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 147 if (ret || !info->ats_enabled) 148 return ret; 149 150 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); 151 if (ret) 152 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 153 154 return ret; 155 } 156 157 static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did, 158 struct device *dev, ioasid_t pasid) 159 { 160 struct device_domain_info *info = dev_iommu_priv_get(dev); 161 162 cache_tag_unassign(domain, 
did, dev, pasid, CACHE_TAG_IOTLB); 163 164 if (info->ats_enabled) 165 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); 166 } 167 168 static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did, 169 struct device *dev, ioasid_t pasid) 170 { 171 struct device_domain_info *info = dev_iommu_priv_get(dev); 172 int ret; 173 174 ret = domain_qi_batch_alloc(domain); 175 if (ret) 176 return ret; 177 178 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 179 if (ret || !info->ats_enabled) 180 return ret; 181 182 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); 183 if (ret) 184 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 185 186 return ret; 187 } 188 189 static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did, 190 struct device *dev, ioasid_t pasid) 191 { 192 struct device_domain_info *info = dev_iommu_priv_get(dev); 193 194 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 195 196 if (info->ats_enabled) 197 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); 198 } 199 200 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev) 201 { 202 struct device_domain_info *info = dev_iommu_priv_get(dev); 203 struct intel_iommu *iommu = info->iommu; 204 205 /* 206 * The driver assigns different domain IDs for all domains except 207 * the SVA type. 208 */ 209 if (domain->domain.type == IOMMU_DOMAIN_SVA) 210 return FLPT_DEFAULT_DID; 211 212 return domain_id_iommu(domain, iommu); 213 } 214 215 /* 216 * Assign cache tags to a domain when it's associated with a device's 217 * PASID using a specific domain ID. 218 * 219 * On success (return value of 0), cache tags are created and added to the 220 * domain's cache tag list. On failure (negative return value), an error 221 * code is returned indicating the reason for the failure. 
222 */ 223 int cache_tag_assign_domain(struct dmar_domain *domain, 224 struct device *dev, ioasid_t pasid) 225 { 226 u16 did = domain_get_id_for_dev(domain, dev); 227 int ret; 228 229 ret = __cache_tag_assign_domain(domain, did, dev, pasid); 230 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) 231 return ret; 232 233 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid); 234 if (ret) 235 __cache_tag_unassign_domain(domain, did, dev, pasid); 236 237 return ret; 238 } 239 240 /* 241 * Remove the cache tags associated with a device's PASID when the domain is 242 * detached from the device. 243 * 244 * The cache tags must be previously assigned to the domain by calling the 245 * assign interface. 246 */ 247 void cache_tag_unassign_domain(struct dmar_domain *domain, 248 struct device *dev, ioasid_t pasid) 249 { 250 u16 did = domain_get_id_for_dev(domain, dev); 251 252 __cache_tag_unassign_domain(domain, did, dev, pasid); 253 if (domain->domain.type == IOMMU_DOMAIN_NESTED) 254 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid); 255 } 256 257 static unsigned long calculate_psi_aligned_address(unsigned long start, 258 unsigned long end, 259 unsigned long *_pages, 260 unsigned long *_mask) 261 { 262 unsigned long pages = aligned_nrpages(start, end - start + 1); 263 unsigned long aligned_pages = __roundup_pow_of_two(pages); 264 unsigned long bitmask = aligned_pages - 1; 265 unsigned long mask = ilog2(aligned_pages); 266 unsigned long pfn = IOVA_PFN(start); 267 268 /* 269 * PSI masks the low order bits of the base address. If the 270 * address isn't aligned to the mask, then compute a mask value 271 * needed to ensure the target range is flushed. 272 */ 273 if (unlikely(bitmask & pfn)) { 274 unsigned long end_pfn = pfn + pages - 1, shared_bits; 275 276 /* 277 * Since end_pfn <= pfn + bitmask, the only way bits 278 * higher than bitmask can differ in pfn and end_pfn is 279 * by carrying. 
This means after masking out bitmask, 280 * high bits starting with the first set bit in 281 * shared_bits are all equal in both pfn and end_pfn. 282 */ 283 shared_bits = ~(pfn ^ end_pfn) & ~bitmask; 284 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; 285 aligned_pages = 1UL << mask; 286 } 287 288 *_pages = aligned_pages; 289 *_mask = mask; 290 291 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); 292 } 293 294 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) 295 { 296 if (!iommu || !batch->index) 297 return; 298 299 qi_submit_sync(iommu, batch->descs, batch->index, 0); 300 301 /* Reset the index value and clean the whole batch buffer. */ 302 memset(batch, 0, sizeof(*batch)); 303 } 304 305 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch) 306 { 307 if (++batch->index == QI_MAX_BATCHED_DESC_COUNT) 308 qi_batch_flush_descs(iommu, batch); 309 } 310 311 static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 312 unsigned int size_order, u64 type, 313 struct qi_batch *batch) 314 { 315 qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]); 316 qi_batch_increment_index(iommu, batch); 317 } 318 319 static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 320 u16 qdep, u64 addr, unsigned int mask, 321 struct qi_batch *batch) 322 { 323 /* 324 * According to VT-d spec, software is recommended to not submit any Device-TLB 325 * invalidation requests while address remapping hardware is disabled. 
326 */ 327 if (!(iommu->gcmd & DMA_GCMD_TE)) 328 return; 329 330 qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]); 331 qi_batch_increment_index(iommu, batch); 332 } 333 334 static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, 335 u64 addr, unsigned long npages, bool ih, 336 struct qi_batch *batch) 337 { 338 /* 339 * npages == -1 means a PASID-selective invalidation, otherwise, 340 * a positive value for Page-selective-within-PASID invalidation. 341 * 0 is not a valid input. 342 */ 343 if (!npages) 344 return; 345 346 qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]); 347 qi_batch_increment_index(iommu, batch); 348 } 349 350 static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 351 u32 pasid, u16 qdep, u64 addr, 352 unsigned int size_order, struct qi_batch *batch) 353 { 354 /* 355 * According to VT-d spec, software is recommended to not submit any 356 * Device-TLB invalidation requests while address remapping hardware 357 * is disabled. 358 */ 359 if (!(iommu->gcmd & DMA_GCMD_TE)) 360 return; 361 362 qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order, 363 &batch->descs[batch->index]); 364 qi_batch_increment_index(iommu, batch); 365 } 366 367 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, 368 unsigned long addr, unsigned long pages, 369 unsigned long mask, int ih) 370 { 371 struct intel_iommu *iommu = tag->iommu; 372 u64 type = DMA_TLB_PSI_FLUSH; 373 374 if (domain->use_first_level) { 375 qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr, 376 pages, ih, domain->qi_batch); 377 return; 378 } 379 380 /* 381 * Fallback to domain selective flush if no PSI support or the size 382 * is too big. 
383 */ 384 if (!cap_pgsel_inv(iommu->cap) || 385 mask > cap_max_amask_val(iommu->cap) || pages == -1) { 386 addr = 0; 387 mask = 0; 388 ih = 0; 389 type = DMA_TLB_DSI_FLUSH; 390 } 391 392 if (ecap_qis(iommu->ecap)) 393 qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type, 394 domain->qi_batch); 395 else 396 __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); 397 } 398 399 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag, 400 unsigned long addr, unsigned long mask) 401 { 402 struct intel_iommu *iommu = tag->iommu; 403 struct device_domain_info *info; 404 u16 sid; 405 406 info = dev_iommu_priv_get(tag->dev); 407 sid = PCI_DEVID(info->bus, info->devfn); 408 409 if (tag->pasid == IOMMU_NO_PASID) { 410 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 411 addr, mask, domain->qi_batch); 412 if (info->dtlb_extra_inval) 413 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 414 addr, mask, domain->qi_batch); 415 return; 416 } 417 418 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, 419 info->ats_qdep, addr, mask, domain->qi_batch); 420 if (info->dtlb_extra_inval) 421 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, 422 info->ats_qdep, addr, mask, 423 domain->qi_batch); 424 } 425 426 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) 427 { 428 struct intel_iommu *iommu = tag->iommu; 429 struct device_domain_info *info; 430 u16 sid; 431 432 info = dev_iommu_priv_get(tag->dev); 433 sid = PCI_DEVID(info->bus, info->devfn); 434 435 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, 436 MAX_AGAW_PFN_WIDTH, domain->qi_batch); 437 if (info->dtlb_extra_inval) 438 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, 439 MAX_AGAW_PFN_WIDTH, domain->qi_batch); 440 } 441 442 /* 443 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) 444 * when the memory mappings in the 
target domain have been modified. 445 */ 446 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, 447 unsigned long end, int ih) 448 { 449 struct intel_iommu *iommu = NULL; 450 unsigned long pages, mask, addr; 451 struct cache_tag *tag; 452 unsigned long flags; 453 454 addr = calculate_psi_aligned_address(start, end, &pages, &mask); 455 456 spin_lock_irqsave(&domain->cache_lock, flags); 457 list_for_each_entry(tag, &domain->cache_tags, node) { 458 if (iommu && iommu != tag->iommu) 459 qi_batch_flush_descs(iommu, domain->qi_batch); 460 iommu = tag->iommu; 461 462 switch (tag->type) { 463 case CACHE_TAG_IOTLB: 464 case CACHE_TAG_NESTING_IOTLB: 465 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih); 466 break; 467 case CACHE_TAG_NESTING_DEVTLB: 468 /* 469 * Address translation cache in device side caches the 470 * result of nested translation. There is no easy way 471 * to identify the exact set of nested translations 472 * affected by a change in S2. So just flush the entire 473 * device cache. 474 */ 475 addr = 0; 476 mask = MAX_AGAW_PFN_WIDTH; 477 fallthrough; 478 case CACHE_TAG_DEVTLB: 479 cache_tag_flush_devtlb_psi(domain, tag, addr, mask); 480 break; 481 } 482 483 trace_cache_tag_flush_range(tag, start, end, addr, pages, mask); 484 } 485 qi_batch_flush_descs(iommu, domain->qi_batch); 486 spin_unlock_irqrestore(&domain->cache_lock, flags); 487 } 488 489 /* 490 * Invalidates all ranges of IOVA when the memory mappings in the target 491 * domain have been modified. 
492 */ 493 void cache_tag_flush_all(struct dmar_domain *domain) 494 { 495 struct intel_iommu *iommu = NULL; 496 struct cache_tag *tag; 497 unsigned long flags; 498 499 spin_lock_irqsave(&domain->cache_lock, flags); 500 list_for_each_entry(tag, &domain->cache_tags, node) { 501 if (iommu && iommu != tag->iommu) 502 qi_batch_flush_descs(iommu, domain->qi_batch); 503 iommu = tag->iommu; 504 505 switch (tag->type) { 506 case CACHE_TAG_IOTLB: 507 case CACHE_TAG_NESTING_IOTLB: 508 cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0); 509 break; 510 case CACHE_TAG_DEVTLB: 511 case CACHE_TAG_NESTING_DEVTLB: 512 cache_tag_flush_devtlb_all(domain, tag); 513 break; 514 } 515 516 trace_cache_tag_flush_all(tag); 517 } 518 qi_batch_flush_descs(iommu, domain->qi_batch); 519 spin_unlock_irqrestore(&domain->cache_lock, flags); 520 } 521 522 /* 523 * Invalidate a range of IOVA when new mappings are created in the target 524 * domain. 525 * 526 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as 527 * Set, any software updates to remapping structures other than first- 528 * stage mapping requires explicit invalidation of the caches. 529 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires 530 * write buffer flushing, software must explicitly perform write-buffer 531 * flushing, if cache invalidation is not required. 
532 */ 533 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, 534 unsigned long end) 535 { 536 struct intel_iommu *iommu = NULL; 537 unsigned long pages, mask, addr; 538 struct cache_tag *tag; 539 unsigned long flags; 540 541 addr = calculate_psi_aligned_address(start, end, &pages, &mask); 542 543 spin_lock_irqsave(&domain->cache_lock, flags); 544 list_for_each_entry(tag, &domain->cache_tags, node) { 545 if (iommu && iommu != tag->iommu) 546 qi_batch_flush_descs(iommu, domain->qi_batch); 547 iommu = tag->iommu; 548 549 if (!cap_caching_mode(iommu->cap) || domain->use_first_level) { 550 iommu_flush_write_buffer(iommu); 551 continue; 552 } 553 554 if (tag->type == CACHE_TAG_IOTLB || 555 tag->type == CACHE_TAG_NESTING_IOTLB) 556 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0); 557 558 trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); 559 } 560 qi_batch_flush_descs(iommu, domain->qi_batch); 561 spin_unlock_irqrestore(&domain->cache_lock, flags); 562 } 563