// SPDX-License-Identifier: GPL-2.0
/*
 * cache.c - Intel VT-d cache invalidation
 *
 * Copyright (C) 2024 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "trace.h"

/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
			     struct intel_iommu *iommu, struct device *dev,
			     ioasid_t pasid, enum cache_tag_type type)
{
	if (tag->type != type)
		return false;

	if (tag->domain_id != domain_id || tag->pasid != pasid)
		return false;

	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
		return tag->iommu == iommu;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		return tag->dev == dev;

	return false;
}

/* Assign a cache tag with specified type to domain. */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
			    struct device *dev, ioasid_t pasid,
			    enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag, *temp;
	unsigned long flags;

	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
	if (!tag)
		return -ENOMEM;

	tag->type = type;
	tag->iommu = iommu;
	tag->domain_id = did;
	tag->pasid = pasid;
	tag->users = 1;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		tag->dev = dev;
	else
		tag->dev = iommu->iommu.dev;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(temp, &domain->cache_tags, node) {
		if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
			temp->users++;
			spin_unlock_irqrestore(&domain->cache_lock, flags);
			kfree(tag);
			trace_cache_tag_assign(temp);
			return 0;
		}
	}
	list_add_tail(&tag->node, &domain->cache_tags);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
	trace_cache_tag_assign(tag);

	return 0;
}

/* Unassign a cache tag with specified type from domain. */
static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
			       struct device *dev, ioasid_t pasid,
			       enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
			trace_cache_tag_unassign(tag);
			if (--tag->users == 0) {
				list_del(&tag->node);
				kfree(tag);
			}
			break;
		}
	}
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}
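
/*
 * Illustrative example (hypothetical devices, not part of this file): two
 * devices behind the same IOMMU attached to the same domain with the same
 * PASID end up sharing a single IOTLB tag, while Device-TLB tags remain
 * per device:
 *
 *	cache_tag_assign(domain, did, devA, pasid, CACHE_TAG_IOTLB);
 *	cache_tag_assign(domain, did, devB, pasid, CACHE_TAG_IOTLB);
 *		-> one IOTLB tag with users == 2
 *	cache_tag_assign(domain, did, devA, pasid, CACHE_TAG_DEVTLB);
 *	cache_tag_assign(domain, did, devB, pasid, CACHE_TAG_DEVTLB);
 *		-> two DEVTLB tags, one per device
 */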

/* domain->qi_batch will be freed in iommu_free_domain() path. */
static int domain_qi_batch_alloc(struct dmar_domain *domain)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&domain->cache_lock, flags);
	if (domain->qi_batch)
		goto out_unlock;

	domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
	if (!domain->qi_batch)
		ret = -ENOMEM;
out_unlock:
	spin_unlock_irqrestore(&domain->cache_lock, flags);

	return ret;
}

static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
				     struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	return ret;
}

static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
					struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}

static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
					    struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	return ret;
}

static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
					       struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}

static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * The driver assigns different domain IDs for all domains except
	 * the SVA type.
	 */
	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return FLPT_DEFAULT_DID;

	return domain_id_iommu(domain, iommu);
}
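
/*
 * Illustrative pairing of the interfaces below (hypothetical caller, not
 * part of this file): a domain attach path assigns the cache tags and the
 * matching detach path removes them again, e.g.
 *
 *	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
 *	if (ret)
 *		return ret;
 *	...
 *	cache_tag_unassign_domain(domain, dev, IOMMU_NO_PASID);
 */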

/*
 * Assign cache tags to a domain when it's associated with a device's
 * PASID using a specific domain ID.
 *
 * On success (return value of 0), cache tags are created and added to the
 * domain's cache tag list. On failure (negative return value), an error
 * code is returned indicating the reason for the failure.
 */
int cache_tag_assign_domain(struct dmar_domain *domain,
			    struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);
	int ret;

	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
		return ret;

	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
	if (ret)
		__cache_tag_unassign_domain(domain, did, dev, pasid);

	return ret;
}

/*
 * Remove the cache tags associated with a device's PASID when the domain is
 * detached from the device.
 *
 * The cache tags must be previously assigned to the domain by calling the
 * assign interface.
 */
void cache_tag_unassign_domain(struct dmar_domain *domain,
			       struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);

	__cache_tag_unassign_domain(domain, did, dev, pasid);
	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}

static unsigned long calculate_psi_aligned_address(unsigned long start,
						   unsigned long end,
						   unsigned long *_pages,
						   unsigned long *_mask)
{
	unsigned long pages = aligned_nrpages(start, end - start + 1);
	unsigned long aligned_pages = __roundup_pow_of_two(pages);
	unsigned long bitmask = aligned_pages - 1;
	unsigned long mask = ilog2(aligned_pages);
	unsigned long pfn = IOVA_PFN(start);

	/*
	 * PSI masks the low order bits of the base address. If the
	 * address isn't aligned to the mask, then compute a mask value
	 * needed to ensure the target range is flushed.
	 */
	if (unlikely(bitmask & pfn)) {
		unsigned long end_pfn = pfn + pages - 1, shared_bits;

		/*
		 * Since end_pfn <= pfn + bitmask, the only way bits
		 * higher than bitmask can differ in pfn and end_pfn is
		 * by carrying. This means after masking out bitmask,
		 * high bits starting with the first set bit in
		 * shared_bits are all equal in both pfn and end_pfn.
		 */
		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
		aligned_pages = 1UL << mask;
	}

	*_pages = aligned_pages;
	*_mask = mask;

	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}
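
/*
 * Worked example (assuming a 4KiB VTD_PAGE_SIZE): flushing the range
 * [0x1000, 0x4fff] covers 4 pages starting at pfn 1. aligned_pages is 4
 * and bitmask is 3, but pfn 1 is not aligned to that mask. pfn 1 (0b001)
 * and end_pfn 4 (0b100) first agree from bit 3 upwards, so mask becomes 3
 * and the function returns address 0 with 8 pages: one PSI covering
 * pfns 0-7, which includes the requested pfns 1-4.
 */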

static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (!iommu || !batch->index)
		return;

	qi_submit_sync(iommu, batch->descs, batch->index, 0);

	/* Reset the index value and clean the whole batch buffer. */
	memset(batch, 0, sizeof(*batch));
}

static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
		qi_batch_flush_descs(iommu, batch);
}

static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			       unsigned int size_order, u64 type,
			       struct qi_batch *batch)
{
	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
				   u16 qdep, u64 addr, unsigned int mask,
				   struct qi_batch *batch)
{
	/*
	 * According to the VT-d spec, software is recommended not to submit
	 * any Device-TLB invalidation requests while address remapping
	 * hardware is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
				u64 addr, unsigned long npages, bool ih,
				struct qi_batch *batch)
{
	/*
	 * npages == -1 means a PASID-selective invalidation; otherwise a
	 * positive value requests a Page-selective-within-PASID
	 * invalidation. 0 is not a valid input.
	 */
	if (!npages)
		return;

	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
					 u32 pasid, u16 qdep, u64 addr,
					 unsigned int size_order, struct qi_batch *batch)
{
	/*
	 * According to the VT-d spec, software is recommended not to submit
	 * any Device-TLB invalidation requests while address remapping
	 * hardware is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
				&batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}
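
/*
 * Batching in a nutshell (illustrative sketch of the helpers above, not a
 * separate code path): descriptors are accumulated in domain->qi_batch
 * under domain->cache_lock and submitted either when the batch reaches
 * QI_MAX_BATCHED_DESC_COUNT entries or explicitly at the end of a flush
 * walk, e.g.
 *
 *	qi_batch_add_iotlb(iommu, did, addr | ih, mask, type, batch);
 *	qi_batch_add_dev_iotlb(iommu, sid, pfsid, qdep, addr, mask, batch);
 *	...
 *	qi_batch_flush_descs(iommu, batch);
 */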

static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
				  unsigned long addr, unsigned long pages,
				  unsigned long mask, int ih)
{
	struct intel_iommu *iommu = tag->iommu;
	u64 type = DMA_TLB_PSI_FLUSH;

	if (domain->use_first_level) {
		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
				    pages, ih, domain->qi_batch);
		return;
	}

	/*
	 * Fall back to a domain-selective flush if PSI is not supported or
	 * the size is too big.
	 */
	if (!cap_pgsel_inv(iommu->cap) ||
	    mask > cap_max_amask_val(iommu->cap) || pages == -1) {
		addr = 0;
		mask = 0;
		ih = 0;
		type = DMA_TLB_DSI_FLUSH;
	}

	if (ecap_qis(iommu->ecap))
		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
				   domain->qi_batch);
	else
		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
}

static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
				       unsigned long addr, unsigned long mask)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	if (tag->pasid == IOMMU_NO_PASID) {
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
				       addr, mask, domain->qi_batch);
		if (info->dtlb_extra_inval)
			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
					       addr, mask, domain->qi_batch);
		return;
	}

	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
				     info->ats_qdep, addr, mask, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
					     info->ats_qdep, addr, mask,
					     domain->qi_batch);
}

static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
}

/*
 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
 * when the memory mappings in the target domain have been modified.
 */
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
			   unsigned long end, int ih)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
			break;
		case CACHE_TAG_NESTING_DEVTLB:
			/*
			 * The address translation cache on the device side
			 * caches the result of the nested translation. There
			 * is no easy way to identify the exact set of nested
			 * translations affected by a change in S2, so just
			 * flush the entire device cache.
			 */
			addr = 0;
			mask = MAX_AGAW_PFN_WIDTH;
			fallthrough;
		case CACHE_TAG_DEVTLB:
			cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
			break;
		}

		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}
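
/*
 * Illustrative unmap-path sketch (hypothetical caller, not part of this
 * file): after page table entries for an IOVA range are removed or
 * changed, the stale cached entries are purged with
 *
 *	cache_tag_flush_range(domain, start, start + size - 1, 0);
 *
 * where @ih may be set instead of 0 when only leaf entries changed and the
 * cached intermediate paging structures are still valid.
 */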

/*
 * Invalidates all ranges of IOVA when the memory mappings in the target
 * domain have been modified.
 */
void cache_tag_flush_all(struct dmar_domain *domain)
{
	struct intel_iommu *iommu = NULL;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
			break;
		case CACHE_TAG_DEVTLB:
		case CACHE_TAG_NESTING_DEVTLB:
			cache_tag_flush_devtlb_all(domain, tag);
			break;
		}

		trace_cache_tag_flush_all(tag);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/*
 * Invalidate a range of IOVA when new mappings are created in the target
 * domain.
 *
 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
 *   Set, any software updates to remapping structures other than first-
 *   stage mapping requires explicit invalidation of the caches.
 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
 *   write buffer flushing, software must explicitly perform write-buffer
 *   flushing, if cache invalidation is not required.
 */
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
			      unsigned long end)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
			iommu_flush_write_buffer(iommu);
			continue;
		}

		if (tag->type == CACHE_TAG_IOTLB ||
		    tag->type == CACHE_TAG_NESTING_IOTLB)
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);

		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}
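
/*
 * Illustrative map-path sketch (hypothetical caller, not part of this
 * file): when new mappings are created, for example after a successful
 * map operation, caches holding non-present entries are handled with
 *
 *	cache_tag_flush_range_np(domain, start, start + size - 1);
 *
 * which becomes a write-buffer flush on hardware without caching mode (or
 * with first-stage translation in use) and an IOTLB flush otherwise.
 */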