1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * cache.c - Intel VT-d cache invalidation 4 * 5 * Copyright (C) 2024 Intel Corporation 6 * 7 * Author: Lu Baolu <baolu.lu@linux.intel.com> 8 */ 9 10 #define pr_fmt(fmt) "DMAR: " fmt 11 12 #include <linux/dmar.h> 13 #include <linux/iommu.h> 14 #include <linux/memory.h> 15 #include <linux/pci.h> 16 #include <linux/spinlock.h> 17 18 #include "iommu.h" 19 #include "pasid.h" 20 #include "trace.h" 21 22 /* Check if an existing cache tag can be reused for a new association. */ 23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, 24 struct intel_iommu *iommu, struct device *dev, 25 ioasid_t pasid, enum cache_tag_type type) 26 { 27 if (tag->type != type) 28 return false; 29 30 if (tag->domain_id != domain_id || tag->pasid != pasid) 31 return false; 32 33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB) 34 return tag->iommu == iommu; 35 36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) 37 return tag->dev == dev; 38 39 return false; 40 } 41 42 /* Assign a cache tag with specified type to domain. */ 43 static int cache_tag_assign(struct dmar_domain *domain, u16 did, 44 struct device *dev, ioasid_t pasid, 45 enum cache_tag_type type) 46 { 47 struct device_domain_info *info = dev_iommu_priv_get(dev); 48 struct intel_iommu *iommu = info->iommu; 49 struct cache_tag *tag, *temp; 50 unsigned long flags; 51 52 tag = kzalloc(sizeof(*tag), GFP_KERNEL); 53 if (!tag) 54 return -ENOMEM; 55 56 tag->type = type; 57 tag->iommu = iommu; 58 tag->domain_id = did; 59 tag->pasid = pasid; 60 tag->users = 1; 61 62 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) 63 tag->dev = dev; 64 else 65 tag->dev = iommu->iommu.dev; 66 67 spin_lock_irqsave(&domain->cache_lock, flags); 68 list_for_each_entry(temp, &domain->cache_tags, node) { 69 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) { 70 temp->users++; 71 spin_unlock_irqrestore(&domain->cache_lock, flags); 72 kfree(tag); 73 trace_cache_tag_assign(temp); 74 return 0; 75 } 76 } 77 list_add_tail(&tag->node, &domain->cache_tags); 78 spin_unlock_irqrestore(&domain->cache_lock, flags); 79 trace_cache_tag_assign(tag); 80 81 return 0; 82 } 83 84 /* Unassign a cache tag with specified type from domain. */ 85 static void cache_tag_unassign(struct dmar_domain *domain, u16 did, 86 struct device *dev, ioasid_t pasid, 87 enum cache_tag_type type) 88 { 89 struct device_domain_info *info = dev_iommu_priv_get(dev); 90 struct intel_iommu *iommu = info->iommu; 91 struct cache_tag *tag; 92 unsigned long flags; 93 94 spin_lock_irqsave(&domain->cache_lock, flags); 95 list_for_each_entry(tag, &domain->cache_tags, node) { 96 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) { 97 trace_cache_tag_unassign(tag); 98 if (--tag->users == 0) { 99 list_del(&tag->node); 100 kfree(tag); 101 } 102 break; 103 } 104 } 105 spin_unlock_irqrestore(&domain->cache_lock, flags); 106 } 107 108 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did, 109 struct device *dev, ioasid_t pasid) 110 { 111 struct device_domain_info *info = dev_iommu_priv_get(dev); 112 int ret; 113 114 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 115 if (ret || !info->ats_enabled) 116 return ret; 117 118 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); 119 if (ret) 120 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 121 122 return ret; 123 } 124 125 static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did, 126 struct device *dev, ioasid_t pasid) 127 { 128 struct device_domain_info *info = dev_iommu_priv_get(dev); 129 130 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 131 132 if (info->ats_enabled) 133 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); 134 } 135 136 static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did, 137 struct device *dev, ioasid_t pasid) 138 { 139 struct device_domain_info *info = dev_iommu_priv_get(dev); 140 int ret; 141 142 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 143 if (ret || !info->ats_enabled) 144 return ret; 145 146 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); 147 if (ret) 148 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 149 150 return ret; 151 } 152 153 static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did, 154 struct device *dev, ioasid_t pasid) 155 { 156 struct device_domain_info *info = dev_iommu_priv_get(dev); 157 158 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 159 160 if (info->ats_enabled) 161 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); 162 } 163 164 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev) 165 { 166 struct device_domain_info *info = dev_iommu_priv_get(dev); 167 struct intel_iommu *iommu = info->iommu; 168 169 /* 170 * The driver assigns different domain IDs for all domains except 171 * the SVA type. 172 */ 173 if (domain->domain.type == IOMMU_DOMAIN_SVA) 174 return FLPT_DEFAULT_DID; 175 176 return domain_id_iommu(domain, iommu); 177 } 178 179 /* 180 * Assign cache tags to a domain when it's associated with a device's 181 * PASID using a specific domain ID. 182 * 183 * On success (return value of 0), cache tags are created and added to the 184 * domain's cache tag list. On failure (negative return value), an error 185 * code is returned indicating the reason for the failure. 186 */ 187 int cache_tag_assign_domain(struct dmar_domain *domain, 188 struct device *dev, ioasid_t pasid) 189 { 190 u16 did = domain_get_id_for_dev(domain, dev); 191 int ret; 192 193 /* domain->qi_bach will be freed in iommu_free_domain() path. */ 194 if (!domain->qi_batch) { 195 domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL); 196 if (!domain->qi_batch) 197 return -ENOMEM; 198 } 199 200 ret = __cache_tag_assign_domain(domain, did, dev, pasid); 201 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) 202 return ret; 203 204 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid); 205 if (ret) 206 __cache_tag_unassign_domain(domain, did, dev, pasid); 207 208 return ret; 209 } 210 211 /* 212 * Remove the cache tags associated with a device's PASID when the domain is 213 * detached from the device. 214 * 215 * The cache tags must be previously assigned to the domain by calling the 216 * assign interface. 217 */ 218 void cache_tag_unassign_domain(struct dmar_domain *domain, 219 struct device *dev, ioasid_t pasid) 220 { 221 u16 did = domain_get_id_for_dev(domain, dev); 222 223 __cache_tag_unassign_domain(domain, did, dev, pasid); 224 if (domain->domain.type == IOMMU_DOMAIN_NESTED) 225 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid); 226 } 227 228 static unsigned long calculate_psi_aligned_address(unsigned long start, 229 unsigned long end, 230 unsigned long *_pages, 231 unsigned long *_mask) 232 { 233 unsigned long pages = aligned_nrpages(start, end - start + 1); 234 unsigned long aligned_pages = __roundup_pow_of_two(pages); 235 unsigned long bitmask = aligned_pages - 1; 236 unsigned long mask = ilog2(aligned_pages); 237 unsigned long pfn = IOVA_PFN(start); 238 239 /* 240 * PSI masks the low order bits of the base address. If the 241 * address isn't aligned to the mask, then compute a mask value 242 * needed to ensure the target range is flushed. 243 */ 244 if (unlikely(bitmask & pfn)) { 245 unsigned long end_pfn = pfn + pages - 1, shared_bits; 246 247 /* 248 * Since end_pfn <= pfn + bitmask, the only way bits 249 * higher than bitmask can differ in pfn and end_pfn is 250 * by carrying. This means after masking out bitmask, 251 * high bits starting with the first set bit in 252 * shared_bits are all equal in both pfn and end_pfn. 253 */ 254 shared_bits = ~(pfn ^ end_pfn) & ~bitmask; 255 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; 256 aligned_pages = 1UL << mask; 257 } 258 259 *_pages = aligned_pages; 260 *_mask = mask; 261 262 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); 263 } 264 265 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) 266 { 267 if (!iommu || !batch->index) 268 return; 269 270 qi_submit_sync(iommu, batch->descs, batch->index, 0); 271 272 /* Reset the index value and clean the whole batch buffer. */ 273 memset(batch, 0, sizeof(*batch)); 274 } 275 276 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch) 277 { 278 if (++batch->index == QI_MAX_BATCHED_DESC_COUNT) 279 qi_batch_flush_descs(iommu, batch); 280 } 281 282 static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 283 unsigned int size_order, u64 type, 284 struct qi_batch *batch) 285 { 286 qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]); 287 qi_batch_increment_index(iommu, batch); 288 } 289 290 static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 291 u16 qdep, u64 addr, unsigned int mask, 292 struct qi_batch *batch) 293 { 294 /* 295 * According to VT-d spec, software is recommended to not submit any Device-TLB 296 * invalidation requests while address remapping hardware is disabled. 297 */ 298 if (!(iommu->gcmd & DMA_GCMD_TE)) 299 return; 300 301 qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]); 302 qi_batch_increment_index(iommu, batch); 303 } 304 305 static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, 306 u64 addr, unsigned long npages, bool ih, 307 struct qi_batch *batch) 308 { 309 /* 310 * npages == -1 means a PASID-selective invalidation, otherwise, 311 * a positive value for Page-selective-within-PASID invalidation. 312 * 0 is not a valid input. 313 */ 314 if (!npages) 315 return; 316 317 qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]); 318 qi_batch_increment_index(iommu, batch); 319 } 320 321 static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 322 u32 pasid, u16 qdep, u64 addr, 323 unsigned int size_order, struct qi_batch *batch) 324 { 325 /* 326 * According to VT-d spec, software is recommended to not submit any 327 * Device-TLB invalidation requests while address remapping hardware 328 * is disabled. 329 */ 330 if (!(iommu->gcmd & DMA_GCMD_TE)) 331 return; 332 333 qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order, 334 &batch->descs[batch->index]); 335 qi_batch_increment_index(iommu, batch); 336 } 337 338 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, 339 unsigned long addr, unsigned long pages, 340 unsigned long mask, int ih) 341 { 342 struct intel_iommu *iommu = tag->iommu; 343 u64 type = DMA_TLB_PSI_FLUSH; 344 345 if (domain->use_first_level) { 346 qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr, 347 pages, ih, domain->qi_batch); 348 return; 349 } 350 351 /* 352 * Fallback to domain selective flush if no PSI support or the size 353 * is too big. 354 */ 355 if (!cap_pgsel_inv(iommu->cap) || 356 mask > cap_max_amask_val(iommu->cap) || pages == -1) { 357 addr = 0; 358 mask = 0; 359 ih = 0; 360 type = DMA_TLB_DSI_FLUSH; 361 } 362 363 if (ecap_qis(iommu->ecap)) 364 qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type, 365 domain->qi_batch); 366 else 367 __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); 368 } 369 370 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag, 371 unsigned long addr, unsigned long mask) 372 { 373 struct intel_iommu *iommu = tag->iommu; 374 struct device_domain_info *info; 375 u16 sid; 376 377 info = dev_iommu_priv_get(tag->dev); 378 sid = PCI_DEVID(info->bus, info->devfn); 379 380 if (tag->pasid == IOMMU_NO_PASID) { 381 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 382 addr, mask, domain->qi_batch); 383 if (info->dtlb_extra_inval) 384 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 385 addr, mask, domain->qi_batch); 386 return; 387 } 388 389 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, 390 info->ats_qdep, addr, mask, domain->qi_batch); 391 if (info->dtlb_extra_inval) 392 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, 393 info->ats_qdep, addr, mask, 394 domain->qi_batch); 395 } 396 397 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) 398 { 399 struct intel_iommu *iommu = tag->iommu; 400 struct device_domain_info *info; 401 u16 sid; 402 403 info = dev_iommu_priv_get(tag->dev); 404 sid = PCI_DEVID(info->bus, info->devfn); 405 406 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, 407 MAX_AGAW_PFN_WIDTH, domain->qi_batch); 408 if (info->dtlb_extra_inval) 409 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, 410 MAX_AGAW_PFN_WIDTH, domain->qi_batch); 411 } 412 413 /* 414 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) 415 * when the memory mappings in the target domain have been modified. 416 */ 417 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, 418 unsigned long end, int ih) 419 { 420 struct intel_iommu *iommu = NULL; 421 unsigned long pages, mask, addr; 422 struct cache_tag *tag; 423 unsigned long flags; 424 425 addr = calculate_psi_aligned_address(start, end, &pages, &mask); 426 427 spin_lock_irqsave(&domain->cache_lock, flags); 428 list_for_each_entry(tag, &domain->cache_tags, node) { 429 if (iommu && iommu != tag->iommu) 430 qi_batch_flush_descs(iommu, domain->qi_batch); 431 iommu = tag->iommu; 432 433 switch (tag->type) { 434 case CACHE_TAG_IOTLB: 435 case CACHE_TAG_NESTING_IOTLB: 436 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih); 437 break; 438 case CACHE_TAG_NESTING_DEVTLB: 439 /* 440 * Address translation cache in device side caches the 441 * result of nested translation. There is no easy way 442 * to identify the exact set of nested translations 443 * affected by a change in S2. So just flush the entire 444 * device cache. 445 */ 446 addr = 0; 447 mask = MAX_AGAW_PFN_WIDTH; 448 fallthrough; 449 case CACHE_TAG_DEVTLB: 450 cache_tag_flush_devtlb_psi(domain, tag, addr, mask); 451 break; 452 } 453 454 trace_cache_tag_flush_range(tag, start, end, addr, pages, mask); 455 } 456 qi_batch_flush_descs(iommu, domain->qi_batch); 457 spin_unlock_irqrestore(&domain->cache_lock, flags); 458 } 459 460 /* 461 * Invalidates all ranges of IOVA when the memory mappings in the target 462 * domain have been modified. 463 */ 464 void cache_tag_flush_all(struct dmar_domain *domain) 465 { 466 struct intel_iommu *iommu = NULL; 467 struct cache_tag *tag; 468 unsigned long flags; 469 470 spin_lock_irqsave(&domain->cache_lock, flags); 471 list_for_each_entry(tag, &domain->cache_tags, node) { 472 if (iommu && iommu != tag->iommu) 473 qi_batch_flush_descs(iommu, domain->qi_batch); 474 iommu = tag->iommu; 475 476 switch (tag->type) { 477 case CACHE_TAG_IOTLB: 478 case CACHE_TAG_NESTING_IOTLB: 479 cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0); 480 break; 481 case CACHE_TAG_DEVTLB: 482 case CACHE_TAG_NESTING_DEVTLB: 483 cache_tag_flush_devtlb_all(domain, tag); 484 break; 485 } 486 487 trace_cache_tag_flush_all(tag); 488 } 489 qi_batch_flush_descs(iommu, domain->qi_batch); 490 spin_unlock_irqrestore(&domain->cache_lock, flags); 491 } 492 493 /* 494 * Invalidate a range of IOVA when new mappings are created in the target 495 * domain. 496 * 497 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as 498 * Set, any software updates to remapping structures other than first- 499 * stage mapping requires explicit invalidation of the caches. 500 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires 501 * write buffer flushing, software must explicitly perform write-buffer 502 * flushing, if cache invalidation is not required. 503 */ 504 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, 505 unsigned long end) 506 { 507 struct intel_iommu *iommu = NULL; 508 unsigned long pages, mask, addr; 509 struct cache_tag *tag; 510 unsigned long flags; 511 512 addr = calculate_psi_aligned_address(start, end, &pages, &mask); 513 514 spin_lock_irqsave(&domain->cache_lock, flags); 515 list_for_each_entry(tag, &domain->cache_tags, node) { 516 if (iommu && iommu != tag->iommu) 517 qi_batch_flush_descs(iommu, domain->qi_batch); 518 iommu = tag->iommu; 519 520 if (!cap_caching_mode(iommu->cap) || domain->use_first_level) { 521 iommu_flush_write_buffer(iommu); 522 continue; 523 } 524 525 if (tag->type == CACHE_TAG_IOTLB || 526 tag->type == CACHE_TAG_NESTING_IOTLB) 527 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0); 528 529 trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask); 530 } 531 qi_batch_flush_descs(iommu, domain->qi_batch); 532 spin_unlock_irqrestore(&domain->cache_lock, flags); 533 } 534