1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * cache.c - Intel VT-d cache invalidation 4 * 5 * Copyright (C) 2024 Intel Corporation 6 * 7 * Author: Lu Baolu <baolu.lu@linux.intel.com> 8 */ 9 10 #define pr_fmt(fmt) "DMAR: " fmt 11 12 #include <linux/dmar.h> 13 #include <linux/iommu.h> 14 #include <linux/memory.h> 15 #include <linux/pci.h> 16 #include <linux/spinlock.h> 17 18 #include "iommu.h" 19 #include "pasid.h" 20 #include "trace.h" 21 22 /* Check if an existing cache tag can be reused for a new association. */ 23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, 24 struct intel_iommu *iommu, struct device *dev, 25 ioasid_t pasid, enum cache_tag_type type) 26 { 27 if (tag->type != type) 28 return false; 29 30 if (tag->domain_id != domain_id || tag->pasid != pasid) 31 return false; 32 33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB) 34 return tag->iommu == iommu; 35 36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB) 37 return tag->dev == dev; 38 39 return false; 40 } 41 42 /* Assign a cache tag with specified type to domain. 
*/
int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
		     ioasid_t pasid, enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct list_head *insert_after = &domain->cache_tags;
	struct cache_tag *new_tag, *cur;
	unsigned long flags;
	bool is_devtlb;

	/*
	 * Build the candidate tag before taking cache_lock; it is simply
	 * discarded again if an equivalent tag turns out to exist already.
	 */
	new_tag = kzalloc_obj(*new_tag);
	if (!new_tag)
		return -ENOMEM;

	is_devtlb = type == CACHE_TAG_DEVTLB ||
		    type == CACHE_TAG_NESTING_DEVTLB;

	new_tag->type = type;
	new_tag->iommu = iommu;
	new_tag->domain_id = did;
	new_tag->pasid = pasid;
	new_tag->users = 1;
	/* Device-TLB tags record the device itself, all others the IOMMU's device. */
	new_tag->dev = is_devtlb ? dev : iommu->iommu.dev;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(cur, &domain->cache_tags, node) {
		if (cache_tage_match(cur, did, iommu, dev, pasid, type)) {
			/* Share the existing tag instead of adding a duplicate. */
			cur->users++;
			spin_unlock_irqrestore(&domain->cache_lock, flags);
			kfree(new_tag);
			trace_cache_tag_assign(cur);
			return 0;
		}

		/* Remember the last tag seen that belongs to the same IOMMU. */
		if (cur->iommu == iommu)
			insert_after = &cur->node;
	}

	/*
	 * Link cache tags of same iommu unit together, so corresponding
	 * flush ops can be batched for iommu unit. With no tag of this
	 * IOMMU in the list yet, the new tag goes right after the list head.
	 */
	list_add(&new_tag->node, insert_after);
	spin_unlock_irqrestore(&domain->cache_lock, flags);

	trace_cache_tag_assign(new_tag);

	return 0;
}

/* Unassign a cache tag with specified type from domain.
*/ 93 static void cache_tag_unassign(struct dmar_domain *domain, u16 did, 94 struct device *dev, ioasid_t pasid, 95 enum cache_tag_type type) 96 { 97 struct device_domain_info *info = dev_iommu_priv_get(dev); 98 struct intel_iommu *iommu = info->iommu; 99 struct cache_tag *tag; 100 unsigned long flags; 101 102 spin_lock_irqsave(&domain->cache_lock, flags); 103 list_for_each_entry(tag, &domain->cache_tags, node) { 104 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) { 105 trace_cache_tag_unassign(tag); 106 if (--tag->users == 0) { 107 list_del(&tag->node); 108 kfree(tag); 109 } 110 break; 111 } 112 } 113 spin_unlock_irqrestore(&domain->cache_lock, flags); 114 } 115 116 /* domain->qi_batch will be freed in iommu_free_domain() path. */ 117 static int domain_qi_batch_alloc(struct dmar_domain *domain) 118 { 119 unsigned long flags; 120 int ret = 0; 121 122 spin_lock_irqsave(&domain->cache_lock, flags); 123 if (domain->qi_batch) 124 goto out_unlock; 125 126 domain->qi_batch = kzalloc_obj(*domain->qi_batch, GFP_ATOMIC); 127 if (!domain->qi_batch) 128 ret = -ENOMEM; 129 out_unlock: 130 spin_unlock_irqrestore(&domain->cache_lock, flags); 131 132 return ret; 133 } 134 135 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did, 136 struct device *dev, ioasid_t pasid) 137 { 138 struct device_domain_info *info = dev_iommu_priv_get(dev); 139 int ret; 140 141 ret = domain_qi_batch_alloc(domain); 142 if (ret) 143 return ret; 144 145 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 146 if (ret || !info->ats_enabled) 147 return ret; 148 149 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); 150 if (ret) 151 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB); 152 153 return ret; 154 } 155 156 static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did, 157 struct device *dev, ioasid_t pasid) 158 { 159 struct device_domain_info *info = dev_iommu_priv_get(dev); 160 161 cache_tag_unassign(domain, did, 
dev, pasid, CACHE_TAG_IOTLB); 162 163 if (info->ats_enabled) 164 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB); 165 } 166 167 static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did, 168 struct device *dev, ioasid_t pasid) 169 { 170 struct device_domain_info *info = dev_iommu_priv_get(dev); 171 int ret; 172 173 ret = domain_qi_batch_alloc(domain); 174 if (ret) 175 return ret; 176 177 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 178 if (ret || !info->ats_enabled) 179 return ret; 180 181 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); 182 if (ret) 183 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 184 185 return ret; 186 } 187 188 static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did, 189 struct device *dev, ioasid_t pasid) 190 { 191 struct device_domain_info *info = dev_iommu_priv_get(dev); 192 193 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB); 194 195 if (info->ats_enabled) 196 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB); 197 } 198 199 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev) 200 { 201 struct device_domain_info *info = dev_iommu_priv_get(dev); 202 struct intel_iommu *iommu = info->iommu; 203 204 /* 205 * The driver assigns different domain IDs for all domains except 206 * the SVA type. 207 */ 208 if (domain->domain.type == IOMMU_DOMAIN_SVA) 209 return FLPT_DEFAULT_DID; 210 211 return domain_id_iommu(domain, iommu); 212 } 213 214 /* 215 * Assign cache tags to a domain when it's associated with a device's 216 * PASID using a specific domain ID. 217 * 218 * On success (return value of 0), cache tags are created and added to the 219 * domain's cache tag list. On failure (negative return value), an error 220 * code is returned indicating the reason for the failure. 
221 */ 222 int cache_tag_assign_domain(struct dmar_domain *domain, 223 struct device *dev, ioasid_t pasid) 224 { 225 u16 did = domain_get_id_for_dev(domain, dev); 226 int ret; 227 228 ret = __cache_tag_assign_domain(domain, did, dev, pasid); 229 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED) 230 return ret; 231 232 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid); 233 if (ret) 234 __cache_tag_unassign_domain(domain, did, dev, pasid); 235 236 return ret; 237 } 238 239 /* 240 * Remove the cache tags associated with a device's PASID when the domain is 241 * detached from the device. 242 * 243 * The cache tags must be previously assigned to the domain by calling the 244 * assign interface. 245 */ 246 void cache_tag_unassign_domain(struct dmar_domain *domain, 247 struct device *dev, ioasid_t pasid) 248 { 249 u16 did = domain_get_id_for_dev(domain, dev); 250 251 __cache_tag_unassign_domain(domain, did, dev, pasid); 252 if (domain->domain.type == IOMMU_DOMAIN_NESTED) 253 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid); 254 } 255 256 static unsigned long calculate_psi_aligned_address(unsigned long start, 257 unsigned long end, 258 unsigned long *_mask) 259 { 260 unsigned long pages = aligned_nrpages(start, end - start + 1); 261 unsigned long aligned_pages = __roundup_pow_of_two(pages); 262 unsigned long bitmask = aligned_pages - 1; 263 unsigned long mask = ilog2(aligned_pages); 264 unsigned long pfn = IOVA_PFN(start); 265 266 /* 267 * PSI masks the low order bits of the base address. If the 268 * address isn't aligned to the mask, then compute a mask value 269 * needed to ensure the target range is flushed. 270 */ 271 if (unlikely(bitmask & pfn)) { 272 unsigned long end_pfn = pfn + pages - 1, shared_bits; 273 274 /* 275 * Since end_pfn <= pfn + bitmask, the only way bits 276 * higher than bitmask can differ in pfn and end_pfn is 277 * by carrying. 
This means after masking out bitmask, 278 * high bits starting with the first set bit in 279 * shared_bits are all equal in both pfn and end_pfn. 280 */ 281 shared_bits = ~(pfn ^ end_pfn) & ~bitmask; 282 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; 283 } 284 285 *_mask = mask; 286 287 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); 288 } 289 290 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) 291 { 292 if (!iommu || !batch->index) 293 return; 294 295 qi_submit_sync(iommu, batch->descs, batch->index, 0); 296 297 /* Reset the index value and clean the whole batch buffer. */ 298 memset(batch, 0, sizeof(*batch)); 299 } 300 301 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch) 302 { 303 if (++batch->index == QI_MAX_BATCHED_DESC_COUNT) 304 qi_batch_flush_descs(iommu, batch); 305 } 306 307 static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, 308 unsigned int size_order, u64 type, 309 struct qi_batch *batch) 310 { 311 qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]); 312 qi_batch_increment_index(iommu, batch); 313 } 314 315 static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 316 u16 qdep, u64 addr, unsigned int mask, 317 struct qi_batch *batch) 318 { 319 /* 320 * According to VT-d spec, software is recommended to not submit any Device-TLB 321 * invalidation requests while address remapping hardware is disabled. 
322 */ 323 if (!(iommu->gcmd & DMA_GCMD_TE)) 324 return; 325 326 qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]); 327 qi_batch_increment_index(iommu, batch); 328 } 329 330 static void qi_batch_add_piotlb_all(struct intel_iommu *iommu, u16 did, 331 u32 pasid, struct qi_batch *batch) 332 { 333 qi_desc_piotlb_all(did, pasid, &batch->descs[batch->index]); 334 qi_batch_increment_index(iommu, batch); 335 } 336 337 static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, 338 u64 addr, unsigned int size_order, bool ih, 339 struct qi_batch *batch) 340 { 341 qi_desc_piotlb(did, pasid, addr, size_order, ih, 342 &batch->descs[batch->index]); 343 qi_batch_increment_index(iommu, batch); 344 } 345 346 static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, 347 u32 pasid, u16 qdep, u64 addr, 348 unsigned int size_order, struct qi_batch *batch) 349 { 350 /* 351 * According to VT-d spec, software is recommended to not submit any 352 * Device-TLB invalidation requests while address remapping hardware 353 * is disabled. 
354 */ 355 if (!(iommu->gcmd & DMA_GCMD_TE)) 356 return; 357 358 qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order, 359 &batch->descs[batch->index]); 360 qi_batch_increment_index(iommu, batch); 361 } 362 363 static bool intel_domain_use_piotlb(struct dmar_domain *domain) 364 { 365 return domain->domain.type == IOMMU_DOMAIN_SVA || 366 domain->domain.type == IOMMU_DOMAIN_NESTED || 367 intel_domain_is_fs_paging(domain); 368 } 369 370 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag, 371 unsigned long addr, unsigned long mask, int ih) 372 { 373 struct intel_iommu *iommu = tag->iommu; 374 u64 type = DMA_TLB_PSI_FLUSH; 375 376 if (intel_domain_use_piotlb(domain)) { 377 if (mask >= MAX_AGAW_PFN_WIDTH) 378 qi_batch_add_piotlb_all(iommu, tag->domain_id, 379 tag->pasid, domain->qi_batch); 380 else 381 qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, 382 addr, mask, ih, domain->qi_batch); 383 return; 384 } 385 386 /* 387 * Fallback to domain selective flush if no PSI support or the size 388 * is too big. 
389 */ 390 if (!cap_pgsel_inv(iommu->cap) || 391 mask > cap_max_amask_val(iommu->cap)) { 392 addr = 0; 393 mask = 0; 394 ih = 0; 395 type = DMA_TLB_DSI_FLUSH; 396 } 397 398 if (ecap_qis(iommu->ecap)) 399 qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type, 400 domain->qi_batch); 401 else 402 __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type); 403 } 404 405 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag, 406 unsigned long addr, unsigned long mask) 407 { 408 struct intel_iommu *iommu = tag->iommu; 409 struct device_domain_info *info; 410 u16 sid; 411 412 info = dev_iommu_priv_get(tag->dev); 413 sid = PCI_DEVID(info->bus, info->devfn); 414 415 if (tag->pasid == IOMMU_NO_PASID) { 416 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 417 addr, mask, domain->qi_batch); 418 if (info->dtlb_extra_inval) 419 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 420 addr, mask, domain->qi_batch); 421 return; 422 } 423 424 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, 425 info->ats_qdep, addr, mask, domain->qi_batch); 426 if (info->dtlb_extra_inval) 427 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid, 428 info->ats_qdep, addr, mask, 429 domain->qi_batch); 430 } 431 432 /* 433 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) 434 * when the memory mappings in the target domain have been modified. 
435 */ 436 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, 437 unsigned long end, int ih) 438 { 439 struct intel_iommu *iommu = NULL; 440 unsigned long mask, addr; 441 struct cache_tag *tag; 442 unsigned long flags; 443 444 if (start == 0 && end == ULONG_MAX) { 445 addr = 0; 446 mask = MAX_AGAW_PFN_WIDTH; 447 } else { 448 addr = calculate_psi_aligned_address(start, end, &mask); 449 } 450 451 spin_lock_irqsave(&domain->cache_lock, flags); 452 list_for_each_entry(tag, &domain->cache_tags, node) { 453 if (iommu && iommu != tag->iommu) 454 qi_batch_flush_descs(iommu, domain->qi_batch); 455 iommu = tag->iommu; 456 457 switch (tag->type) { 458 case CACHE_TAG_IOTLB: 459 case CACHE_TAG_NESTING_IOTLB: 460 cache_tag_flush_iotlb(domain, tag, addr, mask, ih); 461 break; 462 case CACHE_TAG_NESTING_DEVTLB: 463 /* 464 * Address translation cache in device side caches the 465 * result of nested translation. There is no easy way 466 * to identify the exact set of nested translations 467 * affected by a change in S2. So just flush the entire 468 * device cache. 469 */ 470 addr = 0; 471 mask = MAX_AGAW_PFN_WIDTH; 472 fallthrough; 473 case CACHE_TAG_DEVTLB: 474 cache_tag_flush_devtlb_psi(domain, tag, addr, mask); 475 break; 476 } 477 478 trace_cache_tag_flush_range(tag, start, end, addr, mask); 479 } 480 qi_batch_flush_descs(iommu, domain->qi_batch); 481 spin_unlock_irqrestore(&domain->cache_lock, flags); 482 } 483 484 /* 485 * Invalidates all ranges of IOVA when the memory mappings in the target 486 * domain have been modified. 487 */ 488 void cache_tag_flush_all(struct dmar_domain *domain) 489 { 490 cache_tag_flush_range(domain, 0, ULONG_MAX, 0); 491 } 492 493 /* 494 * Invalidate a range of IOVA when new mappings are created in the target 495 * domain. 
496 * 497 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as 498 * Set, any software updates to remapping structures other than first- 499 * stage mapping requires explicit invalidation of the caches. 500 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires 501 * write buffer flushing, software must explicitly perform write-buffer 502 * flushing, if cache invalidation is not required. 503 */ 504 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, 505 unsigned long end) 506 { 507 struct intel_iommu *iommu = NULL; 508 unsigned long mask, addr; 509 struct cache_tag *tag; 510 unsigned long flags; 511 512 addr = calculate_psi_aligned_address(start, end, &mask); 513 514 spin_lock_irqsave(&domain->cache_lock, flags); 515 list_for_each_entry(tag, &domain->cache_tags, node) { 516 if (iommu && iommu != tag->iommu) 517 qi_batch_flush_descs(iommu, domain->qi_batch); 518 iommu = tag->iommu; 519 520 if (!cap_caching_mode(iommu->cap) || 521 intel_domain_is_fs_paging(domain)) { 522 iommu_flush_write_buffer(iommu); 523 continue; 524 } 525 526 if (tag->type == CACHE_TAG_IOTLB || 527 tag->type == CACHE_TAG_NESTING_IOTLB) 528 cache_tag_flush_iotlb(domain, tag, addr, mask, 0); 529 530 trace_cache_tag_flush_range_np(tag, start, end, addr, mask); 531 } 532 qi_batch_flush_descs(iommu, domain->qi_batch); 533 spin_unlock_irqrestore(&domain->cache_lock, flags); 534 } 535