// SPDX-License-Identifier: GPL-2.0
/*
 * cache.c - Intel VT-d cache invalidation
 *
 * Copyright (C) 2024 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt) "DMAR: " fmt

#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "trace.h"

/* Check if an existing cache tag can be reused for a new association. */
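/*
 * IOTLB tags are matched per (domain_id, pasid, iommu) because the
 * cached translations live in the IOMMU unit, while device-TLB tags
 * are matched per (domain_id, pasid, dev) because the cached
 * translations live in the endpoint device.
 */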
static bool cache_tag_match(struct cache_tag *tag, u16 domain_id,
			    struct intel_iommu *iommu, struct device *dev,
			    ioasid_t pasid, enum cache_tag_type type)
{
	if (tag->type != type)
		return false;

	if (tag->domain_id != domain_id || tag->pasid != pasid)
		return false;

	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
		return tag->iommu == iommu;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		return tag->dev == dev;

	return false;
}

/* Assign a cache tag of the specified type to the domain. */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
			    struct device *dev, ioasid_t pasid,
			    enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag, *temp;
	struct list_head *prev;
	unsigned long flags;

	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
	if (!tag)
		return -ENOMEM;

	tag->type = type;
	tag->iommu = iommu;
	tag->domain_id = did;
	tag->pasid = pasid;
	tag->users = 1;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		tag->dev = dev;
	else
		tag->dev = iommu->iommu.dev;

	spin_lock_irqsave(&domain->cache_lock, flags);
	prev = &domain->cache_tags;
	list_for_each_entry(temp, &domain->cache_tags, node) {
		if (cache_tag_match(temp, did, iommu, dev, pasid, type)) {
			temp->users++;
			spin_unlock_irqrestore(&domain->cache_lock, flags);
			kfree(tag);
			trace_cache_tag_assign(temp);
			return 0;
		}
		if (temp->iommu == iommu)
			prev = &temp->node;
	}
	/*
	 * Link cache tags of the same IOMMU unit together, so that the
	 * corresponding flush operations can be batched per IOMMU unit.
	 */
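	/*
	 * For example (hypothetical layout), with tags on two IOMMU
	 * units A and B, the list is kept ordered as: A-iotlb,
	 * A-devtlb, B-iotlb, B-devtlb, so a flush walk can submit one
	 * batch for unit A before moving on to unit B.
	 */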
	list_add(&tag->node, prev);

	spin_unlock_irqrestore(&domain->cache_lock, flags);
	trace_cache_tag_assign(tag);

	return 0;
}

/* Unassign a cache tag of the specified type from the domain. */
static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
			       struct device *dev, ioasid_t pasid,
			       enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (cache_tag_match(tag, did, iommu, dev, pasid, type)) {
			trace_cache_tag_unassign(tag);
			if (--tag->users == 0) {
				list_del(&tag->node);
				kfree(tag);
			}
			break;
		}
	}
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/* domain->qi_batch will be freed in the iommu_free_domain() path. */
static int domain_qi_batch_alloc(struct dmar_domain *domain)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&domain->cache_lock, flags);
	if (domain->qi_batch)
		goto out_unlock;

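	/* cache_lock is held here, so the allocation must not sleep. */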
	domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
	if (!domain->qi_batch)
		ret = -ENOMEM;
out_unlock:
	spin_unlock_irqrestore(&domain->cache_lock, flags);

	return ret;
}

static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
				     struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	return ret;
}

static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
					struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}

static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
					    struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	return ret;
}

static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
					       struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}

static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * The driver assigns a distinct domain ID to every domain except
	 * those of the SVA type, which share FLPT_DEFAULT_DID.
	 */
	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return FLPT_DEFAULT_DID;

	return domain_id_iommu(domain, iommu);
}

/*
 * Assign cache tags to a domain when it's associated with a device's
 * PASID using a specific domain ID.
 *
 * On success (return value of 0), cache tags are created and added to the
 * domain's cache tag list. On failure (negative return value), an error
 * code is returned indicating the reason for the failure.
 */
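/*
 * A minimal usage sketch (illustrative only): an attach path is assumed
 * to pair the assignment with cache_tag_unassign_domain() on detach.
 *
 *	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
 *	if (ret)
 *		return ret;
 *	...
 *	cache_tag_unassign_domain(domain, dev, IOMMU_NO_PASID);
 */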
int cache_tag_assign_domain(struct dmar_domain *domain,
			    struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);
	int ret;

	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
		return ret;

	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
	if (ret)
		__cache_tag_unassign_domain(domain, did, dev, pasid);

	return ret;
}

/*
 * Remove the cache tags associated with a device's PASID when the domain is
 * detached from the device.
 *
 * The cache tags must have been previously assigned to the domain by calling
 * the assign interface.
 */
void cache_tag_unassign_domain(struct dmar_domain *domain,
			       struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);

	__cache_tag_unassign_domain(domain, did, dev, pasid);
	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}

static unsigned long calculate_psi_aligned_address(unsigned long start,
						   unsigned long end,
						   unsigned long *_pages,
						   unsigned long *_mask)
{
	unsigned long pages = aligned_nrpages(start, end - start + 1);
	unsigned long aligned_pages = __roundup_pow_of_two(pages);
	unsigned long bitmask = aligned_pages - 1;
	unsigned long mask = ilog2(aligned_pages);
	unsigned long pfn = IOVA_PFN(start);

	/*
	 * PSI masks the low order bits of the base address. If the
	 * address isn't aligned to the mask, then compute a mask value
	 * needed to ensure the target range is flushed.
	 */
	if (unlikely(bitmask & pfn)) {
		unsigned long end_pfn = pfn + pages - 1, shared_bits;

		/*
		 * Since end_pfn <= pfn + bitmask, the only way bits
		 * higher than bitmask can differ in pfn and end_pfn is
		 * by carrying. This means after masking out bitmask,
		 * high bits starting with the first set bit in
		 * shared_bits are all equal in both pfn and end_pfn.
		 */
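		/*
		 * Worked example (assuming 4 KiB VTD pages): start =
		 * 0x3000 and end = 0x5fff cover pfns 3-5, so pages = 3,
		 * aligned_pages = 4, bitmask = 0x3, mask = 2 and pfn = 3.
		 * A 4-page PSI at a 4-page-aligned base cannot cover the
		 * range. With end_pfn = 5: pfn ^ end_pfn = 0b110, so
		 * shared_bits = ~0b110 & ~0b011 = ...11111000 and
		 * __ffs(shared_bits) = 3. The flush then covers pfns 0-7,
		 * a superset of the requested 3-5.
		 */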
		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
		aligned_pages = 1UL << mask;
	}

	*_pages = aligned_pages;
	*_mask = mask;

	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}

static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (!iommu || !batch->index)
		return;

	qi_submit_sync(iommu, batch->descs, batch->index, 0);

	/* Reset the index value and clean the whole batch buffer. */
	memset(batch, 0, sizeof(*batch));
}

static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
		qi_batch_flush_descs(iommu, batch);
}

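/*
 * The qi_batch_add_*() helpers below share a pattern: write one
 * descriptor at batch->index, then advance the index, submitting the
 * whole batch to hardware whenever it fills up. The final, partially
 * filled batch is pushed out by an explicit qi_batch_flush_descs()
 * in the flush paths further below.
 */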
static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			       unsigned int size_order, u64 type,
			       struct qi_batch *batch)
{
	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
				   u16 qdep, u64 addr, unsigned int mask,
				   struct qi_batch *batch)
{
	/*
	 * According to the VT-d spec, software is recommended not to submit
	 * any Device-TLB invalidation requests while address remapping
	 * hardware is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
				u64 addr, unsigned long npages, bool ih,
				struct qi_batch *batch)
{
	/*
	 * npages == -1 means a PASID-selective invalidation; a positive
	 * value means a page-selective-within-PASID invalidation. 0 is
	 * not a valid input.
	 */
	if (!npages)
		return;

	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
					 u32 pasid, u16 qdep, u64 addr,
					 unsigned int size_order, struct qi_batch *batch)
{
	/*
	 * According to the VT-d spec, software is recommended not to submit
	 * any Device-TLB invalidation requests while address remapping
	 * hardware is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
				&batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
				  unsigned long addr, unsigned long pages,
				  unsigned long mask, int ih)
{
	struct intel_iommu *iommu = tag->iommu;
	u64 type = DMA_TLB_PSI_FLUSH;

	if (domain->use_first_level) {
		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
				    pages, ih, domain->qi_batch);
		return;
	}

	/*
	 * Fall back to a domain-selective flush if PSI is not supported or
	 * the size is too big.
	 */
	if (!cap_pgsel_inv(iommu->cap) ||
	    mask > cap_max_amask_val(iommu->cap) || pages == -1) {
		addr = 0;
		mask = 0;
		ih = 0;
		type = DMA_TLB_DSI_FLUSH;
	}

	if (ecap_qis(iommu->ecap))
		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
				   domain->qi_batch);
	else
		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
}

static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
				       unsigned long addr, unsigned long mask)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	if (tag->pasid == IOMMU_NO_PASID) {
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
				       addr, mask, domain->qi_batch);
		if (info->dtlb_extra_inval)
			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
					       addr, mask, domain->qi_batch);
		return;
	}

	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
				     info->ats_qdep, addr, mask, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
					     info->ats_qdep, addr, mask,
					     domain->qi_batch);
}

static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
}

/*
 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
 * when the memory mappings in the target domain have been modified.
 */
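/*
 * A minimal caller sketch (illustrative only): an unmap path that has
 * just torn down the mappings for [start, end] would follow up with:
 *
 *	cache_tag_flush_range(domain, start, end, 0);
 */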
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
			   unsigned long end, int ih)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

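	/*
	 * Tags for the same IOMMU unit are kept adjacent in the list (see
	 * cache_tag_assign()), so the loop below accumulates descriptors
	 * per unit and submits one batch each time the unit changes, plus
	 * a final flush for the last unit after the loop.
	 */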
	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
			break;
		case CACHE_TAG_NESTING_DEVTLB:
			/*
			 * The address translation cache on the device side
			 * caches the results of nested translation. There is
			 * no easy way to identify the exact set of nested
			 * translations affected by a change in the second
			 * stage, so just flush the entire device-TLB.
			 */
			addr = 0;
			mask = MAX_AGAW_PFN_WIDTH;
			fallthrough;
		case CACHE_TAG_DEVTLB:
			cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
			break;
		}

		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/*
 * Invalidates all ranges of IOVA when the memory mappings in the target
 * domain have been modified.
 */
void cache_tag_flush_all(struct dmar_domain *domain)
{
	struct intel_iommu *iommu = NULL;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
			break;
		case CACHE_TAG_DEVTLB:
		case CACHE_TAG_NESTING_DEVTLB:
			cache_tag_flush_devtlb_all(domain, tag);
			break;
		}

		trace_cache_tag_flush_all(tag);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/*
 * Invalidate a range of IOVA when new mappings are created in the target
 * domain.
 *
 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
 *   Set, any software updates to remapping structures other than first-
 *   stage mapping require explicit invalidation of the caches.
 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
 *   write buffer flushing, software must explicitly perform write-buffer
 *   flushing if cache invalidation is not required.
 */
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
			      unsigned long end)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
			iommu_flush_write_buffer(iommu);
			continue;
		}

		if (tag->type == CACHE_TAG_IOTLB ||
		    tag->type == CACHE_TAG_NESTING_IOTLB)
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);

		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}