// SPDX-License-Identifier: GPL-2.0
/*
 * cache.c - Intel VT-d cache invalidation
 *
 * Copyright (C) 2024 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt) "DMAR: " fmt

#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "pasid.h"
#include "trace.h"

/* Check if an existing cache tag can be reused for a new association. */
static bool cache_tag_match(struct cache_tag *tag, u16 domain_id,
			    struct intel_iommu *iommu, struct device *dev,
			    ioasid_t pasid, enum cache_tag_type type)
{
	if (tag->type != type)
		return false;

	if (tag->domain_id != domain_id || tag->pasid != pasid)
		return false;

	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
		return tag->iommu == iommu;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		return tag->dev == dev;

	return false;
}

/* Assign a cache tag with specified type to domain. */
static int cache_tag_assign(struct dmar_domain *domain, u16 did,
			    struct device *dev, ioasid_t pasid,
			    enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag, *temp;
	unsigned long flags;

	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
	if (!tag)
		return -ENOMEM;

	tag->type = type;
	tag->iommu = iommu;
	tag->domain_id = did;
	tag->pasid = pasid;
	tag->users = 1;

	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
		tag->dev = dev;
	else
		tag->dev = iommu->iommu.dev;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(temp, &domain->cache_tags, node) {
		if (cache_tag_match(temp, did, iommu, dev, pasid, type)) {
			temp->users++;
			spin_unlock_irqrestore(&domain->cache_lock, flags);
			kfree(tag);
			trace_cache_tag_assign(temp);
			return 0;
		}
	}
	list_add_tail(&tag->node, &domain->cache_tags);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
	trace_cache_tag_assign(tag);

	return 0;
}

/* Unassign a cache tag with specified type from domain. */
static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
			       struct device *dev, ioasid_t pasid,
			       enum cache_tag_type type)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (cache_tag_match(tag, did, iommu, dev, pasid, type)) {
			trace_cache_tag_unassign(tag);
			if (--tag->users == 0) {
				list_del(&tag->node);
				kfree(tag);
			}
			break;
		}
	}
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}
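
/*
 * Note on the reference counting above: assigning the same (domain_id,
 * pasid, type) tuple twice only bumps tag->users on the existing tag,
 * so the first unassign merely drops the count and only the second one
 * actually unlinks and frees the tag.
 */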

static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
				     struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	return ret;
}

static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
					struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}

static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
					    struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (ret || !info->ats_enabled)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
	if (ret)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	return ret;
}

static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
					       struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	if (info->ats_enabled)
		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}

static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;

	/*
	 * The driver assigns different domain IDs for all domains except
	 * the SVA type.
	 */
	if (domain->domain.type == IOMMU_DOMAIN_SVA)
		return FLPT_DEFAULT_DID;

	return domain_id_iommu(domain, iommu);
}

/*
 * Assign cache tags to a domain when it's associated with a device's
 * PASID using a specific domain ID.
 *
 * On success (return value of 0), cache tags are created and added to the
 * domain's cache tag list. On failure (negative return value), an error
 * code is returned indicating the reason for the failure.
 */
int cache_tag_assign_domain(struct dmar_domain *domain,
			    struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);
	int ret;

	/* domain->qi_batch will be freed in the iommu_free_domain() path. */
	if (!domain->qi_batch) {
		domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL);
		if (!domain->qi_batch)
			return -ENOMEM;
	}

	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
		return ret;

	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
	if (ret)
		__cache_tag_unassign_domain(domain, did, dev, pasid);

	return ret;
}
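
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * an attach path would pair the assign and unassign interfaces so that
 * a failed attach leaves no stale tags behind:
 *
 *	ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
 *	if (ret)
 *		return ret;
 *
 *	ret = do_attach(domain, dev);	// hypothetical attach step
 *	if (ret)
 *		cache_tag_unassign_domain(domain, dev, IOMMU_NO_PASID);
 */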

/*
 * Remove the cache tags associated with a device's PASID when the domain is
 * detached from the device.
 *
 * The cache tags must be previously assigned to the domain by calling the
 * assign interface.
 */
void cache_tag_unassign_domain(struct dmar_domain *domain,
			       struct device *dev, ioasid_t pasid)
{
	u16 did = domain_get_id_for_dev(domain, dev);

	__cache_tag_unassign_domain(domain, did, dev, pasid);
	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
}

static unsigned long calculate_psi_aligned_address(unsigned long start,
						   unsigned long end,
						   unsigned long *_pages,
						   unsigned long *_mask)
{
	unsigned long pages = aligned_nrpages(start, end - start + 1);
	unsigned long aligned_pages = __roundup_pow_of_two(pages);
	unsigned long bitmask = aligned_pages - 1;
	unsigned long mask = ilog2(aligned_pages);
	unsigned long pfn = IOVA_PFN(start);

	/*
	 * PSI masks the low order bits of the base address. If the
	 * address isn't aligned to the mask, then compute a mask value
	 * needed to ensure the target range is flushed.
	 */
	if (unlikely(bitmask & pfn)) {
		unsigned long end_pfn = pfn + pages - 1, shared_bits;

		/*
		 * Since end_pfn <= pfn + bitmask, the only way bits
		 * higher than bitmask can differ in pfn and end_pfn is
		 * by carrying. This means after masking out bitmask,
		 * high bits starting with the first set bit in
		 * shared_bits are all equal in both pfn and end_pfn.
		 */
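		/*
		 * Worked example with made-up values: pfn = 5 and
		 * pages = 2 give bitmask = 1 and end_pfn = 6, so
		 * pfn ^ end_pfn = 0b011 and shared_bits =
		 * ~0b011 & ~0b001 = ...11111100. __ffs() then yields
		 * mask = 2, and the flush covers pfns 4-7, a superset
		 * of the requested 5-6 range.
		 */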
		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
		aligned_pages = 1UL << mask;
	}

	*_pages = aligned_pages;
	*_mask = mask;

	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
}

static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (!iommu || !batch->index)
		return;

	qi_submit_sync(iommu, batch->descs, batch->index, 0);

	/* Reset the index value and clean the whole batch buffer. */
	memset(batch, 0, sizeof(*batch));
}

static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
{
	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
		qi_batch_flush_descs(iommu, batch);
}
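
/*
 * With the two helpers above, the qi_batch_add_*() wrappers below simply
 * append a descriptor and bump the index: once QI_MAX_BATCHED_DESC_COUNT
 * descriptors have accumulated, a single qi_submit_sync() is issued and
 * the batch buffer is reset. Any remainder is pushed out by the final
 * qi_batch_flush_descs() that the flush interfaces call before dropping
 * the cache lock.
 */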

static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			       unsigned int size_order, u64 type,
			       struct qi_batch *batch)
{
	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
				   u16 qdep, u64 addr, unsigned int mask,
				   struct qi_batch *batch)
{
	/*
	 * According to the VT-d spec, software is recommended not to submit
	 * any Device-TLB invalidation requests while address remapping
	 * hardware is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
				u64 addr, unsigned long npages, bool ih,
				struct qi_batch *batch)
{
	/*
	 * npages == -1 means a PASID-selective invalidation; otherwise a
	 * positive value means a page-selective-within-PASID invalidation.
	 * 0 is not a valid input.
	 */
	if (!npages)
		return;

	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
					 u32 pasid, u16 qdep, u64 addr,
					 unsigned int size_order, struct qi_batch *batch)
{
	/*
	 * According to the VT-d spec, software is recommended not to submit
	 * any Device-TLB invalidation requests while address remapping
	 * hardware is disabled.
	 */
	if (!(iommu->gcmd & DMA_GCMD_TE))
		return;

	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
				&batch->descs[batch->index]);
	qi_batch_increment_index(iommu, batch);
}

static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
				  unsigned long addr, unsigned long pages,
				  unsigned long mask, int ih)
{
	struct intel_iommu *iommu = tag->iommu;
	u64 type = DMA_TLB_PSI_FLUSH;

	if (domain->use_first_level) {
		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
				    pages, ih, domain->qi_batch);
		return;
	}

	/*
	 * Fall back to a domain-selective flush if PSI is not supported or
	 * the size is too big.
	 */
	if (!cap_pgsel_inv(iommu->cap) ||
	    mask > cap_max_amask_val(iommu->cap) || pages == -1) {
		addr = 0;
		mask = 0;
		ih = 0;
		type = DMA_TLB_DSI_FLUSH;
	}

	if (ecap_qis(iommu->ecap))
		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
				   domain->qi_batch);
	else
		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
}

static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
				       unsigned long addr, unsigned long mask)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	if (tag->pasid == IOMMU_NO_PASID) {
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
				       addr, mask, domain->qi_batch);
		if (info->dtlb_extra_inval)
			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
					       addr, mask, domain->qi_batch);
		return;
	}

	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
				     info->ats_qdep, addr, mask, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
					     info->ats_qdep, addr, mask,
					     domain->qi_batch);
}

static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
{
	struct intel_iommu *iommu = tag->iommu;
	struct device_domain_info *info;
	u16 sid;

	info = dev_iommu_priv_get(tag->dev);
	sid = PCI_DEVID(info->bus, info->devfn);

	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
	if (info->dtlb_extra_inval)
		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
}

/*
 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
 * when the memory mappings in the target domain have been modified.
 */
void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
			   unsigned long end, int ih)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
			break;
		case CACHE_TAG_NESTING_DEVTLB:
			/*
			 * The address translation cache on the device side
			 * caches the result of the nested translation. There
			 * is no easy way to identify the exact set of nested
			 * translations affected by a change in S2, so just
			 * flush the entire device cache.
			 */
			addr = 0;
			mask = MAX_AGAW_PFN_WIDTH;
			fallthrough;
		case CACHE_TAG_DEVTLB:
			cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
			break;
		}

		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}
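
/*
 * A minimal sketch of the expected call site (hypothetical, for
 * illustration): after page-table entries for [start, end] have been
 * changed, e.g. in an unmap path,
 *
 *	unmap_pages(domain, start, end);	// hypothetical helper
 *	cache_tag_flush_range(domain, start, end, 1);
 *
 * where a non-zero 'ih' hints, per the VT-d spec, that paging-structure
 * cache entries may be retained because only leaf entries changed.
 */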

/*
 * Invalidates all ranges of IOVA when the memory mappings in the target
 * domain have been modified.
 */
void cache_tag_flush_all(struct dmar_domain *domain)
{
	struct intel_iommu *iommu = NULL;
	struct cache_tag *tag;
	unsigned long flags;

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		switch (tag->type) {
		case CACHE_TAG_IOTLB:
		case CACHE_TAG_NESTING_IOTLB:
			cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
			break;
		case CACHE_TAG_DEVTLB:
		case CACHE_TAG_NESTING_DEVTLB:
			cache_tag_flush_devtlb_all(domain, tag);
			break;
		}

		trace_cache_tag_flush_all(tag);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}

/*
 * Invalidate a range of IOVA when new mappings are created in the target
 * domain.
 *
 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
 *   Set, any software updates to remapping structures other than first-
 *   stage mapping require explicit invalidation of the caches.
 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
 *   write buffer flushing, software must explicitly perform write-buffer
 *   flushing if cache invalidation is not required.
 */
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
			      unsigned long end)
{
	struct intel_iommu *iommu = NULL;
	unsigned long pages, mask, addr;
	struct cache_tag *tag;
	unsigned long flags;

	addr = calculate_psi_aligned_address(start, end, &pages, &mask);

	spin_lock_irqsave(&domain->cache_lock, flags);
	list_for_each_entry(tag, &domain->cache_tags, node) {
		if (iommu && iommu != tag->iommu)
			qi_batch_flush_descs(iommu, domain->qi_batch);
		iommu = tag->iommu;

		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
			iommu_flush_write_buffer(iommu);
			continue;
		}

		if (tag->type == CACHE_TAG_IOTLB ||
		    tag->type == CACHE_TAG_NESTING_IOTLB)
			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);

		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
	}
	qi_batch_flush_descs(iommu, domain->qi_batch);
	spin_unlock_irqrestore(&domain->cache_lock, flags);
}