1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * cache.c - Intel VT-d cache invalidation
4 *
5 * Copyright (C) 2024 Intel Corporation
6 *
7 * Author: Lu Baolu <baolu.lu@linux.intel.com>
8 */
9
10 #define pr_fmt(fmt) "DMAR: " fmt
11
12 #include <linux/dmar.h>
13 #include <linux/iommu.h>
14 #include <linux/memory.h>
15 #include <linux/pci.h>
16 #include <linux/spinlock.h>
17
18 #include "iommu.h"
19 #include "pasid.h"
20 #include "trace.h"
21
22 /* Check if an existing cache tag can be reused for a new association. */
cache_tage_match(struct cache_tag * tag,u16 domain_id,struct intel_iommu * iommu,struct device * dev,ioasid_t pasid,enum cache_tag_type type)23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 struct intel_iommu *iommu, struct device *dev,
25 ioasid_t pasid, enum cache_tag_type type)
26 {
27 if (tag->type != type)
28 return false;
29
30 if (tag->domain_id != domain_id || tag->pasid != pasid)
31 return false;
32
33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 return tag->iommu == iommu;
35
36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 return tag->dev == dev;
38
39 return false;
40 }
41
42 /* Assign a cache tag with specified type to domain. */
cache_tag_assign(struct dmar_domain * domain,u16 did,struct device * dev,ioasid_t pasid,enum cache_tag_type type)43 static int cache_tag_assign(struct dmar_domain *domain, u16 did,
44 struct device *dev, ioasid_t pasid,
45 enum cache_tag_type type)
46 {
47 struct device_domain_info *info = dev_iommu_priv_get(dev);
48 struct intel_iommu *iommu = info->iommu;
49 struct cache_tag *tag, *temp;
50 unsigned long flags;
51
52 tag = kzalloc(sizeof(*tag), GFP_KERNEL);
53 if (!tag)
54 return -ENOMEM;
55
56 tag->type = type;
57 tag->iommu = iommu;
58 tag->domain_id = did;
59 tag->pasid = pasid;
60 tag->users = 1;
61
62 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 tag->dev = dev;
64 else
65 tag->dev = iommu->iommu.dev;
66
67 spin_lock_irqsave(&domain->cache_lock, flags);
68 list_for_each_entry(temp, &domain->cache_tags, node) {
69 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
70 temp->users++;
71 spin_unlock_irqrestore(&domain->cache_lock, flags);
72 kfree(tag);
73 trace_cache_tag_assign(temp);
74 return 0;
75 }
76 }
77 list_add_tail(&tag->node, &domain->cache_tags);
78 spin_unlock_irqrestore(&domain->cache_lock, flags);
79 trace_cache_tag_assign(tag);
80
81 return 0;
82 }
83
84 /* Unassign a cache tag with specified type from domain. */
cache_tag_unassign(struct dmar_domain * domain,u16 did,struct device * dev,ioasid_t pasid,enum cache_tag_type type)85 static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
86 struct device *dev, ioasid_t pasid,
87 enum cache_tag_type type)
88 {
89 struct device_domain_info *info = dev_iommu_priv_get(dev);
90 struct intel_iommu *iommu = info->iommu;
91 struct cache_tag *tag;
92 unsigned long flags;
93
94 spin_lock_irqsave(&domain->cache_lock, flags);
95 list_for_each_entry(tag, &domain->cache_tags, node) {
96 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
97 trace_cache_tag_unassign(tag);
98 if (--tag->users == 0) {
99 list_del(&tag->node);
100 kfree(tag);
101 }
102 break;
103 }
104 }
105 spin_unlock_irqrestore(&domain->cache_lock, flags);
106 }
107
108 /* domain->qi_batch will be freed in iommu_free_domain() path. */
domain_qi_batch_alloc(struct dmar_domain * domain)109 static int domain_qi_batch_alloc(struct dmar_domain *domain)
110 {
111 unsigned long flags;
112 int ret = 0;
113
114 spin_lock_irqsave(&domain->cache_lock, flags);
115 if (domain->qi_batch)
116 goto out_unlock;
117
118 domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
119 if (!domain->qi_batch)
120 ret = -ENOMEM;
121 out_unlock:
122 spin_unlock_irqrestore(&domain->cache_lock, flags);
123
124 return ret;
125 }
126
/*
 * Assign the regular cache tags for (@did, @dev, @pasid) to @domain: always
 * an IOTLB tag, plus a Device-TLB tag when ATS is enabled on the device.
 * The domain's batch buffer is set up first.
 *
 * If the Device-TLB tag cannot be assigned, the IOTLB tag taken here is
 * released again. Returns 0 on success or a negative error code.
 */
static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
				     struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (ret)
		return ret;

	if (!info->ats_enabled)
		return 0;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
	if (!ret)
		return 0;

	/* Roll back the IOTLB tag so a failed call leaves nothing behind. */
	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	return ret;
}
147
/*
 * Release the regular cache tags for (@did, @dev, @pasid) from @domain: the
 * IOTLB tag, and the Device-TLB tag when ATS is enabled on the device.
 */
static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
					struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	if (!info->ats_enabled)
		return;

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}
158
/*
 * Assign the nesting cache tags for (@did, @dev, @pasid) to @domain: always
 * a nesting IOTLB tag, plus a nesting Device-TLB tag when ATS is enabled on
 * the device. The domain's batch buffer is set up first.
 *
 * If the nesting Device-TLB tag cannot be assigned, the nesting IOTLB tag
 * taken here is released again. Returns 0 on success or a negative error
 * code.
 */
static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
					    struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (ret)
		return ret;

	if (!info->ats_enabled)
		return 0;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
	if (!ret)
		return 0;

	/* Roll back the nesting IOTLB tag on failure. */
	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	return ret;
}
179
/*
 * Release the nesting cache tags for (@did, @dev, @pasid) from @domain: the
 * nesting IOTLB tag, and the nesting Device-TLB tag when ATS is enabled on
 * the device.
 */
static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
					       struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	if (!info->ats_enabled)
		return;

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}
190
domain_get_id_for_dev(struct dmar_domain * domain,struct device * dev)191 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
192 {
193 struct device_domain_info *info = dev_iommu_priv_get(dev);
194 struct intel_iommu *iommu = info->iommu;
195
196 /*
197 * The driver assigns different domain IDs for all domains except
198 * the SVA type.
199 */
200 if (domain->domain.type == IOMMU_DOMAIN_SVA)
201 return FLPT_DEFAULT_DID;
202
203 return domain_id_iommu(domain, iommu);
204 }
205
206 /*
207 * Assign cache tags to a domain when it's associated with a device's
208 * PASID using a specific domain ID.
209 *
210 * On success (return value of 0), cache tags are created and added to the
211 * domain's cache tag list. On failure (negative return value), an error
212 * code is returned indicating the reason for the failure.
213 */
cache_tag_assign_domain(struct dmar_domain * domain,struct device * dev,ioasid_t pasid)214 int cache_tag_assign_domain(struct dmar_domain *domain,
215 struct device *dev, ioasid_t pasid)
216 {
217 u16 did = domain_get_id_for_dev(domain, dev);
218 int ret;
219
220 ret = __cache_tag_assign_domain(domain, did, dev, pasid);
221 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
222 return ret;
223
224 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
225 if (ret)
226 __cache_tag_unassign_domain(domain, did, dev, pasid);
227
228 return ret;
229 }
230
231 /*
232 * Remove the cache tags associated with a device's PASID when the domain is
233 * detached from the device.
234 *
235 * The cache tags must be previously assigned to the domain by calling the
236 * assign interface.
237 */
cache_tag_unassign_domain(struct dmar_domain * domain,struct device * dev,ioasid_t pasid)238 void cache_tag_unassign_domain(struct dmar_domain *domain,
239 struct device *dev, ioasid_t pasid)
240 {
241 u16 did = domain_get_id_for_dev(domain, dev);
242
243 __cache_tag_unassign_domain(domain, did, dev, pasid);
244 if (domain->domain.type == IOMMU_DOMAIN_NESTED)
245 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
246 }
247
calculate_psi_aligned_address(unsigned long start,unsigned long end,unsigned long * _pages,unsigned long * _mask)248 static unsigned long calculate_psi_aligned_address(unsigned long start,
249 unsigned long end,
250 unsigned long *_pages,
251 unsigned long *_mask)
252 {
253 unsigned long pages = aligned_nrpages(start, end - start + 1);
254 unsigned long aligned_pages = __roundup_pow_of_two(pages);
255 unsigned long bitmask = aligned_pages - 1;
256 unsigned long mask = ilog2(aligned_pages);
257 unsigned long pfn = IOVA_PFN(start);
258
259 /*
260 * PSI masks the low order bits of the base address. If the
261 * address isn't aligned to the mask, then compute a mask value
262 * needed to ensure the target range is flushed.
263 */
264 if (unlikely(bitmask & pfn)) {
265 unsigned long end_pfn = pfn + pages - 1, shared_bits;
266
267 /*
268 * Since end_pfn <= pfn + bitmask, the only way bits
269 * higher than bitmask can differ in pfn and end_pfn is
270 * by carrying. This means after masking out bitmask,
271 * high bits starting with the first set bit in
272 * shared_bits are all equal in both pfn and end_pfn.
273 */
274 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
275 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
276 aligned_pages = 1UL << mask;
277 }
278
279 *_pages = aligned_pages;
280 *_mask = mask;
281
282 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
283 }
284
qi_batch_flush_descs(struct intel_iommu * iommu,struct qi_batch * batch)285 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
286 {
287 if (!iommu || !batch->index)
288 return;
289
290 qi_submit_sync(iommu, batch->descs, batch->index, 0);
291
292 /* Reset the index value and clean the whole batch buffer. */
293 memset(batch, 0, sizeof(*batch));
294 }
295
qi_batch_increment_index(struct intel_iommu * iommu,struct qi_batch * batch)296 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
297 {
298 if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
299 qi_batch_flush_descs(iommu, batch);
300 }
301
/*
 * Compose an IOTLB invalidation descriptor in the next free slot of @batch
 * and account for it, which may trigger submission of a full batch.
 */
static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			       unsigned int size_order, u64 type,
			       struct qi_batch *batch)
{
	struct qi_desc *slot = &batch->descs[batch->index];

	qi_desc_iotlb(iommu, did, addr, size_order, type, slot);
	qi_batch_increment_index(iommu, batch);
}
309
/*
 * Compose a Device-TLB invalidation descriptor in the next free slot of
 * @batch and account for it.
 *
 * According to VT-d spec, software is recommended to not submit any
 * Device-TLB invalidation requests while address remapping hardware is
 * disabled, so nothing is queued unless DMA_GCMD_TE is set in iommu->gcmd.
 */
static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
				   u16 qdep, u64 addr, unsigned int mask,
				   struct qi_batch *batch)
{
	if (iommu->gcmd & DMA_GCMD_TE) {
		struct qi_desc *slot = &batch->descs[batch->index];

		qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, slot);
		qi_batch_increment_index(iommu, batch);
	}
}
324
/*
 * Compose a PASID-based IOTLB invalidation descriptor in the next free slot
 * of @batch and account for it.
 *
 * @npages == -1 means a PASID-selective invalidation, otherwise a positive
 * value requests a Page-selective-within-PASID invalidation. 0 is not a
 * valid input and is ignored.
 */
static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
				u64 addr, unsigned long npages, bool ih,
				struct qi_batch *batch)
{
	if (npages) {
		struct qi_desc *slot = &batch->descs[batch->index];

		qi_desc_piotlb(did, pasid, addr, npages, ih, slot);
		qi_batch_increment_index(iommu, batch);
	}
}
340
/*
 * Compose a PASID-based Device-TLB invalidation descriptor in the next free
 * slot of @batch and account for it.
 *
 * According to VT-d spec, software is recommended to not submit any
 * Device-TLB invalidation requests while address remapping hardware is
 * disabled, so nothing is queued unless DMA_GCMD_TE is set in iommu->gcmd.
 */
static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
					 u32 pasid, u16 qdep, u64 addr,
					 unsigned int size_order, struct qi_batch *batch)
{
	if (iommu->gcmd & DMA_GCMD_TE) {
		struct qi_desc *slot = &batch->descs[batch->index];

		qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
					slot);
		qi_batch_increment_index(iommu, batch);
	}
}
357
cache_tag_flush_iotlb(struct dmar_domain * domain,struct cache_tag * tag,unsigned long addr,unsigned long pages,unsigned long mask,int ih)358 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
359 unsigned long addr, unsigned long pages,
360 unsigned long mask, int ih)
361 {
362 struct intel_iommu *iommu = tag->iommu;
363 u64 type = DMA_TLB_PSI_FLUSH;
364
365 if (domain->use_first_level) {
366 qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
367 pages, ih, domain->qi_batch);
368 return;
369 }
370
371 /*
372 * Fallback to domain selective flush if no PSI support or the size
373 * is too big.
374 */
375 if (!cap_pgsel_inv(iommu->cap) ||
376 mask > cap_max_amask_val(iommu->cap) || pages == -1) {
377 addr = 0;
378 mask = 0;
379 ih = 0;
380 type = DMA_TLB_DSI_FLUSH;
381 }
382
383 if (ecap_qis(iommu->ecap))
384 qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
385 domain->qi_batch);
386 else
387 __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
388 }
389
cache_tag_flush_devtlb_psi(struct dmar_domain * domain,struct cache_tag * tag,unsigned long addr,unsigned long mask)390 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
391 unsigned long addr, unsigned long mask)
392 {
393 struct intel_iommu *iommu = tag->iommu;
394 struct device_domain_info *info;
395 u16 sid;
396
397 info = dev_iommu_priv_get(tag->dev);
398 sid = PCI_DEVID(info->bus, info->devfn);
399
400 if (tag->pasid == IOMMU_NO_PASID) {
401 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
402 addr, mask, domain->qi_batch);
403 if (info->dtlb_extra_inval)
404 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
405 addr, mask, domain->qi_batch);
406 return;
407 }
408
409 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
410 info->ats_qdep, addr, mask, domain->qi_batch);
411 if (info->dtlb_extra_inval)
412 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
413 info->ats_qdep, addr, mask,
414 domain->qi_batch);
415 }
416
cache_tag_flush_devtlb_all(struct dmar_domain * domain,struct cache_tag * tag)417 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
418 {
419 struct intel_iommu *iommu = tag->iommu;
420 struct device_domain_info *info;
421 u16 sid;
422
423 info = dev_iommu_priv_get(tag->dev);
424 sid = PCI_DEVID(info->bus, info->devfn);
425
426 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
427 MAX_AGAW_PFN_WIDTH, domain->qi_batch);
428 if (info->dtlb_extra_inval)
429 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
430 MAX_AGAW_PFN_WIDTH, domain->qi_batch);
431 }
432
433 /*
434 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
435 * when the memory mappings in the target domain have been modified.
436 */
cache_tag_flush_range(struct dmar_domain * domain,unsigned long start,unsigned long end,int ih)437 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
438 unsigned long end, int ih)
439 {
440 struct intel_iommu *iommu = NULL;
441 unsigned long pages, mask, addr;
442 struct cache_tag *tag;
443 unsigned long flags;
444
445 addr = calculate_psi_aligned_address(start, end, &pages, &mask);
446
447 spin_lock_irqsave(&domain->cache_lock, flags);
448 list_for_each_entry(tag, &domain->cache_tags, node) {
449 if (iommu && iommu != tag->iommu)
450 qi_batch_flush_descs(iommu, domain->qi_batch);
451 iommu = tag->iommu;
452
453 switch (tag->type) {
454 case CACHE_TAG_IOTLB:
455 case CACHE_TAG_NESTING_IOTLB:
456 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
457 break;
458 case CACHE_TAG_NESTING_DEVTLB:
459 /*
460 * Address translation cache in device side caches the
461 * result of nested translation. There is no easy way
462 * to identify the exact set of nested translations
463 * affected by a change in S2. So just flush the entire
464 * device cache.
465 */
466 addr = 0;
467 mask = MAX_AGAW_PFN_WIDTH;
468 fallthrough;
469 case CACHE_TAG_DEVTLB:
470 cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
471 break;
472 }
473
474 trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
475 }
476 qi_batch_flush_descs(iommu, domain->qi_batch);
477 spin_unlock_irqrestore(&domain->cache_lock, flags);
478 }
479
480 /*
481 * Invalidates all ranges of IOVA when the memory mappings in the target
482 * domain have been modified.
483 */
cache_tag_flush_all(struct dmar_domain * domain)484 void cache_tag_flush_all(struct dmar_domain *domain)
485 {
486 struct intel_iommu *iommu = NULL;
487 struct cache_tag *tag;
488 unsigned long flags;
489
490 spin_lock_irqsave(&domain->cache_lock, flags);
491 list_for_each_entry(tag, &domain->cache_tags, node) {
492 if (iommu && iommu != tag->iommu)
493 qi_batch_flush_descs(iommu, domain->qi_batch);
494 iommu = tag->iommu;
495
496 switch (tag->type) {
497 case CACHE_TAG_IOTLB:
498 case CACHE_TAG_NESTING_IOTLB:
499 cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
500 break;
501 case CACHE_TAG_DEVTLB:
502 case CACHE_TAG_NESTING_DEVTLB:
503 cache_tag_flush_devtlb_all(domain, tag);
504 break;
505 }
506
507 trace_cache_tag_flush_all(tag);
508 }
509 qi_batch_flush_descs(iommu, domain->qi_batch);
510 spin_unlock_irqrestore(&domain->cache_lock, flags);
511 }
512
513 /*
514 * Invalidate a range of IOVA when new mappings are created in the target
515 * domain.
516 *
517 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
518 * Set, any software updates to remapping structures other than first-
519 * stage mapping requires explicit invalidation of the caches.
520 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
521 * write buffer flushing, software must explicitly perform write-buffer
522 * flushing, if cache invalidation is not required.
523 */
cache_tag_flush_range_np(struct dmar_domain * domain,unsigned long start,unsigned long end)524 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
525 unsigned long end)
526 {
527 struct intel_iommu *iommu = NULL;
528 unsigned long pages, mask, addr;
529 struct cache_tag *tag;
530 unsigned long flags;
531
532 addr = calculate_psi_aligned_address(start, end, &pages, &mask);
533
534 spin_lock_irqsave(&domain->cache_lock, flags);
535 list_for_each_entry(tag, &domain->cache_tags, node) {
536 if (iommu && iommu != tag->iommu)
537 qi_batch_flush_descs(iommu, domain->qi_batch);
538 iommu = tag->iommu;
539
540 if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
541 iommu_flush_write_buffer(iommu);
542 continue;
543 }
544
545 if (tag->type == CACHE_TAG_IOTLB ||
546 tag->type == CACHE_TAG_NESTING_IOTLB)
547 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
548
549 trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
550 }
551 qi_batch_flush_descs(iommu, domain->qi_batch);
552 spin_unlock_irqrestore(&domain->cache_lock, flags);
553 }
554