1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * cache.c - Intel VT-d cache invalidation
4 *
5 * Copyright (C) 2024 Intel Corporation
6 *
7 * Author: Lu Baolu <baolu.lu@linux.intel.com>
8 */
9
10 #define pr_fmt(fmt) "DMAR: " fmt
11
12 #include <linux/dmar.h>
13 #include <linux/iommu.h>
14 #include <linux/memory.h>
15 #include <linux/pci.h>
16 #include <linux/spinlock.h>
17
18 #include "iommu.h"
19 #include "pasid.h"
20 #include "trace.h"
21
22 /* Check if an existing cache tag can be reused for a new association. */
cache_tage_match(struct cache_tag * tag,u16 domain_id,struct intel_iommu * iommu,struct device * dev,ioasid_t pasid,enum cache_tag_type type)23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 struct intel_iommu *iommu, struct device *dev,
25 ioasid_t pasid, enum cache_tag_type type)
26 {
27 if (tag->type != type)
28 return false;
29
30 if (tag->domain_id != domain_id || tag->pasid != pasid)
31 return false;
32
33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 return tag->iommu == iommu;
35
36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 return tag->dev == dev;
38
39 return false;
40 }
41
42 /* Assign a cache tag with specified type to domain. */
cache_tag_assign(struct dmar_domain * domain,u16 did,struct device * dev,ioasid_t pasid,enum cache_tag_type type)43 int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
44 ioasid_t pasid, enum cache_tag_type type)
45 {
46 struct device_domain_info *info = dev_iommu_priv_get(dev);
47 struct intel_iommu *iommu = info->iommu;
48 struct cache_tag *tag, *temp;
49 struct list_head *prev;
50 unsigned long flags;
51
52 tag = kzalloc_obj(*tag);
53 if (!tag)
54 return -ENOMEM;
55
56 tag->type = type;
57 tag->iommu = iommu;
58 tag->domain_id = did;
59 tag->pasid = pasid;
60 tag->users = 1;
61
62 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 tag->dev = dev;
64 else
65 tag->dev = iommu->iommu.dev;
66
67 spin_lock_irqsave(&domain->cache_lock, flags);
68 prev = &domain->cache_tags;
69 list_for_each_entry(temp, &domain->cache_tags, node) {
70 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
71 temp->users++;
72 spin_unlock_irqrestore(&domain->cache_lock, flags);
73 kfree(tag);
74 trace_cache_tag_assign(temp);
75 return 0;
76 }
77 if (temp->iommu == iommu)
78 prev = &temp->node;
79 }
80 /*
81 * Link cache tags of same iommu unit together, so corresponding
82 * flush ops can be batched for iommu unit.
83 */
84 list_add(&tag->node, prev);
85
86 spin_unlock_irqrestore(&domain->cache_lock, flags);
87 trace_cache_tag_assign(tag);
88
89 return 0;
90 }
91
92 /* Unassign a cache tag with specified type from domain. */
cache_tag_unassign(struct dmar_domain * domain,u16 did,struct device * dev,ioasid_t pasid,enum cache_tag_type type)93 static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
94 struct device *dev, ioasid_t pasid,
95 enum cache_tag_type type)
96 {
97 struct device_domain_info *info = dev_iommu_priv_get(dev);
98 struct intel_iommu *iommu = info->iommu;
99 struct cache_tag *tag;
100 unsigned long flags;
101
102 spin_lock_irqsave(&domain->cache_lock, flags);
103 list_for_each_entry(tag, &domain->cache_tags, node) {
104 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
105 trace_cache_tag_unassign(tag);
106 if (--tag->users == 0) {
107 list_del(&tag->node);
108 kfree(tag);
109 }
110 break;
111 }
112 }
113 spin_unlock_irqrestore(&domain->cache_lock, flags);
114 }
115
116 /* domain->qi_batch will be freed in iommu_free_domain() path. */
domain_qi_batch_alloc(struct dmar_domain * domain)117 static int domain_qi_batch_alloc(struct dmar_domain *domain)
118 {
119 unsigned long flags;
120 int ret = 0;
121
122 spin_lock_irqsave(&domain->cache_lock, flags);
123 if (domain->qi_batch)
124 goto out_unlock;
125
126 domain->qi_batch = kzalloc_obj(*domain->qi_batch, GFP_ATOMIC);
127 if (!domain->qi_batch)
128 ret = -ENOMEM;
129 out_unlock:
130 spin_unlock_irqrestore(&domain->cache_lock, flags);
131
132 return ret;
133 }
134
/*
 * Assign the IOTLB tag, and the DEVTLB tag when ATS is enabled on @dev, to
 * @domain. The domain's batch buffer is set up first. If the DEVTLB tag
 * cannot be assigned, the IOTLB tag taken here is released again.
 *
 * Return: 0 on success or a negative error code.
 */
static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
				     struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (ret)
		return ret;

	if (!info->ats_enabled)
		return 0;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
	if (!ret)
		return 0;

	/* Roll back the IOTLB tag so a failure leaves nothing assigned. */
	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);

	return ret;
}
155
/*
 * Release the IOTLB tag, and the DEVTLB tag when ATS is enabled on @dev,
 * for the (@did, @dev, @pasid) association on @domain.
 */
static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
					struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
	if (!info->ats_enabled)
		return;

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
}
166
/*
 * Nesting counterpart of __cache_tag_assign_domain(): assign the
 * NESTING_IOTLB tag, and the NESTING_DEVTLB tag when ATS is enabled on
 * @dev, to @domain. If the NESTING_DEVTLB tag cannot be assigned, the
 * NESTING_IOTLB tag taken here is released again.
 *
 * Return: 0 on success or a negative error code.
 */
static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
					    struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	int ret;

	ret = domain_qi_batch_alloc(domain);
	if (ret)
		return ret;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (ret)
		return ret;

	if (!info->ats_enabled)
		return 0;

	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
	if (!ret)
		return 0;

	/* Roll back the NESTING_IOTLB tag so a failure leaves nothing assigned. */
	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);

	return ret;
}
187
/*
 * Release the NESTING_IOTLB tag, and the NESTING_DEVTLB tag when ATS is
 * enabled on @dev, for the (@did, @dev, @pasid) association on @domain.
 */
static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
					       struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
	if (!info->ats_enabled)
		return;

	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
}
198
domain_get_id_for_dev(struct dmar_domain * domain,struct device * dev)199 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
200 {
201 struct device_domain_info *info = dev_iommu_priv_get(dev);
202 struct intel_iommu *iommu = info->iommu;
203
204 /*
205 * The driver assigns different domain IDs for all domains except
206 * the SVA type.
207 */
208 if (domain->domain.type == IOMMU_DOMAIN_SVA)
209 return FLPT_DEFAULT_DID;
210
211 return domain_id_iommu(domain, iommu);
212 }
213
214 /*
215 * Assign cache tags to a domain when it's associated with a device's
216 * PASID using a specific domain ID.
217 *
218 * On success (return value of 0), cache tags are created and added to the
219 * domain's cache tag list. On failure (negative return value), an error
220 * code is returned indicating the reason for the failure.
221 */
cache_tag_assign_domain(struct dmar_domain * domain,struct device * dev,ioasid_t pasid)222 int cache_tag_assign_domain(struct dmar_domain *domain,
223 struct device *dev, ioasid_t pasid)
224 {
225 u16 did = domain_get_id_for_dev(domain, dev);
226 int ret;
227
228 ret = __cache_tag_assign_domain(domain, did, dev, pasid);
229 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
230 return ret;
231
232 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
233 if (ret)
234 __cache_tag_unassign_domain(domain, did, dev, pasid);
235
236 return ret;
237 }
238
239 /*
240 * Remove the cache tags associated with a device's PASID when the domain is
241 * detached from the device.
242 *
243 * The cache tags must be previously assigned to the domain by calling the
244 * assign interface.
245 */
cache_tag_unassign_domain(struct dmar_domain * domain,struct device * dev,ioasid_t pasid)246 void cache_tag_unassign_domain(struct dmar_domain *domain,
247 struct device *dev, ioasid_t pasid)
248 {
249 u16 did = domain_get_id_for_dev(domain, dev);
250
251 __cache_tag_unassign_domain(domain, did, dev, pasid);
252 if (domain->domain.type == IOMMU_DOMAIN_NESTED)
253 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
254 }
255
calculate_psi_aligned_address(unsigned long start,unsigned long last,unsigned long * size_order)256 static unsigned long calculate_psi_aligned_address(unsigned long start,
257 unsigned long last,
258 unsigned long *size_order)
259 {
260 unsigned int sz_lg2;
261
262 /* Compute a sz_lg2 that spans start and last */
263 start &= GENMASK(BITS_PER_LONG - 1, VTD_PAGE_SHIFT);
264 sz_lg2 = fls_long(start ^ last);
265 if (sz_lg2 <= 12) {
266 *size_order = 0;
267 return start;
268 }
269 if (unlikely(sz_lg2 >= BITS_PER_LONG)) {
270 /*
271 * MAX_AGAW_PFN_WIDTH triggers full invalidation in all
272 * downstream users.
273 */
274 *size_order = MAX_AGAW_PFN_WIDTH;
275 return 0;
276 }
277
278 *size_order = sz_lg2 - VTD_PAGE_SHIFT;
279 return start & GENMASK(BITS_PER_LONG - 1, sz_lg2);
280 }
281
qi_batch_flush_descs(struct intel_iommu * iommu,struct qi_batch * batch)282 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
283 {
284 if (!iommu || !batch->index)
285 return;
286
287 qi_submit_sync(iommu, batch->descs, batch->index, 0);
288
289 /* Reset the index value and clean the whole batch buffer. */
290 memset(batch, 0, sizeof(*batch));
291 }
292
qi_batch_increment_index(struct intel_iommu * iommu,struct qi_batch * batch)293 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
294 {
295 if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
296 qi_batch_flush_descs(iommu, batch);
297 }
298
/*
 * Have qi_desc_iotlb() fill the next free slot of @batch, then advance the
 * batch (which may submit it to @iommu).
 */
static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
			       unsigned int size_order, u64 type,
			       struct qi_batch *batch)
{
	qi_desc_iotlb(iommu, did, addr, size_order, type,
		      batch->descs + batch->index);
	qi_batch_increment_index(iommu, batch);
}
306
/*
 * Have qi_desc_dev_iotlb() fill the next free slot of @batch, then advance
 * the batch. Nothing is queued while DMA_GCMD_TE is clear in iommu->gcmd.
 */
static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
				   u16 qdep, u64 addr, unsigned int mask,
				   struct qi_batch *batch)
{
	/*
	 * According to VT-d spec, software is recommended to not submit any Device-TLB
	 * invalidation requests while address remapping hardware is disabled.
	 */
	if (iommu->gcmd & DMA_GCMD_TE) {
		qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask,
				  batch->descs + batch->index);
		qi_batch_increment_index(iommu, batch);
	}
}
321
/*
 * Have qi_desc_piotlb_all() fill the next free slot of @batch for
 * (@did, @pasid), then advance the batch (which may submit it to @iommu).
 */
static void qi_batch_add_piotlb_all(struct intel_iommu *iommu, u16 did,
				    u32 pasid, struct qi_batch *batch)
{
	qi_desc_piotlb_all(did, pasid, batch->descs + batch->index);
	qi_batch_increment_index(iommu, batch);
}
328
/*
 * Have qi_desc_piotlb() fill the next free slot of @batch for
 * (@did, @pasid) and the given address, size order and @ih, then advance
 * the batch (which may submit it to @iommu).
 */
static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
				u64 addr, unsigned int size_order, bool ih,
				struct qi_batch *batch)
{
	qi_desc_piotlb(did, pasid, addr, size_order, ih,
		       batch->descs + batch->index);
	qi_batch_increment_index(iommu, batch);
}
337
/*
 * Have qi_desc_dev_iotlb_pasid() fill the next free slot of @batch, then
 * advance the batch. Nothing is queued while DMA_GCMD_TE is clear in
 * iommu->gcmd.
 */
static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
					 u32 pasid, u16 qdep, u64 addr,
					 unsigned int size_order, struct qi_batch *batch)
{
	/*
	 * According to VT-d spec, software is recommended to not submit any
	 * Device-TLB invalidation requests while address remapping hardware
	 * is disabled.
	 */
	if (iommu->gcmd & DMA_GCMD_TE) {
		qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
					batch->descs + batch->index);
		qi_batch_increment_index(iommu, batch);
	}
}
354
intel_domain_use_piotlb(struct dmar_domain * domain)355 static bool intel_domain_use_piotlb(struct dmar_domain *domain)
356 {
357 return domain->domain.type == IOMMU_DOMAIN_SVA ||
358 domain->domain.type == IOMMU_DOMAIN_NESTED ||
359 intel_domain_is_fs_paging(domain);
360 }
361
cache_tag_flush_iotlb(struct dmar_domain * domain,struct cache_tag * tag,unsigned long addr,unsigned long mask,int ih)362 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
363 unsigned long addr, unsigned long mask, int ih)
364 {
365 struct intel_iommu *iommu = tag->iommu;
366 u64 type = DMA_TLB_PSI_FLUSH;
367
368 if (intel_domain_use_piotlb(domain)) {
369 if (mask >= MAX_AGAW_PFN_WIDTH)
370 qi_batch_add_piotlb_all(iommu, tag->domain_id,
371 tag->pasid, domain->qi_batch);
372 else
373 qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid,
374 addr, mask, ih, domain->qi_batch);
375 return;
376 }
377
378 /*
379 * Fallback to domain selective flush if no PSI support or the size
380 * is too big.
381 */
382 if (!cap_pgsel_inv(iommu->cap) ||
383 mask > cap_max_amask_val(iommu->cap)) {
384 addr = 0;
385 mask = 0;
386 ih = 0;
387 type = DMA_TLB_DSI_FLUSH;
388 }
389
390 if (ecap_qis(iommu->ecap))
391 qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
392 domain->qi_batch);
393 else
394 __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
395 }
396
cache_tag_flush_devtlb_psi(struct dmar_domain * domain,struct cache_tag * tag,unsigned long addr,unsigned long mask)397 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
398 unsigned long addr, unsigned long mask)
399 {
400 struct intel_iommu *iommu = tag->iommu;
401 struct device_domain_info *info;
402 u16 sid;
403
404 info = dev_iommu_priv_get(tag->dev);
405 sid = PCI_DEVID(info->bus, info->devfn);
406
407 if (tag->pasid == IOMMU_NO_PASID) {
408 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
409 addr, mask, domain->qi_batch);
410 if (info->dtlb_extra_inval)
411 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
412 addr, mask, domain->qi_batch);
413 return;
414 }
415
416 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
417 info->ats_qdep, addr, mask, domain->qi_batch);
418 if (info->dtlb_extra_inval)
419 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
420 info->ats_qdep, addr, mask,
421 domain->qi_batch);
422 }
423
424 /*
425 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
426 * when the memory mappings in the target domain have been modified.
427 */
cache_tag_flush_range(struct dmar_domain * domain,unsigned long start,unsigned long end,int ih)428 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
429 unsigned long end, int ih)
430 {
431 struct intel_iommu *iommu = NULL;
432 unsigned long mask, addr;
433 struct cache_tag *tag;
434 unsigned long flags;
435
436 addr = calculate_psi_aligned_address(start, end, &mask);
437
438 spin_lock_irqsave(&domain->cache_lock, flags);
439 list_for_each_entry(tag, &domain->cache_tags, node) {
440 if (iommu && iommu != tag->iommu)
441 qi_batch_flush_descs(iommu, domain->qi_batch);
442 iommu = tag->iommu;
443
444 switch (tag->type) {
445 case CACHE_TAG_IOTLB:
446 case CACHE_TAG_NESTING_IOTLB:
447 cache_tag_flush_iotlb(domain, tag, addr, mask, ih);
448 break;
449 case CACHE_TAG_NESTING_DEVTLB:
450 /*
451 * Address translation cache in device side caches the
452 * result of nested translation. There is no easy way
453 * to identify the exact set of nested translations
454 * affected by a change in S2. So just flush the entire
455 * device cache.
456 */
457 addr = 0;
458 mask = MAX_AGAW_PFN_WIDTH;
459 fallthrough;
460 case CACHE_TAG_DEVTLB:
461 cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
462 break;
463 }
464
465 trace_cache_tag_flush_range(tag, start, end, addr, mask);
466 }
467 qi_batch_flush_descs(iommu, domain->qi_batch);
468 spin_unlock_irqrestore(&domain->cache_lock, flags);
469 }
470
471 /*
472 * Invalidates all ranges of IOVA when the memory mappings in the target
473 * domain have been modified.
474 */
cache_tag_flush_all(struct dmar_domain * domain)475 void cache_tag_flush_all(struct dmar_domain *domain)
476 {
477 cache_tag_flush_range(domain, 0, ULONG_MAX, 0);
478 }
479
480 /*
481 * Invalidate a range of IOVA when new mappings are created in the target
482 * domain.
483 *
484 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
485 * Set, any software updates to remapping structures other than first-
486 * stage mapping requires explicit invalidation of the caches.
487 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
488 * write buffer flushing, software must explicitly perform write-buffer
489 * flushing, if cache invalidation is not required.
490 */
cache_tag_flush_range_np(struct dmar_domain * domain,unsigned long start,unsigned long end)491 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
492 unsigned long end)
493 {
494 struct intel_iommu *iommu = NULL;
495 unsigned long mask, addr;
496 struct cache_tag *tag;
497 unsigned long flags;
498
499 addr = calculate_psi_aligned_address(start, end, &mask);
500
501 spin_lock_irqsave(&domain->cache_lock, flags);
502 list_for_each_entry(tag, &domain->cache_tags, node) {
503 if (iommu && iommu != tag->iommu)
504 qi_batch_flush_descs(iommu, domain->qi_batch);
505 iommu = tag->iommu;
506
507 if (!cap_caching_mode(iommu->cap) ||
508 intel_domain_is_fs_paging(domain)) {
509 iommu_flush_write_buffer(iommu);
510 continue;
511 }
512
513 if (tag->type == CACHE_TAG_IOTLB ||
514 tag->type == CACHE_TAG_NESTING_IOTLB)
515 cache_tag_flush_iotlb(domain, tag, addr, mask, 0);
516
517 trace_cache_tag_flush_range_np(tag, start, end, addr, mask);
518 }
519 qi_batch_flush_descs(iommu, domain->qi_batch);
520 spin_unlock_irqrestore(&domain->cache_lock, flags);
521 }
522