xref: /linux/drivers/iommu/intel/cache.c (revision af2d6148d2a159e1a0862bce5a2c88c1618a2b27)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * cache.c - Intel VT-d cache invalidation
4  *
5  * Copyright (C) 2024 Intel Corporation
6  *
7  * Author: Lu Baolu <baolu.lu@linux.intel.com>
8  */
9 
10 #define pr_fmt(fmt)	"DMAR: " fmt
11 
12 #include <linux/dmar.h>
13 #include <linux/iommu.h>
14 #include <linux/memory.h>
15 #include <linux/pci.h>
16 #include <linux/spinlock.h>
17 
18 #include "iommu.h"
19 #include "pasid.h"
20 #include "trace.h"
21 
22 /* Check if an existing cache tag can be reused for a new association. */
23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 			     struct intel_iommu *iommu, struct device *dev,
25 			     ioasid_t pasid, enum cache_tag_type type)
26 {
27 	if (tag->type != type)
28 		return false;
29 
30 	if (tag->domain_id != domain_id || tag->pasid != pasid)
31 		return false;
32 
33 	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 		return tag->iommu == iommu;
35 
36 	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 		return tag->dev == dev;
38 
39 	return false;
40 }
41 
42 /* Assign a cache tag with specified type to domain. */
43 int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
44 		     ioasid_t pasid, enum cache_tag_type type)
45 {
46 	struct device_domain_info *info = dev_iommu_priv_get(dev);
47 	struct intel_iommu *iommu = info->iommu;
48 	struct cache_tag *tag, *temp;
49 	struct list_head *prev;
50 	unsigned long flags;
51 
52 	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
53 	if (!tag)
54 		return -ENOMEM;
55 
56 	tag->type = type;
57 	tag->iommu = iommu;
58 	tag->domain_id = did;
59 	tag->pasid = pasid;
60 	tag->users = 1;
61 
62 	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 		tag->dev = dev;
64 	else
65 		tag->dev = iommu->iommu.dev;
66 
67 	spin_lock_irqsave(&domain->cache_lock, flags);
68 	prev = &domain->cache_tags;
69 	list_for_each_entry(temp, &domain->cache_tags, node) {
70 		if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
71 			temp->users++;
72 			spin_unlock_irqrestore(&domain->cache_lock, flags);
73 			kfree(tag);
74 			trace_cache_tag_assign(temp);
75 			return 0;
76 		}
77 		if (temp->iommu == iommu)
78 			prev = &temp->node;
79 	}
80 	/*
81 	 * Link cache tags of same iommu unit together, so corresponding
82 	 * flush ops can be batched for iommu unit.
83 	 */
84 	list_add(&tag->node, prev);
85 
86 	spin_unlock_irqrestore(&domain->cache_lock, flags);
87 	trace_cache_tag_assign(tag);
88 
89 	return 0;
90 }
91 
92 /* Unassign a cache tag with specified type from domain. */
93 static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
94 			       struct device *dev, ioasid_t pasid,
95 			       enum cache_tag_type type)
96 {
97 	struct device_domain_info *info = dev_iommu_priv_get(dev);
98 	struct intel_iommu *iommu = info->iommu;
99 	struct cache_tag *tag;
100 	unsigned long flags;
101 
102 	spin_lock_irqsave(&domain->cache_lock, flags);
103 	list_for_each_entry(tag, &domain->cache_tags, node) {
104 		if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
105 			trace_cache_tag_unassign(tag);
106 			if (--tag->users == 0) {
107 				list_del(&tag->node);
108 				kfree(tag);
109 			}
110 			break;
111 		}
112 	}
113 	spin_unlock_irqrestore(&domain->cache_lock, flags);
114 }
115 
116 /* domain->qi_batch will be freed in iommu_free_domain() path. */
117 static int domain_qi_batch_alloc(struct dmar_domain *domain)
118 {
119 	unsigned long flags;
120 	int ret = 0;
121 
122 	spin_lock_irqsave(&domain->cache_lock, flags);
123 	if (domain->qi_batch)
124 		goto out_unlock;
125 
126 	domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
127 	if (!domain->qi_batch)
128 		ret = -ENOMEM;
129 out_unlock:
130 	spin_unlock_irqrestore(&domain->cache_lock, flags);
131 
132 	return ret;
133 }
134 
135 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
136 				     struct device *dev, ioasid_t pasid)
137 {
138 	struct device_domain_info *info = dev_iommu_priv_get(dev);
139 	int ret;
140 
141 	ret = domain_qi_batch_alloc(domain);
142 	if (ret)
143 		return ret;
144 
145 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
146 	if (ret || !info->ats_enabled)
147 		return ret;
148 
149 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
150 	if (ret)
151 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
152 
153 	return ret;
154 }
155 
156 static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
157 					struct device *dev, ioasid_t pasid)
158 {
159 	struct device_domain_info *info = dev_iommu_priv_get(dev);
160 
161 	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
162 
163 	if (info->ats_enabled)
164 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
165 }
166 
167 static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
168 					    struct device *dev, ioasid_t pasid)
169 {
170 	struct device_domain_info *info = dev_iommu_priv_get(dev);
171 	int ret;
172 
173 	ret = domain_qi_batch_alloc(domain);
174 	if (ret)
175 		return ret;
176 
177 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
178 	if (ret || !info->ats_enabled)
179 		return ret;
180 
181 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
182 	if (ret)
183 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
184 
185 	return ret;
186 }
187 
188 static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
189 					       struct device *dev, ioasid_t pasid)
190 {
191 	struct device_domain_info *info = dev_iommu_priv_get(dev);
192 
193 	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
194 
195 	if (info->ats_enabled)
196 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
197 }
198 
199 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
200 {
201 	struct device_domain_info *info = dev_iommu_priv_get(dev);
202 	struct intel_iommu *iommu = info->iommu;
203 
204 	/*
205 	 * The driver assigns different domain IDs for all domains except
206 	 * the SVA type.
207 	 */
208 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
209 		return FLPT_DEFAULT_DID;
210 
211 	return domain_id_iommu(domain, iommu);
212 }
213 
214 /*
215  * Assign cache tags to a domain when it's associated with a device's
216  * PASID using a specific domain ID.
217  *
218  * On success (return value of 0), cache tags are created and added to the
219  * domain's cache tag list. On failure (negative return value), an error
220  * code is returned indicating the reason for the failure.
221  */
222 int cache_tag_assign_domain(struct dmar_domain *domain,
223 			    struct device *dev, ioasid_t pasid)
224 {
225 	u16 did = domain_get_id_for_dev(domain, dev);
226 	int ret;
227 
228 	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
229 	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
230 		return ret;
231 
232 	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
233 	if (ret)
234 		__cache_tag_unassign_domain(domain, did, dev, pasid);
235 
236 	return ret;
237 }
238 
239 /*
240  * Remove the cache tags associated with a device's PASID when the domain is
241  * detached from the device.
242  *
243  * The cache tags must be previously assigned to the domain by calling the
244  * assign interface.
245  */
246 void cache_tag_unassign_domain(struct dmar_domain *domain,
247 			       struct device *dev, ioasid_t pasid)
248 {
249 	u16 did = domain_get_id_for_dev(domain, dev);
250 
251 	__cache_tag_unassign_domain(domain, did, dev, pasid);
252 	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
253 		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
254 }
255 
256 static unsigned long calculate_psi_aligned_address(unsigned long start,
257 						   unsigned long end,
258 						   unsigned long *_pages,
259 						   unsigned long *_mask)
260 {
261 	unsigned long pages = aligned_nrpages(start, end - start + 1);
262 	unsigned long aligned_pages = __roundup_pow_of_two(pages);
263 	unsigned long bitmask = aligned_pages - 1;
264 	unsigned long mask = ilog2(aligned_pages);
265 	unsigned long pfn = IOVA_PFN(start);
266 
267 	/*
268 	 * PSI masks the low order bits of the base address. If the
269 	 * address isn't aligned to the mask, then compute a mask value
270 	 * needed to ensure the target range is flushed.
271 	 */
272 	if (unlikely(bitmask & pfn)) {
273 		unsigned long end_pfn = pfn + pages - 1, shared_bits;
274 
275 		/*
276 		 * Since end_pfn <= pfn + bitmask, the only way bits
277 		 * higher than bitmask can differ in pfn and end_pfn is
278 		 * by carrying. This means after masking out bitmask,
279 		 * high bits starting with the first set bit in
280 		 * shared_bits are all equal in both pfn and end_pfn.
281 		 */
282 		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
283 		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
284 		aligned_pages = 1UL << mask;
285 	}
286 
287 	*_pages = aligned_pages;
288 	*_mask = mask;
289 
290 	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
291 }
292 
293 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
294 {
295 	if (!iommu || !batch->index)
296 		return;
297 
298 	qi_submit_sync(iommu, batch->descs, batch->index, 0);
299 
300 	/* Reset the index value and clean the whole batch buffer. */
301 	memset(batch, 0, sizeof(*batch));
302 }
303 
304 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
305 {
306 	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
307 		qi_batch_flush_descs(iommu, batch);
308 }
309 
310 static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
311 			       unsigned int size_order, u64 type,
312 			       struct qi_batch *batch)
313 {
314 	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
315 	qi_batch_increment_index(iommu, batch);
316 }
317 
318 static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
319 				   u16 qdep, u64 addr, unsigned int mask,
320 				   struct qi_batch *batch)
321 {
322 	/*
323 	 * According to VT-d spec, software is recommended to not submit any Device-TLB
324 	 * invalidation requests while address remapping hardware is disabled.
325 	 */
326 	if (!(iommu->gcmd & DMA_GCMD_TE))
327 		return;
328 
329 	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
330 	qi_batch_increment_index(iommu, batch);
331 }
332 
333 static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
334 				u64 addr, unsigned long npages, bool ih,
335 				struct qi_batch *batch)
336 {
337 	/*
338 	 * npages == -1 means a PASID-selective invalidation, otherwise,
339 	 * a positive value for Page-selective-within-PASID invalidation.
340 	 * 0 is not a valid input.
341 	 */
342 	if (!npages)
343 		return;
344 
345 	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
346 	qi_batch_increment_index(iommu, batch);
347 }
348 
349 static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
350 					 u32 pasid,  u16 qdep, u64 addr,
351 					 unsigned int size_order, struct qi_batch *batch)
352 {
353 	/*
354 	 * According to VT-d spec, software is recommended to not submit any
355 	 * Device-TLB invalidation requests while address remapping hardware
356 	 * is disabled.
357 	 */
358 	if (!(iommu->gcmd & DMA_GCMD_TE))
359 		return;
360 
361 	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
362 				&batch->descs[batch->index]);
363 	qi_batch_increment_index(iommu, batch);
364 }
365 
366 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
367 				  unsigned long addr, unsigned long pages,
368 				  unsigned long mask, int ih)
369 {
370 	struct intel_iommu *iommu = tag->iommu;
371 	u64 type = DMA_TLB_PSI_FLUSH;
372 
373 	if (domain->use_first_level) {
374 		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
375 				    pages, ih, domain->qi_batch);
376 		return;
377 	}
378 
379 	/*
380 	 * Fallback to domain selective flush if no PSI support or the size
381 	 * is too big.
382 	 */
383 	if (!cap_pgsel_inv(iommu->cap) ||
384 	    mask > cap_max_amask_val(iommu->cap) || pages == -1) {
385 		addr = 0;
386 		mask = 0;
387 		ih = 0;
388 		type = DMA_TLB_DSI_FLUSH;
389 	}
390 
391 	if (ecap_qis(iommu->ecap))
392 		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
393 				   domain->qi_batch);
394 	else
395 		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
396 }
397 
398 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
399 				       unsigned long addr, unsigned long mask)
400 {
401 	struct intel_iommu *iommu = tag->iommu;
402 	struct device_domain_info *info;
403 	u16 sid;
404 
405 	info = dev_iommu_priv_get(tag->dev);
406 	sid = PCI_DEVID(info->bus, info->devfn);
407 
408 	if (tag->pasid == IOMMU_NO_PASID) {
409 		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
410 				       addr, mask, domain->qi_batch);
411 		if (info->dtlb_extra_inval)
412 			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
413 					       addr, mask, domain->qi_batch);
414 		return;
415 	}
416 
417 	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
418 				     info->ats_qdep, addr, mask, domain->qi_batch);
419 	if (info->dtlb_extra_inval)
420 		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
421 					     info->ats_qdep, addr, mask,
422 					     domain->qi_batch);
423 }
424 
425 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
426 {
427 	struct intel_iommu *iommu = tag->iommu;
428 	struct device_domain_info *info;
429 	u16 sid;
430 
431 	info = dev_iommu_priv_get(tag->dev);
432 	sid = PCI_DEVID(info->bus, info->devfn);
433 
434 	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
435 			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
436 	if (info->dtlb_extra_inval)
437 		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
438 				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
439 }
440 
441 /*
442  * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
443  * when the memory mappings in the target domain have been modified.
444  */
445 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
446 			   unsigned long end, int ih)
447 {
448 	struct intel_iommu *iommu = NULL;
449 	unsigned long pages, mask, addr;
450 	struct cache_tag *tag;
451 	unsigned long flags;
452 
453 	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
454 
455 	spin_lock_irqsave(&domain->cache_lock, flags);
456 	list_for_each_entry(tag, &domain->cache_tags, node) {
457 		if (iommu && iommu != tag->iommu)
458 			qi_batch_flush_descs(iommu, domain->qi_batch);
459 		iommu = tag->iommu;
460 
461 		switch (tag->type) {
462 		case CACHE_TAG_IOTLB:
463 		case CACHE_TAG_NESTING_IOTLB:
464 			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
465 			break;
466 		case CACHE_TAG_NESTING_DEVTLB:
467 			/*
468 			 * Address translation cache in device side caches the
469 			 * result of nested translation. There is no easy way
470 			 * to identify the exact set of nested translations
471 			 * affected by a change in S2. So just flush the entire
472 			 * device cache.
473 			 */
474 			addr = 0;
475 			mask = MAX_AGAW_PFN_WIDTH;
476 			fallthrough;
477 		case CACHE_TAG_DEVTLB:
478 			cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
479 			break;
480 		}
481 
482 		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
483 	}
484 	qi_batch_flush_descs(iommu, domain->qi_batch);
485 	spin_unlock_irqrestore(&domain->cache_lock, flags);
486 }
487 
488 /*
489  * Invalidates all ranges of IOVA when the memory mappings in the target
490  * domain have been modified.
491  */
492 void cache_tag_flush_all(struct dmar_domain *domain)
493 {
494 	struct intel_iommu *iommu = NULL;
495 	struct cache_tag *tag;
496 	unsigned long flags;
497 
498 	spin_lock_irqsave(&domain->cache_lock, flags);
499 	list_for_each_entry(tag, &domain->cache_tags, node) {
500 		if (iommu && iommu != tag->iommu)
501 			qi_batch_flush_descs(iommu, domain->qi_batch);
502 		iommu = tag->iommu;
503 
504 		switch (tag->type) {
505 		case CACHE_TAG_IOTLB:
506 		case CACHE_TAG_NESTING_IOTLB:
507 			cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
508 			break;
509 		case CACHE_TAG_DEVTLB:
510 		case CACHE_TAG_NESTING_DEVTLB:
511 			cache_tag_flush_devtlb_all(domain, tag);
512 			break;
513 		}
514 
515 		trace_cache_tag_flush_all(tag);
516 	}
517 	qi_batch_flush_descs(iommu, domain->qi_batch);
518 	spin_unlock_irqrestore(&domain->cache_lock, flags);
519 }
520 
521 /*
522  * Invalidate a range of IOVA when new mappings are created in the target
523  * domain.
524  *
525  * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
526  *   Set, any software updates to remapping structures other than first-
527  *   stage mapping requires explicit invalidation of the caches.
528  * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
529  *   write buffer flushing, software must explicitly perform write-buffer
530  *   flushing, if cache invalidation is not required.
531  */
532 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
533 			      unsigned long end)
534 {
535 	struct intel_iommu *iommu = NULL;
536 	unsigned long pages, mask, addr;
537 	struct cache_tag *tag;
538 	unsigned long flags;
539 
540 	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
541 
542 	spin_lock_irqsave(&domain->cache_lock, flags);
543 	list_for_each_entry(tag, &domain->cache_tags, node) {
544 		if (iommu && iommu != tag->iommu)
545 			qi_batch_flush_descs(iommu, domain->qi_batch);
546 		iommu = tag->iommu;
547 
548 		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
549 			iommu_flush_write_buffer(iommu);
550 			continue;
551 		}
552 
553 		if (tag->type == CACHE_TAG_IOTLB ||
554 		    tag->type == CACHE_TAG_NESTING_IOTLB)
555 			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
556 
557 		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
558 	}
559 	qi_batch_flush_descs(iommu, domain->qi_batch);
560 	spin_unlock_irqrestore(&domain->cache_lock, flags);
561 }
562