xref: /linux/drivers/iommu/intel/cache.c (revision 1623bc27a85a93e82194c8d077eccc464efa67db)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * cache.c - Intel VT-d cache invalidation
4  *
5  * Copyright (C) 2024 Intel Corporation
6  *
7  * Author: Lu Baolu <baolu.lu@linux.intel.com>
8  */
9 
10 #define pr_fmt(fmt)	"DMAR: " fmt
11 
12 #include <linux/dmar.h>
13 #include <linux/iommu.h>
14 #include <linux/memory.h>
15 #include <linux/pci.h>
16 #include <linux/spinlock.h>
17 
18 #include "iommu.h"
19 #include "pasid.h"
20 #include "trace.h"
21 
22 /* Check if an existing cache tag can be reused for a new association. */
23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 			     struct intel_iommu *iommu, struct device *dev,
25 			     ioasid_t pasid, enum cache_tag_type type)
26 {
27 	if (tag->type != type)
28 		return false;
29 
30 	if (tag->domain_id != domain_id || tag->pasid != pasid)
31 		return false;
32 
33 	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 		return tag->iommu == iommu;
35 
36 	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 		return tag->dev == dev;
38 
39 	return false;
40 }
41 
42 /* Assign a cache tag with specified type to domain. */
43 static int cache_tag_assign(struct dmar_domain *domain, u16 did,
44 			    struct device *dev, ioasid_t pasid,
45 			    enum cache_tag_type type)
46 {
47 	struct device_domain_info *info = dev_iommu_priv_get(dev);
48 	struct intel_iommu *iommu = info->iommu;
49 	struct cache_tag *tag, *temp;
50 	unsigned long flags;
51 
52 	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
53 	if (!tag)
54 		return -ENOMEM;
55 
56 	tag->type = type;
57 	tag->iommu = iommu;
58 	tag->domain_id = did;
59 	tag->pasid = pasid;
60 	tag->users = 1;
61 
62 	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 		tag->dev = dev;
64 	else
65 		tag->dev = iommu->iommu.dev;
66 
67 	spin_lock_irqsave(&domain->cache_lock, flags);
68 	list_for_each_entry(temp, &domain->cache_tags, node) {
69 		if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
70 			temp->users++;
71 			spin_unlock_irqrestore(&domain->cache_lock, flags);
72 			kfree(tag);
73 			trace_cache_tag_assign(temp);
74 			return 0;
75 		}
76 	}
77 	list_add_tail(&tag->node, &domain->cache_tags);
78 	spin_unlock_irqrestore(&domain->cache_lock, flags);
79 	trace_cache_tag_assign(tag);
80 
81 	return 0;
82 }
83 
84 /* Unassign a cache tag with specified type from domain. */
85 static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
86 			       struct device *dev, ioasid_t pasid,
87 			       enum cache_tag_type type)
88 {
89 	struct device_domain_info *info = dev_iommu_priv_get(dev);
90 	struct intel_iommu *iommu = info->iommu;
91 	struct cache_tag *tag;
92 	unsigned long flags;
93 
94 	spin_lock_irqsave(&domain->cache_lock, flags);
95 	list_for_each_entry(tag, &domain->cache_tags, node) {
96 		if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
97 			trace_cache_tag_unassign(tag);
98 			if (--tag->users == 0) {
99 				list_del(&tag->node);
100 				kfree(tag);
101 			}
102 			break;
103 		}
104 	}
105 	spin_unlock_irqrestore(&domain->cache_lock, flags);
106 }
107 
108 /* domain->qi_batch will be freed in iommu_free_domain() path. */
109 static int domain_qi_batch_alloc(struct dmar_domain *domain)
110 {
111 	unsigned long flags;
112 	int ret = 0;
113 
114 	spin_lock_irqsave(&domain->cache_lock, flags);
115 	if (domain->qi_batch)
116 		goto out_unlock;
117 
118 	domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
119 	if (!domain->qi_batch)
120 		ret = -ENOMEM;
121 out_unlock:
122 	spin_unlock_irqrestore(&domain->cache_lock, flags);
123 
124 	return ret;
125 }
126 
127 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
128 				     struct device *dev, ioasid_t pasid)
129 {
130 	struct device_domain_info *info = dev_iommu_priv_get(dev);
131 	int ret;
132 
133 	ret = domain_qi_batch_alloc(domain);
134 	if (ret)
135 		return ret;
136 
137 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
138 	if (ret || !info->ats_enabled)
139 		return ret;
140 
141 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
142 	if (ret)
143 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
144 
145 	return ret;
146 }
147 
148 static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
149 					struct device *dev, ioasid_t pasid)
150 {
151 	struct device_domain_info *info = dev_iommu_priv_get(dev);
152 
153 	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
154 
155 	if (info->ats_enabled)
156 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
157 }
158 
159 static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
160 					    struct device *dev, ioasid_t pasid)
161 {
162 	struct device_domain_info *info = dev_iommu_priv_get(dev);
163 	int ret;
164 
165 	ret = domain_qi_batch_alloc(domain);
166 	if (ret)
167 		return ret;
168 
169 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
170 	if (ret || !info->ats_enabled)
171 		return ret;
172 
173 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
174 	if (ret)
175 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
176 
177 	return ret;
178 }
179 
180 static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
181 					       struct device *dev, ioasid_t pasid)
182 {
183 	struct device_domain_info *info = dev_iommu_priv_get(dev);
184 
185 	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
186 
187 	if (info->ats_enabled)
188 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
189 }
190 
191 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
192 {
193 	struct device_domain_info *info = dev_iommu_priv_get(dev);
194 	struct intel_iommu *iommu = info->iommu;
195 
196 	/*
197 	 * The driver assigns different domain IDs for all domains except
198 	 * the SVA type.
199 	 */
200 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
201 		return FLPT_DEFAULT_DID;
202 
203 	return domain_id_iommu(domain, iommu);
204 }
205 
206 /*
207  * Assign cache tags to a domain when it's associated with a device's
208  * PASID using a specific domain ID.
209  *
210  * On success (return value of 0), cache tags are created and added to the
211  * domain's cache tag list. On failure (negative return value), an error
212  * code is returned indicating the reason for the failure.
213  */
214 int cache_tag_assign_domain(struct dmar_domain *domain,
215 			    struct device *dev, ioasid_t pasid)
216 {
217 	u16 did = domain_get_id_for_dev(domain, dev);
218 	int ret;
219 
220 	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
221 	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
222 		return ret;
223 
224 	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
225 	if (ret)
226 		__cache_tag_unassign_domain(domain, did, dev, pasid);
227 
228 	return ret;
229 }
230 
231 /*
232  * Remove the cache tags associated with a device's PASID when the domain is
233  * detached from the device.
234  *
235  * The cache tags must be previously assigned to the domain by calling the
236  * assign interface.
237  */
238 void cache_tag_unassign_domain(struct dmar_domain *domain,
239 			       struct device *dev, ioasid_t pasid)
240 {
241 	u16 did = domain_get_id_for_dev(domain, dev);
242 
243 	__cache_tag_unassign_domain(domain, did, dev, pasid);
244 	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
245 		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
246 }
247 
248 static unsigned long calculate_psi_aligned_address(unsigned long start,
249 						   unsigned long end,
250 						   unsigned long *_pages,
251 						   unsigned long *_mask)
252 {
253 	unsigned long pages = aligned_nrpages(start, end - start + 1);
254 	unsigned long aligned_pages = __roundup_pow_of_two(pages);
255 	unsigned long bitmask = aligned_pages - 1;
256 	unsigned long mask = ilog2(aligned_pages);
257 	unsigned long pfn = IOVA_PFN(start);
258 
259 	/*
260 	 * PSI masks the low order bits of the base address. If the
261 	 * address isn't aligned to the mask, then compute a mask value
262 	 * needed to ensure the target range is flushed.
263 	 */
264 	if (unlikely(bitmask & pfn)) {
265 		unsigned long end_pfn = pfn + pages - 1, shared_bits;
266 
267 		/*
268 		 * Since end_pfn <= pfn + bitmask, the only way bits
269 		 * higher than bitmask can differ in pfn and end_pfn is
270 		 * by carrying. This means after masking out bitmask,
271 		 * high bits starting with the first set bit in
272 		 * shared_bits are all equal in both pfn and end_pfn.
273 		 */
274 		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
275 		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
276 		aligned_pages = 1UL << mask;
277 	}
278 
279 	*_pages = aligned_pages;
280 	*_mask = mask;
281 
282 	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
283 }
284 
285 static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
286 {
287 	if (!iommu || !batch->index)
288 		return;
289 
290 	qi_submit_sync(iommu, batch->descs, batch->index, 0);
291 
292 	/* Reset the index value and clean the whole batch buffer. */
293 	memset(batch, 0, sizeof(*batch));
294 }
295 
296 static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
297 {
298 	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
299 		qi_batch_flush_descs(iommu, batch);
300 }
301 
302 static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
303 			       unsigned int size_order, u64 type,
304 			       struct qi_batch *batch)
305 {
306 	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
307 	qi_batch_increment_index(iommu, batch);
308 }
309 
310 static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
311 				   u16 qdep, u64 addr, unsigned int mask,
312 				   struct qi_batch *batch)
313 {
314 	/*
315 	 * According to VT-d spec, software is recommended to not submit any Device-TLB
316 	 * invalidation requests while address remapping hardware is disabled.
317 	 */
318 	if (!(iommu->gcmd & DMA_GCMD_TE))
319 		return;
320 
321 	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
322 	qi_batch_increment_index(iommu, batch);
323 }
324 
325 static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
326 				u64 addr, unsigned long npages, bool ih,
327 				struct qi_batch *batch)
328 {
329 	/*
330 	 * npages == -1 means a PASID-selective invalidation, otherwise,
331 	 * a positive value for Page-selective-within-PASID invalidation.
332 	 * 0 is not a valid input.
333 	 */
334 	if (!npages)
335 		return;
336 
337 	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
338 	qi_batch_increment_index(iommu, batch);
339 }
340 
341 static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
342 					 u32 pasid,  u16 qdep, u64 addr,
343 					 unsigned int size_order, struct qi_batch *batch)
344 {
345 	/*
346 	 * According to VT-d spec, software is recommended to not submit any
347 	 * Device-TLB invalidation requests while address remapping hardware
348 	 * is disabled.
349 	 */
350 	if (!(iommu->gcmd & DMA_GCMD_TE))
351 		return;
352 
353 	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
354 				&batch->descs[batch->index]);
355 	qi_batch_increment_index(iommu, batch);
356 }
357 
358 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
359 				  unsigned long addr, unsigned long pages,
360 				  unsigned long mask, int ih)
361 {
362 	struct intel_iommu *iommu = tag->iommu;
363 	u64 type = DMA_TLB_PSI_FLUSH;
364 
365 	if (domain->use_first_level) {
366 		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
367 				    pages, ih, domain->qi_batch);
368 		return;
369 	}
370 
371 	/*
372 	 * Fallback to domain selective flush if no PSI support or the size
373 	 * is too big.
374 	 */
375 	if (!cap_pgsel_inv(iommu->cap) ||
376 	    mask > cap_max_amask_val(iommu->cap) || pages == -1) {
377 		addr = 0;
378 		mask = 0;
379 		ih = 0;
380 		type = DMA_TLB_DSI_FLUSH;
381 	}
382 
383 	if (ecap_qis(iommu->ecap))
384 		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
385 				   domain->qi_batch);
386 	else
387 		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
388 }
389 
390 static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
391 				       unsigned long addr, unsigned long mask)
392 {
393 	struct intel_iommu *iommu = tag->iommu;
394 	struct device_domain_info *info;
395 	u16 sid;
396 
397 	info = dev_iommu_priv_get(tag->dev);
398 	sid = PCI_DEVID(info->bus, info->devfn);
399 
400 	if (tag->pasid == IOMMU_NO_PASID) {
401 		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
402 				       addr, mask, domain->qi_batch);
403 		if (info->dtlb_extra_inval)
404 			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
405 					       addr, mask, domain->qi_batch);
406 		return;
407 	}
408 
409 	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
410 				     info->ats_qdep, addr, mask, domain->qi_batch);
411 	if (info->dtlb_extra_inval)
412 		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
413 					     info->ats_qdep, addr, mask,
414 					     domain->qi_batch);
415 }
416 
417 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
418 {
419 	struct intel_iommu *iommu = tag->iommu;
420 	struct device_domain_info *info;
421 	u16 sid;
422 
423 	info = dev_iommu_priv_get(tag->dev);
424 	sid = PCI_DEVID(info->bus, info->devfn);
425 
426 	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
427 			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
428 	if (info->dtlb_extra_inval)
429 		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
430 				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
431 }
432 
433 /*
434  * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
435  * when the memory mappings in the target domain have been modified.
436  */
437 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
438 			   unsigned long end, int ih)
439 {
440 	struct intel_iommu *iommu = NULL;
441 	unsigned long pages, mask, addr;
442 	struct cache_tag *tag;
443 	unsigned long flags;
444 
445 	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
446 
447 	spin_lock_irqsave(&domain->cache_lock, flags);
448 	list_for_each_entry(tag, &domain->cache_tags, node) {
449 		if (iommu && iommu != tag->iommu)
450 			qi_batch_flush_descs(iommu, domain->qi_batch);
451 		iommu = tag->iommu;
452 
453 		switch (tag->type) {
454 		case CACHE_TAG_IOTLB:
455 		case CACHE_TAG_NESTING_IOTLB:
456 			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
457 			break;
458 		case CACHE_TAG_NESTING_DEVTLB:
459 			/*
460 			 * Address translation cache in device side caches the
461 			 * result of nested translation. There is no easy way
462 			 * to identify the exact set of nested translations
463 			 * affected by a change in S2. So just flush the entire
464 			 * device cache.
465 			 */
466 			addr = 0;
467 			mask = MAX_AGAW_PFN_WIDTH;
468 			fallthrough;
469 		case CACHE_TAG_DEVTLB:
470 			cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
471 			break;
472 		}
473 
474 		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
475 	}
476 	qi_batch_flush_descs(iommu, domain->qi_batch);
477 	spin_unlock_irqrestore(&domain->cache_lock, flags);
478 }
479 
480 /*
481  * Invalidates all ranges of IOVA when the memory mappings in the target
482  * domain have been modified.
483  */
484 void cache_tag_flush_all(struct dmar_domain *domain)
485 {
486 	struct intel_iommu *iommu = NULL;
487 	struct cache_tag *tag;
488 	unsigned long flags;
489 
490 	spin_lock_irqsave(&domain->cache_lock, flags);
491 	list_for_each_entry(tag, &domain->cache_tags, node) {
492 		if (iommu && iommu != tag->iommu)
493 			qi_batch_flush_descs(iommu, domain->qi_batch);
494 		iommu = tag->iommu;
495 
496 		switch (tag->type) {
497 		case CACHE_TAG_IOTLB:
498 		case CACHE_TAG_NESTING_IOTLB:
499 			cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
500 			break;
501 		case CACHE_TAG_DEVTLB:
502 		case CACHE_TAG_NESTING_DEVTLB:
503 			cache_tag_flush_devtlb_all(domain, tag);
504 			break;
505 		}
506 
507 		trace_cache_tag_flush_all(tag);
508 	}
509 	qi_batch_flush_descs(iommu, domain->qi_batch);
510 	spin_unlock_irqrestore(&domain->cache_lock, flags);
511 }
512 
513 /*
514  * Invalidate a range of IOVA when new mappings are created in the target
515  * domain.
516  *
517  * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
518  *   Set, any software updates to remapping structures other than first-
519  *   stage mapping requires explicit invalidation of the caches.
520  * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
521  *   write buffer flushing, software must explicitly perform write-buffer
522  *   flushing, if cache invalidation is not required.
523  */
524 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
525 			      unsigned long end)
526 {
527 	struct intel_iommu *iommu = NULL;
528 	unsigned long pages, mask, addr;
529 	struct cache_tag *tag;
530 	unsigned long flags;
531 
532 	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
533 
534 	spin_lock_irqsave(&domain->cache_lock, flags);
535 	list_for_each_entry(tag, &domain->cache_tags, node) {
536 		if (iommu && iommu != tag->iommu)
537 			qi_batch_flush_descs(iommu, domain->qi_batch);
538 		iommu = tag->iommu;
539 
540 		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
541 			iommu_flush_write_buffer(iommu);
542 			continue;
543 		}
544 
545 		if (tag->type == CACHE_TAG_IOTLB ||
546 		    tag->type == CACHE_TAG_NESTING_IOTLB)
547 			cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
548 
549 		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
550 	}
551 	qi_batch_flush_descs(iommu, domain->qi_batch);
552 	spin_unlock_irqrestore(&domain->cache_lock, flags);
553 }
554