xref: /linux/drivers/iommu/intel/cache.c (revision 7354eb7f1558466e92e926802d36e69e42938ea9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * cache.c - Intel VT-d cache invalidation
4  *
5  * Copyright (C) 2024 Intel Corporation
6  *
7  * Author: Lu Baolu <baolu.lu@linux.intel.com>
8  */
9 
10 #define pr_fmt(fmt)	"DMAR: " fmt
11 
12 #include <linux/dmar.h>
13 #include <linux/iommu.h>
14 #include <linux/memory.h>
15 #include <linux/pci.h>
16 #include <linux/spinlock.h>
17 
18 #include "iommu.h"
19 #include "pasid.h"
20 #include "trace.h"
21 
22 /* Check if an existing cache tag can be reused for a new association. */
23 static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 			     struct intel_iommu *iommu, struct device *dev,
25 			     ioasid_t pasid, enum cache_tag_type type)
26 {
27 	if (tag->type != type)
28 		return false;
29 
30 	if (tag->domain_id != domain_id || tag->pasid != pasid)
31 		return false;
32 
33 	if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 		return tag->iommu == iommu;
35 
36 	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 		return tag->dev == dev;
38 
39 	return false;
40 }
41 
42 /* Assign a cache tag with specified type to domain. */
43 static int cache_tag_assign(struct dmar_domain *domain, u16 did,
44 			    struct device *dev, ioasid_t pasid,
45 			    enum cache_tag_type type)
46 {
47 	struct device_domain_info *info = dev_iommu_priv_get(dev);
48 	struct intel_iommu *iommu = info->iommu;
49 	struct cache_tag *tag, *temp;
50 	unsigned long flags;
51 
52 	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
53 	if (!tag)
54 		return -ENOMEM;
55 
56 	tag->type = type;
57 	tag->iommu = iommu;
58 	tag->domain_id = did;
59 	tag->pasid = pasid;
60 	tag->users = 1;
61 
62 	if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 		tag->dev = dev;
64 	else
65 		tag->dev = iommu->iommu.dev;
66 
67 	spin_lock_irqsave(&domain->cache_lock, flags);
68 	list_for_each_entry(temp, &domain->cache_tags, node) {
69 		if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
70 			temp->users++;
71 			spin_unlock_irqrestore(&domain->cache_lock, flags);
72 			kfree(tag);
73 			trace_cache_tag_assign(temp);
74 			return 0;
75 		}
76 	}
77 	list_add_tail(&tag->node, &domain->cache_tags);
78 	spin_unlock_irqrestore(&domain->cache_lock, flags);
79 	trace_cache_tag_assign(tag);
80 
81 	return 0;
82 }
83 
84 /* Unassign a cache tag with specified type from domain. */
85 static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
86 			       struct device *dev, ioasid_t pasid,
87 			       enum cache_tag_type type)
88 {
89 	struct device_domain_info *info = dev_iommu_priv_get(dev);
90 	struct intel_iommu *iommu = info->iommu;
91 	struct cache_tag *tag;
92 	unsigned long flags;
93 
94 	spin_lock_irqsave(&domain->cache_lock, flags);
95 	list_for_each_entry(tag, &domain->cache_tags, node) {
96 		if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
97 			trace_cache_tag_unassign(tag);
98 			if (--tag->users == 0) {
99 				list_del(&tag->node);
100 				kfree(tag);
101 			}
102 			break;
103 		}
104 	}
105 	spin_unlock_irqrestore(&domain->cache_lock, flags);
106 }
107 
108 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
109 				     struct device *dev, ioasid_t pasid)
110 {
111 	struct device_domain_info *info = dev_iommu_priv_get(dev);
112 	int ret;
113 
114 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
115 	if (ret || !info->ats_enabled)
116 		return ret;
117 
118 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
119 	if (ret)
120 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
121 
122 	return ret;
123 }
124 
125 static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
126 					struct device *dev, ioasid_t pasid)
127 {
128 	struct device_domain_info *info = dev_iommu_priv_get(dev);
129 
130 	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
131 
132 	if (info->ats_enabled)
133 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
134 }
135 
136 static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
137 					    struct device *dev, ioasid_t pasid)
138 {
139 	struct device_domain_info *info = dev_iommu_priv_get(dev);
140 	int ret;
141 
142 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
143 	if (ret || !info->ats_enabled)
144 		return ret;
145 
146 	ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
147 	if (ret)
148 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
149 
150 	return ret;
151 }
152 
153 static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
154 					       struct device *dev, ioasid_t pasid)
155 {
156 	struct device_domain_info *info = dev_iommu_priv_get(dev);
157 
158 	cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
159 
160 	if (info->ats_enabled)
161 		cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
162 }
163 
164 static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
165 {
166 	struct device_domain_info *info = dev_iommu_priv_get(dev);
167 	struct intel_iommu *iommu = info->iommu;
168 
169 	/*
170 	 * The driver assigns different domain IDs for all domains except
171 	 * the SVA type.
172 	 */
173 	if (domain->domain.type == IOMMU_DOMAIN_SVA)
174 		return FLPT_DEFAULT_DID;
175 
176 	return domain_id_iommu(domain, iommu);
177 }
178 
179 /*
180  * Assign cache tags to a domain when it's associated with a device's
181  * PASID using a specific domain ID.
182  *
183  * On success (return value of 0), cache tags are created and added to the
184  * domain's cache tag list. On failure (negative return value), an error
185  * code is returned indicating the reason for the failure.
186  */
187 int cache_tag_assign_domain(struct dmar_domain *domain,
188 			    struct device *dev, ioasid_t pasid)
189 {
190 	u16 did = domain_get_id_for_dev(domain, dev);
191 	int ret;
192 
193 	ret = __cache_tag_assign_domain(domain, did, dev, pasid);
194 	if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
195 		return ret;
196 
197 	ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
198 	if (ret)
199 		__cache_tag_unassign_domain(domain, did, dev, pasid);
200 
201 	return ret;
202 }
203 
204 /*
205  * Remove the cache tags associated with a device's PASID when the domain is
206  * detached from the device.
207  *
208  * The cache tags must be previously assigned to the domain by calling the
209  * assign interface.
210  */
211 void cache_tag_unassign_domain(struct dmar_domain *domain,
212 			       struct device *dev, ioasid_t pasid)
213 {
214 	u16 did = domain_get_id_for_dev(domain, dev);
215 
216 	__cache_tag_unassign_domain(domain, did, dev, pasid);
217 	if (domain->domain.type == IOMMU_DOMAIN_NESTED)
218 		__cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
219 }
220 
221 static unsigned long calculate_psi_aligned_address(unsigned long start,
222 						   unsigned long end,
223 						   unsigned long *_pages,
224 						   unsigned long *_mask)
225 {
226 	unsigned long pages = aligned_nrpages(start, end - start + 1);
227 	unsigned long aligned_pages = __roundup_pow_of_two(pages);
228 	unsigned long bitmask = aligned_pages - 1;
229 	unsigned long mask = ilog2(aligned_pages);
230 	unsigned long pfn = IOVA_PFN(start);
231 
232 	/*
233 	 * PSI masks the low order bits of the base address. If the
234 	 * address isn't aligned to the mask, then compute a mask value
235 	 * needed to ensure the target range is flushed.
236 	 */
237 	if (unlikely(bitmask & pfn)) {
238 		unsigned long end_pfn = pfn + pages - 1, shared_bits;
239 
240 		/*
241 		 * Since end_pfn <= pfn + bitmask, the only way bits
242 		 * higher than bitmask can differ in pfn and end_pfn is
243 		 * by carrying. This means after masking out bitmask,
244 		 * high bits starting with the first set bit in
245 		 * shared_bits are all equal in both pfn and end_pfn.
246 		 */
247 		shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
248 		mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
249 		aligned_pages = 1UL << mask;
250 	}
251 
252 	*_pages = aligned_pages;
253 	*_mask = mask;
254 
255 	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
256 }
257 
258 /*
259  * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
260  * when the memory mappings in the target domain have been modified.
261  */
262 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
263 			   unsigned long end, int ih)
264 {
265 	unsigned long pages, mask, addr;
266 	struct cache_tag *tag;
267 	unsigned long flags;
268 
269 	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
270 
271 	spin_lock_irqsave(&domain->cache_lock, flags);
272 	list_for_each_entry(tag, &domain->cache_tags, node) {
273 		struct intel_iommu *iommu = tag->iommu;
274 		struct device_domain_info *info;
275 		u16 sid;
276 
277 		switch (tag->type) {
278 		case CACHE_TAG_IOTLB:
279 		case CACHE_TAG_NESTING_IOTLB:
280 			if (domain->use_first_level) {
281 				qi_flush_piotlb(iommu, tag->domain_id,
282 						tag->pasid, addr, pages, ih);
283 			} else {
284 				/*
285 				 * Fallback to domain selective flush if no
286 				 * PSI support or the size is too big.
287 				 */
288 				if (!cap_pgsel_inv(iommu->cap) ||
289 				    mask > cap_max_amask_val(iommu->cap))
290 					iommu->flush.flush_iotlb(iommu, tag->domain_id,
291 								 0, 0, DMA_TLB_DSI_FLUSH);
292 				else
293 					iommu->flush.flush_iotlb(iommu, tag->domain_id,
294 								 addr | ih, mask,
295 								 DMA_TLB_PSI_FLUSH);
296 			}
297 			break;
298 		case CACHE_TAG_NESTING_DEVTLB:
299 			/*
300 			 * Address translation cache in device side caches the
301 			 * result of nested translation. There is no easy way
302 			 * to identify the exact set of nested translations
303 			 * affected by a change in S2. So just flush the entire
304 			 * device cache.
305 			 */
306 			addr = 0;
307 			mask = MAX_AGAW_PFN_WIDTH;
308 			fallthrough;
309 		case CACHE_TAG_DEVTLB:
310 			info = dev_iommu_priv_get(tag->dev);
311 			sid = PCI_DEVID(info->bus, info->devfn);
312 
313 			if (tag->pasid == IOMMU_NO_PASID)
314 				qi_flush_dev_iotlb(iommu, sid, info->pfsid,
315 						   info->ats_qdep, addr, mask);
316 			else
317 				qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
318 							 tag->pasid, info->ats_qdep,
319 							 addr, mask);
320 
321 			quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
322 			break;
323 		}
324 
325 		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
326 	}
327 	spin_unlock_irqrestore(&domain->cache_lock, flags);
328 }
329 
330 /*
331  * Invalidates all ranges of IOVA when the memory mappings in the target
332  * domain have been modified.
333  */
334 void cache_tag_flush_all(struct dmar_domain *domain)
335 {
336 	struct cache_tag *tag;
337 	unsigned long flags;
338 
339 	spin_lock_irqsave(&domain->cache_lock, flags);
340 	list_for_each_entry(tag, &domain->cache_tags, node) {
341 		struct intel_iommu *iommu = tag->iommu;
342 		struct device_domain_info *info;
343 		u16 sid;
344 
345 		switch (tag->type) {
346 		case CACHE_TAG_IOTLB:
347 		case CACHE_TAG_NESTING_IOTLB:
348 			if (domain->use_first_level)
349 				qi_flush_piotlb(iommu, tag->domain_id,
350 						tag->pasid, 0, -1, 0);
351 			else
352 				iommu->flush.flush_iotlb(iommu, tag->domain_id,
353 							 0, 0, DMA_TLB_DSI_FLUSH);
354 			break;
355 		case CACHE_TAG_DEVTLB:
356 		case CACHE_TAG_NESTING_DEVTLB:
357 			info = dev_iommu_priv_get(tag->dev);
358 			sid = PCI_DEVID(info->bus, info->devfn);
359 
360 			qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
361 					   0, MAX_AGAW_PFN_WIDTH);
362 			quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
363 						  IOMMU_NO_PASID, info->ats_qdep);
364 			break;
365 		}
366 
367 		trace_cache_tag_flush_all(tag);
368 	}
369 	spin_unlock_irqrestore(&domain->cache_lock, flags);
370 }
371 
372 /*
373  * Invalidate a range of IOVA when new mappings are created in the target
374  * domain.
375  *
376  * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
377  *   Set, any software updates to remapping structures other than first-
378  *   stage mapping requires explicit invalidation of the caches.
379  * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
380  *   write buffer flushing, software must explicitly perform write-buffer
381  *   flushing, if cache invalidation is not required.
382  */
383 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
384 			      unsigned long end)
385 {
386 	unsigned long pages, mask, addr;
387 	struct cache_tag *tag;
388 	unsigned long flags;
389 
390 	addr = calculate_psi_aligned_address(start, end, &pages, &mask);
391 
392 	spin_lock_irqsave(&domain->cache_lock, flags);
393 	list_for_each_entry(tag, &domain->cache_tags, node) {
394 		struct intel_iommu *iommu = tag->iommu;
395 
396 		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
397 			iommu_flush_write_buffer(iommu);
398 			continue;
399 		}
400 
401 		if (tag->type == CACHE_TAG_IOTLB ||
402 		    tag->type == CACHE_TAG_NESTING_IOTLB) {
403 			/*
404 			 * Fallback to domain selective flush if no
405 			 * PSI support or the size is too big.
406 			 */
407 			if (!cap_pgsel_inv(iommu->cap) ||
408 			    mask > cap_max_amask_val(iommu->cap))
409 				iommu->flush.flush_iotlb(iommu, tag->domain_id,
410 							 0, 0, DMA_TLB_DSI_FLUSH);
411 			else
412 				iommu->flush.flush_iotlb(iommu, tag->domain_id,
413 							 addr, mask,
414 							 DMA_TLB_PSI_FLUSH);
415 		}
416 
417 		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
418 	}
419 	spin_unlock_irqrestore(&domain->cache_lock, flags);
420 }
421