xref: /linux/drivers/iommu/amd/nested.c (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#define dev_fmt(fmt)	"AMD-Vi: " fmt

#include <linux/iommu.h>
#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>

#include "amd_iommu.h"

static const struct iommu_domain_ops nested_domain_ops;

static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
{
	return container_of(dom, struct nested_domain, domain);
}

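/*
 * Note on the data structures used below. They are defined in the AMD IOMMU
 * driver headers, not in this file; their shape is only sketched here from
 * how the fields are used below, and the exact types may differ:
 *
 *	struct guest_domain_mapping_info {
 *		refcount_t users;	(nested domains sharing this mapping)
 *		int hdom_id;		(host domain ID backing a gDomID)
 *	};
 *
 *	struct nested_domain {
 *		struct iommu_domain domain;
 *		struct iommu_hwpt_amd_guest gdte;	(guest DTE from user space)
 *		u32 gdom_id;				(guest domain ID taken from gdte)
 *		struct amd_iommu_viommu *viommu;
 *		struct guest_domain_mapping_info *gdom_info;
 *	};
 */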
/*
 * Validate the guest DTE to make sure that the configuration for the host
 * (v1) and guest (v2) page tables is valid when allocating a nested domain.
 */
static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)
{
	u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);

	/* Must be zero: Mode, Host-TRP */
	if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||
	    FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)
		return -EINVAL;

	/* GCR3 TRP must be non-zero when both V and GV are set */
	if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&
	    FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&
	    FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)
		return -EINVAL;

	/* Valid Guest Paging Mode values are 0 and 1 */
	if (gpt_level != GUEST_PGTABLE_4_LEVEL &&
	    gpt_level != GUEST_PGTABLE_5_LEVEL)
		return -EINVAL;

	/* GLX = 3 is reserved */
	if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)
		return -EINVAL;

	/*
	 * Check the host capability before accepting the requested
	 * Guest Paging Mode.
	 */
	if (gpt_level == GUEST_PGTABLE_5_LEVEL &&
	    amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
		return -EOPNOTSUPP;

	return 0;
}

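/*
 * Look up the guest_domain_mapping_info for @index in @xa, allocating and
 * inserting a new zero-initialized element if none exists yet. Called with
 * the xarray lock held; the lock is dropped around the GFP_KERNEL allocation
 * and retaken, and __xa_cmpxchg() resolves the race with a concurrent insert
 * (the existing element is returned and the fresh allocation is freed).
 * A newly inserted element is returned with its users refcount at zero; the
 * caller takes the first reference.
 */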
static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)
{
	struct guest_domain_mapping_info *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;

	xa_unlock(xa);
	elm = kzalloc(sizeof(struct guest_domain_mapping_info), GFP_KERNEL);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
	if (xa_is_err(res))
		res = ERR_PTR(xa_err(res));

	if (res) {
		kfree(elm);
		return res;
	}

	refcount_set(&elm->users, 0);
	return elm;
}

/*
 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
 * during the call to struct iommu_ops.viommu_init().
 */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	int ret;
	struct nested_domain *ndom;
	struct guest_domain_mapping_info *gdom_info;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
		return ERR_PTR(-EOPNOTSUPP);

	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
	if (!ndom)
		return ERR_PTR(-ENOMEM);

	ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,
					  IOMMU_HWPT_DATA_AMD_GUEST,
					  dte);
	if (ret)
		goto out_err;

	ret = validate_gdte_nested(&ndom->gdte);
	if (ret)
		goto out_err;

	ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);
	ndom->domain.ops = &nested_domain_ops;
	ndom->domain.type = IOMMU_DOMAIN_NESTED;
	ndom->viommu = aviommu;

	/*
	 * Normally, when a guest has multiple pass-through devices, the IOMMU
	 * driver sets up their DTEs with the same stage-2 table and the same
	 * host domain ID (hDomID). With nested translation, if the guest sets
	 * up different stage-1 tables with the same PASID, the IOMMU would
	 * then use the same TLB tag for them, which results in TLB aliasing.
	 *
	 * The guest assigns gDomIDs based on its own algorithm for managing
	 * (DomID, PASID) cache tags. Within a single viommu, the nest parent
	 * domain (with the S2 table) is used by all DTEs, but each gDomID must
	 * be mapped consistently to a single hDomID. This is done using an
	 * xarray in the vIOMMU to keep track of the gDomID mapping. When the
	 * S2 is changed, the INVALIDATE_IOMMU_PAGES command must be issued
	 * for each hDomID in the xarray.
	 */
	xa_lock(&aviommu->gdomid_array);

	gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);
	if (IS_ERR(gdom_info)) {
		xa_unlock(&aviommu->gdomid_array);
		ret = PTR_ERR(gdom_info);
		goto out_err;
	}

	/* Check if the gDomID already has an hDomID mapping */
	if (refcount_inc_not_zero(&gdom_info->users)) {
		ndom->gdom_info = gdom_info;
		xa_unlock(&aviommu->gdomid_array);

		pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
			  __func__, ndom->gdom_id, gdom_info->hdom_id);

		return &ndom->domain;
	}

	/* The gDomID is not mapped yet. Allocate a new hdom_id. */
	gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
	if (gdom_info->hdom_id <= 0) {
		__xa_cmpxchg(&aviommu->gdomid_array,
			     ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
		xa_unlock(&aviommu->gdomid_array);
		ret = -ENOSPC;
		goto out_err_gdom_info;
	}

	ndom->gdom_info = gdom_info;
	refcount_set(&gdom_info->users, 1);

	xa_unlock(&aviommu->gdomid_array);

	pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, gdom_info->hdom_id);

	return &ndom->domain;

out_err_gdom_info:
	kfree(gdom_info);
out_err:
	kfree(ndom);
	return ERR_PTR(ret);
}

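/*
 * Build the host DTE for a device attached to a nested domain: start from the
 * nest parent's stage-2 (v1) settings programmed by amd_iommu_set_dte_v1(),
 * using the hDomID chosen in amd_iommu_alloc_domain_nested(), then merge in
 * the guest-controlled stage-1 fields (GV, PPR, GLX, GIOV, the GCR3 table
 * root pointer and the guest paging mode) from the guest DTE.
 */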
static void set_dte_nested(struct amd_iommu *iommu, struct iommu_domain *dom,
			   struct iommu_dev_data *dev_data, struct dev_table_entry *new)
{
	struct protection_domain *parent;
	struct nested_domain *ndom = to_ndomain(dom);
	struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;
	struct pt_iommu_amdv1_hw_info pt_info;

	/*
	 * The nest parent domain is attached during the call to
	 * struct iommu_ops.viommu_init() and stored in
	 * struct amd_iommu_viommu.parent.
	 */
	if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))
		return;

	parent = ndom->viommu->parent;
	amd_iommu_make_clear_dte(dev_data, new);

	/* Retrieve the current pagetable info via the IOMMU PT API. */
	pt_iommu_amdv1_hw_info(&parent->amdv1, &pt_info);

	/*
	 * Use the domain ID from the nested domain to program the DTE.
	 * See amd_iommu_alloc_domain_nested().
	 */
	amd_iommu_set_dte_v1(dev_data, parent, ndom->gdom_info->hdom_id,
			     &pt_info, new);

	/* GV is required for nested page tables */
	new->data[0] |= DTE_FLAG_GV;

	/* Guest PPR */
	new->data[0] |= gdte->dte[0] & DTE_FLAG_PPR;

	/* Guest translation controls (GLX, GIOV) */
	new->data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GIOV);

	/* GCR3 table root pointer */
	new->data[0] |= gdte->dte[0] & DTE_GCR3_14_12;
	new->data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);

	/* Guest paging mode */
	new->data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;
}

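/*
 * Attach a device to the nested domain by writing a freshly built DTE.
 * PASID must not be enabled on this attach path; the guest's stage-1
 * (GCR3) configuration comes entirely from the guest DTE merged in by
 * set_dte_nested().
 */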
static int nested_attach_device(struct iommu_domain *dom, struct device *dev,
				struct iommu_domain *old)
{
	struct dev_table_entry new = {0};
	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
	int ret = 0;

	/*
	 * Make sure PASID is not enabled for this attach path.
	 */
	if (WARN_ON(dev_data->pasid_enabled))
		return -EINVAL;

	mutex_lock(&dev_data->mutex);

	set_dte_nested(iommu, dom, dev_data, &new);

	amd_iommu_update_dte(iommu, dev_data, &new);

	mutex_unlock(&dev_data->mutex);

	return ret;
}

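/*
 * Free a nested domain. Drop the reference on its gDomID-to-hDomID mapping;
 * when the last nested domain using a given gDomID goes away, remove the
 * mapping from the viommu's xarray and release the host domain ID.
 */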
static void nested_domain_free(struct iommu_domain *dom)
{
	struct guest_domain_mapping_info *curr;
	struct nested_domain *ndom = to_ndomain(dom);
	struct amd_iommu_viommu *aviommu = ndom->viommu;

	xa_lock(&aviommu->gdomid_array);

	if (!refcount_dec_and_test(&ndom->gdom_info->users)) {
		xa_unlock(&aviommu->gdomid_array);
		return;
	}

	/*
	 * The refcount for the gdom_id to hdom_id mapping is zero.
	 * It is now safe to remove the mapping.
	 */
	curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
			    ndom->gdom_info, NULL, GFP_ATOMIC);

	xa_unlock(&aviommu->gdomid_array);
	if (WARN_ON(!curr || xa_err(curr)))
		return;

	/* success */
	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
		__func__, ndom->gdom_id, curr->hdom_id);

	amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
	kfree(curr);
	kfree(ndom);
}

static const struct iommu_domain_ops nested_domain_ops = {
	.attach_dev = nested_attach_device,
	.free = nested_domain_free,
};
295