// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#define dev_fmt(fmt) "AMD-Vi: " fmt

#include <linux/iommu.h>
#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>

#include "amd_iommu.h"

static const struct iommu_domain_ops nested_domain_ops;

static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
{
	return container_of(dom, struct nested_domain, domain);
}

/*
 * Validate the guest DTE to make sure that the configuration of the host (v1)
 * and guest (v2) page tables is valid when allocating a nested domain.
 */
static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)
{
	u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);

	/* Must be zero: Mode, Host-TPR */
	if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||
	    FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)
		return -EINVAL;

	/* GCR3 TRP must be non-zero if V and GV are set */
	if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&
	    FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&
	    FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)
		return -EINVAL;

	/* Valid Guest Paging Mode values are 0 and 1 */
	if (gpt_level != GUEST_PGTABLE_4_LEVEL &&
	    gpt_level != GUEST_PGTABLE_5_LEVEL)
		return -EINVAL;

	/* GLX = 3 is reserved */
	if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)
		return -EINVAL;

	/*
	 * We need to check host capability before setting
	 * the Guest Paging Mode.
	 */
	if (gpt_level == GUEST_PGTABLE_5_LEVEL &&
	    amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
		return -EOPNOTSUPP;

	return 0;
}

static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)
{
	struct guest_domain_mapping_info *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;

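	/*
	 * Drop the xarray lock so the allocation can use GFP_KERNEL;
	 * __xa_cmpxchg() below resolves any race with a concurrent
	 * allocation for the same index.
	 */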
	xa_unlock(xa);
	elm = kzalloc_obj(struct guest_domain_mapping_info, GFP_KERNEL);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
	if (xa_is_err(res))
		res = ERR_PTR(xa_err(res));

	if (res) {
		kfree(elm);
		return res;
	}

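	/*
	 * Start with a zero refcount; the caller takes the first reference
	 * once an hDomID has been allocated for this entry.
	 */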
	refcount_set(&elm->users, 0);
	return elm;
}

/*
 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
 * during the call to struct iommu_ops.viommu_init().
 */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	int ret;
	struct nested_domain *ndom;
	struct guest_domain_mapping_info *gdom_info;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
		return ERR_PTR(-EOPNOTSUPP);

	ndom = kzalloc_obj(*ndom, GFP_KERNEL);
	if (!ndom)
		return ERR_PTR(-ENOMEM);

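	/* Copy the guest DTE from user space and validate it. */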
	ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,
					  IOMMU_HWPT_DATA_AMD_GUEST,
					  dte);
	if (ret)
		goto out_err;

	ret = validate_gdte_nested(&ndom->gdte);
	if (ret)
		goto out_err;

	ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);
	ndom->domain.ops = &nested_domain_ops;
	ndom->domain.type = IOMMU_DOMAIN_NESTED;
	ndom->viommu = aviommu;

	/*
	 * Normally, when a guest has multiple pass-through devices, the
	 * IOMMU driver sets up the DTEs with the same stage-2 table and
	 * uses the same host domain ID (hDomID). With nested translation,
	 * if the guest sets up different stage-1 tables with the same
	 * PASID, the IOMMU would use the same TLB tag, which results in
	 * TLB aliasing.
	 *
	 * The guest assigns gDomIDs based on its own algorithm for managing
	 * cache tags of (DomID, PASID). Within a single viommu, the nest
	 * parent domain (w/ S2 table) is used by all DTEs, but we need to
	 * consistently map each gDomID to a single hDomID. This is done
	 * using an xarray in the vIOMMU to keep track of the gDomID
	 * mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES
	 * command must be issued for each hDomID in the xarray.
	 */
	xa_lock(&aviommu->gdomid_array);

	gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);
	if (IS_ERR(gdom_info)) {
		xa_unlock(&aviommu->gdomid_array);
		ret = PTR_ERR(gdom_info);
		goto out_err;
	}

	/* Check if the gDomID already exists */
	if (refcount_inc_not_zero(&gdom_info->users)) {
		ndom->gdom_info = gdom_info;
		xa_unlock(&aviommu->gdomid_array);

		pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
			 __func__, ndom->gdom_id, gdom_info->hdom_id);

		return &ndom->domain;
	}

	/* The gDomID does not exist. Allocate a new hdom_id. */
	gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
	if (gdom_info->hdom_id <= 0) {
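		/* Undo the xarray insertion before dropping the lock. */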
		__xa_cmpxchg(&aviommu->gdomid_array,
			     ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
		xa_unlock(&aviommu->gdomid_array);
		ret = -ENOSPC;
		goto out_err_gdom_info;
	}

	ndom->gdom_info = gdom_info;
	refcount_set(&gdom_info->users, 1);

	xa_unlock(&aviommu->gdomid_array);

	pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, gdom_info->hdom_id);

	return &ndom->domain;

out_err_gdom_info:
	kfree(gdom_info);
out_err:
	kfree(ndom);
	return ERR_PTR(ret);
}

static void set_dte_nested(struct amd_iommu *iommu, struct iommu_domain *dom,
			   struct iommu_dev_data *dev_data, struct dev_table_entry *new)
{
	struct protection_domain *parent;
	struct nested_domain *ndom = to_ndomain(dom);
	struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;
	struct pt_iommu_amdv1_hw_info pt_info;

	/*
	 * The nest parent domain is attached during the call to
	 * struct iommu_ops.viommu_init() and is stored in
	 * struct amd_iommu_viommu.parent.
	 */
	if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))
		return;

	parent = ndom->viommu->parent;
	amd_iommu_make_clear_dte(dev_data, new);

	/* Retrieve the current pagetable info via the IOMMU PT API. */
	pt_iommu_amdv1_hw_info(&parent->amdv1, &pt_info);

	/*
	 * Use domain ID from nested domain to program DTE.
	 * See amd_iommu_alloc_domain_nested().
	 */
	amd_iommu_set_dte_v1(dev_data, parent, ndom->gdom_info->hdom_id,
			     &pt_info, new);

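	/*
	 * Merge the guest-owned stage-1 controls (GCR3 table, GLX, GIOV,
	 * PPR, guest paging mode) from the guest DTE into the new DTE.
	 */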
	/* GV is required for nested page table */
	new->data[0] |= DTE_FLAG_GV;

	/* Guest PPR */
	new->data[0] |= gdte->dte[0] & DTE_FLAG_PPR;

	/* Guest translation controls (GLX, GIOV) */
	new->data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GIOV);

	/* GCR3 table */
	new->data[0] |= gdte->dte[0] & DTE_GCR3_14_12;
	new->data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);

	/* Guest paging mode */
	new->data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;
}

static int nested_attach_device(struct iommu_domain *dom, struct device *dev,
				struct iommu_domain *old)
{
	struct dev_table_entry new = {0};
	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
	int ret = 0;

	/*
	 * Make sure PASID is not enabled
	 * for this attach path.
	 */
	if (WARN_ON(dev_data->pasid_enabled))
		return -EINVAL;

	mutex_lock(&dev_data->mutex);

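	/* Compose the nested DTE and program it for this device. */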
	set_dte_nested(iommu, dom, dev_data, &new);

	amd_iommu_update_dte(iommu, dev_data, &new);

	mutex_unlock(&dev_data->mutex);

	return ret;
}

static void nested_domain_free(struct iommu_domain *dom)
{
	struct guest_domain_mapping_info *curr;
	struct nested_domain *ndom = to_ndomain(dom);
	struct amd_iommu_viommu *aviommu = ndom->viommu;

	xa_lock(&aviommu->gdomid_array);

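	/* Drop this domain's reference on the gDomID to hDomID mapping. */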
	if (!refcount_dec_and_test(&ndom->gdom_info->users)) {
		xa_unlock(&aviommu->gdomid_array);
		kfree(ndom);
		return;
	}

	/*
	 * The refcount for the gdom_id to hdom_id mapping is zero.
	 * It is now safe to remove the mapping.
	 */
	curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
			    ndom->gdom_info, NULL, GFP_ATOMIC);

	xa_unlock(&aviommu->gdomid_array);
	if (WARN_ON(!curr || xa_err(curr)))
		return;

	/* success */
	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, curr->hdom_id);

	amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
	kfree(curr);
	kfree(ndom);
}

static const struct iommu_domain_ops nested_domain_ops = {
	.attach_dev = nested_attach_device,
	.free = nested_domain_free,
};