xref: /linux/drivers/iommu/amd/nested.c (revision 757d2b1fdf5b7d6eead5963a49b5780617987ab8)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#define dev_fmt(fmt)	"AMD-Vi: " fmt

#include <linux/iommu.h>
#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>

#include "amd_iommu.h"

static const struct iommu_domain_ops nested_domain_ops;

static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
{
	return container_of(dom, struct nested_domain, domain);
}

/*
 * Validate the guest DTE to make sure that the configuration for the host
 * (v1) and guest (v2) page tables is valid when allocating a nested domain.
 */
static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)
{
	u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);

	/* Must be zero: Mode, Host-TPR */
	if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||
	    FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)
		return -EINVAL;

	/* GCR3 TRP must be non-zero when both V and GV are set */
	if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&
	    FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&
	    FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)
		return -EINVAL;

	/* Valid Guest Paging Mode values are 0 and 1 */
	if (gpt_level != GUEST_PGTABLE_4_LEVEL &&
	    gpt_level != GUEST_PGTABLE_5_LEVEL)
		return -EINVAL;

	/* GLX = 3 is reserved */
	if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)
		return -EINVAL;

	/*
	 * Check the host capability before accepting the 5-level
	 * Guest Paging Mode.
	 */
	if (gpt_level == GUEST_PGTABLE_5_LEVEL &&
	    amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
		return -EOPNOTSUPP;

	return 0;
}

static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)
{
	struct guest_domain_mapping_info *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;

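	/*
	 * Drop the xa_lock so the allocation below can sleep. If another
	 * caller installs an entry for this index while the lock is
	 * released, the cmpxchg below notices and this allocation is freed.
	 */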
	xa_unlock(xa);
	elm = kzalloc(sizeof(struct guest_domain_mapping_info), GFP_KERNEL);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
	if (xa_is_err(res))
		res = ERR_PTR(xa_err(res));

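	/*
	 * A non-NULL result is either an entry installed by a racing caller
	 * or an error; in both cases drop the local allocation.
	 */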
	if (res) {
		kfree(elm);
		return res;
	}

	refcount_set(&elm->users, 0);
	return elm;
}

/*
 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
 * during the call to struct iommu_ops.viommu_init().
 */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	int ret;
	struct nested_domain *ndom;
	struct guest_domain_mapping_info *gdom_info;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
		return ERR_PTR(-EOPNOTSUPP);

	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
	if (!ndom)
		return ERR_PTR(-ENOMEM);

	ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,
					  IOMMU_HWPT_DATA_AMD_GUEST,
					  dte);
	if (ret)
		goto out_err;

	ret = validate_gdte_nested(&ndom->gdte);
	if (ret)
		goto out_err;

	ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);
	ndom->domain.ops = &nested_domain_ops;
	ndom->domain.type = IOMMU_DOMAIN_NESTED;
	ndom->viommu = aviommu;
	/*
	 * Normally, when a guest has multiple pass-through devices, the
	 * IOMMU driver sets up their DTEs with the same stage-2 table and
	 * the same host domain ID (hDomID). With nested translation, if the
	 * guest sets up different stage-1 tables under the same PASID, the
	 * IOMMU would use the same TLB tag for all of them, which results
	 * in TLB aliasing.
	 *
	 * The guest assigns gDomIDs based on its own algorithm for managing
	 * cache tags of (DomID, PASID). Within a single viommu, the nest
	 * parent domain (w/ S2 table) is used by all DTEs, but each gDomID
	 * must consistently map to a single hDomID. This is done using an
	 * xarray in the vIOMMU to keep track of the gDomID mapping. When
	 * the S2 is changed, the INVALIDATE_IOMMU_PAGES command must be
	 * issued for each hDomID in the xarray.
	 */
	xa_lock(&aviommu->gdomid_array);

	gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);
	if (IS_ERR(gdom_info)) {
		xa_unlock(&aviommu->gdomid_array);
		ret = PTR_ERR(gdom_info);
		goto out_err;
	}

	/* Check if a mapping for this gDomID already exists. */
	if (refcount_inc_not_zero(&gdom_info->users)) {
		ndom->gdom_info = gdom_info;
		xa_unlock(&aviommu->gdomid_array);

		pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
			 __func__, ndom->gdom_id, gdom_info->hdom_id);

		return &ndom->domain;
	}

	/* The gDomID is not mapped yet. Allocate a new hdom_id for it. */
	gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
	if (gdom_info->hdom_id <= 0) {
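		/*
		 * No host domain ID is available: back out the placeholder
		 * entry installed by gdom_info_load_or_alloc_locked() above
		 * so a later allocation for this gDomID can retry.
		 */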
		__xa_cmpxchg(&aviommu->gdomid_array,
			     ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
		xa_unlock(&aviommu->gdomid_array);
		ret = -ENOSPC;
		goto out_err_gdom_info;
	}

	ndom->gdom_info = gdom_info;
	refcount_set(&gdom_info->users, 1);

	xa_unlock(&aviommu->gdomid_array);

	pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, gdom_info->hdom_id);

	return &ndom->domain;

out_err_gdom_info:
	kfree(gdom_info);
out_err:
	kfree(ndom);
	return ERR_PTR(ret);
}
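/*
 * For orientation only (not driver logic): as the comment above notes,
 * amd_iommu_alloc_domain_nested() is meant to be installed through
 * struct iommufd_viommu_ops. A minimal sketch of that wiring, using a
 * hypothetical ops-table name, could look like:
 *
 *	static const struct iommufd_viommu_ops amd_viommu_ops = {
 *		.alloc_domain_nested	= amd_iommu_alloc_domain_nested,
 *	};
 *
 * with the ops table installed from the driver's
 * struct iommu_ops.viommu_init() implementation (not shown here).
 */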

static void nested_domain_free(struct iommu_domain *dom)
{
	struct guest_domain_mapping_info *curr;
	struct nested_domain *ndom = to_ndomain(dom);
	struct amd_iommu_viommu *aviommu = ndom->viommu;

	xa_lock(&aviommu->gdomid_array);

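	/*
	 * Drop this nested domain's reference on the gDomID mapping. Only
	 * the last nested domain using the gDomID removes the mapping and
	 * releases the backing hDomID.
	 */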
	if (!refcount_dec_and_test(&ndom->gdom_info->users)) {
		/* Other nested domains still use this gDomID mapping. */
		xa_unlock(&aviommu->gdomid_array);
		kfree(ndom);
		return;
	}

	/*
	 * The refcount for the gdom_id to hdom_id mapping is zero.
	 * It is now safe to remove the mapping.
	 */
	curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
			    ndom->gdom_info, NULL, GFP_ATOMIC);

	xa_unlock(&aviommu->gdomid_array);
	if (WARN_ON(!curr || xa_err(curr)))
		return;

	/* success */
	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, curr->hdom_id);

	amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
	kfree(curr);
	kfree(ndom);
}

static const struct iommu_domain_ops nested_domain_ops = {
	.free = nested_domain_free,
};