1774180a7SSuravee Suthikulpanit // SPDX-License-Identifier: GPL-2.0-only
2774180a7SSuravee Suthikulpanit /*
3774180a7SSuravee Suthikulpanit * Copyright (C) 2025 Advanced Micro Devices, Inc.
4774180a7SSuravee Suthikulpanit */
5774180a7SSuravee Suthikulpanit
6774180a7SSuravee Suthikulpanit #define dev_fmt(fmt) "AMD-Vi: " fmt
7774180a7SSuravee Suthikulpanit
8774180a7SSuravee Suthikulpanit #include <linux/iommu.h>
9757d2b1fSSuravee Suthikulpanit #include <linux/refcount.h>
10774180a7SSuravee Suthikulpanit #include <uapi/linux/iommufd.h>
11774180a7SSuravee Suthikulpanit
12774180a7SSuravee Suthikulpanit #include "amd_iommu.h"
13774180a7SSuravee Suthikulpanit
14774180a7SSuravee Suthikulpanit static const struct iommu_domain_ops nested_domain_ops;
15774180a7SSuravee Suthikulpanit
/* Convert a generic iommu_domain pointer to its enclosing nested_domain. */
static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
{
	return container_of(dom, struct nested_domain, domain);
}
20774180a7SSuravee Suthikulpanit
21774180a7SSuravee Suthikulpanit /*
22774180a7SSuravee Suthikulpanit * Validate guest DTE to make sure that configuration for host (v1)
23774180a7SSuravee Suthikulpanit * and guest (v2) page tables are valid when allocating nested domain.
24774180a7SSuravee Suthikulpanit */
validate_gdte_nested(struct iommu_hwpt_amd_guest * gdte)25774180a7SSuravee Suthikulpanit static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)
26774180a7SSuravee Suthikulpanit {
27774180a7SSuravee Suthikulpanit u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);
28774180a7SSuravee Suthikulpanit
29774180a7SSuravee Suthikulpanit /* Must be zero: Mode, Host-TPR */
30774180a7SSuravee Suthikulpanit if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||
31774180a7SSuravee Suthikulpanit FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)
32774180a7SSuravee Suthikulpanit return -EINVAL;
33774180a7SSuravee Suthikulpanit
34774180a7SSuravee Suthikulpanit /* GCR3 TRP must be non-zero if V, GV is set */
35774180a7SSuravee Suthikulpanit if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&
36774180a7SSuravee Suthikulpanit FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&
37774180a7SSuravee Suthikulpanit FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&
38774180a7SSuravee Suthikulpanit FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&
39774180a7SSuravee Suthikulpanit FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)
40774180a7SSuravee Suthikulpanit return -EINVAL;
41774180a7SSuravee Suthikulpanit
42774180a7SSuravee Suthikulpanit /* Valid Guest Paging Mode values are 0 and 1 */
43774180a7SSuravee Suthikulpanit if (gpt_level != GUEST_PGTABLE_4_LEVEL &&
44774180a7SSuravee Suthikulpanit gpt_level != GUEST_PGTABLE_5_LEVEL)
45774180a7SSuravee Suthikulpanit return -EINVAL;
46774180a7SSuravee Suthikulpanit
47774180a7SSuravee Suthikulpanit /* GLX = 3 is reserved */
48774180a7SSuravee Suthikulpanit if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)
49774180a7SSuravee Suthikulpanit return -EINVAL;
50774180a7SSuravee Suthikulpanit
51774180a7SSuravee Suthikulpanit /*
52774180a7SSuravee Suthikulpanit * We need to check host capability before setting
53774180a7SSuravee Suthikulpanit * the Guest Paging Mode
54774180a7SSuravee Suthikulpanit */
55774180a7SSuravee Suthikulpanit if (gpt_level == GUEST_PGTABLE_5_LEVEL &&
56774180a7SSuravee Suthikulpanit amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
57774180a7SSuravee Suthikulpanit return -EOPNOTSUPP;
58774180a7SSuravee Suthikulpanit
59774180a7SSuravee Suthikulpanit return 0;
60774180a7SSuravee Suthikulpanit }
61774180a7SSuravee Suthikulpanit
/*
 * Look up the guest-domain-ID mapping entry at @index in @xa, allocating
 * and inserting a fresh zeroed entry when none exists.
 *
 * Context: caller holds xa_lock(@xa); the lock is also held on return.
 * The lock is dropped around the sleeping allocation, so a racing caller
 * may insert first; __xa_cmpxchg resolves that race and the loser's
 * allocation is freed here.
 *
 * Returns an existing entry (refcount untouched), a fresh entry with
 * users == 0 (caller must raise the refcount), or an ERR_PTR on
 * allocation/store failure.
 */
static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)
{
	struct guest_domain_mapping_info *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;

	/* Drop the spinlock across the sleeping allocation. */
	xa_unlock(xa);
	elm = kzalloc_obj(struct guest_domain_mapping_info);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
	if (xa_is_err(res))
		res = ERR_PTR(xa_err(res));

	/*
	 * Non-NULL means either a store error (converted above) or another
	 * thread inserted while the lock was dropped; either way our
	 * allocation is redundant.
	 */
	if (res) {
		kfree(elm);
		return res;
	}

	/* Fresh entry: published with users == 0 under the held lock. */
	refcount_set(&elm->users, 0);
	return elm;
}
88757d2b1fSSuravee Suthikulpanit
/*
 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
 * during the call to struct iommu_ops.viommu_init().
 *
 * Builds a nested (stage-1) domain from a guest-provided DTE and maps the
 * guest's domain ID (gDomID) to a host domain ID (hDomID) that is shared by
 * every nested domain of this vIOMMU using the same gDomID.
 *
 * Returns the new domain, or an ERR_PTR on validation/allocation failure.
 */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	int ret;
	struct nested_domain *ndom;
	struct guest_domain_mapping_info *gdom_info;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
		return ERR_PTR(-EOPNOTSUPP);

	ndom = kzalloc_obj(*ndom);
	if (!ndom)
		return ERR_PTR(-ENOMEM);

	/* Copy the guest DTE from userspace, then validate it. */
	ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,
					  IOMMU_HWPT_DATA_AMD_GUEST,
					  dte);
	if (ret)
		goto out_err;

	ret = validate_gdte_nested(&ndom->gdte);
	if (ret)
		goto out_err;

	ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);
	ndom->domain.ops = &nested_domain_ops;
	ndom->domain.type = IOMMU_DOMAIN_NESTED;
	ndom->viommu = aviommu;

	/*
	 * Normally, when a guest has multiple pass-through devices,
	 * the IOMMU driver setup DTEs with the same stage-2 table and
	 * use the same host domain ID (hDomId). In case of nested translation,
	 * if the guest setup different stage-1 tables with same PASID,
	 * IOMMU would use the same TLB tag. This will results in TLB
	 * aliasing issue.
	 *
	 * The guest is assigning gDomIDs based on its own algorithm for managing
	 * cache tags of (DomID, PASID). Within a single viommu, the nest parent domain
	 * (w/ S2 table) is used by all DTEs. But we need to consistently map the gDomID
	 * to a single hDomID. This is done using an xarray in the vIOMMU to
	 * keep track of the gDomID mapping. When the S2 is changed, the INVALIDATE_IOMMU_PAGES
	 * command must be issued for each hDomID in the xarray.
	 */
	xa_lock(&aviommu->gdomid_array);

	gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);
	if (IS_ERR(gdom_info)) {
		xa_unlock(&aviommu->gdomid_array);
		ret = PTR_ERR(gdom_info);
		goto out_err;
	}

	/* Check if gDomID exist */
	if (refcount_inc_not_zero(&gdom_info->users)) {
		/* Reuse the established gDomID -> hDomID mapping. */
		ndom->gdom_info = gdom_info;
		xa_unlock(&aviommu->gdomid_array);

		pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
			 __func__, ndom->gdom_id, gdom_info->hdom_id);

		return &ndom->domain;
	}

	/* The gDomID does not exist. We allocate new hdom_id */
	gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
	if (gdom_info->hdom_id <= 0) {
		/* Undo the insertion done by gdom_info_load_or_alloc_locked(). */
		__xa_cmpxchg(&aviommu->gdomid_array,
			     ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
		xa_unlock(&aviommu->gdomid_array);
		ret = -ENOSPC;
		goto out_err_gdom_info;
	}

	/* First user of this mapping: publish it with refcount 1. */
	ndom->gdom_info = gdom_info;
	refcount_set(&gdom_info->users, 1);

	xa_unlock(&aviommu->gdomid_array);

	pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, gdom_info->hdom_id);

	return &ndom->domain;

out_err_gdom_info:
	kfree(gdom_info);
out_err:
	kfree(ndom);
	return ERR_PTR(ret);
}
185774180a7SSuravee Suthikulpanit
set_dte_nested(struct amd_iommu * iommu,struct iommu_domain * dom,struct iommu_dev_data * dev_data,struct dev_table_entry * new)186103f4e7cSSuravee Suthikulpanit static void set_dte_nested(struct amd_iommu *iommu, struct iommu_domain *dom,
187103f4e7cSSuravee Suthikulpanit struct iommu_dev_data *dev_data, struct dev_table_entry *new)
188103f4e7cSSuravee Suthikulpanit {
189103f4e7cSSuravee Suthikulpanit struct protection_domain *parent;
190103f4e7cSSuravee Suthikulpanit struct nested_domain *ndom = to_ndomain(dom);
191103f4e7cSSuravee Suthikulpanit struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;
192103f4e7cSSuravee Suthikulpanit struct pt_iommu_amdv1_hw_info pt_info;
193103f4e7cSSuravee Suthikulpanit
194103f4e7cSSuravee Suthikulpanit /*
195103f4e7cSSuravee Suthikulpanit * The nest parent domain is attached during the call to the
196103f4e7cSSuravee Suthikulpanit * struct iommu_ops.viommu_init(), which will be stored as part
197103f4e7cSSuravee Suthikulpanit * of the struct amd_iommu_viommu.parent.
198103f4e7cSSuravee Suthikulpanit */
199103f4e7cSSuravee Suthikulpanit if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))
200103f4e7cSSuravee Suthikulpanit return;
201103f4e7cSSuravee Suthikulpanit
202103f4e7cSSuravee Suthikulpanit parent = ndom->viommu->parent;
203103f4e7cSSuravee Suthikulpanit amd_iommu_make_clear_dte(dev_data, new);
204103f4e7cSSuravee Suthikulpanit
205103f4e7cSSuravee Suthikulpanit /* Retrieve the current pagetable info via the IOMMU PT API. */
206103f4e7cSSuravee Suthikulpanit pt_iommu_amdv1_hw_info(&parent->amdv1, &pt_info);
207103f4e7cSSuravee Suthikulpanit
208103f4e7cSSuravee Suthikulpanit /*
209103f4e7cSSuravee Suthikulpanit * Use domain ID from nested domain to program DTE.
210103f4e7cSSuravee Suthikulpanit * See amd_iommu_alloc_domain_nested().
211103f4e7cSSuravee Suthikulpanit */
212103f4e7cSSuravee Suthikulpanit amd_iommu_set_dte_v1(dev_data, parent, ndom->gdom_info->hdom_id,
213103f4e7cSSuravee Suthikulpanit &pt_info, new);
214103f4e7cSSuravee Suthikulpanit
215103f4e7cSSuravee Suthikulpanit /* GV is required for nested page table */
216103f4e7cSSuravee Suthikulpanit new->data[0] |= DTE_FLAG_GV;
217103f4e7cSSuravee Suthikulpanit
218103f4e7cSSuravee Suthikulpanit /* Guest PPR */
219103f4e7cSSuravee Suthikulpanit new->data[0] |= gdte->dte[0] & DTE_FLAG_PPR;
220103f4e7cSSuravee Suthikulpanit
221103f4e7cSSuravee Suthikulpanit /* Guest translation stuff */
222103f4e7cSSuravee Suthikulpanit new->data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GIOV);
223103f4e7cSSuravee Suthikulpanit
224103f4e7cSSuravee Suthikulpanit /* GCR3 table */
225103f4e7cSSuravee Suthikulpanit new->data[0] |= gdte->dte[0] & DTE_GCR3_14_12;
226103f4e7cSSuravee Suthikulpanit new->data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);
227103f4e7cSSuravee Suthikulpanit
228103f4e7cSSuravee Suthikulpanit /* Guest paging mode */
229103f4e7cSSuravee Suthikulpanit new->data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;
230103f4e7cSSuravee Suthikulpanit }
231103f4e7cSSuravee Suthikulpanit
nested_attach_device(struct iommu_domain * dom,struct device * dev,struct iommu_domain * old)232103f4e7cSSuravee Suthikulpanit static int nested_attach_device(struct iommu_domain *dom, struct device *dev,
233103f4e7cSSuravee Suthikulpanit struct iommu_domain *old)
234103f4e7cSSuravee Suthikulpanit {
235103f4e7cSSuravee Suthikulpanit struct dev_table_entry new = {0};
236103f4e7cSSuravee Suthikulpanit struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
237103f4e7cSSuravee Suthikulpanit struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
238103f4e7cSSuravee Suthikulpanit int ret = 0;
239103f4e7cSSuravee Suthikulpanit
240103f4e7cSSuravee Suthikulpanit /*
241103f4e7cSSuravee Suthikulpanit * Needs to make sure PASID is not enabled
242103f4e7cSSuravee Suthikulpanit * for this attach path.
243103f4e7cSSuravee Suthikulpanit */
244103f4e7cSSuravee Suthikulpanit if (WARN_ON(dev_data->pasid_enabled))
245103f4e7cSSuravee Suthikulpanit return -EINVAL;
246103f4e7cSSuravee Suthikulpanit
247103f4e7cSSuravee Suthikulpanit mutex_lock(&dev_data->mutex);
248103f4e7cSSuravee Suthikulpanit
249103f4e7cSSuravee Suthikulpanit set_dte_nested(iommu, dom, dev_data, &new);
250103f4e7cSSuravee Suthikulpanit
251103f4e7cSSuravee Suthikulpanit amd_iommu_update_dte(iommu, dev_data, &new);
252103f4e7cSSuravee Suthikulpanit
253103f4e7cSSuravee Suthikulpanit mutex_unlock(&dev_data->mutex);
254103f4e7cSSuravee Suthikulpanit
255103f4e7cSSuravee Suthikulpanit return ret;
256103f4e7cSSuravee Suthikulpanit }
257103f4e7cSSuravee Suthikulpanit
/*
 * Free a nested domain and drop its reference on the gDomID -> hDomID
 * mapping. The last reference removes the mapping from the vIOMMU's
 * xarray and releases the host domain ID.
 */
static void nested_domain_free(struct iommu_domain *dom)
{
	struct guest_domain_mapping_info *curr;
	struct nested_domain *ndom = to_ndomain(dom);
	struct amd_iommu_viommu *aviommu = ndom->viommu;

	xa_lock(&aviommu->gdomid_array);

	/* Other nested domains still share this mapping; keep it. */
	if (!refcount_dec_and_test(&ndom->gdom_info->users)) {
		xa_unlock(&aviommu->gdomid_array);
		return;
	}

	/*
	 * The refcount for the gdom_id to hdom_id mapping is zero.
	 * It is now safe to remove the mapping.
	 */
	curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
			    ndom->gdom_info, NULL, GFP_ATOMIC);

	xa_unlock(&aviommu->gdomid_array);
	/* The stored entry must match what this domain holds. */
	if (WARN_ON(!curr || xa_err(curr)))
		return;

	/* success */
	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, curr->hdom_id);

	amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
	kfree(curr);
	kfree(ndom);
}
290774180a7SSuravee Suthikulpanit
/* Domain ops for IOMMU_DOMAIN_NESTED domains created by this vIOMMU. */
static const struct iommu_domain_ops nested_domain_ops = {
	.attach_dev = nested_attach_device,
	.free = nested_domain_free,
};
295