// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Advanced Micro Devices, Inc.
 */

#define dev_fmt(fmt)	"AMD-Vi: " fmt

#include <linux/iommu.h>
#include <linux/refcount.h>
#include <uapi/linux/iommufd.h>

#include "amd_iommu.h"

static const struct iommu_domain_ops nested_domain_ops;

static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
{
	return container_of(dom, struct nested_domain, domain);
}

/*
 * Validate the guest DTE to make sure that the configuration for the host (v1)
 * and guest (v2) page tables is valid when allocating a nested domain.
 */
static int validate_gdte_nested(struct iommu_hwpt_amd_guest *gdte)
{
	u32 gpt_level = FIELD_GET(DTE_GPT_LEVEL_MASK, gdte->dte[2]);

	/* Must be zero: Mode, Host-TRP */
	if (FIELD_GET(DTE_MODE_MASK, gdte->dte[0]) != 0 ||
	    FIELD_GET(DTE_HOST_TRP, gdte->dte[0]) != 0)
		return -EINVAL;

	/* The GCR3 table root pointer must be non-zero when V and GV are set */
	if (FIELD_GET(DTE_FLAG_V, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_FLAG_GV, gdte->dte[0]) == 1 &&
	    FIELD_GET(DTE_GCR3_14_12, gdte->dte[0]) == 0 &&
	    FIELD_GET(DTE_GCR3_30_15, gdte->dte[1]) == 0 &&
	    FIELD_GET(DTE_GCR3_51_31, gdte->dte[1]) == 0)
		return -EINVAL;

	/* Valid Guest Paging Mode values are 0 (4-level) and 1 (5-level) */
	if (gpt_level != GUEST_PGTABLE_4_LEVEL &&
	    gpt_level != GUEST_PGTABLE_5_LEVEL)
		return -EINVAL;

	/* GLX = 3 is reserved */
	if (FIELD_GET(DTE_GLX, gdte->dte[0]) == 3)
		return -EINVAL;

	/* Check the host capability before allowing the 5-level guest paging mode */
	if (gpt_level == GUEST_PGTABLE_5_LEVEL &&
	    amd_iommu_gpt_level < PAGE_MODE_5_LEVEL)
		return -EOPNOTSUPP;

	return 0;
}

/*
 * Look up the mapping info for @index, allocating a new entry if none exists.
 * Must be called with the xarray lock held; the lock is dropped and reacquired
 * around the allocation.
 */
static void *gdom_info_load_or_alloc_locked(struct xarray *xa, unsigned long index)
{
	struct guest_domain_mapping_info *elm, *res;

	elm = xa_load(xa, index);
	if (elm)
		return elm;

	xa_unlock(xa);
	elm = kzalloc(sizeof(struct guest_domain_mapping_info), GFP_KERNEL);
	xa_lock(xa);
	if (!elm)
		return ERR_PTR(-ENOMEM);

	res = __xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
	if (xa_is_err(res))
		res = ERR_PTR(xa_err(res));

	/* Someone else installed an entry (or the cmpxchg failed): drop ours */
	if (res) {
		kfree(elm);
		return res;
	}

	refcount_set(&elm->users, 0);
	return elm;
}
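
/*
 * Illustrative call flow (assumption, for orientation only; the exact UAPI
 * field names come from <uapi/linux/iommufd.h> and may differ in detail):
 *
 *	1. The VMM allocates a vIOMMU object on top of the nest parent
 *	   (stage-2) hardware page table for the device.
 *	2. It then allocates a nested HWPT against that vIOMMU, passing the
 *	   guest DTE as allocation data:
 *
 *		struct iommu_hwpt_amd_guest gdte = {
 *			.dte = { ... guest DTE as built by the VMM ... },
 *		};
 *		struct iommu_hwpt_alloc cmd = {
 *			.size = sizeof(cmd),
 *			.pt_id = viommu_id,
 *			.data_type = IOMMU_HWPT_DATA_AMD_GUEST,
 *			.data_len = sizeof(gdte),
 *			.data_uptr = (__u64)(uintptr_t)&gdte,
 *		};
 *		ioctl(iommufd, IOMMU_HWPT_ALLOC, &cmd);
 *
 *	3. The iommufd core copies the guest data in and invokes
 *	   struct iommufd_viommu_ops.alloc_domain_nested(), i.e. the
 *	   function below.
 */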

/*
 * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
 * during the call to struct iommu_ops.viommu_init().
 */
struct iommu_domain *
amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	int ret;
	struct nested_domain *ndom;
	struct guest_domain_mapping_info *gdom_info;
	struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);

	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
		return ERR_PTR(-EOPNOTSUPP);

	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
	if (!ndom)
		return ERR_PTR(-ENOMEM);

	ret = iommu_copy_struct_from_user(&ndom->gdte, user_data,
					  IOMMU_HWPT_DATA_AMD_GUEST,
					  dte);
	if (ret)
		goto out_err;

	ret = validate_gdte_nested(&ndom->gdte);
	if (ret)
		goto out_err;

	ndom->gdom_id = FIELD_GET(DTE_DOMID_MASK, ndom->gdte.dte[1]);
	ndom->domain.ops = &nested_domain_ops;
	ndom->domain.type = IOMMU_DOMAIN_NESTED;
	ndom->viommu = aviommu;

	/*
	 * Normally, when a guest has multiple pass-through devices, the IOMMU
	 * driver sets up DTEs with the same stage-2 table and uses the same
	 * host domain ID (hDomID). With nested translation, if the guest sets
	 * up different stage-1 tables with the same PASID, the IOMMU would use
	 * the same TLB tag, which results in TLB aliasing.
	 *
	 * The guest assigns gDomIDs based on its own algorithm for managing
	 * cache tags of (DomID, PASID). Within a single vIOMMU, the nest
	 * parent domain (with the stage-2 table) is used by all DTEs, but each
	 * gDomID must be mapped consistently to a single hDomID. An xarray in
	 * the vIOMMU keeps track of this mapping. When the stage-2 table is
	 * changed, the INVALIDATE_IOMMU_PAGES command must be issued for each
	 * hDomID in the xarray (see the illustrative sketch after this
	 * function).
	 */
	xa_lock(&aviommu->gdomid_array);

	gdom_info = gdom_info_load_or_alloc_locked(&aviommu->gdomid_array, ndom->gdom_id);
	if (IS_ERR(gdom_info)) {
		xa_unlock(&aviommu->gdomid_array);
		ret = PTR_ERR(gdom_info);
		goto out_err;
	}

	/* Check if the gDomID already exists */
	if (refcount_inc_not_zero(&gdom_info->users)) {
		ndom->gdom_info = gdom_info;
		xa_unlock(&aviommu->gdomid_array);

		pr_debug("%s: Found gdom_id=%#x, hdom_id=%#x\n",
			 __func__, ndom->gdom_id, gdom_info->hdom_id);

		return &ndom->domain;
	}

	/* The gDomID does not exist; allocate a new hdom_id */
	gdom_info->hdom_id = amd_iommu_pdom_id_alloc();
	if (gdom_info->hdom_id <= 0) {
		__xa_cmpxchg(&aviommu->gdomid_array,
			     ndom->gdom_id, gdom_info, NULL, GFP_ATOMIC);
		xa_unlock(&aviommu->gdomid_array);
		ret = -ENOSPC;
		goto out_err_gdom_info;
	}

	ndom->gdom_info = gdom_info;
	refcount_set(&gdom_info->users, 1);

	xa_unlock(&aviommu->gdomid_array);

	pr_debug("%s: Allocate gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, gdom_info->hdom_id);

	return &ndom->domain;

out_err_gdom_info:
	kfree(gdom_info);
out_err:
	kfree(ndom);
	return ERR_PTR(ret);
}
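
/*
 * Illustrative sketch (hypothetical, not implemented here): the comment in
 * amd_iommu_alloc_domain_nested() above notes that when the stage-2 table
 * changes, INVALIDATE_IOMMU_PAGES must be issued for each hDomID recorded in
 * the xarray. A walker over the mapping could look roughly like this, with
 * invalidate_hdom_id() standing in for the driver's actual flush primitive:
 *
 *	unsigned long gdom_id;
 *	struct guest_domain_mapping_info *info;
 *
 *	xa_for_each(&aviommu->gdomid_array, gdom_id, info)
 *		invalidate_hdom_id(iommu, info->hdom_id);
 */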

/*
 * Compose a DTE for nested translation: host (v1) fields come from the nest
 * parent domain, guest (v2) fields are copied from the guest-provided DTE.
 */
static void set_dte_nested(struct amd_iommu *iommu, struct iommu_domain *dom,
			   struct iommu_dev_data *dev_data, struct dev_table_entry *new)
{
	struct protection_domain *parent;
	struct nested_domain *ndom = to_ndomain(dom);
	struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;
	struct pt_iommu_amdv1_hw_info pt_info;

	/*
	 * The nest parent domain is attached during the call to
	 * struct iommu_ops.viommu_init() and stored in
	 * struct amd_iommu_viommu.parent.
	 */
	if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))
		return;

	parent = ndom->viommu->parent;
	amd_iommu_make_clear_dte(dev_data, new);

	/* Retrieve the current pagetable info via the IOMMU PT API. */
	pt_iommu_amdv1_hw_info(&parent->amdv1, &pt_info);

	/*
	 * Use the host domain ID from the nested domain to program the DTE.
	 * See amd_iommu_alloc_domain_nested().
	 */
	amd_iommu_set_dte_v1(dev_data, parent, ndom->gdom_info->hdom_id,
			     &pt_info, new);

	/* GV is required for nested page tables */
	new->data[0] |= DTE_FLAG_GV;

	/* Guest PPR */
	new->data[0] |= gdte->dte[0] & DTE_FLAG_PPR;

	/* Guest translation controls */
	new->data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GIOV);

	/* GCR3 table root pointer */
	new->data[0] |= gdte->dte[0] & DTE_GCR3_14_12;
	new->data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);

	/* Guest paging mode */
	new->data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;
}

static int nested_attach_device(struct iommu_domain *dom, struct device *dev,
				struct iommu_domain *old)
{
	struct dev_table_entry new = {0};
	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
	int ret = 0;

	/* Make sure PASID is not enabled for this attach path */
	if (WARN_ON(dev_data->pasid_enabled))
		return -EINVAL;

	mutex_lock(&dev_data->mutex);

	set_dte_nested(iommu, dom, dev_data, &new);

	amd_iommu_update_dte(iommu, dev_data, &new);

	mutex_unlock(&dev_data->mutex);

	return ret;
}

static void nested_domain_free(struct iommu_domain *dom)
{
	struct guest_domain_mapping_info *curr;
	struct nested_domain *ndom = to_ndomain(dom);
	struct amd_iommu_viommu *aviommu = ndom->viommu;

	xa_lock(&aviommu->gdomid_array);

	if (!refcount_dec_and_test(&ndom->gdom_info->users)) {
		xa_unlock(&aviommu->gdomid_array);
		/* Other nested domains still use this gDomID mapping */
		kfree(ndom);
		return;
	}

	/*
	 * The refcount for the gdom_id to hdom_id mapping dropped to zero.
	 * It is now safe to remove the mapping.
	 */
	curr = __xa_cmpxchg(&aviommu->gdomid_array, ndom->gdom_id,
			    ndom->gdom_info, NULL, GFP_ATOMIC);

	xa_unlock(&aviommu->gdomid_array);
	if (WARN_ON(!curr || xa_err(curr))) {
		kfree(ndom);
		return;
	}

	/* success */
	pr_debug("%s: Free gdom_id=%#x, hdom_id=%#x\n",
		 __func__, ndom->gdom_id, curr->hdom_id);

	amd_iommu_pdom_id_free(ndom->gdom_info->hdom_id);
	kfree(curr);
	kfree(ndom);
}

static const struct iommu_domain_ops nested_domain_ops = {
	.attach_dev	= nested_attach_device,
	.free		= nested_domain_free,
};