// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */

#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"

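/*
 * Report the raw ID register values of the physical SMMU (IDR0-IDR5, IIDR and
 * AIDR) to userspace for the iommufd hardware-info query. The buffer is
 * allocated here and owned (and eventually freed) by the caller.
 */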
void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	base_idr = master->smmu->base + ARM_SMMU_IDR0;
	for (i = 0; i <= 5; i++)
		info->idr[i] = readl_relaxed(base_idr + i);
	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);

	*length = sizeof(*info);
	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;

	return info;
}

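/*
 * Build the physical STE for a vSTE that points at a guest CD table: take the
 * kernel-owned S2 fields from the parent domain, then overlay the CD table
 * pointer and S1 configuration supplied by userspace, forcing CFG to nested
 * (S1 followed by S2) translation.
 */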
static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(
		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);

	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
				      FIELD_PREP(STRTAB_STE_0_CFG,
						 STRTAB_STE_0_CFG_NESTED));
	target->data[0] |= nested_domain->ste[0] &
			   ~cpu_to_le64(STRTAB_STE_0_CFG);
	target->data[1] |= nested_domain->ste[1];
}

/*
 * Create a physical STE from the virtual STE that userspace provided when it
 * created the nested domain. Using the vSTE userspace can request:
 * - Non-valid STE
 * - Abort STE
 * - Bypass STE (install the S2, no CD table)
 * - CD table STE (install the S2 and the userspace CD table)
 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
		cfg = STRTAB_STE_0_CFG_ABORT;

	switch (cfg) {
	case STRTAB_STE_0_CFG_S1_TRANS:
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		break;
	case STRTAB_STE_0_CFG_BYPASS:
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		break;
	case STRTAB_STE_0_CFG_ABORT:
	default:
		arm_smmu_make_abort_ste(target);
		break;
	}
}

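/*
 * Attach a nested domain to a device: the vSTE captured at domain allocation
 * is translated into a physical STE and installed for the device's StreamIDs.
 * The device must be behind the same physical SMMU as the vSMMU and must not
 * have any SSIDs (PASIDs) in use.
 */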
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = iommu_get_domain_for_dev(dev),
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);
	/*
	 * The VM has to control the actual ATS state at the PCI device because
	 * we forward the invalidations directly from the VM. If the VM doesn't
	 * think ATS is on, it will not generate ATC flushes and the ATC will
	 * become incoherent. Since we can't access the actual virtual PCI ATS
	 * config bit here, base this off the EATS value in the STE. If EATS is
	 * set then the VM must generate ATC flushes.
	 */
	state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}

static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	kfree(to_smmu_nested_domain(domain));
}

static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.attach_dev = arm_smmu_attach_dev_nested,
	.free = arm_smmu_domain_nested_free,
};

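/*
 * Sanity-check the vSTE passed in by userspace: only bits covered by the
 * NESTING_ALLOWED masks may be set, CFG must be abort, bypass or S1 translate,
 * and the EATS field is validated and stripped here before being rebuilt when
 * the physical STE is generated.
 */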
static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
				  bool *enable_ats)
{
	unsigned int eats;
	unsigned int cfg;

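	/*
	 * A non-valid vSTE is allowed; normalize it to an all-zero STE so the
	 * attach path turns it into an abort STE.
	 */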
	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
		return -EIO;

	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
		return -EIO;

	/*
	 * Only Full ATS or ATS UR is supported. The EATS field will be set by
	 * arm_smmu_make_nested_domain_ste().
	 */
	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
		return -EIO;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
	return 0;
}

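/*
 * Allocate an IOMMU_DOMAIN_NESTED from the vSTE provided by userspace. The
 * vSTE is validated and stored (with EATS masked off) so it can be merged
 * into the physical STE when a device is attached.
 */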
static struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID;
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	bool enable_ats = false;
	int ret;

	/*
	 * Faults delivered to the nested domain are faults that originated
	 * from the S1 in the domain. The core code will match all PASIDs when
	 * delivering the fault due to user_pasid_table.
	 */
	if (flags & ~SUPPORTED_FLAGS)
		return ERR_PTR(-EOPNOTSUPP);

	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (ret)
		return ERR_PTR(ret);

	ret = arm_smmu_validate_vste(&arg, &enable_ats);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->enable_ats = enable_ats;
	nested_domain->vsmmu = vsmmu;
	nested_domain->ste[0] = arg.ste[0];
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);

	return &nested_domain->domain;
}

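/*
 * Translate a virtual StreamID used by the VM into the physical StreamID of
 * the device bound to that vSID in the vIOMMU. The vdevs xarray lock keeps
 * the device from going away while the SID is read out.
 */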
static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct arm_smmu_master *master;
	struct device *dev;
	int ret = 0;

	xa_lock(&vsmmu->core.vdevs);
	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (!dev) {
		ret = -EIO;
		goto unlock;
	}
	master = dev_iommu_priv_get(dev);

	/* At this moment, iommufd only supports PCI devices that have one SID */
	if (sid)
		*sid = master->streams[0].id;
unlock:
	xa_unlock(&vsmmu->core.vdevs);
	return ret;
}

/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
	union {
		u64 cmd[2];
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
	};
};

/*
 * Convert, in place, the raw invalidation command into an internal format that
 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
 * stored in CPU endian.
 *
 * Enforce the VMID or SID on the command.
 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	/* Commands are le64 stored in u64 */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Convert to NH_ALL */
		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		cmd->cmd[1] = 0;
		break;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL: {
		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);

		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		break;
	}
	default:
		return -EIO;
	}
	return 0;
}

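/*
 * Process a batch of invalidation commands from the VM: copy the raw command
 * queue entries from userspace, rewrite their VMID/SID fields to the physical
 * values, and issue them to the SMMU command queue in chunks of up to
 * CMDQ_BATCH_ENTRIES commands.
 */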
static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
				      struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret)
			goto out;

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			cur--;
			goto out;
		}
		last = cur;
	}
out:
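	/* Report how many commands were handled back to the caller. */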
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}

static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

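/*
 * Allocate the vSMMU object that exposes this physical SMMU instance to a VM.
 * The S2 parent domain supplies the VMID, nesting support is required, and
 * the master must be coherent (CANWBS) or the SMMU must implement S2FWB.
 */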
struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
				       struct iommu_domain *parent,
				       struct iommufd_ctx *ictx,
				       unsigned int viommu_type)
{
	struct arm_smmu_device *smmu =
		iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
	struct arm_vsmmu *vsmmu;

	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return ERR_PTR(-EOPNOTSUPP);

	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return ERR_PTR(-EOPNOTSUPP);

	if (s2_parent->smmu != master->smmu)
		return ERR_PTR(-EINVAL);

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
	 * indicates the device is fully coherent and no cache maintenance is
	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
	 * things non-coherent using the memattr, but No-Snoop behavior is not
	 * affected.
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return ERR_PTR(-EOPNOTSUPP);

	vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
				     &arm_vsmmu_ops);
	if (IS_ERR(vsmmu))
		return ERR_CAST(vsmmu);

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	return &vsmmu->core;
}

MODULE_IMPORT_NS("IOMMUFD");