1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
4 */
5
6 #include <uapi/linux/iommufd.h>
7
8 #include "arm-smmu-v3.h"
9
/*
 * Report SMMUv3 hardware identification registers to userspace.
 *
 * Returns a kzalloc'd struct iommu_hw_info_arm_smmuv3 (freed by the caller)
 * or an ERR_PTR. Requests for a type other than DEFAULT/ARM_SMMUV3 are
 * forwarded to the implementation-specific hw_info op when one exists.
 */
void *arm_smmu_hw_info(struct device *dev, u32 *length,
		       enum iommu_hw_info_type *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops;
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
	    *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
		if (!impl_ops || !impl_ops->hw_info)
			return ERR_PTR(-EOPNOTSUPP);
		return impl_ops->hw_info(master->smmu, length, type);
	}

	info = kzalloc_obj(*info);
	if (!info)
		return ERR_PTR(-ENOMEM);

	/*
	 * IDR0..IDR5 are contiguous 32-bit registers; size the loop from the
	 * uAPI array instead of a magic bound so the two cannot drift apart.
	 */
	base_idr = master->smmu->base + ARM_SMMU_IDR0;
	for (i = 0; i < ARRAY_SIZE(info->idr); i++)
		info->idr[i] = readl_relaxed(base_idr + i);
	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);

	*length = sizeof(*info);
	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;

	return info;
}
41
/*
 * Build a physical STE that nests the userspace-owned CD table on top of the
 * vSMMU's S2 parent domain. The S2 fields are generated first; the S1/CD
 * table fields from the (already validated) vSTE are then merged in.
 */
static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(
		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);

	/* Reset word 0 to V + nested CFG, then OR in the vSTE's non-CFG bits */
	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
				      FIELD_PREP(STRTAB_STE_0_CFG,
						 STRTAB_STE_0_CFG_NESTED));
	target->data[0] |= nested_domain->ste[0] &
			   ~cpu_to_le64(STRTAB_STE_0_CFG);
	target->data[1] |= nested_domain->ste[1];
	/* Merge events for DoS mitigations on eventq */
	target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}
58
59 /*
60 * Create a physical STE from the virtual STE that userspace provided when it
61 * created the nested domain. Using the vSTE userspace can request:
62 * - Non-valid STE
63 * - Abort STE
64 * - Bypass STE (install the S2, no CD table)
65 * - CD table STE (install the S2 and the userspace CD table)
66 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
		cfg = STRTAB_STE_0_CFG_ABORT;

	switch (cfg) {
	case STRTAB_STE_0_CFG_S1_TRANS:
		/* S2 plus the guest's CD table */
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		break;
	case STRTAB_STE_0_CFG_BYPASS:
		/* Guest requested S1 bypass: install the S2 only */
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		break;
	case STRTAB_STE_0_CFG_ABORT:
	default:
		/* Other cfg values were rejected by arm_smmu_validate_vste() */
		arm_smmu_make_abort_ste(target);
		break;
	}
}
98
/*
 * Prepare the vSID binding (vmaster) for an attach to a nested domain.
 *
 * A translating vSTE requires that a vDEVICE (vSID mapping) already exists;
 * abort/bypass vSTEs may attach without one. On success, any allocation is
 * left in state->vmaster (possibly NULL for the abort/bypass case) for the
 * attach machinery to commit or discard.
 */
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
				    struct arm_smmu_nested_domain *nested_domain)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
	struct arm_smmu_vmaster *vmaster;
	unsigned long vsid;
	int ret;

	iommu_group_mutex_assert(state->master->dev);

	ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
					 state->master->dev, &vsid);
	/*
	 * Attaching to a translate nested domain must allocate a vDEVICE prior,
	 * as CD/ATS invalidations and vevents require a vSID to work properly.
	 * An abort/bypass domain is allowed to attach w/o vmaster for GBPA case.
	 */
	if (ret) {
		if (cfg == STRTAB_STE_0_CFG_ABORT ||
		    cfg == STRTAB_STE_0_CFG_BYPASS)
			return 0;
		return ret;
	}

	vmaster = kzalloc_obj(*vmaster);
	if (!vmaster)
		return -ENOMEM;
	vmaster->vsmmu = nested_domain->vsmmu;
	vmaster->vsid = vsid;
	state->vmaster = vmaster;

	return 0;
}
133
arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state * state)134 void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
135 {
136 struct arm_smmu_master *master = state->master;
137
138 mutex_lock(&master->smmu->streams_mutex);
139 kfree(master->vmaster);
140 master->vmaster = state->vmaster;
141 mutex_unlock(&master->smmu->streams_mutex);
142 }
143
arm_smmu_master_clear_vmaster(struct arm_smmu_master * master)144 void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
145 {
146 struct arm_smmu_attach_state state = { .master = master };
147
148 arm_smmu_attach_commit_vmaster(&state);
149 }
150
/*
 * Attach a device to a nested domain: translate the stored vSTE into a
 * physical STE and install it. Rejects a vSMMU that belongs to a different
 * physical SMMU instance, and masters that currently have SSIDs (PASIDs) in
 * use on their CD table.
 */
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev,
				      struct iommu_domain *old_domain)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = old_domain,
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);
	/*
	 * The VM has to control the actual ATS state at the PCI device because
	 * we forward the invalidations directly from the VM. If the VM doesn't
	 * think ATS is on it will not generate ATC flushes and the ATC will
	 * become incoherent. Since we can't access the actual virtual PCI ATS
	 * config bit here base this off the EATS value in the STE. If the EATS
	 * is set then the VM must generate ATC flushes.
	 */
	if (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0])) ==
	    STRTAB_STE_0_CFG_S1_TRANS)
		state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	/* prepare succeeded: build and install the STE, then commit */
	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}
196
/* Free a nested domain allocated by arm_vsmmu_alloc_domain_nested() */
static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);

	kfree(nested_domain);
}
201
/* Domain ops for IOMMU_DOMAIN_NESTED domains created in this file */
static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.attach_dev = arm_smmu_attach_dev_nested,
	.free = arm_smmu_domain_nested_free,
};
206
/*
 * Sanity-check a user-provided vSTE before accepting it for a nested domain.
 * Normalizes a non-valid STE to all-zero, clears the EATS field in place
 * (it is regenerated at attach time), and reports through *enable_ats
 * whether an S1-translating vSTE requested ATS. Returns -EIO for any STE
 * the nesting path cannot represent.
 */
static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
				  bool *enable_ats)
{
	unsigned int eats;
	unsigned int cfg;

	/* V=0 means "make an abort STE"; zero the rest so nothing leaks */
	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
		return -EIO;

	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
		return -EIO;

	/*
	 * Only Full ATS or ATS UR is supported
	 * The EATS field will be set by arm_smmu_make_nested_domain_ste()
	 */
	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
		return -EIO;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
	return 0;
}
241
/*
 * Allocate an IOMMU_DOMAIN_NESTED domain from a userspace-provided vSTE.
 * The vSTE is copied from user memory, validated, and stored (with EATS
 * stripped) for later translation into a physical STE at attach time.
 * Returns the new domain or an ERR_PTR.
 */
struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	bool enable_ats = false;
	int ret;

	/* No hwpt allocation flags are supported for nested domains */
	if (flags)
		return ERR_PTR(-EOPNOTSUPP);

	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (ret)
		return ERR_PTR(ret);

	ret = arm_smmu_validate_vste(&arg, &enable_ats);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc_obj(*nested_domain, GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->enable_ats = enable_ats;
	nested_domain->vsmmu = vsmmu;
	nested_domain->ste[0] = arg.ste[0];
	/* EATS is kernel-owned; keep it out of the stored vSTE */
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);

	return &nested_domain->domain;
}
277
/*
 * Look up the physical SID for a guest vSID via the vDEVICE table. Returns
 * 0 on success (writing *sid when sid is non-NULL) or -EIO when no device
 * is registered under that vSID.
 */
static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct device *dev;
	int ret = 0;

	xa_lock(&vsmmu->core.vdevs);
	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (dev) {
		struct arm_smmu_master *master = dev_iommu_priv_get(dev);

		/* At this moment, iommufd only supports PCI device that has one SID */
		if (sid)
			*sid = master->streams[0].id;
	} else {
		ret = -EIO;
	}
	xa_unlock(&vsmmu->core.vdevs);
	return ret;
}
299
/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
	union {
		u64 cmd[2];	/* CPU-endian view fed to the cmdq */
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;	/* le64 user view */
	};
};
307
308 /*
309 * Convert, in place, the raw invalidation command into an internal format that
310 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
311 * stored in CPU endian.
312 *
313 * Enforce the VMID or SID on the command.
314 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	/* Commands are le64 stored in u64 */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Convert to NH_ALL */
		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		cmd->cmd[1] = 0;
		break;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		/* Force the vSMMU's VMID so the guest can't invalidate others */
		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL: {
		/*
		 * Rewrite the guest vSID to the physical SID.
		 * NOTE(review): CMDQ_CFGI_0_SID is used for ATC_INV as well —
		 * presumably the SID occupies the same bit field in both
		 * command encodings; confirm against the CMDQ definitions.
		 */
		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);

		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		break;
	}
	default:
		/* Any other opcode is not permitted from userspace */
		return -EIO;
	}
	return 0;
}
352
/*
 * Handle a vSMMU cache invalidation request: copy the user's raw command
 * array, rewrite each command to enforce this vSMMU's VMID/SID, and issue
 * the commands to HW in batches of up to CMDQ_BATCH_ENTRIES. On return
 * array->entry_num is set to the count of fully-processed commands, so
 * userspace can identify a failing entry.
 */
int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
			       struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kzalloc_objs(*cmds, array->entry_num);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	/* The conversion relies on the cmd layout matching two raw u64s */
	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret)
			goto out;

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			/* Step back so entry_num points at the failing batch */
			cur--;
			goto out;
		}
		last = cur;
	}
out:
	/* Report how many entries were consumed, even on error */
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}
402
/* viommu ops used for the generic IOMMU_VIOMMU_TYPE_ARM_SMMUV3 type */
static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};
407
/*
 * Report the allocation size for a vSMMU of the requested type, or 0 when a
 * vSMMU cannot be supported for this device. A non-zero return commits
 * arm_vsmmu_init() to handling the same type.
 */
size_t arm_smmu_get_viommu_size(struct device *dev,
				enum iommu_viommu_type viommu_type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = master->smmu;

	/* Nested translation support is a hard requirement */
	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return 0;

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return 0;

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
	 * indicates the device is fully coherent and no cache maintenance is
	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
	 * things non-coherent using the memattr, but No-Snoop behavior is not
	 * effected.
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return 0;

	if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);

	/* Unknown types are delegated to the implementation, when present */
	if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
		return 0;
	return smmu->impl_ops->get_viommu_size(viommu_type);
}
444
/*
 * Initialize a vSMMU object whose size was returned by
 * arm_smmu_get_viommu_size(). The S2 parent domain must belong to the same
 * physical SMMU instance as the viommu's device.
 */
int arm_vsmmu_init(struct iommufd_viommu *viommu,
		   struct iommu_domain *parent_domain,
		   const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu =
		container_of(viommu->iommu_dev, struct arm_smmu_device, iommu);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain);

	if (s2_parent->smmu != smmu)
		return -EINVAL;

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) {
		viommu->ops = &arm_vsmmu_ops;
		return 0;
	}

	/*
	 * NOTE(review): impl_ops is dereferenced unchecked here — presumably
	 * arm_smmu_get_viommu_size() only returned a size for a non-generic
	 * type when impl_ops exists; confirm vsmmu_init is always set in
	 * that case.
	 */
	return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
}
469
/*
 * Forward a raw HW event record to the guest's vEVENTQ, rewriting the
 * physical SID in word 0 with the guest's vSID and converting each dword to
 * little-endian. Caller must hold streams_mutex (asserted below).
 */
int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
{
	struct iommu_vevent_arm_smmuv3 vevt;
	int i;

	lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);

	vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
				  FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
	for (i = 1; i < EVTQ_ENT_DWORDS; i++)
		vevt.evt[i] = cpu_to_le64(evt[i]);

	return iommufd_viommu_report_event(&vmaster->vsmmu->core,
					   IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
					   sizeof(vevt));
}
486
487 MODULE_IMPORT_NS("IOMMUFD");
488