// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */

#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"

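/*
 * Report SMMUv3 hardware information to userspace. The IDR0-IDR5, IIDR and
 * AIDR register values are read from the physical SMMU and returned through
 * iommufd. Info types other than the default SMMUv3 type are forwarded to the
 * implementation's hw_info op, when one is provided.
 */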
void *arm_smmu_hw_info(struct device *dev, u32 *length,
		       enum iommu_hw_info_type *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops;
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
	    *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
		if (!impl_ops || !impl_ops->hw_info)
			return ERR_PTR(-EOPNOTSUPP);
		return impl_ops->hw_info(master->smmu, length, type);
	}

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	base_idr = master->smmu->base + ARM_SMMU_IDR0;
	for (i = 0; i <= 5; i++)
		info->idr[i] = readl_relaxed(base_idr + i);
	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);

	*length = sizeof(*info);
	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;

	return info;
}

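/*
 * Compose an STE that points at both the S2 translation of the vSMMU's parent
 * domain and the userspace CD table described by the vSTE. The valid bit and
 * the nested CFG are set here; the remaining non-CFG vSTE fields from
 * userspace are merged on top of the S2 STE.
 */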
static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(
		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);

	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
				      FIELD_PREP(STRTAB_STE_0_CFG,
						 STRTAB_STE_0_CFG_NESTED));
	target->data[0] |= nested_domain->ste[0] &
			   ~cpu_to_le64(STRTAB_STE_0_CFG);
	target->data[1] |= nested_domain->ste[1];
	/* Merge events for DoS mitigations on eventq */
	target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}

/*
 * Create a physical STE from the virtual STE that userspace provided when it
 * created the nested domain. Using the vSTE userspace can request:
 * - Non-valid STE
 * - Abort STE
 * - Bypass STE (install the S2, no CD table)
 * - CD table STE (install the S2 and the userspace CD table)
 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
		cfg = STRTAB_STE_0_CFG_ABORT;

	switch (cfg) {
	case STRTAB_STE_0_CFG_S1_TRANS:
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		break;
	case STRTAB_STE_0_CFG_BYPASS:
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		break;
	case STRTAB_STE_0_CFG_ABORT:
	default:
		arm_smmu_make_abort_ste(target);
		break;
	}
}

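/*
 * Allocate a vmaster for the attachment if the device has a vDEVICE (vSID) on
 * this vSMMU. A translating (S1) vSTE needs the vSID so that CD/ATS
 * invalidations and vEVENTs can be routed; abort/bypass vSTEs may attach
 * without one.
 */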
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
				    struct arm_smmu_nested_domain *nested_domain)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
	struct arm_smmu_vmaster *vmaster;
	unsigned long vsid;
	int ret;

	iommu_group_mutex_assert(state->master->dev);

	ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
					 state->master->dev, &vsid);
	/*
	 * Attaching to a translating nested domain requires a vDEVICE to be
	 * allocated beforehand, as CD/ATS invalidations and vEVENTs need a
	 * vSID to work properly. An abort/bypass domain is allowed to attach
	 * without a vmaster, for the GBPA case.
	 */
	if (ret) {
		if (cfg == STRTAB_STE_0_CFG_ABORT ||
		    cfg == STRTAB_STE_0_CFG_BYPASS)
			return 0;
		return ret;
	}

	vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
	if (!vmaster)
		return -ENOMEM;
	vmaster->vsmmu = nested_domain->vsmmu;
	vmaster->vsid = vsid;
	state->vmaster = vmaster;

	return 0;
}

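/*
 * Replace the master's vmaster with the one prepared in the attach state,
 * freeing any previous vmaster under the streams_mutex.
 */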
void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
{
	struct arm_smmu_master *master = state->master;

	mutex_lock(&master->smmu->streams_mutex);
	kfree(master->vmaster);
	master->vmaster = state->vmaster;
	mutex_unlock(&master->smmu->streams_mutex);
}

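/* Drop the master's vmaster, if any, by committing an empty attach state. */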
void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
{
	struct arm_smmu_attach_state state = { .master = master };

	arm_smmu_attach_commit_vmaster(&state);
}

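/*
 * Attach a device to a nested domain: translate the vSTE into a physical STE
 * backed by the vSMMU's S2 parent domain and install it for the device.
 */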
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev,
				      struct iommu_domain *old_domain)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = old_domain,
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);
	/*
	 * The VM has to control the actual ATS state at the PCI device because
	 * we forward the invalidations directly from the VM. If the VM doesn't
	 * think ATS is on, it will not generate ATC flushes and the ATC will
	 * become incoherent. Since we can't access the actual virtual PCI ATS
	 * config bit here, base this off the EATS value in the STE. If EATS is
	 * set then the VM must generate ATC flushes.
	 */
	state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}

static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	kfree(to_smmu_nested_domain(domain));
}

static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.attach_dev = arm_smmu_attach_dev_nested,
	.free = arm_smmu_domain_nested_free,
};

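/*
 * Sanity check the vSTE provided by userspace. A non-valid vSTE is cleared to
 * zero, only the nesting-allowed STE fields and CFG values may be set, and the
 * EATS field is consumed here to decide whether ATS should be enabled.
 */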
static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
				  bool *enable_ats)
{
	unsigned int eats;
	unsigned int cfg;

	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
		return -EIO;

	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
		return -EIO;

	/*
	 * Only Full ATS or ATS UR is supported
	 * The EATS field will be set by arm_smmu_make_nested_domain_ste()
	 */
	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
		return -EIO;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
	return 0;
}

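/*
 * Allocate an IOMMU_DOMAIN_NESTED from a vSTE supplied by userspace. The vSTE
 * is validated and stored in the nested domain, with the EATS field stripped
 * since the ATS decision is tracked separately in enable_ats.
 */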
struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	bool enable_ats = false;
	int ret;

	if (flags)
		return ERR_PTR(-EOPNOTSUPP);

	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (ret)
		return ERR_PTR(ret);

	ret = arm_smmu_validate_vste(&arg, &enable_ats);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->enable_ats = enable_ats;
	nested_domain->vsmmu = vsmmu;
	nested_domain->ste[0] = arg.ste[0];
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);

	return &nested_domain->domain;
}

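/*
 * Look up the physical SID behind a vSID using the vDEVICE table of the
 * vSMMU. Returns -EIO if no device is registered for the vSID.
 */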
static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct arm_smmu_master *master;
	struct device *dev;
	int ret = 0;

	xa_lock(&vsmmu->core.vdevs);
	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (!dev) {
		ret = -EIO;
		goto unlock;
	}
	master = dev_iommu_priv_get(dev);

	/* At this moment, iommufd only supports PCI devices that have one SID */
	if (sid)
		*sid = master->streams[0].id;
unlock:
	xa_unlock(&vsmmu->core.vdevs);
	return ret;
}

/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
	union {
		u64 cmd[2];
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
	};
};

/*
 * Convert, in place, the raw invalidation command into an internal format that
 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
 * stored in CPU endian.
 *
 * Enforce the VMID or SID on the command.
 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	/* Commands are le64 stored in u64 */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Convert to NH_ALL */
		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		cmd->cmd[1] = 0;
		break;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL: {
		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);

		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		break;
	}
	default:
		return -EIO;
	}
	return 0;
}

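/*
 * Process a batch of invalidation commands from userspace: convert each one
 * to its physical form and issue them to the SMMU command queue in batches.
 * On failure, array->entry_num reflects how many commands were consumed.
 */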
int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
			       struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret)
			goto out;

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			cur--;
			goto out;
		}
		last = cur;
	}
out:
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}

static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

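/*
 * Report the size of the vIOMMU object to allocate for this device, or 0 if
 * nesting cannot be supported. Nesting requires the SMMU NESTING feature and
 * either a fully cache-coherent master (CANWBS) or S2FWB.
 */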
size_t arm_smmu_get_viommu_size(struct device *dev,
				enum iommu_viommu_type viommu_type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = master->smmu;

	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return 0;

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return 0;

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
	 * indicates the device is fully coherent and no cache maintenance is
	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
	 * things non-coherent using the memattr, but No-Snoop behavior is not
	 * affected.
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return 0;

	if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);

	if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
		return 0;
	return smmu->impl_ops->get_viommu_size(viommu_type);
}

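/*
 * Initialize a vSMMU object against its S2 parent domain. Vendor-specific
 * vIOMMU types are handed off to the implementation's vsmmu_init op.
 */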
int arm_vsmmu_init(struct iommufd_viommu *viommu,
		   struct iommu_domain *parent_domain,
		   const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu =
		container_of(viommu->iommu_dev, struct arm_smmu_device, iommu);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain);

	if (s2_parent->smmu != smmu)
		return -EINVAL;

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) {
		viommu->ops = &arm_vsmmu_ops;
		return 0;
	}

	return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
}

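/*
 * Forward a hardware event to userspace through the vEVENTQ, rewriting the
 * physical SID in the event record with the master's vSID.
 */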
int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
{
	struct iommu_vevent_arm_smmuv3 vevt;
	int i;

	lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);

	vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
				  FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
	for (i = 1; i < EVTQ_ENT_DWORDS; i++)
		vevt.evt[i] = cpu_to_le64(evt[i]);

	return iommufd_viommu_report_event(&vmaster->vsmmu->core,
					   IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
					   sizeof(vevt));
}

MODULE_IMPORT_NS("IOMMUFD");