xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c (revision 8a5f956a9fb7d74fff681145082acfad5afa6bb8)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
4  */
5 
6 #include <uapi/linux/iommufd.h>
7 
8 #include "arm-smmu-v3.h"
9 
10 void *arm_smmu_hw_info(struct device *dev, u32 *length,
11 		       enum iommu_hw_info_type *type)
12 {
13 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
14 	const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops;
15 	struct iommu_hw_info_arm_smmuv3 *info;
16 	u32 __iomem *base_idr;
17 	unsigned int i;
18 
19 	if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
20 	    *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
21 		if (!impl_ops || !impl_ops->hw_info)
22 			return ERR_PTR(-EOPNOTSUPP);
23 		return impl_ops->hw_info(master->smmu, length, type);
24 	}
25 
26 	info = kzalloc(sizeof(*info), GFP_KERNEL);
27 	if (!info)
28 		return ERR_PTR(-ENOMEM);
29 
30 	base_idr = master->smmu->base + ARM_SMMU_IDR0;
31 	for (i = 0; i <= 5; i++)
32 		info->idr[i] = readl_relaxed(base_idr + i);
33 	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
34 	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);
35 
36 	*length = sizeof(*info);
37 	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;
38 
39 	return info;
40 }
41 
42 static void arm_smmu_make_nested_cd_table_ste(
43 	struct arm_smmu_ste *target, struct arm_smmu_master *master,
44 	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
45 {
46 	arm_smmu_make_s2_domain_ste(
47 		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);
48 
49 	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
50 				      FIELD_PREP(STRTAB_STE_0_CFG,
51 						 STRTAB_STE_0_CFG_NESTED));
52 	target->data[0] |= nested_domain->ste[0] &
53 			   ~cpu_to_le64(STRTAB_STE_0_CFG);
54 	target->data[1] |= nested_domain->ste[1];
55 	/* Merge events for DoS mitigations on eventq */
56 	target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
57 }
58 
59 /*
60  * Create a physical STE from the virtual STE that userspace provided when it
61  * created the nested domain. Using the vSTE userspace can request:
62  * - Non-valid STE
63  * - Abort STE
64  * - Bypass STE (install the S2, no CD table)
65  * - CD table STE (install the S2 and the userspace CD table)
66  */
67 static void arm_smmu_make_nested_domain_ste(
68 	struct arm_smmu_ste *target, struct arm_smmu_master *master,
69 	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
70 {
71 	unsigned int cfg =
72 		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
73 
74 	/*
75 	 * Userspace can request a non-valid STE through the nesting interface.
76 	 * We relay that into an abort physical STE with the intention that
77 	 * C_BAD_STE for this SID can be generated to userspace.
78 	 */
79 	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
80 		cfg = STRTAB_STE_0_CFG_ABORT;
81 
82 	switch (cfg) {
83 	case STRTAB_STE_0_CFG_S1_TRANS:
84 		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
85 						  ats_enabled);
86 		break;
87 	case STRTAB_STE_0_CFG_BYPASS:
88 		arm_smmu_make_s2_domain_ste(target, master,
89 					    nested_domain->vsmmu->s2_parent,
90 					    ats_enabled);
91 		break;
92 	case STRTAB_STE_0_CFG_ABORT:
93 	default:
94 		arm_smmu_make_abort_ste(target);
95 		break;
96 	}
97 }
98 
99 int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
100 				    struct arm_smmu_nested_domain *nested_domain)
101 {
102 	struct arm_smmu_vmaster *vmaster;
103 	unsigned long vsid;
104 	int ret;
105 
106 	iommu_group_mutex_assert(state->master->dev);
107 
108 	ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
109 					 state->master->dev, &vsid);
110 	if (ret)
111 		return ret;
112 
113 	vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
114 	if (!vmaster)
115 		return -ENOMEM;
116 	vmaster->vsmmu = nested_domain->vsmmu;
117 	vmaster->vsid = vsid;
118 	state->vmaster = vmaster;
119 
120 	return 0;
121 }
122 
123 void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
124 {
125 	struct arm_smmu_master *master = state->master;
126 
127 	mutex_lock(&master->smmu->streams_mutex);
128 	kfree(master->vmaster);
129 	master->vmaster = state->vmaster;
130 	mutex_unlock(&master->smmu->streams_mutex);
131 }
132 
133 void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
134 {
135 	struct arm_smmu_attach_state state = { .master = master };
136 
137 	arm_smmu_attach_commit_vmaster(&state);
138 }
139 
140 static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
141 				      struct device *dev)
142 {
143 	struct arm_smmu_nested_domain *nested_domain =
144 		to_smmu_nested_domain(domain);
145 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
146 	struct arm_smmu_attach_state state = {
147 		.master = master,
148 		.old_domain = iommu_get_domain_for_dev(dev),
149 		.ssid = IOMMU_NO_PASID,
150 	};
151 	struct arm_smmu_ste ste;
152 	int ret;
153 
154 	if (nested_domain->vsmmu->smmu != master->smmu)
155 		return -EINVAL;
156 	if (arm_smmu_ssids_in_use(&master->cd_table))
157 		return -EBUSY;
158 
159 	mutex_lock(&arm_smmu_asid_lock);
160 	/*
161 	 * The VM has to control the actual ATS state at the PCI device because
162 	 * we forward the invalidations directly from the VM. If the VM doesn't
163 	 * think ATS is on it will not generate ATC flushes and the ATC will
164 	 * become incoherent. Since we can't access the actual virtual PCI ATS
165 	 * config bit here base this off the EATS value in the STE. If the EATS
166 	 * is set then the VM must generate ATC flushes.
167 	 */
168 	state.disable_ats = !nested_domain->enable_ats;
169 	ret = arm_smmu_attach_prepare(&state, domain);
170 	if (ret) {
171 		mutex_unlock(&arm_smmu_asid_lock);
172 		return ret;
173 	}
174 
175 	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
176 					state.ats_enabled);
177 	arm_smmu_install_ste_for_dev(master, &ste);
178 	arm_smmu_attach_commit(&state);
179 	mutex_unlock(&arm_smmu_asid_lock);
180 	return 0;
181 }
182 
/* free op for nested domains: release the containing nested domain object. */
static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	struct arm_smmu_nested_domain *nested = to_smmu_nested_domain(domain);

	kfree(nested);
}
187 
188 static const struct iommu_domain_ops arm_smmu_nested_ops = {
189 	.attach_dev = arm_smmu_attach_dev_nested,
190 	.free = arm_smmu_domain_nested_free,
191 };
192 
193 static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
194 				  bool *enable_ats)
195 {
196 	unsigned int eats;
197 	unsigned int cfg;
198 
199 	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
200 		memset(arg->ste, 0, sizeof(arg->ste));
201 		return 0;
202 	}
203 
204 	/* EIO is reserved for invalid STE data. */
205 	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
206 	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
207 		return -EIO;
208 
209 	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
210 	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
211 	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
212 		return -EIO;
213 
214 	/*
215 	 * Only Full ATS or ATS UR is supported
216 	 * The EATS field will be set by arm_smmu_make_nested_domain_ste()
217 	 */
218 	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
219 	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
220 	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
221 		return -EIO;
222 
223 	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
224 		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
225 	return 0;
226 }
227 
228 struct iommu_domain *
229 arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
230 			      const struct iommu_user_data *user_data)
231 {
232 	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
233 	struct arm_smmu_nested_domain *nested_domain;
234 	struct iommu_hwpt_arm_smmuv3 arg;
235 	bool enable_ats = false;
236 	int ret;
237 
238 	if (flags)
239 		return ERR_PTR(-EOPNOTSUPP);
240 
241 	ret = iommu_copy_struct_from_user(&arg, user_data,
242 					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
243 	if (ret)
244 		return ERR_PTR(ret);
245 
246 	ret = arm_smmu_validate_vste(&arg, &enable_ats);
247 	if (ret)
248 		return ERR_PTR(ret);
249 
250 	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
251 	if (!nested_domain)
252 		return ERR_PTR(-ENOMEM);
253 
254 	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
255 	nested_domain->domain.ops = &arm_smmu_nested_ops;
256 	nested_domain->enable_ats = enable_ats;
257 	nested_domain->vsmmu = vsmmu;
258 	nested_domain->ste[0] = arg.ste[0];
259 	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);
260 
261 	return &nested_domain->domain;
262 }
263 
264 static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
265 {
266 	struct arm_smmu_master *master;
267 	struct device *dev;
268 	int ret = 0;
269 
270 	xa_lock(&vsmmu->core.vdevs);
271 	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
272 	if (!dev) {
273 		ret = -EIO;
274 		goto unlock;
275 	}
276 	master = dev_iommu_priv_get(dev);
277 
278 	/* At this moment, iommufd only supports PCI device that has one SID */
279 	if (sid)
280 		*sid = master->streams[0].id;
281 unlock:
282 	xa_unlock(&vsmmu->core.vdevs);
283 	return ret;
284 }
285 
286 /* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
287 struct arm_vsmmu_invalidation_cmd {
288 	union {
289 		u64 cmd[2];
290 		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
291 	};
292 };
293 
294 /*
295  * Convert, in place, the raw invalidation command into an internal format that
296  * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
297  * stored in CPU endian.
298  *
299  * Enforce the VMID or SID on the command.
300  */
301 static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
302 				      struct arm_vsmmu_invalidation_cmd *cmd)
303 {
304 	/* Commands are le64 stored in u64 */
305 	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
306 	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);
307 
308 	switch (cmd->cmd[0] & CMDQ_0_OP) {
309 	case CMDQ_OP_TLBI_NSNH_ALL:
310 		/* Convert to NH_ALL */
311 		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
312 			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
313 		cmd->cmd[1] = 0;
314 		break;
315 	case CMDQ_OP_TLBI_NH_VA:
316 	case CMDQ_OP_TLBI_NH_VAA:
317 	case CMDQ_OP_TLBI_NH_ALL:
318 	case CMDQ_OP_TLBI_NH_ASID:
319 		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
320 		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
321 		break;
322 	case CMDQ_OP_ATC_INV:
323 	case CMDQ_OP_CFGI_CD:
324 	case CMDQ_OP_CFGI_CD_ALL: {
325 		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);
326 
327 		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
328 			return -EIO;
329 		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
330 		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
331 		break;
332 	}
333 	default:
334 		return -EIO;
335 	}
336 	return 0;
337 }
338 
339 int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
340 			       struct iommu_user_data_array *array)
341 {
342 	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
343 	struct arm_smmu_device *smmu = vsmmu->smmu;
344 	struct arm_vsmmu_invalidation_cmd *last;
345 	struct arm_vsmmu_invalidation_cmd *cmds;
346 	struct arm_vsmmu_invalidation_cmd *cur;
347 	struct arm_vsmmu_invalidation_cmd *end;
348 	int ret;
349 
350 	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
351 	if (!cmds)
352 		return -ENOMEM;
353 	cur = cmds;
354 	end = cmds + array->entry_num;
355 
356 	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
357 	ret = iommu_copy_struct_from_full_user_array(
358 		cmds, sizeof(*cmds), array,
359 		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
360 	if (ret)
361 		goto out;
362 
363 	last = cmds;
364 	while (cur != end) {
365 		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
366 		if (ret)
367 			goto out;
368 
369 		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
370 		cur++;
371 		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
372 			continue;
373 
374 		/* FIXME always uses the main cmdq rather than trying to group by type */
375 		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
376 						  cur - last, true);
377 		if (ret) {
378 			cur--;
379 			goto out;
380 		}
381 		last = cur;
382 	}
383 out:
384 	array->entry_num = cur - cmds;
385 	kfree(cmds);
386 	return ret;
387 }
388 
389 static const struct iommufd_viommu_ops arm_vsmmu_ops = {
390 	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
391 	.cache_invalidate = arm_vsmmu_cache_invalidate,
392 };
393 
394 size_t arm_smmu_get_viommu_size(struct device *dev,
395 				enum iommu_viommu_type viommu_type)
396 {
397 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
398 	struct arm_smmu_device *smmu = master->smmu;
399 
400 	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
401 		return 0;
402 
403 	/*
404 	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
405 	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
406 	 * any change to remove this.
407 	 */
408 	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
409 		return 0;
410 
411 	/*
412 	 * Must support some way to prevent the VM from bypassing the cache
413 	 * because VFIO currently does not do any cache maintenance. canwbs
414 	 * indicates the device is fully coherent and no cache maintenance is
415 	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
416 	 * things non-coherent using the memattr, but No-Snoop behavior is not
417 	 * effected.
418 	 */
419 	if (!arm_smmu_master_canwbs(master) &&
420 	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
421 		return 0;
422 
423 	if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
424 		return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);
425 
426 	if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
427 		return 0;
428 	return smmu->impl_ops->get_viommu_size(viommu_type);
429 }
430 
431 int arm_vsmmu_init(struct iommufd_viommu *viommu,
432 		   struct iommu_domain *parent_domain,
433 		   const struct iommu_user_data *user_data)
434 {
435 	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
436 	struct arm_smmu_device *smmu =
437 		container_of(viommu->iommu_dev, struct arm_smmu_device, iommu);
438 	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain);
439 
440 	if (s2_parent->smmu != smmu)
441 		return -EINVAL;
442 
443 	vsmmu->smmu = smmu;
444 	vsmmu->s2_parent = s2_parent;
445 	/* FIXME Move VMID allocation from the S2 domain allocation to here */
446 	vsmmu->vmid = s2_parent->s2_cfg.vmid;
447 
448 	if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) {
449 		viommu->ops = &arm_vsmmu_ops;
450 		return 0;
451 	}
452 
453 	return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
454 }
455 
456 int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
457 {
458 	struct iommu_vevent_arm_smmuv3 vevt;
459 	int i;
460 
461 	lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);
462 
463 	vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
464 				  FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
465 	for (i = 1; i < EVTQ_ENT_DWORDS; i++)
466 		vevt.evt[i] = cpu_to_le64(evt[i]);
467 
468 	return iommufd_viommu_report_event(&vmaster->vsmmu->core,
469 					   IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
470 					   sizeof(vevt));
471 }
472 
473 MODULE_IMPORT_NS("IOMMUFD");
474