xref: /linux/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
 */

#include <uapi/linux/iommufd.h>

#include "arm-smmu-v3.h"

void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct iommu_hw_info_arm_smmuv3 *info;
	u32 __iomem *base_idr;
	unsigned int i;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	base_idr = master->smmu->base + ARM_SMMU_IDR0;
	for (i = 0; i <= 5; i++)
		info->idr[i] = readl_relaxed(base_idr + i);
	info->iidr = readl_relaxed(master->smmu->base + ARM_SMMU_IIDR);
	info->aidr = readl_relaxed(master->smmu->base + ARM_SMMU_AIDR);

	*length = sizeof(*info);
	*type = IOMMU_HW_INFO_TYPE_ARM_SMMUV3;

	return info;
}
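
/*
 * A minimal userspace sketch of how the info blob built above is typically
 * retrieved, assuming the iommufd uAPI in include/uapi/linux/iommufd.h and an
 * illustrative device handle dev_id obtained from VFIO/iommufd binding:
 *
 *	struct iommu_hw_info_arm_smmuv3 smmu_info;
 *	struct iommu_hw_info cmd = {
 *		.size = sizeof(cmd),
 *		.dev_id = dev_id,
 *		.data_len = sizeof(smmu_info),
 *		.data_uptr = (uintptr_t)&smmu_info,
 *	};
 *
 *	if (!ioctl(iommufd, IOMMU_GET_HW_INFO, &cmd) &&
 *	    cmd.out_data_type == IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
 *		// smmu_info.idr[0..5], .iidr and .aidr now mirror the HW
 *		// registers read above and can seed the VMM's vSMMU model.
 *	}
 */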

static void arm_smmu_make_nested_cd_table_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	arm_smmu_make_s2_domain_ste(
		target, master, nested_domain->vsmmu->s2_parent, ats_enabled);

	target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
				      FIELD_PREP(STRTAB_STE_0_CFG,
						 STRTAB_STE_0_CFG_NESTED));
	target->data[0] |= nested_domain->ste[0] &
			   ~cpu_to_le64(STRTAB_STE_0_CFG);
	target->data[1] |= nested_domain->ste[1];
}

/*
 * Create a physical STE from the virtual STE that userspace provided when it
 * created the nested domain. Using the vSTE userspace can request:
 * - Non-valid STE
 * - Abort STE
 * - Bypass STE (install the S2, no CD table)
 * - CD table STE (install the S2 and the userspace CD table)
 */
static void arm_smmu_make_nested_domain_ste(
	struct arm_smmu_ste *target, struct arm_smmu_master *master,
	struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
{
	unsigned int cfg =
		FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));

	/*
	 * Userspace can request a non-valid STE through the nesting interface.
	 * We relay that into an abort physical STE with the intention that
	 * C_BAD_STE for this SID can be generated to userspace.
	 */
	if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
		cfg = STRTAB_STE_0_CFG_ABORT;

	switch (cfg) {
	case STRTAB_STE_0_CFG_S1_TRANS:
		arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
						  ats_enabled);
		break;
	case STRTAB_STE_0_CFG_BYPASS:
		arm_smmu_make_s2_domain_ste(target, master,
					    nested_domain->vsmmu->s2_parent,
					    ats_enabled);
		break;
	case STRTAB_STE_0_CFG_ABORT:
	default:
		arm_smmu_make_abort_ste(target);
		break;
	}
}
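
/*
 * Summary of the vSTE to physical STE translation done by
 * arm_smmu_make_nested_domain_ste() above:
 *
 *	vSTE from userspace		Physical STE installed
 *	-------------------		----------------------
 *	V=0 (non-valid)			abort
 *	Config=abort			abort
 *	Config=bypass			S2-only translation
 *	Config=S1 translate		S2 + userspace CD table (nested)
 *	anything else			abort (default case)
 */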

static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
				      struct device *dev)
{
	struct arm_smmu_nested_domain *nested_domain =
		to_smmu_nested_domain(domain);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		.old_domain = iommu_get_domain_for_dev(dev),
		.ssid = IOMMU_NO_PASID,
	};
	struct arm_smmu_ste ste;
	int ret;

	if (nested_domain->vsmmu->smmu != master->smmu)
		return -EINVAL;
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return -EBUSY;

	mutex_lock(&arm_smmu_asid_lock);
	/*
	 * The VM has to control the actual ATS state at the PCI device because
	 * we forward the invalidations directly from the VM. If the VM doesn't
	 * think ATS is on, it will not generate ATC flushes and the ATC will
	 * become incoherent. Since we can't access the actual virtual PCI ATS
	 * config bit here, base this off the EATS value in the STE. If EATS
	 * is set then the VM must generate ATC flushes.
	 */
	state.disable_ats = !nested_domain->enable_ats;
	ret = arm_smmu_attach_prepare(&state, domain);
	if (ret) {
		mutex_unlock(&arm_smmu_asid_lock);
		return ret;
	}

	arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
					state.ats_enabled);
	arm_smmu_install_ste_for_dev(master, &ste);
	arm_smmu_attach_commit(&state);
	mutex_unlock(&arm_smmu_asid_lock);
	return 0;
}

static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	kfree(to_smmu_nested_domain(domain));
}

static const struct iommu_domain_ops arm_smmu_nested_ops = {
	.attach_dev = arm_smmu_attach_dev_nested,
	.free = arm_smmu_domain_nested_free,
};

static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
				  bool *enable_ats)
{
	unsigned int eats;
	unsigned int cfg;

	if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
		memset(arg->ste, 0, sizeof(arg->ste));
		return 0;
	}

	/* EIO is reserved for invalid STE data. */
	if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
	    (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
		return -EIO;

	cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
	if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
	    cfg != STRTAB_STE_0_CFG_S1_TRANS)
		return -EIO;

	/*
	 * Only Full ATS or ATS UR is supported. The EATS field will be set by
	 * arm_smmu_make_nested_domain_ste().
	 */
	eats = FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(arg->ste[1]));
	arg->ste[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS);
	if (eats != STRTAB_STE_1_EATS_ABT && eats != STRTAB_STE_1_EATS_TRANS)
		return -EIO;

	if (cfg == STRTAB_STE_0_CFG_S1_TRANS)
		*enable_ats = (eats == STRTAB_STE_1_EATS_TRANS);
	return 0;
}

static struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
			      const struct iommu_user_data *user_data)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_nested_domain *nested_domain;
	struct iommu_hwpt_arm_smmuv3 arg;
	bool enable_ats = false;
	int ret;

	if (flags)
		return ERR_PTR(-EOPNOTSUPP);

	ret = iommu_copy_struct_from_user(&arg, user_data,
					  IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
	if (ret)
		return ERR_PTR(ret);

	ret = arm_smmu_validate_vste(&arg, &enable_ats);
	if (ret)
		return ERR_PTR(ret);

	nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
	if (!nested_domain)
		return ERR_PTR(-ENOMEM);

	nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
	nested_domain->domain.ops = &arm_smmu_nested_ops;
	nested_domain->enable_ats = enable_ats;
	nested_domain->vsmmu = vsmmu;
	nested_domain->ste[0] = arg.ste[0];
	nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);

	return &nested_domain->domain;
}
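
/*
 * A minimal userspace sketch of allocating a nested domain through the path
 * above, assuming the iommufd uAPI in include/uapi/linux/iommufd.h; dev_id,
 * viommu_id, ste_lo and ste_hi are illustrative values owned by the VMM:
 *
 *	struct iommu_hwpt_arm_smmuv3 vste = {
 *		// Guest STE words, already little-endian (e.g. via htole64())
 *		.ste = { ste_lo, ste_hi },
 *	};
 *	struct iommu_hwpt_alloc cmd = {
 *		.size = sizeof(cmd),
 *		.dev_id = dev_id,
 *		.pt_id = viommu_id,	// nest under this vSMMU's S2 parent
 *		.data_type = IOMMU_HWPT_DATA_ARM_SMMUV3,
 *		.data_len = sizeof(vste),
 *		.data_uptr = (uintptr_t)&vste,
 *	};
 *
 *	ioctl(iommufd, IOMMU_HWPT_ALLOC, &cmd);
 *	// On success cmd.out_hwpt_id names the nested domain and can be
 *	// attached to the device with VFIO_DEVICE_ATTACH_IOMMUFD_PT.
 */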

static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu *vsmmu, u32 vsid, u32 *sid)
{
	struct arm_smmu_master *master;
	struct device *dev;
	int ret = 0;

	xa_lock(&vsmmu->core.vdevs);
	dev = iommufd_viommu_find_dev(&vsmmu->core, (unsigned long)vsid);
	if (!dev) {
		ret = -EIO;
		goto unlock;
	}
	master = dev_iommu_priv_get(dev);

	/* At this moment, iommufd only supports PCI devices that have one SID */
	if (sid)
		*sid = master->streams[0].id;
unlock:
	xa_unlock(&vsmmu->core.vdevs);
	return ret;
}

/* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
struct arm_vsmmu_invalidation_cmd {
	union {
		u64 cmd[2];
		struct iommu_viommu_arm_smmuv3_invalidate ucmd;
	};
};

/*
 * Convert, in place, the raw invalidation command into an internal format that
 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
 * stored in CPU endian.
 *
 * Enforce the VMID or SID on the command.
 */
static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
				      struct arm_vsmmu_invalidation_cmd *cmd)
{
	/* Commands are le64 stored in u64 */
	cmd->cmd[0] = le64_to_cpu(cmd->ucmd.cmd[0]);
	cmd->cmd[1] = le64_to_cpu(cmd->ucmd.cmd[1]);

	switch (cmd->cmd[0] & CMDQ_0_OP) {
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Convert to NH_ALL */
		cmd->cmd[0] = CMDQ_OP_TLBI_NH_ALL |
			      FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		cmd->cmd[1] = 0;
		break;
	case CMDQ_OP_TLBI_NH_VA:
	case CMDQ_OP_TLBI_NH_VAA:
	case CMDQ_OP_TLBI_NH_ALL:
	case CMDQ_OP_TLBI_NH_ASID:
		cmd->cmd[0] &= ~CMDQ_TLBI_0_VMID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, vsmmu->vmid);
		break;
	case CMDQ_OP_ATC_INV:
	case CMDQ_OP_CFGI_CD:
	case CMDQ_OP_CFGI_CD_ALL: {
		u32 sid, vsid = FIELD_GET(CMDQ_CFGI_0_SID, cmd->cmd[0]);

		if (arm_vsmmu_vsid_to_sid(vsmmu, vsid, &sid))
			return -EIO;
		cmd->cmd[0] &= ~CMDQ_CFGI_0_SID;
		cmd->cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, sid);
		break;
	}
	default:
		return -EIO;
	}
	return 0;
}
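
/*
 * For example, a guest CMD_TLBI_NH_VA keeps its address and ASID fields but
 * has the VMID field overwritten with vsmmu->vmid, and a guest CMD_CFGI_CD or
 * CMD_ATC_INV has its virtual SID replaced with the physical SID found via
 * arm_vsmmu_vsid_to_sid(). Opcodes outside the cases above are rejected with
 * -EIO rather than passed through to the hardware queue.
 */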

static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
				      struct iommu_user_data_array *array)
{
	struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
	struct arm_smmu_device *smmu = vsmmu->smmu;
	struct arm_vsmmu_invalidation_cmd *last;
	struct arm_vsmmu_invalidation_cmd *cmds;
	struct arm_vsmmu_invalidation_cmd *cur;
	struct arm_vsmmu_invalidation_cmd *end;
	int ret;

	cmds = kcalloc(array->entry_num, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;
	cur = cmds;
	end = cmds + array->entry_num;

	static_assert(sizeof(*cmds) == 2 * sizeof(u64));
	ret = iommu_copy_struct_from_full_user_array(
		cmds, sizeof(*cmds), array,
		IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3);
	if (ret)
		goto out;

	last = cmds;
	while (cur != end) {
		ret = arm_vsmmu_convert_user_cmd(vsmmu, cur);
		if (ret)
			goto out;

		/* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
		cur++;
		if (cur != end && (cur - last) != CMDQ_BATCH_ENTRIES - 1)
			continue;

		/* FIXME always uses the main cmdq rather than trying to group by type */
		ret = arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, last->cmd,
						  cur - last, true);
		if (ret) {
			cur--;
			goto out;
		}
		last = cur;
	}
out:
	array->entry_num = cur - cmds;
	kfree(cmds);
	return ret;
}
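
/*
 * A minimal userspace sketch of driving the handler above, assuming the
 * iommufd uAPI in include/uapi/linux/iommufd.h (for this data type the
 * hwpt_id field carries the vIOMMU object ID); nr_cmds and viommu_id are
 * illustrative:
 *
 *	struct iommu_viommu_arm_smmuv3_invalidate cmds[64];
 *	// cmds[] holds raw 16-byte commands (CMD_TLBI_xxx, CMD_ATC_INV,
 *	// CMD_CFGI_xxx) copied little-endian from the guest command queue.
 *
 *	struct iommu_hwpt_invalidate cmd = {
 *		.size = sizeof(cmd),
 *		.hwpt_id = viommu_id,
 *		.data_uptr = (uintptr_t)cmds,
 *		.data_type = IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3,
 *		.entry_len = sizeof(cmds[0]),
 *		.entry_num = nr_cmds,
 *	};
 *
 *	ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &cmd);
 *	// On return cmd.entry_num reports how many entries were consumed,
 *	// matching the array->entry_num update at the end of the function.
 */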

static const struct iommufd_viommu_ops arm_vsmmu_ops = {
	.alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
	.cache_invalidate = arm_vsmmu_cache_invalidate,
};

struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
				       struct iommu_domain *parent,
				       struct iommufd_ctx *ictx,
				       unsigned int viommu_type)
{
	struct arm_smmu_device *smmu =
		iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
	struct arm_vsmmu *vsmmu;

	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return ERR_PTR(-EOPNOTSUPP);

	if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
		return ERR_PTR(-EOPNOTSUPP);

	if (s2_parent->smmu != master->smmu)
		return ERR_PTR(-EINVAL);

	/*
	 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
	 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
	 * any change to remove this.
	 */
	if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * Must support some way to prevent the VM from bypassing the cache
	 * because VFIO currently does not do any cache maintenance. canwbs
	 * indicates the device is fully coherent and no cache maintenance is
	 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
	 * things non-coherent using the memattr, but No-Snoop behavior is not
	 * affected.
	 */
	if (!arm_smmu_master_canwbs(master) &&
	    !(smmu->features & ARM_SMMU_FEAT_S2FWB))
		return ERR_PTR(-EOPNOTSUPP);

	vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
				     &arm_vsmmu_ops);
	if (IS_ERR(vsmmu))
		return ERR_CAST(vsmmu);

	vsmmu->smmu = smmu;
	vsmmu->s2_parent = s2_parent;
	/* FIXME Move VMID allocation from the S2 domain allocation to here */
	vsmmu->vmid = s2_parent->s2_cfg.vmid;

	return &vsmmu->core;
}
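
/*
 * A minimal userspace sketch of creating the vSMMU object above, assuming the
 * iommufd uAPI in include/uapi/linux/iommufd.h; dev_id is an iommufd device
 * handle and s2_hwpt_id a nesting-parent HWPT previously allocated with
 * IOMMU_HWPT_ALLOC_NEST_PARENT (both illustrative):
 *
 *	struct iommu_viommu_alloc cmd = {
 *		.size = sizeof(cmd),
 *		.type = IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
 *		.dev_id = dev_id,
 *		.hwpt_id = s2_hwpt_id,
 *	};
 *
 *	ioctl(iommufd, IOMMU_VIOMMU_ALLOC, &cmd);
 *	// cmd.out_viommu_id is then used as pt_id when allocating nested
 *	// domains and as hwpt_id when issuing vSMMU cache invalidations.
 */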

MODULE_IMPORT_NS("IOMMUFD");