1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 #include <kunit/visibility.h>
30 #include <uapi/linux/iommufd.h>
31
32 #include "arm-smmu-v3.h"
33 #include "../../dma-iommu.h"
34
35 static bool disable_msipolling;
36 module_param(disable_msipolling, bool, 0444);
37 MODULE_PARM_DESC(disable_msipolling,
38 "Disable MSI-based polling for CMD_SYNC completion.");
39
40 static struct iommu_ops arm_smmu_ops;
41 static struct iommu_dirty_ops arm_smmu_dirty_ops;
42
/*
 * Row selectors for arm_smmu_msi_cfg[]. ARM_SMMU_MAX_MSIS is not a selector;
 * it is the number of rows and must stay last.
 */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX = 0,
	GERROR_MSI_INDEX = 1,
	PRIQ_MSI_INDEX = 2,
	ARM_SMMU_MAX_MSIS = 3,
};
49
50 #define NUM_ENTRY_QWORDS 8
51 static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
52 static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
53
54 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
55 [EVTQ_MSI_INDEX] = {
56 ARM_SMMU_EVTQ_IRQ_CFG0,
57 ARM_SMMU_EVTQ_IRQ_CFG1,
58 ARM_SMMU_EVTQ_IRQ_CFG2,
59 },
60 [GERROR_MSI_INDEX] = {
61 ARM_SMMU_GERROR_IRQ_CFG0,
62 ARM_SMMU_GERROR_IRQ_CFG1,
63 ARM_SMMU_GERROR_IRQ_CFG2,
64 },
65 [PRIQ_MSI_INDEX] = {
66 ARM_SMMU_PRIQ_IRQ_CFG0,
67 ARM_SMMU_PRIQ_IRQ_CFG1,
68 ARM_SMMU_PRIQ_IRQ_CFG2,
69 },
70 };
71
72 struct arm_smmu_option_prop {
73 u32 opt;
74 const char *prop;
75 };
76
/*
 * Global ASID xarray and its companion mutex.
 * NOTE(review): the users are outside this chunk; presumably the mutex
 * serialises ASID allocation/lookup in the xarray - confirm against callers.
 */
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);
79
80 static struct arm_smmu_option_prop arm_smmu_options[] = {
81 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
82 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
83 { 0, NULL},
84 };
85
86 static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
87 struct arm_smmu_device *smmu, u32 flags);
88 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
89
parse_driver_options(struct arm_smmu_device * smmu)90 static void parse_driver_options(struct arm_smmu_device *smmu)
91 {
92 int i = 0;
93
94 do {
95 if (of_property_read_bool(smmu->dev->of_node,
96 arm_smmu_options[i].prop)) {
97 smmu->options |= arm_smmu_options[i].opt;
98 dev_notice(smmu->dev, "option %s\n",
99 arm_smmu_options[i].prop);
100 }
101 } while (arm_smmu_options[++i].opt);
102 }
103
104 /* Low-level queue manipulation functions */
queue_has_space(struct arm_smmu_ll_queue * q,u32 n)105 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
106 {
107 u32 space, prod, cons;
108
109 prod = Q_IDX(q, q->prod);
110 cons = Q_IDX(q, q->cons);
111
112 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
113 space = (1 << q->max_n_shift) - (prod - cons);
114 else
115 space = cons - prod;
116
117 return space >= n;
118 }
119
queue_full(struct arm_smmu_ll_queue * q)120 static bool queue_full(struct arm_smmu_ll_queue *q)
121 {
122 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
123 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
124 }
125
queue_empty(struct arm_smmu_ll_queue * q)126 static bool queue_empty(struct arm_smmu_ll_queue *q)
127 {
128 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
129 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
130 }
131
queue_consumed(struct arm_smmu_ll_queue * q,u32 prod)132 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
133 {
134 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
135 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
136 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
137 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
138 }
139
queue_sync_cons_out(struct arm_smmu_queue * q)140 static void queue_sync_cons_out(struct arm_smmu_queue *q)
141 {
142 /*
143 * Ensure that all CPU accesses (reads and writes) to the queue
144 * are complete before we update the cons pointer.
145 */
146 __iomb();
147 writel_relaxed(q->llq.cons, q->cons_reg);
148 }
149
queue_inc_cons(struct arm_smmu_ll_queue * q)150 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
151 {
152 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
153 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
154 }
155
queue_sync_cons_ovf(struct arm_smmu_queue * q)156 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
157 {
158 struct arm_smmu_ll_queue *llq = &q->llq;
159
160 if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
161 return;
162
163 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
164 Q_IDX(llq, llq->cons);
165 queue_sync_cons_out(q);
166 }
167
queue_sync_prod_in(struct arm_smmu_queue * q)168 static int queue_sync_prod_in(struct arm_smmu_queue *q)
169 {
170 u32 prod;
171 int ret = 0;
172
173 /*
174 * We can't use the _relaxed() variant here, as we must prevent
175 * speculative reads of the queue before we have determined that
176 * prod has indeed moved.
177 */
178 prod = readl(q->prod_reg);
179
180 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
181 ret = -EOVERFLOW;
182
183 q->llq.prod = prod;
184 return ret;
185 }
186
queue_inc_prod_n(struct arm_smmu_ll_queue * q,int n)187 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
188 {
189 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
190 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
191 }
192
queue_poll_init(struct arm_smmu_device * smmu,struct arm_smmu_queue_poll * qp)193 static void queue_poll_init(struct arm_smmu_device *smmu,
194 struct arm_smmu_queue_poll *qp)
195 {
196 qp->delay = 1;
197 qp->spin_cnt = 0;
198 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
199 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
200 }
201
queue_poll(struct arm_smmu_queue_poll * qp)202 static int queue_poll(struct arm_smmu_queue_poll *qp)
203 {
204 if (ktime_compare(ktime_get(), qp->timeout) > 0)
205 return -ETIMEDOUT;
206
207 if (qp->wfe) {
208 wfe();
209 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
210 cpu_relax();
211 } else {
212 udelay(qp->delay);
213 qp->delay *= 2;
214 qp->spin_cnt = 0;
215 }
216
217 return 0;
218 }
219
/*
 * Copy @n_dwords 64-bit words from @src (CPU endianness) to @dst, converting
 * each to little-endian on the way.
 */
static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
	/* size_t index: matches the type of the bound, no sign mismatch */
	size_t i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = cpu_to_le64(*src++);
}
227
/*
 * Copy @n_dwords 64-bit little-endian words from @src to @dst, converting
 * each to CPU endianness on the way.
 */
static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
	/* size_t index: matches the type of the bound, no sign mismatch */
	size_t i;

	for (i = 0; i < n_dwords; ++i)
		*dst++ = le64_to_cpu(*src++);
}
235
/*
 * Pop the entry at the shadow cons position of @q into @ent.
 *
 * Return: -EAGAIN if the shadow state says the queue is empty; otherwise 0
 * after copying q->ent_dwords words, advancing cons and writing it out.
 */
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
	struct arm_smmu_ll_queue *llq = &q->llq;

	if (queue_empty(llq))
		return -EAGAIN;

	queue_read(ent, Q_ENT(q, llq->cons), q->ent_dwords);
	queue_inc_cons(llq);
	queue_sync_cons_out(q);

	return 0;
}
246
247 /* High-level queue accessors */
/*
 * Encode the command described by @ent into the raw words at @cmd.
 *
 * @cmd: output buffer; its first (1 << CMDQ_ENT_SZ_SHIFT) bytes are cleared
 *       before any field is written.
 * @ent: command description; which members are read depends on ent->opcode.
 *
 * Return: 0 on success, -EINVAL for a CMDQ_OP_PRI_RESP with an unrecognised
 * response code, -ENOENT for an opcode not handled here. On failure @cmd may
 * be left partially populated.
 */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
	cmd[0] = FIELD_PREP(CMDQ_0_OP, ent->opcode);

	switch (ent->opcode) {
	case CMDQ_OP_TLBI_EL2_ALL:
	case CMDQ_OP_TLBI_NSNH_ALL:
		/* Nothing beyond the opcode */
		break;

	case CMDQ_OP_PREFETCH_CFG:
		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
		break;

	case CMDQ_OP_CFGI_CD:
		/* CFGI_STE fields plus the SSID */
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
		fallthrough;
	case CMDQ_OP_CFGI_STE:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
		break;

	case CMDQ_OP_CFGI_CD_ALL:
		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
		break;

	case CMDQ_OP_CFGI_ALL:
		/* Range value 31 covers the entire SID range */
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;

	case CMDQ_OP_TLBI_NH_VA:
		/* TLBI_EL2_VA fields plus the VMID */
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		fallthrough;
	case CMDQ_OP_TLBI_EL2_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num) |
			  FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale) |
			  FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf) |
			  FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl) |
			  FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;

	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num) |
			  FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale) |
			  FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf) |
			  FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl) |
			  FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;

	case CMDQ_OP_TLBI_NH_ASID:
		/* TLBI_S12_VMALL field plus the ASID */
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		fallthrough;
	case CMDQ_OP_TLBI_S12_VMALL:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		break;

	case CMDQ_OP_TLBI_EL2_ASID:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		break;

	case CMDQ_OP_ATC_INV:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid) |
			  FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global) |
			  FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid) |
			  FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
		break;

	case CMDQ_OP_PRI_RESP:
		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid) |
			  FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid) |
			  FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);

		/* Only the three known response codes may be encoded */
		if (ent->pri.resp != PRI_RESP_DENY &&
		    ent->pri.resp != PRI_RESP_FAIL &&
		    ent->pri.resp != PRI_RESP_SUCC)
			return -EINVAL;

		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
		break;

	case CMDQ_OP_RESUME:
		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid) |
			  FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
		break;

	case CMDQ_OP_CMD_SYNC:
		/*
		 * With an MSI address the completion signal is CS_IRQ and the
		 * address is placed in word 1; without one it is CS_SEV.
		 */
		if (ent->sync.msiaddr) {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
		} else {
			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
		}
		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH) |
			  FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
		break;

	default:
		return -ENOENT;
	}

	return 0;
}
348
arm_smmu_get_cmdq(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent)349 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
350 struct arm_smmu_cmdq_ent *ent)
351 {
352 struct arm_smmu_cmdq *cmdq = NULL;
353
354 if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
355 cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
356
357 return cmdq ?: &smmu->cmdq;
358 }
359
arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq)360 static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
361 struct arm_smmu_cmdq *cmdq)
362 {
363 if (cmdq == &smmu->cmdq)
364 return false;
365
366 return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
367 }
368
/*
 * Build into @cmd the CMD_SYNC that will occupy queue position @prod of
 * @cmdq.
 *
 * With ARM_SMMU_OPT_MSIPOLL set, the MSI address is the DMA address of that
 * very queue slot. For queues that need busy polling, the CS field is then
 * overridden with CMDQ_SYNC_0_CS_NONE.
 */
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
					 struct arm_smmu_cmdq *cmdq, u32 prod)
{
	struct arm_smmu_cmdq_ent sync_ent = { .opcode = CMDQ_OP_CMD_SYNC };
	struct arm_smmu_queue *queue = &cmdq->q;

	/*
	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
	 * payload, so the write will zero the entire command on that platform.
	 */
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		sync_ent.sync.msiaddr = queue->base_dma +
					Q_IDX(&queue->llq, prod) *
					queue->ent_dwords * 8;

	arm_smmu_cmdq_build_cmd(cmd, &sync_ent);

	if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
		u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
390
__arm_smmu_cmdq_skip_err(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq)391 void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
392 struct arm_smmu_cmdq *cmdq)
393 {
394 static const char * const cerror_str[] = {
395 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
396 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
397 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
398 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
399 };
400 struct arm_smmu_queue *q = &cmdq->q;
401
402 int i;
403 u64 cmd[CMDQ_ENT_DWORDS];
404 u32 cons = readl_relaxed(q->cons_reg);
405 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
406 struct arm_smmu_cmdq_ent cmd_sync = {
407 .opcode = CMDQ_OP_CMD_SYNC,
408 };
409
410 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
411 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
412
413 switch (idx) {
414 case CMDQ_ERR_CERROR_ABT_IDX:
415 dev_err(smmu->dev, "retrying command fetch\n");
416 return;
417 case CMDQ_ERR_CERROR_NONE_IDX:
418 return;
419 case CMDQ_ERR_CERROR_ATC_INV_IDX:
420 /*
421 * ATC Invalidation Completion timeout. CONS is still pointing
422 * at the CMD_SYNC. Attempt to complete other pending commands
423 * by repeating the CMD_SYNC, though we might well end up back
424 * here since the ATC invalidation may still be pending.
425 */
426 return;
427 case CMDQ_ERR_CERROR_ILL_IDX:
428 default:
429 break;
430 }
431
432 /*
433 * We may have concurrent producers, so we need to be careful
434 * not to touch any of the shadow cmdq state.
435 */
436 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
437 dev_err(smmu->dev, "skipping command in error state:\n");
438 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
439 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
440
441 /* Convert the erroneous command into a CMD_SYNC */
442 arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
443 if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
444 u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
445
446 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
447 }
448
arm_smmu_cmdq_skip_err(struct arm_smmu_device * smmu)449 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
450 {
451 __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
452 }
453
454 /*
455 * Command queue locking.
456 * This is a form of bastardised rwlock with the following major changes:
457 *
458 * - The only LOCK routines are exclusive_trylock() and shared_lock().
459 * Neither have barrier semantics, and instead provide only a control
460 * dependency.
461 *
462 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
463 * fails if the caller appears to be the last lock holder (yes, this is
464 * racy). All successful UNLOCK routines have RELEASE semantics.
465 */
arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq * cmdq)466 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
467 {
468 int val;
469
470 /*
471 * We can try to avoid the cmpxchg() loop by simply incrementing the
472 * lock counter. When held in exclusive state, the lock counter is set
473 * to INT_MIN so these increments won't hurt as the value will remain
474 * negative.
475 */
476 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
477 return;
478
479 do {
480 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
481 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
482 }
483
arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq * cmdq)484 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
485 {
486 (void)atomic_dec_return_release(&cmdq->lock);
487 }
488
arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq * cmdq)489 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
490 {
491 if (atomic_read(&cmdq->lock) == 1)
492 return false;
493
494 arm_smmu_cmdq_shared_unlock(cmdq);
495 return true;
496 }
497
/*
 * Try to take the cmdq lock exclusively with local interrupts disabled.
 *
 * Interrupts are saved/disabled into @flags first, then the lock counter is
 * moved from 0 to INT_MIN with a relaxed cmpxchg. Evaluates to true on
 * success (interrupts stay disabled); on failure the interrupt state is
 * restored and the macro evaluates to false.
 *
 * @cmdq is parenthesised so that any pointer-valued expression is accepted.
 */
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
({									\
	bool __ret;							\
	local_irq_save(flags);						\
	__ret = !atomic_cmpxchg_relaxed(&(cmdq)->lock, 0, INT_MIN);	\
	if (!__ret)							\
		local_irq_restore(flags);				\
	__ret;								\
})
507
/*
 * Release an exclusively-held cmdq lock: the counter is reset to 0 with
 * RELEASE ordering and the interrupt state saved in @flags is restored.
 *
 * @cmdq is parenthesised so that any pointer-valued expression is accepted.
 */
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
({									\
	atomic_set_release(&(cmdq)->lock, 0);				\
	local_irq_restore(flags);					\
})
513
514
515 /*
516 * Command queue insertion.
517 * This is made fiddly by our attempts to achieve some sort of scalability
518 * since there is one queue shared amongst all of the CPUs in the system. If
519 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
520 * then you'll *love* this monstrosity.
521 *
522 * The basic idea is to split the queue up into ranges of commands that are
523 * owned by a given CPU; the owner may not have written all of the commands
524 * itself, but is responsible for advancing the hardware prod pointer when
525 * the time comes. The algorithm is roughly:
526 *
527 * 1. Allocate some space in the queue. At this point we also discover
528 * whether the head of the queue is currently owned by another CPU,
529 * or whether we are the owner.
530 *
531 * 2. Write our commands into our allocated slots in the queue.
532 *
533 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
534 *
535 * 4. If we are an owner:
536 * a. Wait for the previous owner to finish.
537 * b. Mark the queue head as unowned, which tells us the range
538 * that we are responsible for publishing.
539 * c. Wait for all commands in our owned range to become valid.
540 * d. Advance the hardware prod pointer.
541 * e. Tell the next owner we've finished.
542 *
543 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
544 * owner), then we need to stick around until it has completed:
545 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
546 * to clear the first 4 bytes.
547 * b. Otherwise, we spin waiting for the hardware cons pointer to
548 * advance past our command.
549 *
550 * The devil is in the details, particularly the use of locking for handling
551 * SYNC completion and freeing up space in the queue before we think that it is
552 * full.
553 */
/*
 * Walk the valid_map bits covering queue positions [sprod, eprod), one
 * bitmap word at a time, and either toggle them (@set) or wait until they
 * all read as valid (!@set).
 */
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
					       u32 sprod, u32 eprod, bool set)
{
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
		.prod = sprod,
	};
	/* Bitmap word and bit-within-word of the (exclusive) end position */
	u32 end_word = BIT_WORD(Q_IDX(&llq, eprod));
	u32 end_bit = Q_IDX(&llq, eprod) % BITS_PER_LONG;

	while (llq.prod != eprod) {
		u32 word = BIT_WORD(Q_IDX(&llq, llq.prod));
		u32 first_bit = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
		atomic_long_t *map_word = &cmdq->valid_map[word];
		u32 stop_bit = BITS_PER_LONG;
		unsigned long span;

		/* Last word of the range: stop short of the end position */
		if ((word == end_word) && (first_bit < end_bit))
			stop_bit = end_bit;

		span = GENMASK(stop_bit - 1, first_bit);

		/*
		 * The valid bit is the inverse of the wrap bit. This means
		 * that a zero-initialised queue is invalid and, after marking
		 * all entries as valid, they become invalid again when we
		 * wrap.
		 */
		if (set) {
			atomic_long_xor(span, map_word);
		} else {
			/* Poll: all-ones when wrap is clear, all-zeroes when set */
			unsigned long expect;

			expect = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & span;
			atomic_long_cond_read_relaxed(map_word,
						      (VAL & span) == expect);
		}

		llq.prod = queue_inc_prod_n(&llq, stop_bit - first_bit);
	}
}
599
600 /* Mark all entries in the range [sprod, eprod) as valid */
arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq * cmdq,u32 sprod,u32 eprod)601 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
602 u32 sprod, u32 eprod)
603 {
604 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
605 }
606
607 /* Wait for all entries in the range [sprod, eprod) to become valid */
arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq * cmdq,u32 sprod,u32 eprod)608 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
609 u32 sprod, u32 eprod)
610 {
611 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
612 }
613
614 /* Wait for the command queue to become non-full */
arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)615 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
616 struct arm_smmu_cmdq *cmdq,
617 struct arm_smmu_ll_queue *llq)
618 {
619 unsigned long flags;
620 struct arm_smmu_queue_poll qp;
621 int ret = 0;
622
623 /*
624 * Try to update our copy of cons by grabbing exclusive cmdq access. If
625 * that fails, spin until somebody else updates it for us.
626 */
627 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
628 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
629 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
630 llq->val = READ_ONCE(cmdq->q.llq.val);
631 return 0;
632 }
633
634 queue_poll_init(smmu, &qp);
635 do {
636 llq->val = READ_ONCE(cmdq->q.llq.val);
637 if (!queue_full(llq))
638 break;
639
640 ret = queue_poll(&qp);
641 } while (!ret);
642
643 return ret;
644 }
645
646 /*
647 * Wait until the SMMU signals a CMD_SYNC completion MSI.
648 * Must be called with the cmdq lock held in some capacity.
649 */
__arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)650 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
651 struct arm_smmu_cmdq *cmdq,
652 struct arm_smmu_ll_queue *llq)
653 {
654 int ret = 0;
655 struct arm_smmu_queue_poll qp;
656 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
657
658 queue_poll_init(smmu, &qp);
659
660 /*
661 * The MSI won't generate an event, since it's being written back
662 * into the command queue.
663 */
664 qp.wfe = false;
665 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
666 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
667 return ret;
668 }
669
670 /*
671 * Wait until the SMMU cons index passes llq->prod.
672 * Must be called with the cmdq lock held in some capacity.
673 */
__arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)674 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
675 struct arm_smmu_cmdq *cmdq,
676 struct arm_smmu_ll_queue *llq)
677 {
678 struct arm_smmu_queue_poll qp;
679 u32 prod = llq->prod;
680 int ret = 0;
681
682 queue_poll_init(smmu, &qp);
683 llq->val = READ_ONCE(cmdq->q.llq.val);
684 do {
685 if (queue_consumed(llq, prod))
686 break;
687
688 ret = queue_poll(&qp);
689
690 /*
691 * This needs to be a readl() so that our subsequent call
692 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
693 *
694 * Specifically, we need to ensure that we observe all
695 * shared_lock()s by other CMD_SYNCs that share our owner,
696 * so that a failing call to tryunlock() means that we're
697 * the last one out and therefore we can safely advance
698 * cmdq->q.llq.cons. Roughly speaking:
699 *
700 * CPU 0 CPU1 CPU2 (us)
701 *
702 * if (sync)
703 * shared_lock();
704 *
705 * dma_wmb();
706 * set_valid_map();
707 *
708 * if (owner) {
709 * poll_valid_map();
710 * <control dependency>
711 * writel(prod_reg);
712 *
713 * readl(cons_reg);
714 * tryunlock();
715 *
716 * Requires us to see CPU 0's shared_lock() acquisition.
717 */
718 llq->cons = readl(cmdq->q.cons_reg);
719 } while (!ret);
720
721 return ret;
722 }
723
arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,struct arm_smmu_ll_queue * llq)724 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
725 struct arm_smmu_cmdq *cmdq,
726 struct arm_smmu_ll_queue *llq)
727 {
728 if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
729 !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
730 return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
731
732 return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
733 }
734
/*
 * Copy @n commands, each CMDQ_ENT_DWORDS words long and stored back to back
 * in @cmds, into consecutive queue slots starting at position @prod.
 */
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
					u32 prod, int n)
{
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
		.prod = prod,
	};
	u64 *src = cmds;
	int i;

	for (i = 0; i < n; ++i, src += CMDQ_ENT_DWORDS) {
		/* Slot i is always computed relative to the start position */
		u32 slot = queue_inc_prod_n(&llq, i);

		queue_write(Q_ENT(&cmdq->q, slot), src, CMDQ_ENT_DWORDS);
	}
}
751
752 /*
753 * This is the actual insertion function, and provides the following
754 * ordering guarantees to callers:
755 *
756 * - There is a dma_wmb() before publishing any commands to the queue.
757 * This can be relied upon to order prior writes to data structures
758 * in memory (such as a CD or an STE) before the command.
759 *
760 * - On completion of a CMD_SYNC, there is a control dependency.
761 * This can be relied upon to order subsequent writes to memory (e.g.
762 * freeing an IOVA) after completion of the CMD_SYNC.
763 *
764 * - Command insertion is totally ordered, so if two CPUs each race to
765 * insert their own list of commands then all of the commands from one
766 * CPU will appear before any of the commands from the other CPU.
767 */
arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq,u64 * cmds,int n,bool sync)768 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
769 struct arm_smmu_cmdq *cmdq,
770 u64 *cmds, int n, bool sync)
771 {
772 u64 cmd_sync[CMDQ_ENT_DWORDS];
773 u32 prod;
774 unsigned long flags;
775 bool owner;
776 struct arm_smmu_ll_queue llq, head;
777 int ret = 0;
778
779 llq.max_n_shift = cmdq->q.llq.max_n_shift;
780
781 /* 1. Allocate some space in the queue */
782 local_irq_save(flags);
783 llq.val = READ_ONCE(cmdq->q.llq.val);
784 do {
785 u64 old;
786
787 while (!queue_has_space(&llq, n + sync)) {
788 local_irq_restore(flags);
789 if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
790 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
791 local_irq_save(flags);
792 }
793
794 head.cons = llq.cons;
795 head.prod = queue_inc_prod_n(&llq, n + sync) |
796 CMDQ_PROD_OWNED_FLAG;
797
798 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
799 if (old == llq.val)
800 break;
801
802 llq.val = old;
803 } while (1);
804 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
805 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
806 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
807
808 /*
809 * 2. Write our commands into the queue
810 * Dependency ordering from the cmpxchg() loop above.
811 */
812 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
813 if (sync) {
814 prod = queue_inc_prod_n(&llq, n);
815 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
816 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
817
818 /*
819 * In order to determine completion of our CMD_SYNC, we must
820 * ensure that the queue can't wrap twice without us noticing.
821 * We achieve that by taking the cmdq lock as shared before
822 * marking our slot as valid.
823 */
824 arm_smmu_cmdq_shared_lock(cmdq);
825 }
826
827 /* 3. Mark our slots as valid, ensuring commands are visible first */
828 dma_wmb();
829 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
830
831 /* 4. If we are the owner, take control of the SMMU hardware */
832 if (owner) {
833 /* a. Wait for previous owner to finish */
834 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
835
836 /* b. Stop gathering work by clearing the owned flag */
837 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
838 &cmdq->q.llq.atomic.prod);
839 prod &= ~CMDQ_PROD_OWNED_FLAG;
840
841 /*
842 * c. Wait for any gathered work to be written to the queue.
843 * Note that we read our own entries so that we have the control
844 * dependency required by (d).
845 */
846 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
847
848 /*
849 * d. Advance the hardware prod pointer
850 * Control dependency ordering from the entries becoming valid.
851 */
852 writel_relaxed(prod, cmdq->q.prod_reg);
853
854 /*
855 * e. Tell the next owner we're done
856 * Make sure we've updated the hardware first, so that we don't
857 * race to update prod and potentially move it backwards.
858 */
859 atomic_set_release(&cmdq->owner_prod, prod);
860 }
861
862 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
863 if (sync) {
864 llq.prod = queue_inc_prod_n(&llq, n);
865 ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
866 if (ret) {
867 dev_err_ratelimited(smmu->dev,
868 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
869 llq.prod,
870 readl_relaxed(cmdq->q.prod_reg),
871 readl_relaxed(cmdq->q.cons_reg));
872 }
873
874 /*
875 * Try to unlock the cmdq lock. This will fail if we're the last
876 * reader, in which case we can safely update cmdq->q.llq.cons
877 */
878 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
879 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
880 arm_smmu_cmdq_shared_unlock(cmdq);
881 }
882 }
883
884 local_irq_restore(flags);
885 return ret;
886 }
887
/*
 * Encode the single command @ent and insert it into the queue selected by
 * arm_smmu_get_cmdq(), followed by a CMD_SYNC when @sync is true.
 *
 * Return: -EINVAL (after a warning) if the command cannot be encoded,
 * otherwise the result of arm_smmu_cmdq_issue_cmdlist().
 */
static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
				     struct arm_smmu_cmdq_ent *ent,
				     bool sync)
{
	struct arm_smmu_cmdq *cmdq;
	u64 cmd[CMDQ_ENT_DWORDS];

	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
			 ent->opcode);
		return -EINVAL;
	}

	cmdq = arm_smmu_get_cmdq(smmu, ent);

	return arm_smmu_cmdq_issue_cmdlist(smmu, cmdq, cmd, 1, sync);
}
903
arm_smmu_cmdq_issue_cmd(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent)904 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
905 struct arm_smmu_cmdq_ent *ent)
906 {
907 return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
908 }
909
arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_ent * ent)910 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
911 struct arm_smmu_cmdq_ent *ent)
912 {
913 return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
914 }
915
arm_smmu_cmdq_batch_init(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_batch * cmds,struct arm_smmu_cmdq_ent * ent)916 static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
917 struct arm_smmu_cmdq_batch *cmds,
918 struct arm_smmu_cmdq_ent *ent)
919 {
920 cmds->num = 0;
921 cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
922 }
923
arm_smmu_cmdq_batch_add(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_batch * cmds,struct arm_smmu_cmdq_ent * cmd)924 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
925 struct arm_smmu_cmdq_batch *cmds,
926 struct arm_smmu_cmdq_ent *cmd)
927 {
928 bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
929 bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
930 (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
931 int index;
932
933 if (force_sync || unsupported_cmd) {
934 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
935 cmds->num, true);
936 arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
937 }
938
939 if (cmds->num == CMDQ_BATCH_ENTRIES) {
940 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
941 cmds->num, false);
942 arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
943 }
944
945 index = cmds->num * CMDQ_ENT_DWORDS;
946 if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
947 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
948 cmd->opcode);
949 return;
950 }
951
952 cmds->num++;
953 }
954
arm_smmu_cmdq_batch_submit(struct arm_smmu_device * smmu,struct arm_smmu_cmdq_batch * cmds)955 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
956 struct arm_smmu_cmdq_batch *cmds)
957 {
958 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
959 cmds->num, true);
960 }
961
arm_smmu_page_response(struct device * dev,struct iopf_fault * unused,struct iommu_page_response * resp)962 static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
963 struct iommu_page_response *resp)
964 {
965 struct arm_smmu_cmdq_ent cmd = {0};
966 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
967 int sid = master->streams[0].id;
968
969 if (WARN_ON(!master->stall_enabled))
970 return;
971
972 cmd.opcode = CMDQ_OP_RESUME;
973 cmd.resume.sid = sid;
974 cmd.resume.stag = resp->grpid;
975 switch (resp->code) {
976 case IOMMU_PAGE_RESP_INVALID:
977 case IOMMU_PAGE_RESP_FAILURE:
978 cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
979 break;
980 case IOMMU_PAGE_RESP_SUCCESS:
981 cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
982 break;
983 default:
984 break;
985 }
986
987 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
988 /*
989 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
990 * RESUME consumption guarantees that the stalled transaction will be
991 * terminated... at some point in the future. PRI_RESP is fire and
992 * forget.
993 */
994 }
995
996 /* Context descriptor manipulation functions */
/*
 * Issue a TLBI-by-ASID command for @asid and wait for it with a sync. The EL2
 * flavour of the opcode is used when the SMMU has ARM_SMMU_FEAT_E2H, the NH
 * flavour otherwise.
 */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
	struct arm_smmu_cmdq_ent cmd = { .tlbi.asid = asid };

	if (smmu->features & ARM_SMMU_FEAT_E2H)
		cmd.opcode = CMDQ_OP_TLBI_EL2_ASID;
	else
		cmd.opcode = CMDQ_OP_TLBI_NH_ASID;

	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
1007
1008 /*
1009 * Based on the value of ent report which bits of the STE the HW will access. It
1010 * would be nice if this was complete according to the spec, but minimally it
1011 * has to capture the bits this driver uses.
1012 */
1013 VISIBLE_IF_KUNIT
arm_smmu_get_ste_used(const __le64 * ent,__le64 * used_bits)1014 void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
1015 {
1016 unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
1017
1018 used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
1019 if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
1020 return;
1021
1022 used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
1023
1024 /* S1 translates */
1025 if (cfg & BIT(0)) {
1026 used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
1027 STRTAB_STE_0_S1CTXPTR_MASK |
1028 STRTAB_STE_0_S1CDMAX);
1029 used_bits[1] |=
1030 cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
1031 STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
1032 STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1033 STRTAB_STE_1_EATS);
1034 used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1035
1036 /*
1037 * See 13.5 Summary of attribute/permission configuration fields
1038 * for the SHCFG behavior.
1039 */
1040 if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
1041 STRTAB_STE_1_S1DSS_BYPASS)
1042 used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1043 }
1044
1045 /* S2 translates */
1046 if (cfg & BIT(1)) {
1047 used_bits[1] |=
1048 cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
1049 used_bits[2] |=
1050 cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1051 STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1052 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
1053 STRTAB_STE_2_S2R);
1054 used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1055 }
1056
1057 if (cfg == STRTAB_STE_0_CFG_BYPASS)
1058 used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1059 }
1060 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
1061
1062 /*
1063 * Figure out if we can do a hitless update of entry to become target. Returns a
1064 * bit mask where 1 indicates that qword needs to be set disruptively.
1065 * unused_update is an intermediate value of entry that has unused bits set to
1066 * their new values.
1067 */
arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer * writer,const __le64 * entry,const __le64 * target,__le64 * unused_update)1068 static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
1069 const __le64 *entry, const __le64 *target,
1070 __le64 *unused_update)
1071 {
1072 __le64 target_used[NUM_ENTRY_QWORDS] = {};
1073 __le64 cur_used[NUM_ENTRY_QWORDS] = {};
1074 u8 used_qword_diff = 0;
1075 unsigned int i;
1076
1077 writer->ops->get_used(entry, cur_used);
1078 writer->ops->get_used(target, target_used);
1079
1080 for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
1081 /*
1082 * Check that masks are up to date, the make functions are not
1083 * allowed to set a bit to 1 if the used function doesn't say it
1084 * is used.
1085 */
1086 WARN_ON_ONCE(target[i] & ~target_used[i]);
1087
1088 /* Bits can change because they are not currently being used */
1089 unused_update[i] = (entry[i] & cur_used[i]) |
1090 (target[i] & ~cur_used[i]);
1091 /*
1092 * Each bit indicates that a used bit in a qword needs to be
1093 * changed after unused_update is applied.
1094 */
1095 if ((unused_update[i] & target_used[i]) != target[i])
1096 used_qword_diff |= 1 << i;
1097 }
1098 return used_qword_diff;
1099 }
1100
entry_set(struct arm_smmu_entry_writer * writer,__le64 * entry,const __le64 * target,unsigned int start,unsigned int len)1101 static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
1102 const __le64 *target, unsigned int start,
1103 unsigned int len)
1104 {
1105 bool changed = false;
1106 unsigned int i;
1107
1108 for (i = start; len != 0; len--, i++) {
1109 if (entry[i] != target[i]) {
1110 WRITE_ONCE(entry[i], target[i]);
1111 changed = true;
1112 }
1113 }
1114
1115 if (changed)
1116 writer->ops->sync(writer);
1117 return changed;
1118 }
1119
1120 /*
1121 * Update the STE/CD to the target configuration. The transition from the
1122 * current entry to the target entry takes place over multiple steps that
1123 * attempts to make the transition hitless if possible. This function takes care
1124 * not to create a situation where the HW can perceive a corrupted entry. HW is
1125 * only required to have a 64 bit atomicity with stores from the CPU, while
1126 * entries are many 64 bit values big.
1127 *
1128 * The difference between the current value and the target value is analyzed to
1129 * determine which of three updates are required - disruptive, hitless or no
1130 * change.
1131 *
1132 * In the most general disruptive case we can make any update in three steps:
1133 * - Disrupting the entry (V=0)
1134 * - Fill now unused qwords, execpt qword 0 which contains V
1135 * - Make qword 0 have the final value and valid (V=1) with a single 64
1136 * bit store
1137 *
1138 * However this disrupts the HW while it is happening. There are several
1139 * interesting cases where a STE/CD can be updated without disturbing the HW
1140 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
1141 * because the used bits don't intersect. We can detect this by calculating how
1142 * many 64 bit values need update after adjusting the unused bits and skip the
1143 * V=0 process. This relies on the IGNORED behavior described in the
1144 * specification.
1145 */
1146 VISIBLE_IF_KUNIT
arm_smmu_write_entry(struct arm_smmu_entry_writer * writer,__le64 * entry,const __le64 * target)1147 void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
1148 const __le64 *target)
1149 {
1150 __le64 unused_update[NUM_ENTRY_QWORDS];
1151 u8 used_qword_diff;
1152
1153 used_qword_diff =
1154 arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
1155 if (hweight8(used_qword_diff) == 1) {
1156 /*
1157 * Only one qword needs its used bits to be changed. This is a
1158 * hitless update, update all bits the current STE/CD is
1159 * ignoring to their new values, then update a single "critical
1160 * qword" to change the STE/CD and finally 0 out any bits that
1161 * are now unused in the target configuration.
1162 */
1163 unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1164
1165 /*
1166 * Skip writing unused bits in the critical qword since we'll be
1167 * writing it in the next step anyways. This can save a sync
1168 * when the only change is in that qword.
1169 */
1170 unused_update[critical_qword_index] =
1171 entry[critical_qword_index];
1172 entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
1173 entry_set(writer, entry, target, critical_qword_index, 1);
1174 entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
1175 } else if (used_qword_diff) {
1176 /*
1177 * At least two qwords need their inuse bits to be changed. This
1178 * requires a breaking update, zero the V bit, write all qwords
1179 * but 0, then set qword 0
1180 */
1181 unused_update[0] = 0;
1182 entry_set(writer, entry, unused_update, 0, 1);
1183 entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
1184 entry_set(writer, entry, target, 0, 1);
1185 } else {
1186 /*
1187 * No inuse bit changed. Sanity check that all unused bits are 0
1188 * in the entry. The target was already sanity checked by
1189 * compute_qword_diff().
1190 */
1191 WARN_ON_ONCE(
1192 entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
1193 }
1194 }
1195 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
1196
/*
 * Queue one CMDQ_OP_CFGI_CD command for @ssid per stream ID owned by @master,
 * with the given @leaf flag, and submit them as a single batch.
 */
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
			     int ssid, bool leaf)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_batch batch;
	struct arm_smmu_cmdq_ent cfgi = {
		.opcode = CMDQ_OP_CFGI_CD,
		.cfgi.ssid = ssid,
		.cfgi.leaf = leaf,
	};
	size_t n = 0;

	arm_smmu_cmdq_batch_init(smmu, &batch, &cfgi);

	/* Same command for each stream, only the SID differs */
	while (n < master->num_streams) {
		cfgi.cfgi.sid = master->streams[n].id;
		arm_smmu_cmdq_batch_add(smmu, &batch, &cfgi);
		n++;
	}

	arm_smmu_cmdq_batch_submit(smmu, &batch);
}
1219
/*
 * Publish an L1 CD table descriptor: the masked DMA address of the L2 table
 * with the valid bit set.
 */
static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
				      dma_addr_t l2ptr_dma)
{
	u64 desc = CTXDESC_L1_DESC_V;

	desc |= l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK;

	/* One 64 bit store, which the HW observes atomically */
	WRITE_ONCE(dst->l2ptr, cpu_to_le64(desc));
}
1228
arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 * src)1229 static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
1230 {
1231 return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
1232 }
1233
/*
 * Look up the context descriptor slot for @ssid in @master's CD table.
 *
 * Returns NULL if the CD table has not been allocated, or, for a two-level
 * table, if the L2 table covering @ssid does not exist. Nothing is allocated
 * here.
 */
struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
					u32 ssid)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_cdtab_l2 *leaf;

	if (!arm_smmu_cdtab_allocated(cd_table))
		return NULL;

	if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
		leaf = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
		if (leaf)
			return &leaf->cds[arm_smmu_cdtab_l2_idx(ssid)];
		return NULL;
	}

	return &cd_table->linear.table[ssid];
}
1251
/*
 * Like arm_smmu_get_cd_ptr(), but allocates whatever is missing on the way:
 * the CD table itself and, for the two-level format, the L2 table covering
 * @ssid. A freshly allocated L2 table is hooked into the L1 table and the
 * change is synced with a non-leaf CD invalidation.
 *
 * May sleep and asserts the IOMMU group mutex. Returns NULL if an allocation
 * fails.
 */
static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
						 u32 ssid)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;

	might_sleep();
	iommu_group_mutex_assert(master->dev);

	if (!arm_smmu_cdtab_allocated(cd_table) &&
	    arm_smmu_alloc_cd_tables(master))
		return NULL;

	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
		unsigned int l1_idx = arm_smmu_cdtab_l1_idx(ssid);

		if (!cd_table->l2.l2ptrs[l1_idx]) {
			struct arm_smmu_cdtab_l2 *l2;
			dma_addr_t l2_dma;

			l2 = dma_alloc_coherent(smmu->dev, sizeof(*l2),
						&l2_dma, GFP_KERNEL);
			if (!l2)
				return NULL;

			cd_table->l2.l2ptrs[l1_idx] = l2;
			arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[l1_idx],
						  l2_dma);
			/* The HW may have cached the previously invalid L1CD */
			arm_smmu_sync_cd(master, ssid, false);
		}
	}

	return arm_smmu_get_cd_ptr(master, ssid);
}
1286
1287 struct arm_smmu_cd_writer {
1288 struct arm_smmu_entry_writer writer;
1289 unsigned int ssid;
1290 };
1291
1292 VISIBLE_IF_KUNIT
arm_smmu_get_cd_used(const __le64 * ent,__le64 * used_bits)1293 void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
1294 {
1295 used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
1296 if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
1297 return;
1298 memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
1299
1300 /*
1301 * If EPD0 is set by the make function it means
1302 * T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
1303 */
1304 if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
1305 used_bits[0] &= ~cpu_to_le64(
1306 CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
1307 CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
1308 CTXDESC_CD_0_TCR_SH0);
1309 used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
1310 }
1311 }
1312 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
1313
arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer * writer)1314 static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1315 {
1316 struct arm_smmu_cd_writer *cd_writer =
1317 container_of(writer, struct arm_smmu_cd_writer, writer);
1318
1319 arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
1320 }
1321
1322 static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
1323 .sync = arm_smmu_cd_writer_sync_entry,
1324 .get_used = arm_smmu_get_cd_used,
1325 };
1326
arm_smmu_write_cd_entry(struct arm_smmu_master * master,int ssid,struct arm_smmu_cd * cdptr,const struct arm_smmu_cd * target)1327 void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
1328 struct arm_smmu_cd *cdptr,
1329 const struct arm_smmu_cd *target)
1330 {
1331 bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1332 bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
1333 struct arm_smmu_cd_writer cd_writer = {
1334 .writer = {
1335 .ops = &arm_smmu_cd_writer_ops,
1336 .master = master,
1337 },
1338 .ssid = ssid,
1339 };
1340
1341 if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
1342 if (cur_valid)
1343 master->cd_table.used_ssids--;
1344 else
1345 master->cd_table.used_ssids++;
1346 }
1347
1348 arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
1349 }
1350
arm_smmu_make_s1_cd(struct arm_smmu_cd * target,struct arm_smmu_master * master,struct arm_smmu_domain * smmu_domain)1351 void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
1352 struct arm_smmu_master *master,
1353 struct arm_smmu_domain *smmu_domain)
1354 {
1355 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
1356 const struct io_pgtable_cfg *pgtbl_cfg =
1357 &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1358 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
1359 &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1360
1361 memset(target, 0, sizeof(*target));
1362
1363 target->data[0] = cpu_to_le64(
1364 FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
1365 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
1366 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
1367 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
1368 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
1369 #ifdef __BIG_ENDIAN
1370 CTXDESC_CD_0_ENDI |
1371 #endif
1372 CTXDESC_CD_0_TCR_EPD1 |
1373 CTXDESC_CD_0_V |
1374 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
1375 CTXDESC_CD_0_AA64 |
1376 (master->stall_enabled ? CTXDESC_CD_0_S : 0) |
1377 CTXDESC_CD_0_R |
1378 CTXDESC_CD_0_A |
1379 CTXDESC_CD_0_ASET |
1380 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
1381 );
1382
1383 /* To enable dirty flag update, set both Access flag and dirty state update */
1384 if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
1385 target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
1386 CTXDESC_CD_0_TCR_HD);
1387
1388 target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
1389 CTXDESC_CD_1_TTB0_MASK);
1390 target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
1391 }
1392 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
1393
/*
 * Replace the CD for @ssid with an all-zero (invalid) descriptor. Does nothing
 * if the CD table was never allocated; warns and bails out if the table exists
 * but has no slot for @ssid.
 */
void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
{
	struct arm_smmu_cd *cdptr;
	struct arm_smmu_cd blank = {};

	if (arm_smmu_cdtab_allocated(&master->cd_table)) {
		cdptr = arm_smmu_get_cd_ptr(master, ssid);
		if (!WARN_ON(!cdptr))
			arm_smmu_write_cd_entry(master, ssid, cdptr, &blank);
	}
}
1406
arm_smmu_alloc_cd_tables(struct arm_smmu_master * master)1407 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1408 {
1409 int ret;
1410 size_t l1size;
1411 size_t max_contexts;
1412 struct arm_smmu_device *smmu = master->smmu;
1413 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1414
1415 cd_table->s1cdmax = master->ssid_bits;
1416 max_contexts = 1 << cd_table->s1cdmax;
1417
1418 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1419 max_contexts <= CTXDESC_L2_ENTRIES) {
1420 cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1421 cd_table->linear.num_ents = max_contexts;
1422
1423 l1size = max_contexts * sizeof(struct arm_smmu_cd);
1424 cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
1425 &cd_table->cdtab_dma,
1426 GFP_KERNEL);
1427 if (!cd_table->linear.table)
1428 return -ENOMEM;
1429 } else {
1430 cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1431 cd_table->l2.num_l1_ents =
1432 DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
1433
1434 cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
1435 sizeof(*cd_table->l2.l2ptrs),
1436 GFP_KERNEL);
1437 if (!cd_table->l2.l2ptrs)
1438 return -ENOMEM;
1439
1440 l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
1441 cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
1442 &cd_table->cdtab_dma,
1443 GFP_KERNEL);
1444 if (!cd_table->l2.l2ptrs) {
1445 ret = -ENOMEM;
1446 goto err_free_l2ptrs;
1447 }
1448 }
1449 return 0;
1450
1451 err_free_l2ptrs:
1452 kfree(cd_table->l2.l2ptrs);
1453 cd_table->l2.l2ptrs = NULL;
1454 return ret;
1455 }
1456
arm_smmu_free_cd_tables(struct arm_smmu_master * master)1457 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1458 {
1459 int i;
1460 struct arm_smmu_device *smmu = master->smmu;
1461 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1462
1463 if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
1464 for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
1465 if (!cd_table->l2.l2ptrs[i])
1466 continue;
1467
1468 dma_free_coherent(smmu->dev,
1469 sizeof(*cd_table->l2.l2ptrs[i]),
1470 cd_table->l2.l2ptrs[i],
1471 arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
1472 }
1473 kfree(cd_table->l2.l2ptrs);
1474
1475 dma_free_coherent(smmu->dev,
1476 cd_table->l2.num_l1_ents *
1477 sizeof(struct arm_smmu_cdtab_l1),
1478 cd_table->l2.l1tab, cd_table->cdtab_dma);
1479 } else {
1480 dma_free_coherent(smmu->dev,
1481 cd_table->linear.num_ents *
1482 sizeof(struct arm_smmu_cd),
1483 cd_table->linear.table, cd_table->cdtab_dma);
1484 }
1485 }
1486
1487 /* Stream table manipulation functions */
/*
 * Publish an L1 stream table descriptor: a span of STRTAB_SPLIT + 1 combined
 * with the masked DMA address of the L2 table.
 */
static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
					  dma_addr_t l2ptr_dma)
{
	u64 desc = FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);

	desc |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	/* One 64 bit store, which the HW observes atomically */
	WRITE_ONCE(dst->l2ptr, cpu_to_le64(desc));
}
1499
1500 struct arm_smmu_ste_writer {
1501 struct arm_smmu_entry_writer writer;
1502 u32 sid;
1503 };
1504
arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer * writer)1505 static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
1506 {
1507 struct arm_smmu_ste_writer *ste_writer =
1508 container_of(writer, struct arm_smmu_ste_writer, writer);
1509 struct arm_smmu_cmdq_ent cmd = {
1510 .opcode = CMDQ_OP_CFGI_STE,
1511 .cfgi = {
1512 .sid = ste_writer->sid,
1513 .leaf = true,
1514 },
1515 };
1516
1517 arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
1518 }
1519
1520 static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
1521 .sync = arm_smmu_ste_writer_sync_entry,
1522 .get_used = arm_smmu_get_ste_used,
1523 };
1524
/*
 * Install @target into the stream table entry @ste for stream @sid, then,
 * unless ARM_SMMU_OPT_SKIP_PREFETCH is set, queue a CMDQ_OP_PREFETCH_CFG for
 * that stream.
 */
static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
			       struct arm_smmu_ste *ste,
			       const struct arm_smmu_ste *target)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_ste_writer ste_writer = {
		.writer.ops = &arm_smmu_ste_writer_ops,
		.writer.master = master,
		.sid = sid,
	};
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode = CMDQ_OP_PREFETCH_CFG,
		.prefetch.sid = sid,
	};

	arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);

	if (smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)
		return;

	/* The new STE will probably be needed soon, so ask for a prefetch */
	arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
1551
1552 VISIBLE_IF_KUNIT
arm_smmu_make_abort_ste(struct arm_smmu_ste * target)1553 void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1554 {
1555 memset(target, 0, sizeof(*target));
1556 target->data[0] = cpu_to_le64(
1557 STRTAB_STE_0_V |
1558 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1559 }
1560 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
1561
1562 VISIBLE_IF_KUNIT
arm_smmu_make_bypass_ste(struct arm_smmu_device * smmu,struct arm_smmu_ste * target)1563 void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1564 struct arm_smmu_ste *target)
1565 {
1566 memset(target, 0, sizeof(*target));
1567 target->data[0] = cpu_to_le64(
1568 STRTAB_STE_0_V |
1569 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1570
1571 if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1572 target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1573 STRTAB_STE_1_SHCFG_INCOMING));
1574 }
1575 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
1576
1577 VISIBLE_IF_KUNIT
arm_smmu_make_cdtable_ste(struct arm_smmu_ste * target,struct arm_smmu_master * master,bool ats_enabled,unsigned int s1dss)1578 void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1579 struct arm_smmu_master *master, bool ats_enabled,
1580 unsigned int s1dss)
1581 {
1582 struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1583 struct arm_smmu_device *smmu = master->smmu;
1584
1585 memset(target, 0, sizeof(*target));
1586 target->data[0] = cpu_to_le64(
1587 STRTAB_STE_0_V |
1588 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1589 FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1590 (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1591 FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1592
1593 target->data[1] = cpu_to_le64(
1594 FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
1595 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1596 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1597 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1598 ((smmu->features & ARM_SMMU_FEAT_STALLS &&
1599 !master->stall_enabled) ?
1600 STRTAB_STE_1_S1STALLD :
1601 0) |
1602 FIELD_PREP(STRTAB_STE_1_EATS,
1603 ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1604
1605 if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
1606 s1dss == STRTAB_STE_1_S1DSS_BYPASS)
1607 target->data[1] |= cpu_to_le64(FIELD_PREP(
1608 STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1609
1610 if (smmu->features & ARM_SMMU_FEAT_E2H) {
1611 /*
1612 * To support BTM the streamworld needs to match the
1613 * configuration of the CPU so that the ASID broadcasts are
1614 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1615 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1616 * PASID this should always use a BTM compatible configuration
1617 * if the HW supports it.
1618 */
1619 target->data[1] |= cpu_to_le64(
1620 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1621 } else {
1622 target->data[1] |= cpu_to_le64(
1623 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1624
1625 /*
1626 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1627 * arm_smmu_domain_alloc_id()
1628 */
1629 target->data[2] =
1630 cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1631 }
1632 }
1633 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
1634
1635 VISIBLE_IF_KUNIT
arm_smmu_make_s2_domain_ste(struct arm_smmu_ste * target,struct arm_smmu_master * master,struct arm_smmu_domain * smmu_domain,bool ats_enabled)1636 void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1637 struct arm_smmu_master *master,
1638 struct arm_smmu_domain *smmu_domain,
1639 bool ats_enabled)
1640 {
1641 struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1642 const struct io_pgtable_cfg *pgtbl_cfg =
1643 &io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1644 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1645 &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1646 u64 vtcr_val;
1647 struct arm_smmu_device *smmu = master->smmu;
1648
1649 memset(target, 0, sizeof(*target));
1650 target->data[0] = cpu_to_le64(
1651 STRTAB_STE_0_V |
1652 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1653
1654 target->data[1] = cpu_to_le64(
1655 FIELD_PREP(STRTAB_STE_1_EATS,
1656 ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1657
1658 if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1659 target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1660 STRTAB_STE_1_SHCFG_INCOMING));
1661
1662 vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1663 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1664 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1665 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1666 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1667 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1668 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1669 target->data[2] = cpu_to_le64(
1670 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1671 FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1672 STRTAB_STE_2_S2AA64 |
1673 #ifdef __BIG_ENDIAN
1674 STRTAB_STE_2_S2ENDI |
1675 #endif
1676 STRTAB_STE_2_S2PTW |
1677 (master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
1678 STRTAB_STE_2_S2R);
1679
1680 target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1681 STRTAB_STE_3_S2TTB_MASK);
1682 }
1683 EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
1684
1685 /*
1686 * This can safely directly manipulate the STE memory without a sync sequence
1687 * because the STE table has not been installed in the SMMU yet.
1688 */
arm_smmu_init_initial_stes(struct arm_smmu_ste * strtab,unsigned int nent)1689 static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
1690 unsigned int nent)
1691 {
1692 unsigned int i;
1693
1694 for (i = 0; i < nent; ++i) {
1695 arm_smmu_make_abort_ste(strtab);
1696 strtab++;
1697 }
1698 }
1699
/*
 * Make sure the L2 stream table covering @sid exists. A missing table is
 * allocated as device-managed coherent memory, filled with abort STEs and
 * then linked from the L1 table.
 *
 * Returns 0 if the table already existed or was created, -ENOMEM if the
 * allocation failed.
 */
static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l2 **slot;
	dma_addr_t l2_dma;

	slot = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
	if (*slot)
		return 0;

	*slot = dmam_alloc_coherent(smmu->dev, sizeof(**slot), &l2_dma,
				    GFP_KERNEL);
	if (*slot) {
		/* Populate the STEs first, publish the L1 descriptor last */
		arm_smmu_init_initial_stes((*slot)->stes,
					   ARRAY_SIZE((*slot)->stes));
		arm_smmu_write_strtab_l1_desc(
			&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)], l2_dma);
		return 0;
	}

	dev_err(smmu->dev,
		"failed to allocate l2 stream table for SID %u\n",
		sid);
	return -ENOMEM;
}
1725
arm_smmu_streams_cmp_key(const void * lhs,const struct rb_node * rhs)1726 static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
1727 {
1728 struct arm_smmu_stream *stream_rhs =
1729 rb_entry(rhs, struct arm_smmu_stream, node);
1730 const u32 *sid_lhs = lhs;
1731
1732 if (*sid_lhs < stream_rhs->id)
1733 return -1;
1734 if (*sid_lhs > stream_rhs->id)
1735 return 1;
1736 return 0;
1737 }
1738
arm_smmu_streams_cmp_node(struct rb_node * lhs,const struct rb_node * rhs)1739 static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
1740 const struct rb_node *rhs)
1741 {
1742 return arm_smmu_streams_cmp_key(
1743 &rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
1744 }
1745
1746 static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device * smmu,u32 sid)1747 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1748 {
1749 struct rb_node *node;
1750
1751 lockdep_assert_held(&smmu->streams_mutex);
1752
1753 node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
1754 if (!node)
1755 return NULL;
1756 return rb_entry(node, struct arm_smmu_stream, node)->master;
1757 }
1758
1759 /* IRQ and event handlers */
/*
 * Turn a stalled fault event into an IOMMU page request and report it for the
 * device that owns the faulting stream ID.
 *
 * Only translation, address size, access and permission faults that have the
 * STALL bit set are handled; everything else returns -EOPNOTSUPP. Returns
 * -EINVAL if no master is registered for the stream ID, otherwise the result
 * of iommu_report_device_fault().
 */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
	struct iopf_fault fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;
	struct arm_smmu_master *master;
	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
	u32 perm;
	int ret;

	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
	case EVT_ID_TRANSLATION_FAULT:
	case EVT_ID_ADDR_SIZE_FAULT:
	case EVT_ID_ACCESS_FAULT:
	case EVT_ID_PERMISSION_FAULT:
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (!(evt[1] & EVTQ_1_STALL))
		return -EOPNOTSUPP;

	/* The access is either a read or a write, plus optional qualifiers */
	perm = (evt[1] & EVTQ_1_RnW) ? IOMMU_FAULT_PERM_READ :
				       IOMMU_FAULT_PERM_WRITE;
	if (evt[1] & EVTQ_1_InD)
		perm |= IOMMU_FAULT_PERM_EXEC;
	if (evt[1] & EVTQ_1_PnU)
		perm |= IOMMU_FAULT_PERM_PRIV;

	flt->type = IOMMU_FAULT_PAGE_REQ;
	flt->prm = (struct iommu_fault_page_request) {
		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
		.perm = perm,
		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
	};

	/* Only pass a PASID along when the event carries a valid SSID */
	if (evt[0] & EVTQ_0_SSV) {
		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, sid);
	if (master)
		ret = iommu_report_device_fault(master->dev, &fault_evt);
	else
		ret = -EINVAL;
	mutex_unlock(&smmu->streams_mutex);

	return ret;
}
1819
arm_smmu_evtq_thread(int irq,void * dev)1820 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1821 {
1822 int i, ret;
1823 struct arm_smmu_device *smmu = dev;
1824 struct arm_smmu_queue *q = &smmu->evtq.q;
1825 struct arm_smmu_ll_queue *llq = &q->llq;
1826 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1827 DEFAULT_RATELIMIT_BURST);
1828 u64 evt[EVTQ_ENT_DWORDS];
1829
1830 do {
1831 while (!queue_remove_raw(q, evt)) {
1832 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1833
1834 ret = arm_smmu_handle_evt(smmu, evt);
1835 if (!ret || !__ratelimit(&rs))
1836 continue;
1837
1838 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1839 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1840 dev_info(smmu->dev, "\t0x%016llx\n",
1841 (unsigned long long)evt[i]);
1842
1843 cond_resched();
1844 }
1845
1846 /*
1847 * Not much we can do on overflow, so scream and pretend we're
1848 * trying harder.
1849 */
1850 if (queue_sync_prod_in(q) == -EOVERFLOW)
1851 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1852 } while (!queue_empty(llq));
1853
1854 /* Sync our overflow flag, as we believe we're up to speed */
1855 queue_sync_cons_ovf(q);
1856 return IRQ_HANDLED;
1857 }
1858
/*
 * Log an unexpected PRI page request and, if the record is flagged as the
 * last one of its page request group, answer the group with PRI_RESP_DENY.
 */
static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
	u32 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
	bool ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
	u32 ssid = IOMMU_NO_PASID;
	bool last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
	u16 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
	struct arm_smmu_cmdq_ent cmd;

	/* The SSID field is only meaningful when the valid bit is set */
	if (ssv)
		ssid = FIELD_GET(PRIQ_0_SSID, evt[0]);

	dev_info(smmu->dev, "unexpected PRI request received:\n");
	dev_info(smmu->dev,
		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
		 sid, ssid, grpid, last ? "L" : "",
		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
		 evt[1] & PRIQ_1_ADDR_MASK);

	/* Only the record closing a group gets a response */
	if (!last)
		return;

	cmd = (struct arm_smmu_cmdq_ent) {
		.opcode			= CMDQ_OP_PRI_RESP,
		.substream_valid	= ssv,
		.pri			= {
			.sid	= sid,
			.ssid	= ssid,
			.grpid	= grpid,
			.resp	= PRI_RESP_DENY,
		},
	};
	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
}
1896
arm_smmu_priq_thread(int irq,void * dev)1897 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1898 {
1899 struct arm_smmu_device *smmu = dev;
1900 struct arm_smmu_queue *q = &smmu->priq.q;
1901 struct arm_smmu_ll_queue *llq = &q->llq;
1902 u64 evt[PRIQ_ENT_DWORDS];
1903
1904 do {
1905 while (!queue_remove_raw(q, evt))
1906 arm_smmu_handle_ppr(smmu, evt);
1907
1908 if (queue_sync_prod_in(q) == -EOVERFLOW)
1909 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1910 } while (!queue_empty(llq));
1911
1912 /* Sync our overflow flag, as we believe we're up to speed */
1913 queue_sync_cons_ovf(q);
1914 return IRQ_HANDLED;
1915 }
1916
1917 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1918
arm_smmu_gerror_handler(int irq,void * dev)1919 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1920 {
1921 u32 gerror, gerrorn, active;
1922 struct arm_smmu_device *smmu = dev;
1923
1924 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1925 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1926
1927 active = gerror ^ gerrorn;
1928 if (!(active & GERROR_ERR_MASK))
1929 return IRQ_NONE; /* No errors pending */
1930
1931 dev_warn(smmu->dev,
1932 "unexpected global error reported (0x%08x), this could be serious\n",
1933 active);
1934
1935 if (active & GERROR_SFM_ERR) {
1936 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1937 arm_smmu_device_disable(smmu);
1938 }
1939
1940 if (active & GERROR_MSI_GERROR_ABT_ERR)
1941 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1942
1943 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1944 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1945
1946 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1947 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1948
1949 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1950 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1951
1952 if (active & GERROR_PRIQ_ABT_ERR)
1953 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1954
1955 if (active & GERROR_EVTQ_ABT_ERR)
1956 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1957
1958 if (active & GERROR_CMDQ_ERR)
1959 arm_smmu_cmdq_skip_err(smmu);
1960
1961 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1962 return IRQ_HANDLED;
1963 }
1964
arm_smmu_combined_irq_thread(int irq,void * dev)1965 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1966 {
1967 struct arm_smmu_device *smmu = dev;
1968
1969 arm_smmu_evtq_thread(irq, dev);
1970 if (smmu->features & ARM_SMMU_FEAT_PRI)
1971 arm_smmu_priq_thread(irq, dev);
1972
1973 return IRQ_HANDLED;
1974 }
1975
arm_smmu_combined_irq_handler(int irq,void * dev)1976 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1977 {
1978 arm_smmu_gerror_handler(irq, dev);
1979 return IRQ_WAKE_THREAD;
1980 }
1981
1982 static void
arm_smmu_atc_inv_to_cmd(int ssid,unsigned long iova,size_t size,struct arm_smmu_cmdq_ent * cmd)1983 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1984 struct arm_smmu_cmdq_ent *cmd)
1985 {
1986 size_t log2_span;
1987 size_t span_mask;
1988 /* ATC invalidates are always on 4096-bytes pages */
1989 size_t inval_grain_shift = 12;
1990 unsigned long page_start, page_end;
1991
1992 /*
1993 * ATS and PASID:
1994 *
1995 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1996 * prefix. In that case all ATC entries within the address range are
1997 * invalidated, including those that were requested with a PASID! There
1998 * is no way to invalidate only entries without PASID.
1999 *
2000 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
2001 * traffic), translation requests without PASID create ATC entries
2002 * without PASID, which must be invalidated with substream_valid clear.
2003 * This has the unpleasant side-effect of invalidating all PASID-tagged
2004 * ATC entries within the address range.
2005 */
2006 *cmd = (struct arm_smmu_cmdq_ent) {
2007 .opcode = CMDQ_OP_ATC_INV,
2008 .substream_valid = (ssid != IOMMU_NO_PASID),
2009 .atc.ssid = ssid,
2010 };
2011
2012 if (!size) {
2013 cmd->atc.size = ATC_INV_SIZE_ALL;
2014 return;
2015 }
2016
2017 page_start = iova >> inval_grain_shift;
2018 page_end = (iova + size - 1) >> inval_grain_shift;
2019
2020 /*
2021 * In an ATS Invalidate Request, the address must be aligned on the
2022 * range size, which must be a power of two number of page sizes. We
2023 * thus have to choose between grossly over-invalidating the region, or
2024 * splitting the invalidation into multiple commands. For simplicity
2025 * we'll go with the first solution, but should refine it in the future
2026 * if multiple commands are shown to be more efficient.
2027 *
2028 * Find the smallest power of two that covers the range. The most
2029 * significant differing bit between the start and end addresses,
2030 * fls(start ^ end), indicates the required span. For example:
2031 *
2032 * We want to invalidate pages [8; 11]. This is already the ideal range:
2033 * x = 0b1000 ^ 0b1011 = 0b11
2034 * span = 1 << fls(x) = 4
2035 *
2036 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2037 * x = 0b0111 ^ 0b1010 = 0b1101
2038 * span = 1 << fls(x) = 16
2039 */
2040 log2_span = fls_long(page_start ^ page_end);
2041 span_mask = (1ULL << log2_span) - 1;
2042
2043 page_start &= ~span_mask;
2044
2045 cmd->atc.addr = page_start << inval_grain_shift;
2046 cmd->atc.size = log2_span;
2047 }
2048
/*
 * Issue a full-range ATC invalidation for @ssid to every stream of @master,
 * batched into one submission. Returns the result of submitting the batch.
 */
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
				   ioasid_t ssid)
{
	struct arm_smmu_device *smmu = master->smmu;
	struct arm_smmu_cmdq_batch batch;
	struct arm_smmu_cmdq_ent inv;
	int idx;

	/* iova == 0 and size == 0 select "invalidate all" */
	arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &inv);

	arm_smmu_cmdq_batch_init(smmu, &batch, &inv);
	for (idx = 0; idx < master->num_streams; idx++) {
		inv.atc.sid = master->streams[idx].id;
		arm_smmu_cmdq_batch_add(smmu, &batch, &inv);
	}

	return arm_smmu_cmdq_batch_submit(smmu, &batch);
}
2066
/*
 * Invalidate [@iova, @iova + @size) in the ATC of every ATS-enabled master
 * on @smmu_domain's device list. Returns 0 without issuing anything when the
 * SMMU lacks ATS or the domain has no ATS masters; otherwise returns the
 * result of submitting the command batch.
 */
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
			    unsigned long iova, size_t size)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent inv = {
		.opcode = CMDQ_OP_ATC_INV,
	};
	struct arm_smmu_master_domain *entry;
	struct arm_smmu_cmdq_batch batch;
	unsigned long irqflags;
	int idx;

	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (atomic_read(&smmu_domain->nr_ats_masters) == 0)
		return 0;

	arm_smmu_cmdq_batch_init(smmu, &batch, &inv);

	spin_lock_irqsave(&smmu_domain->devices_lock, irqflags);
	list_for_each_entry(entry, &smmu_domain->devices, devices_elm) {
		struct arm_smmu_master *master = entry->master;

		if (master->ats_enabled) {
			/* Rebuild the command for this attachment's SSID */
			arm_smmu_atc_inv_to_cmd(entry->ssid, iova, size, &inv);

			for (idx = 0; idx < master->num_streams; idx++) {
				inv.atc.sid = master->streams[idx].id;
				arm_smmu_cmdq_batch_add(smmu, &batch, &inv);
			}
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, irqflags);

	return arm_smmu_cmdq_batch_submit(smmu, &batch);
}
2119
2120 /* IO_PGTABLE API */
arm_smmu_tlb_inv_context(void * cookie)2121 static void arm_smmu_tlb_inv_context(void *cookie)
2122 {
2123 struct arm_smmu_domain *smmu_domain = cookie;
2124 struct arm_smmu_device *smmu = smmu_domain->smmu;
2125 struct arm_smmu_cmdq_ent cmd;
2126
2127 /*
2128 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2129 * PTEs previously cleared by unmaps on the current CPU not yet visible
2130 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2131 * insertion to guarantee those are observed before the TLBI. Do be
2132 * careful, 007.
2133 */
2134 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2135 arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2136 } else {
2137 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
2138 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2139 arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2140 }
2141 arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
2142 }
2143
/*
 * Emit the TLBI commands needed to invalidate [@iova, @iova + @size).
 *
 * @cmd arrives with its opcode, ASID/VMID and leaf flag already set; this
 * function fills in the address (and, with range invalidation, the
 * tg/ttl/scale/num fields) for each command and submits them as one batch.
 * Without ARM_SMMU_FEAT_RANGE_INV one command is issued per @granule.
 */
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
				     unsigned long iova, size_t size,
				     size_t granule,
				     struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	const bool use_range = smmu->features & ARM_SMMU_FEAT_RANGE_INV;
	unsigned long limit = iova + size;
	unsigned long pages_left = 0;
	unsigned long tg_shift = 0;
	size_t step = granule;
	struct arm_smmu_cmdq_batch batch;

	if (size == 0)
		return;

	if (use_range) {
		/* Get the leaf page size */
		tg_shift = __ffs(smmu_domain->domain.pgsize_bitmap);

		pages_left = size >> tg_shift;

		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
		cmd->tlbi.tg = (tg_shift - 10) / 2;

		/*
		 * Determine what level the granule is at. For non-leaf, both
		 * io-pgtable and SVA pass a nominal last-level granule because
		 * they don't know what level(s) actually apply, so ignore that
		 * and leave TTL=0. However for various errata reasons we still
		 * want to use a range command, so avoid the SVA corner case
		 * where both scale and num could be 0 as well.
		 */
		if (cmd->tlbi.leaf)
			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg_shift - 3));
		else if ((pages_left & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
			pages_left++;
	}

	arm_smmu_cmdq_batch_init(smmu, &batch, cmd);

	for (; iova < limit; iova += step) {
		if (use_range) {
			/*
			 * On each iteration of the loop, the range is 5 bits
			 * worth of the aligned size remaining.
			 * The range in pages is:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
			unsigned long scale, chunks;

			/* Determine the power of 2 multiple number of pages */
			scale = __ffs(pages_left);
			cmd->tlbi.scale = scale;

			/* Determine how many chunks of 2^scale size we have */
			chunks = (pages_left >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
			cmd->tlbi.num = chunks - 1;

			/* range is num * 2^scale * pgsize */
			step = chunks << (scale + tg_shift);

			/* Clear out the lower order bits for the next iteration */
			pages_left -= chunks << scale;
		}

		cmd->tlbi.addr = iova;
		arm_smmu_cmdq_batch_add(smmu, &batch, cmd);
	}
	arm_smmu_cmdq_batch_submit(smmu, &batch);
}
2214
/*
 * Invalidate [@iova, @iova + @size) for @smmu_domain in the SMMU TLBs, using
 * the command that matches the domain's stage, then invalidate the same
 * range in the ATCs of its masters.
 */
static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
					  size_t granule, bool leaf,
					  struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_cmdq_ent cmd = {
		.tlbi = {
			.leaf	= leaf,
		},
	};

	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) {
		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
	} else {
		if (smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H)
			cmd.opcode = CMDQ_OP_TLBI_EL2_VA;
		else
			cmd.opcode = CMDQ_OP_TLBI_NH_VA;
		cmd.tlbi.asid	= smmu_domain->cd.asid;
	}
	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);

	/*
	 * Unfortunately, this can't be leaf-only since we may have
	 * zapped an entire table.
	 */
	arm_smmu_atc_inv_domain(smmu_domain, iova, size);
}
2241
/*
 * Invalidate [@iova, @iova + @size) in the SMMU TLBs for an explicitly given
 * @asid, using the EL2 or non-hyp VA opcode depending on ARM_SMMU_FEAT_E2H.
 * No ATC invalidation is issued here.
 */
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
				 size_t granule, bool leaf,
				 struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_cmdq_ent cmd = {
		.tlbi = {
			.asid	= asid,
			.leaf	= leaf,
		},
	};

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H)
		cmd.opcode = CMDQ_OP_TLBI_EL2_VA;
	else
		cmd.opcode = CMDQ_OP_TLBI_NH_VA;

	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
}
2257
/*
 * tlb_add_page callback: record the page in @gather instead of invalidating
 * it here. @cookie is the arm_smmu_domain.
 */
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
					 unsigned long iova, size_t granule,
					 void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	iommu_iotlb_gather_add_page(&smmu_domain->domain, gather, iova,
				    granule);
}
2267
/*
 * tlb_flush_walk callback: non-leaf range invalidation for the domain passed
 * as @cookie.
 */
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
				  size_t granule, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, smmu_domain);
}
2273
2274 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2275 .tlb_flush_all = arm_smmu_tlb_inv_context,
2276 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2277 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
2278 };
2279
arm_smmu_dbm_capable(struct arm_smmu_device * smmu)2280 static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
2281 {
2282 u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
2283
2284 return (smmu->features & features) == features;
2285 }
2286
2287 /* IOMMU API */
arm_smmu_capable(struct device * dev,enum iommu_cap cap)2288 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2289 {
2290 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2291
2292 switch (cap) {
2293 case IOMMU_CAP_CACHE_COHERENCY:
2294 /* Assume that a coherent TCU implies coherent TBUs */
2295 return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2296 case IOMMU_CAP_NOEXEC:
2297 case IOMMU_CAP_DEFERRED_FLUSH:
2298 return true;
2299 case IOMMU_CAP_DIRTY_TRACKING:
2300 return arm_smmu_dbm_capable(master->smmu);
2301 default:
2302 return false;
2303 }
2304 }
2305
arm_smmu_domain_alloc(void)2306 struct arm_smmu_domain *arm_smmu_domain_alloc(void)
2307 {
2308 struct arm_smmu_domain *smmu_domain;
2309
2310 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2311 if (!smmu_domain)
2312 return ERR_PTR(-ENOMEM);
2313
2314 mutex_init(&smmu_domain->init_mutex);
2315 INIT_LIST_HEAD(&smmu_domain->devices);
2316 spin_lock_init(&smmu_domain->devices_lock);
2317
2318 return smmu_domain;
2319 }
2320
arm_smmu_domain_alloc_paging(struct device * dev)2321 static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2322 {
2323 struct arm_smmu_domain *smmu_domain;
2324
2325 /*
2326 * Allocate the domain and initialise some of its data structures.
2327 * We can't really do anything meaningful until we've added a
2328 * master.
2329 */
2330 smmu_domain = arm_smmu_domain_alloc();
2331 if (IS_ERR(smmu_domain))
2332 return ERR_CAST(smmu_domain);
2333
2334 if (dev) {
2335 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2336 int ret;
2337
2338 ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
2339 if (ret) {
2340 kfree(smmu_domain);
2341 return ERR_PTR(ret);
2342 }
2343 }
2344 return &smmu_domain->domain;
2345 }
2346
arm_smmu_domain_free_paging(struct iommu_domain * domain)2347 static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
2348 {
2349 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2350 struct arm_smmu_device *smmu = smmu_domain->smmu;
2351
2352 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2353
2354 /* Free the ASID or VMID */
2355 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2356 /* Prevent SVA from touching the CD while we're freeing it */
2357 mutex_lock(&arm_smmu_asid_lock);
2358 xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
2359 mutex_unlock(&arm_smmu_asid_lock);
2360 } else {
2361 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2362 if (cfg->vmid)
2363 ida_free(&smmu->vmid_map, cfg->vmid);
2364 }
2365
2366 kfree(smmu_domain);
2367 }
2368
arm_smmu_domain_finalise_s1(struct arm_smmu_device * smmu,struct arm_smmu_domain * smmu_domain)2369 static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2370 struct arm_smmu_domain *smmu_domain)
2371 {
2372 int ret;
2373 u32 asid = 0;
2374 struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2375
2376 /* Prevent SVA from modifying the ASID until it is written to the CD */
2377 mutex_lock(&arm_smmu_asid_lock);
2378 ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
2379 XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2380 cd->asid = (u16)asid;
2381 mutex_unlock(&arm_smmu_asid_lock);
2382 return ret;
2383 }
2384
arm_smmu_domain_finalise_s2(struct arm_smmu_device * smmu,struct arm_smmu_domain * smmu_domain)2385 static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2386 struct arm_smmu_domain *smmu_domain)
2387 {
2388 int vmid;
2389 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2390
2391 /* Reserve VMID 0 for stage-2 bypass STEs */
2392 vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2393 GFP_KERNEL);
2394 if (vmid < 0)
2395 return vmid;
2396
2397 cfg->vmid = (u16)vmid;
2398 return 0;
2399 }
2400
/*
 * Bind @smmu_domain to @smmu: pick a translation stage the hardware
 * supports, allocate io-pgtable ops in the matching format, set up the
 * domain geometry and run the stage-specific finaliser (ASID or VMID
 * allocation).
 *
 * @flags: IOMMU_HWPT_ALLOC_DIRTY_TRACKING requests dirty tracking, which is
 *         only accepted for stage 1.
 *
 * Returns 0 on success; -EOPNOTSUPP for dirty tracking on stage 2, -EINVAL
 * for an unknown stage, -ENOMEM if the page-table ops cannot be allocated,
 * or the stage finaliser's error.
 */
static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
				    struct arm_smmu_device *smmu, u32 flags)
{
	const bool want_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
	int (*stage_init)(struct arm_smmu_device *smmu,
			  struct arm_smmu_domain *smmu_domain);
	struct io_pgtable_cfg cfg = {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
		.tlb		= &arm_smmu_flush_ops,
		.iommu_dev	= smmu->dev,
	};
	struct io_pgtable_ops *ops;
	enum io_pgtable_fmt fmt;
	int err;

	/* Restrict the stage to what we can actually support */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		unsigned long va_bits;

		if (smmu->features & ARM_SMMU_FEAT_VAX)
			va_bits = 52;
		else
			va_bits = 48;

		cfg.ias = min_t(unsigned long, va_bits, VA_BITS);
		cfg.oas = smmu->ias;
		if (want_dirty)
			cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
		fmt = ARM_64_LPAE_S1;
		stage_init = arm_smmu_domain_finalise_s1;
	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
		if (want_dirty)
			return -EOPNOTSUPP;
		cfg.ias = smmu->ias;
		cfg.oas = smmu->oas;
		fmt = ARM_64_LPAE_S2;
		stage_init = arm_smmu_domain_finalise_s2;
	} else {
		return -EINVAL;
	}

	ops = alloc_io_pgtable_ops(fmt, &cfg, smmu_domain);
	if (ops == NULL)
		return -ENOMEM;

	/* Publish what the page-table code settled on */
	smmu_domain->domain.pgsize_bitmap = cfg.pgsize_bitmap;
	smmu_domain->domain.geometry.aperture_end = (1UL << cfg.ias) - 1;
	smmu_domain->domain.geometry.force_aperture = true;
	if (want_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;

	err = stage_init(smmu, smmu_domain);
	if (err < 0) {
		free_io_pgtable_ops(ops);
		return err;
	}

	/* Only a fully finalised domain gets its ops and smmu pointers set */
	smmu_domain->pgtbl_ops = ops;
	smmu_domain->smmu = smmu;
	return 0;
}
2470
2471 static struct arm_smmu_ste *
arm_smmu_get_step_for_sid(struct arm_smmu_device * smmu,u32 sid)2472 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2473 {
2474 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2475
2476 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2477 /* Two-level walk */
2478 return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
2479 ->stes[arm_smmu_strtab_l2_idx(sid)];
2480 } else {
2481 /* Simple linear lookup */
2482 return &cfg->linear.table[sid];
2483 }
2484 }
2485
arm_smmu_install_ste_for_dev(struct arm_smmu_master * master,const struct arm_smmu_ste * target)2486 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2487 const struct arm_smmu_ste *target)
2488 {
2489 int i, j;
2490 struct arm_smmu_device *smmu = master->smmu;
2491
2492 master->cd_table.in_ste =
2493 FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
2494 STRTAB_STE_0_CFG_S1_TRANS;
2495 master->ste_ats_enabled =
2496 FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
2497 STRTAB_STE_1_EATS_TRANS;
2498
2499 for (i = 0; i < master->num_streams; ++i) {
2500 u32 sid = master->streams[i].id;
2501 struct arm_smmu_ste *step =
2502 arm_smmu_get_step_for_sid(smmu, sid);
2503
2504 /* Bridged PCI devices may end up with duplicated IDs */
2505 for (j = 0; j < i; j++)
2506 if (master->streams[j].id == sid)
2507 break;
2508 if (j < i)
2509 continue;
2510
2511 arm_smmu_write_ste(master, sid, step, target);
2512 }
2513 }
2514
arm_smmu_ats_supported(struct arm_smmu_master * master)2515 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2516 {
2517 struct device *dev = master->dev;
2518 struct arm_smmu_device *smmu = master->smmu;
2519 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2520
2521 if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2522 return false;
2523
2524 if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2525 return false;
2526
2527 return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2528 }
2529
arm_smmu_enable_ats(struct arm_smmu_master * master)2530 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2531 {
2532 size_t stu;
2533 struct pci_dev *pdev;
2534 struct arm_smmu_device *smmu = master->smmu;
2535
2536 /* Smallest Translation Unit: log2 of the smallest supported granule */
2537 stu = __ffs(smmu->pgsize_bitmap);
2538 pdev = to_pci_dev(master->dev);
2539
2540 /*
2541 * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
2542 */
2543 arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2544 if (pci_enable_ats(pdev, stu))
2545 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2546 }
2547
arm_smmu_enable_pasid(struct arm_smmu_master * master)2548 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2549 {
2550 int ret;
2551 int features;
2552 int num_pasids;
2553 struct pci_dev *pdev;
2554
2555 if (!dev_is_pci(master->dev))
2556 return -ENODEV;
2557
2558 pdev = to_pci_dev(master->dev);
2559
2560 features = pci_pasid_features(pdev);
2561 if (features < 0)
2562 return features;
2563
2564 num_pasids = pci_max_pasids(pdev);
2565 if (num_pasids <= 0)
2566 return num_pasids;
2567
2568 ret = pci_enable_pasid(pdev, features);
2569 if (ret) {
2570 dev_err(&pdev->dev, "Failed to enable PASID\n");
2571 return ret;
2572 }
2573
2574 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2575 master->smmu->ssid_bits);
2576 return 0;
2577 }
2578
arm_smmu_disable_pasid(struct arm_smmu_master * master)2579 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2580 {
2581 struct pci_dev *pdev;
2582
2583 if (!dev_is_pci(master->dev))
2584 return;
2585
2586 pdev = to_pci_dev(master->dev);
2587
2588 if (!pdev->pasid_enabled)
2589 return;
2590
2591 master->ssid_bits = 0;
2592 pci_disable_pasid(pdev);
2593 }
2594
2595 static struct arm_smmu_master_domain *
arm_smmu_find_master_domain(struct arm_smmu_domain * smmu_domain,struct arm_smmu_master * master,ioasid_t ssid)2596 arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
2597 struct arm_smmu_master *master,
2598 ioasid_t ssid)
2599 {
2600 struct arm_smmu_master_domain *master_domain;
2601
2602 lockdep_assert_held(&smmu_domain->devices_lock);
2603
2604 list_for_each_entry(master_domain, &smmu_domain->devices,
2605 devices_elm) {
2606 if (master_domain->master == master &&
2607 master_domain->ssid == ssid)
2608 return master_domain;
2609 }
2610 return NULL;
2611 }
2612
2613 /*
2614 * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
2615 * structure, otherwise NULL. These domains track attached devices so they can
2616 * issue invalidations.
2617 */
2618 static struct arm_smmu_domain *
to_smmu_domain_devices(struct iommu_domain * domain)2619 to_smmu_domain_devices(struct iommu_domain *domain)
2620 {
2621 /* The domain can be NULL only when processing the first attach */
2622 if (!domain)
2623 return NULL;
2624 if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
2625 domain->type == IOMMU_DOMAIN_SVA)
2626 return to_smmu_domain(domain);
2627 return NULL;
2628 }
2629
/*
 * Drop one (@master, @ssid) entry from the device list of @domain, if the
 * domain tracks devices and such an entry exists. When the master currently
 * has ATS enabled the domain's nr_ats_masters count is decremented too.
 */
static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
					  struct iommu_domain *domain,
					  ioasid_t ssid)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
	struct arm_smmu_master_domain *entry;
	unsigned long irqflags;

	/* Domains without a device list have nothing to remove */
	if (smmu_domain == NULL)
		return;

	spin_lock_irqsave(&smmu_domain->devices_lock, irqflags);
	entry = arm_smmu_find_master_domain(smmu_domain, master, ssid);
	if (entry != NULL) {
		list_del(&entry->devices_elm);
		if (master->ats_enabled)
			atomic_dec(&smmu_domain->nr_ats_masters);
		kfree(entry);
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, irqflags);
}
2651
2652 struct arm_smmu_attach_state {
2653 /* Inputs */
2654 struct iommu_domain *old_domain;
2655 struct arm_smmu_master *master;
2656 bool cd_needs_ats;
2657 ioasid_t ssid;
2658 /* Resulting state */
2659 bool ats_enabled;
2660 };
2661
2662 /*
2663 * Start the sequence to attach a domain to a master. The sequence contains three
2664 * steps:
2665 * arm_smmu_attach_prepare()
2666 * arm_smmu_install_ste_for_dev()
2667 * arm_smmu_attach_commit()
2668 *
2669 * If prepare succeeds then the sequence must be completed. The STE installed
2670 * must set the STE.EATS field according to state.ats_enabled.
2671 *
2672 * If the device supports ATS then this determines if EATS should be enabled
2673 * in the STE, and starts sequencing EATS disable if required.
2674 *
2675 * The change of the EATS in the STE and the PCI ATS config space is managed by
2676 * this sequence to be in the right order so that if PCI ATS is enabled then
2677 * STE.EATS is enabled.
2678 *
2679 * new_domain can be a non-paging domain. In this case ATS will not be enabled,
2680 * and invalidations won't be tracked.
2681 */
arm_smmu_attach_prepare(struct arm_smmu_attach_state * state,struct iommu_domain * new_domain)2682 static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
2683 struct iommu_domain *new_domain)
2684 {
2685 struct arm_smmu_master *master = state->master;
2686 struct arm_smmu_master_domain *master_domain;
2687 struct arm_smmu_domain *smmu_domain =
2688 to_smmu_domain_devices(new_domain);
2689 unsigned long flags;
2690
2691 /*
2692 * arm_smmu_share_asid() must not see two domains pointing to the same
2693 * arm_smmu_master_domain contents otherwise it could randomly write one
2694 * or the other to the CD.
2695 */
2696 lockdep_assert_held(&arm_smmu_asid_lock);
2697
2698 if (smmu_domain || state->cd_needs_ats) {
2699 /*
2700 * The SMMU does not support enabling ATS with bypass/abort.
2701 * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
2702 * Translation Requests and Translated transactions are denied
2703 * as though ATS is disabled for the stream (STE.EATS == 0b00),
2704 * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
2705 * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
2706 * enabled if we have arm_smmu_domain, those always have page
2707 * tables.
2708 */
2709 state->ats_enabled = arm_smmu_ats_supported(master);
2710 }
2711
2712 if (smmu_domain) {
2713 master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
2714 if (!master_domain)
2715 return -ENOMEM;
2716 master_domain->master = master;
2717 master_domain->ssid = state->ssid;
2718
2719 /*
2720 * During prepare we want the current smmu_domain and new
2721 * smmu_domain to be in the devices list before we change any
2722 * HW. This ensures that both domains will send ATS
2723 * invalidations to the master until we are done.
2724 *
2725 * It is tempting to make this list only track masters that are
2726 * using ATS, but arm_smmu_share_asid() also uses this to change
2727 * the ASID of a domain, unrelated to ATS.
2728 *
2729 * Notice if we are re-attaching the same domain then the list
2730 * will have two identical entries and commit will remove only
2731 * one of them.
2732 */
2733 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2734 if (state->ats_enabled)
2735 atomic_inc(&smmu_domain->nr_ats_masters);
2736 list_add(&master_domain->devices_elm, &smmu_domain->devices);
2737 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2738 }
2739
2740 if (!state->ats_enabled && master->ats_enabled) {
2741 pci_disable_ats(to_pci_dev(master->dev));
2742 /*
2743 * This is probably overkill, but the config write for disabling
2744 * ATS should complete before the STE is configured to generate
2745 * UR to avoid AER noise.
2746 */
2747 wmb();
2748 }
2749 return 0;
2750 }
2751
2752 /*
2753 * Commit is done after the STE/CD are configured with the EATS setting. It
2754 * completes synchronizing the PCI device's ATC and finishes manipulating the
2755 * smmu_domain->devices list.
2756 */
arm_smmu_attach_commit(struct arm_smmu_attach_state * state)2757 static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
2758 {
2759 struct arm_smmu_master *master = state->master;
2760
2761 lockdep_assert_held(&arm_smmu_asid_lock);
2762
2763 if (state->ats_enabled && !master->ats_enabled) {
2764 arm_smmu_enable_ats(master);
2765 } else if (state->ats_enabled && master->ats_enabled) {
2766 /*
2767 * The translation has changed, flush the ATC. At this point the
2768 * SMMU is translating for the new domain and both the old&new
2769 * domain will issue invalidations.
2770 */
2771 arm_smmu_atc_inv_master(master, state->ssid);
2772 } else if (!state->ats_enabled && master->ats_enabled) {
2773 /* ATS is being switched off, invalidate the entire ATC */
2774 arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
2775 }
2776 master->ats_enabled = state->ats_enabled;
2777
2778 arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
2779 }
2780
arm_smmu_attach_dev(struct iommu_domain * domain,struct device * dev)2781 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2782 {
2783 int ret = 0;
2784 struct arm_smmu_ste target;
2785 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2786 struct arm_smmu_device *smmu;
2787 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2788 struct arm_smmu_attach_state state = {
2789 .old_domain = iommu_get_domain_for_dev(dev),
2790 .ssid = IOMMU_NO_PASID,
2791 };
2792 struct arm_smmu_master *master;
2793 struct arm_smmu_cd *cdptr;
2794
2795 if (!fwspec)
2796 return -ENOENT;
2797
2798 state.master = master = dev_iommu_priv_get(dev);
2799 smmu = master->smmu;
2800
2801 mutex_lock(&smmu_domain->init_mutex);
2802
2803 if (!smmu_domain->smmu) {
2804 ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
2805 } else if (smmu_domain->smmu != smmu)
2806 ret = -EINVAL;
2807
2808 mutex_unlock(&smmu_domain->init_mutex);
2809 if (ret)
2810 return ret;
2811
2812 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2813 cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
2814 if (!cdptr)
2815 return -ENOMEM;
2816 } else if (arm_smmu_ssids_in_use(&master->cd_table))
2817 return -EBUSY;
2818
2819 /*
2820 * Prevent arm_smmu_share_asid() from trying to change the ASID
2821 * of either the old or new domain while we are working on it.
2822 * This allows the STE and the smmu_domain->devices list to
2823 * be inconsistent during this routine.
2824 */
2825 mutex_lock(&arm_smmu_asid_lock);
2826
2827 ret = arm_smmu_attach_prepare(&state, domain);
2828 if (ret) {
2829 mutex_unlock(&arm_smmu_asid_lock);
2830 return ret;
2831 }
2832
2833 switch (smmu_domain->stage) {
2834 case ARM_SMMU_DOMAIN_S1: {
2835 struct arm_smmu_cd target_cd;
2836
2837 arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
2838 arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
2839 &target_cd);
2840 arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
2841 STRTAB_STE_1_S1DSS_SSID0);
2842 arm_smmu_install_ste_for_dev(master, &target);
2843 break;
2844 }
2845 case ARM_SMMU_DOMAIN_S2:
2846 arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
2847 state.ats_enabled);
2848 arm_smmu_install_ste_for_dev(master, &target);
2849 arm_smmu_clear_cd(master, IOMMU_NO_PASID);
2850 break;
2851 }
2852
2853 arm_smmu_attach_commit(&state);
2854 mutex_unlock(&arm_smmu_asid_lock);
2855 return 0;
2856 }
2857
/*
 * set_dev_pasid callback for paging domains: attach stage 1 @domain to
 * PASID @id of @dev.
 *
 * The domain is finalised against the master's SMMU if that has not happened
 * yet. Domains bound to a different SMMU, and domains that are not stage 1,
 * are rejected with -EINVAL. The CD is built here and installed by
 * arm_smmu_set_pasid().
 */
static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
				     struct device *dev, ioasid_t id)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cd target_cd;
	int ret;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		ret = (smmu_domain->smmu == master->smmu) ? 0 : -EINVAL;
	else
		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
	mutex_unlock(&smmu_domain->init_mutex);
	if (ret)
		return ret;

	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
		return -EINVAL;

	/*
	 * We can read cd.asid outside the lock because arm_smmu_set_pasid()
	 * will fix it
	 */
	arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);

	return arm_smmu_set_pasid(master, smmu_domain, id, &target_cd);
}
2887
/*
 * Make sure @master's STE is a cdtable STE carrying @ats_enabled.
 *
 * @sid_domain is the domain attached to the RID and selects the S1DSS value:
 * BYPASS for an identity domain, TERMINATE otherwise (any type other than
 * blocked triggers a WARN). Nothing is written when the CD table is already
 * in the STE and the STE's ATS setting already matches.
 */
static void arm_smmu_update_ste(struct arm_smmu_master *master,
				struct iommu_domain *sid_domain,
				bool ats_enabled)
{
	struct arm_smmu_ste ste;
	unsigned int s1dss;

	if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
		return;

	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY) {
		s1dss = STRTAB_STE_1_S1DSS_BYPASS;
	} else {
		WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
		s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
	}

	/*
	 * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
	 * using s1dss if necessary. If the cd_table is already installed then
	 * the S1DSS is correct and this will just update the EATS. Otherwise it
	 * installs the entire thing. This will be hitless.
	 */
	arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
	arm_smmu_install_ste_for_dev(master, &ste);
}
2912
/*
 * Install context descriptor @cd for @pasid of @master, on behalf of
 * @smmu_domain.
 *
 * The ASID field of @cd is rewritten under arm_smmu_asid_lock with the
 * domain's current ASID before the CD is written, so the caller may build
 * @cd without holding that lock. Note that @cd is modified in place.
 *
 * Returns 0 on success, -EINVAL if the domain belongs to another SMMU or the
 * RID's domain is incompatible with a CD table, -ENOMEM if no CD slot could
 * be allocated, or the error returned by arm_smmu_attach_prepare().
 */
int arm_smmu_set_pasid(struct arm_smmu_master *master,
		       struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
		       struct arm_smmu_cd *cd)
{
	struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
	struct arm_smmu_attach_state state = {
		.master = master,
		/*
		 * For now the core code prevents calling this when a domain is
		 * already attached, no need to set old_domain.
		 */
		.ssid = pasid,
	};
	struct arm_smmu_cd *cdptr;
	int ret;

	/* The core code validates pasid */

	if (smmu_domain->smmu != master->smmu)
		return -EINVAL;

	/*
	 * Without a CD table in the STE, only an identity or blocked RID
	 * domain can be converted to a cdtable STE.
	 */
	if (!master->cd_table.in_ste &&
	    sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
	    sid_domain->type != IOMMU_DOMAIN_BLOCKED)
		return -EINVAL;

	cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
	if (!cdptr)
		return -ENOMEM;

	mutex_lock(&arm_smmu_asid_lock);
	ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
	if (!ret) {
		/*
		 * We don't want to take the asid_lock too early, so fix up
		 * the caller-set ASID under the lock in case it changed.
		 */
		cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
		cd->data[0] |= cpu_to_le64(
			FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));

		arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
		arm_smmu_update_ste(master, sid_domain, state.ats_enabled);

		arm_smmu_attach_commit(&state);
	}
	mutex_unlock(&arm_smmu_asid_lock);

	return ret;
}
2965
/*
 * remove_dev_pasid callback: detach @domain from @pasid of @dev.
 *
 * Under arm_smmu_asid_lock the CD is cleared, the ATC is invalidated for the
 * PASID when ATS is enabled, and the master is removed from the domain's
 * bookkeeping. If that leaves no SSID in use and the RID is attached to an
 * identity or blocked domain, that domain is re-attached so the STE goes
 * back to a non-cd_table form.
 */
static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
				      struct iommu_domain *domain)
{
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct iommu_domain *sid_domain;

	mutex_lock(&arm_smmu_asid_lock);
	arm_smmu_clear_cd(master, pasid);
	if (master->ats_enabled)
		arm_smmu_atc_inv_master(master, pasid);
	arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
	mutex_unlock(&arm_smmu_asid_lock);

	/*
	 * When the last user of the CD table goes away downgrade the STE back
	 * to a non-cd_table one.
	 */
	if (arm_smmu_ssids_in_use(&master->cd_table))
		return;

	sid_domain = iommu_get_domain_for_dev(master->dev);
	if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
	    sid_domain->type == IOMMU_DOMAIN_BLOCKED)
		sid_domain->ops->attach_dev(sid_domain, dev);
}
2994
arm_smmu_attach_dev_ste(struct iommu_domain * domain,struct device * dev,struct arm_smmu_ste * ste,unsigned int s1dss)2995 static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
2996 struct device *dev,
2997 struct arm_smmu_ste *ste,
2998 unsigned int s1dss)
2999 {
3000 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3001 struct arm_smmu_attach_state state = {
3002 .master = master,
3003 .old_domain = iommu_get_domain_for_dev(dev),
3004 .ssid = IOMMU_NO_PASID,
3005 };
3006
3007 /*
3008 * Do not allow any ASID to be changed while are working on the STE,
3009 * otherwise we could miss invalidations.
3010 */
3011 mutex_lock(&arm_smmu_asid_lock);
3012
3013 /*
3014 * If the CD table is not in use we can use the provided STE, otherwise
3015 * we use a cdtable STE with the provided S1DSS.
3016 */
3017 if (arm_smmu_ssids_in_use(&master->cd_table)) {
3018 /*
3019 * If a CD table has to be present then we need to run with ATS
3020 * on even though the RID will fail ATS queries with UR. This is
3021 * because we have no idea what the PASID's need.
3022 */
3023 state.cd_needs_ats = true;
3024 arm_smmu_attach_prepare(&state, domain);
3025 arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
3026 } else {
3027 arm_smmu_attach_prepare(&state, domain);
3028 }
3029 arm_smmu_install_ste_for_dev(master, ste);
3030 arm_smmu_attach_commit(&state);
3031 mutex_unlock(&arm_smmu_asid_lock);
3032
3033 /*
3034 * This has to be done after removing the master from the
3035 * arm_smmu_domain->devices to avoid races updating the same context
3036 * descriptor from arm_smmu_share_asid().
3037 */
3038 arm_smmu_clear_cd(master, IOMMU_NO_PASID);
3039 }
3040
arm_smmu_attach_dev_identity(struct iommu_domain * domain,struct device * dev)3041 static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
3042 struct device *dev)
3043 {
3044 struct arm_smmu_ste ste;
3045 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3046
3047 arm_smmu_make_bypass_ste(master->smmu, &ste);
3048 arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
3049 return 0;
3050 }
3051
3052 static const struct iommu_domain_ops arm_smmu_identity_ops = {
3053 .attach_dev = arm_smmu_attach_dev_identity,
3054 };
3055
3056 static struct iommu_domain arm_smmu_identity_domain = {
3057 .type = IOMMU_DOMAIN_IDENTITY,
3058 .ops = &arm_smmu_identity_ops,
3059 };
3060
arm_smmu_attach_dev_blocked(struct iommu_domain * domain,struct device * dev)3061 static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
3062 struct device *dev)
3063 {
3064 struct arm_smmu_ste ste;
3065
3066 arm_smmu_make_abort_ste(&ste);
3067 arm_smmu_attach_dev_ste(domain, dev, &ste,
3068 STRTAB_STE_1_S1DSS_TERMINATE);
3069 return 0;
3070 }
3071
3072 static const struct iommu_domain_ops arm_smmu_blocked_ops = {
3073 .attach_dev = arm_smmu_attach_dev_blocked,
3074 };
3075
3076 static struct iommu_domain arm_smmu_blocked_domain = {
3077 .type = IOMMU_DOMAIN_BLOCKED,
3078 .ops = &arm_smmu_blocked_ops,
3079 };
3080
3081 static struct iommu_domain *
arm_smmu_domain_alloc_user(struct device * dev,u32 flags,struct iommu_domain * parent,const struct iommu_user_data * user_data)3082 arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
3083 struct iommu_domain *parent,
3084 const struct iommu_user_data *user_data)
3085 {
3086 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3087 const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
3088 struct arm_smmu_domain *smmu_domain;
3089 int ret;
3090
3091 if (flags & ~PAGING_FLAGS)
3092 return ERR_PTR(-EOPNOTSUPP);
3093 if (parent || user_data)
3094 return ERR_PTR(-EOPNOTSUPP);
3095
3096 smmu_domain = arm_smmu_domain_alloc();
3097 if (IS_ERR(smmu_domain))
3098 return ERR_CAST(smmu_domain);
3099
3100 smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
3101 smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
3102 ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
3103 if (ret)
3104 goto err_free;
3105 return &smmu_domain->domain;
3106
3107 err_free:
3108 kfree(smmu_domain);
3109 return ERR_PTR(ret);
3110 }
3111
/*
 * map_pages callback: forward the request to the domain's io-pgtable ops.
 * Returns -ENODEV when the domain has no page table ops.
 */
static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *pgtbl = smmu_domain->pgtbl_ops;

	if (pgtbl)
		return pgtbl->map_pages(pgtbl, iova, paddr, pgsize, pgcount,
					prot, gfp, mapped);

	return -ENODEV;
}
3123
/*
 * unmap_pages callback: forward the request to the domain's io-pgtable ops.
 * Returns 0 (nothing unmapped) when the domain has no page table ops.
 */
static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
				   size_t pgsize, size_t pgcount,
				   struct iommu_iotlb_gather *gather)
{
	struct io_pgtable_ops *pgtbl = to_smmu_domain(domain)->pgtbl_ops;

	if (pgtbl)
		return pgtbl->unmap_pages(pgtbl, iova, pgsize, pgcount, gather);

	return 0;
}
3136
arm_smmu_flush_iotlb_all(struct iommu_domain * domain)3137 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
3138 {
3139 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3140
3141 if (smmu_domain->smmu)
3142 arm_smmu_tlb_inv_context(smmu_domain);
3143 }
3144
arm_smmu_iotlb_sync(struct iommu_domain * domain,struct iommu_iotlb_gather * gather)3145 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
3146 struct iommu_iotlb_gather *gather)
3147 {
3148 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3149
3150 if (!gather->pgsize)
3151 return;
3152
3153 arm_smmu_tlb_inv_range_domain(gather->start,
3154 gather->end - gather->start + 1,
3155 gather->pgsize, true, smmu_domain);
3156 }
3157
3158 static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain * domain,dma_addr_t iova)3159 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
3160 {
3161 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
3162
3163 if (!ops)
3164 return 0;
3165
3166 return ops->iova_to_phys(ops, iova);
3167 }
3168
3169 static struct platform_driver arm_smmu_driver;
3170
3171 static
arm_smmu_get_by_fwnode(struct fwnode_handle * fwnode)3172 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
3173 {
3174 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
3175 fwnode);
3176 put_device(dev);
3177 return dev ? dev_get_drvdata(dev) : NULL;
3178 }
3179
arm_smmu_sid_in_range(struct arm_smmu_device * smmu,u32 sid)3180 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
3181 {
3182 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3183 return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
3184 return sid < smmu->strtab_cfg.linear.num_ents;
3185 }
3186
/*
 * Prepare the stream table for @sid.
 *
 * Returns -ERANGE if @sid is outside the stream table, the result of
 * arm_smmu_init_l2_strtab() for a 2-level table, and 0 for a linear table.
 */
static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	bool two_level = smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* Check the SIDs are in range of the SMMU and our stream table */
	if (!arm_smmu_sid_in_range(smmu, sid))
		return -ERANGE;

	/* Ensure l2 strtab is initialised */
	return two_level ? arm_smmu_init_l2_strtab(smmu, sid) : 0;
}
3199
arm_smmu_insert_master(struct arm_smmu_device * smmu,struct arm_smmu_master * master)3200 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
3201 struct arm_smmu_master *master)
3202 {
3203 int i;
3204 int ret = 0;
3205 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3206
3207 master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
3208 GFP_KERNEL);
3209 if (!master->streams)
3210 return -ENOMEM;
3211 master->num_streams = fwspec->num_ids;
3212
3213 mutex_lock(&smmu->streams_mutex);
3214 for (i = 0; i < fwspec->num_ids; i++) {
3215 struct arm_smmu_stream *new_stream = &master->streams[i];
3216 u32 sid = fwspec->ids[i];
3217
3218 new_stream->id = sid;
3219 new_stream->master = master;
3220
3221 ret = arm_smmu_init_sid_strtab(smmu, sid);
3222 if (ret)
3223 break;
3224
3225 /* Insert into SID tree */
3226 if (rb_find_add(&new_stream->node, &smmu->streams,
3227 arm_smmu_streams_cmp_node)) {
3228 dev_warn(master->dev, "stream %u already in tree\n",
3229 sid);
3230 ret = -EINVAL;
3231 break;
3232 }
3233 }
3234
3235 if (ret) {
3236 for (i--; i >= 0; i--)
3237 rb_erase(&master->streams[i].node, &smmu->streams);
3238 kfree(master->streams);
3239 }
3240 mutex_unlock(&smmu->streams_mutex);
3241
3242 return ret;
3243 }
3244
arm_smmu_remove_master(struct arm_smmu_master * master)3245 static void arm_smmu_remove_master(struct arm_smmu_master *master)
3246 {
3247 int i;
3248 struct arm_smmu_device *smmu = master->smmu;
3249 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
3250
3251 if (!smmu || !master->streams)
3252 return;
3253
3254 mutex_lock(&smmu->streams_mutex);
3255 for (i = 0; i < fwspec->num_ids; i++)
3256 rb_erase(&master->streams[i].node, &smmu->streams);
3257 mutex_unlock(&smmu->streams_mutex);
3258
3259 kfree(master->streams);
3260 }
3261
arm_smmu_probe_device(struct device * dev)3262 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
3263 {
3264 int ret;
3265 struct arm_smmu_device *smmu;
3266 struct arm_smmu_master *master;
3267 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3268
3269 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
3270 return ERR_PTR(-EBUSY);
3271
3272 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
3273 if (!smmu)
3274 return ERR_PTR(-ENODEV);
3275
3276 master = kzalloc(sizeof(*master), GFP_KERNEL);
3277 if (!master)
3278 return ERR_PTR(-ENOMEM);
3279
3280 master->dev = dev;
3281 master->smmu = smmu;
3282 dev_iommu_priv_set(dev, master);
3283
3284 ret = arm_smmu_insert_master(smmu, master);
3285 if (ret)
3286 goto err_free_master;
3287
3288 device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
3289 master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
3290
3291 /*
3292 * Note that PASID must be enabled before, and disabled after ATS:
3293 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
3294 *
3295 * Behavior is undefined if this bit is Set and the value of the PASID
3296 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
3297 * are changed.
3298 */
3299 arm_smmu_enable_pasid(master);
3300
3301 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
3302 master->ssid_bits = min_t(u8, master->ssid_bits,
3303 CTXDESC_LINEAR_CDMAX);
3304
3305 if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
3306 device_property_read_bool(dev, "dma-can-stall")) ||
3307 smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
3308 master->stall_enabled = true;
3309
3310 if (dev_is_pci(dev)) {
3311 unsigned int stu = __ffs(smmu->pgsize_bitmap);
3312
3313 pci_prepare_ats(to_pci_dev(dev), stu);
3314 }
3315
3316 return &smmu->iommu;
3317
3318 err_free_master:
3319 kfree(master);
3320 return ERR_PTR(ret);
3321 }
3322
arm_smmu_release_device(struct device * dev)3323 static void arm_smmu_release_device(struct device *dev)
3324 {
3325 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3326
3327 if (WARN_ON(arm_smmu_master_sva_enabled(master)))
3328 iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
3329
3330 /* Put the STE back to what arm_smmu_init_strtab() sets */
3331 if (dev->iommu->require_direct)
3332 arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
3333 else
3334 arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
3335
3336 arm_smmu_disable_pasid(master);
3337 arm_smmu_remove_master(master);
3338 if (arm_smmu_cdtab_allocated(&master->cd_table))
3339 arm_smmu_free_cd_tables(master);
3340 kfree(master);
3341 }
3342
/*
 * read_and_clear_dirty callback: forward the request to the domain's
 * io-pgtable ops. The page table ops pointer is used without a NULL check.
 */
static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
					 unsigned long iova, size_t size,
					 unsigned long flags,
					 struct iommu_dirty_bitmap *dirty)
{
	struct io_pgtable_ops *pgtbl = to_smmu_domain(domain)->pgtbl_ops;

	return pgtbl->read_and_clear_dirty(pgtbl, iova, size, flags, dirty);
}
3353
/*
 * set_dirty_tracking callback. Nothing to do: dirty tracking is always
 * enabled and the dirty bitmap is cleared prior to set_dirty_tracking().
 * Both arguments are ignored and 0 is always returned.
 */
static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
				       bool enabled)
{
	return 0;
}
3363
/*
 * device_group callback: PCI devices get their group from
 * pci_device_group(), everything else from generic_device_group().
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		return pci_device_group(dev);

	return generic_device_group(dev);
}
3380
arm_smmu_enable_nesting(struct iommu_domain * domain)3381 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
3382 {
3383 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3384 int ret = 0;
3385
3386 mutex_lock(&smmu_domain->init_mutex);
3387 if (smmu_domain->smmu)
3388 ret = -EPERM;
3389 else
3390 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
3391 mutex_unlock(&smmu_domain->init_mutex);
3392
3393 return ret;
3394 }
3395
arm_smmu_of_xlate(struct device * dev,const struct of_phandle_args * args)3396 static int arm_smmu_of_xlate(struct device *dev,
3397 const struct of_phandle_args *args)
3398 {
3399 return iommu_fwspec_add_ids(dev, args->args, 1);
3400 }
3401
arm_smmu_get_resv_regions(struct device * dev,struct list_head * head)3402 static void arm_smmu_get_resv_regions(struct device *dev,
3403 struct list_head *head)
3404 {
3405 struct iommu_resv_region *region;
3406 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3407
3408 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3409 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
3410 if (!region)
3411 return;
3412
3413 list_add_tail(®ion->list, head);
3414
3415 iommu_dma_get_resv_regions(dev, head);
3416 }
3417
arm_smmu_dev_enable_feature(struct device * dev,enum iommu_dev_features feat)3418 static int arm_smmu_dev_enable_feature(struct device *dev,
3419 enum iommu_dev_features feat)
3420 {
3421 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3422
3423 if (!master)
3424 return -ENODEV;
3425
3426 switch (feat) {
3427 case IOMMU_DEV_FEAT_IOPF:
3428 if (!arm_smmu_master_iopf_supported(master))
3429 return -EINVAL;
3430 if (master->iopf_enabled)
3431 return -EBUSY;
3432 master->iopf_enabled = true;
3433 return 0;
3434 case IOMMU_DEV_FEAT_SVA:
3435 if (!arm_smmu_master_sva_supported(master))
3436 return -EINVAL;
3437 if (arm_smmu_master_sva_enabled(master))
3438 return -EBUSY;
3439 return arm_smmu_master_enable_sva(master);
3440 default:
3441 return -EINVAL;
3442 }
3443 }
3444
arm_smmu_dev_disable_feature(struct device * dev,enum iommu_dev_features feat)3445 static int arm_smmu_dev_disable_feature(struct device *dev,
3446 enum iommu_dev_features feat)
3447 {
3448 struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3449
3450 if (!master)
3451 return -EINVAL;
3452
3453 switch (feat) {
3454 case IOMMU_DEV_FEAT_IOPF:
3455 if (!master->iopf_enabled)
3456 return -EINVAL;
3457 if (master->sva_enabled)
3458 return -EBUSY;
3459 master->iopf_enabled = false;
3460 return 0;
3461 case IOMMU_DEV_FEAT_SVA:
3462 if (!arm_smmu_master_sva_enabled(master))
3463 return -EINVAL;
3464 return arm_smmu_master_disable_sva(master);
3465 default:
3466 return -EINVAL;
3467 }
3468 }
3469
3470 /*
3471 * HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
3472 * PCIe link and save the data to memory by DMA. The hardware is restricted to
3473 * use identity mapping only.
3474 */
3475 #define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3476 (pdev)->device == 0xa12e)
3477
arm_smmu_def_domain_type(struct device * dev)3478 static int arm_smmu_def_domain_type(struct device *dev)
3479 {
3480 if (dev_is_pci(dev)) {
3481 struct pci_dev *pdev = to_pci_dev(dev);
3482
3483 if (IS_HISI_PTT_DEVICE(pdev))
3484 return IOMMU_DOMAIN_IDENTITY;
3485 }
3486
3487 return 0;
3488 }
3489
3490 static struct iommu_ops arm_smmu_ops = {
3491 .identity_domain = &arm_smmu_identity_domain,
3492 .blocked_domain = &arm_smmu_blocked_domain,
3493 .capable = arm_smmu_capable,
3494 .domain_alloc_paging = arm_smmu_domain_alloc_paging,
3495 .domain_alloc_sva = arm_smmu_sva_domain_alloc,
3496 .domain_alloc_user = arm_smmu_domain_alloc_user,
3497 .probe_device = arm_smmu_probe_device,
3498 .release_device = arm_smmu_release_device,
3499 .device_group = arm_smmu_device_group,
3500 .of_xlate = arm_smmu_of_xlate,
3501 .get_resv_regions = arm_smmu_get_resv_regions,
3502 .remove_dev_pasid = arm_smmu_remove_dev_pasid,
3503 .dev_enable_feat = arm_smmu_dev_enable_feature,
3504 .dev_disable_feat = arm_smmu_dev_disable_feature,
3505 .page_response = arm_smmu_page_response,
3506 .def_domain_type = arm_smmu_def_domain_type,
3507 .pgsize_bitmap = -1UL, /* Restricted during device attach */
3508 .owner = THIS_MODULE,
3509 .default_domain_ops = &(const struct iommu_domain_ops) {
3510 .attach_dev = arm_smmu_attach_dev,
3511 .set_dev_pasid = arm_smmu_s1_set_dev_pasid,
3512 .map_pages = arm_smmu_map_pages,
3513 .unmap_pages = arm_smmu_unmap_pages,
3514 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
3515 .iotlb_sync = arm_smmu_iotlb_sync,
3516 .iova_to_phys = arm_smmu_iova_to_phys,
3517 .enable_nesting = arm_smmu_enable_nesting,
3518 .free = arm_smmu_domain_free_paging,
3519 }
3520 };
3521
3522 static struct iommu_dirty_ops arm_smmu_dirty_ops = {
3523 .read_and_clear_dirty = arm_smmu_read_and_clear_dirty,
3524 .set_dirty_tracking = arm_smmu_set_dirty_tracking,
3525 };
3526
3527 /* Probing and initialisation functions */
/*
 * Allocate and describe one hardware queue.
 *
 * The queue holds 2^max_n_shift entries of @dwords 64-bit words each. If the
 * DMA allocation fails, the entry count is halved and the allocation retried,
 * until an attempt for less than PAGE_SIZE has also failed. @page is the
 * register page that @prod_off and @cons_off are relative to; @name is used
 * in log messages only.
 *
 * Returns 0 on success or -ENOMEM if no allocation succeeded.
 */
int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
			    struct arm_smmu_queue *q, void __iomem *page,
			    unsigned long prod_off, unsigned long cons_off,
			    size_t dwords, const char *name)
{
	size_t qsz;

	for (;;) {
		/* Each dword is 8 bytes, hence the << 3 */
		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
					      GFP_KERNEL);
		if (q->base || qsz < PAGE_SIZE)
			break;

		q->llq.max_n_shift--;
	}

	if (!q->base) {
		dev_err(smmu->dev,
			"failed to allocate queue (0x%zx bytes) for %s\n",
			qsz, name);
		return -ENOMEM;
	}

	/* Warn if the base is not aligned to the queue size, report otherwise */
	if (!WARN_ON(q->base_dma & (qsz - 1))) {
		dev_info(smmu->dev, "allocated %u entries for %s\n",
			 1 << q->llq.max_n_shift, name);
	}

	q->prod_reg = page + prod_off;
	q->cons_reg = page + cons_off;
	q->ent_dwords = dwords;

	/* Value for the queue base register: RWA hint, address, log2 size */
	q->q_base = Q_BASE_RWA;
	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);

	q->llq.cons = 0;
	q->llq.prod = 0;

	return 0;
}
3568
arm_smmu_cmdq_init(struct arm_smmu_device * smmu,struct arm_smmu_cmdq * cmdq)3569 int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
3570 struct arm_smmu_cmdq *cmdq)
3571 {
3572 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3573
3574 atomic_set(&cmdq->owner_prod, 0);
3575 atomic_set(&cmdq->lock, 0);
3576
3577 cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3578 GFP_KERNEL);
3579 if (!cmdq->valid_map)
3580 return -ENOMEM;
3581
3582 return 0;
3583 }
3584
arm_smmu_init_queues(struct arm_smmu_device * smmu)3585 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3586 {
3587 int ret;
3588
3589 /* cmdq */
3590 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3591 ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3592 CMDQ_ENT_DWORDS, "cmdq");
3593 if (ret)
3594 return ret;
3595
3596 ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
3597 if (ret)
3598 return ret;
3599
3600 /* evtq */
3601 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3602 ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3603 EVTQ_ENT_DWORDS, "evtq");
3604 if (ret)
3605 return ret;
3606
3607 if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3608 (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3609 smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3610 if (!smmu->evtq.iopf)
3611 return -ENOMEM;
3612 }
3613
3614 /* priq */
3615 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3616 return 0;
3617
3618 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3619 ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3620 PRIQ_ENT_DWORDS, "priq");
3621 }
3622
arm_smmu_init_strtab_2lvl(struct arm_smmu_device * smmu)3623 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3624 {
3625 u32 l1size;
3626 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3627 unsigned int last_sid_idx =
3628 arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
3629
3630 /* Calculate the L1 size, capped to the SIDSIZE. */
3631 cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
3632 if (cfg->l2.num_l1_ents <= last_sid_idx)
3633 dev_warn(smmu->dev,
3634 "2-level strtab only covers %u/%u bits of SID\n",
3635 ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
3636 smmu->sid_bits);
3637
3638 l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
3639 cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
3640 GFP_KERNEL);
3641 if (!cfg->l2.l1tab) {
3642 dev_err(smmu->dev,
3643 "failed to allocate l1 stream table (%u bytes)\n",
3644 l1size);
3645 return -ENOMEM;
3646 }
3647
3648 cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
3649 sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
3650 if (!cfg->l2.l2ptrs)
3651 return -ENOMEM;
3652
3653 return 0;
3654 }
3655
arm_smmu_init_strtab_linear(struct arm_smmu_device * smmu)3656 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3657 {
3658 u32 size;
3659 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3660
3661 size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
3662 cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
3663 &cfg->linear.ste_dma,
3664 GFP_KERNEL);
3665 if (!cfg->linear.table) {
3666 dev_err(smmu->dev,
3667 "failed to allocate linear stream table (%u bytes)\n",
3668 size);
3669 return -ENOMEM;
3670 }
3671 cfg->linear.num_ents = 1 << smmu->sid_bits;
3672
3673 arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
3674 return 0;
3675 }
3676
arm_smmu_init_strtab(struct arm_smmu_device * smmu)3677 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3678 {
3679 int ret;
3680
3681 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3682 ret = arm_smmu_init_strtab_2lvl(smmu);
3683 else
3684 ret = arm_smmu_init_strtab_linear(smmu);
3685 if (ret)
3686 return ret;
3687
3688 ida_init(&smmu->vmid_map);
3689
3690 return 0;
3691 }
3692
arm_smmu_init_structures(struct arm_smmu_device * smmu)3693 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3694 {
3695 int ret;
3696
3697 mutex_init(&smmu->streams_mutex);
3698 smmu->streams = RB_ROOT;
3699
3700 ret = arm_smmu_init_queues(smmu);
3701 if (ret)
3702 return ret;
3703
3704 ret = arm_smmu_init_strtab(smmu);
3705 if (ret)
3706 return ret;
3707
3708 if (smmu->impl_ops && smmu->impl_ops->init_structures)
3709 return smmu->impl_ops->init_structures(smmu);
3710
3711 return 0;
3712 }
3713
/*
 * Write @val to the register at @reg_off, then poll the register at @ack_off
 * until it reads back @val.
 *
 * Returns the result of the poll, which is bounded by
 * ARM_SMMU_POLL_TIMEOUT_US.
 */
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
				   unsigned int reg_off, unsigned int ack_off)
{
	void __iomem *ctrl = smmu->base + reg_off;
	void __iomem *ack = smmu->base + ack_off;
	u32 reg;

	writel_relaxed(val, ctrl);

	return readl_relaxed_poll_timeout(ack, reg, reg == val,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
}
3723
3724 /* GBPA is "special" */
arm_smmu_update_gbpa(struct arm_smmu_device * smmu,u32 set,u32 clr)3725 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3726 {
3727 int ret;
3728 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3729
3730 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3731 1, ARM_SMMU_POLL_TIMEOUT_US);
3732 if (ret)
3733 return ret;
3734
3735 reg &= ~clr;
3736 reg |= set;
3737 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3738 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3739 1, ARM_SMMU_POLL_TIMEOUT_US);
3740
3741 if (ret)
3742 dev_err(smmu->dev, "GBPA not responding to update\n");
3743 return ret;
3744 }
3745
/*
 * devm action callback: @data is the struct device whose platform MSIs
 * are to be released.
 */
static void arm_smmu_free_msis(void *data)
{
	struct device *msi_dev = data;

	platform_device_msi_free_irqs_all(msi_dev);
}
3752
arm_smmu_write_msi_msg(struct msi_desc * desc,struct msi_msg * msg)3753 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3754 {
3755 phys_addr_t doorbell;
3756 struct device *dev = msi_desc_to_dev(desc);
3757 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3758 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3759
3760 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3761 doorbell &= MSI_CFG0_ADDR_MASK;
3762
3763 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3764 writel_relaxed(msg->data, smmu->base + cfg[1]);
3765 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3766 }
3767
arm_smmu_setup_msis(struct arm_smmu_device * smmu)3768 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3769 {
3770 int ret, nvec = ARM_SMMU_MAX_MSIS;
3771 struct device *dev = smmu->dev;
3772
3773 /* Clear the MSI address regs */
3774 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3775 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3776
3777 if (smmu->features & ARM_SMMU_FEAT_PRI)
3778 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3779 else
3780 nvec--;
3781
3782 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3783 return;
3784
3785 if (!dev->msi.domain) {
3786 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3787 return;
3788 }
3789
3790 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3791 ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3792 if (ret) {
3793 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3794 return;
3795 }
3796
3797 smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3798 smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3799 smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3800
3801 /* Add callback to free MSIs on teardown */
3802 devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
3803 }
3804
arm_smmu_setup_unique_irqs(struct arm_smmu_device * smmu)3805 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3806 {
3807 int irq, ret;
3808
3809 arm_smmu_setup_msis(smmu);
3810
3811 /* Request interrupt lines */
3812 irq = smmu->evtq.q.irq;
3813 if (irq) {
3814 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3815 arm_smmu_evtq_thread,
3816 IRQF_ONESHOT,
3817 "arm-smmu-v3-evtq", smmu);
3818 if (ret < 0)
3819 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3820 } else {
3821 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3822 }
3823
3824 irq = smmu->gerr_irq;
3825 if (irq) {
3826 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3827 0, "arm-smmu-v3-gerror", smmu);
3828 if (ret < 0)
3829 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3830 } else {
3831 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3832 }
3833
3834 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3835 irq = smmu->priq.q.irq;
3836 if (irq) {
3837 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3838 arm_smmu_priq_thread,
3839 IRQF_ONESHOT,
3840 "arm-smmu-v3-priq",
3841 smmu);
3842 if (ret < 0)
3843 dev_warn(smmu->dev,
3844 "failed to enable priq irq\n");
3845 } else {
3846 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3847 }
3848 }
3849 }
3850
arm_smmu_setup_irqs(struct arm_smmu_device * smmu)3851 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3852 {
3853 int ret, irq;
3854 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3855
3856 /* Disable IRQs first */
3857 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3858 ARM_SMMU_IRQ_CTRLACK);
3859 if (ret) {
3860 dev_err(smmu->dev, "failed to disable irqs\n");
3861 return ret;
3862 }
3863
3864 irq = smmu->combined_irq;
3865 if (irq) {
3866 /*
3867 * Cavium ThunderX2 implementation doesn't support unique irq
3868 * lines. Use a single irq line for all the SMMUv3 interrupts.
3869 */
3870 ret = devm_request_threaded_irq(smmu->dev, irq,
3871 arm_smmu_combined_irq_handler,
3872 arm_smmu_combined_irq_thread,
3873 IRQF_ONESHOT,
3874 "arm-smmu-v3-combined-irq", smmu);
3875 if (ret < 0)
3876 dev_warn(smmu->dev, "failed to enable combined irq\n");
3877 } else
3878 arm_smmu_setup_unique_irqs(smmu);
3879
3880 if (smmu->features & ARM_SMMU_FEAT_PRI)
3881 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3882
3883 /* Enable interrupt generation on the SMMU */
3884 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3885 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3886 if (ret)
3887 dev_warn(smmu->dev, "failed to enable irqs\n");
3888
3889 return 0;
3890 }
3891
arm_smmu_device_disable(struct arm_smmu_device * smmu)3892 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3893 {
3894 int ret;
3895
3896 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3897 if (ret)
3898 dev_err(smmu->dev, "failed to clear cr0\n");
3899
3900 return ret;
3901 }
3902
arm_smmu_write_strtab(struct arm_smmu_device * smmu)3903 static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
3904 {
3905 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3906 dma_addr_t dma;
3907 u32 reg;
3908
3909 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
3910 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
3911 STRTAB_BASE_CFG_FMT_2LVL) |
3912 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
3913 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
3914 FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3915 dma = cfg->l2.l1_dma;
3916 } else {
3917 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
3918 STRTAB_BASE_CFG_FMT_LINEAR) |
3919 FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3920 dma = cfg->linear.ste_dma;
3921 }
3922 writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
3923 smmu->base + ARM_SMMU_STRTAB_BASE);
3924 writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3925 }
3926
/*
 * Take the SMMU through a full reset and bring it back up.
 *
 * The sequence is: disable the SMMU (CR0 = 0), program CR1/CR2, the stream
 * table and the command queue, enable the command queue, invalidate cached
 * configuration and TLB entries, then enable the event queue, the PRI
 * queue (if supported) and ATS checking (if supported), set up interrupts
 * and finally set SMMUEN. Every CR0 change is written incrementally and
 * acknowledged through CR0ACK before the next step.
 *
 * Returns 0 on success or the first error from a CR0 update, the IRQ setup
 * or the implementation's device_reset() hook.
 */
static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int ret;
	u32 reg, enables;
	/* Only .opcode is assigned before each command below is issued */
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		/* Set GBPA_ABORT before disabling; the result is not checked */
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID;

	if (smmu->features & ARM_SMMU_FEAT_E2H)
		reg |= CR2_E2H;

	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	arm_smmu_write_strtab(smmu);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	/* 'enables' accumulates the CR0 bits turned on so far */
	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Event queue (PROD/CONS are accessed through page1) */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
		writel_relaxed(smmu->priq.q.llq.cons,
			       smmu->page1 + ARM_SMMU_PRIQ_CONS);

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	/* ATS checking */
	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	/*
	 * In a kdump kernel the event and PRI queue enables are dropped
	 * again, so the final CR0 write below leaves both queues disabled.
	 */
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface */
	enables |= CR0_SMMUEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	/* Optional implementation-specific reset, run once the SMMU is up */
	if (smmu->impl_ops && smmu->impl_ops->device_reset) {
		ret = smmu->impl_ops->device_reset(smmu);
		if (ret) {
			dev_err(smmu->dev, "failed to reset impl\n");
			return ret;
		}
	}

	return 0;
}
4059
4060 #define IIDR_IMPLEMENTER_ARM 0x43b
4061 #define IIDR_PRODUCTID_ARM_MMU_600 0x483
4062 #define IIDR_PRODUCTID_ARM_MMU_700 0x487
4063
arm_smmu_device_iidr_probe(struct arm_smmu_device * smmu)4064 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
4065 {
4066 u32 reg;
4067 unsigned int implementer, productid, variant, revision;
4068
4069 reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
4070 implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
4071 productid = FIELD_GET(IIDR_PRODUCTID, reg);
4072 variant = FIELD_GET(IIDR_VARIANT, reg);
4073 revision = FIELD_GET(IIDR_REVISION, reg);
4074
4075 switch (implementer) {
4076 case IIDR_IMPLEMENTER_ARM:
4077 switch (productid) {
4078 case IIDR_PRODUCTID_ARM_MMU_600:
4079 /* Arm erratum 1076982 */
4080 if (variant == 0 && revision <= 2)
4081 smmu->features &= ~ARM_SMMU_FEAT_SEV;
4082 /* Arm erratum 1209401 */
4083 if (variant < 2)
4084 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4085 break;
4086 case IIDR_PRODUCTID_ARM_MMU_700:
4087 /* Arm erratum 2812531 */
4088 smmu->features &= ~ARM_SMMU_FEAT_BTM;
4089 smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
4090 /* Arm errata 2268618, 2812531 */
4091 smmu->features &= ~ARM_SMMU_FEAT_NESTING;
4092 break;
4093 }
4094 break;
4095 }
4096 }
4097
/*
 * Decode the HTTU field of the IDR0 value @reg into the HA/HD feature
 * bits. On device-tree systems the hardware value is added to
 * smmu->features; otherwise the bits already present in smmu->features
 * are kept and a mismatch with the hardware value is only reported.
 */
static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
{
	u32 fw_httu = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
	u32 httu = FIELD_GET(IDR0_HTTU, reg);
	u32 hw_httu = 0;

	if (httu == IDR0_HTTU_ACCESS_DIRTY)
		hw_httu = ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_HA;
	else if (httu == IDR0_HTTU_ACCESS)
		hw_httu = ARM_SMMU_FEAT_HA;

	if (smmu->dev->of_node) {
		smmu->features |= hw_httu;
		return;
	}

	/* ACPI IORT sets the HTTU bits */
	if (hw_httu != fw_httu)
		dev_warn(smmu->dev,
			 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
			 hw_httu, fw_httu);
}
4119
/*
 * Read the SMMU ID registers (IDR0, IDR1, IDR3, IDR5) and IIDR and
 * translate them into driver state: feature flags, options, ASID/VMID and
 * SID/SSID widths, queue size limits, supported page sizes and the
 * input/output address sizes.
 *
 * Returns 0 on success or -ENXIO when the hardware reports a configuration
 * this driver does not handle (unsupported table endianness, no
 * translation stage, no AArch64 table format, preset tables/queues, or a
 * command queue too small for one batch plus a sync).
 */
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	/* Coherency as already recorded in smmu->features before this probe */
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		/* MSI polling needs coherency and can be disabled by module param */
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP) {
		smmu->features |= ARM_SMMU_FEAT_HYP;
		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	arm_smmu_get_httu(smmu, reg);

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		/* Initial IAS; raised to at least the OAS further down */
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	if (reg & IDR1_ATTR_TYPES_OVR)
		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			 "unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/*
	 * arm_smmu_ops.pgsize_bitmap is shared by every SMMU instance: the
	 * first instance (marked by the -1UL value) sets it, later instances
	 * OR their page sizes in.
	 */
	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	/* The input address size is never smaller than the output size */
	smmu->ias = max(smmu->ias, smmu->oas);

	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu->features |= ARM_SMMU_FEAT_NESTING;

	/* May clear feature bits set above (erratum workarounds) */
	arm_smmu_device_iidr_probe(smmu);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}
4344
4345 #ifdef CONFIG_ACPI
4346 #ifdef CONFIG_TEGRA241_CMDQV
acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node * node,struct arm_smmu_device * smmu)4347 static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
4348 struct arm_smmu_device *smmu)
4349 {
4350 const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
4351 struct acpi_device *adev;
4352
4353 /* Look for an NVDA200C node whose _UID matches the SMMU node ID */
4354 adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
4355 if (adev) {
4356 /* Tegra241 CMDQV driver is responsible for put_device() */
4357 smmu->impl_dev = &adev->dev;
4358 smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
4359 dev_info(smmu->dev, "found companion CMDQV device: %s\n",
4360 dev_name(smmu->impl_dev));
4361 }
4362 kfree(uid);
4363 }
4364 #else
/* No-op stub used when CONFIG_TEGRA241_CMDQV is not enabled. */
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
}
4369 #endif
4370
acpi_smmu_iort_probe_model(struct acpi_iort_node * node,struct arm_smmu_device * smmu)4371 static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
4372 struct arm_smmu_device *smmu)
4373 {
4374 struct acpi_iort_smmu_v3 *iort_smmu =
4375 (struct acpi_iort_smmu_v3 *)node->node_data;
4376
4377 switch (iort_smmu->model) {
4378 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
4379 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
4380 break;
4381 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
4382 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
4383 break;
4384 case ACPI_IORT_SMMU_V3_GENERIC:
4385 /*
4386 * Tegra241 implementation stores its SMMU options and impl_dev
4387 * in DSDT. Thus, go through the ACPI tables unconditionally.
4388 */
4389 acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
4390 break;
4391 }
4392
4393 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
4394 return 0;
4395 }
4396
arm_smmu_device_acpi_probe(struct platform_device * pdev,struct arm_smmu_device * smmu)4397 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
4398 struct arm_smmu_device *smmu)
4399 {
4400 struct acpi_iort_smmu_v3 *iort_smmu;
4401 struct device *dev = smmu->dev;
4402 struct acpi_iort_node *node;
4403
4404 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
4405
4406 /* Retrieve SMMUv3 specific data */
4407 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
4408
4409 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
4410 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4411
4412 switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
4413 case IDR0_HTTU_ACCESS_DIRTY:
4414 smmu->features |= ARM_SMMU_FEAT_HD;
4415 fallthrough;
4416 case IDR0_HTTU_ACCESS:
4417 smmu->features |= ARM_SMMU_FEAT_HA;
4418 }
4419
4420 return acpi_smmu_iort_probe_model(node, smmu);
4421 }
4422 #else
/* Stub used when CONFIG_ACPI is disabled: ACPI probing always fails. */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
4428 #endif
4429
arm_smmu_device_dt_probe(struct platform_device * pdev,struct arm_smmu_device * smmu)4430 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
4431 struct arm_smmu_device *smmu)
4432 {
4433 struct device *dev = &pdev->dev;
4434 u32 cells;
4435 int ret = -EINVAL;
4436
4437 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
4438 dev_err(dev, "missing #iommu-cells property\n");
4439 else if (cells != 1)
4440 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
4441 else
4442 ret = 0;
4443
4444 parse_driver_options(smmu);
4445
4446 if (of_dma_is_coherent(dev->of_node))
4447 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
4448
4449 return ret;
4450 }
4451
arm_smmu_resource_size(struct arm_smmu_device * smmu)4452 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
4453 {
4454 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
4455 return SZ_64K;
4456 else
4457 return SZ_128K;
4458 }
4459
arm_smmu_ioremap(struct device * dev,resource_size_t start,resource_size_t size)4460 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4461 resource_size_t size)
4462 {
4463 struct resource res = DEFINE_RES_MEM(start, size);
4464
4465 return devm_ioremap_resource(dev, &res);
4466 }
4467
arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device * smmu)4468 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
4469 {
4470 struct list_head rmr_list;
4471 struct iommu_resv_region *e;
4472
4473 INIT_LIST_HEAD(&rmr_list);
4474 iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4475
4476 list_for_each_entry(e, &rmr_list, list) {
4477 struct iommu_iort_rmr_data *rmr;
4478 int ret, i;
4479
4480 rmr = container_of(e, struct iommu_iort_rmr_data, rr);
4481 for (i = 0; i < rmr->num_sids; i++) {
4482 ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
4483 if (ret) {
4484 dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
4485 rmr->sids[i]);
4486 continue;
4487 }
4488
4489 /*
4490 * STE table is not programmed to HW, see
4491 * arm_smmu_initial_bypass_stes()
4492 */
4493 arm_smmu_make_bypass_ste(smmu,
4494 arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
4495 }
4496 }
4497
4498 iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
4499 }
4500
arm_smmu_impl_remove(void * data)4501 static void arm_smmu_impl_remove(void *data)
4502 {
4503 struct arm_smmu_device *smmu = data;
4504
4505 if (smmu->impl_ops && smmu->impl_ops->device_remove)
4506 smmu->impl_ops->device_remove(smmu);
4507 }
4508
4509 /*
4510 * Probe all the compiled in implementations. Each one checks to see if it
4511 * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
4512 * replaces the callers. Otherwise the original is returned or ERR_PTR.
4513 */
arm_smmu_impl_probe(struct arm_smmu_device * smmu)4514 static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
4515 {
4516 struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
4517 int ret;
4518
4519 if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
4520 new_smmu = tegra241_cmdqv_probe(smmu);
4521
4522 if (new_smmu == ERR_PTR(-ENODEV))
4523 return smmu;
4524 if (IS_ERR(new_smmu))
4525 return new_smmu;
4526
4527 ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
4528 new_smmu);
4529 if (ret)
4530 return ERR_PTR(ret);
4531 return new_smmu;
4532 }
4533
arm_smmu_device_probe(struct platform_device * pdev)4534 static int arm_smmu_device_probe(struct platform_device *pdev)
4535 {
4536 int irq, ret;
4537 struct resource *res;
4538 resource_size_t ioaddr;
4539 struct arm_smmu_device *smmu;
4540 struct device *dev = &pdev->dev;
4541
4542 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4543 if (!smmu)
4544 return -ENOMEM;
4545 smmu->dev = dev;
4546
4547 if (dev->of_node) {
4548 ret = arm_smmu_device_dt_probe(pdev, smmu);
4549 } else {
4550 ret = arm_smmu_device_acpi_probe(pdev, smmu);
4551 }
4552 if (ret)
4553 return ret;
4554
4555 smmu = arm_smmu_impl_probe(smmu);
4556 if (IS_ERR(smmu))
4557 return PTR_ERR(smmu);
4558
4559 /* Base address */
4560 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4561 if (!res)
4562 return -EINVAL;
4563 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4564 dev_err(dev, "MMIO region too small (%pr)\n", res);
4565 return -EINVAL;
4566 }
4567 ioaddr = res->start;
4568
4569 /*
4570 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4571 * the PMCG registers which are reserved by the PMU driver.
4572 */
4573 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4574 if (IS_ERR(smmu->base))
4575 return PTR_ERR(smmu->base);
4576
4577 if (arm_smmu_resource_size(smmu) > SZ_64K) {
4578 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4579 ARM_SMMU_REG_SZ);
4580 if (IS_ERR(smmu->page1))
4581 return PTR_ERR(smmu->page1);
4582 } else {
4583 smmu->page1 = smmu->base;
4584 }
4585
4586 /* Interrupt lines */
4587
4588 irq = platform_get_irq_byname_optional(pdev, "combined");
4589 if (irq > 0)
4590 smmu->combined_irq = irq;
4591 else {
4592 irq = platform_get_irq_byname_optional(pdev, "eventq");
4593 if (irq > 0)
4594 smmu->evtq.q.irq = irq;
4595
4596 irq = platform_get_irq_byname_optional(pdev, "priq");
4597 if (irq > 0)
4598 smmu->priq.q.irq = irq;
4599
4600 irq = platform_get_irq_byname_optional(pdev, "gerror");
4601 if (irq > 0)
4602 smmu->gerr_irq = irq;
4603 }
4604 /* Probe the h/w */
4605 ret = arm_smmu_device_hw_probe(smmu);
4606 if (ret)
4607 return ret;
4608
4609 /* Initialise in-memory data structures */
4610 ret = arm_smmu_init_structures(smmu);
4611 if (ret)
4612 return ret;
4613
4614 /* Record our private device structure */
4615 platform_set_drvdata(pdev, smmu);
4616
4617 /* Check for RMRs and install bypass STEs if any */
4618 arm_smmu_rmr_install_bypass_ste(smmu);
4619
4620 /* Reset the device */
4621 ret = arm_smmu_device_reset(smmu);
4622 if (ret)
4623 return ret;
4624
4625 /* And we're up. Go go go! */
4626 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4627 "smmu3.%pa", &ioaddr);
4628 if (ret)
4629 return ret;
4630
4631 ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
4632 if (ret) {
4633 dev_err(dev, "Failed to register iommu\n");
4634 iommu_device_sysfs_remove(&smmu->iommu);
4635 return ret;
4636 }
4637
4638 return 0;
4639 }
4640
arm_smmu_device_remove(struct platform_device * pdev)4641 static void arm_smmu_device_remove(struct platform_device *pdev)
4642 {
4643 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4644
4645 iommu_device_unregister(&smmu->iommu);
4646 iommu_device_sysfs_remove(&smmu->iommu);
4647 arm_smmu_device_disable(smmu);
4648 iopf_queue_free(smmu->evtq.iopf);
4649 ida_destroy(&smmu->vmid_map);
4650 }
4651
arm_smmu_device_shutdown(struct platform_device * pdev)4652 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4653 {
4654 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4655
4656 arm_smmu_device_disable(smmu);
4657 }
4658
4659 static const struct of_device_id arm_smmu_of_match[] = {
4660 { .compatible = "arm,smmu-v3", },
4661 { },
4662 };
4663 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4664
/*
 * Module exit helper passed to module_driver(): calls
 * arm_smmu_sva_notifier_synchronize() before unregistering the platform
 * driver.
 */
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}
4670
/* Platform driver glue: name, DT match table and probe/remove/shutdown hooks */
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove_new = arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
/* Register with platform_driver_register(); unregister via the helper above */
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");
4688