1e86d1aa8SWill Deacon // SPDX-License-Identifier: GPL-2.0 2e86d1aa8SWill Deacon /* 3e86d1aa8SWill Deacon * IOMMU API for ARM architected SMMUv3 implementations. 4e86d1aa8SWill Deacon * 5e86d1aa8SWill Deacon * Copyright (C) 2015 ARM Limited 6e86d1aa8SWill Deacon * 7e86d1aa8SWill Deacon * Author: Will Deacon <will.deacon@arm.com> 8e86d1aa8SWill Deacon * 9e86d1aa8SWill Deacon * This driver is powered by bad coffee and bombay mix. 10e86d1aa8SWill Deacon */ 11e86d1aa8SWill Deacon 12e86d1aa8SWill Deacon #include <linux/acpi.h> 13e86d1aa8SWill Deacon #include <linux/acpi_iort.h> 14e86d1aa8SWill Deacon #include <linux/bitops.h> 15e86d1aa8SWill Deacon #include <linux/crash_dump.h> 16e86d1aa8SWill Deacon #include <linux/delay.h> 17e86d1aa8SWill Deacon #include <linux/dma-iommu.h> 18e86d1aa8SWill Deacon #include <linux/err.h> 19e86d1aa8SWill Deacon #include <linux/interrupt.h> 20e86d1aa8SWill Deacon #include <linux/io-pgtable.h> 21e86d1aa8SWill Deacon #include <linux/iopoll.h> 22e86d1aa8SWill Deacon #include <linux/module.h> 23e86d1aa8SWill Deacon #include <linux/msi.h> 24e86d1aa8SWill Deacon #include <linux/of.h> 25e86d1aa8SWill Deacon #include <linux/of_address.h> 26e86d1aa8SWill Deacon #include <linux/of_iommu.h> 27e86d1aa8SWill Deacon #include <linux/of_platform.h> 28e86d1aa8SWill Deacon #include <linux/pci.h> 29e86d1aa8SWill Deacon #include <linux/pci-ats.h> 30e86d1aa8SWill Deacon #include <linux/platform_device.h> 31e86d1aa8SWill Deacon 32e86d1aa8SWill Deacon #include <linux/amba/bus.h> 33e86d1aa8SWill Deacon 34*e881e783SJean-Philippe Brucker #include "arm-smmu-v3.h" 35e86d1aa8SWill Deacon 36e86d1aa8SWill Deacon static bool disable_bypass = 1; 379305d02aSBarry Song module_param(disable_bypass, bool, 0444); 38e86d1aa8SWill Deacon MODULE_PARM_DESC(disable_bypass, 39e86d1aa8SWill Deacon "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through 
the SMMU."); 40e86d1aa8SWill Deacon 41bd07a20aSBarry Song static bool disable_msipolling; 42bd07a20aSBarry Song module_param(disable_msipolling, bool, 0444); 43bd07a20aSBarry Song MODULE_PARM_DESC(disable_msipolling, 44bd07a20aSBarry Song "Disable MSI-based polling for CMD_SYNC completion."); 45bd07a20aSBarry Song 46e86d1aa8SWill Deacon enum arm_smmu_msi_index { 47e86d1aa8SWill Deacon EVTQ_MSI_INDEX, 48e86d1aa8SWill Deacon GERROR_MSI_INDEX, 49e86d1aa8SWill Deacon PRIQ_MSI_INDEX, 50e86d1aa8SWill Deacon ARM_SMMU_MAX_MSIS, 51e86d1aa8SWill Deacon }; 52e86d1aa8SWill Deacon 53e86d1aa8SWill Deacon static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { 54e86d1aa8SWill Deacon [EVTQ_MSI_INDEX] = { 55e86d1aa8SWill Deacon ARM_SMMU_EVTQ_IRQ_CFG0, 56e86d1aa8SWill Deacon ARM_SMMU_EVTQ_IRQ_CFG1, 57e86d1aa8SWill Deacon ARM_SMMU_EVTQ_IRQ_CFG2, 58e86d1aa8SWill Deacon }, 59e86d1aa8SWill Deacon [GERROR_MSI_INDEX] = { 60e86d1aa8SWill Deacon ARM_SMMU_GERROR_IRQ_CFG0, 61e86d1aa8SWill Deacon ARM_SMMU_GERROR_IRQ_CFG1, 62e86d1aa8SWill Deacon ARM_SMMU_GERROR_IRQ_CFG2, 63e86d1aa8SWill Deacon }, 64e86d1aa8SWill Deacon [PRIQ_MSI_INDEX] = { 65e86d1aa8SWill Deacon ARM_SMMU_PRIQ_IRQ_CFG0, 66e86d1aa8SWill Deacon ARM_SMMU_PRIQ_IRQ_CFG1, 67e86d1aa8SWill Deacon ARM_SMMU_PRIQ_IRQ_CFG2, 68e86d1aa8SWill Deacon }, 69e86d1aa8SWill Deacon }; 70e86d1aa8SWill Deacon 71e86d1aa8SWill Deacon struct arm_smmu_option_prop { 72e86d1aa8SWill Deacon u32 opt; 73e86d1aa8SWill Deacon const char *prop; 74e86d1aa8SWill Deacon }; 75e86d1aa8SWill Deacon 76e86d1aa8SWill Deacon static DEFINE_XARRAY_ALLOC1(asid_xa); 77e86d1aa8SWill Deacon 78e86d1aa8SWill Deacon static struct arm_smmu_option_prop arm_smmu_options[] = { 79e86d1aa8SWill Deacon { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, 80e86d1aa8SWill Deacon { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"}, 81e86d1aa8SWill Deacon { 0, NULL}, 82e86d1aa8SWill Deacon }; 83e86d1aa8SWill Deacon 84e86d1aa8SWill Deacon static inline void 
__iomem *arm_smmu_page1_fixup(unsigned long offset, 85e86d1aa8SWill Deacon struct arm_smmu_device *smmu) 86e86d1aa8SWill Deacon { 87e86d1aa8SWill Deacon if (offset > SZ_64K) 88e86d1aa8SWill Deacon return smmu->page1 + offset - SZ_64K; 89e86d1aa8SWill Deacon 90e86d1aa8SWill Deacon return smmu->base + offset; 91e86d1aa8SWill Deacon } 92e86d1aa8SWill Deacon 93e86d1aa8SWill Deacon static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) 94e86d1aa8SWill Deacon { 95e86d1aa8SWill Deacon return container_of(dom, struct arm_smmu_domain, domain); 96e86d1aa8SWill Deacon } 97e86d1aa8SWill Deacon 98e86d1aa8SWill Deacon static void parse_driver_options(struct arm_smmu_device *smmu) 99e86d1aa8SWill Deacon { 100e86d1aa8SWill Deacon int i = 0; 101e86d1aa8SWill Deacon 102e86d1aa8SWill Deacon do { 103e86d1aa8SWill Deacon if (of_property_read_bool(smmu->dev->of_node, 104e86d1aa8SWill Deacon arm_smmu_options[i].prop)) { 105e86d1aa8SWill Deacon smmu->options |= arm_smmu_options[i].opt; 106e86d1aa8SWill Deacon dev_notice(smmu->dev, "option %s\n", 107e86d1aa8SWill Deacon arm_smmu_options[i].prop); 108e86d1aa8SWill Deacon } 109e86d1aa8SWill Deacon } while (arm_smmu_options[++i].opt); 110e86d1aa8SWill Deacon } 111e86d1aa8SWill Deacon 112e86d1aa8SWill Deacon /* Low-level queue manipulation functions */ 113e86d1aa8SWill Deacon static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n) 114e86d1aa8SWill Deacon { 115e86d1aa8SWill Deacon u32 space, prod, cons; 116e86d1aa8SWill Deacon 117e86d1aa8SWill Deacon prod = Q_IDX(q, q->prod); 118e86d1aa8SWill Deacon cons = Q_IDX(q, q->cons); 119e86d1aa8SWill Deacon 120e86d1aa8SWill Deacon if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons)) 121e86d1aa8SWill Deacon space = (1 << q->max_n_shift) - (prod - cons); 122e86d1aa8SWill Deacon else 123e86d1aa8SWill Deacon space = cons - prod; 124e86d1aa8SWill Deacon 125e86d1aa8SWill Deacon return space >= n; 126e86d1aa8SWill Deacon } 127e86d1aa8SWill Deacon 128e86d1aa8SWill Deacon static bool 
queue_full(struct arm_smmu_ll_queue *q) 129e86d1aa8SWill Deacon { 130e86d1aa8SWill Deacon return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) && 131e86d1aa8SWill Deacon Q_WRP(q, q->prod) != Q_WRP(q, q->cons); 132e86d1aa8SWill Deacon } 133e86d1aa8SWill Deacon 134e86d1aa8SWill Deacon static bool queue_empty(struct arm_smmu_ll_queue *q) 135e86d1aa8SWill Deacon { 136e86d1aa8SWill Deacon return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) && 137e86d1aa8SWill Deacon Q_WRP(q, q->prod) == Q_WRP(q, q->cons); 138e86d1aa8SWill Deacon } 139e86d1aa8SWill Deacon 140e86d1aa8SWill Deacon static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod) 141e86d1aa8SWill Deacon { 142e86d1aa8SWill Deacon return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) && 143e86d1aa8SWill Deacon (Q_IDX(q, q->cons) > Q_IDX(q, prod))) || 144e86d1aa8SWill Deacon ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) && 145e86d1aa8SWill Deacon (Q_IDX(q, q->cons) <= Q_IDX(q, prod))); 146e86d1aa8SWill Deacon } 147e86d1aa8SWill Deacon 148e86d1aa8SWill Deacon static void queue_sync_cons_out(struct arm_smmu_queue *q) 149e86d1aa8SWill Deacon { 150e86d1aa8SWill Deacon /* 151e86d1aa8SWill Deacon * Ensure that all CPU accesses (reads and writes) to the queue 152e86d1aa8SWill Deacon * are complete before we update the cons pointer. 
153e86d1aa8SWill Deacon */ 154a76a3777SZhou Wang __iomb(); 155e86d1aa8SWill Deacon writel_relaxed(q->llq.cons, q->cons_reg); 156e86d1aa8SWill Deacon } 157e86d1aa8SWill Deacon 158e86d1aa8SWill Deacon static void queue_inc_cons(struct arm_smmu_ll_queue *q) 159e86d1aa8SWill Deacon { 160e86d1aa8SWill Deacon u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1; 161e86d1aa8SWill Deacon q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons); 162e86d1aa8SWill Deacon } 163e86d1aa8SWill Deacon 164e86d1aa8SWill Deacon static int queue_sync_prod_in(struct arm_smmu_queue *q) 165e86d1aa8SWill Deacon { 166a76a3777SZhou Wang u32 prod; 167e86d1aa8SWill Deacon int ret = 0; 168a76a3777SZhou Wang 169a76a3777SZhou Wang /* 170a76a3777SZhou Wang * We can't use the _relaxed() variant here, as we must prevent 171a76a3777SZhou Wang * speculative reads of the queue before we have determined that 172a76a3777SZhou Wang * prod has indeed moved. 173a76a3777SZhou Wang */ 174a76a3777SZhou Wang prod = readl(q->prod_reg); 175e86d1aa8SWill Deacon 176e86d1aa8SWill Deacon if (Q_OVF(prod) != Q_OVF(q->llq.prod)) 177e86d1aa8SWill Deacon ret = -EOVERFLOW; 178e86d1aa8SWill Deacon 179e86d1aa8SWill Deacon q->llq.prod = prod; 180e86d1aa8SWill Deacon return ret; 181e86d1aa8SWill Deacon } 182e86d1aa8SWill Deacon 183e86d1aa8SWill Deacon static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n) 184e86d1aa8SWill Deacon { 185e86d1aa8SWill Deacon u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n; 186e86d1aa8SWill Deacon return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod); 187e86d1aa8SWill Deacon } 188e86d1aa8SWill Deacon 189e86d1aa8SWill Deacon static void queue_poll_init(struct arm_smmu_device *smmu, 190e86d1aa8SWill Deacon struct arm_smmu_queue_poll *qp) 191e86d1aa8SWill Deacon { 192e86d1aa8SWill Deacon qp->delay = 1; 193e86d1aa8SWill Deacon qp->spin_cnt = 0; 194e86d1aa8SWill Deacon qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); 195e86d1aa8SWill Deacon qp->timeout = ktime_add_us(ktime_get(), 
ARM_SMMU_POLL_TIMEOUT_US); 196e86d1aa8SWill Deacon } 197e86d1aa8SWill Deacon 198e86d1aa8SWill Deacon static int queue_poll(struct arm_smmu_queue_poll *qp) 199e86d1aa8SWill Deacon { 200e86d1aa8SWill Deacon if (ktime_compare(ktime_get(), qp->timeout) > 0) 201e86d1aa8SWill Deacon return -ETIMEDOUT; 202e86d1aa8SWill Deacon 203e86d1aa8SWill Deacon if (qp->wfe) { 204e86d1aa8SWill Deacon wfe(); 205e86d1aa8SWill Deacon } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) { 206e86d1aa8SWill Deacon cpu_relax(); 207e86d1aa8SWill Deacon } else { 208e86d1aa8SWill Deacon udelay(qp->delay); 209e86d1aa8SWill Deacon qp->delay *= 2; 210e86d1aa8SWill Deacon qp->spin_cnt = 0; 211e86d1aa8SWill Deacon } 212e86d1aa8SWill Deacon 213e86d1aa8SWill Deacon return 0; 214e86d1aa8SWill Deacon } 215e86d1aa8SWill Deacon 216e86d1aa8SWill Deacon static void queue_write(__le64 *dst, u64 *src, size_t n_dwords) 217e86d1aa8SWill Deacon { 218e86d1aa8SWill Deacon int i; 219e86d1aa8SWill Deacon 220e86d1aa8SWill Deacon for (i = 0; i < n_dwords; ++i) 221e86d1aa8SWill Deacon *dst++ = cpu_to_le64(*src++); 222e86d1aa8SWill Deacon } 223e86d1aa8SWill Deacon 224376cdf66SJean-Philippe Brucker static void queue_read(u64 *dst, __le64 *src, size_t n_dwords) 225e86d1aa8SWill Deacon { 226e86d1aa8SWill Deacon int i; 227e86d1aa8SWill Deacon 228e86d1aa8SWill Deacon for (i = 0; i < n_dwords; ++i) 229e86d1aa8SWill Deacon *dst++ = le64_to_cpu(*src++); 230e86d1aa8SWill Deacon } 231e86d1aa8SWill Deacon 232e86d1aa8SWill Deacon static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent) 233e86d1aa8SWill Deacon { 234e86d1aa8SWill Deacon if (queue_empty(&q->llq)) 235e86d1aa8SWill Deacon return -EAGAIN; 236e86d1aa8SWill Deacon 237e86d1aa8SWill Deacon queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords); 238e86d1aa8SWill Deacon queue_inc_cons(&q->llq); 239e86d1aa8SWill Deacon queue_sync_cons_out(q); 240e86d1aa8SWill Deacon return 0; 241e86d1aa8SWill Deacon } 242e86d1aa8SWill Deacon 243e86d1aa8SWill Deacon /* High-level queue 
accessors */ 244e86d1aa8SWill Deacon static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) 245e86d1aa8SWill Deacon { 246e86d1aa8SWill Deacon memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT); 247e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode); 248e86d1aa8SWill Deacon 249e86d1aa8SWill Deacon switch (ent->opcode) { 250e86d1aa8SWill Deacon case CMDQ_OP_TLBI_EL2_ALL: 251e86d1aa8SWill Deacon case CMDQ_OP_TLBI_NSNH_ALL: 252e86d1aa8SWill Deacon break; 253e86d1aa8SWill Deacon case CMDQ_OP_PREFETCH_CFG: 254e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid); 255e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); 256e86d1aa8SWill Deacon cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; 257e86d1aa8SWill Deacon break; 258e86d1aa8SWill Deacon case CMDQ_OP_CFGI_CD: 259e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); 260df561f66SGustavo A. R. Silva fallthrough; 261e86d1aa8SWill Deacon case CMDQ_OP_CFGI_STE: 262e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); 263e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); 264e86d1aa8SWill Deacon break; 265e86d1aa8SWill Deacon case CMDQ_OP_CFGI_CD_ALL: 266e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); 267e86d1aa8SWill Deacon break; 268e86d1aa8SWill Deacon case CMDQ_OP_CFGI_ALL: 269e86d1aa8SWill Deacon /* Cover the entire SID range */ 270e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); 271e86d1aa8SWill Deacon break; 272e86d1aa8SWill Deacon case CMDQ_OP_TLBI_NH_VA: 273e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); 274e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); 275e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 276e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); 277e86d1aa8SWill Deacon cmd[1] |= 
FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); 278e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); 279e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); 280e86d1aa8SWill Deacon cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; 281e86d1aa8SWill Deacon break; 282e86d1aa8SWill Deacon case CMDQ_OP_TLBI_S2_IPA: 283e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); 284e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); 285e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 286e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); 287e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); 288e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); 289e86d1aa8SWill Deacon cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK; 290e86d1aa8SWill Deacon break; 291e86d1aa8SWill Deacon case CMDQ_OP_TLBI_NH_ASID: 292e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); 293df561f66SGustavo A. R. 
Silva fallthrough; 294e86d1aa8SWill Deacon case CMDQ_OP_TLBI_S12_VMALL: 295e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); 296e86d1aa8SWill Deacon break; 297e86d1aa8SWill Deacon case CMDQ_OP_ATC_INV: 298e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); 299e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global); 300e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid); 301e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid); 302e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size); 303e86d1aa8SWill Deacon cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK; 304e86d1aa8SWill Deacon break; 305e86d1aa8SWill Deacon case CMDQ_OP_PRI_RESP: 306e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid); 307e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid); 308e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid); 309e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid); 310e86d1aa8SWill Deacon switch (ent->pri.resp) { 311e86d1aa8SWill Deacon case PRI_RESP_DENY: 312e86d1aa8SWill Deacon case PRI_RESP_FAIL: 313e86d1aa8SWill Deacon case PRI_RESP_SUCC: 314e86d1aa8SWill Deacon break; 315e86d1aa8SWill Deacon default: 316e86d1aa8SWill Deacon return -EINVAL; 317e86d1aa8SWill Deacon } 318e86d1aa8SWill Deacon cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp); 319e86d1aa8SWill Deacon break; 320e86d1aa8SWill Deacon case CMDQ_OP_CMD_SYNC: 321e86d1aa8SWill Deacon if (ent->sync.msiaddr) { 322e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ); 323e86d1aa8SWill Deacon cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK; 324e86d1aa8SWill Deacon } else { 325e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV); 326e86d1aa8SWill Deacon } 327e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH); 
328e86d1aa8SWill Deacon cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB); 329e86d1aa8SWill Deacon break; 330e86d1aa8SWill Deacon default: 331e86d1aa8SWill Deacon return -ENOENT; 332e86d1aa8SWill Deacon } 333e86d1aa8SWill Deacon 334e86d1aa8SWill Deacon return 0; 335e86d1aa8SWill Deacon } 336e86d1aa8SWill Deacon 337e86d1aa8SWill Deacon static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, 338e86d1aa8SWill Deacon u32 prod) 339e86d1aa8SWill Deacon { 340e86d1aa8SWill Deacon struct arm_smmu_queue *q = &smmu->cmdq.q; 341e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent ent = { 342e86d1aa8SWill Deacon .opcode = CMDQ_OP_CMD_SYNC, 343e86d1aa8SWill Deacon }; 344e86d1aa8SWill Deacon 345e86d1aa8SWill Deacon /* 346e86d1aa8SWill Deacon * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI 347e86d1aa8SWill Deacon * payload, so the write will zero the entire command on that platform. 348e86d1aa8SWill Deacon */ 349bd07a20aSBarry Song if (smmu->options & ARM_SMMU_OPT_MSIPOLL) { 350e86d1aa8SWill Deacon ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) * 351e86d1aa8SWill Deacon q->ent_dwords * 8; 352e86d1aa8SWill Deacon } 353e86d1aa8SWill Deacon 354e86d1aa8SWill Deacon arm_smmu_cmdq_build_cmd(cmd, &ent); 355e86d1aa8SWill Deacon } 356e86d1aa8SWill Deacon 357e86d1aa8SWill Deacon static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) 358e86d1aa8SWill Deacon { 359e86d1aa8SWill Deacon static const char *cerror_str[] = { 360e86d1aa8SWill Deacon [CMDQ_ERR_CERROR_NONE_IDX] = "No error", 361e86d1aa8SWill Deacon [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command", 362e86d1aa8SWill Deacon [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", 363e86d1aa8SWill Deacon [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", 364e86d1aa8SWill Deacon }; 365e86d1aa8SWill Deacon 366e86d1aa8SWill Deacon int i; 367e86d1aa8SWill Deacon u64 cmd[CMDQ_ENT_DWORDS]; 368e86d1aa8SWill Deacon struct arm_smmu_queue *q = &smmu->cmdq.q; 369e86d1aa8SWill 
Deacon u32 cons = readl_relaxed(q->cons_reg); 370e86d1aa8SWill Deacon u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons); 371e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent cmd_sync = { 372e86d1aa8SWill Deacon .opcode = CMDQ_OP_CMD_SYNC, 373e86d1aa8SWill Deacon }; 374e86d1aa8SWill Deacon 375e86d1aa8SWill Deacon dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons, 376e86d1aa8SWill Deacon idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown"); 377e86d1aa8SWill Deacon 378e86d1aa8SWill Deacon switch (idx) { 379e86d1aa8SWill Deacon case CMDQ_ERR_CERROR_ABT_IDX: 380e86d1aa8SWill Deacon dev_err(smmu->dev, "retrying command fetch\n"); 381e86d1aa8SWill Deacon case CMDQ_ERR_CERROR_NONE_IDX: 382e86d1aa8SWill Deacon return; 383e86d1aa8SWill Deacon case CMDQ_ERR_CERROR_ATC_INV_IDX: 384e86d1aa8SWill Deacon /* 385e86d1aa8SWill Deacon * ATC Invalidation Completion timeout. CONS is still pointing 386e86d1aa8SWill Deacon * at the CMD_SYNC. Attempt to complete other pending commands 387e86d1aa8SWill Deacon * by repeating the CMD_SYNC, though we might well end up back 388e86d1aa8SWill Deacon * here since the ATC invalidation may still be pending. 389e86d1aa8SWill Deacon */ 390e86d1aa8SWill Deacon return; 391e86d1aa8SWill Deacon case CMDQ_ERR_CERROR_ILL_IDX: 392e86d1aa8SWill Deacon default: 393e86d1aa8SWill Deacon break; 394e86d1aa8SWill Deacon } 395e86d1aa8SWill Deacon 396e86d1aa8SWill Deacon /* 397e86d1aa8SWill Deacon * We may have concurrent producers, so we need to be careful 398e86d1aa8SWill Deacon * not to touch any of the shadow cmdq state. 
399e86d1aa8SWill Deacon */ 400e86d1aa8SWill Deacon queue_read(cmd, Q_ENT(q, cons), q->ent_dwords); 401e86d1aa8SWill Deacon dev_err(smmu->dev, "skipping command in error state:\n"); 402e86d1aa8SWill Deacon for (i = 0; i < ARRAY_SIZE(cmd); ++i) 403e86d1aa8SWill Deacon dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]); 404e86d1aa8SWill Deacon 405e86d1aa8SWill Deacon /* Convert the erroneous command into a CMD_SYNC */ 406e86d1aa8SWill Deacon if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) { 407e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to convert to CMD_SYNC\n"); 408e86d1aa8SWill Deacon return; 409e86d1aa8SWill Deacon } 410e86d1aa8SWill Deacon 411e86d1aa8SWill Deacon queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); 412e86d1aa8SWill Deacon } 413e86d1aa8SWill Deacon 414e86d1aa8SWill Deacon /* 415e86d1aa8SWill Deacon * Command queue locking. 416e86d1aa8SWill Deacon * This is a form of bastardised rwlock with the following major changes: 417e86d1aa8SWill Deacon * 418e86d1aa8SWill Deacon * - The only LOCK routines are exclusive_trylock() and shared_lock(). 419e86d1aa8SWill Deacon * Neither have barrier semantics, and instead provide only a control 420e86d1aa8SWill Deacon * dependency. 421e86d1aa8SWill Deacon * 422e86d1aa8SWill Deacon * - The UNLOCK routines are supplemented with shared_tryunlock(), which 423e86d1aa8SWill Deacon * fails if the caller appears to be the last lock holder (yes, this is 424e86d1aa8SWill Deacon * racy). All successful UNLOCK routines have RELEASE semantics. 425e86d1aa8SWill Deacon */ 426e86d1aa8SWill Deacon static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq) 427e86d1aa8SWill Deacon { 428e86d1aa8SWill Deacon int val; 429e86d1aa8SWill Deacon 430e86d1aa8SWill Deacon /* 431e86d1aa8SWill Deacon * We can try to avoid the cmpxchg() loop by simply incrementing the 432e86d1aa8SWill Deacon * lock counter. 
When held in exclusive state, the lock counter is set 433e86d1aa8SWill Deacon * to INT_MIN so these increments won't hurt as the value will remain 434e86d1aa8SWill Deacon * negative. 435e86d1aa8SWill Deacon */ 436e86d1aa8SWill Deacon if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0) 437e86d1aa8SWill Deacon return; 438e86d1aa8SWill Deacon 439e86d1aa8SWill Deacon do { 440e86d1aa8SWill Deacon val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0); 441e86d1aa8SWill Deacon } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val); 442e86d1aa8SWill Deacon } 443e86d1aa8SWill Deacon 444e86d1aa8SWill Deacon static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq) 445e86d1aa8SWill Deacon { 446e86d1aa8SWill Deacon (void)atomic_dec_return_release(&cmdq->lock); 447e86d1aa8SWill Deacon } 448e86d1aa8SWill Deacon 449e86d1aa8SWill Deacon static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq) 450e86d1aa8SWill Deacon { 451e86d1aa8SWill Deacon if (atomic_read(&cmdq->lock) == 1) 452e86d1aa8SWill Deacon return false; 453e86d1aa8SWill Deacon 454e86d1aa8SWill Deacon arm_smmu_cmdq_shared_unlock(cmdq); 455e86d1aa8SWill Deacon return true; 456e86d1aa8SWill Deacon } 457e86d1aa8SWill Deacon 458e86d1aa8SWill Deacon #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \ 459e86d1aa8SWill Deacon ({ \ 460e86d1aa8SWill Deacon bool __ret; \ 461e86d1aa8SWill Deacon local_irq_save(flags); \ 462e86d1aa8SWill Deacon __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \ 463e86d1aa8SWill Deacon if (!__ret) \ 464e86d1aa8SWill Deacon local_irq_restore(flags); \ 465e86d1aa8SWill Deacon __ret; \ 466e86d1aa8SWill Deacon }) 467e86d1aa8SWill Deacon 468e86d1aa8SWill Deacon #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \ 469e86d1aa8SWill Deacon ({ \ 470e86d1aa8SWill Deacon atomic_set_release(&cmdq->lock, 0); \ 471e86d1aa8SWill Deacon local_irq_restore(flags); \ 472e86d1aa8SWill Deacon }) 473e86d1aa8SWill Deacon 474e86d1aa8SWill Deacon 475e86d1aa8SWill 
Deacon /* 476e86d1aa8SWill Deacon * Command queue insertion. 477e86d1aa8SWill Deacon * This is made fiddly by our attempts to achieve some sort of scalability 478e86d1aa8SWill Deacon * since there is one queue shared amongst all of the CPUs in the system. If 479e86d1aa8SWill Deacon * you like mixed-size concurrency, dependency ordering and relaxed atomics, 480e86d1aa8SWill Deacon * then you'll *love* this monstrosity. 481e86d1aa8SWill Deacon * 482e86d1aa8SWill Deacon * The basic idea is to split the queue up into ranges of commands that are 483e86d1aa8SWill Deacon * owned by a given CPU; the owner may not have written all of the commands 484e86d1aa8SWill Deacon * itself, but is responsible for advancing the hardware prod pointer when 485e86d1aa8SWill Deacon * the time comes. The algorithm is roughly: 486e86d1aa8SWill Deacon * 487e86d1aa8SWill Deacon * 1. Allocate some space in the queue. At this point we also discover 488e86d1aa8SWill Deacon * whether the head of the queue is currently owned by another CPU, 489e86d1aa8SWill Deacon * or whether we are the owner. 490e86d1aa8SWill Deacon * 491e86d1aa8SWill Deacon * 2. Write our commands into our allocated slots in the queue. 492e86d1aa8SWill Deacon * 493e86d1aa8SWill Deacon * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map. 494e86d1aa8SWill Deacon * 495e86d1aa8SWill Deacon * 4. If we are an owner: 496e86d1aa8SWill Deacon * a. Wait for the previous owner to finish. 497e86d1aa8SWill Deacon * b. Mark the queue head as unowned, which tells us the range 498e86d1aa8SWill Deacon * that we are responsible for publishing. 499e86d1aa8SWill Deacon * c. Wait for all commands in our owned range to become valid. 500e86d1aa8SWill Deacon * d. Advance the hardware prod pointer. 501e86d1aa8SWill Deacon * e. Tell the next owner we've finished. 502e86d1aa8SWill Deacon * 503e86d1aa8SWill Deacon * 5. 
If we are inserting a CMD_SYNC (we may or may not have been an 504e86d1aa8SWill Deacon * owner), then we need to stick around until it has completed: 505e86d1aa8SWill Deacon * a. If we have MSIs, the SMMU can write back into the CMD_SYNC 506e86d1aa8SWill Deacon * to clear the first 4 bytes. 507e86d1aa8SWill Deacon * b. Otherwise, we spin waiting for the hardware cons pointer to 508e86d1aa8SWill Deacon * advance past our command. 509e86d1aa8SWill Deacon * 510e86d1aa8SWill Deacon * The devil is in the details, particularly the use of locking for handling 511e86d1aa8SWill Deacon * SYNC completion and freeing up space in the queue before we think that it is 512e86d1aa8SWill Deacon * full. 513e86d1aa8SWill Deacon */ 514e86d1aa8SWill Deacon static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq, 515e86d1aa8SWill Deacon u32 sprod, u32 eprod, bool set) 516e86d1aa8SWill Deacon { 517e86d1aa8SWill Deacon u32 swidx, sbidx, ewidx, ebidx; 518e86d1aa8SWill Deacon struct arm_smmu_ll_queue llq = { 519e86d1aa8SWill Deacon .max_n_shift = cmdq->q.llq.max_n_shift, 520e86d1aa8SWill Deacon .prod = sprod, 521e86d1aa8SWill Deacon }; 522e86d1aa8SWill Deacon 523e86d1aa8SWill Deacon ewidx = BIT_WORD(Q_IDX(&llq, eprod)); 524e86d1aa8SWill Deacon ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG; 525e86d1aa8SWill Deacon 526e86d1aa8SWill Deacon while (llq.prod != eprod) { 527e86d1aa8SWill Deacon unsigned long mask; 528e86d1aa8SWill Deacon atomic_long_t *ptr; 529e86d1aa8SWill Deacon u32 limit = BITS_PER_LONG; 530e86d1aa8SWill Deacon 531e86d1aa8SWill Deacon swidx = BIT_WORD(Q_IDX(&llq, llq.prod)); 532e86d1aa8SWill Deacon sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG; 533e86d1aa8SWill Deacon 534e86d1aa8SWill Deacon ptr = &cmdq->valid_map[swidx]; 535e86d1aa8SWill Deacon 536e86d1aa8SWill Deacon if ((swidx == ewidx) && (sbidx < ebidx)) 537e86d1aa8SWill Deacon limit = ebidx; 538e86d1aa8SWill Deacon 539e86d1aa8SWill Deacon mask = GENMASK(limit - 1, sbidx); 540e86d1aa8SWill Deacon 
541e86d1aa8SWill Deacon /* 542e86d1aa8SWill Deacon * The valid bit is the inverse of the wrap bit. This means 543e86d1aa8SWill Deacon * that a zero-initialised queue is invalid and, after marking 544e86d1aa8SWill Deacon * all entries as valid, they become invalid again when we 545e86d1aa8SWill Deacon * wrap. 546e86d1aa8SWill Deacon */ 547e86d1aa8SWill Deacon if (set) { 548e86d1aa8SWill Deacon atomic_long_xor(mask, ptr); 549e86d1aa8SWill Deacon } else { /* Poll */ 550e86d1aa8SWill Deacon unsigned long valid; 551e86d1aa8SWill Deacon 552e86d1aa8SWill Deacon valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask; 553e86d1aa8SWill Deacon atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid); 554e86d1aa8SWill Deacon } 555e86d1aa8SWill Deacon 556e86d1aa8SWill Deacon llq.prod = queue_inc_prod_n(&llq, limit - sbidx); 557e86d1aa8SWill Deacon } 558e86d1aa8SWill Deacon } 559e86d1aa8SWill Deacon 560e86d1aa8SWill Deacon /* Mark all entries in the range [sprod, eprod) as valid */ 561e86d1aa8SWill Deacon static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq, 562e86d1aa8SWill Deacon u32 sprod, u32 eprod) 563e86d1aa8SWill Deacon { 564e86d1aa8SWill Deacon __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true); 565e86d1aa8SWill Deacon } 566e86d1aa8SWill Deacon 567e86d1aa8SWill Deacon /* Wait for all entries in the range [sprod, eprod) to become valid */ 568e86d1aa8SWill Deacon static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq, 569e86d1aa8SWill Deacon u32 sprod, u32 eprod) 570e86d1aa8SWill Deacon { 571e86d1aa8SWill Deacon __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false); 572e86d1aa8SWill Deacon } 573e86d1aa8SWill Deacon 574e86d1aa8SWill Deacon /* Wait for the command queue to become non-full */ 575e86d1aa8SWill Deacon static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, 576e86d1aa8SWill Deacon struct arm_smmu_ll_queue *llq) 577e86d1aa8SWill Deacon { 578e86d1aa8SWill Deacon unsigned long flags; 579e86d1aa8SWill 
Deacon struct arm_smmu_queue_poll qp; 580e86d1aa8SWill Deacon struct arm_smmu_cmdq *cmdq = &smmu->cmdq; 581e86d1aa8SWill Deacon int ret = 0; 582e86d1aa8SWill Deacon 583e86d1aa8SWill Deacon /* 584e86d1aa8SWill Deacon * Try to update our copy of cons by grabbing exclusive cmdq access. If 585e86d1aa8SWill Deacon * that fails, spin until somebody else updates it for us. 586e86d1aa8SWill Deacon */ 587e86d1aa8SWill Deacon if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) { 588e86d1aa8SWill Deacon WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg)); 589e86d1aa8SWill Deacon arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags); 590e86d1aa8SWill Deacon llq->val = READ_ONCE(cmdq->q.llq.val); 591e86d1aa8SWill Deacon return 0; 592e86d1aa8SWill Deacon } 593e86d1aa8SWill Deacon 594e86d1aa8SWill Deacon queue_poll_init(smmu, &qp); 595e86d1aa8SWill Deacon do { 596e86d1aa8SWill Deacon llq->val = READ_ONCE(smmu->cmdq.q.llq.val); 597e86d1aa8SWill Deacon if (!queue_full(llq)) 598e86d1aa8SWill Deacon break; 599e86d1aa8SWill Deacon 600e86d1aa8SWill Deacon ret = queue_poll(&qp); 601e86d1aa8SWill Deacon } while (!ret); 602e86d1aa8SWill Deacon 603e86d1aa8SWill Deacon return ret; 604e86d1aa8SWill Deacon } 605e86d1aa8SWill Deacon 606e86d1aa8SWill Deacon /* 607e86d1aa8SWill Deacon * Wait until the SMMU signals a CMD_SYNC completion MSI. 608e86d1aa8SWill Deacon * Must be called with the cmdq lock held in some capacity. 
609e86d1aa8SWill Deacon */ 610e86d1aa8SWill Deacon static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, 611e86d1aa8SWill Deacon struct arm_smmu_ll_queue *llq) 612e86d1aa8SWill Deacon { 613e86d1aa8SWill Deacon int ret = 0; 614e86d1aa8SWill Deacon struct arm_smmu_queue_poll qp; 615e86d1aa8SWill Deacon struct arm_smmu_cmdq *cmdq = &smmu->cmdq; 616e86d1aa8SWill Deacon u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod)); 617e86d1aa8SWill Deacon 618e86d1aa8SWill Deacon queue_poll_init(smmu, &qp); 619e86d1aa8SWill Deacon 620e86d1aa8SWill Deacon /* 621e86d1aa8SWill Deacon * The MSI won't generate an event, since it's being written back 622e86d1aa8SWill Deacon * into the command queue. 623e86d1aa8SWill Deacon */ 624e86d1aa8SWill Deacon qp.wfe = false; 625e86d1aa8SWill Deacon smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp))); 626e86d1aa8SWill Deacon llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1); 627e86d1aa8SWill Deacon return ret; 628e86d1aa8SWill Deacon } 629e86d1aa8SWill Deacon 630e86d1aa8SWill Deacon /* 631e86d1aa8SWill Deacon * Wait until the SMMU cons index passes llq->prod. 632e86d1aa8SWill Deacon * Must be called with the cmdq lock held in some capacity. 
/*
 * Wait until the SMMU cons index passes llq->prod.
 * Must be called with the cmdq lock held in some capacity.
 *
 * Returns 0 once the hardware has consumed our slot, or a negative errno
 * from queue_poll() on timeout.
 */
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
					       struct arm_smmu_ll_queue *llq)
{
	struct arm_smmu_queue_poll qp;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	u32 prod = llq->prod;
	int ret = 0;

	queue_poll_init(smmu, &qp);
	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
	do {
		if (queue_consumed(llq, prod))
			break;

		ret = queue_poll(&qp);

		/*
		 * This needs to be a readl() so that our subsequent call
		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
		 *
		 * Specifically, we need to ensure that we observe all
		 * shared_lock()s by other CMD_SYNCs that share our owner,
		 * so that a failing call to tryunlock() means that we're
		 * the last one out and therefore we can safely advance
		 * cmdq->q.llq.cons. Roughly speaking:
		 *
		 * CPU 0		CPU1			CPU2 (us)
		 *
		 * if (sync)
		 *	shared_lock();
		 *
		 * dma_wmb();
		 * set_valid_map();
		 *
		 *			if (owner) {
		 *				poll_valid_map();
		 *				<control dependency>
		 *				writel(prod_reg);
		 *
		 *						readl(cons_reg);
		 *						tryunlock();
		 *
		 * Requires us to see CPU 0's shared_lock() acquisition.
		 */
		llq->cons = readl(cmdq->q.cons_reg);
	} while (!ret);

	return ret;
}

/*
 * Wait for the CMD_SYNC at llq->prod to complete, preferring MSI-based
 * polling when the implementation supports it (ARM_SMMU_OPT_MSIPOLL).
 */
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
					 struct arm_smmu_ll_queue *llq)
{
	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);

	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
}
queue_inc_prod_n(&llq, i); 706e86d1aa8SWill Deacon queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS); 707e86d1aa8SWill Deacon } 708e86d1aa8SWill Deacon } 709e86d1aa8SWill Deacon 710e86d1aa8SWill Deacon /* 711e86d1aa8SWill Deacon * This is the actual insertion function, and provides the following 712e86d1aa8SWill Deacon * ordering guarantees to callers: 713e86d1aa8SWill Deacon * 714e86d1aa8SWill Deacon * - There is a dma_wmb() before publishing any commands to the queue. 715e86d1aa8SWill Deacon * This can be relied upon to order prior writes to data structures 716e86d1aa8SWill Deacon * in memory (such as a CD or an STE) before the command. 717e86d1aa8SWill Deacon * 718e86d1aa8SWill Deacon * - On completion of a CMD_SYNC, there is a control dependency. 719e86d1aa8SWill Deacon * This can be relied upon to order subsequent writes to memory (e.g. 720e86d1aa8SWill Deacon * freeing an IOVA) after completion of the CMD_SYNC. 721e86d1aa8SWill Deacon * 722e86d1aa8SWill Deacon * - Command insertion is totally ordered, so if two CPUs each race to 723e86d1aa8SWill Deacon * insert their own list of commands then all of the commands from one 724e86d1aa8SWill Deacon * CPU will appear before any of the commands from the other CPU. 725e86d1aa8SWill Deacon */ 726e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, 727e86d1aa8SWill Deacon u64 *cmds, int n, bool sync) 728e86d1aa8SWill Deacon { 729e86d1aa8SWill Deacon u64 cmd_sync[CMDQ_ENT_DWORDS]; 730e86d1aa8SWill Deacon u32 prod; 731e86d1aa8SWill Deacon unsigned long flags; 732e86d1aa8SWill Deacon bool owner; 733e86d1aa8SWill Deacon struct arm_smmu_cmdq *cmdq = &smmu->cmdq; 734e86d1aa8SWill Deacon struct arm_smmu_ll_queue llq = { 735e86d1aa8SWill Deacon .max_n_shift = cmdq->q.llq.max_n_shift, 736e86d1aa8SWill Deacon }, head = llq; 737e86d1aa8SWill Deacon int ret = 0; 738e86d1aa8SWill Deacon 739e86d1aa8SWill Deacon /* 1. 
Allocate some space in the queue */ 740e86d1aa8SWill Deacon local_irq_save(flags); 741e86d1aa8SWill Deacon llq.val = READ_ONCE(cmdq->q.llq.val); 742e86d1aa8SWill Deacon do { 743e86d1aa8SWill Deacon u64 old; 744e86d1aa8SWill Deacon 745e86d1aa8SWill Deacon while (!queue_has_space(&llq, n + sync)) { 746e86d1aa8SWill Deacon local_irq_restore(flags); 747e86d1aa8SWill Deacon if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) 748e86d1aa8SWill Deacon dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); 749e86d1aa8SWill Deacon local_irq_save(flags); 750e86d1aa8SWill Deacon } 751e86d1aa8SWill Deacon 752e86d1aa8SWill Deacon head.cons = llq.cons; 753e86d1aa8SWill Deacon head.prod = queue_inc_prod_n(&llq, n + sync) | 754e86d1aa8SWill Deacon CMDQ_PROD_OWNED_FLAG; 755e86d1aa8SWill Deacon 756e86d1aa8SWill Deacon old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val); 757e86d1aa8SWill Deacon if (old == llq.val) 758e86d1aa8SWill Deacon break; 759e86d1aa8SWill Deacon 760e86d1aa8SWill Deacon llq.val = old; 761e86d1aa8SWill Deacon } while (1); 762e86d1aa8SWill Deacon owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG); 763e86d1aa8SWill Deacon head.prod &= ~CMDQ_PROD_OWNED_FLAG; 764e86d1aa8SWill Deacon llq.prod &= ~CMDQ_PROD_OWNED_FLAG; 765e86d1aa8SWill Deacon 766e86d1aa8SWill Deacon /* 767e86d1aa8SWill Deacon * 2. Write our commands into the queue 768e86d1aa8SWill Deacon * Dependency ordering from the cmpxchg() loop above. 
769e86d1aa8SWill Deacon */ 770e86d1aa8SWill Deacon arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); 771e86d1aa8SWill Deacon if (sync) { 772e86d1aa8SWill Deacon prod = queue_inc_prod_n(&llq, n); 773e86d1aa8SWill Deacon arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod); 774e86d1aa8SWill Deacon queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); 775e86d1aa8SWill Deacon 776e86d1aa8SWill Deacon /* 777e86d1aa8SWill Deacon * In order to determine completion of our CMD_SYNC, we must 778e86d1aa8SWill Deacon * ensure that the queue can't wrap twice without us noticing. 779e86d1aa8SWill Deacon * We achieve that by taking the cmdq lock as shared before 780e86d1aa8SWill Deacon * marking our slot as valid. 781e86d1aa8SWill Deacon */ 782e86d1aa8SWill Deacon arm_smmu_cmdq_shared_lock(cmdq); 783e86d1aa8SWill Deacon } 784e86d1aa8SWill Deacon 785e86d1aa8SWill Deacon /* 3. Mark our slots as valid, ensuring commands are visible first */ 786e86d1aa8SWill Deacon dma_wmb(); 787e86d1aa8SWill Deacon arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod); 788e86d1aa8SWill Deacon 789e86d1aa8SWill Deacon /* 4. If we are the owner, take control of the SMMU hardware */ 790e86d1aa8SWill Deacon if (owner) { 791e86d1aa8SWill Deacon /* a. Wait for previous owner to finish */ 792e86d1aa8SWill Deacon atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod); 793e86d1aa8SWill Deacon 794e86d1aa8SWill Deacon /* b. Stop gathering work by clearing the owned flag */ 795e86d1aa8SWill Deacon prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG, 796e86d1aa8SWill Deacon &cmdq->q.llq.atomic.prod); 797e86d1aa8SWill Deacon prod &= ~CMDQ_PROD_OWNED_FLAG; 798e86d1aa8SWill Deacon 799e86d1aa8SWill Deacon /* 800e86d1aa8SWill Deacon * c. Wait for any gathered work to be written to the queue. 801e86d1aa8SWill Deacon * Note that we read our own entries so that we have the control 802e86d1aa8SWill Deacon * dependency required by (d). 
803e86d1aa8SWill Deacon */ 804e86d1aa8SWill Deacon arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod); 805e86d1aa8SWill Deacon 806e86d1aa8SWill Deacon /* 807e86d1aa8SWill Deacon * d. Advance the hardware prod pointer 808e86d1aa8SWill Deacon * Control dependency ordering from the entries becoming valid. 809e86d1aa8SWill Deacon */ 810e86d1aa8SWill Deacon writel_relaxed(prod, cmdq->q.prod_reg); 811e86d1aa8SWill Deacon 812e86d1aa8SWill Deacon /* 813e86d1aa8SWill Deacon * e. Tell the next owner we're done 814e86d1aa8SWill Deacon * Make sure we've updated the hardware first, so that we don't 815e86d1aa8SWill Deacon * race to update prod and potentially move it backwards. 816e86d1aa8SWill Deacon */ 817e86d1aa8SWill Deacon atomic_set_release(&cmdq->owner_prod, prod); 818e86d1aa8SWill Deacon } 819e86d1aa8SWill Deacon 820e86d1aa8SWill Deacon /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ 821e86d1aa8SWill Deacon if (sync) { 822e86d1aa8SWill Deacon llq.prod = queue_inc_prod_n(&llq, n); 823e86d1aa8SWill Deacon ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); 824e86d1aa8SWill Deacon if (ret) { 825e86d1aa8SWill Deacon dev_err_ratelimited(smmu->dev, 826e86d1aa8SWill Deacon "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", 827e86d1aa8SWill Deacon llq.prod, 828e86d1aa8SWill Deacon readl_relaxed(cmdq->q.prod_reg), 829e86d1aa8SWill Deacon readl_relaxed(cmdq->q.cons_reg)); 830e86d1aa8SWill Deacon } 831e86d1aa8SWill Deacon 832e86d1aa8SWill Deacon /* 833e86d1aa8SWill Deacon * Try to unlock the cmdq lock. 
This will fail if we're the last 834e86d1aa8SWill Deacon * reader, in which case we can safely update cmdq->q.llq.cons 835e86d1aa8SWill Deacon */ 836e86d1aa8SWill Deacon if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) { 837e86d1aa8SWill Deacon WRITE_ONCE(cmdq->q.llq.cons, llq.cons); 838e86d1aa8SWill Deacon arm_smmu_cmdq_shared_unlock(cmdq); 839e86d1aa8SWill Deacon } 840e86d1aa8SWill Deacon } 841e86d1aa8SWill Deacon 842e86d1aa8SWill Deacon local_irq_restore(flags); 843e86d1aa8SWill Deacon return ret; 844e86d1aa8SWill Deacon } 845e86d1aa8SWill Deacon 846e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, 847e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent *ent) 848e86d1aa8SWill Deacon { 849e86d1aa8SWill Deacon u64 cmd[CMDQ_ENT_DWORDS]; 850e86d1aa8SWill Deacon 851e86d1aa8SWill Deacon if (arm_smmu_cmdq_build_cmd(cmd, ent)) { 852e86d1aa8SWill Deacon dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", 853e86d1aa8SWill Deacon ent->opcode); 854e86d1aa8SWill Deacon return -EINVAL; 855e86d1aa8SWill Deacon } 856e86d1aa8SWill Deacon 857e86d1aa8SWill Deacon return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false); 858e86d1aa8SWill Deacon } 859e86d1aa8SWill Deacon 860e86d1aa8SWill Deacon static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) 861e86d1aa8SWill Deacon { 862e86d1aa8SWill Deacon return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true); 863e86d1aa8SWill Deacon } 864e86d1aa8SWill Deacon 865e86d1aa8SWill Deacon static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, 866e86d1aa8SWill Deacon struct arm_smmu_cmdq_batch *cmds, 867e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent *cmd) 868e86d1aa8SWill Deacon { 869e86d1aa8SWill Deacon if (cmds->num == CMDQ_BATCH_ENTRIES) { 870e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); 871e86d1aa8SWill Deacon cmds->num = 0; 872e86d1aa8SWill Deacon } 873e86d1aa8SWill Deacon arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd); 
/* Submit any batched commands followed by a CMD_SYNC, waiting for completion. */
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}

/* Context descriptor manipulation functions */

/*
 * Invalidate cached copies of the context descriptor for @ssid on every
 * master attached to @smmu_domain, by batching one CFGI_CD command per
 * (SID, SSID) pair and submitting them with a trailing CMD_SYNC.
 */
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
{
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
		.cfgi	= {
			.ssid	= ssid,
			.leaf	= leaf,
		},
	};

	/* devices_lock protects the domain's master list while we walk it */
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_sids; i++) {
			cmd.cfgi.sid = master->sids[i];
			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

/*
 * Allocate the DMA-coherent level-2 (leaf) table of CTXDESC_L2_ENTRIES
 * context descriptors for @l1_desc. "<< 3" converts dwords to bytes.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
					struct arm_smmu_l1_ctx_desc *l1_desc)
{
	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);

	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
					     &l1_desc->l2ptr_dma, GFP_KERNEL);
	if (!l1_desc->l2ptr) {
		dev_warn(smmu->dev,
			 "failed to allocate context descriptor table\n");
		return -ENOMEM;
	}
	return 0;
}

/* Write an L1 CD table descriptor: leaf table DMA address plus the V bit. */
static void arm_smmu_write_cd_l1_desc(__le64 *dst,
				      struct arm_smmu_l1_ctx_desc *l1_desc)
{
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

/*
 * Return a pointer to the context descriptor for @ssid, lazily allocating
 * the level-2 leaf table in the 2-level case. Returns NULL on allocation
 * failure.
 */
static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
				   u32 ssid)
{
	__le64 *l1ptr;
	unsigned int idx;
	struct arm_smmu_l1_ctx_desc *l1_desc;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;

	/* Linear format: the table is indexed directly by SSID */
	if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;

	/* 2-level: high SSID bits select the L1 descriptor */
	idx = ssid >> CTXDESC_SPLIT;
	l1_desc = &cdcfg->l1_desc[idx];
	if (!l1_desc->l2ptr) {
		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
			return NULL;

		l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
		/* An invalid L1CD can be cached */
		arm_smmu_sync_cd(smmu_domain, ssid, false);
	}
	/* Low SSID bits index into the leaf table */
	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
/*
 * Install, update or remove the context descriptor for @ssid in
 * @smmu_domain's CD table.
 *
 * Returns 0 on success, -E2BIG if @ssid exceeds the table's capacity, or
 * -ENOMEM if a leaf table could not be allocated.
 */
static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
				   int ssid, struct arm_smmu_ctx_desc *cd)
{
	/*
	 * This function handles the following cases:
	 *
	 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
	 * (2) Install a secondary CD, for SID+SSID traffic.
	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
	 *     CD, then invalidate the old entry and mappings.
	 * (4) Remove a secondary CD.
	 */
	u64 val;
	bool cd_live;
	__le64 *cdptr;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
		return -E2BIG;

	cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
	if (!cdptr)
		return -ENOMEM;

	val = le64_to_cpu(cdptr[0]);
	cd_live = !!(val & CTXDESC_CD_0_V);

	if (!cd) { /* (4) */
		val = 0;
	} else if (cd_live) { /* (3) */
		val &= ~CTXDESC_CD_0_ASID;
		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
		/*
		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
		 * V=1.
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
			CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
			CTXDESC_CD_0_AA64 |
			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
			CTXDESC_CD_0_V;

		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
		if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
			val |= CTXDESC_CD_0_S;
	}

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */
	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
	arm_smmu_sync_cd(smmu_domain, ssid, true);
	return 0;
}

/*
 * Allocate the CD table(s) for @smmu_domain: a single linear table when the
 * SMMU lacks 2-level CD support or the whole SSID space fits in one leaf,
 * otherwise an L1 table of descriptors whose leaves are allocated lazily by
 * arm_smmu_get_cd_ptr(). Returns 0 on success or -ENOMEM.
 */
static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
{
	int ret;
	size_t l1size;
	size_t max_contexts;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
	struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;

	max_contexts = 1 << cfg->s1cdmax;

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
	    max_contexts <= CTXDESC_L2_ENTRIES) {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
		cdcfg->num_l1_ents = max_contexts;

		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
	} else {
		cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
		cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
						  CTXDESC_L2_ENTRIES);

		cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
					      sizeof(*cdcfg->l1_desc),
					      GFP_KERNEL);
		if (!cdcfg->l1_desc)
			return -ENOMEM;

		l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
	}

	cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
					   GFP_KERNEL);
	if (!cdcfg->cdtab) {
		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
		ret = -ENOMEM;
		goto err_free_l1;
	}

	return 0;

err_free_l1:
	if (cdcfg->l1_desc) {
		devm_kfree(smmu->dev, cdcfg->l1_desc);
		cdcfg->l1_desc = NULL;
	}
	return ret;
}
1073e86d1aa8SWill Deacon if (!cdcfg->cdtab) { 1074e86d1aa8SWill Deacon dev_warn(smmu->dev, "failed to allocate context descriptor\n"); 1075e86d1aa8SWill Deacon ret = -ENOMEM; 1076e86d1aa8SWill Deacon goto err_free_l1; 1077e86d1aa8SWill Deacon } 1078e86d1aa8SWill Deacon 1079e86d1aa8SWill Deacon return 0; 1080e86d1aa8SWill Deacon 1081e86d1aa8SWill Deacon err_free_l1: 1082e86d1aa8SWill Deacon if (cdcfg->l1_desc) { 1083e86d1aa8SWill Deacon devm_kfree(smmu->dev, cdcfg->l1_desc); 1084e86d1aa8SWill Deacon cdcfg->l1_desc = NULL; 1085e86d1aa8SWill Deacon } 1086e86d1aa8SWill Deacon return ret; 1087e86d1aa8SWill Deacon } 1088e86d1aa8SWill Deacon 1089e86d1aa8SWill Deacon static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) 1090e86d1aa8SWill Deacon { 1091e86d1aa8SWill Deacon int i; 1092e86d1aa8SWill Deacon size_t size, l1size; 1093e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1094e86d1aa8SWill Deacon struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; 1095e86d1aa8SWill Deacon 1096e86d1aa8SWill Deacon if (cdcfg->l1_desc) { 1097e86d1aa8SWill Deacon size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); 1098e86d1aa8SWill Deacon 1099e86d1aa8SWill Deacon for (i = 0; i < cdcfg->num_l1_ents; i++) { 1100e86d1aa8SWill Deacon if (!cdcfg->l1_desc[i].l2ptr) 1101e86d1aa8SWill Deacon continue; 1102e86d1aa8SWill Deacon 1103e86d1aa8SWill Deacon dmam_free_coherent(smmu->dev, size, 1104e86d1aa8SWill Deacon cdcfg->l1_desc[i].l2ptr, 1105e86d1aa8SWill Deacon cdcfg->l1_desc[i].l2ptr_dma); 1106e86d1aa8SWill Deacon } 1107e86d1aa8SWill Deacon devm_kfree(smmu->dev, cdcfg->l1_desc); 1108e86d1aa8SWill Deacon cdcfg->l1_desc = NULL; 1109e86d1aa8SWill Deacon 1110e86d1aa8SWill Deacon l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); 1111e86d1aa8SWill Deacon } else { 1112e86d1aa8SWill Deacon l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); 1113e86d1aa8SWill Deacon } 1114e86d1aa8SWill Deacon 1115e86d1aa8SWill Deacon 
dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); 1116e86d1aa8SWill Deacon cdcfg->cdtab_dma = 0; 1117e86d1aa8SWill Deacon cdcfg->cdtab = NULL; 1118e86d1aa8SWill Deacon } 1119e86d1aa8SWill Deacon 1120e86d1aa8SWill Deacon static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) 1121e86d1aa8SWill Deacon { 1122e86d1aa8SWill Deacon if (!cd->asid) 1123e86d1aa8SWill Deacon return; 1124e86d1aa8SWill Deacon 1125e86d1aa8SWill Deacon xa_erase(&asid_xa, cd->asid); 1126e86d1aa8SWill Deacon } 1127e86d1aa8SWill Deacon 1128e86d1aa8SWill Deacon /* Stream table manipulation functions */ 1129e86d1aa8SWill Deacon static void 1130e86d1aa8SWill Deacon arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) 1131e86d1aa8SWill Deacon { 1132e86d1aa8SWill Deacon u64 val = 0; 1133e86d1aa8SWill Deacon 1134e86d1aa8SWill Deacon val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); 1135e86d1aa8SWill Deacon val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; 1136e86d1aa8SWill Deacon 1137e86d1aa8SWill Deacon /* See comment in arm_smmu_write_ctx_desc() */ 1138e86d1aa8SWill Deacon WRITE_ONCE(*dst, cpu_to_le64(val)); 1139e86d1aa8SWill Deacon } 1140e86d1aa8SWill Deacon 1141e86d1aa8SWill Deacon static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) 1142e86d1aa8SWill Deacon { 1143e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent cmd = { 1144e86d1aa8SWill Deacon .opcode = CMDQ_OP_CFGI_STE, 1145e86d1aa8SWill Deacon .cfgi = { 1146e86d1aa8SWill Deacon .sid = sid, 1147e86d1aa8SWill Deacon .leaf = true, 1148e86d1aa8SWill Deacon }, 1149e86d1aa8SWill Deacon }; 1150e86d1aa8SWill Deacon 1151e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1152e86d1aa8SWill Deacon arm_smmu_cmdq_issue_sync(smmu); 1153e86d1aa8SWill Deacon } 1154e86d1aa8SWill Deacon 1155e86d1aa8SWill Deacon static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, 1156e86d1aa8SWill Deacon __le64 *dst) 1157e86d1aa8SWill Deacon { 1158e86d1aa8SWill Deacon /* 
/*
 * Install the configuration for @sid into the STE at @dst: abort/bypass when
 * @master is NULL or has no attached domain, stage-1 translation when the
 * domain has an s1_cfg, stage-2 when it has an s2_cfg. A NULL @master also
 * skips the sync (used at init time, before the SMMU is enabled).
 */
static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
				      __le64 *dst)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_s1_cfg *s1_cfg = NULL;
	struct arm_smmu_s2_cfg *s2_cfg = NULL;
	struct arm_smmu_domain *smmu_domain = NULL;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	if (master) {
		smmu_domain = master->domain;
		smmu = master->smmu;
	}

	if (smmu_domain) {
		switch (smmu_domain->stage) {
		case ARM_SMMU_DOMAIN_S1:
			s1_cfg = &smmu_domain->s1_cfg;
			break;
		case ARM_SMMU_DOMAIN_S2:
		case ARM_SMMU_DOMAIN_NESTED:
			s2_cfg = &smmu_domain->s2_cfg;
			break;
		default:
			break;
		}
	}

	/* Determine whether the existing STE is live (translating) */
	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			BUG_ON(!disable_bypass);
			break;
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!smmu_domain || !(s1_cfg || s2_cfg)) {
		if (!smmu_domain && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	if (s1_cfg) {
		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL, so only disable
		 * stalls when they aren't forced */
		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		    !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
			FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
			FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
	}

	if (s2_cfg) {
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	if (master->ats_enabled)
		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
						 STRTAB_STE_1_EATS_TRANS));

	/* Publish dwords 1-3 before making dword 0 valid */
	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}

/*
 * Initialise @nent consecutive STEs to the default (bypass or abort)
 * configuration; the NULL master means no sync commands are issued.
 */
static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent)
{
	unsigned int i;

	for (i = 0; i < nent; ++i) {
		arm_smmu_write_strtab_ent(NULL, -1, strtab);
		strtab += STRTAB_STE_DWORDS;
	}
}

/* NOTE(review): function truncated at end of visible source — kept as-is. */
static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
	size_t size;
	void *strtab;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];

	if (desc->l2ptr)
		return 0;

	size =
1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); 1315e86d1aa8SWill Deacon strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; 1316e86d1aa8SWill Deacon 1317e86d1aa8SWill Deacon desc->span = STRTAB_SPLIT + 1; 1318e86d1aa8SWill Deacon desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, 1319e86d1aa8SWill Deacon GFP_KERNEL); 1320e86d1aa8SWill Deacon if (!desc->l2ptr) { 1321e86d1aa8SWill Deacon dev_err(smmu->dev, 1322e86d1aa8SWill Deacon "failed to allocate l2 stream table for SID %u\n", 1323e86d1aa8SWill Deacon sid); 1324e86d1aa8SWill Deacon return -ENOMEM; 1325e86d1aa8SWill Deacon } 1326e86d1aa8SWill Deacon 1327e86d1aa8SWill Deacon arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT); 1328e86d1aa8SWill Deacon arm_smmu_write_strtab_l1_desc(strtab, desc); 1329e86d1aa8SWill Deacon return 0; 1330e86d1aa8SWill Deacon } 1331e86d1aa8SWill Deacon 1332e86d1aa8SWill Deacon /* IRQ and event handlers */ 1333e86d1aa8SWill Deacon static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev) 1334e86d1aa8SWill Deacon { 1335e86d1aa8SWill Deacon int i; 1336e86d1aa8SWill Deacon struct arm_smmu_device *smmu = dev; 1337e86d1aa8SWill Deacon struct arm_smmu_queue *q = &smmu->evtq.q; 1338e86d1aa8SWill Deacon struct arm_smmu_ll_queue *llq = &q->llq; 1339e86d1aa8SWill Deacon u64 evt[EVTQ_ENT_DWORDS]; 1340e86d1aa8SWill Deacon 1341e86d1aa8SWill Deacon do { 1342e86d1aa8SWill Deacon while (!queue_remove_raw(q, evt)) { 1343e86d1aa8SWill Deacon u8 id = FIELD_GET(EVTQ_0_ID, evt[0]); 1344e86d1aa8SWill Deacon 1345e86d1aa8SWill Deacon dev_info(smmu->dev, "event 0x%02x received:\n", id); 1346e86d1aa8SWill Deacon for (i = 0; i < ARRAY_SIZE(evt); ++i) 1347e86d1aa8SWill Deacon dev_info(smmu->dev, "\t0x%016llx\n", 1348e86d1aa8SWill Deacon (unsigned long long)evt[i]); 1349e86d1aa8SWill Deacon 1350e86d1aa8SWill Deacon } 1351e86d1aa8SWill Deacon 1352e86d1aa8SWill Deacon /* 1353e86d1aa8SWill Deacon * Not much we can do on overflow, so scream and pretend we're 
1354e86d1aa8SWill Deacon * trying harder. 1355e86d1aa8SWill Deacon */ 1356e86d1aa8SWill Deacon if (queue_sync_prod_in(q) == -EOVERFLOW) 1357e86d1aa8SWill Deacon dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n"); 1358e86d1aa8SWill Deacon } while (!queue_empty(llq)); 1359e86d1aa8SWill Deacon 1360e86d1aa8SWill Deacon /* Sync our overflow flag, as we believe we're up to speed */ 1361e86d1aa8SWill Deacon llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | 1362e86d1aa8SWill Deacon Q_IDX(llq, llq->cons); 1363e86d1aa8SWill Deacon return IRQ_HANDLED; 1364e86d1aa8SWill Deacon } 1365e86d1aa8SWill Deacon 1366e86d1aa8SWill Deacon static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) 1367e86d1aa8SWill Deacon { 1368e86d1aa8SWill Deacon u32 sid, ssid; 1369e86d1aa8SWill Deacon u16 grpid; 1370e86d1aa8SWill Deacon bool ssv, last; 1371e86d1aa8SWill Deacon 1372e86d1aa8SWill Deacon sid = FIELD_GET(PRIQ_0_SID, evt[0]); 1373e86d1aa8SWill Deacon ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); 1374e86d1aa8SWill Deacon ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0; 1375e86d1aa8SWill Deacon last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); 1376e86d1aa8SWill Deacon grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); 1377e86d1aa8SWill Deacon 1378e86d1aa8SWill Deacon dev_info(smmu->dev, "unexpected PRI request received:\n"); 1379e86d1aa8SWill Deacon dev_info(smmu->dev, 1380e86d1aa8SWill Deacon "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n", 1381e86d1aa8SWill Deacon sid, ssid, grpid, last ? "L" : "", 1382e86d1aa8SWill Deacon evt[0] & PRIQ_0_PERM_PRIV ? "" : "un", 1383e86d1aa8SWill Deacon evt[0] & PRIQ_0_PERM_READ ? "R" : "", 1384e86d1aa8SWill Deacon evt[0] & PRIQ_0_PERM_WRITE ? "W" : "", 1385e86d1aa8SWill Deacon evt[0] & PRIQ_0_PERM_EXEC ? 
"X" : "", 1386e86d1aa8SWill Deacon evt[1] & PRIQ_1_ADDR_MASK); 1387e86d1aa8SWill Deacon 1388e86d1aa8SWill Deacon if (last) { 1389e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent cmd = { 1390e86d1aa8SWill Deacon .opcode = CMDQ_OP_PRI_RESP, 1391e86d1aa8SWill Deacon .substream_valid = ssv, 1392e86d1aa8SWill Deacon .pri = { 1393e86d1aa8SWill Deacon .sid = sid, 1394e86d1aa8SWill Deacon .ssid = ssid, 1395e86d1aa8SWill Deacon .grpid = grpid, 1396e86d1aa8SWill Deacon .resp = PRI_RESP_DENY, 1397e86d1aa8SWill Deacon }, 1398e86d1aa8SWill Deacon }; 1399e86d1aa8SWill Deacon 1400e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1401e86d1aa8SWill Deacon } 1402e86d1aa8SWill Deacon } 1403e86d1aa8SWill Deacon 1404e86d1aa8SWill Deacon static irqreturn_t arm_smmu_priq_thread(int irq, void *dev) 1405e86d1aa8SWill Deacon { 1406e86d1aa8SWill Deacon struct arm_smmu_device *smmu = dev; 1407e86d1aa8SWill Deacon struct arm_smmu_queue *q = &smmu->priq.q; 1408e86d1aa8SWill Deacon struct arm_smmu_ll_queue *llq = &q->llq; 1409e86d1aa8SWill Deacon u64 evt[PRIQ_ENT_DWORDS]; 1410e86d1aa8SWill Deacon 1411e86d1aa8SWill Deacon do { 1412e86d1aa8SWill Deacon while (!queue_remove_raw(q, evt)) 1413e86d1aa8SWill Deacon arm_smmu_handle_ppr(smmu, evt); 1414e86d1aa8SWill Deacon 1415e86d1aa8SWill Deacon if (queue_sync_prod_in(q) == -EOVERFLOW) 1416e86d1aa8SWill Deacon dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n"); 1417e86d1aa8SWill Deacon } while (!queue_empty(llq)); 1418e86d1aa8SWill Deacon 1419e86d1aa8SWill Deacon /* Sync our overflow flag, as we believe we're up to speed */ 1420e86d1aa8SWill Deacon llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) | 1421e86d1aa8SWill Deacon Q_IDX(llq, llq->cons); 1422e86d1aa8SWill Deacon queue_sync_cons_out(q); 1423e86d1aa8SWill Deacon return IRQ_HANDLED; 1424e86d1aa8SWill Deacon } 1425e86d1aa8SWill Deacon 1426e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu); 1427e86d1aa8SWill Deacon 1428e86d1aa8SWill Deacon 
/*
 * Global error IRQ handler. Active (unacknowledged) errors are the bits
 * that differ between GERROR and GERRORN; each one is reported, a Service
 * Failure Mode error additionally disables the device and a CMDQ error is
 * handed to arm_smmu_cmdq_skip_err(). Writing the GERROR value back to
 * GERRORN acknowledges everything we handled.
 */
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
	u32 gerror, gerrorn, active;
	struct arm_smmu_device *smmu = dev;

	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);

	active = gerror ^ gerrorn;
	if (!(active & GERROR_ERR_MASK))
		return IRQ_NONE; /* No errors pending */

	dev_warn(smmu->dev,
		 "unexpected global error reported (0x%08x), this could be serious\n",
		 active);

	if (active & GERROR_SFM_ERR) {
		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
		arm_smmu_device_disable(smmu);
	}

	if (active & GERROR_MSI_GERROR_ABT_ERR)
		dev_warn(smmu->dev, "GERROR MSI write aborted\n");

	if (active & GERROR_MSI_PRIQ_ABT_ERR)
		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");

	if (active & GERROR_MSI_EVTQ_ABT_ERR)
		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");

	if (active & GERROR_MSI_CMDQ_ABT_ERR)
		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");

	if (active & GERROR_PRIQ_ABT_ERR)
		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");

	if (active & GERROR_EVTQ_ABT_ERR)
		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");

	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

	/* Acknowledge the errors we just handled */
	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}

/*
 * Threaded half of the combined IRQ: service the event queue and, if PRI
 * is supported, the PRI queue as well.
 */
static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
{
	struct arm_smmu_device *smmu = dev;

	arm_smmu_evtq_thread(irq, dev);
	if (smmu->features & ARM_SMMU_FEAT_PRI)
		arm_smmu_priq_thread(irq, dev);

	return IRQ_HANDLED;
}

/*
 * Hard-IRQ half of the combined IRQ: handle global errors immediately and
 * defer queue processing to the thread above.
 */
static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
	arm_smmu_gerror_handler(irq, dev);
	return IRQ_WAKE_THREAD;
}

/*
 * Build a CMDQ_OP_ATC_INV command covering [@iova, @iova + @size) for
 * @ssid. A zero @size requests invalidation of the entire ATC.
 */
static void
arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
			struct arm_smmu_cmdq_ent *cmd)
{
	size_t log2_span;
	size_t span_mask;
	/* ATC invalidates are always on 4096-bytes pages */
	size_t inval_grain_shift = 12;
	unsigned long page_start, page_end;

	*cmd = (struct arm_smmu_cmdq_ent) {
		.opcode			= CMDQ_OP_ATC_INV,
		.substream_valid	= !!ssid,
		.atc.ssid		= ssid,
	};

	if (!size) {
		cmd->atc.size = ATC_INV_SIZE_ALL;
		return;
	}

	page_start	= iova >> inval_grain_shift;
	page_end	= (iova + size - 1) >> inval_grain_shift;

	/*
	 * In an ATS Invalidate Request, the address must be aligned on the
	 * range size, which must be a power of two number of page sizes. We
	 * thus have to choose between grossly over-invalidating the region, or
	 * splitting the invalidation into multiple commands. For simplicity
	 * we'll go with the first solution, but should refine it in the future
	 * if multiple commands are shown to be more efficient.
	 *
	 * Find the smallest power of two that covers the range. The most
	 * significant differing bit between the start and end addresses,
	 * fls(start ^ end), indicates the required span. For example:
	 *
	 * We want to invalidate pages [8; 11]. This is already the ideal range:
	 *		x = 0b1000 ^ 0b1011 = 0b11
	 *		span = 1 << fls(x) = 4
	 *
	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
	 *		x = 0b0111 ^ 0b1010 = 0b1101
	 *		span = 1 << fls(x) = 16
	 */
	log2_span	= fls_long(page_start ^ page_end);
	span_mask	= (1ULL << log2_span) - 1;

	/* Align the start address down to the chosen span */
	page_start	&= ~span_mask;

	cmd->atc.addr	= page_start << inval_grain_shift;
	cmd->atc.size	= log2_span;
}

/*
 * Invalidate the whole ATC (size 0 => ATC_INV_SIZE_ALL) for every SID of
 * @master, then issue a sync and return its result.
 */
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
{
	int i;
	struct arm_smmu_cmdq_ent cmd;

	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);

	for (i = 0; i < master->num_sids; i++) {
		cmd.atc.sid = master->sids[i];
		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	}

	return arm_smmu_cmdq_issue_sync(master->smmu);
}

/*
 * Batch ATC invalidations of [@iova, @iova + @size) for every ATS-enabled
 * master attached to @smmu_domain. Returns 0 if ATS is unsupported or no
 * ATS masters are attached, otherwise the result of submitting the batch.
 */
static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
				   int ssid, unsigned long iova, size_t size)
{
	int i;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;

	/*
	 * Ensure that we've completed prior invalidation of the main TLBs
	 * before we read 'nr_ats_masters' in case of a concurrent call to
	 * arm_smmu_enable_ats():
	 *
	 *	// unmap()			// arm_smmu_enable_ats()
	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
	 *	smp_mb();			[...]
	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
	 *
	 * Ensures that we always see the incremented 'nr_ats_masters' count if
	 * ATS was enabled at the PCI device before completion of the TLBI.
	 */
	smp_mb();
	if (!atomic_read(&smmu_domain->nr_ats_masters))
		return 0;

	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
			continue;

		for (i = 0; i < master->num_sids; i++) {
			cmd.atc.sid = master->sids[i];
			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}

/* IO_PGTABLE API */
1606e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_context(void *cookie) 1607e86d1aa8SWill Deacon { 1608e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = cookie; 1609e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1610e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent cmd; 1611e86d1aa8SWill Deacon 1612e86d1aa8SWill Deacon if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1613e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_TLBI_NH_ASID; 1614e86d1aa8SWill Deacon cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; 1615e86d1aa8SWill Deacon cmd.tlbi.vmid = 0; 1616e86d1aa8SWill Deacon } else { 1617e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; 1618e86d1aa8SWill Deacon cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 1619e86d1aa8SWill Deacon } 1620e86d1aa8SWill Deacon 1621e86d1aa8SWill Deacon /* 1622e86d1aa8SWill Deacon * NOTE: when io-pgtable is in non-strict mode, we may get here with 1623e86d1aa8SWill Deacon * PTEs previously cleared by unmaps on the current CPU not yet visible 1624e86d1aa8SWill Deacon * to the SMMU. We are relying on the dma_wmb() implicit during cmd 1625e86d1aa8SWill Deacon * insertion to guarantee those are observed before the TLBI. Do be 1626e86d1aa8SWill Deacon * careful, 007. 
1627e86d1aa8SWill Deacon */ 1628e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmd(smmu, &cmd); 1629e86d1aa8SWill Deacon arm_smmu_cmdq_issue_sync(smmu); 1630e86d1aa8SWill Deacon arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); 1631e86d1aa8SWill Deacon } 1632e86d1aa8SWill Deacon 1633e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, 1634e86d1aa8SWill Deacon size_t granule, bool leaf, 1635e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain) 1636e86d1aa8SWill Deacon { 1637e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1638e86d1aa8SWill Deacon unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0; 1639e86d1aa8SWill Deacon size_t inv_range = granule; 1640e86d1aa8SWill Deacon struct arm_smmu_cmdq_batch cmds = {}; 1641e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent cmd = { 1642e86d1aa8SWill Deacon .tlbi = { 1643e86d1aa8SWill Deacon .leaf = leaf, 1644e86d1aa8SWill Deacon }, 1645e86d1aa8SWill Deacon }; 1646e86d1aa8SWill Deacon 1647e86d1aa8SWill Deacon if (!size) 1648e86d1aa8SWill Deacon return; 1649e86d1aa8SWill Deacon 1650e86d1aa8SWill Deacon if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1651e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_TLBI_NH_VA; 1652e86d1aa8SWill Deacon cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; 1653e86d1aa8SWill Deacon } else { 1654e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_TLBI_S2_IPA; 1655e86d1aa8SWill Deacon cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; 1656e86d1aa8SWill Deacon } 1657e86d1aa8SWill Deacon 1658e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 1659e86d1aa8SWill Deacon /* Get the leaf page size */ 1660e86d1aa8SWill Deacon tg = __ffs(smmu_domain->domain.pgsize_bitmap); 1661e86d1aa8SWill Deacon 1662e86d1aa8SWill Deacon /* Convert page size of 12,14,16 (log2) to 1,2,3 */ 1663e86d1aa8SWill Deacon cmd.tlbi.tg = (tg - 10) / 2; 1664e86d1aa8SWill Deacon 1665e86d1aa8SWill Deacon /* Determine what level the granule is at */ 1666e86d1aa8SWill Deacon cmd.tlbi.ttl 
= 4 - ((ilog2(granule) - 3) / (tg - 3)); 1667e86d1aa8SWill Deacon 1668e86d1aa8SWill Deacon num_pages = size >> tg; 1669e86d1aa8SWill Deacon } 1670e86d1aa8SWill Deacon 1671e86d1aa8SWill Deacon while (iova < end) { 1672e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { 1673e86d1aa8SWill Deacon /* 1674e86d1aa8SWill Deacon * On each iteration of the loop, the range is 5 bits 1675e86d1aa8SWill Deacon * worth of the aligned size remaining. 1676e86d1aa8SWill Deacon * The range in pages is: 1677e86d1aa8SWill Deacon * 1678e86d1aa8SWill Deacon * range = (num_pages & (0x1f << __ffs(num_pages))) 1679e86d1aa8SWill Deacon */ 1680e86d1aa8SWill Deacon unsigned long scale, num; 1681e86d1aa8SWill Deacon 1682e86d1aa8SWill Deacon /* Determine the power of 2 multiple number of pages */ 1683e86d1aa8SWill Deacon scale = __ffs(num_pages); 1684e86d1aa8SWill Deacon cmd.tlbi.scale = scale; 1685e86d1aa8SWill Deacon 1686e86d1aa8SWill Deacon /* Determine how many chunks of 2^scale size we have */ 1687e86d1aa8SWill Deacon num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; 1688e86d1aa8SWill Deacon cmd.tlbi.num = num - 1; 1689e86d1aa8SWill Deacon 1690e86d1aa8SWill Deacon /* range is num * 2^scale * pgsize */ 1691e86d1aa8SWill Deacon inv_range = num << (scale + tg); 1692e86d1aa8SWill Deacon 1693e86d1aa8SWill Deacon /* Clear out the lower order bits for the next iteration */ 1694e86d1aa8SWill Deacon num_pages -= num << scale; 1695e86d1aa8SWill Deacon } 1696e86d1aa8SWill Deacon 1697e86d1aa8SWill Deacon cmd.tlbi.addr = iova; 1698e86d1aa8SWill Deacon arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); 1699e86d1aa8SWill Deacon iova += inv_range; 1700e86d1aa8SWill Deacon } 1701e86d1aa8SWill Deacon arm_smmu_cmdq_batch_submit(smmu, &cmds); 1702e86d1aa8SWill Deacon 1703e86d1aa8SWill Deacon /* 1704e86d1aa8SWill Deacon * Unfortunately, this can't be leaf-only since we may have 1705e86d1aa8SWill Deacon * zapped an entire table. 
1706e86d1aa8SWill Deacon */ 1707e86d1aa8SWill Deacon arm_smmu_atc_inv_domain(smmu_domain, 0, start, size); 1708e86d1aa8SWill Deacon } 1709e86d1aa8SWill Deacon 1710e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, 1711e86d1aa8SWill Deacon unsigned long iova, size_t granule, 1712e86d1aa8SWill Deacon void *cookie) 1713e86d1aa8SWill Deacon { 1714e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = cookie; 1715e86d1aa8SWill Deacon struct iommu_domain *domain = &smmu_domain->domain; 1716e86d1aa8SWill Deacon 1717e86d1aa8SWill Deacon iommu_iotlb_gather_add_page(domain, gather, iova, granule); 1718e86d1aa8SWill Deacon } 1719e86d1aa8SWill Deacon 1720e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, 1721e86d1aa8SWill Deacon size_t granule, void *cookie) 1722e86d1aa8SWill Deacon { 1723e86d1aa8SWill Deacon arm_smmu_tlb_inv_range(iova, size, granule, false, cookie); 1724e86d1aa8SWill Deacon } 1725e86d1aa8SWill Deacon 1726e86d1aa8SWill Deacon static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, 1727e86d1aa8SWill Deacon size_t granule, void *cookie) 1728e86d1aa8SWill Deacon { 1729e86d1aa8SWill Deacon arm_smmu_tlb_inv_range(iova, size, granule, true, cookie); 1730e86d1aa8SWill Deacon } 1731e86d1aa8SWill Deacon 1732e86d1aa8SWill Deacon static const struct iommu_flush_ops arm_smmu_flush_ops = { 1733e86d1aa8SWill Deacon .tlb_flush_all = arm_smmu_tlb_inv_context, 1734e86d1aa8SWill Deacon .tlb_flush_walk = arm_smmu_tlb_inv_walk, 1735e86d1aa8SWill Deacon .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, 1736e86d1aa8SWill Deacon .tlb_add_page = arm_smmu_tlb_inv_page_nosync, 1737e86d1aa8SWill Deacon }; 1738e86d1aa8SWill Deacon 1739e86d1aa8SWill Deacon /* IOMMU API */ 1740e86d1aa8SWill Deacon static bool arm_smmu_capable(enum iommu_cap cap) 1741e86d1aa8SWill Deacon { 1742e86d1aa8SWill Deacon switch (cap) { 1743e86d1aa8SWill Deacon case IOMMU_CAP_CACHE_COHERENCY: 1744e86d1aa8SWill Deacon return 
true; 1745e86d1aa8SWill Deacon case IOMMU_CAP_NOEXEC: 1746e86d1aa8SWill Deacon return true; 1747e86d1aa8SWill Deacon default: 1748e86d1aa8SWill Deacon return false; 1749e86d1aa8SWill Deacon } 1750e86d1aa8SWill Deacon } 1751e86d1aa8SWill Deacon 1752e86d1aa8SWill Deacon static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) 1753e86d1aa8SWill Deacon { 1754e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain; 1755e86d1aa8SWill Deacon 1756e86d1aa8SWill Deacon if (type != IOMMU_DOMAIN_UNMANAGED && 1757e86d1aa8SWill Deacon type != IOMMU_DOMAIN_DMA && 1758e86d1aa8SWill Deacon type != IOMMU_DOMAIN_IDENTITY) 1759e86d1aa8SWill Deacon return NULL; 1760e86d1aa8SWill Deacon 1761e86d1aa8SWill Deacon /* 1762e86d1aa8SWill Deacon * Allocate the domain and initialise some of its data structures. 1763e86d1aa8SWill Deacon * We can't really do anything meaningful until we've added a 1764e86d1aa8SWill Deacon * master. 1765e86d1aa8SWill Deacon */ 1766e86d1aa8SWill Deacon smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); 1767e86d1aa8SWill Deacon if (!smmu_domain) 1768e86d1aa8SWill Deacon return NULL; 1769e86d1aa8SWill Deacon 1770e86d1aa8SWill Deacon if (type == IOMMU_DOMAIN_DMA && 1771e86d1aa8SWill Deacon iommu_get_dma_cookie(&smmu_domain->domain)) { 1772e86d1aa8SWill Deacon kfree(smmu_domain); 1773e86d1aa8SWill Deacon return NULL; 1774e86d1aa8SWill Deacon } 1775e86d1aa8SWill Deacon 1776e86d1aa8SWill Deacon mutex_init(&smmu_domain->init_mutex); 1777e86d1aa8SWill Deacon INIT_LIST_HEAD(&smmu_domain->devices); 1778e86d1aa8SWill Deacon spin_lock_init(&smmu_domain->devices_lock); 1779e86d1aa8SWill Deacon 1780e86d1aa8SWill Deacon return &smmu_domain->domain; 1781e86d1aa8SWill Deacon } 1782e86d1aa8SWill Deacon 1783e86d1aa8SWill Deacon static int arm_smmu_bitmap_alloc(unsigned long *map, int span) 1784e86d1aa8SWill Deacon { 1785e86d1aa8SWill Deacon int idx, size = 1 << span; 1786e86d1aa8SWill Deacon 1787e86d1aa8SWill Deacon do { 1788e86d1aa8SWill Deacon idx = 
find_first_zero_bit(map, size); 1789e86d1aa8SWill Deacon if (idx == size) 1790e86d1aa8SWill Deacon return -ENOSPC; 1791e86d1aa8SWill Deacon } while (test_and_set_bit(idx, map)); 1792e86d1aa8SWill Deacon 1793e86d1aa8SWill Deacon return idx; 1794e86d1aa8SWill Deacon } 1795e86d1aa8SWill Deacon 1796e86d1aa8SWill Deacon static void arm_smmu_bitmap_free(unsigned long *map, int idx) 1797e86d1aa8SWill Deacon { 1798e86d1aa8SWill Deacon clear_bit(idx, map); 1799e86d1aa8SWill Deacon } 1800e86d1aa8SWill Deacon 1801e86d1aa8SWill Deacon static void arm_smmu_domain_free(struct iommu_domain *domain) 1802e86d1aa8SWill Deacon { 1803e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1804e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1805e86d1aa8SWill Deacon 1806e86d1aa8SWill Deacon iommu_put_dma_cookie(domain); 1807e86d1aa8SWill Deacon free_io_pgtable_ops(smmu_domain->pgtbl_ops); 1808e86d1aa8SWill Deacon 1809e86d1aa8SWill Deacon /* Free the CD and ASID, if we allocated them */ 1810e86d1aa8SWill Deacon if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { 1811e86d1aa8SWill Deacon struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; 1812e86d1aa8SWill Deacon 1813e86d1aa8SWill Deacon if (cfg->cdcfg.cdtab) 1814e86d1aa8SWill Deacon arm_smmu_free_cd_tables(smmu_domain); 1815e86d1aa8SWill Deacon arm_smmu_free_asid(&cfg->cd); 1816e86d1aa8SWill Deacon } else { 1817e86d1aa8SWill Deacon struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 1818e86d1aa8SWill Deacon if (cfg->vmid) 1819e86d1aa8SWill Deacon arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid); 1820e86d1aa8SWill Deacon } 1821e86d1aa8SWill Deacon 1822e86d1aa8SWill Deacon kfree(smmu_domain); 1823e86d1aa8SWill Deacon } 1824e86d1aa8SWill Deacon 1825e86d1aa8SWill Deacon static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, 1826e86d1aa8SWill Deacon struct arm_smmu_master *master, 1827e86d1aa8SWill Deacon struct io_pgtable_cfg *pgtbl_cfg) 1828e86d1aa8SWill Deacon { 
1829e86d1aa8SWill Deacon int ret; 1830e86d1aa8SWill Deacon u32 asid; 1831e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1832e86d1aa8SWill Deacon struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; 1833e86d1aa8SWill Deacon typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; 1834e86d1aa8SWill Deacon 1835e86d1aa8SWill Deacon ret = xa_alloc(&asid_xa, &asid, &cfg->cd, 1836e86d1aa8SWill Deacon XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); 1837e86d1aa8SWill Deacon if (ret) 1838e86d1aa8SWill Deacon return ret; 1839e86d1aa8SWill Deacon 1840e86d1aa8SWill Deacon cfg->s1cdmax = master->ssid_bits; 1841e86d1aa8SWill Deacon 1842e86d1aa8SWill Deacon ret = arm_smmu_alloc_cd_tables(smmu_domain); 1843e86d1aa8SWill Deacon if (ret) 1844e86d1aa8SWill Deacon goto out_free_asid; 1845e86d1aa8SWill Deacon 1846e86d1aa8SWill Deacon cfg->cd.asid = (u16)asid; 1847e86d1aa8SWill Deacon cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; 1848e86d1aa8SWill Deacon cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | 1849e86d1aa8SWill Deacon FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | 1850e86d1aa8SWill Deacon FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | 1851e86d1aa8SWill Deacon FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | 1852e86d1aa8SWill Deacon FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | 1853e86d1aa8SWill Deacon FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | 1854e86d1aa8SWill Deacon CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; 1855e86d1aa8SWill Deacon cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; 1856e86d1aa8SWill Deacon 1857e86d1aa8SWill Deacon /* 1858e86d1aa8SWill Deacon * Note that this will end up calling arm_smmu_sync_cd() before 1859e86d1aa8SWill Deacon * the master has been added to the devices list for this domain. 1860e86d1aa8SWill Deacon * This isn't an issue because the STE hasn't been installed yet. 
1861e86d1aa8SWill Deacon */ 1862e86d1aa8SWill Deacon ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); 1863e86d1aa8SWill Deacon if (ret) 1864e86d1aa8SWill Deacon goto out_free_cd_tables; 1865e86d1aa8SWill Deacon 1866e86d1aa8SWill Deacon return 0; 1867e86d1aa8SWill Deacon 1868e86d1aa8SWill Deacon out_free_cd_tables: 1869e86d1aa8SWill Deacon arm_smmu_free_cd_tables(smmu_domain); 1870e86d1aa8SWill Deacon out_free_asid: 1871e86d1aa8SWill Deacon arm_smmu_free_asid(&cfg->cd); 1872e86d1aa8SWill Deacon return ret; 1873e86d1aa8SWill Deacon } 1874e86d1aa8SWill Deacon 1875e86d1aa8SWill Deacon static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, 1876e86d1aa8SWill Deacon struct arm_smmu_master *master, 1877e86d1aa8SWill Deacon struct io_pgtable_cfg *pgtbl_cfg) 1878e86d1aa8SWill Deacon { 1879e86d1aa8SWill Deacon int vmid; 1880e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1881e86d1aa8SWill Deacon struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; 1882e86d1aa8SWill Deacon typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; 1883e86d1aa8SWill Deacon 1884e86d1aa8SWill Deacon vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); 1885e86d1aa8SWill Deacon if (vmid < 0) 1886e86d1aa8SWill Deacon return vmid; 1887e86d1aa8SWill Deacon 1888e86d1aa8SWill Deacon vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; 1889e86d1aa8SWill Deacon cfg->vmid = (u16)vmid; 1890e86d1aa8SWill Deacon cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; 1891e86d1aa8SWill Deacon cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | 1892e86d1aa8SWill Deacon FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | 1893e86d1aa8SWill Deacon FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | 1894e86d1aa8SWill Deacon FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | 1895e86d1aa8SWill Deacon FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | 1896e86d1aa8SWill Deacon FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | 1897e86d1aa8SWill Deacon FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, 
vtcr->ps); 1898e86d1aa8SWill Deacon return 0; 1899e86d1aa8SWill Deacon } 1900e86d1aa8SWill Deacon 1901e86d1aa8SWill Deacon static int arm_smmu_domain_finalise(struct iommu_domain *domain, 1902e86d1aa8SWill Deacon struct arm_smmu_master *master) 1903e86d1aa8SWill Deacon { 1904e86d1aa8SWill Deacon int ret; 1905e86d1aa8SWill Deacon unsigned long ias, oas; 1906e86d1aa8SWill Deacon enum io_pgtable_fmt fmt; 1907e86d1aa8SWill Deacon struct io_pgtable_cfg pgtbl_cfg; 1908e86d1aa8SWill Deacon struct io_pgtable_ops *pgtbl_ops; 1909e86d1aa8SWill Deacon int (*finalise_stage_fn)(struct arm_smmu_domain *, 1910e86d1aa8SWill Deacon struct arm_smmu_master *, 1911e86d1aa8SWill Deacon struct io_pgtable_cfg *); 1912e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 1913e86d1aa8SWill Deacon struct arm_smmu_device *smmu = smmu_domain->smmu; 1914e86d1aa8SWill Deacon 1915e86d1aa8SWill Deacon if (domain->type == IOMMU_DOMAIN_IDENTITY) { 1916e86d1aa8SWill Deacon smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; 1917e86d1aa8SWill Deacon return 0; 1918e86d1aa8SWill Deacon } 1919e86d1aa8SWill Deacon 1920e86d1aa8SWill Deacon /* Restrict the stage to what we can actually support */ 1921e86d1aa8SWill Deacon if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) 1922e86d1aa8SWill Deacon smmu_domain->stage = ARM_SMMU_DOMAIN_S2; 1923e86d1aa8SWill Deacon if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) 1924e86d1aa8SWill Deacon smmu_domain->stage = ARM_SMMU_DOMAIN_S1; 1925e86d1aa8SWill Deacon 1926e86d1aa8SWill Deacon switch (smmu_domain->stage) { 1927e86d1aa8SWill Deacon case ARM_SMMU_DOMAIN_S1: 1928e86d1aa8SWill Deacon ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 
52 : 48; 1929e86d1aa8SWill Deacon ias = min_t(unsigned long, ias, VA_BITS); 1930e86d1aa8SWill Deacon oas = smmu->ias; 1931e86d1aa8SWill Deacon fmt = ARM_64_LPAE_S1; 1932e86d1aa8SWill Deacon finalise_stage_fn = arm_smmu_domain_finalise_s1; 1933e86d1aa8SWill Deacon break; 1934e86d1aa8SWill Deacon case ARM_SMMU_DOMAIN_NESTED: 1935e86d1aa8SWill Deacon case ARM_SMMU_DOMAIN_S2: 1936e86d1aa8SWill Deacon ias = smmu->ias; 1937e86d1aa8SWill Deacon oas = smmu->oas; 1938e86d1aa8SWill Deacon fmt = ARM_64_LPAE_S2; 1939e86d1aa8SWill Deacon finalise_stage_fn = arm_smmu_domain_finalise_s2; 1940e86d1aa8SWill Deacon break; 1941e86d1aa8SWill Deacon default: 1942e86d1aa8SWill Deacon return -EINVAL; 1943e86d1aa8SWill Deacon } 1944e86d1aa8SWill Deacon 1945e86d1aa8SWill Deacon pgtbl_cfg = (struct io_pgtable_cfg) { 1946e86d1aa8SWill Deacon .pgsize_bitmap = smmu->pgsize_bitmap, 1947e86d1aa8SWill Deacon .ias = ias, 1948e86d1aa8SWill Deacon .oas = oas, 1949e86d1aa8SWill Deacon .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, 1950e86d1aa8SWill Deacon .tlb = &arm_smmu_flush_ops, 1951e86d1aa8SWill Deacon .iommu_dev = smmu->dev, 1952e86d1aa8SWill Deacon }; 1953e86d1aa8SWill Deacon 1954e86d1aa8SWill Deacon if (smmu_domain->non_strict) 1955e86d1aa8SWill Deacon pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; 1956e86d1aa8SWill Deacon 1957e86d1aa8SWill Deacon pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); 1958e86d1aa8SWill Deacon if (!pgtbl_ops) 1959e86d1aa8SWill Deacon return -ENOMEM; 1960e86d1aa8SWill Deacon 1961e86d1aa8SWill Deacon domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; 1962e86d1aa8SWill Deacon domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; 1963e86d1aa8SWill Deacon domain->geometry.force_aperture = true; 1964e86d1aa8SWill Deacon 1965e86d1aa8SWill Deacon ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); 1966e86d1aa8SWill Deacon if (ret < 0) { 1967e86d1aa8SWill Deacon free_io_pgtable_ops(pgtbl_ops); 1968e86d1aa8SWill Deacon return ret; 
1969e86d1aa8SWill Deacon } 1970e86d1aa8SWill Deacon 1971e86d1aa8SWill Deacon smmu_domain->pgtbl_ops = pgtbl_ops; 1972e86d1aa8SWill Deacon return 0; 1973e86d1aa8SWill Deacon } 1974e86d1aa8SWill Deacon 1975e86d1aa8SWill Deacon static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) 1976e86d1aa8SWill Deacon { 1977e86d1aa8SWill Deacon __le64 *step; 1978e86d1aa8SWill Deacon struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 1979e86d1aa8SWill Deacon 1980e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { 1981e86d1aa8SWill Deacon struct arm_smmu_strtab_l1_desc *l1_desc; 1982e86d1aa8SWill Deacon int idx; 1983e86d1aa8SWill Deacon 1984e86d1aa8SWill Deacon /* Two-level walk */ 1985e86d1aa8SWill Deacon idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; 1986e86d1aa8SWill Deacon l1_desc = &cfg->l1_desc[idx]; 1987e86d1aa8SWill Deacon idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; 1988e86d1aa8SWill Deacon step = &l1_desc->l2ptr[idx]; 1989e86d1aa8SWill Deacon } else { 1990e86d1aa8SWill Deacon /* Simple linear lookup */ 1991e86d1aa8SWill Deacon step = &cfg->strtab[sid * STRTAB_STE_DWORDS]; 1992e86d1aa8SWill Deacon } 1993e86d1aa8SWill Deacon 1994e86d1aa8SWill Deacon return step; 1995e86d1aa8SWill Deacon } 1996e86d1aa8SWill Deacon 1997e86d1aa8SWill Deacon static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) 1998e86d1aa8SWill Deacon { 1999e86d1aa8SWill Deacon int i, j; 2000e86d1aa8SWill Deacon struct arm_smmu_device *smmu = master->smmu; 2001e86d1aa8SWill Deacon 2002e86d1aa8SWill Deacon for (i = 0; i < master->num_sids; ++i) { 2003e86d1aa8SWill Deacon u32 sid = master->sids[i]; 2004e86d1aa8SWill Deacon __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); 2005e86d1aa8SWill Deacon 2006e86d1aa8SWill Deacon /* Bridged PCI devices may end up with duplicated IDs */ 2007e86d1aa8SWill Deacon for (j = 0; j < i; j++) 2008e86d1aa8SWill Deacon if (master->sids[j] == sid) 2009e86d1aa8SWill Deacon break; 
2010e86d1aa8SWill Deacon if (j < i) 2011e86d1aa8SWill Deacon continue; 2012e86d1aa8SWill Deacon 2013e86d1aa8SWill Deacon arm_smmu_write_strtab_ent(master, sid, step); 2014e86d1aa8SWill Deacon } 2015e86d1aa8SWill Deacon } 2016e86d1aa8SWill Deacon 2017e86d1aa8SWill Deacon static bool arm_smmu_ats_supported(struct arm_smmu_master *master) 2018e86d1aa8SWill Deacon { 2019e86d1aa8SWill Deacon struct device *dev = master->dev; 2020e86d1aa8SWill Deacon struct arm_smmu_device *smmu = master->smmu; 2021e86d1aa8SWill Deacon struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2022e86d1aa8SWill Deacon 2023e86d1aa8SWill Deacon if (!(smmu->features & ARM_SMMU_FEAT_ATS)) 2024e86d1aa8SWill Deacon return false; 2025e86d1aa8SWill Deacon 2026e86d1aa8SWill Deacon if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS)) 2027e86d1aa8SWill Deacon return false; 2028e86d1aa8SWill Deacon 2029e86d1aa8SWill Deacon return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); 2030e86d1aa8SWill Deacon } 2031e86d1aa8SWill Deacon 2032e86d1aa8SWill Deacon static void arm_smmu_enable_ats(struct arm_smmu_master *master) 2033e86d1aa8SWill Deacon { 2034e86d1aa8SWill Deacon size_t stu; 2035e86d1aa8SWill Deacon struct pci_dev *pdev; 2036e86d1aa8SWill Deacon struct arm_smmu_device *smmu = master->smmu; 2037e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = master->domain; 2038e86d1aa8SWill Deacon 2039e86d1aa8SWill Deacon /* Don't enable ATS at the endpoint if it's not enabled in the STE */ 2040e86d1aa8SWill Deacon if (!master->ats_enabled) 2041e86d1aa8SWill Deacon return; 2042e86d1aa8SWill Deacon 2043e86d1aa8SWill Deacon /* Smallest Translation Unit: log2 of the smallest supported granule */ 2044e86d1aa8SWill Deacon stu = __ffs(smmu->pgsize_bitmap); 2045e86d1aa8SWill Deacon pdev = to_pci_dev(master->dev); 2046e86d1aa8SWill Deacon 2047e86d1aa8SWill Deacon atomic_inc(&smmu_domain->nr_ats_masters); 2048e86d1aa8SWill Deacon arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); 2049e86d1aa8SWill Deacon if 
(pci_enable_ats(pdev, stu)) 2050e86d1aa8SWill Deacon dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); 2051e86d1aa8SWill Deacon } 2052e86d1aa8SWill Deacon 2053e86d1aa8SWill Deacon static void arm_smmu_disable_ats(struct arm_smmu_master *master) 2054e86d1aa8SWill Deacon { 2055e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = master->domain; 2056e86d1aa8SWill Deacon 2057e86d1aa8SWill Deacon if (!master->ats_enabled) 2058e86d1aa8SWill Deacon return; 2059e86d1aa8SWill Deacon 2060e86d1aa8SWill Deacon pci_disable_ats(to_pci_dev(master->dev)); 2061e86d1aa8SWill Deacon /* 2062e86d1aa8SWill Deacon * Ensure ATS is disabled at the endpoint before we issue the 2063e86d1aa8SWill Deacon * ATC invalidation via the SMMU. 2064e86d1aa8SWill Deacon */ 2065e86d1aa8SWill Deacon wmb(); 2066e86d1aa8SWill Deacon arm_smmu_atc_inv_master(master); 2067e86d1aa8SWill Deacon atomic_dec(&smmu_domain->nr_ats_masters); 2068e86d1aa8SWill Deacon } 2069e86d1aa8SWill Deacon 2070e86d1aa8SWill Deacon static int arm_smmu_enable_pasid(struct arm_smmu_master *master) 2071e86d1aa8SWill Deacon { 2072e86d1aa8SWill Deacon int ret; 2073e86d1aa8SWill Deacon int features; 2074e86d1aa8SWill Deacon int num_pasids; 2075e86d1aa8SWill Deacon struct pci_dev *pdev; 2076e86d1aa8SWill Deacon 2077e86d1aa8SWill Deacon if (!dev_is_pci(master->dev)) 2078e86d1aa8SWill Deacon return -ENODEV; 2079e86d1aa8SWill Deacon 2080e86d1aa8SWill Deacon pdev = to_pci_dev(master->dev); 2081e86d1aa8SWill Deacon 2082e86d1aa8SWill Deacon features = pci_pasid_features(pdev); 2083e86d1aa8SWill Deacon if (features < 0) 2084e86d1aa8SWill Deacon return features; 2085e86d1aa8SWill Deacon 2086e86d1aa8SWill Deacon num_pasids = pci_max_pasids(pdev); 2087e86d1aa8SWill Deacon if (num_pasids <= 0) 2088e86d1aa8SWill Deacon return num_pasids; 2089e86d1aa8SWill Deacon 2090e86d1aa8SWill Deacon ret = pci_enable_pasid(pdev, features); 2091e86d1aa8SWill Deacon if (ret) { 2092e86d1aa8SWill Deacon dev_err(&pdev->dev, "Failed to enable 
PASID\n"); 2093e86d1aa8SWill Deacon return ret; 2094e86d1aa8SWill Deacon } 2095e86d1aa8SWill Deacon 2096e86d1aa8SWill Deacon master->ssid_bits = min_t(u8, ilog2(num_pasids), 2097e86d1aa8SWill Deacon master->smmu->ssid_bits); 2098e86d1aa8SWill Deacon return 0; 2099e86d1aa8SWill Deacon } 2100e86d1aa8SWill Deacon 2101e86d1aa8SWill Deacon static void arm_smmu_disable_pasid(struct arm_smmu_master *master) 2102e86d1aa8SWill Deacon { 2103e86d1aa8SWill Deacon struct pci_dev *pdev; 2104e86d1aa8SWill Deacon 2105e86d1aa8SWill Deacon if (!dev_is_pci(master->dev)) 2106e86d1aa8SWill Deacon return; 2107e86d1aa8SWill Deacon 2108e86d1aa8SWill Deacon pdev = to_pci_dev(master->dev); 2109e86d1aa8SWill Deacon 2110e86d1aa8SWill Deacon if (!pdev->pasid_enabled) 2111e86d1aa8SWill Deacon return; 2112e86d1aa8SWill Deacon 2113e86d1aa8SWill Deacon master->ssid_bits = 0; 2114e86d1aa8SWill Deacon pci_disable_pasid(pdev); 2115e86d1aa8SWill Deacon } 2116e86d1aa8SWill Deacon 2117e86d1aa8SWill Deacon static void arm_smmu_detach_dev(struct arm_smmu_master *master) 2118e86d1aa8SWill Deacon { 2119e86d1aa8SWill Deacon unsigned long flags; 2120e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = master->domain; 2121e86d1aa8SWill Deacon 2122e86d1aa8SWill Deacon if (!smmu_domain) 2123e86d1aa8SWill Deacon return; 2124e86d1aa8SWill Deacon 2125e86d1aa8SWill Deacon arm_smmu_disable_ats(master); 2126e86d1aa8SWill Deacon 2127e86d1aa8SWill Deacon spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2128e86d1aa8SWill Deacon list_del(&master->domain_head); 2129e86d1aa8SWill Deacon spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2130e86d1aa8SWill Deacon 2131e86d1aa8SWill Deacon master->domain = NULL; 2132e86d1aa8SWill Deacon master->ats_enabled = false; 2133e86d1aa8SWill Deacon arm_smmu_install_ste_for_dev(master); 2134e86d1aa8SWill Deacon } 2135e86d1aa8SWill Deacon 2136e86d1aa8SWill Deacon static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) 2137e86d1aa8SWill Deacon { 
2138e86d1aa8SWill Deacon int ret = 0; 2139e86d1aa8SWill Deacon unsigned long flags; 2140e86d1aa8SWill Deacon struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); 2141e86d1aa8SWill Deacon struct arm_smmu_device *smmu; 2142e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2143e86d1aa8SWill Deacon struct arm_smmu_master *master; 2144e86d1aa8SWill Deacon 2145e86d1aa8SWill Deacon if (!fwspec) 2146e86d1aa8SWill Deacon return -ENOENT; 2147e86d1aa8SWill Deacon 2148e86d1aa8SWill Deacon master = dev_iommu_priv_get(dev); 2149e86d1aa8SWill Deacon smmu = master->smmu; 2150e86d1aa8SWill Deacon 2151e86d1aa8SWill Deacon arm_smmu_detach_dev(master); 2152e86d1aa8SWill Deacon 2153e86d1aa8SWill Deacon mutex_lock(&smmu_domain->init_mutex); 2154e86d1aa8SWill Deacon 2155e86d1aa8SWill Deacon if (!smmu_domain->smmu) { 2156e86d1aa8SWill Deacon smmu_domain->smmu = smmu; 2157e86d1aa8SWill Deacon ret = arm_smmu_domain_finalise(domain, master); 2158e86d1aa8SWill Deacon if (ret) { 2159e86d1aa8SWill Deacon smmu_domain->smmu = NULL; 2160e86d1aa8SWill Deacon goto out_unlock; 2161e86d1aa8SWill Deacon } 2162e86d1aa8SWill Deacon } else if (smmu_domain->smmu != smmu) { 2163e86d1aa8SWill Deacon dev_err(dev, 2164e86d1aa8SWill Deacon "cannot attach to SMMU %s (upstream of %s)\n", 2165e86d1aa8SWill Deacon dev_name(smmu_domain->smmu->dev), 2166e86d1aa8SWill Deacon dev_name(smmu->dev)); 2167e86d1aa8SWill Deacon ret = -ENXIO; 2168e86d1aa8SWill Deacon goto out_unlock; 2169e86d1aa8SWill Deacon } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && 2170e86d1aa8SWill Deacon master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { 2171e86d1aa8SWill Deacon dev_err(dev, 2172e86d1aa8SWill Deacon "cannot attach to incompatible domain (%u SSID bits != %u)\n", 2173e86d1aa8SWill Deacon smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); 2174e86d1aa8SWill Deacon ret = -EINVAL; 2175e86d1aa8SWill Deacon goto out_unlock; 2176e86d1aa8SWill Deacon } 2177e86d1aa8SWill Deacon 2178e86d1aa8SWill Deacon 
master->domain = smmu_domain; 2179e86d1aa8SWill Deacon 2180e86d1aa8SWill Deacon if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) 2181e86d1aa8SWill Deacon master->ats_enabled = arm_smmu_ats_supported(master); 2182e86d1aa8SWill Deacon 2183e86d1aa8SWill Deacon arm_smmu_install_ste_for_dev(master); 2184e86d1aa8SWill Deacon 2185e86d1aa8SWill Deacon spin_lock_irqsave(&smmu_domain->devices_lock, flags); 2186e86d1aa8SWill Deacon list_add(&master->domain_head, &smmu_domain->devices); 2187e86d1aa8SWill Deacon spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); 2188e86d1aa8SWill Deacon 2189e86d1aa8SWill Deacon arm_smmu_enable_ats(master); 2190e86d1aa8SWill Deacon 2191e86d1aa8SWill Deacon out_unlock: 2192e86d1aa8SWill Deacon mutex_unlock(&smmu_domain->init_mutex); 2193e86d1aa8SWill Deacon return ret; 2194e86d1aa8SWill Deacon } 2195e86d1aa8SWill Deacon 2196e86d1aa8SWill Deacon static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, 2197e86d1aa8SWill Deacon phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 2198e86d1aa8SWill Deacon { 2199e86d1aa8SWill Deacon struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; 2200e86d1aa8SWill Deacon 2201e86d1aa8SWill Deacon if (!ops) 2202e86d1aa8SWill Deacon return -ENODEV; 2203e86d1aa8SWill Deacon 2204e46b3c0dSJoerg Roedel return ops->map(ops, iova, paddr, size, prot, gfp); 2205e86d1aa8SWill Deacon } 2206e86d1aa8SWill Deacon 2207e86d1aa8SWill Deacon static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, 2208e86d1aa8SWill Deacon size_t size, struct iommu_iotlb_gather *gather) 2209e86d1aa8SWill Deacon { 2210e86d1aa8SWill Deacon struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); 2211e86d1aa8SWill Deacon struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; 2212e86d1aa8SWill Deacon 2213e86d1aa8SWill Deacon if (!ops) 2214e86d1aa8SWill Deacon return 0; 2215e86d1aa8SWill Deacon 2216e86d1aa8SWill Deacon return ops->unmap(ops, iova, size, gather); 2217e86d1aa8SWill Deacon } 

/* Invalidate all TLB entries for the domain (no-op until finalised) */
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (smmu_domain->smmu)
		arm_smmu_tlb_inv_context(smmu_domain);
}

/* Flush the IOVA range accumulated in @gather after a series of unmaps */
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
			       gather->pgsize, true, smmu_domain);
}

/*
 * Software page-table walk for iommu_iova_to_phys(). Identity domains
 * translate 1:1; an unfinalised domain yields 0.
 */
static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	return ops->iova_to_phys(ops, iova);
}

static struct platform_driver arm_smmu_driver;

/*
 * Look up the arm_smmu_device probed for @fwnode, or NULL if none.
 * NOTE(review): the reference taken by driver_find_device_by_fwnode() is
 * dropped immediately; presumably safe because SMMU devices are never
 * unbound while masters reference them — confirm.
 */
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}

/* Can @sid be represented in this SMMU's (1- or 2-level) stream table? */
static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
{
	unsigned long limit = smmu->strtab_cfg.num_l1_ents;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
		limit *= 1UL << STRTAB_SPLIT;

	return sid < limit;
}

static struct iommu_ops arm_smmu_ops;

/*
 * iommu_ops->probe_device: allocate and initialise the per-device
 * arm_smmu_master state, validate its stream IDs and work out how many
 * SSID bits it can use. Returns the SMMU's iommu_device or an ERR_PTR.
 */
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
	int i, ret;
	struct arm_smmu_device *smmu;
	struct arm_smmu_master *master;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return ERR_PTR(-ENODEV);

	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
		return ERR_PTR(-EBUSY);

	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!smmu)
		return ERR_PTR(-ENODEV);

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return ERR_PTR(-ENOMEM);

	master->dev = dev;
	master->smmu = smmu;
	master->sids = fwspec->ids;
	master->num_sids = fwspec->num_ids;
	dev_iommu_priv_set(dev, master);

	/* Check the SIDs are in range of the SMMU and our stream table */
	for (i = 0; i < master->num_sids; i++) {
		u32 sid = master->sids[i];

		if (!arm_smmu_sid_in_range(smmu, sid)) {
			ret = -ERANGE;
			goto err_free_master;
		}

		/* Ensure l2 strtab is initialised */
		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
			ret = arm_smmu_init_l2_strtab(smmu, sid);
			if (ret)
				goto err_free_master;
		}
	}

	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);

	/*
	 * Note that PASID must be enabled before, and disabled after ATS:
	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
	 *
	 * Behavior is undefined if this bit is Set and the value of the PASID
	 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
	 * are changed.
	 */
	arm_smmu_enable_pasid(master);

	/* Linear CD tables cap the SSID width */
	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
		master->ssid_bits = min_t(u8, master->ssid_bits,
					  CTXDESC_LINEAR_CDMAX);

	return &smmu->iommu;

err_free_master:
	kfree(master);
	dev_iommu_priv_set(dev, NULL);
	return ERR_PTR(ret);
}

/* iommu_ops->release_device: tear down everything probe_device set up */
static void arm_smmu_release_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master *master;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	master = dev_iommu_priv_get(dev);
	arm_smmu_detach_dev(master);
	arm_smmu_disable_pasid(master);
	kfree(master);
	iommu_fwspec_free(dev);
}

static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_group *group;

	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}

/*
 * iommu_ops->domain_get_attr: report NESTING for unmanaged domains and
 * the non-strict (lazy TLB flush) flag for DMA domains.
 */
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}

/*
 * iommu_ops->domain_set_attr: counterpart to the above. NESTING can only
 * be changed before the domain is finalised (-EPERM afterwards).
 */
static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				/* Too late: stage already chosen */
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch(attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

/* Record one stream ID from a firmware "iommus" phandle argument */
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
}

/* Reserve the software MSI IOVA window plus any firmware-described regions */
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};

/* Probing and initialisation functions */

/*
 * Allocate the DMA-coherent ring for one hardware queue (cmdq/evtq/priq),
 * halving the requested size down to a page until the allocation succeeds,
 * then programme the queue's base/producer/consumer bookkeeping.
 */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
				   struct arm_smmu_queue *q,
				   unsigned long prod_off,
				   unsigned long cons_off,
				   size_t dwords, const char *name)
{
	size_t qsz;

	do {
		/* Bytes = entries * dwords-per-entry * 8 */
		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
					      GFP_KERNEL);
		if (q->base || qsz < PAGE_SIZE)
			break;

		q->llq.max_n_shift--;
	} while (1);

	if (!q->base) {
		dev_err(smmu->dev,
			"failed to allocate queue (0x%zx bytes) for %s\n",
			qsz, name);
		return -ENOMEM;
	}

	/* The queue base must be naturally aligned to its size */
	if (!WARN_ON(q->base_dma & (qsz - 1))) {
		dev_info(smmu->dev, "allocated %u entries for %s\n",
			 1 << q->llq.max_n_shift, name);
	}

	q->prod_reg	= arm_smmu_page1_fixup(prod_off, smmu);
	q->cons_reg	= arm_smmu_page1_fixup(cons_off, smmu);
	q->ent_dwords	= dwords;

	q->q_base  = Q_BASE_RWA;
	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);

	q->llq.prod = q->llq.cons = 0;
	return 0;
}

/* devm_add_action() callback: free the cmdq valid-bit map */
static void arm_smmu_cmdq_free_bitmap(void *data)
{
	unsigned long *bitmap = data;
	bitmap_free(bitmap);
}

/*
 * Initialise the command-queue bookkeeping: owner/lock atomics plus a
 * bitmap with one valid bit per queue entry.
 */
static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
{
	int ret = 0;
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
	atomic_long_t *bitmap;

	atomic_set(&cmdq->owner_prod, 0);
	atomic_set(&cmdq->lock, 0);

	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
	if (!bitmap) {
		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
		ret = -ENOMEM;
	} else {
		cmdq->valid_map = bitmap;
		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
	}

	return ret;
}

/* Set up the command, event and (if supported) PRI queues */
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
	int ret;

	/* cmdq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
				      "cmdq");
	if (ret)
		return ret;

	ret = arm_smmu_cmdq_init(smmu);
	if (ret)
		return ret;

	/* evtq */
	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
				      "evtq");
	if (ret)
		return ret;

	/* priq */
	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
		return 0;

	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
				       ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
				       "priq");
}

/*
 * Allocate the first-level descriptor array for a two-level stream table
 * and write an (initially empty) L1 descriptor for each entry.
 */
static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
{
	unsigned int i;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
	void *strtab = smmu->strtab_cfg.strtab;

	cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
	if (!cfg->l1_desc) {
		dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
		return -ENOMEM;
	}

	for (i = 0; i < cfg->num_l1_ents; ++i) {
		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
		strtab += STRTAB_L1_DESC_DWORDS << 3;
	}

	return 0;
}

static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
	void *strtab;
	u64 reg;
	u32 size, l1size;
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

2618e86d1aa8SWill Deacon /* Calculate the L1 size, capped to the SIDSIZE. */ 2619e86d1aa8SWill Deacon size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); 2620e86d1aa8SWill Deacon size = min(size, smmu->sid_bits - STRTAB_SPLIT); 2621e86d1aa8SWill Deacon cfg->num_l1_ents = 1 << size; 2622e86d1aa8SWill Deacon 2623e86d1aa8SWill Deacon size += STRTAB_SPLIT; 2624e86d1aa8SWill Deacon if (size < smmu->sid_bits) 2625e86d1aa8SWill Deacon dev_warn(smmu->dev, 2626e86d1aa8SWill Deacon "2-level strtab only covers %u/%u bits of SID\n", 2627e86d1aa8SWill Deacon size, smmu->sid_bits); 2628e86d1aa8SWill Deacon 2629e86d1aa8SWill Deacon l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); 2630e86d1aa8SWill Deacon strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, 2631e86d1aa8SWill Deacon GFP_KERNEL); 2632e86d1aa8SWill Deacon if (!strtab) { 2633e86d1aa8SWill Deacon dev_err(smmu->dev, 2634e86d1aa8SWill Deacon "failed to allocate l1 stream table (%u bytes)\n", 2635dc898eb8SZenghui Yu l1size); 2636e86d1aa8SWill Deacon return -ENOMEM; 2637e86d1aa8SWill Deacon } 2638e86d1aa8SWill Deacon cfg->strtab = strtab; 2639e86d1aa8SWill Deacon 2640e86d1aa8SWill Deacon /* Configure strtab_base_cfg for 2 levels */ 2641e86d1aa8SWill Deacon reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL); 2642e86d1aa8SWill Deacon reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size); 2643e86d1aa8SWill Deacon reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); 2644e86d1aa8SWill Deacon cfg->strtab_base_cfg = reg; 2645e86d1aa8SWill Deacon 2646e86d1aa8SWill Deacon return arm_smmu_init_l1_strtab(smmu); 2647e86d1aa8SWill Deacon } 2648e86d1aa8SWill Deacon 2649e86d1aa8SWill Deacon static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) 2650e86d1aa8SWill Deacon { 2651e86d1aa8SWill Deacon void *strtab; 2652e86d1aa8SWill Deacon u64 reg; 2653e86d1aa8SWill Deacon u32 size; 2654e86d1aa8SWill Deacon struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; 2655e86d1aa8SWill Deacon 
2656e86d1aa8SWill Deacon size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); 2657e86d1aa8SWill Deacon strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, 2658e86d1aa8SWill Deacon GFP_KERNEL); 2659e86d1aa8SWill Deacon if (!strtab) { 2660e86d1aa8SWill Deacon dev_err(smmu->dev, 2661e86d1aa8SWill Deacon "failed to allocate linear stream table (%u bytes)\n", 2662e86d1aa8SWill Deacon size); 2663e86d1aa8SWill Deacon return -ENOMEM; 2664e86d1aa8SWill Deacon } 2665e86d1aa8SWill Deacon cfg->strtab = strtab; 2666e86d1aa8SWill Deacon cfg->num_l1_ents = 1 << smmu->sid_bits; 2667e86d1aa8SWill Deacon 2668e86d1aa8SWill Deacon /* Configure strtab_base_cfg for a linear table covering all SIDs */ 2669e86d1aa8SWill Deacon reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR); 2670e86d1aa8SWill Deacon reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits); 2671e86d1aa8SWill Deacon cfg->strtab_base_cfg = reg; 2672e86d1aa8SWill Deacon 2673e86d1aa8SWill Deacon arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents); 2674e86d1aa8SWill Deacon return 0; 2675e86d1aa8SWill Deacon } 2676e86d1aa8SWill Deacon 2677e86d1aa8SWill Deacon static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) 2678e86d1aa8SWill Deacon { 2679e86d1aa8SWill Deacon u64 reg; 2680e86d1aa8SWill Deacon int ret; 2681e86d1aa8SWill Deacon 2682e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) 2683e86d1aa8SWill Deacon ret = arm_smmu_init_strtab_2lvl(smmu); 2684e86d1aa8SWill Deacon else 2685e86d1aa8SWill Deacon ret = arm_smmu_init_strtab_linear(smmu); 2686e86d1aa8SWill Deacon 2687e86d1aa8SWill Deacon if (ret) 2688e86d1aa8SWill Deacon return ret; 2689e86d1aa8SWill Deacon 2690e86d1aa8SWill Deacon /* Set the strtab base address */ 2691e86d1aa8SWill Deacon reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK; 2692e86d1aa8SWill Deacon reg |= STRTAB_BASE_RA; 2693e86d1aa8SWill Deacon smmu->strtab_cfg.strtab_base = reg; 2694e86d1aa8SWill Deacon 2695e86d1aa8SWill Deacon /* 
Allocate the first VMID for stage-2 bypass STEs */ 2696e86d1aa8SWill Deacon set_bit(0, smmu->vmid_map); 2697e86d1aa8SWill Deacon return 0; 2698e86d1aa8SWill Deacon } 2699e86d1aa8SWill Deacon 2700e86d1aa8SWill Deacon static int arm_smmu_init_structures(struct arm_smmu_device *smmu) 2701e86d1aa8SWill Deacon { 2702e86d1aa8SWill Deacon int ret; 2703e86d1aa8SWill Deacon 2704e86d1aa8SWill Deacon ret = arm_smmu_init_queues(smmu); 2705e86d1aa8SWill Deacon if (ret) 2706e86d1aa8SWill Deacon return ret; 2707e86d1aa8SWill Deacon 2708e86d1aa8SWill Deacon return arm_smmu_init_strtab(smmu); 2709e86d1aa8SWill Deacon } 2710e86d1aa8SWill Deacon 2711e86d1aa8SWill Deacon static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, 2712e86d1aa8SWill Deacon unsigned int reg_off, unsigned int ack_off) 2713e86d1aa8SWill Deacon { 2714e86d1aa8SWill Deacon u32 reg; 2715e86d1aa8SWill Deacon 2716e86d1aa8SWill Deacon writel_relaxed(val, smmu->base + reg_off); 2717e86d1aa8SWill Deacon return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val, 2718e86d1aa8SWill Deacon 1, ARM_SMMU_POLL_TIMEOUT_US); 2719e86d1aa8SWill Deacon } 2720e86d1aa8SWill Deacon 2721e86d1aa8SWill Deacon /* GBPA is "special" */ 2722e86d1aa8SWill Deacon static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr) 2723e86d1aa8SWill Deacon { 2724e86d1aa8SWill Deacon int ret; 2725e86d1aa8SWill Deacon u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA; 2726e86d1aa8SWill Deacon 2727e86d1aa8SWill Deacon ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), 2728e86d1aa8SWill Deacon 1, ARM_SMMU_POLL_TIMEOUT_US); 2729e86d1aa8SWill Deacon if (ret) 2730e86d1aa8SWill Deacon return ret; 2731e86d1aa8SWill Deacon 2732e86d1aa8SWill Deacon reg &= ~clr; 2733e86d1aa8SWill Deacon reg |= set; 2734e86d1aa8SWill Deacon writel_relaxed(reg | GBPA_UPDATE, gbpa); 2735e86d1aa8SWill Deacon ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE), 2736e86d1aa8SWill Deacon 1, 
ARM_SMMU_POLL_TIMEOUT_US); 2737e86d1aa8SWill Deacon 2738e86d1aa8SWill Deacon if (ret) 2739e86d1aa8SWill Deacon dev_err(smmu->dev, "GBPA not responding to update\n"); 2740e86d1aa8SWill Deacon return ret; 2741e86d1aa8SWill Deacon } 2742e86d1aa8SWill Deacon 2743e86d1aa8SWill Deacon static void arm_smmu_free_msis(void *data) 2744e86d1aa8SWill Deacon { 2745e86d1aa8SWill Deacon struct device *dev = data; 2746e86d1aa8SWill Deacon platform_msi_domain_free_irqs(dev); 2747e86d1aa8SWill Deacon } 2748e86d1aa8SWill Deacon 2749e86d1aa8SWill Deacon static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 2750e86d1aa8SWill Deacon { 2751e86d1aa8SWill Deacon phys_addr_t doorbell; 2752e86d1aa8SWill Deacon struct device *dev = msi_desc_to_dev(desc); 2753e86d1aa8SWill Deacon struct arm_smmu_device *smmu = dev_get_drvdata(dev); 2754e86d1aa8SWill Deacon phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index]; 2755e86d1aa8SWill Deacon 2756e86d1aa8SWill Deacon doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; 2757e86d1aa8SWill Deacon doorbell &= MSI_CFG0_ADDR_MASK; 2758e86d1aa8SWill Deacon 2759e86d1aa8SWill Deacon writeq_relaxed(doorbell, smmu->base + cfg[0]); 2760e86d1aa8SWill Deacon writel_relaxed(msg->data, smmu->base + cfg[1]); 2761e86d1aa8SWill Deacon writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]); 2762e86d1aa8SWill Deacon } 2763e86d1aa8SWill Deacon 2764e86d1aa8SWill Deacon static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) 2765e86d1aa8SWill Deacon { 2766e86d1aa8SWill Deacon struct msi_desc *desc; 2767e86d1aa8SWill Deacon int ret, nvec = ARM_SMMU_MAX_MSIS; 2768e86d1aa8SWill Deacon struct device *dev = smmu->dev; 2769e86d1aa8SWill Deacon 2770e86d1aa8SWill Deacon /* Clear the MSI address regs */ 2771e86d1aa8SWill Deacon writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); 2772e86d1aa8SWill Deacon writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); 2773e86d1aa8SWill Deacon 2774e86d1aa8SWill Deacon if 
(smmu->features & ARM_SMMU_FEAT_PRI) 2775e86d1aa8SWill Deacon writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); 2776e86d1aa8SWill Deacon else 2777e86d1aa8SWill Deacon nvec--; 2778e86d1aa8SWill Deacon 2779e86d1aa8SWill Deacon if (!(smmu->features & ARM_SMMU_FEAT_MSI)) 2780e86d1aa8SWill Deacon return; 2781e86d1aa8SWill Deacon 2782e86d1aa8SWill Deacon if (!dev->msi_domain) { 2783e86d1aa8SWill Deacon dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n"); 2784e86d1aa8SWill Deacon return; 2785e86d1aa8SWill Deacon } 2786e86d1aa8SWill Deacon 2787e86d1aa8SWill Deacon /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ 2788e86d1aa8SWill Deacon ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); 2789e86d1aa8SWill Deacon if (ret) { 2790e86d1aa8SWill Deacon dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n"); 2791e86d1aa8SWill Deacon return; 2792e86d1aa8SWill Deacon } 2793e86d1aa8SWill Deacon 2794e86d1aa8SWill Deacon for_each_msi_entry(desc, dev) { 2795e86d1aa8SWill Deacon switch (desc->platform.msi_index) { 2796e86d1aa8SWill Deacon case EVTQ_MSI_INDEX: 2797e86d1aa8SWill Deacon smmu->evtq.q.irq = desc->irq; 2798e86d1aa8SWill Deacon break; 2799e86d1aa8SWill Deacon case GERROR_MSI_INDEX: 2800e86d1aa8SWill Deacon smmu->gerr_irq = desc->irq; 2801e86d1aa8SWill Deacon break; 2802e86d1aa8SWill Deacon case PRIQ_MSI_INDEX: 2803e86d1aa8SWill Deacon smmu->priq.q.irq = desc->irq; 2804e86d1aa8SWill Deacon break; 2805e86d1aa8SWill Deacon default: /* Unknown */ 2806e86d1aa8SWill Deacon continue; 2807e86d1aa8SWill Deacon } 2808e86d1aa8SWill Deacon } 2809e86d1aa8SWill Deacon 2810e86d1aa8SWill Deacon /* Add callback to free MSIs on teardown */ 2811e86d1aa8SWill Deacon devm_add_action(dev, arm_smmu_free_msis, dev); 2812e86d1aa8SWill Deacon } 2813e86d1aa8SWill Deacon 2814e86d1aa8SWill Deacon static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) 2815e86d1aa8SWill Deacon { 2816e86d1aa8SWill Deacon int irq, ret; 
2817e86d1aa8SWill Deacon 2818e86d1aa8SWill Deacon arm_smmu_setup_msis(smmu); 2819e86d1aa8SWill Deacon 2820e86d1aa8SWill Deacon /* Request interrupt lines */ 2821e86d1aa8SWill Deacon irq = smmu->evtq.q.irq; 2822e86d1aa8SWill Deacon if (irq) { 2823e86d1aa8SWill Deacon ret = devm_request_threaded_irq(smmu->dev, irq, NULL, 2824e86d1aa8SWill Deacon arm_smmu_evtq_thread, 2825e86d1aa8SWill Deacon IRQF_ONESHOT, 2826e86d1aa8SWill Deacon "arm-smmu-v3-evtq", smmu); 2827e86d1aa8SWill Deacon if (ret < 0) 2828e86d1aa8SWill Deacon dev_warn(smmu->dev, "failed to enable evtq irq\n"); 2829e86d1aa8SWill Deacon } else { 2830e86d1aa8SWill Deacon dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n"); 2831e86d1aa8SWill Deacon } 2832e86d1aa8SWill Deacon 2833e86d1aa8SWill Deacon irq = smmu->gerr_irq; 2834e86d1aa8SWill Deacon if (irq) { 2835e86d1aa8SWill Deacon ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler, 2836e86d1aa8SWill Deacon 0, "arm-smmu-v3-gerror", smmu); 2837e86d1aa8SWill Deacon if (ret < 0) 2838e86d1aa8SWill Deacon dev_warn(smmu->dev, "failed to enable gerror irq\n"); 2839e86d1aa8SWill Deacon } else { 2840e86d1aa8SWill Deacon dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n"); 2841e86d1aa8SWill Deacon } 2842e86d1aa8SWill Deacon 2843e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_PRI) { 2844e86d1aa8SWill Deacon irq = smmu->priq.q.irq; 2845e86d1aa8SWill Deacon if (irq) { 2846e86d1aa8SWill Deacon ret = devm_request_threaded_irq(smmu->dev, irq, NULL, 2847e86d1aa8SWill Deacon arm_smmu_priq_thread, 2848e86d1aa8SWill Deacon IRQF_ONESHOT, 2849e86d1aa8SWill Deacon "arm-smmu-v3-priq", 2850e86d1aa8SWill Deacon smmu); 2851e86d1aa8SWill Deacon if (ret < 0) 2852e86d1aa8SWill Deacon dev_warn(smmu->dev, 2853e86d1aa8SWill Deacon "failed to enable priq irq\n"); 2854e86d1aa8SWill Deacon } else { 2855e86d1aa8SWill Deacon dev_warn(smmu->dev, "no priq irq - PRI will be broken\n"); 2856e86d1aa8SWill Deacon } 2857e86d1aa8SWill Deacon } 
2858e86d1aa8SWill Deacon } 2859e86d1aa8SWill Deacon 2860e86d1aa8SWill Deacon static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) 2861e86d1aa8SWill Deacon { 2862e86d1aa8SWill Deacon int ret, irq; 2863e86d1aa8SWill Deacon u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; 2864e86d1aa8SWill Deacon 2865e86d1aa8SWill Deacon /* Disable IRQs first */ 2866e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL, 2867e86d1aa8SWill Deacon ARM_SMMU_IRQ_CTRLACK); 2868e86d1aa8SWill Deacon if (ret) { 2869e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to disable irqs\n"); 2870e86d1aa8SWill Deacon return ret; 2871e86d1aa8SWill Deacon } 2872e86d1aa8SWill Deacon 2873e86d1aa8SWill Deacon irq = smmu->combined_irq; 2874e86d1aa8SWill Deacon if (irq) { 2875e86d1aa8SWill Deacon /* 2876e86d1aa8SWill Deacon * Cavium ThunderX2 implementation doesn't support unique irq 2877e86d1aa8SWill Deacon * lines. Use a single irq line for all the SMMUv3 interrupts. 2878e86d1aa8SWill Deacon */ 2879e86d1aa8SWill Deacon ret = devm_request_threaded_irq(smmu->dev, irq, 2880e86d1aa8SWill Deacon arm_smmu_combined_irq_handler, 2881e86d1aa8SWill Deacon arm_smmu_combined_irq_thread, 2882e86d1aa8SWill Deacon IRQF_ONESHOT, 2883e86d1aa8SWill Deacon "arm-smmu-v3-combined-irq", smmu); 2884e86d1aa8SWill Deacon if (ret < 0) 2885e86d1aa8SWill Deacon dev_warn(smmu->dev, "failed to enable combined irq\n"); 2886e86d1aa8SWill Deacon } else 2887e86d1aa8SWill Deacon arm_smmu_setup_unique_irqs(smmu); 2888e86d1aa8SWill Deacon 2889e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_PRI) 2890e86d1aa8SWill Deacon irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; 2891e86d1aa8SWill Deacon 2892e86d1aa8SWill Deacon /* Enable interrupt generation on the SMMU */ 2893e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, irqen_flags, 2894e86d1aa8SWill Deacon ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); 2895e86d1aa8SWill Deacon if (ret) 2896e86d1aa8SWill Deacon dev_warn(smmu->dev, "failed to enable irqs\n"); 
2897e86d1aa8SWill Deacon 2898e86d1aa8SWill Deacon return 0; 2899e86d1aa8SWill Deacon } 2900e86d1aa8SWill Deacon 2901e86d1aa8SWill Deacon static int arm_smmu_device_disable(struct arm_smmu_device *smmu) 2902e86d1aa8SWill Deacon { 2903e86d1aa8SWill Deacon int ret; 2904e86d1aa8SWill Deacon 2905e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK); 2906e86d1aa8SWill Deacon if (ret) 2907e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to clear cr0\n"); 2908e86d1aa8SWill Deacon 2909e86d1aa8SWill Deacon return ret; 2910e86d1aa8SWill Deacon } 2911e86d1aa8SWill Deacon 2912e86d1aa8SWill Deacon static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) 2913e86d1aa8SWill Deacon { 2914e86d1aa8SWill Deacon int ret; 2915e86d1aa8SWill Deacon u32 reg, enables; 2916e86d1aa8SWill Deacon struct arm_smmu_cmdq_ent cmd; 2917e86d1aa8SWill Deacon 2918e86d1aa8SWill Deacon /* Clear CR0 and sync (disables SMMU and queue processing) */ 2919e86d1aa8SWill Deacon reg = readl_relaxed(smmu->base + ARM_SMMU_CR0); 2920e86d1aa8SWill Deacon if (reg & CR0_SMMUEN) { 2921e86d1aa8SWill Deacon dev_warn(smmu->dev, "SMMU currently enabled! 
Resetting...\n"); 2922e86d1aa8SWill Deacon WARN_ON(is_kdump_kernel() && !disable_bypass); 2923e86d1aa8SWill Deacon arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0); 2924e86d1aa8SWill Deacon } 2925e86d1aa8SWill Deacon 2926e86d1aa8SWill Deacon ret = arm_smmu_device_disable(smmu); 2927e86d1aa8SWill Deacon if (ret) 2928e86d1aa8SWill Deacon return ret; 2929e86d1aa8SWill Deacon 2930e86d1aa8SWill Deacon /* CR1 (table and queue memory attributes) */ 2931e86d1aa8SWill Deacon reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) | 2932e86d1aa8SWill Deacon FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) | 2933e86d1aa8SWill Deacon FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) | 2934e86d1aa8SWill Deacon FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) | 2935e86d1aa8SWill Deacon FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) | 2936e86d1aa8SWill Deacon FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB); 2937e86d1aa8SWill Deacon writel_relaxed(reg, smmu->base + ARM_SMMU_CR1); 2938e86d1aa8SWill Deacon 2939e86d1aa8SWill Deacon /* CR2 (random crap) */ 2940e86d1aa8SWill Deacon reg = CR2_PTM | CR2_RECINVSID | CR2_E2H; 2941e86d1aa8SWill Deacon writel_relaxed(reg, smmu->base + ARM_SMMU_CR2); 2942e86d1aa8SWill Deacon 2943e86d1aa8SWill Deacon /* Stream table */ 2944e86d1aa8SWill Deacon writeq_relaxed(smmu->strtab_cfg.strtab_base, 2945e86d1aa8SWill Deacon smmu->base + ARM_SMMU_STRTAB_BASE); 2946e86d1aa8SWill Deacon writel_relaxed(smmu->strtab_cfg.strtab_base_cfg, 2947e86d1aa8SWill Deacon smmu->base + ARM_SMMU_STRTAB_BASE_CFG); 2948e86d1aa8SWill Deacon 2949e86d1aa8SWill Deacon /* Command queue */ 2950e86d1aa8SWill Deacon writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE); 2951e86d1aa8SWill Deacon writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD); 2952e86d1aa8SWill Deacon writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS); 2953e86d1aa8SWill Deacon 2954e86d1aa8SWill Deacon enables = CR0_CMDQEN; 2955e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 
2956e86d1aa8SWill Deacon ARM_SMMU_CR0ACK); 2957e86d1aa8SWill Deacon if (ret) { 2958e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to enable command queue\n"); 2959e86d1aa8SWill Deacon return ret; 2960e86d1aa8SWill Deacon } 2961e86d1aa8SWill Deacon 2962e86d1aa8SWill Deacon /* Invalidate any cached configuration */ 2963e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_CFGI_ALL; 2964e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2965e86d1aa8SWill Deacon arm_smmu_cmdq_issue_sync(smmu); 2966e86d1aa8SWill Deacon 2967e86d1aa8SWill Deacon /* Invalidate any stale TLB entries */ 2968e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_HYP) { 2969e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; 2970e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2971e86d1aa8SWill Deacon } 2972e86d1aa8SWill Deacon 2973e86d1aa8SWill Deacon cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; 2974e86d1aa8SWill Deacon arm_smmu_cmdq_issue_cmd(smmu, &cmd); 2975e86d1aa8SWill Deacon arm_smmu_cmdq_issue_sync(smmu); 2976e86d1aa8SWill Deacon 2977e86d1aa8SWill Deacon /* Event queue */ 2978e86d1aa8SWill Deacon writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE); 2979e86d1aa8SWill Deacon writel_relaxed(smmu->evtq.q.llq.prod, 2980e86d1aa8SWill Deacon arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu)); 2981e86d1aa8SWill Deacon writel_relaxed(smmu->evtq.q.llq.cons, 2982e86d1aa8SWill Deacon arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu)); 2983e86d1aa8SWill Deacon 2984e86d1aa8SWill Deacon enables |= CR0_EVTQEN; 2985e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 2986e86d1aa8SWill Deacon ARM_SMMU_CR0ACK); 2987e86d1aa8SWill Deacon if (ret) { 2988e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to enable event queue\n"); 2989e86d1aa8SWill Deacon return ret; 2990e86d1aa8SWill Deacon } 2991e86d1aa8SWill Deacon 2992e86d1aa8SWill Deacon /* PRI queue */ 2993e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_PRI) { 2994e86d1aa8SWill Deacon 
writeq_relaxed(smmu->priq.q.q_base, 2995e86d1aa8SWill Deacon smmu->base + ARM_SMMU_PRIQ_BASE); 2996e86d1aa8SWill Deacon writel_relaxed(smmu->priq.q.llq.prod, 2997e86d1aa8SWill Deacon arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu)); 2998e86d1aa8SWill Deacon writel_relaxed(smmu->priq.q.llq.cons, 2999e86d1aa8SWill Deacon arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu)); 3000e86d1aa8SWill Deacon 3001e86d1aa8SWill Deacon enables |= CR0_PRIQEN; 3002e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3003e86d1aa8SWill Deacon ARM_SMMU_CR0ACK); 3004e86d1aa8SWill Deacon if (ret) { 3005e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to enable PRI queue\n"); 3006e86d1aa8SWill Deacon return ret; 3007e86d1aa8SWill Deacon } 3008e86d1aa8SWill Deacon } 3009e86d1aa8SWill Deacon 3010e86d1aa8SWill Deacon if (smmu->features & ARM_SMMU_FEAT_ATS) { 3011e86d1aa8SWill Deacon enables |= CR0_ATSCHK; 3012e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3013e86d1aa8SWill Deacon ARM_SMMU_CR0ACK); 3014e86d1aa8SWill Deacon if (ret) { 3015e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to enable ATS check\n"); 3016e86d1aa8SWill Deacon return ret; 3017e86d1aa8SWill Deacon } 3018e86d1aa8SWill Deacon } 3019e86d1aa8SWill Deacon 3020e86d1aa8SWill Deacon ret = arm_smmu_setup_irqs(smmu); 3021e86d1aa8SWill Deacon if (ret) { 3022e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to setup irqs\n"); 3023e86d1aa8SWill Deacon return ret; 3024e86d1aa8SWill Deacon } 3025e86d1aa8SWill Deacon 3026e86d1aa8SWill Deacon if (is_kdump_kernel()) 3027e86d1aa8SWill Deacon enables &= ~(CR0_EVTQEN | CR0_PRIQEN); 3028e86d1aa8SWill Deacon 3029e86d1aa8SWill Deacon /* Enable the SMMU interface, or ensure bypass */ 3030e86d1aa8SWill Deacon if (!bypass || disable_bypass) { 3031e86d1aa8SWill Deacon enables |= CR0_SMMUEN; 3032e86d1aa8SWill Deacon } else { 3033e86d1aa8SWill Deacon ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT); 3034e86d1aa8SWill Deacon if (ret) 
3035e86d1aa8SWill Deacon return ret; 3036e86d1aa8SWill Deacon } 3037e86d1aa8SWill Deacon ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, 3038e86d1aa8SWill Deacon ARM_SMMU_CR0ACK); 3039e86d1aa8SWill Deacon if (ret) { 3040e86d1aa8SWill Deacon dev_err(smmu->dev, "failed to enable SMMU interface\n"); 3041e86d1aa8SWill Deacon return ret; 3042e86d1aa8SWill Deacon } 3043e86d1aa8SWill Deacon 3044e86d1aa8SWill Deacon return 0; 3045e86d1aa8SWill Deacon } 3046e86d1aa8SWill Deacon 3047e86d1aa8SWill Deacon static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) 3048e86d1aa8SWill Deacon { 3049e86d1aa8SWill Deacon u32 reg; 3050e86d1aa8SWill Deacon bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY; 3051e86d1aa8SWill Deacon 3052e86d1aa8SWill Deacon /* IDR0 */ 3053e86d1aa8SWill Deacon reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0); 3054e86d1aa8SWill Deacon 3055e86d1aa8SWill Deacon /* 2-level structures */ 3056e86d1aa8SWill Deacon if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL) 3057e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB; 3058e86d1aa8SWill Deacon 3059e86d1aa8SWill Deacon if (reg & IDR0_CD2L) 3060e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB; 3061e86d1aa8SWill Deacon 3062e86d1aa8SWill Deacon /* 3063e86d1aa8SWill Deacon * Translation table endianness. 3064e86d1aa8SWill Deacon * We currently require the same endianness as the CPU, but this 3065e86d1aa8SWill Deacon * could be changed later by adding a new IO_PGTABLE_QUIRK. 
3066e86d1aa8SWill Deacon */ 3067e86d1aa8SWill Deacon switch (FIELD_GET(IDR0_TTENDIAN, reg)) { 3068e86d1aa8SWill Deacon case IDR0_TTENDIAN_MIXED: 3069e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE; 3070e86d1aa8SWill Deacon break; 3071e86d1aa8SWill Deacon #ifdef __BIG_ENDIAN 3072e86d1aa8SWill Deacon case IDR0_TTENDIAN_BE: 3073e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_TT_BE; 3074e86d1aa8SWill Deacon break; 3075e86d1aa8SWill Deacon #else 3076e86d1aa8SWill Deacon case IDR0_TTENDIAN_LE: 3077e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_TT_LE; 3078e86d1aa8SWill Deacon break; 3079e86d1aa8SWill Deacon #endif 3080e86d1aa8SWill Deacon default: 3081e86d1aa8SWill Deacon dev_err(smmu->dev, "unknown/unsupported TT endianness!\n"); 3082e86d1aa8SWill Deacon return -ENXIO; 3083e86d1aa8SWill Deacon } 3084e86d1aa8SWill Deacon 3085e86d1aa8SWill Deacon /* Boolean feature flags */ 3086e86d1aa8SWill Deacon if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI) 3087e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_PRI; 3088e86d1aa8SWill Deacon 3089e86d1aa8SWill Deacon if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS) 3090e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_ATS; 3091e86d1aa8SWill Deacon 3092e86d1aa8SWill Deacon if (reg & IDR0_SEV) 3093e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_SEV; 3094e86d1aa8SWill Deacon 3095bd07a20aSBarry Song if (reg & IDR0_MSI) { 3096e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_MSI; 3097bd07a20aSBarry Song if (coherent && !disable_msipolling) 3098bd07a20aSBarry Song smmu->options |= ARM_SMMU_OPT_MSIPOLL; 3099bd07a20aSBarry Song } 3100e86d1aa8SWill Deacon 3101e86d1aa8SWill Deacon if (reg & IDR0_HYP) 3102e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_HYP; 3103e86d1aa8SWill Deacon 3104e86d1aa8SWill Deacon /* 3105e86d1aa8SWill Deacon * The coherency feature as set by FW is used in preference to the ID 3106e86d1aa8SWill Deacon * register, but warn on mismatch. 
3107e86d1aa8SWill Deacon */ 3108e86d1aa8SWill Deacon if (!!(reg & IDR0_COHACC) != coherent) 3109e86d1aa8SWill Deacon dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n", 3110e86d1aa8SWill Deacon coherent ? "true" : "false"); 3111e86d1aa8SWill Deacon 3112e86d1aa8SWill Deacon switch (FIELD_GET(IDR0_STALL_MODEL, reg)) { 3113e86d1aa8SWill Deacon case IDR0_STALL_MODEL_FORCE: 3114e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_STALL_FORCE; 3115df561f66SGustavo A. R. Silva fallthrough; 3116e86d1aa8SWill Deacon case IDR0_STALL_MODEL_STALL: 3117e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_STALLS; 3118e86d1aa8SWill Deacon } 3119e86d1aa8SWill Deacon 3120e86d1aa8SWill Deacon if (reg & IDR0_S1P) 3121e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_TRANS_S1; 3122e86d1aa8SWill Deacon 3123e86d1aa8SWill Deacon if (reg & IDR0_S2P) 3124e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_TRANS_S2; 3125e86d1aa8SWill Deacon 3126e86d1aa8SWill Deacon if (!(reg & (IDR0_S1P | IDR0_S2P))) { 3127e86d1aa8SWill Deacon dev_err(smmu->dev, "no translation support!\n"); 3128e86d1aa8SWill Deacon return -ENXIO; 3129e86d1aa8SWill Deacon } 3130e86d1aa8SWill Deacon 3131e86d1aa8SWill Deacon /* We only support the AArch64 table format at present */ 3132e86d1aa8SWill Deacon switch (FIELD_GET(IDR0_TTF, reg)) { 3133e86d1aa8SWill Deacon case IDR0_TTF_AARCH32_64: 3134e86d1aa8SWill Deacon smmu->ias = 40; 3135df561f66SGustavo A. R. Silva fallthrough; 3136e86d1aa8SWill Deacon case IDR0_TTF_AARCH64: 3137e86d1aa8SWill Deacon break; 3138e86d1aa8SWill Deacon default: 3139e86d1aa8SWill Deacon dev_err(smmu->dev, "AArch64 table format not supported!\n"); 3140e86d1aa8SWill Deacon return -ENXIO; 3141e86d1aa8SWill Deacon } 3142e86d1aa8SWill Deacon 3143e86d1aa8SWill Deacon /* ASID/VMID sizes */ 3144e86d1aa8SWill Deacon smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8; 3145e86d1aa8SWill Deacon smmu->vmid_bits = reg & IDR0_VMID16 ? 
16 : 8; 3146e86d1aa8SWill Deacon 3147e86d1aa8SWill Deacon /* IDR1 */ 3148e86d1aa8SWill Deacon reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1); 3149e86d1aa8SWill Deacon if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) { 3150e86d1aa8SWill Deacon dev_err(smmu->dev, "embedded implementation not supported\n"); 3151e86d1aa8SWill Deacon return -ENXIO; 3152e86d1aa8SWill Deacon } 3153e86d1aa8SWill Deacon 3154e86d1aa8SWill Deacon /* Queue sizes, capped to ensure natural alignment */ 3155e86d1aa8SWill Deacon smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, 3156e86d1aa8SWill Deacon FIELD_GET(IDR1_CMDQS, reg)); 3157e86d1aa8SWill Deacon if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) { 3158e86d1aa8SWill Deacon /* 3159e86d1aa8SWill Deacon * We don't support splitting up batches, so one batch of 3160e86d1aa8SWill Deacon * commands plus an extra sync needs to fit inside the command 3161e86d1aa8SWill Deacon * queue. There's also no way we can handle the weird alignment 3162e86d1aa8SWill Deacon * restrictions on the base pointer for a unit-length queue. 
3163e86d1aa8SWill Deacon */ 3164e86d1aa8SWill Deacon dev_err(smmu->dev, "command queue size <= %d entries not supported\n", 3165e86d1aa8SWill Deacon CMDQ_BATCH_ENTRIES); 3166e86d1aa8SWill Deacon return -ENXIO; 3167e86d1aa8SWill Deacon } 3168e86d1aa8SWill Deacon 3169e86d1aa8SWill Deacon smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT, 3170e86d1aa8SWill Deacon FIELD_GET(IDR1_EVTQS, reg)); 3171e86d1aa8SWill Deacon smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT, 3172e86d1aa8SWill Deacon FIELD_GET(IDR1_PRIQS, reg)); 3173e86d1aa8SWill Deacon 3174e86d1aa8SWill Deacon /* SID/SSID sizes */ 3175e86d1aa8SWill Deacon smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); 3176e86d1aa8SWill Deacon smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); 3177e86d1aa8SWill Deacon 3178e86d1aa8SWill Deacon /* 3179e86d1aa8SWill Deacon * If the SMMU supports fewer bits than would fill a single L2 stream 3180e86d1aa8SWill Deacon * table, use a linear table instead. 3181e86d1aa8SWill Deacon */ 3182e86d1aa8SWill Deacon if (smmu->sid_bits <= STRTAB_SPLIT) 3183e86d1aa8SWill Deacon smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB; 3184e86d1aa8SWill Deacon 3185e86d1aa8SWill Deacon /* IDR3 */ 3186e86d1aa8SWill Deacon reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); 3187e86d1aa8SWill Deacon if (FIELD_GET(IDR3_RIL, reg)) 3188e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_RANGE_INV; 3189e86d1aa8SWill Deacon 3190e86d1aa8SWill Deacon /* IDR5 */ 3191e86d1aa8SWill Deacon reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); 3192e86d1aa8SWill Deacon 3193e86d1aa8SWill Deacon /* Maximum number of outstanding stalls */ 3194e86d1aa8SWill Deacon smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg); 3195e86d1aa8SWill Deacon 3196e86d1aa8SWill Deacon /* Page sizes */ 3197e86d1aa8SWill Deacon if (reg & IDR5_GRAN64K) 3198e86d1aa8SWill Deacon smmu->pgsize_bitmap |= SZ_64K | SZ_512M; 3199e86d1aa8SWill Deacon if (reg & IDR5_GRAN16K) 3200e86d1aa8SWill Deacon smmu->pgsize_bitmap |= SZ_16K | SZ_32M; 
3201e86d1aa8SWill Deacon if (reg & IDR5_GRAN4K) 3202e86d1aa8SWill Deacon smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; 3203e86d1aa8SWill Deacon 3204e86d1aa8SWill Deacon /* Input address size */ 3205e86d1aa8SWill Deacon if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT) 3206e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_VAX; 3207e86d1aa8SWill Deacon 3208e86d1aa8SWill Deacon /* Output address size */ 3209e86d1aa8SWill Deacon switch (FIELD_GET(IDR5_OAS, reg)) { 3210e86d1aa8SWill Deacon case IDR5_OAS_32_BIT: 3211e86d1aa8SWill Deacon smmu->oas = 32; 3212e86d1aa8SWill Deacon break; 3213e86d1aa8SWill Deacon case IDR5_OAS_36_BIT: 3214e86d1aa8SWill Deacon smmu->oas = 36; 3215e86d1aa8SWill Deacon break; 3216e86d1aa8SWill Deacon case IDR5_OAS_40_BIT: 3217e86d1aa8SWill Deacon smmu->oas = 40; 3218e86d1aa8SWill Deacon break; 3219e86d1aa8SWill Deacon case IDR5_OAS_42_BIT: 3220e86d1aa8SWill Deacon smmu->oas = 42; 3221e86d1aa8SWill Deacon break; 3222e86d1aa8SWill Deacon case IDR5_OAS_44_BIT: 3223e86d1aa8SWill Deacon smmu->oas = 44; 3224e86d1aa8SWill Deacon break; 3225e86d1aa8SWill Deacon case IDR5_OAS_52_BIT: 3226e86d1aa8SWill Deacon smmu->oas = 52; 3227e86d1aa8SWill Deacon smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */ 3228e86d1aa8SWill Deacon break; 3229e86d1aa8SWill Deacon default: 3230e86d1aa8SWill Deacon dev_info(smmu->dev, 3231e86d1aa8SWill Deacon "unknown output address size. Truncating to 48-bit\n"); 3232df561f66SGustavo A. R. 
Silva fallthrough; 3233e86d1aa8SWill Deacon case IDR5_OAS_48_BIT: 3234e86d1aa8SWill Deacon smmu->oas = 48; 3235e86d1aa8SWill Deacon } 3236e86d1aa8SWill Deacon 3237e86d1aa8SWill Deacon if (arm_smmu_ops.pgsize_bitmap == -1UL) 3238e86d1aa8SWill Deacon arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; 3239e86d1aa8SWill Deacon else 3240e86d1aa8SWill Deacon arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; 3241e86d1aa8SWill Deacon 3242e86d1aa8SWill Deacon /* Set the DMA mask for our table walker */ 3243e86d1aa8SWill Deacon if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas))) 3244e86d1aa8SWill Deacon dev_warn(smmu->dev, 3245e86d1aa8SWill Deacon "failed to set DMA mask for table walker\n"); 3246e86d1aa8SWill Deacon 3247e86d1aa8SWill Deacon smmu->ias = max(smmu->ias, smmu->oas); 3248e86d1aa8SWill Deacon 3249e86d1aa8SWill Deacon dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n", 3250e86d1aa8SWill Deacon smmu->ias, smmu->oas, smmu->features); 3251e86d1aa8SWill Deacon return 0; 3252e86d1aa8SWill Deacon } 3253e86d1aa8SWill Deacon 3254e86d1aa8SWill Deacon #ifdef CONFIG_ACPI 3255e86d1aa8SWill Deacon static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) 3256e86d1aa8SWill Deacon { 3257e86d1aa8SWill Deacon switch (model) { 3258e86d1aa8SWill Deacon case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: 3259e86d1aa8SWill Deacon smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; 3260e86d1aa8SWill Deacon break; 3261e86d1aa8SWill Deacon case ACPI_IORT_SMMU_V3_HISILICON_HI161X: 3262e86d1aa8SWill Deacon smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; 3263e86d1aa8SWill Deacon break; 3264e86d1aa8SWill Deacon } 3265e86d1aa8SWill Deacon 3266e86d1aa8SWill Deacon dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); 3267e86d1aa8SWill Deacon } 3268e86d1aa8SWill Deacon 3269e86d1aa8SWill Deacon static int arm_smmu_device_acpi_probe(struct platform_device *pdev, 3270e86d1aa8SWill Deacon struct arm_smmu_device *smmu) 3271e86d1aa8SWill Deacon { 3272e86d1aa8SWill 
Deacon struct acpi_iort_smmu_v3 *iort_smmu; 3273e86d1aa8SWill Deacon struct device *dev = smmu->dev; 3274e86d1aa8SWill Deacon struct acpi_iort_node *node; 3275e86d1aa8SWill Deacon 3276e86d1aa8SWill Deacon node = *(struct acpi_iort_node **)dev_get_platdata(dev); 3277e86d1aa8SWill Deacon 3278e86d1aa8SWill Deacon /* Retrieve SMMUv3 specific data */ 3279e86d1aa8SWill Deacon iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; 3280e86d1aa8SWill Deacon 3281e86d1aa8SWill Deacon acpi_smmu_get_options(iort_smmu->model, smmu); 3282e86d1aa8SWill Deacon 3283e86d1aa8SWill Deacon if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) 3284e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_COHERENCY; 3285e86d1aa8SWill Deacon 3286e86d1aa8SWill Deacon return 0; 3287e86d1aa8SWill Deacon } 3288e86d1aa8SWill Deacon #else 3289e86d1aa8SWill Deacon static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, 3290e86d1aa8SWill Deacon struct arm_smmu_device *smmu) 3291e86d1aa8SWill Deacon { 3292e86d1aa8SWill Deacon return -ENODEV; 3293e86d1aa8SWill Deacon } 3294e86d1aa8SWill Deacon #endif 3295e86d1aa8SWill Deacon 3296e86d1aa8SWill Deacon static int arm_smmu_device_dt_probe(struct platform_device *pdev, 3297e86d1aa8SWill Deacon struct arm_smmu_device *smmu) 3298e86d1aa8SWill Deacon { 3299e86d1aa8SWill Deacon struct device *dev = &pdev->dev; 3300e86d1aa8SWill Deacon u32 cells; 3301e86d1aa8SWill Deacon int ret = -EINVAL; 3302e86d1aa8SWill Deacon 3303e86d1aa8SWill Deacon if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells)) 3304e86d1aa8SWill Deacon dev_err(dev, "missing #iommu-cells property\n"); 3305e86d1aa8SWill Deacon else if (cells != 1) 3306e86d1aa8SWill Deacon dev_err(dev, "invalid #iommu-cells value (%d)\n", cells); 3307e86d1aa8SWill Deacon else 3308e86d1aa8SWill Deacon ret = 0; 3309e86d1aa8SWill Deacon 3310e86d1aa8SWill Deacon parse_driver_options(smmu); 3311e86d1aa8SWill Deacon 3312e86d1aa8SWill Deacon if (of_dma_is_coherent(dev->of_node)) 
3313e86d1aa8SWill Deacon smmu->features |= ARM_SMMU_FEAT_COHERENCY; 3314e86d1aa8SWill Deacon 3315e86d1aa8SWill Deacon return ret; 3316e86d1aa8SWill Deacon } 3317e86d1aa8SWill Deacon 3318e86d1aa8SWill Deacon static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu) 3319e86d1aa8SWill Deacon { 3320e86d1aa8SWill Deacon if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY) 3321e86d1aa8SWill Deacon return SZ_64K; 3322e86d1aa8SWill Deacon else 3323e86d1aa8SWill Deacon return SZ_128K; 3324e86d1aa8SWill Deacon } 3325e86d1aa8SWill Deacon 3326e86d1aa8SWill Deacon static int arm_smmu_set_bus_ops(struct iommu_ops *ops) 3327e86d1aa8SWill Deacon { 3328e86d1aa8SWill Deacon int err; 3329e86d1aa8SWill Deacon 3330e86d1aa8SWill Deacon #ifdef CONFIG_PCI 3331e86d1aa8SWill Deacon if (pci_bus_type.iommu_ops != ops) { 3332e86d1aa8SWill Deacon err = bus_set_iommu(&pci_bus_type, ops); 3333e86d1aa8SWill Deacon if (err) 3334e86d1aa8SWill Deacon return err; 3335e86d1aa8SWill Deacon } 3336e86d1aa8SWill Deacon #endif 3337e86d1aa8SWill Deacon #ifdef CONFIG_ARM_AMBA 3338e86d1aa8SWill Deacon if (amba_bustype.iommu_ops != ops) { 3339e86d1aa8SWill Deacon err = bus_set_iommu(&amba_bustype, ops); 3340e86d1aa8SWill Deacon if (err) 3341e86d1aa8SWill Deacon goto err_reset_pci_ops; 3342e86d1aa8SWill Deacon } 3343e86d1aa8SWill Deacon #endif 3344e86d1aa8SWill Deacon if (platform_bus_type.iommu_ops != ops) { 3345e86d1aa8SWill Deacon err = bus_set_iommu(&platform_bus_type, ops); 3346e86d1aa8SWill Deacon if (err) 3347e86d1aa8SWill Deacon goto err_reset_amba_ops; 3348e86d1aa8SWill Deacon } 3349e86d1aa8SWill Deacon 3350e86d1aa8SWill Deacon return 0; 3351e86d1aa8SWill Deacon 3352e86d1aa8SWill Deacon err_reset_amba_ops: 3353e86d1aa8SWill Deacon #ifdef CONFIG_ARM_AMBA 3354e86d1aa8SWill Deacon bus_set_iommu(&amba_bustype, NULL); 3355e86d1aa8SWill Deacon #endif 3356e86d1aa8SWill Deacon err_reset_pci_ops: __maybe_unused; 3357e86d1aa8SWill Deacon #ifdef CONFIG_PCI 3358e86d1aa8SWill Deacon 
bus_set_iommu(&pci_bus_type, NULL); 3359e86d1aa8SWill Deacon #endif 3360e86d1aa8SWill Deacon return err; 3361e86d1aa8SWill Deacon } 3362e86d1aa8SWill Deacon 3363e86d1aa8SWill Deacon static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start, 3364e86d1aa8SWill Deacon resource_size_t size) 3365e86d1aa8SWill Deacon { 3366e86d1aa8SWill Deacon struct resource res = { 3367e86d1aa8SWill Deacon .flags = IORESOURCE_MEM, 3368e86d1aa8SWill Deacon .start = start, 3369e86d1aa8SWill Deacon .end = start + size - 1, 3370e86d1aa8SWill Deacon }; 3371e86d1aa8SWill Deacon 3372e86d1aa8SWill Deacon return devm_ioremap_resource(dev, &res); 3373e86d1aa8SWill Deacon } 3374e86d1aa8SWill Deacon 3375e86d1aa8SWill Deacon static int arm_smmu_device_probe(struct platform_device *pdev) 3376e86d1aa8SWill Deacon { 3377e86d1aa8SWill Deacon int irq, ret; 3378e86d1aa8SWill Deacon struct resource *res; 3379e86d1aa8SWill Deacon resource_size_t ioaddr; 3380e86d1aa8SWill Deacon struct arm_smmu_device *smmu; 3381e86d1aa8SWill Deacon struct device *dev = &pdev->dev; 3382e86d1aa8SWill Deacon bool bypass; 3383e86d1aa8SWill Deacon 3384e86d1aa8SWill Deacon smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); 3385e86d1aa8SWill Deacon if (!smmu) { 3386e86d1aa8SWill Deacon dev_err(dev, "failed to allocate arm_smmu_device\n"); 3387e86d1aa8SWill Deacon return -ENOMEM; 3388e86d1aa8SWill Deacon } 3389e86d1aa8SWill Deacon smmu->dev = dev; 3390e86d1aa8SWill Deacon 3391e86d1aa8SWill Deacon if (dev->of_node) { 3392e86d1aa8SWill Deacon ret = arm_smmu_device_dt_probe(pdev, smmu); 3393e86d1aa8SWill Deacon } else { 3394e86d1aa8SWill Deacon ret = arm_smmu_device_acpi_probe(pdev, smmu); 3395e86d1aa8SWill Deacon if (ret == -ENODEV) 3396e86d1aa8SWill Deacon return ret; 3397e86d1aa8SWill Deacon } 3398e86d1aa8SWill Deacon 3399e86d1aa8SWill Deacon /* Set bypass mode according to firmware probing result */ 3400e86d1aa8SWill Deacon bypass = !!ret; 3401e86d1aa8SWill Deacon 3402e86d1aa8SWill Deacon /* Base address 
*/ 3403e86d1aa8SWill Deacon res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 3404e86d1aa8SWill Deacon if (resource_size(res) < arm_smmu_resource_size(smmu)) { 3405e86d1aa8SWill Deacon dev_err(dev, "MMIO region too small (%pr)\n", res); 3406e86d1aa8SWill Deacon return -EINVAL; 3407e86d1aa8SWill Deacon } 3408e86d1aa8SWill Deacon ioaddr = res->start; 3409e86d1aa8SWill Deacon 3410e86d1aa8SWill Deacon /* 3411e86d1aa8SWill Deacon * Don't map the IMPLEMENTATION DEFINED regions, since they may contain 3412e86d1aa8SWill Deacon * the PMCG registers which are reserved by the PMU driver. 3413e86d1aa8SWill Deacon */ 3414e86d1aa8SWill Deacon smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ); 3415e86d1aa8SWill Deacon if (IS_ERR(smmu->base)) 3416e86d1aa8SWill Deacon return PTR_ERR(smmu->base); 3417e86d1aa8SWill Deacon 3418e86d1aa8SWill Deacon if (arm_smmu_resource_size(smmu) > SZ_64K) { 3419e86d1aa8SWill Deacon smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K, 3420e86d1aa8SWill Deacon ARM_SMMU_REG_SZ); 3421e86d1aa8SWill Deacon if (IS_ERR(smmu->page1)) 3422e86d1aa8SWill Deacon return PTR_ERR(smmu->page1); 3423e86d1aa8SWill Deacon } else { 3424e86d1aa8SWill Deacon smmu->page1 = smmu->base; 3425e86d1aa8SWill Deacon } 3426e86d1aa8SWill Deacon 3427e86d1aa8SWill Deacon /* Interrupt lines */ 3428e86d1aa8SWill Deacon 3429e86d1aa8SWill Deacon irq = platform_get_irq_byname_optional(pdev, "combined"); 3430e86d1aa8SWill Deacon if (irq > 0) 3431e86d1aa8SWill Deacon smmu->combined_irq = irq; 3432e86d1aa8SWill Deacon else { 3433e86d1aa8SWill Deacon irq = platform_get_irq_byname_optional(pdev, "eventq"); 3434e86d1aa8SWill Deacon if (irq > 0) 3435e86d1aa8SWill Deacon smmu->evtq.q.irq = irq; 3436e86d1aa8SWill Deacon 3437e86d1aa8SWill Deacon irq = platform_get_irq_byname_optional(pdev, "priq"); 3438e86d1aa8SWill Deacon if (irq > 0) 3439e86d1aa8SWill Deacon smmu->priq.q.irq = irq; 3440e86d1aa8SWill Deacon 3441e86d1aa8SWill Deacon irq = platform_get_irq_byname_optional(pdev, 
"gerror"); 3442e86d1aa8SWill Deacon if (irq > 0) 3443e86d1aa8SWill Deacon smmu->gerr_irq = irq; 3444e86d1aa8SWill Deacon } 3445e86d1aa8SWill Deacon /* Probe the h/w */ 3446e86d1aa8SWill Deacon ret = arm_smmu_device_hw_probe(smmu); 3447e86d1aa8SWill Deacon if (ret) 3448e86d1aa8SWill Deacon return ret; 3449e86d1aa8SWill Deacon 3450e86d1aa8SWill Deacon /* Initialise in-memory data structures */ 3451e86d1aa8SWill Deacon ret = arm_smmu_init_structures(smmu); 3452e86d1aa8SWill Deacon if (ret) 3453e86d1aa8SWill Deacon return ret; 3454e86d1aa8SWill Deacon 3455e86d1aa8SWill Deacon /* Record our private device structure */ 3456e86d1aa8SWill Deacon platform_set_drvdata(pdev, smmu); 3457e86d1aa8SWill Deacon 3458e86d1aa8SWill Deacon /* Reset the device */ 3459e86d1aa8SWill Deacon ret = arm_smmu_device_reset(smmu, bypass); 3460e86d1aa8SWill Deacon if (ret) 3461e86d1aa8SWill Deacon return ret; 3462e86d1aa8SWill Deacon 3463e86d1aa8SWill Deacon /* And we're up. Go go go! */ 3464e86d1aa8SWill Deacon ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, 3465e86d1aa8SWill Deacon "smmu3.%pa", &ioaddr); 3466e86d1aa8SWill Deacon if (ret) 3467e86d1aa8SWill Deacon return ret; 3468e86d1aa8SWill Deacon 3469e86d1aa8SWill Deacon iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops); 3470e86d1aa8SWill Deacon iommu_device_set_fwnode(&smmu->iommu, dev->fwnode); 3471e86d1aa8SWill Deacon 3472e86d1aa8SWill Deacon ret = iommu_device_register(&smmu->iommu); 3473e86d1aa8SWill Deacon if (ret) { 3474e86d1aa8SWill Deacon dev_err(dev, "Failed to register iommu\n"); 3475e86d1aa8SWill Deacon return ret; 3476e86d1aa8SWill Deacon } 3477e86d1aa8SWill Deacon 3478e86d1aa8SWill Deacon return arm_smmu_set_bus_ops(&arm_smmu_ops); 3479e86d1aa8SWill Deacon } 3480e86d1aa8SWill Deacon 3481e86d1aa8SWill Deacon static int arm_smmu_device_remove(struct platform_device *pdev) 3482e86d1aa8SWill Deacon { 3483e86d1aa8SWill Deacon struct arm_smmu_device *smmu = platform_get_drvdata(pdev); 3484e86d1aa8SWill Deacon 3485e86d1aa8SWill 
Deacon arm_smmu_set_bus_ops(NULL); 3486e86d1aa8SWill Deacon iommu_device_unregister(&smmu->iommu); 3487e86d1aa8SWill Deacon iommu_device_sysfs_remove(&smmu->iommu); 3488e86d1aa8SWill Deacon arm_smmu_device_disable(smmu); 3489e86d1aa8SWill Deacon 3490e86d1aa8SWill Deacon return 0; 3491e86d1aa8SWill Deacon } 3492e86d1aa8SWill Deacon 3493e86d1aa8SWill Deacon static void arm_smmu_device_shutdown(struct platform_device *pdev) 3494e86d1aa8SWill Deacon { 3495e86d1aa8SWill Deacon arm_smmu_device_remove(pdev); 3496e86d1aa8SWill Deacon } 3497e86d1aa8SWill Deacon 3498e86d1aa8SWill Deacon static const struct of_device_id arm_smmu_of_match[] = { 3499e86d1aa8SWill Deacon { .compatible = "arm,smmu-v3", }, 3500e86d1aa8SWill Deacon { }, 3501e86d1aa8SWill Deacon }; 3502e86d1aa8SWill Deacon MODULE_DEVICE_TABLE(of, arm_smmu_of_match); 3503e86d1aa8SWill Deacon 3504e86d1aa8SWill Deacon static struct platform_driver arm_smmu_driver = { 3505e86d1aa8SWill Deacon .driver = { 3506e86d1aa8SWill Deacon .name = "arm-smmu-v3", 3507e86d1aa8SWill Deacon .of_match_table = arm_smmu_of_match, 3508e86d1aa8SWill Deacon .suppress_bind_attrs = true, 3509e86d1aa8SWill Deacon }, 3510e86d1aa8SWill Deacon .probe = arm_smmu_device_probe, 3511e86d1aa8SWill Deacon .remove = arm_smmu_device_remove, 3512e86d1aa8SWill Deacon .shutdown = arm_smmu_device_shutdown, 3513e86d1aa8SWill Deacon }; 3514e86d1aa8SWill Deacon module_platform_driver(arm_smmu_driver); 3515e86d1aa8SWill Deacon 3516e86d1aa8SWill Deacon MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); 3517e86d1aa8SWill Deacon MODULE_AUTHOR("Will Deacon <will@kernel.org>"); 3518e86d1aa8SWill Deacon MODULE_ALIAS("platform:arm-smmu-v3"); 3519e86d1aa8SWill Deacon MODULE_LICENSE("GPL v2"); 3520