1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2019-2020 Ruslan Bukin <br@bsdpad.com> 5 * 6 * This software was developed by SRI International and the University of 7 * Cambridge Computer Laboratory (Department of Computer Science and 8 * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the 9 * DARPA SSITH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Hardware overview. 35 * 36 * An incoming transaction from a peripheral device has an address, size, 37 * attributes and StreamID. 38 * 39 * In case of PCI-based devices, StreamID is a PCI rid. 40 * 41 * The StreamID is used to select a Stream Table Entry (STE) in a Stream table, 42 * which contains per-device configuration. 43 * 44 * Stream table is a linear or 2-level walk table (this driver supports both). 45 * Note that a linear table could occupy 1GB or more of memory depending on 46 * sid_bits value. 47 * 48 * STE is used to locate a Context Descriptor, which is a struct in memory 49 * that describes stages of translation, translation table type, pointer to 50 * level 0 of page tables, ASID, etc. 51 * 52 * Hardware supports two stages of translation: Stage1 (S1) and Stage2 (S2): 53 * o S1 is used for the host machine traffic translation 54 * o S2 is for a hypervisor 55 * 56 * This driver enables S1 stage with standard AArch64 page tables. 57 * 58 * Note that SMMU does not share TLB with a main CPU. 59 * Command queue is used by this driver to Invalidate SMMU TLB, STE cache. 60 * 61 * An arm64 SoC could have more than one SMMU instance. 62 * ACPI IORT table describes which SMMU unit is assigned for a particular 63 * peripheral device. 64 * 65 * Queues. 66 * 67 * Register interface and Memory-based circular buffer queues are used 68 * to interface SMMU. 69 * 70 * These are a Command queue for commands to send to the SMMU and an Event 71 * queue for event/fault reports from the SMMU. Optionally PRI queue is 72 * designed for PCIe page requests reception. 73 * 74 * Note that not every hardware supports PRI services. For instance they were 75 * not found in Neoverse N1 SDP machine. 76 * (This drivers does not implement PRI queue.) 77 * 78 * All SMMU queues are arranged as circular buffers in memory. They are used 79 * in a producer-consumer fashion so that an output queue contains data 80 * produced by the SMMU and consumed by software. 81 * An input queue contains data produced by software, consumed by the SMMU. 82 * 83 * Interrupts. 84 * 85 * Interrupts are not required by this driver for normal operation. 86 * The standard wired interrupt is only triggered when an event comes from 87 * the SMMU, which is only in a case of errors (e.g. translation fault). 88 */ 89 90 #include "opt_platform.h" 91 #include "opt_acpi.h" 92 93 #include <sys/cdefs.h> 94 #include <sys/param.h> 95 #include <sys/bitstring.h> 96 #include <sys/bus.h> 97 #include <sys/kernel.h> 98 #include <sys/malloc.h> 99 #include <sys/mutex.h> 100 #include <sys/rman.h> 101 #include <sys/lock.h> 102 #include <sys/sysctl.h> 103 #include <sys/tree.h> 104 #include <sys/taskqueue.h> 105 #include <vm/vm.h> 106 #include <vm/vm_page.h> 107 #ifdef DEV_ACPI 108 #include <contrib/dev/acpica/include/acpi.h> 109 #include <dev/acpica/acpivar.h> 110 #endif 111 #include <dev/pci/pcireg.h> 112 #include <dev/pci/pcivar.h> 113 #include <dev/iommu/iommu.h> 114 #include <arm64/iommu/iommu_pmap.h> 115 116 #include <machine/bus.h> 117 118 #ifdef FDT 119 #include <dev/fdt/fdt_common.h> 120 #include <dev/ofw/ofw_bus.h> 121 #include <dev/ofw/ofw_bus_subr.h> 122 #endif 123 124 #include "iommu.h" 125 #include "iommu_if.h" 126 127 #include "smmureg.h" 128 #include "smmuvar.h" 129 130 #define STRTAB_L1_SZ_SHIFT 20 131 #define STRTAB_SPLIT 8 132 133 #define STRTAB_L1_DESC_L2PTR_M (0x3fffffffffff << 6) 134 #define STRTAB_L1_DESC_DWORDS 1 135 136 #define STRTAB_STE_DWORDS 8 137 138 #define CMDQ_ENTRY_DWORDS 2 139 #define EVTQ_ENTRY_DWORDS 4 140 #define PRIQ_ENTRY_DWORDS 2 141 142 #define CD_DWORDS 8 143 144 #define Q_WRP(q, p) ((p) & (1 << (q)->size_log2)) 145 #define Q_IDX(q, p) ((p) & ((1 << (q)->size_log2) - 1)) 146 #define Q_OVF(p) ((p) & (1 << 31)) /* Event queue overflowed */ 147 148 #define SMMU_Q_ALIGN (64 * 1024) 149 150 #define MAXADDR_48BIT 0xFFFFFFFFFFFFUL 151 #define MAXADDR_52BIT 0xFFFFFFFFFFFFFUL 152 153 static struct resource_spec smmu_spec[] = { 154 { SYS_RES_MEMORY, 0, RF_ACTIVE }, 155 { SYS_RES_IRQ, 0, RF_ACTIVE }, 156 { SYS_RES_IRQ, 1, RF_ACTIVE | RF_OPTIONAL }, 157 { SYS_RES_IRQ, 2, RF_ACTIVE }, 158 { SYS_RES_IRQ, 3, RF_ACTIVE }, 159 RESOURCE_SPEC_END 160 }; 161 162 MALLOC_DEFINE(M_SMMU, "SMMU", SMMU_DEVSTR); 163 164 #define dprintf(fmt, ...) 165 166 struct smmu_event { 167 int ident; 168 char *str; 169 char *msg; 170 }; 171 172 static struct smmu_event events[] = { 173 { 0x01, "F_UUT", 174 "Unsupported Upstream Transaction."}, 175 { 0x02, "C_BAD_STREAMID", 176 "Transaction StreamID out of range."}, 177 { 0x03, "F_STE_FETCH", 178 "Fetch of STE caused external abort."}, 179 { 0x04, "C_BAD_STE", 180 "Used STE invalid."}, 181 { 0x05, "F_BAD_ATS_TREQ", 182 "Address Translation Request disallowed for a StreamID " 183 "and a PCIe ATS Translation Request received."}, 184 { 0x06, "F_STREAM_DISABLED", 185 "The STE of a transaction marks non-substream transactions " 186 "disabled."}, 187 { 0x07, "F_TRANSL_FORBIDDEN", 188 "An incoming PCIe transaction is marked Translated but " 189 "SMMU bypass is disallowed for this StreamID."}, 190 { 0x08, "C_BAD_SUBSTREAMID", 191 "Incoming SubstreamID present, but configuration is invalid."}, 192 { 0x09, "F_CD_FETCH", 193 "Fetch of CD caused external abort."}, 194 { 0x0a, "C_BAD_CD", 195 "Fetched CD invalid."}, 196 { 0x0b, "F_WALK_EABT", 197 "An external abort occurred fetching (or updating) " 198 "a translation table descriptor."}, 199 { 0x10, "F_TRANSLATION", 200 "Translation fault."}, 201 { 0x11, "F_ADDR_SIZE", 202 "Address Size fault."}, 203 { 0x12, "F_ACCESS", 204 "Access flag fault due to AF == 0 in a page or block TTD."}, 205 { 0x13, "F_PERMISSION", 206 "Permission fault occurred on page access."}, 207 { 0x20, "F_TLB_CONFLICT", 208 "A TLB conflict occurred because of the transaction."}, 209 { 0x21, "F_CFG_CONFLICT", 210 "A configuration cache conflict occurred due to " 211 "the transaction."}, 212 { 0x24, "E_PAGE_REQUEST", 213 "Speculative page request hint."}, 214 { 0x25, "F_VMS_FETCH", 215 "Fetch of VMS caused external abort."}, 216 { 0, NULL, NULL }, 217 }; 218 219 static int 220 smmu_q_has_space(struct smmu_queue *q) 221 { 222 223 /* 224 * See 6.3.27 SMMU_CMDQ_PROD 225 * 226 * There is space in the queue for additional commands if: 227 * SMMU_CMDQ_CONS.RD != SMMU_CMDQ_PROD.WR || 228 * SMMU_CMDQ_CONS.RD_WRAP == SMMU_CMDQ_PROD.WR_WRAP 229 */ 230 231 if (Q_IDX(q, q->lc.cons) != Q_IDX(q, q->lc.prod) || 232 Q_WRP(q, q->lc.cons) == Q_WRP(q, q->lc.prod)) 233 return (1); 234 235 return (0); 236 } 237 238 static int 239 smmu_q_empty(struct smmu_queue *q) 240 { 241 242 if (Q_IDX(q, q->lc.cons) == Q_IDX(q, q->lc.prod) && 243 Q_WRP(q, q->lc.cons) == Q_WRP(q, q->lc.prod)) 244 return (1); 245 246 return (0); 247 } 248 249 static int __unused 250 smmu_q_consumed(struct smmu_queue *q, uint32_t prod) 251 { 252 253 if ((Q_WRP(q, q->lc.cons) == Q_WRP(q, prod)) && 254 (Q_IDX(q, q->lc.cons) >= Q_IDX(q, prod))) 255 return (1); 256 257 if ((Q_WRP(q, q->lc.cons) != Q_WRP(q, prod)) && 258 (Q_IDX(q, q->lc.cons) <= Q_IDX(q, prod))) 259 return (1); 260 261 return (0); 262 } 263 264 static uint32_t 265 smmu_q_inc_cons(struct smmu_queue *q) 266 { 267 uint32_t cons; 268 uint32_t val; 269 270 cons = (Q_WRP(q, q->lc.cons) | Q_IDX(q, q->lc.cons)) + 1; 271 val = (Q_OVF(q->lc.cons) | Q_WRP(q, cons) | Q_IDX(q, cons)); 272 273 return (val); 274 } 275 276 static uint32_t 277 smmu_q_inc_prod(struct smmu_queue *q) 278 { 279 uint32_t prod; 280 uint32_t val; 281 282 prod = (Q_WRP(q, q->lc.prod) | Q_IDX(q, q->lc.prod)) + 1; 283 val = (Q_OVF(q->lc.prod) | Q_WRP(q, prod) | Q_IDX(q, prod)); 284 285 return (val); 286 } 287 288 static int 289 smmu_write_ack(struct smmu_softc *sc, uint32_t reg, 290 uint32_t reg_ack, uint32_t val) 291 { 292 uint32_t v; 293 int timeout; 294 295 timeout = 100000; 296 297 bus_write_4(sc->res[0], reg, val); 298 299 do { 300 v = bus_read_4(sc->res[0], reg_ack); 301 if (v == val) 302 break; 303 } while (timeout--); 304 305 if (timeout <= 0) { 306 device_printf(sc->dev, "Failed to write reg.\n"); 307 return (-1); 308 } 309 310 return (0); 311 } 312 313 static inline int 314 ilog2(long x) 315 { 316 317 KASSERT(x > 0 && powerof2(x), ("%s: invalid arg %ld", __func__, x)); 318 319 return (flsl(x) - 1); 320 } 321 322 static int 323 smmu_init_queue(struct smmu_softc *sc, struct smmu_queue *q, 324 uint32_t prod_off, uint32_t cons_off, uint32_t dwords) 325 { 326 int sz; 327 328 sz = (1 << q->size_log2) * dwords * 8; 329 330 /* Set up the command circular buffer */ 331 q->vaddr = contigmalloc(sz, M_SMMU, 332 M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, SMMU_Q_ALIGN, 0); 333 if (q->vaddr == NULL) { 334 device_printf(sc->dev, "failed to allocate %d bytes\n", sz); 335 return (-1); 336 } 337 338 q->prod_off = prod_off; 339 q->cons_off = cons_off; 340 q->paddr = vtophys(q->vaddr); 341 342 q->base = CMDQ_BASE_RA | EVENTQ_BASE_WA | PRIQ_BASE_WA; 343 q->base |= q->paddr & Q_BASE_ADDR_M; 344 q->base |= q->size_log2 << Q_LOG2SIZE_S; 345 346 return (0); 347 } 348 349 static int 350 smmu_init_queues(struct smmu_softc *sc) 351 { 352 int err; 353 354 /* Command queue. */ 355 err = smmu_init_queue(sc, &sc->cmdq, 356 SMMU_CMDQ_PROD, SMMU_CMDQ_CONS, CMDQ_ENTRY_DWORDS); 357 if (err) 358 return (ENXIO); 359 360 /* Event queue. */ 361 err = smmu_init_queue(sc, &sc->evtq, 362 SMMU_EVENTQ_PROD, SMMU_EVENTQ_CONS, EVTQ_ENTRY_DWORDS); 363 if (err) 364 return (ENXIO); 365 366 if (!(sc->features & SMMU_FEATURE_PRI)) 367 return (0); 368 369 /* PRI queue. */ 370 err = smmu_init_queue(sc, &sc->priq, 371 SMMU_PRIQ_PROD, SMMU_PRIQ_CONS, PRIQ_ENTRY_DWORDS); 372 if (err) 373 return (ENXIO); 374 375 return (0); 376 } 377 378 /* 379 * Dump 2LVL or linear STE. 380 */ 381 static void 382 smmu_dump_ste(struct smmu_softc *sc, int sid) 383 { 384 struct smmu_strtab *strtab; 385 struct l1_desc *l1_desc; 386 uint64_t *ste, *l1; 387 int i; 388 389 strtab = &sc->strtab; 390 391 if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) { 392 i = sid >> STRTAB_SPLIT; 393 l1 = (void *)((uint64_t)strtab->vaddr + 394 STRTAB_L1_DESC_DWORDS * 8 * i); 395 device_printf(sc->dev, "L1 ste == %lx\n", l1[0]); 396 397 l1_desc = &strtab->l1[i]; 398 ste = l1_desc->va; 399 if (ste == NULL) /* L2 is not initialized */ 400 return; 401 } else { 402 ste = (void *)((uint64_t)strtab->vaddr + 403 sid * (STRTAB_STE_DWORDS << 3)); 404 } 405 406 /* Dump L2 or linear STE. */ 407 for (i = 0; i < STRTAB_STE_DWORDS; i++) 408 device_printf(sc->dev, "ste[%d] == %lx\n", i, ste[i]); 409 } 410 411 static void __unused 412 smmu_dump_cd(struct smmu_softc *sc, struct smmu_cd *cd) 413 { 414 uint64_t *vaddr; 415 int i; 416 417 device_printf(sc->dev, "%s\n", __func__); 418 419 vaddr = cd->vaddr; 420 for (i = 0; i < CD_DWORDS; i++) 421 device_printf(sc->dev, "cd[%d] == %lx\n", i, vaddr[i]); 422 } 423 424 static void 425 smmu_evtq_dequeue(struct smmu_softc *sc, uint32_t *evt) 426 { 427 struct smmu_queue *evtq; 428 void *entry_addr; 429 430 evtq = &sc->evtq; 431 432 evtq->lc.val = bus_read_8(sc->res[0], evtq->prod_off); 433 entry_addr = (void *)((uint64_t)evtq->vaddr + 434 evtq->lc.cons * EVTQ_ENTRY_DWORDS * 8); 435 memcpy(evt, entry_addr, EVTQ_ENTRY_DWORDS * 8); 436 evtq->lc.cons = smmu_q_inc_cons(evtq); 437 bus_write_4(sc->res[0], evtq->cons_off, evtq->lc.cons); 438 } 439 440 static void 441 smmu_print_event(struct smmu_softc *sc, uint32_t *evt) 442 { 443 struct smmu_event *ev; 444 uintptr_t input_addr; 445 uint8_t event_id; 446 device_t dev; 447 int sid; 448 int i; 449 450 dev = sc->dev; 451 452 ev = NULL; 453 event_id = evt[0] & 0xff; 454 for (i = 0; events[i].ident != 0; i++) { 455 if (events[i].ident == event_id) { 456 ev = &events[i]; 457 break; 458 } 459 } 460 461 sid = evt[1]; 462 input_addr = evt[5]; 463 input_addr <<= 32; 464 input_addr |= evt[4]; 465 466 if (smmu_quirks_check(dev, sid, event_id, input_addr)) { 467 /* The event is known. Don't print anything. */ 468 return; 469 } 470 471 if (ev) { 472 device_printf(sc->dev, 473 "Event %s (%s) received.\n", ev->str, ev->msg); 474 } else 475 device_printf(sc->dev, "Event 0x%x received\n", event_id); 476 477 device_printf(sc->dev, "SID %x, Input Address: %jx\n", 478 sid, input_addr); 479 480 for (i = 0; i < 8; i++) 481 device_printf(sc->dev, "evt[%d] %x\n", i, evt[i]); 482 483 smmu_dump_ste(sc, sid); 484 } 485 486 static void 487 make_cmd(struct smmu_softc *sc, uint64_t *cmd, 488 struct smmu_cmdq_entry *entry) 489 { 490 491 memset(cmd, 0, CMDQ_ENTRY_DWORDS * 8); 492 cmd[0] = entry->opcode << CMD_QUEUE_OPCODE_S; 493 494 switch (entry->opcode) { 495 case CMD_TLBI_NH_VA: 496 cmd[0] |= (uint64_t)entry->tlbi.asid << TLBI_0_ASID_S; 497 cmd[1] = entry->tlbi.addr & TLBI_1_ADDR_M; 498 if (entry->tlbi.leaf) { 499 /* 500 * Leaf flag means that only cached entries 501 * for the last level of translation table walk 502 * are required to be invalidated. 503 */ 504 cmd[1] |= TLBI_1_LEAF; 505 } 506 break; 507 case CMD_TLBI_NH_ASID: 508 cmd[0] |= (uint64_t)entry->tlbi.asid << TLBI_0_ASID_S; 509 break; 510 case CMD_TLBI_NSNH_ALL: 511 case CMD_TLBI_NH_ALL: 512 case CMD_TLBI_EL2_ALL: 513 break; 514 case CMD_CFGI_CD: 515 cmd[0] |= ((uint64_t)entry->cfgi.ssid << CFGI_0_SSID_S); 516 /* FALLTROUGH */ 517 case CMD_CFGI_STE: 518 cmd[0] |= ((uint64_t)entry->cfgi.sid << CFGI_0_STE_SID_S); 519 cmd[1] |= ((uint64_t)entry->cfgi.leaf << CFGI_1_LEAF_S); 520 break; 521 case CMD_CFGI_STE_RANGE: 522 cmd[1] = (31 << CFGI_1_STE_RANGE_S); 523 break; 524 case CMD_SYNC: 525 cmd[0] |= SYNC_0_MSH_IS | SYNC_0_MSIATTR_OIWB; 526 if (entry->sync.msiaddr) { 527 cmd[0] |= SYNC_0_CS_SIG_IRQ; 528 cmd[1] |= (entry->sync.msiaddr & SYNC_1_MSIADDRESS_M); 529 } else 530 cmd[0] |= SYNC_0_CS_SIG_SEV; 531 break; 532 case CMD_PREFETCH_CONFIG: 533 cmd[0] |= ((uint64_t)entry->prefetch.sid << PREFETCH_0_SID_S); 534 break; 535 }; 536 } 537 538 static void 539 smmu_cmdq_enqueue_cmd(struct smmu_softc *sc, struct smmu_cmdq_entry *entry) 540 { 541 uint64_t cmd[CMDQ_ENTRY_DWORDS]; 542 struct smmu_queue *cmdq; 543 void *entry_addr; 544 545 cmdq = &sc->cmdq; 546 547 make_cmd(sc, cmd, entry); 548 549 SMMU_LOCK(sc); 550 551 /* Ensure that a space is available. */ 552 do { 553 cmdq->lc.cons = bus_read_4(sc->res[0], cmdq->cons_off); 554 } while (smmu_q_has_space(cmdq) == 0); 555 556 /* Write the command to the current prod entry. */ 557 entry_addr = (void *)((uint64_t)cmdq->vaddr + 558 Q_IDX(cmdq, cmdq->lc.prod) * CMDQ_ENTRY_DWORDS * 8); 559 memcpy(entry_addr, cmd, CMDQ_ENTRY_DWORDS * 8); 560 561 /* Increment prod index. */ 562 cmdq->lc.prod = smmu_q_inc_prod(cmdq); 563 bus_write_4(sc->res[0], cmdq->prod_off, cmdq->lc.prod); 564 565 SMMU_UNLOCK(sc); 566 } 567 568 static void __unused 569 smmu_poll_until_consumed(struct smmu_softc *sc, struct smmu_queue *q) 570 { 571 572 while (1) { 573 q->lc.val = bus_read_8(sc->res[0], q->prod_off); 574 if (smmu_q_empty(q)) 575 break; 576 cpu_spinwait(); 577 } 578 } 579 580 static int 581 smmu_sync(struct smmu_softc *sc) 582 { 583 struct smmu_cmdq_entry cmd; 584 struct smmu_queue *q; 585 uint32_t *base; 586 int timeout; 587 int prod; 588 589 q = &sc->cmdq; 590 prod = q->lc.prod; 591 592 /* Enqueue sync command. */ 593 cmd.opcode = CMD_SYNC; 594 cmd.sync.msiaddr = q->paddr + Q_IDX(q, prod) * CMDQ_ENTRY_DWORDS * 8; 595 smmu_cmdq_enqueue_cmd(sc, &cmd); 596 597 /* Wait for the sync completion. */ 598 base = (void *)((uint64_t)q->vaddr + 599 Q_IDX(q, prod) * CMDQ_ENTRY_DWORDS * 8); 600 601 /* 602 * It takes around 200 loops (6 instructions each) 603 * on Neoverse N1 to complete the sync. 604 */ 605 timeout = 10000; 606 607 do { 608 if (*base == 0) { 609 /* MSI write completed. */ 610 break; 611 } 612 cpu_spinwait(); 613 } while (timeout--); 614 615 if (timeout < 0) 616 device_printf(sc->dev, "Failed to sync\n"); 617 618 return (0); 619 } 620 621 static int 622 smmu_sync_cd(struct smmu_softc *sc, int sid, int ssid, bool leaf) 623 { 624 struct smmu_cmdq_entry cmd; 625 626 cmd.opcode = CMD_CFGI_CD; 627 cmd.cfgi.sid = sid; 628 cmd.cfgi.ssid = ssid; 629 cmd.cfgi.leaf = leaf; 630 smmu_cmdq_enqueue_cmd(sc, &cmd); 631 632 return (0); 633 } 634 635 static void 636 smmu_invalidate_all_sid(struct smmu_softc *sc) 637 { 638 struct smmu_cmdq_entry cmd; 639 640 /* Invalidate cached config */ 641 cmd.opcode = CMD_CFGI_STE_RANGE; 642 smmu_cmdq_enqueue_cmd(sc, &cmd); 643 smmu_sync(sc); 644 } 645 646 static void 647 smmu_tlbi_all(struct smmu_softc *sc) 648 { 649 struct smmu_cmdq_entry cmd; 650 651 /* Invalidate entire TLB */ 652 cmd.opcode = CMD_TLBI_NSNH_ALL; 653 smmu_cmdq_enqueue_cmd(sc, &cmd); 654 smmu_sync(sc); 655 } 656 657 static void 658 smmu_tlbi_asid(struct smmu_softc *sc, uint16_t asid) 659 { 660 struct smmu_cmdq_entry cmd; 661 662 /* Invalidate TLB for an ASID. */ 663 cmd.opcode = CMD_TLBI_NH_ASID; 664 cmd.tlbi.asid = asid; 665 smmu_cmdq_enqueue_cmd(sc, &cmd); 666 smmu_sync(sc); 667 } 668 669 static void 670 smmu_tlbi_va(struct smmu_softc *sc, vm_offset_t va, uint16_t asid) 671 { 672 struct smmu_cmdq_entry cmd; 673 674 /* Invalidate specific range */ 675 cmd.opcode = CMD_TLBI_NH_VA; 676 cmd.tlbi.asid = asid; 677 cmd.tlbi.vmid = 0; 678 cmd.tlbi.leaf = true; /* We change only L3. */ 679 cmd.tlbi.addr = va; 680 smmu_cmdq_enqueue_cmd(sc, &cmd); 681 } 682 683 static void 684 smmu_invalidate_sid(struct smmu_softc *sc, uint32_t sid) 685 { 686 struct smmu_cmdq_entry cmd; 687 688 /* Invalidate cached config */ 689 cmd.opcode = CMD_CFGI_STE; 690 cmd.cfgi.sid = sid; 691 smmu_cmdq_enqueue_cmd(sc, &cmd); 692 smmu_sync(sc); 693 } 694 695 static void 696 smmu_prefetch_sid(struct smmu_softc *sc, uint32_t sid) 697 { 698 struct smmu_cmdq_entry cmd; 699 700 cmd.opcode = CMD_PREFETCH_CONFIG; 701 cmd.prefetch.sid = sid; 702 smmu_cmdq_enqueue_cmd(sc, &cmd); 703 smmu_sync(sc); 704 } 705 706 /* 707 * Init STE in bypass mode. Traffic is not translated for the sid. 708 */ 709 static void 710 smmu_init_ste_bypass(struct smmu_softc *sc, uint32_t sid, uint64_t *ste) 711 { 712 uint64_t val; 713 714 val = STE0_VALID | STE0_CONFIG_BYPASS; 715 716 ste[1] = STE1_SHCFG_INCOMING | STE1_EATS_FULLATS; 717 ste[2] = 0; 718 ste[3] = 0; 719 ste[4] = 0; 720 ste[5] = 0; 721 ste[6] = 0; 722 ste[7] = 0; 723 724 smmu_invalidate_sid(sc, sid); 725 ste[0] = val; 726 dsb(sy); 727 smmu_invalidate_sid(sc, sid); 728 729 smmu_prefetch_sid(sc, sid); 730 } 731 732 /* 733 * Enable Stage1 (S1) translation for the sid. 734 */ 735 static int 736 smmu_init_ste_s1(struct smmu_softc *sc, struct smmu_cd *cd, 737 uint32_t sid, uint64_t *ste) 738 { 739 uint64_t val; 740 741 val = STE0_VALID; 742 743 /* S1 */ 744 ste[1] = STE1_EATS_FULLATS | 745 STE1_S1CSH_IS | 746 STE1_S1CIR_WBRA | 747 STE1_S1COR_WBRA | 748 STE1_STRW_NS_EL1; 749 ste[2] = 0; 750 ste[3] = 0; 751 ste[4] = 0; 752 ste[5] = 0; 753 ste[6] = 0; 754 ste[7] = 0; 755 756 if (sc->features & SMMU_FEATURE_STALL && 757 ((sc->features & SMMU_FEATURE_STALL_FORCE) == 0)) 758 ste[1] |= STE1_S1STALLD; 759 760 /* Configure STE */ 761 val |= (cd->paddr & STE0_S1CONTEXTPTR_M); 762 val |= STE0_CONFIG_S1_TRANS; 763 764 smmu_invalidate_sid(sc, sid); 765 766 /* The STE[0] has to be written in a single blast, last of all. */ 767 ste[0] = val; 768 dsb(sy); 769 770 smmu_invalidate_sid(sc, sid); 771 smmu_sync_cd(sc, sid, 0, true); 772 smmu_invalidate_sid(sc, sid); 773 774 /* The sid will be used soon most likely. */ 775 smmu_prefetch_sid(sc, sid); 776 777 return (0); 778 } 779 780 static uint64_t * 781 smmu_get_ste_addr(struct smmu_softc *sc, int sid) 782 { 783 struct smmu_strtab *strtab; 784 struct l1_desc *l1_desc; 785 uint64_t *addr; 786 787 strtab = &sc->strtab; 788 789 if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) { 790 l1_desc = &strtab->l1[sid >> STRTAB_SPLIT]; 791 addr = l1_desc->va; 792 addr += (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; 793 } else { 794 addr = (void *)((uint64_t)strtab->vaddr + 795 STRTAB_STE_DWORDS * 8 * sid); 796 }; 797 798 return (addr); 799 } 800 801 static int 802 smmu_init_ste(struct smmu_softc *sc, struct smmu_cd *cd, int sid, bool bypass) 803 { 804 uint64_t *addr; 805 806 addr = smmu_get_ste_addr(sc, sid); 807 808 if (bypass) 809 smmu_init_ste_bypass(sc, sid, addr); 810 else 811 smmu_init_ste_s1(sc, cd, sid, addr); 812 813 smmu_sync(sc); 814 815 return (0); 816 } 817 818 static void 819 smmu_deinit_ste(struct smmu_softc *sc, int sid) 820 { 821 uint64_t *ste; 822 823 ste = smmu_get_ste_addr(sc, sid); 824 ste[0] = 0; 825 826 smmu_invalidate_sid(sc, sid); 827 smmu_sync_cd(sc, sid, 0, true); 828 smmu_invalidate_sid(sc, sid); 829 830 smmu_sync(sc); 831 } 832 833 static int 834 smmu_init_cd(struct smmu_softc *sc, struct smmu_domain *domain) 835 { 836 vm_paddr_t paddr; 837 uint64_t *ptr; 838 uint64_t val; 839 vm_size_t size; 840 struct smmu_cd *cd; 841 struct smmu_pmap *p; 842 843 size = 1 * (CD_DWORDS << 3); 844 845 p = &domain->p; 846 cd = domain->cd = malloc(sizeof(struct smmu_cd), 847 M_SMMU, M_WAITOK | M_ZERO); 848 849 cd->vaddr = contigmalloc(size, M_SMMU, 850 M_WAITOK | M_ZERO, /* flags */ 851 0, /* low */ 852 (1ul << 40) - 1, /* high */ 853 size, /* alignment */ 854 0); /* boundary */ 855 if (cd->vaddr == NULL) { 856 device_printf(sc->dev, "Failed to allocate CD\n"); 857 return (ENXIO); 858 } 859 860 cd->size = size; 861 cd->paddr = vtophys(cd->vaddr); 862 863 ptr = cd->vaddr; 864 865 val = CD0_VALID; 866 val |= CD0_AA64; 867 val |= CD0_R; 868 val |= CD0_A; 869 val |= CD0_ASET; 870 val |= (uint64_t)domain->asid << CD0_ASID_S; 871 val |= CD0_TG0_4KB; 872 val |= CD0_EPD1; /* Disable TT1 */ 873 val |= ((64 - sc->ias) << CD0_T0SZ_S); 874 val |= CD0_IPS_48BITS; 875 876 paddr = p->sp_l0_paddr & CD1_TTB0_M; 877 KASSERT(paddr == p->sp_l0_paddr, ("bad allocation 1")); 878 879 ptr[1] = paddr; 880 ptr[2] = 0; 881 ptr[3] = MAIR_ATTR(MAIR_DEVICE_nGnRnE, VM_MEMATTR_DEVICE) | 882 MAIR_ATTR(MAIR_NORMAL_NC, VM_MEMATTR_UNCACHEABLE) | 883 MAIR_ATTR(MAIR_NORMAL_WB, VM_MEMATTR_WRITE_BACK) | 884 MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH); 885 886 /* Install the CD. */ 887 ptr[0] = val; 888 889 return (0); 890 } 891 892 static int 893 smmu_init_strtab_linear(struct smmu_softc *sc) 894 { 895 struct smmu_strtab *strtab; 896 vm_paddr_t base; 897 uint32_t size; 898 uint64_t reg; 899 900 strtab = &sc->strtab; 901 strtab->num_l1_entries = (1 << sc->sid_bits); 902 903 size = strtab->num_l1_entries * (STRTAB_STE_DWORDS << 3); 904 905 if (bootverbose) 906 device_printf(sc->dev, 907 "%s: linear strtab size %d, num_l1_entries %d\n", 908 __func__, size, strtab->num_l1_entries); 909 910 strtab->vaddr = contigmalloc(size, M_SMMU, 911 M_WAITOK | M_ZERO, /* flags */ 912 0, /* low */ 913 (1ul << 48) - 1, /* high */ 914 size, /* alignment */ 915 0); /* boundary */ 916 if (strtab->vaddr == NULL) { 917 device_printf(sc->dev, "failed to allocate strtab\n"); 918 return (ENXIO); 919 } 920 921 reg = STRTAB_BASE_CFG_FMT_LINEAR; 922 reg |= sc->sid_bits << STRTAB_BASE_CFG_LOG2SIZE_S; 923 strtab->base_cfg = (uint32_t)reg; 924 925 base = vtophys(strtab->vaddr); 926 927 reg = base & STRTAB_BASE_ADDR_M; 928 KASSERT(reg == base, ("bad allocation 2")); 929 reg |= STRTAB_BASE_RA; 930 strtab->base = reg; 931 932 return (0); 933 } 934 935 static int 936 smmu_init_strtab_2lvl(struct smmu_softc *sc) 937 { 938 struct smmu_strtab *strtab; 939 vm_paddr_t base; 940 uint64_t reg_base; 941 uint32_t l1size; 942 uint32_t size; 943 uint32_t reg; 944 int sz; 945 946 strtab = &sc->strtab; 947 948 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); 949 size = min(size, sc->sid_bits - STRTAB_SPLIT); 950 strtab->num_l1_entries = (1 << size); 951 size += STRTAB_SPLIT; 952 953 l1size = strtab->num_l1_entries * (STRTAB_L1_DESC_DWORDS << 3); 954 955 if (bootverbose) 956 device_printf(sc->dev, 957 "%s: size %d, l1 entries %d, l1size %d\n", 958 __func__, size, strtab->num_l1_entries, l1size); 959 960 strtab->vaddr = contigmalloc(l1size, M_SMMU, 961 M_WAITOK | M_ZERO, /* flags */ 962 0, /* low */ 963 (1ul << 48) - 1, /* high */ 964 l1size, /* alignment */ 965 0); /* boundary */ 966 if (strtab->vaddr == NULL) { 967 device_printf(sc->dev, "Failed to allocate 2lvl strtab.\n"); 968 return (ENOMEM); 969 } 970 971 sz = strtab->num_l1_entries * sizeof(struct l1_desc); 972 973 strtab->l1 = malloc(sz, M_SMMU, M_WAITOK | M_ZERO); 974 if (strtab->l1 == NULL) { 975 contigfree(strtab->vaddr, l1size, M_SMMU); 976 return (ENOMEM); 977 } 978 979 reg = STRTAB_BASE_CFG_FMT_2LVL; 980 reg |= size << STRTAB_BASE_CFG_LOG2SIZE_S; 981 reg |= STRTAB_SPLIT << STRTAB_BASE_CFG_SPLIT_S; 982 strtab->base_cfg = (uint32_t)reg; 983 984 base = vtophys(strtab->vaddr); 985 986 reg_base = base & STRTAB_BASE_ADDR_M; 987 KASSERT(reg_base == base, ("bad allocation 3")); 988 reg_base |= STRTAB_BASE_RA; 989 strtab->base = reg_base; 990 991 return (0); 992 } 993 994 static int 995 smmu_init_strtab(struct smmu_softc *sc) 996 { 997 int error; 998 999 if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) 1000 error = smmu_init_strtab_2lvl(sc); 1001 else 1002 error = smmu_init_strtab_linear(sc); 1003 1004 return (error); 1005 } 1006 1007 static int 1008 smmu_init_l1_entry(struct smmu_softc *sc, int sid) 1009 { 1010 struct smmu_strtab *strtab; 1011 struct l1_desc *l1_desc; 1012 uint64_t *addr; 1013 uint64_t val; 1014 size_t size; 1015 int i; 1016 1017 strtab = &sc->strtab; 1018 l1_desc = &strtab->l1[sid >> STRTAB_SPLIT]; 1019 if (l1_desc->va) { 1020 /* Already allocated. */ 1021 return (0); 1022 } 1023 1024 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); 1025 1026 l1_desc->span = STRTAB_SPLIT + 1; 1027 l1_desc->size = size; 1028 l1_desc->va = contigmalloc(size, M_SMMU, 1029 M_WAITOK | M_ZERO, /* flags */ 1030 0, /* low */ 1031 (1ul << 48) - 1, /* high */ 1032 size, /* alignment */ 1033 0); /* boundary */ 1034 if (l1_desc->va == NULL) { 1035 device_printf(sc->dev, "failed to allocate l2 entry\n"); 1036 return (ENXIO); 1037 } 1038 1039 l1_desc->pa = vtophys(l1_desc->va); 1040 1041 i = sid >> STRTAB_SPLIT; 1042 addr = (void *)((uint64_t)strtab->vaddr + 1043 STRTAB_L1_DESC_DWORDS * 8 * i); 1044 1045 /* Install the L1 entry. */ 1046 val = l1_desc->pa & STRTAB_L1_DESC_L2PTR_M; 1047 KASSERT(val == l1_desc->pa, ("bad allocation 4")); 1048 val |= l1_desc->span; 1049 *addr = val; 1050 1051 return (0); 1052 } 1053 1054 static void __unused 1055 smmu_deinit_l1_entry(struct smmu_softc *sc, int sid) 1056 { 1057 struct smmu_strtab *strtab; 1058 struct l1_desc *l1_desc; 1059 uint64_t *addr; 1060 int i; 1061 1062 strtab = &sc->strtab; 1063 1064 i = sid >> STRTAB_SPLIT; 1065 addr = (void *)((uint64_t)strtab->vaddr + 1066 STRTAB_L1_DESC_DWORDS * 8 * i); 1067 *addr = 0; 1068 1069 l1_desc = &strtab->l1[sid >> STRTAB_SPLIT]; 1070 contigfree(l1_desc->va, l1_desc->size, M_SMMU); 1071 } 1072 1073 static int 1074 smmu_disable(struct smmu_softc *sc) 1075 { 1076 uint32_t reg; 1077 int error; 1078 1079 /* Disable SMMU */ 1080 reg = bus_read_4(sc->res[0], SMMU_CR0); 1081 reg &= ~CR0_SMMUEN; 1082 error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); 1083 if (error) 1084 device_printf(sc->dev, "Could not disable SMMU.\n"); 1085 1086 return (0); 1087 } 1088 1089 static int 1090 smmu_event_intr(void *arg) 1091 { 1092 uint32_t evt[EVTQ_ENTRY_DWORDS * 2]; 1093 struct smmu_softc *sc; 1094 1095 sc = arg; 1096 1097 do { 1098 smmu_evtq_dequeue(sc, evt); 1099 smmu_print_event(sc, evt); 1100 } while (!smmu_q_empty(&sc->evtq)); 1101 1102 return (FILTER_HANDLED); 1103 } 1104 1105 static int __unused 1106 smmu_sync_intr(void *arg) 1107 { 1108 struct smmu_softc *sc; 1109 1110 sc = arg; 1111 1112 device_printf(sc->dev, "%s\n", __func__); 1113 1114 return (FILTER_HANDLED); 1115 } 1116 1117 static int 1118 smmu_gerr_intr(void *arg) 1119 { 1120 struct smmu_softc *sc; 1121 1122 sc = arg; 1123 1124 device_printf(sc->dev, "SMMU Global Error\n"); 1125 1126 return (FILTER_HANDLED); 1127 } 1128 1129 static int 1130 smmu_enable_interrupts(struct smmu_softc *sc) 1131 { 1132 uint32_t reg; 1133 int error; 1134 1135 /* Disable MSI. */ 1136 bus_write_8(sc->res[0], SMMU_GERROR_IRQ_CFG0, 0); 1137 bus_write_4(sc->res[0], SMMU_GERROR_IRQ_CFG1, 0); 1138 bus_write_4(sc->res[0], SMMU_GERROR_IRQ_CFG2, 0); 1139 1140 bus_write_8(sc->res[0], SMMU_EVENTQ_IRQ_CFG0, 0); 1141 bus_write_4(sc->res[0], SMMU_EVENTQ_IRQ_CFG1, 0); 1142 bus_write_4(sc->res[0], SMMU_EVENTQ_IRQ_CFG2, 0); 1143 1144 if (sc->features & CR0_PRIQEN) { 1145 bus_write_8(sc->res[0], SMMU_PRIQ_IRQ_CFG0, 0); 1146 bus_write_4(sc->res[0], SMMU_PRIQ_IRQ_CFG1, 0); 1147 bus_write_4(sc->res[0], SMMU_PRIQ_IRQ_CFG2, 0); 1148 } 1149 1150 /* Disable any interrupts. */ 1151 error = smmu_write_ack(sc, SMMU_IRQ_CTRL, SMMU_IRQ_CTRLACK, 0); 1152 if (error) { 1153 device_printf(sc->dev, "Could not disable interrupts.\n"); 1154 return (ENXIO); 1155 } 1156 1157 /* Enable interrupts. */ 1158 reg = IRQ_CTRL_EVENTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; 1159 if (sc->features & SMMU_FEATURE_PRI) 1160 reg |= IRQ_CTRL_PRIQ_IRQEN; 1161 1162 error = smmu_write_ack(sc, SMMU_IRQ_CTRL, SMMU_IRQ_CTRLACK, reg); 1163 if (error) { 1164 device_printf(sc->dev, "Could not enable interrupts.\n"); 1165 return (ENXIO); 1166 } 1167 1168 return (0); 1169 } 1170 1171 #ifdef DEV_ACPI 1172 static void 1173 smmu_configure_intr(struct smmu_softc *sc, struct resource *res) 1174 { 1175 struct intr_map_data_acpi *ad; 1176 struct intr_map_data *data; 1177 1178 data = rman_get_virtual(res); 1179 KASSERT(data != NULL, ("data is NULL")); 1180 1181 if (data->type == INTR_MAP_DATA_ACPI) { 1182 ad = (struct intr_map_data_acpi *)data; 1183 ad->trig = INTR_TRIGGER_EDGE; 1184 ad->pol = INTR_POLARITY_HIGH; 1185 } 1186 } 1187 #endif 1188 1189 static int 1190 smmu_setup_interrupts(struct smmu_softc *sc) 1191 { 1192 device_t dev; 1193 int error; 1194 1195 dev = sc->dev; 1196 1197 #ifdef DEV_ACPI 1198 /* 1199 * Configure SMMU interrupts as EDGE triggered manually 1200 * as ACPI tables carries no information for that. 1201 */ 1202 smmu_configure_intr(sc, sc->res[1]); 1203 /* PRIQ is not in use. */ 1204 smmu_configure_intr(sc, sc->res[3]); 1205 smmu_configure_intr(sc, sc->res[4]); 1206 #endif 1207 1208 error = bus_setup_intr(dev, sc->res[1], INTR_TYPE_MISC, 1209 smmu_event_intr, NULL, sc, &sc->intr_cookie[0]); 1210 if (error) { 1211 device_printf(dev, "Couldn't setup Event interrupt handler\n"); 1212 return (ENXIO); 1213 } 1214 1215 error = bus_setup_intr(dev, sc->res[4], INTR_TYPE_MISC, 1216 smmu_gerr_intr, NULL, sc, &sc->intr_cookie[2]); 1217 if (error) { 1218 device_printf(dev, "Couldn't setup Gerr interrupt handler\n"); 1219 return (ENXIO); 1220 } 1221 1222 return (0); 1223 } 1224 1225 static int 1226 smmu_reset(struct smmu_softc *sc) 1227 { 1228 struct smmu_cmdq_entry cmd; 1229 struct smmu_strtab *strtab; 1230 int error; 1231 int reg; 1232 1233 reg = bus_read_4(sc->res[0], SMMU_CR0); 1234 1235 if (reg & CR0_SMMUEN) 1236 device_printf(sc->dev, 1237 "%s: Warning: SMMU is enabled\n", __func__); 1238 1239 error = smmu_disable(sc); 1240 if (error) 1241 device_printf(sc->dev, 1242 "%s: Could not disable SMMU.\n", __func__); 1243 1244 if (smmu_enable_interrupts(sc) != 0) { 1245 device_printf(sc->dev, "Could not enable interrupts.\n"); 1246 return (ENXIO); 1247 } 1248 1249 reg = CR1_TABLE_SH_IS | 1250 CR1_TABLE_OC_WBC | 1251 CR1_TABLE_IC_WBC | 1252 CR1_QUEUE_SH_IS | 1253 CR1_QUEUE_OC_WBC | 1254 CR1_QUEUE_IC_WBC; 1255 bus_write_4(sc->res[0], SMMU_CR1, reg); 1256 1257 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H; 1258 bus_write_4(sc->res[0], SMMU_CR2, reg); 1259 1260 /* Stream table. */ 1261 strtab = &sc->strtab; 1262 bus_write_8(sc->res[0], SMMU_STRTAB_BASE, strtab->base); 1263 bus_write_4(sc->res[0], SMMU_STRTAB_BASE_CFG, strtab->base_cfg); 1264 1265 /* Command queue. */ 1266 bus_write_8(sc->res[0], SMMU_CMDQ_BASE, sc->cmdq.base); 1267 bus_write_4(sc->res[0], SMMU_CMDQ_PROD, sc->cmdq.lc.prod); 1268 bus_write_4(sc->res[0], SMMU_CMDQ_CONS, sc->cmdq.lc.cons); 1269 1270 reg = CR0_CMDQEN; 1271 error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); 1272 if (error) { 1273 device_printf(sc->dev, "Could not enable command queue\n"); 1274 return (ENXIO); 1275 } 1276 1277 /* Invalidate cached configuration. */ 1278 smmu_invalidate_all_sid(sc); 1279 1280 if (sc->features & SMMU_FEATURE_HYP) { 1281 cmd.opcode = CMD_TLBI_EL2_ALL; 1282 smmu_cmdq_enqueue_cmd(sc, &cmd); 1283 }; 1284 1285 /* Invalidate TLB. */ 1286 smmu_tlbi_all(sc); 1287 1288 /* Event queue */ 1289 bus_write_8(sc->res[0], SMMU_EVENTQ_BASE, sc->evtq.base); 1290 bus_write_4(sc->res[0], SMMU_EVENTQ_PROD, sc->evtq.lc.prod); 1291 bus_write_4(sc->res[0], SMMU_EVENTQ_CONS, sc->evtq.lc.cons); 1292 1293 reg |= CR0_EVENTQEN; 1294 error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); 1295 if (error) { 1296 device_printf(sc->dev, "Could not enable event queue\n"); 1297 return (ENXIO); 1298 } 1299 1300 if (sc->features & SMMU_FEATURE_PRI) { 1301 /* PRI queue */ 1302 bus_write_8(sc->res[0], SMMU_PRIQ_BASE, sc->priq.base); 1303 bus_write_4(sc->res[0], SMMU_PRIQ_PROD, sc->priq.lc.prod); 1304 bus_write_4(sc->res[0], SMMU_PRIQ_CONS, sc->priq.lc.cons); 1305 1306 reg |= CR0_PRIQEN; 1307 error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); 1308 if (error) { 1309 device_printf(sc->dev, "Could not enable PRI queue\n"); 1310 return (ENXIO); 1311 } 1312 } 1313 1314 if (sc->features & SMMU_FEATURE_ATS) { 1315 reg |= CR0_ATSCHK; 1316 error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); 1317 if (error) { 1318 device_printf(sc->dev, "Could not enable ATS check.\n"); 1319 return (ENXIO); 1320 } 1321 } 1322 1323 reg |= CR0_SMMUEN; 1324 error = smmu_write_ack(sc, SMMU_CR0, SMMU_CR0ACK, reg); 1325 if (error) { 1326 device_printf(sc->dev, "Could not enable SMMU.\n"); 1327 return (ENXIO); 1328 } 1329 1330 return (0); 1331 } 1332 1333 static int 1334 smmu_check_features(struct smmu_softc *sc) 1335 { 1336 uint32_t reg; 1337 uint32_t val; 1338 1339 sc->features = 0; 1340 1341 reg = bus_read_4(sc->res[0], SMMU_IDR0); 1342 1343 if (reg & IDR0_ST_LVL_2) { 1344 if (bootverbose) 1345 device_printf(sc->dev, 1346 "2-level stream table supported.\n"); 1347 sc->features |= SMMU_FEATURE_2_LVL_STREAM_TABLE; 1348 } 1349 1350 if (reg & IDR0_CD2L) { 1351 if (bootverbose) 1352 device_printf(sc->dev, 1353 "2-level CD table supported.\n"); 1354 sc->features |= SMMU_FEATURE_2_LVL_CD; 1355 } 1356 1357 switch (reg & IDR0_TTENDIAN_M) { 1358 case IDR0_TTENDIAN_MIXED: 1359 if (bootverbose) 1360 device_printf(sc->dev, "Mixed endianness supported.\n"); 1361 sc->features |= SMMU_FEATURE_TT_LE; 1362 sc->features |= SMMU_FEATURE_TT_BE; 1363 break; 1364 case IDR0_TTENDIAN_LITTLE: 1365 if (bootverbose) 1366 device_printf(sc->dev, 1367 "Little endian supported only.\n"); 1368 sc->features |= SMMU_FEATURE_TT_LE; 1369 break; 1370 case IDR0_TTENDIAN_BIG: 1371 if (bootverbose) 1372 device_printf(sc->dev, "Big endian supported only.\n"); 1373 sc->features |= SMMU_FEATURE_TT_BE; 1374 break; 1375 default: 1376 device_printf(sc->dev, "Unsupported endianness.\n"); 1377 return (ENXIO); 1378 } 1379 1380 if (reg & IDR0_SEV) 1381 sc->features |= SMMU_FEATURE_SEV; 1382 1383 if (reg & IDR0_MSI) { 1384 if (bootverbose) 1385 device_printf(sc->dev, "MSI feature present.\n"); 1386 sc->features |= SMMU_FEATURE_MSI; 1387 } 1388 1389 if (reg & IDR0_HYP) { 1390 if (bootverbose) 1391 device_printf(sc->dev, "HYP feature present.\n"); 1392 sc->features |= SMMU_FEATURE_HYP; 1393 } 1394 1395 if (reg & IDR0_ATS) 1396 sc->features |= SMMU_FEATURE_ATS; 1397 1398 if (reg & IDR0_PRI) 1399 sc->features |= SMMU_FEATURE_PRI; 1400 1401 switch (reg & IDR0_STALL_MODEL_M) { 1402 case IDR0_STALL_MODEL_FORCE: 1403 /* Stall is forced. */ 1404 sc->features |= SMMU_FEATURE_STALL_FORCE; 1405 /* FALLTHROUGH */ 1406 case IDR0_STALL_MODEL_STALL: 1407 sc->features |= SMMU_FEATURE_STALL; 1408 break; 1409 } 1410 1411 /* Grab translation stages supported. */ 1412 if (reg & IDR0_S1P) { 1413 if (bootverbose) 1414 device_printf(sc->dev, 1415 "Stage 1 translation supported.\n"); 1416 sc->features |= SMMU_FEATURE_S1P; 1417 } 1418 if (reg & IDR0_S2P) { 1419 if (bootverbose) 1420 device_printf(sc->dev, 1421 "Stage 2 translation supported.\n"); 1422 sc->features |= SMMU_FEATURE_S2P; 1423 } 1424 1425 switch (reg & IDR0_TTF_M) { 1426 case IDR0_TTF_ALL: 1427 case IDR0_TTF_AA64: 1428 sc->ias = 40; 1429 break; 1430 default: 1431 device_printf(sc->dev, "No AArch64 table format support.\n"); 1432 return (ENXIO); 1433 } 1434 1435 if (reg & IDR0_ASID16) 1436 sc->asid_bits = 16; 1437 else 1438 sc->asid_bits = 8; 1439 1440 if (bootverbose) 1441 device_printf(sc->dev, "ASID bits %d\n", sc->asid_bits); 1442 1443 if (reg & IDR0_VMID16) 1444 sc->vmid_bits = 16; 1445 else 1446 sc->vmid_bits = 8; 1447 1448 reg = bus_read_4(sc->res[0], SMMU_IDR1); 1449 1450 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) { 1451 device_printf(sc->dev, 1452 "Embedded implementations not supported by this driver.\n"); 1453 return (ENXIO); 1454 } 1455 1456 val = (reg & IDR1_CMDQS_M) >> IDR1_CMDQS_S; 1457 sc->cmdq.size_log2 = val; 1458 if (bootverbose) 1459 device_printf(sc->dev, "CMD queue bits %d\n", val); 1460 1461 val = (reg & IDR1_EVENTQS_M) >> IDR1_EVENTQS_S; 1462 sc->evtq.size_log2 = val; 1463 if (bootverbose) 1464 device_printf(sc->dev, "EVENT queue bits %d\n", val); 1465 1466 if (sc->features & SMMU_FEATURE_PRI) { 1467 val = (reg & IDR1_PRIQS_M) >> IDR1_PRIQS_S; 1468 sc->priq.size_log2 = val; 1469 if (bootverbose) 1470 device_printf(sc->dev, "PRI queue bits %d\n", val); 1471 } 1472 1473 sc->ssid_bits = (reg & IDR1_SSIDSIZE_M) >> IDR1_SSIDSIZE_S; 1474 sc->sid_bits = (reg & IDR1_SIDSIZE_M) >> IDR1_SIDSIZE_S; 1475 1476 if (sc->sid_bits <= STRTAB_SPLIT) 1477 sc->features &= ~SMMU_FEATURE_2_LVL_STREAM_TABLE; 1478 1479 if (bootverbose) { 1480 device_printf(sc->dev, "SSID bits %d\n", sc->ssid_bits); 1481 device_printf(sc->dev, "SID bits %d\n", sc->sid_bits); 1482 } 1483 1484 /* IDR3 */ 1485 reg = bus_read_4(sc->res[0], SMMU_IDR3); 1486 if (reg & IDR3_RIL) 1487 sc->features |= SMMU_FEATURE_RANGE_INV; 1488 1489 /* IDR5 */ 1490 reg = bus_read_4(sc->res[0], SMMU_IDR5); 1491 1492 switch (reg & IDR5_OAS_M) { 1493 case IDR5_OAS_32: 1494 sc->oas = 32; 1495 break; 1496 case IDR5_OAS_36: 1497 sc->oas = 36; 1498 break; 1499 case IDR5_OAS_40: 1500 sc->oas = 40; 1501 break; 1502 case IDR5_OAS_42: 1503 sc->oas = 42; 1504 break; 1505 case IDR5_OAS_44: 1506 sc->oas = 44; 1507 break; 1508 case IDR5_OAS_48: 1509 sc->oas = 48; 1510 break; 1511 case IDR5_OAS_52: 1512 sc->oas = 52; 1513 break; 1514 } 1515 1516 sc->pgsizes = 0; 1517 if (reg & IDR5_GRAN64K) 1518 sc->pgsizes |= 64 * 1024; 1519 if (reg & IDR5_GRAN16K) 1520 sc->pgsizes |= 16 * 1024; 1521 if (reg & IDR5_GRAN4K) 1522 sc->pgsizes |= 4 * 1024; 1523 1524 if ((reg & IDR5_VAX_M) == IDR5_VAX_52) 1525 sc->features |= SMMU_FEATURE_VAX; 1526 1527 return (0); 1528 } 1529 1530 static void 1531 smmu_init_asids(struct smmu_softc *sc) 1532 { 1533 1534 sc->asid_set_size = (1 << sc->asid_bits); 1535 sc->asid_set = bit_alloc(sc->asid_set_size, M_SMMU, M_WAITOK); 1536 mtx_init(&sc->asid_set_mutex, "asid set", NULL, MTX_SPIN); 1537 } 1538 1539 static int 1540 smmu_asid_alloc(struct smmu_softc *sc, int *new_asid) 1541 { 1542 1543 mtx_lock_spin(&sc->asid_set_mutex); 1544 bit_ffc(sc->asid_set, sc->asid_set_size, new_asid); 1545 if (*new_asid == -1) { 1546 mtx_unlock_spin(&sc->asid_set_mutex); 1547 return (ENOMEM); 1548 } 1549 bit_set(sc->asid_set, *new_asid); 1550 mtx_unlock_spin(&sc->asid_set_mutex); 1551 1552 return (0); 1553 } 1554 1555 static void 1556 smmu_asid_free(struct smmu_softc *sc, int asid) 1557 { 1558 1559 mtx_lock_spin(&sc->asid_set_mutex); 1560 bit_clear(sc->asid_set, asid); 1561 mtx_unlock_spin(&sc->asid_set_mutex); 1562 } 1563 1564 /* 1565 * Device interface. 1566 */ 1567 int 1568 smmu_attach(device_t dev) 1569 { 1570 struct smmu_softc *sc; 1571 int error; 1572 1573 sc = device_get_softc(dev); 1574 sc->dev = dev; 1575 1576 mtx_init(&sc->sc_mtx, device_get_nameunit(sc->dev), "smmu", MTX_DEF); 1577 1578 error = smmu_setup_interrupts(sc); 1579 if (error) { 1580 bus_release_resources(dev, smmu_spec, sc->res); 1581 return (ENXIO); 1582 } 1583 1584 error = smmu_check_features(sc); 1585 if (error) { 1586 device_printf(dev, "Some features are required " 1587 "but not supported by hardware.\n"); 1588 return (ENXIO); 1589 } 1590 1591 smmu_init_asids(sc); 1592 1593 error = smmu_init_queues(sc); 1594 if (error) { 1595 device_printf(dev, "Couldn't allocate queues.\n"); 1596 return (ENXIO); 1597 } 1598 1599 error = smmu_init_strtab(sc); 1600 if (error) { 1601 device_printf(dev, "Couldn't allocate strtab.\n"); 1602 return (ENXIO); 1603 } 1604 1605 error = smmu_reset(sc); 1606 if (error) { 1607 device_printf(dev, "Couldn't reset SMMU.\n"); 1608 return (ENXIO); 1609 } 1610 1611 return (0); 1612 } 1613 1614 int 1615 smmu_detach(device_t dev) 1616 { 1617 struct smmu_softc *sc; 1618 1619 sc = device_get_softc(dev); 1620 1621 bus_release_resources(dev, smmu_spec, sc->res); 1622 1623 return (0); 1624 } 1625 1626 static int 1627 smmu_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) 1628 { 1629 struct smmu_softc *sc; 1630 1631 sc = device_get_softc(dev); 1632 1633 device_printf(sc->dev, "%s\n", __func__); 1634 1635 return (ENOENT); 1636 } 1637 1638 static int 1639 smmu_unmap(device_t dev, struct iommu_domain *iodom, 1640 vm_offset_t va, bus_size_t size) 1641 { 1642 struct smmu_domain *domain; 1643 struct smmu_softc *sc; 1644 int err; 1645 int i; 1646 1647 sc = device_get_softc(dev); 1648 1649 domain = (struct smmu_domain *)iodom; 1650 1651 err = 0; 1652 1653 dprintf("%s: %lx, %ld, domain %d\n", __func__, va, size, domain->asid); 1654 1655 for (i = 0; i < size; i += PAGE_SIZE) { 1656 if (smmu_pmap_remove(&domain->p, va) == 0) { 1657 /* pmap entry removed, invalidate TLB. */ 1658 smmu_tlbi_va(sc, va, domain->asid); 1659 } else { 1660 err = ENOENT; 1661 break; 1662 } 1663 va += PAGE_SIZE; 1664 } 1665 1666 smmu_sync(sc); 1667 1668 return (err); 1669 } 1670 1671 static int 1672 smmu_map(device_t dev, struct iommu_domain *iodom, 1673 vm_offset_t va, vm_page_t *ma, vm_size_t size, 1674 vm_prot_t prot) 1675 { 1676 struct smmu_domain *domain; 1677 struct smmu_softc *sc; 1678 vm_paddr_t pa; 1679 int error; 1680 int i; 1681 1682 sc = device_get_softc(dev); 1683 1684 domain = (struct smmu_domain *)iodom; 1685 1686 dprintf("%s: %lx -> %lx, %ld, domain %d\n", __func__, va, pa, size, 1687 domain->asid); 1688 1689 for (i = 0; size > 0; size -= PAGE_SIZE) { 1690 pa = VM_PAGE_TO_PHYS(ma[i++]); 1691 error = smmu_pmap_enter(&domain->p, va, pa, prot, 0); 1692 if (error) 1693 return (error); 1694 smmu_tlbi_va(sc, va, domain->asid); 1695 va += PAGE_SIZE; 1696 } 1697 1698 smmu_sync(sc); 1699 1700 return (0); 1701 } 1702 1703 static struct iommu_domain * 1704 smmu_domain_alloc(device_t dev, struct iommu_unit *iommu) 1705 { 1706 struct iommu_domain *iodom; 1707 struct smmu_domain *domain; 1708 struct smmu_unit *unit; 1709 struct smmu_softc *sc; 1710 int error; 1711 int new_asid; 1712 1713 sc = device_get_softc(dev); 1714 1715 unit = (struct smmu_unit *)iommu; 1716 1717 domain = malloc(sizeof(*domain), M_SMMU, M_WAITOK | M_ZERO); 1718 1719 error = smmu_asid_alloc(sc, &new_asid); 1720 if (error) { 1721 free(domain, M_SMMU); 1722 device_printf(sc->dev, 1723 "Could not allocate ASID for a new domain.\n"); 1724 return (NULL); 1725 } 1726 1727 domain->asid = (uint16_t)new_asid; 1728 1729 smmu_pmap_pinit(&domain->p); 1730 1731 error = smmu_init_cd(sc, domain); 1732 if (error) { 1733 free(domain, M_SMMU); 1734 device_printf(sc->dev, "Could not initialize CD\n"); 1735 return (NULL); 1736 } 1737 1738 smmu_tlbi_asid(sc, domain->asid); 1739 1740 LIST_INIT(&domain->ctx_list); 1741 1742 IOMMU_LOCK(iommu); 1743 LIST_INSERT_HEAD(&unit->domain_list, domain, next); 1744 IOMMU_UNLOCK(iommu); 1745 1746 iodom = &domain->iodom; 1747 1748 /* 1749 * Use 48-bit address space regardless of VAX bit 1750 * as we need 64k IOMMU_PAGE_SIZE for 52-bit space. 1751 */ 1752 iodom->end = MAXADDR_48BIT; 1753 1754 return (iodom); 1755 } 1756 1757 static void 1758 smmu_domain_free(device_t dev, struct iommu_domain *iodom) 1759 { 1760 struct smmu_domain *domain; 1761 struct smmu_softc *sc; 1762 struct smmu_cd *cd; 1763 1764 sc = device_get_softc(dev); 1765 1766 domain = (struct smmu_domain *)iodom; 1767 1768 LIST_REMOVE(domain, next); 1769 1770 cd = domain->cd; 1771 1772 smmu_pmap_remove_pages(&domain->p); 1773 smmu_pmap_release(&domain->p); 1774 1775 smmu_tlbi_asid(sc, domain->asid); 1776 smmu_asid_free(sc, domain->asid); 1777 1778 contigfree(cd->vaddr, cd->size, M_SMMU); 1779 free(cd, M_SMMU); 1780 1781 free(domain, M_SMMU); 1782 } 1783 1784 static int 1785 smmu_set_buswide(device_t dev, struct smmu_domain *domain, 1786 struct smmu_ctx *ctx) 1787 { 1788 struct smmu_softc *sc; 1789 int i; 1790 1791 sc = device_get_softc(dev); 1792 1793 for (i = 0; i < PCI_SLOTMAX; i++) 1794 smmu_init_ste(sc, domain->cd, (ctx->sid | i), ctx->bypass); 1795 1796 return (0); 1797 } 1798 1799 static int 1800 smmu_pci_get_sid(device_t child, u_int *xref0, u_int *sid0) 1801 { 1802 struct pci_id_ofw_iommu pi; 1803 int err; 1804 1805 err = pci_get_id(child, PCI_ID_OFW_IOMMU, (uintptr_t *)&pi); 1806 if (err == 0) { 1807 if (sid0) 1808 *sid0 = pi.id; 1809 if (xref0) 1810 *xref0 = pi.xref; 1811 } 1812 1813 return (err); 1814 } 1815 1816 static struct iommu_ctx * 1817 smmu_ctx_alloc(device_t dev, struct iommu_domain *iodom, device_t child, 1818 bool disabled) 1819 { 1820 struct smmu_domain *domain; 1821 struct smmu_ctx *ctx; 1822 1823 domain = (struct smmu_domain *)iodom; 1824 1825 ctx = malloc(sizeof(struct smmu_ctx), M_SMMU, M_WAITOK | M_ZERO); 1826 ctx->dev = child; 1827 ctx->domain = domain; 1828 if (disabled) 1829 ctx->bypass = true; 1830 1831 IOMMU_DOMAIN_LOCK(iodom); 1832 LIST_INSERT_HEAD(&domain->ctx_list, ctx, next); 1833 IOMMU_DOMAIN_UNLOCK(iodom); 1834 1835 return (&ctx->ioctx); 1836 } 1837 1838 static int 1839 smmu_ctx_init(device_t dev, struct iommu_ctx *ioctx) 1840 { 1841 struct smmu_domain *domain; 1842 struct iommu_domain *iodom; 1843 struct smmu_softc *sc; 1844 struct smmu_ctx *ctx; 1845 devclass_t pci_class; 1846 u_int sid; 1847 int err; 1848 1849 ctx = (struct smmu_ctx *)ioctx; 1850 1851 sc = device_get_softc(dev); 1852 1853 domain = ctx->domain; 1854 iodom = (struct iommu_domain *)domain; 1855 1856 pci_class = devclass_find("pci"); 1857 if (device_get_devclass(device_get_parent(ctx->dev)) == pci_class) { 1858 err = smmu_pci_get_sid(ctx->dev, NULL, &sid); 1859 if (err) 1860 return (err); 1861 1862 ioctx->rid = pci_get_rid(dev); 1863 ctx->sid = sid; 1864 ctx->vendor = pci_get_vendor(ctx->dev); 1865 ctx->device = pci_get_device(ctx->dev); 1866 } 1867 1868 if (sc->features & SMMU_FEATURE_2_LVL_STREAM_TABLE) { 1869 err = smmu_init_l1_entry(sc, ctx->sid); 1870 if (err) 1871 return (err); 1872 } 1873 1874 /* 1875 * Neoverse N1 SDP: 1876 * 0x800 xhci 1877 * 0x700 re 1878 * 0x600 sata 1879 */ 1880 1881 smmu_init_ste(sc, domain->cd, ctx->sid, ctx->bypass); 1882 1883 if (device_get_devclass(device_get_parent(ctx->dev)) == pci_class) 1884 if (iommu_is_buswide_ctx(iodom->iommu, pci_get_bus(ctx->dev))) 1885 smmu_set_buswide(dev, domain, ctx); 1886 1887 return (0); 1888 } 1889 1890 static void 1891 smmu_ctx_free(device_t dev, struct iommu_ctx *ioctx) 1892 { 1893 struct smmu_softc *sc; 1894 struct smmu_ctx *ctx; 1895 1896 IOMMU_ASSERT_LOCKED(ioctx->domain->iommu); 1897 1898 sc = device_get_softc(dev); 1899 ctx = (struct smmu_ctx *)ioctx; 1900 1901 smmu_deinit_ste(sc, ctx->sid); 1902 1903 LIST_REMOVE(ctx, next); 1904 1905 free(ctx, M_SMMU); 1906 } 1907 1908 struct smmu_ctx * 1909 smmu_ctx_lookup_by_sid(device_t dev, u_int sid) 1910 { 1911 struct smmu_softc *sc; 1912 struct smmu_domain *domain; 1913 struct smmu_unit *unit; 1914 struct smmu_ctx *ctx; 1915 1916 sc = device_get_softc(dev); 1917 1918 unit = &sc->unit; 1919 1920 LIST_FOREACH(domain, &unit->domain_list, next) { 1921 LIST_FOREACH(ctx, &domain->ctx_list, next) { 1922 if (ctx->sid == sid) 1923 return (ctx); 1924 } 1925 } 1926 1927 return (NULL); 1928 } 1929 1930 static struct iommu_ctx * 1931 smmu_ctx_lookup(device_t dev, device_t child) 1932 { 1933 struct iommu_unit *iommu __diagused; 1934 struct smmu_softc *sc; 1935 struct smmu_domain *domain; 1936 struct smmu_unit *unit; 1937 struct smmu_ctx *ctx; 1938 1939 sc = device_get_softc(dev); 1940 1941 unit = &sc->unit; 1942 iommu = &unit->iommu; 1943 1944 IOMMU_ASSERT_LOCKED(iommu); 1945 1946 LIST_FOREACH(domain, &unit->domain_list, next) { 1947 IOMMU_DOMAIN_LOCK(&domain->iodom); 1948 LIST_FOREACH(ctx, &domain->ctx_list, next) { 1949 if (ctx->dev == child) { 1950 IOMMU_DOMAIN_UNLOCK(&domain->iodom); 1951 return (&ctx->ioctx); 1952 } 1953 } 1954 IOMMU_DOMAIN_UNLOCK(&domain->iodom); 1955 } 1956 1957 return (NULL); 1958 } 1959 1960 static int 1961 smmu_find(device_t dev, device_t child) 1962 { 1963 struct smmu_softc *sc; 1964 u_int xref; 1965 int err; 1966 1967 sc = device_get_softc(dev); 1968 1969 err = smmu_pci_get_sid(child, &xref, NULL); 1970 if (err) 1971 return (ENOENT); 1972 1973 /* Check if xref is ours. */ 1974 if (xref != sc->xref) 1975 return (EFAULT); 1976 1977 return (0); 1978 } 1979 1980 #ifdef FDT 1981 static int 1982 smmu_ofw_md_data(device_t dev, struct iommu_ctx *ioctx, pcell_t *cells, 1983 int ncells) 1984 { 1985 struct smmu_ctx *ctx; 1986 1987 ctx = (struct smmu_ctx *)ioctx; 1988 1989 if (ncells != 1) 1990 return (-1); 1991 1992 ctx->sid = cells[0]; 1993 1994 return (0); 1995 } 1996 #endif 1997 1998 static device_method_t smmu_methods[] = { 1999 /* Device interface */ 2000 DEVMETHOD(device_detach, smmu_detach), 2001 2002 /* SMMU interface */ 2003 DEVMETHOD(iommu_find, smmu_find), 2004 DEVMETHOD(iommu_map, smmu_map), 2005 DEVMETHOD(iommu_unmap, smmu_unmap), 2006 DEVMETHOD(iommu_domain_alloc, smmu_domain_alloc), 2007 DEVMETHOD(iommu_domain_free, smmu_domain_free), 2008 DEVMETHOD(iommu_ctx_alloc, smmu_ctx_alloc), 2009 DEVMETHOD(iommu_ctx_init, smmu_ctx_init), 2010 DEVMETHOD(iommu_ctx_free, smmu_ctx_free), 2011 DEVMETHOD(iommu_ctx_lookup, smmu_ctx_lookup), 2012 #ifdef FDT 2013 DEVMETHOD(iommu_ofw_md_data, smmu_ofw_md_data), 2014 #endif 2015 2016 /* Bus interface */ 2017 DEVMETHOD(bus_read_ivar, smmu_read_ivar), 2018 2019 /* End */ 2020 DEVMETHOD_END 2021 }; 2022 2023 DEFINE_CLASS_0(smmu, smmu_driver, smmu_methods, sizeof(struct smmu_softc)); 2024