1*68f185ccSZachary Leaf /*- 2*68f185ccSZachary Leaf * SPDX-License-Identifier: BSD-2-Clause 3*68f185ccSZachary Leaf * 4*68f185ccSZachary Leaf * Copyright (c) 2024 Arm Ltd 5*68f185ccSZachary Leaf * Copyright (c) 2022 The FreeBSD Foundation 6*68f185ccSZachary Leaf * 7*68f185ccSZachary Leaf * Portions of this software were developed by Andrew Turner under sponsorship 8*68f185ccSZachary Leaf * from the FreeBSD Foundation. 9*68f185ccSZachary Leaf * 10*68f185ccSZachary Leaf * Redistribution and use in source and binary forms, with or without 11*68f185ccSZachary Leaf * modification, are permitted provided that the following conditions 12*68f185ccSZachary Leaf * are met: 13*68f185ccSZachary Leaf * 1. Redistributions of source code must retain the above copyright 14*68f185ccSZachary Leaf * notice, this list of conditions and the following disclaimer. 15*68f185ccSZachary Leaf * 2. Redistributions in binary form must reproduce the above copyright 16*68f185ccSZachary Leaf * notice, this list of conditions and the following disclaimer in the 17*68f185ccSZachary Leaf * documentation and/or other materials provided with the distribution. 18*68f185ccSZachary Leaf * 19*68f185ccSZachary Leaf * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20*68f185ccSZachary Leaf * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21*68f185ccSZachary Leaf * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22*68f185ccSZachary Leaf * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23*68f185ccSZachary Leaf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24*68f185ccSZachary Leaf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25*68f185ccSZachary Leaf * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26*68f185ccSZachary Leaf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27*68f185ccSZachary Leaf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28*68f185ccSZachary Leaf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29*68f185ccSZachary Leaf * SUCH DAMAGE. 30*68f185ccSZachary Leaf */ 31*68f185ccSZachary Leaf 32*68f185ccSZachary Leaf #include <sys/param.h> 33*68f185ccSZachary Leaf #include <sys/bus.h> 34*68f185ccSZachary Leaf #include <sys/conf.h> 35*68f185ccSZachary Leaf #include <sys/event.h> 36*68f185ccSZachary Leaf #include <sys/hwt.h> 37*68f185ccSZachary Leaf #include <sys/kernel.h> 38*68f185ccSZachary Leaf #include <sys/malloc.h> 39*68f185ccSZachary Leaf #include <sys/module.h> 40*68f185ccSZachary Leaf #include <sys/mutex.h> 41*68f185ccSZachary Leaf #include <sys/rman.h> 42*68f185ccSZachary Leaf #include <sys/smp.h> 43*68f185ccSZachary Leaf #include <sys/systm.h> 44*68f185ccSZachary Leaf #include <sys/taskqueue.h> 45*68f185ccSZachary Leaf 46*68f185ccSZachary Leaf #include <machine/bus.h> 47*68f185ccSZachary Leaf 48*68f185ccSZachary Leaf #include <arm64/spe/arm_spe.h> 49*68f185ccSZachary Leaf #include <arm64/spe/arm_spe_dev.h> 50*68f185ccSZachary Leaf 51*68f185ccSZachary Leaf MALLOC_DEFINE(M_ARM_SPE, "armspe", "Arm SPE tracing"); 52*68f185ccSZachary Leaf 53*68f185ccSZachary Leaf /* 54*68f185ccSZachary Leaf * taskqueue(9) used for sleepable routines called from interrupt handlers 55*68f185ccSZachary Leaf */ 56*68f185ccSZachary Leaf TASKQUEUE_FAST_DEFINE_THREAD(arm_spe); 57*68f185ccSZachary Leaf 58*68f185ccSZachary Leaf void arm_spe_send_buffer(void *, int); 59*68f185ccSZachary Leaf static void arm_spe_error(void *, int); 60*68f185ccSZachary Leaf static int arm_spe_intr(void *); 61*68f185ccSZachary Leaf device_attach_t arm_spe_attach; 62*68f185ccSZachary Leaf 63*68f185ccSZachary Leaf static device_method_t arm_spe_methods[] = { 64*68f185ccSZachary Leaf /* Device interface */ 65*68f185ccSZachary Leaf DEVMETHOD(device_attach, arm_spe_attach), 66*68f185ccSZachary Leaf 67*68f185ccSZachary Leaf DEVMETHOD_END, 68*68f185ccSZachary Leaf }; 69*68f185ccSZachary Leaf 70*68f185ccSZachary Leaf DEFINE_CLASS_0(spe, arm_spe_driver, arm_spe_methods, 71*68f185ccSZachary Leaf sizeof(struct arm_spe_softc)); 72*68f185ccSZachary Leaf 73*68f185ccSZachary Leaf #define ARM_SPE_KVA_MAX_ALIGN UL(2048) 74*68f185ccSZachary Leaf 75*68f185ccSZachary Leaf int 76*68f185ccSZachary Leaf arm_spe_attach(device_t dev) 77*68f185ccSZachary Leaf { 78*68f185ccSZachary Leaf struct arm_spe_softc *sc; 79*68f185ccSZachary Leaf int error, rid; 80*68f185ccSZachary Leaf 81*68f185ccSZachary Leaf sc = device_get_softc(dev); 82*68f185ccSZachary Leaf sc->dev = dev; 83*68f185ccSZachary Leaf 84*68f185ccSZachary Leaf sc->pmbidr = READ_SPECIALREG(PMBIDR_EL1_REG); 85*68f185ccSZachary Leaf sc->pmsidr = READ_SPECIALREG(PMSIDR_EL1_REG); 86*68f185ccSZachary Leaf device_printf(dev, "PMBIDR_EL1: %#lx\n", sc->pmbidr); 87*68f185ccSZachary Leaf device_printf(dev, "PMSIDR_EL1: %#lx\n", sc->pmsidr); 88*68f185ccSZachary Leaf if ((sc->pmbidr & PMBIDR_P) != 0) { 89*68f185ccSZachary Leaf device_printf(dev, "Profiling Buffer is owned by a higher Exception level\n"); 90*68f185ccSZachary Leaf return (EPERM); 91*68f185ccSZachary Leaf } 92*68f185ccSZachary Leaf 93*68f185ccSZachary Leaf sc->kva_align = 1 << ((sc->pmbidr & PMBIDR_Align_MASK) >> PMBIDR_Align_SHIFT); 94*68f185ccSZachary Leaf if (sc->kva_align > ARM_SPE_KVA_MAX_ALIGN) { 95*68f185ccSZachary Leaf device_printf(dev, "Invalid PMBIDR.Align value of %d\n", sc->kva_align); 96*68f185ccSZachary Leaf return (EINVAL); 97*68f185ccSZachary Leaf } 98*68f185ccSZachary Leaf 99*68f185ccSZachary Leaf rid = 0; 100*68f185ccSZachary Leaf sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, 101*68f185ccSZachary Leaf RF_ACTIVE); 102*68f185ccSZachary Leaf if (sc->sc_irq_res == NULL) { 103*68f185ccSZachary Leaf device_printf(dev, "Unable to allocate interrupt\n"); 104*68f185ccSZachary Leaf return (ENXIO); 105*68f185ccSZachary Leaf } 106*68f185ccSZachary Leaf error = bus_setup_intr(dev, sc->sc_irq_res, 107*68f185ccSZachary Leaf INTR_TYPE_MISC | INTR_MPSAFE, arm_spe_intr, NULL, sc, 108*68f185ccSZachary Leaf &sc->sc_irq_cookie); 109*68f185ccSZachary Leaf if (error != 0) { 110*68f185ccSZachary Leaf device_printf(dev, "Unable to set up interrupt\n"); 111*68f185ccSZachary Leaf return (error); 112*68f185ccSZachary Leaf } 113*68f185ccSZachary Leaf 114*68f185ccSZachary Leaf mtx_init(&sc->sc_lock, "Arm SPE lock", NULL, MTX_SPIN); 115*68f185ccSZachary Leaf 116*68f185ccSZachary Leaf STAILQ_INIT(&sc->pending); 117*68f185ccSZachary Leaf sc->npending = 0; 118*68f185ccSZachary Leaf 119*68f185ccSZachary Leaf spe_register(dev); 120*68f185ccSZachary Leaf 121*68f185ccSZachary Leaf return (0); 122*68f185ccSZachary Leaf } 123*68f185ccSZachary Leaf 124*68f185ccSZachary Leaf /* Interrupt handler runs on the same core that triggered the exception */ 125*68f185ccSZachary Leaf static int 126*68f185ccSZachary Leaf arm_spe_intr(void *arg) 127*68f185ccSZachary Leaf { 128*68f185ccSZachary Leaf int cpu_id = PCPU_GET(cpuid); 129*68f185ccSZachary Leaf struct arm_spe_softc *sc = arg; 130*68f185ccSZachary Leaf uint64_t pmbsr; 131*68f185ccSZachary Leaf uint64_t base, limit; 132*68f185ccSZachary Leaf uint8_t ec; 133*68f185ccSZachary Leaf struct arm_spe_info *info = &sc->spe_info[cpu_id]; 134*68f185ccSZachary Leaf uint8_t i = info->buf_idx; 135*68f185ccSZachary Leaf struct arm_spe_buf_info *buf = &info->buf_info[i]; 136*68f185ccSZachary Leaf struct arm_spe_buf_info *prev_buf = &info->buf_info[!i]; 137*68f185ccSZachary Leaf device_t dev = sc->dev; 138*68f185ccSZachary Leaf 139*68f185ccSZachary Leaf /* Make sure the profiling data is visible to the CPU */ 140*68f185ccSZachary Leaf psb_csync(); 141*68f185ccSZachary Leaf dsb(nsh); 142*68f185ccSZachary Leaf 143*68f185ccSZachary Leaf /* Make sure any HW update of PMBPTR_EL1 is visible to the CPU */ 144*68f185ccSZachary Leaf isb(); 145*68f185ccSZachary Leaf 146*68f185ccSZachary Leaf pmbsr = READ_SPECIALREG(PMBSR_EL1_REG); 147*68f185ccSZachary Leaf 148*68f185ccSZachary Leaf if (!(pmbsr & PMBSR_S)) 149*68f185ccSZachary Leaf return (FILTER_STRAY); 150*68f185ccSZachary Leaf 151*68f185ccSZachary Leaf /* Event Class */ 152*68f185ccSZachary Leaf ec = PMBSR_EC_VAL(pmbsr); 153*68f185ccSZachary Leaf switch (ec) 154*68f185ccSZachary Leaf { 155*68f185ccSZachary Leaf case PMBSR_EC_OTHER_BUF_MGMT: /* Other buffer management event */ 156*68f185ccSZachary Leaf break; 157*68f185ccSZachary Leaf case PMBSR_EC_GRAN_PROT_CHK: /* Granule Protection Check fault */ 158*68f185ccSZachary Leaf device_printf(dev, "PMBSR_EC_GRAN_PROT_CHK\n"); 159*68f185ccSZachary Leaf break; 160*68f185ccSZachary Leaf case PMBSR_EC_STAGE1_DA: /* Stage 1 Data Abort */ 161*68f185ccSZachary Leaf device_printf(dev, "PMBSR_EC_STAGE1_DA\n"); 162*68f185ccSZachary Leaf break; 163*68f185ccSZachary Leaf case PMBSR_EC_STAGE2_DA: /* Stage 2 Data Abort */ 164*68f185ccSZachary Leaf device_printf(dev, "PMBSR_EC_STAGE2_DA\n"); 165*68f185ccSZachary Leaf break; 166*68f185ccSZachary Leaf default: 167*68f185ccSZachary Leaf /* Unknown EC */ 168*68f185ccSZachary Leaf device_printf(dev, "unknown PMBSR_EC: %#x\n", ec); 169*68f185ccSZachary Leaf arm_spe_disable(NULL); 170*68f185ccSZachary Leaf TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx); 171*68f185ccSZachary Leaf taskqueue_enqueue(taskqueue_arm_spe, &sc->task); 172*68f185ccSZachary Leaf return (FILTER_HANDLED); 173*68f185ccSZachary Leaf } 174*68f185ccSZachary Leaf 175*68f185ccSZachary Leaf switch (ec) { 176*68f185ccSZachary Leaf case PMBSR_EC_OTHER_BUF_MGMT: 177*68f185ccSZachary Leaf /* Buffer Status Code = buffer filled */ 178*68f185ccSZachary Leaf if ((pmbsr & PMBSR_MSS_BSC_MASK) == PMBSR_MSS_BSC_BUFFER_FILLED) { 179*68f185ccSZachary Leaf dprintf("%s SPE buffer full event (cpu:%d)\n", 180*68f185ccSZachary Leaf __func__, cpu_id); 181*68f185ccSZachary Leaf break; 182*68f185ccSZachary Leaf } 183*68f185ccSZachary Leaf case PMBSR_EC_GRAN_PROT_CHK: 184*68f185ccSZachary Leaf case PMBSR_EC_STAGE1_DA: 185*68f185ccSZachary Leaf case PMBSR_EC_STAGE2_DA: 186*68f185ccSZachary Leaf /* 187*68f185ccSZachary Leaf * If we have one of these, we've messed up the 188*68f185ccSZachary Leaf * programming somehow (e.g. passed invalid memory to 189*68f185ccSZachary Leaf * SPE) and can't recover 190*68f185ccSZachary Leaf */ 191*68f185ccSZachary Leaf arm_spe_disable(NULL); 192*68f185ccSZachary Leaf TASK_INIT(&sc->task, 0, (task_fn_t *)arm_spe_error, sc->ctx); 193*68f185ccSZachary Leaf taskqueue_enqueue(taskqueue_arm_spe, &sc->task); 194*68f185ccSZachary Leaf /* PMBPTR_EL1 is fault address if PMBSR_DL is 1 */ 195*68f185ccSZachary Leaf device_printf(dev, "CPU:%d PMBSR_EL1:%#lx\n", cpu_id, pmbsr); 196*68f185ccSZachary Leaf device_printf(dev, "PMBPTR_EL1:%#lx PMBLIMITR_EL1:%#lx\n", 197*68f185ccSZachary Leaf READ_SPECIALREG(PMBPTR_EL1_REG), 198*68f185ccSZachary Leaf READ_SPECIALREG(PMBLIMITR_EL1_REG)); 199*68f185ccSZachary Leaf return (FILTER_HANDLED); 200*68f185ccSZachary Leaf } 201*68f185ccSZachary Leaf 202*68f185ccSZachary Leaf mtx_lock_spin(&info->lock); 203*68f185ccSZachary Leaf 204*68f185ccSZachary Leaf /* 205*68f185ccSZachary Leaf * Data Loss bit - pmbptr might not be pointing to the end of the last 206*68f185ccSZachary Leaf * complete record 207*68f185ccSZachary Leaf */ 208*68f185ccSZachary Leaf if ((pmbsr & PMBSR_DL) == PMBSR_DL) 209*68f185ccSZachary Leaf buf->partial_rec = 1; 210*68f185ccSZachary Leaf buf->pmbptr = READ_SPECIALREG(PMBPTR_EL1_REG); 211*68f185ccSZachary Leaf buf->buf_svc = true; 212*68f185ccSZachary Leaf 213*68f185ccSZachary Leaf /* Setup regs ready to start writing to the other half of the buffer */ 214*68f185ccSZachary Leaf info->buf_idx = !info->buf_idx; 215*68f185ccSZachary Leaf base = buf_start_addr(info->buf_idx, info); 216*68f185ccSZachary Leaf limit = base + (info->buf_size/2); 217*68f185ccSZachary Leaf limit &= PMBLIMITR_LIMIT_MASK; 218*68f185ccSZachary Leaf limit |= PMBLIMITR_E; 219*68f185ccSZachary Leaf WRITE_SPECIALREG(PMBPTR_EL1_REG, base); 220*68f185ccSZachary Leaf WRITE_SPECIALREG(PMBLIMITR_EL1_REG, limit); 221*68f185ccSZachary Leaf isb(); 222*68f185ccSZachary Leaf 223*68f185ccSZachary Leaf /* 224*68f185ccSZachary Leaf * Notify userspace via kqueue that buffer is full and needs copying 225*68f185ccSZachary Leaf * out - since kqueue can sleep, don't do this in the interrupt handler, 226*68f185ccSZachary Leaf * add to a taskqueue to be scheduled later instead 227*68f185ccSZachary Leaf */ 228*68f185ccSZachary Leaf TASK_INIT(&info->task[i], 0, (task_fn_t *)arm_spe_send_buffer, buf); 229*68f185ccSZachary Leaf taskqueue_enqueue(taskqueue_arm_spe, &info->task[i]); 230*68f185ccSZachary Leaf 231*68f185ccSZachary Leaf /* 232*68f185ccSZachary Leaf * It's possible userspace hasn't yet notified us they've copied out the 233*68f185ccSZachary Leaf * other half of the buffer 234*68f185ccSZachary Leaf * 235*68f185ccSZachary Leaf * This might be because: 236*68f185ccSZachary Leaf * a) Kernel hasn't scheduled the task via taskqueue to notify 237*68f185ccSZachary Leaf * userspace to copy out the data 238*68f185ccSZachary Leaf * b) Userspace is still copying the buffer or hasn't notified us 239*68f185ccSZachary Leaf * back via the HWT_IOC_SVC_BUF ioctl 240*68f185ccSZachary Leaf * 241*68f185ccSZachary Leaf * Either way we need to avoid overwriting uncopied data in the 242*68f185ccSZachary Leaf * buffer, so disable profiling until we receive that SVC_BUF 243*68f185ccSZachary Leaf * ioctl 244*68f185ccSZachary Leaf * 245*68f185ccSZachary Leaf * Using a larger buffer size should help to minimise these events and 246*68f185ccSZachary Leaf * loss of profiling data while profiling is disabled 247*68f185ccSZachary Leaf */ 248*68f185ccSZachary Leaf if (prev_buf->buf_svc) { 249*68f185ccSZachary Leaf device_printf(sc->dev, "cpu%d: buffer full interrupt, but other" 250*68f185ccSZachary Leaf " half of buffer has not been copied out - consider" 251*68f185ccSZachary Leaf " increasing buffer size to minimise loss of profiling data\n", 252*68f185ccSZachary Leaf cpu_id); 253*68f185ccSZachary Leaf WRITE_SPECIALREG(PMSCR_EL1_REG, 0x0); 254*68f185ccSZachary Leaf prev_buf->buf_wait = true; 255*68f185ccSZachary Leaf } 256*68f185ccSZachary Leaf 257*68f185ccSZachary Leaf mtx_unlock_spin(&info->lock); 258*68f185ccSZachary Leaf 259*68f185ccSZachary Leaf /* Clear Profiling Buffer Status Register */ 260*68f185ccSZachary Leaf WRITE_SPECIALREG(PMBSR_EL1_REG, 0); 261*68f185ccSZachary Leaf 262*68f185ccSZachary Leaf isb(); 263*68f185ccSZachary Leaf 264*68f185ccSZachary Leaf return (FILTER_HANDLED); 265*68f185ccSZachary Leaf } 266*68f185ccSZachary Leaf 267*68f185ccSZachary Leaf /* note: Scheduled and run via taskqueue, so can run on any CPU at any time */ 268*68f185ccSZachary Leaf void 269*68f185ccSZachary Leaf arm_spe_send_buffer(void *arg, int pending __unused) 270*68f185ccSZachary Leaf { 271*68f185ccSZachary Leaf struct arm_spe_buf_info *buf = (struct arm_spe_buf_info *)arg; 272*68f185ccSZachary Leaf struct arm_spe_info *info = buf->info; 273*68f185ccSZachary Leaf struct arm_spe_queue *queue; 274*68f185ccSZachary Leaf struct kevent kev; 275*68f185ccSZachary Leaf int ret; 276*68f185ccSZachary Leaf 277*68f185ccSZachary Leaf queue = malloc(sizeof(struct arm_spe_queue), M_ARM_SPE, 278*68f185ccSZachary Leaf M_WAITOK | M_ZERO); 279*68f185ccSZachary Leaf 280*68f185ccSZachary Leaf mtx_lock_spin(&info->lock); 281*68f185ccSZachary Leaf 282*68f185ccSZachary Leaf /* Add to queue for userspace to pickup */ 283*68f185ccSZachary Leaf queue->ident = info->ident; 284*68f185ccSZachary Leaf queue->offset = buf->pmbptr - buf_start_addr(buf->buf_idx, info); 285*68f185ccSZachary Leaf queue->buf_idx = buf->buf_idx; 286*68f185ccSZachary Leaf queue->final_buf = !info->enabled; 287*68f185ccSZachary Leaf queue->partial_rec = buf->partial_rec; 288*68f185ccSZachary Leaf mtx_unlock_spin(&info->lock); 289*68f185ccSZachary Leaf 290*68f185ccSZachary Leaf mtx_lock_spin(&info->sc->sc_lock); 291*68f185ccSZachary Leaf STAILQ_INSERT_TAIL(&info->sc->pending, queue, next); 292*68f185ccSZachary Leaf info->sc->npending++; 293*68f185ccSZachary Leaf EV_SET(&kev, ARM_SPE_KQ_BUF, EVFILT_USER, 0, NOTE_TRIGGER, 294*68f185ccSZachary Leaf info->sc->npending, NULL); 295*68f185ccSZachary Leaf mtx_unlock_spin(&info->sc->sc_lock); 296*68f185ccSZachary Leaf 297*68f185ccSZachary Leaf /* Notify userspace */ 298*68f185ccSZachary Leaf ret = kqfd_register(info->sc->kqueue_fd, &kev, info->sc->hwt_td, 299*68f185ccSZachary Leaf M_WAITOK); 300*68f185ccSZachary Leaf if (ret) { 301*68f185ccSZachary Leaf dprintf("%s kqfd_register ret:%d\n", __func__, ret); 302*68f185ccSZachary Leaf arm_spe_error(info->sc->ctx, 0); 303*68f185ccSZachary Leaf } 304*68f185ccSZachary Leaf } 305*68f185ccSZachary Leaf 306*68f185ccSZachary Leaf static void 307*68f185ccSZachary Leaf arm_spe_error(void *arg, int pending __unused) 308*68f185ccSZachary Leaf { 309*68f185ccSZachary Leaf struct hwt_context *ctx = arg; 310*68f185ccSZachary Leaf struct kevent kev; 311*68f185ccSZachary Leaf int ret; 312*68f185ccSZachary Leaf 313*68f185ccSZachary Leaf smp_rendezvous_cpus(ctx->cpu_map, smp_no_rendezvous_barrier, 314*68f185ccSZachary Leaf arm_spe_disable, smp_no_rendezvous_barrier, NULL); 315*68f185ccSZachary Leaf 316*68f185ccSZachary Leaf EV_SET(&kev, ARM_SPE_KQ_SHUTDOWN, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); 317*68f185ccSZachary Leaf ret = kqfd_register(ctx->kqueue_fd, &kev, ctx->hwt_td, M_WAITOK); 318*68f185ccSZachary Leaf if (ret) 319*68f185ccSZachary Leaf dprintf("%s kqfd_register ret:%d\n", __func__, ret); 320*68f185ccSZachary Leaf } 321*68f185ccSZachary Leaf 322*68f185ccSZachary Leaf MODULE_DEPEND(spe, hwt, 1, 1, 1); 323*68f185ccSZachary Leaf MODULE_VERSION(spe, 1); 324