// SPDX-License-Identifier: GPL-2.0
/*
 * ARMv6 Performance counter handling code.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 *
 * ARMv6 has 2 configurable performance counters and a single cycle counter.
 * They all share a single reset bit but can be written to zero so we can use
 * that for a reset.
 *
 * The counters can't be individually enabled or disabled so when we remove
 * one event and replace it with another we could get spurious counts from the
 * wrong event. However, we can take advantage of the fact that the
 * performance counters can export events to the event bus, and the event bus
 * itself can be monitored. This requires that we *don't* export the events to
 * the event bus. The procedure for disabling a configurable counter is:
 *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
 *	  effectively stops the counter from counting.
 *	- disable the counter's interrupt generation (each counter has its
 *	  own interrupt enable bit).
 * Once stopped, the counter value can be written as 0 to reset.
 *
 * To enable a counter:
 *	- enable the counter's interrupt generation.
 *	- set the new event type.
 *
 * Note: the dedicated cycle counter only counts cycles and can't be
 * enabled/disabled independently of the others. When we want to disable the
 * cycle counter, we have to just disable the interrupt reporting and start
 * ignoring that counter. When re-enabling, we have to reset the value and
 * enable the interrupt.
 */

#include <asm/cputype.h>
#include <asm/irq_regs.h>

#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>

enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS = 0x0,
	ARMV6_PERFCTR_IBUF_STALL = 0x1,
	ARMV6_PERFCTR_DDEP_STALL = 0x2,
	ARMV6_PERFCTR_ITLB_MISS = 0x3,
	ARMV6_PERFCTR_DTLB_MISS = 0x4,
	ARMV6_PERFCTR_BR_EXEC = 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
	ARMV6_PERFCTR_INSTR_EXEC = 0x7,
	ARMV6_PERFCTR_DCACHE_HIT = 0x9,
	ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
	ARMV6_PERFCTR_DCACHE_MISS = 0xB,
	ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
	ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
	ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
	ARMV6_PERFCTR_NOP = 0x20,
};

enum armv6_counters {
	ARMV6_CYCLE_COUNTER = 0,
	ARMV6_COUNTER0,
	ARMV6_COUNTER1,
	ARMV6_NUM_COUNTERS
};

/*
 * The hardware events that we support. We do support cache operations but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
	PERF_MAP_ALL_UNSUPPORTED,
	[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
};

static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					  [PERF_COUNT_HW_CACHE_OP_MAX]
					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	PERF_CACHE_MAP_ALL_UNSUPPORTED,

	/*
	 * The performance counters don't differentiate between read and write
	 * accesses/misses so this isn't strictly correct, but it's the best
	 * we can do. Writes and reads get combined.
	 */
	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
	[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,

	[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,

	/*
	 * The ARM performance counters can count micro DTLB misses, micro ITLB
	 * misses and main TLB misses. There isn't an event for TLB misses, so
	 * use the micro misses here, and if users want the main TLB misses
	 * they can use a raw counter.
	 */
	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,

	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
	[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
};
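
/*
 * Usage sketch (illustrative, not part of the driver): with the mappings
 * above, generic perf events such as "cycles", "instructions" and
 * "branch-misses" resolve directly to the ARMv6 counters, e.g.
 *
 *	perf stat -e cycles,instructions,branch-misses -- <workload>
 *
 * Events without a generic mapping, such as main TLB misses
 * (ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF), can still be counted through the
 * raw event interface, assuming the usual perf tool "rNN" hex notation:
 *
 *	perf stat -e rf -- <workload>
 */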

/*
 * CP15 c15, c12 is the ARM11 performance monitor register block: opcode2 0
 * selects the control register (PMCR), 1 the cycle counter and 2/3 the two
 * configurable event counters.
 */
static inline unsigned long
armv6_pmcr_read(void)
{
	u32 val;
	asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
	return val;
}

static inline void
armv6_pmcr_write(unsigned long val)
{
	asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
}

#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)

#define ARMV6_PMCR_OVERFLOWED_MASK \
	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
	 ARMV6_PMCR_CCOUNT_OVERFLOW)

static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)
{
	return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
}

static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
				  enum armv6_counters counter)
{
	int ret = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
	else if (ARMV6_COUNTER0 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
	else if (ARMV6_COUNTER1 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return ret;
}

static inline u64 armv6pmu_read_counter(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int counter = hwc->idx;
	unsigned long value = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return value;
}

static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
{
	struct hw_perf_event *hwc = &event->hw;
	int counter = hwc->idx;

	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}

static void armv6pmu_enable_event(struct perf_event *event)
{
	unsigned long val, mask, evt;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask = 0;
		evt = ARMV6_PMCR_CCOUNT_IEN;
	} else if (ARMV6_COUNTER0 == idx) {
		mask = ARMV6_PMCR_EVT_COUNT0_MASK;
		evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
		      ARMV6_PMCR_COUNT0_IEN;
	} else if (ARMV6_COUNTER1 == idx) {
		mask = ARMV6_PMCR_EVT_COUNT1_MASK;
		evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
		      ARMV6_PMCR_COUNT1_IEN;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the event
	 * that we're interested in.
	 */
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
}

static irqreturn_t
armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
	unsigned long pmcr = armv6_pmcr_read();
	struct perf_sample_data data;
	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
	struct pt_regs *regs;
	int idx;

	if (!armv6_pmcr_has_overflowed(pmcr))
		return IRQ_NONE;

	regs = get_irq_regs();

	/*
	 * The interrupts are cleared by writing the overflow flags back to
	 * the control register. All of the other bits don't have any effect
	 * if they are rewritten, so write the whole value back.
	 */
	armv6_pmcr_write(pmcr);

	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV6_NUM_COUNTERS) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event);
		perf_sample_data_init(&data, 0, hwc->last_period);
		if (!armpmu_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			cpu_pmu->disable(event);
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

static void armv6pmu_start(struct arm_pmu *cpu_pmu)
{
	unsigned long val;

	val = armv6_pmcr_read();
	val |= ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
}

static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
{
	unsigned long val;

	val = armv6_pmcr_read();
	val &= ~ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
}

static int
armv6pmu_get_event_idx(struct pmu_hw_events *cpuc,
		       struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	/* Always place a cycle counter into the cycle counter. */
	if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) {
		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
			return -EAGAIN;

		return ARMV6_CYCLE_COUNTER;
	} else {
		/*
		 * For anything other than a cycle counter, try and use
		 * counter0 and counter1.
		 */
		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
			return ARMV6_COUNTER1;

		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
			return ARMV6_COUNTER0;

		/* The counters are all in use. */
		return -EAGAIN;
	}
}

static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
				     struct perf_event *event)
{
	clear_bit(event->hw.idx, cpuc->used_mask);
}

static void armv6pmu_disable_event(struct perf_event *event)
{
	unsigned long val, mask, evt;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask = ARMV6_PMCR_CCOUNT_IEN;
		evt = 0;
	} else if (ARMV6_COUNTER0 == idx) {
		mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
		evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
	} else if (ARMV6_COUNTER1 == idx) {
		mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
		evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the number
	 * of ETM bus signal assertion cycles. The external reporting should
	 * be disabled and so this should never increment.
	 */
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
}
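
/*
 * Editorial note on event mapping (a summary of the generic arm_pmu helper,
 * not of anything defined in this file): armpmu_map_event() below is expected
 * to translate PERF_TYPE_HARDWARE events via armv6_perf_map,
 * PERF_TYPE_HW_CACHE events via armv6_perf_cache_map, and PERF_TYPE_RAW
 * events by masking the user-supplied config with 0xFF, matching the 8-bit
 * event fields in the PMCR.
 */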

static int armv6_map_event(struct perf_event *event)
{
	return armpmu_map_event(event, &armv6_perf_map,
				&armv6_perf_cache_map, 0xFF);
}

static void armv6pmu_init(struct arm_pmu *cpu_pmu)
{
	cpu_pmu->handle_irq = armv6pmu_handle_irq;
	cpu_pmu->enable = armv6pmu_enable_event;
	cpu_pmu->disable = armv6pmu_disable_event;
	cpu_pmu->read_counter = armv6pmu_read_counter;
	cpu_pmu->write_counter = armv6pmu_write_counter;
	cpu_pmu->get_event_idx = armv6pmu_get_event_idx;
	cpu_pmu->clear_event_idx = armv6pmu_clear_event_idx;
	cpu_pmu->start = armv6pmu_start;
	cpu_pmu->stop = armv6pmu_stop;
	cpu_pmu->map_event = armv6_map_event;

	bitmap_set(cpu_pmu->cntr_mask, 0, ARMV6_NUM_COUNTERS);
}

static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
{
	armv6pmu_init(cpu_pmu);
	cpu_pmu->name = "armv6_1136";
	return 0;
}

static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
	armv6pmu_init(cpu_pmu);
	cpu_pmu->name = "armv6_1176";
	return 0;
}

static const struct of_device_id armv6_pmu_of_device_ids[] = {
	{.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
	{.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
	{ /* sentinel value */ }
};

static int armv6_pmu_device_probe(struct platform_device *pdev)
{
	return arm_pmu_device_probe(pdev, armv6_pmu_of_device_ids, NULL);
}

static struct platform_driver armv6_pmu_driver = {
	.driver		= {
		.name	= "armv6-pmu",
		.of_match_table = armv6_pmu_of_device_ids,
	},
	.probe		= armv6_pmu_device_probe,
};

builtin_platform_driver(armv6_pmu_driver);
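
/*
 * Illustrative only: a devicetree node of the following shape would bind to
 * this driver on an ARM1176 board. The interrupt specifier is board-specific
 * and shown here purely as a placeholder:
 *
 *	pmu {
 *		compatible = "arm,arm1176-pmu";
 *		interrupts = <7>;
 *	};
 */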