12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 275b82472SChristophe Leroy /* 375b82472SChristophe Leroy * Performance event support - PPC 8xx 475b82472SChristophe Leroy * 575b82472SChristophe Leroy * Copyright 2016 Christophe Leroy, CS Systemes d'Information 675b82472SChristophe Leroy */ 775b82472SChristophe Leroy 875b82472SChristophe Leroy #include <linux/kernel.h> 975b82472SChristophe Leroy #include <linux/sched.h> 1075b82472SChristophe Leroy #include <linux/perf_event.h> 1175b82472SChristophe Leroy #include <linux/percpu.h> 1275b82472SChristophe Leroy #include <linux/hardirq.h> 1375b82472SChristophe Leroy #include <asm/pmc.h> 1475b82472SChristophe Leroy #include <asm/machdep.h> 1575b82472SChristophe Leroy #include <asm/firmware.h> 1675b82472SChristophe Leroy #include <asm/ptrace.h> 17cd99ddbeSChristophe Leroy #include <asm/code-patching.h> 1875346251SJordan Niethe #include <asm/inst.h> 1975b82472SChristophe Leroy 2075b82472SChristophe Leroy #define PERF_8xx_ID_CPU_CYCLES 1 2175b82472SChristophe Leroy #define PERF_8xx_ID_HW_INSTRUCTIONS 2 2275b82472SChristophe Leroy #define PERF_8xx_ID_ITLB_LOAD_MISS 3 2375b82472SChristophe Leroy #define PERF_8xx_ID_DTLB_LOAD_MISS 4 2475b82472SChristophe Leroy 2575b82472SChristophe Leroy #define C(x) PERF_COUNT_HW_CACHE_##x 2675b82472SChristophe Leroy #define DTLB_LOAD_MISS (C(DTLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)) 2775b82472SChristophe Leroy #define ITLB_LOAD_MISS (C(ITLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)) 2875b82472SChristophe Leroy 2975b82472SChristophe Leroy extern unsigned long itlb_miss_counter, dtlb_miss_counter; 3075b82472SChristophe Leroy extern atomic_t instruction_counter; 3175b82472SChristophe Leroy 3275b82472SChristophe Leroy static atomic_t insn_ctr_ref; 33cd99ddbeSChristophe Leroy static atomic_t itlb_miss_ref; 34cd99ddbeSChristophe Leroy static atomic_t dtlb_miss_ref; 3575b82472SChristophe Leroy 3675b82472SChristophe Leroy static s64 get_insn_ctr(void) 3775b82472SChristophe Leroy { 3875b82472SChristophe Leroy int ctr; 3975b82472SChristophe Leroy unsigned long counta; 4075b82472SChristophe Leroy 4175b82472SChristophe Leroy do { 4275b82472SChristophe Leroy ctr = atomic_read(&instruction_counter); 4375b82472SChristophe Leroy counta = mfspr(SPRN_COUNTA); 4475b82472SChristophe Leroy } while (ctr != atomic_read(&instruction_counter)); 4575b82472SChristophe Leroy 4675b82472SChristophe Leroy return ((s64)ctr << 16) | (counta >> 16); 4775b82472SChristophe Leroy } 4875b82472SChristophe Leroy 4975b82472SChristophe Leroy static int event_type(struct perf_event *event) 5075b82472SChristophe Leroy { 5175b82472SChristophe Leroy switch (event->attr.type) { 5275b82472SChristophe Leroy case PERF_TYPE_HARDWARE: 5375b82472SChristophe Leroy if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) 5475b82472SChristophe Leroy return PERF_8xx_ID_CPU_CYCLES; 5575b82472SChristophe Leroy if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) 5675b82472SChristophe Leroy return PERF_8xx_ID_HW_INSTRUCTIONS; 5775b82472SChristophe Leroy break; 5875b82472SChristophe Leroy case PERF_TYPE_HW_CACHE: 5975b82472SChristophe Leroy if (event->attr.config == ITLB_LOAD_MISS) 6075b82472SChristophe Leroy return PERF_8xx_ID_ITLB_LOAD_MISS; 6175b82472SChristophe Leroy if (event->attr.config == DTLB_LOAD_MISS) 6275b82472SChristophe Leroy return PERF_8xx_ID_DTLB_LOAD_MISS; 6375b82472SChristophe Leroy break; 6475b82472SChristophe Leroy case PERF_TYPE_RAW: 6575b82472SChristophe Leroy break; 6675b82472SChristophe Leroy default: 6775b82472SChristophe Leroy return -ENOENT; 6875b82472SChristophe Leroy } 6975b82472SChristophe Leroy return -EOPNOTSUPP; 7075b82472SChristophe Leroy } 7175b82472SChristophe Leroy 7275b82472SChristophe Leroy static int mpc8xx_pmu_event_init(struct perf_event *event) 7375b82472SChristophe Leroy { 7475b82472SChristophe Leroy int type = event_type(event); 7575b82472SChristophe Leroy 7675b82472SChristophe Leroy if (type < 0) 7775b82472SChristophe Leroy return type; 7875b82472SChristophe Leroy return 0; 7975b82472SChristophe Leroy } 8075b82472SChristophe Leroy 8175b82472SChristophe Leroy static int mpc8xx_pmu_add(struct perf_event *event, int flags) 8275b82472SChristophe Leroy { 8375b82472SChristophe Leroy int type = event_type(event); 8475b82472SChristophe Leroy s64 val = 0; 8575b82472SChristophe Leroy 8675b82472SChristophe Leroy if (type < 0) 8775b82472SChristophe Leroy return type; 8875b82472SChristophe Leroy 8975b82472SChristophe Leroy switch (type) { 9075b82472SChristophe Leroy case PERF_8xx_ID_CPU_CYCLES: 9175b82472SChristophe Leroy val = get_tb(); 9275b82472SChristophe Leroy break; 9375b82472SChristophe Leroy case PERF_8xx_ID_HW_INSTRUCTIONS: 9475b82472SChristophe Leroy if (atomic_inc_return(&insn_ctr_ref) == 1) 9575b82472SChristophe Leroy mtspr(SPRN_ICTRL, 0xc0080007); 9675b82472SChristophe Leroy val = get_insn_ctr(); 9775b82472SChristophe Leroy break; 9875b82472SChristophe Leroy case PERF_8xx_ID_ITLB_LOAD_MISS: 99cd99ddbeSChristophe Leroy if (atomic_inc_return(&itlb_miss_ref) == 1) { 100709cf19cSChristophe Leroy unsigned long target = patch_site_addr(&patch__itlbmiss_perf); 101cd99ddbeSChristophe Leroy 102709cf19cSChristophe Leroy patch_branch_site(&patch__itlbmiss_exit_1, target, 0); 103cd99ddbeSChristophe Leroy } 10475b82472SChristophe Leroy val = itlb_miss_counter; 10575b82472SChristophe Leroy break; 10675b82472SChristophe Leroy case PERF_8xx_ID_DTLB_LOAD_MISS: 107cd99ddbeSChristophe Leroy if (atomic_inc_return(&dtlb_miss_ref) == 1) { 108709cf19cSChristophe Leroy unsigned long target = patch_site_addr(&patch__dtlbmiss_perf); 109cd99ddbeSChristophe Leroy 110709cf19cSChristophe Leroy patch_branch_site(&patch__dtlbmiss_exit_1, target, 0); 111cd99ddbeSChristophe Leroy } 11275b82472SChristophe Leroy val = dtlb_miss_counter; 11375b82472SChristophe Leroy break; 11475b82472SChristophe Leroy } 11575b82472SChristophe Leroy local64_set(&event->hw.prev_count, val); 11675b82472SChristophe Leroy return 0; 11775b82472SChristophe Leroy } 11875b82472SChristophe Leroy 11975b82472SChristophe Leroy static void mpc8xx_pmu_read(struct perf_event *event) 12075b82472SChristophe Leroy { 12175b82472SChristophe Leroy int type = event_type(event); 12275b82472SChristophe Leroy s64 prev, val = 0, delta = 0; 12375b82472SChristophe Leroy 12475b82472SChristophe Leroy if (type < 0) 12575b82472SChristophe Leroy return; 12675b82472SChristophe Leroy 12775b82472SChristophe Leroy do { 12875b82472SChristophe Leroy prev = local64_read(&event->hw.prev_count); 12975b82472SChristophe Leroy switch (type) { 13075b82472SChristophe Leroy case PERF_8xx_ID_CPU_CYCLES: 13175b82472SChristophe Leroy val = get_tb(); 13275b82472SChristophe Leroy delta = 16 * (val - prev); 13375b82472SChristophe Leroy break; 13475b82472SChristophe Leroy case PERF_8xx_ID_HW_INSTRUCTIONS: 13575b82472SChristophe Leroy val = get_insn_ctr(); 13675b82472SChristophe Leroy delta = prev - val; 13775b82472SChristophe Leroy if (delta < 0) 13875b82472SChristophe Leroy delta += 0x1000000000000LL; 13975b82472SChristophe Leroy break; 14075b82472SChristophe Leroy case PERF_8xx_ID_ITLB_LOAD_MISS: 14175b82472SChristophe Leroy val = itlb_miss_counter; 14275b82472SChristophe Leroy delta = (s64)((s32)val - (s32)prev); 14375b82472SChristophe Leroy break; 14475b82472SChristophe Leroy case PERF_8xx_ID_DTLB_LOAD_MISS: 14575b82472SChristophe Leroy val = dtlb_miss_counter; 14675b82472SChristophe Leroy delta = (s64)((s32)val - (s32)prev); 14775b82472SChristophe Leroy break; 14875b82472SChristophe Leroy } 14975b82472SChristophe Leroy } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 15075b82472SChristophe Leroy 15175b82472SChristophe Leroy local64_add(delta, &event->count); 15275b82472SChristophe Leroy } 15375b82472SChristophe Leroy 15475b82472SChristophe Leroy static void mpc8xx_pmu_del(struct perf_event *event, int flags) 15575b82472SChristophe Leroy { 156*c545b9f0SChristophe Leroy ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); 15789eecd93SChristophe Leroy 15875b82472SChristophe Leroy mpc8xx_pmu_read(event); 15975b82472SChristophe Leroy 16075b82472SChristophe Leroy /* If it was the last user, stop counting to avoid useles overhead */ 161cd99ddbeSChristophe Leroy switch (event_type(event)) { 162cd99ddbeSChristophe Leroy case PERF_8xx_ID_CPU_CYCLES: 163cd99ddbeSChristophe Leroy break; 164cd99ddbeSChristophe Leroy case PERF_8xx_ID_HW_INSTRUCTIONS: 16575b82472SChristophe Leroy if (atomic_dec_return(&insn_ctr_ref) == 0) 16675b82472SChristophe Leroy mtspr(SPRN_ICTRL, 7); 167cd99ddbeSChristophe Leroy break; 168cd99ddbeSChristophe Leroy case PERF_8xx_ID_ITLB_LOAD_MISS: 16989eecd93SChristophe Leroy if (atomic_dec_return(&itlb_miss_ref) == 0) 170709cf19cSChristophe Leroy patch_instruction_site(&patch__itlbmiss_exit_1, insn); 171cd99ddbeSChristophe Leroy break; 172cd99ddbeSChristophe Leroy case PERF_8xx_ID_DTLB_LOAD_MISS: 17389eecd93SChristophe Leroy if (atomic_dec_return(&dtlb_miss_ref) == 0) 174709cf19cSChristophe Leroy patch_instruction_site(&patch__dtlbmiss_exit_1, insn); 175cd99ddbeSChristophe Leroy break; 176cd99ddbeSChristophe Leroy } 17775b82472SChristophe Leroy } 17875b82472SChristophe Leroy 17975b82472SChristophe Leroy static struct pmu mpc8xx_pmu = { 18075b82472SChristophe Leroy .event_init = mpc8xx_pmu_event_init, 18175b82472SChristophe Leroy .add = mpc8xx_pmu_add, 18275b82472SChristophe Leroy .del = mpc8xx_pmu_del, 18375b82472SChristophe Leroy .read = mpc8xx_pmu_read, 18475b82472SChristophe Leroy .capabilities = PERF_PMU_CAP_NO_INTERRUPT | 18575b82472SChristophe Leroy PERF_PMU_CAP_NO_NMI, 18675b82472SChristophe Leroy }; 18775b82472SChristophe Leroy 18875b82472SChristophe Leroy static int init_mpc8xx_pmu(void) 18975b82472SChristophe Leroy { 19075b82472SChristophe Leroy mtspr(SPRN_ICTRL, 7); 19175b82472SChristophe Leroy mtspr(SPRN_CMPA, 0); 19275b82472SChristophe Leroy mtspr(SPRN_COUNTA, 0xffff); 19375b82472SChristophe Leroy 19475b82472SChristophe Leroy return perf_pmu_register(&mpc8xx_pmu, "cpu", PERF_TYPE_RAW); 19575b82472SChristophe Leroy } 19675b82472SChristophe Leroy 19775b82472SChristophe Leroy early_initcall(init_mpc8xx_pmu); 198