/*
 * Linux performance counter support for ARC
 *
 * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H

/* real maximum varies per CPU, this is the maximum supported by the driver */
#define ARC_PMU_MAX_HWEVENTS	64

/*
 * Auxiliary registers of the Condition-Code (CC) lookup interface:
 * BUILD describes the CC block; INDEX selects an entry whose 8-char
 * name is then read back through NAME0/NAME1.
 */
#define ARC_REG_CC_BUILD	0xF6
#define ARC_REG_CC_INDEX	0x240
#define ARC_REG_CC_NAME0	0x241
#define ARC_REG_CC_NAME1	0x242

/*
 * Auxiliary registers of the Performance-Counter (PCT) block:
 * 64-bit counts/snapshots are split over a low/high register pair;
 * INDEX selects which hardware counter the other registers address.
 */
#define ARC_REG_PCT_BUILD	0xF5
#define ARC_REG_PCT_COUNTL	0x250
#define ARC_REG_PCT_COUNTH	0x251
#define ARC_REG_PCT_SNAPL	0x252
#define ARC_REG_PCT_SNAPH	0x253
#define ARC_REG_PCT_CONFIG	0x254
#define ARC_REG_PCT_CONTROL	0x255
#define ARC_REG_PCT_INDEX	0x256

/* Bits in ARC_REG_PCT_CONTROL */
#define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
#define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */

/*
 * Layout of the PCT build-configuration register.
 * Field order is mirrored between endiannesses so that the same names
 * address the same hardware bits either way.
 * NOTE(review): field semantics (v = version, c = counter count, s =
 * counter size, r = reserved, m = ?) follow ARC BCR convention — confirm
 * against the ARC Programmer's Reference Manual.
 */
struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int m:8, c:8, r:6, s:2, v:8;
#else
	unsigned int v:8, s:2, r:6, c:8, m:8;
#endif
};

/*
 * Layout of the CC build-configuration register.
 * Same endianness-mirroring scheme as above; v is presumably the BCR
 * version, c the number of condition codes — TODO confirm vs. the PRM.
 */
struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int c:16, r:8, v:8;
#else
	unsigned int v:8, r:8, c:16;
#endif
};

/*
 * ARC-specific hardware events, appended after the generic
 * PERF_COUNT_HW_* ids so both ranges can index arc_pmu_ev_hw_map[].
 */
#define PERF_COUNT_ARC_DCLM	(PERF_COUNT_HW_MAX + 0)
#define PERF_COUNT_ARC_DCSM	(PERF_COUNT_HW_MAX + 1)
#define PERF_COUNT_ARC_ICM	(PERF_COUNT_HW_MAX + 2)
#define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)
#define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)
#define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)
#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)

/*
 * The "generalized" performance events seem to really be a copy
 * of the available events on x86 processors; the mapping to ARC
 * events is not always possible 1-to-1. Fortunately, there doesn't
 * seem to be an exact definition for these events, so we can cheat
 * a bit where necessary.
 *
 * In particular, the following PERF events may behave a bit differently
 * compared to other architectures:
 *
 * PERF_COUNT_HW_CPU_CYCLES
 *	Cycles not in halted state
 *
 * PERF_COUNT_HW_REF_CPU_CYCLES
 *	Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
 *	for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
 *
 * PERF_COUNT_HW_BUS_CYCLES
 *	Unclear what this means, Intel uses 0x013c, which according to
 *	their datasheet means "unhalted reference cycles". It sounds similar
 *	to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage, or all stalls
 *	combined; for now we assign all stalls to STALLED_CYCLES_BACKEND
 *	and all pipeline flushes (e.g. caused by mispredicts, etc.) to
 *	STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that makes it complicated.
 *
 *	Note that I$ cache misses aren't counted by either of the two!
 */

/*
 * Map generic (and ARC-specific) perf event ids to the hardware
 * condition-code names the PCT block is programmed with.  An id with no
 * entry here is implicitly NULL, i.e. unsupported.
 */
static const char * const arc_pmu_ev_hw_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
	[PERF_COUNT_ARC_DCLM] = "dclm",
	[PERF_COUNT_ARC_DCSM] = "dcsm",
	[PERF_COUNT_ARC_ICM] = "icm",
	[PERF_COUNT_ARC_BPOK] = "bpok",
	[PERF_COUNT_ARC_EDTLB] = "edtlb",
	[PERF_COUNT_ARC_EITLB] = "eitlb",
};

#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff

/*
 * Map the generic [cache][op][result] triples onto the event ids above.
 * Only D$/I$ load/store misses, TLB misses and branch-predictor
 * accesses/misses are measurable; everything else is unsupported.
 */
static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};

#endif /* __ASM_PERF_EVENT_H */