/*
 * Performance counter support for POWER9 processors.
 *
 * Copyright 2009 Paul Mackerras, IBM Corporation.
 * Copyright 2013 Michael Ellerman, IBM Corporation.
 * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or later version.
 */

#define pr_fmt(fmt)	"power9-pmu: " fmt

#include "isa207-common.h"

/*
 * Some power9 event codes.
 *
 * power9-events-list.h contains a list of EVENT(name, code) entries;
 * expanding EVENT as an enumerator definition turns that list into an
 * anonymous enum mapping each event name to its raw event code.
 */
#define EVENT(_name, _code)	_name = _code,

enum {
#include "power9-events-list.h"
};

#undef EVENT

/*
 * MMCRA IFM bits - POWER9.
 *
 * Instruction Filtering Mode field of the MMCRA SPR, used to restrict
 * which branches the BHRB records (see power9_bhrb_filter_map below).
 */
#define POWER9_MMCRA_IFM1		0x0000000040000000UL
#define POWER9_MMCRA_IFM2		0x0000000080000000UL
#define POWER9_MMCRA_IFM3		0x00000000C0000000UL

/*
 * Sysfs attributes exposing the generic perf hardware events and the
 * generalized cache events under this PMU's "events" directory.  Each
 * macro invocation creates an attribute named after the symbolic event
 * whose contents encode the raw event code.
 */
GENERIC_EVENT_ATTR(cpu-cycles,			PM_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-frontend,	PM_ICT_NOSLOT_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-backend,	PM_CMPLU_STALL);
GENERIC_EVENT_ATTR(instructions,		PM_INST_CMPL);
GENERIC_EVENT_ATTR(branch-instructions,		PM_BRU_CMPL);
GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1_FIN);

CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1_FIN);
CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
CACHE_EVENT_ATTR(L1-dcache-prefetches,		PM_L1_PREF);
CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
CACHE_EVENT_ATTR(L1-icache-loads,		PM_INST_FROM_L1);
CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_WRITE);
CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PREF_ALL);
CACHE_EVENT_ATTR(LLC-store-misses,		PM_L2_ST_MISS);
CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); 55 CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); 56 CACHE_EVENT_ATTR(branch-loads, PM_BRU_CMPL); 57 CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); 58 CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); 59 60 static struct attribute *power9_events_attr[] = { 61 GENERIC_EVENT_PTR(PM_CYC), 62 GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC), 63 GENERIC_EVENT_PTR(PM_CMPLU_STALL), 64 GENERIC_EVENT_PTR(PM_INST_CMPL), 65 GENERIC_EVENT_PTR(PM_BRU_CMPL), 66 GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), 67 GENERIC_EVENT_PTR(PM_LD_REF_L1), 68 GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN), 69 CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN), 70 CACHE_EVENT_PTR(PM_LD_REF_L1), 71 CACHE_EVENT_PTR(PM_L1_PREF), 72 CACHE_EVENT_PTR(PM_ST_MISS_L1), 73 CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), 74 CACHE_EVENT_PTR(PM_INST_FROM_L1), 75 CACHE_EVENT_PTR(PM_IC_PREF_WRITE), 76 CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), 77 CACHE_EVENT_PTR(PM_DATA_FROM_L3), 78 CACHE_EVENT_PTR(PM_L3_PREF_ALL), 79 CACHE_EVENT_PTR(PM_L2_ST_MISS), 80 CACHE_EVENT_PTR(PM_L2_ST), 81 CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), 82 CACHE_EVENT_PTR(PM_BRU_CMPL), 83 CACHE_EVENT_PTR(PM_DTLB_MISS), 84 CACHE_EVENT_PTR(PM_ITLB_MISS), 85 NULL 86 }; 87 88 static struct attribute_group power9_pmu_events_group = { 89 .name = "events", 90 .attrs = power9_events_attr, 91 }; 92 93 PMU_FORMAT_ATTR(event, "config:0-49"); 94 PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); 95 PMU_FORMAT_ATTR(mark, "config:8"); 96 PMU_FORMAT_ATTR(combine, "config:11"); 97 PMU_FORMAT_ATTR(unit, "config:12-15"); 98 PMU_FORMAT_ATTR(pmc, "config:16-19"); 99 PMU_FORMAT_ATTR(cache_sel, "config:20-23"); 100 PMU_FORMAT_ATTR(sample_mode, "config:24-28"); 101 PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); 102 PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); 103 PMU_FORMAT_ATTR(thresh_start, "config:36-39"); 104 PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); 105 106 static struct attribute *power9_pmu_format_attr[] = { 107 &format_attr_event.attr, 108 &format_attr_pmcxsel.attr, 109 
&format_attr_mark.attr, 110 &format_attr_combine.attr, 111 &format_attr_unit.attr, 112 &format_attr_pmc.attr, 113 &format_attr_cache_sel.attr, 114 &format_attr_sample_mode.attr, 115 &format_attr_thresh_sel.attr, 116 &format_attr_thresh_stop.attr, 117 &format_attr_thresh_start.attr, 118 &format_attr_thresh_cmp.attr, 119 NULL, 120 }; 121 122 struct attribute_group power9_pmu_format_group = { 123 .name = "format", 124 .attrs = power9_pmu_format_attr, 125 }; 126 127 static const struct attribute_group *power9_pmu_attr_groups[] = { 128 &power9_pmu_format_group, 129 &power9_pmu_events_group, 130 NULL, 131 }; 132 133 static int power9_generic_events[] = { 134 [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, 135 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, 136 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, 137 [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, 138 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL, 139 [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, 140 [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, 141 [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN, 142 }; 143 144 static u64 power9_bhrb_filter_map(u64 branch_sample_type) 145 { 146 u64 pmu_bhrb_filter = 0; 147 148 /* BHRB and regular PMU events share the same privilege state 149 * filter configuration. BHRB is always recorded along with a 150 * regular PMU event. As the privilege state filter is handled 151 * in the basic PMC configuration of the accompanying regular 152 * PMU event, we ignore any separate BHRB specific request. 
153 */ 154 155 /* No branch filter requested */ 156 if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) 157 return pmu_bhrb_filter; 158 159 /* Invalid branch filter options - HW does not support */ 160 if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) 161 return -1; 162 163 if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) 164 return -1; 165 166 if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) 167 return -1; 168 169 if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { 170 pmu_bhrb_filter |= POWER9_MMCRA_IFM1; 171 return pmu_bhrb_filter; 172 } 173 174 /* Every thing else is unsupported */ 175 return -1; 176 } 177 178 static void power9_config_bhrb(u64 pmu_bhrb_filter) 179 { 180 /* Enable BHRB filter in PMU */ 181 mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); 182 } 183 184 #define C(x) PERF_COUNT_HW_CACHE_##x 185 186 /* 187 * Table of generalized cache-related events. 188 * 0 means not supported, -1 means nonsensical, other values 189 * are event codes. 190 */ 191 static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 192 [ C(L1D) ] = { 193 [ C(OP_READ) ] = { 194 [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, 195 [ C(RESULT_MISS) ] = PM_LD_MISS_L1_FIN, 196 }, 197 [ C(OP_WRITE) ] = { 198 [ C(RESULT_ACCESS) ] = 0, 199 [ C(RESULT_MISS) ] = PM_ST_MISS_L1, 200 }, 201 [ C(OP_PREFETCH) ] = { 202 [ C(RESULT_ACCESS) ] = PM_L1_PREF, 203 [ C(RESULT_MISS) ] = 0, 204 }, 205 }, 206 [ C(L1I) ] = { 207 [ C(OP_READ) ] = { 208 [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1, 209 [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS, 210 }, 211 [ C(OP_WRITE) ] = { 212 [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE, 213 [ C(RESULT_MISS) ] = -1, 214 }, 215 [ C(OP_PREFETCH) ] = { 216 [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE, 217 [ C(RESULT_MISS) ] = 0, 218 }, 219 }, 220 [ C(LL) ] = { 221 [ C(OP_READ) ] = { 222 [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3, 223 [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS, 224 }, 225 [ C(OP_WRITE) ] = { 226 [ C(RESULT_ACCESS) ] = PM_L2_ST, 227 [ C(RESULT_MISS) ] = 
PM_L2_ST_MISS, 228 }, 229 [ C(OP_PREFETCH) ] = { 230 [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL, 231 [ C(RESULT_MISS) ] = 0, 232 }, 233 }, 234 [ C(DTLB) ] = { 235 [ C(OP_READ) ] = { 236 [ C(RESULT_ACCESS) ] = 0, 237 [ C(RESULT_MISS) ] = PM_DTLB_MISS, 238 }, 239 [ C(OP_WRITE) ] = { 240 [ C(RESULT_ACCESS) ] = -1, 241 [ C(RESULT_MISS) ] = -1, 242 }, 243 [ C(OP_PREFETCH) ] = { 244 [ C(RESULT_ACCESS) ] = -1, 245 [ C(RESULT_MISS) ] = -1, 246 }, 247 }, 248 [ C(ITLB) ] = { 249 [ C(OP_READ) ] = { 250 [ C(RESULT_ACCESS) ] = 0, 251 [ C(RESULT_MISS) ] = PM_ITLB_MISS, 252 }, 253 [ C(OP_WRITE) ] = { 254 [ C(RESULT_ACCESS) ] = -1, 255 [ C(RESULT_MISS) ] = -1, 256 }, 257 [ C(OP_PREFETCH) ] = { 258 [ C(RESULT_ACCESS) ] = -1, 259 [ C(RESULT_MISS) ] = -1, 260 }, 261 }, 262 [ C(BPU) ] = { 263 [ C(OP_READ) ] = { 264 [ C(RESULT_ACCESS) ] = PM_BRU_CMPL, 265 [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL, 266 }, 267 [ C(OP_WRITE) ] = { 268 [ C(RESULT_ACCESS) ] = -1, 269 [ C(RESULT_MISS) ] = -1, 270 }, 271 [ C(OP_PREFETCH) ] = { 272 [ C(RESULT_ACCESS) ] = -1, 273 [ C(RESULT_MISS) ] = -1, 274 }, 275 }, 276 [ C(NODE) ] = { 277 [ C(OP_READ) ] = { 278 [ C(RESULT_ACCESS) ] = -1, 279 [ C(RESULT_MISS) ] = -1, 280 }, 281 [ C(OP_WRITE) ] = { 282 [ C(RESULT_ACCESS) ] = -1, 283 [ C(RESULT_MISS) ] = -1, 284 }, 285 [ C(OP_PREFETCH) ] = { 286 [ C(RESULT_ACCESS) ] = -1, 287 [ C(RESULT_MISS) ] = -1, 288 }, 289 }, 290 }; 291 292 #undef C 293 294 static struct power_pmu power9_pmu = { 295 .name = "POWER9", 296 .n_counter = MAX_PMU_COUNTERS, 297 .add_fields = ISA207_ADD_FIELDS, 298 .test_adder = ISA207_TEST_ADDER, 299 .compute_mmcr = isa207_compute_mmcr, 300 .config_bhrb = power9_config_bhrb, 301 .bhrb_filter_map = power9_bhrb_filter_map, 302 .get_constraint = isa207_get_constraint, 303 .disable_pmc = isa207_disable_pmc, 304 .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, 305 .n_generic = ARRAY_SIZE(power9_generic_events), 306 .generic_events = power9_generic_events, 307 .cache_events = &power9_cache_events, 308 .attr_groups = 
power9_pmu_attr_groups, 309 .bhrb_nr = 32, 310 }; 311 312 static int __init init_power9_pmu(void) 313 { 314 int rc; 315 316 /* Comes from cpu_specs[] */ 317 if (!cur_cpu_spec->oprofile_cpu_type || 318 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) 319 return -ENODEV; 320 321 rc = register_power_pmu(&power9_pmu); 322 if (rc) 323 return rc; 324 325 /* Tell userspace that EBB is supported */ 326 cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; 327 328 return 0; 329 } 330 early_initcall(init_power9_pmu); 331