// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support framework for SuperH hardware counters.
 *
 * Copyright (C) 2009 Paul Mundt
 *
 * Heavily based on the x86 and PowerPC implementations.
 *
 * x86:
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 *
 * ppc:
 *  Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
#include <linux/export.h>
#include <asm/processor.h>

struct cpu_hw_events {
	struct perf_event	*events[MAX_HWEVENTS];
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
};

DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

static struct sh_pmu *sh_pmu __read_mostly;

/* Number of perf_events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * Stub these out for now, do something more profound later.
 */
int reserve_pmc_hardware(void)
{
	return 0;
}

void release_pmc_hardware(void)
{
}

static inline int sh_pmu_initialized(void)
{
	return !!sh_pmu;
}

const char *perf_pmu_name(void)
{
	if (!sh_pmu)
		return NULL;

	return sh_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);

int perf_num_counters(void)
{
	if (!sh_pmu)
		return 0;

	return sh_pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);

/*
 * Release the PMU if this is the last perf_event.
 */
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}
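
/*
 * Cache events arrive as a packed attr.config: the cache id in the low
 * byte, the op in the next byte, and the result in the byte above that.
 * An L1D read-miss count, for example, would be requested as
 *
 *	PERF_COUNT_HW_CACHE_L1D |
 *	(PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *	(PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
 *
 * and is translated through the CPU-specific cache_events table.
 */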
static int hw_perf_cache_event(int config, int *evp)
{
	unsigned long type, op, result;
	int ev;

	if (!sh_pmu->cache_events)
		return -EINVAL;

	/* unpack config */
	type = config & 0xff;
	op = (config >> 8) & 0xff;
	result = (config >> 16) & 0xff;

	if (type >= PERF_COUNT_HW_CACHE_MAX ||
	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ev = (*sh_pmu->cache_events)[type][op][result];
	if (ev == 0)
		return -EOPNOTSUPP;
	if (ev == -1)
		return -EINVAL;
	*evp = ev;
	return 0;
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	int config = -1;
	int err;

	if (!sh_pmu_initialized())
		return -ENODEV;

	/*
	 * See if we need to reserve the counter.
	 *
	 * If no events are currently in use, then we have to take a
	 * mutex to ensure that we don't race with another task doing
	 * reserve_pmc_hardware or release_pmc_hardware.
	 */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 &&
		    reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);
	}

	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	switch (attr->type) {
	case PERF_TYPE_RAW:
		config = attr->config & sh_pmu->raw_event_mask;
		break;
	case PERF_TYPE_HW_CACHE:
		err = hw_perf_cache_event(attr->config, &config);
		if (err)
			return err;
		break;
	case PERF_TYPE_HARDWARE:
		if (attr->config >= sh_pmu->max_events)
			return -EINVAL;

		config = sh_pmu->event_map(attr->config);
		break;
	}

	if (config == -1)
		return -EINVAL;

	hwc->config |= config;

	return 0;
}

static void sh_perf_event_update(struct perf_event *event,
				 struct hw_perf_event *hwc, int idx)
{
	u64 prev_raw_count, new_raw_count;
	s64 delta;
	int shift = 0;

	/*
	 * Depending on the counter configuration, they may or may not
	 * be chained, in which case the previous counter value can be
	 * updated underneath us if the lower-half overflows.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically.
	 *
	 * As there is no interrupt associated with the overflow events,
	 * this is the simplest approach for maintaining consistency.
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = sh_pmu->read(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
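	/*
	 * shift is 0 here since sh_pmu->read() is expected to hand back
	 * the counter at full width; a non-zero shift of
	 * (64 - counter width) would confine the subtraction below to
	 * the physical counter width, handling wraparound of narrower
	 * counters.
	 */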
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

static void sh_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!(event->hw.state & PERF_HES_STOPPED)) {
		sh_pmu->disable(hwc, idx);
		cpuc->events[idx] = NULL;
		event->hw.state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		sh_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static void sh_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	cpuc->events[idx] = event;
	event->hw.state = 0;
	sh_pmu->enable(hwc, idx);
}

static void sh_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	sh_pmu_stop(event, PERF_EF_UPDATE);
	__clear_bit(event->hw.idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static int sh_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	int ret = -EAGAIN;

	perf_pmu_disable(event->pmu);

	if (__test_and_set_bit(idx, cpuc->used_mask)) {
		idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
		if (idx == sh_pmu->num_events)
			goto out;

		__set_bit(idx, cpuc->used_mask);
		hwc->idx = idx;
	}

	sh_pmu->disable(hwc, idx);

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		sh_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
	ret = 0;
out:
	perf_pmu_enable(event->pmu);
	return ret;
}

static void sh_pmu_read(struct perf_event *event)
{
	sh_perf_event_update(event, &event->hw, event->hw.idx);
}

static int sh_pmu_event_init(struct perf_event *event)
{
	int err;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HW_CACHE:
	case PERF_TYPE_HARDWARE:
		err = __hw_perf_event_init(event);
		break;

	default:
		return -ENOENT;
	}

	if (unlikely(err)) {
		if (event->destroy)
			event->destroy(event);
	}

	return err;
}

static void sh_pmu_enable(struct pmu *pmu)
{
	if (!sh_pmu_initialized())
		return;

	sh_pmu->enable_all();
}

static void sh_pmu_disable(struct pmu *pmu)
{
	if (!sh_pmu_initialized())
		return;

	sh_pmu->disable_all();
}

static struct pmu pmu = {
	.pmu_enable	= sh_pmu_enable,
	.pmu_disable	= sh_pmu_disable,
	.event_init	= sh_pmu_event_init,
	.add		= sh_pmu_add,
	.del		= sh_pmu_del,
	.start		= sh_pmu_start,
	.stop		= sh_pmu_stop,
	.read		= sh_pmu_read,
};

static int sh_pmu_prepare_cpu(unsigned int cpu)
{
	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);

	memset(cpuhw, 0, sizeof(struct cpu_hw_events));
	return 0;
}
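
/*
 * CPU family code hands in an sh_pmu descriptor supplying the counter
 * callbacks (read, enable, disable, enable_all, disable_all), the
 * event_map()/cache_events translation tables and the counter limits
 * used above. Only one PMU may be registered; later callers get -EBUSY.
 */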
int register_sh_pmu(struct sh_pmu *_pmu)
{
	if (sh_pmu)
		return -EBUSY;
	sh_pmu = _pmu;

	pr_info("Performance Events: %s support registered\n", _pmu->name);

	/*
	 * All of the on-chip counters are "limited", in that they have
	 * no interrupts, and are therefore unable to do sampling without
	 * further work and timer assistance.
	 */
	pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	WARN_ON(_pmu->num_events > MAX_HWEVENTS);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
	cpuhp_setup_state(CPUHP_PERF_SUPERH, "PERF_SUPERH", sh_pmu_prepare_cpu,
			  NULL);
	return 0;
}