/*
 * Performance event support framework for SuperH hardware counters.
 *
 * Copyright (C) 2009 Paul Mundt
 *
 * Heavily based on the x86 and PowerPC implementations.
 *
 * x86:
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 *
 * ppc:
 *  Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
#include <linux/export.h>
#include <asm/processor.h>

struct cpu_hw_events {
	struct perf_event	*events[MAX_HWEVENTS];
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
};

DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

static struct sh_pmu *sh_pmu __read_mostly;

/* Number of perf_events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * Stub these out for now, do something more profound later.
 */
int reserve_pmc_hardware(void)
{
	return 0;
}

void release_pmc_hardware(void)
{
}

static inline int sh_pmu_initialized(void)
{
	return !!sh_pmu;
}

const char *perf_pmu_name(void)
{
	if (!sh_pmu)
		return NULL;

	return sh_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);

int perf_num_counters(void)
{
	if (!sh_pmu)
		return 0;

	return sh_pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);

/*
 * Release the PMU if this is the last perf_event.
 */
static void hw_perf_event_destroy(struct perf_event *event)
{
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

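/*
 * Map a PERF_TYPE_HW_CACHE config onto a hardware event code via the
 * CPU's cache_events table.  The config uses the generic perf cache
 * encoding: cache id in bits 0-7, operation in bits 8-15 and result in
 * bits 16-23.  For example (illustrative only), a config of
 *
 *	PERF_COUNT_HW_CACHE_L1D |
 *	(PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *	(PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
 *
 * asks for L1 data cache read misses.  A table entry of 0 means the
 * CPU cannot count that combination, while -1 marks it as invalid.
 */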
static int hw_perf_cache_event(int config, int *evp)
{
	unsigned long type, op, result;
	int ev;

	if (!sh_pmu->cache_events)
		return -EINVAL;

	/* unpack config */
	type = config & 0xff;
	op = (config >> 8) & 0xff;
	result = (config >> 16) & 0xff;

	if (type >= PERF_COUNT_HW_CACHE_MAX ||
	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ev = (*sh_pmu->cache_events)[type][op][result];
	if (ev == 0)
		return -EOPNOTSUPP;
	if (ev == -1)
		return -EINVAL;
	*evp = ev;
	return 0;
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	int config = -1;
	int err;

	if (!sh_pmu_initialized())
		return -ENODEV;

	/*
	 * All of the on-chip counters are "limited", in that they have
	 * no interrupts, and are therefore unable to do sampling without
	 * further work and timer assistance.
	 */
	if (hwc->sample_period)
		return -EINVAL;

	/*
	 * See if we need to reserve the counter.
	 *
	 * If no events are currently in use, then we have to take a
	 * mutex to ensure that we don't race with another task doing
	 * reserve_pmc_hardware or release_pmc_hardware.
	 */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 &&
		    reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);
	}

	if (err)
		return err;

	event->destroy = hw_perf_event_destroy;

	switch (attr->type) {
	case PERF_TYPE_RAW:
		config = attr->config & sh_pmu->raw_event_mask;
		break;
	case PERF_TYPE_HW_CACHE:
		err = hw_perf_cache_event(attr->config, &config);
		if (err)
			return err;
		break;
	case PERF_TYPE_HARDWARE:
		if (attr->config >= sh_pmu->max_events)
			return -EINVAL;

		config = sh_pmu->event_map(attr->config);
		break;
	}

	if (config == -1)
		return -EINVAL;

	hwc->config |= config;

	return 0;
}

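/*
 * Fold the delta since the last read of a hardware counter into the
 * generic event count.  The shift dance below mirrors the x86 code this
 * file is based on: with shift = 64 - (counter width), the subtraction
 * wraps at the width of the hardware counter rather than at 64 bits.
 * The SuperH counters are treated as full-width here, hence shift = 0.
 */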
static void sh_perf_event_update(struct perf_event *event,
				 struct hw_perf_event *hwc, int idx)
{
	u64 prev_raw_count, new_raw_count;
	s64 delta;
	int shift = 0;

	/*
	 * Depending on the counter configuration, they may or may not
	 * be chained, in which case the previous counter value can be
	 * updated underneath us if the lower-half overflows.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically.
	 *
	 * As there is no interrupt associated with the overflow events,
	 * this is the simplest approach for maintaining consistency.
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = sh_pmu->read(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
}

/*
 * Stop the counter and, with PERF_EF_UPDATE, fold the final hardware
 * count into the generic event count.
 */
static void sh_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!(event->hw.state & PERF_HES_STOPPED)) {
		sh_pmu->disable(hwc, idx);
		cpuc->events[idx] = NULL;
		event->hw.state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		sh_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

/* (Re)start counting on the counter that sh_pmu_add() assigned to the event. */
static void sh_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	cpuc->events[idx] = event;
	event->hw.state = 0;
	sh_pmu->enable(hwc, idx);
}

/* Remove the event from the PMU and release its counter slot. */
static void sh_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	sh_pmu_stop(event, PERF_EF_UPDATE);
	__clear_bit(event->hw.idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

/*
 * Claim a hardware counter for the event, falling back to the first
 * free counter if the cached index is already in use.
 */
static int sh_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;
	int ret = -EAGAIN;

	perf_pmu_disable(event->pmu);

	if (__test_and_set_bit(idx, cpuc->used_mask)) {
		idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
		if (idx == sh_pmu->num_events)
			goto out;

		__set_bit(idx, cpuc->used_mask);
		hwc->idx = idx;
	}

	sh_pmu->disable(hwc, idx);

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (flags & PERF_EF_START)
		sh_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
	ret = 0;
out:
	perf_pmu_enable(event->pmu);
	return ret;
}

static void sh_pmu_read(struct perf_event *event)
{
	sh_perf_event_update(event, &event->hw, event->hw.idx);
}

static int sh_pmu_event_init(struct perf_event *event)
{
	int err;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HW_CACHE:
	case PERF_TYPE_HARDWARE:
		err = __hw_perf_event_init(event);
		break;

	default:
		return -ENOENT;
	}

	if (unlikely(err)) {
		if (event->destroy)
			event->destroy(event);
	}

	return err;
}

static void sh_pmu_enable(struct pmu *pmu)
{
	if (!sh_pmu_initialized())
		return;

	sh_pmu->enable_all();
}

static void sh_pmu_disable(struct pmu *pmu)
{
	if (!sh_pmu_initialized())
		return;

	sh_pmu->disable_all();
}

static struct pmu pmu = {
	.pmu_enable	= sh_pmu_enable,
	.pmu_disable	= sh_pmu_disable,
	.event_init	= sh_pmu_event_init,
	.add		= sh_pmu_add,
	.del		= sh_pmu_del,
	.start		= sh_pmu_start,
	.stop		= sh_pmu_stop,
	.read		= sh_pmu_read,
};

static void sh_pmu_setup(int cpu)
{
	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);

	memset(cpuhw, 0, sizeof(struct cpu_hw_events));
}

static int __cpuinit
sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned int cpu = (long)hcpu;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		sh_pmu_setup(cpu);
		break;

	default:
		break;
	}

	return NOTIFY_OK;
}

int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
{
	if (sh_pmu)
		return -EBUSY;
	sh_pmu = _pmu;

	pr_info("Performance Events: %s support registered\n", _pmu->name);

	WARN_ON(_pmu->num_events > MAX_HWEVENTS);

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
	perf_cpu_notifier(sh_pmu_notifier);
	return 0;
}
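
/*
 * Usage sketch (illustrative only): a CPU-specific backend fills in a
 * struct sh_pmu with the hooks used above (per-counter read/enable/disable,
 * enable_all/disable_all, event_map and the cache_events table) and hands
 * it to register_sh_pmu() during boot, roughly along these lines.  The
 * "mycpu" names below are placeholders, not real symbols; the real
 * backends live with the rest of the CPU-specific code in arch/sh.
 *
 *	static struct sh_pmu mycpu_pmu = {
 *		.name		= "mycpu",
 *		.num_events	= 2,
 *		.event_map	= mycpu_event_map,
 *		.max_events	= ARRAY_SIZE(mycpu_general_events),
 *		.raw_event_mask	= 0xff,
 *		.cache_events	= &mycpu_cache_events,
 *		.read		= mycpu_pmu_read,
 *		.disable	= mycpu_pmu_disable,
 *		.enable		= mycpu_pmu_enable,
 *		.disable_all	= mycpu_pmu_disable_all,
 *		.enable_all	= mycpu_pmu_enable_all,
 *	};
 *
 *	static int __init mycpu_pmu_init(void)
 *	{
 *		return register_sh_pmu(&mycpu_pmu);
 *	}
 *	early_initcall(mycpu_pmu_init);
 */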