// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x - CPU-measurement Counter Facility
 *
 *  Copyright IBM Corp. 2012, 2023
 *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 *	       Thomas Richter <tmricht@linux.ibm.com>
 */
#define pr_fmt(fmt)	"cpum_cf: " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/perf_event.h>

#include <asm/cpu_mf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>

/* Perf PMU definitions for the counter facility */
#define PERF_CPUM_CF_MAX_CTR		0xffffUL  /* Max ctr for ECCTR */
#define PERF_EVENT_CPUM_CF_DIAG		0xBC000UL /* Event: Counter sets */

enum cpumf_ctr_set {
	CPUMF_CTR_SET_BASIC   = 0,    /* Basic Counter Set */
	CPUMF_CTR_SET_USER    = 1,    /* Problem-State Counter Set */
	CPUMF_CTR_SET_CRYPTO  = 2,    /* Crypto-Activity Counter Set */
	CPUMF_CTR_SET_EXT     = 3,    /* Extended Counter Set */
	CPUMF_CTR_SET_MT_DIAG = 4,    /* MT-diagnostic Counter Set */

	/* Maximum number of counter sets */
	CPUMF_CTR_SET_MAX,
};

/*
 * Shift counts of the counter-set enable and activation control bits
 * within the control value passed to lcctl().
 */
#define CPUMF_LCCTL_ENABLE_SHIFT	16
#define CPUMF_LCCTL_ACTCTL_SHIFT	0

static inline void ctr_set_enable(u64 *state, u64 ctrsets)
{
	*state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
}

static inline void ctr_set_disable(u64 *state, u64 ctrsets)
{
	*state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
}

static inline void ctr_set_start(u64 *state, u64 ctrsets)
{
	*state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
}

static inline void ctr_set_stop(u64 *state, u64 ctrsets)
{
	*state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
}

static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
{
	switch (set) {
	case CPUMF_CTR_SET_BASIC:
		return stcctm(BASIC, range, dest);
	case CPUMF_CTR_SET_USER:
		return stcctm(PROBLEM_STATE, range, dest);
	case CPUMF_CTR_SET_CRYPTO:
		return stcctm(CRYPTO_ACTIVITY, range, dest);
	case CPUMF_CTR_SET_EXT:
		return stcctm(EXTENDED, range, dest);
	case CPUMF_CTR_SET_MT_DIAG:
		return stcctm(MT_DIAG_CLEARING, range, dest);
	case CPUMF_CTR_SET_MAX:
		return 3;
	}
	return 3;
}

struct cpu_cf_events {
	refcount_t refcnt;		/* Reference count */
	atomic_t ctr_set[CPUMF_CTR_SET_MAX];
	u64 state;			/* For perf_event_open SVC */
	u64 dev_state;			/* For /dev/hwctr */
	unsigned int flags;
	size_t used;			/* Bytes used in data */
	size_t usedss;			/* Bytes used in start/stop */
	unsigned char start[PAGE_SIZE];	/* Counter set at event add */
	unsigned char stop[PAGE_SIZE];	/* Counter set at event delete */
	unsigned char data[PAGE_SIZE];	/* Counter set at /dev/hwctr */
	unsigned int sets;		/* # Counter set saved in memory */
};

static unsigned int cfdiag_cpu_speed;	/* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;

/*
 * The CPU Measurement query counter information instruction contains
 * information which varies per machine generation, but is constant and
 * does not change when running on a particular machine, such as counter
 * first and second version number. This is needed to determine the size
 * of counter sets. Extract this information at device driver initialization.
 */
static struct cpumf_ctr_info	cpumf_ctr_info;

struct cpu_cf_ptr {
	struct cpu_cf_events *cpucf;
};

static struct cpu_cf_root {		/* Anchor to per CPU data */
	refcount_t refcnt;		/* Overall active events */
	struct cpu_cf_ptr __percpu *cfptr;
} cpu_cf_root;

/*
 * Serialize event initialization and event removal. Both are called from
 * user space in task context with perf_event_open() and close()
 * system calls.
 *
 * This mutex serializes functions cpum_cf_alloc_cpu() called at event
 * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
 * called at event removal via call back function hw_perf_event_destroy()
 * when the event is deleted. They are serialized to enforce correct
 * bookkeeping of pointer and reference counts anchored by
 * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
 * per CPU pointers stored in cpu_cf_root::cfptr.
 */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * Get pointer to per-cpu structure.
 *
 * Function get_cpu_cfhw() is called from
 * - cfset_all_copy(): This function is protected by cpus_read_lock(), so
 *   CPU hot plug remove can not happen. Event removal requires a close()
 *   first.
 *
 * Function this_cpu_cfhw() is called from perf common code functions:
 * - pmu_{en|dis}able(), pmu_{add|del}() and pmu_{start|stop}():
 *   All functions execute with interrupts disabled on that particular CPU.
 * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment on cfset_all_copy().
 *
 * Therefore it is safe to access the CPU specific pointer to the event.
 */
static struct cpu_cf_events *get_cpu_cfhw(int cpu)
{
	struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;

	if (p) {
		struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);

		return q->cpucf;
	}
	return NULL;
}

static struct cpu_cf_events *this_cpu_cfhw(void)
{
	return get_cpu_cfhw(smp_processor_id());
}

/* Disable counter sets on dedicated CPU */
static void cpum_cf_reset_cpu(void *flags)
{
	lcctl(0);
}

/* Free per CPU data when the last event is removed. */
static void cpum_cf_free_root(void)
{
	if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
		return;
	free_percpu(cpu_cf_root.cfptr);
	cpu_cf_root.cfptr = NULL;
	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
	debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n",
			    __func__, refcount_read(&cpu_cf_root.refcnt),
			    !cpu_cf_root.cfptr);
}

/*
 * On initialization of first event also allocate per CPU data dynamically.
 * Start with an array of pointers, the array size is the maximum number of
 * CPUs possible, which might be larger than the number of CPUs currently
 * online.
 */
static int cpum_cf_alloc_root(void)
{
	int rc = 0;

	if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
		return rc;

	/* The memory is already zeroed. */
	cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
	if (cpu_cf_root.cfptr) {
		refcount_set(&cpu_cf_root.refcnt, 1);
		on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	} else {
		rc = -ENOMEM;
	}

	return rc;
}

/* Free CPU counter data structure for a PMU */
static void cpum_cf_free_cpu(int cpu)
{
	struct cpu_cf_events *cpuhw;
	struct cpu_cf_ptr *p;

	mutex_lock(&pmc_reserve_mutex);
	/*
	 * When invoked via CPU hotplug handler, there might be no events
	 * installed or that particular CPU might not have an
	 * event installed. This anchor pointer can be NULL!
	 */
	if (!cpu_cf_root.cfptr)
		goto out;
	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
	cpuhw = p->cpucf;
	/*
	 * Might be zero when called from CPU hotplug handler and no event
	 * installed on that CPU, but on different CPUs.
	 */
	if (!cpuhw)
		goto out;

	if (refcount_dec_and_test(&cpuhw->refcnt)) {
		kfree(cpuhw);
		p->cpucf = NULL;
	}
	cpum_cf_free_root();
out:
	mutex_unlock(&pmc_reserve_mutex);
}

/* Allocate CPU counter data structure for a PMU. Serialized by pmc_reserve_mutex. */
static int cpum_cf_alloc_cpu(int cpu)
{
	struct cpu_cf_events *cpuhw;
	struct cpu_cf_ptr *p;
	int rc;

	mutex_lock(&pmc_reserve_mutex);
	rc = cpum_cf_alloc_root();
	if (rc)
		goto unlock;
	p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
	cpuhw = p->cpucf;

	if (!cpuhw) {
		cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
		if (cpuhw) {
			p->cpucf = cpuhw;
			refcount_set(&cpuhw->refcnt, 1);
		} else {
			rc = -ENOMEM;
		}
	} else {
		refcount_inc(&cpuhw->refcnt);
	}
	if (rc) {
		/*
		 * Error in allocation of event, decrement anchor. Since
		 * cpu_cf_event is not created, its destroy() function is not
		 * invoked. Adjust the reference counter for the anchor.
		 */
		cpum_cf_free_root();
	}
unlock:
	mutex_unlock(&pmc_reserve_mutex);
	return rc;
}

/*
 * Create/delete per CPU data structures for /dev/hwctr interface and events
 * created by perf_event_open().
 * If cpu is -1, track task on all available CPUs. This requires
 * allocation of hardware data structures for all CPUs. This setup handles
 * perf_event_open() with task context and /dev/hwctr interface.
 * If cpu is non-negative, install the event on this CPU only. This setup
 * handles perf_event_open() with CPU context.
 */
static int cpum_cf_alloc(int cpu)
{
	cpumask_var_t mask;
	int rc;

	if (cpu == -1) {
		if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
			return -ENOMEM;
		for_each_online_cpu(cpu) {
			rc = cpum_cf_alloc_cpu(cpu);
			if (rc) {
				for_each_cpu(cpu, mask)
					cpum_cf_free_cpu(cpu);
				break;
			}
			cpumask_set_cpu(cpu, mask);
		}
		free_cpumask_var(mask);
	} else {
		rc = cpum_cf_alloc_cpu(cpu);
	}
	return rc;
}

static void cpum_cf_free(int cpu)
{
	if (cpu == -1) {
		for_each_online_cpu(cpu)
			cpum_cf_free_cpu(cpu);
	} else {
		cpum_cf_free_cpu(cpu);
	}
}

#define CF_DIAG_CTRSET_DEF		0xfeef	/* Counter set header mark */
						/* interval in seconds */

/* Counter sets are stored as data stream in a page sized memory buffer and
 * exported to user space via raw data attached to the event sample data.
 * Each counter set starts with an eight byte header consisting of:
 * - a two byte eye catcher (0xfeef)
 * - a two byte counter set identifier
 * - a two byte counter count (indicates the number of counters in this set)
 * - a two byte reserved value (must be zero) to make the header the same
 *   size as a counter value.
 * All counter values are eight bytes in size.
 *
 * All counter sets are followed by a 64 byte trailer.
 * The trailer consists of a:
 * - flag field indicating valid fields when corresponding bit set
 * - the counter facility first and second version number
 * - the CPU speed if nonzero
 * - the time stamp the counter sets have been collected
 * - the time of day (TOD) base value
 * - the machine type.
 *
 * The counter sets are saved when the process is prepared to be executed on a
 * CPU and saved again when the process is going to be removed from a CPU.
 * The difference of both counter sets is calculated and stored in the event
 * sample data area.
 */
struct cf_ctrset_entry {	/* CPU-M CF counter set entry (8 byte) */
	unsigned int def:16;	/* 0-15  Data Entry Format */
	unsigned int set:16;	/* 16-31 Counter set identifier */
	unsigned int ctr:16;	/* 32-47 Number of stored counters */
	unsigned int res1:16;	/* 48-63 Reserved */
};

struct cf_trailer_entry {	/* CPU-M CF_DIAG trailer (64 byte) */
	/* 0 - 7 */
	union {
		struct {
			unsigned int clock_base:1;	/* TOD clock base set */
			unsigned int speed:1;		/* CPU speed set */
			/* Measurement alerts */
			unsigned int mtda:1;	/* Loss of MT ctr. data alert */
			unsigned int caca:1;	/* Counter auth. change alert */
			unsigned int lcda:1;	/* Loss of counter data alert */
		};
		unsigned long flags;	/* 0-63 All indicators */
	};
	/* 8 - 15 */
	unsigned int cfvn:16;		/* 64-79   Ctr First Version */
	unsigned int csvn:16;		/* 80-95   Ctr Second Version */
	unsigned int cpu_speed:32;	/* 96-127  CPU speed */
	/* 16 - 23 */
	unsigned long timestamp;	/* 128-191 Timestamp (TOD) */
	/* 24 - 55 */
	union {
		struct {
			unsigned long progusage1;
			unsigned long progusage2;
			unsigned long progusage3;
			unsigned long tod_base;
		};
		unsigned long progusage[4];
	};
	/* 56 - 63 */
	unsigned int mach_type:16;	/* Machine type */
	unsigned int res1:16;		/* Reserved */
	unsigned int res2:32;		/* Reserved */
};

/* Create the trailer data at the end of a page. */
static void cfdiag_trailer(struct cf_trailer_entry *te)
{
	struct cpuid cpuid;

	te->cfvn = cpumf_ctr_info.cfvn;		/* Counter version numbers */
	te->csvn = cpumf_ctr_info.csvn;

	get_cpu_id(&cpuid);			/* Machine type */
	te->mach_type = cpuid.machine;
	te->cpu_speed = cfdiag_cpu_speed;
	if (te->cpu_speed)
		te->speed = 1;
	te->clock_base = 1;			/* Save clock base */
	te->tod_base = tod_clock_base.tod;
	te->timestamp = get_tod_clock_fast();
}

/*
 * The number of counters per counter set varies between machine generations,
 * but is constant when running on a particular machine generation.
 * Determine each counter set size at device driver initialization and
 * retrieve it later.
413 */ 414 static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX]; 415 static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) 416 { 417 size_t ctrset_size = 0; 418 419 switch (ctrset) { 420 case CPUMF_CTR_SET_BASIC: 421 if (cpumf_ctr_info.cfvn >= 1) 422 ctrset_size = 6; 423 break; 424 case CPUMF_CTR_SET_USER: 425 if (cpumf_ctr_info.cfvn == 1) 426 ctrset_size = 6; 427 else if (cpumf_ctr_info.cfvn >= 3) 428 ctrset_size = 2; 429 break; 430 case CPUMF_CTR_SET_CRYPTO: 431 if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5) 432 ctrset_size = 16; 433 else if (cpumf_ctr_info.csvn >= 6) 434 ctrset_size = 20; 435 break; 436 case CPUMF_CTR_SET_EXT: 437 if (cpumf_ctr_info.csvn == 1) 438 ctrset_size = 32; 439 else if (cpumf_ctr_info.csvn == 2) 440 ctrset_size = 48; 441 else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) 442 ctrset_size = 128; 443 else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) 444 ctrset_size = 160; 445 break; 446 case CPUMF_CTR_SET_MT_DIAG: 447 if (cpumf_ctr_info.csvn > 3) 448 ctrset_size = 48; 449 break; 450 case CPUMF_CTR_SET_MAX: 451 break; 452 } 453 cpumf_ctr_setsizes[ctrset] = ctrset_size; 454 } 455 456 /* 457 * Return the maximum possible counter set size (in number of 8 byte counters) 458 * depending on type and model number. 459 */ 460 static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset) 461 { 462 return cpumf_ctr_setsizes[ctrset]; 463 } 464 465 /* Read a counter set. The counter set number determines the counter set and 466 * the CPUM-CF first and second version number determine the number of 467 * available counters in each counter set. 468 * Each counter set starts with header containing the counter set number and 469 * the number of eight byte counters. 470 * 471 * The functions returns the number of bytes occupied by this counter set 472 * including the header. 473 * If there is no counter in the counter set, this counter set is useless and 474 * zero is returned on this case. 475 * 476 * Note that the counter sets may not be enabled or active and the stcctm 477 * instruction might return error 3. Depending on error_ok value this is ok, 478 * for example when called from cpumf_pmu_start() call back function. 479 */ 480 static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, 481 size_t room, bool error_ok) 482 { 483 size_t ctrset_size, need = 0; 484 int rc = 3; /* Assume write failure */ 485 486 ctrdata->def = CF_DIAG_CTRSET_DEF; 487 ctrdata->set = ctrset; 488 ctrdata->res1 = 0; 489 ctrset_size = cpum_cf_read_setsize(ctrset); 490 491 if (ctrset_size) { /* Save data */ 492 need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); 493 if (need <= room) { 494 rc = ctr_stcctm(ctrset, ctrset_size, 495 (u64 *)(ctrdata + 1)); 496 } 497 if (rc != 3 || error_ok) 498 ctrdata->ctr = ctrset_size; 499 else 500 need = 0; 501 } 502 503 return need; 504 } 505 506 static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { 507 [CPUMF_CTR_SET_BASIC] = 0x02, 508 [CPUMF_CTR_SET_USER] = 0x04, 509 [CPUMF_CTR_SET_CRYPTO] = 0x08, 510 [CPUMF_CTR_SET_EXT] = 0x01, 511 [CPUMF_CTR_SET_MT_DIAG] = 0x20, 512 }; 513 514 /* Read out all counter sets and save them in the provided data buffer. 515 * The last 64 byte host an artificial trailer entry. 
516 */ 517 static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, 518 bool error_ok) 519 { 520 struct cf_trailer_entry *trailer; 521 size_t offset = 0, done; 522 int i; 523 524 memset(data, 0, sz); 525 sz -= sizeof(*trailer); /* Always room for trailer */ 526 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 527 struct cf_ctrset_entry *ctrdata = data + offset; 528 529 if (!(auth & cpumf_ctr_ctl[i])) 530 continue; /* Counter set not authorized */ 531 532 done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); 533 offset += done; 534 } 535 trailer = data + offset; 536 cfdiag_trailer(trailer); 537 return offset + sizeof(*trailer); 538 } 539 540 /* Calculate the difference for each counter in a counter set. */ 541 static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) 542 { 543 for (; --counters >= 0; ++pstart, ++pstop) 544 if (*pstop >= *pstart) 545 *pstop -= *pstart; 546 else 547 *pstop = *pstart - *pstop + 1; 548 } 549 550 /* Scan the counter sets and calculate the difference of each counter 551 * in each set. The result is the increment of each counter during the 552 * period the counter set has been activated. 553 * 554 * Return true on success. 555 */ 556 static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) 557 { 558 struct cf_trailer_entry *trailer_start, *trailer_stop; 559 struct cf_ctrset_entry *ctrstart, *ctrstop; 560 size_t offset = 0; 561 int i; 562 563 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 564 ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); 565 ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); 566 567 /* Counter set not authorized */ 568 if (!(auth & cpumf_ctr_ctl[i])) 569 continue; 570 /* Counter set size zero was not saved */ 571 if (!cpum_cf_read_setsize(i)) 572 continue; 573 574 if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { 575 pr_err_once("cpum_cf_diag counter set compare error " 576 "in set %i\n", ctrstart->set); 577 return 0; 578 } 579 if (ctrstart->def == CF_DIAG_CTRSET_DEF) { 580 cfdiag_diffctrset((u64 *)(ctrstart + 1), 581 (u64 *)(ctrstop + 1), ctrstart->ctr); 582 offset += ctrstart->ctr * sizeof(u64) + 583 sizeof(*ctrstart); 584 } 585 } 586 587 /* Save time_stamp from start of event in stop's trailer */ 588 trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); 589 trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); 590 trailer_stop->progusage[0] = trailer_start->timestamp; 591 592 return 1; 593 } 594 595 static enum cpumf_ctr_set get_counter_set(u64 event) 596 { 597 int set = CPUMF_CTR_SET_MAX; 598 599 if (event < 32) 600 set = CPUMF_CTR_SET_BASIC; 601 else if (event < 64) 602 set = CPUMF_CTR_SET_USER; 603 else if (event < 128) 604 set = CPUMF_CTR_SET_CRYPTO; 605 else if (event < 288) 606 set = CPUMF_CTR_SET_EXT; 607 else if (event >= 448 && event < 496) 608 set = CPUMF_CTR_SET_MT_DIAG; 609 610 return set; 611 } 612 613 static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) 614 { 615 u16 mtdiag_ctl; 616 int err = 0; 617 618 /* check required version for counter sets */ 619 switch (set) { 620 case CPUMF_CTR_SET_BASIC: 621 case CPUMF_CTR_SET_USER: 622 if (cpumf_ctr_info.cfvn < 1) 623 err = -EOPNOTSUPP; 624 break; 625 case CPUMF_CTR_SET_CRYPTO: 626 if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 && 627 config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83)) 628 err = -EOPNOTSUPP; 629 break; 630 case CPUMF_CTR_SET_EXT: 631 if (cpumf_ctr_info.csvn < 1) 632 err = -EOPNOTSUPP; 633 if 
		if ((cpumf_ctr_info.csvn == 1 && config > 159) ||
		    (cpumf_ctr_info.csvn == 2 && config > 175) ||
		    (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 &&
		     config > 255) ||
		    (cpumf_ctr_info.csvn >= 6 && config > 287))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_MT_DIAG:
		if (cpumf_ctr_info.csvn <= 3)
			err = -EOPNOTSUPP;
		/*
		 * MT-diagnostic counters are read-only. The counter set
		 * is automatically enabled and activated on all CPUs with
		 * multithreading (SMT). Deactivation of multithreading
		 * also disables the counter set. State changes are ignored
		 * by lcctl(). Because Linux controls SMT enablement through
		 * a kernel parameter only, the counter set is either disabled
		 * or enabled and active.
		 *
		 * Thus, the counters can only be used if SMT is on and the
		 * counter set is enabled and active.
		 */
		mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
		if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) &&
		      (cpumf_ctr_info.enable_ctl & mtdiag_ctl) &&
		      (cpumf_ctr_info.act_ctl & mtdiag_ctl)))
			err = -EOPNOTSUPP;
		break;
	case CPUMF_CTR_SET_MAX:
		err = -EOPNOTSUPP;
	}

	return err;
}

/*
 * Change the CPUMF state to active.
 * Enable and activate the CPU-counter sets according
 * to the per-cpu control state.
 */
static void cpumf_pmu_enable(struct pmu *pmu)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	int err;

	if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
		return;

	err = lcctl(cpuhw->state | cpuhw->dev_state);
	if (err)
		pr_err("Enabling the performance measuring unit failed with rc=%x\n", err);
	else
		cpuhw->flags |= PMU_F_ENABLED;
}

/*
 * Change the CPUMF state to inactive.
 * Disable and enable (inactive) the CPU-counter sets according
 * to the per-cpu control state.
692 */ 693 static void cpumf_pmu_disable(struct pmu *pmu) 694 { 695 struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 696 u64 inactive; 697 int err; 698 699 if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) 700 return; 701 702 inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); 703 inactive |= cpuhw->dev_state; 704 err = lcctl(inactive); 705 if (err) 706 pr_err("Disabling the performance measuring unit failed with rc=%x\n", err); 707 else 708 cpuhw->flags &= ~PMU_F_ENABLED; 709 } 710 711 /* Release the PMU if event is the last perf event */ 712 static void hw_perf_event_destroy(struct perf_event *event) 713 { 714 cpum_cf_free(event->cpu); 715 } 716 717 /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ 718 static const int cpumf_generic_events_basic[] = { 719 [PERF_COUNT_HW_CPU_CYCLES] = 0, 720 [PERF_COUNT_HW_INSTRUCTIONS] = 1, 721 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 722 [PERF_COUNT_HW_CACHE_MISSES] = -1, 723 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 724 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 725 [PERF_COUNT_HW_BUS_CYCLES] = -1, 726 }; 727 /* CPUMF <-> perf event mappings for userspace (problem-state set) */ 728 static const int cpumf_generic_events_user[] = { 729 [PERF_COUNT_HW_CPU_CYCLES] = 32, 730 [PERF_COUNT_HW_INSTRUCTIONS] = 33, 731 [PERF_COUNT_HW_CACHE_REFERENCES] = -1, 732 [PERF_COUNT_HW_CACHE_MISSES] = -1, 733 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, 734 [PERF_COUNT_HW_BRANCH_MISSES] = -1, 735 [PERF_COUNT_HW_BUS_CYCLES] = -1, 736 }; 737 738 static int is_userspace_event(u64 ev) 739 { 740 return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || 741 cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; 742 } 743 744 static int __hw_perf_event_init(struct perf_event *event, unsigned int type) 745 { 746 struct perf_event_attr *attr = &event->attr; 747 struct hw_perf_event *hwc = &event->hw; 748 enum cpumf_ctr_set set; 749 u64 ev; 750 751 switch (type) { 752 case PERF_TYPE_RAW: 753 /* Raw events are used to access counters directly, 754 * hence do not permit excludes */ 755 if (attr->exclude_kernel || attr->exclude_user || 756 attr->exclude_hv) 757 return -EOPNOTSUPP; 758 ev = attr->config; 759 break; 760 761 case PERF_TYPE_HARDWARE: 762 ev = attr->config; 763 if (!attr->exclude_user && attr->exclude_kernel) { 764 /* 765 * Count user space (problem-state) only 766 * Handle events 32 and 33 as 0:u and 1:u 767 */ 768 if (!is_userspace_event(ev)) { 769 if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) 770 return -EOPNOTSUPP; 771 ev = cpumf_generic_events_user[ev]; 772 } 773 } else if (!attr->exclude_kernel && attr->exclude_user) { 774 /* No support for kernel space counters only */ 775 return -EOPNOTSUPP; 776 } else { 777 /* Count user and kernel space, incl. events 32 + 33 */ 778 if (!is_userspace_event(ev)) { 779 if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) 780 return -EOPNOTSUPP; 781 ev = cpumf_generic_events_basic[ev]; 782 } 783 } 784 break; 785 786 default: 787 return -ENOENT; 788 } 789 790 if (ev == -1) 791 return -ENOENT; 792 793 if (ev > PERF_CPUM_CF_MAX_CTR) 794 return -ENOENT; 795 796 /* Obtain the counter set to which the specified counter belongs */ 797 set = get_counter_set(ev); 798 switch (set) { 799 case CPUMF_CTR_SET_BASIC: 800 case CPUMF_CTR_SET_USER: 801 case CPUMF_CTR_SET_CRYPTO: 802 case CPUMF_CTR_SET_EXT: 803 case CPUMF_CTR_SET_MT_DIAG: 804 /* 805 * Use the hardware perf event structure to store the 806 * counter number in the 'config' member and the counter 807 * set number in the 'config_base' as bit mask. 
		 * It is later used to enable/disable the counter(s).
		 */
		hwc->config = ev;
		hwc->config_base = cpumf_ctr_ctl[set];
		break;
	case CPUMF_CTR_SET_MAX:
		/* The counter could not be associated to a counter set */
		return -EINVAL;
	}

	/* Initialize for using the CPU-measurement counter facility */
	if (cpum_cf_alloc(event->cpu))
		return -ENOMEM;
	event->destroy = hw_perf_event_destroy;

	/*
	 * Finally, validate version and authorization of the counter set.
	 * If the particular CPU counter set is not authorized,
	 * return with -ENOENT in order to fall back to other
	 * PMUs that might suffice the event request.
	 */
	if (!(hwc->config_base & cpumf_ctr_info.auth_ctl))
		return -ENOENT;
	return validate_ctr_version(hwc->config, set);
}

/* Events CPU_CYCLES and INSTRUCTIONS can be submitted with two different
 * attribute::type values:
 * - PERF_TYPE_HARDWARE:
 * - pmu->type:
 * Handle both types of invocation identically. They address the same hardware.
 * The result is different when event modifiers exclude_kernel and/or
 * exclude_user are also set.
 */
static int cpumf_pmu_event_type(struct perf_event *event)
{
	u64 ev = event->attr.config;

	if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
	    cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
	    cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
	    cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
		return PERF_TYPE_HARDWARE;
	return PERF_TYPE_RAW;
}

static int cpumf_pmu_event_init(struct perf_event *event)
{
	unsigned int type = event->attr.type;
	int err = -ENOENT;

	if (is_sampling_event(event))	/* No sampling support */
		return err;
	if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
		err = __hw_perf_event_init(event, type);
	else if (event->pmu->type == type)
		/* Registered as unknown PMU */
		err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));

	return err;
}

static int hw_perf_event_reset(struct perf_event *event)
{
	u64 prev, new;
	int err;

	prev = local64_read(&event->hw.prev_count);
	do {
		err = ecctr(event->hw.config, &new);
		if (err) {
			if (err != 3)
				break;
			/* The counter is not (yet) available. This
			 * might happen if the counter set to which
			 * this counter belongs is in the disabled
			 * state.
			 */
			new = 0;
		}
	} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));

	return err;
}

static void hw_perf_event_update(struct perf_event *event)
{
	u64 prev, new, delta;
	int err;

	prev = local64_read(&event->hw.prev_count);
	do {
		err = ecctr(event->hw.config, &new);
		if (err)
			return;
	} while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, new));

	delta = (prev <= new) ? new - prev
			      : (-1ULL - prev) + new + 1;	/* overflow */
	local64_add(delta, &event->count);
}

static void cpumf_pmu_read(struct perf_event *event)
{
	if (event->hw.state & PERF_HES_STOPPED)
		return;

	hw_perf_event_update(event);
}

static void cpumf_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	struct hw_perf_event *hwc = &event->hw;
	int i;

	if (!(hwc->state & PERF_HES_STOPPED))
		return;

	hwc->state = 0;

	/* (Re-)enable and activate the counter set */
	ctr_set_enable(&cpuhw->state, hwc->config_base);
	ctr_set_start(&cpuhw->state, hwc->config_base);

	/* The counter set to which this counter belongs can be already active.
	 * Because all counters in a set are active, the event->hw.prev_count
	 * needs to be synchronized. At this point, the counter set can be in
	 * the inactive or disabled state.
	 */
	if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) {
		cpuhw->usedss = cfdiag_getctr(cpuhw->start,
					      sizeof(cpuhw->start),
					      hwc->config_base, true);
	} else {
		hw_perf_event_reset(event);
	}

	/* Increment refcount for counter sets */
	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
		if ((hwc->config_base & cpumf_ctr_ctl[i]))
			atomic_inc(&cpuhw->ctr_set[i]);
}

/* Create perf event sample with the counter sets as raw data. The sample
 * is then pushed to the event subsystem and the function checks for
 * possible event overflows. If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int cfdiag_push_sample(struct perf_event *event,
			      struct cpu_cf_events *cpuhw)
{
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	int overflow;

	/* Setup perf sample */
	perf_sample_data_init(&data, 0, event->hw.last_period);
	memset(&regs, 0, sizeof(regs));
	memset(&raw, 0, sizeof(raw));

	if (event->attr.sample_type & PERF_SAMPLE_CPU)
		data.cpu_entry.cpu = event->cpu;
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.frag.size = cpuhw->usedss;
		raw.frag.data = cpuhw->stop;
		perf_sample_save_raw_data(&data, event, &raw);
	}

	overflow = perf_event_overflow(event, &data, &regs);

	perf_event_update_userpage(event);
	return overflow;
}

static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	struct hw_perf_event *hwc = &event->hw;
	int i;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		/* Decrement reference count for this counter set and if this
		 * is the last used counter in the set, clear activation
		 * control and set the counter set state to inactive.
996 */ 997 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 998 if (!(hwc->config_base & cpumf_ctr_ctl[i])) 999 continue; 1000 if (!atomic_dec_return(&cpuhw->ctr_set[i])) 1001 ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); 1002 } 1003 hwc->state |= PERF_HES_STOPPED; 1004 } 1005 1006 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { 1007 if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { 1008 local64_inc(&event->count); 1009 cpuhw->usedss = cfdiag_getctr(cpuhw->stop, 1010 sizeof(cpuhw->stop), 1011 event->hw.config_base, 1012 false); 1013 if (cfdiag_diffctr(cpuhw, event->hw.config_base)) 1014 cfdiag_push_sample(event, cpuhw); 1015 } else { 1016 hw_perf_event_update(event); 1017 } 1018 hwc->state |= PERF_HES_UPTODATE; 1019 } 1020 } 1021 1022 static int cpumf_pmu_add(struct perf_event *event, int flags) 1023 { 1024 struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1025 1026 ctr_set_enable(&cpuhw->state, event->hw.config_base); 1027 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 1028 1029 if (flags & PERF_EF_START) 1030 cpumf_pmu_start(event, PERF_EF_RELOAD); 1031 1032 return 0; 1033 } 1034 1035 static void cpumf_pmu_del(struct perf_event *event, int flags) 1036 { 1037 struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1038 int i; 1039 1040 cpumf_pmu_stop(event, PERF_EF_UPDATE); 1041 1042 /* Check if any counter in the counter set is still used. If not used, 1043 * change the counter set to the disabled state. This also clears the 1044 * content of all counters in the set. 1045 * 1046 * When a new perf event has been added but not yet started, this can 1047 * clear enable control and resets all counters in a set. Therefore, 1048 * cpumf_pmu_start() always has to re-enable a counter set. 1049 */ 1050 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) 1051 if (!atomic_read(&cpuhw->ctr_set[i])) 1052 ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); 1053 } 1054 1055 /* Performance monitoring unit for s390x */ 1056 static struct pmu cpumf_pmu = { 1057 .task_ctx_nr = perf_sw_context, 1058 .capabilities = PERF_PMU_CAP_NO_INTERRUPT, 1059 .pmu_enable = cpumf_pmu_enable, 1060 .pmu_disable = cpumf_pmu_disable, 1061 .event_init = cpumf_pmu_event_init, 1062 .add = cpumf_pmu_add, 1063 .del = cpumf_pmu_del, 1064 .start = cpumf_pmu_start, 1065 .stop = cpumf_pmu_stop, 1066 .read = cpumf_pmu_read, 1067 }; 1068 1069 static struct cfset_session { /* CPUs and counter set bit mask */ 1070 struct list_head head; /* Head of list of active processes */ 1071 } cfset_session = { 1072 .head = LIST_HEAD_INIT(cfset_session.head) 1073 }; 1074 1075 static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ 1076 /* 1077 * Synchronize access to device /dev/hwc. This mutex protects against 1078 * concurrent access to functions cfset_open() and cfset_release(). 1079 * Same for CPU hotplug add and remove events triggering 1080 * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). 1081 * It also serializes concurrent device ioctl access from multiple 1082 * processes accessing /dev/hwc. 1083 * 1084 * The mutex protects concurrent access to the /dev/hwctr session management 1085 * struct cfset_session and reference counting variable cfset_opencnt. 1086 */ 1087 static DEFINE_MUTEX(cfset_ctrset_mutex); 1088 1089 /* 1090 * CPU hotplug handles only /dev/hwctr device. 1091 * For perf_event_open() the CPU hotplug handling is done on kernel common 1092 * code: 1093 * - CPU add: Nothing is done since a file descriptor can not be created 1094 * and returned to the user. 
 * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
 *   pmu_delete(). The event itself is removed when the file descriptor is
 *   closed.
 */
static int cfset_online_cpu(unsigned int cpu);

static int cpum_cf_online_cpu(unsigned int cpu)
{
	int rc = 0;

	/*
	 * Ignore notification for perf_event_open().
	 * Handle only /dev/hwctr device sessions.
	 */
	mutex_lock(&cfset_ctrset_mutex);
	if (refcount_read(&cfset_opencnt)) {
		rc = cpum_cf_alloc_cpu(cpu);
		if (!rc)
			cfset_online_cpu(cpu);
	}
	mutex_unlock(&cfset_ctrset_mutex);
	return rc;
}

static int cfset_offline_cpu(unsigned int cpu);

static int cpum_cf_offline_cpu(unsigned int cpu)
{
	/*
	 * During task exit processing of grouped perf events triggered by CPU
	 * hotplug processing, pmu_disable() is called as part of perf context
	 * removal process. Therefore do not trigger event removal now for
	 * perf_event_open() created events. Perf common code triggers event
	 * destruction when the event file descriptor is closed.
	 *
	 * Handle only /dev/hwctr device sessions.
	 */
	mutex_lock(&cfset_ctrset_mutex);
	if (refcount_read(&cfset_opencnt)) {
		cfset_offline_cpu(cpu);
		cpum_cf_free_cpu(cpu);
	}
	mutex_unlock(&cfset_ctrset_mutex);
	return 0;
}

/* Return true if store counter set multiple instruction is available */
static inline int stccm_avail(void)
{
	return test_facility(142);
}

/* CPU-measurement alerts for the counter facility */
static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_cf_events *cpuhw;

	if (!(alert & CPU_MF_INT_CF_MASK))
		return;

	inc_irq_stat(IRQEXT_CMC);

	/*
	 * Measurement alerts are shared and might happen when the PMU
	 * is not reserved. Ignore these alerts in this case.
1161 */ 1162 cpuhw = this_cpu_cfhw(); 1163 if (!cpuhw) 1164 return; 1165 1166 /* counter authorization change alert */ 1167 if (alert & CPU_MF_INT_CF_CACA) 1168 qctri(&cpumf_ctr_info); 1169 1170 /* loss of counter data alert */ 1171 if (alert & CPU_MF_INT_CF_LCDA) 1172 pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); 1173 1174 /* loss of MT counter data alert */ 1175 if (alert & CPU_MF_INT_CF_MTDA) 1176 pr_warn("CPU[%i] MT counter data was lost\n", 1177 smp_processor_id()); 1178 } 1179 1180 static int cfset_init(void); 1181 static int __init cpumf_pmu_init(void) 1182 { 1183 int rc; 1184 1185 /* Extract counter measurement facility information */ 1186 if (!cpum_cf_avail() || qctri(&cpumf_ctr_info)) 1187 return -ENODEV; 1188 1189 /* Determine and store counter set sizes for later reference */ 1190 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 1191 cpum_cf_make_setsize(rc); 1192 1193 /* 1194 * Clear bit 15 of cr0 to unauthorize problem-state to 1195 * extract measurement counters 1196 */ 1197 system_ctl_clear_bit(0, CR0_CPUMF_EXTRACTION_AUTH_BIT); 1198 1199 /* register handler for measurement-alert interruptions */ 1200 rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, 1201 cpumf_measurement_alert); 1202 if (rc) { 1203 pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc); 1204 return rc; 1205 } 1206 1207 /* Setup s390dbf facility */ 1208 cf_dbg = debug_register("cpum_cf", 2, 1, 128); 1209 if (!cf_dbg) { 1210 pr_err("Registration of s390dbf(cpum_cf) failed\n"); 1211 rc = -ENOMEM; 1212 goto out1; 1213 } 1214 debug_register_view(cf_dbg, &debug_sprintf_view); 1215 1216 cpumf_pmu.attr_groups = cpumf_cf_event_group(); 1217 rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); 1218 if (rc) { 1219 pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); 1220 goto out2; 1221 } else if (stccm_avail()) { /* Setup counter set device */ 1222 cfset_init(); 1223 } 1224 1225 rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, 1226 "perf/s390/cf:online", 1227 cpum_cf_online_cpu, cpum_cf_offline_cpu); 1228 return rc; 1229 1230 out2: 1231 debug_unregister_view(cf_dbg, &debug_sprintf_view); 1232 debug_unregister(cf_dbg); 1233 out1: 1234 unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); 1235 return rc; 1236 } 1237 1238 /* Support for the CPU Measurement Facility counter set extraction using 1239 * device /dev/hwctr. This allows user space programs to extract complete 1240 * counter set via normal file operations. 1241 */ 1242 1243 struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ 1244 unsigned int sets; /* Counter set bit mask */ 1245 atomic_t cpus_ack; /* # CPUs successfully executed func */ 1246 }; 1247 1248 struct cfset_request { /* CPUs and counter set bit mask */ 1249 unsigned long ctrset; /* Bit mask of counter set to read */ 1250 cpumask_t mask; /* CPU mask to read from */ 1251 struct list_head node; /* Chain to cfset_session.head */ 1252 }; 1253 1254 static void cfset_session_init(void) 1255 { 1256 INIT_LIST_HEAD(&cfset_session.head); 1257 } 1258 1259 /* Remove current request from global bookkeeping. Maintain a counter set bit 1260 * mask on a per CPU basis. 1261 * Done in process context under mutex protection. 1262 */ 1263 static void cfset_session_del(struct cfset_request *p) 1264 { 1265 list_del(&p->node); 1266 } 1267 1268 /* Add current request to global bookkeeping. Maintain a counter set bit mask 1269 * on a per CPU basis. 1270 * Done in process context under mutex protection. 
1271 */ 1272 static void cfset_session_add(struct cfset_request *p) 1273 { 1274 list_add(&p->node, &cfset_session.head); 1275 } 1276 1277 /* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access 1278 * path is currently used. 1279 * The cpu_cf_events::dev_state is used to denote counter sets in use by this 1280 * interface. It is always or'ed in. If this interface is not active, its 1281 * value is zero and no additional counter sets will be included. 1282 * 1283 * The cpu_cf_events::state is used by the perf_event_open SVC and remains 1284 * unchanged. 1285 * 1286 * perf_pmu_enable() and perf_pmu_enable() and its call backs 1287 * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the 1288 * performance measurement subsystem to enable per process 1289 * CPU Measurement counter facility. 1290 * The XXX_enable() and XXX_disable functions are used to turn off 1291 * x86 performance monitoring interrupt (PMI) during scheduling. 1292 * s390 uses these calls to temporarily stop and resume the active CPU 1293 * counters sets during scheduling. 1294 * 1295 * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr 1296 * device access. The perf_event_open() SVC interface makes a lot of effort 1297 * to only run the counters while the calling process is actively scheduled 1298 * to run. 1299 * When /dev/hwctr interface is also used at the same time, the counter sets 1300 * will keep running, even when the process is scheduled off a CPU. 1301 * However this is not a problem and does not lead to wrong counter values 1302 * for the perf_event_open() SVC. The current counter value will be recorded 1303 * during schedule-in. At schedule-out time the current counter value is 1304 * extracted again and the delta is calculated and added to the event. 
1305 */ 1306 /* Stop all counter sets via ioctl interface */ 1307 static void cfset_ioctl_off(void *parm) 1308 { 1309 struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1310 struct cfset_call_on_cpu_parm *p = parm; 1311 int rc; 1312 1313 /* Check if any counter set used by /dev/hwctr */ 1314 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 1315 if ((p->sets & cpumf_ctr_ctl[rc])) { 1316 if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { 1317 ctr_set_disable(&cpuhw->dev_state, 1318 cpumf_ctr_ctl[rc]); 1319 ctr_set_stop(&cpuhw->dev_state, 1320 cpumf_ctr_ctl[rc]); 1321 } 1322 } 1323 /* Keep perf_event_open counter sets */ 1324 rc = lcctl(cpuhw->dev_state | cpuhw->state); 1325 if (rc) 1326 pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", 1327 cpuhw->state, S390_HWCTR_DEVICE, rc); 1328 if (!cpuhw->dev_state) 1329 cpuhw->flags &= ~PMU_F_IN_USE; 1330 } 1331 1332 /* Start counter sets on particular CPU */ 1333 static void cfset_ioctl_on(void *parm) 1334 { 1335 struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1336 struct cfset_call_on_cpu_parm *p = parm; 1337 int rc; 1338 1339 cpuhw->flags |= PMU_F_IN_USE; 1340 ctr_set_enable(&cpuhw->dev_state, p->sets); 1341 ctr_set_start(&cpuhw->dev_state, p->sets); 1342 for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) 1343 if ((p->sets & cpumf_ctr_ctl[rc])) 1344 atomic_inc(&cpuhw->ctr_set[rc]); 1345 rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ 1346 if (!rc) 1347 atomic_inc(&p->cpus_ack); 1348 else 1349 pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", 1350 cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); 1351 } 1352 1353 static void cfset_release_cpu(void *p) 1354 { 1355 struct cpu_cf_events *cpuhw = this_cpu_cfhw(); 1356 int rc; 1357 1358 cpuhw->dev_state = 0; 1359 rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ 1360 if (rc) 1361 pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", 1362 cpuhw->state, S390_HWCTR_DEVICE, rc); 1363 } 1364 1365 /* This modifies the process CPU mask to adopt it to the currently online 1366 * CPUs. Offline CPUs can not be addresses. This call terminates the access 1367 * and is usually followed by close() or a new iotcl(..., START, ...) which 1368 * creates a new request structure. 1369 */ 1370 static void cfset_all_stop(struct cfset_request *req) 1371 { 1372 struct cfset_call_on_cpu_parm p = { 1373 .sets = req->ctrset, 1374 }; 1375 1376 cpumask_and(&req->mask, &req->mask, cpu_online_mask); 1377 on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1); 1378 } 1379 1380 /* Release function is also called when application gets terminated without 1381 * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. 1382 */ 1383 static int cfset_release(struct inode *inode, struct file *file) 1384 { 1385 mutex_lock(&cfset_ctrset_mutex); 1386 /* Open followed by close/exit has no private_data */ 1387 if (file->private_data) { 1388 cfset_all_stop(file->private_data); 1389 cfset_session_del(file->private_data); 1390 kfree(file->private_data); 1391 file->private_data = NULL; 1392 } 1393 if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */ 1394 on_each_cpu(cfset_release_cpu, NULL, 1); 1395 cpum_cf_free(-1); 1396 } 1397 mutex_unlock(&cfset_ctrset_mutex); 1398 return 0; 1399 } 1400 1401 /* 1402 * Open via /dev/hwctr device. Allocate all per CPU resources on the first 1403 * open of the device. The last close releases all per CPU resources. 1404 * Parallel perf_event_open system calls also use per CPU resources. 
 * These invocations are handled via reference counting on the per CPU data
 * structures.
 */
static int cfset_open(struct inode *inode, struct file *file)
{
	int rc = 0;

	if (!perfmon_capable())
		return -EPERM;
	file->private_data = NULL;

	mutex_lock(&cfset_ctrset_mutex);
	if (!refcount_inc_not_zero(&cfset_opencnt)) {	/* First open */
		rc = cpum_cf_alloc(-1);
		if (!rc) {
			cfset_session_init();
			refcount_set(&cfset_opencnt, 1);
		}
	}
	mutex_unlock(&cfset_ctrset_mutex);

	/* nonseekable_open() never fails */
	return rc ?: nonseekable_open(inode, file);
}

static int cfset_all_start(struct cfset_request *req)
{
	struct cfset_call_on_cpu_parm p = {
		.sets = req->ctrset,
		.cpus_ack = ATOMIC_INIT(0),
	};
	cpumask_var_t mask;
	int rc = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_and(mask, &req->mask, cpu_online_mask);
	on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1);
	if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
		on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1);
		rc = -EIO;
	}
	free_cpumask_var(mask);
	return rc;
}

/* Return the maximum required space for all possible CPUs in case one
 * CPU will be onlined during the START, READ, STOP cycles.
 * To find out the size of the counter sets, any one CPU will do. They
 * all have the same counter sets.
 */
static size_t cfset_needspace(unsigned int sets)
{
	size_t bytes = 0;
	int i;

	for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
		if (!(sets & cpumf_ctr_ctl[i]))
			continue;
		bytes += cpum_cf_read_setsize(i) * sizeof(u64) +
			 sizeof(((struct s390_ctrset_setdata *)0)->set) +
			 sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
	}
	bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
		(bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
		 sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
	return bytes;
}

static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
{
	struct s390_ctrset_read __user *ctrset_read;
	unsigned int cpu, cpus, rc = 0;
	void __user *uptr;

	ctrset_read = (struct s390_ctrset_read __user *)arg;
	uptr = ctrset_read->data;
	for_each_cpu(cpu, mask) {
		struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
		struct s390_ctrset_cpudata __user *ctrset_cpudata;

		ctrset_cpudata = uptr;
		rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
		rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets);
		rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data,
				   cpuhw->used);
		if (rc) {
			rc = -EFAULT;
			goto out;
		}
		uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used;
		cond_resched();
	}
	cpus = cpumask_weight(mask);
	if (put_user(cpus, &ctrset_read->no_cpus))
		rc = -EFAULT;
out:
	return rc;
}

static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
				int ctrset_size, size_t room)
{
	size_t need = 0;
	int rc = -1;

	need = sizeof(*p) + sizeof(u64) * ctrset_size;
	if (need <= room) {
		p->set = cpumf_ctr_ctl[ctrset];
		p->no_cnts = ctrset_size;
		rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
		if (rc == 3)		/* Nothing stored */
			need = 0;
	}
	return need;
}

/* Read all counter sets. */
static void cfset_cpu_read(void *parm)
{
	struct cpu_cf_events *cpuhw = this_cpu_cfhw();
	struct cfset_call_on_cpu_parm *p = parm;
	int set, set_size;
	size_t space;

	/* No data saved yet */
	cpuhw->used = 0;
	cpuhw->sets = 0;
	memset(cpuhw->data, 0, sizeof(cpuhw->data));

	/* Scan the counter sets */
	for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
		struct s390_ctrset_setdata *sp = (void *)cpuhw->data +
						 cpuhw->used;

		if (!(p->sets & cpumf_ctr_ctl[set]))
			continue;	/* Counter set not in list */
		set_size = cpum_cf_read_setsize(set);
		space = sizeof(cpuhw->data) - cpuhw->used;
		space = cfset_cpuset_read(sp, set, set_size, space);
		if (space) {
			cpuhw->used += space;
			cpuhw->sets += 1;
		}
	}
}

static int cfset_all_read(unsigned long arg, struct cfset_request *req)
{
	struct cfset_call_on_cpu_parm p;
	cpumask_var_t mask;
	int rc;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	p.sets = req->ctrset;
	cpumask_and(mask, &req->mask, cpu_online_mask);
	on_each_cpu_mask(mask, cfset_cpu_read, &p, 1);
	rc = cfset_all_copy(arg, mask);
	free_cpumask_var(mask);
	return rc;
}

static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req)
{
	int ret = -ENODATA;

	if (req && req->ctrset)
		ret = cfset_all_read(arg, req);
	return ret;
}

static long cfset_ioctl_stop(struct file *file)
{
	struct cfset_request *req = file->private_data;
	int ret = -ENXIO;

	if (req) {
		cfset_all_stop(req);
		cfset_session_del(req);
		kfree(req);
		file->private_data = NULL;
		ret = 0;
	}
	return ret;
}

static long cfset_ioctl_start(unsigned long arg, struct file *file)
{
	struct s390_ctrset_start __user *ustart;
	struct s390_ctrset_start start;
	struct cfset_request *preq;
	void __user *umask;
	unsigned int len;
	int ret = 0;
	size_t need;

	if (file->private_data)
		return -EBUSY;
	ustart = (struct s390_ctrset_start __user *)arg;
	if (copy_from_user(&start, ustart, sizeof(start)))
		return -EFAULT;
	if (start.version != S390_HWCTR_START_VERSION)
		return -EINVAL;
	if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
				   cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
		return -EINVAL;		/* Invalid counter set */
	if (!start.counter_sets)
		return -EINVAL;		/* No counter set at all? */

	preq = kzalloc(sizeof(*preq), GFP_KERNEL);
	if (!preq)
		return -ENOMEM;
	cpumask_clear(&preq->mask);
	len = min_t(u64, start.cpumask_len, cpumask_size());
	umask = (void __user *)start.cpumask;
	if (copy_from_user(&preq->mask, umask, len)) {
		kfree(preq);
		return -EFAULT;
	}
	if (cpumask_empty(&preq->mask)) {
		kfree(preq);
		return -EINVAL;
	}
	need = cfset_needspace(start.counter_sets);
	if (put_user(need, &ustart->data_bytes)) {
		kfree(preq);
		return -EFAULT;
	}
	preq->ctrset = start.counter_sets;
	ret = cfset_all_start(preq);
	if (!ret) {
		cfset_session_add(preq);
		file->private_data = preq;
	} else {
		kfree(preq);
	}
	return ret;
}

/* Entry point to the /dev/hwctr device interface.
 * The ioctl system call supports three subcommands:
 * S390_HWCTR_START: Start the specified counter sets on a CPU list. The
 *    counter set keeps running until explicitly stopped. Returns the number
 *    of bytes needed to store the counter values. If another S390_HWCTR_START
 *    ioctl subcommand is called without a previous S390_HWCTR_STOP stop
 *    command on the same file descriptor, -EBUSY is returned.
 * S390_HWCTR_READ: Read the counter set values from the CPU list given
 *    with the S390_HWCTR_START command.
 * S390_HWCTR_STOP: Stops the counter sets on the CPU list given with the
 *    previous S390_HWCTR_START subcommand.
 */
static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	int ret;

	cpus_read_lock();
	mutex_lock(&cfset_ctrset_mutex);
	switch (cmd) {
	case S390_HWCTR_START:
		ret = cfset_ioctl_start(arg, file);
		break;
	case S390_HWCTR_STOP:
		ret = cfset_ioctl_stop(file);
		break;
	case S390_HWCTR_READ:
		ret = cfset_ioctl_read(arg, file->private_data);
		break;
	default:
		ret = -ENOTTY;
		break;
	}
	mutex_unlock(&cfset_ctrset_mutex);
	cpus_read_unlock();
	return ret;
}

static const struct file_operations cfset_fops = {
	.owner = THIS_MODULE,
	.open = cfset_open,
	.release = cfset_release,
	.unlocked_ioctl	= cfset_ioctl,
};

static struct miscdevice cfset_dev = {
	.name	= S390_HWCTR_DEVICE,
	.minor	= MISC_DYNAMIC_MINOR,
	.fops	= &cfset_fops,
	.mode	= 0666,
};

/* Hotplug add of a CPU. Scan through all active processes and add
 * that CPU to the list of CPUs supplied with ioctl(..., START, ...).
 */
static int cfset_online_cpu(unsigned int cpu)
{
	struct cfset_call_on_cpu_parm p;
	struct cfset_request *rp;

	if (!list_empty(&cfset_session.head)) {
		list_for_each_entry(rp, &cfset_session.head, node) {
			p.sets = rp->ctrset;
			cfset_ioctl_on(&p);
			cpumask_set_cpu(cpu, &rp->mask);
		}
	}
	return 0;
}

/* Hotplug remove of a CPU. Scan through all active processes and clear
 * that CPU from the list of CPUs supplied with ioctl(..., START, ...).
 * Adjust reference counts.
1721 */ 1722 static int cfset_offline_cpu(unsigned int cpu) 1723 { 1724 struct cfset_call_on_cpu_parm p; 1725 struct cfset_request *rp; 1726 1727 if (!list_empty(&cfset_session.head)) { 1728 list_for_each_entry(rp, &cfset_session.head, node) { 1729 p.sets = rp->ctrset; 1730 cfset_ioctl_off(&p); 1731 cpumask_clear_cpu(cpu, &rp->mask); 1732 } 1733 } 1734 return 0; 1735 } 1736 1737 static void cfdiag_read(struct perf_event *event) 1738 { 1739 } 1740 1741 static int get_authctrsets(void) 1742 { 1743 unsigned long auth = 0; 1744 enum cpumf_ctr_set i; 1745 1746 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1747 if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i]) 1748 auth |= cpumf_ctr_ctl[i]; 1749 } 1750 return auth; 1751 } 1752 1753 /* Setup the event. Test for authorized counter sets and only include counter 1754 * sets which are authorized at the time of the setup. Including unauthorized 1755 * counter sets result in specification exception (and panic). 1756 */ 1757 static int cfdiag_event_init2(struct perf_event *event) 1758 { 1759 struct perf_event_attr *attr = &event->attr; 1760 int err = 0; 1761 1762 /* Set sample_period to indicate sampling */ 1763 event->hw.config = attr->config; 1764 event->hw.sample_period = attr->sample_period; 1765 local64_set(&event->hw.period_left, event->hw.sample_period); 1766 local64_set(&event->count, 0); 1767 event->hw.last_period = event->hw.sample_period; 1768 1769 /* Add all authorized counter sets to config_base. The 1770 * the hardware init function is either called per-cpu or just once 1771 * for all CPUS (event->cpu == -1). This depends on the whether 1772 * counting is started for all CPUs or on a per workload base where 1773 * the perf event moves from one CPU to another CPU. 1774 * Checking the authorization on any CPU is fine as the hardware 1775 * applies the same authorization settings to all CPUs. 1776 */ 1777 event->hw.config_base = get_authctrsets(); 1778 1779 /* No authorized counter sets, nothing to count/sample */ 1780 if (!event->hw.config_base) 1781 err = -EINVAL; 1782 1783 return err; 1784 } 1785 1786 static int cfdiag_event_init(struct perf_event *event) 1787 { 1788 struct perf_event_attr *attr = &event->attr; 1789 int err = -ENOENT; 1790 1791 if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || 1792 event->attr.type != event->pmu->type) 1793 goto out; 1794 1795 /* Raw events are used to access counters directly, 1796 * hence do not permit excludes. 1797 * This event is useless without PERF_SAMPLE_RAW to return counter set 1798 * values as raw data. 1799 */ 1800 if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || 1801 !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { 1802 err = -EOPNOTSUPP; 1803 goto out; 1804 } 1805 1806 /* Initialize for using the CPU-measurement counter facility */ 1807 if (cpum_cf_alloc(event->cpu)) 1808 return -ENOMEM; 1809 event->destroy = hw_perf_event_destroy; 1810 1811 err = cfdiag_event_init2(event); 1812 out: 1813 return err; 1814 } 1815 1816 /* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used 1817 * to collect the complete counter sets for a scheduled process. Target 1818 * are complete counter sets attached as raw data to the artificial event. 1819 * This results in complete counter sets available when a process is 1820 * scheduled. Contains the delta of every counter while the process was 1821 * running. 
1822 */ 1823 CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); 1824 1825 static struct attribute *cfdiag_events_attr[] = { 1826 CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), 1827 NULL, 1828 }; 1829 1830 PMU_FORMAT_ATTR(event, "config:0-63"); 1831 1832 static struct attribute *cfdiag_format_attr[] = { 1833 &format_attr_event.attr, 1834 NULL, 1835 }; 1836 1837 static struct attribute_group cfdiag_events_group = { 1838 .name = "events", 1839 .attrs = cfdiag_events_attr, 1840 }; 1841 static struct attribute_group cfdiag_format_group = { 1842 .name = "format", 1843 .attrs = cfdiag_format_attr, 1844 }; 1845 static const struct attribute_group *cfdiag_attr_groups[] = { 1846 &cfdiag_events_group, 1847 &cfdiag_format_group, 1848 NULL, 1849 }; 1850 1851 /* Performance monitoring unit for event CF_DIAG. Since this event 1852 * is also started and stopped via the perf_event_open() system call, use 1853 * the same event enable/disable call back functions. They do not 1854 * have a pointer to the perf_event structure as first parameter. 1855 * 1856 * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. 1857 * Reuse them and distinguish the event (always first parameter) via 1858 * 'config' member. 1859 */ 1860 static struct pmu cf_diag = { 1861 .task_ctx_nr = perf_sw_context, 1862 .event_init = cfdiag_event_init, 1863 .pmu_enable = cpumf_pmu_enable, 1864 .pmu_disable = cpumf_pmu_disable, 1865 .add = cpumf_pmu_add, 1866 .del = cpumf_pmu_del, 1867 .start = cpumf_pmu_start, 1868 .stop = cpumf_pmu_stop, 1869 .read = cfdiag_read, 1870 1871 .attr_groups = cfdiag_attr_groups 1872 }; 1873 1874 /* Calculate memory needed to store all counter sets together with header and 1875 * trailer data. This is independent of the counter set authorization which 1876 * can vary depending on the configuration. 1877 */ 1878 static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) 1879 { 1880 size_t max_size = sizeof(struct cf_trailer_entry); 1881 enum cpumf_ctr_set i; 1882 1883 for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { 1884 size_t size = cpum_cf_read_setsize(i); 1885 1886 if (size) 1887 max_size += size * sizeof(u64) + 1888 sizeof(struct cf_ctrset_entry); 1889 } 1890 return max_size; 1891 } 1892 1893 /* Get the CPU speed, try sampling facility first and CPU attributes second. */ 1894 static void cfdiag_get_cpu_speed(void) 1895 { 1896 unsigned long mhz; 1897 1898 if (cpum_sf_avail()) { /* Sampling facility first */ 1899 struct hws_qsi_info_block si; 1900 1901 memset(&si, 0, sizeof(si)); 1902 if (!qsi(&si)) { 1903 cfdiag_cpu_speed = si.cpu_speed; 1904 return; 1905 } 1906 } 1907 1908 /* Fallback: CPU speed extract static part. Used in case 1909 * CPU Measurement Sampling Facility is turned off. 1910 */ 1911 mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); 1912 if (mhz != -1UL) 1913 cfdiag_cpu_speed = mhz & 0xffffffff; 1914 } 1915 1916 static int cfset_init(void) 1917 { 1918 size_t need; 1919 int rc; 1920 1921 cfdiag_get_cpu_speed(); 1922 /* Make sure the counter set data fits into predefined buffer. 
	need = cfdiag_maxsize(&cpumf_ctr_info);
	if (need > sizeof(((struct cpu_cf_events *)0)->start)) {
		pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n",
		       need);
		return -ENOMEM;
	}

	rc = misc_register(&cfset_dev);
	if (rc) {
		pr_err("Registration of /dev/%s failed rc=%i\n",
		       cfset_dev.name, rc);
		goto out;
	}

	rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
	if (rc) {
		misc_deregister(&cfset_dev);
		pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
		       rc);
	}
out:
	return rc;
}

device_initcall(cpumf_pmu_init);