1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Hypervisor supplied "gpci" ("get performance counter info") performance 4 * counter support 5 * 6 * Author: Cody P Schafer <cody@linux.vnet.ibm.com> 7 * Copyright 2014 IBM Corporation. 8 */ 9 10 #define pr_fmt(fmt) "hv-gpci: " fmt 11 12 #include <linux/init.h> 13 #include <linux/perf_event.h> 14 #include <asm/firmware.h> 15 #include <asm/hvcall.h> 16 #include <asm/io.h> 17 18 #include "hv-gpci.h" 19 #include "hv-common.h" 20 21 /* 22 * Example usage: 23 * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, 24 * secondary_index=0,starting_index=0xffffffff,request=0x10/' ... 25 */ 26 27 /* u32 */ 28 EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); 29 /* u32 */ 30 /* 31 * Note that starting_index, phys_processor_idx, sibling_part_id, 32 * hw_chip_id, partition_id all refer to the same bit range. They 33 * are basically aliases for the starting_index. The specific alias 34 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h 35 */ 36 EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); 37 EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); 38 EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); 39 EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); 40 EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); 41 42 /* u16 */ 43 EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); 44 /* u8 */ 45 EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); 46 /* u8, bytes of data (1-8) */ 47 EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); 48 /* u32, byte offset */ 49 EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); 50 51 static cpumask_t hv_gpci_cpumask; 52 53 static struct attribute *format_attrs[] = { 54 &format_attr_request.attr, 55 &format_attr_starting_index.attr, 56 &format_attr_phys_processor_idx.attr, 57 &format_attr_sibling_part_id.attr, 58 &format_attr_hw_chip_id.attr, 59 &format_attr_partition_id.attr, 60 &format_attr_secondary_index.attr, 61 &format_attr_counter_info_version.attr, 62 63 &format_attr_offset.attr, 64 &format_attr_length.attr, 65 NULL, 66 }; 67 68 static const struct attribute_group format_group = { 69 .name = "format", 70 .attrs = format_attrs, 71 }; 72 73 static struct attribute_group event_group = { 74 .name = "events", 75 /* .attrs is set in init */ 76 }; 77 78 #define HV_CAPS_ATTR(_name, _format) \ 79 static ssize_t _name##_show(struct device *dev, \ 80 struct device_attribute *attr, \ 81 char *page) \ 82 { \ 83 struct hv_perf_caps caps; \ 84 unsigned long hret = hv_perf_caps_get(&caps); \ 85 if (hret) \ 86 return -EIO; \ 87 \ 88 return sprintf(page, _format, caps._name); \ 89 } \ 90 static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name) 91 92 static ssize_t kernel_version_show(struct device *dev, 93 struct device_attribute *attr, 94 char *page) 95 { 96 return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); 97 } 98 99 static ssize_t cpumask_show(struct device *dev, 100 struct device_attribute *attr, char *buf) 101 { 102 return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); 103 } 104 105 /* Interface attribute array index to store system information */ 106 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 107 #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 108 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 109 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 110 #define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 111 #define INTERFACE_NULL_ATTR 11 112 113 /* Counter request value to retrieve system information */ 114 enum { 115 PROCESSOR_BUS_TOPOLOGY, 116 PROCESSOR_CONFIG, 117 AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ 118 AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ 119 AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ 120 }; 121 122 static int sysinfo_counter_request[] = { 123 [PROCESSOR_BUS_TOPOLOGY] = 0xD0, 124 [PROCESSOR_CONFIG] = 0x90, 125 [AFFINITY_DOMAIN_VIA_VP] = 0xA0, 126 [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, 127 [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, 128 }; 129 130 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); 131 132 static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, 133 u16 secondary_index, char *buf, 134 size_t *n, struct hv_gpci_request_buffer *arg) 135 { 136 unsigned long ret; 137 size_t i, j; 138 139 arg->params.counter_request = cpu_to_be32(req); 140 arg->params.starting_index = cpu_to_be32(starting_index); 141 arg->params.secondary_index = cpu_to_be16(secondary_index); 142 143 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 144 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 145 146 /* 147 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', 148 * which means that the current buffer size cannot accommodate 149 * all the information and a partial buffer returned. 150 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. 151 * 152 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 153 * performance information, and required to set 154 * "Enable Performance Information Collection" option. 155 */ 156 if (ret == H_AUTHORITY) 157 return -EPERM; 158 159 /* 160 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE 161 * because of invalid buffer-length/address or due to some hardware 162 * error. 163 */ 164 if (ret && (ret != H_PARAMETER)) 165 return -EIO; 166 167 /* 168 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' 169 * to show the total number of counter_value array elements 170 * returned via hcall. 171 * hcall also populates 'cv_element_size' corresponds to individual 172 * counter_value array element size. Below loop go through all 173 * counter_value array elements as per their size and add it to 174 * the output buffer. 175 */ 176 for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { 177 j = i * be16_to_cpu(arg->params.cv_element_size); 178 179 for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) 180 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]); 181 *n += sprintf(buf + *n, "\n"); 182 } 183 184 if (*n >= PAGE_SIZE) { 185 pr_info("System information exceeds PAGE_SIZE\n"); 186 return -EFBIG; 187 } 188 189 return ret; 190 } 191 192 static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, 193 char *buf) 194 { 195 struct hv_gpci_request_buffer *arg; 196 unsigned long ret; 197 size_t n = 0; 198 199 arg = (void *)get_cpu_var(hv_gpci_reqb); 200 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 201 202 /* 203 * Pass the counter request value 0xD0 corresponds to request 204 * type 'Processor_bus_topology', to retrieve 205 * the system topology information. 206 * starting_index value implies the starting hardware 207 * chip id. 208 */ 209 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], 210 0, 0, buf, &n, arg); 211 212 if (!ret) 213 return n; 214 215 if (ret != H_PARAMETER) 216 goto out; 217 218 /* 219 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 220 * implies that buffer can't accommodate all information, and a partial buffer 221 * returned. To handle that, we need to make subsequent requests 222 * with next starting index to retrieve additional (missing) data. 223 * Below loop do subsequent hcalls with next starting index and add it 224 * to buffer util we get all the information. 225 */ 226 while (ret == H_PARAMETER) { 227 int returned_values = be16_to_cpu(arg->params.returned_values); 228 int elementsize = be16_to_cpu(arg->params.cv_element_size); 229 int last_element = (returned_values - 1) * elementsize; 230 231 /* 232 * Since the starting index value is part of counter_value 233 * buffer elements, use the starting index value in the last 234 * element and add 1 to make subsequent hcalls. 235 */ 236 u32 starting_index = arg->bytes[last_element + 3] + 237 (arg->bytes[last_element + 2] << 8) + 238 (arg->bytes[last_element + 1] << 16) + 239 (arg->bytes[last_element] << 24) + 1; 240 241 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 242 243 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], 244 starting_index, 0, buf, &n, arg); 245 246 if (!ret) 247 return n; 248 249 if (ret != H_PARAMETER) 250 goto out; 251 } 252 253 return n; 254 255 out: 256 put_cpu_var(hv_gpci_reqb); 257 return ret; 258 } 259 260 static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, 261 char *buf) 262 { 263 struct hv_gpci_request_buffer *arg; 264 unsigned long ret; 265 size_t n = 0; 266 267 arg = (void *)get_cpu_var(hv_gpci_reqb); 268 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 269 270 /* 271 * Pass the counter request value 0x90 corresponds to request 272 * type 'Processor_config', to retrieve 273 * the system processor information. 274 * starting_index value implies the starting hardware 275 * processor index. 276 */ 277 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], 278 0, 0, buf, &n, arg); 279 280 if (!ret) 281 return n; 282 283 if (ret != H_PARAMETER) 284 goto out; 285 286 /* 287 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 288 * implies that buffer can't accommodate all information, and a partial buffer 289 * returned. To handle that, we need to take subsequent requests 290 * with next starting index to retrieve additional (missing) data. 291 * Below loop do subsequent hcalls with next starting index and add it 292 * to buffer util we get all the information. 293 */ 294 while (ret == H_PARAMETER) { 295 int returned_values = be16_to_cpu(arg->params.returned_values); 296 int elementsize = be16_to_cpu(arg->params.cv_element_size); 297 int last_element = (returned_values - 1) * elementsize; 298 299 /* 300 * Since the starting index is part of counter_value 301 * buffer elements, use the starting index value in the last 302 * element and add 1 to subsequent hcalls. 303 */ 304 u32 starting_index = arg->bytes[last_element + 3] + 305 (arg->bytes[last_element + 2] << 8) + 306 (arg->bytes[last_element + 1] << 16) + 307 (arg->bytes[last_element] << 24) + 1; 308 309 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 310 311 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], 312 starting_index, 0, buf, &n, arg); 313 314 if (!ret) 315 return n; 316 317 if (ret != H_PARAMETER) 318 goto out; 319 } 320 321 return n; 322 323 out: 324 put_cpu_var(hv_gpci_reqb); 325 return ret; 326 } 327 328 static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, 329 struct device_attribute *attr, char *buf) 330 { 331 struct hv_gpci_request_buffer *arg; 332 unsigned long ret; 333 size_t n = 0; 334 335 arg = (void *)get_cpu_var(hv_gpci_reqb); 336 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 337 338 /* 339 * Pass the counter request 0xA0 corresponds to request 340 * type 'Affinity_domain_information_by_virutal_processor', 341 * to retrieve the system affinity domain information. 342 * starting_index value refers to the starting hardware 343 * processor index. 344 */ 345 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], 346 0, 0, buf, &n, arg); 347 348 if (!ret) 349 return n; 350 351 if (ret != H_PARAMETER) 352 goto out; 353 354 /* 355 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 356 * implies that buffer can't accommodate all information, and a partial buffer 357 * returned. To handle that, we need to take subsequent requests 358 * with next secondary index to retrieve additional (missing) data. 359 * Below loop do subsequent hcalls with next secondary index and add it 360 * to buffer util we get all the information. 361 */ 362 while (ret == H_PARAMETER) { 363 int returned_values = be16_to_cpu(arg->params.returned_values); 364 int elementsize = be16_to_cpu(arg->params.cv_element_size); 365 int last_element = (returned_values - 1) * elementsize; 366 367 /* 368 * Since the starting index and secondary index type is part of the 369 * counter_value buffer elements, use the starting index value in the 370 * last array element as subsequent starting index, and use secondary index 371 * value in the last array element plus 1 as subsequent secondary index. 372 * For counter request '0xA0', starting index points to partition id 373 * and secondary index points to corresponding virtual processor index. 374 */ 375 u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); 376 u16 secondary_index = arg->bytes[last_element + 3] + 377 (arg->bytes[last_element + 2] << 8) + 1; 378 379 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 380 381 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], 382 starting_index, secondary_index, buf, &n, arg); 383 384 if (!ret) 385 return n; 386 387 if (ret != H_PARAMETER) 388 goto out; 389 } 390 391 return n; 392 393 out: 394 put_cpu_var(hv_gpci_reqb); 395 return ret; 396 } 397 398 static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, 399 char *buf) 400 { 401 struct hv_gpci_request_buffer *arg; 402 unsigned long ret; 403 size_t n = 0; 404 405 arg = (void *)get_cpu_var(hv_gpci_reqb); 406 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 407 408 /* 409 * Pass the counter request 0xB0 corresponds to request 410 * type 'Affinity_domain_information_by_domain', 411 * to retrieve the system affinity domain information. 412 * starting_index value refers to the starting hardware 413 * processor index. 414 */ 415 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], 416 0, 0, buf, &n, arg); 417 418 if (!ret) 419 return n; 420 421 if (ret != H_PARAMETER) 422 goto out; 423 424 /* 425 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 426 * implies that buffer can't accommodate all information, and a partial buffer 427 * returned. To handle that, we need to take subsequent requests 428 * with next starting index to retrieve additional (missing) data. 429 * Below loop do subsequent hcalls with next starting index and add it 430 * to buffer util we get all the information. 431 */ 432 while (ret == H_PARAMETER) { 433 int returned_values = be16_to_cpu(arg->params.returned_values); 434 int elementsize = be16_to_cpu(arg->params.cv_element_size); 435 int last_element = (returned_values - 1) * elementsize; 436 437 /* 438 * Since the starting index value is part of counter_value 439 * buffer elements, use the starting index value in the last 440 * element and add 1 to make subsequent hcalls. 441 */ 442 u32 starting_index = arg->bytes[last_element + 1] + 443 (arg->bytes[last_element] << 8) + 1; 444 445 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 446 447 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], 448 starting_index, 0, buf, &n, arg); 449 450 if (!ret) 451 return n; 452 453 if (ret != H_PARAMETER) 454 goto out; 455 } 456 457 return n; 458 459 out: 460 put_cpu_var(hv_gpci_reqb); 461 return ret; 462 } 463 464 static void affinity_domain_via_partition_result_parse(int returned_values, 465 int element_size, char *buf, size_t *last_element, 466 size_t *n, struct hv_gpci_request_buffer *arg) 467 { 468 size_t i = 0, j = 0; 469 size_t k, l, m; 470 uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; 471 472 /* 473 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' 474 * to show the total number of counter_value array elements 475 * returned via hcall. 476 * Unlike other request types, the data structure returned by this 477 * request is variable-size. For this counter request type, 478 * hcall populates 'cv_element_size' corresponds to minimum size of 479 * the structure returned i.e; the size of the structure with no domain 480 * information. Below loop go through all counter_value array 481 * to determine the number and size of each domain array element and 482 * add it to the output buffer. 483 */ 484 while (i < returned_values) { 485 k = j; 486 for (; k < j + element_size; k++) 487 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); 488 *n += sprintf(buf + *n, "\n"); 489 490 total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; 491 size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; 492 493 for (l = 0; l < total_affinity_domain_ele; l++) { 494 for (m = 0; m < size_of_each_affinity_domain_ele; m++) { 495 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); 496 k++; 497 } 498 *n += sprintf(buf + *n, "\n"); 499 } 500 501 *n += sprintf(buf + *n, "\n"); 502 i++; 503 j = k; 504 } 505 506 *last_element = k; 507 } 508 509 static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, 510 char *buf) 511 { 512 struct hv_gpci_request_buffer *arg; 513 unsigned long ret; 514 size_t n = 0; 515 size_t last_element = 0; 516 u32 starting_index; 517 518 arg = (void *)get_cpu_var(hv_gpci_reqb); 519 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 520 521 /* 522 * Pass the counter request value 0xB1 corresponds to counter request 523 * type 'Affinity_domain_information_by_partition', 524 * to retrieve the system affinity domain by partition information. 525 * starting_index value refers to the starting hardware 526 * processor index. 527 */ 528 arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); 529 arg->params.starting_index = cpu_to_be32(0); 530 531 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 532 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 533 534 if (!ret) 535 goto parse_result; 536 537 /* 538 * ret value as 'H_PARAMETER' implies that the current buffer size 539 * can't accommodate all the information, and a partial buffer 540 * returned. To handle that, we need to make subsequent requests 541 * with next starting index to retrieve additional (missing) data. 542 * Below loop do subsequent hcalls with next starting index and add it 543 * to buffer util we get all the information. 544 */ 545 while (ret == H_PARAMETER) { 546 affinity_domain_via_partition_result_parse( 547 be16_to_cpu(arg->params.returned_values) - 1, 548 be16_to_cpu(arg->params.cv_element_size), buf, 549 &last_element, &n, arg); 550 551 if (n >= PAGE_SIZE) { 552 put_cpu_var(hv_gpci_reqb); 553 pr_debug("System information exceeds PAGE_SIZE\n"); 554 return -EFBIG; 555 } 556 557 /* 558 * Since the starting index value is part of counter_value 559 * buffer elements, use the starting_index value in the last 560 * element and add 1 to make subsequent hcalls. 561 */ 562 starting_index = (u8)arg->bytes[last_element] << 8 | 563 (u8)arg->bytes[last_element + 1]; 564 565 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 566 arg->params.counter_request = cpu_to_be32( 567 sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); 568 arg->params.starting_index = cpu_to_be32(starting_index); 569 570 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 571 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 572 573 if (ret && (ret != H_PARAMETER)) 574 goto out; 575 } 576 577 parse_result: 578 affinity_domain_via_partition_result_parse( 579 be16_to_cpu(arg->params.returned_values), 580 be16_to_cpu(arg->params.cv_element_size), 581 buf, &last_element, &n, arg); 582 583 put_cpu_var(hv_gpci_reqb); 584 return n; 585 586 out: 587 put_cpu_var(hv_gpci_reqb); 588 589 /* 590 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', 591 * which means that the current buffer size cannot accommodate 592 * all the information and a partial buffer returned. 593 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. 594 * 595 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 596 * performance information, and required to set 597 * "Enable Performance Information Collection" option. 598 */ 599 if (ret == H_AUTHORITY) 600 return -EPERM; 601 602 /* 603 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE 604 * because of invalid buffer-length/address or due to some hardware 605 * error. 606 */ 607 return -EIO; 608 } 609 610 static DEVICE_ATTR_RO(kernel_version); 611 static DEVICE_ATTR_RO(cpumask); 612 613 HV_CAPS_ATTR(version, "0x%x\n"); 614 HV_CAPS_ATTR(ga, "%d\n"); 615 HV_CAPS_ATTR(expanded, "%d\n"); 616 HV_CAPS_ATTR(lab, "%d\n"); 617 HV_CAPS_ATTR(collect_privileged, "%d\n"); 618 619 static struct attribute *interface_attrs[] = { 620 &dev_attr_kernel_version.attr, 621 &hv_caps_attr_version.attr, 622 &hv_caps_attr_ga.attr, 623 &hv_caps_attr_expanded.attr, 624 &hv_caps_attr_lab.attr, 625 &hv_caps_attr_collect_privileged.attr, 626 /* 627 * This NULL is a placeholder for the processor_bus_topology 628 * attribute, set in init function if applicable. 629 */ 630 NULL, 631 /* 632 * This NULL is a placeholder for the processor_config 633 * attribute, set in init function if applicable. 634 */ 635 NULL, 636 /* 637 * This NULL is a placeholder for the affinity_domain_via_virtual_processor 638 * attribute, set in init function if applicable. 639 */ 640 NULL, 641 /* 642 * This NULL is a placeholder for the affinity_domain_via_domain 643 * attribute, set in init function if applicable. 644 */ 645 NULL, 646 /* 647 * This NULL is a placeholder for the affinity_domain_via_partition 648 * attribute, set in init function if applicable. 649 */ 650 NULL, 651 NULL, 652 }; 653 654 static struct attribute *cpumask_attrs[] = { 655 &dev_attr_cpumask.attr, 656 NULL, 657 }; 658 659 static const struct attribute_group cpumask_attr_group = { 660 .attrs = cpumask_attrs, 661 }; 662 663 static const struct attribute_group interface_group = { 664 .name = "interface", 665 .attrs = interface_attrs, 666 }; 667 668 static const struct attribute_group *attr_groups[] = { 669 &format_group, 670 &event_group, 671 &interface_group, 672 &cpumask_attr_group, 673 NULL, 674 }; 675 676 static unsigned long single_gpci_request(u32 req, u32 starting_index, 677 u16 secondary_index, u8 version_in, u32 offset, u8 length, 678 u64 *value) 679 { 680 unsigned long ret; 681 size_t i; 682 u64 count; 683 struct hv_gpci_request_buffer *arg; 684 685 arg = (void *)get_cpu_var(hv_gpci_reqb); 686 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 687 688 arg->params.counter_request = cpu_to_be32(req); 689 arg->params.starting_index = cpu_to_be32(starting_index); 690 arg->params.secondary_index = cpu_to_be16(secondary_index); 691 arg->params.counter_info_version_in = version_in; 692 693 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 694 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 695 if (ret) { 696 pr_devel("hcall failed: 0x%lx\n", ret); 697 goto out; 698 } 699 700 /* 701 * we verify offset and length are within the zeroed buffer at event 702 * init. 703 */ 704 count = 0; 705 for (i = offset; i < offset + length; i++) 706 count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); 707 708 *value = count; 709 out: 710 put_cpu_var(hv_gpci_reqb); 711 return ret; 712 } 713 714 static u64 h_gpci_get_value(struct perf_event *event) 715 { 716 u64 count; 717 unsigned long ret = single_gpci_request(event_get_request(event), 718 event_get_starting_index(event), 719 event_get_secondary_index(event), 720 event_get_counter_info_version(event), 721 event_get_offset(event), 722 event_get_length(event), 723 &count); 724 if (ret) 725 return 0; 726 return count; 727 } 728 729 static void h_gpci_event_update(struct perf_event *event) 730 { 731 s64 prev; 732 u64 now = h_gpci_get_value(event); 733 prev = local64_xchg(&event->hw.prev_count, now); 734 local64_add(now - prev, &event->count); 735 } 736 737 static void h_gpci_event_start(struct perf_event *event, int flags) 738 { 739 local64_set(&event->hw.prev_count, h_gpci_get_value(event)); 740 } 741 742 static void h_gpci_event_stop(struct perf_event *event, int flags) 743 { 744 h_gpci_event_update(event); 745 } 746 747 static int h_gpci_event_add(struct perf_event *event, int flags) 748 { 749 if (flags & PERF_EF_START) 750 h_gpci_event_start(event, flags); 751 752 return 0; 753 } 754 755 static int h_gpci_event_init(struct perf_event *event) 756 { 757 u64 count; 758 u8 length; 759 760 /* Not our event */ 761 if (event->attr.type != event->pmu->type) 762 return -ENOENT; 763 764 /* config2 is unused */ 765 if (event->attr.config2) { 766 pr_devel("config2 set when reserved\n"); 767 return -EINVAL; 768 } 769 770 /* no branch sampling */ 771 if (has_branch_stack(event)) 772 return -EOPNOTSUPP; 773 774 length = event_get_length(event); 775 if (length < 1 || length > 8) { 776 pr_devel("length invalid\n"); 777 return -EINVAL; 778 } 779 780 /* last byte within the buffer? */ 781 if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { 782 pr_devel("request outside of buffer: %zu > %zu\n", 783 (size_t)event_get_offset(event) + length, 784 HGPCI_MAX_DATA_BYTES); 785 return -EINVAL; 786 } 787 788 /* check if the request works... */ 789 if (single_gpci_request(event_get_request(event), 790 event_get_starting_index(event), 791 event_get_secondary_index(event), 792 event_get_counter_info_version(event), 793 event_get_offset(event), 794 length, 795 &count)) { 796 pr_devel("gpci hcall failed\n"); 797 return -EINVAL; 798 } 799 800 return 0; 801 } 802 803 static struct pmu h_gpci_pmu = { 804 .task_ctx_nr = perf_invalid_context, 805 806 .name = "hv_gpci", 807 .attr_groups = attr_groups, 808 .event_init = h_gpci_event_init, 809 .add = h_gpci_event_add, 810 .del = h_gpci_event_stop, 811 .start = h_gpci_event_start, 812 .stop = h_gpci_event_stop, 813 .read = h_gpci_event_update, 814 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 815 }; 816 817 static int ppc_hv_gpci_cpu_online(unsigned int cpu) 818 { 819 if (cpumask_empty(&hv_gpci_cpumask)) 820 cpumask_set_cpu(cpu, &hv_gpci_cpumask); 821 822 return 0; 823 } 824 825 static int ppc_hv_gpci_cpu_offline(unsigned int cpu) 826 { 827 int target; 828 829 /* Check if exiting cpu is used for collecting gpci events */ 830 if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) 831 return 0; 832 833 /* Find a new cpu to collect gpci events */ 834 target = cpumask_last(cpu_active_mask); 835 836 if (target < 0 || target >= nr_cpu_ids) { 837 pr_err("hv_gpci: CPU hotplug init failed\n"); 838 return -1; 839 } 840 841 /* Migrate gpci events to the new target */ 842 cpumask_set_cpu(target, &hv_gpci_cpumask); 843 perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); 844 845 return 0; 846 } 847 848 static int hv_gpci_cpu_hotplug_init(void) 849 { 850 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, 851 "perf/powerpc/hv_gcpi:online", 852 ppc_hv_gpci_cpu_online, 853 ppc_hv_gpci_cpu_offline); 854 } 855 856 static struct device_attribute *sysinfo_device_attr_create(int 857 sysinfo_interface_group_index, u32 req) 858 { 859 struct device_attribute *attr = NULL; 860 unsigned long ret; 861 struct hv_gpci_request_buffer *arg; 862 863 if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || 864 sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { 865 pr_info("Wrong interface group index for system information\n"); 866 return NULL; 867 } 868 869 /* Check for given counter request value support */ 870 arg = (void *)get_cpu_var(hv_gpci_reqb); 871 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 872 873 arg->params.counter_request = cpu_to_be32(req); 874 875 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 876 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 877 878 put_cpu_var(hv_gpci_reqb); 879 880 /* 881 * Add given counter request value attribute in the interface_attrs 882 * attribute array, only for valid return types. 883 */ 884 if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { 885 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 886 if (!attr) 887 return NULL; 888 889 sysfs_attr_init(&attr->attr); 890 attr->attr.mode = 0444; 891 892 switch (sysinfo_interface_group_index) { 893 case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: 894 attr->attr.name = "processor_bus_topology"; 895 attr->show = processor_bus_topology_show; 896 break; 897 case INTERFACE_PROCESSOR_CONFIG_ATTR: 898 attr->attr.name = "processor_config"; 899 attr->show = processor_config_show; 900 break; 901 case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: 902 attr->attr.name = "affinity_domain_via_virtual_processor"; 903 attr->show = affinity_domain_via_virtual_processor_show; 904 break; 905 case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: 906 attr->attr.name = "affinity_domain_via_domain"; 907 attr->show = affinity_domain_via_domain_show; 908 break; 909 case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: 910 attr->attr.name = "affinity_domain_via_partition"; 911 attr->show = affinity_domain_via_partition_show; 912 break; 913 } 914 } else 915 pr_devel("hcall failed, with error: 0x%lx\n", ret); 916 917 return attr; 918 } 919 920 static void add_sysinfo_interface_files(void) 921 { 922 int sysfs_count; 923 struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; 924 int i; 925 926 sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; 927 928 /* Get device attribute for a given counter request value */ 929 for (i = 0; i < sysfs_count; i++) { 930 attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, 931 sysinfo_counter_request[i]); 932 933 if (!attr[i]) 934 goto out; 935 } 936 937 /* Add sysinfo interface attributes in the interface_attrs attribute array */ 938 for (i = 0; i < sysfs_count; i++) 939 interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; 940 941 return; 942 943 out: 944 /* 945 * The sysinfo interface attributes will be added, only if hcall passed for 946 * all the counter request values. Free the device attribute array incase 947 * of any hcall failure. 948 */ 949 if (i > 0) { 950 while (i >= 0) { 951 kfree(attr[i]); 952 i--; 953 } 954 } 955 } 956 957 static int hv_gpci_init(void) 958 { 959 int r; 960 unsigned long hret; 961 struct hv_perf_caps caps; 962 struct hv_gpci_request_buffer *arg; 963 964 hv_gpci_assert_offsets_correct(); 965 966 if (!firmware_has_feature(FW_FEATURE_LPAR)) { 967 pr_debug("not a virtualized system, not enabling\n"); 968 return -ENODEV; 969 } 970 971 hret = hv_perf_caps_get(&caps); 972 if (hret) { 973 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", 974 hret); 975 return -ENODEV; 976 } 977 978 /* init cpuhotplug */ 979 r = hv_gpci_cpu_hotplug_init(); 980 if (r) 981 return r; 982 983 /* sampling not supported */ 984 h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 985 986 arg = (void *)get_cpu_var(hv_gpci_reqb); 987 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 988 989 /* 990 * hcall H_GET_PERF_COUNTER_INFO populates the output 991 * counter_info_version value based on the system hypervisor. 992 * Pass the counter request 0x10 corresponds to request type 993 * 'Dispatch_timebase_by_processor', to get the supported 994 * counter_info_version. 995 */ 996 arg->params.counter_request = cpu_to_be32(0x10); 997 998 r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 999 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 1000 if (r) { 1001 pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); 1002 arg->params.counter_info_version_out = 0x8; 1003 } 1004 1005 /* 1006 * Use counter_info_version_out value to assign 1007 * required hv-gpci event list. 1008 */ 1009 if (arg->params.counter_info_version_out >= 0x8) 1010 event_group.attrs = hv_gpci_event_attrs; 1011 else 1012 event_group.attrs = hv_gpci_event_attrs_v6; 1013 1014 put_cpu_var(hv_gpci_reqb); 1015 1016 r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); 1017 if (r) 1018 return r; 1019 1020 /* sysinfo interface files are only available for power10 and above platforms */ 1021 if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) 1022 add_sysinfo_interface_files(); 1023 1024 return 0; 1025 } 1026 1027 device_initcall(hv_gpci_init); 1028