1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Hypervisor supplied "gpci" ("get performance counter info") performance 4 * counter support 5 * 6 * Author: Cody P Schafer <cody@linux.vnet.ibm.com> 7 * Copyright 2014 IBM Corporation. 8 */ 9 10 #define pr_fmt(fmt) "hv-gpci: " fmt 11 12 #include <linux/init.h> 13 #include <linux/perf_event.h> 14 #include <asm/firmware.h> 15 #include <asm/hvcall.h> 16 #include <asm/io.h> 17 18 #include "hv-gpci.h" 19 #include "hv-common.h" 20 21 /* 22 * Example usage: 23 * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, 24 * secondary_index=0,starting_index=0xffffffff,request=0x10/' ... 25 */ 26 27 /* u32 */ 28 EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); 29 /* u32 */ 30 /* 31 * Note that starting_index, phys_processor_idx, sibling_part_id, 32 * hw_chip_id, partition_id all refer to the same bit range. They 33 * are basically aliases for the starting_index. The specific alias 34 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h 35 */ 36 EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); 37 EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); 38 EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); 39 EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); 40 EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); 41 42 /* u16 */ 43 EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); 44 /* u8 */ 45 EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); 46 /* u8, bytes of data (1-8) */ 47 EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); 48 /* u32, byte offset */ 49 EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); 50 51 static cpumask_t hv_gpci_cpumask; 52 53 static struct attribute *format_attrs[] = { 54 &format_attr_request.attr, 55 &format_attr_starting_index.attr, 56 &format_attr_phys_processor_idx.attr, 57 &format_attr_sibling_part_id.attr, 58 &format_attr_hw_chip_id.attr, 59 &format_attr_partition_id.attr, 60 &format_attr_secondary_index.attr, 61 &format_attr_counter_info_version.attr, 62 63 &format_attr_offset.attr, 64 &format_attr_length.attr, 65 NULL, 66 }; 67 68 static const struct attribute_group format_group = { 69 .name = "format", 70 .attrs = format_attrs, 71 }; 72 73 static struct attribute_group event_group = { 74 .name = "events", 75 /* .attrs is set in init */ 76 }; 77 78 #define HV_CAPS_ATTR(_name, _format) \ 79 static ssize_t _name##_show(struct device *dev, \ 80 struct device_attribute *attr, \ 81 char *page) \ 82 { \ 83 struct hv_perf_caps caps; \ 84 unsigned long hret = hv_perf_caps_get(&caps); \ 85 if (hret) \ 86 return -EIO; \ 87 \ 88 return sprintf(page, _format, caps._name); \ 89 } \ 90 static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name) 91 92 static ssize_t kernel_version_show(struct device *dev, 93 struct device_attribute *attr, 94 char *page) 95 { 96 return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); 97 } 98 99 static ssize_t cpumask_show(struct device *dev, 100 struct device_attribute *attr, char *buf) 101 { 102 return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); 103 } 104 105 /* Interface attribute array index to store system information */ 106 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 107 #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 108 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 109 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 110 #define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 111 #define INTERFACE_NULL_ATTR 11 112 113 /* Counter request value to retrieve system information */ 114 enum { 115 PROCESSOR_BUS_TOPOLOGY, 116 PROCESSOR_CONFIG, 117 AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ 118 AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ 119 AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ 120 }; 121 122 static int sysinfo_counter_request[] = { 123 [PROCESSOR_BUS_TOPOLOGY] = 0xD0, 124 [PROCESSOR_CONFIG] = 0x90, 125 [AFFINITY_DOMAIN_VIA_VP] = 0xA0, 126 [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, 127 [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, 128 }; 129 130 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); 131 132 static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, 133 u16 secondary_index, char *buf, 134 size_t *n, struct hv_gpci_request_buffer *arg) 135 { 136 unsigned long ret; 137 size_t i, j; 138 139 arg->params.counter_request = cpu_to_be32(req); 140 arg->params.starting_index = cpu_to_be32(starting_index); 141 arg->params.secondary_index = cpu_to_be16(secondary_index); 142 143 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 144 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 145 146 /* 147 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', 148 * which means that the current buffer size cannot accommodate 149 * all the information and a partial buffer returned. 150 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. 151 * 152 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 153 * performance information, and required to set 154 * "Enable Performance Information Collection" option. 155 */ 156 if (ret == H_AUTHORITY) 157 return -EPERM; 158 159 /* 160 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE 161 * because of invalid buffer-length/address or due to some hardware 162 * error. 163 */ 164 if (ret && (ret != H_PARAMETER)) 165 return -EIO; 166 167 /* 168 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' 169 * to show the total number of counter_value array elements 170 * returned via hcall. 171 * hcall also populates 'cv_element_size' corresponds to individual 172 * counter_value array element size. Below loop go through all 173 * counter_value array elements as per their size and add it to 174 * the output buffer. 175 */ 176 for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { 177 j = i * be16_to_cpu(arg->params.cv_element_size); 178 179 for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) 180 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]); 181 *n += sprintf(buf + *n, "\n"); 182 } 183 184 if (*n >= PAGE_SIZE) { 185 pr_info("System information exceeds PAGE_SIZE\n"); 186 return -EFBIG; 187 } 188 189 return ret; 190 } 191 192 static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, 193 char *buf) 194 { 195 struct hv_gpci_request_buffer *arg; 196 unsigned long ret; 197 size_t n = 0; 198 199 arg = (void *)get_cpu_var(hv_gpci_reqb); 200 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 201 202 /* 203 * Pass the counter request value 0xD0 corresponds to request 204 * type 'Processor_bus_topology', to retrieve 205 * the system topology information. 206 * starting_index value implies the starting hardware 207 * chip id. 208 */ 209 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], 210 0, 0, buf, &n, arg); 211 212 if (!ret) 213 return n; 214 215 if (ret != H_PARAMETER) 216 goto out; 217 218 /* 219 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 220 * implies that buffer can't accommodate all information, and a partial buffer 221 * returned. To handle that, we need to make subsequent requests 222 * with next starting index to retrieve additional (missing) data. 223 * Below loop do subsequent hcalls with next starting index and add it 224 * to buffer util we get all the information. 225 */ 226 while (ret == H_PARAMETER) { 227 int returned_values = be16_to_cpu(arg->params.returned_values); 228 int elementsize = be16_to_cpu(arg->params.cv_element_size); 229 int last_element = (returned_values - 1) * elementsize; 230 231 /* 232 * Since the starting index value is part of counter_value 233 * buffer elements, use the starting index value in the last 234 * element and add 1 to make subsequent hcalls. 235 */ 236 u32 starting_index = arg->bytes[last_element + 3] + 237 (arg->bytes[last_element + 2] << 8) + 238 (arg->bytes[last_element + 1] << 16) + 239 (arg->bytes[last_element] << 24) + 1; 240 241 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 242 243 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], 244 starting_index, 0, buf, &n, arg); 245 246 if (!ret) 247 return n; 248 249 if (ret != H_PARAMETER) 250 goto out; 251 } 252 253 return n; 254 255 out: 256 put_cpu_var(hv_gpci_reqb); 257 return ret; 258 } 259 260 static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, 261 char *buf) 262 { 263 struct hv_gpci_request_buffer *arg; 264 unsigned long ret; 265 size_t n = 0; 266 267 arg = (void *)get_cpu_var(hv_gpci_reqb); 268 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 269 270 /* 271 * Pass the counter request value 0x90 corresponds to request 272 * type 'Processor_config', to retrieve 273 * the system processor information. 274 * starting_index value implies the starting hardware 275 * processor index. 276 */ 277 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], 278 0, 0, buf, &n, arg); 279 280 if (!ret) 281 return n; 282 283 if (ret != H_PARAMETER) 284 goto out; 285 286 /* 287 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 288 * implies that buffer can't accommodate all information, and a partial buffer 289 * returned. To handle that, we need to take subsequent requests 290 * with next starting index to retrieve additional (missing) data. 291 * Below loop do subsequent hcalls with next starting index and add it 292 * to buffer util we get all the information. 293 */ 294 while (ret == H_PARAMETER) { 295 int returned_values = be16_to_cpu(arg->params.returned_values); 296 int elementsize = be16_to_cpu(arg->params.cv_element_size); 297 int last_element = (returned_values - 1) * elementsize; 298 299 /* 300 * Since the starting index is part of counter_value 301 * buffer elements, use the starting index value in the last 302 * element and add 1 to subsequent hcalls. 303 */ 304 u32 starting_index = arg->bytes[last_element + 3] + 305 (arg->bytes[last_element + 2] << 8) + 306 (arg->bytes[last_element + 1] << 16) + 307 (arg->bytes[last_element] << 24) + 1; 308 309 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 310 311 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], 312 starting_index, 0, buf, &n, arg); 313 314 if (!ret) 315 return n; 316 317 if (ret != H_PARAMETER) 318 goto out; 319 } 320 321 return n; 322 323 out: 324 put_cpu_var(hv_gpci_reqb); 325 return ret; 326 } 327 328 static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, 329 struct device_attribute *attr, char *buf) 330 { 331 struct hv_gpci_request_buffer *arg; 332 unsigned long ret; 333 size_t n = 0; 334 335 arg = (void *)get_cpu_var(hv_gpci_reqb); 336 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 337 338 /* 339 * Pass the counter request 0xA0 corresponds to request 340 * type 'Affinity_domain_information_by_virutal_processor', 341 * to retrieve the system affinity domain information. 342 * starting_index value refers to the starting hardware 343 * processor index. 344 */ 345 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], 346 0, 0, buf, &n, arg); 347 348 if (!ret) 349 return n; 350 351 if (ret != H_PARAMETER) 352 goto out; 353 354 /* 355 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 356 * implies that buffer can't accommodate all information, and a partial buffer 357 * returned. To handle that, we need to take subsequent requests 358 * with next secondary index to retrieve additional (missing) data. 359 * Below loop do subsequent hcalls with next secondary index and add it 360 * to buffer util we get all the information. 361 */ 362 while (ret == H_PARAMETER) { 363 int returned_values = be16_to_cpu(arg->params.returned_values); 364 int elementsize = be16_to_cpu(arg->params.cv_element_size); 365 int last_element = (returned_values - 1) * elementsize; 366 367 /* 368 * Since the starting index and secondary index type is part of the 369 * counter_value buffer elements, use the starting index value in the 370 * last array element as subsequent starting index, and use secondary index 371 * value in the last array element plus 1 as subsequent secondary index. 372 * For counter request '0xA0', starting index points to partition id 373 * and secondary index points to corresponding virtual processor index. 374 */ 375 u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); 376 u16 secondary_index = arg->bytes[last_element + 3] + 377 (arg->bytes[last_element + 2] << 8) + 1; 378 379 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 380 381 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], 382 starting_index, secondary_index, buf, &n, arg); 383 384 if (!ret) 385 return n; 386 387 if (ret != H_PARAMETER) 388 goto out; 389 } 390 391 return n; 392 393 out: 394 put_cpu_var(hv_gpci_reqb); 395 return ret; 396 } 397 398 static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, 399 char *buf) 400 { 401 struct hv_gpci_request_buffer *arg; 402 unsigned long ret; 403 size_t n = 0; 404 405 arg = (void *)get_cpu_var(hv_gpci_reqb); 406 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 407 408 /* 409 * Pass the counter request 0xB0 corresponds to request 410 * type 'Affinity_domain_information_by_domain', 411 * to retrieve the system affinity domain information. 412 * starting_index value refers to the starting hardware 413 * processor index. 414 */ 415 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], 416 0, 0, buf, &n, arg); 417 418 if (!ret) 419 return n; 420 421 if (ret != H_PARAMETER) 422 goto out; 423 424 /* 425 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 426 * implies that buffer can't accommodate all information, and a partial buffer 427 * returned. To handle that, we need to take subsequent requests 428 * with next starting index to retrieve additional (missing) data. 429 * Below loop do subsequent hcalls with next starting index and add it 430 * to buffer util we get all the information. 431 */ 432 while (ret == H_PARAMETER) { 433 int returned_values = be16_to_cpu(arg->params.returned_values); 434 int elementsize = be16_to_cpu(arg->params.cv_element_size); 435 int last_element = (returned_values - 1) * elementsize; 436 437 /* 438 * Since the starting index value is part of counter_value 439 * buffer elements, use the starting index value in the last 440 * element and add 1 to make subsequent hcalls. 441 */ 442 u32 starting_index = arg->bytes[last_element + 1] + 443 (arg->bytes[last_element] << 8) + 1; 444 445 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 446 447 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], 448 starting_index, 0, buf, &n, arg); 449 450 if (!ret) 451 return n; 452 453 if (ret != H_PARAMETER) 454 goto out; 455 } 456 457 return n; 458 459 out: 460 put_cpu_var(hv_gpci_reqb); 461 return ret; 462 } 463 464 static void affinity_domain_via_partition_result_parse(int returned_values, 465 int element_size, char *buf, size_t *last_element, 466 size_t *n, struct hv_gpci_request_buffer *arg) 467 { 468 size_t i = 0, j = 0; 469 size_t k, l, m; 470 uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; 471 472 /* 473 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' 474 * to show the total number of counter_value array elements 475 * returned via hcall. 476 * Unlike other request types, the data structure returned by this 477 * request is variable-size. For this counter request type, 478 * hcall populates 'cv_element_size' corresponds to minimum size of 479 * the structure returned i.e; the size of the structure with no domain 480 * information. Below loop go through all counter_value array 481 * to determine the number and size of each domain array element and 482 * add it to the output buffer. 483 */ 484 while (i < returned_values) { 485 k = j; 486 for (; k < j + element_size; k++) 487 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); 488 *n += sprintf(buf + *n, "\n"); 489 490 total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; 491 size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; 492 493 for (l = 0; l < total_affinity_domain_ele; l++) { 494 for (m = 0; m < size_of_each_affinity_domain_ele; m++) { 495 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); 496 k++; 497 } 498 *n += sprintf(buf + *n, "\n"); 499 } 500 501 *n += sprintf(buf + *n, "\n"); 502 i++; 503 j = k; 504 } 505 506 *last_element = k; 507 } 508 509 static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, 510 char *buf) 511 { 512 struct hv_gpci_request_buffer *arg; 513 unsigned long ret; 514 size_t n = 0; 515 size_t last_element = 0; 516 u32 starting_index; 517 518 arg = (void *)get_cpu_var(hv_gpci_reqb); 519 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 520 521 /* 522 * Pass the counter request value 0xB1 corresponds to counter request 523 * type 'Affinity_domain_information_by_partition', 524 * to retrieve the system affinity domain by partition information. 525 * starting_index value refers to the starting hardware 526 * processor index. 527 */ 528 arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); 529 arg->params.starting_index = cpu_to_be32(0); 530 531 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 532 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 533 534 if (!ret) 535 goto parse_result; 536 537 if (ret && (ret != H_PARAMETER)) 538 goto out; 539 540 /* 541 * ret value as 'H_PARAMETER' implies that the current buffer size 542 * can't accommodate all the information, and a partial buffer 543 * returned. To handle that, we need to make subsequent requests 544 * with next starting index to retrieve additional (missing) data. 545 * Below loop do subsequent hcalls with next starting index and add it 546 * to buffer util we get all the information. 547 */ 548 while (ret == H_PARAMETER) { 549 affinity_domain_via_partition_result_parse( 550 be16_to_cpu(arg->params.returned_values) - 1, 551 be16_to_cpu(arg->params.cv_element_size), buf, 552 &last_element, &n, arg); 553 554 if (n >= PAGE_SIZE) { 555 put_cpu_var(hv_gpci_reqb); 556 pr_debug("System information exceeds PAGE_SIZE\n"); 557 return -EFBIG; 558 } 559 560 /* 561 * Since the starting index value is part of counter_value 562 * buffer elements, use the starting_index value in the last 563 * element and add 1 to make subsequent hcalls. 564 */ 565 starting_index = (u8)arg->bytes[last_element] << 8 | 566 (u8)arg->bytes[last_element + 1]; 567 568 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 569 arg->params.counter_request = cpu_to_be32( 570 sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); 571 arg->params.starting_index = cpu_to_be32(starting_index); 572 573 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 574 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 575 576 if (ret && (ret != H_PARAMETER)) 577 goto out; 578 } 579 580 parse_result: 581 affinity_domain_via_partition_result_parse( 582 be16_to_cpu(arg->params.returned_values), 583 be16_to_cpu(arg->params.cv_element_size), 584 buf, &last_element, &n, arg); 585 586 put_cpu_var(hv_gpci_reqb); 587 return n; 588 589 out: 590 put_cpu_var(hv_gpci_reqb); 591 592 /* 593 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', 594 * which means that the current buffer size cannot accommodate 595 * all the information and a partial buffer returned. 596 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. 597 * 598 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 599 * performance information, and required to set 600 * "Enable Performance Information Collection" option. 601 */ 602 if (ret == H_AUTHORITY) 603 return -EPERM; 604 605 /* 606 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE 607 * because of invalid buffer-length/address or due to some hardware 608 * error. 609 */ 610 return -EIO; 611 } 612 613 static DEVICE_ATTR_RO(kernel_version); 614 static DEVICE_ATTR_RO(cpumask); 615 616 HV_CAPS_ATTR(version, "0x%x\n"); 617 HV_CAPS_ATTR(ga, "%d\n"); 618 HV_CAPS_ATTR(expanded, "%d\n"); 619 HV_CAPS_ATTR(lab, "%d\n"); 620 HV_CAPS_ATTR(collect_privileged, "%d\n"); 621 622 static struct attribute *interface_attrs[] = { 623 &dev_attr_kernel_version.attr, 624 &hv_caps_attr_version.attr, 625 &hv_caps_attr_ga.attr, 626 &hv_caps_attr_expanded.attr, 627 &hv_caps_attr_lab.attr, 628 &hv_caps_attr_collect_privileged.attr, 629 /* 630 * This NULL is a placeholder for the processor_bus_topology 631 * attribute, set in init function if applicable. 632 */ 633 NULL, 634 /* 635 * This NULL is a placeholder for the processor_config 636 * attribute, set in init function if applicable. 637 */ 638 NULL, 639 /* 640 * This NULL is a placeholder for the affinity_domain_via_virtual_processor 641 * attribute, set in init function if applicable. 642 */ 643 NULL, 644 /* 645 * This NULL is a placeholder for the affinity_domain_via_domain 646 * attribute, set in init function if applicable. 647 */ 648 NULL, 649 /* 650 * This NULL is a placeholder for the affinity_domain_via_partition 651 * attribute, set in init function if applicable. 652 */ 653 NULL, 654 NULL, 655 }; 656 657 static struct attribute *cpumask_attrs[] = { 658 &dev_attr_cpumask.attr, 659 NULL, 660 }; 661 662 static const struct attribute_group cpumask_attr_group = { 663 .attrs = cpumask_attrs, 664 }; 665 666 static const struct attribute_group interface_group = { 667 .name = "interface", 668 .attrs = interface_attrs, 669 }; 670 671 static const struct attribute_group *attr_groups[] = { 672 &format_group, 673 &event_group, 674 &interface_group, 675 &cpumask_attr_group, 676 NULL, 677 }; 678 679 static unsigned long single_gpci_request(u32 req, u32 starting_index, 680 u16 secondary_index, u8 version_in, u32 offset, u8 length, 681 u64 *value) 682 { 683 unsigned long ret; 684 size_t i; 685 u64 count; 686 struct hv_gpci_request_buffer *arg; 687 688 arg = (void *)get_cpu_var(hv_gpci_reqb); 689 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 690 691 arg->params.counter_request = cpu_to_be32(req); 692 arg->params.starting_index = cpu_to_be32(starting_index); 693 arg->params.secondary_index = cpu_to_be16(secondary_index); 694 arg->params.counter_info_version_in = version_in; 695 696 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 697 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 698 699 /* 700 * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', 701 * specifies that the current buffer size cannot accommodate 702 * all the information and a partial buffer returned. 703 * Since in this function we are only accessing data for a given starting index, 704 * we don't need to accommodate whole data and can get required count by 705 * accessing first entry data. 706 * Hence hcall fails only incase the ret value is other than H_SUCCESS or 707 * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). 708 */ 709 if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) 710 ret = 0; 711 712 if (ret) { 713 pr_devel("hcall failed: 0x%lx\n", ret); 714 goto out; 715 } 716 717 /* 718 * we verify offset and length are within the zeroed buffer at event 719 * init. 720 */ 721 count = 0; 722 for (i = offset; i < offset + length; i++) 723 count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); 724 725 *value = count; 726 out: 727 put_cpu_var(hv_gpci_reqb); 728 return ret; 729 } 730 731 static u64 h_gpci_get_value(struct perf_event *event) 732 { 733 u64 count; 734 unsigned long ret = single_gpci_request(event_get_request(event), 735 event_get_starting_index(event), 736 event_get_secondary_index(event), 737 event_get_counter_info_version(event), 738 event_get_offset(event), 739 event_get_length(event), 740 &count); 741 if (ret) 742 return 0; 743 return count; 744 } 745 746 static void h_gpci_event_update(struct perf_event *event) 747 { 748 s64 prev; 749 u64 now = h_gpci_get_value(event); 750 prev = local64_xchg(&event->hw.prev_count, now); 751 local64_add(now - prev, &event->count); 752 } 753 754 static void h_gpci_event_start(struct perf_event *event, int flags) 755 { 756 local64_set(&event->hw.prev_count, h_gpci_get_value(event)); 757 } 758 759 static void h_gpci_event_stop(struct perf_event *event, int flags) 760 { 761 h_gpci_event_update(event); 762 } 763 764 static int h_gpci_event_add(struct perf_event *event, int flags) 765 { 766 if (flags & PERF_EF_START) 767 h_gpci_event_start(event, flags); 768 769 return 0; 770 } 771 772 static int h_gpci_event_init(struct perf_event *event) 773 { 774 u64 count; 775 u8 length; 776 unsigned long ret; 777 778 /* Not our event */ 779 if (event->attr.type != event->pmu->type) 780 return -ENOENT; 781 782 /* config2 is unused */ 783 if (event->attr.config2) { 784 pr_devel("config2 set when reserved\n"); 785 return -EINVAL; 786 } 787 788 /* no branch sampling */ 789 if (has_branch_stack(event)) 790 return -EOPNOTSUPP; 791 792 length = event_get_length(event); 793 if (length < 1 || length > 8) { 794 pr_devel("length invalid\n"); 795 return -EINVAL; 796 } 797 798 /* last byte within the buffer? */ 799 if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { 800 pr_devel("request outside of buffer: %zu > %zu\n", 801 (size_t)event_get_offset(event) + length, 802 HGPCI_MAX_DATA_BYTES); 803 return -EINVAL; 804 } 805 806 /* check if the request works... */ 807 ret = single_gpci_request(event_get_request(event), 808 event_get_starting_index(event), 809 event_get_secondary_index(event), 810 event_get_counter_info_version(event), 811 event_get_offset(event), 812 length, 813 &count); 814 815 /* 816 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 817 * performance information, and required to set 818 * "Enable Performance Information Collection" option. 819 */ 820 if (ret == H_AUTHORITY) 821 return -EPERM; 822 823 if (ret) { 824 pr_devel("gpci hcall failed\n"); 825 return -EINVAL; 826 } 827 828 return 0; 829 } 830 831 static struct pmu h_gpci_pmu = { 832 .task_ctx_nr = perf_invalid_context, 833 834 .name = "hv_gpci", 835 .attr_groups = attr_groups, 836 .event_init = h_gpci_event_init, 837 .add = h_gpci_event_add, 838 .del = h_gpci_event_stop, 839 .start = h_gpci_event_start, 840 .stop = h_gpci_event_stop, 841 .read = h_gpci_event_update, 842 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 843 }; 844 845 static int ppc_hv_gpci_cpu_online(unsigned int cpu) 846 { 847 if (cpumask_empty(&hv_gpci_cpumask)) 848 cpumask_set_cpu(cpu, &hv_gpci_cpumask); 849 850 return 0; 851 } 852 853 static int ppc_hv_gpci_cpu_offline(unsigned int cpu) 854 { 855 int target; 856 857 /* Check if exiting cpu is used for collecting gpci events */ 858 if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) 859 return 0; 860 861 /* Find a new cpu to collect gpci events */ 862 target = cpumask_last(cpu_active_mask); 863 864 if (target < 0 || target >= nr_cpu_ids) { 865 pr_err("hv_gpci: CPU hotplug init failed\n"); 866 return -1; 867 } 868 869 /* Migrate gpci events to the new target */ 870 cpumask_set_cpu(target, &hv_gpci_cpumask); 871 perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); 872 873 return 0; 874 } 875 876 static int hv_gpci_cpu_hotplug_init(void) 877 { 878 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, 879 "perf/powerpc/hv_gcpi:online", 880 ppc_hv_gpci_cpu_online, 881 ppc_hv_gpci_cpu_offline); 882 } 883 884 static struct device_attribute *sysinfo_device_attr_create(int 885 sysinfo_interface_group_index, u32 req) 886 { 887 struct device_attribute *attr = NULL; 888 unsigned long ret; 889 struct hv_gpci_request_buffer *arg; 890 891 if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || 892 sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { 893 pr_info("Wrong interface group index for system information\n"); 894 return NULL; 895 } 896 897 /* Check for given counter request value support */ 898 arg = (void *)get_cpu_var(hv_gpci_reqb); 899 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 900 901 arg->params.counter_request = cpu_to_be32(req); 902 903 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 904 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 905 906 put_cpu_var(hv_gpci_reqb); 907 908 /* 909 * Add given counter request value attribute in the interface_attrs 910 * attribute array, only for valid return types. 911 */ 912 if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { 913 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 914 if (!attr) 915 return NULL; 916 917 sysfs_attr_init(&attr->attr); 918 attr->attr.mode = 0444; 919 920 switch (sysinfo_interface_group_index) { 921 case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: 922 attr->attr.name = "processor_bus_topology"; 923 attr->show = processor_bus_topology_show; 924 break; 925 case INTERFACE_PROCESSOR_CONFIG_ATTR: 926 attr->attr.name = "processor_config"; 927 attr->show = processor_config_show; 928 break; 929 case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: 930 attr->attr.name = "affinity_domain_via_virtual_processor"; 931 attr->show = affinity_domain_via_virtual_processor_show; 932 break; 933 case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: 934 attr->attr.name = "affinity_domain_via_domain"; 935 attr->show = affinity_domain_via_domain_show; 936 break; 937 case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: 938 attr->attr.name = "affinity_domain_via_partition"; 939 attr->show = affinity_domain_via_partition_show; 940 break; 941 } 942 } else 943 pr_devel("hcall failed, with error: 0x%lx\n", ret); 944 945 return attr; 946 } 947 948 static void add_sysinfo_interface_files(void) 949 { 950 int sysfs_count; 951 struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; 952 int i; 953 954 sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; 955 956 /* Get device attribute for a given counter request value */ 957 for (i = 0; i < sysfs_count; i++) { 958 attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, 959 sysinfo_counter_request[i]); 960 961 if (!attr[i]) 962 goto out; 963 } 964 965 /* Add sysinfo interface attributes in the interface_attrs attribute array */ 966 for (i = 0; i < sysfs_count; i++) 967 interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; 968 969 return; 970 971 out: 972 /* 973 * The sysinfo interface attributes will be added, only if hcall passed for 974 * all the counter request values. Free the device attribute array incase 975 * of any hcall failure. 976 */ 977 if (i > 0) { 978 while (i >= 0) { 979 kfree(attr[i]); 980 i--; 981 } 982 } 983 } 984 985 static int hv_gpci_init(void) 986 { 987 int r; 988 unsigned long hret; 989 struct hv_perf_caps caps; 990 struct hv_gpci_request_buffer *arg; 991 992 hv_gpci_assert_offsets_correct(); 993 994 if (!firmware_has_feature(FW_FEATURE_LPAR)) { 995 pr_debug("not a virtualized system, not enabling\n"); 996 return -ENODEV; 997 } 998 999 hret = hv_perf_caps_get(&caps); 1000 if (hret) { 1001 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", 1002 hret); 1003 return -ENODEV; 1004 } 1005 1006 /* init cpuhotplug */ 1007 r = hv_gpci_cpu_hotplug_init(); 1008 if (r) 1009 return r; 1010 1011 /* sampling not supported */ 1012 h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 1013 1014 arg = (void *)get_cpu_var(hv_gpci_reqb); 1015 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 1016 1017 /* 1018 * hcall H_GET_PERF_COUNTER_INFO populates the output 1019 * counter_info_version value based on the system hypervisor. 1020 * Pass the counter request 0x10 corresponds to request type 1021 * 'Dispatch_timebase_by_processor', to get the supported 1022 * counter_info_version. 1023 */ 1024 arg->params.counter_request = cpu_to_be32(0x10); 1025 1026 r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 1027 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 1028 if (r) { 1029 pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); 1030 arg->params.counter_info_version_out = 0x8; 1031 } 1032 1033 /* 1034 * Use counter_info_version_out value to assign 1035 * required hv-gpci event list. 1036 */ 1037 if (arg->params.counter_info_version_out >= 0x8) 1038 event_group.attrs = hv_gpci_event_attrs; 1039 else 1040 event_group.attrs = hv_gpci_event_attrs_v6; 1041 1042 put_cpu_var(hv_gpci_reqb); 1043 1044 r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); 1045 if (r) 1046 return r; 1047 1048 /* sysinfo interface files are only available for power10 and above platforms */ 1049 if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) 1050 add_sysinfo_interface_files(); 1051 1052 return 0; 1053 } 1054 1055 device_initcall(hv_gpci_init); 1056