1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Hypervisor supplied "gpci" ("get performance counter info") performance 4 * counter support 5 * 6 * Author: Cody P Schafer <cody@linux.vnet.ibm.com> 7 * Copyright 2014 IBM Corporation. 8 */ 9 10 #define pr_fmt(fmt) "hv-gpci: " fmt 11 12 #include <linux/init.h> 13 #include <linux/perf_event.h> 14 #include <asm/firmware.h> 15 #include <asm/hvcall.h> 16 #include <asm/io.h> 17 18 #include "hv-gpci.h" 19 #include "hv-common.h" 20 21 /* 22 * Example usage: 23 * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, 24 * secondary_index=0,starting_index=0xffffffff,request=0x10/' ... 25 */ 26 27 /* u32 */ 28 EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); 29 /* u32 */ 30 /* 31 * Note that starting_index, phys_processor_idx, sibling_part_id, 32 * hw_chip_id, partition_id all refer to the same bit range. They 33 * are basically aliases for the starting_index. The specific alias 34 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h 35 */ 36 EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); 37 EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); 38 EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); 39 EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); 40 EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); 41 42 /* u16 */ 43 EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); 44 /* u8 */ 45 EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); 46 /* u8, bytes of data (1-8) */ 47 EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); 48 /* u32, byte offset */ 49 EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); 50 51 static cpumask_t hv_gpci_cpumask; 52 53 static struct attribute *format_attrs[] = { 54 &format_attr_request.attr, 55 &format_attr_starting_index.attr, 56 &format_attr_phys_processor_idx.attr, 57 &format_attr_sibling_part_id.attr, 58 &format_attr_hw_chip_id.attr, 59 &format_attr_partition_id.attr, 60 &format_attr_secondary_index.attr, 61 &format_attr_counter_info_version.attr, 62 63 &format_attr_offset.attr, 64 &format_attr_length.attr, 65 NULL, 66 }; 67 68 static const struct attribute_group format_group = { 69 .name = "format", 70 .attrs = format_attrs, 71 }; 72 73 static struct attribute_group event_group = { 74 .name = "events", 75 /* .attrs is set in init */ 76 }; 77 78 #define HV_CAPS_ATTR(_name, _format) \ 79 static ssize_t _name##_show(struct device *dev, \ 80 struct device_attribute *attr, \ 81 char *page) \ 82 { \ 83 struct hv_perf_caps caps; \ 84 unsigned long hret = hv_perf_caps_get(&caps); \ 85 if (hret) \ 86 return -EIO; \ 87 \ 88 return sprintf(page, _format, caps._name); \ 89 } \ 90 static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name) 91 92 static ssize_t kernel_version_show(struct device *dev, 93 struct device_attribute *attr, 94 char *page) 95 { 96 return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); 97 } 98 99 static ssize_t cpumask_show(struct device *dev, 100 struct device_attribute *attr, char *buf) 101 { 102 return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); 103 } 104 105 /* Interface attribute array index to store system information */ 106 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 107 #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 108 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 109 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 110 #define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 111 #define INTERFACE_NULL_ATTR 11 112 113 /* Counter request value to retrieve system information */ 114 enum { 115 PROCESSOR_BUS_TOPOLOGY, 116 PROCESSOR_CONFIG, 117 AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ 118 AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ 119 AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ 120 }; 121 122 static int sysinfo_counter_request[] = { 123 [PROCESSOR_BUS_TOPOLOGY] = 0xD0, 124 [PROCESSOR_CONFIG] = 0x90, 125 [AFFINITY_DOMAIN_VIA_VP] = 0xA0, 126 [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, 127 [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, 128 }; 129 130 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); 131 132 static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, 133 u16 secondary_index, char *buf, 134 size_t *n, struct hv_gpci_request_buffer *arg) 135 { 136 unsigned long ret; 137 size_t i, j; 138 139 arg->params.counter_request = cpu_to_be32(req); 140 arg->params.starting_index = cpu_to_be32(starting_index); 141 arg->params.secondary_index = cpu_to_be16(secondary_index); 142 143 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 144 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 145 146 /* 147 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', 148 * which means that the current buffer size cannot accommodate 149 * all the information and a partial buffer returned. 150 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. 151 * 152 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 153 * performance information, and required to set 154 * "Enable Performance Information Collection" option. 155 */ 156 if (ret == H_AUTHORITY) 157 return -EPERM; 158 159 /* 160 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE 161 * because of invalid buffer-length/address or due to some hardware 162 * error. 163 */ 164 if (ret && (ret != H_PARAMETER)) 165 return -EIO; 166 167 /* 168 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' 169 * to show the total number of counter_value array elements 170 * returned via hcall. 171 * hcall also populates 'cv_element_size' corresponds to individual 172 * counter_value array element size. Below loop go through all 173 * counter_value array elements as per their size and add it to 174 * the output buffer. 175 */ 176 for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { 177 j = i * be16_to_cpu(arg->params.cv_element_size); 178 179 for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) 180 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]); 181 *n += sprintf(buf + *n, "\n"); 182 } 183 184 if (*n >= PAGE_SIZE) { 185 pr_info("System information exceeds PAGE_SIZE\n"); 186 return -EFBIG; 187 } 188 189 return ret; 190 } 191 192 static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, 193 char *buf) 194 { 195 struct hv_gpci_request_buffer *arg; 196 unsigned long ret; 197 size_t n = 0; 198 199 arg = (void *)get_cpu_var(hv_gpci_reqb); 200 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 201 202 /* 203 * Pass the counter request value 0xD0 corresponds to request 204 * type 'Processor_bus_topology', to retrieve 205 * the system topology information. 206 * starting_index value implies the starting hardware 207 * chip id. 208 */ 209 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], 210 0, 0, buf, &n, arg); 211 212 if (!ret) 213 goto out_success; 214 215 if (ret != H_PARAMETER) 216 goto out; 217 218 /* 219 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 220 * implies that buffer can't accommodate all information, and a partial buffer 221 * returned. To handle that, we need to make subsequent requests 222 * with next starting index to retrieve additional (missing) data. 223 * Below loop do subsequent hcalls with next starting index and add it 224 * to buffer util we get all the information. 225 */ 226 while (ret == H_PARAMETER) { 227 int returned_values = be16_to_cpu(arg->params.returned_values); 228 int elementsize = be16_to_cpu(arg->params.cv_element_size); 229 int last_element = (returned_values - 1) * elementsize; 230 231 /* 232 * Since the starting index value is part of counter_value 233 * buffer elements, use the starting index value in the last 234 * element and add 1 to make subsequent hcalls. 235 */ 236 u32 starting_index = arg->bytes[last_element + 3] + 237 (arg->bytes[last_element + 2] << 8) + 238 (arg->bytes[last_element + 1] << 16) + 239 (arg->bytes[last_element] << 24) + 1; 240 241 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 242 243 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], 244 starting_index, 0, buf, &n, arg); 245 246 if (!ret) 247 goto out_success; 248 249 if (ret != H_PARAMETER) 250 goto out; 251 } 252 253 out_success: 254 put_cpu_var(hv_gpci_reqb); 255 return n; 256 257 out: 258 put_cpu_var(hv_gpci_reqb); 259 return ret; 260 } 261 262 static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, 263 char *buf) 264 { 265 struct hv_gpci_request_buffer *arg; 266 unsigned long ret; 267 size_t n = 0; 268 269 arg = (void *)get_cpu_var(hv_gpci_reqb); 270 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 271 272 /* 273 * Pass the counter request value 0x90 corresponds to request 274 * type 'Processor_config', to retrieve 275 * the system processor information. 276 * starting_index value implies the starting hardware 277 * processor index. 278 */ 279 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], 280 0, 0, buf, &n, arg); 281 282 if (!ret) 283 goto out_success; 284 285 if (ret != H_PARAMETER) 286 goto out; 287 288 /* 289 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 290 * implies that buffer can't accommodate all information, and a partial buffer 291 * returned. To handle that, we need to take subsequent requests 292 * with next starting index to retrieve additional (missing) data. 293 * Below loop do subsequent hcalls with next starting index and add it 294 * to buffer util we get all the information. 295 */ 296 while (ret == H_PARAMETER) { 297 int returned_values = be16_to_cpu(arg->params.returned_values); 298 int elementsize = be16_to_cpu(arg->params.cv_element_size); 299 int last_element = (returned_values - 1) * elementsize; 300 301 /* 302 * Since the starting index is part of counter_value 303 * buffer elements, use the starting index value in the last 304 * element and add 1 to subsequent hcalls. 305 */ 306 u32 starting_index = arg->bytes[last_element + 3] + 307 (arg->bytes[last_element + 2] << 8) + 308 (arg->bytes[last_element + 1] << 16) + 309 (arg->bytes[last_element] << 24) + 1; 310 311 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 312 313 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], 314 starting_index, 0, buf, &n, arg); 315 316 if (!ret) 317 goto out_success; 318 319 if (ret != H_PARAMETER) 320 goto out; 321 } 322 323 out_success: 324 put_cpu_var(hv_gpci_reqb); 325 return n; 326 327 out: 328 put_cpu_var(hv_gpci_reqb); 329 return ret; 330 } 331 332 static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, 333 struct device_attribute *attr, char *buf) 334 { 335 struct hv_gpci_request_buffer *arg; 336 unsigned long ret; 337 size_t n = 0; 338 339 arg = (void *)get_cpu_var(hv_gpci_reqb); 340 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 341 342 /* 343 * Pass the counter request 0xA0 corresponds to request 344 * type 'Affinity_domain_information_by_virutal_processor', 345 * to retrieve the system affinity domain information. 346 * starting_index value refers to the starting hardware 347 * processor index. 348 */ 349 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], 350 0, 0, buf, &n, arg); 351 352 if (!ret) 353 goto out_success; 354 355 if (ret != H_PARAMETER) 356 goto out; 357 358 /* 359 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 360 * implies that buffer can't accommodate all information, and a partial buffer 361 * returned. To handle that, we need to take subsequent requests 362 * with next secondary index to retrieve additional (missing) data. 363 * Below loop do subsequent hcalls with next secondary index and add it 364 * to buffer util we get all the information. 365 */ 366 while (ret == H_PARAMETER) { 367 int returned_values = be16_to_cpu(arg->params.returned_values); 368 int elementsize = be16_to_cpu(arg->params.cv_element_size); 369 int last_element = (returned_values - 1) * elementsize; 370 371 /* 372 * Since the starting index and secondary index type is part of the 373 * counter_value buffer elements, use the starting index value in the 374 * last array element as subsequent starting index, and use secondary index 375 * value in the last array element plus 1 as subsequent secondary index. 376 * For counter request '0xA0', starting index points to partition id 377 * and secondary index points to corresponding virtual processor index. 378 */ 379 u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); 380 u16 secondary_index = arg->bytes[last_element + 3] + 381 (arg->bytes[last_element + 2] << 8) + 1; 382 383 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 384 385 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], 386 starting_index, secondary_index, buf, &n, arg); 387 388 if (!ret) 389 goto out_success; 390 391 if (ret != H_PARAMETER) 392 goto out; 393 } 394 395 out_success: 396 put_cpu_var(hv_gpci_reqb); 397 return n; 398 399 out: 400 put_cpu_var(hv_gpci_reqb); 401 return ret; 402 } 403 404 static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, 405 char *buf) 406 { 407 struct hv_gpci_request_buffer *arg; 408 unsigned long ret; 409 size_t n = 0; 410 411 arg = (void *)get_cpu_var(hv_gpci_reqb); 412 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 413 414 /* 415 * Pass the counter request 0xB0 corresponds to request 416 * type 'Affinity_domain_information_by_domain', 417 * to retrieve the system affinity domain information. 418 * starting_index value refers to the starting hardware 419 * processor index. 420 */ 421 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], 422 0, 0, buf, &n, arg); 423 424 if (!ret) 425 goto out_success; 426 427 if (ret != H_PARAMETER) 428 goto out; 429 430 /* 431 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which 432 * implies that buffer can't accommodate all information, and a partial buffer 433 * returned. To handle that, we need to take subsequent requests 434 * with next starting index to retrieve additional (missing) data. 435 * Below loop do subsequent hcalls with next starting index and add it 436 * to buffer util we get all the information. 437 */ 438 while (ret == H_PARAMETER) { 439 int returned_values = be16_to_cpu(arg->params.returned_values); 440 int elementsize = be16_to_cpu(arg->params.cv_element_size); 441 int last_element = (returned_values - 1) * elementsize; 442 443 /* 444 * Since the starting index value is part of counter_value 445 * buffer elements, use the starting index value in the last 446 * element and add 1 to make subsequent hcalls. 447 */ 448 u32 starting_index = arg->bytes[last_element + 1] + 449 (arg->bytes[last_element] << 8) + 1; 450 451 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 452 453 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], 454 starting_index, 0, buf, &n, arg); 455 456 if (!ret) 457 goto out_success; 458 459 if (ret != H_PARAMETER) 460 goto out; 461 } 462 463 out_success: 464 put_cpu_var(hv_gpci_reqb); 465 return n; 466 467 out: 468 put_cpu_var(hv_gpci_reqb); 469 return ret; 470 } 471 472 static void affinity_domain_via_partition_result_parse(int returned_values, 473 int element_size, char *buf, size_t *last_element, 474 size_t *n, struct hv_gpci_request_buffer *arg) 475 { 476 size_t i = 0, j = 0; 477 size_t k, l, m; 478 uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; 479 480 /* 481 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' 482 * to show the total number of counter_value array elements 483 * returned via hcall. 484 * Unlike other request types, the data structure returned by this 485 * request is variable-size. For this counter request type, 486 * hcall populates 'cv_element_size' corresponds to minimum size of 487 * the structure returned i.e; the size of the structure with no domain 488 * information. Below loop go through all counter_value array 489 * to determine the number and size of each domain array element and 490 * add it to the output buffer. 491 */ 492 while (i < returned_values) { 493 k = j; 494 for (; k < j + element_size; k++) 495 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); 496 *n += sprintf(buf + *n, "\n"); 497 498 total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; 499 size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; 500 501 for (l = 0; l < total_affinity_domain_ele; l++) { 502 for (m = 0; m < size_of_each_affinity_domain_ele; m++) { 503 *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); 504 k++; 505 } 506 *n += sprintf(buf + *n, "\n"); 507 } 508 509 *n += sprintf(buf + *n, "\n"); 510 i++; 511 j = k; 512 } 513 514 *last_element = k; 515 } 516 517 static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, 518 char *buf) 519 { 520 struct hv_gpci_request_buffer *arg; 521 unsigned long ret; 522 size_t n = 0; 523 size_t last_element = 0; 524 u32 starting_index; 525 526 arg = (void *)get_cpu_var(hv_gpci_reqb); 527 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 528 529 /* 530 * Pass the counter request value 0xB1 corresponds to counter request 531 * type 'Affinity_domain_information_by_partition', 532 * to retrieve the system affinity domain by partition information. 533 * starting_index value refers to the starting hardware 534 * processor index. 535 */ 536 arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); 537 arg->params.starting_index = cpu_to_be32(0); 538 539 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 540 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 541 542 if (!ret) 543 goto parse_result; 544 545 if (ret && (ret != H_PARAMETER)) 546 goto out; 547 548 /* 549 * ret value as 'H_PARAMETER' implies that the current buffer size 550 * can't accommodate all the information, and a partial buffer 551 * returned. To handle that, we need to make subsequent requests 552 * with next starting index to retrieve additional (missing) data. 553 * Below loop do subsequent hcalls with next starting index and add it 554 * to buffer util we get all the information. 555 */ 556 while (ret == H_PARAMETER) { 557 affinity_domain_via_partition_result_parse( 558 be16_to_cpu(arg->params.returned_values) - 1, 559 be16_to_cpu(arg->params.cv_element_size), buf, 560 &last_element, &n, arg); 561 562 if (n >= PAGE_SIZE) { 563 put_cpu_var(hv_gpci_reqb); 564 pr_debug("System information exceeds PAGE_SIZE\n"); 565 return -EFBIG; 566 } 567 568 /* 569 * Since the starting index value is part of counter_value 570 * buffer elements, use the starting_index value in the last 571 * element and add 1 to make subsequent hcalls. 572 */ 573 starting_index = (u8)arg->bytes[last_element] << 8 | 574 (u8)arg->bytes[last_element + 1]; 575 576 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 577 arg->params.counter_request = cpu_to_be32( 578 sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); 579 arg->params.starting_index = cpu_to_be32(starting_index); 580 581 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 582 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 583 584 if (ret && (ret != H_PARAMETER)) 585 goto out; 586 } 587 588 parse_result: 589 affinity_domain_via_partition_result_parse( 590 be16_to_cpu(arg->params.returned_values), 591 be16_to_cpu(arg->params.cv_element_size), 592 buf, &last_element, &n, arg); 593 594 put_cpu_var(hv_gpci_reqb); 595 return n; 596 597 out: 598 put_cpu_var(hv_gpci_reqb); 599 600 /* 601 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', 602 * which means that the current buffer size cannot accommodate 603 * all the information and a partial buffer returned. 604 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. 605 * 606 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 607 * performance information, and required to set 608 * "Enable Performance Information Collection" option. 609 */ 610 if (ret == H_AUTHORITY) 611 return -EPERM; 612 613 /* 614 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE 615 * because of invalid buffer-length/address or due to some hardware 616 * error. 617 */ 618 return -EIO; 619 } 620 621 static DEVICE_ATTR_RO(kernel_version); 622 static DEVICE_ATTR_RO(cpumask); 623 624 HV_CAPS_ATTR(version, "0x%x\n"); 625 HV_CAPS_ATTR(ga, "%d\n"); 626 HV_CAPS_ATTR(expanded, "%d\n"); 627 HV_CAPS_ATTR(lab, "%d\n"); 628 HV_CAPS_ATTR(collect_privileged, "%d\n"); 629 630 static struct attribute *interface_attrs[] = { 631 &dev_attr_kernel_version.attr, 632 &hv_caps_attr_version.attr, 633 &hv_caps_attr_ga.attr, 634 &hv_caps_attr_expanded.attr, 635 &hv_caps_attr_lab.attr, 636 &hv_caps_attr_collect_privileged.attr, 637 /* 638 * This NULL is a placeholder for the processor_bus_topology 639 * attribute, set in init function if applicable. 640 */ 641 NULL, 642 /* 643 * This NULL is a placeholder for the processor_config 644 * attribute, set in init function if applicable. 645 */ 646 NULL, 647 /* 648 * This NULL is a placeholder for the affinity_domain_via_virtual_processor 649 * attribute, set in init function if applicable. 650 */ 651 NULL, 652 /* 653 * This NULL is a placeholder for the affinity_domain_via_domain 654 * attribute, set in init function if applicable. 655 */ 656 NULL, 657 /* 658 * This NULL is a placeholder for the affinity_domain_via_partition 659 * attribute, set in init function if applicable. 660 */ 661 NULL, 662 NULL, 663 }; 664 665 static struct attribute *cpumask_attrs[] = { 666 &dev_attr_cpumask.attr, 667 NULL, 668 }; 669 670 static const struct attribute_group cpumask_attr_group = { 671 .attrs = cpumask_attrs, 672 }; 673 674 static const struct attribute_group interface_group = { 675 .name = "interface", 676 .attrs = interface_attrs, 677 }; 678 679 static const struct attribute_group *attr_groups[] = { 680 &format_group, 681 &event_group, 682 &interface_group, 683 &cpumask_attr_group, 684 NULL, 685 }; 686 687 static unsigned long single_gpci_request(u32 req, u32 starting_index, 688 u16 secondary_index, u8 version_in, u32 offset, u8 length, 689 u64 *value) 690 { 691 unsigned long ret; 692 size_t i; 693 u64 count; 694 struct hv_gpci_request_buffer *arg; 695 696 arg = (void *)get_cpu_var(hv_gpci_reqb); 697 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 698 699 arg->params.counter_request = cpu_to_be32(req); 700 arg->params.starting_index = cpu_to_be32(starting_index); 701 arg->params.secondary_index = cpu_to_be16(secondary_index); 702 arg->params.counter_info_version_in = version_in; 703 704 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 705 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 706 707 /* 708 * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', 709 * specifies that the current buffer size cannot accommodate 710 * all the information and a partial buffer returned. 711 * Since in this function we are only accessing data for a given starting index, 712 * we don't need to accommodate whole data and can get required count by 713 * accessing first entry data. 714 * Hence hcall fails only incase the ret value is other than H_SUCCESS or 715 * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). 716 */ 717 if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) 718 ret = 0; 719 720 if (ret) { 721 pr_devel("hcall failed: 0x%lx\n", ret); 722 goto out; 723 } 724 725 /* 726 * we verify offset and length are within the zeroed buffer at event 727 * init. 728 */ 729 count = 0; 730 for (i = offset; i < offset + length; i++) 731 count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); 732 733 *value = count; 734 out: 735 put_cpu_var(hv_gpci_reqb); 736 return ret; 737 } 738 739 static u64 h_gpci_get_value(struct perf_event *event) 740 { 741 u64 count; 742 unsigned long ret = single_gpci_request(event_get_request(event), 743 event_get_starting_index(event), 744 event_get_secondary_index(event), 745 event_get_counter_info_version(event), 746 event_get_offset(event), 747 event_get_length(event), 748 &count); 749 if (ret) 750 return 0; 751 return count; 752 } 753 754 static void h_gpci_event_update(struct perf_event *event) 755 { 756 s64 prev; 757 u64 now = h_gpci_get_value(event); 758 prev = local64_xchg(&event->hw.prev_count, now); 759 local64_add(now - prev, &event->count); 760 } 761 762 static void h_gpci_event_start(struct perf_event *event, int flags) 763 { 764 local64_set(&event->hw.prev_count, h_gpci_get_value(event)); 765 } 766 767 static void h_gpci_event_stop(struct perf_event *event, int flags) 768 { 769 h_gpci_event_update(event); 770 } 771 772 static int h_gpci_event_add(struct perf_event *event, int flags) 773 { 774 if (flags & PERF_EF_START) 775 h_gpci_event_start(event, flags); 776 777 return 0; 778 } 779 780 static int h_gpci_event_init(struct perf_event *event) 781 { 782 u64 count; 783 u8 length; 784 unsigned long ret; 785 786 /* Not our event */ 787 if (event->attr.type != event->pmu->type) 788 return -ENOENT; 789 790 /* config2 is unused */ 791 if (event->attr.config2) { 792 pr_devel("config2 set when reserved\n"); 793 return -EINVAL; 794 } 795 796 /* no branch sampling */ 797 if (has_branch_stack(event)) 798 return -EOPNOTSUPP; 799 800 length = event_get_length(event); 801 if (length < 1 || length > 8) { 802 pr_devel("length invalid\n"); 803 return -EINVAL; 804 } 805 806 /* last byte within the buffer? */ 807 if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { 808 pr_devel("request outside of buffer: %zu > %zu\n", 809 (size_t)event_get_offset(event) + length, 810 HGPCI_MAX_DATA_BYTES); 811 return -EINVAL; 812 } 813 814 /* check if the request works... */ 815 ret = single_gpci_request(event_get_request(event), 816 event_get_starting_index(event), 817 event_get_secondary_index(event), 818 event_get_counter_info_version(event), 819 event_get_offset(event), 820 length, 821 &count); 822 823 /* 824 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve 825 * performance information, and required to set 826 * "Enable Performance Information Collection" option. 827 */ 828 if (ret == H_AUTHORITY) 829 return -EPERM; 830 831 if (ret) { 832 pr_devel("gpci hcall failed\n"); 833 return -EINVAL; 834 } 835 836 return 0; 837 } 838 839 static struct pmu h_gpci_pmu = { 840 .task_ctx_nr = perf_invalid_context, 841 842 .name = "hv_gpci", 843 .attr_groups = attr_groups, 844 .event_init = h_gpci_event_init, 845 .add = h_gpci_event_add, 846 .del = h_gpci_event_stop, 847 .start = h_gpci_event_start, 848 .stop = h_gpci_event_stop, 849 .read = h_gpci_event_update, 850 .capabilities = PERF_PMU_CAP_NO_EXCLUDE, 851 }; 852 853 static int ppc_hv_gpci_cpu_online(unsigned int cpu) 854 { 855 if (cpumask_empty(&hv_gpci_cpumask)) 856 cpumask_set_cpu(cpu, &hv_gpci_cpumask); 857 858 return 0; 859 } 860 861 static int ppc_hv_gpci_cpu_offline(unsigned int cpu) 862 { 863 int target; 864 865 /* Check if exiting cpu is used for collecting gpci events */ 866 if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) 867 return 0; 868 869 /* Find a new cpu to collect gpci events */ 870 target = cpumask_last(cpu_active_mask); 871 872 if (target < 0 || target >= nr_cpu_ids) { 873 pr_err("hv_gpci: CPU hotplug init failed\n"); 874 return -1; 875 } 876 877 /* Migrate gpci events to the new target */ 878 cpumask_set_cpu(target, &hv_gpci_cpumask); 879 perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); 880 881 return 0; 882 } 883 884 static int hv_gpci_cpu_hotplug_init(void) 885 { 886 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, 887 "perf/powerpc/hv_gcpi:online", 888 ppc_hv_gpci_cpu_online, 889 ppc_hv_gpci_cpu_offline); 890 } 891 892 static struct device_attribute *sysinfo_device_attr_create(int 893 sysinfo_interface_group_index, u32 req) 894 { 895 struct device_attribute *attr = NULL; 896 unsigned long ret; 897 struct hv_gpci_request_buffer *arg; 898 899 if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || 900 sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { 901 pr_info("Wrong interface group index for system information\n"); 902 return NULL; 903 } 904 905 /* Check for given counter request value support */ 906 arg = (void *)get_cpu_var(hv_gpci_reqb); 907 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 908 909 arg->params.counter_request = cpu_to_be32(req); 910 911 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 912 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 913 914 put_cpu_var(hv_gpci_reqb); 915 916 /* 917 * Add given counter request value attribute in the interface_attrs 918 * attribute array, only for valid return types. 919 */ 920 if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { 921 attr = kzalloc_obj(*attr); 922 if (!attr) 923 return NULL; 924 925 sysfs_attr_init(&attr->attr); 926 attr->attr.mode = 0444; 927 928 switch (sysinfo_interface_group_index) { 929 case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: 930 attr->attr.name = "processor_bus_topology"; 931 attr->show = processor_bus_topology_show; 932 break; 933 case INTERFACE_PROCESSOR_CONFIG_ATTR: 934 attr->attr.name = "processor_config"; 935 attr->show = processor_config_show; 936 break; 937 case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: 938 attr->attr.name = "affinity_domain_via_virtual_processor"; 939 attr->show = affinity_domain_via_virtual_processor_show; 940 break; 941 case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: 942 attr->attr.name = "affinity_domain_via_domain"; 943 attr->show = affinity_domain_via_domain_show; 944 break; 945 case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: 946 attr->attr.name = "affinity_domain_via_partition"; 947 attr->show = affinity_domain_via_partition_show; 948 break; 949 } 950 } else 951 pr_devel("hcall failed, with error: 0x%lx\n", ret); 952 953 return attr; 954 } 955 956 static void add_sysinfo_interface_files(void) 957 { 958 int sysfs_count; 959 struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; 960 int i; 961 962 sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; 963 964 /* Get device attribute for a given counter request value */ 965 for (i = 0; i < sysfs_count; i++) { 966 attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, 967 sysinfo_counter_request[i]); 968 969 if (!attr[i]) 970 goto out; 971 } 972 973 /* Add sysinfo interface attributes in the interface_attrs attribute array */ 974 for (i = 0; i < sysfs_count; i++) 975 interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; 976 977 return; 978 979 out: 980 /* 981 * The sysinfo interface attributes will be added, only if hcall passed for 982 * all the counter request values. Free the device attribute array incase 983 * of any hcall failure. 984 */ 985 if (i > 0) { 986 while (i >= 0) { 987 kfree(attr[i]); 988 i--; 989 } 990 } 991 } 992 993 static int hv_gpci_init(void) 994 { 995 int r; 996 unsigned long hret; 997 struct hv_perf_caps caps; 998 struct hv_gpci_request_buffer *arg; 999 1000 hv_gpci_assert_offsets_correct(); 1001 1002 if (!firmware_has_feature(FW_FEATURE_LPAR)) { 1003 pr_debug("not a virtualized system, not enabling\n"); 1004 return -ENODEV; 1005 } 1006 1007 hret = hv_perf_caps_get(&caps); 1008 if (hret) { 1009 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", 1010 hret); 1011 return -ENODEV; 1012 } 1013 1014 /* init cpuhotplug */ 1015 r = hv_gpci_cpu_hotplug_init(); 1016 if (r) 1017 return r; 1018 1019 /* sampling not supported */ 1020 h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 1021 1022 arg = (void *)get_cpu_var(hv_gpci_reqb); 1023 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); 1024 1025 /* 1026 * hcall H_GET_PERF_COUNTER_INFO populates the output 1027 * counter_info_version value based on the system hypervisor. 1028 * Pass the counter request 0x10 corresponds to request type 1029 * 'Dispatch_timebase_by_processor', to get the supported 1030 * counter_info_version. 1031 */ 1032 arg->params.counter_request = cpu_to_be32(0x10); 1033 1034 r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, 1035 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); 1036 if (r) { 1037 pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); 1038 arg->params.counter_info_version_out = 0x8; 1039 } 1040 1041 /* 1042 * Use counter_info_version_out value to assign 1043 * required hv-gpci event list. 1044 */ 1045 if (arg->params.counter_info_version_out >= 0x8) 1046 event_group.attrs = hv_gpci_event_attrs; 1047 else 1048 event_group.attrs = hv_gpci_event_attrs_v6; 1049 1050 put_cpu_var(hv_gpci_reqb); 1051 1052 r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); 1053 if (r) 1054 return r; 1055 1056 /* sysinfo interface files are only available for power10 and above platforms */ 1057 if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) 1058 add_sysinfo_interface_files(); 1059 1060 return 0; 1061 } 1062 1063 device_initcall(hv_gpci_init); 1064