1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Hypervisor supplied "gpci" ("get performance counter info") performance
4 * counter support
5 *
6 * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
7 * Copyright 2014 IBM Corporation.
8 */
9
10 #define pr_fmt(fmt) "hv-gpci: " fmt
11
12 #include <linux/init.h>
13 #include <linux/perf_event.h>
14 #include <asm/firmware.h>
15 #include <asm/hvcall.h>
16 #include <asm/io.h>
17
18 #include "hv-gpci.h"
19 #include "hv-common.h"
20
21 /*
22 * Example usage:
23 * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,
24 * secondary_index=0,starting_index=0xffffffff,request=0x10/' ...
25 */
26
/*
 * Bit-field layout of perf_event_attr::config and ::config1 for hv_gpci
 * events. Each EVENT_DEFINE_RANGE_FORMAT() below provides the
 * format_attr_<name> object listed in format_attrs[] and the
 * event_get_<name>() accessor used later in this file. The macros come
 * from hv-common.h; their exact expansion is not visible here.
 */

/* u32 */
EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
/* u32 */
/*
 * Note that starting_index, phys_processor_idx, sibling_part_id,
 * hw_chip_id, partition_id all refer to the same bit range. They
 * are basically aliases for the starting_index. The specific alias
 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h
 */
EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);

/* u16 */
EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
/* u8 */
EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23);
/* u8, bytes of data (1-8) */
EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
/* u32, byte offset */
EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);

/*
 * CPU designated to collect gpci events; maintained by the CPU hotplug
 * callbacks below and exported through the "cpumask" sysfs attribute.
 */
static cpumask_t hv_gpci_cpumask;

/* Entries of the "format" sysfs group, NULL terminated. */
static struct attribute *format_attrs[] = {
	&format_attr_request.attr,
	&format_attr_starting_index.attr,
	&format_attr_phys_processor_idx.attr,
	&format_attr_sibling_part_id.attr,
	&format_attr_hw_chip_id.attr,
	&format_attr_partition_id.attr,
	&format_attr_secondary_index.attr,
	&format_attr_counter_info_version.attr,

	&format_attr_offset.attr,
	&format_attr_length.attr,
	NULL,
};

static const struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

/* Not const: hv_gpci_init() picks the event list at boot. */
static struct attribute_group event_group = {
	.name = "events",
	/* .attrs is set in init */
};
77
/*
 * HV_CAPS_ATTR(_name, _format) - define a read-only device attribute
 * hv_caps_attr_<_name> whose show routine queries the hypervisor
 * capabilities with hv_perf_caps_get() on every read and prints the
 * caps field called <_name> using <_format>. A non-zero return from
 * hv_perf_caps_get() is reported to the reader as -EIO.
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps caps;				\
	unsigned long hret = hv_perf_caps_get(&caps);		\
	if (hret)						\
		return -EIO;					\
								\
	return sprintf(page, _format, caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
91
kernel_version_show(struct device * dev,struct device_attribute * attr,char * page)92 static ssize_t kernel_version_show(struct device *dev,
93 struct device_attribute *attr,
94 char *page)
95 {
96 return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
97 }
98
cpumask_show(struct device * dev,struct device_attribute * attr,char * buf)99 static ssize_t cpumask_show(struct device *dev,
100 struct device_attribute *attr, char *buf)
101 {
102 return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
103 }
104
/*
 * Interface attribute array index to store system information.
 * These are the positions of the NULL placeholders in interface_attrs[]
 * that add_sysinfo_interface_files() fills in; INTERFACE_NULL_ATTR is
 * the position of the array terminator.
 */
#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
#define INTERFACE_NULL_ATTR			11

/*
 * Counter request value to retrieve system information.
 * The enumerators index sysinfo_counter_request[] and are listed in the
 * same order as the INTERFACE_*_ATTR slots above, which
 * add_sysinfo_interface_files() relies on.
 */
enum {
	PROCESSOR_BUS_TOPOLOGY,
	PROCESSOR_CONFIG,
	AFFINITY_DOMAIN_VIA_VP,		/* affinity domain via virtual processor */
	AFFINITY_DOMAIN_VIA_DOM,	/* affinity domain via domain */
	AFFINITY_DOMAIN_VIA_PAR,	/* affinity domain via partition */
};

/* counter_request values passed to H_GET_PERF_COUNTER_INFO. */
static int sysinfo_counter_request[] = {
	[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
	[PROCESSOR_CONFIG] = 0x90,
	[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
	[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
	[AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
};

/*
 * Per-CPU request/response buffer handed to the hypervisor. Users take
 * it with get_cpu_var() and release it with put_cpu_var().
 */
static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
131
/*
 * Issue one H_GET_PERF_COUNTER_INFO hcall for a system-information
 * request and append the returned elements to @buf as hex text, one
 * element per line.
 *
 * @req:             counter request value
 * @starting_index:  starting index for the request
 * @secondary_index: secondary index for the request
 * @buf:             sysfs output page (PAGE_SIZE bytes)
 * @n:               in/out, number of bytes already used in @buf
 * @arg:             zeroed per-CPU request buffer owned by the caller
 *
 * Returns 0 (H_SUCCESS) when all data was returned, H_PARAMETER when the
 * hypervisor returned a partial buffer and the caller should ask again
 * with the next index, or a negative errno stored in the unsigned
 * return type: -EPERM, -EIO or -EFBIG.
 */
static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
			u16 secondary_index, char *buf,
			size_t *n, struct hv_gpci_request_buffer *arg)
{
	unsigned long ret;
	size_t nr_elems, elem_size, line_len;
	size_t i, j;

	arg->params.counter_request = cpu_to_be32(req);
	arg->params.starting_index = cpu_to_be32(starting_index);
	arg->params.secondary_index = cpu_to_be16(secondary_index);

	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);

	/*
	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
	 * which means that the current buffer size cannot accommodate
	 * all the information and a partial buffer is returned.
	 * The hcall failed in case of any ret value other than H_SUCCESS
	 * or H_PARAMETER.
	 *
	 * ret value as H_AUTHORITY implies that the partition is not
	 * permitted to retrieve performance information, and the
	 * "Enable Performance Information Collection" option must be set.
	 */
	if (ret == H_AUTHORITY)
		return -EPERM;

	/*
	 * hcall can fail with other possible ret value like
	 * H_PRIVILEGE/H_HARDWARE because of invalid buffer-length/address
	 * or due to some hardware error.
	 */
	if (ret && (ret != H_PARAMETER))
		return -EIO;

	/*
	 * 'returned_values' is the number of counter_value array elements
	 * returned by the hcall and 'cv_element_size' the size of each one.
	 * Each element becomes 2 hex digits per byte plus a newline.
	 */
	nr_elems = be16_to_cpu(arg->params.returned_values);
	elem_size = be16_to_cpu(arg->params.cv_element_size);
	line_len = 2 * elem_size + 1;

	for (i = 0; i < nr_elems; i++) {
		/* Never read beyond the data area of the request buffer. */
		if ((i + 1) * elem_size > HGPCI_MAX_DATA_BYTES)
			return -EIO;

		/*
		 * Check for room (line plus terminating NUL) before
		 * writing, so the sysfs page is never overrun.
		 */
		if (*n + line_len >= PAGE_SIZE) {
			pr_info("System information exceeds PAGE_SIZE\n");
			return -EFBIG;
		}

		for (j = i * elem_size; j < (i + 1) * elem_size; j++)
			*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]);
		*n += sprintf(buf + *n, "\n");
	}

	return ret;
}
191
processor_bus_topology_show(struct device * dev,struct device_attribute * attr,char * buf)192 static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
193 char *buf)
194 {
195 struct hv_gpci_request_buffer *arg;
196 unsigned long ret;
197 size_t n = 0;
198
199 arg = (void *)get_cpu_var(hv_gpci_reqb);
200 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
201
202 /*
203 * Pass the counter request value 0xD0 corresponds to request
204 * type 'Processor_bus_topology', to retrieve
205 * the system topology information.
206 * starting_index value implies the starting hardware
207 * chip id.
208 */
209 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
210 0, 0, buf, &n, arg);
211
212 if (!ret)
213 goto out_success;
214
215 if (ret != H_PARAMETER)
216 goto out;
217
218 /*
219 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
220 * implies that buffer can't accommodate all information, and a partial buffer
221 * returned. To handle that, we need to make subsequent requests
222 * with next starting index to retrieve additional (missing) data.
223 * Below loop do subsequent hcalls with next starting index and add it
224 * to buffer util we get all the information.
225 */
226 while (ret == H_PARAMETER) {
227 int returned_values = be16_to_cpu(arg->params.returned_values);
228 int elementsize = be16_to_cpu(arg->params.cv_element_size);
229 int last_element = (returned_values - 1) * elementsize;
230
231 /*
232 * Since the starting index value is part of counter_value
233 * buffer elements, use the starting index value in the last
234 * element and add 1 to make subsequent hcalls.
235 */
236 u32 starting_index = arg->bytes[last_element + 3] +
237 (arg->bytes[last_element + 2] << 8) +
238 (arg->bytes[last_element + 1] << 16) +
239 (arg->bytes[last_element] << 24) + 1;
240
241 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
242
243 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
244 starting_index, 0, buf, &n, arg);
245
246 if (!ret)
247 goto out_success;
248
249 if (ret != H_PARAMETER)
250 goto out;
251 }
252
253 out_success:
254 put_cpu_var(hv_gpci_reqb);
255 return n;
256
257 out:
258 put_cpu_var(hv_gpci_reqb);
259 return ret;
260 }
261
processor_config_show(struct device * dev,struct device_attribute * attr,char * buf)262 static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
263 char *buf)
264 {
265 struct hv_gpci_request_buffer *arg;
266 unsigned long ret;
267 size_t n = 0;
268
269 arg = (void *)get_cpu_var(hv_gpci_reqb);
270 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
271
272 /*
273 * Pass the counter request value 0x90 corresponds to request
274 * type 'Processor_config', to retrieve
275 * the system processor information.
276 * starting_index value implies the starting hardware
277 * processor index.
278 */
279 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
280 0, 0, buf, &n, arg);
281
282 if (!ret)
283 goto out_success;
284
285 if (ret != H_PARAMETER)
286 goto out;
287
288 /*
289 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
290 * implies that buffer can't accommodate all information, and a partial buffer
291 * returned. To handle that, we need to take subsequent requests
292 * with next starting index to retrieve additional (missing) data.
293 * Below loop do subsequent hcalls with next starting index and add it
294 * to buffer util we get all the information.
295 */
296 while (ret == H_PARAMETER) {
297 int returned_values = be16_to_cpu(arg->params.returned_values);
298 int elementsize = be16_to_cpu(arg->params.cv_element_size);
299 int last_element = (returned_values - 1) * elementsize;
300
301 /*
302 * Since the starting index is part of counter_value
303 * buffer elements, use the starting index value in the last
304 * element and add 1 to subsequent hcalls.
305 */
306 u32 starting_index = arg->bytes[last_element + 3] +
307 (arg->bytes[last_element + 2] << 8) +
308 (arg->bytes[last_element + 1] << 16) +
309 (arg->bytes[last_element] << 24) + 1;
310
311 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
312
313 ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
314 starting_index, 0, buf, &n, arg);
315
316 if (!ret)
317 goto out_success;
318
319 if (ret != H_PARAMETER)
320 goto out;
321 }
322
323 out_success:
324 put_cpu_var(hv_gpci_reqb);
325 return n;
326
327 out:
328 put_cpu_var(hv_gpci_reqb);
329 return ret;
330 }
331
affinity_domain_via_virtual_processor_show(struct device * dev,struct device_attribute * attr,char * buf)332 static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
333 struct device_attribute *attr, char *buf)
334 {
335 struct hv_gpci_request_buffer *arg;
336 unsigned long ret;
337 size_t n = 0;
338
339 arg = (void *)get_cpu_var(hv_gpci_reqb);
340 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
341
342 /*
343 * Pass the counter request 0xA0 corresponds to request
344 * type 'Affinity_domain_information_by_virutal_processor',
345 * to retrieve the system affinity domain information.
346 * starting_index value refers to the starting hardware
347 * processor index.
348 */
349 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
350 0, 0, buf, &n, arg);
351
352 if (!ret)
353 goto out_success;
354
355 if (ret != H_PARAMETER)
356 goto out;
357
358 /*
359 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
360 * implies that buffer can't accommodate all information, and a partial buffer
361 * returned. To handle that, we need to take subsequent requests
362 * with next secondary index to retrieve additional (missing) data.
363 * Below loop do subsequent hcalls with next secondary index and add it
364 * to buffer util we get all the information.
365 */
366 while (ret == H_PARAMETER) {
367 int returned_values = be16_to_cpu(arg->params.returned_values);
368 int elementsize = be16_to_cpu(arg->params.cv_element_size);
369 int last_element = (returned_values - 1) * elementsize;
370
371 /*
372 * Since the starting index and secondary index type is part of the
373 * counter_value buffer elements, use the starting index value in the
374 * last array element as subsequent starting index, and use secondary index
375 * value in the last array element plus 1 as subsequent secondary index.
376 * For counter request '0xA0', starting index points to partition id
377 * and secondary index points to corresponding virtual processor index.
378 */
379 u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
380 u16 secondary_index = arg->bytes[last_element + 3] +
381 (arg->bytes[last_element + 2] << 8) + 1;
382
383 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
384
385 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
386 starting_index, secondary_index, buf, &n, arg);
387
388 if (!ret)
389 goto out_success;
390
391 if (ret != H_PARAMETER)
392 goto out;
393 }
394
395 out_success:
396 put_cpu_var(hv_gpci_reqb);
397 return n;
398
399 out:
400 put_cpu_var(hv_gpci_reqb);
401 return ret;
402 }
403
affinity_domain_via_domain_show(struct device * dev,struct device_attribute * attr,char * buf)404 static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
405 char *buf)
406 {
407 struct hv_gpci_request_buffer *arg;
408 unsigned long ret;
409 size_t n = 0;
410
411 arg = (void *)get_cpu_var(hv_gpci_reqb);
412 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
413
414 /*
415 * Pass the counter request 0xB0 corresponds to request
416 * type 'Affinity_domain_information_by_domain',
417 * to retrieve the system affinity domain information.
418 * starting_index value refers to the starting hardware
419 * processor index.
420 */
421 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
422 0, 0, buf, &n, arg);
423
424 if (!ret)
425 goto out_success;
426
427 if (ret != H_PARAMETER)
428 goto out;
429
430 /*
431 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
432 * implies that buffer can't accommodate all information, and a partial buffer
433 * returned. To handle that, we need to take subsequent requests
434 * with next starting index to retrieve additional (missing) data.
435 * Below loop do subsequent hcalls with next starting index and add it
436 * to buffer util we get all the information.
437 */
438 while (ret == H_PARAMETER) {
439 int returned_values = be16_to_cpu(arg->params.returned_values);
440 int elementsize = be16_to_cpu(arg->params.cv_element_size);
441 int last_element = (returned_values - 1) * elementsize;
442
443 /*
444 * Since the starting index value is part of counter_value
445 * buffer elements, use the starting index value in the last
446 * element and add 1 to make subsequent hcalls.
447 */
448 u32 starting_index = arg->bytes[last_element + 1] +
449 (arg->bytes[last_element] << 8) + 1;
450
451 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
452
453 ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
454 starting_index, 0, buf, &n, arg);
455
456 if (!ret)
457 goto out_success;
458
459 if (ret != H_PARAMETER)
460 goto out;
461 }
462
463 out_success:
464 put_cpu_var(hv_gpci_reqb);
465 return n;
466
467 out:
468 put_cpu_var(hv_gpci_reqb);
469 return ret;
470 }
471
/*
 * Format the variable-size elements returned for the
 * 'Affinity_domain_information_by_partition' request as hex text.
 *
 * @returned_values: number of elements to parse
 * @element_size:    'cv_element_size', i.e. the size of an element with
 *                   no domain information
 * @buf:             sysfs output page (PAGE_SIZE bytes)
 * @last_element:    out, byte offset in arg->bytes just past the last
 *                   element parsed (0 if none was parsed)
 * @n:               in/out, number of bytes already used in @buf
 * @arg:             request buffer holding the hcall result
 *
 * Each element is printed as its fixed part on one line, followed by one
 * line per domain entry and an empty line. If the output would not fit
 * in the page, *n is set to PAGE_SIZE so that the caller's
 * "n >= PAGE_SIZE" check reports the overflow. Parsing stops silently if
 * an element would extend beyond the data area of @arg.
 */
static void affinity_domain_via_partition_result_parse(int returned_values,
			int element_size, char *buf, size_t *last_element,
			size_t *n, struct hv_gpci_request_buffer *arg)
{
	size_t i, j = 0, k, l, m;
	size_t fixed_len;
	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;

	/*
	 * Nothing to parse. Also rejects negative counts, which would
	 * otherwise compare as huge unsigned values, and element sizes too
	 * small to contain the count/size fields read below.
	 */
	if (returned_values <= 0 || element_size < 3)
		goto out;

	fixed_len = element_size;

	for (i = 0; i < (size_t)returned_values; i++) {
		if (j + fixed_len > HGPCI_MAX_DATA_BYTES)
			goto out;
		/* 2 hex digits per byte, a newline and the trailing NUL. */
		if (*n + 2 * fixed_len + 1 >= PAGE_SIZE)
			goto out_full;

		for (k = j; k < j + fixed_len; k++)
			*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
		*n += sprintf(buf + *n, "\n");

		/*
		 * NOTE(review): the field offsets and byte order used here
		 * are kept exactly as before; confirm them against the
		 * hypervisor interface specification.
		 */
		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
		size_of_each_affinity_domain_ele = 0;
		if (total_affinity_domain_ele) {
			if (k >= HGPCI_MAX_DATA_BYTES)
				goto out;
			size_of_each_affinity_domain_ele =
				(u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
		}

		for (l = 0; l < total_affinity_domain_ele; l++) {
			if (k + size_of_each_affinity_domain_ele > HGPCI_MAX_DATA_BYTES)
				goto out;
			if (*n + 2 * (size_t)size_of_each_affinity_domain_ele + 1 >= PAGE_SIZE)
				goto out_full;

			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
				*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
				k++;
			}
			*n += sprintf(buf + *n, "\n");
		}

		if (*n + 1 >= PAGE_SIZE)
			goto out_full;
		*n += sprintf(buf + *n, "\n");

		/* The next element starts right after this one's domains. */
		j = k;
	}

out:
	*last_element = j;
	return;

out_full:
	/* Signal "output does not fit" without writing past the page. */
	*n = PAGE_SIZE;
	*last_element = j;
}
516
affinity_domain_via_partition_show(struct device * dev,struct device_attribute * attr,char * buf)517 static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
518 char *buf)
519 {
520 struct hv_gpci_request_buffer *arg;
521 unsigned long ret;
522 size_t n = 0;
523 size_t last_element = 0;
524 u32 starting_index;
525
526 arg = (void *)get_cpu_var(hv_gpci_reqb);
527 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
528
529 /*
530 * Pass the counter request value 0xB1 corresponds to counter request
531 * type 'Affinity_domain_information_by_partition',
532 * to retrieve the system affinity domain by partition information.
533 * starting_index value refers to the starting hardware
534 * processor index.
535 */
536 arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
537 arg->params.starting_index = cpu_to_be32(0);
538
539 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
540 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
541
542 if (!ret)
543 goto parse_result;
544
545 if (ret && (ret != H_PARAMETER))
546 goto out;
547
548 /*
549 * ret value as 'H_PARAMETER' implies that the current buffer size
550 * can't accommodate all the information, and a partial buffer
551 * returned. To handle that, we need to make subsequent requests
552 * with next starting index to retrieve additional (missing) data.
553 * Below loop do subsequent hcalls with next starting index and add it
554 * to buffer util we get all the information.
555 */
556 while (ret == H_PARAMETER) {
557 affinity_domain_via_partition_result_parse(
558 be16_to_cpu(arg->params.returned_values) - 1,
559 be16_to_cpu(arg->params.cv_element_size), buf,
560 &last_element, &n, arg);
561
562 if (n >= PAGE_SIZE) {
563 put_cpu_var(hv_gpci_reqb);
564 pr_debug("System information exceeds PAGE_SIZE\n");
565 return -EFBIG;
566 }
567
568 /*
569 * Since the starting index value is part of counter_value
570 * buffer elements, use the starting_index value in the last
571 * element and add 1 to make subsequent hcalls.
572 */
573 starting_index = (u8)arg->bytes[last_element] << 8 |
574 (u8)arg->bytes[last_element + 1];
575
576 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
577 arg->params.counter_request = cpu_to_be32(
578 sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
579 arg->params.starting_index = cpu_to_be32(starting_index);
580
581 ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
582 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
583
584 if (ret && (ret != H_PARAMETER))
585 goto out;
586 }
587
588 parse_result:
589 affinity_domain_via_partition_result_parse(
590 be16_to_cpu(arg->params.returned_values),
591 be16_to_cpu(arg->params.cv_element_size),
592 buf, &last_element, &n, arg);
593
594 put_cpu_var(hv_gpci_reqb);
595 return n;
596
597 out:
598 put_cpu_var(hv_gpci_reqb);
599
600 /*
601 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
602 * which means that the current buffer size cannot accommodate
603 * all the information and a partial buffer returned.
604 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
605 *
606 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
607 * performance information, and required to set
608 * "Enable Performance Information Collection" option.
609 */
610 if (ret == H_AUTHORITY)
611 return -EPERM;
612
613 /*
614 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
615 * because of invalid buffer-length/address or due to some hardware
616 * error.
617 */
618 return -EIO;
619 }
620
/* Read-only attributes backed by kernel_version_show() and cpumask_show(). */
static DEVICE_ATTR_RO(kernel_version);
static DEVICE_ATTR_RO(cpumask);

/* Read-only attributes reporting individual hypervisor capability fields. */
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
HV_CAPS_ATTR(lab, "%d\n");
HV_CAPS_ATTR(collect_privileged, "%d\n");

/*
 * Entries of the "interface" sysfs group. The positions of the NULL
 * placeholders must match the INTERFACE_*_ATTR index macros.
 */
static struct attribute *interface_attrs[] = {
	&dev_attr_kernel_version.attr,
	&hv_caps_attr_version.attr,
	&hv_caps_attr_ga.attr,
	&hv_caps_attr_expanded.attr,
	&hv_caps_attr_lab.attr,
	&hv_caps_attr_collect_privileged.attr,
	/*
	 * This NULL is a placeholder for the processor_bus_topology
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the processor_config
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_domain
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_partition
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/* Array terminator (index INTERFACE_NULL_ATTR). */
	NULL,
};

static struct attribute *cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

/* Unnamed group: no .name is set for the cpumask attribute group. */
static const struct attribute_group cpumask_attr_group = {
	.attrs = cpumask_attrs,
};

static const struct attribute_group interface_group = {
	.name = "interface",
	.attrs = interface_attrs,
};

/* All attribute groups attached to the hv_gpci PMU, NULL terminated. */
static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	&interface_group,
	&cpumask_attr_group,
	NULL,
};
686
/*
 * Perform one H_GET_PERF_COUNTER_INFO hcall and extract a single counter
 * value from the result.
 *
 * @req, @starting_index, @secondary_index, @version_in: request
 *	parameters copied into the request buffer
 * @offset, @length: position and size in bytes of the big-endian value
 *	inside the returned data
 * @value: out, receives the extracted value; only written on success
 *
 * Returns 0 on success, otherwise the hcall return code.
 */
static unsigned long single_gpci_request(u32 req, u32 starting_index,
		u16 secondary_index, u8 version_in, u32 offset, u8 length,
		u64 *value)
{
	struct hv_gpci_request_buffer *reqb;
	unsigned long hret;
	u64 acc = 0;
	size_t pos;

	reqb = (void *)get_cpu_var(hv_gpci_reqb);
	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);

	reqb->params.counter_request = cpu_to_be32(req);
	reqb->params.starting_index = cpu_to_be32(starting_index);
	reqb->params.secondary_index = cpu_to_be16(secondary_index);
	reqb->params.counter_info_version_in = version_in;

	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);

	/*
	 * H_PARAMETER with detail_rc GEN_BUF_TOO_SMALL (0x1B) means only a
	 * partial buffer came back. Only the data for the given starting
	 * index is read here, and that is in the first entry, so a partial
	 * buffer is as good as a complete one.
	 */
	if (hret == H_PARAMETER && be32_to_cpu(reqb->params.detail_rc) == 0x1B)
		hret = 0;

	if (!hret) {
		/*
		 * offset and length were checked against the zeroed buffer
		 * at event init. Accumulate the bytes most significant
		 * first.
		 */
		for (pos = 0; pos < length; pos++)
			acc = (acc << 8) | (u64)(reqb->bytes[offset + pos]);

		*value = acc;
	} else {
		pr_devel("hcall failed: 0x%lx\n", hret);
	}

	put_cpu_var(hv_gpci_reqb);
	return hret;
}
738
h_gpci_get_value(struct perf_event * event)739 static u64 h_gpci_get_value(struct perf_event *event)
740 {
741 u64 count;
742 unsigned long ret = single_gpci_request(event_get_request(event),
743 event_get_starting_index(event),
744 event_get_secondary_index(event),
745 event_get_counter_info_version(event),
746 event_get_offset(event),
747 event_get_length(event),
748 &count);
749 if (ret)
750 return 0;
751 return count;
752 }
753
h_gpci_event_update(struct perf_event * event)754 static void h_gpci_event_update(struct perf_event *event)
755 {
756 s64 prev;
757 u64 now = h_gpci_get_value(event);
758 prev = local64_xchg(&event->hw.prev_count, now);
759 local64_add(now - prev, &event->count);
760 }
761
h_gpci_event_start(struct perf_event * event,int flags)762 static void h_gpci_event_start(struct perf_event *event, int flags)
763 {
764 local64_set(&event->hw.prev_count, h_gpci_get_value(event));
765 }
766
/*
 * Stopping an event only folds the latest counter value into the event
 * count; @flags is not used. Also installed as the PMU .del callback.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
771
h_gpci_event_add(struct perf_event * event,int flags)772 static int h_gpci_event_add(struct perf_event *event, int flags)
773 {
774 if (flags & PERF_EF_START)
775 h_gpci_event_start(event, flags);
776
777 return 0;
778 }
779
h_gpci_event_init(struct perf_event * event)780 static int h_gpci_event_init(struct perf_event *event)
781 {
782 u64 count;
783 u8 length;
784 unsigned long ret;
785
786 /* Not our event */
787 if (event->attr.type != event->pmu->type)
788 return -ENOENT;
789
790 /* config2 is unused */
791 if (event->attr.config2) {
792 pr_devel("config2 set when reserved\n");
793 return -EINVAL;
794 }
795
796 /* no branch sampling */
797 if (has_branch_stack(event))
798 return -EOPNOTSUPP;
799
800 length = event_get_length(event);
801 if (length < 1 || length > 8) {
802 pr_devel("length invalid\n");
803 return -EINVAL;
804 }
805
806 /* last byte within the buffer? */
807 if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
808 pr_devel("request outside of buffer: %zu > %zu\n",
809 (size_t)event_get_offset(event) + length,
810 HGPCI_MAX_DATA_BYTES);
811 return -EINVAL;
812 }
813
814 /* check if the request works... */
815 ret = single_gpci_request(event_get_request(event),
816 event_get_starting_index(event),
817 event_get_secondary_index(event),
818 event_get_counter_info_version(event),
819 event_get_offset(event),
820 length,
821 &count);
822
823 /*
824 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
825 * performance information, and required to set
826 * "Enable Performance Information Collection" option.
827 */
828 if (ret == H_AUTHORITY)
829 return -EPERM;
830
831 if (ret) {
832 pr_devel("gpci hcall failed\n");
833 return -EINVAL;
834 }
835
836 return 0;
837 }
838
/*
 * PMU description registered by hv_gpci_init(). .del reuses the stop
 * handler and .read the update handler. hv_gpci_init() additionally
 * sets PERF_PMU_CAP_NO_INTERRUPT in .capabilities before registration.
 */
static struct pmu h_gpci_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "hv_gpci",
	.attr_groups = attr_groups,
	.event_init = h_gpci_event_init,
	.add = h_gpci_event_add,
	.del = h_gpci_event_stop,
	.start = h_gpci_event_start,
	.stop = h_gpci_event_stop,
	.read = h_gpci_event_update,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
852
ppc_hv_gpci_cpu_online(unsigned int cpu)853 static int ppc_hv_gpci_cpu_online(unsigned int cpu)
854 {
855 if (cpumask_empty(&hv_gpci_cpumask))
856 cpumask_set_cpu(cpu, &hv_gpci_cpumask);
857
858 return 0;
859 }
860
ppc_hv_gpci_cpu_offline(unsigned int cpu)861 static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
862 {
863 int target;
864
865 /* Check if exiting cpu is used for collecting gpci events */
866 if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask))
867 return 0;
868
869 /* Find a new cpu to collect gpci events */
870 target = cpumask_last(cpu_active_mask);
871
872 if (target < 0 || target >= nr_cpu_ids) {
873 pr_err("hv_gpci: CPU hotplug init failed\n");
874 return -1;
875 }
876
877 /* Migrate gpci events to the new target */
878 cpumask_set_cpu(target, &hv_gpci_cpumask);
879 perf_pmu_migrate_context(&h_gpci_pmu, cpu, target);
880
881 return 0;
882 }
883
hv_gpci_cpu_hotplug_init(void)884 static int hv_gpci_cpu_hotplug_init(void)
885 {
886 return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
887 "perf/powerpc/hv_gcpi:online",
888 ppc_hv_gpci_cpu_online,
889 ppc_hv_gpci_cpu_offline);
890 }
891
/*
 * Probe whether the hypervisor accepts counter request @req and, if so,
 * allocate the read-only device attribute that belongs in slot
 * @sysinfo_interface_group_index of interface_attrs[].
 *
 * The request counts as supported when the hcall returns H_SUCCESS,
 * H_AUTHORITY or H_PARAMETER. Returns the new attribute, or NULL when
 * the index is out of range, the request is not supported or the
 * allocation fails.
 */
static struct device_attribute *sysinfo_device_attr_create(int
		sysinfo_interface_group_index, u32 req)
{
	struct hv_gpci_request_buffer *reqb;
	struct device_attribute *new_attr;
	unsigned long hret;

	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
	    sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
		pr_info("Wrong interface group index for system information\n");
		return NULL;
	}

	/* Check for given counter request value support */
	reqb = (void *)get_cpu_var(hv_gpci_reqb);
	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);
	reqb->params.counter_request = cpu_to_be32(req);

	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);

	put_cpu_var(hv_gpci_reqb);

	/* Only valid return types get an attribute. */
	if (hret && hret != H_AUTHORITY && hret != H_PARAMETER) {
		pr_devel("hcall failed, with error: 0x%lx\n", hret);
		return NULL;
	}

	new_attr = kzalloc_obj(*new_attr);
	if (!new_attr)
		return NULL;

	sysfs_attr_init(&new_attr->attr);
	new_attr->attr.mode = 0444;

	if (sysinfo_interface_group_index == INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR) {
		new_attr->attr.name = "processor_bus_topology";
		new_attr->show = processor_bus_topology_show;
	} else if (sysinfo_interface_group_index == INTERFACE_PROCESSOR_CONFIG_ATTR) {
		new_attr->attr.name = "processor_config";
		new_attr->show = processor_config_show;
	} else if (sysinfo_interface_group_index == INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR) {
		new_attr->attr.name = "affinity_domain_via_virtual_processor";
		new_attr->show = affinity_domain_via_virtual_processor_show;
	} else if (sysinfo_interface_group_index == INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR) {
		new_attr->attr.name = "affinity_domain_via_domain";
		new_attr->show = affinity_domain_via_domain_show;
	} else if (sysinfo_interface_group_index == INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR) {
		new_attr->attr.name = "affinity_domain_via_partition";
		new_attr->show = affinity_domain_via_partition_show;
	}

	return new_attr;
}
955
add_sysinfo_interface_files(void)956 static void add_sysinfo_interface_files(void)
957 {
958 int sysfs_count;
959 struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
960 int i;
961
962 sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
963
964 /* Get device attribute for a given counter request value */
965 for (i = 0; i < sysfs_count; i++) {
966 attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
967 sysinfo_counter_request[i]);
968
969 if (!attr[i])
970 goto out;
971 }
972
973 /* Add sysinfo interface attributes in the interface_attrs attribute array */
974 for (i = 0; i < sysfs_count; i++)
975 interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
976
977 return;
978
979 out:
980 /*
981 * The sysinfo interface attributes will be added, only if hcall passed for
982 * all the counter request values. Free the device attribute array incase
983 * of any hcall failure.
984 */
985 if (i > 0) {
986 while (i >= 0) {
987 kfree(attr[i]);
988 i--;
989 }
990 }
991 }
992
hv_gpci_init(void)993 static int hv_gpci_init(void)
994 {
995 int r;
996 unsigned long hret;
997 struct hv_perf_caps caps;
998 struct hv_gpci_request_buffer *arg;
999
1000 hv_gpci_assert_offsets_correct();
1001
1002 if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1003 pr_debug("not a virtualized system, not enabling\n");
1004 return -ENODEV;
1005 }
1006
1007 hret = hv_perf_caps_get(&caps);
1008 if (hret) {
1009 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1010 hret);
1011 return -ENODEV;
1012 }
1013
1014 /* init cpuhotplug */
1015 r = hv_gpci_cpu_hotplug_init();
1016 if (r)
1017 return r;
1018
1019 /* sampling not supported */
1020 h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1021
1022 arg = (void *)get_cpu_var(hv_gpci_reqb);
1023 memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
1024
1025 /*
1026 * hcall H_GET_PERF_COUNTER_INFO populates the output
1027 * counter_info_version value based on the system hypervisor.
1028 * Pass the counter request 0x10 corresponds to request type
1029 * 'Dispatch_timebase_by_processor', to get the supported
1030 * counter_info_version.
1031 */
1032 arg->params.counter_request = cpu_to_be32(0x10);
1033
1034 r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
1035 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
1036 if (r) {
1037 pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
1038 arg->params.counter_info_version_out = 0x8;
1039 }
1040
1041 /*
1042 * Use counter_info_version_out value to assign
1043 * required hv-gpci event list.
1044 */
1045 if (arg->params.counter_info_version_out >= 0x8)
1046 event_group.attrs = hv_gpci_event_attrs;
1047 else
1048 event_group.attrs = hv_gpci_event_attrs_v6;
1049
1050 put_cpu_var(hv_gpci_reqb);
1051
1052 r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
1053 if (r)
1054 return r;
1055
1056 /* sysinfo interface files are only available for power10 and above platforms */
1057 if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
1058 add_sysinfo_interface_files();
1059
1060 return 0;
1061 }
1062
1063 device_initcall(hv_gpci_init);
1064