xref: /linux/arch/powerpc/perf/hv-gpci.c (revision fe6f8e913df9319db03fd107671ff02f104ca38c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Hypervisor supplied "gpci" ("get performance counter info") performance
4  * counter support
5  *
6  * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
7  * Copyright 2014 IBM Corporation.
8  */
9 
10 #define pr_fmt(fmt) "hv-gpci: " fmt
11 
12 #include <linux/init.h>
13 #include <linux/perf_event.h>
14 #include <asm/firmware.h>
15 #include <asm/hvcall.h>
16 #include <asm/io.h>
17 
18 #include "hv-gpci.h"
19 #include "hv-common.h"
20 
21 /*
22  * Example usage:
23  *  perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,
24  *		  secondary_index=0,starting_index=0xffffffff,request=0x10/' ...
25  */
26 
/*
 * Bit-field layout of perf_event_attr.config and .config1 for this PMU.
 * Each EVENT_DEFINE_RANGE_FORMAT() entry names a field and the inclusive bit
 * range it occupies; the resulting format_attr_<name> objects are listed in
 * format_attrs[] and the event_get_<name>() accessors are used when issuing
 * requests.
 * NOTE(review): the macros live in hv-common.h, which is not visible here;
 * the _LITE variant presumably defines only the format attribute - confirm.
 */
27 /* request: u32, config bits 0-31 */
28 EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
29 /* starting_index and its aliases: u32, config bits 32-63 */
30 /*
31  * Note that starting_index, phys_processor_idx, sibling_part_id,
32  * hw_chip_id and partition_id all refer to the same bit range. They
33  * are aliases for starting_index; which alias applies depends on the
34  * event. See REQUEST_IDX_KIND in hv-gpci-requests.h.
35  */
36 EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
37 EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
38 EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
39 EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
40 EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);
41 
42 /* secondary_index: u16, config1 bits 0-15 */
43 EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
44 /* counter_info_version: u8, config1 bits 16-23 */
45 EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23);
46 /* length: u8, number of data bytes to read (1-8), config1 bits 24-31 */
47 EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
48 /* offset: u32, byte offset into the returned data, config1 bits 32-63 */
49 EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
50 
/*
 * CPU mask reported by the "cpumask" sysfs attribute. The CPU hotplug
 * callbacks in this file set a CPU in it when it is empty and move the bit
 * to another active CPU when the current one goes offline.
 */
51 static cpumask_t hv_gpci_cpumask;
52 
/*
 * Attributes of the "format" sysfs group: one entry per config/config1
 * field defined above, NULL terminated.
 */
53 static struct attribute *format_attrs[] = {
54 	&format_attr_request.attr,
55 	&format_attr_starting_index.attr,
56 	&format_attr_phys_processor_idx.attr,
57 	&format_attr_sibling_part_id.attr,
58 	&format_attr_hw_chip_id.attr,
59 	&format_attr_partition_id.attr,
60 	&format_attr_secondary_index.attr,
61 	&format_attr_counter_info_version.attr,
62 
63 	&format_attr_offset.attr,
64 	&format_attr_length.attr,
65 	NULL,
66 };
67 
/* "format" group, referenced from attr_groups[]. */
68 static const struct attribute_group format_group = {
69 	.name = "format",
70 	.attrs = format_attrs,
71 };
72 
/*
 * "events" group, referenced from attr_groups[]. It is not const because
 * its attribute list is filled in at init time.
 */
73 static struct attribute_group event_group = {
74 	.name  = "events",
75 	/* .attrs is set in init */
76 };
77 
/*
 * HV_CAPS_ATTR(name, format) - define the read-only device attribute
 * hv_caps_attr_<name>.
 *
 * The generated <name>_show() routine fetches the hypervisor capabilities
 * with hv_perf_caps_get() and prints the 'name' member of struct
 * hv_perf_caps into the output page using 'format'. It returns -EIO when
 * hv_perf_caps_get() reports a non-zero status.
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps caps;				\
								\
	if (hv_perf_caps_get(&caps) != 0)			\
		return -EIO;					\
								\
	return sprintf(page, _format, caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
91 
kernel_version_show(struct device * dev,struct device_attribute * attr,char * page)92 static ssize_t kernel_version_show(struct device *dev,
93 				   struct device_attribute *attr,
94 				   char *page)
95 {
96 	return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
97 }
98 
cpumask_show(struct device * dev,struct device_attribute * attr,char * buf)99 static ssize_t cpumask_show(struct device *dev,
100 			    struct device_attribute *attr, char *buf)
101 {
102 	return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
103 }
104 
105 /*
 * Indices into interface_attrs[] of the placeholder slots that receive the
 * system information attributes. INTERFACE_NULL_ATTR is the index of the
 * terminating NULL entry of that array.
 */
106 #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
107 #define INTERFACE_PROCESSOR_CONFIG_ATTR		7
108 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
109 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
110 #define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
111 #define INTERFACE_NULL_ATTR			11
112 
113 /*
 * Index of each system information request in sysinfo_counter_request[].
 * add_sysinfo_interface_files() pairs entry i with interface_attrs slot
 * i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, so the order here must match
 * the order of the INTERFACE_*_ATTR indices above.
 */
114 enum {
115 	PROCESSOR_BUS_TOPOLOGY,
116 	PROCESSOR_CONFIG,
117 	AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
118 	AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
119 	AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
120 };
121 
/* Counter request value passed to the hcall for each request kind. */
122 static int sysinfo_counter_request[] = {
123 	[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
124 	[PROCESSOR_CONFIG] = 0x90,
125 	[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
126 	[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
127 	[AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
128 };
129 
/*
 * Per-CPU, 8-byte aligned scratch buffer of HGPCI_REQ_BUFFER_SIZE bytes.
 * Users obtain it with get_cpu_var(), treat it as a
 * struct hv_gpci_request_buffer for the hcall, and release it with
 * put_cpu_var().
 */
130 static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
131 
/*
 * systeminfo_gpci_request - issue one H_GET_PERF_COUNTER_INFO hcall and
 * append the returned counter_value elements to a sysfs page as hex text.
 *
 * @req:		counter request value
 * @starting_index:	starting index placed in the request
 * @secondary_index:	secondary index placed in the request
 * @buf:		output page; at most PAGE_SIZE bytes are used
 * @n:			in/out, number of bytes of @buf already filled
 * @arg:		request buffer; callers zero it before each call
 *
 * Every returned element is written as one line of two-digit hex bytes.
 *
 * Return: 0 when all data was returned, H_PARAMETER when only part of the
 * data fitted in the request buffer and a follow-up request is needed, or
 * a negative errno carried in the unsigned long: -EPERM for H_AUTHORITY,
 * -EIO for any other hcall failure, -EFBIG when the text does not fit in
 * the page.
 */
static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
			u16 secondary_index, char *buf,
			size_t *n, struct hv_gpci_request_buffer *arg)
{
	unsigned long ret;
	size_t nr_elements, element_size, line_len;
	size_t i, j;

	arg->params.counter_request = cpu_to_be32(req);
	arg->params.starting_index = cpu_to_be32(starting_index);
	arg->params.secondary_index = cpu_to_be16(secondary_index);

	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);

	/*
	 * H_AUTHORITY implies that the partition is not permitted to
	 * retrieve performance information; the
	 * "Enable Performance Information Collection" option must be set.
	 */
	if (ret == H_AUTHORITY)
		return -EPERM;

	/*
	 * H_PARAMETER corresponds to 'GEN_BUF_TOO_SMALL': the request buffer
	 * could not hold everything and a partial buffer was returned. Any
	 * other non-zero value (e.g. H_PRIVILEGE/H_HARDWARE, caused by an
	 * invalid buffer length/address or a hardware error) is a failure.
	 */
	if (ret && (ret != H_PARAMETER))
		return -EIO;

	/*
	 * The hcall sets 'returned_values' to the number of counter_value
	 * array elements it returned and 'cv_element_size' to the size of
	 * one element. Both are loop invariant, so convert them once.
	 */
	nr_elements = be16_to_cpu(arg->params.returned_values);
	element_size = be16_to_cpu(arg->params.cv_element_size);

	/* Two hex digits per byte plus the trailing newline. */
	line_len = 2 * element_size + 1;

	for (i = 0; i < nr_elements; i++) {
		/*
		 * Check for room before writing, not afterwards, so the page
		 * is never overrun. '>=' keeps one byte for the NUL that
		 * sprintf() appends.
		 */
		if (*n + line_len >= PAGE_SIZE) {
			pr_info("System information exceeds PAGE_SIZE\n");
			return -EFBIG;
		}

		for (j = i * element_size; j < (i + 1) * element_size; j++)
			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[j]);
		*n += sprintf(buf + *n,  "\n");
	}

	return ret;
}
191 
processor_bus_topology_show(struct device * dev,struct device_attribute * attr,char * buf)192 static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
193 				char *buf)
194 {
195 	struct hv_gpci_request_buffer *arg;
196 	unsigned long ret;
197 	size_t n = 0;
198 
199 	arg = (void *)get_cpu_var(hv_gpci_reqb);
200 	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
201 
202 	/*
203 	 * Pass the counter request value 0xD0 corresponds to request
204 	 * type 'Processor_bus_topology', to retrieve
205 	 * the system topology information.
206 	 * starting_index value implies the starting hardware
207 	 * chip id.
208 	 */
209 	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
210 			0, 0, buf, &n, arg);
211 
212 	if (!ret)
213 		goto out_success;
214 
215 	if (ret != H_PARAMETER)
216 		goto out;
217 
218 	/*
219 	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
220 	 * implies that buffer can't accommodate all information, and a partial buffer
221 	 * returned. To handle that, we need to make subsequent requests
222 	 * with next starting index to retrieve additional (missing) data.
223 	 * Below loop do subsequent hcalls with next starting index and add it
224 	 * to buffer util we get all the information.
225 	 */
226 	while (ret == H_PARAMETER) {
227 		int returned_values = be16_to_cpu(arg->params.returned_values);
228 		int elementsize = be16_to_cpu(arg->params.cv_element_size);
229 		int last_element = (returned_values - 1) * elementsize;
230 
231 		/*
232 		 * Since the starting index value is part of counter_value
233 		 * buffer elements, use the starting index value in the last
234 		 * element and add 1 to make subsequent hcalls.
235 		 */
236 		u32 starting_index = arg->bytes[last_element + 3] +
237 				(arg->bytes[last_element + 2] << 8) +
238 				(arg->bytes[last_element + 1] << 16) +
239 				(arg->bytes[last_element] << 24) + 1;
240 
241 		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
242 
243 		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
244 				starting_index, 0, buf, &n, arg);
245 
246 		if (!ret)
247 			goto out_success;
248 
249 		if (ret != H_PARAMETER)
250 			goto out;
251 	}
252 
253 out_success:
254 	put_cpu_var(hv_gpci_reqb);
255 	return n;
256 
257 out:
258 	put_cpu_var(hv_gpci_reqb);
259 	return ret;
260 }
261 
processor_config_show(struct device * dev,struct device_attribute * attr,char * buf)262 static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
263 					char *buf)
264 {
265 	struct hv_gpci_request_buffer *arg;
266 	unsigned long ret;
267 	size_t n = 0;
268 
269 	arg = (void *)get_cpu_var(hv_gpci_reqb);
270 	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
271 
272 	/*
273 	 * Pass the counter request value 0x90 corresponds to request
274 	 * type 'Processor_config', to retrieve
275 	 * the system processor information.
276 	 * starting_index value implies the starting hardware
277 	 * processor index.
278 	 */
279 	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
280 			0, 0, buf, &n, arg);
281 
282 	if (!ret)
283 		goto out_success;
284 
285 	if (ret != H_PARAMETER)
286 		goto out;
287 
288 	/*
289 	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
290 	 * implies that buffer can't accommodate all information, and a partial buffer
291 	 * returned. To handle that, we need to take subsequent requests
292 	 * with next starting index to retrieve additional (missing) data.
293 	 * Below loop do subsequent hcalls with next starting index and add it
294 	 * to buffer util we get all the information.
295 	 */
296 	while (ret == H_PARAMETER) {
297 		int returned_values = be16_to_cpu(arg->params.returned_values);
298 		int elementsize = be16_to_cpu(arg->params.cv_element_size);
299 		int last_element = (returned_values - 1) * elementsize;
300 
301 		/*
302 		 * Since the starting index is part of counter_value
303 		 * buffer elements, use the starting index value in the last
304 		 * element and add 1 to subsequent hcalls.
305 		 */
306 		u32 starting_index = arg->bytes[last_element + 3] +
307 				(arg->bytes[last_element + 2] << 8) +
308 				(arg->bytes[last_element + 1] << 16) +
309 				(arg->bytes[last_element] << 24) + 1;
310 
311 		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
312 
313 		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
314 				starting_index, 0, buf, &n, arg);
315 
316 		if (!ret)
317 			goto out_success;
318 
319 		if (ret != H_PARAMETER)
320 			goto out;
321 	}
322 
323 out_success:
324 	put_cpu_var(hv_gpci_reqb);
325 	return n;
326 
327 out:
328 	put_cpu_var(hv_gpci_reqb);
329 	return ret;
330 }
331 
affinity_domain_via_virtual_processor_show(struct device * dev,struct device_attribute * attr,char * buf)332 static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
333 			struct device_attribute *attr, char *buf)
334 {
335 	struct hv_gpci_request_buffer *arg;
336 	unsigned long ret;
337 	size_t n = 0;
338 
339 	arg = (void *)get_cpu_var(hv_gpci_reqb);
340 	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
341 
342 	/*
343 	 * Pass the counter request 0xA0 corresponds to request
344 	 * type 'Affinity_domain_information_by_virutal_processor',
345 	 * to retrieve the system affinity domain information.
346 	 * starting_index value refers to the starting hardware
347 	 * processor index.
348 	 */
349 	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
350 			0, 0, buf, &n, arg);
351 
352 	if (!ret)
353 		goto out_success;
354 
355 	if (ret != H_PARAMETER)
356 		goto out;
357 
358 	/*
359 	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
360 	 * implies that buffer can't accommodate all information, and a partial buffer
361 	 * returned. To handle that, we need to take subsequent requests
362 	 * with next secondary index to retrieve additional (missing) data.
363 	 * Below loop do subsequent hcalls with next secondary index and add it
364 	 * to buffer util we get all the information.
365 	 */
366 	while (ret == H_PARAMETER) {
367 		int returned_values = be16_to_cpu(arg->params.returned_values);
368 		int elementsize = be16_to_cpu(arg->params.cv_element_size);
369 		int last_element = (returned_values - 1) * elementsize;
370 
371 		/*
372 		 * Since the starting index and secondary index type is part of the
373 		 * counter_value buffer elements, use the starting index value in the
374 		 * last array element as subsequent starting index, and use secondary index
375 		 * value in the last array element plus 1 as subsequent secondary index.
376 		 * For counter request '0xA0', starting index points to partition id
377 		 * and secondary index points to corresponding virtual processor index.
378 		 */
379 		u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
380 		u16 secondary_index = arg->bytes[last_element + 3] +
381 				(arg->bytes[last_element + 2] << 8) + 1;
382 
383 		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
384 
385 		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
386 				starting_index, secondary_index, buf, &n, arg);
387 
388 		if (!ret)
389 			goto out_success;
390 
391 		if (ret != H_PARAMETER)
392 			goto out;
393 	}
394 
395 out_success:
396 	put_cpu_var(hv_gpci_reqb);
397 	return n;
398 
399 out:
400 	put_cpu_var(hv_gpci_reqb);
401 	return ret;
402 }
403 
affinity_domain_via_domain_show(struct device * dev,struct device_attribute * attr,char * buf)404 static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
405 						char *buf)
406 {
407 	struct hv_gpci_request_buffer *arg;
408 	unsigned long ret;
409 	size_t n = 0;
410 
411 	arg = (void *)get_cpu_var(hv_gpci_reqb);
412 	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
413 
414 	/*
415 	 * Pass the counter request 0xB0 corresponds to request
416 	 * type 'Affinity_domain_information_by_domain',
417 	 * to retrieve the system affinity domain information.
418 	 * starting_index value refers to the starting hardware
419 	 * processor index.
420 	 */
421 	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
422 			0, 0, buf, &n, arg);
423 
424 	if (!ret)
425 		goto out_success;
426 
427 	if (ret != H_PARAMETER)
428 		goto out;
429 
430 	/*
431 	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
432 	 * implies that buffer can't accommodate all information, and a partial buffer
433 	 * returned. To handle that, we need to take subsequent requests
434 	 * with next starting index to retrieve additional (missing) data.
435 	 * Below loop do subsequent hcalls with next starting index and add it
436 	 * to buffer util we get all the information.
437 	 */
438 	while (ret == H_PARAMETER) {
439 		int returned_values = be16_to_cpu(arg->params.returned_values);
440 		int elementsize = be16_to_cpu(arg->params.cv_element_size);
441 		int last_element = (returned_values - 1) * elementsize;
442 
443 		/*
444 		 * Since the starting index value is part of counter_value
445 		 * buffer elements, use the starting index value in the last
446 		 * element and add 1 to make subsequent hcalls.
447 		 */
448 		u32 starting_index = arg->bytes[last_element + 1] +
449 			(arg->bytes[last_element] << 8) + 1;
450 
451 		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
452 
453 		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
454 					starting_index, 0, buf, &n, arg);
455 
456 		if (!ret)
457 			goto out_success;
458 
459 		if (ret != H_PARAMETER)
460 			goto out;
461 	}
462 
463 out_success:
464 	put_cpu_var(hv_gpci_reqb);
465 	return n;
466 
467 out:
468 	put_cpu_var(hv_gpci_reqb);
469 	return ret;
470 }
471 
/*
 * affinity_domain_via_partition_result_parse - append the elements returned
 * by counter request 0xB1 to a sysfs page as hex text.
 *
 * @returned_values:	number of elements to parse; values <= 0 parse nothing
 * @element_size:	size of the fixed part of each element
 *			('cv_element_size', i.e. an element with no domain
 *			information)
 * @buf:		output page; at most PAGE_SIZE bytes are used
 * @last_element:	out, byte offset in arg->bytes of the first element
 *			that was not parsed
 * @n:			in/out, number of bytes of @buf already filled
 * @arg:		request buffer holding the hcall result
 *
 * Unlike the other request types, the elements of this request are
 * variable-size: the fixed part is followed by an array of domain entries
 * whose count and entry size are read from the element itself. The fixed
 * part and every domain entry are each written on their own line, and an
 * empty line follows each element.
 *
 * If the text would not fit in the page, nothing more is written and *n is
 * set to PAGE_SIZE so that callers checking 'n >= PAGE_SIZE' see the
 * overflow.
 */
static void affinity_domain_via_partition_result_parse(int returned_values,
			int element_size, char *buf, size_t *last_element,
			size_t *n, struct hv_gpci_request_buffer *arg)
{
	size_t i, l, m;
	size_t j = 0;	/* offset of the element currently being parsed */
	size_t k = 0;	/* running read offset in arg->bytes */
	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;

	/*
	 * The explicit sign test keeps a negative count (a caller passing
	 * 'returned_values - 1' for an empty reply) from turning into a huge
	 * unsigned bound.
	 */
	for (i = 0; returned_values > 0 && i < (size_t)returned_values; i++) {
		/* Fixed part: two hex digits per byte plus a newline. */
		if (*n + 2 * (size_t)element_size + 1 >= PAGE_SIZE)
			goto overflow;

		for (k = j; k < j + element_size; k++)
			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
		*n += sprintf(buf + *n,  "\n");

		/*
		 * Domain entry count and size are taken from the bytes around
		 * the end of the fixed part.
		 * NOTE(review): byte order and offsets are kept exactly as in
		 * the original code; confirm them against the 0xB1 layout.
		 */
		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];

		for (l = 0; l < total_affinity_domain_ele; l++) {
			if (*n + 2 * (size_t)size_of_each_affinity_domain_ele + 1 >= PAGE_SIZE)
				goto overflow;

			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
				*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
				k++;
			}
			*n += sprintf(buf + *n,  "\n");
		}

		/* Empty line separating elements. */
		if (*n + 1 >= PAGE_SIZE)
			goto overflow;
		*n += sprintf(buf + *n,  "\n");

		j = k;
	}

	/* k is 0 when nothing was parsed, otherwise the next element's offset. */
	*last_element = k;
	return;

overflow:
	*last_element = j;
	*n = PAGE_SIZE;
}
516 
affinity_domain_via_partition_show(struct device * dev,struct device_attribute * attr,char * buf)517 static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
518 							char *buf)
519 {
520 	struct hv_gpci_request_buffer *arg;
521 	unsigned long ret;
522 	size_t n = 0;
523 	size_t last_element = 0;
524 	u32 starting_index;
525 
526 	arg = (void *)get_cpu_var(hv_gpci_reqb);
527 	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
528 
529 	/*
530 	 * Pass the counter request value 0xB1 corresponds to counter request
531 	 * type 'Affinity_domain_information_by_partition',
532 	 * to retrieve the system affinity domain by partition information.
533 	 * starting_index value refers to the starting hardware
534 	 * processor index.
535 	 */
536 	arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
537 	arg->params.starting_index = cpu_to_be32(0);
538 
539 	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
540 			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
541 
542 	if (!ret)
543 		goto parse_result;
544 
545 	if (ret && (ret != H_PARAMETER))
546 		goto out;
547 
548 	/*
549 	 * ret value as 'H_PARAMETER' implies that the current buffer size
550 	 * can't accommodate all the information, and a partial buffer
551 	 * returned. To handle that, we need to make subsequent requests
552 	 * with next starting index to retrieve additional (missing) data.
553 	 * Below loop do subsequent hcalls with next starting index and add it
554 	 * to buffer util we get all the information.
555 	 */
556 	while (ret == H_PARAMETER) {
557 		affinity_domain_via_partition_result_parse(
558 			be16_to_cpu(arg->params.returned_values) - 1,
559 			be16_to_cpu(arg->params.cv_element_size), buf,
560 			&last_element, &n, arg);
561 
562 		if (n >= PAGE_SIZE) {
563 			put_cpu_var(hv_gpci_reqb);
564 			pr_debug("System information exceeds PAGE_SIZE\n");
565 			return -EFBIG;
566 		}
567 
568 		/*
569 		 * Since the starting index value is part of counter_value
570 		 * buffer elements, use the starting_index value in the last
571 		 * element and add 1 to make subsequent hcalls.
572 		 */
573 		starting_index = (u8)arg->bytes[last_element] << 8 |
574 				(u8)arg->bytes[last_element + 1];
575 
576 		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
577 		arg->params.counter_request = cpu_to_be32(
578 				sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
579 		arg->params.starting_index = cpu_to_be32(starting_index);
580 
581 		ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
582 				virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
583 
584 		if (ret && (ret != H_PARAMETER))
585 			goto out;
586 	}
587 
588 parse_result:
589 	affinity_domain_via_partition_result_parse(
590 		be16_to_cpu(arg->params.returned_values),
591 		be16_to_cpu(arg->params.cv_element_size),
592 		buf, &last_element, &n, arg);
593 
594 	put_cpu_var(hv_gpci_reqb);
595 	return n;
596 
597 out:
598 	put_cpu_var(hv_gpci_reqb);
599 
600 	/*
601 	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
602 	 * which means that the current buffer size cannot accommodate
603 	 * all the information and a partial buffer returned.
604 	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
605 	 *
606 	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
607 	 * performance information, and required to set
608 	 * "Enable Performance Information Collection" option.
609 	 */
610 	if (ret == H_AUTHORITY)
611 		return -EPERM;
612 
613 	/*
614 	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
615 	 * because of invalid buffer-length/address or due to some hardware
616 	 * error.
617 	 */
618 	return -EIO;
619 }
620 
/*
 * Read-only device attributes backed by kernel_version_show() and
 * cpumask_show().
 */
621 static DEVICE_ATTR_RO(kernel_version);
622 static DEVICE_ATTR_RO(cpumask);
623 
/*
 * One read-only attribute per struct hv_perf_caps member; the second
 * argument is the printf format used to print the member.
 */
624 HV_CAPS_ATTR(version, "0x%x\n");
625 HV_CAPS_ATTR(ga, "%d\n");
626 HV_CAPS_ATTR(expanded, "%d\n");
627 HV_CAPS_ATTR(lab, "%d\n");
628 HV_CAPS_ATTR(collect_privileged, "%d\n");
629 
/*
 * Attributes of the "interface" sysfs group. The first six entries are
 * fixed; entries 6-10 (INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR up to
 * INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR) are placeholders that
 * add_sysinfo_interface_files() fills in; entry 11 (INTERFACE_NULL_ATTR)
 * is the terminating NULL. Keep the layout in sync with those indices.
 */
630 static struct attribute *interface_attrs[] = {
631 	&dev_attr_kernel_version.attr,
632 	&hv_caps_attr_version.attr,
633 	&hv_caps_attr_ga.attr,
634 	&hv_caps_attr_expanded.attr,
635 	&hv_caps_attr_lab.attr,
636 	&hv_caps_attr_collect_privileged.attr,
637 	/*
638 	 * This NULL is a placeholder for the processor_bus_topology
639 	 * attribute, set in the init function if applicable.
640 	 */
641 	NULL,
642 	/*
643 	 * This NULL is a placeholder for the processor_config
644 	 * attribute, set in the init function if applicable.
645 	 */
646 	NULL,
647 	/*
648 	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
649 	 * attribute, set in the init function if applicable.
650 	 */
651 	NULL,
652 	/*
653 	 * This NULL is a placeholder for the affinity_domain_via_domain
654 	 * attribute, set in the init function if applicable.
655 	 */
656 	NULL,
657 	/*
658 	 * This NULL is a placeholder for the affinity_domain_via_partition
659 	 * attribute, set in the init function if applicable.
660 	 */
661 	NULL,
	/* Terminating entry (index INTERFACE_NULL_ATTR). */
662 	NULL,
663 };
664 
/* Attributes of the unnamed group that exposes "cpumask". */
665 static struct attribute *cpumask_attrs[] = {
666 	&dev_attr_cpumask.attr,
667 	NULL,
668 };
669 
/* Group without a .name; it carries only the cpumask attribute. */
670 static const struct attribute_group cpumask_attr_group = {
671 	.attrs = cpumask_attrs,
672 };
673 
/* "interface" group built from interface_attrs[]. */
674 static const struct attribute_group interface_group = {
675 	.name = "interface",
676 	.attrs = interface_attrs,
677 };
678 
/* All attribute groups of the PMU, NULL terminated; used by h_gpci_pmu. */
679 static const struct attribute_group *attr_groups[] = {
680 	&format_group,
681 	&event_group,
682 	&interface_group,
683 	&cpumask_attr_group,
684 	NULL,
685 };
686 
/*
 * single_gpci_request - issue one H_GET_PERF_COUNTER_INFO hcall and extract
 * a counter value from the returned data.
 *
 * @req:		counter request value
 * @starting_index:	starting index placed in the request
 * @secondary_index:	secondary index placed in the request
 * @version_in:		counter info version placed in the request
 * @offset:		byte offset of the value within the returned data
 * @length:		number of bytes that make up the value
 * @value:		out, the value read; written only on success
 *
 * The @length bytes at @offset are combined with the first byte as the most
 * significant one.
 *
 * Return: 0 on success, otherwise the hcall status.
 */
static unsigned long single_gpci_request(u32 req, u32 starting_index,
		u16 secondary_index, u8 version_in, u32 offset, u8 length,
		u64 *value)
{
	struct hv_gpci_request_buffer *reqb;
	unsigned long hret;
	u64 acc = 0;
	size_t pos;

	reqb = (void *)get_cpu_var(hv_gpci_reqb);
	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);

	reqb->params.counter_request = cpu_to_be32(req);
	reqb->params.starting_index = cpu_to_be32(starting_index);
	reqb->params.secondary_index = cpu_to_be16(secondary_index);
	reqb->params.counter_info_version_in = version_in;

	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);

	/*
	 * H_PARAMETER with detail_rc GEN_BUF_TOO_SMALL (0x1B) means the
	 * buffer could not hold all the information and a partial buffer
	 * was returned. Only the data for the given starting index is read
	 * here, and it can be taken from the first entry, so this case is
	 * treated as success.
	 */
	if (hret == H_PARAMETER && be32_to_cpu(reqb->params.detail_rc) == 0x1B)
		hret = 0;

	if (!hret) {
		/*
		 * offset and length were verified to lie within the zeroed
		 * buffer at event init.
		 */
		for (pos = offset; pos < offset + length; pos++)
			acc |= (u64)(reqb->bytes[pos]) << ((length - 1 - (pos - offset)) * 8);

		*value = acc;
	} else {
		pr_devel("hcall failed: 0x%lx\n", hret);
	}

	put_cpu_var(hv_gpci_reqb);
	return hret;
}
738 
h_gpci_get_value(struct perf_event * event)739 static u64 h_gpci_get_value(struct perf_event *event)
740 {
741 	u64 count;
742 	unsigned long ret = single_gpci_request(event_get_request(event),
743 					event_get_starting_index(event),
744 					event_get_secondary_index(event),
745 					event_get_counter_info_version(event),
746 					event_get_offset(event),
747 					event_get_length(event),
748 					&count);
749 	if (ret)
750 		return 0;
751 	return count;
752 }
753 
h_gpci_event_update(struct perf_event * event)754 static void h_gpci_event_update(struct perf_event *event)
755 {
756 	s64 prev;
757 	u64 now = h_gpci_get_value(event);
758 	prev = local64_xchg(&event->hw.prev_count, now);
759 	local64_add(now - prev, &event->count);
760 }
761 
h_gpci_event_start(struct perf_event * event,int flags)762 static void h_gpci_event_start(struct perf_event *event, int flags)
763 {
764 	local64_set(&event->hw.prev_count, h_gpci_get_value(event));
765 }
766 
/*
 * Stop counting: fold the delta accumulated since the last read into the
 * event count. @flags is not used.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	(void)flags;

	h_gpci_event_update(event);
}
771 
h_gpci_event_add(struct perf_event * event,int flags)772 static int h_gpci_event_add(struct perf_event *event, int flags)
773 {
774 	if (flags & PERF_EF_START)
775 		h_gpci_event_start(event, flags);
776 
777 	return 0;
778 }
779 
h_gpci_event_init(struct perf_event * event)780 static int h_gpci_event_init(struct perf_event *event)
781 {
782 	u64 count;
783 	u8 length;
784 	unsigned long ret;
785 
786 	/* Not our event */
787 	if (event->attr.type != event->pmu->type)
788 		return -ENOENT;
789 
790 	/* config2 is unused */
791 	if (event->attr.config2) {
792 		pr_devel("config2 set when reserved\n");
793 		return -EINVAL;
794 	}
795 
796 	/* no branch sampling */
797 	if (has_branch_stack(event))
798 		return -EOPNOTSUPP;
799 
800 	length = event_get_length(event);
801 	if (length < 1 || length > 8) {
802 		pr_devel("length invalid\n");
803 		return -EINVAL;
804 	}
805 
806 	/* last byte within the buffer? */
807 	if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
808 		pr_devel("request outside of buffer: %zu > %zu\n",
809 				(size_t)event_get_offset(event) + length,
810 				HGPCI_MAX_DATA_BYTES);
811 		return -EINVAL;
812 	}
813 
814 	/* check if the request works... */
815 	ret = single_gpci_request(event_get_request(event),
816 				event_get_starting_index(event),
817 				event_get_secondary_index(event),
818 				event_get_counter_info_version(event),
819 				event_get_offset(event),
820 				length,
821 				&count);
822 
823 	/*
824 	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
825 	 * performance information, and required to set
826 	 * "Enable Performance Information Collection" option.
827 	 */
828 	if (ret == H_AUTHORITY)
829 		return -EPERM;
830 
831 	if (ret) {
832 		pr_devel("gpci hcall failed\n");
833 		return -EINVAL;
834 	}
835 
836 	return 0;
837 }
838 
/*
 * PMU description registered under the name "hv_gpci". .del and .stop share
 * h_gpci_event_stop(), and .read uses h_gpci_event_update(), so each of them
 * folds the latest counter delta into the event count.
 */
839 static struct pmu h_gpci_pmu = {
840 	.task_ctx_nr = perf_invalid_context,
841 
842 	.name = "hv_gpci",
843 	.attr_groups = attr_groups,
844 	.event_init  = h_gpci_event_init,
845 	.add         = h_gpci_event_add,
846 	.del         = h_gpci_event_stop,
847 	.start       = h_gpci_event_start,
848 	.stop        = h_gpci_event_stop,
849 	.read        = h_gpci_event_update,
850 	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
851 };
852 
ppc_hv_gpci_cpu_online(unsigned int cpu)853 static int ppc_hv_gpci_cpu_online(unsigned int cpu)
854 {
855 	if (cpumask_empty(&hv_gpci_cpumask))
856 		cpumask_set_cpu(cpu, &hv_gpci_cpumask);
857 
858 	return 0;
859 }
860 
ppc_hv_gpci_cpu_offline(unsigned int cpu)861 static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
862 {
863 	int target;
864 
865 	/* Check if exiting cpu is used for collecting gpci events */
866 	if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask))
867 		return 0;
868 
869 	/* Find a new cpu to collect gpci events */
870 	target = cpumask_last(cpu_active_mask);
871 
872 	if (target < 0 || target >= nr_cpu_ids) {
873 		pr_err("hv_gpci: CPU hotplug init failed\n");
874 		return -1;
875 	}
876 
877 	/* Migrate gpci events to the new target */
878 	cpumask_set_cpu(target, &hv_gpci_cpumask);
879 	perf_pmu_migrate_context(&h_gpci_pmu, cpu, target);
880 
881 	return 0;
882 }
883 
hv_gpci_cpu_hotplug_init(void)884 static int hv_gpci_cpu_hotplug_init(void)
885 {
886 	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
887 			  "perf/powerpc/hv_gcpi:online",
888 			  ppc_hv_gpci_cpu_online,
889 			  ppc_hv_gpci_cpu_offline);
890 }
891 
/*
 * sysinfo_device_attr_create - probe a system information counter request
 * and allocate the matching sysfs attribute.
 *
 * @sysinfo_interface_group_index: interface_attrs[] slot the attribute is
 *	meant for; must be one of the INTERFACE_*_ATTR placeholder indices
 * @req: counter request value to probe
 *
 * The request is issued once with a zeroed buffer. The attribute is created
 * when the hcall returns success, H_AUTHORITY or H_PARAMETER.
 *
 * Return: a newly allocated, read-only (0444) device attribute whose name
 * and show routine match the index, or NULL when the index is out of range,
 * the hcall returns any other status, or the allocation fails. The caller
 * owns the returned memory.
 */
static struct device_attribute *sysinfo_device_attr_create(int
		sysinfo_interface_group_index, u32 req)
{
	struct hv_gpci_request_buffer *reqb;
	struct device_attribute *da;
	unsigned long hret;

	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
			sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
		pr_info("Wrong interface group index for system information\n");
		return NULL;
	}

	/* Check whether the given counter request value is supported */
	reqb = (void *)get_cpu_var(hv_gpci_reqb);
	memset(reqb, 0, HGPCI_REQ_BUFFER_SIZE);

	reqb->params.counter_request = cpu_to_be32(req);

	hret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
			virt_to_phys(reqb), HGPCI_REQ_BUFFER_SIZE);

	put_cpu_var(hv_gpci_reqb);

	/* Only the return values below qualify for an attribute. */
	if (hret && hret != H_AUTHORITY && hret != H_PARAMETER) {
		pr_devel("hcall failed, with error: 0x%lx\n", hret);
		return NULL;
	}

	da = kzalloc_obj(*da);
	if (!da)
		return NULL;

	sysfs_attr_init(&da->attr);
	da->attr.mode = 0444;

	switch (sysinfo_interface_group_index) {
	case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
		da->attr.name = "processor_bus_topology";
		da->show = processor_bus_topology_show;
		break;
	case INTERFACE_PROCESSOR_CONFIG_ATTR:
		da->attr.name = "processor_config";
		da->show = processor_config_show;
		break;
	case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
		da->attr.name = "affinity_domain_via_virtual_processor";
		da->show = affinity_domain_via_virtual_processor_show;
		break;
	case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
		da->attr.name = "affinity_domain_via_domain";
		da->show = affinity_domain_via_domain_show;
		break;
	case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
		da->attr.name = "affinity_domain_via_partition";
		da->show = affinity_domain_via_partition_show;
		break;
	}

	return da;
}
955 
add_sysinfo_interface_files(void)956 static void add_sysinfo_interface_files(void)
957 {
958 	int sysfs_count;
959 	struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
960 	int i;
961 
962 	sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
963 
964 	/* Get device attribute for a given counter request value */
965 	for (i = 0; i < sysfs_count; i++) {
966 		attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
967 				sysinfo_counter_request[i]);
968 
969 		if (!attr[i])
970 			goto out;
971 	}
972 
973 	/* Add sysinfo interface attributes in the interface_attrs attribute array */
974 	for (i = 0; i < sysfs_count; i++)
975 		interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
976 
977 	return;
978 
979 out:
980 	/*
981 	 * The sysinfo interface attributes will be added, only if hcall passed for
982 	 * all the counter request values. Free the device attribute array incase
983 	 * of any hcall failure.
984 	 */
985 	if (i > 0) {
986 		while (i >= 0) {
987 			kfree(attr[i]);
988 			i--;
989 		}
990 	}
991 }
992 
hv_gpci_init(void)993 static int hv_gpci_init(void)
994 {
995 	int r;
996 	unsigned long hret;
997 	struct hv_perf_caps caps;
998 	struct hv_gpci_request_buffer *arg;
999 
1000 	hv_gpci_assert_offsets_correct();
1001 
1002 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1003 		pr_debug("not a virtualized system, not enabling\n");
1004 		return -ENODEV;
1005 	}
1006 
1007 	hret = hv_perf_caps_get(&caps);
1008 	if (hret) {
1009 		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1010 				hret);
1011 		return -ENODEV;
1012 	}
1013 
1014 	/* init cpuhotplug */
1015 	r = hv_gpci_cpu_hotplug_init();
1016 	if (r)
1017 		return r;
1018 
1019 	/* sampling not supported */
1020 	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1021 
1022 	arg = (void *)get_cpu_var(hv_gpci_reqb);
1023 	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
1024 
1025 	/*
1026 	 * hcall H_GET_PERF_COUNTER_INFO populates the output
1027 	 * counter_info_version value based on the system hypervisor.
1028 	 * Pass the counter request 0x10 corresponds to request type
1029 	 * 'Dispatch_timebase_by_processor', to get the supported
1030 	 * counter_info_version.
1031 	 */
1032 	arg->params.counter_request = cpu_to_be32(0x10);
1033 
1034 	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
1035 			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
1036 	if (r) {
1037 		pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
1038 		arg->params.counter_info_version_out = 0x8;
1039 	}
1040 
1041 	/*
1042 	 * Use counter_info_version_out value to assign
1043 	 * required hv-gpci event list.
1044 	 */
1045 	if (arg->params.counter_info_version_out >= 0x8)
1046 		event_group.attrs = hv_gpci_event_attrs;
1047 	else
1048 		event_group.attrs = hv_gpci_event_attrs_v6;
1049 
1050 	put_cpu_var(hv_gpci_reqb);
1051 
1052 	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
1053 	if (r)
1054 		return r;
1055 
1056 	/* sysinfo interface files are only available for power10 and above platforms */
1057 	if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
1058 		add_sysinfo_interface_files();
1059 
1060 	return 0;
1061 }
1062 
/* Run hv_gpci_init() as a device-level initcall. */
device_initcall(hv_gpci_init);
1064