xref: /linux/arch/s390/kernel/perf_pai_ext.c (revision bc46b7cbc58c4cb562b6a45a1fbc7b8e7b23df58)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Performance event support - Processor Activity Instrumentation Extension
4  * Facility
5  *
6  *  Copyright IBM Corp. 2022
7  *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
8  */
9 #define KMSG_COMPONENT	"pai_ext"
10 #define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
11 
12 #include <linux/kernel.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/percpu.h>
15 #include <linux/notifier.h>
16 #include <linux/init.h>
17 #include <linux/io.h>
18 #include <linux/perf_event.h>
19 #include <asm/ctlreg.h>
20 #include <asm/pai.h>
21 #include <asm/debug.h>
22 
23 #define	PAIE1_CB_SZ		0x200	/* Size of PAIE1 control block */
24 #define	PAIE1_CTRBLOCK_SZ	0x400	/* Size of PAIE1 counter blocks */
25 
26 static debug_info_t *paiext_dbg;
27 static unsigned int paiext_cnt;	/* Extracted with QPACI instruction */
28 
29 struct pai_userdata {
30 	u16 num;
31 	u64 value;
32 } __packed;
33 
34 /* Create the PAI extension 1 control block area.
35  * The PAI extension control block 1 is pointed to by lowcore
36  * address 0x1508 for each CPU. This control block is 512 bytes in size
37  * and requires a 512 byte boundary alignment.
38  */
39 struct paiext_cb {		/* PAI extension 1 control block */
40 	u64 header;		/* Not used */
41 	u64 reserved1;
42 	u64 acc;		/* Addr to analytics counter control block */
43 	u8 reserved2[488];
44 } __packed;
45 
46 struct paiext_map {
47 	unsigned long *area;		/* Area for CPU to store counters */
48 	struct pai_userdata *save;	/* Area to store non-zero counters */
49 	unsigned int active_events;	/* # of PAI Extension users */
50 	refcount_t refcnt;
51 	struct perf_event *event;	/* Perf event for sampling */
52 	struct paiext_cb *paiext_cb;	/* PAI extension control block area */
53 	struct list_head syswide_list;	/* List system-wide sampling events */
54 };
55 
/* Per CPU slot which refers to the paiext_map of that CPU, NULL when
 * no map is allocated for that CPU.
 */
struct paiext_mapptr {
	struct paiext_map *mapptr;
};
59 
60 static struct paiext_root {		/* Anchor to per CPU data */
61 	refcount_t refcnt;		/* Overall active events */
62 	struct paiext_mapptr __percpu *mapptr;
63 } paiext_root;
64 
65 /* Free per CPU data when the last event is removed. */
paiext_root_free(void)66 static void paiext_root_free(void)
67 {
68 	if (refcount_dec_and_test(&paiext_root.refcnt)) {
69 		free_percpu(paiext_root.mapptr);
70 		paiext_root.mapptr = NULL;
71 	}
72 	debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__,
73 			    refcount_read(&paiext_root.refcnt));
74 }
75 
76 /* On initialization of first event also allocate per CPU data dynamically.
77  * Start with an array of pointers, the array size is the maximum number of
78  * CPUs possible, which might be larger than the number of CPUs currently
79  * online.
80  */
paiext_root_alloc(void)81 static int paiext_root_alloc(void)
82 {
83 	if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
84 		/* The memory is already zeroed. */
85 		paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
86 		if (!paiext_root.mapptr) {
87 			/* Returning without refcnt adjustment is ok. The
88 			 * error code is handled by paiext_alloc() which
89 			 * decrements refcnt when an event can not be
90 			 * created.
91 			 */
92 			return -ENOMEM;
93 		}
94 		refcount_set(&paiext_root.refcnt, 1);
95 	}
96 	return 0;
97 }
98 
/* Serializes allocation and release of the anchor and of the per CPU
 * maps, including the reference counting on both. This ensures that the
 * counter areas of a CPU are released only when the last event referring
 * to them is gone.
 * For details see paiext_alloc_cpu() and paiext_event_destroy_cpu().
 */
static DEFINE_MUTEX(paiext_reserve_mutex);
107 
108 /* Free all memory allocated for event counting/sampling setup */
paiext_free(struct paiext_mapptr * mp)109 static void paiext_free(struct paiext_mapptr *mp)
110 {
111 	kfree(mp->mapptr->area);
112 	kfree(mp->mapptr->paiext_cb);
113 	kvfree(mp->mapptr->save);
114 	kfree(mp->mapptr);
115 	mp->mapptr = NULL;
116 }
117 
118 /* Release the PMU if event is the last perf event */
paiext_event_destroy_cpu(struct perf_event * event,int cpu)119 static void paiext_event_destroy_cpu(struct perf_event *event, int cpu)
120 {
121 	struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu);
122 	struct paiext_map *cpump = mp->mapptr;
123 
124 	mutex_lock(&paiext_reserve_mutex);
125 	if (refcount_dec_and_test(&cpump->refcnt))	/* Last reference gone */
126 		paiext_free(mp);
127 	paiext_root_free();
128 	mutex_unlock(&paiext_reserve_mutex);
129 }
130 
paiext_event_destroy(struct perf_event * event)131 static void paiext_event_destroy(struct perf_event *event)
132 {
133 	int cpu;
134 
135 	free_page(PAI_SAVE_AREA(event));
136 	if (event->cpu == -1) {
137 		struct cpumask *mask = PAI_CPU_MASK(event);
138 
139 		for_each_cpu(cpu, mask)
140 			paiext_event_destroy_cpu(event, cpu);
141 		kfree(mask);
142 	} else {
143 		paiext_event_destroy_cpu(event, event->cpu);
144 	}
145 	debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__,
146 			    event->cpu);
147 }
148 
149 /* Used to avoid races in checking concurrent access of counting and
150  * sampling for pai_extension events.
151  *
152  * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
153  * allowed and when this event is running, no counting event is allowed.
154  * Several counting events are allowed in parallel, but no sampling event
155  * is allowed while one (or more) counting events are running.
156  *
157  * This function is called in process context and it is safe to block.
158  * When the event initialization functions fails, no other call back will
159  * be invoked.
160  *
161  * Allocate the memory for the event.
162  */
paiext_alloc_cpu(struct perf_event * event,int cpu)163 static int paiext_alloc_cpu(struct perf_event *event, int cpu)
164 {
165 	struct paiext_mapptr *mp;
166 	struct paiext_map *cpump;
167 	int rc;
168 
169 	mutex_lock(&paiext_reserve_mutex);
170 	rc = paiext_root_alloc();
171 	if (rc)
172 		goto unlock;
173 
174 	mp = per_cpu_ptr(paiext_root.mapptr, cpu);
175 	cpump = mp->mapptr;
176 	if (!cpump) {			/* Paiext_map allocated? */
177 		rc = -ENOMEM;
178 		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
179 		if (!cpump)
180 			goto undo;
181 
182 		/* Allocate memory for counter area and counter extraction.
183 		 * These are
184 		 * - a 512 byte block and requires 512 byte boundary alignment.
185 		 * - a 1KB byte block and requires 1KB boundary alignment.
186 		 * Only the first counting event has to allocate the area.
187 		 *
188 		 * Note: This works with commit 59bb47985c1d by default.
189 		 * Backporting this to kernels without this commit might
190 		 * need adjustment.
191 		 */
192 		mp->mapptr = cpump;
193 		cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
194 		cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
195 		cpump->save = kvmalloc_array(paiext_cnt + 1,
196 					     sizeof(struct pai_userdata),
197 					     GFP_KERNEL);
198 		if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
199 			paiext_free(mp);
200 			goto undo;
201 		}
202 		INIT_LIST_HEAD(&cpump->syswide_list);
203 		refcount_set(&cpump->refcnt, 1);
204 		rc = 0;
205 	} else {
206 		refcount_inc(&cpump->refcnt);
207 	}
208 
209 undo:
210 	if (rc) {
211 		/* Error in allocation of event, decrement anchor. Since
212 		 * the event in not created, its destroy() function is never
213 		 * invoked. Adjust the reference counter for the anchor.
214 		 */
215 		paiext_root_free();
216 	}
217 unlock:
218 	mutex_unlock(&paiext_reserve_mutex);
219 	/* If rc is non-zero, no increment of counter/sampler was done. */
220 	return rc;
221 }
222 
paiext_alloc(struct perf_event * event)223 static int paiext_alloc(struct perf_event *event)
224 {
225 	struct cpumask *maskptr;
226 	int cpu, rc = -ENOMEM;
227 
228 	maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
229 	if (!maskptr)
230 		goto out;
231 
232 	for_each_online_cpu(cpu) {
233 		rc = paiext_alloc_cpu(event, cpu);
234 		if (rc) {
235 			for_each_cpu(cpu, maskptr)
236 				paiext_event_destroy_cpu(event, cpu);
237 			kfree(maskptr);
238 			goto out;
239 		}
240 		cpumask_set_cpu(cpu, maskptr);
241 	}
242 
243 	/*
244 	 * On error all cpumask are freed and all events have been destroyed.
245 	 * Save of which CPUs data structures have been allocated for.
246 	 * Release them in paicrypt_event_destroy call back function
247 	 * for this event.
248 	 */
249 	PAI_CPU_MASK(event) = maskptr;
250 	rc = 0;
251 out:
252 	return rc;
253 }
254 
255 /* The PAI extension 1 control block supports up to 128 entries. Return
256  * the index within PAIE1_CB given the event number. Also validate event
257  * number.
258  */
paiext_event_valid(struct perf_event * event)259 static int paiext_event_valid(struct perf_event *event)
260 {
261 	u64 cfg = event->attr.config;
262 
263 	if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
264 		/* Offset NNPA in paiext_cb */
265 		event->hw.config_base = offsetof(struct paiext_cb, acc);
266 		return 0;
267 	}
268 	return -EINVAL;
269 }
270 
271 /* Might be called on different CPU than the one the event is intended for. */
paiext_event_init(struct perf_event * event)272 static int paiext_event_init(struct perf_event *event)
273 {
274 	struct perf_event_attr *a = &event->attr;
275 	int rc;
276 
277 	/* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
278 	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
279 		return -ENOENT;
280 	/* PAI extension event must be valid and in supported range */
281 	rc = paiext_event_valid(event);
282 	if (rc)
283 		return rc;
284 	/* Allow only event NNPA_ALL for sampling. */
285 	if (a->sample_period && a->config != PAI_NNPA_BASE)
286 		return -EINVAL;
287 	/* Prohibit exclude_user event selection */
288 	if (a->exclude_user)
289 		return -EINVAL;
290 	/* Get a page to store last counter values for sampling */
291 	if (a->sample_period) {
292 		PAI_SAVE_AREA(event) = get_zeroed_page(GFP_KERNEL);
293 		if (!PAI_SAVE_AREA(event))
294 			return -ENOMEM;
295 	}
296 
297 	if (event->cpu >= 0)
298 		rc = paiext_alloc_cpu(event, event->cpu);
299 	else
300 		rc = paiext_alloc(event);
301 	if (rc) {
302 		free_page(PAI_SAVE_AREA(event));
303 		return rc;
304 	}
305 	event->destroy = paiext_event_destroy;
306 
307 	if (a->sample_period) {
308 		a->sample_period = 1;
309 		a->freq = 0;
310 		/* Register for paicrypt_sched_task() to be called */
311 		event->attach_state |= PERF_ATTACH_SCHED_CB;
312 		/* Add raw data which are the memory mapped counters */
313 		a->sample_type |= PERF_SAMPLE_RAW;
314 		/* Turn off inheritance */
315 		a->inherit = 0;
316 	}
317 
318 	return 0;
319 }
320 
paiext_getctr(unsigned long * area,int nr)321 static u64 paiext_getctr(unsigned long *area, int nr)
322 {
323 	return area[nr];
324 }
325 
326 /* Read the counter values. Return value from location in buffer. For event
327  * NNPA_ALL sum up all events.
328  */
paiext_getdata(struct perf_event * event)329 static u64 paiext_getdata(struct perf_event *event)
330 {
331 	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
332 	struct paiext_map *cpump = mp->mapptr;
333 	u64 sum = 0;
334 	int i;
335 
336 	if (event->attr.config != PAI_NNPA_BASE)
337 		return paiext_getctr(cpump->area,
338 				     event->attr.config - PAI_NNPA_BASE);
339 
340 	for (i = 1; i <= paiext_cnt; i++)
341 		sum += paiext_getctr(cpump->area, i);
342 
343 	return sum;
344 }
345 
paiext_getall(struct perf_event * event)346 static u64 paiext_getall(struct perf_event *event)
347 {
348 	return paiext_getdata(event);
349 }
350 
paiext_read(struct perf_event * event)351 static void paiext_read(struct perf_event *event)
352 {
353 	u64 prev, new, delta;
354 
355 	prev = local64_read(&event->hw.prev_count);
356 	new = paiext_getall(event);
357 	local64_set(&event->hw.prev_count, new);
358 	delta = new - prev;
359 	local64_add(delta, &event->count);
360 }
361 
paiext_start(struct perf_event * event,int flags)362 static void paiext_start(struct perf_event *event, int flags)
363 {
364 	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
365 	struct paiext_map *cpump = mp->mapptr;
366 	u64 sum;
367 
368 	if (!event->attr.sample_period) {	/* Counting */
369 		sum = paiext_getall(event);	/* Get current value */
370 		local64_set(&event->hw.prev_count, sum);
371 	} else {				/* Sampling */
372 		memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
373 		       PAIE1_CTRBLOCK_SZ);
374 		/* Enable context switch callback for system-wide sampling */
375 		if (!(event->attach_state & PERF_ATTACH_TASK)) {
376 			list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
377 			perf_sched_cb_inc(event->pmu);
378 		} else {
379 			cpump->event = event;
380 		}
381 	}
382 }
383 
paiext_add(struct perf_event * event,int flags)384 static int paiext_add(struct perf_event *event, int flags)
385 {
386 	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
387 	struct paiext_map *cpump = mp->mapptr;
388 	struct paiext_cb *pcb = cpump->paiext_cb;
389 
390 	if (++cpump->active_events == 1) {
391 		get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb);
392 		pcb->acc = virt_to_phys(cpump->area) | 0x1;
393 		/* Enable CPU instruction lookup for PAIE1 control block */
394 		local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
395 	}
396 	if (flags & PERF_EF_START)
397 		paiext_start(event, PERF_EF_RELOAD);
398 	event->hw.state = 0;
399 	return 0;
400 }
401 
402 static void paiext_have_sample(struct perf_event *, struct paiext_map *);
paiext_stop(struct perf_event * event,int flags)403 static void paiext_stop(struct perf_event *event, int flags)
404 {
405 	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
406 	struct paiext_map *cpump = mp->mapptr;
407 
408 	if (!event->attr.sample_period) {	/* Counting */
409 		paiext_read(event);
410 	} else {				/* Sampling */
411 		if (!(event->attach_state & PERF_ATTACH_TASK)) {
412 			list_del(PAI_SWLIST(event));
413 			perf_sched_cb_dec(event->pmu);
414 		} else {
415 			paiext_have_sample(event, cpump);
416 			cpump->event = NULL;
417 		}
418 	}
419 	event->hw.state = PERF_HES_STOPPED;
420 }
421 
paiext_del(struct perf_event * event,int flags)422 static void paiext_del(struct perf_event *event, int flags)
423 {
424 	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
425 	struct paiext_map *cpump = mp->mapptr;
426 	struct paiext_cb *pcb = cpump->paiext_cb;
427 
428 	paiext_stop(event, PERF_EF_UPDATE);
429 	if (--cpump->active_events == 0) {
430 		/* Disable CPU instruction lookup for PAIE1 control block */
431 		local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
432 		pcb->acc = 0;
433 		get_lowcore()->aicd = 0;
434 	}
435 }
436 
437 /* Create raw data and save it in buffer. Returns number of bytes copied.
438  * Saves only positive counter entries of the form
439  * 2 bytes: Number of counter
440  * 8 bytes: Value of counter
441  */
paiext_copy(struct pai_userdata * userdata,unsigned long * area,unsigned long * area_old)442 static size_t paiext_copy(struct pai_userdata *userdata, unsigned long *area,
443 			  unsigned long *area_old)
444 {
445 	int i, outidx = 0;
446 
447 	for (i = 1; i <= paiext_cnt; i++) {
448 		u64 val = paiext_getctr(area, i);
449 		u64 val_old = paiext_getctr(area_old, i);
450 
451 		if (val >= val_old)
452 			val -= val_old;
453 		else
454 			val = (~0ULL - val_old) + val + 1;
455 		if (val) {
456 			userdata[outidx].num = i;
457 			userdata[outidx].value = val;
458 			outidx++;
459 		}
460 	}
461 	return outidx * sizeof(*userdata);
462 }
463 
464 /* Write sample when one or more counters values are nonzero.
465  *
466  * Note: The function paiext_sched_task() and paiext_push_sample() are not
467  * invoked after function paiext_del() has been called because of function
468  * perf_sched_cb_dec().
469  * The function paiext_sched_task() and paiext_push_sample() are only
470  * called when sampling is active. Function perf_sched_cb_inc()
471  * has been invoked to install function paiext_sched_task() as call back
472  * to run at context switch time (see paiext_add()).
473  *
474  * This causes function perf_event_context_sched_out() and
475  * perf_event_context_sched_in() to check whether the PMU has installed an
476  * sched_task() callback. That callback is not active after paiext_del()
477  * returns and has deleted the event on that CPU.
478  */
paiext_push_sample(size_t rawsize,struct paiext_map * cpump,struct perf_event * event)479 static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
480 			      struct perf_event *event)
481 {
482 	struct perf_sample_data data;
483 	struct perf_raw_record raw;
484 	struct pt_regs regs;
485 	int overflow;
486 
487 	/* Setup perf sample */
488 	memset(&regs, 0, sizeof(regs));
489 	memset(&raw, 0, sizeof(raw));
490 	memset(&data, 0, sizeof(data));
491 	perf_sample_data_init(&data, 0, event->hw.last_period);
492 	if (event->attr.sample_type & PERF_SAMPLE_TID) {
493 		data.tid_entry.pid = task_tgid_nr(current);
494 		data.tid_entry.tid = task_pid_nr(current);
495 	}
496 	if (event->attr.sample_type & PERF_SAMPLE_TIME)
497 		data.time = event->clock();
498 	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
499 		data.id = event->id;
500 	if (event->attr.sample_type & PERF_SAMPLE_CPU)
501 		data.cpu_entry.cpu = smp_processor_id();
502 	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
503 		raw.frag.size = rawsize;
504 		raw.frag.data = cpump->save;
505 		perf_sample_save_raw_data(&data, event, &raw);
506 	}
507 
508 	overflow = perf_event_overflow(event, &data, &regs);
509 	perf_event_update_userpage(event);
510 	/* Save NNPA lowcore area after read in event */
511 	memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
512 	       PAIE1_CTRBLOCK_SZ);
513 	return overflow;
514 }
515 
516 /* Check if there is data to be saved on schedule out of a task. */
paiext_have_sample(struct perf_event * event,struct paiext_map * cpump)517 static void paiext_have_sample(struct perf_event *event,
518 			       struct paiext_map *cpump)
519 {
520 	size_t rawsize;
521 
522 	if (!event)
523 		return;
524 	rawsize = paiext_copy(cpump->save, cpump->area,
525 			      (unsigned long *)PAI_SAVE_AREA(event));
526 	if (rawsize)			/* Incremented counters */
527 		paiext_push_sample(rawsize, cpump, event);
528 }
529 
530 /* Check if there is data to be saved on schedule out of a task. */
paiext_have_samples(void)531 static void paiext_have_samples(void)
532 {
533 	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
534 	struct paiext_map *cpump = mp->mapptr;
535 	struct perf_event *event;
536 
537 	list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
538 		paiext_have_sample(event, cpump);
539 }
540 
541 /* Called on schedule-in and schedule-out. No access to event structure,
542  * but for sampling only event NNPA_ALL is allowed.
543  */
paiext_sched_task(struct perf_event_pmu_context * pmu_ctx,struct task_struct * task,bool sched_in)544 static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
545 			      struct task_struct *task, bool sched_in)
546 {
547 	/* We started with a clean page on event installation. So read out
548 	 * results on schedule_out and if page was dirty, save old values.
549 	 */
550 	if (!sched_in)
551 		paiext_have_samples();
552 }
553 
554 /* Attribute definitions for pai extension1 interface. As with other CPU
555  * Measurement Facilities, there is one attribute per mapped counter.
556  * The number of mapped counters may vary per machine generation. Use
557  * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
558  * to determine the number of mapped counters. The instructions returns
559  * a positive number, which is the highest number of supported counters.
560  * All counters less than this number are also supported, there are no
561  * holes. A returned number of zero means no support for mapped counters.
562  *
563  * The identification of the counter is a unique number. The chosen range
564  * is 0x1800 + offset in mapped kernel page.
565  * All CPU Measurement Facility counters identifiers must be unique and
566  * the numbers from 0 to 496 are already used for the CPU Measurement
567  * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography
568  * counters.
569  * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
570  * used for the CPU Measurement Sampling facility.
571  */
572 PMU_FORMAT_ATTR(event, "config:0-63");
573 
574 static struct attribute *paiext_format_attr[] = {
575 	&format_attr_event.attr,
576 	NULL,
577 };
578 
579 static struct attribute_group paiext_events_group = {
580 	.name = "events",
581 	.attrs = NULL,			/* Filled in attr_event_init() */
582 };
583 
584 static struct attribute_group paiext_format_group = {
585 	.name = "format",
586 	.attrs = paiext_format_attr,
587 };
588 
589 static const struct attribute_group *paiext_attr_groups[] = {
590 	&paiext_events_group,
591 	&paiext_format_group,
592 	NULL,
593 };
594 
595 /* Performance monitoring unit for mapped counters */
596 static struct pmu paiext = {
597 	.task_ctx_nr  = perf_hw_context,
598 	.event_init   = paiext_event_init,
599 	.add	      = paiext_add,
600 	.del	      = paiext_del,
601 	.start	      = paiext_start,
602 	.stop	      = paiext_stop,
603 	.read	      = paiext_read,
604 	.sched_task   = paiext_sched_task,
605 	.attr_groups  = paiext_attr_groups,
606 };
607 
/* List of symbolic PAI extension 1 NNPA counter names. The index is the
 * counter number, which is the event number minus PAI_NNPA_BASE.
 */
static const char * const paiext_ctrnames[] = {
	[0] = "NNPA_ALL",
	[1] = "NNPA_ADD",
	[2] = "NNPA_SUB",
	[3] = "NNPA_MUL",
	[4] = "NNPA_DIV",
	[5] = "NNPA_MIN",
	[6] = "NNPA_MAX",
	[7] = "NNPA_LOG",
	[8] = "NNPA_EXP",
	[9] = "NNPA_IBM_RESERVED_9",
	[10] = "NNPA_RELU",
	[11] = "NNPA_TANH",
	[12] = "NNPA_SIGMOID",
	[13] = "NNPA_SOFTMAX",
	[14] = "NNPA_BATCHNORM",
	[15] = "NNPA_MAXPOOL2D",
	[16] = "NNPA_AVGPOOL2D",
	[17] = "NNPA_LSTMACT",
	[18] = "NNPA_GRUACT",
	[19] = "NNPA_CONVOLUTION",
	[20] = "NNPA_MATMUL_OP",
	[21] = "NNPA_MATMUL_OP_BCAST23",
	[22] = "NNPA_SMALLBATCH",
	[23] = "NNPA_LARGEDIM",
	[24] = "NNPA_SMALLTENSOR",
	[25] = "NNPA_1MFRAME",
	[26] = "NNPA_2GFRAME",
	[27] = "NNPA_ACCESSEXCEPT",
	[28] = "NNPA_TRANSFORM",
	[29] = "NNPA_GELU",
	[30] = "NNPA_MOMENTS",
	[31] = "NNPA_LAYERNORM",
	[32] = "NNPA_MATMUL_OP_BCAST1",
	[33] = "NNPA_SQRT",
	[34] = "NNPA_INVSQRT",
	[35] = "NNPA_NORM",
	[36] = "NNPA_REDUCE",
};
648 
attr_event_free(struct attribute ** attrs,int num)649 static void __init attr_event_free(struct attribute **attrs, int num)
650 {
651 	struct perf_pmu_events_attr *pa;
652 	struct device_attribute *dap;
653 	int i;
654 
655 	for (i = 0; i < num; i++) {
656 		dap = container_of(attrs[i], struct device_attribute, attr);
657 		pa = container_of(dap, struct perf_pmu_events_attr, attr);
658 		kfree(pa);
659 	}
660 	kfree(attrs);
661 }
662 
attr_event_init_one(struct attribute ** attrs,int num)663 static int __init attr_event_init_one(struct attribute **attrs, int num)
664 {
665 	struct perf_pmu_events_attr *pa;
666 
667 	/* Index larger than array_size, no counter name available */
668 	if (num >= ARRAY_SIZE(paiext_ctrnames)) {
669 		attrs[num] = NULL;
670 		return 0;
671 	}
672 
673 	pa = kzalloc(sizeof(*pa), GFP_KERNEL);
674 	if (!pa)
675 		return -ENOMEM;
676 
677 	sysfs_attr_init(&pa->attr.attr);
678 	pa->id = PAI_NNPA_BASE + num;
679 	pa->attr.attr.name = paiext_ctrnames[num];
680 	pa->attr.attr.mode = 0444;
681 	pa->attr.show = cpumf_events_sysfs_show;
682 	pa->attr.store = NULL;
683 	attrs[num] = &pa->attr.attr;
684 	return 0;
685 }
686 
687 /* Create PMU sysfs event attributes on the fly. */
attr_event_init(void)688 static int __init attr_event_init(void)
689 {
690 	struct attribute **attrs;
691 	int ret, i;
692 
693 	attrs = kmalloc_array(paiext_cnt + 2, sizeof(*attrs), GFP_KERNEL);
694 	if (!attrs)
695 		return -ENOMEM;
696 	for (i = 0; i <= paiext_cnt; i++) {
697 		ret = attr_event_init_one(attrs, i);
698 		if (ret) {
699 			attr_event_free(attrs, i);
700 			return ret;
701 		}
702 	}
703 	attrs[i] = NULL;
704 	paiext_events_group.attrs = attrs;
705 	return 0;
706 }
707 
paiext_init(void)708 static int __init paiext_init(void)
709 {
710 	struct qpaci_info_block ib;
711 	int rc = -ENOMEM;
712 
713 	if (!test_facility(197))
714 		return 0;
715 
716 	qpaci(&ib);
717 	paiext_cnt = ib.num_nnpa;
718 	if (paiext_cnt >= PAI_NNPA_MAXCTR)
719 		paiext_cnt = PAI_NNPA_MAXCTR;
720 	if (!paiext_cnt)
721 		return 0;
722 
723 	rc = attr_event_init();
724 	if (rc) {
725 		pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
726 		return rc;
727 	}
728 
729 	/* Setup s390dbf facility */
730 	paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
731 	if (!paiext_dbg) {
732 		pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
733 		rc = -ENOMEM;
734 		goto out_init;
735 	}
736 	debug_register_view(paiext_dbg, &debug_sprintf_view);
737 
738 	rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
739 	if (rc) {
740 		pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
741 		       "rc=%i\n", rc);
742 		goto out_pmu;
743 	}
744 
745 	return 0;
746 
747 out_pmu:
748 	debug_unregister_view(paiext_dbg, &debug_sprintf_view);
749 	debug_unregister(paiext_dbg);
750 out_init:
751 	attr_event_free(paiext_events_group.attrs,
752 			ARRAY_SIZE(paiext_ctrnames) + 1);
753 	return rc;
754 }
755 
756 device_initcall(paiext_init);
757