1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright IBM Corp. 2018
4 * Auxtrace support for s390 CPU-Measurement Sampling Facility
5 *
6 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
7 *
8 * Auxiliary traces are collected during 'perf record' using rbd000 event.
9 * Several PERF_RECORD_XXX are generated during recording:
10 *
11 * PERF_RECORD_AUX:
12 * Records that new data landed in the AUX buffer part.
13 * PERF_RECORD_AUXTRACE:
14 * Defines auxtrace data. Followed by the actual data. The contents of
15 * the auxtrace data is dependent on the event and the CPU.
16 * This record is generated by perf record command. For details
17 * see Documentation/perf.data-file-format.txt.
18 * PERF_RECORD_AUXTRACE_INFO:
19 * Defines a table of contains for PERF_RECORD_AUXTRACE records. This
20 * record is generated during 'perf record' command. Each record contains
21 * up to 256 entries describing offset and size of the AUXTRACE data in the
22 * perf.data file.
23 * PERF_RECORD_AUXTRACE_ERROR:
24 * Indicates an error during AUXTRACE collection such as buffer overflow.
25 * PERF_RECORD_FINISHED_ROUND:
26 * Perf events are not necessarily in time stamp order, as they can be
27 * collected in parallel on different CPUs. If the events should be
28 * processed in time order they need to be sorted first.
29 * Perf report guarantees that there is no reordering over a
30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
31 * time stamp lower than this record are processed (and displayed) before
32 * the succeeding perf record are processed.
33 *
34 * These records are evaluated during perf report command.
35 *
36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
38 * below.
39 * Auxiliary trace data is collected per CPU. To merge the data into the report
40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
41 * data is in ascending order.
42 *
43 * Each queue has a double linked list of auxtrace_buffers. This list contains
44 * the offset and size of a CPU's auxtrace data. During auxtrace processing
45 * the data portion is mmap()'ed.
46 *
47 * To sort the queues in chronological order, all queue access is controlled
48 * by the auxtrace_heap. This is basically a stack, each stack element has two
49 * entries, the queue number and a time stamp. However the stack is sorted by
50 * the time stamps. The highest time stamp is at the bottom the lowest
51 * (nearest) time stamp is at the top. That sort order is maintained at all
52 * times!
53 *
54 * After the auxtrace infrastructure has been setup, the auxtrace queues are
55 * filled with data (offset/size pairs) and the auxtrace_heap is populated.
56 *
57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of
59 * the perf record is compared with the time stamp located on the auxtrace_heap
60 * top element. If that time stamp is lower than the time stamp from the
61 * record sample, the auxtrace queues will be processed. As auxtrace queues
62 * control many auxtrace_buffers and each buffer can be quite large, the
63 * auxtrace buffer might be processed only partially. In this case the
64 * position in the auxtrace_buffer of that queue is remembered and the time
65 * stamp of the last processed entry of the auxtrace_buffer replaces the
66 * current auxtrace_heap top.
67 *
68 * 3. Auxtrace_queues might run of out data and are fed by the
69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
70 *
71 * Event Generation
72 * Each sampling-data entry in the auxiliary trace data generates a perf sample.
73 * This sample is filled
74 * with data from the auxtrace such as PID/TID, instruction address, CPU state,
75 * etc. This sample is processed with perf_session__deliver_synth_event() to
76 * be included into the GUI.
77 *
78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
79 * auxiliary traces entries until the time stamp of this record is reached
80 * auxtrace_heap top. This is triggered by ordered_event->deliver().
81 *
82 *
83 * Perf event processing.
84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
85 * This is the function call sequence:
86 *
87 * __cmd_report()
88 * |
89 * perf_session__process_events()
90 * |
91 * __perf_session__process_events()
92 * |
93 * perf_session__process_event()
94 * | This functions splits the PERF_RECORD_XXX records.
95 * | - Those generated by perf record command (type number equal or higher
96 * | than PERF_RECORD_USER_TYPE_START) are handled by
97 * | perf_session__process_user_event(see below)
98 * | - Those generated by the kernel are handled by
99 * | evlist__parse_sample_timestamp()
100 * |
101 * evlist__parse_sample_timestamp()
102 * | Extract time stamp from sample data.
103 * |
104 * perf_session__queue_event()
105 * | If timestamp is positive the sample is entered into an ordered_event
106 * | list, sort order is the timestamp. The event processing is deferred until
107 * | later (see perf_session__process_user_event()).
108 * | Other timestamps (0 or -1) are handled immediately by
109 * | perf_session__deliver_event(). These are events generated at start up
110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
111 * | records. They are needed to create a list of running processes and its
112 * | memory mappings and layout. They are needed at the beginning to enable
113 * | command perf report to create process trees and memory mappings.
114 * |
115 * perf_session__deliver_event()
116 * | Delivers a PERF_RECORD_XXX entry for handling.
117 * |
118 * auxtrace__process_event()
119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
120 * | time stamps from the auxiliary trace buffers. This enables
121 * | synchronization between auxiliary trace data and the events on the
122 * | perf.data file.
123 * |
124 * machine__deliver_event()
125 * | Handles the PERF_RECORD_XXX event. This depends on the record type.
126 * It might update the process tree, update a process memory map or enter
127 * a sample with IP and call back chain data into GUI data pool.
128 *
129 *
130 * Deferred processing determined by perf_session__process_user_event() is
131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
132 * are generated during command perf record.
133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
135 * built up while reading the perf.data file.
136 * Each event is now processed by calling perf_session__deliver_event().
137 * This enables time synchronization between the data in the perf.data file and
138 * the data in the auxiliary trace buffers.
139 */
140
141 #include <endian.h>
142 #include <errno.h>
143 #include <byteswap.h>
144 #include <inttypes.h>
145 #include <linux/kernel.h>
146 #include <linux/types.h>
147 #include <linux/bitops.h>
148 #include <linux/log2.h>
149 #include <linux/zalloc.h>
150
151 #include <sys/stat.h>
152 #include <sys/types.h>
153
154 #include "color.h"
155 #include "evsel.h"
156 #include "evlist.h"
157 #include "machine.h"
158 #include "session.h"
159 #include "tool.h"
160 #include "debug.h"
161 #include "auxtrace.h"
162 #include "s390-cpumsf.h"
163 #include "s390-cpumsf-kernel.h"
164 #include "s390-cpumcf-kernel.h"
165 #include "config.h"
166 #include "util/sample.h"
167
168 struct s390_cpumsf {
169 struct auxtrace auxtrace;
170 struct auxtrace_queues queues;
171 struct auxtrace_heap heap;
172 struct perf_session *session;
173 struct machine *machine;
174 u32 auxtrace_type;
175 u32 pmu_type;
176 u16 machine_type;
177 bool data_queued;
178 bool use_logfile;
179 char *logdir;
180 };
181
182 struct s390_cpumsf_queue {
183 struct s390_cpumsf *sf;
184 unsigned int queue_nr;
185 struct auxtrace_buffer *buffer;
186 int cpu;
187 FILE *logfile;
188 FILE *logfile_ctr;
189 };
190
191 /* Check if the raw data should be dumped to file. If this is the case and
192 * the file to dump to has not been opened for writing, do so.
193 *
194 * Return 0 on success and greater zero on error so processing continues.
195 */
s390_cpumcf_dumpctr(struct s390_cpumsf * sf,struct perf_sample * sample)196 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
197 struct perf_sample *sample)
198 {
199 struct s390_cpumsf_queue *sfq;
200 struct auxtrace_queue *q;
201 int rc = 0;
202
203 if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
204 return rc;
205
206 q = &sf->queues.queue_array[sample->cpu];
207 sfq = q->priv;
208 if (!sfq) /* Queue not yet allocated */
209 return rc;
210
211 if (!sfq->logfile_ctr) {
212 char *name;
213
214 rc = (sf->logdir)
215 ? asprintf(&name, "%s/aux.ctr.%02x",
216 sf->logdir, sample->cpu)
217 : asprintf(&name, "aux.ctr.%02x", sample->cpu);
218 if (rc > 0)
219 sfq->logfile_ctr = fopen(name, "w");
220 if (sfq->logfile_ctr == NULL) {
221 pr_err("Failed to open counter set log file %s, "
222 "continue...\n", name);
223 rc = 1;
224 }
225 free(name);
226 }
227
228 if (sfq->logfile_ctr) {
229 /* See comment above for -4 */
230 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
231 sfq->logfile_ctr);
232 if (n != 1) {
233 pr_err("Failed to write counter set data\n");
234 rc = 1;
235 }
236 }
237 return rc;
238 }
239
240 /* Display s390 CPU measurement facility basic-sampling data entry
241 * Data written on s390 in big endian byte order and contains bit
242 * fields across byte boundaries.
243 */
s390_cpumsf_basic_show(const char * color,size_t pos,struct hws_basic_entry * basicp)244 static bool s390_cpumsf_basic_show(const char *color, size_t pos,
245 struct hws_basic_entry *basicp)
246 {
247 struct hws_basic_entry *basic = basicp;
248 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
249 struct hws_basic_entry local;
250 unsigned long long word = be64toh(*(unsigned long long *)basicp);
251
252 memset(&local, 0, sizeof(local));
253 local.def = be16toh(basicp->def);
254 local.prim_asn = word & 0xffff;
255 local.CL = word >> 30 & 0x3;
256 local.I = word >> 32 & 0x1;
257 local.AS = word >> 33 & 0x3;
258 local.P = word >> 35 & 0x1;
259 local.W = word >> 36 & 0x1;
260 local.T = word >> 37 & 0x1;
261 local.U = word >> 40 & 0xf;
262 local.ia = be64toh(basicp->ia);
263 local.gpp = be64toh(basicp->gpp);
264 local.hpp = be64toh(basicp->hpp);
265 basic = &local;
266 #endif
267 if (basic->def != 1) {
268 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
269 return false;
270 }
271 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
272 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
273 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
274 pos, basic->def, basic->U,
275 basic->T ? 'T' : ' ',
276 basic->W ? 'W' : ' ',
277 basic->P ? 'P' : ' ',
278 basic->I ? 'I' : ' ',
279 basic->AS, basic->prim_asn, basic->ia, basic->CL,
280 basic->hpp, basic->gpp);
281 return true;
282 }
283
284 /* Display s390 CPU measurement facility diagnostic-sampling data entry.
285 * Data written on s390 in big endian byte order and contains bit
286 * fields across byte boundaries.
287 */
s390_cpumsf_diag_show(const char * color,size_t pos,struct hws_diag_entry * diagp)288 static bool s390_cpumsf_diag_show(const char *color, size_t pos,
289 struct hws_diag_entry *diagp)
290 {
291 struct hws_diag_entry *diag = diagp;
292 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
293 struct hws_diag_entry local;
294 unsigned long long word = be64toh(*(unsigned long long *)diagp);
295
296 local.def = be16toh(diagp->def);
297 local.I = word >> 32 & 0x1;
298 diag = &local;
299 #endif
300 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
301 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
302 return false;
303 }
304 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
305 pos, diag->def, diag->I ? 'I' : ' ');
306 return true;
307 }
308
309 /* Return TOD timestamp contained in an trailer entry */
trailer_timestamp(struct hws_trailer_entry * te,int idx)310 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
311 int idx)
312 {
313 /* te->t set: TOD in STCKE format, bytes 8-15
314 * to->t not set: TOD in STCK format, bytes 0-7
315 */
316 unsigned long long ts;
317
318 memcpy(&ts, &te->timestamp[idx], sizeof(ts));
319 return be64toh(ts);
320 }
321
322 /* Display s390 CPU measurement facility trailer entry */
s390_cpumsf_trailer_show(const char * color,size_t pos,struct hws_trailer_entry * te)323 static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
324 struct hws_trailer_entry *te)
325 {
326 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
327 struct hws_trailer_entry local;
328 const unsigned long long flags = be64toh(te->flags);
329
330 memset(&local, 0, sizeof(local));
331 local.f = flags >> 63 & 0x1;
332 local.a = flags >> 62 & 0x1;
333 local.t = flags >> 61 & 0x1;
334 local.bsdes = be16toh((flags >> 16 & 0xffff));
335 local.dsdes = be16toh((flags & 0xffff));
336 memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
337 local.overflow = be64toh(te->overflow);
338 local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
339 local.progusage2 = be64toh(te->progusage2);
340 te = &local;
341 #endif
342 if (te->bsdes != sizeof(struct hws_basic_entry)) {
343 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
344 return false;
345 }
346 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
347 " dsdes:%d Overflow:%lld Time:%#llx\n"
348 "\t\tC:%d TOD:%#lx\n",
349 pos,
350 te->f ? 'F' : ' ',
351 te->a ? 'A' : ' ',
352 te->t ? 'T' : ' ',
353 te->bsdes, te->dsdes, te->overflow,
354 trailer_timestamp(te, te->clock_base),
355 te->clock_base, te->progusage2);
356 return true;
357 }
358
359 /* Test a sample data block. It must be 4KB or a multiple thereof in size and
360 * 4KB page aligned. Each sample data page has a trailer entry at the
361 * end which contains the sample entry data sizes.
362 *
363 * Return true if the sample data block passes the checks and set the
364 * basic set entry size and diagnostic set entry size.
365 *
366 * Return false on failure.
367 *
368 * Note: Old hardware does not set the basic or diagnostic entry sizes
369 * in the trailer entry. Use the type number instead.
370 */
s390_cpumsf_validate(int machine_type,unsigned char * buf,size_t len,unsigned short * bsdes,unsigned short * dsdes)371 static bool s390_cpumsf_validate(int machine_type,
372 unsigned char *buf, size_t len,
373 unsigned short *bsdes,
374 unsigned short *dsdes)
375 {
376 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
377 struct hws_trailer_entry *te;
378
379 *dsdes = *bsdes = 0;
380 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
381 return false;
382 if (be16toh(basic->def) != 1) /* No basic set entry, must be first */
383 return false;
384 /* Check for trailer entry at end of SDB */
385 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
386 - sizeof(*te));
387 *bsdes = be16toh(te->bsdes);
388 *dsdes = be16toh(te->dsdes);
389 if (!te->bsdes && !te->dsdes) {
390 /* Very old hardware, use CPUID */
391 switch (machine_type) {
392 case 2097:
393 case 2098:
394 *dsdes = 64;
395 *bsdes = 32;
396 break;
397 case 2817:
398 case 2818:
399 *dsdes = 74;
400 *bsdes = 32;
401 break;
402 case 2827:
403 case 2828:
404 *dsdes = 85;
405 *bsdes = 32;
406 break;
407 case 2964:
408 case 2965:
409 *dsdes = 112;
410 *bsdes = 32;
411 break;
412 default:
413 /* Illegal trailer entry */
414 return false;
415 }
416 }
417 return true;
418 }
419
420 /* Return true if there is room for another entry */
s390_cpumsf_reached_trailer(size_t entry_sz,size_t pos)421 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
422 {
423 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
424
425 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
426 return false;
427 return true;
428 }
429
430 /* Dump an auxiliary buffer. These buffers are multiple of
431 * 4KB SDB pages.
432 */
s390_cpumsf_dump(struct s390_cpumsf * sf,unsigned char * buf,size_t len)433 static void s390_cpumsf_dump(struct s390_cpumsf *sf,
434 unsigned char *buf, size_t len)
435 {
436 const char *color = PERF_COLOR_BLUE;
437 struct hws_basic_entry *basic;
438 struct hws_diag_entry *diag;
439 unsigned short bsdes, dsdes;
440 size_t pos = 0;
441
442 color_fprintf(stdout, color,
443 ". ... s390 AUX data: size %zu bytes\n",
444 len);
445
446 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
447 &dsdes)) {
448 pr_err("Invalid AUX trace data block size:%zu"
449 " (type:%d bsdes:%hd dsdes:%hd)\n",
450 len, sf->machine_type, bsdes, dsdes);
451 return;
452 }
453
454 /* s390 kernel always returns 4KB blocks fully occupied,
455 * no partially filled SDBs.
456 */
457 while (pos < len) {
458 /* Handle Basic entry */
459 basic = (struct hws_basic_entry *)(buf + pos);
460 if (s390_cpumsf_basic_show(color, pos, basic))
461 pos += bsdes;
462 else
463 return;
464
465 /* Handle Diagnostic entry */
466 diag = (struct hws_diag_entry *)(buf + pos);
467 if (s390_cpumsf_diag_show(color, pos, diag))
468 pos += dsdes;
469 else
470 return;
471
472 /* Check for trailer entry */
473 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
474 /* Show trailer entry */
475 struct hws_trailer_entry te;
476
477 pos = (pos + S390_CPUMSF_PAGESZ)
478 & ~(S390_CPUMSF_PAGESZ - 1);
479 pos -= sizeof(te);
480 memcpy(&te, buf + pos, sizeof(te));
481 /* Set descriptor sizes in case of old hardware
482 * where these values are not set.
483 */
484 te.bsdes = bsdes;
485 te.dsdes = dsdes;
486 if (s390_cpumsf_trailer_show(color, pos, &te))
487 pos += sizeof(te);
488 else
489 return;
490 }
491 }
492 }
493
s390_cpumsf_dump_event(struct s390_cpumsf * sf,unsigned char * buf,size_t len)494 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
495 size_t len)
496 {
497 printf(".\n");
498 s390_cpumsf_dump(sf, buf, len);
499 }
500
501 #define S390_LPP_PID_MASK 0xffffffff
502
s390_cpumsf_make_event(size_t pos,struct hws_basic_entry * basic,struct s390_cpumsf_queue * sfq)503 static bool s390_cpumsf_make_event(size_t pos,
504 struct hws_basic_entry *basic,
505 struct s390_cpumsf_queue *sfq)
506 {
507 struct perf_sample sample = {
508 .ip = basic->ia,
509 .pid = basic->hpp & S390_LPP_PID_MASK,
510 .tid = basic->hpp & S390_LPP_PID_MASK,
511 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
512 .cpu = sfq->cpu,
513 .period = 1
514 };
515 union perf_event event;
516
517 memset(&event, 0, sizeof(event));
518 if (basic->CL == 1) /* Native LPAR mode */
519 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
520 : PERF_RECORD_MISC_KERNEL;
521 else if (basic->CL == 2) /* Guest kernel/user space */
522 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
523 : PERF_RECORD_MISC_GUEST_KERNEL;
524 else if (basic->gpp || basic->prim_asn != 0xffff)
525 /* Use heuristics on old hardware */
526 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
527 : PERF_RECORD_MISC_GUEST_KERNEL;
528 else
529 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
530 : PERF_RECORD_MISC_KERNEL;
531
532 event.sample.header.type = PERF_RECORD_SAMPLE;
533 event.sample.header.misc = sample.cpumode;
534 event.sample.header.size = sizeof(struct perf_event_header);
535
536 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
537 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
538 sample.tid, sample.cpumode, sample.cpu);
539 if (perf_session__deliver_synth_event(sfq->sf->session, &event,
540 &sample)) {
541 pr_err("s390 Auxiliary Trace: failed to deliver event\n");
542 return false;
543 }
544 return true;
545 }
546
get_trailer_time(const unsigned char * buf)547 static unsigned long long get_trailer_time(const unsigned char *buf)
548 {
549 struct hws_trailer_entry *te;
550 unsigned long long aux_time, progusage2;
551 bool clock_base;
552
553 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
554 - sizeof(*te));
555
556 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
557 clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
558 progusage2 = be64toh(te->progusage[1]);
559 #else
560 clock_base = te->clock_base;
561 progusage2 = te->progusage2;
562 #endif
563 if (!clock_base) /* TOD_CLOCK_BASE value missing */
564 return 0;
565
566 /* Correct calculation to convert time stamp in trailer entry to
567 * nano seconds (taken from arch/s390 function tod_to_ns()).
568 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
569 */
570 aux_time = trailer_timestamp(te, clock_base) - progusage2;
571 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
572 return aux_time;
573 }
574
575 /* Process the data samples of a single queue. The first parameter is a
576 * pointer to the queue, the second parameter is the time stamp. This
577 * is the time stamp:
578 * - of the event that triggered this processing.
579 * - or the time stamp when the last processing of this queue stopped.
580 * In this case it stopped at a 4KB page boundary and record the
581 * position on where to continue processing on the next invocation
582 * (see buffer->use_data and buffer->use_size).
583 *
584 * When this function returns the second parameter is updated to
585 * reflect the time stamp of the last processed auxiliary data entry
586 * (taken from the trailer entry of that page). The caller uses this
587 * returned time stamp to record the last processed entry in this
588 * queue.
589 *
590 * The function returns:
591 * 0: Processing successful. The second parameter returns the
592 * time stamp from the trailer entry until which position
593 * processing took place. Subsequent calls resume from this
594 * position.
595 * <0: An error occurred during processing. The second parameter
596 * returns the maximum time stamp.
597 * >0: Done on this queue. The second parameter returns the
598 * maximum time stamp.
599 */
s390_cpumsf_samples(struct s390_cpumsf_queue * sfq,u64 * ts)600 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
601 {
602 struct s390_cpumsf *sf = sfq->sf;
603 unsigned char *buf = sfq->buffer->use_data;
604 size_t len = sfq->buffer->use_size;
605 struct hws_basic_entry *basic;
606 unsigned short bsdes, dsdes;
607 size_t pos = 0;
608 int err = 1;
609 u64 aux_ts;
610
611 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
612 &dsdes)) {
613 *ts = ~0ULL;
614 return -1;
615 }
616
617 /* Get trailer entry time stamp and check if entries in
618 * this auxiliary page are ready for processing. If the
619 * time stamp of the first entry is too high, whole buffer
620 * can be skipped. In this case return time stamp.
621 */
622 aux_ts = get_trailer_time(buf);
623 if (!aux_ts) {
624 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
625 (s64)sfq->buffer->data_offset);
626 aux_ts = ~0ULL;
627 goto out;
628 }
629 if (aux_ts > *ts) {
630 *ts = aux_ts;
631 return 0;
632 }
633
634 while (pos < len) {
635 /* Handle Basic entry */
636 basic = (struct hws_basic_entry *)(buf + pos);
637 if (s390_cpumsf_make_event(pos, basic, sfq))
638 pos += bsdes;
639 else {
640 err = -EBADF;
641 goto out;
642 }
643
644 pos += dsdes; /* Skip diagnostic entry */
645
646 /* Check for trailer entry */
647 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
648 pos = (pos + S390_CPUMSF_PAGESZ)
649 & ~(S390_CPUMSF_PAGESZ - 1);
650 /* Check existence of next page */
651 if (pos >= len)
652 break;
653 aux_ts = get_trailer_time(buf + pos);
654 if (!aux_ts) {
655 aux_ts = ~0ULL;
656 goto out;
657 }
658 if (aux_ts > *ts) {
659 *ts = aux_ts;
660 sfq->buffer->use_data += pos;
661 sfq->buffer->use_size -= pos;
662 return 0;
663 }
664 }
665 }
666 out:
667 *ts = aux_ts;
668 sfq->buffer->use_size = 0;
669 sfq->buffer->use_data = NULL;
670 return err; /* Buffer completely scanned or error */
671 }
672
673 /* Run the s390 auxiliary trace decoder.
674 * Select the queue buffer to operate on, the caller already selected
675 * the proper queue, depending on second parameter 'ts'.
676 * This is the time stamp until which the auxiliary entries should
677 * be processed. This value is updated by called functions and
678 * returned to the caller.
679 *
680 * Resume processing in the current buffer. If there is no buffer
681 * get a new buffer from the queue and setup start position for
682 * processing.
683 * When a buffer is completely processed remove it from the queue
684 * before returning.
685 *
686 * This function returns
687 * 1: When the queue is empty. Second parameter will be set to
688 * maximum time stamp.
689 * 0: Normal processing done.
690 * <0: Error during queue buffer setup. This causes the caller
691 * to stop processing completely.
692 */
s390_cpumsf_run_decoder(struct s390_cpumsf_queue * sfq,u64 * ts)693 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
694 u64 *ts)
695 {
696
697 struct auxtrace_buffer *buffer;
698 struct auxtrace_queue *queue;
699 int err;
700
701 queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
702
703 /* Get buffer and last position in buffer to resume
704 * decoding the auxiliary entries. One buffer might be large
705 * and decoding might stop in between. This depends on the time
706 * stamp of the trailer entry in each page of the auxiliary
707 * data and the time stamp of the event triggering the decoding.
708 */
709 if (sfq->buffer == NULL) {
710 sfq->buffer = buffer = auxtrace_buffer__next(queue,
711 sfq->buffer);
712 if (!buffer) {
713 *ts = ~0ULL;
714 return 1; /* Processing done on this queue */
715 }
716 /* Start with a new buffer on this queue */
717 if (buffer->data) {
718 buffer->use_size = buffer->size;
719 buffer->use_data = buffer->data;
720 }
721 if (sfq->logfile) { /* Write into log file */
722 size_t rc = fwrite(buffer->data, buffer->size, 1,
723 sfq->logfile);
724 if (rc != 1)
725 pr_err("Failed to write auxiliary data\n");
726 }
727 } else
728 buffer = sfq->buffer;
729
730 if (!buffer->data) {
731 int fd = perf_data__fd(sfq->sf->session->data);
732
733 buffer->data = auxtrace_buffer__get_data(buffer, fd);
734 if (!buffer->data)
735 return -ENOMEM;
736 buffer->use_size = buffer->size;
737 buffer->use_data = buffer->data;
738
739 if (sfq->logfile) { /* Write into log file */
740 size_t rc = fwrite(buffer->data, buffer->size, 1,
741 sfq->logfile);
742 if (rc != 1)
743 pr_err("Failed to write auxiliary data\n");
744 }
745 }
746 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
747 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
748 buffer->size, buffer->use_size);
749 err = s390_cpumsf_samples(sfq, ts);
750
751 /* If non-zero, there is either an error (err < 0) or the buffer is
752 * completely done (err > 0). The error is unrecoverable, usually
753 * some descriptors could not be read successfully, so continue with
754 * the next buffer.
755 * In both cases the parameter 'ts' has been updated.
756 */
757 if (err) {
758 sfq->buffer = NULL;
759 list_del_init(&buffer->list);
760 auxtrace_buffer__free(buffer);
761 if (err > 0) /* Buffer done, no error */
762 err = 0;
763 }
764 return err;
765 }
766
767 static struct s390_cpumsf_queue *
s390_cpumsf_alloc_queue(struct s390_cpumsf * sf,unsigned int queue_nr)768 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
769 {
770 struct s390_cpumsf_queue *sfq;
771
772 sfq = zalloc(sizeof(struct s390_cpumsf_queue));
773 if (sfq == NULL)
774 return NULL;
775
776 sfq->sf = sf;
777 sfq->queue_nr = queue_nr;
778 sfq->cpu = -1;
779 if (sf->use_logfile) {
780 char *name;
781 int rc;
782
783 rc = (sf->logdir)
784 ? asprintf(&name, "%s/aux.smp.%02x",
785 sf->logdir, queue_nr)
786 : asprintf(&name, "aux.smp.%02x", queue_nr);
787 if (rc > 0)
788 sfq->logfile = fopen(name, "w");
789 if (sfq->logfile == NULL) {
790 pr_err("Failed to open auxiliary log file %s,"
791 "continue...\n", name);
792 sf->use_logfile = false;
793 }
794 free(name);
795 }
796 return sfq;
797 }
798
s390_cpumsf_setup_queue(struct s390_cpumsf * sf,struct auxtrace_queue * queue,unsigned int queue_nr,u64 ts)799 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
800 struct auxtrace_queue *queue,
801 unsigned int queue_nr, u64 ts)
802 {
803 struct s390_cpumsf_queue *sfq = queue->priv;
804
805 if (list_empty(&queue->head))
806 return 0;
807
808 if (sfq == NULL) {
809 sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
810 if (!sfq)
811 return -ENOMEM;
812 queue->priv = sfq;
813
814 if (queue->cpu != -1)
815 sfq->cpu = queue->cpu;
816 }
817 return auxtrace_heap__add(&sf->heap, queue_nr, ts);
818 }
819
s390_cpumsf_setup_queues(struct s390_cpumsf * sf,u64 ts)820 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
821 {
822 unsigned int i;
823 int ret = 0;
824
825 for (i = 0; i < sf->queues.nr_queues; i++) {
826 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
827 i, ts);
828 if (ret)
829 break;
830 }
831 return ret;
832 }
833
s390_cpumsf_update_queues(struct s390_cpumsf * sf,u64 ts)834 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
835 {
836 if (!sf->queues.new_data)
837 return 0;
838
839 sf->queues.new_data = false;
840 return s390_cpumsf_setup_queues(sf, ts);
841 }
842
s390_cpumsf_process_queues(struct s390_cpumsf * sf,u64 timestamp)843 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
844 {
845 unsigned int queue_nr;
846 u64 ts;
847 int ret;
848
849 while (1) {
850 struct auxtrace_queue *queue;
851 struct s390_cpumsf_queue *sfq;
852
853 if (!sf->heap.heap_cnt)
854 return 0;
855
856 if (sf->heap.heap_array[0].ordinal >= timestamp)
857 return 0;
858
859 queue_nr = sf->heap.heap_array[0].queue_nr;
860 queue = &sf->queues.queue_array[queue_nr];
861 sfq = queue->priv;
862
863 auxtrace_heap__pop(&sf->heap);
864 if (sf->heap.heap_cnt) {
865 ts = sf->heap.heap_array[0].ordinal + 1;
866 if (ts > timestamp)
867 ts = timestamp;
868 } else {
869 ts = timestamp;
870 }
871
872 ret = s390_cpumsf_run_decoder(sfq, &ts);
873 if (ret < 0) {
874 auxtrace_heap__add(&sf->heap, queue_nr, ts);
875 return ret;
876 }
877 if (!ret) {
878 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
879 if (ret < 0)
880 return ret;
881 }
882 }
883 return 0;
884 }
885
s390_cpumsf_synth_error(struct s390_cpumsf * sf,int code,int cpu,pid_t pid,pid_t tid,u64 ip,u64 timestamp)886 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
887 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
888 {
889 char msg[MAX_AUXTRACE_ERROR_MSG];
890 union perf_event event;
891 int err;
892
893 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
894 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
895 code, cpu, pid, tid, ip, msg, timestamp);
896
897 err = perf_session__deliver_synth_event(sf->session, &event, NULL);
898 if (err)
899 pr_err("s390 Auxiliary Trace: failed to deliver error event,"
900 "error %d\n", err);
901 return err;
902 }
903
s390_cpumsf_lost(struct s390_cpumsf * sf,struct perf_sample * sample)904 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
905 {
906 return s390_cpumsf_synth_error(sf, 1, sample->cpu,
907 sample->pid, sample->tid, 0,
908 sample->time);
909 }
910
911 static int
s390_cpumsf_process_event(struct perf_session * session,union perf_event * event,struct perf_sample * sample,const struct perf_tool * tool)912 s390_cpumsf_process_event(struct perf_session *session,
913 union perf_event *event,
914 struct perf_sample *sample,
915 const struct perf_tool *tool)
916 {
917 struct s390_cpumsf *sf = container_of(session->auxtrace,
918 struct s390_cpumsf,
919 auxtrace);
920 u64 timestamp = sample->time;
921 struct evsel *ev_bc000;
922
923 int err = 0;
924
925 if (dump_trace)
926 return 0;
927
928 if (!tool->ordered_events) {
929 pr_err("s390 Auxiliary Trace requires ordered events\n");
930 return -EINVAL;
931 }
932
933 if (event->header.type == PERF_RECORD_SAMPLE &&
934 sample->raw_size) {
935 /* Handle event with raw data */
936 ev_bc000 = evlist__event2evsel(session->evlist, event);
937 if (ev_bc000 &&
938 ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
939 err = s390_cpumcf_dumpctr(sf, sample);
940 return err;
941 }
942
943 if (event->header.type == PERF_RECORD_AUX &&
944 event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
945 return s390_cpumsf_lost(sf, sample);
946
947 if (timestamp) {
948 err = s390_cpumsf_update_queues(sf, timestamp);
949 if (!err)
950 err = s390_cpumsf_process_queues(sf, timestamp);
951 }
952 return err;
953 }
954
955 static int
s390_cpumsf_process_auxtrace_event(struct perf_session * session,union perf_event * event __maybe_unused,const struct perf_tool * tool __maybe_unused)956 s390_cpumsf_process_auxtrace_event(struct perf_session *session,
957 union perf_event *event __maybe_unused,
958 const struct perf_tool *tool __maybe_unused)
959 {
960 struct s390_cpumsf *sf = container_of(session->auxtrace,
961 struct s390_cpumsf,
962 auxtrace);
963
964 int fd = perf_data__fd(session->data);
965 struct auxtrace_buffer *buffer;
966 off_t data_offset;
967 int err;
968
969 if (sf->data_queued)
970 return 0;
971
972 if (perf_data__is_pipe(session->data)) {
973 data_offset = 0;
974 } else {
975 data_offset = lseek(fd, 0, SEEK_CUR);
976 if (data_offset == -1)
977 return -errno;
978 }
979
980 err = auxtrace_queues__add_event(&sf->queues, session, event,
981 data_offset, &buffer);
982 if (err)
983 return err;
984
985 /* Dump here after copying piped trace out of the pipe */
986 if (dump_trace) {
987 if (auxtrace_buffer__get_data(buffer, fd)) {
988 s390_cpumsf_dump_event(sf, buffer->data,
989 buffer->size);
990 auxtrace_buffer__put_data(buffer);
991 }
992 }
993 return 0;
994 }
995
s390_cpumsf_free_events(struct perf_session * session __maybe_unused)996 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
997 {
998 }
999
s390_cpumsf_flush(struct perf_session * session __maybe_unused,const struct perf_tool * tool __maybe_unused)1000 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
1001 const struct perf_tool *tool __maybe_unused)
1002 {
1003 return 0;
1004 }
1005
s390_cpumsf_free_queues(struct perf_session * session)1006 static void s390_cpumsf_free_queues(struct perf_session *session)
1007 {
1008 struct s390_cpumsf *sf = container_of(session->auxtrace,
1009 struct s390_cpumsf,
1010 auxtrace);
1011 struct auxtrace_queues *queues = &sf->queues;
1012 unsigned int i;
1013
1014 for (i = 0; i < queues->nr_queues; i++) {
1015 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
1016 queues->queue_array[i].priv;
1017
1018 if (sfq != NULL) {
1019 if (sfq->logfile) {
1020 fclose(sfq->logfile);
1021 sfq->logfile = NULL;
1022 }
1023 if (sfq->logfile_ctr) {
1024 fclose(sfq->logfile_ctr);
1025 sfq->logfile_ctr = NULL;
1026 }
1027 }
1028 zfree(&queues->queue_array[i].priv);
1029 }
1030 auxtrace_queues__free(queues);
1031 }
1032
s390_cpumsf_free(struct perf_session * session)1033 static void s390_cpumsf_free(struct perf_session *session)
1034 {
1035 struct s390_cpumsf *sf = container_of(session->auxtrace,
1036 struct s390_cpumsf,
1037 auxtrace);
1038
1039 auxtrace_heap__free(&sf->heap);
1040 s390_cpumsf_free_queues(session);
1041 session->auxtrace = NULL;
1042 zfree(&sf->logdir);
1043 free(sf);
1044 }
1045
1046 static bool
s390_cpumsf_evsel_is_auxtrace(struct perf_session * session __maybe_unused,struct evsel * evsel)1047 s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
1048 struct evsel *evsel)
1049 {
1050 return evsel->core.attr.type == PERF_TYPE_RAW &&
1051 evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
1052 }
1053
s390_cpumsf_get_type(const char * cpuid)1054 static int s390_cpumsf_get_type(const char *cpuid)
1055 {
1056 int ret, family = 0;
1057
1058 ret = sscanf(cpuid, "%*[^,],%u", &family);
1059 return (ret == 1) ? family : 0;
1060 }
1061
1062 /* Check itrace options set on perf report command.
1063 * Return true, if none are set or all options specified can be
1064 * handled on s390 (currently only option 'd' for logging.
1065 * Return false otherwise.
1066 */
check_auxtrace_itrace(struct itrace_synth_opts * itops)1067 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
1068 {
1069 bool ison = false;
1070
1071 if (!itops || !itops->set)
1072 return true;
1073 ison = itops->inject || itops->instructions || itops->branches ||
1074 itops->transactions || itops->ptwrites ||
1075 itops->pwr_events || itops->errors ||
1076 itops->dont_decode || itops->calls || itops->returns ||
1077 itops->callchain || itops->thread_stack ||
1078 itops->last_branch || itops->add_callchain ||
1079 itops->add_last_branch;
1080 if (!ison)
1081 return true;
1082 pr_err("Unsupported --itrace options specified\n");
1083 return false;
1084 }
1085
1086 /* Check for AUXTRACE dump directory if it is needed.
1087 * On failure print an error message but continue.
1088 * Return 0 on wrong keyword in config file and 1 otherwise.
1089 */
s390_cpumsf__config(const char * var,const char * value,void * cb)1090 static int s390_cpumsf__config(const char *var, const char *value, void *cb)
1091 {
1092 struct s390_cpumsf *sf = cb;
1093 struct stat stbuf;
1094 int rc;
1095
1096 if (strcmp(var, "auxtrace.dumpdir"))
1097 return 0;
1098 sf->logdir = strdup(value);
1099 if (sf->logdir == NULL) {
1100 pr_err("Failed to find auxtrace log directory %s,"
1101 " continue with current directory...\n", value);
1102 return 1;
1103 }
1104 rc = stat(sf->logdir, &stbuf);
1105 if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
1106 pr_err("Missing auxtrace log directory %s,"
1107 " continue with current directory...\n", value);
1108 zfree(&sf->logdir);
1109 }
1110 return 1;
1111 }
1112
s390_cpumsf_process_auxtrace_info(union perf_event * event,struct perf_session * session)1113 int s390_cpumsf_process_auxtrace_info(union perf_event *event,
1114 struct perf_session *session)
1115 {
1116 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1117 struct s390_cpumsf *sf;
1118 int err;
1119
1120 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info))
1121 return -EINVAL;
1122
1123 sf = zalloc(sizeof(struct s390_cpumsf));
1124 if (sf == NULL)
1125 return -ENOMEM;
1126
1127 if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
1128 err = -EINVAL;
1129 goto err_free;
1130 }
1131 sf->use_logfile = session->itrace_synth_opts->log;
1132 if (sf->use_logfile)
1133 perf_config(s390_cpumsf__config, sf);
1134
1135 err = auxtrace_queues__init(&sf->queues);
1136 if (err)
1137 goto err_free;
1138
1139 sf->session = session;
1140 sf->machine = &session->machines.host; /* No kvm support */
1141 sf->auxtrace_type = auxtrace_info->type;
1142 sf->pmu_type = PERF_TYPE_RAW;
1143 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
1144
1145 sf->auxtrace.process_event = s390_cpumsf_process_event;
1146 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
1147 sf->auxtrace.flush_events = s390_cpumsf_flush;
1148 sf->auxtrace.free_events = s390_cpumsf_free_events;
1149 sf->auxtrace.free = s390_cpumsf_free;
1150 sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
1151 session->auxtrace = &sf->auxtrace;
1152
1153 if (dump_trace)
1154 return 0;
1155
1156 err = auxtrace_queues__process_index(&sf->queues, session);
1157 if (err)
1158 goto err_free_queues;
1159
1160 if (sf->queues.populated)
1161 sf->data_queued = true;
1162
1163 return 0;
1164
1165 err_free_queues:
1166 auxtrace_queues__free(&sf->queues);
1167 session->auxtrace = NULL;
1168 err_free:
1169 zfree(&sf->logdir);
1170 free(sf);
1171 return err;
1172 }
1173