1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright IBM Corp. 2018 4 * Auxtrace support for s390 CPU-Measurement Sampling Facility 5 * 6 * Author(s): Thomas Richter <tmricht@linux.ibm.com> 7 * 8 * Auxiliary traces are collected during 'perf record' using rbd000 event. 9 * Several PERF_RECORD_XXX are generated during recording: 10 * 11 * PERF_RECORD_AUX: 12 * Records that new data landed in the AUX buffer part. 13 * PERF_RECORD_AUXTRACE: 14 * Defines auxtrace data. Followed by the actual data. The contents of 15 * the auxtrace data is dependent on the event and the CPU. 16 * This record is generated by perf record command. For details 17 * see Documentation/perf.data-file-format.txt. 18 * PERF_RECORD_AUXTRACE_INFO: 19 * Defines a table of contains for PERF_RECORD_AUXTRACE records. This 20 * record is generated during 'perf record' command. Each record contains up 21 * to 256 entries describing offset and size of the AUXTRACE data in the 22 * perf.data file. 23 * PERF_RECORD_AUXTRACE_ERROR: 24 * Indicates an error during AUXTRACE collection such as buffer overflow. 25 * PERF_RECORD_FINISHED_ROUND: 26 * Perf events are not necessarily in time stamp order, as they can be 27 * collected in parallel on different CPUs. If the events should be 28 * processed in time order they need to be sorted first. 29 * Perf report guarantees that there is no reordering over a 30 * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a 31 * time stamp lower than this record are processed (and displayed) before 32 * the succeeding perf record are processed. 33 * 34 * These records are evaluated during perf report command. 35 * 36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for 37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() 38 * below. 39 * Auxiliary trace data is collected per CPU. To merge the data into the report 40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace 41 * data is in ascending order. 42 * 43 * Each queue has a double linked list of auxtrace_buffers. This list contains 44 * the offset and size of a CPU's auxtrace data. During auxtrace processing 45 * the data portion is mmap()'ed. 46 * 47 * To sort the queues in chronological order, all queue access is controlled 48 * by the auxtrace_heap. This is basicly a stack, each stack element has two 49 * entries, the queue number and a time stamp. However the stack is sorted by 50 * the time stamps. The highest time stamp is at the bottom the lowest 51 * (nearest) time stamp is at the top. That sort order is maintained at all 52 * times! 53 * 54 * After the auxtrace infrastructure has been setup, the auxtrace queues are 55 * filled with data (offset/size pairs) and the auxtrace_heap is populated. 56 * 57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. 58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of 59 * the perf record is compared with the time stamp located on the auxtrace_heap 60 * top element. If that time stamp is lower than the time stamp from the 61 * record sample, the auxtrace queues will be processed. As auxtrace queues 62 * control many auxtrace_buffers and each buffer can be quite large, the 63 * auxtrace buffer might be processed only partially. In this case the 64 * position in the auxtrace_buffer of that queue is remembered and the time 65 * stamp of the last processed entry of the auxtrace_buffer replaces the 66 * current auxtrace_heap top. 67 * 68 * 3. Auxtrace_queues might run of out data and are feeded by the 69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). 70 * 71 * Event Generation 72 * Each sampling-data entry in the auxilary trace data generates a perf sample. 73 * This sample is filled 74 * with data from the auxtrace such as PID/TID, instruction address, CPU state, 75 * etc. This sample is processed with perf_session__deliver_synth_event() to 76 * be included into the GUI. 77 * 78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining 79 * auxiliary traces entries until the time stamp of this record is reached 80 * auxtrace_heap top. This is triggered by ordered_event->deliver(). 81 * 82 * 83 * Perf event processing. 84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. 85 * This is the function call sequence: 86 * 87 * __cmd_report() 88 * | 89 * perf_session__process_events() 90 * | 91 * __perf_session__process_events() 92 * | 93 * perf_session__process_event() 94 * | This functions splits the PERF_RECORD_XXX records. 95 * | - Those generated by perf record command (type number equal or higher 96 * | than PERF_RECORD_USER_TYPE_START) are handled by 97 * | perf_session__process_user_event(see below) 98 * | - Those generated by the kernel are handled by 99 * | perf_evlist__parse_sample_timestamp() 100 * | 101 * perf_evlist__parse_sample_timestamp() 102 * | Extract time stamp from sample data. 103 * | 104 * perf_session__queue_event() 105 * | If timestamp is positive the sample is entered into an ordered_event 106 * | list, sort order is the timestamp. The event processing is deferred until 107 * | later (see perf_session__process_user_event()). 108 * | Other timestamps (0 or -1) are handled immediately by 109 * | perf_session__deliver_event(). These are events generated at start up 110 * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* 111 * | records. They are needed to create a list of running processes and its 112 * | memory mappings and layout. They are needed at the beginning to enable 113 * | command perf report to create process trees and memory mappings. 114 * | 115 * perf_session__deliver_event() 116 * | Delivers a PERF_RECORD_XXX entry for handling. 117 * | 118 * auxtrace__process_event() 119 * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with 120 * | time stamps from the auxiliary trace buffers. This enables 121 * | synchronization between auxiliary trace data and the events on the 122 * | perf.data file. 123 * | 124 * machine__deliver_event() 125 * | Handles the PERF_RECORD_XXX event. This depends on the record type. 126 * It might update the process tree, update a process memory map or enter 127 * a sample with IP and call back chain data into GUI data pool. 128 * 129 * 130 * Deferred processing determined by perf_session__process_user_event() is 131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These 132 * are generated during command perf record. 133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all 134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was 135 * built up while reading the perf.data file. 136 * Each event is now processed by calling perf_session__deliver_event(). 137 * This enables time synchronization between the data in the perf.data file and 138 * the data in the auxiliary trace buffers. 139 */ 140 141 #include <endian.h> 142 #include <errno.h> 143 #include <byteswap.h> 144 #include <inttypes.h> 145 #include <linux/kernel.h> 146 #include <linux/types.h> 147 #include <linux/bitops.h> 148 #include <linux/log2.h> 149 150 #include <sys/stat.h> 151 #include <sys/types.h> 152 153 #include "cpumap.h" 154 #include "color.h" 155 #include "evsel.h" 156 #include "evlist.h" 157 #include "machine.h" 158 #include "session.h" 159 #include "util.h" 160 #include "thread.h" 161 #include "debug.h" 162 #include "auxtrace.h" 163 #include "s390-cpumsf.h" 164 #include "s390-cpumsf-kernel.h" 165 #include "s390-cpumcf-kernel.h" 166 #include "config.h" 167 168 struct s390_cpumsf { 169 struct auxtrace auxtrace; 170 struct auxtrace_queues queues; 171 struct auxtrace_heap heap; 172 struct perf_session *session; 173 struct machine *machine; 174 u32 auxtrace_type; 175 u32 pmu_type; 176 u16 machine_type; 177 bool data_queued; 178 bool use_logfile; 179 char *logdir; 180 }; 181 182 struct s390_cpumsf_queue { 183 struct s390_cpumsf *sf; 184 unsigned int queue_nr; 185 struct auxtrace_buffer *buffer; 186 int cpu; 187 FILE *logfile; 188 FILE *logfile_ctr; 189 }; 190 191 /* Check if the raw data should be dumped to file. If this is the case and 192 * the file to dump to has not been opened for writing, do so. 193 * 194 * Return 0 on success and greater zero on error so processing continues. 195 */ 196 static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, 197 struct perf_sample *sample) 198 { 199 struct s390_cpumsf_queue *sfq; 200 struct auxtrace_queue *q; 201 int rc = 0; 202 203 if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu) 204 return rc; 205 206 q = &sf->queues.queue_array[sample->cpu]; 207 sfq = q->priv; 208 if (!sfq) /* Queue not yet allocated */ 209 return rc; 210 211 if (!sfq->logfile_ctr) { 212 char *name; 213 214 rc = (sf->logdir) 215 ? asprintf(&name, "%s/aux.ctr.%02x", 216 sf->logdir, sample->cpu) 217 : asprintf(&name, "aux.ctr.%02x", sample->cpu); 218 if (rc > 0) 219 sfq->logfile_ctr = fopen(name, "w"); 220 if (sfq->logfile_ctr == NULL) { 221 pr_err("Failed to open counter set log file %s, " 222 "continue...\n", name); 223 rc = 1; 224 } 225 free(name); 226 } 227 228 if (sfq->logfile_ctr) { 229 /* See comment above for -4 */ 230 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1, 231 sfq->logfile_ctr); 232 if (n != 1) { 233 pr_err("Failed to write counter set data\n"); 234 rc = 1; 235 } 236 } 237 return rc; 238 } 239 240 /* Display s390 CPU measurement facility basic-sampling data entry */ 241 static bool s390_cpumsf_basic_show(const char *color, size_t pos, 242 struct hws_basic_entry *basic) 243 { 244 if (basic->def != 1) { 245 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); 246 return false; 247 } 248 color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" 249 " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" 250 "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", 251 pos, basic->def, basic->U, 252 basic->T ? 'T' : ' ', 253 basic->W ? 'W' : ' ', 254 basic->P ? 'P' : ' ', 255 basic->I ? 'I' : ' ', 256 basic->AS, basic->prim_asn, basic->ia, basic->CL, 257 basic->hpp, basic->gpp); 258 return true; 259 } 260 261 /* Display s390 CPU measurement facility diagnostic-sampling data entry */ 262 static bool s390_cpumsf_diag_show(const char *color, size_t pos, 263 struct hws_diag_entry *diag) 264 { 265 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { 266 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); 267 return false; 268 } 269 color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", 270 pos, diag->def, diag->I ? 'I' : ' '); 271 return true; 272 } 273 274 /* Return TOD timestamp contained in an trailer entry */ 275 static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) 276 { 277 /* te->t set: TOD in STCKE format, bytes 8-15 278 * to->t not set: TOD in STCK format, bytes 0-7 279 */ 280 unsigned long long ts; 281 282 memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); 283 return ts; 284 } 285 286 /* Display s390 CPU measurement facility trailer entry */ 287 static bool s390_cpumsf_trailer_show(const char *color, size_t pos, 288 struct hws_trailer_entry *te) 289 { 290 if (te->bsdes != sizeof(struct hws_basic_entry)) { 291 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); 292 return false; 293 } 294 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" 295 " dsdes:%d Overflow:%lld Time:%#llx\n" 296 "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", 297 pos, 298 te->f ? 'F' : ' ', 299 te->a ? 'A' : ' ', 300 te->t ? 'T' : ' ', 301 te->bsdes, te->dsdes, te->overflow, 302 trailer_timestamp(te), te->clock_base, te->progusage2, 303 te->progusage[0], te->progusage[1]); 304 return true; 305 } 306 307 /* Test a sample data block. It must be 4KB or a multiple thereof in size and 308 * 4KB page aligned. Each sample data page has a trailer entry at the 309 * end which contains the sample entry data sizes. 310 * 311 * Return true if the sample data block passes the checks and set the 312 * basic set entry size and diagnostic set entry size. 313 * 314 * Return false on failure. 315 * 316 * Note: Old hardware does not set the basic or diagnostic entry sizes 317 * in the trailer entry. Use the type number instead. 318 */ 319 static bool s390_cpumsf_validate(int machine_type, 320 unsigned char *buf, size_t len, 321 unsigned short *bsdes, 322 unsigned short *dsdes) 323 { 324 struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; 325 struct hws_trailer_entry *te; 326 327 *dsdes = *bsdes = 0; 328 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ 329 return false; 330 if (basic->def != 1) /* No basic set entry, must be first */ 331 return false; 332 /* Check for trailer entry at end of SDB */ 333 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 334 - sizeof(*te)); 335 *bsdes = te->bsdes; 336 *dsdes = te->dsdes; 337 if (!te->bsdes && !te->dsdes) { 338 /* Very old hardware, use CPUID */ 339 switch (machine_type) { 340 case 2097: 341 case 2098: 342 *dsdes = 64; 343 *bsdes = 32; 344 break; 345 case 2817: 346 case 2818: 347 *dsdes = 74; 348 *bsdes = 32; 349 break; 350 case 2827: 351 case 2828: 352 *dsdes = 85; 353 *bsdes = 32; 354 break; 355 case 2964: 356 case 2965: 357 *dsdes = 112; 358 *bsdes = 32; 359 break; 360 default: 361 /* Illegal trailer entry */ 362 return false; 363 } 364 } 365 return true; 366 } 367 368 /* Return true if there is room for another entry */ 369 static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) 370 { 371 size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); 372 373 if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) 374 return false; 375 return true; 376 } 377 378 /* Dump an auxiliary buffer. These buffers are multiple of 379 * 4KB SDB pages. 380 */ 381 static void s390_cpumsf_dump(struct s390_cpumsf *sf, 382 unsigned char *buf, size_t len) 383 { 384 const char *color = PERF_COLOR_BLUE; 385 struct hws_basic_entry *basic; 386 struct hws_diag_entry *diag; 387 unsigned short bsdes, dsdes; 388 size_t pos = 0; 389 390 color_fprintf(stdout, color, 391 ". ... s390 AUX data: size %zu bytes\n", 392 len); 393 394 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 395 &dsdes)) { 396 pr_err("Invalid AUX trace data block size:%zu" 397 " (type:%d bsdes:%hd dsdes:%hd)\n", 398 len, sf->machine_type, bsdes, dsdes); 399 return; 400 } 401 402 /* s390 kernel always returns 4KB blocks fully occupied, 403 * no partially filled SDBs. 404 */ 405 while (pos < len) { 406 /* Handle Basic entry */ 407 basic = (struct hws_basic_entry *)(buf + pos); 408 if (s390_cpumsf_basic_show(color, pos, basic)) 409 pos += bsdes; 410 else 411 return; 412 413 /* Handle Diagnostic entry */ 414 diag = (struct hws_diag_entry *)(buf + pos); 415 if (s390_cpumsf_diag_show(color, pos, diag)) 416 pos += dsdes; 417 else 418 return; 419 420 /* Check for trailer entry */ 421 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 422 /* Show trailer entry */ 423 struct hws_trailer_entry te; 424 425 pos = (pos + S390_CPUMSF_PAGESZ) 426 & ~(S390_CPUMSF_PAGESZ - 1); 427 pos -= sizeof(te); 428 memcpy(&te, buf + pos, sizeof(te)); 429 /* Set descriptor sizes in case of old hardware 430 * where these values are not set. 431 */ 432 te.bsdes = bsdes; 433 te.dsdes = dsdes; 434 if (s390_cpumsf_trailer_show(color, pos, &te)) 435 pos += sizeof(te); 436 else 437 return; 438 } 439 } 440 } 441 442 static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, 443 size_t len) 444 { 445 printf(".\n"); 446 s390_cpumsf_dump(sf, buf, len); 447 } 448 449 #define S390_LPP_PID_MASK 0xffffffff 450 451 static bool s390_cpumsf_make_event(size_t pos, 452 struct hws_basic_entry *basic, 453 struct s390_cpumsf_queue *sfq) 454 { 455 struct perf_sample sample = { 456 .ip = basic->ia, 457 .pid = basic->hpp & S390_LPP_PID_MASK, 458 .tid = basic->hpp & S390_LPP_PID_MASK, 459 .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, 460 .cpu = sfq->cpu, 461 .period = 1 462 }; 463 union perf_event event; 464 465 memset(&event, 0, sizeof(event)); 466 if (basic->CL == 1) /* Native LPAR mode */ 467 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 468 : PERF_RECORD_MISC_KERNEL; 469 else if (basic->CL == 2) /* Guest kernel/user space */ 470 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 471 : PERF_RECORD_MISC_GUEST_KERNEL; 472 else if (basic->gpp || basic->prim_asn != 0xffff) 473 /* Use heuristics on old hardware */ 474 sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER 475 : PERF_RECORD_MISC_GUEST_KERNEL; 476 else 477 sample.cpumode = basic->P ? PERF_RECORD_MISC_USER 478 : PERF_RECORD_MISC_KERNEL; 479 480 event.sample.header.type = PERF_RECORD_SAMPLE; 481 event.sample.header.misc = sample.cpumode; 482 event.sample.header.size = sizeof(struct perf_event_header); 483 484 pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", 485 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, 486 sample.tid, sample.cpumode, sample.cpu); 487 if (perf_session__deliver_synth_event(sfq->sf->session, &event, 488 &sample)) { 489 pr_err("s390 Auxiliary Trace: failed to deliver event\n"); 490 return false; 491 } 492 return true; 493 } 494 495 static unsigned long long get_trailer_time(const unsigned char *buf) 496 { 497 struct hws_trailer_entry *te; 498 unsigned long long aux_time; 499 500 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 501 - sizeof(*te)); 502 503 if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ 504 return 0; 505 506 /* Correct calculation to convert time stamp in trailer entry to 507 * nano seconds (taken from arch/s390 function tod_to_ns()). 508 * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 509 */ 510 aux_time = trailer_timestamp(te) - te->progusage2; 511 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); 512 return aux_time; 513 } 514 515 /* Process the data samples of a single queue. The first parameter is a 516 * pointer to the queue, the second parameter is the time stamp. This 517 * is the time stamp: 518 * - of the event that triggered this processing. 519 * - or the time stamp when the last proccesing of this queue stopped. 520 * In this case it stopped at a 4KB page boundary and record the 521 * position on where to continue processing on the next invocation 522 * (see buffer->use_data and buffer->use_size). 523 * 524 * When this function returns the second parameter is updated to 525 * reflect the time stamp of the last processed auxiliary data entry 526 * (taken from the trailer entry of that page). The caller uses this 527 * returned time stamp to record the last processed entry in this 528 * queue. 529 * 530 * The function returns: 531 * 0: Processing successful. The second parameter returns the 532 * time stamp from the trailer entry until which position 533 * processing took place. Subsequent calls resume from this 534 * position. 535 * <0: An error occurred during processing. The second parameter 536 * returns the maximum time stamp. 537 * >0: Done on this queue. The second parameter returns the 538 * maximum time stamp. 539 */ 540 static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) 541 { 542 struct s390_cpumsf *sf = sfq->sf; 543 unsigned char *buf = sfq->buffer->use_data; 544 size_t len = sfq->buffer->use_size; 545 struct hws_basic_entry *basic; 546 unsigned short bsdes, dsdes; 547 size_t pos = 0; 548 int err = 1; 549 u64 aux_ts; 550 551 if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, 552 &dsdes)) { 553 *ts = ~0ULL; 554 return -1; 555 } 556 557 /* Get trailer entry time stamp and check if entries in 558 * this auxiliary page are ready for processing. If the 559 * time stamp of the first entry is too high, whole buffer 560 * can be skipped. In this case return time stamp. 561 */ 562 aux_ts = get_trailer_time(buf); 563 if (!aux_ts) { 564 pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", 565 (s64)sfq->buffer->data_offset); 566 aux_ts = ~0ULL; 567 goto out; 568 } 569 if (aux_ts > *ts) { 570 *ts = aux_ts; 571 return 0; 572 } 573 574 while (pos < len) { 575 /* Handle Basic entry */ 576 basic = (struct hws_basic_entry *)(buf + pos); 577 if (s390_cpumsf_make_event(pos, basic, sfq)) 578 pos += bsdes; 579 else { 580 err = -EBADF; 581 goto out; 582 } 583 584 pos += dsdes; /* Skip diagnositic entry */ 585 586 /* Check for trailer entry */ 587 if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { 588 pos = (pos + S390_CPUMSF_PAGESZ) 589 & ~(S390_CPUMSF_PAGESZ - 1); 590 /* Check existence of next page */ 591 if (pos >= len) 592 break; 593 aux_ts = get_trailer_time(buf + pos); 594 if (!aux_ts) { 595 aux_ts = ~0ULL; 596 goto out; 597 } 598 if (aux_ts > *ts) { 599 *ts = aux_ts; 600 sfq->buffer->use_data += pos; 601 sfq->buffer->use_size -= pos; 602 return 0; 603 } 604 } 605 } 606 out: 607 *ts = aux_ts; 608 sfq->buffer->use_size = 0; 609 sfq->buffer->use_data = NULL; 610 return err; /* Buffer completely scanned or error */ 611 } 612 613 /* Run the s390 auxiliary trace decoder. 614 * Select the queue buffer to operate on, the caller already selected 615 * the proper queue, depending on second parameter 'ts'. 616 * This is the time stamp until which the auxiliary entries should 617 * be processed. This value is updated by called functions and 618 * returned to the caller. 619 * 620 * Resume processing in the current buffer. If there is no buffer 621 * get a new buffer from the queue and setup start position for 622 * processing. 623 * When a buffer is completely processed remove it from the queue 624 * before returning. 625 * 626 * This function returns 627 * 1: When the queue is empty. Second parameter will be set to 628 * maximum time stamp. 629 * 0: Normal processing done. 630 * <0: Error during queue buffer setup. This causes the caller 631 * to stop processing completely. 632 */ 633 static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, 634 u64 *ts) 635 { 636 637 struct auxtrace_buffer *buffer; 638 struct auxtrace_queue *queue; 639 int err; 640 641 queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; 642 643 /* Get buffer and last position in buffer to resume 644 * decoding the auxiliary entries. One buffer might be large 645 * and decoding might stop in between. This depends on the time 646 * stamp of the trailer entry in each page of the auxiliary 647 * data and the time stamp of the event triggering the decoding. 648 */ 649 if (sfq->buffer == NULL) { 650 sfq->buffer = buffer = auxtrace_buffer__next(queue, 651 sfq->buffer); 652 if (!buffer) { 653 *ts = ~0ULL; 654 return 1; /* Processing done on this queue */ 655 } 656 /* Start with a new buffer on this queue */ 657 if (buffer->data) { 658 buffer->use_size = buffer->size; 659 buffer->use_data = buffer->data; 660 } 661 if (sfq->logfile) { /* Write into log file */ 662 size_t rc = fwrite(buffer->data, buffer->size, 1, 663 sfq->logfile); 664 if (rc != 1) 665 pr_err("Failed to write auxiliary data\n"); 666 } 667 } else 668 buffer = sfq->buffer; 669 670 if (!buffer->data) { 671 int fd = perf_data__fd(sfq->sf->session->data); 672 673 buffer->data = auxtrace_buffer__get_data(buffer, fd); 674 if (!buffer->data) 675 return -ENOMEM; 676 buffer->use_size = buffer->size; 677 buffer->use_data = buffer->data; 678 679 if (sfq->logfile) { /* Write into log file */ 680 size_t rc = fwrite(buffer->data, buffer->size, 1, 681 sfq->logfile); 682 if (rc != 1) 683 pr_err("Failed to write auxiliary data\n"); 684 } 685 } 686 pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", 687 __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, 688 buffer->size, buffer->use_size); 689 err = s390_cpumsf_samples(sfq, ts); 690 691 /* If non-zero, there is either an error (err < 0) or the buffer is 692 * completely done (err > 0). The error is unrecoverable, usually 693 * some descriptors could not be read successfully, so continue with 694 * the next buffer. 695 * In both cases the parameter 'ts' has been updated. 696 */ 697 if (err) { 698 sfq->buffer = NULL; 699 list_del(&buffer->list); 700 auxtrace_buffer__free(buffer); 701 if (err > 0) /* Buffer done, no error */ 702 err = 0; 703 } 704 return err; 705 } 706 707 static struct s390_cpumsf_queue * 708 s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) 709 { 710 struct s390_cpumsf_queue *sfq; 711 712 sfq = zalloc(sizeof(struct s390_cpumsf_queue)); 713 if (sfq == NULL) 714 return NULL; 715 716 sfq->sf = sf; 717 sfq->queue_nr = queue_nr; 718 sfq->cpu = -1; 719 if (sf->use_logfile) { 720 char *name; 721 int rc; 722 723 rc = (sf->logdir) 724 ? asprintf(&name, "%s/aux.smp.%02x", 725 sf->logdir, queue_nr) 726 : asprintf(&name, "aux.smp.%02x", queue_nr); 727 if (rc > 0) 728 sfq->logfile = fopen(name, "w"); 729 if (sfq->logfile == NULL) { 730 pr_err("Failed to open auxiliary log file %s," 731 "continue...\n", name); 732 sf->use_logfile = false; 733 } 734 free(name); 735 } 736 return sfq; 737 } 738 739 static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, 740 struct auxtrace_queue *queue, 741 unsigned int queue_nr, u64 ts) 742 { 743 struct s390_cpumsf_queue *sfq = queue->priv; 744 745 if (list_empty(&queue->head)) 746 return 0; 747 748 if (sfq == NULL) { 749 sfq = s390_cpumsf_alloc_queue(sf, queue_nr); 750 if (!sfq) 751 return -ENOMEM; 752 queue->priv = sfq; 753 754 if (queue->cpu != -1) 755 sfq->cpu = queue->cpu; 756 } 757 return auxtrace_heap__add(&sf->heap, queue_nr, ts); 758 } 759 760 static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) 761 { 762 unsigned int i; 763 int ret = 0; 764 765 for (i = 0; i < sf->queues.nr_queues; i++) { 766 ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], 767 i, ts); 768 if (ret) 769 break; 770 } 771 return ret; 772 } 773 774 static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) 775 { 776 if (!sf->queues.new_data) 777 return 0; 778 779 sf->queues.new_data = false; 780 return s390_cpumsf_setup_queues(sf, ts); 781 } 782 783 static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) 784 { 785 unsigned int queue_nr; 786 u64 ts; 787 int ret; 788 789 while (1) { 790 struct auxtrace_queue *queue; 791 struct s390_cpumsf_queue *sfq; 792 793 if (!sf->heap.heap_cnt) 794 return 0; 795 796 if (sf->heap.heap_array[0].ordinal >= timestamp) 797 return 0; 798 799 queue_nr = sf->heap.heap_array[0].queue_nr; 800 queue = &sf->queues.queue_array[queue_nr]; 801 sfq = queue->priv; 802 803 auxtrace_heap__pop(&sf->heap); 804 if (sf->heap.heap_cnt) { 805 ts = sf->heap.heap_array[0].ordinal + 1; 806 if (ts > timestamp) 807 ts = timestamp; 808 } else { 809 ts = timestamp; 810 } 811 812 ret = s390_cpumsf_run_decoder(sfq, &ts); 813 if (ret < 0) { 814 auxtrace_heap__add(&sf->heap, queue_nr, ts); 815 return ret; 816 } 817 if (!ret) { 818 ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); 819 if (ret < 0) 820 return ret; 821 } 822 } 823 return 0; 824 } 825 826 static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, 827 pid_t pid, pid_t tid, u64 ip, u64 timestamp) 828 { 829 char msg[MAX_AUXTRACE_ERROR_MSG]; 830 union perf_event event; 831 int err; 832 833 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); 834 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 835 code, cpu, pid, tid, ip, msg, timestamp); 836 837 err = perf_session__deliver_synth_event(sf->session, &event, NULL); 838 if (err) 839 pr_err("s390 Auxiliary Trace: failed to deliver error event," 840 "error %d\n", err); 841 return err; 842 } 843 844 static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) 845 { 846 return s390_cpumsf_synth_error(sf, 1, sample->cpu, 847 sample->pid, sample->tid, 0, 848 sample->time); 849 } 850 851 static int 852 s390_cpumsf_process_event(struct perf_session *session, 853 union perf_event *event, 854 struct perf_sample *sample, 855 struct perf_tool *tool) 856 { 857 struct s390_cpumsf *sf = container_of(session->auxtrace, 858 struct s390_cpumsf, 859 auxtrace); 860 u64 timestamp = sample->time; 861 struct perf_evsel *ev_bc000; 862 863 int err = 0; 864 865 if (dump_trace) 866 return 0; 867 868 if (!tool->ordered_events) { 869 pr_err("s390 Auxiliary Trace requires ordered events\n"); 870 return -EINVAL; 871 } 872 873 if (event->header.type == PERF_RECORD_SAMPLE && 874 sample->raw_size) { 875 /* Handle event with raw data */ 876 ev_bc000 = perf_evlist__event2evsel(session->evlist, event); 877 if (ev_bc000 && 878 ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG) 879 err = s390_cpumcf_dumpctr(sf, sample); 880 return err; 881 } 882 883 if (event->header.type == PERF_RECORD_AUX && 884 event->aux.flags & PERF_AUX_FLAG_TRUNCATED) 885 return s390_cpumsf_lost(sf, sample); 886 887 if (timestamp) { 888 err = s390_cpumsf_update_queues(sf, timestamp); 889 if (!err) 890 err = s390_cpumsf_process_queues(sf, timestamp); 891 } 892 return err; 893 } 894 895 struct s390_cpumsf_synth { 896 struct perf_tool cpumsf_tool; 897 struct perf_session *session; 898 }; 899 900 static int 901 s390_cpumsf_process_auxtrace_event(struct perf_session *session, 902 union perf_event *event __maybe_unused, 903 struct perf_tool *tool __maybe_unused) 904 { 905 struct s390_cpumsf *sf = container_of(session->auxtrace, 906 struct s390_cpumsf, 907 auxtrace); 908 909 int fd = perf_data__fd(session->data); 910 struct auxtrace_buffer *buffer; 911 off_t data_offset; 912 int err; 913 914 if (sf->data_queued) 915 return 0; 916 917 if (perf_data__is_pipe(session->data)) { 918 data_offset = 0; 919 } else { 920 data_offset = lseek(fd, 0, SEEK_CUR); 921 if (data_offset == -1) 922 return -errno; 923 } 924 925 err = auxtrace_queues__add_event(&sf->queues, session, event, 926 data_offset, &buffer); 927 if (err) 928 return err; 929 930 /* Dump here after copying piped trace out of the pipe */ 931 if (dump_trace) { 932 if (auxtrace_buffer__get_data(buffer, fd)) { 933 s390_cpumsf_dump_event(sf, buffer->data, 934 buffer->size); 935 auxtrace_buffer__put_data(buffer); 936 } 937 } 938 return 0; 939 } 940 941 static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) 942 { 943 } 944 945 static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, 946 struct perf_tool *tool __maybe_unused) 947 { 948 return 0; 949 } 950 951 static void s390_cpumsf_free_queues(struct perf_session *session) 952 { 953 struct s390_cpumsf *sf = container_of(session->auxtrace, 954 struct s390_cpumsf, 955 auxtrace); 956 struct auxtrace_queues *queues = &sf->queues; 957 unsigned int i; 958 959 for (i = 0; i < queues->nr_queues; i++) { 960 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) 961 queues->queue_array[i].priv; 962 963 if (sfq != NULL) { 964 if (sfq->logfile) { 965 fclose(sfq->logfile); 966 sfq->logfile = NULL; 967 } 968 if (sfq->logfile_ctr) { 969 fclose(sfq->logfile_ctr); 970 sfq->logfile_ctr = NULL; 971 } 972 } 973 zfree(&queues->queue_array[i].priv); 974 } 975 auxtrace_queues__free(queues); 976 } 977 978 static void s390_cpumsf_free(struct perf_session *session) 979 { 980 struct s390_cpumsf *sf = container_of(session->auxtrace, 981 struct s390_cpumsf, 982 auxtrace); 983 984 auxtrace_heap__free(&sf->heap); 985 s390_cpumsf_free_queues(session); 986 session->auxtrace = NULL; 987 free(sf->logdir); 988 free(sf); 989 } 990 991 static int s390_cpumsf_get_type(const char *cpuid) 992 { 993 int ret, family = 0; 994 995 ret = sscanf(cpuid, "%*[^,],%u", &family); 996 return (ret == 1) ? family : 0; 997 } 998 999 /* Check itrace options set on perf report command. 1000 * Return true, if none are set or all options specified can be 1001 * handled on s390 (currently only option 'd' for logging. 1002 * Return false otherwise. 1003 */ 1004 static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) 1005 { 1006 bool ison = false; 1007 1008 if (!itops || !itops->set) 1009 return true; 1010 ison = itops->inject || itops->instructions || itops->branches || 1011 itops->transactions || itops->ptwrites || 1012 itops->pwr_events || itops->errors || 1013 itops->dont_decode || itops->calls || itops->returns || 1014 itops->callchain || itops->thread_stack || 1015 itops->last_branch; 1016 if (!ison) 1017 return true; 1018 pr_err("Unsupported --itrace options specified\n"); 1019 return false; 1020 } 1021 1022 /* Check for AUXTRACE dump directory if it is needed. 1023 * On failure print an error message but continue. 1024 * Return 0 on wrong keyword in config file and 1 otherwise. 1025 */ 1026 static int s390_cpumsf__config(const char *var, const char *value, void *cb) 1027 { 1028 struct s390_cpumsf *sf = cb; 1029 struct stat stbuf; 1030 int rc; 1031 1032 if (strcmp(var, "auxtrace.dumpdir")) 1033 return 0; 1034 sf->logdir = strdup(value); 1035 if (sf->logdir == NULL) { 1036 pr_err("Failed to find auxtrace log directory %s," 1037 " continue with current directory...\n", value); 1038 return 1; 1039 } 1040 rc = stat(sf->logdir, &stbuf); 1041 if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { 1042 pr_err("Missing auxtrace log directory %s," 1043 " continue with current directory...\n", value); 1044 free(sf->logdir); 1045 sf->logdir = NULL; 1046 } 1047 return 1; 1048 } 1049 1050 int s390_cpumsf_process_auxtrace_info(union perf_event *event, 1051 struct perf_session *session) 1052 { 1053 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; 1054 struct s390_cpumsf *sf; 1055 int err; 1056 1057 if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event)) 1058 return -EINVAL; 1059 1060 sf = zalloc(sizeof(struct s390_cpumsf)); 1061 if (sf == NULL) 1062 return -ENOMEM; 1063 1064 if (!check_auxtrace_itrace(session->itrace_synth_opts)) { 1065 err = -EINVAL; 1066 goto err_free; 1067 } 1068 sf->use_logfile = session->itrace_synth_opts->log; 1069 if (sf->use_logfile) 1070 perf_config(s390_cpumsf__config, sf); 1071 1072 err = auxtrace_queues__init(&sf->queues); 1073 if (err) 1074 goto err_free; 1075 1076 sf->session = session; 1077 sf->machine = &session->machines.host; /* No kvm support */ 1078 sf->auxtrace_type = auxtrace_info->type; 1079 sf->pmu_type = PERF_TYPE_RAW; 1080 sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); 1081 1082 sf->auxtrace.process_event = s390_cpumsf_process_event; 1083 sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; 1084 sf->auxtrace.flush_events = s390_cpumsf_flush; 1085 sf->auxtrace.free_events = s390_cpumsf_free_events; 1086 sf->auxtrace.free = s390_cpumsf_free; 1087 session->auxtrace = &sf->auxtrace; 1088 1089 if (dump_trace) 1090 return 0; 1091 1092 err = auxtrace_queues__process_index(&sf->queues, session); 1093 if (err) 1094 goto err_free_queues; 1095 1096 if (sf->queues.populated) 1097 sf->data_queued = true; 1098 1099 return 0; 1100 1101 err_free_queues: 1102 auxtrace_queues__free(&sf->queues); 1103 session->auxtrace = NULL; 1104 err_free: 1105 free(sf->logdir); 1106 free(sf); 1107 return err; 1108 } 1109