1 /*- 2 * Copyright (c) 2005-2007 Joseph Koshy 3 * Copyright (c) 2007 The FreeBSD Foundation 4 * All rights reserved. 5 * 6 * Portions of this software were developed by A. Joseph Koshy under 7 * sponsorship from the FreeBSD Foundation and Google, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/pmc.h> 36 #include <sys/pmclog.h> 37 38 #include <assert.h> 39 #include <errno.h> 40 #include <pmc.h> 41 #include <pmclog.h> 42 #include <stddef.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <strings.h> 46 #include <unistd.h> 47 48 #include <machine/pmc_mdep.h> 49 50 #define PMCLOG_BUFFER_SIZE 4096 51 52 /* 53 * API NOTES 54 * 55 * The pmclog(3) API is oriented towards parsing an event stream in 56 * "realtime", i.e., from a data source that may or may not preserve 57 * record boundaries -- for example when the data source is elsewhere 58 * on a network. The API allows data to be fed into the parser zero 59 * or more bytes at a time. 60 * 61 * The state for a log file parser is maintained in a 'struct 62 * pmclog_parse_state'. Parser invocations are done by calling 63 * 'pmclog_read()'; this function will inform the caller when a 64 * complete event is parsed. 65 * 66 * The parser first assembles a complete log file event in an internal 67 * work area (see "ps_saved" below). Once a complete log file event 68 * is read, the parser then parses it and converts it to an event 69 * descriptor usable by the client. We could possibly avoid this two 70 * step process by directly parsing the input log to set fields in the 71 * event record. However the parser's state machine would get 72 * insanely complicated, and this code is unlikely to be used in 73 * performance critical paths.
74 */ 75 76 enum pmclog_parser_state { 77 PL_STATE_NEW_RECORD, /* in-between records */ 78 PL_STATE_EXPECTING_HEADER, /* header being read */ 79 PL_STATE_PARTIAL_RECORD, /* header present but not the record */ 80 PL_STATE_ERROR /* parsing error encountered */ 81 }; 82 83 struct pmclog_parse_state { 84 enum pmclog_parser_state ps_state; 85 enum pmc_cputype ps_arch; /* log file architecture */ 86 uint32_t ps_version; /* hwpmc version */ 87 int ps_initialized; /* whether initialized */ 88 int ps_count; /* count of records processed */ 89 off_t ps_offset; /* stream byte offset */ 90 union pmclog_entry ps_saved; /* saved partial log entry */ 91 int ps_svcount; /* #bytes saved */ 92 int ps_fd; /* active fd or -1 */ 93 char *ps_buffer; /* scratch buffer if fd != -1 */ 94 char *ps_data; /* current parse pointer */ 95 size_t ps_len; /* length of buffered data */ 96 }; 97 98 #define PMCLOG_HEADER_FROM_SAVED_STATE(PS) \ 99 (* ((uint32_t *) &(PS)->ps_saved)) 100 101 #define PMCLOG_INITIALIZE_READER(LE,A) LE = (uint32_t *) &(A) 102 #define PMCLOG_READ32(LE,V) do { \ 103 (V) = *(LE)++; \ 104 } while (0) 105 #define PMCLOG_READ64(LE,V) do { \ 106 uint64_t _v; \ 107 _v = (uint64_t) *(LE)++; \ 108 _v |= ((uint64_t) *(LE)++) << 32; \ 109 (V) = _v; \ 110 } while (0) 111 112 #define PMCLOG_READSTRING(LE,DST,LEN) strlcpy((DST), (char *) (LE), (LEN)) 113 114 /* 115 * Assemble a log record from '*len' octets starting from address '*data'. 116 * Update 'data' and 'len' to reflect the number of bytes consumed. 117 * 118 * '*data' is potentially an unaligned address and '*len' octets may 119 * not be enough to complete a event record. 
120 */ 121 122 static enum pmclog_parser_state 123 pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len) 124 { 125 int avail, copylen, recordsize, used; 126 uint32_t h; 127 const int HEADERSIZE = sizeof(uint32_t); 128 char *src, *dst; 129 130 if ((avail = *len) <= 0) 131 return (ps->ps_state = PL_STATE_ERROR); 132 133 src = *data; 134 h = used = 0; 135 136 if (ps->ps_state == PL_STATE_NEW_RECORD) 137 ps->ps_svcount = 0; 138 139 dst = (char *) &ps->ps_saved + ps->ps_svcount; 140 141 switch (ps->ps_state) { 142 case PL_STATE_NEW_RECORD: 143 144 /* 145 * Transitions: 146 * 147 * Case A: avail < headersize 148 * -> 'expecting header' 149 * 150 * Case B: avail >= headersize 151 * B.1: avail < recordsize 152 * -> 'partial record' 153 * B.2: avail >= recordsize 154 * -> 'new record' 155 */ 156 157 copylen = avail < HEADERSIZE ? avail : HEADERSIZE; 158 bcopy(src, dst, copylen); 159 ps->ps_svcount = used = copylen; 160 161 if (copylen < HEADERSIZE) { 162 ps->ps_state = PL_STATE_EXPECTING_HEADER; 163 goto done; 164 } 165 166 src += copylen; 167 dst += copylen; 168 169 h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); 170 recordsize = PMCLOG_HEADER_TO_LENGTH(h); 171 172 if (recordsize <= 0) 173 goto error; 174 175 if (recordsize <= avail) { /* full record available */ 176 bcopy(src, dst, recordsize - copylen); 177 ps->ps_svcount = used = recordsize; 178 goto done; 179 } 180 181 /* header + a partial record is available */ 182 bcopy(src, dst, avail - copylen); 183 ps->ps_svcount = used = avail; 184 ps->ps_state = PL_STATE_PARTIAL_RECORD; 185 186 break; 187 188 case PL_STATE_EXPECTING_HEADER: 189 190 /* 191 * Transitions: 192 * 193 * Case C: avail+saved < headersize 194 * -> 'expecting header' 195 * 196 * Case D: avail+saved >= headersize 197 * D.1: avail+saved < recordsize 198 * -> 'partial record' 199 * D.2: avail+saved >= recordsize 200 * -> 'new record' 201 * (see PARTIAL_RECORD handling below) 202 */ 203 204 if (avail + ps->ps_svcount < HEADERSIZE) { 205 
bcopy(src, dst, avail); 206 ps->ps_svcount += avail; 207 used = avail; 208 break; 209 } 210 211 used = copylen = HEADERSIZE - ps->ps_svcount; 212 bcopy(src, dst, copylen); 213 src += copylen; 214 dst += copylen; 215 avail -= copylen; 216 ps->ps_svcount += copylen; 217 218 /*FALLTHROUGH*/ 219 220 case PL_STATE_PARTIAL_RECORD: 221 222 /* 223 * Transitions: 224 * 225 * Case E: avail+saved < recordsize 226 * -> 'partial record' 227 * 228 * Case F: avail+saved >= recordsize 229 * -> 'new record' 230 */ 231 232 h = PMCLOG_HEADER_FROM_SAVED_STATE(ps); 233 recordsize = PMCLOG_HEADER_TO_LENGTH(h); 234 235 if (recordsize <= 0) 236 goto error; 237 238 if (avail + ps->ps_svcount < recordsize) { 239 copylen = avail; 240 ps->ps_state = PL_STATE_PARTIAL_RECORD; 241 } else { 242 copylen = recordsize - ps->ps_svcount; 243 ps->ps_state = PL_STATE_NEW_RECORD; 244 } 245 246 bcopy(src, dst, copylen); 247 ps->ps_svcount += copylen; 248 used += copylen; 249 break; 250 251 default: 252 goto error; 253 } 254 255 done: 256 *data += used; 257 *len -= used; 258 return ps->ps_state; 259 260 error: 261 ps->ps_state = PL_STATE_ERROR; 262 return ps->ps_state; 263 } 264 265 /* 266 * Get an event from the stream pointed to by '*data'. '*len' 267 * indicates the number of bytes available to parse. Arguments 268 * '*data' and '*len' are updated to indicate the number of bytes 269 * consumed. 
270 */ 271 272 static int 273 pmclog_get_event(void *cookie, char **data, ssize_t *len, 274 struct pmclog_ev *ev) 275 { 276 int evlen, pathlen; 277 uint32_t h, *le, npc; 278 enum pmclog_parser_state e; 279 struct pmclog_parse_state *ps; 280 281 ps = (struct pmclog_parse_state *) cookie; 282 283 assert(ps->ps_state != PL_STATE_ERROR); 284 285 if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) { 286 ev->pl_state = PMCLOG_ERROR; 287 return -1; 288 } 289 290 if (e != PL_STATE_NEW_RECORD) { 291 ev->pl_state = PMCLOG_REQUIRE_DATA; 292 return -1; 293 } 294 295 PMCLOG_INITIALIZE_READER(le, ps->ps_saved); 296 297 PMCLOG_READ32(le,h); 298 299 if (!PMCLOG_HEADER_CHECK_MAGIC(h)) { 300 ps->ps_state = PL_STATE_ERROR; 301 ev->pl_state = PMCLOG_ERROR; 302 return -1; 303 } 304 305 /* copy out the time stamp */ 306 PMCLOG_READ32(le,ev->pl_ts.tv_sec); 307 PMCLOG_READ32(le,ev->pl_ts.tv_nsec); 308 309 evlen = PMCLOG_HEADER_TO_LENGTH(h); 310 311 #define PMCLOG_GET_PATHLEN(P,E,TYPE) do { \ 312 (P) = (E) - offsetof(struct TYPE, pl_pathname); \ 313 if ((P) > PATH_MAX || (P) < 0) \ 314 goto error; \ 315 } while (0) 316 317 #define PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do { \ 318 (SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc)) \ 319 / sizeof(uintfptr_t); \ 320 } while (0); 321 322 switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) { 323 case PMCLOG_TYPE_CALLCHAIN: 324 PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid); 325 PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid); 326 PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags); 327 PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen); 328 for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++) 329 PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]); 330 for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++) 331 ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0; 332 break; 333 case PMCLOG_TYPE_CLOSELOG: 334 case PMCLOG_TYPE_DROPNOTIFY: 335 /* nothing to do */ 336 break; 337 case PMCLOG_TYPE_INITIALIZE: 338 PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version); 339 
PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch); 340 ps->ps_version = ev->pl_u.pl_i.pl_version; 341 ps->ps_arch = ev->pl_u.pl_i.pl_arch; 342 ps->ps_initialized = 1; 343 break; 344 case PMCLOG_TYPE_MAP_IN: 345 PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in); 346 PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid); 347 PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start); 348 PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen); 349 break; 350 case PMCLOG_TYPE_MAP_OUT: 351 PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid); 352 PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start); 353 PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end); 354 break; 355 case PMCLOG_TYPE_PCSAMPLE: 356 PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pid); 357 PMCLOG_READADDR(le,ev->pl_u.pl_s.pl_pc); 358 PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pmcid); 359 PMCLOG_READ32(le,ev->pl_u.pl_s.pl_usermode); 360 break; 361 case PMCLOG_TYPE_PMCALLOCATE: 362 PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid); 363 PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event); 364 PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags); 365 if ((ev->pl_u.pl_a.pl_evname = 366 pmc_name_of_event(ev->pl_u.pl_a.pl_event)) == NULL) 367 goto error; 368 break; 369 case PMCLOG_TYPE_PMCATTACH: 370 PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach); 371 PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid); 372 PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid); 373 PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen); 374 break; 375 case PMCLOG_TYPE_PMCDETACH: 376 PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid); 377 PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid); 378 break; 379 case PMCLOG_TYPE_PROCCSW: 380 PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid); 381 PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value); 382 PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid); 383 break; 384 case PMCLOG_TYPE_PROCEXEC: 385 PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec); 386 PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid); 387 PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_entryaddr); 388 PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid); 389 PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen); 390 break; 391 case 
PMCLOG_TYPE_PROCEXIT: 392 PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid); 393 PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value); 394 PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid); 395 break; 396 case PMCLOG_TYPE_PROCFORK: 397 PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid); 398 PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid); 399 break; 400 case PMCLOG_TYPE_SYSEXIT: 401 PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid); 402 break; 403 case PMCLOG_TYPE_USERDATA: 404 PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata); 405 break; 406 default: /* unknown record type */ 407 ps->ps_state = PL_STATE_ERROR; 408 ev->pl_state = PMCLOG_ERROR; 409 return (-1); 410 } 411 412 ev->pl_offset = (ps->ps_offset += evlen); 413 ev->pl_count = (ps->ps_count += 1); 414 ev->pl_state = PMCLOG_OK; 415 return 0; 416 417 error: 418 ev->pl_state = PMCLOG_ERROR; 419 ps->ps_state = PL_STATE_ERROR; 420 return -1; 421 } 422 423 /* 424 * Extract and return the next event from the byte stream. 425 * 426 * Returns 0 and sets the event's state to PMCLOG_OK in case an event 427 * was successfully parsed. Otherwise this function returns -1 and 428 * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data 429 * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if 430 * a parse error was encountered. 431 */ 432 433 int 434 pmclog_read(void *cookie, struct pmclog_ev *ev) 435 { 436 int retval; 437 ssize_t nread; 438 struct pmclog_parse_state *ps; 439 440 ps = (struct pmclog_parse_state *) cookie; 441 442 if (ps->ps_state == PL_STATE_ERROR) { 443 ev->pl_state = PMCLOG_ERROR; 444 return -1; 445 } 446 447 /* 448 * If there isn't enough data left for a new event try and get 449 * more data. 450 */ 451 if (ps->ps_len == 0) { 452 ev->pl_state = PMCLOG_REQUIRE_DATA; 453 454 /* 455 * If we have a valid file descriptor to read from, attempt 456 * to read from that. This read may return with an error, 457 * (which may be EAGAIN or other recoverable error), or 458 * can return EOF. 
459 */ 460 if (ps->ps_fd != PMCLOG_FD_NONE) { 461 refill: 462 nread = read(ps->ps_fd, ps->ps_buffer, 463 PMCLOG_BUFFER_SIZE); 464 465 if (nread <= 0) { 466 if (nread == 0) 467 ev->pl_state = PMCLOG_EOF; 468 else if (errno != EAGAIN) /* not restartable */ 469 ev->pl_state = PMCLOG_ERROR; 470 return -1; 471 } 472 473 ps->ps_len = nread; 474 ps->ps_data = ps->ps_buffer; 475 } else 476 return -1; 477 } 478 479 assert(ps->ps_len > 0); 480 481 482 /* Retrieve one event from the byte stream. */ 483 retval = pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev); 484 485 /* 486 * If we need more data and we have a configured fd, try read 487 * from it. 488 */ 489 if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA && 490 ps->ps_fd != -1) { 491 assert(ps->ps_len == 0); 492 goto refill; 493 } 494 495 return retval; 496 } 497 498 /* 499 * Feed data to a memory based parser. 500 * 501 * The memory area pointed to by 'data' needs to be valid till the 502 * next error return from pmclog_next_event(). 503 */ 504 505 int 506 pmclog_feed(void *cookie, char *data, int len) 507 { 508 struct pmclog_parse_state *ps; 509 510 ps = (struct pmclog_parse_state *) cookie; 511 512 if (len < 0 || /* invalid length */ 513 ps->ps_buffer || /* called for a file parser */ 514 ps->ps_len != 0) /* unnecessary call */ 515 return -1; 516 517 ps->ps_data = data; 518 ps->ps_len = len; 519 520 return 0; 521 } 522 523 /* 524 * Allocate and initialize parser state. 
525 */ 526 527 void * 528 pmclog_open(int fd) 529 { 530 struct pmclog_parse_state *ps; 531 532 if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL) 533 return NULL; 534 535 ps->ps_state = PL_STATE_NEW_RECORD; 536 ps->ps_arch = -1; 537 ps->ps_initialized = 0; 538 ps->ps_count = 0; 539 ps->ps_offset = (off_t) 0; 540 bzero(&ps->ps_saved, sizeof(ps->ps_saved)); 541 ps->ps_svcount = 0; 542 ps->ps_fd = fd; 543 ps->ps_data = NULL; 544 ps->ps_buffer = NULL; 545 ps->ps_len = 0; 546 547 /* allocate space for a work area */ 548 if (ps->ps_fd != PMCLOG_FD_NONE) { 549 if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) 550 return NULL; 551 } 552 553 return ps; 554 } 555 556 557 /* 558 * Free up parser state. 559 */ 560 561 void 562 pmclog_close(void *cookie) 563 { 564 struct pmclog_parse_state *ps; 565 566 ps = (struct pmclog_parse_state *) cookie; 567 568 if (ps->ps_buffer) 569 free(ps->ps_buffer); 570 571 free(ps); 572 } 573