1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <stdarg.h> 28 #include <unistd.h> 29 #include <fcntl.h> 30 #include <errno.h> 31 #include <string.h> 32 #include <deflt.h> 33 #include <time.h> 34 #include <syslog.h> 35 #include <stropts.h> 36 #include <pthread.h> 37 #include <limits.h> 38 #include <atomic.h> 39 #include <libnvpair.h> 40 #include <libintl.h> 41 #include <sys/mem.h> 42 #include <sys/statvfs.h> 43 #include <sys/dumphdr.h> 44 #include <sys/dumpadm.h> 45 #include <sys/compress.h> 46 #include <sys/panic.h> 47 #include <sys/sysmacros.h> 48 #include <sys/stat.h> 49 #include <sys/resource.h> 50 #include <bzip2/bzlib.h> 51 #include <sys/fm/util.h> 52 #include <fm/libfmevent.h> 53 #include <sys/int_fmtio.h> 54 55 56 /* fread/fwrite buffer size */ 57 #define FBUFSIZE (1ULL << 20) 58 59 /* minimum size for output buffering */ 60 #define MINCOREBLKSIZE (1ULL << 17) 61 62 /* create this file if metrics collection is enabled in the kernel */ 63 #define METRICSFILE "METRICS.csv" 64 65 static char progname[9] = "savecore"; 66 
static char *savedir; /* savecore directory */ 67 static char *dumpfile; /* source of raw crash dump */ 68 static long bounds = -1; /* numeric suffix */ 69 static long pagesize; /* dump pagesize */ 70 static int dumpfd = -1; /* dumpfile descriptor */ 71 static dumphdr_t corehdr, dumphdr; /* initial and terminal dumphdrs */ 72 static boolean_t dump_incomplete; /* dumphdr indicates incomplete */ 73 static boolean_t fm_panic; /* dump is the result of fm_panic */ 74 static offset_t endoff; /* offset of end-of-dump header */ 75 static int verbose; /* chatty mode */ 76 static int disregard_valid_flag; /* disregard valid flag */ 77 static int livedump; /* dump the current running system */ 78 static int interactive; /* user invoked; no syslog */ 79 static int csave; /* save dump compressed */ 80 static int filemode; /* processing file, not dump device */ 81 static int percent_done; /* progress indicator */ 82 static hrtime_t startts; /* timestamp at start */ 83 static volatile uint64_t saved; /* count of pages written */ 84 static volatile uint64_t zpages; /* count of zero pages not written */ 85 static dumpdatahdr_t datahdr; /* compression info */ 86 static long coreblksize; /* preferred write size (st_blksize) */ 87 static int cflag; /* run as savecore -c */ 88 static int mflag; /* run as savecore -m */ 89 90 /* 91 * Payload information for the events we raise. These are used 92 * in raise_event to determine what payload to include. 
93 */ 94 #define SC_PAYLOAD_SAVEDIR 0x0001 /* Include savedir in event */ 95 #define SC_PAYLOAD_INSTANCE 0x0002 /* Include bounds instance number */ 96 #define SC_PAYLOAD_IMAGEUUID 0x0004 /* Include dump OS instance uuid */ 97 #define SC_PAYLOAD_CRASHTIME 0x0008 /* Include epoch crashtime */ 98 #define SC_PAYLOAD_PANICSTR 0x0010 /* Include panic string */ 99 #define SC_PAYLOAD_PANICSTACK 0x0020 /* Include panic string */ 100 #define SC_PAYLOAD_FAILREASON 0x0040 /* Include failure reason */ 101 #define SC_PAYLOAD_DUMPCOMPLETE 0x0080 /* Include completeness indicator */ 102 #define SC_PAYLOAD_ISCOMPRESSED 0x0100 /* Dump is in vmdump.N form */ 103 #define SC_PAYLOAD_DUMPADM_EN 0x0200 /* Is dumpadm enabled or not? */ 104 #define SC_PAYLOAD_FM_PANIC 0x0400 /* Panic initiated by FMA */ 105 #define SC_PAYLOAD_JUSTCHECKING 0x0800 /* Run with -c flag? */ 106 107 enum sc_event_type { 108 SC_EVENT_DUMP_PENDING, 109 SC_EVENT_SAVECORE_FAILURE, 110 SC_EVENT_DUMP_AVAILABLE 111 }; 112 113 /* 114 * Common payload 115 */ 116 #define _SC_PAYLOAD_CMN \ 117 SC_PAYLOAD_IMAGEUUID | \ 118 SC_PAYLOAD_CRASHTIME | \ 119 SC_PAYLOAD_PANICSTR | \ 120 SC_PAYLOAD_PANICSTACK | \ 121 SC_PAYLOAD_DUMPCOMPLETE | \ 122 SC_PAYLOAD_FM_PANIC | \ 123 SC_PAYLOAD_SAVEDIR 124 125 static const struct { 126 const char *sce_subclass; 127 uint32_t sce_payload; 128 } sc_event[] = { 129 /* 130 * SC_EVENT_DUMP_PENDING 131 */ 132 { 133 "dump_pending_on_device", 134 _SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN | 135 SC_PAYLOAD_JUSTCHECKING 136 }, 137 138 /* 139 * SC_EVENT_SAVECORE_FAILURE 140 */ 141 { 142 "savecore_failure", 143 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON 144 }, 145 146 /* 147 * SC_EVENT_DUMP_AVAILABLE 148 */ 149 { 150 "dump_available", 151 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED 152 }, 153 }; 154 155 static void raise_event(enum sc_event_type, char *); 156 157 static void 158 usage(void) 159 { 160 (void) fprintf(stderr, 161 "usage: %s [-Lvd] [-f dumpfile] 
[dirname]\n", progname); 162 exit(1); 163 } 164 165 #define SC_SL_NONE 0x0001 /* no syslog */ 166 #define SC_SL_ERR 0x0002 /* syslog if !interactive, LOG_ERR */ 167 #define SC_SL_WARN 0x0004 /* syslog if !interactive, LOG_WARNING */ 168 #define SC_IF_VERBOSE 0x0008 /* message only if -v */ 169 #define SC_IF_ISATTY 0x0010 /* message only if interactive */ 170 #define SC_EXIT_OK 0x0020 /* exit(0) */ 171 #define SC_EXIT_ERR 0x0040 /* exit(1) */ 172 #define SC_EXIT_PEND 0x0080 /* exit(2) */ 173 #define SC_EXIT_FM 0x0100 /* exit(3) */ 174 175 #define _SC_ALLEXIT (SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM) 176 177 static void 178 logprint(uint32_t flags, char *message, ...) 179 { 180 va_list args; 181 char buf[1024]; 182 int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0); 183 int do_ifverb = (flags & SC_IF_VERBOSE) && verbose; 184 int do_ifisatty = (flags & SC_IF_ISATTY) && interactive; 185 int code; 186 static int logprint_raised = 0; 187 188 if (do_always || do_ifverb || do_ifisatty) { 189 va_start(args, message); 190 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 191 (void) vsnprintf(buf, sizeof (buf), message, args); 192 (void) fprintf(stderr, "%s: %s\n", progname, buf); 193 if (!interactive) { 194 switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) { 195 case SC_SL_ERR: 196 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 197 syslog(LOG_ERR, buf); 198 break; 199 200 case SC_SL_WARN: 201 /*LINTED: E_SEC_PRINTF_VAR_FMT*/ 202 syslog(LOG_WARNING, buf); 203 break; 204 205 default: 206 break; 207 } 208 } 209 va_end(args); 210 } 211 212 switch (flags & _SC_ALLEXIT) { 213 case 0: 214 return; 215 216 case SC_EXIT_OK: 217 code = 0; 218 break; 219 220 case SC_EXIT_PEND: 221 code = 2; 222 break; 223 224 case SC_EXIT_FM: 225 code = 3; 226 break; 227 228 case SC_EXIT_ERR: 229 default: 230 /* 231 * Raise an ireport saying why we are exiting. Do not 232 * raise if run as savecore -m. If something in the 233 * raise_event codepath calls logprint avoid recursion. 
234 */ 235 if (!mflag && logprint_raised++ == 0) 236 raise_event(SC_EVENT_SAVECORE_FAILURE, buf); 237 code = 1; 238 break; 239 } 240 241 exit(code); 242 } 243 244 /* 245 * System call / libc wrappers that exit on error. 246 */ 247 static int 248 Open(const char *name, int oflags, mode_t mode) 249 { 250 int fd; 251 252 if ((fd = open64(name, oflags, mode)) == -1) 253 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s", 254 name, strerror(errno)); 255 return (fd); 256 } 257 258 static void 259 Fread(void *buf, size_t size, FILE *f) 260 { 261 if (fread(buf, size, 1, f) != 1) 262 logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: ferror %d feof %d", 263 ferror(f), feof(f)); 264 } 265 266 static void 267 Fwrite(void *buf, size_t size, FILE *f) 268 { 269 if (fwrite(buf, size, 1, f) != 1) 270 logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s", 271 strerror(errno)); 272 } 273 274 static void 275 Fseek(offset_t off, FILE *f) 276 { 277 if (fseeko64(f, off, SEEK_SET) != 0) 278 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s", 279 strerror(errno)); 280 } 281 282 typedef struct stat64 Stat_t; 283 284 static void 285 Fstat(int fd, Stat_t *sb, const char *fname) 286 { 287 if (fstat64(fd, sb) != 0) 288 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname, 289 strerror(errno)); 290 } 291 292 static void 293 Stat(const char *fname, Stat_t *sb) 294 { 295 if (stat64(fname, sb) != 0) 296 logprint(SC_SL_ERR | SC_EXIT_ERR, "stat(\"%s\"): %s", fname, 297 strerror(errno)); 298 } 299 300 static void 301 Pread(int fd, void *buf, size_t size, offset_t off) 302 { 303 ssize_t sz = pread64(fd, buf, size, off); 304 305 if (sz < 0) 306 logprint(SC_SL_ERR | SC_EXIT_ERR, 307 "pread: %s", strerror(errno)); 308 else if (sz != size) 309 logprint(SC_SL_ERR | SC_EXIT_ERR, 310 "pread: size %ld != %ld", sz, size); 311 } 312 313 static void 314 Pwrite(int fd, void *buf, size_t size, off64_t off) 315 { 316 if (pwrite64(fd, buf, size, off) != size) 317 logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s", 318 
strerror(errno)); 319 } 320 321 static void * 322 Zalloc(size_t size) 323 { 324 void *buf; 325 326 if ((buf = calloc(size, 1)) == NULL) 327 logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s", 328 strerror(errno)); 329 return (buf); 330 } 331 332 static long 333 read_number_from_file(const char *filename, long default_value) 334 { 335 long file_value = -1; 336 FILE *fp; 337 338 if ((fp = fopen(filename, "r")) != NULL) { 339 (void) fscanf(fp, "%ld", &file_value); 340 (void) fclose(fp); 341 } 342 return (file_value < 0 ? default_value : file_value); 343 } 344 345 static void 346 read_dumphdr(void) 347 { 348 if (filemode) 349 dumpfd = Open(dumpfile, O_RDONLY, 0644); 350 else 351 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 352 endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET; 353 Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 354 Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr)); 355 356 pagesize = dumphdr.dump_pagesize; 357 358 if (dumphdr.dump_magic != DUMP_MAGIC) 359 logprint(SC_SL_NONE | SC_EXIT_OK, "bad magic number %x", 360 dumphdr.dump_magic); 361 362 if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag) 363 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK, 364 "dump already processed"); 365 366 if (dumphdr.dump_version != DUMP_VERSION) 367 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK, 368 "dump version (%d) != %s version (%d)", 369 dumphdr.dump_version, progname, DUMP_VERSION); 370 371 if (dumphdr.dump_wordsize != DUMP_WORDSIZE) 372 logprint(SC_SL_NONE | SC_EXIT_OK, 373 "dump is from %u-bit kernel - cannot save on %u-bit kernel", 374 dumphdr.dump_wordsize, DUMP_WORDSIZE); 375 376 if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) { 377 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION) 378 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK, 379 "dump data version (%d) != %s data version (%d)", 380 datahdr.dump_datahdr_version, progname, 381 DUMP_DATAHDR_VERSION); 382 } else { 383 (void) 
memset(&datahdr, 0, sizeof (datahdr)); 384 datahdr.dump_maxcsize = pagesize; 385 } 386 387 /* 388 * Read the initial header, clear the valid bits, and compare headers. 389 * The main header may have been overwritten by swapping if we're 390 * using a swap partition as the dump device, in which case we bail. 391 */ 392 Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start); 393 394 corehdr.dump_flags &= ~DF_VALID; 395 dumphdr.dump_flags &= ~DF_VALID; 396 397 if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) { 398 /* 399 * Clear valid bit so we don't complain on every invocation. 400 */ 401 if (!filemode) 402 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 403 logprint(SC_SL_ERR | SC_EXIT_ERR, 404 "initial dump header corrupt"); 405 } 406 } 407 408 static void 409 check_space(int csave) 410 { 411 struct statvfs fsb; 412 int64_t spacefree, dumpsize, minfree, datasize; 413 414 if (statvfs(".", &fsb) < 0) 415 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s", 416 strerror(errno)); 417 418 dumpsize = dumphdr.dump_data - dumphdr.dump_start; 419 datasize = dumphdr.dump_npages * pagesize; 420 if (!csave) 421 dumpsize += datasize; 422 else 423 dumpsize += datahdr.dump_data_csize; 424 425 spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize; 426 minfree = 1024LL * read_number_from_file("minfree", 1024); 427 if (spacefree < minfree + dumpsize) { 428 logprint(SC_SL_ERR | SC_EXIT_ERR, 429 "not enough space in %s (%lld MB avail, %lld MB needed)", 430 savedir, spacefree >> 20, (minfree + dumpsize) >> 20); 431 } 432 } 433 434 static void 435 build_dump_map(int corefd, const pfn_t *pfn_table) 436 { 437 long i; 438 static long misses = 0; 439 size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t); 440 mem_vtop_t vtop; 441 dump_map_t *dmp = Zalloc(dump_mapsize); 442 char *inbuf = Zalloc(FBUFSIZE); 443 FILE *in = fdopen(dup(dumpfd), "rb"); 444 445 (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE); 446 Fseek(dumphdr.dump_map, in); 447 448 corehdr.dump_data = 
corehdr.dump_map + roundup(dump_mapsize, pagesize); 449 450 for (i = 0; i < corehdr.dump_nvtop; i++) { 451 long first = 0; 452 long last = corehdr.dump_npages - 1; 453 long middle; 454 pfn_t pfn; 455 uintptr_t h; 456 457 Fread(&vtop, sizeof (mem_vtop_t), in); 458 while (last >= first) { 459 middle = (first + last) / 2; 460 pfn = pfn_table[middle]; 461 if (pfn == vtop.m_pfn) 462 break; 463 if (pfn < vtop.m_pfn) 464 first = middle + 1; 465 else 466 last = middle - 1; 467 } 468 if (pfn != vtop.m_pfn) { 469 if (++misses <= 10) 470 (void) fprintf(stderr, 471 "pfn %ld not found for as=%p, va=%p\n", 472 vtop.m_pfn, (void *)vtop.m_as, vtop.m_va); 473 continue; 474 } 475 476 dmp[i].dm_as = vtop.m_as; 477 dmp[i].dm_va = (uintptr_t)vtop.m_va; 478 dmp[i].dm_data = corehdr.dump_data + 479 ((uint64_t)middle << corehdr.dump_pageshift); 480 481 h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va); 482 dmp[i].dm_next = dmp[h].dm_first; 483 dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t); 484 } 485 486 Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map); 487 free(dmp); 488 (void) fclose(in); 489 free(inbuf); 490 } 491 492 /* 493 * Copy whole sections of the dump device to the file. 494 */ 495 static void 496 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf, 497 size_t sz) 498 { 499 size_t nr; 500 offset_t off = *offp; 501 502 while (nb > 0) { 503 nr = sz < nb ? sz : (size_t)nb; 504 Pread(dumpfd, buf, nr, dumpoff); 505 Pwrite(fd, buf, nr, off); 506 off += nr; 507 dumpoff += nr; 508 nb -= nr; 509 } 510 *offp = off; 511 } 512 513 /* 514 * Copy pages when the dump data header is missing. 515 * This supports older kernels with latest savecore. 
516 */ 517 static void 518 CopyPages(offset_t *offp, int fd, char *buf, size_t sz) 519 { 520 uint32_t csize; 521 FILE *in = fdopen(dup(dumpfd), "rb"); 522 FILE *out = fdopen(dup(fd), "wb"); 523 char *cbuf = Zalloc(pagesize); 524 char *outbuf = Zalloc(FBUFSIZE); 525 pgcnt_t np = dumphdr.dump_npages; 526 527 (void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE); 528 (void) setvbuf(in, buf, _IOFBF, sz); 529 Fseek(dumphdr.dump_data, in); 530 531 Fseek(*offp, out); 532 while (np > 0) { 533 Fread(&csize, sizeof (uint32_t), in); 534 Fwrite(&csize, sizeof (uint32_t), out); 535 *offp += sizeof (uint32_t); 536 if (csize > pagesize || csize == 0) { 537 logprint(SC_SL_ERR, 538 "CopyPages: page %lu csize %d (0x%x) pagesize %d", 539 dumphdr.dump_npages - np, csize, csize, 540 pagesize); 541 break; 542 } 543 Fread(cbuf, csize, in); 544 Fwrite(cbuf, csize, out); 545 *offp += csize; 546 np--; 547 } 548 (void) fclose(in); 549 (void) fclose(out); 550 free(outbuf); 551 free(buf); 552 } 553 554 /* 555 * Concatenate dump contents into a new file. 556 * Update corehdr with new offsets. 557 */ 558 static void 559 copy_crashfile(const char *corefile) 560 { 561 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 562 size_t bufsz = FBUFSIZE; 563 char *inbuf = Zalloc(bufsz); 564 offset_t coreoff; 565 size_t nb; 566 567 logprint(SC_SL_ERR | SC_IF_VERBOSE, 568 "Copying %s to %s/%s\n", dumpfile, savedir, corefile); 569 570 /* 571 * This dump file is still compressed 572 */ 573 corehdr.dump_flags |= DF_COMPRESSED | DF_VALID; 574 575 /* 576 * Leave room for corehdr, it is updated and written last 577 */ 578 corehdr.dump_start = 0; 579 coreoff = sizeof (corehdr); 580 581 /* 582 * Read in the compressed symbol table, copy it to corefile. 583 */ 584 coreoff = roundup(coreoff, pagesize); 585 corehdr.dump_ksyms = coreoff; 586 Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd, 587 inbuf, bufsz); 588 589 /* 590 * Save the pfn table. 
591 */ 592 coreoff = roundup(coreoff, pagesize); 593 corehdr.dump_pfn = coreoff; 594 Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff, 595 corefd, inbuf, bufsz); 596 597 /* 598 * Save the dump map. 599 */ 600 coreoff = roundup(coreoff, pagesize); 601 corehdr.dump_map = coreoff; 602 Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t), 603 &coreoff, corefd, inbuf, bufsz); 604 605 /* 606 * Save the data pages. 607 */ 608 coreoff = roundup(coreoff, pagesize); 609 corehdr.dump_data = coreoff; 610 if (datahdr.dump_data_csize != 0) 611 Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff, 612 corefd, inbuf, bufsz); 613 else 614 CopyPages(&coreoff, corefd, inbuf, bufsz); 615 616 /* 617 * Now write the modified dump header to front and end of the copy. 618 * Make it look like a valid dump device. 619 * 620 * From dumphdr.h: Two headers are written out: one at the 621 * beginning of the dump, and the other at the very end of the 622 * dump device. The terminal header is at a known location 623 * (end of device) so we can always find it. 624 * 625 * Pad with zeros to each DUMP_OFFSET boundary. 626 */ 627 (void) memset(inbuf, 0, DUMP_OFFSET); 628 629 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1)); 630 if (nb > 0) { 631 Pwrite(corefd, inbuf, nb, coreoff); 632 coreoff += nb; 633 } 634 635 Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff); 636 coreoff += sizeof (corehdr); 637 638 Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff); 639 coreoff += sizeof (datahdr); 640 641 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1)); 642 if (nb > 0) { 643 Pwrite(corefd, inbuf, nb, coreoff); 644 } 645 646 free(inbuf); 647 Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start); 648 649 /* 650 * Write out the modified dump header to the dump device. 651 * The dump device has been processed, so DF_VALID is clear. 
652 */ 653 if (!filemode) 654 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 655 656 (void) close(corefd); 657 } 658 659 /* 660 * compressed streams 661 */ 662 typedef struct blockhdr blockhdr_t; 663 typedef struct block block_t; 664 665 struct blockhdr { 666 block_t *head; 667 block_t *tail; 668 }; 669 670 struct block { 671 block_t *next; 672 char *block; 673 int size; 674 }; 675 676 typedef enum streamstate { 677 STREAMSTART, 678 STREAMPAGES 679 } streamstate_t; 680 681 typedef struct stream { 682 streamstate_t state; 683 int init; 684 int tag; 685 int bound; 686 int nout; 687 char *blkbuf; 688 blockhdr_t blocks; 689 pgcnt_t pagenum; 690 pgcnt_t curpage; 691 pgcnt_t npages; 692 pgcnt_t done; 693 bz_stream strm; 694 dumpcsize_t sc; 695 dumpstreamhdr_t sh; 696 } stream_t; 697 698 static stream_t *streams; 699 static stream_t *endstreams; 700 701 const int cs = sizeof (dumpcsize_t); 702 703 typedef struct tinfo { 704 pthread_t tid; 705 int corefd; 706 } tinfo_t; 707 708 static int threads_stop; 709 static int threads_active; 710 static tinfo_t *tinfo; 711 static tinfo_t *endtinfo; 712 713 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; 714 static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER; 715 static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER; 716 static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER; 717 718 static blockhdr_t freeblocks; 719 720 static void 721 enqt(blockhdr_t *h, block_t *b) 722 { 723 b->next = NULL; 724 if (h->tail == NULL) 725 h->head = b; 726 else 727 h->tail->next = b; 728 h->tail = b; 729 } 730 731 static block_t * 732 deqh(blockhdr_t *h) 733 { 734 block_t *b = h->head; 735 736 if (b != NULL) { 737 h->head = b->next; 738 if (h->head == NULL) 739 h->tail = NULL; 740 } 741 return (b); 742 } 743 744 static void *runstreams(void *arg); 745 746 static void 747 initstreams(int corefd, int nstreams, int maxcsize) 748 { 749 int nthreads; 750 int nblocks; 751 int i; 752 block_t *b; 753 tinfo_t *t; 754 755 nthreads = 
sysconf(_SC_NPROCESSORS_ONLN); 756 if (nstreams < nthreads) 757 nthreads = nstreams; 758 if (nthreads < 1) 759 nthreads = 1; 760 nblocks = nthreads * 2; 761 762 tinfo = Zalloc(nthreads * sizeof (tinfo_t)); 763 endtinfo = &tinfo[nthreads]; 764 765 /* init streams */ 766 streams = Zalloc(nstreams * sizeof (stream_t)); 767 endstreams = &streams[nstreams]; 768 769 /* init stream block buffers */ 770 for (i = 0; i < nblocks; i++) { 771 b = Zalloc(sizeof (block_t)); 772 b->block = Zalloc(maxcsize); 773 enqt(&freeblocks, b); 774 } 775 776 /* init worker threads */ 777 (void) pthread_mutex_lock(&lock); 778 threads_active = 1; 779 threads_stop = 0; 780 for (t = tinfo; t != endtinfo; t++) { 781 t->corefd = dup(corefd); 782 if (t->corefd < 0) { 783 nthreads = t - tinfo; 784 endtinfo = t; 785 break; 786 } 787 if (pthread_create(&t->tid, NULL, runstreams, t) != 0) 788 logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s", 789 strerror(errno)); 790 } 791 (void) pthread_mutex_unlock(&lock); 792 } 793 794 static void 795 sbarrier() 796 { 797 stream_t *s; 798 799 (void) pthread_mutex_lock(&lock); 800 for (s = streams; s != endstreams; s++) { 801 while (s->bound || s->blocks.head != NULL) 802 (void) pthread_cond_wait(&cvbarrier, &lock); 803 } 804 (void) pthread_mutex_unlock(&lock); 805 } 806 807 static void 808 stopstreams() 809 { 810 tinfo_t *t; 811 812 if (threads_active) { 813 sbarrier(); 814 (void) pthread_mutex_lock(&lock); 815 threads_stop = 1; 816 (void) pthread_cond_signal(&cvwork); 817 (void) pthread_mutex_unlock(&lock); 818 for (t = tinfo; t != endtinfo; t++) 819 (void) pthread_join(t->tid, NULL); 820 free(tinfo); 821 tinfo = NULL; 822 threads_active = 0; 823 } 824 } 825 826 static block_t * 827 getfreeblock() 828 { 829 block_t *b; 830 831 (void) pthread_mutex_lock(&lock); 832 while ((b = deqh(&freeblocks)) == NULL) 833 (void) pthread_cond_wait(&cvfree, &lock); 834 (void) pthread_mutex_unlock(&lock); 835 return (b); 836 } 837 838 /* data page offset from page number */ 
839 #define BTOP(b) ((b) >> dumphdr.dump_pageshift) 840 #define PTOB(p) ((p) << dumphdr.dump_pageshift) 841 #define DATAOFF(p) (corehdr.dump_data + PTOB(p)) 842 843 /* check for coreblksize boundary */ 844 static int 845 isblkbnd(pgcnt_t pgnum) 846 { 847 return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0); 848 } 849 850 static int 851 iszpage(char *buf) 852 { 853 size_t sz; 854 uint64_t *pl; 855 856 /*LINTED:E_BAD_PTR_CAST_ALIGN*/ 857 pl = (uint64_t *)(buf); 858 for (sz = 0; sz < pagesize; sz += sizeof (*pl)) 859 if (*pl++ != 0) 860 return (0); 861 return (1); 862 } 863 864 volatile uint_t *hist; 865 866 /* write pages to the core file */ 867 static void 868 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np) 869 { 870 atomic_inc_uint(&hist[np]); 871 if (np > 0) 872 Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum)); 873 } 874 875 /* 876 * Process one lzjb block. 877 * No object (stream header or page) will be split over a block boundary. 878 */ 879 static void 880 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz) 881 { 882 int in = 0; 883 int csize; 884 int doflush; 885 char *out; 886 size_t dsize; 887 dumpcsize_t sc; 888 dumpstreamhdr_t sh; 889 890 if (!s->init) { 891 s->init = 1; 892 if (s->blkbuf == NULL) 893 s->blkbuf = Zalloc(coreblksize); 894 s->state = STREAMSTART; 895 } 896 while (in < blocksz) { 897 switch (s->state) { 898 case STREAMSTART: 899 (void) memcpy(&sh, block + in, sizeof (sh)); 900 in += sizeof (sh); 901 if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0) 902 logprint(SC_SL_ERR | SC_EXIT_ERR, 903 "LZJB STREAMSTART: bad stream header"); 904 if (sh.stream_npages > datahdr.dump_maxrange) 905 logprint(SC_SL_ERR | SC_EXIT_ERR, 906 "LZJB STREAMSTART: bad range: %d > %d", 907 sh.stream_npages, datahdr.dump_maxrange); 908 s->pagenum = sh.stream_pagenum; 909 s->npages = sh.stream_npages; 910 s->curpage = s->pagenum; 911 s->nout = 0; 912 s->done = 0; 913 s->state = STREAMPAGES; 914 break; 915 case STREAMPAGES: 916 (void) memcpy(&sc, block 
+ in, cs); 917 in += cs; 918 csize = DUMP_GET_CSIZE(sc); 919 if (csize > pagesize) 920 logprint(SC_SL_ERR | SC_EXIT_ERR, 921 "LZJB STREAMPAGES: bad csize=%d", csize); 922 923 out = s->blkbuf + PTOB(s->nout); 924 dsize = decompress(block + in, out, csize, pagesize); 925 926 if (dsize != pagesize) 927 logprint(SC_SL_ERR | SC_EXIT_ERR, 928 "LZJB STREAMPAGES: dsize %d != pagesize %d", 929 dsize, pagesize); 930 931 in += csize; 932 atomic_inc_64(&saved); 933 934 doflush = 0; 935 if (s->nout == 0 && iszpage(out)) { 936 doflush = 1; 937 atomic_inc_64(&zpages); 938 } else if (++s->nout >= BTOP(coreblksize) || 939 isblkbnd(s->curpage + s->nout)) { 940 doflush = 1; 941 } 942 if (++s->done >= s->npages) { 943 s->state = STREAMSTART; 944 doflush = 1; 945 } 946 if (doflush) { 947 putpage(corefd, s->blkbuf, s->curpage, s->nout); 948 s->nout = 0; 949 s->curpage = s->pagenum + s->done; 950 } 951 break; 952 } 953 } 954 } 955 956 /* bzlib library reports errors with this callback */ 957 void 958 bz_internal_error(int errcode) 959 { 960 logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n", 961 BZ2_bzErrorString(errcode)); 962 } 963 964 /* 965 * Return one object in the stream. 966 * 967 * An object (stream header or page) will likely span an input block 968 * of compression data. Return non-zero when an entire object has been 969 * retrieved from the stream. 970 */ 971 static int 972 bz2decompress(stream_t *s, void *buf, size_t size) 973 { 974 int rc; 975 976 if (s->strm.avail_out == 0) { 977 s->strm.next_out = buf; 978 s->strm.avail_out = size; 979 } 980 while (s->strm.avail_in > 0) { 981 rc = BZ2_bzDecompress(&s->strm); 982 if (rc == BZ_STREAM_END) { 983 rc = BZ2_bzDecompressReset(&s->strm); 984 if (rc != BZ_OK) 985 logprint(SC_SL_ERR | SC_EXIT_ERR, 986 "BZ2_bzDecompressReset: %s", 987 BZ2_bzErrorString(rc)); 988 continue; 989 } 990 991 if (s->strm.avail_out == 0) 992 break; 993 } 994 return (s->strm.avail_out == 0); 995 } 996 997 /* 998 * Process one bzip2 block. 
999 * The interface is documented here: 1000 * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html 1001 */ 1002 static void 1003 bz2block(int corefd, stream_t *s, char *block, size_t blocksz) 1004 { 1005 int rc = 0; 1006 int doflush; 1007 char *out; 1008 1009 if (!s->init) { 1010 s->init = 1; 1011 rc = BZ2_bzDecompressInit(&s->strm, 0, 0); 1012 if (rc != BZ_OK) 1013 logprint(SC_SL_ERR | SC_EXIT_ERR, 1014 "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc)); 1015 if (s->blkbuf == NULL) 1016 s->blkbuf = Zalloc(coreblksize); 1017 s->strm.avail_out = 0; 1018 s->state = STREAMSTART; 1019 } 1020 s->strm.next_in = block; 1021 s->strm.avail_in = blocksz; 1022 1023 while (s->strm.avail_in > 0) { 1024 switch (s->state) { 1025 case STREAMSTART: 1026 if (!bz2decompress(s, &s->sh, sizeof (s->sh))) 1027 return; 1028 if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0) 1029 logprint(SC_SL_ERR | SC_EXIT_ERR, 1030 "BZ2 STREAMSTART: bad stream header"); 1031 if (s->sh.stream_npages > datahdr.dump_maxrange) 1032 logprint(SC_SL_ERR | SC_EXIT_ERR, 1033 "BZ2 STREAMSTART: bad range: %d > %d", 1034 s->sh.stream_npages, datahdr.dump_maxrange); 1035 s->pagenum = s->sh.stream_pagenum; 1036 s->npages = s->sh.stream_npages; 1037 s->curpage = s->pagenum; 1038 s->nout = 0; 1039 s->done = 0; 1040 s->state = STREAMPAGES; 1041 break; 1042 case STREAMPAGES: 1043 out = s->blkbuf + PTOB(s->nout); 1044 if (!bz2decompress(s, out, pagesize)) 1045 return; 1046 1047 atomic_inc_64(&saved); 1048 1049 doflush = 0; 1050 if (s->nout == 0 && iszpage(out)) { 1051 doflush = 1; 1052 atomic_inc_64(&zpages); 1053 } else if (++s->nout >= BTOP(coreblksize) || 1054 isblkbnd(s->curpage + s->nout)) { 1055 doflush = 1; 1056 } 1057 if (++s->done >= s->npages) { 1058 s->state = STREAMSTART; 1059 doflush = 1; 1060 } 1061 if (doflush) { 1062 putpage(corefd, s->blkbuf, s->curpage, s->nout); 1063 s->nout = 0; 1064 s->curpage = s->pagenum + s->done; 1065 } 1066 break; 1067 } 1068 } 1069 } 1070 1071 /* report progress */ 1072 
static void 1073 report_progress() 1074 { 1075 int sec, percent; 1076 1077 if (!interactive) 1078 return; 1079 1080 percent = saved * 100LL / corehdr.dump_npages; 1081 if (percent > percent_done) { 1082 sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1083 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60, 1084 percent); 1085 (void) fflush(stdout); 1086 percent_done = percent; 1087 } 1088 } 1089 1090 /* thread body */ 1091 static void * 1092 runstreams(void *arg) 1093 { 1094 tinfo_t *t = arg; 1095 stream_t *s; 1096 block_t *b; 1097 int bound; 1098 1099 (void) pthread_mutex_lock(&lock); 1100 while (!threads_stop) { 1101 bound = 0; 1102 for (s = streams; s != endstreams; s++) { 1103 if (s->bound || s->blocks.head == NULL) 1104 continue; 1105 s->bound = 1; 1106 bound = 1; 1107 (void) pthread_cond_signal(&cvwork); 1108 while (s->blocks.head != NULL) { 1109 b = deqh(&s->blocks); 1110 (void) pthread_mutex_unlock(&lock); 1111 1112 if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2) 1113 lzjbblock(t->corefd, s, b->block, 1114 b->size); 1115 else 1116 bz2block(t->corefd, s, b->block, 1117 b->size); 1118 1119 (void) pthread_mutex_lock(&lock); 1120 enqt(&freeblocks, b); 1121 (void) pthread_cond_signal(&cvfree); 1122 1123 report_progress(); 1124 } 1125 s->bound = 0; 1126 (void) pthread_cond_signal(&cvbarrier); 1127 } 1128 if (!bound && !threads_stop) 1129 (void) pthread_cond_wait(&cvwork, &lock); 1130 } 1131 (void) close(t->corefd); 1132 (void) pthread_cond_signal(&cvwork); 1133 (void) pthread_mutex_unlock(&lock); 1134 return (arg); 1135 } 1136 1137 /* 1138 * Process compressed pages. 1139 * 1140 * The old format, now called single-threaded lzjb, is a 32-bit size 1141 * word followed by 'size' bytes of lzjb compression data for one 1142 * page. The new format extends this by storing a 12-bit "tag" in the 1143 * upper bits of the size word. When the size word is pagesize or 1144 * less, it is assumed to be one lzjb page. 
 * When the size word is greater than pagesize, it is assumed to be a
 * "stream block", belonging to up to 4095 streams. In practice, the
 * number of streams is set to one less than the number of CPUs running
 * at crash time. One CPU processes the crash dump, the remaining CPUs
 * separately process groups of data pages.
 *
 * savecore creates a thread per stream, but never more threads than
 * the number of CPUs running savecore. This is because savecore can
 * be processing a crash file from a remote machine, which may have
 * more CPUs.
 *
 * When the kernel uses parallel lzjb or parallel bzip2, we expect a
 * series of 128KB blocks of compression data. In this case, each
 * block has a "tag", in the range 1-4095. Each block is handed off
 * to the threads running "runstreams". The dump format is either lzjb
 * or bzip2, never a mixture. These threads, in turn, process the
 * compression data for groups of pages. Groups of pages are delimited
 * by a "stream header", which indicates a starting pfn and number of
 * pages. When a stream block has been read, the condition variable
 * "cvwork" is signalled, which causes one of the available threads to
 * wake up and process the stream.
 *
 * In the parallel case there will be stream blocks encoding all data
 * pages. The stream of blocks is terminated by a zero size
 * word. There can be a few lzjb pages tacked on the end, depending on
 * the architecture. The sbarrier function ensures that all stream
 * blocks have been processed so that the page number for the few
 * single pages at the end can be known.
1173 */ 1174 static void 1175 decompress_pages(int corefd) 1176 { 1177 char *cpage = NULL; 1178 char *dpage = NULL; 1179 char *out; 1180 pgcnt_t curpage; 1181 block_t *b; 1182 FILE *dumpf; 1183 FILE *tracef = NULL; 1184 stream_t *s; 1185 size_t dsize; 1186 size_t insz = FBUFSIZE; 1187 char *inbuf = Zalloc(insz); 1188 uint32_t csize; 1189 dumpcsize_t dcsize; 1190 int nstreams = datahdr.dump_nstreams; 1191 int maxcsize = datahdr.dump_maxcsize; 1192 int nout, tag, doflush; 1193 1194 dumpf = fdopen(dup(dumpfd), "rb"); 1195 if (dumpf == NULL) 1196 logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s", 1197 strerror(errno)); 1198 1199 (void) setvbuf(dumpf, inbuf, _IOFBF, insz); 1200 Fseek(dumphdr.dump_data, dumpf); 1201 1202 /*LINTED: E_CONSTANT_CONDITION*/ 1203 while (1) { 1204 1205 /* 1206 * The csize word delimits stream blocks. 1207 * See dumphdr.h for a description. 1208 */ 1209 Fread(&dcsize, sizeof (dcsize), dumpf); 1210 1211 tag = DUMP_GET_TAG(dcsize); 1212 csize = DUMP_GET_CSIZE(dcsize); 1213 1214 if (tag != 0) { /* a stream block */ 1215 1216 if (nstreams == 0) 1217 logprint(SC_SL_ERR | SC_EXIT_ERR, 1218 "starting data header is missing"); 1219 1220 if (tag > nstreams) 1221 logprint(SC_SL_ERR | SC_EXIT_ERR, 1222 "stream tag %d not in range 1..%d", 1223 tag, nstreams); 1224 1225 if (csize > maxcsize) 1226 logprint(SC_SL_ERR | SC_EXIT_ERR, 1227 "block size 0x%x > max csize 0x%x", 1228 csize, maxcsize); 1229 1230 if (streams == NULL) 1231 initstreams(corefd, nstreams, maxcsize); 1232 s = &streams[tag - 1]; 1233 s->tag = tag; 1234 1235 b = getfreeblock(); 1236 b->size = csize; 1237 Fread(b->block, csize, dumpf); 1238 1239 (void) pthread_mutex_lock(&lock); 1240 enqt(&s->blocks, b); 1241 if (!s->bound) 1242 (void) pthread_cond_signal(&cvwork); 1243 (void) pthread_mutex_unlock(&lock); 1244 1245 } else if (csize > 0) { /* one lzjb page */ 1246 1247 if (csize > pagesize) 1248 logprint(SC_SL_ERR | SC_EXIT_ERR, 1249 "csize 0x%x > pagesize 0x%x", 1250 csize, pagesize); 1251 1252 
if (cpage == NULL) 1253 cpage = Zalloc(pagesize); 1254 if (dpage == NULL) { 1255 dpage = Zalloc(coreblksize); 1256 nout = 0; 1257 } 1258 1259 Fread(cpage, csize, dumpf); 1260 1261 out = dpage + PTOB(nout); 1262 dsize = decompress(cpage, out, csize, pagesize); 1263 1264 if (dsize != pagesize) 1265 logprint(SC_SL_ERR | SC_EXIT_ERR, 1266 "dsize 0x%x != pagesize 0x%x", 1267 dsize, pagesize); 1268 1269 /* 1270 * wait for streams to flush so that 'saved' is correct 1271 */ 1272 if (threads_active) 1273 sbarrier(); 1274 1275 doflush = 0; 1276 if (nout == 0) 1277 curpage = saved; 1278 1279 atomic_inc_64(&saved); 1280 1281 if (nout == 0 && iszpage(dpage)) { 1282 doflush = 1; 1283 atomic_inc_64(&zpages); 1284 } else if (++nout >= BTOP(coreblksize) || 1285 isblkbnd(curpage + nout) || 1286 saved >= dumphdr.dump_npages) { 1287 doflush = 1; 1288 } 1289 1290 if (doflush) { 1291 putpage(corefd, dpage, curpage, nout); 1292 nout = 0; 1293 } 1294 1295 report_progress(); 1296 1297 /* 1298 * Non-streams lzjb does not use blocks. Stop 1299 * here if all the pages have been decompressed. 
1300 */ 1301 if (saved >= dumphdr.dump_npages) 1302 break; 1303 1304 } else { 1305 break; /* end of data */ 1306 } 1307 } 1308 1309 stopstreams(); 1310 if (tracef != NULL) 1311 (void) fclose(tracef); 1312 (void) fclose(dumpf); 1313 if (inbuf) 1314 free(inbuf); 1315 if (cpage) 1316 free(cpage); 1317 if (dpage) 1318 free(dpage); 1319 if (streams) 1320 free(streams); 1321 } 1322 1323 static void 1324 build_corefile(const char *namelist, const char *corefile) 1325 { 1326 size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t); 1327 size_t ksyms_size = dumphdr.dump_ksyms_size; 1328 size_t ksyms_csize = dumphdr.dump_ksyms_csize; 1329 pfn_t *pfn_table; 1330 char *ksyms_base = Zalloc(ksyms_size); 1331 char *ksyms_cbase = Zalloc(ksyms_csize); 1332 size_t ksyms_dsize; 1333 Stat_t st; 1334 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644); 1335 int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644); 1336 1337 (void) printf("Constructing namelist %s/%s\n", savedir, namelist); 1338 1339 /* 1340 * Determine the optimum write size for the core file 1341 */ 1342 Fstat(corefd, &st, corefile); 1343 1344 if (verbose > 1) 1345 (void) printf("%s: %ld block size\n", corefile, 1346 (long)st.st_blksize); 1347 coreblksize = st.st_blksize; 1348 if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize)) 1349 coreblksize = MINCOREBLKSIZE; 1350 1351 hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1); 1352 1353 /* 1354 * This dump file is now uncompressed 1355 */ 1356 corehdr.dump_flags &= ~DF_COMPRESSED; 1357 1358 /* 1359 * Read in the compressed symbol table, copy it to corefile, 1360 * decompress it, and write the result to namelist. 
1361 */ 1362 corehdr.dump_ksyms = pagesize; 1363 Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms); 1364 Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms); 1365 1366 ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize, 1367 ksyms_size); 1368 if (ksyms_dsize != ksyms_size) 1369 logprint(SC_SL_WARN, 1370 "bad data in symbol table, %lu of %lu bytes saved", 1371 ksyms_dsize, ksyms_size); 1372 1373 Pwrite(namefd, ksyms_base, ksyms_size, 0); 1374 (void) close(namefd); 1375 free(ksyms_cbase); 1376 free(ksyms_base); 1377 1378 (void) printf("Constructing corefile %s/%s\n", savedir, corefile); 1379 1380 /* 1381 * Read in and write out the pfn table. 1382 */ 1383 pfn_table = Zalloc(pfn_table_size); 1384 corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize); 1385 Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn); 1386 Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn); 1387 1388 /* 1389 * Convert the raw translation data into a hashed dump map. 1390 */ 1391 corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize); 1392 build_dump_map(corefd, pfn_table); 1393 free(pfn_table); 1394 1395 /* 1396 * Decompress the pages 1397 */ 1398 decompress_pages(corefd); 1399 (void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved, 1400 dumphdr.dump_npages); 1401 1402 if (verbose) 1403 (void) printf("%ld (%ld%%) zero pages were not written\n", 1404 (pgcnt_t)zpages, (pgcnt_t)zpages * 100 / 1405 dumphdr.dump_npages); 1406 1407 if (saved != dumphdr.dump_npages) 1408 logprint(SC_SL_WARN, "bad data after page %ld", saved); 1409 1410 /* 1411 * Write out the modified dump headers. 1412 */ 1413 Pwrite(corefd, &corehdr, sizeof (corehdr), 0); 1414 if (!filemode) 1415 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff); 1416 1417 (void) close(corefd); 1418 } 1419 1420 /* 1421 * When the system panics, the kernel saves all undelivered messages (messages 1422 * that never made it out to syslogd(1M)) in the dump. 
At a mimimum, the 1423 * panic message itself will always fall into this category. Upon reboot, 1424 * the syslog startup script runs savecore -m to recover these messages. 1425 * 1426 * To do this, we read the unsent messages from the dump and send them to 1427 * /dev/conslog on priority band 1. This has the effect of prepending them 1428 * to any already-accumulated messages in the console backlog, thus preserving 1429 * temporal ordering across the reboot. 1430 * 1431 * Note: since savecore -m is used *only* for this purpose, it does *not* 1432 * attempt to save the crash dump. The dump will be saved later, after 1433 * syslogd(1M) starts, by the savecore startup script. 1434 */ 1435 static int 1436 message_save(void) 1437 { 1438 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE); 1439 offset_t ldoff; 1440 log_dump_t ld; 1441 log_ctl_t lc; 1442 struct strbuf ctl, dat; 1443 int logfd; 1444 1445 logfd = Open("/dev/conslog", O_WRONLY, 0644); 1446 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 1447 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET; 1448 1449 ctl.buf = (void *)&lc; 1450 ctl.len = sizeof (log_ctl_t); 1451 1452 dat.buf = Zalloc(DUMP_LOGSIZE); 1453 1454 for (;;) { 1455 ldoff = dumpoff; 1456 1457 Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff); 1458 dumpoff += sizeof (log_dump_t); 1459 dat.len = ld.ld_msgsize; 1460 1461 if (ld.ld_magic == 0) 1462 break; 1463 1464 if (ld.ld_magic != LOG_MAGIC) 1465 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR, 1466 "bad magic %x", ld.ld_magic); 1467 1468 if (dat.len >= DUMP_LOGSIZE) 1469 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR, 1470 "bad size %d", ld.ld_msgsize); 1471 1472 Pread(dumpfd, ctl.buf, ctl.len, dumpoff); 1473 dumpoff += ctl.len; 1474 1475 if (ld.ld_csum != checksum32(ctl.buf, ctl.len)) 1476 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK, 1477 "bad log_ctl checksum"); 1478 1479 lc.flags |= SL_LOGONLY; 1480 1481 Pread(dumpfd, dat.buf, dat.len, dumpoff); 1482 dumpoff += dat.len; 1483 1484 if 
(ld.ld_msum != checksum32(dat.buf, dat.len)) 1485 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK, 1486 "bad message checksum"); 1487 1488 if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1) 1489 logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s", 1490 strerror(errno)); 1491 1492 ld.ld_magic = 0; /* clear magic so we never save twice */ 1493 Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff); 1494 } 1495 return (0); 1496 } 1497 1498 static long 1499 getbounds(const char *f) 1500 { 1501 long b = -1; 1502 const char *p = strrchr(f, '/'); 1503 1504 (void) sscanf(p ? p + 1 : f, "vmdump.%ld", &b); 1505 return (b); 1506 } 1507 1508 static void 1509 stack_retrieve(char *stack) 1510 { 1511 summary_dump_t sd; 1512 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE + 1513 DUMP_ERPTSIZE); 1514 dumpoff -= DUMP_SUMMARYSIZE; 1515 1516 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644); 1517 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET; 1518 1519 Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff); 1520 dumpoff += sizeof (summary_dump_t); 1521 1522 if (sd.sd_magic == 0) { 1523 *stack = '\0'; 1524 return; 1525 } 1526 1527 if (sd.sd_magic != SUMMARY_MAGIC) { 1528 *stack = '\0'; 1529 logprint(SC_SL_NONE | SC_IF_VERBOSE, 1530 "bad summary magic %x", sd.sd_magic); 1531 return; 1532 } 1533 Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff); 1534 if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE)) 1535 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum"); 1536 } 1537 1538 static void 1539 raise_event(enum sc_event_type evidx, char *warn_string) 1540 { 1541 uint32_t pl = sc_event[evidx].sce_payload; 1542 char panic_stack[STACK_BUF_SIZE]; 1543 nvlist_t *attr = NULL; 1544 char uuidbuf[36 + 1]; 1545 int err = 0; 1546 1547 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0) 1548 goto publish; /* try to send payload-free event */ 1549 1550 if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL) 1551 err |= nvlist_add_string(attr, "dumpdir", savedir); 1552 1553 if (pl & SC_PAYLOAD_INSTANCE && bounds 
!= -1) 1554 err |= nvlist_add_int64(attr, "instance", bounds); 1555 1556 if (pl & SC_PAYLOAD_ISCOMPRESSED) { 1557 err |= nvlist_add_boolean_value(attr, "compressed", 1558 csave ? B_TRUE : B_FALSE); 1559 } 1560 1561 if (pl & SC_PAYLOAD_DUMPADM_EN) { 1562 char *disabled = defread("DUMPADM_ENABLE=no"); 1563 1564 err |= nvlist_add_boolean_value(attr, "savecore-enabled", 1565 disabled ? B_FALSE : B_TRUE); 1566 } 1567 1568 if (pl & SC_PAYLOAD_IMAGEUUID) { 1569 (void) strncpy(uuidbuf, corehdr.dump_uuid, 36); 1570 uuidbuf[36] = '\0'; 1571 err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf); 1572 } 1573 1574 if (pl & SC_PAYLOAD_CRASHTIME) { 1575 err |= nvlist_add_int64(attr, "crashtime", 1576 (int64_t)corehdr.dump_crashtime); 1577 } 1578 1579 if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') { 1580 err |= nvlist_add_string(attr, "panicstr", 1581 corehdr.dump_panicstring); 1582 } 1583 1584 if (pl & SC_PAYLOAD_PANICSTACK) { 1585 stack_retrieve(panic_stack); 1586 1587 if (panic_stack[0] != '\0') { 1588 /* 1589 * The summary page may not be present if the dump 1590 * was previously recorded compressed. 1591 */ 1592 (void) nvlist_add_string(attr, "panicstack", 1593 panic_stack); 1594 } 1595 } 1596 1597 /* add warning string if this is an ireport for dump failure */ 1598 if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL) 1599 (void) nvlist_add_string(attr, "failure-reason", warn_string); 1600 1601 if (pl & SC_PAYLOAD_DUMPCOMPLETE) 1602 err |= nvlist_add_boolean_value(attr, "dump-incomplete", 1603 dump_incomplete ? B_TRUE : B_FALSE); 1604 1605 if (pl & SC_PAYLOAD_FM_PANIC) { 1606 err |= nvlist_add_boolean_value(attr, "fm-panic", 1607 fm_panic ? B_TRUE : B_FALSE); 1608 } 1609 1610 if (pl & SC_PAYLOAD_JUSTCHECKING) { 1611 err |= nvlist_add_boolean_value(attr, "will-attempt-savecore", 1612 cflag ? 
B_FALSE : B_TRUE); 1613 } 1614 1615 if (err) 1616 logprint(SC_SL_WARN, "Errors while constructing '%s' " 1617 "event payload; will try to publish anyway."); 1618 publish: 1619 if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS, 1620 "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI, 1621 attr) != FMEV_SUCCESS) { 1622 logprint(SC_SL_ERR, "failed to publish '%s' event: %s", 1623 sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno)); 1624 nvlist_free(attr); 1625 } 1626 1627 } 1628 1629 1630 int 1631 main(int argc, char *argv[]) 1632 { 1633 int i, c, bfd; 1634 Stat_t st; 1635 struct rlimit rl; 1636 long filebounds = -1; 1637 char namelist[30], corefile[30], boundstr[30]; 1638 1639 if (geteuid() != 0) { 1640 (void) fprintf(stderr, "%s: %s %s\n", progname, 1641 gettext("you must be root to use"), progname); 1642 exit(1); 1643 } 1644 1645 startts = gethrtime(); 1646 1647 (void) getrlimit(RLIMIT_NOFILE, &rl); 1648 rl.rlim_cur = rl.rlim_max; 1649 (void) setrlimit(RLIMIT_NOFILE, &rl); 1650 1651 openlog(progname, LOG_ODELAY, LOG_AUTH); 1652 1653 (void) defopen("/etc/dumpadm.conf"); 1654 savedir = defread("DUMPADM_SAVDIR="); 1655 if (savedir != NULL) 1656 savedir = strdup(savedir); 1657 1658 while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) { 1659 switch (c) { 1660 case 'L': 1661 livedump++; 1662 break; 1663 case 'v': 1664 verbose++; 1665 break; 1666 case 'c': 1667 cflag++; 1668 break; 1669 case 'd': 1670 disregard_valid_flag++; 1671 break; 1672 case 'm': 1673 mflag++; 1674 break; 1675 case 'f': 1676 dumpfile = optarg; 1677 filebounds = getbounds(dumpfile); 1678 break; 1679 case '?': 1680 usage(); 1681 } 1682 } 1683 1684 interactive = isatty(STDOUT_FILENO); 1685 1686 if (cflag && livedump) 1687 usage(); 1688 1689 if (dumpfile == NULL || livedump) 1690 dumpfd = Open("/dev/dump", O_RDONLY, 0444); 1691 1692 if (dumpfile == NULL) { 1693 dumpfile = Zalloc(MAXPATHLEN); 1694 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) 1695 logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR, 1696 "no 
dump device configured"); 1697 } 1698 1699 if (mflag) 1700 return (message_save()); 1701 1702 if (optind == argc - 1) 1703 savedir = argv[optind]; 1704 1705 if (savedir == NULL || optind < argc - 1) 1706 usage(); 1707 1708 if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1) 1709 logprint(SC_SL_NONE | SC_EXIT_ERR, 1710 "dedicated dump device required"); 1711 1712 (void) close(dumpfd); 1713 dumpfd = -1; 1714 1715 Stat(dumpfile, &st); 1716 1717 filemode = S_ISREG(st.st_mode); 1718 1719 if (!filemode && defread("DUMPADM_CSAVE=off") == NULL) 1720 csave = 1; 1721 1722 read_dumphdr(); 1723 1724 /* 1725 * We want this message to go to the log file, but not the console. 1726 * There's no good way to do that with the existing syslog facility. 1727 * We could extend it to handle this, but there doesn't seem to be 1728 * a general need for it, so we isolate the complexity here instead. 1729 */ 1730 if (dumphdr.dump_panicstring[0] != '\0') { 1731 int logfd = Open("/dev/conslog", O_WRONLY, 0644); 1732 log_ctl_t lc; 1733 struct strbuf ctl, dat; 1734 char msg[DUMP_PANICSIZE + 100]; 1735 char fmt[] = "reboot after panic: %s"; 1736 uint32_t msgid; 1737 1738 STRLOG_MAKE_MSGID(fmt, msgid); 1739 1740 /* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */ 1741 (void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ", 1742 progname, msgid); 1743 /* LINTED: E_SEC_PRINTF_VAR_FMT */ 1744 (void) sprintf(msg + strlen(msg), fmt, 1745 dumphdr.dump_panicstring); 1746 1747 lc.pri = LOG_AUTH | LOG_ERR; 1748 lc.flags = SL_CONSOLE | SL_LOGONLY; 1749 lc.level = 0; 1750 1751 ctl.buf = (void *)&lc; 1752 ctl.len = sizeof (log_ctl_t); 1753 1754 dat.buf = (void *)msg; 1755 dat.len = strlen(msg) + 1; 1756 1757 (void) putmsg(logfd, &ctl, &dat, 0); 1758 (void) close(logfd); 1759 } 1760 1761 if ((dumphdr.dump_flags & DF_COMPLETE) == 0) { 1762 logprint(SC_SL_WARN, "incomplete dump on dump device"); 1763 dump_incomplete = B_TRUE; 1764 } 1765 1766 if (dumphdr.dump_fm_panic) 1767 fm_panic = B_TRUE; 1768 1769 /* 1770 * We 
have a valid dump on a dump device and know as much about 1771 * it as we're going to at this stage. Raise an event for 1772 * logging and so that FMA can open a case for this panic. 1773 * Avoid this step for FMA-initiated panics - FMA will replay 1774 * ereports off the dump device independently of savecore and 1775 * will make a diagnosis, so we don't want to open two cases 1776 * for the same event. Also avoid raising an event for a 1777 * livedump, or when we inflating a compressed dump. 1778 */ 1779 if (!fm_panic && !livedump && !filemode) 1780 raise_event(SC_EVENT_DUMP_PENDING, NULL); 1781 1782 logprint(SC_SL_WARN, "System dump time: %s", 1783 ctime(&dumphdr.dump_crashtime)); 1784 1785 /* 1786 * Option -c is designed for use from svc-dumpadm where we know 1787 * that dumpadm -n is in effect but run savecore -c just to 1788 * get the above dump_pending_on_device event raised. If it is run 1789 * interactively then just print further panic details. 1790 */ 1791 if (cflag) { 1792 char *disabled = defread("DUMPADM_ENABLE=no"); 1793 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR; 1794 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND; 1795 1796 logprint(lvl | ec, 1797 "Panic crashdump pending on dump device%s " 1798 "run savecore(1M) manually to extract. " 1799 "Image UUID %s%s.", 1800 disabled ? " but dumpadm -n in effect;" : ";", 1801 corehdr.dump_uuid, 1802 fm_panic ? 
"(fault-management initiated)" : ""); 1803 /*NOTREACHED*/ 1804 } 1805 1806 if (chdir(savedir) == -1) 1807 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s", 1808 savedir, strerror(errno)); 1809 1810 check_space(csave); 1811 1812 if (filebounds < 0) 1813 bounds = read_number_from_file("bounds", 0); 1814 else 1815 bounds = filebounds; 1816 1817 if (csave) { 1818 size_t metrics_size = datahdr.dump_metrics; 1819 1820 (void) sprintf(corefile, "vmdump.%ld", bounds); 1821 1822 datahdr.dump_metrics = 0; 1823 1824 logprint(SC_SL_ERR, 1825 "Saving compressed system crash dump in %s/%s", 1826 savedir, corefile); 1827 1828 copy_crashfile(corefile); 1829 1830 /* 1831 * Raise a fault management event that indicates the system 1832 * has panicked. We know a reasonable amount about the 1833 * condition at this time, but the dump is still compressed. 1834 */ 1835 if (!livedump && !fm_panic) 1836 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL); 1837 1838 if (metrics_size > 0) { 1839 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1840 FILE *mfile = fopen(METRICSFILE, "a"); 1841 char *metrics = Zalloc(metrics_size + 1); 1842 1843 Pread(dumpfd, metrics, metrics_size, endoff + 1844 sizeof (dumphdr) + sizeof (datahdr)); 1845 1846 if (sec < 1) 1847 sec = 1; 1848 1849 if (mfile == NULL) { 1850 logprint(SC_SL_WARN, 1851 "Can't create %s:\n%s", 1852 METRICSFILE, metrics); 1853 } else { 1854 (void) fprintf(mfile, "[[[[,,,"); 1855 for (i = 0; i < argc; i++) 1856 (void) fprintf(mfile, "%s ", argv[i]); 1857 (void) fprintf(mfile, "\n"); 1858 (void) fprintf(mfile, ",,,%s %s %s %s %s\n", 1859 dumphdr.dump_utsname.sysname, 1860 dumphdr.dump_utsname.nodename, 1861 dumphdr.dump_utsname.release, 1862 dumphdr.dump_utsname.version, 1863 dumphdr.dump_utsname.machine); 1864 (void) fprintf(mfile, ",,,%s dump time %s\n", 1865 dumphdr.dump_flags & DF_LIVE ? 
"Live" : 1866 "Crash", ctime(&dumphdr.dump_crashtime)); 1867 (void) fprintf(mfile, ",,,%s/%s\n", savedir, 1868 corefile); 1869 (void) fprintf(mfile, "Metrics:\n%s\n", 1870 metrics); 1871 (void) fprintf(mfile, "Copy pages,%ld\n", 1872 dumphdr. dump_npages); 1873 (void) fprintf(mfile, "Copy time,%d\n", sec); 1874 (void) fprintf(mfile, "Copy pages/sec,%ld\n", 1875 dumphdr.dump_npages / sec); 1876 (void) fprintf(mfile, "]]]]\n"); 1877 (void) fclose(mfile); 1878 } 1879 free(metrics); 1880 } 1881 1882 logprint(SC_SL_ERR, 1883 "Decompress the crash dump with " 1884 "\n'savecore -vf %s/%s'", 1885 savedir, corefile); 1886 1887 } else { 1888 (void) sprintf(namelist, "unix.%ld", bounds); 1889 (void) sprintf(corefile, "vmcore.%ld", bounds); 1890 1891 if (interactive && filebounds >= 0 && access(corefile, F_OK) 1892 == 0) 1893 logprint(SC_SL_NONE | SC_EXIT_ERR, 1894 "%s already exists: remove with " 1895 "'rm -f %s/{unix,vmcore}.%ld'", 1896 corefile, savedir, bounds); 1897 1898 logprint(SC_SL_ERR, 1899 "saving system crash dump in %s/{unix,vmcore}.%ld", 1900 savedir, bounds); 1901 1902 build_corefile(namelist, corefile); 1903 1904 if (!livedump && !filemode && !fm_panic) 1905 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL); 1906 1907 if (access(METRICSFILE, F_OK) == 0) { 1908 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1909 FILE *mfile = fopen(METRICSFILE, "a"); 1910 1911 if (sec < 1) 1912 sec = 1; 1913 1914 (void) fprintf(mfile, "[[[[,,,"); 1915 for (i = 0; i < argc; i++) 1916 (void) fprintf(mfile, "%s ", argv[i]); 1917 (void) fprintf(mfile, "\n"); 1918 (void) fprintf(mfile, ",,,%s/%s\n", savedir, corefile); 1919 (void) fprintf(mfile, ",,,%s %s %s %s %s\n", 1920 dumphdr.dump_utsname.sysname, 1921 dumphdr.dump_utsname.nodename, 1922 dumphdr.dump_utsname.release, 1923 dumphdr.dump_utsname.version, 1924 dumphdr.dump_utsname.machine); 1925 (void) fprintf(mfile, "Uncompress pages,%"PRIu64"\n", 1926 saved); 1927 (void) fprintf(mfile, "Uncompress time,%d\n", sec); 1928 (void) 
fprintf(mfile, "Uncompress pages/sec,%" 1929 PRIu64"\n", saved / sec); 1930 (void) fprintf(mfile, "]]]]\n"); 1931 (void) fclose(mfile); 1932 } 1933 } 1934 1935 if (filebounds < 0) { 1936 (void) sprintf(boundstr, "%ld\n", bounds + 1); 1937 bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644); 1938 Pwrite(bfd, boundstr, strlen(boundstr), 0); 1939 (void) close(bfd); 1940 } 1941 1942 if (verbose) { 1943 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000; 1944 1945 (void) printf("%d:%02d dump %s is done\n", 1946 sec / 60, sec % 60, 1947 csave ? "copy" : "decompress"); 1948 } 1949 1950 if (verbose > 1 && hist != NULL) { 1951 int i, nw; 1952 1953 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i) 1954 nw += hist[i] * i; 1955 (void) printf("pages count %%\n"); 1956 for (i = 0; i <= BTOP(coreblksize); ++i) { 1957 if (hist[i] == 0) 1958 continue; 1959 (void) printf("%3d %5u %6.2f\n", 1960 i, hist[i], 100.0 * hist[i] * i / nw); 1961 } 1962 } 1963 1964 (void) close(dumpfd); 1965 dumpfd = -1; 1966 1967 return (0); 1968 } 1969