xref: /illumos-gate/usr/src/cmd/savecore/savecore.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2019 Joyent, Inc.
24  */
25 /*
26  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
27  * Copyright 2024 Oxide Computer Company
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdarg.h>
33 #include <unistd.h>
34 #include <fcntl.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <deflt.h>
38 #include <time.h>
39 #include <syslog.h>
40 #include <stropts.h>
41 #include <pthread.h>
42 #include <limits.h>
43 #include <atomic.h>
44 #include <libnvpair.h>
45 #include <libintl.h>
46 #include <sys/mem.h>
47 #include <sys/statvfs.h>
48 #include <sys/dumphdr.h>
49 #include <sys/dumpadm.h>
50 #include <sys/compress.h>
51 #include <sys/panic.h>
52 #include <sys/sysmacros.h>
53 #include <sys/stat.h>
54 #include <sys/resource.h>
55 #include <bzip2/bzlib.h>
56 #include <sys/fm/util.h>
57 #include <fm/libfmevent.h>
58 #include <sys/int_fmtio.h>
59 
60 
61 /* fread/fwrite (stdio) buffer size: 1 MiB */
62 #define	FBUFSIZE		(1ULL << 20)
63 
64 /* minimum size for output buffering: 128 KiB */
65 #define	MINCOREBLKSIZE		(1ULL << 17)
66 
67 /* create this file if metrics collection is enabled in the kernel */
68 #define	METRICSFILE "METRICS.csv"
69 
/* message prefix used by usage() and logprint(): 8 characters plus NUL */
70 static char	progname[9] = "savecore";
71 static char	*savedir;		/* savecore directory */
72 static char	*dumpfile;		/* source of raw crash dump */
73 static long	bounds = -1;		/* numeric suffix */
74 static long	pagesize;		/* dump pagesize */
75 static int	dumpfd = -1;		/* dumpfile descriptor */
/* cleared by Stat() when the dump file cannot be stat'ed; see logprint() */
76 static boolean_t have_dumpfile = B_TRUE;	/* dumpfile existence */
77 static dumphdr_t corehdr, dumphdr;	/* initial and terminal dumphdrs */
78 static boolean_t dump_incomplete;	/* dumphdr indicates incomplete */
79 static boolean_t fm_panic;		/* dump is the result of fm_panic */
80 static offset_t	endoff;			/* offset of end-of-dump header */
81 static int	verbose;		/* chatty mode */
82 static int	disregard_valid_flag;	/* disregard valid flag */
83 static int	livedump;		/* dump the current running system */
84 static int	interactive;		/* user invoked; no syslog */
85 static int	csave;			/* save dump compressed */
86 static int	filemode;		/* processing file, not dump device */
87 static hrtime_t	startts;		/* timestamp at start */
/*
 * The two counters below are bumped with atomic_inc_64() by the block
 * decoders (lzjbblock/bz2block).  'saved' is incremented for every page
 * decoded, including the zero pages that are also counted in 'zpages'.
 */
88 static volatile uint64_t saved;		/* count of pages written */
89 static volatile uint64_t zpages;	/* count of zero pages not written */
90 static dumpdatahdr_t datahdr;		/* compression info */
91 static long	coreblksize;		/* preferred write size (st_blksize) */
92 static int	cflag;			/* run as savecore -c */
93 static int	mflag;			/* run as savecore -m */
94 static int	rflag;			/* run as savecore -r */
95 
96 /*
97  * Payload information for the events we raise.  These are used
98  * in raise_event to determine what payload to include.  Each value is a
 * single bit so that they can be OR'ed together into a payload mask.
99  */
100 #define	SC_PAYLOAD_SAVEDIR	0x0001	/* Include savedir in event */
101 #define	SC_PAYLOAD_INSTANCE	0x0002	/* Include bounds instance number */
102 #define	SC_PAYLOAD_IMAGEUUID	0x0004	/* Include dump OS instance uuid */
103 #define	SC_PAYLOAD_CRASHTIME	0x0008	/* Include epoch crashtime */
104 #define	SC_PAYLOAD_PANICSTR	0x0010	/* Include panic string */
105 #define	SC_PAYLOAD_PANICSTACK	0x0020	/* Include panic stack */
106 #define	SC_PAYLOAD_FAILREASON	0x0040	/* Include failure reason */
107 #define	SC_PAYLOAD_DUMPCOMPLETE	0x0080	/* Include completeness indicator */
108 #define	SC_PAYLOAD_ISCOMPRESSED	0x0100	/* Dump is in vmdump.N form */
109 #define	SC_PAYLOAD_DUMPADM_EN	0x0200	/* Is dumpadm enabled or not? */
110 #define	SC_PAYLOAD_FM_PANIC	0x0400	/* Panic initiated by FMA */
111 #define	SC_PAYLOAD_JUSTCHECKING	0x0800	/* Run with -c flag? */
112 
/*
 * Kinds of event passed to raise_event().  The enumerators are listed in
 * the same order as the entries of the sc_event[] table; presumably they
 * are used as indices into it, so keep the two in step (verify against
 * raise_event).
 */
113 enum sc_event_type {
114 	SC_EVENT_DUMP_PENDING,
115 	SC_EVENT_SAVECORE_FAILURE,
116 	SC_EVENT_DUMP_AVAILABLE
117 };
118 
119 /*
120  * Common payload: the SC_PAYLOAD_* bits included in every event below.
 *
 * Note that the expansion is not parenthesized, so it is only safe to
 * combine _SC_PAYLOAD_CMN with other flags using the | operator.
121  */
122 #define	_SC_PAYLOAD_CMN \
123     SC_PAYLOAD_IMAGEUUID | \
124     SC_PAYLOAD_CRASHTIME | \
125     SC_PAYLOAD_PANICSTR | \
126     SC_PAYLOAD_PANICSTACK | \
127     SC_PAYLOAD_DUMPCOMPLETE | \
128     SC_PAYLOAD_FM_PANIC | \
129     SC_PAYLOAD_SAVEDIR
130 
/*
 * Per-event description: the event subclass string and the mask of
 * SC_PAYLOAD_* bits selecting what is attached to the event.  Entries
 * appear in enum sc_event_type order.
 */
131 static const struct {
132 	const char *sce_subclass;
133 	uint32_t sce_payload;
134 } sc_event[] = {
135 	/*
136 	 * SC_EVENT_DUMP_PENDING
137 	 */
138 	{
139 		"dump_pending_on_device",
140 		_SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN |
141 		    SC_PAYLOAD_JUSTCHECKING
142 	},
143 
144 	/*
145 	 * SC_EVENT_SAVECORE_FAILURE
146 	 */
147 	{
148 		"savecore_failure",
149 		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON
150 	},
151 
152 	/*
153 	 * SC_EVENT_DUMP_AVAILABLE
154 	 */
155 	{
156 		"dump_available",
157 		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED
158 	},
159 };
160 
161 static void raise_event(enum sc_event_type, char *);
162 static void report_progress(len_t, len_t);
163 static void end_progress(len_t, len_t);
164 
165 static void
166 usage(void)
167 {
168 	(void) fprintf(stderr,
169 	    "usage: %s [-L | -r] [-vd] [-f dumpfile] [dirname]\n", progname);
170 	exit(1);
171 }
172 
173 #define	SC_SL_NONE	0x0001	/* no syslog */
174 #define	SC_SL_ERR	0x0002	/* syslog if !interactive, LOG_ERR */
175 #define	SC_SL_WARN	0x0004	/* syslog if !interactive, LOG_WARNING */
176 #define	SC_IF_VERBOSE	0x0008	/* message only if -v */
177 #define	SC_IF_ISATTY	0x0010	/* message only if interactive */
178 #define	SC_EXIT_OK	0x0020	/* exit(0) */
179 #define	SC_EXIT_ERR	0x0040	/* exit(1) */
180 #define	SC_EXIT_PEND	0x0080	/* exit(2) */
181 #define	SC_EXIT_FM	0x0100	/* exit(3) */
182 
183 #define	_SC_ALLEXIT	(SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM)
184 
185 static void
186 logprint(uint32_t flags, char *message, ...)
187 {
188 	va_list args;
189 	char buf[1024];
190 	int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0);
191 	int do_ifverb = (flags & SC_IF_VERBOSE) && verbose;
192 	int do_ifisatty = (flags & SC_IF_ISATTY) && interactive;
193 	int code;
194 	static int logprint_raised = 0;
195 
196 	if (do_always || do_ifverb || do_ifisatty) {
197 		va_start(args, message);
198 		/*LINTED: E_SEC_PRINTF_VAR_FMT*/
199 		(void) vsnprintf(buf, sizeof (buf), message, args);
200 		(void) fprintf(stderr, "%s: %s\n", progname, buf);
201 		if (!interactive) {
202 			switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) {
203 			case SC_SL_ERR:
204 				/*LINTED: E_SEC_PRINTF_VAR_FMT*/
205 				syslog(LOG_ERR, buf);
206 				break;
207 
208 			case SC_SL_WARN:
209 				/*LINTED: E_SEC_PRINTF_VAR_FMT*/
210 				syslog(LOG_WARNING, buf);
211 				break;
212 
213 			default:
214 				break;
215 			}
216 		}
217 		va_end(args);
218 	}
219 
220 	switch (flags & _SC_ALLEXIT) {
221 	case 0:
222 		return;
223 
224 	case SC_EXIT_OK:
225 		code = 0;
226 		break;
227 
228 	case SC_EXIT_PEND:
229 		/*
230 		 * Raise an ireport saying why we are exiting.  Do not
231 		 * raise if run as savecore -m.  If something in the
232 		 * raise_event codepath calls logprint avoid recursion.
233 		 */
234 		if (!mflag && !rflag && logprint_raised++ == 0)
235 			raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
236 		code = 2;
237 		break;
238 
239 	case SC_EXIT_FM:
240 		code = 3;
241 		break;
242 
243 	case SC_EXIT_ERR:
244 	default:
245 		if (!mflag && !rflag && logprint_raised++ == 0 && have_dumpfile)
246 			raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
247 		code = 1;
248 		break;
249 	}
250 
251 	exit(code);
252 }
253 
254 /*
255  * System call / libc wrappers that exit on error.
256  */
257 static int
258 Open(const char *name, int oflags, mode_t mode)
259 {
260 	int fd;
261 
262 	if ((fd = open64(name, oflags, mode)) == -1)
263 		logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
264 		    name, strerror(errno));
265 	return (fd);
266 }
267 
268 static void
269 Fread(void *buf, size_t size, FILE *f)
270 {
271 	if (fread(buf, size, 1, f) != 1)
272 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: %s",
273 		    strerror(errno));
274 }
275 
276 static void
277 Fwrite(void *buf, size_t size, FILE *f)
278 {
279 	if (fwrite(buf, size, 1, f) != 1)
280 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s",
281 		    strerror(errno));
282 }
283 
284 static void
285 Fseek(offset_t off, FILE *f)
286 {
287 	if (fseeko64(f, off, SEEK_SET) != 0)
288 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
289 		    strerror(errno));
290 }
291 
292 typedef struct stat64 Stat_t;
293 
294 static void
295 Fstat(int fd, Stat_t *sb, const char *fname)
296 {
297 	if (fstat64(fd, sb) != 0)
298 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
299 		    strerror(errno));
300 }
301 
302 static void
303 Stat(const char *fname, Stat_t *sb)
304 {
305 	if (stat64(fname, sb) != 0) {
306 		have_dumpfile = B_FALSE;
307 		logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
308 		    "of file %s", fname);
309 	}
310 }
311 
312 static void
313 Pread(int fd, void *buf, size_t size, offset_t off)
314 {
315 	ssize_t sz = pread64(fd, buf, size, off);
316 
317 	if (sz < 0)
318 		logprint(SC_SL_ERR | SC_EXIT_ERR,
319 		    "pread: %s", strerror(errno));
320 	else if (sz != size)
321 		logprint(SC_SL_ERR | SC_EXIT_ERR,
322 		    "pread: size %ld != %ld", sz, size);
323 }
324 
325 static void
326 Pwrite(int fd, void *buf, size_t size, off64_t off)
327 {
328 	if (pwrite64(fd, buf, size, off) != size)
329 		logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s",
330 		    strerror(errno));
331 }
332 
333 static void *
334 Zalloc(size_t size)
335 {
336 	void *buf;
337 
338 	if ((buf = calloc(size, 1)) == NULL)
339 		logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s",
340 		    strerror(errno));
341 	return (buf);
342 }
343 
/*
 * Read a decimal number from the start of 'filename'.
 *
 * Returns the parsed value when the file can be opened and holds a
 * non-negative number; otherwise (file missing, nothing parsable, or a
 * negative value) returns 'default_value'.
 */
static long
read_number_from_file(const char *filename, long default_value)
{
	long parsed = -1;
	FILE *stream = fopen(filename, "r");

	if (stream != NULL) {
		/* a failed conversion leaves 'parsed' at its -1 sentinel */
		(void) fscanf(stream, "%ld", &parsed);
		(void) fclose(stream);
	}

	if (parsed < 0)
		return (default_value);
	return (parsed);
}
356 
/*
 * Open the dump source and read and validate its headers.
 *
 * The terminal header (dumphdr) and the data header that follows it
 * (datahdr) are read at endoff, which is found by seeking DUMP_OFFSET
 * back from the end of the device and masking with -DUMP_OFFSET.  The
 * initial header located at dumphdr.dump_start is read into corehdr.
 * Sets the globals dumpfd, endoff and pagesize.
 *
 * Every failed check ends the program through logprint() with one of the
 * SC_EXIT_* flags, so a normal return means all checks passed.  On
 * return DF_VALID has been cleared in the in-memory copies of both
 * headers.
 */
357 static void
358 read_dumphdr(void)
359 {
	/*
	 * Read-only when processing a file or running with -r; otherwise
	 * read-write so that the terminal header can be rewritten.
	 */
360 	if (filemode || rflag)
361 		dumpfd = Open(dumpfile, O_RDONLY, 0644);
362 	else
363 		dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
364 	endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
365 	Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
366 	Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
367 
	/* note: taken from the header before the magic number is checked */
368 	pagesize = dumphdr.dump_pagesize;
369 
370 	if (dumphdr.dump_magic != DUMP_MAGIC)
371 		logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
372 		    dumphdr.dump_magic);
373 
	/* exits with status 0; the message is only shown with -v */
374 	if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
375 		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
376 		    "dump already processed");
377 
378 	if (dumphdr.dump_version != DUMP_VERSION)
379 		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
380 		    "dump version (%d) != %s version (%d)",
381 		    dumphdr.dump_version, progname, DUMP_VERSION);
382 
383 	if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
384 		logprint(SC_SL_NONE | SC_EXIT_PEND,
385 		    "dump is from %u-bit kernel - cannot save on %u-bit kernel",
386 		    dumphdr.dump_wordsize, DUMP_WORDSIZE);
387 
388 	if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
389 		if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
390 			logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
391 			    "dump data version (%d) != %s data version (%d)",
392 			    datahdr.dump_datahdr_version, progname,
393 			    DUMP_DATAHDR_VERSION);
394 	} else {
		/*
		 * No data header was found (see CopyPages for the format
		 * without one): zero the structure and set only the
		 * maximum compressed size, to one page.
		 */
395 		(void) memset(&datahdr, 0, sizeof (datahdr));
396 		datahdr.dump_maxcsize = pagesize;
397 	}
398 
399 	/*
400 	 * Read the initial header, clear the valid bits, and compare headers.
401 	 * The main header may have been overwritten by swapping if we're
402 	 * using a swap partition as the dump device, in which case we bail.
403 	 */
404 	Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start);
405 
406 	corehdr.dump_flags &= ~DF_VALID;
407 	dumphdr.dump_flags &= ~DF_VALID;
408 
409 	if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
410 		/*
411 		 * Clear valid bit so we don't complain on every invocation.
412 		 */
413 		if (!filemode && !rflag)
414 			Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
415 		logprint(SC_SL_ERR | SC_EXIT_ERR,
416 		    "initial dump header corrupt");
417 	}
418 }
419 
420 static void
421 check_space(int csave)
422 {
423 	struct statvfs fsb;
424 	int64_t spacefree, dumpsize, minfree, datasize;
425 
426 	if (statvfs(".", &fsb) < 0)
427 		logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
428 		    strerror(errno));
429 
430 	dumpsize = dumphdr.dump_data - dumphdr.dump_start;
431 	datasize = dumphdr.dump_npages * pagesize;
432 	if (!csave)
433 		dumpsize += datasize;
434 	else
435 		dumpsize += datahdr.dump_data_csize;
436 
437 	spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
438 	minfree = 1024LL * read_number_from_file("minfree", 1024);
439 	if (spacefree < minfree + dumpsize) {
440 		logprint(SC_SL_ERR | SC_EXIT_ERR,
441 		    "not enough space in %s (%lld MB avail, %lld MB needed)",
442 		    savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
443 	}
444 }
445 
446 static void
447 build_dump_map(int corefd, const pfn_t *pfn_table)
448 {
449 	long i;
450 	static long misses = 0;
451 	size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
452 	mem_vtop_t vtop;
453 	dump_map_t *dmp = Zalloc(dump_mapsize);
454 	char *inbuf = Zalloc(FBUFSIZE);
455 	FILE *in = fdopen(dup(dumpfd), "rb");
456 
457 	(void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
458 	Fseek(dumphdr.dump_map, in);
459 
460 	corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize);
461 
462 	for (i = 0; i < corehdr.dump_nvtop; i++) {
463 		long first = 0;
464 		long last = corehdr.dump_npages - 1;
465 		long middle = 0;
466 		pfn_t pfn = 0;
467 		uintptr_t h;
468 
469 		Fread(&vtop, sizeof (mem_vtop_t), in);
470 		while (last >= first) {
471 			middle = (first + last) / 2;
472 			pfn = pfn_table[middle];
473 			if (pfn == vtop.m_pfn)
474 				break;
475 			if (pfn < vtop.m_pfn)
476 				first = middle + 1;
477 			else
478 				last = middle - 1;
479 		}
480 		if (pfn != vtop.m_pfn) {
481 			if (++misses <= 10)
482 				(void) fprintf(stderr,
483 				    "pfn %ld not found for as=%p, va=%p\n",
484 				    vtop.m_pfn, (void *)vtop.m_as, vtop.m_va);
485 			continue;
486 		}
487 
488 		dmp[i].dm_as = vtop.m_as;
489 		dmp[i].dm_va = (uintptr_t)vtop.m_va;
490 		dmp[i].dm_data = corehdr.dump_data +
491 		    ((uint64_t)middle << corehdr.dump_pageshift);
492 
493 		h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va);
494 		dmp[i].dm_next = dmp[h].dm_first;
495 		dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t);
496 	}
497 
498 	Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map);
499 	free(dmp);
500 	(void) fclose(in);
501 	free(inbuf);
502 }
503 
504 /*
505  * Copy whole sections of the dump device to the file.
506  */
507 static void
508 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf,
509     size_t sz, len_t *savedp, len_t total)
510 {
511 	size_t nr;
512 	offset_t off = *offp;
513 
514 	while (nb > 0) {
515 		report_progress(*savedp, total);
516 		nr = sz < nb ? sz : (size_t)nb;
517 		Pread(dumpfd, buf, nr, dumpoff);
518 		Pwrite(fd, buf, nr, off);
519 		off += nr;
520 		dumpoff += nr;
521 		nb -= nr;
522 		*savedp += nr;
523 	}
524 	*offp = off;
525 }
526 
527 /*
528  * Copy pages when the dump data header is missing.
529  * This supports older kernels with latest savecore.
530  */
531 static void
532 CopyPages(offset_t *offp, int fd, char *buf, size_t sz, len_t *savedp,
533     len_t total)
534 {
535 	uint32_t csize;
536 	FILE *in = fdopen(dup(dumpfd), "rb");
537 	FILE *out = fdopen(dup(fd), "wb");
538 	char *cbuf = Zalloc(pagesize);
539 	char *outbuf = Zalloc(FBUFSIZE);
540 	pgcnt_t np = dumphdr.dump_npages;
541 
542 	(void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE);
543 	(void) setvbuf(in, buf, _IOFBF, sz);
544 	Fseek(dumphdr.dump_data, in);
545 
546 	Fseek(*offp, out);
547 	while (np > 0) {
548 		report_progress(*savedp, total);
549 		Fread(&csize, sizeof (uint32_t), in);
550 		Fwrite(&csize, sizeof (uint32_t), out);
551 		*offp += sizeof (uint32_t);
552 		if (csize > pagesize || csize == 0) {
553 			logprint(SC_SL_ERR,
554 			    "CopyPages: page %lu csize %d (0x%x) pagesize %d",
555 			    dumphdr.dump_npages - np, csize, csize,
556 			    pagesize);
557 			break;
558 		}
559 		Fread(cbuf, csize, in);
560 		Fwrite(cbuf, csize, out);
561 		*offp += csize;
562 		np--;
563 		(*savedp)++;
564 	}
565 	(void) fclose(in);
566 	(void) fclose(out);
567 	free(outbuf);
568 	free(buf);
569 }
570 
571 /*
572  * Concatenate dump contents into a new file.
573  * Update corehdr with new offsets.
 *
 * 'corefile' is created (or truncated) in the current directory.  The
 * compressed symbol table, the pfn table, the dump map and the page data
 * are copied from the dump device without being decompressed, each
 * section starting on a page boundary.  A header copy and the data
 * header are then written after the data, and the header again at
 * offset 0, so that the result has the layout of a dump device.  Unless
 * a file is being processed or -r was given, the terminal header is
 * finally rewritten on the dump device.  All I/O failures are fatal.
574  */
575 static void
576 copy_crashfile(const char *corefile)
577 {
578 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
579 	size_t bufsz = FBUFSIZE;
580 	char *inbuf = Zalloc(bufsz);
581 	len_t completed, total;
582 	offset_t coreoff;
583 	size_t nb;
584 
585 	logprint(SC_SL_ERR | SC_IF_VERBOSE,
586 	    "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
587 
588 	/*
589 	 * This dump file is still compressed
590 	 */
591 	corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
592 
593 	/*
594 	 * Leave room for corehdr, it is updated and written last
595 	 */
596 	corehdr.dump_start = 0;
597 	coreoff = sizeof (corehdr);
598 
599 	/*
600 	 * Calculate the total number of bytes to be copied.
	 * When there is no compressed data size (no data header), progress
	 * for the page data is counted in pages rather than bytes, to match
	 * what CopyPages adds to 'completed'.
601 	 */
602 	total = dumphdr.dump_ksyms_csize +
603 	    dumphdr.dump_npages * sizeof (pfn_t) +
604 	    dumphdr.dump_nvtop * sizeof (mem_vtop_t);
605 	total += datahdr.dump_data_csize != 0 ? datahdr.dump_data_csize :
606 	    dumphdr.dump_npages;
607 	completed = 0;
608 
609 	/*
610 	 * Read in the compressed symbol table, copy it to corefile.
611 	 */
612 	coreoff = roundup(coreoff, pagesize);
613 	corehdr.dump_ksyms = coreoff;
614 	Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
615 	    inbuf, bufsz, &completed, total);
616 
617 	/*
618 	 * Save the pfn table.
619 	 */
620 	coreoff = roundup(coreoff, pagesize);
621 	corehdr.dump_pfn = coreoff;
622 	Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff,
623 	    corefd, inbuf, bufsz, &completed, total);
624 
625 	/*
626 	 * Save the dump map.
627 	 */
628 	coreoff = roundup(coreoff, pagesize);
629 	corehdr.dump_map = coreoff;
630 	Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t),
631 	    &coreoff, corefd, inbuf, bufsz, &completed, total);
632 
633 	/*
634 	 * Save the data pages.
635 	 */
636 	coreoff = roundup(coreoff, pagesize);
637 	corehdr.dump_data = coreoff;
638 	if (datahdr.dump_data_csize != 0) {
639 		Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff,
640 		    corefd, inbuf, bufsz, &completed, total);
641 	} else {
642 		CopyPages(&coreoff, corefd, inbuf, bufsz, &completed, total);
643 	}
644 
645 	/*
646 	 * Now write the modified dump header to front and end of the copy.
647 	 * Make it look like a valid dump device.
648 	 *
649 	 * From dumphdr.h: Two headers are written out: one at the
650 	 * beginning of the dump, and the other at the very end of the
651 	 * dump device. The terminal header is at a known location
652 	 * (end of device) so we can always find it.
653 	 *
654 	 * Pad with zeros to each DUMP_OFFSET boundary.
655 	 */
656 	(void) memset(inbuf, 0, DUMP_OFFSET);
657 
	/*
	 * nb is always between 1 and DUMP_OFFSET, so the test below is
	 * always true: an offset that is already aligned gets a full
	 * DUMP_OFFSET of zero padding.
	 */
658 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
659 	if (nb > 0) {
660 		Pwrite(corefd, inbuf, nb, coreoff);
661 		coreoff += nb;
662 	}
663 
	/* terminal header followed by the data header, as read_dumphdr reads */
664 	Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff);
665 	coreoff += sizeof (corehdr);
666 
667 	Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff);
668 	coreoff += sizeof (datahdr);
669 
670 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
671 	if (nb > 0) {
672 		Pwrite(corefd, inbuf, nb, coreoff);
673 	}
674 
675 	free(inbuf);
	/* initial header, at offset 0 (corehdr.dump_start was set to 0 above) */
676 	Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start);
677 
678 	/*
679 	 * Write out the modified dump header to the dump device.
680 	 * The dump device has been processed, so DF_VALID is clear.
681 	 */
682 	if (!filemode && !rflag)
683 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
684 
685 	(void) close(corefd);
686 
687 	end_progress(completed, total);
688 }
689 
690 /*
691  * compressed streams
 *
 * Input is handed to the worker threads (runstreams) in blocks.  Each
 * stream has a FIFO of pending blocks; blocks that are not in use sit on
 * the freeblocks list.  All of these lists, and the 'bound' flag of each
 * stream, are manipulated with 'lock' held.
692  */
693 typedef struct blockhdr blockhdr_t;
694 typedef struct block block_t;
695 
/* FIFO of blocks: enqt() appends at the tail, deqh() removes the head */
696 struct blockhdr {
697 	block_t *head;
698 	block_t *tail;
699 };
700 
701 struct block {
702 	block_t *next;		/* next block on the same list, or NULL */
703 	char *block;		/* data buffer (maxcsize bytes, initstreams) */
704 	int size;		/* byte count handed to the block decoder */
705 };
706 
707 typedef enum streamstate {
708 	STREAMSTART,		/* next object is a stream header */
709 	STREAMPAGES		/* next object is a compressed page */
710 } streamstate_t;
711 
712 typedef struct stream {
713 	streamstate_t state;	/* what the decoder expects next */
714 	int init;		/* set once the decoder state is set up */
715 	int tag;		/* not referenced in this part of the file */
716 	int bound;		/* a worker is processing this stream */
717 	int nout;		/* pages buffered in blkbuf, not yet written */
718 	char *blkbuf;		/* output staging buffer (coreblksize bytes) */
719 	blockhdr_t blocks;	/* input blocks waiting to be decoded */
720 	pgcnt_t pagenum;	/* first page of the current range */
721 	pgcnt_t curpage;	/* page number of the first page in blkbuf */
722 	pgcnt_t npages;		/* number of pages in the current range */
723 	pgcnt_t done;		/* pages of the current range decoded */
724 	bz_stream strm;		/* bzip2 decompression state */
725 	dumpcsize_t sc;		/* not referenced in this part of the file */
726 	dumpstreamhdr_t sh;	/* stream header (filled in by bz2block) */
727 } stream_t;
728 
729 static stream_t *streams;	/* stream array; see initstreams */
730 static stream_t *endstreams;	/* one past the last stream */
731 
/* size of the compressed-size word preceding each lzjb page */
732 const int cs = sizeof (dumpcsize_t);
733 
/* per worker thread state */
734 typedef struct tinfo {
735 	pthread_t tid;
736 	int corefd;		/* the worker's own dup of the core file fd */
737 } tinfo_t;
738 
739 static int threads_stop;	/* tells the workers to exit */
740 static int threads_active;	/* workers exist; see stopstreams */
741 static tinfo_t *tinfo;		/* worker array */
742 static tinfo_t *endtinfo;	/* one past the last started worker */
743 
744 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
/* signalled when a block is put back on freeblocks */
745 static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER;
/* wakes workers waiting for something to do (or to stop) */
746 static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER;
/* signalled when a worker has drained and released a stream */
747 static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER;
748 
749 static blockhdr_t freeblocks;	/* blocks available to getfreeblock() */
750 
751 static void
752 enqt(blockhdr_t *h, block_t *b)
753 {
754 	b->next = NULL;
755 	if (h->tail == NULL)
756 		h->head = b;
757 	else
758 		h->tail->next = b;
759 	h->tail = b;
760 }
761 
762 static block_t *
763 deqh(blockhdr_t *h)
764 {
765 	block_t *b = h->head;
766 
767 	if (b != NULL) {
768 		h->head = b->next;
769 		if (h->head == NULL)
770 			h->tail = NULL;
771 	}
772 	return (b);
773 }
774 
775 static void *runstreams(void *arg);
776 
777 static void
778 initstreams(int corefd, int nstreams, int maxcsize)
779 {
780 	int nthreads;
781 	int nblocks;
782 	int i;
783 	block_t *b;
784 	tinfo_t *t;
785 
786 	nthreads = sysconf(_SC_NPROCESSORS_ONLN);
787 	if (nstreams < nthreads)
788 		nthreads = nstreams;
789 	if (nthreads < 1)
790 		nthreads = 1;
791 	nblocks = nthreads * 2;
792 
793 	tinfo = Zalloc(nthreads * sizeof (tinfo_t));
794 	endtinfo = &tinfo[nthreads];
795 
796 	/* init streams */
797 	streams = Zalloc(nstreams * sizeof (stream_t));
798 	endstreams = &streams[nstreams];
799 
800 	/* init stream block buffers */
801 	for (i = 0; i < nblocks; i++) {
802 		b = Zalloc(sizeof (block_t));
803 		b->block = Zalloc(maxcsize);
804 		enqt(&freeblocks, b);
805 	}
806 
807 	/* init worker threads */
808 	(void) pthread_mutex_lock(&lock);
809 	threads_active = 1;
810 	threads_stop = 0;
811 	for (t = tinfo; t != endtinfo; t++) {
812 		t->corefd = dup(corefd);
813 		if (t->corefd < 0) {
814 			nthreads = t - tinfo;
815 			endtinfo = t;
816 			break;
817 		}
818 		if (pthread_create(&t->tid, NULL, runstreams, t) != 0)
819 			logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s",
820 			    strerror(errno));
821 	}
822 	(void) pthread_mutex_unlock(&lock);
823 }
824 
825 static void
826 sbarrier()
827 {
828 	stream_t *s;
829 
830 	(void) pthread_mutex_lock(&lock);
831 	for (s = streams; s != endstreams; s++) {
832 		while (s->bound || s->blocks.head != NULL)
833 			(void) pthread_cond_wait(&cvbarrier, &lock);
834 	}
835 	(void) pthread_mutex_unlock(&lock);
836 }
837 
838 static void
839 stopstreams()
840 {
841 	tinfo_t *t;
842 
843 	if (threads_active) {
844 		sbarrier();
845 		(void) pthread_mutex_lock(&lock);
846 		threads_stop = 1;
847 		(void) pthread_cond_signal(&cvwork);
848 		(void) pthread_mutex_unlock(&lock);
849 		for (t = tinfo; t != endtinfo; t++)
850 			(void) pthread_join(t->tid, NULL);
851 		free(tinfo);
852 		tinfo = NULL;
853 		threads_active = 0;
854 	}
855 }
856 
857 static block_t *
858 getfreeblock()
859 {
860 	block_t *b;
861 
862 	(void) pthread_mutex_lock(&lock);
863 	while ((b = deqh(&freeblocks)) == NULL)
864 		(void) pthread_cond_wait(&cvfree, &lock);
865 	(void) pthread_mutex_unlock(&lock);
866 	return (b);
867 }
868 
869 /* data page offset from page number */
870 #define	BTOP(b)		((b) >> dumphdr.dump_pageshift)
871 #define	PTOB(p)		((p) << dumphdr.dump_pageshift)
872 #define	DATAOFF(p)	(corehdr.dump_data + PTOB(p))
873 
874 /* check for coreblksize boundary */
875 static int
876 isblkbnd(pgcnt_t pgnum)
877 {
878 	return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0);
879 }
880 
881 static int
882 iszpage(char *buf)
883 {
884 	size_t sz;
885 	uint64_t *pl;
886 
887 	/*LINTED:E_BAD_PTR_CAST_ALIGN*/
888 	pl = (uint64_t *)(buf);
889 	for (sz = 0; sz < pagesize; sz += sizeof (*pl))
890 		if (*pl++ != 0)
891 			return (0);
892 	return (1);
893 }
894 
895 volatile uint_t *hist;
896 
897 /* write pages to the core file */
898 static void
899 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np)
900 {
901 	atomic_inc_uint(&hist[np]);
902 	if (np > 0)
903 		Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum));
904 }
905 
906 /*
907  * Process one lzjb block.
908  * No object (stream header or page) will be split over a block boundary.
 *
 * 'block' holds 'blocksz' bytes made up of stream headers and pages, each
 * page being a dumpcsize_t size word followed by that many bytes of
 * compressed data.  Decoded pages are collected in s->blkbuf and written
 * to 'corefd' with putpage(); a page of zeros arriving when the buffer
 * is empty is counted in zpages and not written.  A bad stream header,
 * an oversized page or a failed decompression is fatal.
 *
 * NOTE(review): 'in' is an int compared against the size_t blocksz, and
 * the "dsize ... pagesize" message prints a size_t and a long with %d.
909  */
910 static void
911 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz)
912 {
913 	int in = 0;
914 	int csize;
915 	int doflush;
916 	char *out;
917 	size_t dsize;
918 	dumpcsize_t sc;
919 	dumpstreamhdr_t sh;
920 
	/* first block for this stream: set up the output buffer */
921 	if (!s->init) {
922 		s->init = 1;
923 		if (s->blkbuf == NULL)
924 			s->blkbuf = Zalloc(coreblksize);
925 		s->state = STREAMSTART;
926 	}
927 	while (in < blocksz) {
928 		switch (s->state) {
929 		case STREAMSTART:
			/* a stream header starts a new range of pages */
930 			(void) memcpy(&sh, block + in, sizeof (sh));
931 			in += sizeof (sh);
932 			if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0)
933 				logprint(SC_SL_ERR | SC_EXIT_ERR,
934 				    "LZJB STREAMSTART: bad stream header");
935 			if (sh.stream_npages > datahdr.dump_maxrange)
936 				logprint(SC_SL_ERR | SC_EXIT_ERR,
937 				    "LZJB STREAMSTART: bad range: %d > %d",
938 				    sh.stream_npages, datahdr.dump_maxrange);
939 			s->pagenum = sh.stream_pagenum;
940 			s->npages = sh.stream_npages;
941 			s->curpage = s->pagenum;
942 			s->nout = 0;
943 			s->done = 0;
944 			s->state = STREAMPAGES;
945 			break;
946 		case STREAMPAGES:
			/* size word, then csize bytes of compressed page */
947 			(void) memcpy(&sc, block + in, cs);
948 			in += cs;
949 			csize = DUMP_GET_CSIZE(sc);
950 			if (csize > pagesize)
951 				logprint(SC_SL_ERR | SC_EXIT_ERR,
952 				    "LZJB STREAMPAGES: bad csize=%d", csize);
953 
			/* decode into the next free page slot of blkbuf */
954 			out =  s->blkbuf + PTOB(s->nout);
955 			dsize = decompress(block + in, out, csize, pagesize);
956 
957 			if (dsize != pagesize)
958 				logprint(SC_SL_ERR | SC_EXIT_ERR,
959 				    "LZJB STREAMPAGES: dsize %d != pagesize %d",
960 				    dsize, pagesize);
961 
962 			in += csize;
963 			atomic_inc_64(&saved);
964 
			/*
			 * Flush when the buffer holds coreblksize worth of
			 * pages, when the next page starts a new output
			 * block, or when the range is complete.  A zero page
			 * seen while the buffer is empty is dropped: nout
			 * stays 0, so putpage() writes nothing and curpage
			 * simply moves past it.
			 */
965 			doflush = 0;
966 			if (s->nout == 0 && iszpage(out)) {
967 				doflush = 1;
968 				atomic_inc_64(&zpages);
969 			} else if (++s->nout >= BTOP(coreblksize) ||
970 			    isblkbnd(s->curpage + s->nout)) {
971 				doflush = 1;
972 			}
973 			if (++s->done >= s->npages) {
974 				s->state = STREAMSTART;
975 				doflush = 1;
976 			}
977 			if (doflush) {
978 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
979 				s->nout = 0;
980 				s->curpage = s->pagenum + s->done;
981 			}
982 			break;
983 		}
984 	}
985 }
986 
987 /* bzlib library reports errors with this callback */
988 void
989 bz_internal_error(int errcode)
990 {
991 	logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
992 	    BZ2_bzErrorString(errcode));
993 }
994 
995 /*
996  * Return one object in the stream.
997  *
998  * An object (stream header or page) will likely span an input block
999  * of compression data. Return non-zero when an entire object has been
1000  * retrieved from the stream.
1001  */
1002 static int
1003 bz2decompress(stream_t *s, void *buf, size_t size)
1004 {
1005 	int rc;
1006 
1007 	if (s->strm.avail_out == 0) {
1008 		s->strm.next_out = buf;
1009 		s->strm.avail_out = size;
1010 	}
1011 	while (s->strm.avail_in > 0) {
1012 		rc = BZ2_bzDecompress(&s->strm);
1013 		if (rc == BZ_STREAM_END) {
1014 			rc = BZ2_bzDecompressReset(&s->strm);
1015 			if (rc != BZ_OK)
1016 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1017 				    "BZ2_bzDecompressReset: %s",
1018 				    BZ2_bzErrorString(rc));
1019 			continue;
1020 		}
1021 
1022 		if (s->strm.avail_out == 0)
1023 			break;
1024 	}
1025 	return (s->strm.avail_out == 0);
1026 }
1027 
1028 /*
1029  * Process one bzip2 block.
1030  * The interface is documented here:
1031  * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
 *
 * 'block' holds 'blocksz' bytes of compressed input for stream 's'.
 * Objects (stream headers and pages) are pulled out with bz2decompress()
 * and may span blocks: when the input runs out in the middle of an
 * object this function returns, and the object is completed on the next
 * call.  Decoded pages are collected in s->blkbuf and written to
 * 'corefd' with putpage(), exactly as lzjbblock() does.
1032  */
1033 static void
1034 bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
1035 {
1036 	int rc = 0;
1037 	int doflush;
1038 	char *out;
1039 
	/* first block for this stream: set up decompressor and buffer */
1040 	if (!s->init) {
1041 		s->init = 1;
1042 		rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
1043 		if (rc != BZ_OK)
1044 			logprint(SC_SL_ERR | SC_EXIT_ERR,
1045 			    "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
1046 		if (s->blkbuf == NULL)
1047 			s->blkbuf = Zalloc(coreblksize);
1048 		s->strm.avail_out = 0;
1049 		s->state = STREAMSTART;
1050 	}
1051 	s->strm.next_in = block;
1052 	s->strm.avail_in = blocksz;
1053 
1054 	while (s->strm.avail_in > 0) {
1055 		switch (s->state) {
1056 		case STREAMSTART:
			/* header not complete yet: wait for the next block */
1057 			if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
1058 				return;
1059 			if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
1060 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1061 				    "BZ2 STREAMSTART: bad stream header");
1062 			if (s->sh.stream_npages > datahdr.dump_maxrange)
1063 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1064 				    "BZ2 STREAMSTART: bad range: %d > %d",
1065 				    s->sh.stream_npages, datahdr.dump_maxrange);
1066 			s->pagenum = s->sh.stream_pagenum;
1067 			s->npages = s->sh.stream_npages;
1068 			s->curpage = s->pagenum;
1069 			s->nout = 0;
1070 			s->done = 0;
1071 			s->state = STREAMPAGES;
1072 			break;
1073 		case STREAMPAGES:
			/* decode into the next free page slot of blkbuf */
1074 			out = s->blkbuf + PTOB(s->nout);
			/* page not complete yet: wait for the next block */
1075 			if (!bz2decompress(s, out, pagesize))
1076 				return;
1077 
1078 			atomic_inc_64(&saved);
1079 
			/*
			 * Flush when the buffer holds coreblksize worth of
			 * pages, when the next page starts a new output
			 * block, or when the range is complete.  A zero page
			 * seen while the buffer is empty is dropped: nout
			 * stays 0, so putpage() writes nothing.
			 */
1080 			doflush = 0;
1081 			if (s->nout == 0 && iszpage(out)) {
1082 				doflush = 1;
1083 				atomic_inc_64(&zpages);
1084 			} else if (++s->nout >= BTOP(coreblksize) ||
1085 			    isblkbnd(s->curpage + s->nout)) {
1086 				doflush = 1;
1087 			}
1088 			if (++s->done >= s->npages) {
1089 				s->state = STREAMSTART;
1090 				doflush = 1;
1091 			}
1092 			if (doflush) {
1093 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
1094 				s->nout = 0;
1095 				s->curpage = s->pagenum + s->done;
1096 			}
1097 			break;
1098 		}
1099 	}
1100 }
1101 
1102 /* report progress */
1103 static void
1104 report_progress(len_t done, len_t total)
1105 {
1106 	static uint_t sec_last, percent_last;
1107 	uint_t sec, percent;
1108 
1109 	if (!interactive)
1110 		return;
1111 
1112 	percent = done * 100LL / total;
1113 	sec = (gethrtime() - startts) / NANOSEC;
1114 	if (percent != percent_last || sec != sec_last) {
1115 		(void) printf("\r%2u:%02u %3u%% done", sec / 60, sec % 60,
1116 		    percent);
1117 		(void) fflush(stdout);
1118 		sec_last = sec;
1119 		percent_last = percent;
1120 	}
1121 }
1122 
1123 static void
1124 end_progress(len_t done, len_t total)
1125 {
1126 	report_progress(total, total);
1127 	(void) printf(": %lld of %lld pages saved\n", done, total);
1128 }
1129 
1130 /* thread body */
1131 static void *
1132 runstreams(void *arg)
1133 {
1134 	tinfo_t *t = arg;
1135 	stream_t *s;
1136 	block_t *b;
1137 	int bound;
1138 
1139 	(void) pthread_mutex_lock(&lock);
1140 	while (!threads_stop) {
1141 		bound = 0;
1142 		for (s = streams; s != endstreams; s++) {
1143 			if (s->bound || s->blocks.head == NULL)
1144 				continue;
1145 			s->bound = 1;
1146 			bound = 1;
1147 			(void) pthread_cond_signal(&cvwork);
1148 			while (s->blocks.head != NULL) {
1149 				b = deqh(&s->blocks);
1150 				(void) pthread_mutex_unlock(&lock);
1151 
1152 				if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
1153 					lzjbblock(t->corefd, s, b->block,
1154 					    b->size);
1155 				else
1156 					bz2block(t->corefd, s, b->block,
1157 					    b->size);
1158 
1159 				(void) pthread_mutex_lock(&lock);
1160 				enqt(&freeblocks, b);
1161 				(void) pthread_cond_signal(&cvfree);
1162 
1163 				report_progress(saved, corehdr.dump_npages);
1164 			}
1165 			s->bound = 0;
1166 			(void) pthread_cond_signal(&cvbarrier);
1167 		}
1168 		if (!bound && !threads_stop)
1169 			(void) pthread_cond_wait(&cvwork, &lock);
1170 	}
1171 	(void) close(t->corefd);
1172 	(void) pthread_cond_signal(&cvwork);
1173 	(void) pthread_mutex_unlock(&lock);
1174 	return (arg);
1175 }
1176 
1177 /*
1178  * Process compressed pages.
1179  *
1180  * The old format, now called single-threaded lzjb, is a 32-bit size
1181  * word followed by 'size' bytes of lzjb compression data for one
1182  * page. The new format extends this by storing a 12-bit "tag" in the
1183  * upper bits of the size word. When the size word is pagesize or
1184  * less, it is assumed to be one lzjb page. When the size word is
1185  * greater than pagesize, it is assumed to be a "stream block",
1186  * belonging to up to 4095 streams. In practice, the number of streams
1187  * is set to one less than the number of CPUs running at crash
1188  * time. One CPU processes the crash dump, the remaining CPUs
1189  * separately process groups of data pages.
1190  *
1191  * savecore creates a thread per stream, but never more threads than
1192  * the number of CPUs running savecore. This is because savecore can
1193  * be processing a crash file from a remote machine, which may have
1194  * more CPUs.
1195  *
1196  * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1197  * series of 128KB blocks of compression data. In this case, each
1198  * block has a "tag", in the range 1-4095. Each block is handed off to
1199  * to the threads running "runstreams". The dump format is either lzjb
1200  * or bzip2, never a mixture. These threads, in turn, process the
1201  * compression data for groups of pages. Groups of pages are delimited
1202  * by a "stream header", which indicates a starting pfn and number of
1203  * pages. When a stream block has been read, the condition variable
1204  * "cvwork" is signalled, which causes one of the avaiable threads to
1205  * wake up and process the stream.
1206  *
1207  * In the parallel case there will be streams blocks encoding all data
1208  * pages. The stream of blocks is terminated by a zero size
1209  * word. There can be a few lzjb pages tacked on the end, depending on
1210  * the architecture. The sbarrier function ensures that all stream
1211  * blocks have been processed so that the page number for the few
1212  * single pages at the end can be known.
1213  */
1214 static void
1215 decompress_pages(int corefd)
1216 {
1217 	char *cpage = NULL;
1218 	char *dpage = NULL;
1219 	char *out;
1220 	pgcnt_t curpage = 0;
1221 	block_t *b;
1222 	FILE *dumpf;
1223 	FILE *tracef = NULL;
1224 	stream_t *s;
1225 	size_t dsize;
1226 	size_t insz = FBUFSIZE;
1227 	char *inbuf = Zalloc(insz);
1228 	uint32_t csize;
1229 	dumpcsize_t dcsize;
1230 	int nstreams = datahdr.dump_nstreams;
1231 	int maxcsize = datahdr.dump_maxcsize;
1232 	int nout = 0, tag, doflush;
1233 
1234 	dumpf = fdopen(dup(dumpfd), "rb");
1235 	if (dumpf == NULL)
1236 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s",
1237 		    strerror(errno));
1238 
1239 	(void) setvbuf(dumpf, inbuf, _IOFBF, insz);
1240 	Fseek(dumphdr.dump_data, dumpf);
1241 
1242 	/*LINTED: E_CONSTANT_CONDITION*/
1243 	while (1) {
1244 
1245 		/*
1246 		 * The csize word delimits stream blocks.
1247 		 * See dumphdr.h for a description.
1248 		 */
1249 		Fread(&dcsize, sizeof (dcsize), dumpf);
1250 
1251 		tag = DUMP_GET_TAG(dcsize);
1252 		csize = DUMP_GET_CSIZE(dcsize);
1253 
1254 		if (tag != 0) {		/* a stream block */
1255 
1256 			if (nstreams == 0)
1257 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1258 				    "starting data header is missing");
1259 
1260 			if (tag > nstreams)
1261 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1262 				    "stream tag %d not in range 1..%d",
1263 				    tag, nstreams);
1264 
1265 			if (csize > maxcsize)
1266 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1267 				    "block size 0x%x > max csize 0x%x",
1268 				    csize, maxcsize);
1269 
1270 			if (streams == NULL)
1271 				initstreams(corefd, nstreams, maxcsize);
1272 			s = &streams[tag - 1];
1273 			s->tag = tag;
1274 
1275 			b = getfreeblock();
1276 			b->size = csize;
1277 			Fread(b->block, csize, dumpf);
1278 
1279 			(void) pthread_mutex_lock(&lock);
1280 			enqt(&s->blocks, b);
1281 			if (!s->bound)
1282 				(void) pthread_cond_signal(&cvwork);
1283 			(void) pthread_mutex_unlock(&lock);
1284 
1285 		} else if (csize > 0) {		/* one lzjb page */
1286 
1287 			if (csize > pagesize)
1288 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1289 				    "csize 0x%x > pagesize 0x%x",
1290 				    csize, pagesize);
1291 
1292 			if (cpage == NULL)
1293 				cpage = Zalloc(pagesize);
1294 			if (dpage == NULL) {
1295 				dpage = Zalloc(coreblksize);
1296 				nout = 0;
1297 			}
1298 
1299 			Fread(cpage, csize, dumpf);
1300 
1301 			out = dpage + PTOB(nout);
1302 			dsize = decompress(cpage, out, csize, pagesize);
1303 
1304 			if (dsize != pagesize)
1305 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1306 				    "dsize 0x%x != pagesize 0x%x",
1307 				    dsize, pagesize);
1308 
1309 			/*
1310 			 * wait for streams to flush so that 'saved' is correct
1311 			 */
1312 			if (threads_active)
1313 				sbarrier();
1314 
1315 			doflush = 0;
1316 			if (nout == 0)
1317 				curpage = saved;
1318 
1319 			atomic_inc_64(&saved);
1320 
1321 			if (nout == 0 && iszpage(dpage)) {
1322 				doflush = 1;
1323 				atomic_inc_64(&zpages);
1324 			} else if (++nout >= BTOP(coreblksize) ||
1325 			    isblkbnd(curpage + nout) ||
1326 			    saved >= dumphdr.dump_npages) {
1327 				doflush = 1;
1328 			}
1329 
1330 			if (doflush) {
1331 				putpage(corefd, dpage, curpage, nout);
1332 				nout = 0;
1333 			}
1334 
1335 			report_progress(saved, corehdr.dump_npages);
1336 
1337 			/*
1338 			 * Non-streams lzjb does not use blocks.  Stop
1339 			 * here if all the pages have been decompressed.
1340 			 */
1341 			if (saved >= dumphdr.dump_npages)
1342 				break;
1343 
1344 		} else {
1345 			break;			/* end of data */
1346 		}
1347 	}
1348 
1349 	stopstreams();
1350 	if (tracef != NULL)
1351 		(void) fclose(tracef);
1352 	(void) fclose(dumpf);
1353 	if (inbuf)
1354 		free(inbuf);
1355 	if (cpage)
1356 		free(cpage);
1357 	if (dpage)
1358 		free(dpage);
1359 	if (streams)
1360 		free(streams);
1361 }
1362 
1363 static void
1364 build_corefile(const char *namelist, const char *corefile)
1365 {
1366 	size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1367 	size_t ksyms_size = dumphdr.dump_ksyms_size;
1368 	size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1369 	pfn_t *pfn_table;
1370 	char *ksyms_base = Zalloc(ksyms_size);
1371 	char *ksyms_cbase = Zalloc(ksyms_csize);
1372 	size_t ksyms_dsize;
1373 	Stat_t st;
1374 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1375 	int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1376 
1377 	(void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1378 
1379 	/*
1380 	 * Determine the optimum write size for the core file
1381 	 */
1382 	Fstat(corefd, &st, corefile);
1383 
1384 	if (verbose > 1)
1385 		(void) printf("%s: %ld block size\n", corefile,
1386 		    (long)st.st_blksize);
1387 	coreblksize = st.st_blksize;
1388 	if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1389 		coreblksize = MINCOREBLKSIZE;
1390 
1391 	hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1392 
1393 	/*
1394 	 * This dump file is now uncompressed
1395 	 */
1396 	corehdr.dump_flags &= ~DF_COMPRESSED;
1397 
1398 	/*
1399 	 * Read in the compressed symbol table, copy it to corefile,
1400 	 * decompress it, and write the result to namelist.
1401 	 */
1402 	corehdr.dump_ksyms = pagesize;
1403 	Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1404 	Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1405 
1406 	ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1407 	    ksyms_size);
1408 	if (ksyms_dsize != ksyms_size)
1409 		logprint(SC_SL_WARN,
1410 		    "bad data in symbol table, %lu of %lu bytes saved",
1411 		    ksyms_dsize, ksyms_size);
1412 
1413 	Pwrite(namefd, ksyms_base, ksyms_size, 0);
1414 	(void) close(namefd);
1415 	free(ksyms_cbase);
1416 	free(ksyms_base);
1417 
1418 	(void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1419 
1420 	/*
1421 	 * Read in and write out the pfn table.
1422 	 */
1423 	pfn_table = Zalloc(pfn_table_size);
1424 	corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1425 	Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1426 	Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1427 
1428 	/*
1429 	 * Convert the raw translation data into a hashed dump map.
1430 	 */
1431 	corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1432 	build_dump_map(corefd, pfn_table);
1433 	free(pfn_table);
1434 
1435 	/*
1436 	 * Decompress the pages
1437 	 */
1438 	decompress_pages(corefd);
1439 	end_progress(saved, dumphdr.dump_npages);
1440 
1441 	if (verbose)
1442 		(void) printf("%ld (%ld%%) zero pages were not written\n",
1443 		    (pgcnt_t)zpages, (pgcnt_t)zpages * 100 /
1444 		    dumphdr.dump_npages);
1445 
1446 	if (saved != dumphdr.dump_npages)
1447 		logprint(SC_SL_WARN, "bad data after page %ld", saved);
1448 
1449 	/*
1450 	 * Write out the modified dump headers.
1451 	 */
1452 	Pwrite(corefd, &corehdr, sizeof (corehdr), 0);
1453 	if (!filemode && !rflag)
1454 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
1455 
1456 	(void) close(corefd);
1457 }
1458 
1459 /*
1460  * When the system panics, the kernel saves all undelivered messages (messages
1461  * that never made it out to syslogd(8)) in the dump.  At a mimimum, the
1462  * panic message itself will always fall into this category.  Upon reboot,
1463  * the syslog startup script runs savecore -m to recover these messages.
1464  *
1465  * To do this, we read the unsent messages from the dump and send them to
1466  * /dev/conslog on priority band 1.  This has the effect of prepending them
1467  * to any already-accumulated messages in the console backlog, thus preserving
1468  * temporal ordering across the reboot.
1469  *
1470  * Note: since savecore -m is used *only* for this purpose, it does *not*
1471  * attempt to save the crash dump.  The dump will be saved later, after
1472  * syslogd(8) starts, by the savecore startup script.
1473  */
1474 static int
1475 message_save(void)
1476 {
1477 	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE);
1478 	offset_t ldoff;
1479 	log_dump_t ld;
1480 	log_ctl_t lc;
1481 	struct strbuf ctl, dat;
1482 	int logfd;
1483 
1484 	logfd = Open("/dev/conslog", O_WRONLY, 0644);
1485 	dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1486 	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1487 
1488 	ctl.buf = (void *)&lc;
1489 	ctl.len = sizeof (log_ctl_t);
1490 
1491 	dat.buf = Zalloc(DUMP_LOGSIZE);
1492 
1493 	for (;;) {
1494 		ldoff = dumpoff;
1495 
1496 		Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff);
1497 		dumpoff += sizeof (log_dump_t);
1498 		dat.len = ld.ld_msgsize;
1499 
1500 		if (ld.ld_magic == 0)
1501 			break;
1502 
1503 		if (ld.ld_magic != LOG_MAGIC)
1504 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1505 			    "bad magic %x", ld.ld_magic);
1506 
1507 		if (dat.len >= DUMP_LOGSIZE)
1508 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1509 			    "bad size %d", ld.ld_msgsize);
1510 
1511 		Pread(dumpfd, ctl.buf, ctl.len, dumpoff);
1512 		dumpoff += ctl.len;
1513 
1514 		if (ld.ld_csum != checksum32(ctl.buf, ctl.len))
1515 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1516 			    "bad log_ctl checksum");
1517 
1518 		lc.flags |= SL_LOGONLY;
1519 
1520 		Pread(dumpfd, dat.buf, dat.len, dumpoff);
1521 		dumpoff += dat.len;
1522 
1523 		if (ld.ld_msum != checksum32(dat.buf, dat.len))
1524 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1525 			    "bad message checksum");
1526 
1527 		if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1)
1528 			logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s",
1529 			    strerror(errno));
1530 
1531 		ld.ld_magic = 0;	/* clear magic so we never save twice */
1532 		Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff);
1533 	}
1534 	return (0);
1535 }
1536 
/*
 * Extract the numeric suffix from a "vmdump.N" file name.
 *
 * The final path component of 'f' is examined first; if it does not begin
 * with "vmdump", the first occurrence of "vmdump" anywhere in 'f' is used
 * instead.  Returns N, or -1 if no "vmdump.<number>" could be parsed.
 */
static long
getbounds(const char *f)
{
	long b = -1;
	const char *slash = strrchr(f, '/');
	/* strrchr() points at the '/', so step past it to the basename */
	const char *p = (slash != NULL) ? slash + 1 : f;

	if (strncmp(p, "vmdump", 6) != 0)
		p = strstr(f, "vmdump");

	/* on a failed match sscanf() leaves b untouched at -1 */
	(void) sscanf(p != NULL ? p : f, "vmdump.%ld", &b);

	return (b);
}
1553 
1554 static void
1555 stack_retrieve(char *stack)
1556 {
1557 	summary_dump_t sd;
1558 	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE +
1559 	    DUMP_ERPTSIZE);
1560 	dumpoff -= DUMP_SUMMARYSIZE;
1561 
1562 	if (rflag)
1563 		dumpfd = Open(dumpfile, O_RDONLY, 0644);
1564 	else
1565 		dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1566 	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1567 
1568 	Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff);
1569 	dumpoff += sizeof (summary_dump_t);
1570 
1571 	if (sd.sd_magic == 0) {
1572 		*stack = '\0';
1573 		return;
1574 	}
1575 
1576 	if (sd.sd_magic != SUMMARY_MAGIC) {
1577 		*stack = '\0';
1578 		logprint(SC_SL_NONE | SC_IF_VERBOSE,
1579 		    "bad summary magic %x", sd.sd_magic);
1580 		return;
1581 	}
1582 	Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1583 	if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1584 		logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1585 }
1586 
1587 static void
1588 raise_event(enum sc_event_type evidx, char *warn_string)
1589 {
1590 	uint32_t pl = sc_event[evidx].sce_payload;
1591 	char panic_stack[STACK_BUF_SIZE];
1592 	nvlist_t *attr = NULL;
1593 	char uuidbuf[36 + 1];
1594 	int err = 0;
1595 
1596 	if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1597 		goto publish;	/* try to send payload-free event */
1598 
1599 	if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1600 		err |= nvlist_add_string(attr, "dumpdir", savedir);
1601 
1602 	if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1603 		err |= nvlist_add_int64(attr, "instance", bounds);
1604 
1605 	if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1606 		err |= nvlist_add_boolean_value(attr, "compressed",
1607 		    csave ? B_TRUE : B_FALSE);
1608 	}
1609 
1610 	if (pl & SC_PAYLOAD_DUMPADM_EN) {
1611 		char *disabled = defread("DUMPADM_ENABLE=no");
1612 
1613 		err |= nvlist_add_boolean_value(attr, "savecore-enabled",
1614 		    disabled ? B_FALSE : B_TRUE);
1615 	}
1616 
1617 	if (pl & SC_PAYLOAD_IMAGEUUID) {
1618 		(void) strncpy(uuidbuf, corehdr.dump_uuid, 36);
1619 		uuidbuf[36] = '\0';
1620 		err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf);
1621 	}
1622 
1623 	if (pl & SC_PAYLOAD_CRASHTIME) {
1624 		err |= nvlist_add_int64(attr, "crashtime",
1625 		    (int64_t)corehdr.dump_crashtime);
1626 	}
1627 
1628 	if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') {
1629 		err |= nvlist_add_string(attr, "panicstr",
1630 		    corehdr.dump_panicstring);
1631 	}
1632 
1633 	if (pl & SC_PAYLOAD_PANICSTACK) {
1634 		stack_retrieve(panic_stack);
1635 
1636 		if (panic_stack[0] != '\0') {
1637 			/*
1638 			 * The summary page may not be present if the dump
1639 			 * was previously recorded compressed.
1640 			 */
1641 			(void) nvlist_add_string(attr, "panicstack",
1642 			    panic_stack);
1643 		}
1644 	}
1645 
1646 	/* add warning string if this is an ireport for dump failure */
1647 	if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL)
1648 		(void) nvlist_add_string(attr, "failure-reason", warn_string);
1649 
1650 	if (pl & SC_PAYLOAD_DUMPCOMPLETE)
1651 		err |= nvlist_add_boolean_value(attr, "dump-incomplete",
1652 		    dump_incomplete ? B_TRUE : B_FALSE);
1653 
1654 	if (pl & SC_PAYLOAD_FM_PANIC) {
1655 		err |= nvlist_add_boolean_value(attr, "fm-panic",
1656 		    fm_panic ? B_TRUE : B_FALSE);
1657 	}
1658 
1659 	if (pl & SC_PAYLOAD_JUSTCHECKING) {
1660 		err |= nvlist_add_boolean_value(attr, "will-attempt-savecore",
1661 		    cflag ? B_FALSE : B_TRUE);
1662 	}
1663 
1664 	if (err)
1665 		logprint(SC_SL_WARN, "Errors while constructing '%s' "
1666 		    "event payload; will try to publish anyway.");
1667 publish:
1668 	if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1669 	    "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1670 	    attr) != FMEV_SUCCESS) {
1671 		logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1672 		    sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1673 		nvlist_free(attr);
1674 	}
1675 
1676 }
1677 
1678 
/*
 * savecore entry point.
 *
 * Options parsed here: -L (livedump), -v (verbose, repeatable), -c (check
 * only), -d (disregard valid flag), -m (recover unsent messages only),
 * -r (read-only mode; cannot be combined with -c, -m or -L) and
 * -f dumpfile.  An optional single operand overrides the save directory
 * read from /etc/dumpadm.conf.
 *
 * After the dump header has been read, the dump is either copied in
 * compressed form to vmdump.N (csave) or expanded into unix.N and
 * vmcore.N.  Returns 0 on completion; most failures exit through
 * logprint() or usage().
 */
int
main(int argc, char *argv[])
{
	int i, c, bfd;
	Stat_t st;
	struct rlimit rl;
	long filebounds = -1;
	char namelist[30], corefile[30], boundstr[30];
	dumpfile = NULL;

	startts = gethrtime();

	/* raise the open-file soft limit to the hard limit */
	(void) getrlimit(RLIMIT_NOFILE, &rl);
	rl.rlim_cur = rl.rlim_max;
	(void) setrlimit(RLIMIT_NOFILE, &rl);

	openlog(progname, LOG_ODELAY, LOG_AUTH);

	/* default save directory comes from the dumpadm configuration */
	(void) defopen("/etc/dumpadm.conf");
	savedir = defread("DUMPADM_SAVDIR=");
	if (savedir != NULL)
		savedir = strdup(savedir);

	while ((c = getopt(argc, argv, "Lvcdmf:r")) != EOF) {
		switch (c) {
		case 'L':
			livedump++;
			break;
		case 'v':
			verbose++;
			break;
		case 'c':
			cflag++;
			break;
		case 'd':
			disregard_valid_flag++;
			break;
		case 'm':
			mflag++;
			break;
		case 'r':
			rflag++;
			break;
		case 'f':
			/* a vmdump.N file name also fixes the suffix N */
			dumpfile = optarg;
			filebounds = getbounds(dumpfile);
			break;
		case '?':
			usage();
		}
	}

	/*
	 * If doing something other than extracting an existing dump (i.e.
	 * dumpfile has been provided as an option), the user must be root.
	 */
	if (geteuid() != 0 && dumpfile == NULL) {
		(void) fprintf(stderr, "%s: %s %s\n", progname,
		    gettext("you must be root to use"), progname);
		exit(1);
	}

	interactive = isatty(STDOUT_FILENO);

	/* reject option combinations that make no sense together */
	if (cflag && livedump)
		usage();

	if (rflag && (cflag || mflag || livedump))
		usage();

	if (dumpfile == NULL || livedump)
		dumpfd = Open("/dev/dump", O_RDONLY, 0444);

	/* without -f, ask the dump driver for the configured dump device */
	if (dumpfile == NULL) {
		dumpfile = Zalloc(MAXPATHLEN);
		if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {
			have_dumpfile = B_FALSE;
			logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
			    "no dump device configured");
		}
	}

	/* -m only recovers unsent messages; nothing else is done */
	if (mflag)
		return (message_save());

	/* a single operand overrides the configured save directory */
	if (optind == argc - 1)
		savedir = argv[optind];

	if (savedir == NULL || optind < argc - 1)
		usage();

	if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
		logprint(SC_SL_NONE | SC_EXIT_ERR,
		    "dedicated dump device required");

	(void) close(dumpfd);
	dumpfd = -1;

	Stat(dumpfile, &st);

	/* a regular file is an already-saved dump, not a dump device */
	filemode = S_ISREG(st.st_mode);

	/* from a device, save compressed unless the configuration says off */
	if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
		csave = 1;

	read_dumphdr();

	/*
	 * We want this message to go to the log file, but not the console.
	 * There's no good way to do that with the existing syslog facility.
	 * We could extend it to handle this, but there doesn't seem to be
	 * a general need for it, so we isolate the complexity here instead.
	 */
	if (dumphdr.dump_panicstring[0] != '\0' && !rflag) {
		int logfd = Open("/dev/conslog", O_WRONLY, 0644);
		log_ctl_t lc;
		struct strbuf ctl, dat;
		char msg[DUMP_PANICSIZE + 100];
		char fmt[] = "reboot after panic: %s";
		uint32_t msgid;

		STRLOG_MAKE_MSGID(fmt, msgid);

		/* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
		(void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ",
		    progname, msgid);
		/* LINTED: E_SEC_PRINTF_VAR_FMT */
		(void) sprintf(msg + strlen(msg), fmt,
		    dumphdr.dump_panicstring);

		lc.pri = LOG_AUTH | LOG_ERR;
		lc.flags = SL_CONSOLE | SL_LOGONLY;
		lc.level = 0;

		ctl.buf = (void *)&lc;
		ctl.len = sizeof (log_ctl_t);

		/* the data part carries the message including its NUL */
		dat.buf = (void *)msg;
		dat.len = strlen(msg) + 1;

		(void) putmsg(logfd, &ctl, &dat, 0);
		(void) close(logfd);
	}

	if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
		logprint(SC_SL_WARN, "incomplete dump on dump device");
		dump_incomplete = B_TRUE;
	}

	if (dumphdr.dump_fm_panic)
		fm_panic = B_TRUE;

	/*
	 * We have a valid dump on a dump device and know as much about
	 * it as we're going to at this stage.  Raise an event for
	 * logging and so that FMA can open a case for this panic.
	 * Avoid this step for FMA-initiated panics - FMA will replay
	 * ereports off the dump device independently of savecore and
	 * will make a diagnosis, so we don't want to open two cases
	 * for the same event.  Also avoid raising an event for a
	 * livedump, or when we are inflating a compressed dump.
	 */
	if (!fm_panic && !livedump && !filemode && !rflag)
		raise_event(SC_EVENT_DUMP_PENDING, NULL);

	logprint(SC_SL_WARN, "System dump time: %s",
	    ctime(&dumphdr.dump_crashtime));

	/*
	 * Option -c is designed for use from svc-dumpadm where we know
	 * that dumpadm -n is in effect but run savecore -c just to
	 * get the above dump_pending_on_device event raised.  If it is run
	 * interactively then just print further panic details.
	 */
	if (cflag) {
		char *disabled = defread("DUMPADM_ENABLE=no");
		int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
		int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;

		logprint(lvl | ec,
		    "Panic crashdump pending on dump device%s "
		    "run savecore(8) manually to extract. "
		    "Image UUID %s%s.",
		    disabled ? " but dumpadm -n in effect;" : ";",
		    corehdr.dump_uuid,
		    fm_panic ?  "(fault-management initiated)" : "");
		/*NOTREACHED*/
	}

	/* all output files below are created relative to savedir */
	if (chdir(savedir) == -1)
		logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
		    savedir, strerror(errno));

	check_space(csave);

	/* the suffix comes from the -f file name if it had one */
	if (filebounds < 0)
		bounds = read_number_from_file("bounds", 0);
	else
		bounds = filebounds;

	if (csave) {
		/* remember the metrics size before clearing it */
		size_t metrics_size = datahdr.dump_metrics;

		(void) sprintf(corefile, "vmdump.%ld", bounds);

		datahdr.dump_metrics = 0;

		logprint(SC_SL_ERR,
		    "Saving compressed system crash dump in %s/%s",
		    savedir, corefile);

		copy_crashfile(corefile);

		/*
		 * Raise a fault management event that indicates the system
		 * has panicked. We know a reasonable amount about the
		 * condition at this time, but the dump is still compressed.
		 */
		if (!livedump && !fm_panic && !rflag)
			raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);

		/* append kernel-supplied metrics and copy timing, if any */
		if (metrics_size > 0) {
			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
			FILE *mfile = fopen(METRICSFILE, "a");
			char *metrics = Zalloc(metrics_size + 1);

			Pread(dumpfd, metrics, metrics_size, endoff +
			    sizeof (dumphdr) + sizeof (datahdr));

			/* avoid dividing by zero in the rate below */
			if (sec < 1)
				sec = 1;

			if (mfile == NULL) {
				logprint(SC_SL_WARN,
				    "Can't create %s:\n%s",
				    METRICSFILE, metrics);
			} else {
				(void) fprintf(mfile, "[[[[,,,");
				for (i = 0; i < argc; i++)
					(void) fprintf(mfile, "%s ", argv[i]);
				(void) fprintf(mfile, "\n");
				(void) fprintf(mfile, ",,,%s %s %s %s %s\n",
				    dumphdr.dump_utsname.sysname,
				    dumphdr.dump_utsname.nodename,
				    dumphdr.dump_utsname.release,
				    dumphdr.dump_utsname.version,
				    dumphdr.dump_utsname.machine);
				(void) fprintf(mfile, ",,,%s dump time %s\n",
				    dumphdr.dump_flags & DF_LIVE ? "Live" :
				    "Crash", ctime(&dumphdr.dump_crashtime));
				(void) fprintf(mfile, ",,,%s/%s\n", savedir,
				    corefile);
				(void) fprintf(mfile, "Metrics:\n%s\n",
				    metrics);
				(void) fprintf(mfile, "Copy pages,%ld\n",
				    dumphdr.dump_npages);
				(void) fprintf(mfile, "Copy time,%d\n", sec);
				(void) fprintf(mfile, "Copy pages/sec,%ld\n",
				    dumphdr.dump_npages / sec);
				(void) fprintf(mfile, "]]]]\n");
				(void) fclose(mfile);
			}
			free(metrics);
		}

		logprint(SC_SL_ERR,
		    "Decompress the crash dump with "
		    "\n'savecore -vf %s/%s'",
		    savedir, corefile);

	} else {
		(void) sprintf(namelist, "unix.%ld", bounds);
		(void) sprintf(corefile, "vmcore.%ld", bounds);

		/* refuse to overwrite output named by an explicit -f suffix */
		if (interactive && filebounds >= 0 && access(corefile, F_OK)
		    == 0)
			logprint(SC_SL_NONE | SC_EXIT_ERR,
			    "%s already exists: remove with "
			    "'rm -f %s/{unix,vmcore}.%ld'",
			    corefile, savedir, bounds);

		logprint(SC_SL_ERR,
		    "saving system crash dump in %s/{unix,vmcore}.%ld",
		    savedir, bounds);

		build_corefile(namelist, corefile);

		if (!livedump && !filemode && !fm_panic && !rflag)
			raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);

		/* only append timing if a metrics file already exists */
		if (access(METRICSFILE, F_OK) == 0) {
			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
			FILE *mfile = fopen(METRICSFILE, "a");

			/* avoid dividing by zero in the rate below */
			if (sec < 1)
				sec = 1;

			if (mfile == NULL) {
				logprint(SC_SL_WARN,
				    "Can't create %s: %s",
				    METRICSFILE, strerror(errno));
			} else {
				(void) fprintf(mfile, "[[[[,,,");
				for (i = 0; i < argc; i++)
					(void) fprintf(mfile, "%s ", argv[i]);
				(void) fprintf(mfile, "\n");
				(void) fprintf(mfile, ",,,%s/%s\n", savedir,
				    corefile);
				(void) fprintf(mfile, ",,,%s %s %s %s %s\n",
				    dumphdr.dump_utsname.sysname,
				    dumphdr.dump_utsname.nodename,
				    dumphdr.dump_utsname.release,
				    dumphdr.dump_utsname.version,
				    dumphdr.dump_utsname.machine);
				(void) fprintf(mfile,
				    "Uncompress pages,%"PRIu64"\n", saved);
				(void) fprintf(mfile, "Uncompress time,%d\n",
				    sec);
				(void) fprintf(mfile, "Uncompress pages/sec,%"
				    PRIu64"\n", saved / sec);
				(void) fprintf(mfile, "]]]]\n");
				(void) fclose(mfile);
			}
		}
	}

	/* advance the "bounds" file unless the suffix came from -f */
	if (filebounds < 0) {
		(void) sprintf(boundstr, "%ld\n", bounds + 1);
		bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
		Pwrite(bfd, boundstr, strlen(boundstr), 0);
		(void) close(bfd);
	}

	if (verbose) {
		int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;

		(void) printf("%d:%02d dump %s is done\n",
		    sec / 60, sec % 60,
		    csave ? "copy" : "decompress");
	}

	/*
	 * With -vv, print the histogram of pages per write: for each
	 * write size i, the count and its share of all pages written.
	 * hist is only allocated by build_corefile().
	 */
	if (verbose > 1 && hist != NULL) {
		int i, nw;

		for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
			nw += hist[i] * i;
		(void) printf("pages count     %%\n");
		for (i = 0; i <= BTOP(coreblksize); ++i) {
			if (hist[i] == 0)
				continue;
			(void) printf("%3d   %5u  %6.2f\n",
			    i, hist[i], 100.0 * hist[i] * i / nw);
		}
	}

	(void) close(dumpfd);
	dumpfd = -1;

	return (0);
}
2039