xref: /illumos-gate/usr/src/cmd/savecore/savecore.c (revision 843c398e8904ed9d833d2af3103894f909fb4b52)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2016 Joyent, Inc.
24  */
25 /*
26  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27  */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <deflt.h>
37 #include <time.h>
38 #include <syslog.h>
39 #include <stropts.h>
40 #include <pthread.h>
41 #include <limits.h>
42 #include <atomic.h>
43 #include <libnvpair.h>
44 #include <libintl.h>
45 #include <sys/mem.h>
46 #include <sys/statvfs.h>
47 #include <sys/dumphdr.h>
48 #include <sys/dumpadm.h>
49 #include <sys/compress.h>
50 #include <sys/panic.h>
51 #include <sys/sysmacros.h>
52 #include <sys/stat.h>
53 #include <sys/resource.h>
54 #include <bzip2/bzlib.h>
55 #include <sys/fm/util.h>
56 #include <fm/libfmevent.h>
57 #include <sys/int_fmtio.h>
58 
59 
/* stdio buffer size (1 MiB) installed with setvbuf() for fread/fwrite */
#define	FBUFSIZE		(1ULL << 20)

/* minimum size (128 KiB) for output buffering */
#define	MINCOREBLKSIZE		(1ULL << 17)

/* create this file if metrics collection is enabled in the kernel */
#define	METRICSFILE "METRICS.csv"
68 
/*
 * File-scope state shared by the routines in this file.  The saved and
 * zpages counters are bumped with atomic_inc_64() by the block
 * decompression code, which is run from the worker threads.
 */
static char	progname[9] = "savecore";
static char	*savedir;		/* savecore directory */
static char	*dumpfile;		/* source of raw crash dump */
static long	bounds = -1;		/* numeric suffix */
static long	pagesize;		/* dump pagesize */
static int	dumpfd = -1;		/* dumpfile descriptor */
static dumphdr_t corehdr, dumphdr;	/* initial and terminal dumphdrs */
static boolean_t dump_incomplete;	/* dumphdr indicates incomplete */
static boolean_t fm_panic;		/* dump is the result of fm_panic */
static offset_t	endoff;			/* offset of end-of-dump header */
static int	verbose;		/* chatty mode */
static int	disregard_valid_flag;	/* disregard valid flag */
static int	livedump;		/* dump the current running system */
static int	interactive;		/* user invoked; no syslog */
static int	csave;			/* save dump compressed */
static int	filemode;		/* processing file, not dump device */
static int	percent_done;		/* progress indicator */
static int	sec_done;		/* progress last report time */
static hrtime_t	startts;		/* timestamp at start */
static volatile uint64_t saved;		/* count of pages written */
static volatile uint64_t zpages;	/* count of zero pages not written */
static dumpdatahdr_t datahdr;		/* compression info */
static long	coreblksize;		/* preferred write size (st_blksize) */
static int	cflag;			/* run as savecore -c */
static int	mflag;			/* run as savecore -m */
94 
/*
 * Payload information for the events we raise.  These are used
 * in raise_event to determine what payload to include.  Each value is
 * a distinct bit so that they can be OR-ed into a single mask.
 */
#define	SC_PAYLOAD_SAVEDIR	0x0001	/* Include savedir in event */
#define	SC_PAYLOAD_INSTANCE	0x0002	/* Include bounds instance number */
#define	SC_PAYLOAD_IMAGEUUID	0x0004	/* Include dump OS instance uuid */
#define	SC_PAYLOAD_CRASHTIME	0x0008	/* Include epoch crashtime */
#define	SC_PAYLOAD_PANICSTR	0x0010	/* Include panic string */
#define	SC_PAYLOAD_PANICSTACK	0x0020	/* Include panic stack */
#define	SC_PAYLOAD_FAILREASON	0x0040	/* Include failure reason */
#define	SC_PAYLOAD_DUMPCOMPLETE	0x0080	/* Include completeness indicator */
#define	SC_PAYLOAD_ISCOMPRESSED	0x0100	/* Dump is in vmdump.N form */
#define	SC_PAYLOAD_DUMPADM_EN	0x0200	/* Is dumpadm enabled or not? */
#define	SC_PAYLOAD_FM_PANIC	0x0400	/* Panic initiated by FMA */
#define	SC_PAYLOAD_JUSTCHECKING	0x0800	/* Run with -c flag? */

/*
 * The kinds of event we can raise.  The sc_event[] table lists its
 * entries in this same order.
 */
enum sc_event_type {
	SC_EVENT_DUMP_PENDING,
	SC_EVENT_SAVECORE_FAILURE,
	SC_EVENT_DUMP_AVAILABLE
};

/*
 * Common payload, included in every event.  The expansion is
 * parenthesized so that the macro behaves as a single operand in any
 * expression (e.g. "_SC_PAYLOAD_CMN & mask"), not just when OR-ed.
 */
#define	_SC_PAYLOAD_CMN \
    (SC_PAYLOAD_IMAGEUUID | \
    SC_PAYLOAD_CRASHTIME | \
    SC_PAYLOAD_PANICSTR | \
    SC_PAYLOAD_PANICSTACK | \
    SC_PAYLOAD_DUMPCOMPLETE | \
    SC_PAYLOAD_FM_PANIC | \
    SC_PAYLOAD_SAVEDIR)
129 
130 static const struct {
131 	const char *sce_subclass;
132 	uint32_t sce_payload;
133 } sc_event[] = {
134 	/*
135 	 * SC_EVENT_DUMP_PENDING
136 	 */
137 	{
138 		"dump_pending_on_device",
139 		_SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN |
140 		    SC_PAYLOAD_JUSTCHECKING
141 	},
142 
143 	/*
144 	 * SC_EVENT_SAVECORE_FAILURE
145 	 */
146 	{
147 		"savecore_failure",
148 		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON
149 	},
150 
151 	/*
152 	 * SC_EVENT_DUMP_AVAILABLE
153 	 */
154 	{
155 		"dump_available",
156 		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED
157 	},
158 };
159 
160 static void raise_event(enum sc_event_type, char *);
161 
162 static void
163 usage(void)
164 {
165 	(void) fprintf(stderr,
166 	    "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname);
167 	exit(1);
168 }
169 
170 #define	SC_SL_NONE	0x0001	/* no syslog */
171 #define	SC_SL_ERR	0x0002	/* syslog if !interactive, LOG_ERR */
172 #define	SC_SL_WARN	0x0004	/* syslog if !interactive, LOG_WARNING */
173 #define	SC_IF_VERBOSE	0x0008	/* message only if -v */
174 #define	SC_IF_ISATTY	0x0010	/* message only if interactive */
175 #define	SC_EXIT_OK	0x0020	/* exit(0) */
176 #define	SC_EXIT_ERR	0x0040	/* exit(1) */
177 #define	SC_EXIT_PEND	0x0080	/* exit(2) */
178 #define	SC_EXIT_FM	0x0100	/* exit(3) */
179 
180 #define	_SC_ALLEXIT	(SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM)
181 
182 static void
183 logprint(uint32_t flags, char *message, ...)
184 {
185 	va_list args;
186 	char buf[1024];
187 	int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0);
188 	int do_ifverb = (flags & SC_IF_VERBOSE) && verbose;
189 	int do_ifisatty = (flags & SC_IF_ISATTY) && interactive;
190 	int code;
191 	static int logprint_raised = 0;
192 
193 	if (do_always || do_ifverb || do_ifisatty) {
194 		va_start(args, message);
195 		/*LINTED: E_SEC_PRINTF_VAR_FMT*/
196 		(void) vsnprintf(buf, sizeof (buf), message, args);
197 		(void) fprintf(stderr, "%s: %s\n", progname, buf);
198 		if (!interactive) {
199 			switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) {
200 			case SC_SL_ERR:
201 				/*LINTED: E_SEC_PRINTF_VAR_FMT*/
202 				syslog(LOG_ERR, buf);
203 				break;
204 
205 			case SC_SL_WARN:
206 				/*LINTED: E_SEC_PRINTF_VAR_FMT*/
207 				syslog(LOG_WARNING, buf);
208 				break;
209 
210 			default:
211 				break;
212 			}
213 		}
214 		va_end(args);
215 	}
216 
217 	switch (flags & _SC_ALLEXIT) {
218 	case 0:
219 		return;
220 
221 	case SC_EXIT_OK:
222 		code = 0;
223 		break;
224 
225 	case SC_EXIT_PEND:
226 		/*
227 		 * Raise an ireport saying why we are exiting.  Do not
228 		 * raise if run as savecore -m.  If something in the
229 		 * raise_event codepath calls logprint avoid recursion.
230 		 */
231 		if (!mflag && logprint_raised++ == 0)
232 			raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
233 		code = 2;
234 		break;
235 
236 	case SC_EXIT_FM:
237 		code = 3;
238 		break;
239 
240 	case SC_EXIT_ERR:
241 	default:
242 		if (!mflag && logprint_raised++ == 0)
243 			raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
244 		code = 1;
245 		break;
246 	}
247 
248 	exit(code);
249 }
250 
251 /*
252  * System call / libc wrappers that exit on error.
253  */
254 static int
255 Open(const char *name, int oflags, mode_t mode)
256 {
257 	int fd;
258 
259 	if ((fd = open64(name, oflags, mode)) == -1)
260 		logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
261 		    name, strerror(errno));
262 	return (fd);
263 }
264 
265 static void
266 Fread(void *buf, size_t size, FILE *f)
267 {
268 	if (fread(buf, size, 1, f) != 1)
269 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: ferror %d feof %d",
270 		    ferror(f), feof(f));
271 }
272 
273 static void
274 Fwrite(void *buf, size_t size, FILE *f)
275 {
276 	if (fwrite(buf, size, 1, f) != 1)
277 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s",
278 		    strerror(errno));
279 }
280 
281 static void
282 Fseek(offset_t off, FILE *f)
283 {
284 	if (fseeko64(f, off, SEEK_SET) != 0)
285 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
286 		    strerror(errno));
287 }
288 
289 typedef struct stat64 Stat_t;
290 
291 static void
292 Fstat(int fd, Stat_t *sb, const char *fname)
293 {
294 	if (fstat64(fd, sb) != 0)
295 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
296 		    strerror(errno));
297 }
298 
299 static void
300 Stat(const char *fname, Stat_t *sb)
301 {
302 	if (stat64(fname, sb) != 0)
303 		logprint(SC_SL_ERR | SC_EXIT_ERR, "stat(\"%s\"): %s", fname,
304 		    strerror(errno));
305 }
306 
307 static void
308 Pread(int fd, void *buf, size_t size, offset_t off)
309 {
310 	ssize_t sz = pread64(fd, buf, size, off);
311 
312 	if (sz < 0)
313 		logprint(SC_SL_ERR | SC_EXIT_ERR,
314 		    "pread: %s", strerror(errno));
315 	else if (sz != size)
316 		logprint(SC_SL_ERR | SC_EXIT_ERR,
317 		    "pread: size %ld != %ld", sz, size);
318 }
319 
320 static void
321 Pwrite(int fd, void *buf, size_t size, off64_t off)
322 {
323 	if (pwrite64(fd, buf, size, off) != size)
324 		logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s",
325 		    strerror(errno));
326 }
327 
328 static void *
329 Zalloc(size_t size)
330 {
331 	void *buf;
332 
333 	if ((buf = calloc(size, 1)) == NULL)
334 		logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s",
335 		    strerror(errno));
336 	return (buf);
337 }
338 
/*
 * Read a decimal number from the start of the named file.
 *
 * Returns the number read when it is non-negative.  Returns
 * default_value when the file cannot be opened, does not begin with a
 * number, or holds a negative number.
 */
static long
read_number_from_file(const char *filename, long default_value)
{
	long parsed = -1;
	FILE *stream = fopen(filename, "r");

	if (stream != NULL) {
		/* on a failed conversion parsed keeps its -1 sentinel */
		(void) fscanf(stream, "%ld", &parsed);
		(void) fclose(stream);
	}

	if (parsed < 0)
		return (default_value);
	return (parsed);
}
351 
352 static void
353 read_dumphdr(void)
354 {
355 	if (filemode)
356 		dumpfd = Open(dumpfile, O_RDONLY, 0644);
357 	else
358 		dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
359 	endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
360 	Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
361 	Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
362 
363 	pagesize = dumphdr.dump_pagesize;
364 
365 	if (dumphdr.dump_magic != DUMP_MAGIC)
366 		logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
367 		    dumphdr.dump_magic);
368 
369 	if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
370 		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
371 		    "dump already processed");
372 
373 	if (dumphdr.dump_version != DUMP_VERSION)
374 		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
375 		    "dump version (%d) != %s version (%d)",
376 		    dumphdr.dump_version, progname, DUMP_VERSION);
377 
378 	if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
379 		logprint(SC_SL_NONE | SC_EXIT_PEND,
380 		    "dump is from %u-bit kernel - cannot save on %u-bit kernel",
381 		    dumphdr.dump_wordsize, DUMP_WORDSIZE);
382 
383 	if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
384 		if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
385 			logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
386 			    "dump data version (%d) != %s data version (%d)",
387 			    datahdr.dump_datahdr_version, progname,
388 			    DUMP_DATAHDR_VERSION);
389 	} else {
390 		(void) memset(&datahdr, 0, sizeof (datahdr));
391 		datahdr.dump_maxcsize = pagesize;
392 	}
393 
394 	/*
395 	 * Read the initial header, clear the valid bits, and compare headers.
396 	 * The main header may have been overwritten by swapping if we're
397 	 * using a swap partition as the dump device, in which case we bail.
398 	 */
399 	Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start);
400 
401 	corehdr.dump_flags &= ~DF_VALID;
402 	dumphdr.dump_flags &= ~DF_VALID;
403 
404 	if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
405 		/*
406 		 * Clear valid bit so we don't complain on every invocation.
407 		 */
408 		if (!filemode)
409 			Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
410 		logprint(SC_SL_ERR | SC_EXIT_ERR,
411 		    "initial dump header corrupt");
412 	}
413 }
414 
415 static void
416 check_space(int csave)
417 {
418 	struct statvfs fsb;
419 	int64_t spacefree, dumpsize, minfree, datasize;
420 
421 	if (statvfs(".", &fsb) < 0)
422 		logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
423 		    strerror(errno));
424 
425 	dumpsize = dumphdr.dump_data - dumphdr.dump_start;
426 	datasize = dumphdr.dump_npages * pagesize;
427 	if (!csave)
428 		dumpsize += datasize;
429 	else
430 		dumpsize += datahdr.dump_data_csize;
431 
432 	spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
433 	minfree = 1024LL * read_number_from_file("minfree", 1024);
434 	if (spacefree < minfree + dumpsize) {
435 		logprint(SC_SL_ERR | SC_EXIT_ERR,
436 		    "not enough space in %s (%lld MB avail, %lld MB needed)",
437 		    savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
438 	}
439 }
440 
441 static void
442 build_dump_map(int corefd, const pfn_t *pfn_table)
443 {
444 	long i;
445 	static long misses = 0;
446 	size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
447 	mem_vtop_t vtop;
448 	dump_map_t *dmp = Zalloc(dump_mapsize);
449 	char *inbuf = Zalloc(FBUFSIZE);
450 	FILE *in = fdopen(dup(dumpfd), "rb");
451 
452 	(void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
453 	Fseek(dumphdr.dump_map, in);
454 
455 	corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize);
456 
457 	for (i = 0; i < corehdr.dump_nvtop; i++) {
458 		long first = 0;
459 		long last = corehdr.dump_npages - 1;
460 		long middle = 0;
461 		pfn_t pfn = 0;
462 		uintptr_t h;
463 
464 		Fread(&vtop, sizeof (mem_vtop_t), in);
465 		while (last >= first) {
466 			middle = (first + last) / 2;
467 			pfn = pfn_table[middle];
468 			if (pfn == vtop.m_pfn)
469 				break;
470 			if (pfn < vtop.m_pfn)
471 				first = middle + 1;
472 			else
473 				last = middle - 1;
474 		}
475 		if (pfn != vtop.m_pfn) {
476 			if (++misses <= 10)
477 				(void) fprintf(stderr,
478 				    "pfn %ld not found for as=%p, va=%p\n",
479 				    vtop.m_pfn, (void *)vtop.m_as, vtop.m_va);
480 			continue;
481 		}
482 
483 		dmp[i].dm_as = vtop.m_as;
484 		dmp[i].dm_va = (uintptr_t)vtop.m_va;
485 		dmp[i].dm_data = corehdr.dump_data +
486 		    ((uint64_t)middle << corehdr.dump_pageshift);
487 
488 		h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va);
489 		dmp[i].dm_next = dmp[h].dm_first;
490 		dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t);
491 	}
492 
493 	Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map);
494 	free(dmp);
495 	(void) fclose(in);
496 	free(inbuf);
497 }
498 
499 /*
500  * Copy whole sections of the dump device to the file.
501  */
502 static void
503 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf,
504     size_t sz)
505 {
506 	size_t nr;
507 	offset_t off = *offp;
508 
509 	while (nb > 0) {
510 		nr = sz < nb ? sz : (size_t)nb;
511 		Pread(dumpfd, buf, nr, dumpoff);
512 		Pwrite(fd, buf, nr, off);
513 		off += nr;
514 		dumpoff += nr;
515 		nb -= nr;
516 	}
517 	*offp = off;
518 }
519 
520 /*
521  * Copy pages when the dump data header is missing.
522  * This supports older kernels with latest savecore.
523  */
524 static void
525 CopyPages(offset_t *offp, int fd, char *buf, size_t sz)
526 {
527 	uint32_t csize;
528 	FILE *in = fdopen(dup(dumpfd), "rb");
529 	FILE *out = fdopen(dup(fd), "wb");
530 	char *cbuf = Zalloc(pagesize);
531 	char *outbuf = Zalloc(FBUFSIZE);
532 	pgcnt_t np = dumphdr.dump_npages;
533 
534 	(void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE);
535 	(void) setvbuf(in, buf, _IOFBF, sz);
536 	Fseek(dumphdr.dump_data, in);
537 
538 	Fseek(*offp, out);
539 	while (np > 0) {
540 		Fread(&csize, sizeof (uint32_t), in);
541 		Fwrite(&csize, sizeof (uint32_t), out);
542 		*offp += sizeof (uint32_t);
543 		if (csize > pagesize || csize == 0) {
544 			logprint(SC_SL_ERR,
545 			    "CopyPages: page %lu csize %d (0x%x) pagesize %d",
546 			    dumphdr.dump_npages - np, csize, csize,
547 			    pagesize);
548 			break;
549 		}
550 		Fread(cbuf, csize, in);
551 		Fwrite(cbuf, csize, out);
552 		*offp += csize;
553 		np--;
554 	}
555 	(void) fclose(in);
556 	(void) fclose(out);
557 	free(outbuf);
558 	free(buf);
559 }
560 
561 /*
562  * Concatenate dump contents into a new file.
563  * Update corehdr with new offsets.
564  */
565 static void
566 copy_crashfile(const char *corefile)
567 {
568 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
569 	size_t bufsz = FBUFSIZE;
570 	char *inbuf = Zalloc(bufsz);
571 	offset_t coreoff;
572 	size_t nb;
573 
574 	logprint(SC_SL_ERR | SC_IF_VERBOSE,
575 	    "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
576 
577 	/*
578 	 * This dump file is still compressed
579 	 */
580 	corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
581 
582 	/*
583 	 * Leave room for corehdr, it is updated and written last
584 	 */
585 	corehdr.dump_start = 0;
586 	coreoff = sizeof (corehdr);
587 
588 	/*
589 	 * Read in the compressed symbol table, copy it to corefile.
590 	 */
591 	coreoff = roundup(coreoff, pagesize);
592 	corehdr.dump_ksyms = coreoff;
593 	Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
594 	    inbuf, bufsz);
595 
596 	/*
597 	 * Save the pfn table.
598 	 */
599 	coreoff = roundup(coreoff, pagesize);
600 	corehdr.dump_pfn = coreoff;
601 	Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff,
602 	    corefd, inbuf, bufsz);
603 
604 	/*
605 	 * Save the dump map.
606 	 */
607 	coreoff = roundup(coreoff, pagesize);
608 	corehdr.dump_map = coreoff;
609 	Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t),
610 	    &coreoff, corefd, inbuf, bufsz);
611 
612 	/*
613 	 * Save the data pages.
614 	 */
615 	coreoff = roundup(coreoff, pagesize);
616 	corehdr.dump_data = coreoff;
617 	if (datahdr.dump_data_csize != 0)
618 		Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff,
619 		    corefd, inbuf, bufsz);
620 	else
621 		CopyPages(&coreoff, corefd, inbuf, bufsz);
622 
623 	/*
624 	 * Now write the modified dump header to front and end of the copy.
625 	 * Make it look like a valid dump device.
626 	 *
627 	 * From dumphdr.h: Two headers are written out: one at the
628 	 * beginning of the dump, and the other at the very end of the
629 	 * dump device. The terminal header is at a known location
630 	 * (end of device) so we can always find it.
631 	 *
632 	 * Pad with zeros to each DUMP_OFFSET boundary.
633 	 */
634 	(void) memset(inbuf, 0, DUMP_OFFSET);
635 
636 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
637 	if (nb > 0) {
638 		Pwrite(corefd, inbuf, nb, coreoff);
639 		coreoff += nb;
640 	}
641 
642 	Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff);
643 	coreoff += sizeof (corehdr);
644 
645 	Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff);
646 	coreoff += sizeof (datahdr);
647 
648 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
649 	if (nb > 0) {
650 		Pwrite(corefd, inbuf, nb, coreoff);
651 	}
652 
653 	free(inbuf);
654 	Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start);
655 
656 	/*
657 	 * Write out the modified dump header to the dump device.
658 	 * The dump device has been processed, so DF_VALID is clear.
659 	 */
660 	if (!filemode)
661 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
662 
663 	(void) close(corefd);
664 }
665 
/*
 * compressed streams
 *
 * Blocks of compressed data are kept on singly linked FIFO queues: a
 * blockhdr is the queue (head and tail pointers, both NULL when the
 * queue is empty) and a block is one element of it.
 */
typedef struct blockhdr blockhdr_t;
typedef struct block block_t;

struct blockhdr {
	block_t *head;		/* first block, next to be dequeued */
	block_t *tail;		/* last block, where new ones are appended */
};

struct block {
	block_t *next;		/* following block on the queue, or NULL */
	char *block;		/* buffer holding the compressed data */
	int size;		/* size of the data in the buffer */
};
682 
/* What the decompressor expects next from a stream. */
typedef enum streamstate {
	STREAMSTART,		/* a stream header */
	STREAMPAGES		/* a page belonging to the current header */
} streamstate_t;

/* Decompression state for one stream of compressed blocks. */
typedef struct stream {
	streamstate_t state;	/* what is expected next */
	int init;		/* non-zero once lazily initialized */
	int tag;
	int bound;		/* non-zero while a worker owns the stream */
	int nout;		/* pages staged in blkbuf, not yet written */
	char *blkbuf;		/* coreblksize staging buffer for output */
	blockhdr_t blocks;	/* compressed blocks waiting for a worker */
	pgcnt_t pagenum;	/* first page of the current header's range */
	pgcnt_t curpage;	/* page number of the first page in blkbuf */
	pgcnt_t npages;		/* number of pages in the current range */
	pgcnt_t done;		/* pages of the range decompressed so far */
	bz_stream strm;		/* bzip2 decompressor state */
	dumpcsize_t sc;
	dumpstreamhdr_t sh;	/* stream header as assembled by bz2block */
} stream_t;

/* The stream array; endstreams points one past its last element. */
static stream_t *streams;
static stream_t *endstreams;

/* size of the size word that precedes each compressed lzjb page */
const int cs = sizeof (dumpcsize_t);
709 
/* Per worker thread information. */
typedef struct tinfo {
	pthread_t tid;		/* thread id, used to join the worker */
	int corefd;		/* the worker's own dup of the core file fd */
} tinfo_t;

static int threads_stop;	/* set to ask the workers to exit */
static int threads_active;	/* set once the workers have been started */
/* The worker array; endtinfo points one past the last started worker. */
static tinfo_t *tinfo;
static tinfo_t *endtinfo;

/*
 * lock protects the stream queues, the free list and threads_stop.
 * cvfree is signalled when a block returns to the free list, cvwork
 * when a worker should look for work or for the stop request, and
 * cvbarrier when a worker has drained a stream.
 */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER;
static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER;
static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER;

/* queue of block buffers that are currently unused */
static blockhdr_t freeblocks;
726 
727 static void
728 enqt(blockhdr_t *h, block_t *b)
729 {
730 	b->next = NULL;
731 	if (h->tail == NULL)
732 		h->head = b;
733 	else
734 		h->tail->next = b;
735 	h->tail = b;
736 }
737 
738 static block_t *
739 deqh(blockhdr_t *h)
740 {
741 	block_t *b = h->head;
742 
743 	if (b != NULL) {
744 		h->head = b->next;
745 		if (h->head == NULL)
746 			h->tail = NULL;
747 	}
748 	return (b);
749 }
750 
751 static void *runstreams(void *arg);
752 
753 static void
754 initstreams(int corefd, int nstreams, int maxcsize)
755 {
756 	int nthreads;
757 	int nblocks;
758 	int i;
759 	block_t *b;
760 	tinfo_t *t;
761 
762 	nthreads = sysconf(_SC_NPROCESSORS_ONLN);
763 	if (nstreams < nthreads)
764 		nthreads = nstreams;
765 	if (nthreads < 1)
766 		nthreads = 1;
767 	nblocks = nthreads * 2;
768 
769 	tinfo = Zalloc(nthreads * sizeof (tinfo_t));
770 	endtinfo = &tinfo[nthreads];
771 
772 	/* init streams */
773 	streams = Zalloc(nstreams * sizeof (stream_t));
774 	endstreams = &streams[nstreams];
775 
776 	/* init stream block buffers */
777 	for (i = 0; i < nblocks; i++) {
778 		b = Zalloc(sizeof (block_t));
779 		b->block = Zalloc(maxcsize);
780 		enqt(&freeblocks, b);
781 	}
782 
783 	/* init worker threads */
784 	(void) pthread_mutex_lock(&lock);
785 	threads_active = 1;
786 	threads_stop = 0;
787 	for (t = tinfo; t != endtinfo; t++) {
788 		t->corefd = dup(corefd);
789 		if (t->corefd < 0) {
790 			nthreads = t - tinfo;
791 			endtinfo = t;
792 			break;
793 		}
794 		if (pthread_create(&t->tid, NULL, runstreams, t) != 0)
795 			logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s",
796 			    strerror(errno));
797 	}
798 	(void) pthread_mutex_unlock(&lock);
799 }
800 
801 static void
802 sbarrier()
803 {
804 	stream_t *s;
805 
806 	(void) pthread_mutex_lock(&lock);
807 	for (s = streams; s != endstreams; s++) {
808 		while (s->bound || s->blocks.head != NULL)
809 			(void) pthread_cond_wait(&cvbarrier, &lock);
810 	}
811 	(void) pthread_mutex_unlock(&lock);
812 }
813 
814 static void
815 stopstreams()
816 {
817 	tinfo_t *t;
818 
819 	if (threads_active) {
820 		sbarrier();
821 		(void) pthread_mutex_lock(&lock);
822 		threads_stop = 1;
823 		(void) pthread_cond_signal(&cvwork);
824 		(void) pthread_mutex_unlock(&lock);
825 		for (t = tinfo; t != endtinfo; t++)
826 			(void) pthread_join(t->tid, NULL);
827 		free(tinfo);
828 		tinfo = NULL;
829 		threads_active = 0;
830 	}
831 }
832 
833 static block_t *
834 getfreeblock()
835 {
836 	block_t *b;
837 
838 	(void) pthread_mutex_lock(&lock);
839 	while ((b = deqh(&freeblocks)) == NULL)
840 		(void) pthread_cond_wait(&cvfree, &lock);
841 	(void) pthread_mutex_unlock(&lock);
842 	return (b);
843 }
844 
845 /* data page offset from page number */
846 #define	BTOP(b)		((b) >> dumphdr.dump_pageshift)
847 #define	PTOB(p)		((p) << dumphdr.dump_pageshift)
848 #define	DATAOFF(p)	(corehdr.dump_data + PTOB(p))
849 
850 /* check for coreblksize boundary */
851 static int
852 isblkbnd(pgcnt_t pgnum)
853 {
854 	return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0);
855 }
856 
857 static int
858 iszpage(char *buf)
859 {
860 	size_t sz;
861 	uint64_t *pl;
862 
863 	/*LINTED:E_BAD_PTR_CAST_ALIGN*/
864 	pl = (uint64_t *)(buf);
865 	for (sz = 0; sz < pagesize; sz += sizeof (*pl))
866 		if (*pl++ != 0)
867 			return (0);
868 	return (1);
869 }
870 
871 volatile uint_t *hist;
872 
873 /* write pages to the core file */
874 static void
875 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np)
876 {
877 	atomic_inc_uint(&hist[np]);
878 	if (np > 0)
879 		Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum));
880 }
881 
882 /*
883  * Process one lzjb block.
884  * No object (stream header or page) will be split over a block boundary.
885  */
886 static void
887 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz)
888 {
889 	int in = 0;
890 	int csize;
891 	int doflush;
892 	char *out;
893 	size_t dsize;
894 	dumpcsize_t sc;
895 	dumpstreamhdr_t sh;
896 
897 	if (!s->init) {
898 		s->init = 1;
899 		if (s->blkbuf == NULL)
900 			s->blkbuf = Zalloc(coreblksize);
901 		s->state = STREAMSTART;
902 	}
903 	while (in < blocksz) {
904 		switch (s->state) {
905 		case STREAMSTART:
906 			(void) memcpy(&sh, block + in, sizeof (sh));
907 			in += sizeof (sh);
908 			if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0)
909 				logprint(SC_SL_ERR | SC_EXIT_ERR,
910 				    "LZJB STREAMSTART: bad stream header");
911 			if (sh.stream_npages > datahdr.dump_maxrange)
912 				logprint(SC_SL_ERR | SC_EXIT_ERR,
913 				    "LZJB STREAMSTART: bad range: %d > %d",
914 				    sh.stream_npages, datahdr.dump_maxrange);
915 			s->pagenum = sh.stream_pagenum;
916 			s->npages = sh.stream_npages;
917 			s->curpage = s->pagenum;
918 			s->nout = 0;
919 			s->done = 0;
920 			s->state = STREAMPAGES;
921 			break;
922 		case STREAMPAGES:
923 			(void) memcpy(&sc, block + in, cs);
924 			in += cs;
925 			csize = DUMP_GET_CSIZE(sc);
926 			if (csize > pagesize)
927 				logprint(SC_SL_ERR | SC_EXIT_ERR,
928 				    "LZJB STREAMPAGES: bad csize=%d", csize);
929 
930 			out =  s->blkbuf + PTOB(s->nout);
931 			dsize = decompress(block + in, out, csize, pagesize);
932 
933 			if (dsize != pagesize)
934 				logprint(SC_SL_ERR | SC_EXIT_ERR,
935 				    "LZJB STREAMPAGES: dsize %d != pagesize %d",
936 				    dsize, pagesize);
937 
938 			in += csize;
939 			atomic_inc_64(&saved);
940 
941 			doflush = 0;
942 			if (s->nout == 0 && iszpage(out)) {
943 				doflush = 1;
944 				atomic_inc_64(&zpages);
945 			} else if (++s->nout >= BTOP(coreblksize) ||
946 			    isblkbnd(s->curpage + s->nout)) {
947 				doflush = 1;
948 			}
949 			if (++s->done >= s->npages) {
950 				s->state = STREAMSTART;
951 				doflush = 1;
952 			}
953 			if (doflush) {
954 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
955 				s->nout = 0;
956 				s->curpage = s->pagenum + s->done;
957 			}
958 			break;
959 		}
960 	}
961 }
962 
963 /* bzlib library reports errors with this callback */
964 void
965 bz_internal_error(int errcode)
966 {
967 	logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
968 	    BZ2_bzErrorString(errcode));
969 }
970 
971 /*
972  * Return one object in the stream.
973  *
974  * An object (stream header or page) will likely span an input block
975  * of compression data. Return non-zero when an entire object has been
976  * retrieved from the stream.
977  */
978 static int
979 bz2decompress(stream_t *s, void *buf, size_t size)
980 {
981 	int rc;
982 
983 	if (s->strm.avail_out == 0) {
984 		s->strm.next_out = buf;
985 		s->strm.avail_out = size;
986 	}
987 	while (s->strm.avail_in > 0) {
988 		rc = BZ2_bzDecompress(&s->strm);
989 		if (rc == BZ_STREAM_END) {
990 			rc = BZ2_bzDecompressReset(&s->strm);
991 			if (rc != BZ_OK)
992 				logprint(SC_SL_ERR | SC_EXIT_ERR,
993 				    "BZ2_bzDecompressReset: %s",
994 				    BZ2_bzErrorString(rc));
995 			continue;
996 		}
997 
998 		if (s->strm.avail_out == 0)
999 			break;
1000 	}
1001 	return (s->strm.avail_out == 0);
1002 }
1003 
1004 /*
1005  * Process one bzip2 block.
1006  * The interface is documented here:
1007  * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
1008  */
1009 static void
1010 bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
1011 {
1012 	int rc = 0;
1013 	int doflush;
1014 	char *out;
1015 
1016 	if (!s->init) {
1017 		s->init = 1;
1018 		rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
1019 		if (rc != BZ_OK)
1020 			logprint(SC_SL_ERR | SC_EXIT_ERR,
1021 			    "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
1022 		if (s->blkbuf == NULL)
1023 			s->blkbuf = Zalloc(coreblksize);
1024 		s->strm.avail_out = 0;
1025 		s->state = STREAMSTART;
1026 	}
1027 	s->strm.next_in = block;
1028 	s->strm.avail_in = blocksz;
1029 
1030 	while (s->strm.avail_in > 0) {
1031 		switch (s->state) {
1032 		case STREAMSTART:
1033 			if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
1034 				return;
1035 			if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
1036 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1037 				    "BZ2 STREAMSTART: bad stream header");
1038 			if (s->sh.stream_npages > datahdr.dump_maxrange)
1039 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1040 				    "BZ2 STREAMSTART: bad range: %d > %d",
1041 				    s->sh.stream_npages, datahdr.dump_maxrange);
1042 			s->pagenum = s->sh.stream_pagenum;
1043 			s->npages = s->sh.stream_npages;
1044 			s->curpage = s->pagenum;
1045 			s->nout = 0;
1046 			s->done = 0;
1047 			s->state = STREAMPAGES;
1048 			break;
1049 		case STREAMPAGES:
1050 			out = s->blkbuf + PTOB(s->nout);
1051 			if (!bz2decompress(s, out, pagesize))
1052 				return;
1053 
1054 			atomic_inc_64(&saved);
1055 
1056 			doflush = 0;
1057 			if (s->nout == 0 && iszpage(out)) {
1058 				doflush = 1;
1059 				atomic_inc_64(&zpages);
1060 			} else if (++s->nout >= BTOP(coreblksize) ||
1061 			    isblkbnd(s->curpage + s->nout)) {
1062 				doflush = 1;
1063 			}
1064 			if (++s->done >= s->npages) {
1065 				s->state = STREAMSTART;
1066 				doflush = 1;
1067 			}
1068 			if (doflush) {
1069 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
1070 				s->nout = 0;
1071 				s->curpage = s->pagenum + s->done;
1072 			}
1073 			break;
1074 		}
1075 	}
1076 }
1077 
1078 /* report progress */
1079 static void
1080 report_progress()
1081 {
1082 	int sec, percent;
1083 
1084 	if (!interactive)
1085 		return;
1086 
1087 	percent = saved * 100LL / corehdr.dump_npages;
1088 	sec = (gethrtime() - startts) / NANOSEC;
1089 	if (percent > percent_done || sec > sec_done) {
1090 		(void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
1091 		    percent);
1092 		(void) fflush(stdout);
1093 		sec_done = sec;
1094 		percent_done = percent;
1095 	}
1096 }
1097 
1098 /* thread body */
1099 static void *
1100 runstreams(void *arg)
1101 {
1102 	tinfo_t *t = arg;
1103 	stream_t *s;
1104 	block_t *b;
1105 	int bound;
1106 
1107 	(void) pthread_mutex_lock(&lock);
1108 	while (!threads_stop) {
1109 		bound = 0;
1110 		for (s = streams; s != endstreams; s++) {
1111 			if (s->bound || s->blocks.head == NULL)
1112 				continue;
1113 			s->bound = 1;
1114 			bound = 1;
1115 			(void) pthread_cond_signal(&cvwork);
1116 			while (s->blocks.head != NULL) {
1117 				b = deqh(&s->blocks);
1118 				(void) pthread_mutex_unlock(&lock);
1119 
1120 				if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
1121 					lzjbblock(t->corefd, s, b->block,
1122 					    b->size);
1123 				else
1124 					bz2block(t->corefd, s, b->block,
1125 					    b->size);
1126 
1127 				(void) pthread_mutex_lock(&lock);
1128 				enqt(&freeblocks, b);
1129 				(void) pthread_cond_signal(&cvfree);
1130 
1131 				report_progress();
1132 			}
1133 			s->bound = 0;
1134 			(void) pthread_cond_signal(&cvbarrier);
1135 		}
1136 		if (!bound && !threads_stop)
1137 			(void) pthread_cond_wait(&cvwork, &lock);
1138 	}
1139 	(void) close(t->corefd);
1140 	(void) pthread_cond_signal(&cvwork);
1141 	(void) pthread_mutex_unlock(&lock);
1142 	return (arg);
1143 }
1144 
1145 /*
1146  * Process compressed pages.
1147  *
1148  * The old format, now called single-threaded lzjb, is a 32-bit size
1149  * word followed by 'size' bytes of lzjb compression data for one
1150  * page. The new format extends this by storing a 12-bit "tag" in the
1151  * upper bits of the size word. When the size word is pagesize or
1152  * less, it is assumed to be one lzjb page. When the size word is
1153  * greater than pagesize, it is assumed to be a "stream block",
1154  * belonging to up to 4095 streams. In practice, the number of streams
1155  * is set to one less than the number of CPUs running at crash
1156  * time. One CPU processes the crash dump, the remaining CPUs
1157  * separately process groups of data pages.
1158  *
1159  * savecore creates a thread per stream, but never more threads than
1160  * the number of CPUs running savecore. This is because savecore can
1161  * be processing a crash file from a remote machine, which may have
1162  * more CPUs.
1163  *
1164  * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1165  * series of 128KB blocks of compression data. In this case, each
 * block has a "tag", in the range 1-4095. Each block is handed off
 * to the threads running "runstreams". The dump format is either lzjb
1168  * or bzip2, never a mixture. These threads, in turn, process the
1169  * compression data for groups of pages. Groups of pages are delimited
1170  * by a "stream header", which indicates a starting pfn and number of
1171  * pages. When a stream block has been read, the condition variable
 * "cvwork" is signalled, which causes one of the available threads to
1173  * wake up and process the stream.
1174  *
 * In the parallel case there will be stream blocks encoding all data
1176  * pages. The stream of blocks is terminated by a zero size
1177  * word. There can be a few lzjb pages tacked on the end, depending on
1178  * the architecture. The sbarrier function ensures that all stream
1179  * blocks have been processed so that the page number for the few
1180  * single pages at the end can be known.
1181  */
1182 static void
1183 decompress_pages(int corefd)
1184 {
1185 	char *cpage = NULL;
1186 	char *dpage = NULL;
1187 	char *out;
1188 	pgcnt_t curpage = 0;
1189 	block_t *b;
1190 	FILE *dumpf;
1191 	FILE *tracef = NULL;
1192 	stream_t *s;
1193 	size_t dsize;
1194 	size_t insz = FBUFSIZE;
1195 	char *inbuf = Zalloc(insz);
1196 	uint32_t csize;
1197 	dumpcsize_t dcsize;
1198 	int nstreams = datahdr.dump_nstreams;
1199 	int maxcsize = datahdr.dump_maxcsize;
1200 	int nout = 0, tag, doflush;
1201 
1202 	dumpf = fdopen(dup(dumpfd), "rb");
1203 	if (dumpf == NULL)
1204 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s",
1205 		    strerror(errno));
1206 
1207 	(void) setvbuf(dumpf, inbuf, _IOFBF, insz);
1208 	Fseek(dumphdr.dump_data, dumpf);
1209 
1210 	/*LINTED: E_CONSTANT_CONDITION*/
1211 	while (1) {
1212 
1213 		/*
1214 		 * The csize word delimits stream blocks.
1215 		 * See dumphdr.h for a description.
1216 		 */
1217 		Fread(&dcsize, sizeof (dcsize), dumpf);
1218 
1219 		tag = DUMP_GET_TAG(dcsize);
1220 		csize = DUMP_GET_CSIZE(dcsize);
1221 
1222 		if (tag != 0) {		/* a stream block */
1223 
1224 			if (nstreams == 0)
1225 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1226 				    "starting data header is missing");
1227 
1228 			if (tag > nstreams)
1229 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1230 				    "stream tag %d not in range 1..%d",
1231 				    tag, nstreams);
1232 
1233 			if (csize > maxcsize)
1234 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1235 				    "block size 0x%x > max csize 0x%x",
1236 				    csize, maxcsize);
1237 
1238 			if (streams == NULL)
1239 				initstreams(corefd, nstreams, maxcsize);
1240 			s = &streams[tag - 1];
1241 			s->tag = tag;
1242 
1243 			b = getfreeblock();
1244 			b->size = csize;
1245 			Fread(b->block, csize, dumpf);
1246 
1247 			(void) pthread_mutex_lock(&lock);
1248 			enqt(&s->blocks, b);
1249 			if (!s->bound)
1250 				(void) pthread_cond_signal(&cvwork);
1251 			(void) pthread_mutex_unlock(&lock);
1252 
1253 		} else if (csize > 0) {		/* one lzjb page */
1254 
1255 			if (csize > pagesize)
1256 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1257 				    "csize 0x%x > pagesize 0x%x",
1258 				    csize, pagesize);
1259 
1260 			if (cpage == NULL)
1261 				cpage = Zalloc(pagesize);
1262 			if (dpage == NULL) {
1263 				dpage = Zalloc(coreblksize);
1264 				nout = 0;
1265 			}
1266 
1267 			Fread(cpage, csize, dumpf);
1268 
1269 			out = dpage + PTOB(nout);
1270 			dsize = decompress(cpage, out, csize, pagesize);
1271 
1272 			if (dsize != pagesize)
1273 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1274 				    "dsize 0x%x != pagesize 0x%x",
1275 				    dsize, pagesize);
1276 
1277 			/*
1278 			 * wait for streams to flush so that 'saved' is correct
1279 			 */
1280 			if (threads_active)
1281 				sbarrier();
1282 
1283 			doflush = 0;
1284 			if (nout == 0)
1285 				curpage = saved;
1286 
1287 			atomic_inc_64(&saved);
1288 
1289 			if (nout == 0 && iszpage(dpage)) {
1290 				doflush = 1;
1291 				atomic_inc_64(&zpages);
1292 			} else if (++nout >= BTOP(coreblksize) ||
1293 			    isblkbnd(curpage + nout) ||
1294 			    saved >= dumphdr.dump_npages) {
1295 				doflush = 1;
1296 			}
1297 
1298 			if (doflush) {
1299 				putpage(corefd, dpage, curpage, nout);
1300 				nout = 0;
1301 			}
1302 
1303 			report_progress();
1304 
1305 			/*
1306 			 * Non-streams lzjb does not use blocks.  Stop
1307 			 * here if all the pages have been decompressed.
1308 			 */
1309 			if (saved >= dumphdr.dump_npages)
1310 				break;
1311 
1312 		} else {
1313 			break;			/* end of data */
1314 		}
1315 	}
1316 
1317 	stopstreams();
1318 	if (tracef != NULL)
1319 		(void) fclose(tracef);
1320 	(void) fclose(dumpf);
1321 	if (inbuf)
1322 		free(inbuf);
1323 	if (cpage)
1324 		free(cpage);
1325 	if (dpage)
1326 		free(dpage);
1327 	if (streams)
1328 		free(streams);
1329 }
1330 
1331 static void
1332 build_corefile(const char *namelist, const char *corefile)
1333 {
1334 	size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1335 	size_t ksyms_size = dumphdr.dump_ksyms_size;
1336 	size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1337 	pfn_t *pfn_table;
1338 	char *ksyms_base = Zalloc(ksyms_size);
1339 	char *ksyms_cbase = Zalloc(ksyms_csize);
1340 	size_t ksyms_dsize;
1341 	Stat_t st;
1342 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1343 	int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1344 
1345 	(void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1346 
1347 	/*
1348 	 * Determine the optimum write size for the core file
1349 	 */
1350 	Fstat(corefd, &st, corefile);
1351 
1352 	if (verbose > 1)
1353 		(void) printf("%s: %ld block size\n", corefile,
1354 		    (long)st.st_blksize);
1355 	coreblksize = st.st_blksize;
1356 	if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1357 		coreblksize = MINCOREBLKSIZE;
1358 
1359 	hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1360 
1361 	/*
1362 	 * This dump file is now uncompressed
1363 	 */
1364 	corehdr.dump_flags &= ~DF_COMPRESSED;
1365 
1366 	/*
1367 	 * Read in the compressed symbol table, copy it to corefile,
1368 	 * decompress it, and write the result to namelist.
1369 	 */
1370 	corehdr.dump_ksyms = pagesize;
1371 	Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1372 	Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1373 
1374 	ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1375 	    ksyms_size);
1376 	if (ksyms_dsize != ksyms_size)
1377 		logprint(SC_SL_WARN,
1378 		    "bad data in symbol table, %lu of %lu bytes saved",
1379 		    ksyms_dsize, ksyms_size);
1380 
1381 	Pwrite(namefd, ksyms_base, ksyms_size, 0);
1382 	(void) close(namefd);
1383 	free(ksyms_cbase);
1384 	free(ksyms_base);
1385 
1386 	(void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1387 
1388 	/*
1389 	 * Read in and write out the pfn table.
1390 	 */
1391 	pfn_table = Zalloc(pfn_table_size);
1392 	corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1393 	Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1394 	Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1395 
1396 	/*
1397 	 * Convert the raw translation data into a hashed dump map.
1398 	 */
1399 	corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1400 	build_dump_map(corefd, pfn_table);
1401 	free(pfn_table);
1402 
1403 	/*
1404 	 * Decompress the pages
1405 	 */
1406 	decompress_pages(corefd);
1407 	(void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved,
1408 	    dumphdr.dump_npages);
1409 
1410 	if (verbose)
1411 		(void) printf("%ld (%ld%%) zero pages were not written\n",
1412 		    (pgcnt_t)zpages, (pgcnt_t)zpages * 100 /
1413 		    dumphdr.dump_npages);
1414 
1415 	if (saved != dumphdr.dump_npages)
1416 		logprint(SC_SL_WARN, "bad data after page %ld", saved);
1417 
1418 	/*
1419 	 * Write out the modified dump headers.
1420 	 */
1421 	Pwrite(corefd, &corehdr, sizeof (corehdr), 0);
1422 	if (!filemode)
1423 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
1424 
1425 	(void) close(corefd);
1426 }
1427 
1428 /*
1429  * When the system panics, the kernel saves all undelivered messages (messages
 * that never made it out to syslogd(1M)) in the dump.  At a minimum, the
1431  * panic message itself will always fall into this category.  Upon reboot,
1432  * the syslog startup script runs savecore -m to recover these messages.
1433  *
1434  * To do this, we read the unsent messages from the dump and send them to
1435  * /dev/conslog on priority band 1.  This has the effect of prepending them
1436  * to any already-accumulated messages in the console backlog, thus preserving
1437  * temporal ordering across the reboot.
1438  *
1439  * Note: since savecore -m is used *only* for this purpose, it does *not*
1440  * attempt to save the crash dump.  The dump will be saved later, after
1441  * syslogd(1M) starts, by the savecore startup script.
1442  */
1443 static int
1444 message_save(void)
1445 {
1446 	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE);
1447 	offset_t ldoff;
1448 	log_dump_t ld;
1449 	log_ctl_t lc;
1450 	struct strbuf ctl, dat;
1451 	int logfd;
1452 
1453 	logfd = Open("/dev/conslog", O_WRONLY, 0644);
1454 	dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1455 	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1456 
1457 	ctl.buf = (void *)&lc;
1458 	ctl.len = sizeof (log_ctl_t);
1459 
1460 	dat.buf = Zalloc(DUMP_LOGSIZE);
1461 
1462 	for (;;) {
1463 		ldoff = dumpoff;
1464 
1465 		Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff);
1466 		dumpoff += sizeof (log_dump_t);
1467 		dat.len = ld.ld_msgsize;
1468 
1469 		if (ld.ld_magic == 0)
1470 			break;
1471 
1472 		if (ld.ld_magic != LOG_MAGIC)
1473 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1474 			    "bad magic %x", ld.ld_magic);
1475 
1476 		if (dat.len >= DUMP_LOGSIZE)
1477 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1478 			    "bad size %d", ld.ld_msgsize);
1479 
1480 		Pread(dumpfd, ctl.buf, ctl.len, dumpoff);
1481 		dumpoff += ctl.len;
1482 
1483 		if (ld.ld_csum != checksum32(ctl.buf, ctl.len))
1484 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1485 			    "bad log_ctl checksum");
1486 
1487 		lc.flags |= SL_LOGONLY;
1488 
1489 		Pread(dumpfd, dat.buf, dat.len, dumpoff);
1490 		dumpoff += dat.len;
1491 
1492 		if (ld.ld_msum != checksum32(dat.buf, dat.len))
1493 			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1494 			    "bad message checksum");
1495 
1496 		if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1)
1497 			logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s",
1498 			    strerror(errno));
1499 
1500 		ld.ld_magic = 0;	/* clear magic so we never save twice */
1501 		Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff);
1502 	}
1503 	return (0);
1504 }
1505 
/*
 * Extract the numeric suffix N from a "vmdump.N" file name.
 *
 * The last path component of 'f' is tried first, so that a directory whose
 * name merely contains "vmdump" cannot hide the real file name.  If the
 * last component does not parse, the first "vmdump" substring anywhere in
 * 'f' is tried instead.
 *
 * Returns N, or -1 when no "vmdump.N" pattern is found.
 */
static long
getbounds(const char *f)
{
	long b = -1;
	const char *slash = strrchr(f, '/');
	const char *base = (slash != NULL) ? slash + 1 : f;
	const char *p;

	if (sscanf(base, "vmdump.%ld", &b) == 1)
		return (b);

	/* fall back to scanning from the first "vmdump" in the whole path */
	b = -1;
	p = strstr(f, "vmdump");
	(void) sscanf(p != NULL ? p : f, "vmdump.%ld", &b);

	return (b);
}
1522 
1523 static void
1524 stack_retrieve(char *stack)
1525 {
1526 	summary_dump_t sd;
1527 	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE +
1528 	    DUMP_ERPTSIZE);
1529 	dumpoff -= DUMP_SUMMARYSIZE;
1530 
1531 	dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1532 	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1533 
1534 	Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff);
1535 	dumpoff += sizeof (summary_dump_t);
1536 
1537 	if (sd.sd_magic == 0) {
1538 		*stack = '\0';
1539 		return;
1540 	}
1541 
1542 	if (sd.sd_magic != SUMMARY_MAGIC) {
1543 		*stack = '\0';
1544 		logprint(SC_SL_NONE | SC_IF_VERBOSE,
1545 		    "bad summary magic %x", sd.sd_magic);
1546 		return;
1547 	}
1548 	Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1549 	if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1550 		logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1551 }
1552 
1553 static void
1554 raise_event(enum sc_event_type evidx, char *warn_string)
1555 {
1556 	uint32_t pl = sc_event[evidx].sce_payload;
1557 	char panic_stack[STACK_BUF_SIZE];
1558 	nvlist_t *attr = NULL;
1559 	char uuidbuf[36 + 1];
1560 	int err = 0;
1561 
1562 	if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1563 		goto publish;	/* try to send payload-free event */
1564 
1565 	if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1566 		err |= nvlist_add_string(attr, "dumpdir", savedir);
1567 
1568 	if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1569 		err |= nvlist_add_int64(attr, "instance", bounds);
1570 
1571 	if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1572 		err |= nvlist_add_boolean_value(attr, "compressed",
1573 		    csave ? B_TRUE : B_FALSE);
1574 	}
1575 
1576 	if (pl & SC_PAYLOAD_DUMPADM_EN) {
1577 		char *disabled = defread("DUMPADM_ENABLE=no");
1578 
1579 		err |= nvlist_add_boolean_value(attr, "savecore-enabled",
1580 		    disabled ? B_FALSE : B_TRUE);
1581 	}
1582 
1583 	if (pl & SC_PAYLOAD_IMAGEUUID) {
1584 		(void) strncpy(uuidbuf, corehdr.dump_uuid, 36);
1585 		uuidbuf[36] = '\0';
1586 		err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf);
1587 	}
1588 
1589 	if (pl & SC_PAYLOAD_CRASHTIME) {
1590 		err |= nvlist_add_int64(attr, "crashtime",
1591 		    (int64_t)corehdr.dump_crashtime);
1592 	}
1593 
1594 	if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') {
1595 		err |= nvlist_add_string(attr, "panicstr",
1596 		    corehdr.dump_panicstring);
1597 	}
1598 
1599 	if (pl & SC_PAYLOAD_PANICSTACK) {
1600 		stack_retrieve(panic_stack);
1601 
1602 		if (panic_stack[0] != '\0') {
1603 			/*
1604 			 * The summary page may not be present if the dump
1605 			 * was previously recorded compressed.
1606 			 */
1607 			(void) nvlist_add_string(attr, "panicstack",
1608 			    panic_stack);
1609 		}
1610 	}
1611 
1612 	/* add warning string if this is an ireport for dump failure */
1613 	if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL)
1614 		(void) nvlist_add_string(attr, "failure-reason", warn_string);
1615 
1616 	if (pl & SC_PAYLOAD_DUMPCOMPLETE)
1617 		err |= nvlist_add_boolean_value(attr, "dump-incomplete",
1618 		    dump_incomplete ? B_TRUE : B_FALSE);
1619 
1620 	if (pl & SC_PAYLOAD_FM_PANIC) {
1621 		err |= nvlist_add_boolean_value(attr, "fm-panic",
1622 		    fm_panic ? B_TRUE : B_FALSE);
1623 	}
1624 
1625 	if (pl & SC_PAYLOAD_JUSTCHECKING) {
1626 		err |= nvlist_add_boolean_value(attr, "will-attempt-savecore",
1627 		    cflag ? B_FALSE : B_TRUE);
1628 	}
1629 
1630 	if (err)
1631 		logprint(SC_SL_WARN, "Errors while constructing '%s' "
1632 		    "event payload; will try to publish anyway.");
1633 publish:
1634 	if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1635 	    "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1636 	    attr) != FMEV_SUCCESS) {
1637 		logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1638 		    sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1639 		nvlist_free(attr);
1640 	}
1641 
1642 }
1643 
1644 
/*
 * savecore entry point.
 *
 * Options (each one sets or increments the global of the same name):
 *   -L  livedump           -v  verbose          -c  cflag
 *   -d  disregard_valid_flag                    -m  mflag
 *   -f file  use 'file' as the dump source; its "vmdump.N" suffix, if any,
 *            becomes the bounds number (see getbounds()).
 * An optional single operand names the save directory and overrides the
 * DUMPADM_SAVDIR value read from /etc/dumpadm.conf.
 *
 * Returns 0 on the paths that reach the end of the function, or the result
 * of message_save() for -m.
 */
int
main(int argc, char *argv[])
{
	int i, c, bfd;
	Stat_t st;
	struct rlimit rl;
	long filebounds = -1;
	char namelist[30], corefile[30], boundstr[30];
	dumpfile = NULL;

	startts = gethrtime();

	/* raise the soft open-file limit to the hard limit */
	(void) getrlimit(RLIMIT_NOFILE, &rl);
	rl.rlim_cur = rl.rlim_max;
	(void) setrlimit(RLIMIT_NOFILE, &rl);

	openlog(progname, LOG_ODELAY, LOG_AUTH);

	/* default save directory comes from dumpadm's configuration file */
	(void) defopen("/etc/dumpadm.conf");
	savedir = defread("DUMPADM_SAVDIR=");
	if (savedir != NULL)
		savedir = strdup(savedir);

	while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) {
		switch (c) {
		case 'L':
			livedump++;
			break;
		case 'v':
			verbose++;
			break;
		case 'c':
			cflag++;
			break;
		case 'd':
			disregard_valid_flag++;
			break;
		case 'm':
			mflag++;
			break;
		case 'f':
			dumpfile = optarg;
			filebounds = getbounds(dumpfile);
			break;
		case '?':
			usage();
		}
	}

	/*
	 * If doing something other than extracting an existing dump (i.e.
	 * dumpfile has been provided as an option), the user must be root.
	 */
	if (geteuid() != 0 && dumpfile == NULL) {
		(void) fprintf(stderr, "%s: %s %s\n", progname,
		    gettext("you must be root to use"), progname);
		exit(1);
	}

	interactive = isatty(STDOUT_FILENO);

	/* -c and -L cannot be combined */
	if (cflag && livedump)
		usage();

	if (dumpfile == NULL || livedump)
		dumpfd = Open("/dev/dump", O_RDONLY, 0444);

	/* without -f, ask the dump driver which device is configured */
	if (dumpfile == NULL) {
		dumpfile = Zalloc(MAXPATHLEN);
		if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1)
			logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
			    "no dump device configured");
	}

	/* -m only replays saved log messages; no dump is extracted */
	if (mflag)
		return (message_save());

	/* a single remaining operand is the save directory */
	if (optind == argc - 1)
		savedir = argv[optind];

	if (savedir == NULL || optind < argc - 1)
		usage();

	if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
		logprint(SC_SL_NONE | SC_EXIT_ERR,
		    "dedicated dump device required");

	(void) close(dumpfd);
	dumpfd = -1;

	Stat(dumpfile, &st);

	/* filemode: the dump source is a regular file */
	filemode = S_ISREG(st.st_mode);

	/*
	 * Save the dump in compressed form (csave) only when the source is
	 * not a regular file and DUMPADM_CSAVE=off is not configured.
	 */
	if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
		csave = 1;

	read_dumphdr();

	/*
	 * We want this message to go to the log file, but not the console.
	 * There's no good way to do that with the existing syslog facility.
	 * We could extend it to handle this, but there doesn't seem to be
	 * a general need for it, so we isolate the complexity here instead.
	 */
	if (dumphdr.dump_panicstring[0] != '\0') {
		int logfd = Open("/dev/conslog", O_WRONLY, 0644);
		log_ctl_t lc;
		struct strbuf ctl, dat;
		char msg[DUMP_PANICSIZE + 100];
		char fmt[] = "reboot after panic: %s";
		uint32_t msgid;

		STRLOG_MAKE_MSGID(fmt, msgid);

		/* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
		(void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ",
		    progname, msgid);
		/* LINTED: E_SEC_PRINTF_VAR_FMT */
		(void) sprintf(msg + strlen(msg), fmt,
		    dumphdr.dump_panicstring);

		lc.pri = LOG_AUTH | LOG_ERR;
		lc.flags = SL_CONSOLE | SL_LOGONLY;
		lc.level = 0;

		ctl.buf = (void *)&lc;
		ctl.len = sizeof (log_ctl_t);

		dat.buf = (void *)msg;
		dat.len = strlen(msg) + 1;

		(void) putmsg(logfd, &ctl, &dat, 0);
		(void) close(logfd);
	}

	if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
		logprint(SC_SL_WARN, "incomplete dump on dump device");
		dump_incomplete = B_TRUE;
	}

	if (dumphdr.dump_fm_panic)
		fm_panic = B_TRUE;

	/*
	 * We have a valid dump on a dump device and know as much about
	 * it as we're going to at this stage.  Raise an event for
	 * logging and so that FMA can open a case for this panic.
	 * Avoid this step for FMA-initiated panics - FMA will replay
	 * ereports off the dump device independently of savecore and
	 * will make a diagnosis, so we don't want to open two cases
	 * for the same event.  Also avoid raising an event for a
	 * livedump, or when we inflating a compressed dump.
	 */
	if (!fm_panic && !livedump && !filemode)
		raise_event(SC_EVENT_DUMP_PENDING, NULL);

	logprint(SC_SL_WARN, "System dump time: %s",
	    ctime(&dumphdr.dump_crashtime));

	/*
	 * Option -c is designed for use from svc-dumpadm where we know
	 * that dumpadm -n is in effect but run savecore -c just to
	 * get the above dump_pending_on_device event raised.  If it is run
	 * interactively then just print further panic details.
	 */
	if (cflag) {
		char *disabled = defread("DUMPADM_ENABLE=no");
		int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
		int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;

		logprint(lvl | ec,
		    "Panic crashdump pending on dump device%s "
		    "run savecore(1M) manually to extract. "
		    "Image UUID %s%s.",
		    disabled ? " but dumpadm -n in effect;" : ";",
		    corehdr.dump_uuid,
		    fm_panic ?  "(fault-management initiated)" : "");
		/*NOTREACHED*/
	}

	/* all output files below are created relative to savedir */
	if (chdir(savedir) == -1)
		logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
		    savedir, strerror(errno));

	check_space(csave);

	/*
	 * The instance number comes from the "bounds" file unless the -f
	 * file name already supplied one.
	 */
	if (filebounds < 0)
		bounds = read_number_from_file("bounds", 0);
	else
		bounds = filebounds;

	if (csave) {
		/* keep the metrics size; the header field is cleared below */
		size_t metrics_size = datahdr.dump_metrics;

		(void) sprintf(corefile, "vmdump.%ld", bounds);

		datahdr.dump_metrics = 0;

		logprint(SC_SL_ERR,
		    "Saving compressed system crash dump in %s/%s",
		    savedir, corefile);

		copy_crashfile(corefile);

		/*
		 * Raise a fault management event that indicates the system
		 * has panicked. We know a reasonable amount about the
		 * condition at this time, but the dump is still compressed.
		 */
		if (!livedump && !fm_panic)
			raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);

		/* append the dump's metrics text to METRICSFILE */
		if (metrics_size > 0) {
			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
			FILE *mfile = fopen(METRICSFILE, "a");
			char *metrics = Zalloc(metrics_size + 1);

			Pread(dumpfd, metrics, metrics_size, endoff +
			    sizeof (dumphdr) + sizeof (datahdr));

			/* avoid dividing by zero in the rate below */
			if (sec < 1)
				sec = 1;

			if (mfile == NULL) {
				logprint(SC_SL_WARN,
				    "Can't create %s:\n%s",
				    METRICSFILE, metrics);
			} else {
				(void) fprintf(mfile, "[[[[,,,");
				for (i = 0; i < argc; i++)
					(void) fprintf(mfile, "%s ", argv[i]);
				(void) fprintf(mfile, "\n");
				(void) fprintf(mfile, ",,,%s %s %s %s %s\n",
				    dumphdr.dump_utsname.sysname,
				    dumphdr.dump_utsname.nodename,
				    dumphdr.dump_utsname.release,
				    dumphdr.dump_utsname.version,
				    dumphdr.dump_utsname.machine);
				(void) fprintf(mfile, ",,,%s dump time %s\n",
				    dumphdr.dump_flags & DF_LIVE ? "Live" :
				    "Crash", ctime(&dumphdr.dump_crashtime));
				(void) fprintf(mfile, ",,,%s/%s\n", savedir,
				    corefile);
				(void) fprintf(mfile, "Metrics:\n%s\n",
				    metrics);
				(void) fprintf(mfile, "Copy pages,%ld\n",
				    dumphdr.  dump_npages);
				(void) fprintf(mfile, "Copy time,%d\n", sec);
				(void) fprintf(mfile, "Copy pages/sec,%ld\n",
				    dumphdr.dump_npages / sec);
				(void) fprintf(mfile, "]]]]\n");
				(void) fclose(mfile);
			}
			free(metrics);
		}

		logprint(SC_SL_ERR,
		    "Decompress the crash dump with "
		    "\n'savecore -vf %s/%s'",
		    savedir, corefile);

	} else {
		(void) sprintf(namelist, "unix.%ld", bounds);
		(void) sprintf(corefile, "vmcore.%ld", bounds);

		/* refuse to overwrite an existing vmcore when run by hand */
		if (interactive && filebounds >= 0 && access(corefile, F_OK)
		    == 0)
			logprint(SC_SL_NONE | SC_EXIT_ERR,
			    "%s already exists: remove with "
			    "'rm -f %s/{unix,vmcore}.%ld'",
			    corefile, savedir, bounds);

		logprint(SC_SL_ERR,
		    "saving system crash dump in %s/{unix,vmcore}.%ld",
		    savedir, bounds);

		build_corefile(namelist, corefile);

		if (!livedump && !filemode && !fm_panic)
			raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);

		/* metrics are appended only if METRICSFILE already exists */
		if (access(METRICSFILE, F_OK) == 0) {
			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
			FILE *mfile = fopen(METRICSFILE, "a");

			/* avoid dividing by zero in the rate below */
			if (sec < 1)
				sec = 1;

			if (mfile == NULL) {
				logprint(SC_SL_WARN,
				    "Can't create %s: %s",
				    METRICSFILE, strerror(errno));
			} else {
				(void) fprintf(mfile, "[[[[,,,");
				for (i = 0; i < argc; i++)
					(void) fprintf(mfile, "%s ", argv[i]);
				(void) fprintf(mfile, "\n");
				(void) fprintf(mfile, ",,,%s/%s\n", savedir,
				    corefile);
				(void) fprintf(mfile, ",,,%s %s %s %s %s\n",
				    dumphdr.dump_utsname.sysname,
				    dumphdr.dump_utsname.nodename,
				    dumphdr.dump_utsname.release,
				    dumphdr.dump_utsname.version,
				    dumphdr.dump_utsname.machine);
				(void) fprintf(mfile,
				    "Uncompress pages,%"PRIu64"\n", saved);
				(void) fprintf(mfile, "Uncompress time,%d\n",
				    sec);
				(void) fprintf(mfile, "Uncompress pages/sec,%"
				    PRIu64"\n", saved / sec);
				(void) fprintf(mfile, "]]]]\n");
				(void) fclose(mfile);
			}
		}
	}

	/* advance the "bounds" file unless the number came from -f */
	if (filebounds < 0) {
		(void) sprintf(boundstr, "%ld\n", bounds + 1);
		bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
		Pwrite(bfd, boundstr, strlen(boundstr), 0);
		(void) close(bfd);
	}

	if (verbose) {
		int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;

		(void) printf("%d:%02d dump %s is done\n",
		    sec / 60, sec % 60,
		    csave ? "copy" : "decompress");
	}

	/*
	 * With -vv, print the hist[] table: for each index i with a
	 * non-zero count, the count and its share of the total sum of
	 * hist[i] * i.
	 */
	if (verbose > 1 && hist != NULL) {
		int i, nw;

		for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
			nw += hist[i] * i;
		(void) printf("pages count     %%\n");
		for (i = 0; i <= BTOP(coreblksize); ++i) {
			if (hist[i] == 0)
				continue;
			(void) printf("%3d   %5u  %6.2f\n",
			    i, hist[i], 100.0 * hist[i] * i / nw);
		}
	}

	(void) close(dumpfd);
	dumpfd = -1;

	return (0);
}
1997