xref: /titanic_51/usr/src/cmd/savecore/savecore.c (revision 88e89651dc6499b7fb3cdef093572d0851a843a7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2016 Joyent, Inc.
24  */
25 /*
26  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
27  */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <deflt.h>
37 #include <time.h>
38 #include <syslog.h>
39 #include <stropts.h>
40 #include <pthread.h>
41 #include <limits.h>
42 #include <atomic.h>
43 #include <libnvpair.h>
44 #include <libintl.h>
45 #include <sys/mem.h>
46 #include <sys/statvfs.h>
47 #include <sys/dumphdr.h>
48 #include <sys/dumpadm.h>
49 #include <sys/compress.h>
50 #include <sys/panic.h>
51 #include <sys/sysmacros.h>
52 #include <sys/stat.h>
53 #include <sys/resource.h>
54 #include <bzip2/bzlib.h>
55 #include <sys/fm/util.h>
56 #include <fm/libfmevent.h>
57 #include <sys/int_fmtio.h>
58 
59 
/*
 * fread/fwrite buffer size (1 MiB); handed to setvbuf() for the stdio
 * streams layered over the dump and core descriptors
 */
#define	FBUFSIZE		(1ULL << 20)

/* minimum size for output buffering (128 KiB) */
#define	MINCOREBLKSIZE		(1ULL << 17)

/* create this file if metrics collection is enabled in the kernel */
#define	METRICSFILE "METRICS.csv"
68 
/*
 * Program-wide state.  These are shared by the option parsing code, the
 * header readers, the copy routines and the decompression threads.
 */
static char	progname[9] = "savecore";
static char	*savedir;		/* savecore directory */
static char	*dumpfile;		/* source of raw crash dump */
static long	bounds = -1;		/* numeric suffix */
static long	pagesize;		/* dump pagesize */
static int	dumpfd = -1;		/* dumpfile descriptor */
/* cleared by Stat() on failure; logprint() then raises no failure event */
static boolean_t have_dumpfile = B_TRUE;	/* dumpfile existence */
static dumphdr_t corehdr, dumphdr;	/* initial and terminal dumphdrs */
static boolean_t dump_incomplete;	/* dumphdr indicates incomplete */
static boolean_t fm_panic;		/* dump is the result of fm_panic */
static offset_t	endoff;			/* offset of end-of-dump header */
static int	verbose;		/* chatty mode */
static int	disregard_valid_flag;	/* disregard valid flag */
static int	livedump;		/* dump the current running system */
static int	interactive;		/* user invoked; no syslog */
static int	csave;			/* save dump compressed */
static int	filemode;		/* processing file, not dump device */
static int	percent_done;		/* progress indicator */
static int	sec_done;		/* progress last report time */
static hrtime_t	startts;		/* timestamp at start */
/* the two counters below are bumped atomically by the worker threads */
static volatile uint64_t saved;		/* count of pages written */
static volatile uint64_t zpages;	/* count of zero pages not written */
static dumpdatahdr_t datahdr;		/* compression info */
static long	coreblksize;		/* preferred write size (st_blksize) */
static int	cflag;			/* run as savecore -c */
static int	mflag;			/* run as savecore -m */
95 
/*
 * Payload information for the events we raise.  These are used
 * in raise_event to determine what payload to include.  Each flag is a
 * distinct bit so that they can be OR-ed together into the sce_payload
 * mask of an sc_event[] entry.
 */
#define	SC_PAYLOAD_SAVEDIR	0x0001	/* Include savedir in event */
#define	SC_PAYLOAD_INSTANCE	0x0002	/* Include bounds instance number */
#define	SC_PAYLOAD_IMAGEUUID	0x0004	/* Include dump OS instance uuid */
#define	SC_PAYLOAD_CRASHTIME	0x0008	/* Include epoch crashtime */
#define	SC_PAYLOAD_PANICSTR	0x0010	/* Include panic string */
#define	SC_PAYLOAD_PANICSTACK	0x0020	/* Include panic stack */
#define	SC_PAYLOAD_FAILREASON	0x0040	/* Include failure reason */
#define	SC_PAYLOAD_DUMPCOMPLETE	0x0080	/* Include completeness indicator */
#define	SC_PAYLOAD_ISCOMPRESSED	0x0100	/* Dump is in vmdump.N form */
#define	SC_PAYLOAD_DUMPADM_EN	0x0200	/* Is dumpadm enabled or not? */
#define	SC_PAYLOAD_FM_PANIC	0x0400	/* Panic initiated by FMA */
#define	SC_PAYLOAD_JUSTCHECKING	0x0800	/* Run with -c flag? */

/*
 * Event types; the values index the sc_event[] table.
 */
enum sc_event_type {
	SC_EVENT_DUMP_PENDING,
	SC_EVENT_SAVECORE_FAILURE,
	SC_EVENT_DUMP_AVAILABLE
};

/*
 * Common payload
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand when combined with operators that bind tighter than '|'
 * (for example '&' or '==').
 */
#define	_SC_PAYLOAD_CMN \
	(SC_PAYLOAD_IMAGEUUID | \
	SC_PAYLOAD_CRASHTIME | \
	SC_PAYLOAD_PANICSTR | \
	SC_PAYLOAD_PANICSTACK | \
	SC_PAYLOAD_DUMPCOMPLETE | \
	SC_PAYLOAD_FM_PANIC | \
	SC_PAYLOAD_SAVEDIR)
130 
/*
 * Per-event description, indexed by enum sc_event_type: the event
 * subclass string and the mask of SC_PAYLOAD_* flags naming the payload
 * members to include.  The entries must stay in the same order as the
 * enumerators.
 */
static const struct {
	const char *sce_subclass;	/* event subclass name */
	uint32_t sce_payload;		/* SC_PAYLOAD_* mask */
} sc_event[] = {
	/*
	 * SC_EVENT_DUMP_PENDING
	 */
	{
		"dump_pending_on_device",
		_SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN |
		    SC_PAYLOAD_JUSTCHECKING
	},

	/*
	 * SC_EVENT_SAVECORE_FAILURE
	 */
	{
		"savecore_failure",
		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON
	},

	/*
	 * SC_EVENT_DUMP_AVAILABLE
	 */
	{
		"dump_available",
		_SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED
	},
};

/* forward declaration: logprint() raises failure events before exiting */
static void raise_event(enum sc_event_type, char *);
162 
/*
 * Print the command synopsis on stderr and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname);
	exit(1);
}
170 
/*
 * Flags for logprint().  They fall into three groups:
 *
 *   SC_SL_*	where the message goes besides stderr (syslog priority)
 *   SC_IF_*	conditions under which the message is emitted at all
 *   SC_EXIT_*	whether logprint() exits afterwards, and with what status
 *
 * At most one SC_EXIT_* flag should be passed; any other combination
 * of exit flags is treated by logprint() like SC_EXIT_ERR.
 */
#define	SC_SL_NONE	0x0001	/* no syslog */
#define	SC_SL_ERR	0x0002	/* syslog if !interactive, LOG_ERR */
#define	SC_SL_WARN	0x0004	/* syslog if !interactive, LOG_WARNING */
#define	SC_IF_VERBOSE	0x0008	/* message only if -v */
#define	SC_IF_ISATTY	0x0010	/* message only if interactive */
#define	SC_EXIT_OK	0x0020	/* exit(0) */
#define	SC_EXIT_ERR	0x0040	/* exit(1) */
#define	SC_EXIT_PEND	0x0080	/* exit(2) */
#define	SC_EXIT_FM	0x0100	/* exit(3) */

/* mask of every exit flag; logprint() returns only if none is set */
#define	_SC_ALLEXIT	(SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM)
182 
183 static void
184 logprint(uint32_t flags, char *message, ...)
185 {
186 	va_list args;
187 	char buf[1024];
188 	int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0);
189 	int do_ifverb = (flags & SC_IF_VERBOSE) && verbose;
190 	int do_ifisatty = (flags & SC_IF_ISATTY) && interactive;
191 	int code;
192 	static int logprint_raised = 0;
193 
194 	if (do_always || do_ifverb || do_ifisatty) {
195 		va_start(args, message);
196 		/*LINTED: E_SEC_PRINTF_VAR_FMT*/
197 		(void) vsnprintf(buf, sizeof (buf), message, args);
198 		(void) fprintf(stderr, "%s: %s\n", progname, buf);
199 		if (!interactive) {
200 			switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) {
201 			case SC_SL_ERR:
202 				/*LINTED: E_SEC_PRINTF_VAR_FMT*/
203 				syslog(LOG_ERR, buf);
204 				break;
205 
206 			case SC_SL_WARN:
207 				/*LINTED: E_SEC_PRINTF_VAR_FMT*/
208 				syslog(LOG_WARNING, buf);
209 				break;
210 
211 			default:
212 				break;
213 			}
214 		}
215 		va_end(args);
216 	}
217 
218 	switch (flags & _SC_ALLEXIT) {
219 	case 0:
220 		return;
221 
222 	case SC_EXIT_OK:
223 		code = 0;
224 		break;
225 
226 	case SC_EXIT_PEND:
227 		/*
228 		 * Raise an ireport saying why we are exiting.  Do not
229 		 * raise if run as savecore -m.  If something in the
230 		 * raise_event codepath calls logprint avoid recursion.
231 		 */
232 		if (!mflag && logprint_raised++ == 0)
233 			raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
234 		code = 2;
235 		break;
236 
237 	case SC_EXIT_FM:
238 		code = 3;
239 		break;
240 
241 	case SC_EXIT_ERR:
242 	default:
243 		if (!mflag && logprint_raised++ == 0 && have_dumpfile)
244 			raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
245 		code = 1;
246 		break;
247 	}
248 
249 	exit(code);
250 }
251 
252 /*
253  * System call / libc wrappers that exit on error.
254  */
255 static int
256 Open(const char *name, int oflags, mode_t mode)
257 {
258 	int fd;
259 
260 	if ((fd = open64(name, oflags, mode)) == -1)
261 		logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
262 		    name, strerror(errno));
263 	return (fd);
264 }
265 
266 static void
267 Fread(void *buf, size_t size, FILE *f)
268 {
269 	if (fread(buf, size, 1, f) != 1)
270 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: %s",
271 		    strerror(errno));
272 }
273 
274 static void
275 Fwrite(void *buf, size_t size, FILE *f)
276 {
277 	if (fwrite(buf, size, 1, f) != 1)
278 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s",
279 		    strerror(errno));
280 }
281 
282 static void
283 Fseek(offset_t off, FILE *f)
284 {
285 	if (fseeko64(f, off, SEEK_SET) != 0)
286 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
287 		    strerror(errno));
288 }
289 
290 typedef struct stat64 Stat_t;
291 
292 static void
293 Fstat(int fd, Stat_t *sb, const char *fname)
294 {
295 	if (fstat64(fd, sb) != 0)
296 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
297 		    strerror(errno));
298 }
299 
300 static void
301 Stat(const char *fname, Stat_t *sb)
302 {
303 	if (stat64(fname, sb) != 0) {
304 		have_dumpfile = B_FALSE;
305 		logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
306 		    "of file %s", fname);
307 	}
308 }
309 
310 static void
311 Pread(int fd, void *buf, size_t size, offset_t off)
312 {
313 	ssize_t sz = pread64(fd, buf, size, off);
314 
315 	if (sz < 0)
316 		logprint(SC_SL_ERR | SC_EXIT_ERR,
317 		    "pread: %s", strerror(errno));
318 	else if (sz != size)
319 		logprint(SC_SL_ERR | SC_EXIT_ERR,
320 		    "pread: size %ld != %ld", sz, size);
321 }
322 
323 static void
324 Pwrite(int fd, void *buf, size_t size, off64_t off)
325 {
326 	if (pwrite64(fd, buf, size, off) != size)
327 		logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s",
328 		    strerror(errno));
329 }
330 
331 static void *
332 Zalloc(size_t size)
333 {
334 	void *buf;
335 
336 	if ((buf = calloc(size, 1)) == NULL)
337 		logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s",
338 		    strerror(errno));
339 	return (buf);
340 }
341 
/*
 * Return the decimal number stored at the start of `filename'.
 *
 * default_value is returned instead when the file cannot be opened,
 * does not begin with a number, or holds a negative number.
 */
static long
read_number_from_file(const char *filename, long default_value)
{
	long parsed = -1;
	FILE *stream = fopen(filename, "r");

	if (stream == NULL)
		return (default_value);

	/* on a failed conversion parsed keeps its negative sentinel */
	(void) fscanf(stream, "%ld", &parsed);
	(void) fclose(stream);

	if (parsed < 0)
		return (default_value);
	return (parsed);
}
354 
355 static void
356 read_dumphdr(void)
357 {
358 	if (filemode)
359 		dumpfd = Open(dumpfile, O_RDONLY, 0644);
360 	else
361 		dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
362 	endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
363 	Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
364 	Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
365 
366 	pagesize = dumphdr.dump_pagesize;
367 
368 	if (dumphdr.dump_magic != DUMP_MAGIC)
369 		logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
370 		    dumphdr.dump_magic);
371 
372 	if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
373 		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
374 		    "dump already processed");
375 
376 	if (dumphdr.dump_version != DUMP_VERSION)
377 		logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
378 		    "dump version (%d) != %s version (%d)",
379 		    dumphdr.dump_version, progname, DUMP_VERSION);
380 
381 	if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
382 		logprint(SC_SL_NONE | SC_EXIT_PEND,
383 		    "dump is from %u-bit kernel - cannot save on %u-bit kernel",
384 		    dumphdr.dump_wordsize, DUMP_WORDSIZE);
385 
386 	if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
387 		if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
388 			logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
389 			    "dump data version (%d) != %s data version (%d)",
390 			    datahdr.dump_datahdr_version, progname,
391 			    DUMP_DATAHDR_VERSION);
392 	} else {
393 		(void) memset(&datahdr, 0, sizeof (datahdr));
394 		datahdr.dump_maxcsize = pagesize;
395 	}
396 
397 	/*
398 	 * Read the initial header, clear the valid bits, and compare headers.
399 	 * The main header may have been overwritten by swapping if we're
400 	 * using a swap partition as the dump device, in which case we bail.
401 	 */
402 	Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start);
403 
404 	corehdr.dump_flags &= ~DF_VALID;
405 	dumphdr.dump_flags &= ~DF_VALID;
406 
407 	if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
408 		/*
409 		 * Clear valid bit so we don't complain on every invocation.
410 		 */
411 		if (!filemode)
412 			Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
413 		logprint(SC_SL_ERR | SC_EXIT_ERR,
414 		    "initial dump header corrupt");
415 	}
416 }
417 
418 static void
419 check_space(int csave)
420 {
421 	struct statvfs fsb;
422 	int64_t spacefree, dumpsize, minfree, datasize;
423 
424 	if (statvfs(".", &fsb) < 0)
425 		logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
426 		    strerror(errno));
427 
428 	dumpsize = dumphdr.dump_data - dumphdr.dump_start;
429 	datasize = dumphdr.dump_npages * pagesize;
430 	if (!csave)
431 		dumpsize += datasize;
432 	else
433 		dumpsize += datahdr.dump_data_csize;
434 
435 	spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
436 	minfree = 1024LL * read_number_from_file("minfree", 1024);
437 	if (spacefree < minfree + dumpsize) {
438 		logprint(SC_SL_ERR | SC_EXIT_ERR,
439 		    "not enough space in %s (%lld MB avail, %lld MB needed)",
440 		    savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
441 	}
442 }
443 
444 static void
445 build_dump_map(int corefd, const pfn_t *pfn_table)
446 {
447 	long i;
448 	static long misses = 0;
449 	size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
450 	mem_vtop_t vtop;
451 	dump_map_t *dmp = Zalloc(dump_mapsize);
452 	char *inbuf = Zalloc(FBUFSIZE);
453 	FILE *in = fdopen(dup(dumpfd), "rb");
454 
455 	(void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
456 	Fseek(dumphdr.dump_map, in);
457 
458 	corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize);
459 
460 	for (i = 0; i < corehdr.dump_nvtop; i++) {
461 		long first = 0;
462 		long last = corehdr.dump_npages - 1;
463 		long middle = 0;
464 		pfn_t pfn = 0;
465 		uintptr_t h;
466 
467 		Fread(&vtop, sizeof (mem_vtop_t), in);
468 		while (last >= first) {
469 			middle = (first + last) / 2;
470 			pfn = pfn_table[middle];
471 			if (pfn == vtop.m_pfn)
472 				break;
473 			if (pfn < vtop.m_pfn)
474 				first = middle + 1;
475 			else
476 				last = middle - 1;
477 		}
478 		if (pfn != vtop.m_pfn) {
479 			if (++misses <= 10)
480 				(void) fprintf(stderr,
481 				    "pfn %ld not found for as=%p, va=%p\n",
482 				    vtop.m_pfn, (void *)vtop.m_as, vtop.m_va);
483 			continue;
484 		}
485 
486 		dmp[i].dm_as = vtop.m_as;
487 		dmp[i].dm_va = (uintptr_t)vtop.m_va;
488 		dmp[i].dm_data = corehdr.dump_data +
489 		    ((uint64_t)middle << corehdr.dump_pageshift);
490 
491 		h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va);
492 		dmp[i].dm_next = dmp[h].dm_first;
493 		dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t);
494 	}
495 
496 	Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map);
497 	free(dmp);
498 	(void) fclose(in);
499 	free(inbuf);
500 }
501 
502 /*
503  * Copy whole sections of the dump device to the file.
504  */
505 static void
506 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf,
507     size_t sz)
508 {
509 	size_t nr;
510 	offset_t off = *offp;
511 
512 	while (nb > 0) {
513 		nr = sz < nb ? sz : (size_t)nb;
514 		Pread(dumpfd, buf, nr, dumpoff);
515 		Pwrite(fd, buf, nr, off);
516 		off += nr;
517 		dumpoff += nr;
518 		nb -= nr;
519 	}
520 	*offp = off;
521 }
522 
523 /*
524  * Copy pages when the dump data header is missing.
525  * This supports older kernels with latest savecore.
526  */
527 static void
528 CopyPages(offset_t *offp, int fd, char *buf, size_t sz)
529 {
530 	uint32_t csize;
531 	FILE *in = fdopen(dup(dumpfd), "rb");
532 	FILE *out = fdopen(dup(fd), "wb");
533 	char *cbuf = Zalloc(pagesize);
534 	char *outbuf = Zalloc(FBUFSIZE);
535 	pgcnt_t np = dumphdr.dump_npages;
536 
537 	(void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE);
538 	(void) setvbuf(in, buf, _IOFBF, sz);
539 	Fseek(dumphdr.dump_data, in);
540 
541 	Fseek(*offp, out);
542 	while (np > 0) {
543 		Fread(&csize, sizeof (uint32_t), in);
544 		Fwrite(&csize, sizeof (uint32_t), out);
545 		*offp += sizeof (uint32_t);
546 		if (csize > pagesize || csize == 0) {
547 			logprint(SC_SL_ERR,
548 			    "CopyPages: page %lu csize %d (0x%x) pagesize %d",
549 			    dumphdr.dump_npages - np, csize, csize,
550 			    pagesize);
551 			break;
552 		}
553 		Fread(cbuf, csize, in);
554 		Fwrite(cbuf, csize, out);
555 		*offp += csize;
556 		np--;
557 	}
558 	(void) fclose(in);
559 	(void) fclose(out);
560 	free(outbuf);
561 	free(buf);
562 }
563 
564 /*
565  * Concatenate dump contents into a new file.
566  * Update corehdr with new offsets.
567  */
568 static void
569 copy_crashfile(const char *corefile)
570 {
571 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
572 	size_t bufsz = FBUFSIZE;
573 	char *inbuf = Zalloc(bufsz);
574 	offset_t coreoff;
575 	size_t nb;
576 
577 	logprint(SC_SL_ERR | SC_IF_VERBOSE,
578 	    "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
579 
580 	/*
581 	 * This dump file is still compressed
582 	 */
583 	corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
584 
585 	/*
586 	 * Leave room for corehdr, it is updated and written last
587 	 */
588 	corehdr.dump_start = 0;
589 	coreoff = sizeof (corehdr);
590 
591 	/*
592 	 * Read in the compressed symbol table, copy it to corefile.
593 	 */
594 	coreoff = roundup(coreoff, pagesize);
595 	corehdr.dump_ksyms = coreoff;
596 	Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
597 	    inbuf, bufsz);
598 
599 	/*
600 	 * Save the pfn table.
601 	 */
602 	coreoff = roundup(coreoff, pagesize);
603 	corehdr.dump_pfn = coreoff;
604 	Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff,
605 	    corefd, inbuf, bufsz);
606 
607 	/*
608 	 * Save the dump map.
609 	 */
610 	coreoff = roundup(coreoff, pagesize);
611 	corehdr.dump_map = coreoff;
612 	Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t),
613 	    &coreoff, corefd, inbuf, bufsz);
614 
615 	/*
616 	 * Save the data pages.
617 	 */
618 	coreoff = roundup(coreoff, pagesize);
619 	corehdr.dump_data = coreoff;
620 	if (datahdr.dump_data_csize != 0)
621 		Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff,
622 		    corefd, inbuf, bufsz);
623 	else
624 		CopyPages(&coreoff, corefd, inbuf, bufsz);
625 
626 	/*
627 	 * Now write the modified dump header to front and end of the copy.
628 	 * Make it look like a valid dump device.
629 	 *
630 	 * From dumphdr.h: Two headers are written out: one at the
631 	 * beginning of the dump, and the other at the very end of the
632 	 * dump device. The terminal header is at a known location
633 	 * (end of device) so we can always find it.
634 	 *
635 	 * Pad with zeros to each DUMP_OFFSET boundary.
636 	 */
637 	(void) memset(inbuf, 0, DUMP_OFFSET);
638 
639 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
640 	if (nb > 0) {
641 		Pwrite(corefd, inbuf, nb, coreoff);
642 		coreoff += nb;
643 	}
644 
645 	Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff);
646 	coreoff += sizeof (corehdr);
647 
648 	Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff);
649 	coreoff += sizeof (datahdr);
650 
651 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
652 	if (nb > 0) {
653 		Pwrite(corefd, inbuf, nb, coreoff);
654 	}
655 
656 	free(inbuf);
657 	Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start);
658 
659 	/*
660 	 * Write out the modified dump header to the dump device.
661 	 * The dump device has been processed, so DF_VALID is clear.
662 	 */
663 	if (!filemode)
664 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
665 
666 	(void) close(corefd);
667 }
668 
/*
 * compressed streams
 *
 * A stream is an independent sequence of compressed blocks.  Blocks
 * read from the dump are queued on their stream and decompressed by
 * whichever worker thread binds the stream (see runstreams).
 */
typedef struct blockhdr blockhdr_t;
typedef struct block block_t;

/* singly linked FIFO of blocks; both pointers are NULL when empty */
struct blockhdr {
	block_t *head;
	block_t *tail;
};

/* one buffer of compressed input */
struct block {
	block_t *next;		/* next block on the same queue */
	char *block;		/* the data buffer */
	int size;		/* size passed to the decompressor */
};

/* decoder position within a stream */
typedef enum streamstate {
	STREAMSTART,		/* expecting a stream header */
	STREAMPAGES		/* expecting page data */
} streamstate_t;

typedef struct stream {
	streamstate_t state;	/* what the decoder expects next */
	int init;		/* set once the first block is processed */
	int tag;		/* NOTE(review): not used in this part */
	int bound;		/* a worker thread is processing us */
	int nout;		/* pages buffered in blkbuf, not written */
	char *blkbuf;		/* coreblksize output staging buffer */
	blockhdr_t blocks;	/* queued input blocks */
	pgcnt_t pagenum;	/* first page of the current range */
	pgcnt_t curpage;	/* page number of the first buffered page */
	pgcnt_t npages;		/* number of pages in the current range */
	pgcnt_t done;		/* pages of the range processed so far */
	bz_stream strm;		/* bzip2 decompressor state */
	dumpcsize_t sc;		/* NOTE(review): not used in this part */
	dumpstreamhdr_t sh;	/* stream header (filled in by bz2block) */
} stream_t;
707 
/* stream table allocated by initstreams: [streams, endstreams) */
static stream_t *streams;
static stream_t *endstreams;

/* size of the per-page compressed size word */
const int cs = sizeof (dumpcsize_t);

/* per worker thread state */
typedef struct tinfo {
	pthread_t tid;		/* thread id, for pthread_join */
	int corefd;		/* private dup of the core file descriptor */
} tinfo_t;

static int threads_stop;	/* set to ask the workers to exit */
static int threads_active;	/* workers have been started */
/* worker table allocated by initstreams: [tinfo, endtinfo) */
static tinfo_t *tinfo;
static tinfo_t *endtinfo;

/*
 * lock protects the stream queues, the free block list and the flags
 * above.  The condition variables are signalled as follows:
 *   cvfree	a block was returned to freeblocks
 *   cvwork	a worker should re-scan the streams (or exit)
 *   cvbarrier	a worker finished with a stream
 */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER;
static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER;
static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER;

/* blocks available for new input */
static blockhdr_t freeblocks;
729 
730 static void
731 enqt(blockhdr_t *h, block_t *b)
732 {
733 	b->next = NULL;
734 	if (h->tail == NULL)
735 		h->head = b;
736 	else
737 		h->tail->next = b;
738 	h->tail = b;
739 }
740 
741 static block_t *
742 deqh(blockhdr_t *h)
743 {
744 	block_t *b = h->head;
745 
746 	if (b != NULL) {
747 		h->head = b->next;
748 		if (h->head == NULL)
749 			h->tail = NULL;
750 	}
751 	return (b);
752 }
753 
754 static void *runstreams(void *arg);
755 
756 static void
757 initstreams(int corefd, int nstreams, int maxcsize)
758 {
759 	int nthreads;
760 	int nblocks;
761 	int i;
762 	block_t *b;
763 	tinfo_t *t;
764 
765 	nthreads = sysconf(_SC_NPROCESSORS_ONLN);
766 	if (nstreams < nthreads)
767 		nthreads = nstreams;
768 	if (nthreads < 1)
769 		nthreads = 1;
770 	nblocks = nthreads * 2;
771 
772 	tinfo = Zalloc(nthreads * sizeof (tinfo_t));
773 	endtinfo = &tinfo[nthreads];
774 
775 	/* init streams */
776 	streams = Zalloc(nstreams * sizeof (stream_t));
777 	endstreams = &streams[nstreams];
778 
779 	/* init stream block buffers */
780 	for (i = 0; i < nblocks; i++) {
781 		b = Zalloc(sizeof (block_t));
782 		b->block = Zalloc(maxcsize);
783 		enqt(&freeblocks, b);
784 	}
785 
786 	/* init worker threads */
787 	(void) pthread_mutex_lock(&lock);
788 	threads_active = 1;
789 	threads_stop = 0;
790 	for (t = tinfo; t != endtinfo; t++) {
791 		t->corefd = dup(corefd);
792 		if (t->corefd < 0) {
793 			nthreads = t - tinfo;
794 			endtinfo = t;
795 			break;
796 		}
797 		if (pthread_create(&t->tid, NULL, runstreams, t) != 0)
798 			logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s",
799 			    strerror(errno));
800 	}
801 	(void) pthread_mutex_unlock(&lock);
802 }
803 
804 static void
805 sbarrier()
806 {
807 	stream_t *s;
808 
809 	(void) pthread_mutex_lock(&lock);
810 	for (s = streams; s != endstreams; s++) {
811 		while (s->bound || s->blocks.head != NULL)
812 			(void) pthread_cond_wait(&cvbarrier, &lock);
813 	}
814 	(void) pthread_mutex_unlock(&lock);
815 }
816 
817 static void
818 stopstreams()
819 {
820 	tinfo_t *t;
821 
822 	if (threads_active) {
823 		sbarrier();
824 		(void) pthread_mutex_lock(&lock);
825 		threads_stop = 1;
826 		(void) pthread_cond_signal(&cvwork);
827 		(void) pthread_mutex_unlock(&lock);
828 		for (t = tinfo; t != endtinfo; t++)
829 			(void) pthread_join(t->tid, NULL);
830 		free(tinfo);
831 		tinfo = NULL;
832 		threads_active = 0;
833 	}
834 }
835 
836 static block_t *
837 getfreeblock()
838 {
839 	block_t *b;
840 
841 	(void) pthread_mutex_lock(&lock);
842 	while ((b = deqh(&freeblocks)) == NULL)
843 		(void) pthread_cond_wait(&cvfree, &lock);
844 	(void) pthread_mutex_unlock(&lock);
845 	return (b);
846 }
847 
/*
 * data page offset from page number
 *
 * BTOP/PTOB convert between bytes and pages using the page shift of the
 * dump; DATAOFF gives the core file offset of data page p.
 */
#define	BTOP(b)		((b) >> dumphdr.dump_pageshift)
#define	PTOB(p)		((p) << dumphdr.dump_pageshift)
#define	DATAOFF(p)	(corehdr.dump_data + PTOB(p))
852 
853 /* check for coreblksize boundary */
854 static int
855 isblkbnd(pgcnt_t pgnum)
856 {
857 	return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0);
858 }
859 
860 static int
861 iszpage(char *buf)
862 {
863 	size_t sz;
864 	uint64_t *pl;
865 
866 	/*LINTED:E_BAD_PTR_CAST_ALIGN*/
867 	pl = (uint64_t *)(buf);
868 	for (sz = 0; sz < pagesize; sz += sizeof (*pl))
869 		if (*pl++ != 0)
870 			return (0);
871 	return (1);
872 }
873 
874 volatile uint_t *hist;
875 
876 /* write pages to the core file */
877 static void
878 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np)
879 {
880 	atomic_inc_uint(&hist[np]);
881 	if (np > 0)
882 		Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum));
883 }
884 
885 /*
886  * Process one lzjb block.
887  * No object (stream header or page) will be split over a block boundary.
888  */
889 static void
890 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz)
891 {
892 	int in = 0;
893 	int csize;
894 	int doflush;
895 	char *out;
896 	size_t dsize;
897 	dumpcsize_t sc;
898 	dumpstreamhdr_t sh;
899 
900 	if (!s->init) {
901 		s->init = 1;
902 		if (s->blkbuf == NULL)
903 			s->blkbuf = Zalloc(coreblksize);
904 		s->state = STREAMSTART;
905 	}
906 	while (in < blocksz) {
907 		switch (s->state) {
908 		case STREAMSTART:
909 			(void) memcpy(&sh, block + in, sizeof (sh));
910 			in += sizeof (sh);
911 			if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0)
912 				logprint(SC_SL_ERR | SC_EXIT_ERR,
913 				    "LZJB STREAMSTART: bad stream header");
914 			if (sh.stream_npages > datahdr.dump_maxrange)
915 				logprint(SC_SL_ERR | SC_EXIT_ERR,
916 				    "LZJB STREAMSTART: bad range: %d > %d",
917 				    sh.stream_npages, datahdr.dump_maxrange);
918 			s->pagenum = sh.stream_pagenum;
919 			s->npages = sh.stream_npages;
920 			s->curpage = s->pagenum;
921 			s->nout = 0;
922 			s->done = 0;
923 			s->state = STREAMPAGES;
924 			break;
925 		case STREAMPAGES:
926 			(void) memcpy(&sc, block + in, cs);
927 			in += cs;
928 			csize = DUMP_GET_CSIZE(sc);
929 			if (csize > pagesize)
930 				logprint(SC_SL_ERR | SC_EXIT_ERR,
931 				    "LZJB STREAMPAGES: bad csize=%d", csize);
932 
933 			out =  s->blkbuf + PTOB(s->nout);
934 			dsize = decompress(block + in, out, csize, pagesize);
935 
936 			if (dsize != pagesize)
937 				logprint(SC_SL_ERR | SC_EXIT_ERR,
938 				    "LZJB STREAMPAGES: dsize %d != pagesize %d",
939 				    dsize, pagesize);
940 
941 			in += csize;
942 			atomic_inc_64(&saved);
943 
944 			doflush = 0;
945 			if (s->nout == 0 && iszpage(out)) {
946 				doflush = 1;
947 				atomic_inc_64(&zpages);
948 			} else if (++s->nout >= BTOP(coreblksize) ||
949 			    isblkbnd(s->curpage + s->nout)) {
950 				doflush = 1;
951 			}
952 			if (++s->done >= s->npages) {
953 				s->state = STREAMSTART;
954 				doflush = 1;
955 			}
956 			if (doflush) {
957 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
958 				s->nout = 0;
959 				s->curpage = s->pagenum + s->done;
960 			}
961 			break;
962 		}
963 	}
964 }
965 
966 /* bzlib library reports errors with this callback */
967 void
968 bz_internal_error(int errcode)
969 {
970 	logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
971 	    BZ2_bzErrorString(errcode));
972 }
973 
974 /*
975  * Return one object in the stream.
976  *
977  * An object (stream header or page) will likely span an input block
978  * of compression data. Return non-zero when an entire object has been
979  * retrieved from the stream.
980  */
981 static int
982 bz2decompress(stream_t *s, void *buf, size_t size)
983 {
984 	int rc;
985 
986 	if (s->strm.avail_out == 0) {
987 		s->strm.next_out = buf;
988 		s->strm.avail_out = size;
989 	}
990 	while (s->strm.avail_in > 0) {
991 		rc = BZ2_bzDecompress(&s->strm);
992 		if (rc == BZ_STREAM_END) {
993 			rc = BZ2_bzDecompressReset(&s->strm);
994 			if (rc != BZ_OK)
995 				logprint(SC_SL_ERR | SC_EXIT_ERR,
996 				    "BZ2_bzDecompressReset: %s",
997 				    BZ2_bzErrorString(rc));
998 			continue;
999 		}
1000 
1001 		if (s->strm.avail_out == 0)
1002 			break;
1003 	}
1004 	return (s->strm.avail_out == 0);
1005 }
1006 
1007 /*
1008  * Process one bzip2 block.
1009  * The interface is documented here:
1010  * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
1011  */
1012 static void
1013 bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
1014 {
1015 	int rc = 0;
1016 	int doflush;
1017 	char *out;
1018 
1019 	if (!s->init) {
1020 		s->init = 1;
1021 		rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
1022 		if (rc != BZ_OK)
1023 			logprint(SC_SL_ERR | SC_EXIT_ERR,
1024 			    "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
1025 		if (s->blkbuf == NULL)
1026 			s->blkbuf = Zalloc(coreblksize);
1027 		s->strm.avail_out = 0;
1028 		s->state = STREAMSTART;
1029 	}
1030 	s->strm.next_in = block;
1031 	s->strm.avail_in = blocksz;
1032 
1033 	while (s->strm.avail_in > 0) {
1034 		switch (s->state) {
1035 		case STREAMSTART:
1036 			if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
1037 				return;
1038 			if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
1039 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1040 				    "BZ2 STREAMSTART: bad stream header");
1041 			if (s->sh.stream_npages > datahdr.dump_maxrange)
1042 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1043 				    "BZ2 STREAMSTART: bad range: %d > %d",
1044 				    s->sh.stream_npages, datahdr.dump_maxrange);
1045 			s->pagenum = s->sh.stream_pagenum;
1046 			s->npages = s->sh.stream_npages;
1047 			s->curpage = s->pagenum;
1048 			s->nout = 0;
1049 			s->done = 0;
1050 			s->state = STREAMPAGES;
1051 			break;
1052 		case STREAMPAGES:
1053 			out = s->blkbuf + PTOB(s->nout);
1054 			if (!bz2decompress(s, out, pagesize))
1055 				return;
1056 
1057 			atomic_inc_64(&saved);
1058 
1059 			doflush = 0;
1060 			if (s->nout == 0 && iszpage(out)) {
1061 				doflush = 1;
1062 				atomic_inc_64(&zpages);
1063 			} else if (++s->nout >= BTOP(coreblksize) ||
1064 			    isblkbnd(s->curpage + s->nout)) {
1065 				doflush = 1;
1066 			}
1067 			if (++s->done >= s->npages) {
1068 				s->state = STREAMSTART;
1069 				doflush = 1;
1070 			}
1071 			if (doflush) {
1072 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
1073 				s->nout = 0;
1074 				s->curpage = s->pagenum + s->done;
1075 			}
1076 			break;
1077 		}
1078 	}
1079 }
1080 
1081 /* report progress */
1082 static void
1083 report_progress()
1084 {
1085 	int sec, percent;
1086 
1087 	if (!interactive)
1088 		return;
1089 
1090 	percent = saved * 100LL / corehdr.dump_npages;
1091 	sec = (gethrtime() - startts) / NANOSEC;
1092 	if (percent > percent_done || sec > sec_done) {
1093 		(void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
1094 		    percent);
1095 		(void) fflush(stdout);
1096 		sec_done = sec;
1097 		percent_done = percent;
1098 	}
1099 }
1100 
/*
 * thread body
 *
 * Each worker repeatedly scans the stream table for a stream that has
 * queued blocks and is not being processed by another worker, binds
 * it, and decompresses its blocks in order until the queue is empty.
 * Binding guarantees that the blocks of one stream are processed
 * sequentially by a single thread at a time.
 *
 * arg is this thread's tinfo_t; it is also the return value.
 *
 * The global lock is held throughout, except while a block is being
 * decompressed.
 */
static void *
runstreams(void *arg)
{
	tinfo_t *t = arg;
	stream_t *s;
	block_t *b;
	int bound;

	(void) pthread_mutex_lock(&lock);
	while (!threads_stop) {
		bound = 0;
		for (s = streams; s != endstreams; s++) {
			/* skip streams that are busy or have no work */
			if (s->bound || s->blocks.head == NULL)
				continue;
			s->bound = 1;
			bound = 1;
			/* let another worker look for other streams */
			(void) pthread_cond_signal(&cvwork);
			while (s->blocks.head != NULL) {
				b = deqh(&s->blocks);
				/* decompress without holding the lock */
				(void) pthread_mutex_unlock(&lock);

				if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
					lzjbblock(t->corefd, s, b->block,
					    b->size);
				else
					bz2block(t->corefd, s, b->block,
					    b->size);

				(void) pthread_mutex_lock(&lock);
				/* recycle the block and wake getfreeblock() */
				enqt(&freeblocks, b);
				(void) pthread_cond_signal(&cvfree);

				report_progress();
			}
			s->bound = 0;
			/* the stream is idle again; wake sbarrier() */
			(void) pthread_cond_signal(&cvbarrier);
		}
		/* nothing found on this pass: sleep until signalled */
		if (!bound && !threads_stop)
			(void) pthread_cond_wait(&cvwork, &lock);
	}
	(void) close(t->corefd);
	/* pass the stop notification on to the next sleeping worker */
	(void) pthread_cond_signal(&cvwork);
	(void) pthread_mutex_unlock(&lock);
	return (arg);
}
1147 
1148 /*
1149  * Process compressed pages.
1150  *
1151  * The old format, now called single-threaded lzjb, is a 32-bit size
1152  * word followed by 'size' bytes of lzjb compression data for one
1153  * page. The new format extends this by storing a 12-bit "tag" in the
1154  * upper bits of the size word. When the size word is pagesize or
1155  * less, it is assumed to be one lzjb page. When the size word is
1156  * greater than pagesize, it is assumed to be a "stream block",
1157  * belonging to up to 4095 streams. In practice, the number of streams
1158  * is set to one less than the number of CPUs running at crash
1159  * time. One CPU processes the crash dump, the remaining CPUs
1160  * separately process groups of data pages.
1161  *
1162  * savecore creates a thread per stream, but never more threads than
1163  * the number of CPUs running savecore. This is because savecore can
1164  * be processing a crash file from a remote machine, which may have
1165  * more CPUs.
1166  *
1167  * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1168  * series of 128KB blocks of compression data. In this case, each
1169  * block has a "tag", in the range 1-4095. Each block is handed off to
1170  * to the threads running "runstreams". The dump format is either lzjb
1171  * or bzip2, never a mixture. These threads, in turn, process the
1172  * compression data for groups of pages. Groups of pages are delimited
1173  * by a "stream header", which indicates a starting pfn and number of
1174  * pages. When a stream block has been read, the condition variable
1175  * "cvwork" is signalled, which causes one of the avaiable threads to
1176  * wake up and process the stream.
1177  *
1178  * In the parallel case there will be streams blocks encoding all data
1179  * pages. The stream of blocks is terminated by a zero size
1180  * word. There can be a few lzjb pages tacked on the end, depending on
1181  * the architecture. The sbarrier function ensures that all stream
1182  * blocks have been processed so that the page number for the few
1183  * single pages at the end can be known.
1184  */
1185 static void
1186 decompress_pages(int corefd)
1187 {
1188 	char *cpage = NULL;
1189 	char *dpage = NULL;
1190 	char *out;
1191 	pgcnt_t curpage = 0;
1192 	block_t *b;
1193 	FILE *dumpf;
1194 	FILE *tracef = NULL;
1195 	stream_t *s;
1196 	size_t dsize;
1197 	size_t insz = FBUFSIZE;
1198 	char *inbuf = Zalloc(insz);
1199 	uint32_t csize;
1200 	dumpcsize_t dcsize;
1201 	int nstreams = datahdr.dump_nstreams;
1202 	int maxcsize = datahdr.dump_maxcsize;
1203 	int nout = 0, tag, doflush;
1204 
1205 	dumpf = fdopen(dup(dumpfd), "rb");
1206 	if (dumpf == NULL)
1207 		logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s",
1208 		    strerror(errno));
1209 
1210 	(void) setvbuf(dumpf, inbuf, _IOFBF, insz);
1211 	Fseek(dumphdr.dump_data, dumpf);
1212 
1213 	/*LINTED: E_CONSTANT_CONDITION*/
1214 	while (1) {
1215 
1216 		/*
1217 		 * The csize word delimits stream blocks.
1218 		 * See dumphdr.h for a description.
1219 		 */
1220 		Fread(&dcsize, sizeof (dcsize), dumpf);
1221 
1222 		tag = DUMP_GET_TAG(dcsize);
1223 		csize = DUMP_GET_CSIZE(dcsize);
1224 
1225 		if (tag != 0) {		/* a stream block */
1226 
1227 			if (nstreams == 0)
1228 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1229 				    "starting data header is missing");
1230 
1231 			if (tag > nstreams)
1232 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1233 				    "stream tag %d not in range 1..%d",
1234 				    tag, nstreams);
1235 
1236 			if (csize > maxcsize)
1237 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1238 				    "block size 0x%x > max csize 0x%x",
1239 				    csize, maxcsize);
1240 
1241 			if (streams == NULL)
1242 				initstreams(corefd, nstreams, maxcsize);
1243 			s = &streams[tag - 1];
1244 			s->tag = tag;
1245 
1246 			b = getfreeblock();
1247 			b->size = csize;
1248 			Fread(b->block, csize, dumpf);
1249 
1250 			(void) pthread_mutex_lock(&lock);
1251 			enqt(&s->blocks, b);
1252 			if (!s->bound)
1253 				(void) pthread_cond_signal(&cvwork);
1254 			(void) pthread_mutex_unlock(&lock);
1255 
1256 		} else if (csize > 0) {		/* one lzjb page */
1257 
1258 			if (csize > pagesize)
1259 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1260 				    "csize 0x%x > pagesize 0x%x",
1261 				    csize, pagesize);
1262 
1263 			if (cpage == NULL)
1264 				cpage = Zalloc(pagesize);
1265 			if (dpage == NULL) {
1266 				dpage = Zalloc(coreblksize);
1267 				nout = 0;
1268 			}
1269 
1270 			Fread(cpage, csize, dumpf);
1271 
1272 			out = dpage + PTOB(nout);
1273 			dsize = decompress(cpage, out, csize, pagesize);
1274 
1275 			if (dsize != pagesize)
1276 				logprint(SC_SL_ERR | SC_EXIT_ERR,
1277 				    "dsize 0x%x != pagesize 0x%x",
1278 				    dsize, pagesize);
1279 
1280 			/*
1281 			 * wait for streams to flush so that 'saved' is correct
1282 			 */
1283 			if (threads_active)
1284 				sbarrier();
1285 
1286 			doflush = 0;
1287 			if (nout == 0)
1288 				curpage = saved;
1289 
1290 			atomic_inc_64(&saved);
1291 
1292 			if (nout == 0 && iszpage(dpage)) {
1293 				doflush = 1;
1294 				atomic_inc_64(&zpages);
1295 			} else if (++nout >= BTOP(coreblksize) ||
1296 			    isblkbnd(curpage + nout) ||
1297 			    saved >= dumphdr.dump_npages) {
1298 				doflush = 1;
1299 			}
1300 
1301 			if (doflush) {
1302 				putpage(corefd, dpage, curpage, nout);
1303 				nout = 0;
1304 			}
1305 
1306 			report_progress();
1307 
1308 			/*
1309 			 * Non-streams lzjb does not use blocks.  Stop
1310 			 * here if all the pages have been decompressed.
1311 			 */
1312 			if (saved >= dumphdr.dump_npages)
1313 				break;
1314 
1315 		} else {
1316 			break;			/* end of data */
1317 		}
1318 	}
1319 
1320 	stopstreams();
1321 	if (tracef != NULL)
1322 		(void) fclose(tracef);
1323 	(void) fclose(dumpf);
1324 	if (inbuf)
1325 		free(inbuf);
1326 	if (cpage)
1327 		free(cpage);
1328 	if (dpage)
1329 		free(dpage);
1330 	if (streams)
1331 		free(streams);
1332 }
1333 
1334 static void
1335 build_corefile(const char *namelist, const char *corefile)
1336 {
1337 	size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1338 	size_t ksyms_size = dumphdr.dump_ksyms_size;
1339 	size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1340 	pfn_t *pfn_table;
1341 	char *ksyms_base = Zalloc(ksyms_size);
1342 	char *ksyms_cbase = Zalloc(ksyms_csize);
1343 	size_t ksyms_dsize;
1344 	Stat_t st;
1345 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1346 	int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1347 
1348 	(void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1349 
1350 	/*
1351 	 * Determine the optimum write size for the core file
1352 	 */
1353 	Fstat(corefd, &st, corefile);
1354 
1355 	if (verbose > 1)
1356 		(void) printf("%s: %ld block size\n", corefile,
1357 		    (long)st.st_blksize);
1358 	coreblksize = st.st_blksize;
1359 	if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1360 		coreblksize = MINCOREBLKSIZE;
1361 
1362 	hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1363 
1364 	/*
1365 	 * This dump file is now uncompressed
1366 	 */
1367 	corehdr.dump_flags &= ~DF_COMPRESSED;
1368 
1369 	/*
1370 	 * Read in the compressed symbol table, copy it to corefile,
1371 	 * decompress it, and write the result to namelist.
1372 	 */
1373 	corehdr.dump_ksyms = pagesize;
1374 	Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1375 	Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1376 
1377 	ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1378 	    ksyms_size);
1379 	if (ksyms_dsize != ksyms_size)
1380 		logprint(SC_SL_WARN,
1381 		    "bad data in symbol table, %lu of %lu bytes saved",
1382 		    ksyms_dsize, ksyms_size);
1383 
1384 	Pwrite(namefd, ksyms_base, ksyms_size, 0);
1385 	(void) close(namefd);
1386 	free(ksyms_cbase);
1387 	free(ksyms_base);
1388 
1389 	(void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1390 
1391 	/*
1392 	 * Read in and write out the pfn table.
1393 	 */
1394 	pfn_table = Zalloc(pfn_table_size);
1395 	corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1396 	Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1397 	Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1398 
1399 	/*
1400 	 * Convert the raw translation data into a hashed dump map.
1401 	 */
1402 	corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1403 	build_dump_map(corefd, pfn_table);
1404 	free(pfn_table);
1405 
1406 	/*
1407 	 * Decompress the pages
1408 	 */
1409 	decompress_pages(corefd);
1410 	(void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved,
1411 	    dumphdr.dump_npages);
1412 
1413 	if (verbose)
1414 		(void) printf("%ld (%ld%%) zero pages were not written\n",
1415 		    (pgcnt_t)zpages, (pgcnt_t)zpages * 100 /
1416 		    dumphdr.dump_npages);
1417 
1418 	if (saved != dumphdr.dump_npages)
1419 		logprint(SC_SL_WARN, "bad data after page %ld", saved);
1420 
1421 	/*
1422 	 * Write out the modified dump headers.
1423 	 */
1424 	Pwrite(corefd, &corehdr, sizeof (corehdr), 0);
1425 	if (!filemode)
1426 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
1427 
1428 	(void) close(corefd);
1429 }
1430 
/*
 * When the system panics, the kernel saves all undelivered messages (messages
 * that never made it out to syslogd(1M)) in the dump.  At a minimum, the
 * panic message itself will always fall into this category.  Upon reboot,
 * the syslog startup script runs savecore -m to recover these messages.
 *
 * To do this, we read the unsent messages from the dump and send them to
 * /dev/conslog on priority band 1.  This has the effect of prepending them
 * to any already-accumulated messages in the console backlog, thus preserving
 * temporal ordering across the reboot.
 *
 * Note: since savecore -m is used *only* for this purpose, it does *not*
 * attempt to save the crash dump.  The dump will be saved later, after
 * syslogd(1M) starts, by the savecore startup script.
 *
 * Returns 0 once a record with a zero magic number is reached.  Records
 * with a bad magic number, an out-of-range size or a bad checksum are
 * reported through logprint() with an SC_EXIT_* flag.  The dump file is
 * opened into the global dumpfd and is left open.
 */
static int
message_save(void)
{
	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE);
	offset_t ldoff;
	log_dump_t ld;
	log_ctl_t lc;
	struct strbuf ctl, dat;
	int logfd;

	logfd = Open("/dev/conslog", O_WRONLY, 0644);
	dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;

	ctl.buf = (void *)&lc;
	ctl.len = sizeof (log_ctl_t);

	dat.buf = Zalloc(DUMP_LOGSIZE);

	for (;;) {
		/* remember where this record starts so it can be rewritten */
		ldoff = dumpoff;

		Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff);
		dumpoff += sizeof (log_dump_t);
		dat.len = ld.ld_msgsize;

		if (ld.ld_magic == 0)
			break;

		if (ld.ld_magic != LOG_MAGIC)
			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
			    "bad magic %x", ld.ld_magic);

		/*
		 * strbuf lengths are signed ints: a corrupt size that
		 * converts to a negative value must be rejected as well,
		 * otherwise it would be handed to Pread() as a length.
		 */
		if (dat.len < 0 || dat.len >= DUMP_LOGSIZE)
			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
			    "bad size %d", ld.ld_msgsize);

		Pread(dumpfd, ctl.buf, ctl.len, dumpoff);
		dumpoff += ctl.len;

		if (ld.ld_csum != checksum32(ctl.buf, ctl.len))
			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
			    "bad log_ctl checksum");

		lc.flags |= SL_LOGONLY;

		Pread(dumpfd, dat.buf, dat.len, dumpoff);
		dumpoff += dat.len;

		if (ld.ld_msum != checksum32(dat.buf, dat.len))
			logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
			    "bad message checksum");

		if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1)
			logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s",
			    strerror(errno));

		ld.ld_magic = 0;	/* clear magic so we never save twice */
		Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff);
	}

	/* release what this function acquired for itself */
	free(dat.buf);
	(void) close(logfd);
	return (0);
}
1508 
/*
 * Extract the numeric suffix N from a dump file name "vmdump.N".
 *
 * The last path component of 'f' is examined first: if it begins with
 * "vmdump" the number is parsed from there.  Otherwise the first
 * occurrence of "vmdump" anywhere in 'f' is tried.
 *
 * Returns N, or -1 when no "vmdump.<number>" could be parsed.
 */
static long
getbounds(const char *f)
{
	long b = -1;
	const char *base = strrchr(f, '/');
	const char *p;

	/* step past the final '/' so that 'base' is the file name itself */
	base = (base != NULL) ? base + 1 : f;

	if (strncmp(base, "vmdump", 6) == 0)
		p = base;
	else
		p = strstr(f, "vmdump");

	if (p != NULL)
		(void) sscanf(p, "vmdump.%ld", &b);

	return (b);
}
1525 
1526 static void
1527 stack_retrieve(char *stack)
1528 {
1529 	summary_dump_t sd;
1530 	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE +
1531 	    DUMP_ERPTSIZE);
1532 	dumpoff -= DUMP_SUMMARYSIZE;
1533 
1534 	dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1535 	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1536 
1537 	Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff);
1538 	dumpoff += sizeof (summary_dump_t);
1539 
1540 	if (sd.sd_magic == 0) {
1541 		*stack = '\0';
1542 		return;
1543 	}
1544 
1545 	if (sd.sd_magic != SUMMARY_MAGIC) {
1546 		*stack = '\0';
1547 		logprint(SC_SL_NONE | SC_IF_VERBOSE,
1548 		    "bad summary magic %x", sd.sd_magic);
1549 		return;
1550 	}
1551 	Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1552 	if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1553 		logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1554 }
1555 
1556 static void
1557 raise_event(enum sc_event_type evidx, char *warn_string)
1558 {
1559 	uint32_t pl = sc_event[evidx].sce_payload;
1560 	char panic_stack[STACK_BUF_SIZE];
1561 	nvlist_t *attr = NULL;
1562 	char uuidbuf[36 + 1];
1563 	int err = 0;
1564 
1565 	if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1566 		goto publish;	/* try to send payload-free event */
1567 
1568 	if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1569 		err |= nvlist_add_string(attr, "dumpdir", savedir);
1570 
1571 	if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1572 		err |= nvlist_add_int64(attr, "instance", bounds);
1573 
1574 	if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1575 		err |= nvlist_add_boolean_value(attr, "compressed",
1576 		    csave ? B_TRUE : B_FALSE);
1577 	}
1578 
1579 	if (pl & SC_PAYLOAD_DUMPADM_EN) {
1580 		char *disabled = defread("DUMPADM_ENABLE=no");
1581 
1582 		err |= nvlist_add_boolean_value(attr, "savecore-enabled",
1583 		    disabled ? B_FALSE : B_TRUE);
1584 	}
1585 
1586 	if (pl & SC_PAYLOAD_IMAGEUUID) {
1587 		(void) strncpy(uuidbuf, corehdr.dump_uuid, 36);
1588 		uuidbuf[36] = '\0';
1589 		err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf);
1590 	}
1591 
1592 	if (pl & SC_PAYLOAD_CRASHTIME) {
1593 		err |= nvlist_add_int64(attr, "crashtime",
1594 		    (int64_t)corehdr.dump_crashtime);
1595 	}
1596 
1597 	if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') {
1598 		err |= nvlist_add_string(attr, "panicstr",
1599 		    corehdr.dump_panicstring);
1600 	}
1601 
1602 	if (pl & SC_PAYLOAD_PANICSTACK) {
1603 		stack_retrieve(panic_stack);
1604 
1605 		if (panic_stack[0] != '\0') {
1606 			/*
1607 			 * The summary page may not be present if the dump
1608 			 * was previously recorded compressed.
1609 			 */
1610 			(void) nvlist_add_string(attr, "panicstack",
1611 			    panic_stack);
1612 		}
1613 	}
1614 
1615 	/* add warning string if this is an ireport for dump failure */
1616 	if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL)
1617 		(void) nvlist_add_string(attr, "failure-reason", warn_string);
1618 
1619 	if (pl & SC_PAYLOAD_DUMPCOMPLETE)
1620 		err |= nvlist_add_boolean_value(attr, "dump-incomplete",
1621 		    dump_incomplete ? B_TRUE : B_FALSE);
1622 
1623 	if (pl & SC_PAYLOAD_FM_PANIC) {
1624 		err |= nvlist_add_boolean_value(attr, "fm-panic",
1625 		    fm_panic ? B_TRUE : B_FALSE);
1626 	}
1627 
1628 	if (pl & SC_PAYLOAD_JUSTCHECKING) {
1629 		err |= nvlist_add_boolean_value(attr, "will-attempt-savecore",
1630 		    cflag ? B_FALSE : B_TRUE);
1631 	}
1632 
1633 	if (err)
1634 		logprint(SC_SL_WARN, "Errors while constructing '%s' "
1635 		    "event payload; will try to publish anyway.");
1636 publish:
1637 	if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1638 	    "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1639 	    attr) != FMEV_SUCCESS) {
1640 		logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1641 		    sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1642 		nvlist_free(attr);
1643 	}
1644 
1645 }
1646 
1647 
1648 int
1649 main(int argc, char *argv[])
1650 {
1651 	int i, c, bfd;
1652 	Stat_t st;
1653 	struct rlimit rl;
1654 	long filebounds = -1;
1655 	char namelist[30], corefile[30], boundstr[30];
1656 	dumpfile = NULL;
1657 
1658 	startts = gethrtime();
1659 
1660 	(void) getrlimit(RLIMIT_NOFILE, &rl);
1661 	rl.rlim_cur = rl.rlim_max;
1662 	(void) setrlimit(RLIMIT_NOFILE, &rl);
1663 
1664 	openlog(progname, LOG_ODELAY, LOG_AUTH);
1665 
1666 	(void) defopen("/etc/dumpadm.conf");
1667 	savedir = defread("DUMPADM_SAVDIR=");
1668 	if (savedir != NULL)
1669 		savedir = strdup(savedir);
1670 
1671 	while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) {
1672 		switch (c) {
1673 		case 'L':
1674 			livedump++;
1675 			break;
1676 		case 'v':
1677 			verbose++;
1678 			break;
1679 		case 'c':
1680 			cflag++;
1681 			break;
1682 		case 'd':
1683 			disregard_valid_flag++;
1684 			break;
1685 		case 'm':
1686 			mflag++;
1687 			break;
1688 		case 'f':
1689 			dumpfile = optarg;
1690 			filebounds = getbounds(dumpfile);
1691 			break;
1692 		case '?':
1693 			usage();
1694 		}
1695 	}
1696 
1697 	/*
1698 	 * If doing something other than extracting an existing dump (i.e.
1699 	 * dumpfile has been provided as an option), the user must be root.
1700 	 */
1701 	if (geteuid() != 0 && dumpfile == NULL) {
1702 		(void) fprintf(stderr, "%s: %s %s\n", progname,
1703 		    gettext("you must be root to use"), progname);
1704 		exit(1);
1705 	}
1706 
1707 	interactive = isatty(STDOUT_FILENO);
1708 
1709 	if (cflag && livedump)
1710 		usage();
1711 
1712 	if (dumpfile == NULL || livedump)
1713 		dumpfd = Open("/dev/dump", O_RDONLY, 0444);
1714 
1715 	if (dumpfile == NULL) {
1716 		dumpfile = Zalloc(MAXPATHLEN);
1717 		if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {
1718 			have_dumpfile = B_FALSE;
1719 			logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
1720 			    "no dump device configured");
1721 		}
1722 	}
1723 
1724 	if (mflag)
1725 		return (message_save());
1726 
1727 	if (optind == argc - 1)
1728 		savedir = argv[optind];
1729 
1730 	if (savedir == NULL || optind < argc - 1)
1731 		usage();
1732 
1733 	if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
1734 		logprint(SC_SL_NONE | SC_EXIT_ERR,
1735 		    "dedicated dump device required");
1736 
1737 	(void) close(dumpfd);
1738 	dumpfd = -1;
1739 
1740 	Stat(dumpfile, &st);
1741 
1742 	filemode = S_ISREG(st.st_mode);
1743 
1744 	if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
1745 		csave = 1;
1746 
1747 	read_dumphdr();
1748 
1749 	/*
1750 	 * We want this message to go to the log file, but not the console.
1751 	 * There's no good way to do that with the existing syslog facility.
1752 	 * We could extend it to handle this, but there doesn't seem to be
1753 	 * a general need for it, so we isolate the complexity here instead.
1754 	 */
1755 	if (dumphdr.dump_panicstring[0] != '\0') {
1756 		int logfd = Open("/dev/conslog", O_WRONLY, 0644);
1757 		log_ctl_t lc;
1758 		struct strbuf ctl, dat;
1759 		char msg[DUMP_PANICSIZE + 100];
1760 		char fmt[] = "reboot after panic: %s";
1761 		uint32_t msgid;
1762 
1763 		STRLOG_MAKE_MSGID(fmt, msgid);
1764 
1765 		/* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
1766 		(void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ",
1767 		    progname, msgid);
1768 		/* LINTED: E_SEC_PRINTF_VAR_FMT */
1769 		(void) sprintf(msg + strlen(msg), fmt,
1770 		    dumphdr.dump_panicstring);
1771 
1772 		lc.pri = LOG_AUTH | LOG_ERR;
1773 		lc.flags = SL_CONSOLE | SL_LOGONLY;
1774 		lc.level = 0;
1775 
1776 		ctl.buf = (void *)&lc;
1777 		ctl.len = sizeof (log_ctl_t);
1778 
1779 		dat.buf = (void *)msg;
1780 		dat.len = strlen(msg) + 1;
1781 
1782 		(void) putmsg(logfd, &ctl, &dat, 0);
1783 		(void) close(logfd);
1784 	}
1785 
1786 	if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
1787 		logprint(SC_SL_WARN, "incomplete dump on dump device");
1788 		dump_incomplete = B_TRUE;
1789 	}
1790 
1791 	if (dumphdr.dump_fm_panic)
1792 		fm_panic = B_TRUE;
1793 
1794 	/*
1795 	 * We have a valid dump on a dump device and know as much about
1796 	 * it as we're going to at this stage.  Raise an event for
1797 	 * logging and so that FMA can open a case for this panic.
1798 	 * Avoid this step for FMA-initiated panics - FMA will replay
1799 	 * ereports off the dump device independently of savecore and
1800 	 * will make a diagnosis, so we don't want to open two cases
1801 	 * for the same event.  Also avoid raising an event for a
1802 	 * livedump, or when we inflating a compressed dump.
1803 	 */
1804 	if (!fm_panic && !livedump && !filemode)
1805 		raise_event(SC_EVENT_DUMP_PENDING, NULL);
1806 
1807 	logprint(SC_SL_WARN, "System dump time: %s",
1808 	    ctime(&dumphdr.dump_crashtime));
1809 
1810 	/*
1811 	 * Option -c is designed for use from svc-dumpadm where we know
1812 	 * that dumpadm -n is in effect but run savecore -c just to
1813 	 * get the above dump_pending_on_device event raised.  If it is run
1814 	 * interactively then just print further panic details.
1815 	 */
1816 	if (cflag) {
1817 		char *disabled = defread("DUMPADM_ENABLE=no");
1818 		int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
1819 		int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;
1820 
1821 		logprint(lvl | ec,
1822 		    "Panic crashdump pending on dump device%s "
1823 		    "run savecore(1M) manually to extract. "
1824 		    "Image UUID %s%s.",
1825 		    disabled ? " but dumpadm -n in effect;" : ";",
1826 		    corehdr.dump_uuid,
1827 		    fm_panic ?  "(fault-management initiated)" : "");
1828 		/*NOTREACHED*/
1829 	}
1830 
1831 	if (chdir(savedir) == -1)
1832 		logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
1833 		    savedir, strerror(errno));
1834 
1835 	check_space(csave);
1836 
1837 	if (filebounds < 0)
1838 		bounds = read_number_from_file("bounds", 0);
1839 	else
1840 		bounds = filebounds;
1841 
1842 	if (csave) {
1843 		size_t metrics_size = datahdr.dump_metrics;
1844 
1845 		(void) sprintf(corefile, "vmdump.%ld", bounds);
1846 
1847 		datahdr.dump_metrics = 0;
1848 
1849 		logprint(SC_SL_ERR,
1850 		    "Saving compressed system crash dump in %s/%s",
1851 		    savedir, corefile);
1852 
1853 		copy_crashfile(corefile);
1854 
1855 		/*
1856 		 * Raise a fault management event that indicates the system
1857 		 * has panicked. We know a reasonable amount about the
1858 		 * condition at this time, but the dump is still compressed.
1859 		 */
1860 		if (!livedump && !fm_panic)
1861 			raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1862 
1863 		if (metrics_size > 0) {
1864 			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1865 			FILE *mfile = fopen(METRICSFILE, "a");
1866 			char *metrics = Zalloc(metrics_size + 1);
1867 
1868 			Pread(dumpfd, metrics, metrics_size, endoff +
1869 			    sizeof (dumphdr) + sizeof (datahdr));
1870 
1871 			if (sec < 1)
1872 				sec = 1;
1873 
1874 			if (mfile == NULL) {
1875 				logprint(SC_SL_WARN,
1876 				    "Can't create %s:\n%s",
1877 				    METRICSFILE, metrics);
1878 			} else {
1879 				(void) fprintf(mfile, "[[[[,,,");
1880 				for (i = 0; i < argc; i++)
1881 					(void) fprintf(mfile, "%s ", argv[i]);
1882 				(void) fprintf(mfile, "\n");
1883 				(void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1884 				    dumphdr.dump_utsname.sysname,
1885 				    dumphdr.dump_utsname.nodename,
1886 				    dumphdr.dump_utsname.release,
1887 				    dumphdr.dump_utsname.version,
1888 				    dumphdr.dump_utsname.machine);
1889 				(void) fprintf(mfile, ",,,%s dump time %s\n",
1890 				    dumphdr.dump_flags & DF_LIVE ? "Live" :
1891 				    "Crash", ctime(&dumphdr.dump_crashtime));
1892 				(void) fprintf(mfile, ",,,%s/%s\n", savedir,
1893 				    corefile);
1894 				(void) fprintf(mfile, "Metrics:\n%s\n",
1895 				    metrics);
1896 				(void) fprintf(mfile, "Copy pages,%ld\n",
1897 				    dumphdr.  dump_npages);
1898 				(void) fprintf(mfile, "Copy time,%d\n", sec);
1899 				(void) fprintf(mfile, "Copy pages/sec,%ld\n",
1900 				    dumphdr.dump_npages / sec);
1901 				(void) fprintf(mfile, "]]]]\n");
1902 				(void) fclose(mfile);
1903 			}
1904 			free(metrics);
1905 		}
1906 
1907 		logprint(SC_SL_ERR,
1908 		    "Decompress the crash dump with "
1909 		    "\n'savecore -vf %s/%s'",
1910 		    savedir, corefile);
1911 
1912 	} else {
1913 		(void) sprintf(namelist, "unix.%ld", bounds);
1914 		(void) sprintf(corefile, "vmcore.%ld", bounds);
1915 
1916 		if (interactive && filebounds >= 0 && access(corefile, F_OK)
1917 		    == 0)
1918 			logprint(SC_SL_NONE | SC_EXIT_ERR,
1919 			    "%s already exists: remove with "
1920 			    "'rm -f %s/{unix,vmcore}.%ld'",
1921 			    corefile, savedir, bounds);
1922 
1923 		logprint(SC_SL_ERR,
1924 		    "saving system crash dump in %s/{unix,vmcore}.%ld",
1925 		    savedir, bounds);
1926 
1927 		build_corefile(namelist, corefile);
1928 
1929 		if (!livedump && !filemode && !fm_panic)
1930 			raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1931 
1932 		if (access(METRICSFILE, F_OK) == 0) {
1933 			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1934 			FILE *mfile = fopen(METRICSFILE, "a");
1935 
1936 			if (sec < 1)
1937 				sec = 1;
1938 
1939 			if (mfile == NULL) {
1940 				logprint(SC_SL_WARN,
1941 				    "Can't create %s: %s",
1942 				    METRICSFILE, strerror(errno));
1943 			} else {
1944 				(void) fprintf(mfile, "[[[[,,,");
1945 				for (i = 0; i < argc; i++)
1946 					(void) fprintf(mfile, "%s ", argv[i]);
1947 				(void) fprintf(mfile, "\n");
1948 				(void) fprintf(mfile, ",,,%s/%s\n", savedir,
1949 				    corefile);
1950 				(void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1951 				    dumphdr.dump_utsname.sysname,
1952 				    dumphdr.dump_utsname.nodename,
1953 				    dumphdr.dump_utsname.release,
1954 				    dumphdr.dump_utsname.version,
1955 				    dumphdr.dump_utsname.machine);
1956 				(void) fprintf(mfile,
1957 				    "Uncompress pages,%"PRIu64"\n", saved);
1958 				(void) fprintf(mfile, "Uncompress time,%d\n",
1959 				    sec);
1960 				(void) fprintf(mfile, "Uncompress pages/sec,%"
1961 				    PRIu64"\n", saved / sec);
1962 				(void) fprintf(mfile, "]]]]\n");
1963 				(void) fclose(mfile);
1964 			}
1965 		}
1966 	}
1967 
1968 	if (filebounds < 0) {
1969 		(void) sprintf(boundstr, "%ld\n", bounds + 1);
1970 		bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1971 		Pwrite(bfd, boundstr, strlen(boundstr), 0);
1972 		(void) close(bfd);
1973 	}
1974 
1975 	if (verbose) {
1976 		int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1977 
1978 		(void) printf("%d:%02d dump %s is done\n",
1979 		    sec / 60, sec % 60,
1980 		    csave ? "copy" : "decompress");
1981 	}
1982 
1983 	if (verbose > 1 && hist != NULL) {
1984 		int i, nw;
1985 
1986 		for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
1987 			nw += hist[i] * i;
1988 		(void) printf("pages count     %%\n");
1989 		for (i = 0; i <= BTOP(coreblksize); ++i) {
1990 			if (hist[i] == 0)
1991 				continue;
1992 			(void) printf("%3d   %5u  %6.2f\n",
1993 			    i, hist[i], 100.0 * hist[i] * i / nw);
1994 		}
1995 	}
1996 
1997 	(void) close(dumpfd);
1998 	dumpfd = -1;
1999 
2000 	return (0);
2001 }
2002