xref: /illumos-gate/usr/src/cmd/savecore/savecore.c (revision ab5a7454a6d76e82a121d74c74d5589cc3d37a8f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <stdarg.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <errno.h>
32 #include <string.h>
33 #include <deflt.h>
34 #include <time.h>
35 #include <syslog.h>
36 #include <stropts.h>
37 #include <pthread.h>
38 #include <limits.h>
39 #include <atomic.h>
40 #include <sys/mem.h>
41 #include <sys/statvfs.h>
42 #include <sys/dumphdr.h>
43 #include <sys/dumpadm.h>
44 #include <sys/compress.h>
45 #include <sys/sysmacros.h>
46 #include <sys/stat.h>
47 #include <sys/resource.h>
48 #include <bzip2/bzlib.h>
49 
50 /* fread/fwrite buffer size */
51 #define	FBUFSIZE		(1ULL << 20)
52 
53 /* minimum size for output buffering */
54 #define	MINCOREBLKSIZE		(1ULL << 17)
55 
56 /* create this file if metrics collection is enabled in the kernel */
57 #define	METRICSFILE "METRICS.csv"
58 
59 static char 	progname[9] = "savecore";
60 static char	*savedir;		/* savecore directory */
61 static char	*dumpfile;		/* source of raw crash dump */
62 static long	bounds;			/* numeric suffix */
63 static long	pagesize;		/* dump pagesize */
64 static int	dumpfd = -1;		/* dumpfile descriptor */
65 static dumphdr_t corehdr, dumphdr;	/* initial and terminal dumphdrs */
66 static offset_t	endoff;			/* offset of end-of-dump header */
67 static int	verbose;		/* chatty mode */
68 static int	disregard_valid_flag;	/* disregard valid flag */
69 static int	livedump;		/* dump the current running system */
70 static int	interactive;		/* user invoked; no syslog */
71 static int	csave;			/* save dump compressed */
72 static int	filemode;		/* processing file, not dump device */
73 static int	percent_done;		/* progress indicator */
74 static hrtime_t	startts;		/* timestamp at start */
75 static volatile uint64_t saved;		/* count of pages written */
76 static volatile uint64_t zpages;	/* count of zero pages not written */
77 static dumpdatahdr_t datahdr;		/* compression info */
78 static long	coreblksize;		/* preferred write size (st_blksize) */
79 
80 static void
81 usage(void)
82 {
83 	(void) fprintf(stderr,
84 	    "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname);
85 	exit(1);
86 }
87 
88 static void
89 logprint(int logpri, int showmsg, int exitcode, char *message, ...)
90 {
91 	va_list args;
92 	char buf[1024];
93 
94 	if (showmsg) {
95 		va_start(args, message);
96 		(void) vsnprintf(buf, sizeof (buf), message, args);
97 		(void) fprintf(stderr, "%s: %s\n", progname, buf);
98 		if (!interactive && logpri >= 0)
99 			syslog(logpri, buf);
100 		va_end(args);
101 	}
102 	if (exitcode >= 0)
103 		exit(exitcode);
104 }
105 
106 /*
107  * System call / libc wrappers that exit on error.
108  */
/*
 * open64() wrapper.  Returns the new descriptor; a failure is reported
 * through logprint() with exit status 1.
 */
static int
Open(const char *name, int oflags, mode_t mode)
{
	int fd = open64(name, oflags, mode);

	if (fd != -1)
		return (fd);

	logprint(LOG_ERR, 1, 1, "open(\"%s\"): %s",
	    name, strerror(errno));
	return (fd);
}
119 
/*
 * Read exactly one object of 'size' bytes from f into buf.  Anything
 * other than one complete item is reported through logprint() with
 * exit status 1, together with the stream's error and EOF indicators.
 */
static void
Fread(void *buf, size_t size, FILE *f)
{
	size_t nitems = fread(buf, size, 1, f);

	if (nitems == 1)
		return;

	logprint(LOG_ERR, 1, 1, "fread: ferror %d feof %d",
	    ferror(f), feof(f));
}
127 
/*
 * Write exactly one object of 'size' bytes from buf to f.  Anything
 * other than one complete item is reported through logprint() with
 * exit status 1.
 */
static void
Fwrite(void *buf, size_t size, FILE *f)
{
	size_t nitems = fwrite(buf, size, 1, f);

	if (nitems == 1)
		return;

	logprint(LOG_ERR, 1, 1, "fwrite: %s", strerror(errno));
}
134 
135 static void
136 Fseek(offset_t off, FILE *f)
137 {
138 	if (fseeko64(f, off, SEEK_SET) != 0)
139 		logprint(LOG_ERR, 1, 1, "fseeko64: %s", strerror(errno));
140 }
141 
142 typedef struct stat64 Stat_t;
143 
144 static void
145 Fstat(int fd, Stat_t *sb, const char *fname)
146 {
147 	if (fstat64(fd, sb) != 0)
148 		logprint(LOG_ERR, 1, 1, "fstat(\"%s\"): %s", fname,
149 		    strerror(errno));
150 }
151 
152 static void
153 Stat(const char *fname, Stat_t *sb)
154 {
155 	if (stat64(fname, sb) != 0)
156 		logprint(LOG_ERR, 1, 1, "stat(\"%s\"): %s", fname,
157 		    strerror(errno));
158 }
159 
160 static void
161 Pread(int fd, void *buf, size_t size, offset_t off)
162 {
163 	ssize_t sz = pread64(fd, buf, size, off);
164 
165 	if (sz < 0)
166 		logprint(LOG_ERR, 1, 1,
167 		    "pread: %s", strerror(errno));
168 	else if (sz != size)
169 		logprint(LOG_ERR, 1, 1,
170 		    "pread: size %ld != %ld", sz, size);
171 }
172 
173 static void
174 Pwrite(int fd, void *buf, size_t size, off64_t off)
175 {
176 	if (pwrite64(fd, buf, size, off) != size)
177 		logprint(LOG_ERR, 1, 1, "pwrite: %s", strerror(errno));
178 }
179 
/*
 * Allocate 'size' bytes of zero-filled memory.  An allocation failure
 * is reported through logprint() with exit status 1.
 */
static void *
Zalloc(size_t size)
{
	void *mem = calloc(size, 1);

	if (mem == NULL)
		logprint(LOG_ERR, 1, 1, "calloc: %s", strerror(errno));
	return (mem);
}
189 
/*
 * Read a decimal number from the start of 'filename'.
 *
 * Returns the number when the file can be opened and holds a
 * non-negative value; otherwise (missing file, unparsable contents or
 * a negative number) returns default_value.
 */
static long
read_number_from_file(const char *filename, long default_value)
{
	long parsed = -1;
	FILE *stream = fopen(filename, "r");

	if (stream == NULL)
		return (default_value);

	/* on a failed conversion 'parsed' keeps its -1 sentinel */
	(void) fscanf(stream, "%ld", &parsed);
	(void) fclose(stream);

	if (parsed < 0)
		return (default_value);
	return (parsed);
}
202 
203 static void
204 read_dumphdr(void)
205 {
206 	if (filemode)
207 		dumpfd = Open(dumpfile, O_RDONLY, 0644);
208 	else
209 		dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
210 	endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
211 	Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
212 	Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
213 
214 	pagesize = dumphdr.dump_pagesize;
215 
216 	if (dumphdr.dump_magic != DUMP_MAGIC)
217 		logprint(-1, 1, 0, "bad magic number %x",
218 		    dumphdr.dump_magic);
219 
220 	if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
221 		logprint(-1, verbose, 0, "dump already processed");
222 
223 	if (dumphdr.dump_version != DUMP_VERSION)
224 		logprint(-1, verbose, 0,
225 		    "dump version (%d) != %s version (%d)",
226 		    dumphdr.dump_version, progname, DUMP_VERSION);
227 
228 	if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
229 		logprint(-1, 1, 0,
230 		    "dump is from %u-bit kernel - cannot save on %u-bit kernel",
231 		    dumphdr.dump_wordsize, DUMP_WORDSIZE);
232 
233 	if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
234 		if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
235 			logprint(-1, verbose, 0,
236 			    "dump data version (%d) != %s data version (%d)",
237 			    datahdr.dump_datahdr_version, progname,
238 			    DUMP_DATAHDR_VERSION);
239 	} else {
240 		memset(&datahdr, 0, sizeof (datahdr));
241 		datahdr.dump_maxcsize = pagesize;
242 	}
243 
244 	/*
245 	 * Read the initial header, clear the valid bits, and compare headers.
246 	 * The main header may have been overwritten by swapping if we're
247 	 * using a swap partition as the dump device, in which case we bail.
248 	 */
249 	Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start);
250 
251 	corehdr.dump_flags &= ~DF_VALID;
252 	dumphdr.dump_flags &= ~DF_VALID;
253 
254 	if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
255 		/*
256 		 * Clear valid bit so we don't complain on every invocation.
257 		 */
258 		if (!filemode)
259 			Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
260 		logprint(LOG_ERR, 1, 1, "initial dump header corrupt");
261 	}
262 }
263 
264 static void
265 check_space(int csave)
266 {
267 	struct statvfs fsb;
268 	int64_t spacefree, dumpsize, minfree, datasize;
269 
270 	if (statvfs(".", &fsb) < 0)
271 		logprint(LOG_ERR, 1, 1, "statvfs: %s", strerror(errno));
272 
273 	dumpsize = dumphdr.dump_data - dumphdr.dump_start;
274 	datasize = dumphdr.dump_npages * pagesize;
275 	if (!csave)
276 		dumpsize += datasize;
277 	else
278 		dumpsize += datahdr.dump_data_csize;
279 
280 	spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
281 	minfree = 1024LL * read_number_from_file("minfree", 1024);
282 	if (spacefree < minfree + dumpsize)
283 		logprint(LOG_ERR, 1, 1,
284 		    "not enough space in %s (%lld MB avail, %lld MB needed)",
285 		    savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
286 }
287 
288 static void
289 build_dump_map(int corefd, const pfn_t *pfn_table)
290 {
291 	long i;
292 	static long misses = 0;
293 	size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
294 	mem_vtop_t vtop;
295 	dump_map_t *dmp = Zalloc(dump_mapsize);
296 	char *inbuf = Zalloc(FBUFSIZE);
297 	FILE *in = fdopen(dup(dumpfd), "rb");
298 
299 	setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
300 	Fseek(dumphdr.dump_map, in);
301 
302 	corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize);
303 
304 	for (i = 0; i < corehdr.dump_nvtop; i++) {
305 		long first = 0;
306 		long last = corehdr.dump_npages - 1;
307 		long middle;
308 		pfn_t pfn;
309 		uintptr_t h;
310 
311 		Fread(&vtop, sizeof (mem_vtop_t), in);
312 		while (last >= first) {
313 			middle = (first + last) / 2;
314 			pfn = pfn_table[middle];
315 			if (pfn == vtop.m_pfn)
316 				break;
317 			if (pfn < vtop.m_pfn)
318 				first = middle + 1;
319 			else
320 				last = middle - 1;
321 		}
322 		if (pfn != vtop.m_pfn) {
323 			if (++misses <= 10)
324 				(void) fprintf(stderr,
325 				    "pfn %ld not found for as=%p, va=%p\n",
326 				    vtop.m_pfn, (void *)vtop.m_as, vtop.m_va);
327 			continue;
328 		}
329 
330 		dmp[i].dm_as = vtop.m_as;
331 		dmp[i].dm_va = (uintptr_t)vtop.m_va;
332 		dmp[i].dm_data = corehdr.dump_data +
333 		    ((uint64_t)middle << corehdr.dump_pageshift);
334 
335 		h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va);
336 		dmp[i].dm_next = dmp[h].dm_first;
337 		dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t);
338 	}
339 
340 	Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map);
341 	free(dmp);
342 	fclose(in);
343 	free(inbuf);
344 }
345 
346 /*
347  * Copy whole sections of the dump device to the file.
348  */
349 static void
350 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf,
351     size_t sz)
352 {
353 	size_t nr;
354 	offset_t off = *offp;
355 
356 	while (nb > 0) {
357 		nr = sz < nb ? sz : (size_t)nb;
358 		Pread(dumpfd, buf, nr, dumpoff);
359 		Pwrite(fd, buf, nr, off);
360 		off += nr;
361 		dumpoff += nr;
362 		nb -= nr;
363 	}
364 	*offp = off;
365 }
366 
367 /*
368  * Copy pages when the dump data header is missing.
369  * This supports older kernels with latest savecore.
370  */
371 static void
372 CopyPages(offset_t dumpoff, offset_t *offp, int fd, char *buf, size_t sz)
373 {
374 	uint32_t csize;
375 	FILE *in = fdopen(dup(dumpfd), "rb");
376 	FILE *out = fdopen(dup(fd), "wb");
377 	char *cbuf = Zalloc(pagesize);
378 	char *outbuf = Zalloc(FBUFSIZE);
379 	pgcnt_t np = dumphdr.dump_npages;
380 
381 	setvbuf(out, outbuf, _IOFBF, FBUFSIZE);
382 	setvbuf(in, buf, _IOFBF, sz);
383 	Fseek(dumphdr.dump_data, in);
384 
385 	Fseek(*offp, out);
386 	while (np > 0) {
387 		Fread(&csize, sizeof (uint32_t), in);
388 		Fwrite(&csize, sizeof (uint32_t), out);
389 		*offp += sizeof (uint32_t);
390 		if (csize > pagesize || csize == 0) {
391 			logprint(LOG_ERR, 1, -1,
392 			    "CopyPages: page %lu csize %d (0x%x) pagesize %d",
393 			    dumphdr.dump_npages - np, csize, csize,
394 			    pagesize);
395 			break;
396 		}
397 		Fread(cbuf, csize, in);
398 		Fwrite(cbuf, csize, out);
399 		*offp += csize;
400 		np--;
401 	}
402 	fclose(in);
403 	fclose(out);
404 	free(outbuf);
405 	free(buf);
406 }
407 
408 /*
409  * Concatenate dump contents into a new file.
410  * Update corehdr with new offsets.
411  */
412 static void
413 copy_crashfile(const char *corefile)
414 {
415 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
416 	size_t bufsz = FBUFSIZE;
417 	char *inbuf = Zalloc(bufsz);
418 	offset_t coreoff;
419 	size_t nb;
420 
421 	logprint(LOG_ERR, verbose, -1,
422 	    "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
423 
424 	/*
425 	 * This dump file is still compressed
426 	 */
427 	corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
428 
429 	/*
430 	 * Leave room for corehdr, it is updated and written last
431 	 */
432 	corehdr.dump_start = 0;
433 	coreoff = sizeof (corehdr);
434 
435 	/*
436 	 * Read in the compressed symbol table, copy it to corefile.
437 	 */
438 	coreoff = roundup(coreoff, pagesize);
439 	corehdr.dump_ksyms = coreoff;
440 	Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
441 	    inbuf, bufsz);
442 
443 	/*
444 	 * Save the pfn table.
445 	 */
446 	coreoff = roundup(coreoff, pagesize);
447 	corehdr.dump_pfn = coreoff;
448 	Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff,
449 	    corefd, inbuf, bufsz);
450 
451 	/*
452 	 * Save the dump map.
453 	 */
454 	coreoff = roundup(coreoff, pagesize);
455 	corehdr.dump_map = coreoff;
456 	Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t),
457 	    &coreoff, corefd, inbuf, bufsz);
458 
459 	/*
460 	 * Save the data pages.
461 	 */
462 	coreoff = roundup(coreoff, pagesize);
463 	corehdr.dump_data = coreoff;
464 	if (datahdr.dump_data_csize != 0)
465 		Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff,
466 		    corefd, inbuf, bufsz);
467 	else
468 		CopyPages(dumphdr.dump_data, &coreoff, corefd, inbuf, bufsz);
469 
470 	/*
471 	 * Now write the modified dump header to front and end of the copy.
472 	 * Make it look like a valid dump device.
473 	 *
474 	 * From dumphdr.h: Two headers are written out: one at the
475 	 * beginning of the dump, and the other at the very end of the
476 	 * dump device. The terminal header is at a known location
477 	 * (end of device) so we can always find it.
478 	 *
479 	 * Pad with zeros to each DUMP_OFFSET boundary.
480 	 */
481 	memset(inbuf, 0, DUMP_OFFSET);
482 
483 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
484 	if (nb > 0) {
485 		Pwrite(corefd, inbuf, nb, coreoff);
486 		coreoff += nb;
487 	}
488 
489 	Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff);
490 	coreoff += sizeof (corehdr);
491 
492 	Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff);
493 	coreoff += sizeof (datahdr);
494 
495 	nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
496 	if (nb > 0) {
497 		Pwrite(corefd, inbuf, nb, coreoff);
498 	}
499 
500 	free(inbuf);
501 	Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start);
502 
503 	/*
504 	 * Write out the modified dump header to the dump device.
505 	 * The dump device has been processed, so DF_VALID is clear.
506 	 */
507 	if (!filemode)
508 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
509 
510 	(void) close(corefd);
511 }
512 
513 /*
514  * compressed streams
515  */
typedef struct blockhdr blockhdr_t;
typedef struct block block_t;

/* singly linked FIFO of blocks: enqt() appends, deqh() removes the head */
struct blockhdr {
	block_t *head;
	block_t *tail;
};

/* one buffer of compressed stream data read from the dump */
struct block {
	block_t *next;		/* following block in the same queue */
	char *block;		/* data buffer (maxcsize bytes) */
	int size;		/* number of valid bytes in the buffer */
};

/* what the decoder expects next in a stream */
typedef enum streamstate {
	STREAMSTART,		/* a stream header */
	STREAMPAGES		/* compressed pages of the current range */
} streamstate_t;

/* decoding state of one compression stream, kept across blocks */
typedef struct stream {
	streamstate_t state;	/* next expected object */
	int init;		/* set once the first block has been seen */
	int tag;		/* tag of the blocks belonging to this stream */
	int bound;		/* a worker thread is processing this stream */
	int nout;		/* pages currently accumulated in blkbuf */
	char *blkbuf;		/* output buffer of coreblksize bytes */
	blockhdr_t blocks;	/* blocks queued for decompression */
	pgcnt_t pagenum;	/* first page of the current range */
	pgcnt_t curpage;	/* page number of the first page in blkbuf */
	pgcnt_t npages;		/* number of pages in the current range */
	pgcnt_t done;		/* pages of the current range processed */
	bz_stream strm;		/* bzip2 decompressor state */
	dumpcsize_t sc;		/* not referenced in the code visible here */
	dumpstreamhdr_t sh;	/* stream header being assembled (bzip2) */
} stream_t;

/* stream array, indexed by tag - 1, and its one-past-the-end pointer */
static stream_t *streams;
static stream_t *endstreams;

/* size of the size word that precedes each compressed page */
const int cs = sizeof (dumpcsize_t);

/* per worker thread information */
typedef struct tinfo {
	pthread_t tid;		/* thread id, used to join the worker */
	int corefd;		/* this worker's dup of the core file fd */
} tinfo_t;

static int threads_stop;	/* set to ask the workers to exit */
static int threads_active;	/* workers have been started */
static tinfo_t *tinfo;		/* worker array */
static tinfo_t *endtinfo;	/* one past the last started worker */

/*
 * 'lock' is held around the block queues, the 'bound' flags and
 * threads_stop.  cvfree is signalled when a block returns to the free
 * list, cvwork when a stream may have work (or workers should stop),
 * and cvbarrier when a worker has finished with a stream.
 */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER;
static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER;
static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER;

/* pool of blocks not currently holding data */
static blockhdr_t freeblocks;
573 
574 static void
575 enqt(blockhdr_t *h, block_t *b)
576 {
577 	b->next = NULL;
578 	if (h->tail == NULL)
579 		h->head = b;
580 	else
581 		h->tail->next = b;
582 	h->tail = b;
583 }
584 
585 static block_t *
586 deqh(blockhdr_t *h)
587 {
588 	block_t *b = h->head;
589 
590 	if (b != NULL) {
591 		h->head = b->next;
592 		if (h->head == NULL)
593 			h->tail = NULL;
594 	}
595 	return (b);
596 }
597 
598 static void *runstreams(void *arg);
599 
600 static void
601 initstreams(int corefd, int nstreams, int maxcsize)
602 {
603 	int nthreads;
604 	int nblocks;
605 	int i;
606 	block_t *b;
607 	tinfo_t *t;
608 
609 	nthreads = sysconf(_SC_NPROCESSORS_ONLN);
610 	if (nstreams < nthreads)
611 		nthreads = nstreams;
612 	if (nthreads < 1)
613 		nthreads = 1;
614 	nblocks = nthreads * 2;
615 
616 	tinfo = Zalloc(nthreads * sizeof (tinfo_t));
617 	endtinfo = &tinfo[nthreads];
618 
619 	/* init streams */
620 	streams = Zalloc(nstreams * sizeof (stream_t));
621 	endstreams = &streams[nstreams];
622 
623 	/* init stream block buffers */
624 	for (i = 0; i < nblocks; i++) {
625 		b = Zalloc(sizeof (block_t));
626 		b->block = Zalloc(maxcsize);
627 		enqt(&freeblocks, b);
628 	}
629 
630 	/* init worker threads */
631 	pthread_mutex_lock(&lock);
632 	threads_active = 1;
633 	threads_stop = 0;
634 	for (t = tinfo; t != endtinfo; t++) {
635 		t->corefd = dup(corefd);
636 		if (t->corefd < 0) {
637 			nthreads = t - tinfo;
638 			endtinfo = t;
639 			break;
640 		}
641 		if (pthread_create(&t->tid, NULL, runstreams, t) != 0)
642 			logprint(LOG_ERR, 1, 1, "pthread_create: %s",
643 			    strerror(errno));
644 	}
645 	pthread_mutex_unlock(&lock);
646 }
647 
648 static void
649 sbarrier()
650 {
651 	stream_t *s;
652 
653 	pthread_mutex_lock(&lock);
654 	for (s = streams; s != endstreams; s++) {
655 		while (s->bound || s->blocks.head != NULL)
656 			pthread_cond_wait(&cvbarrier, &lock);
657 	}
658 	pthread_mutex_unlock(&lock);
659 }
660 
661 static void
662 stopstreams()
663 {
664 	tinfo_t *t;
665 
666 	if (threads_active) {
667 		sbarrier();
668 		pthread_mutex_lock(&lock);
669 		threads_stop = 1;
670 		pthread_cond_signal(&cvwork);
671 		pthread_mutex_unlock(&lock);
672 		for (t = tinfo; t != endtinfo; t++)
673 			pthread_join(t->tid, NULL);
674 		free(tinfo);
675 		tinfo = NULL;
676 		threads_active = 0;
677 	}
678 }
679 
680 static block_t *
681 getfreeblock()
682 {
683 	block_t *b;
684 
685 	pthread_mutex_lock(&lock);
686 	while ((b = deqh(&freeblocks)) == NULL)
687 		pthread_cond_wait(&cvfree, &lock);
688 	pthread_mutex_unlock(&lock);
689 	return (b);
690 }
691 
692 /* data page offset from page number */
693 #define	BTOP(b)		((b) >> dumphdr.dump_pageshift)
694 #define	PTOB(p)		((p) << dumphdr.dump_pageshift)
695 #define	DATAOFF(p)	(corehdr.dump_data + PTOB(p))
696 
697 /* check for coreblksize boundary */
698 static int
699 isblkbnd(pgcnt_t pgnum)
700 {
701 	return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0);
702 }
703 
704 static int
705 iszpage(char *buf)
706 {
707 	size_t sz;
708 	uint64_t *pl;
709 
710 	pl = (uint64_t *)(buf);
711 	for (sz = 0; sz < pagesize; sz += sizeof (*pl))
712 		if (*pl++ != 0)
713 			return (0);
714 	return (1);
715 }
716 
717 volatile uint_t *hist;
718 
719 /* write pages to the core file */
720 static void
721 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np)
722 {
723 	atomic_inc_uint(&hist[np]);
724 	if (np > 0)
725 		Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum));
726 }
727 
728 /*
729  * Process one lzjb block.
730  * No object (stream header or page) will be split over a block boundary.
731  */
732 static void
733 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz)
734 {
735 	int rc = 0;
736 	int in = 0;
737 	int csize;
738 	int doflush;
739 	char *out;
740 	size_t dsize;
741 	dumpcsize_t sc;
742 	dumpstreamhdr_t sh;
743 
744 	if (!s->init) {
745 		s->init = 1;
746 		if (s->blkbuf == NULL)
747 			s->blkbuf = Zalloc(coreblksize);
748 		s->state = STREAMSTART;
749 	}
750 	while (in < blocksz) {
751 		switch (s->state) {
752 		case STREAMSTART:
753 			memcpy(&sh, block + in, sizeof (sh));
754 			in += sizeof (sh);
755 			if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0)
756 				logprint(LOG_ERR, 1, 1,
757 				    "LZJB STREAMSTART: bad stream header");
758 			if (sh.stream_npages > datahdr.dump_maxrange)
759 				logprint(LOG_ERR, 1, 1,
760 				    "LZJB STREAMSTART: bad range: %d > %d",
761 				    sh.stream_npages, datahdr.dump_maxrange);
762 			s->pagenum = sh.stream_pagenum;
763 			s->npages = sh.stream_npages;
764 			s->curpage = s->pagenum;
765 			s->nout = 0;
766 			s->done = 0;
767 			s->state = STREAMPAGES;
768 			break;
769 		case STREAMPAGES:
770 			memcpy(&sc, block + in, cs);
771 			in += cs;
772 			csize = DUMP_GET_CSIZE(sc);
773 			if (csize > pagesize)
774 				logprint(LOG_ERR, 1, 1,
775 				    "LZJB STREAMPAGES: bad csize=%d", csize);
776 
777 			out =  s->blkbuf + PTOB(s->nout);
778 			dsize = decompress(block + in, out, csize, pagesize);
779 
780 			if (dsize != pagesize)
781 				logprint(LOG_ERR, 1, 1,
782 				    "LZJB STREAMPAGES: dsize %d != pagesize %d",
783 				    dsize, pagesize);
784 
785 			in += csize;
786 			atomic_inc_64(&saved);
787 
788 			doflush = 0;
789 			if (s->nout == 0 && iszpage(out)) {
790 				doflush = 1;
791 				atomic_inc_64(&zpages);
792 			} else if (++s->nout >= BTOP(coreblksize) ||
793 			    isblkbnd(s->curpage + s->nout)) {
794 				doflush = 1;
795 			}
796 			if (++s->done >= s->npages) {
797 				s->state = STREAMSTART;
798 				doflush = 1;
799 			}
800 			if (doflush) {
801 				putpage(corefd, s->blkbuf, s->curpage, s->nout);
802 				s->nout = 0;
803 				s->curpage = s->pagenum + s->done;
804 			}
805 			break;
806 		}
807 	}
808 }
809 
/*
 * Callback through which the bzlib library reports internal errors:
 * log the error text for errcode and exit with status 1.
 */
void
bz_internal_error(int errcode)
{
	const char *why = BZ2_bzErrorString(errcode);

	logprint(LOG_ERR, 1, 1, "bz_internal_error: err %s\n", why);
}
817 
818 /*
819  * Return one object in the stream.
820  *
821  * An object (stream header or page) will likely span an input block
822  * of compression data. Return non-zero when an entire object has been
823  * retrieved from the stream.
824  */
825 static int
826 bz2decompress(stream_t *s, void *buf, size_t size)
827 {
828 	int rc;
829 
830 	if (s->strm.avail_out == 0) {
831 		s->strm.next_out = buf;
832 		s->strm.avail_out = size;
833 	}
834 	while (s->strm.avail_in > 0) {
835 		rc = BZ2_bzDecompress(&s->strm);
836 		if (rc == BZ_STREAM_END) {
837 			rc = BZ2_bzDecompressReset(&s->strm);
838 			if (rc != BZ_OK)
839 				logprint(LOG_ERR, 1, 1,
840 				    "BZ2_bzDecompressReset: %s",
841 				    BZ2_bzErrorString(rc));
842 			continue;
843 		}
844 
845 		if (s->strm.avail_out == 0)
846 			break;
847 	}
848 	return (s->strm.avail_out == 0);
849 }
850 
/*
 * Process one bzip2 block.
 * The interface is documented here:
 * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
 *
 *	corefd		descriptor the decompressed pages are written to
 *	s		per-stream state, carried from block to block
 *	block, blocksz	the compressed input
 *
 * Unlike the lzjb case, an object may be split over two blocks.  When
 * the input runs out in the middle of a stream header or page, the
 * function returns and the object is completed from the next block of
 * the same stream.
 */
static void
bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
{
	int rc = 0;
	int doflush;
	char *out;

	/* first block of this stream: set up the decompressor and buffer */
	if (!s->init) {
		s->init = 1;
		rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
		if (rc != BZ_OK)
			logprint(LOG_ERR, 1, 1,
			    "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
		if (s->blkbuf == NULL)
			s->blkbuf = Zalloc(coreblksize);
		/* avail_out == 0 makes bz2decompress() start a new object */
		s->strm.avail_out = 0;
		s->state = STREAMSTART;
	}
	s->strm.next_in = block;
	s->strm.avail_in = blocksz;

	while (s->strm.avail_in > 0) {
		switch (s->state) {
		case STREAMSTART:
			/* header not complete yet: wait for the next block */
			if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
				return;
			if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
				logprint(LOG_ERR, 1, 1,
				    "BZ2 STREAMSTART: bad stream header");
			if (s->sh.stream_npages > datahdr.dump_maxrange)
				logprint(LOG_ERR, 1, 1,
				    "BZ2 STREAMSTART: bad range: %d > %d",
				    s->sh.stream_npages, datahdr.dump_maxrange);
			s->pagenum = s->sh.stream_pagenum;
			s->npages = s->sh.stream_npages;
			s->curpage = s->pagenum;
			s->nout = 0;
			s->done = 0;
			s->state = STREAMPAGES;
			break;
		case STREAMPAGES:
			/* the page is decompressed straight into blkbuf */
			out = s->blkbuf + PTOB(s->nout);
			if (!bz2decompress(s, out, pagesize))
				return;

			atomic_inc_64(&saved);

			doflush = 0;
			if (s->nout == 0 && iszpage(out)) {
				/* leading zero page: skip it, write nothing */
				doflush = 1;
				atomic_inc_64(&zpages);
			} else if (++s->nout >= BTOP(coreblksize) ||
			    isblkbnd(s->curpage + s->nout)) {
				/* buffer full or next page on a block boundary */
				doflush = 1;
			}
			if (++s->done >= s->npages) {
				/* range complete: a stream header comes next */
				s->state = STREAMSTART;
				doflush = 1;
			}
			if (doflush) {
				putpage(corefd, s->blkbuf, s->curpage, s->nout);
				s->nout = 0;
				s->curpage = s->pagenum + s->done;
			}
			break;
		}
	}
}
923 }
924 
925 /* report progress */
926 static void
927 report_progress()
928 {
929 	int sec, percent;
930 
931 	if (!interactive)
932 		return;
933 
934 	percent = saved * 100LL / corehdr.dump_npages;
935 	if (percent > percent_done) {
936 		sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
937 		(void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
938 		    percent);
939 		(void) fflush(stdout);
940 		percent_done = percent;
941 	}
942 }
943 
/*
 * Thread body of a decompression worker.
 *
 * arg is this worker's tinfo_t, which is also the return value.  Until
 * threads_stop is set, the worker scans the stream array for a stream
 * that has queued blocks and is not being handled by another worker,
 * claims it, and decompresses its blocks into the core file.
 */
static void *
runstreams(void *arg)
{
	tinfo_t *t = arg;
	stream_t *s;
	block_t *b;
	int bound;

	pthread_mutex_lock(&lock);
	while (!threads_stop) {
		bound = 0;	/* set when this pass found a stream to serve */
		for (s = streams; s != endstreams; s++) {
			if (s->bound || s->blocks.head == NULL)
				continue;
			/* claim the stream so that no other worker takes it */
			s->bound = 1;
			bound = 1;
			/* wake another worker; other streams may have work */
			pthread_cond_signal(&cvwork);
			while (s->blocks.head != NULL) {
				b = deqh(&s->blocks);
				/* decompress without holding the lock */
				pthread_mutex_unlock(&lock);

				if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
					lzjbblock(t->corefd, s, b->block,
					    b->size);
				else
					bz2block(t->corefd, s, b->block,
					    b->size);

				pthread_mutex_lock(&lock);
				/* hand the buffer back and wake a waiter */
				enqt(&freeblocks, b);
				pthread_cond_signal(&cvfree);

				report_progress();
			}
			/* release the stream and let sbarrier() re-check it */
			s->bound = 0;
			pthread_cond_signal(&cvbarrier);
		}
		/* nothing found in a full pass: sleep until signalled */
		if (!bound && !threads_stop)
			pthread_cond_wait(&cvwork, &lock);
	}
	close(t->corefd);
	/*
	 * stopstreams() signals cvwork only once; presumably this signal
	 * passes the wakeup on so that the next waiting worker also sees
	 * threads_stop.
	 */
	pthread_cond_signal(&cvwork);
	pthread_mutex_unlock(&lock);
	return (arg);
}
990 
991 /*
992  * Process compressed pages.
993  *
994  * The old format, now called single-threaded lzjb, is a 32-bit size
995  * word followed by 'size' bytes of lzjb compression data for one
996  * page. The new format extends this by storing a 12-bit "tag" in the
997  * upper bits of the size word. When the size word is pagesize or
998  * less, it is assumed to be one lzjb page. When the size word is
999  * greater than pagesize, it is assumed to be a "stream block",
1000  * belonging to up to 4095 streams. In practice, the number of streams
1001  * is set to one less than the number of CPUs running at crash
1002  * time. One CPU processes the crash dump, the remaining CPUs
1003  * separately process groups of data pages.
1004  *
1005  * savecore creates a thread per stream, but never more threads than
1006  * the number of CPUs running savecore. This is because savecore can
1007  * be processing a crash file from a remote machine, which may have
1008  * more CPUs.
1009  *
1010  * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1011  * series of 128KB blocks of compression data. In this case, each
 * block has a "tag", in the range 1-4095. Each block is handed off to
 * the threads running "runstreams". The dump format is either lzjb
1014  * or bzip2, never a mixture. These threads, in turn, process the
1015  * compression data for groups of pages. Groups of pages are delimited
1016  * by a "stream header", which indicates a starting pfn and number of
1017  * pages. When a stream block has been read, the condition variable
 * "cvwork" is signalled, which causes one of the available threads to
1019  * wake up and process the stream.
1020  *
 * In the parallel case there will be stream blocks encoding all data
1022  * pages. The stream of blocks is terminated by a zero size
1023  * word. There can be a few lzjb pages tacked on the end, depending on
1024  * the architecture. The sbarrier function ensures that all stream
1025  * blocks have been processed so that the page number for the few
1026  * single pages at the end can be known.
1027  */
static void
decompress_pages(int corefd)
{
	char *cpage = NULL;		/* one compressed single page */
	char *dpage = NULL;		/* output buffer for single pages */
	char *out;
	pgcnt_t curpage;		/* page number of first page in dpage */
	block_t *b;
	FILE *dumpf;
	FILE *tracef = NULL;		/* never opened in this function */
	stream_t *s;
	size_t dsize;
	size_t insz = FBUFSIZE;
	char *inbuf = Zalloc(insz);
	uint32_t csize;
	dumpcsize_t dcsize;
	dumpstreamhdr_t sh;		/* not used in this function */
	int nstreams = datahdr.dump_nstreams;
	int maxcsize = datahdr.dump_maxcsize;
	int nout, tag, doflush;		/* nout: pages gathered in dpage */

	/* read the dump through a private, fully buffered stdio stream */
	dumpf = fdopen(dup(dumpfd), "rb");
	if (dumpf == NULL)
		logprint(LOG_ERR, 1, 1, "fdopen: %s", strerror(errno));

	setvbuf(dumpf, inbuf, _IOFBF, insz);
	Fseek(dumphdr.dump_data, dumpf);

	while (1) {

		/*
		 * The csize word delimits stream blocks.
		 * See dumphdr.h for a description.
		 */
		Fread(&dcsize, sizeof (dcsize), dumpf);

		tag = DUMP_GET_TAG(dcsize);
		csize = DUMP_GET_CSIZE(dcsize);

		if (tag != 0) {		/* a stream block */

			if (nstreams == 0)
				logprint(LOG_ERR, 1, 1,
				    "starting data header is missing");

			if (tag > nstreams)
				logprint(LOG_ERR, 1, 1,
				    "stream tag %d not in range 1..%d",
				    tag, nstreams);

			if (csize > maxcsize)
				logprint(LOG_ERR, 1, 1,
				    "block size 0x%x > max csize 0x%x",
				    csize, maxcsize);

			/* workers are started on the first stream block */
			if (streams == NULL)
				initstreams(corefd, nstreams, maxcsize);
			s = &streams[tag - 1];
			s->tag = tag;

			/* may wait until a worker returns a buffer */
			b = getfreeblock();
			b->size = csize;
			Fread(b->block, csize, dumpf);

			/* queue the block; wake a worker if none has the stream */
			pthread_mutex_lock(&lock);
			enqt(&s->blocks, b);
			if (!s->bound)
				pthread_cond_signal(&cvwork);
			pthread_mutex_unlock(&lock);

		} else if (csize > 0) {		/* one lzjb page */

			if (csize > pagesize)
				logprint(LOG_ERR, 1, 1,
				    "csize 0x%x > pagesize 0x%x",
				    csize, pagesize);

			/* buffers for single pages are allocated on first use */
			if (cpage == NULL)
				cpage = Zalloc(pagesize);
			if (dpage == NULL) {
				dpage = Zalloc(coreblksize);
				nout = 0;
			}

			Fread(cpage, csize, dumpf);

			out = dpage + PTOB(nout);
			dsize = decompress(cpage, out, csize, pagesize);

			if (dsize != pagesize)
				logprint(LOG_ERR, 1, 1,
				    "dsize 0x%x != pagesize 0x%x",
				    dsize, pagesize);

			/*
			 * wait for streams to flush so that 'saved' is correct
			 */
			if (threads_active)
				sbarrier();

			doflush = 0;
			/* the page count so far is this page's page number */
			if (nout == 0)
				curpage = saved;

			atomic_inc_64(&saved);

			if (nout == 0 && iszpage(dpage)) {
				/* leading zero page: skip it, write nothing */
				doflush = 1;
				atomic_inc_64(&zpages);
			} else if (++nout >= BTOP(coreblksize) ||
			    isblkbnd(curpage + nout) ||
			    saved >= dumphdr.dump_npages) {
				/* buffer full, block boundary, or last page */
				doflush = 1;
			}

			if (doflush) {
				putpage(corefd, dpage, curpage, nout);
				nout = 0;
			}

			report_progress();

			/*
			 * Non-streams lzjb does not use blocks.  Stop
			 * here if all the pages have been decompressed.
			 */
			if (saved >= dumphdr.dump_npages)
				break;

		} else {
			break;			/* end of data */
		}
	}

	/* waits for outstanding stream work, then joins the workers */
	stopstreams();
	if (tracef != NULL)
		fclose(tracef);
	/* close the stream before freeing the buffer it was given */
	fclose(dumpf);
	if (inbuf)
		free(inbuf);
	if (cpage)
		free(cpage);
	if (dpage)
		free(dpage);
	if (streams)
		free(streams);
}
1175 
1176 static void
1177 build_corefile(const char *namelist, const char *corefile)
1178 {
1179 	size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1180 	size_t ksyms_size = dumphdr.dump_ksyms_size;
1181 	size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1182 	pfn_t *pfn_table;
1183 	char *ksyms_base = Zalloc(ksyms_size);
1184 	char *ksyms_cbase = Zalloc(ksyms_csize);
1185 	size_t ksyms_dsize;
1186 	Stat_t st;
1187 	int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1188 	int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1189 
1190 	(void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1191 
1192 	/*
1193 	 * Determine the optimum write size for the core file
1194 	 */
1195 	Fstat(corefd, &st, corefile);
1196 
1197 	if (verbose > 1)
1198 		printf("%s: %ld block size\n", corefile, st.st_blksize);
1199 	coreblksize = st.st_blksize;
1200 	if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1201 		coreblksize = MINCOREBLKSIZE;
1202 
1203 	hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1204 
1205 	/*
1206 	 * This dump file is now uncompressed
1207 	 */
1208 	corehdr.dump_flags &= ~DF_COMPRESSED;
1209 
1210 	/*
1211 	 * Read in the compressed symbol table, copy it to corefile,
1212 	 * decompress it, and write the result to namelist.
1213 	 */
1214 	corehdr.dump_ksyms = pagesize;
1215 	Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1216 	Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1217 
1218 	ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1219 	    ksyms_size);
1220 	if (ksyms_dsize != ksyms_size)
1221 		logprint(LOG_WARNING, 1, -1,
1222 		    "bad data in symbol table, %lu of %lu bytes saved",
1223 		    ksyms_dsize, ksyms_size);
1224 
1225 	Pwrite(namefd, ksyms_base, ksyms_size, 0);
1226 	(void) close(namefd);
1227 	free(ksyms_cbase);
1228 	free(ksyms_base);
1229 
1230 	(void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1231 
1232 	/*
1233 	 * Read in and write out the pfn table.
1234 	 */
1235 	pfn_table = Zalloc(pfn_table_size);
1236 	corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1237 	Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1238 	Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1239 
1240 	/*
1241 	 * Convert the raw translation data into a hashed dump map.
1242 	 */
1243 	corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1244 	build_dump_map(corefd, pfn_table);
1245 	free(pfn_table);
1246 
1247 	/*
1248 	 * Decompress the pages
1249 	 */
1250 	decompress_pages(corefd);
1251 	(void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved,
1252 	    dumphdr.dump_npages);
1253 
1254 	if (verbose)
1255 		(void) printf("%ld (%ld%%) zero pages were not written\n",
1256 		    (pgcnt_t)zpages, (pgcnt_t)zpages * 100 /
1257 		    dumphdr.dump_npages);
1258 
1259 	if (saved != dumphdr.dump_npages)
1260 		logprint(LOG_WARNING, 1, -1, "bad data after page %ld", saved);
1261 
1262 	/*
1263 	 * Write out the modified dump headers.
1264 	 */
1265 	Pwrite(corefd, &corehdr, sizeof (corehdr), 0);
1266 	if (!filemode)
1267 		Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
1268 
1269 	(void) close(corefd);
1270 }
1271 
1272 /*
1273  * When the system panics, the kernel saves all undelivered messages (messages
1274  * that never made it out to syslogd(1M)) in the dump.  At a mimimum, the
1275  * panic message itself will always fall into this category.  Upon reboot,
1276  * the syslog startup script runs savecore -m to recover these messages.
1277  *
1278  * To do this, we read the unsent messages from the dump and send them to
1279  * /dev/conslog on priority band 1.  This has the effect of prepending them
1280  * to any already-accumulated messages in the console backlog, thus preserving
1281  * temporal ordering across the reboot.
1282  *
1283  * Note: since savecore -m is used *only* for this purpose, it does *not*
1284  * attempt to save the crash dump.  The dump will be saved later, after
1285  * syslogd(1M) starts, by the savecore startup script.
1286  */
1287 static int
1288 message_save(void)
1289 {
1290 	offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE);
1291 	offset_t ldoff;
1292 	log_dump_t ld;
1293 	log_ctl_t lc;
1294 	struct strbuf ctl, dat;
1295 	int logfd;
1296 
1297 	logfd = Open("/dev/conslog", O_WRONLY, 0644);
1298 	dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1299 	dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1300 
1301 	ctl.buf = (void *)&lc;
1302 	ctl.len = sizeof (log_ctl_t);
1303 
1304 	dat.buf = Zalloc(DUMP_LOGSIZE);
1305 
1306 	for (;;) {
1307 		ldoff = dumpoff;
1308 
1309 		Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff);
1310 		dumpoff += sizeof (log_dump_t);
1311 		dat.len = ld.ld_msgsize;
1312 
1313 		if (ld.ld_magic == 0)
1314 			break;
1315 
1316 		if (ld.ld_magic != LOG_MAGIC)
1317 			logprint(LOG_ERR, verbose, 0, "bad magic %x",
1318 			    ld.ld_magic);
1319 
1320 		if (dat.len >= DUMP_LOGSIZE)
1321 			logprint(LOG_ERR, verbose, 0, "bad size %d",
1322 			    ld.ld_msgsize);
1323 
1324 		Pread(dumpfd, ctl.buf, ctl.len, dumpoff);
1325 		dumpoff += ctl.len;
1326 
1327 		if (ld.ld_csum != checksum32(ctl.buf, ctl.len))
1328 			logprint(LOG_ERR, verbose, 0, "bad log_ctl checksum");
1329 
1330 		lc.flags |= SL_LOGONLY;
1331 
1332 		Pread(dumpfd, dat.buf, dat.len, dumpoff);
1333 		dumpoff += dat.len;
1334 
1335 		if (ld.ld_msum != checksum32(dat.buf, dat.len))
1336 			logprint(LOG_ERR, verbose, 0, "bad message checksum");
1337 
1338 		if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1)
1339 			logprint(LOG_ERR, 1, 1, "putpmsg: %s", strerror(errno));
1340 
1341 		ld.ld_magic = 0;	/* clear magic so we never save twice */
1342 		Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff);
1343 	}
1344 	return (0);
1345 }
1346 
/*
 * Extract the sequence number from a dump file name of the form
 * [dir/]vmdump.<n>.  Only the last path component is examined and any
 * text following the number is ignored.
 *
 * Returns <n>, or -1 when the last component does not start with
 * "vmdump.", has no number after the prefix, or the number does not fit
 * in a long.  The range check matters because sscanf("%ld") has undefined
 * behavior for unrepresentable values.
 */
static long
getbounds(const char *f)
{
	static const char prefix[] = "vmdump.";
	const size_t plen = sizeof (prefix) - 1;
	const char *base = strrchr(f, '/');
	char *end;
	long b;

	base = (base != NULL) ? base + 1 : f;
	if (strncmp(base, prefix, plen) != 0)
		return (-1);
	base += plen;

	errno = 0;
	b = strtol(base, &end, 10);
	if (end == base || errno == ERANGE)
		return (-1);

	return (b);
}
1356 
1357 int
1358 main(int argc, char *argv[])
1359 {
1360 	int i, n, c, bfd;
1361 	int mflag = 0;
1362 	Stat_t st;
1363 	struct rlimit rl;
1364 	long filebounds = -1;
1365 	char namelist[30], corefile[30], boundstr[30];
1366 
1367 	startts = gethrtime();
1368 
1369 	getrlimit(RLIMIT_NOFILE, &rl);
1370 	rl.rlim_cur = rl.rlim_max;
1371 	setrlimit(RLIMIT_NOFILE, &rl);
1372 
1373 	openlog(progname, LOG_ODELAY, LOG_AUTH);
1374 
1375 	(void) defopen("/etc/dumpadm.conf");
1376 	savedir = defread("DUMPADM_SAVDIR=");
1377 	if (savedir != NULL)
1378 		savedir = strdup(savedir);
1379 
1380 	while ((c = getopt(argc, argv, "Lvdmf:")) != EOF) {
1381 		switch (c) {
1382 		case 'L':
1383 			livedump++;
1384 			break;
1385 		case 'v':
1386 			verbose++;
1387 			break;
1388 		case 'd':
1389 			disregard_valid_flag++;
1390 			break;
1391 		case 'm':
1392 			mflag++;
1393 			break;
1394 		case 'f':
1395 			dumpfile = optarg;
1396 			filebounds = getbounds(dumpfile);
1397 			break;
1398 		case '?':
1399 			usage();
1400 		}
1401 	}
1402 
1403 	interactive = isatty(STDOUT_FILENO);
1404 
1405 	if (dumpfile == NULL || livedump)
1406 		dumpfd = Open("/dev/dump", O_RDONLY, 0444);
1407 
1408 	if (dumpfile == NULL) {
1409 		dumpfile = Zalloc(MAXPATHLEN);
1410 		if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1)
1411 			logprint(-1, interactive, 1,
1412 			    "no dump device configured");
1413 	}
1414 
1415 	if (mflag)
1416 		return (message_save());
1417 
1418 	if (optind == argc - 1)
1419 		savedir = argv[optind];
1420 
1421 	if (savedir == NULL || optind < argc - 1)
1422 		usage();
1423 
1424 	if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
1425 		logprint(-1, 1, 1, "dedicated dump device required");
1426 
1427 	(void) close(dumpfd);
1428 	dumpfd = -1;
1429 
1430 	Stat(dumpfile, &st);
1431 
1432 	filemode = S_ISREG(st.st_mode);
1433 
1434 	if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
1435 		csave = 1;
1436 
1437 	read_dumphdr();
1438 
1439 	/*
1440 	 * We want this message to go to the log file, but not the console.
1441 	 * There's no good way to do that with the existing syslog facility.
1442 	 * We could extend it to handle this, but there doesn't seem to be
1443 	 * a general need for it, so we isolate the complexity here instead.
1444 	 */
1445 	if (dumphdr.dump_panicstring[0] != '\0') {
1446 		int logfd = Open("/dev/conslog", O_WRONLY, 0644);
1447 		log_ctl_t lc;
1448 		struct strbuf ctl, dat;
1449 		char msg[DUMP_PANICSIZE + 100];
1450 		char fmt[] = "reboot after panic: %s";
1451 		uint32_t msgid;
1452 
1453 		STRLOG_MAKE_MSGID(fmt, msgid);
1454 
1455 		(void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ",
1456 		    progname, msgid);
1457 		(void) sprintf(msg + strlen(msg), fmt,
1458 		    dumphdr.dump_panicstring);
1459 
1460 		lc.pri = LOG_AUTH | LOG_ERR;
1461 		lc.flags = SL_CONSOLE | SL_LOGONLY;
1462 		lc.level = 0;
1463 
1464 		ctl.buf = (void *)&lc;
1465 		ctl.len = sizeof (log_ctl_t);
1466 
1467 		dat.buf = (void *)msg;
1468 		dat.len = strlen(msg) + 1;
1469 
1470 		(void) putmsg(logfd, &ctl, &dat, 0);
1471 		(void) close(logfd);
1472 	}
1473 
1474 	if (chdir(savedir) == -1)
1475 		logprint(LOG_ERR, 1, 1, "chdir(\"%s\"): %s",
1476 		    savedir, strerror(errno));
1477 
1478 	if ((dumphdr.dump_flags & DF_COMPLETE) == 0)
1479 		logprint(LOG_WARNING, 1, -1, "incomplete dump on dump device");
1480 
1481 	logprint(LOG_WARNING, 1, -1, "System dump time: %s",
1482 	    ctime(&dumphdr.dump_crashtime));
1483 
1484 	check_space(csave);
1485 
1486 	if (filebounds < 0)
1487 		bounds = read_number_from_file("bounds", 0);
1488 	else
1489 		bounds = filebounds;
1490 
1491 	if (csave) {
1492 		size_t metrics_size = datahdr.dump_metrics;
1493 
1494 		(void) sprintf(corefile, "vmdump.%ld", bounds);
1495 
1496 		datahdr.dump_metrics = 0;
1497 
1498 		logprint(LOG_ERR, 1, -1,
1499 		    "Saving compressed system crash dump in %s/%s",
1500 		    savedir, corefile);
1501 
1502 		copy_crashfile(corefile);
1503 
1504 		if (metrics_size > 0) {
1505 			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1506 			FILE *mfile = fopen(METRICSFILE, "a");
1507 			char *metrics = Zalloc(metrics_size + 1);
1508 
1509 			Pread(dumpfd, metrics, metrics_size, endoff +
1510 			    sizeof (dumphdr) + sizeof (datahdr));
1511 
1512 			if (sec < 1)
1513 				sec = 1;
1514 
1515 			if (mfile == NULL) {
1516 				logprint(LOG_WARNING, 1, -1,
1517 				    "Can't create %s:\n%s",
1518 				    METRICSFILE, metrics);
1519 			} else {
1520 				fprintf(mfile, "[[[[,,,");
1521 				for (i = 0; i < argc; i++)
1522 					fprintf(mfile, "%s ", argv[i]);
1523 				fprintf(mfile, "\n");
1524 				fprintf(mfile, ",,,%s %s %s %s %s\n",
1525 				    dumphdr.dump_utsname.sysname,
1526 				    dumphdr.dump_utsname.nodename,
1527 				    dumphdr.dump_utsname.release,
1528 				    dumphdr.dump_utsname.version,
1529 				    dumphdr.dump_utsname.machine);
1530 				fprintf(mfile, ",,,%s dump time %s\n",
1531 				    dumphdr.dump_flags & DF_LIVE ? "Live" :
1532 				    "Crash", ctime(&dumphdr.dump_crashtime));
1533 				fprintf(mfile, ",,,%s/%s\n", savedir, corefile);
1534 				fprintf(mfile, "Metrics:\n%s\n", metrics);
1535 				fprintf(mfile, "Copy pages,%ld\n", dumphdr.
1536 				    dump_npages);
1537 				fprintf(mfile, "Copy time,%d\n", sec);
1538 				fprintf(mfile, "Copy pages/sec,%ld\n",
1539 				    dumphdr.dump_npages / sec);
1540 				fprintf(mfile, "]]]]\n");
1541 				fclose(mfile);
1542 			}
1543 			free(metrics);
1544 		}
1545 
1546 		logprint(LOG_ERR, 1, -1,
1547 		    "Decompress the crash dump with "
1548 		    "\n'savecore -vf %s/%s'",
1549 		    savedir, corefile);
1550 
1551 	} else {
1552 		(void) sprintf(namelist, "unix.%ld", bounds);
1553 		(void) sprintf(corefile, "vmcore.%ld", bounds);
1554 
1555 		if (interactive && filebounds >= 0 && access(corefile, F_OK)
1556 		    == 0)
1557 			logprint(-1, 1, 1,
1558 			    "%s already exists: remove with "
1559 			    "'rm -f %s/{unix,vmcore}.%ld'",
1560 			    corefile, savedir, bounds);
1561 
1562 		logprint(LOG_ERR, 1, -1,
1563 		    "saving system crash dump in %s/{unix,vmcore}.%ld",
1564 		    savedir, bounds);
1565 
1566 		build_corefile(namelist, corefile);
1567 
1568 		if (access(METRICSFILE, F_OK) == 0) {
1569 			int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1570 			FILE *mfile = fopen(METRICSFILE, "a");
1571 
1572 			if (sec < 1)
1573 				sec = 1;
1574 
1575 			fprintf(mfile, "[[[[,,,");
1576 			for (i = 0; i < argc; i++)
1577 				fprintf(mfile, "%s ", argv[i]);
1578 			fprintf(mfile, "\n");
1579 			fprintf(mfile, ",,,%s/%s\n", savedir, corefile);
1580 			fprintf(mfile, ",,,%s %s %s %s %s\n",
1581 			    dumphdr.dump_utsname.sysname,
1582 			    dumphdr.dump_utsname.nodename,
1583 			    dumphdr.dump_utsname.release,
1584 			    dumphdr.dump_utsname.version,
1585 			    dumphdr.dump_utsname.machine);
1586 			fprintf(mfile, "Uncompress pages,%ld\n", saved);
1587 			fprintf(mfile, "Uncompress time,%d\n", sec);
1588 			fprintf(mfile, "Uncompress pages/sec,%ld\n",
1589 			    saved / sec);
1590 			fprintf(mfile, "]]]]\n");
1591 			fclose(mfile);
1592 		}
1593 	}
1594 
1595 	if (filebounds < 0) {
1596 		(void) sprintf(boundstr, "%ld\n", bounds + 1);
1597 		bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1598 		Pwrite(bfd, boundstr, strlen(boundstr), 0);
1599 		(void) close(bfd);
1600 	}
1601 
1602 	if (verbose) {
1603 		int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1604 
1605 		printf("%d:%02d dump %s is done\n",
1606 		    sec / 60, sec % 60,
1607 		    csave ? "copy" : "decompress");
1608 	}
1609 
1610 	if (verbose > 1 && hist != NULL) {
1611 		int i, nw;
1612 
1613 		for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
1614 			nw += hist[i] * i;
1615 		printf("pages count     %%\n");
1616 		for (i = 0; i <= BTOP(coreblksize); ++i) {
1617 			if (hist[i] == 0)
1618 				continue;
1619 			printf("%3d   %5u  %6.2f\n",
1620 			    i, hist[i], 100.0 * hist[i] * i / nw);
1621 		}
1622 	}
1623 
1624 	(void) close(dumpfd);
1625 	dumpfd = -1;
1626 
1627 	return (0);
1628 }
1629