xref: /freebsd/sbin/savecore/savecore.c (revision a8445737e740901f5f2c8d24c12ef7fc8b00134e)
1 /*-
2  * Copyright (c) 1986, 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 static const char copyright[] =
36 "@(#) Copyright (c) 1986, 1992, 1993\n\
37 	The Regents of the University of California.  All rights reserved.\n";
38 #endif /* not lint */
39 
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)savecore.c	8.3 (Berkeley) 1/2/94";
43 #endif
44 static const char rcsid[] =
45 	"$Id$";
46 #endif /* not lint */
47 
48 #include <sys/param.h>
49 #include <sys/stat.h>
50 #include <sys/mount.h>
51 #include <sys/syslog.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_param.h>
55 #include <vm/pmap.h>
56 
57 #include <dirent.h>
58 #include <errno.h>
59 #include <fcntl.h>
60 #include <nlist.h>
61 #include <paths.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include "zopen.h"
67 
68 #define ok(number) ((number) - KERNBASE)
69 
/*
 * Indices into current_nl[] and dump_nl[].  Both tables list the same
 * symbols in the same order, so one set of indices serves for either.
 */
#define	X_DUMPDEV	0
#define	X_DUMPLO	1
#define	X_TIME		2
#define	X_DUMPSIZE	3
#define	X_VERSION	4
#define	X_PANICSTR	5
#define	X_DUMPMAG	6

/* Namelist for currently running system. */
struct nlist current_nl[] = {
	{ "_dumpdev" },
	{ "_dumplo" },
	{ "_time_second" },
	{ "_dumpsize" },
	{ "_version" },
	{ "_panicstr" },
	{ "_dumpmag" },
	{ "" },
};

/*
 * Symbols kmem_setup() insists on finding in the running kernel and in
 * the dumped kernel respectively.  Each list is terminated by -1.
 */
int cursyms[] = {
	X_DUMPDEV, X_DUMPLO, X_VERSION, X_DUMPMAG, -1
};
int dumpsyms[] = {
	X_TIME, X_DUMPSIZE, X_VERSION, X_PANICSTR, X_DUMPMAG, -1
};

/*
 * Name list for dumped system.
 * Entries MUST be the same as those in current_nl[].
 */
struct nlist dump_nl[] = {
	{ "_dumpdev" },
	{ "_dumplo" },
	{ "_time_second" },
	{ "_dumpsize" },
	{ "_version" },
	{ "_panicstr" },
	{ "_dumpmag" },
	{ "" },
};
100 
/*
 * Values read from kernel memory or from the dump itself.
 * Types match kernel declarations.
 */
long	dumplo;			/* where dump starts on dumpdev */
int	dumpmag;		/* magic number in dump */
int	dumpsize;		/* amount of memory dumped */

/* Program state shared between the routines below. */
char	*kernel;		/* kernel named with -N or as 2nd argument */
char	*dirname;		/* directory to save dumps in */
char	*ddname;		/* name of dump device */
dev_t	dumpdev;		/* dump device */
int	dumpfd;			/* read/write descriptor on block dev */
time_t	now;			/* current date */
char	panic_mesg[1024];	/* panic message copied out of the dump */
int	panicstr;		/* panicstr value read from the dump */
char	vers[1024];		/* version string of the running kernel */

/* Command line flags. */
int	clear;			/* -c */
int	compress;		/* -z */
int	force;			/* -f */
int	verbose;		/* -v (and the undocumented -d) */
117 
/*
 * Forward declarations.  The capitalized routines are wrappers around the
 * system calls of the same name that log the failure and exit.
 */
void	 check_kmem __P((void));
int	 check_space __P((void));
void	 clear_dump __P((void));
int	 Create __P((char *file, int mode));
int	 dump_exists __P((void));
char	*find_dev __P((dev_t dev, int type));
int	 get_crashtime __P((void));
void	 get_dumpsize __P((void));
void	 kmem_setup __P((void));
void	 log __P((int, char *, ...));
void	 Lseek __P((int fd, off_t off, int flag));
int	 Open __P((const char *name, int rw));
int	 Read __P((int fd, void *bp, int size));
char	*rawname __P((char *s));
void	 save_core __P((void));
void	 usage __P((void));
void	 Write __P((int fd, void *bp, int size));
135 
136 int
137 main(argc, argv)
138 	int argc;
139 	char *argv[];
140 {
141 	int ch;
142 
143 	openlog("savecore", LOG_PERROR, LOG_DAEMON);
144 
145 	while ((ch = getopt(argc, argv, "cdfN:vz")) != -1)
146 		switch(ch) {
147 		case 'c':
148 			clear = 1;
149 			break;
150 		case 'd':		/* Not documented. */
151 		case 'v':
152 			verbose = 1;
153 			break;
154 		case 'f':
155 			force = 1;
156 			break;
157 		case 'N':
158 			kernel = optarg;
159 			break;
160 		case 'z':
161 			compress = 1;
162 			break;
163 		case '?':
164 		default:
165 			usage();
166 		}
167 	argc -= optind;
168 	argv += optind;
169 
170 	if (!clear) {
171 		if (argc != 1 && argc != 2)
172 			usage();
173 		dirname = argv[0];
174 	}
175 	if (argc == 2)
176 		kernel = argv[1];
177 
178 	(void)time(&now);
179 	kmem_setup();
180 
181 	if (clear) {
182 		clear_dump();
183 		exit(0);
184 	}
185 
186 	if (!dump_exists() && !force)
187 		exit(1);
188 
189 	check_kmem();
190 
191 	if (panicstr)
192 		syslog(LOG_ALERT, "reboot after panic: %s", panic_mesg);
193 	else
194 		syslog(LOG_ALERT, "reboot");
195 
196 	get_dumpsize();
197 
198 	if ((!get_crashtime() || !check_space()) && !force)
199 		exit(1);
200 
201 	save_core();
202 
203 	clear_dump();
204 	exit(0);
205 }
206 
/*
 * kmem_setup --
 *	Resolve the kernel symbols we need, read the dump device, dump
 *	offset and dump magic number out of the running kernel via
 *	_PATH_KMEM, and open the dump device.
 *
 * On return dumpdev, dumplo (converted to a byte offset), dumpmag, ddname
 * and dumpfd are set.  Unless a kernel was named on the command line,
 * vers also holds the version string of the running kernel.  A missing
 * symbol, an unset dump device or any I/O failure is logged and
 * terminates the program.
 */
void
kmem_setup()
{
	FILE *fp;
	int kmem, i;
	const char *dump_sys;

	/*
	 * Some names we need for the currently running system, others for
	 * the system that was running when the dump was made.  The values
	 * obtained from the current system are used to look for things in
	 * /dev/kmem that cannot be found in the dump_sys namelist, but are
	 * presumed to be the same (since the disk partitions are probably
	 * the same!)
	 */
	if ((nlist(getbootfile(), current_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", getbootfile(),
		       strerror(errno));
	for (i = 0; cursyms[i] != -1; i++)
		if (current_nl[cursyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    getbootfile(), current_nl[cursyms[i]].n_name);
			exit(1);
		}

	dump_sys = kernel ? kernel : getbootfile();
	if ((nlist(dump_sys, dump_nl)) == -1)
		syslog(LOG_ERR, "%s: nlist: %s", dump_sys, strerror(errno));
	for (i = 0; dumpsyms[i] != -1; i++)
		if (dump_nl[dumpsyms[i]].n_value == 0) {
			syslog(LOG_ERR, "%s: %s not in namelist",
			    dump_sys, dump_nl[dumpsyms[i]].n_name);
			exit(1);
		}

	kmem = Open(_PATH_KMEM, O_RDONLY);
	Lseek(kmem, (off_t)current_nl[X_DUMPDEV].n_value, L_SET);
	(void)Read(kmem, &dumpdev, sizeof(dumpdev));
	if (dumpdev == NODEV) {
		syslog(LOG_WARNING, "no core dump (no dumpdev)");
		exit(1);
	}
	Lseek(kmem, (off_t)current_nl[X_DUMPLO].n_value, L_SET);
	(void)Read(kmem, &dumplo, sizeof(dumplo));
	if (verbose)
		(void)printf("dumplo = %ld (%ld * %d)\n",
		    dumplo, dumplo/DEV_BSIZE, DEV_BSIZE);
	Lseek(kmem, (off_t)current_nl[X_DUMPMAG].n_value, L_SET);
	(void)Read(kmem, &dumpmag, sizeof(dumpmag));
	dumplo *= DEV_BSIZE;		/* from here on a byte offset */
	ddname = find_dev(dumpdev, S_IFBLK);
	dumpfd = Open(ddname, O_RDWR);

	/*
	 * The running kernel's version string is only compared against the
	 * dump when no kernel was named (see check_kmem()), so don't bother
	 * reading it otherwise.  Nothing else needs kmem after this point.
	 */
	if (kernel) {
		(void)close(kmem);
		return;
	}
	fp = fdopen(kmem, "r");
	if (fp == NULL) {
		syslog(LOG_ERR, "%s: fdopen: %m", _PATH_KMEM);
		exit(1);
	}
	(void)fseek(fp, (long)current_nl[X_VERSION].n_value, L_SET);
	if (fgets(vers, sizeof(vers), fp) == NULL)
		vers[0] = '\0';

	/* fp wraps kmem, not dumpfd, so it is safe to close it here. */
	(void)fclose(fp);
}
271 
/*
 * check_kmem --
 *	Compare the version string stored in the dump with that of the
 *	running kernel (only when no kernel was named on the command line)
 *	and fetch the panic string pointer and panic message from the dump.
 *
 * Sets panicstr, and panic_mesg when panicstr is non-zero.  panic_mesg is
 * always NUL-terminated; a message longer than the buffer is truncated.
 * If the panic string pointer cannot be read it is treated as absent.
 */
void
check_kmem()
{
	FILE *fp;
	size_t len;
	int c;
	char core_vers[1024];

	fp = fdopen(dumpfd, "r");
	if (fp == NULL) {
		syslog(LOG_ERR, "%s: fdopen: %m", ddname);
		exit(1);
	}
	fseek(fp, (off_t)(dumplo + ok(dump_nl[X_VERSION].n_value)), L_SET);
	/* Don't compare against stack garbage if the read fails. */
	if (fgets(core_vers, sizeof(core_vers), fp) == NULL)
		core_vers[0] = '\0';
	if (strcmp(vers, core_vers) && kernel == 0)
		syslog(LOG_WARNING,
		    "warning: %s version mismatch:\n\t%s\nand\t%s\n",
		    getbootfile(), vers, core_vers);
	(void)fseek(fp,
	    (off_t)(dumplo + ok(dump_nl[X_PANICSTR].n_value)), L_SET);
	if (fread(&panicstr, sizeof(panicstr), 1, fp) != 1)
		panicstr = 0;
	if (panicstr) {
		(void)fseek(fp, dumplo + ok(panicstr), L_SET);
		/*
		 * Copy up to the terminating NUL, end of file, or the end
		 * of the buffer, always leaving room for our own NUL.
		 */
		for (len = 0; len < sizeof(panic_mesg) - 1; len++) {
			c = getc(fp);
			if (c == EOF || c == '\0')
				break;
			panic_mesg[len] = c;
		}
		panic_mesg[len] = '\0';
	}
	/* Don't fclose(fp), we use dumpfd later. */
}
302 
/*
 * clear_dump --
 *	Invalidate the dump by zeroing the magic number stored in it, so
 *	that dump_exists() no longer finds a match.
 *
 * The value written has the same type as dumpmag (and as the value read
 * back by dump_exists()), so exactly the magic number is overwritten and
 * nothing after it, whatever the size of long is.
 */
void
clear_dump()
{
	int newdumpmag;

	newdumpmag = 0;
	Lseek(dumpfd, (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value)), L_SET);
	Write(dumpfd, &newdumpmag, sizeof(newdumpmag));
}
312 
/*
 * dump_exists --
 *	Read the magic number stored in the dump and compare it with the
 *	one obtained from the running kernel.
 *
 * Returns 1 when they match, otherwise logs "no core dump" (preceded by
 * the two values when verbose) and returns 0.
 */
int
dump_exists()
{
	int magic_on_disk;
	off_t where;

	where = (off_t)(dumplo + ok(dump_nl[X_DUMPMAG].n_value));
	Lseek(dumpfd, where, L_SET);
	(void)Read(dumpfd, &magic_on_disk, sizeof(magic_on_disk));
	if (magic_on_disk == dumpmag)
		return (1);

	if (verbose)
		syslog(LOG_WARNING, "magic number mismatch (%x != %x)",
		    magic_on_disk, dumpmag);
	syslog(LOG_WARNING, "no core dump");
	return (0);
}
329 
330 char buf[1024 * 1024];
331 
332 void
333 save_core()
334 {
335 	register FILE *fp;
336 	register int bounds, ifd, nr, nw, ofd;
337 	char *rawp, path[MAXPATHLEN];
338 	mode_t oumask;
339 
340 	/*
341 	 * Get the current number and update the bounds file.  Do the update
342 	 * now, because may fail later and don't want to overwrite anything.
343 	 */
344 	(void)snprintf(path, sizeof(path), "%s/bounds", dirname);
345 	if ((fp = fopen(path, "r")) == NULL)
346 		goto err1;
347 	if (fgets(buf, sizeof(buf), fp) == NULL) {
348 		if (ferror(fp))
349 err1:			syslog(LOG_WARNING, "%s: %s", path, strerror(errno));
350 		bounds = 0;
351 	} else
352 		bounds = atoi(buf);
353 	if (fp != NULL)
354 		(void)fclose(fp);
355 	if ((fp = fopen(path, "w")) == NULL)
356 		syslog(LOG_ERR, "%s: %m", path);
357 	else {
358 		(void)fprintf(fp, "%d\n", bounds + 1);
359 		(void)fclose(fp);
360 	}
361 
362 	/* Create the core file. */
363 	oumask = umask(S_IRWXG|S_IRWXO); /* Restrict access to the core file.*/
364 	(void)snprintf(path, sizeof(path), "%s/vmcore.%d%s",
365 	    dirname, bounds, compress ? ".Z" : "");
366 	if (compress) {
367 		if ((fp = zopen(path, "w", 0)) == NULL) {
368 			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
369 			exit(1);
370 		}
371 	} else
372 		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
373 	(void)umask(oumask);
374 
375 	/* Open the raw device. */
376 	rawp = rawname(ddname);
377 	if ((ifd = open(rawp, O_RDONLY)) == -1) {
378 		syslog(LOG_WARNING, "%s: %m; using block device", rawp);
379 		ifd = dumpfd;
380 	}
381 
382 	/* Seek to the start of the core. */
383 	Lseek(ifd, (off_t)dumplo, L_SET);
384 
385 	/* Copy the core file. */
386 	syslog(LOG_NOTICE, "writing %score to %s",
387 	    compress ? "compressed " : "", path);
388 	for (; dumpsize > 0; dumpsize -= nr) {
389 		(void)printf("%6dK\r", dumpsize / 1024);
390 		(void)fflush(stdout);
391 		nr = read(ifd, buf, MIN(dumpsize, sizeof(buf)));
392 		if (nr <= 0) {
393 			if (nr == 0)
394 				syslog(LOG_WARNING,
395 				    "WARNING: EOF on dump device");
396 			else
397 				syslog(LOG_ERR, "%s: %m", rawp);
398 			goto err2;
399 		}
400 		if (compress)
401 			nw = fwrite(buf, 1, nr, fp);
402 		else
403 			nw = write(ofd, buf, nr);
404 		if (nw != nr) {
405 			syslog(LOG_ERR, "%s: %s",
406 			    path, strerror(nw == 0 ? EIO : errno));
407 err2:			syslog(LOG_WARNING,
408 			    "WARNING: vmcore may be incomplete");
409 			(void)printf("\n");
410 			exit(1);
411 		}
412 	}
413 	(void)close(ifd);
414 	if (compress)
415 		(void)fclose(fp);
416 	else
417 		(void)close(ofd);
418 
419 	/* Copy the kernel. */
420 	ifd = Open(kernel ? kernel : getbootfile(), O_RDONLY);
421 	(void)snprintf(path, sizeof(path), "%s/kernel.%d%s",
422 	    dirname, bounds, compress ? ".Z" : "");
423 	if (compress) {
424 		if ((fp = zopen(path, "w", 0)) == NULL) {
425 			syslog(LOG_ERR, "%s: %s", path, strerror(errno));
426 			exit(1);
427 		}
428 	} else
429 		ofd = Create(path, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
430 	syslog(LOG_NOTICE, "writing %skernel to %s",
431 	    compress ? "compressed " : "", path);
432 	while ((nr = read(ifd, buf, sizeof(buf))) > 0) {
433 		if (compress)
434 			nw = fwrite(buf, 1, nr, fp);
435 		else
436 			nw = write(ofd, buf, nr);
437 		if (nw != nr) {
438 			syslog(LOG_ERR, "%s: %s",
439 			    path, strerror(nw == 0 ? EIO : errno));
440 			syslog(LOG_WARNING,
441 			    "WARNING: kernel may be incomplete");
442 			exit(1);
443 		}
444 	}
445 	if (nr < 0) {
446 		syslog(LOG_ERR, "%s: %s",
447 		    kernel ? kernel : getbootfile(), strerror(errno));
448 		syslog(LOG_WARNING,
449 		    "WARNING: kernel may be incomplete");
450 		exit(1);
451 	}
452 	if (compress)
453 		(void)fclose(fp);
454 	else
455 		(void)close(ofd);
456 }
457 
458 char *
459 find_dev(dev, type)
460 	register dev_t dev;
461 	register int type;
462 {
463 	register DIR *dfd;
464 	struct dirent *dir;
465 	struct stat sb;
466 	char *dp, devname[MAXPATHLEN + 1];
467 
468 	if ((dfd = opendir(_PATH_DEV)) == NULL) {
469 		syslog(LOG_ERR, "%s: %s", _PATH_DEV, strerror(errno));
470 		exit(1);
471 	}
472 	(void)strcpy(devname, _PATH_DEV);
473 	while ((dir = readdir(dfd))) {
474 		(void)strcpy(devname + sizeof(_PATH_DEV) - 1, dir->d_name);
475 		if (lstat(devname, &sb)) {
476 			syslog(LOG_ERR, "%s: %s", devname, strerror(errno));
477 			continue;
478 		}
479 		if ((sb.st_mode & S_IFMT) != type)
480 			continue;
481 		if (dev == sb.st_rdev) {
482 			closedir(dfd);
483 			if ((dp = strdup(devname)) == NULL) {
484 				syslog(LOG_ERR, "%s", strerror(errno));
485 				exit(1);
486 			}
487 			return (dp);
488 		}
489 	}
490 	closedir(dfd);
491 	syslog(LOG_ERR, "can't find device %d/%d", major(dev), minor(dev));
492 	exit(1);
493 }
494 
/*
 * rawname --
 *	Build the raw device name for block device path s by inserting an
 *	'r' in front of the last path component ("/dev/sd0b" becomes
 *	"/dev/rsd0b").
 *
 * Returns a freshly allocated string on success.  If s contains no '/'
 * or has nothing after the last '/', an error is logged and s itself is
 * returned unchanged.  Running out of memory is logged and terminates
 * the program.
 */
char *
rawname(s)
	char *s;
{
	char *sl, *name;
	size_t dirlen, len;

	/* Need a directory part and a non-empty last component. */
	if ((sl = strrchr(s, '/')) == NULL || sl[1] == '\0') {
		syslog(LOG_ERR,
		    "can't make raw dump device name from %s", s);
		return (s);
	}
	dirlen = sl - s;
	/* directory + "/r" + last component + terminating NUL */
	len = dirlen + 2 + strlen(sl + 1) + 1;
	if ((name = malloc(len)) == NULL) {
		syslog(LOG_ERR, "%s", strerror(errno));
		exit(1);
	}
	/* The '*' precision must be an int, not a pointer difference. */
	(void)snprintf(name, len, "%.*s/r%s", (int)dirlen, s, sl + 1);
	return (name);
}
513 
/*
 * get_crashtime --
 *	Read the time stamp stored in the dump, print it, and check that
 *	it lies within LEEWAY seconds of the current time.
 *
 * Returns 1 for a plausible crash time, 0 when the stamp is zero or
 * outside that window.
 */
int
get_crashtime()
{
	time_t crashed;			/* Time the dump was taken. */
	off_t where;

	where = (off_t)(dumplo + ok(dump_nl[X_TIME].n_value));
	Lseek(dumpfd, where, L_SET);
	(void)Read(dumpfd, &crashed, sizeof(crashed));
	if (crashed == 0) {
		if (verbose)
			syslog(LOG_ERR, "dump time is zero");
		return (0);
	}
	(void)printf("savecore: system went down at %s", ctime(&crashed));
#define	LEEWAY	(7 * 86400)
	if (crashed >= now - LEEWAY && crashed <= now + LEEWAY)
		return (1);

	(void)printf("dump time is unreasonable\n");
	return (0);
}
534 
/*
 * get_dumpsize --
 *	Fetch the dump size recorded in the dump and scale it by the
 *	system page size, leaving the result in dumpsize.
 */
void
get_dumpsize()
{
	off_t where;

	/* Read the dump size. */
	where = (off_t)(dumplo + ok(dump_nl[X_DUMPSIZE].n_value));
	Lseek(dumpfd, where, L_SET);
	(void)Read(dumpfd, &dumpsize, sizeof(dumpsize));
	dumpsize = dumpsize * getpagesize();
}
543 
544 int
545 check_space()
546 {
547 	register FILE *fp;
548 	const char *tkernel;
549 	off_t minfree, spacefree, totfree, kernelsize, needed;
550 	struct stat st;
551 	struct statfs fsbuf;
552 	char buf[100], path[MAXPATHLEN];
553 
554 	tkernel = kernel ? kernel : getbootfile();
555 	if (stat(tkernel, &st) < 0) {
556 		syslog(LOG_ERR, "%s: %m", tkernel);
557 		exit(1);
558 	}
559 	kernelsize = st.st_blocks * S_BLKSIZE;
560 
561 	if (statfs(dirname, &fsbuf) < 0) {
562 		syslog(LOG_ERR, "%s: %m", dirname);
563 		exit(1);
564 	}
565  	spacefree = ((off_t) fsbuf.f_bavail * fsbuf.f_bsize) / 1024;
566 	totfree = ((off_t) fsbuf.f_bfree * fsbuf.f_bsize) / 1024;
567 
568 	(void)snprintf(path, sizeof(path), "%s/minfree", dirname);
569 	if ((fp = fopen(path, "r")) == NULL)
570 		minfree = 0;
571 	else {
572 		if (fgets(buf, sizeof(buf), fp) == NULL)
573 			minfree = 0;
574 		else
575 			minfree = atoi(buf);
576 		(void)fclose(fp);
577 	}
578 
579 	needed = (dumpsize + kernelsize) / 1024;
580  	if (((minfree > 0) ? spacefree : totfree) - needed < minfree) {
581 		syslog(LOG_WARNING,
582 		    "no dump, not enough free space on device");
583 		return (0);
584 	}
585 	if (spacefree - needed < 0)
586 		syslog(LOG_WARNING,
587 		    "dump performed, but free space threshold crossed");
588 	return (1);
589 }
590 
/*
 * Open --
 *	open(2) wrapper: returns the new descriptor, or logs the failure
 *	against the file name and exits with status 1.
 */
int
Open(name, rw)
	const char *name;
	int rw;
{
	int fd;

	fd = open(name, rw, 0);
	if (fd >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", name);
	exit(1);
}
604 
/*
 * Read --
 *	read(2) wrapper that insists on getting exactly size bytes.
 *
 * Returns size on success.  Otherwise the failure is logged and the
 * program exits with status 1.  As in Write(), a short transfer is
 * reported as EIO, since errno is only meaningful when read() itself
 * returned -1.
 */
int
Read(fd, bp, size)
	int fd, size;
	void *bp;
{
	int nr;

	nr = read(fd, bp, size);
	if (nr != size) {
		syslog(LOG_ERR, "read: %s",
		    strerror(nr == -1 ? errno : EIO));
		exit(1);
	}
	return (nr);
}
619 
/*
 * Lseek --
 *	lseek(2) wrapper: repositions fd, or logs the failure and exits
 *	with status 1.
 */
void
Lseek(fd, off, flag)
	int fd, flag;
	off_t off;
{
	if (lseek(fd, off, flag) != -1)
		return;

	syslog(LOG_ERR, "lseek: %m");
	exit(1);
}
633 
/*
 * Create --
 *	creat(2) wrapper: returns the new descriptor, or logs the failure
 *	against the file name and exits with status 1.
 */
int
Create(file, mode)
	char *file;
	int mode;
{
	int fd;

	if ((fd = creat(file, mode)) >= 0)
		return (fd);

	syslog(LOG_ERR, "%s: %m", file);
	exit(1);
}
648 
/*
 * Write --
 *	write(2) wrapper that insists on writing all size bytes.
 *
 * A failed write is logged with its errno and a short one as EIO; in
 * both cases the program exits with status 1.
 */
void
Write(fd, bp, size)
	int fd, size;
	void *bp;
{
	int nw;

	nw = write(fd, bp, size);
	if (nw >= size)
		return;

	syslog(LOG_ERR, "write: %s", strerror(nw == -1 ? errno : EIO));
	exit(1);
}
661 
/*
 * usage --
 *	Log the command synopsis and exit with status 1.
 */
void
usage()
{
	syslog(LOG_ERR, "usage: savecore [-cfvz] [-N system] directory");
	exit(1);
}
668