xref: /freebsd/sys/kern/kern_shutdown.c (revision 6de306ecee3831f48debaad1d0b22418faa48e10)
1 /*-
2  * Copyright (c) 1986, 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_ddb.h"
43 #include "opt_hw_wdog.h"
44 #include "opt_panic.h"
45 #include "opt_show_busybufs.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/eventhandler.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/reboot.h>
53 #include <sys/proc.h>
54 #include <sys/vnode.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/mount.h>
58 #include <sys/mutex.h>
59 #include <sys/queue.h>
60 #include <sys/sysctl.h>
61 #include <sys/conf.h>
62 #include <sys/sysproto.h>
63 #include <sys/cons.h>
64 
65 #include <machine/pcb.h>
66 #include <machine/md_var.h>
67 #include <machine/smp.h>		/* smp_active, cpuid */
68 
69 #include <sys/signalvar.h>
70 
71 #ifndef PANIC_REBOOT_WAIT_TIME
72 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
73 #endif
74 
/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
79 #include <machine/stdarg.h>
80 
81 #ifdef DDB
82 #ifdef DDB_UNATTENDED
83 int debugger_on_panic = 0;
84 #else
85 int debugger_on_panic = 1;
86 #endif
87 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
88 	&debugger_on_panic, 0, "Run debugger on kernel panic");
89 #endif
90 
91 SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
92 
93 #ifdef	HW_WDOG
94 /*
95  * If there is a hardware watchdog, point this at the function needed to
96  * hold it off.
97  * It's needed when the kernel needs to do some lengthy operations.
98  * e.g. in wd.c when dumping core.. It's most annoying to have
99  * your precious core-dump only half written because the wdog kicked in.
100  */
101 watchdog_tickle_fn wdog_tickler = NULL;
102 #endif	/* HW_WDOG */
103 
104 /*
105  * Variable panicstr contains argument to first call to panic; used as flag
106  * to indicate that the kernel has already called panic.
107  */
108 const char *panicstr;
109 
110 int dumping;				 /* system is dumping */
111 
112 static void boot(int) __dead2;
113 static void dumpsys(void);
114 static void poweroff_wait(void *, int);
115 static void shutdown_halt(void *junk, int howto);
116 static void shutdown_panic(void *junk, int howto);
117 static void shutdown_reset(void *junk, int howto);
118 
119 /* register various local shutdown events */
120 static void
121 shutdown_conf(void *unused)
122 {
123 	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST);
124 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100);
125 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100);
126 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200);
127 }
128 
129 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
130 
131 /* ARGSUSED */
132 
133 /*
134  * The system call that results in a reboot
135  */
136 int
137 reboot(struct proc *p, struct reboot_args *uap)
138 {
139 	int error;
140 
141 	if ((error = suser(p)))
142 		return (error);
143 
144 	boot(uap->opt);
145 	return (0);
146 }
147 
148 /*
149  * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
150  */
151 static int shutdown_howto = 0;
152 
153 void
154 shutdown_nice(int howto)
155 {
156 	shutdown_howto = howto;
157 
158 	/* Send a signal to init(8) and have it shutdown the world */
159 	if (initproc != NULL) {
160 		psignal(initproc, SIGINT);
161 	} else {
162 		/* No init(8) running, so simply reboot */
163 		boot(RB_NOSYNC);
164 	}
165 	return;
166 }
167 static int	waittime = -1;
168 static struct pcb dumppcb;
169 
170 static void
171 print_uptime(void)
172 {
173 	int f;
174 	struct timespec ts;
175 
176 	getnanouptime(&ts);
177 	printf("Uptime: ");
178 	f = 0;
179 	if (ts.tv_sec >= 86400) {
180 		printf("%ldd", (long)ts.tv_sec / 86400);
181 		ts.tv_sec %= 86400;
182 		f = 1;
183 	}
184 	if (f || ts.tv_sec >= 3600) {
185 		printf("%ldh", (long)ts.tv_sec / 3600);
186 		ts.tv_sec %= 3600;
187 		f = 1;
188 	}
189 	if (f || ts.tv_sec >= 60) {
190 		printf("%ldm", (long)ts.tv_sec / 60);
191 		ts.tv_sec %= 60;
192 		f = 1;
193 	}
194 	printf("%lds\n", (long)ts.tv_sec);
195 }
196 
197 /*
198  *  Go through the rigmarole of shutting down..
199  * this used to be in machdep.c but I'll be dammned if I could see
200  * anything machine dependant in it.
201  */
202 static void
203 boot(int howto)
204 {
205 
206 	/* collect extra flags that shutdown_nice might have set */
207 	howto |= shutdown_howto;
208 
209 #ifdef SMP
210 	if (smp_active)
211 		printf("boot() called on cpu#%d\n", PCPU_GET(cpuid));
212 #endif
213 	/*
214 	 * Do any callouts that should be done BEFORE syncing the filesystems.
215 	 */
216 	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
217 
218 	/*
219 	 * Now sync filesystems
220 	 */
221 	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
222 		register struct buf *bp;
223 		int iter, nbusy, pbusy;
224 		int subiter;
225 
226 		waittime = 0;
227 		printf("\nsyncing disks... ");
228 
229 		sync(&proc0, NULL);
230 
231 		/*
232 		 * With soft updates, some buffers that are
233 		 * written will be remarked as dirty until other
234 		 * buffers are written.
235 		 */
236 		for (iter = pbusy = 0; iter < 20; iter++) {
237 			nbusy = 0;
238 			for (bp = &buf[nbuf]; --bp >= buf; ) {
239 				if ((bp->b_flags & B_INVAL) == 0 &&
240 				    BUF_REFCNT(bp) > 0) {
241 					nbusy++;
242 				} else if ((bp->b_flags & (B_DELWRI | B_INVAL))
243 						== B_DELWRI) {
244 					/* bawrite(bp);*/
245 					nbusy++;
246 				}
247 			}
248 			if (nbusy == 0)
249 				break;
250 			printf("%d ", nbusy);
251 			if (nbusy < pbusy)
252 				iter = 0;
253 			pbusy = nbusy;
254 			sync(&proc0, NULL);
255  			if (curproc != NULL) {
256 				DROP_GIANT_NOSWITCH();
257    				for (subiter = 0; subiter < 50 * iter; subiter++) {
258      					mtx_lock_spin(&sched_lock);
259      					setrunqueue(curproc);
260      					mi_switch(); /* Allow interrupt threads to run */
261      					mtx_unlock_spin(&sched_lock);
262      					DELAY(1000);
263    				}
264 				PICKUP_GIANT();
265  			} else
266 			DELAY(50000 * iter);
267 		}
268 		printf("\n");
269 		/*
270 		 * Count only busy local buffers to prevent forcing
271 		 * a fsck if we're just a client of a wedged NFS server
272 		 */
273 		nbusy = 0;
274 		for (bp = &buf[nbuf]; --bp >= buf; ) {
275 			if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) ||
276 			    ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
277 				if (bp->b_dev == NODEV) {
278 					TAILQ_REMOVE(&mountlist,
279 					    bp->b_vp->v_mount, mnt_list);
280 					continue;
281 				}
282 				nbusy++;
283 #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
284 				printf(
285 			    "%d: dev:%s, flags:%08lx, blkno:%ld, lblkno:%ld\n",
286 				    nbusy, devtoname(bp->b_dev),
287 				    bp->b_flags, (long)bp->b_blkno,
288 				    (long)bp->b_lblkno);
289 #endif
290 			}
291 		}
292 		if (nbusy) {
293 			/*
294 			 * Failed to sync all blocks. Indicate this and don't
295 			 * unmount filesystems (thus forcing an fsck on reboot).
296 			 */
297 			printf("giving up on %d buffers\n", nbusy);
298 			DELAY(5000000);	/* 5 seconds */
299 		} else {
300 			printf("done\n");
301 			/*
302 			 * Unmount filesystems
303 			 */
304 			if (panicstr == 0)
305 				vfs_unmountall();
306 		}
307 		DELAY(100000);		/* wait for console output to finish */
308 	}
309 
310 	print_uptime();
311 
312 	/*
313 	 * Ok, now do things that assume all filesystem activity has
314 	 * been completed.
315 	 */
316 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
317 	splhigh();
318 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold)
319 		dumpsys();
320 
321 	/* Now that we're going to really halt the system... */
322 	EVENTHANDLER_INVOKE(shutdown_final, howto);
323 
324 	for(;;) ;	/* safety against shutdown_reset not working */
325 	/* NOTREACHED */
326 }
327 
328 /*
329  * If the shutdown was a clean halt, behave accordingly.
330  */
331 static void
332 shutdown_halt(void *junk, int howto)
333 {
334 	if (howto & RB_HALT) {
335 		printf("\n");
336 		printf("The operating system has halted.\n");
337 		printf("Please press any key to reboot.\n\n");
338 		switch (cngetc()) {
339 		case -1:		/* No console, just die */
340 			cpu_halt();
341 			/* NOTREACHED */
342 		default:
343 			howto &= ~RB_HALT;
344 			break;
345 		}
346 	}
347 }
348 
349 /*
350  * Check to see if the system paniced, pause and then reboot
351  * according to the specified delay.
352  */
353 static void
354 shutdown_panic(void *junk, int howto)
355 {
356 	int loop;
357 
358 	if (howto & RB_DUMP) {
359 		if (PANIC_REBOOT_WAIT_TIME != 0) {
360 			if (PANIC_REBOOT_WAIT_TIME != -1) {
361 				printf("Automatic reboot in %d seconds - "
362 				       "press a key on the console to abort\n",
363 					PANIC_REBOOT_WAIT_TIME);
364 				for (loop = PANIC_REBOOT_WAIT_TIME * 10;
365 				     loop > 0; --loop) {
366 					DELAY(1000 * 100); /* 1/10th second */
367 					/* Did user type a key? */
368 					if (cncheckc() != -1)
369 						break;
370 				}
371 				if (!loop)
372 					return;
373 			}
374 		} else { /* zero time specified - reboot NOW */
375 			return;
376 		}
377 		printf("--> Press a key on the console to reboot <--\n");
378 		cngetc();
379 	}
380 }
381 
/*
 * Everything is done: announce the reboot and reset the CPU.  Neither
 * argument is used.
 */
static void
shutdown_reset(void *junk, int howto)
{

	printf("Rebooting...\n");

	/* wait 1 sec for printf's to complete and be read */
	DELAY(1000000);

	/* A cpu_boot(howto) hook would go here; it is currently disabled. */
	cpu_reset();
	/* NOTREACHED */ /* assuming reset worked */
}
394 
395 /*
396  * Magic number for savecore
397  *
398  * exported (symorder) and used at least by savecore(8)
399  *
400  */
401 static u_long const	dumpmag = 0x8fca0101UL;
402 
403 static int	dumpsize = 0;		/* also for savecore */
404 
405 static int	dodump = 1;
406 
407 SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0,
408     "Try to perform coredump on kernel panic");
409 
410 static int
411 setdumpdev(dev_t dev)
412 {
413 	int psize;
414 	long newdumplo;
415 
416 	if (dev == NODEV) {
417 		dumpdev = dev;
418 		return (0);
419 	}
420 	if (devsw(dev) == NULL)
421 		return (ENXIO);		/* XXX is this right? */
422 	if (devsw(dev)->d_psize == NULL)
423 		return (ENXIO);		/* XXX should be ENODEV ? */
424 	psize = devsw(dev)->d_psize(dev);
425 	if (psize == -1)
426 		return (ENXIO);		/* XXX should be ENODEV ? */
427 	/*
428 	 * XXX should clean up checking in dumpsys() to be more like this.
429 	 */
430 	newdumplo = psize - Maxmem * PAGE_SIZE / DEV_BSIZE;
431 	if (newdumplo < 0)
432 		return (ENOSPC);
433 	dumpdev = dev;
434 	dumplo = newdumplo;
435 	return (0);
436 }
437 
438 
439 /* ARGSUSED */
440 static void
441 dump_conf(void *dummy)
442 {
443 	if (setdumpdev(dumpdev) != 0)
444 		dumpdev = NODEV;
445 }
446 
447 SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL)
448 
449 static int
450 sysctl_kern_dumpdev(SYSCTL_HANDLER_ARGS)
451 {
452 	int error;
453 	udev_t ndumpdev;
454 
455 	ndumpdev = dev2udev(dumpdev);
456 	error = sysctl_handle_opaque(oidp, &ndumpdev, sizeof ndumpdev, req);
457 	if (error == 0 && req->newptr != NULL)
458 		error = setdumpdev(udev2dev(ndumpdev, 0));
459 	return (error);
460 }
461 
462 SYSCTL_PROC(_kern, KERN_DUMPDEV, dumpdev, CTLTYPE_OPAQUE|CTLFLAG_RW,
463 	0, sizeof dumpdev, sysctl_kern_dumpdev, "T,dev_t", "");
464 
465 /*
466  * Doadump comes here after turning off memory management and
467  * getting on the dump stack, either when called above, or by
468  * the auto-restart code.
469  */
470 static void
471 dumpsys(void)
472 {
473 	int	error;
474 
475 	savectx(&dumppcb);
476 	if (dumping++) {
477 		printf("Dump already in progress, bailing...\n");
478 		return;
479 	}
480 	if (!dodump)
481 		return;
482 	if (dumpdev == NODEV)
483 		return;
484 	if (!(devsw(dumpdev)))
485 		return;
486 	if (!(devsw(dumpdev)->d_dump))
487 		return;
488 	dumpsize = Maxmem;
489 	printf("\ndumping to dev %s, offset %ld\n", devtoname(dumpdev), dumplo);
490 	printf("dump ");
491 	error = (*devsw(dumpdev)->d_dump)(dumpdev);
492 	if (error == 0) {
493 		printf("succeeded\n");
494 		return;
495 	}
496 	printf("failed, reason: ");
497 	switch (error) {
498 	case ENODEV:
499 		printf("device doesn't support a dump routine\n");
500 		break;
501 
502 	case ENXIO:
503 		printf("device bad\n");
504 		break;
505 
506 	case EFAULT:
507 		printf("device not ready\n");
508 		break;
509 
510 	case EINVAL:
511 		printf("area improper\n");
512 		break;
513 
514 	case EIO:
515 		printf("i/o error\n");
516 		break;
517 
518 	case EINTR:
519 		printf("aborted from console\n");
520 		break;
521 
522 	default:
523 		printf("unknown, error = %d\n", error);
524 		break;
525 	}
526 }
527 
528 /*
529  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
530  * and then reboots.  If we are called twice, then we avoid trying to sync
531  * the disks as this often leads to recursive panics.
532  */
533 void
534 panic(const char *fmt, ...)
535 {
536 	int bootopt;
537 	va_list ap;
538 	static char buf[256];
539 
540 #ifdef SMP
541 	/* Only 1 CPU can panic at a time */
542 	mtx_lock(&panic_mtx);
543 #endif
544 
545 	bootopt = RB_AUTOBOOT | RB_DUMP;
546 	if (panicstr)
547 		bootopt |= RB_NOSYNC;
548 	else
549 		panicstr = fmt;
550 
551 	va_start(ap, fmt);
552 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
553 	if (panicstr == fmt)
554 		panicstr = buf;
555 	va_end(ap);
556 	printf("panic: %s\n", buf);
557 #ifdef SMP
558 	/* two separate prints in case of an unmapped page and trap */
559 	printf("cpuid = %d; ", PCPU_GET(cpuid));
560 #ifdef APIC_IO
561 	printf("lapic.id = %08x\n", lapic.id);
562 #endif
563 #endif
564 
565 #if defined(DDB)
566 	if (debugger_on_panic)
567 		Debugger ("panic");
568 #endif
569 	boot(bootopt);
570 }
571 
572 /*
573  * Support for poweroff delay.
574  */
575 #ifndef POWEROFF_DELAY
576 # define POWEROFF_DELAY 5000
577 #endif
578 static int poweroff_delay = POWEROFF_DELAY;
579 
580 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
581 	&poweroff_delay, 0, "");
582 
583 static void
584 poweroff_wait(void *junk, int howto)
585 {
586 	if(!(howto & RB_POWEROFF) || poweroff_delay <= 0)
587 		return;
588 	DELAY(poweroff_delay * 1000);
589 }
590 
/*
 * Some system processes (e.g. syncer) need to be stopped at appropriate
 * points in their main loops prior to a system shutdown, so that they
 * won't interfere with the shutdown process (e.g. by holding a disk buf
 * and so causing sync to fail).  For each of these system processes,
 * register kproc_shutdown() as a handler for one of the shutdown events.
 */
598 static int kproc_shutdown_wait = 60;
599 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
600     &kproc_shutdown_wait, 0, "");
601 
602 void
603 kproc_shutdown(void *arg, int howto)
604 {
605 	struct proc *p;
606 	int error;
607 
608 	if (panicstr)
609 		return;
610 
611 	p = (struct proc *)arg;
612 	printf("Waiting (max %d seconds) for system process `%s' to stop...",
613 	    kproc_shutdown_wait, p->p_comm);
614 	error = kthread_suspend(p, kproc_shutdown_wait * hz);
615 
616 	if (error == EWOULDBLOCK)
617 		printf("timed out\n");
618 	else
619 		printf("stopped\n");
620 }
621