1 /*- 2 * Copyright (c) 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
*
 *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ekcd.h"
#include "opt_kdb.h"
#include "opt_panic.h"
#include "opt_sched.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
#include <sys/watchdog.h>

#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha256.h>

#include <ddb/ddb.h>

#include <machine/cpu.h>
#include <machine/dump.h>
#include <machine/pcb.h>
#include <machine/smp.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/signalvar.h>

/* Allocation type for the dumper's block-sized bounce buffer. */
static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");

/*
 * kern.panic_reboot_wait_time: delay used by shutdown_panic().  The
 * compiled-in default can be overridden by defining PANIC_REBOOT_WAIT_TIME.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif
static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
    &panic_reboot_wait_time, 0,
    "Seconds to wait before rebooting after a panic");

/*
 * Note that stdarg.h and the ANSI style va_start macro is used for both
 * ANSI and traditional C compilers.
 */
#include <machine/stdarg.h>

#ifdef KDB
/* debug.debugger_on_panic: defaults to off only when KDB_UNATTENDED is set. */
#ifdef KDB_UNATTENDED
int debugger_on_panic = 0;
#else
int debugger_on_panic = 1;
#endif
SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_panic, 0, "Run debugger on kernel panic");

/* debug.trace_on_panic: defaults to on only when KDB_TRACE is set. */
#ifdef KDB_TRACE
static int trace_on_panic = 1;
#else
static int trace_on_panic = 0;
#endif
SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &trace_on_panic, 0, "Print stack trace on kernel panic");
#endif /* KDB */

/* kern.sync_on_panic: when zero, vpanic() adds RB_NOSYNC to the boot flags. */
static int sync_on_panic = 0;
SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
	&sync_on_panic, 0, "Do a sync before rebooting from a panic");

static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
    "Shutdown environment");

/*
 * kern.shutdown.show_busybufs: handed to bufshutdown() by kern_reboot().
 * Enabled by default only in DIAGNOSTIC kernels.
 */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
	&show_busybufs, 0, "");

/* kern.suspend_blocked: exported flag; not modified anywhere in this file. */
int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
	&suspend_blocked, 0, "Block suspend due to a pending shutdown");

#ifdef EKCD
FEATURE(ekcd, "Encrypted kernel crash dumps support");

MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");

/*
 * Per-dumper encryption state.  Allocated by kerneldumpcrypto_create() with
 * kdc_dumpkeysize extra bytes for the trailing kdc_dumpkey record.
 */
struct kerneldumpcrypto {
	uint8_t			kdc_encryption;	/* KERNELDUMP_ENC_* algorithm */
	uint8_t			kdc_iv[KERNELDUMP_IV_MAX_SIZE];	/* current IV */
	keyInstance		kdc_ki;		/* rijndael key schedule */
	cipherInstance		kdc_ci;		/* rijndael cipher (CBC) state */
	off_t			kdc_nextoffset;	/* offset expected by next write */
	uint32_t		kdc_dumpkeysize; /* bytes in kdc_dumpkey[] */
	struct kerneldumpkey	kdc_dumpkey[];	/* key record written to media */
};
#endif

/*
 * Variable panicstr contains argument to first call to panic; used as flag
 * to indicate that the kernel has already called panic.
169 */ 170 const char *panicstr; 171 172 int dumping; /* system is dumping */ 173 int rebooting; /* system is rebooting */ 174 static struct dumperinfo dumper; /* our selected dumper */ 175 176 /* Context information for dump-debuggers. */ 177 static struct pcb dumppcb; /* Registers. */ 178 lwpid_t dumptid; /* Thread ID. */ 179 180 static struct cdevsw reroot_cdevsw = { 181 .d_version = D_VERSION, 182 .d_name = "reroot", 183 }; 184 185 static void poweroff_wait(void *, int); 186 static void shutdown_halt(void *junk, int howto); 187 static void shutdown_panic(void *junk, int howto); 188 static void shutdown_reset(void *junk, int howto); 189 static int kern_reroot(void); 190 191 /* register various local shutdown events */ 192 static void 193 shutdown_conf(void *unused) 194 { 195 196 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 197 SHUTDOWN_PRI_FIRST); 198 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 199 SHUTDOWN_PRI_LAST + 100); 200 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 201 SHUTDOWN_PRI_LAST + 100); 202 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 203 SHUTDOWN_PRI_LAST + 200); 204 } 205 206 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 207 208 /* 209 * The only reason this exists is to create the /dev/reroot/ directory, 210 * used by reroot code in init(8) as a mountpoint for tmpfs. 211 */ 212 static void 213 reroot_conf(void *unused) 214 { 215 int error; 216 struct cdev *cdev; 217 218 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 219 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 220 if (error != 0) { 221 printf("%s: failed to create device node, error %d", 222 __func__, error); 223 } 224 } 225 226 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 227 228 /* 229 * The system call that results in a reboot. 
*
 * Runs the MAC check (only when MAC is configured) and then the PRIV_REBOOT
 * privilege check.  If both pass, RB_REROOT in uap->opt selects
 * kern_reroot(); anything else calls kern_reboot() with Giant held.
 * Returns the error from the first failing check, or kern_reroot()'s result.
 */
/* ARGSUSED */
int
sys_reboot(struct thread *td, struct reboot_args *uap)
{
	int error;

	error = 0;
#ifdef MAC
	error = mac_system_check_reboot(td->td_ucred, uap->opt);
#endif
	if (error == 0)
		error = priv_check(td, PRIV_REBOOT);
	if (error == 0) {
		if (uap->opt & RB_REROOT) {
			error = kern_reroot();
		} else {
			mtx_lock(&Giant);
			kern_reboot(uap->opt);
			mtx_unlock(&Giant);
		}
	}
	return (error);
}

/*
 * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
 *
 * With init(8) present, the request is forwarded as a signal: SIGUSR2 for
 * RB_POWEROFF, SIGUSR1 for RB_HALT, SIGINT otherwise.  Without init(8) the
 * kernel reboots directly with RB_NOSYNC added to howto.
 */
void
shutdown_nice(int howto)
{

	if (initproc != NULL) {
		/* Send a signal to init(8) and have it shutdown the world. */
		PROC_LOCK(initproc);
		if (howto & RB_POWEROFF)
			kern_psignal(initproc, SIGUSR2);
		else if (howto & RB_HALT)
			kern_psignal(initproc, SIGUSR1);
		else
			kern_psignal(initproc, SIGINT);
		PROC_UNLOCK(initproc);
	} else {
		/* No init(8) running, so simply reboot. */
		kern_reboot(howto | RB_NOSYNC);
	}
}

/*
 * Print the system uptime as "Uptime: [Nd][Nh][Nm]Ns".
 *
 * 'f' becomes non-zero once any larger unit has been printed, which forces
 * every smaller unit to be printed as well (even when it is zero).
 */
static void
print_uptime(void)
{
	int f;
	struct timespec ts;

	getnanouptime(&ts);
	printf("Uptime: ");
	f = 0;
	if (ts.tv_sec >= 86400) {
		printf("%ldd", (long)ts.tv_sec / 86400);
		ts.tv_sec %= 86400;
		f = 1;
	}
	if (f || ts.tv_sec >= 3600) {
		printf("%ldh", (long)ts.tv_sec / 3600);
		ts.tv_sec %= 3600;
		f = 1;
	}
	if (f || ts.tv_sec >= 60) {
		printf("%ldm", (long)ts.tv_sec / 60);
		ts.tv_sec %= 60;
		f = 1;
	}
	printf("%lds\n", (long)ts.tv_sec);
}

/*
 * Write a crash dump through the registered dumper.
 *
 * Returns EBUSY when a dump is already in progress, ENXIO when no dumper is
 * registered, and otherwise the result of dumpsys().  When DDB is configured
 * and both 'textdump' and textdump_pending are set, a textdump is written
 * instead; its result is not captured, so 0 is returned in that case.
 */
int
doadump(boolean_t textdump)
{
	boolean_t coredump;
	int error;

	error = 0;
	if (dumping)
		return (EBUSY);
	if (dumper.dumper == NULL)
		return (ENXIO);

	/* Record register context and thread ID for dump-debuggers. */
	savectx(&dumppcb);
	dumptid = curthread->td_tid;
	dumping++;

	coredump = TRUE;
#ifdef DDB
	if (textdump && textdump_pending) {
		coredump = FALSE;
		textdump_dumpsys(&dumper);
	}
#endif
	if (coredump)
		error = dumpsys(&dumper);

	dumping--;
	return (error);
}

/*
 * Shutdown the system cleanly to prepare for reboot, halt, or power off.
 *
 * Sequence: bind to CPU 0 (SMP, scheduler still running), invoke the
 * shutdown_pre_sync handlers, flush buffers once (unless cold or RB_NOSYNC),
 * print the uptime, grab the console, invoke shutdown_post_sync, optionally
 * dump, invoke shutdown_final, and finally spin forever.
 */
void
kern_reboot(int howto)
{
	/* Ensures bufshutdown() runs at most once across repeated calls. */
	static int once = 0;

#if defined(SMP)
	/*
	 * Bind us to CPU 0 so that all shutdown code runs there.  Some
	 * systems don't shutdown properly (i.e., ACPI power off) if we
	 * run on another processor.
	 */
	if (!SCHEDULER_STOPPED()) {
		thread_lock(curthread);
		sched_bind(curthread, 0);
		thread_unlock(curthread);
		KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0"));
	}
#endif
	/* We're in the process of rebooting. */
	rebooting = 1;

	/* We are out of the debugger now. */
	kdb_active = 0;

	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);

	/*
	 * Now sync filesystems
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
		once = 1;
		bufshutdown(show_busybufs);
	}

	print_uptime();

	cngrab();

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);

	/* Dump only when RB_DUMP is requested without RB_HALT. */
	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping)
		doadump(TRUE);

	/* Now that we're going to really halt the system... */
	EVENTHANDLER_INVOKE(shutdown_final, howto);

	for(;;) ;	/* safety against shutdown_reset not working */
	/* NOTREACHED */
}

/*
 * The system call that results in changing the rootfs.
 *
 * Only init may do this; any other caller gets EPERM.  Returns ENOENT when
 * the filesystem holding the running executable cannot be busied or its
 * vnode has been doomed, a vn_lock() error if the initial lock fails, and 0
 * on success.
 */
static int
kern_reroot(void)
{
	struct vnode *oldrootvnode, *vp;
	struct mount *mp, *devmp;
	int error;

	if (curproc != initproc)
		return (EPERM);

	/*
	 * Mark the filesystem containing currently-running executable
	 * (the temporary copy of init(8)) busy.
	 */
	vp = curproc->p_textvp;
	error = vn_lock(vp, LK_SHARED);
	if (error != 0)
		return (error);
	mp = vp->v_mount;
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error != 0) {
		/*
		 * The non-blocking attempt failed: hold a reference on the
		 * mount, drop the vnode lock, retry vfs_busy() without
		 * MBF_NOWAIT, then relock and recheck the vnode.
		 */
		vfs_ref(mp);
		VOP_UNLOCK(vp, 0);
		error = vfs_busy(mp, 0);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		vfs_rel(mp);
		if (error != 0) {
			VOP_UNLOCK(vp, 0);
			return (ENOENT);
		}
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			vfs_unbusy(mp);
			return (ENOENT);
		}
	}
	VOP_UNLOCK(vp, 0);

	/*
	 * Remove the filesystem containing currently-running executable
	 * from the mount list, to prevent it from being unmounted
	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
	 *
	 * Also preserve /dev - forcibly unmounting it could cause driver
	 * reinitialization.
	 */

	vfs_ref(rootdevmp);
	devmp = rootdevmp;
	rootdevmp = NULL;

	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
	mtx_unlock(&mountlist_mtx);

	/* Remember the old root so references can be migrated afterwards. */
	oldrootvnode = rootvnode;

	/*
	 * Unmount everything except for the two filesystems preserved above.
	 */
	vfs_unmountall();

	/*
	 * Add /dev back; vfs_mountroot() will move it into its new place.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	rootdevmp = devmp;
	vfs_rel(rootdevmp);

	/*
	 * Mount the new rootfs.
	 */
	vfs_mountroot();

	/*
	 * Update all references to the old rootvnode.
	 */
	mountcheckdirs(oldrootvnode, rootvnode);

	/*
	 * Add the temporary filesystem back and unbusy it.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_unbusy(mp);

	return (0);
}

/*
 * If the shutdown was a clean halt, behave accordingly.
 *
 * Registered on shutdown_final.  With RB_HALT set, it prints the halt
 * banner and blocks in cngetc(); a return of -1 halts the CPU, any other
 * key lets the remaining shutdown_final handlers run.
 */
static void
shutdown_halt(void *junk, int howto)
{

	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		switch (cngetc()) {
		case -1:		/* No console, just die */
			cpu_halt();
			/* NOTREACHED */
		default:
			/*
			 * NOTE(review): howto is a by-value parameter, so
			 * clearing RB_HALT here is not visible to the caller.
			 */
			howto &= ~RB_HALT;
			break;
		}
	}
}

/*
 * Check to see if the system panicked, pause and then reboot
 * according to the specified delay.
516 */ 517 static void 518 shutdown_panic(void *junk, int howto) 519 { 520 int loop; 521 522 if (howto & RB_DUMP) { 523 if (panic_reboot_wait_time != 0) { 524 if (panic_reboot_wait_time != -1) { 525 printf("Automatic reboot in %d seconds - " 526 "press a key on the console to abort\n", 527 panic_reboot_wait_time); 528 for (loop = panic_reboot_wait_time * 10; 529 loop > 0; --loop) { 530 DELAY(1000 * 100); /* 1/10th second */ 531 /* Did user type a key? */ 532 if (cncheckc() != -1) 533 break; 534 } 535 if (!loop) 536 return; 537 } 538 } else { /* zero time specified - reboot NOW */ 539 return; 540 } 541 printf("--> Press a key on the console to reboot,\n"); 542 printf("--> or switch off the system now.\n"); 543 cngetc(); 544 } 545 } 546 547 /* 548 * Everything done, now reset 549 */ 550 static void 551 shutdown_reset(void *junk, int howto) 552 { 553 554 printf("Rebooting...\n"); 555 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 556 557 /* 558 * Acquiring smp_ipi_mtx here has a double effect: 559 * - it disables interrupts avoiding CPU0 preemption 560 * by fast handlers (thus deadlocking against other CPUs) 561 * - it avoids deadlocks against smp_rendezvous() or, more 562 * generally, threads busy-waiting, with this spinlock held, 563 * and waiting for responses by threads on other CPUs 564 * (ie. smp_tlb_shootdown()). 565 * 566 * For the !SMP case it just needs to handle the former problem. 
567 */ 568 #ifdef SMP 569 mtx_lock_spin(&smp_ipi_mtx); 570 #else 571 spinlock_enter(); 572 #endif 573 574 /* cpu_boot(howto); */ /* doesn't do anything at the moment */ 575 cpu_reset(); 576 /* NOTREACHED */ /* assuming reset worked */ 577 } 578 579 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 580 static int kassert_warn_only = 0; 581 #ifdef KDB 582 static int kassert_do_kdb = 0; 583 #endif 584 #ifdef KTR 585 static int kassert_do_ktr = 0; 586 #endif 587 static int kassert_do_log = 1; 588 static int kassert_log_pps_limit = 4; 589 static int kassert_log_mute_at = 0; 590 static int kassert_log_panic_at = 0; 591 static int kassert_warnings = 0; 592 593 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); 594 595 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN, 596 &kassert_warn_only, 0, 597 "KASSERT triggers a panic (1) or just a warning (0)"); 598 599 #ifdef KDB 600 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN, 601 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 602 #endif 603 604 #ifdef KTR 605 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN, 606 &kassert_do_ktr, 0, 607 "KASSERT does a KTR, set this to the KTRMASK you want"); 608 #endif 609 610 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN, 611 &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)"); 612 613 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN, 614 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 615 616 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN, 617 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 618 619 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN, 620 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 621 622 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN, 623 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 624 625 static int 
kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 626 627 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 628 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, 629 kassert_sysctl_kassert, "I", "set to trigger a test kassert"); 630 631 static int 632 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 633 { 634 int error, i; 635 636 error = sysctl_wire_old_buffer(req, sizeof(int)); 637 if (error == 0) { 638 i = 0; 639 error = sysctl_handle_int(oidp, &i, 0, req); 640 } 641 if (error != 0 || req->newptr == NULL) 642 return (error); 643 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 644 return (0); 645 } 646 647 /* 648 * Called by KASSERT, this decides if we will panic 649 * or if we will log via printf and/or ktr. 650 */ 651 void 652 kassert_panic(const char *fmt, ...) 653 { 654 static char buf[256]; 655 va_list ap; 656 657 va_start(ap, fmt); 658 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 659 va_end(ap); 660 661 /* 662 * panic if we're not just warning, or if we've exceeded 663 * kassert_log_panic_at warnings. 664 */ 665 if (!kassert_warn_only || 666 (kassert_log_panic_at > 0 && 667 kassert_warnings >= kassert_log_panic_at)) { 668 va_start(ap, fmt); 669 vpanic(fmt, ap); 670 /* NORETURN */ 671 } 672 #ifdef KTR 673 if (kassert_do_ktr) 674 CTR0(ktr_mask, buf); 675 #endif /* KTR */ 676 /* 677 * log if we've not yet met the mute limit. 678 */ 679 if (kassert_do_log && 680 (kassert_log_mute_at == 0 || 681 kassert_warnings < kassert_log_mute_at)) { 682 static struct timeval lasterr; 683 static int curerr; 684 685 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 686 printf("KASSERT failed: %s\n", buf); 687 kdb_backtrace(); 688 } 689 } 690 #ifdef KDB 691 if (kassert_do_kdb) { 692 kdb_enter(KDB_WHY_KASSERT, buf); 693 } 694 #endif 695 atomic_add_int(&kassert_warnings, 1); 696 } 697 #endif 698 699 /* 700 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 701 * and then reboots. 
If we are called twice, then we avoid trying to sync
 * the disks as this often leads to recursive panics.
 *
 * Thin varargs front end: all the work is done by vpanic().
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	/* No va_end(): vpanic() ends in kern_reboot(), which never returns. */
	vpanic(fmt, ap);
}

/*
 * Common panic path.
 *
 * On the first panic (panicstr still NULL) the message is formatted into a
 * static buffer that panicstr is pointed at, and RB_DUMP is requested.  On
 * a nested panic the message is only printed and RB_NOSYNC is requested.
 * In both cases the function finishes by calling kern_reboot().
 */
void
vpanic(const char *fmt, va_list ap)
{
#ifdef SMP
	cpuset_t other_cpus;
#endif
	struct thread *td = curthread;
	int bootopt, newpanic;
	static char buf[256];

	spinlock_enter();

#ifdef SMP
	/*
	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
	 * concurrently entering panic.  Only the winner will proceed
	 * further.
	 */
	if (panicstr == NULL && !kdb_active) {
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
		stop_cpus_hard(other_cpus);
	}
#endif

	/*
	 * Ensure that the scheduler is stopped while panicking, even if panic
	 * has been entered from kdb.
	 */
	td->td_stopsched = 1;

	bootopt = RB_AUTOBOOT;
	newpanic = 0;
	if (panicstr)
		bootopt |= RB_NOSYNC;
	else {
		bootopt |= RB_DUMP;
		/* Mark the panic immediately; replaced by buf just below. */
		panicstr = fmt;
		newpanic = 1;
	}

	if (newpanic) {
		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
		panicstr = buf;
		cngrab();
		printf("panic: %s\n", buf);
	} else {
		/* Nested panic: print directly, leaving buf/panicstr intact. */
		printf("panic: ");
		vprintf(fmt, ap);
		printf("\n");
	}
#ifdef SMP
	printf("cpuid = %d\n", PCPU_GET(cpuid));
#endif
	printf("time = %jd\n", (intmax_t )time_second);
#ifdef KDB
	if (newpanic && trace_on_panic)
		kdb_backtrace();
	if (debugger_on_panic)
		kdb_enter(KDB_WHY_PANIC, "panic");
#endif
	/*thread_lock(td); */
	td->td_flags |= TDF_INPANIC;
	/* thread_unlock(td); */
	if (!sync_on_panic)
		bootopt |= RB_NOSYNC;
	kern_reboot(bootopt);
}

/*
 * Support for poweroff delay.
784 * 785 * Please note that setting this delay too short might power off your machine 786 * before the write cache on your hard disk has been flushed, leading to 787 * soft-updates inconsistencies. 788 */ 789 #ifndef POWEROFF_DELAY 790 # define POWEROFF_DELAY 5000 791 #endif 792 static int poweroff_delay = POWEROFF_DELAY; 793 794 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 795 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 796 797 static void 798 poweroff_wait(void *junk, int howto) 799 { 800 801 if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) 802 return; 803 DELAY(poweroff_delay * 1000); 804 } 805 806 /* 807 * Some system processes (e.g. syncer) need to be stopped at appropriate 808 * points in their main loops prior to a system shutdown, so that they 809 * won't interfere with the shutdown process (e.g. by holding a disk buf 810 * to cause sync to fail). For each of these system processes, register 811 * shutdown_kproc() as a handler for one of shutdown events. 812 */ 813 static int kproc_shutdown_wait = 60; 814 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 815 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 816 817 void 818 kproc_shutdown(void *arg, int howto) 819 { 820 struct proc *p; 821 int error; 822 823 if (panicstr) 824 return; 825 826 p = (struct proc *)arg; 827 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 828 kproc_shutdown_wait, p->p_comm); 829 error = kproc_suspend(p, kproc_shutdown_wait * hz); 830 831 if (error == EWOULDBLOCK) 832 printf("timed out\n"); 833 else 834 printf("done\n"); 835 } 836 837 void 838 kthread_shutdown(void *arg, int howto) 839 { 840 struct thread *td; 841 int error; 842 843 if (panicstr) 844 return; 845 846 td = (struct thread *)arg; 847 printf("Waiting (max %d seconds) for system thread `%s' to stop... 
", 848 kproc_shutdown_wait, td->td_name); 849 error = kthread_suspend(td, kproc_shutdown_wait * hz); 850 851 if (error == EWOULDBLOCK) 852 printf("timed out\n"); 853 else 854 printf("done\n"); 855 } 856 857 static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)]; 858 SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, 859 dumpdevname, 0, "Device for kernel dumps"); 860 861 #ifdef EKCD 862 static struct kerneldumpcrypto * 863 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 864 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 865 { 866 struct kerneldumpcrypto *kdc; 867 struct kerneldumpkey *kdk; 868 uint32_t dumpkeysize; 869 870 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 871 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 872 873 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 874 875 kdc->kdc_encryption = encryption; 876 switch (kdc->kdc_encryption) { 877 case KERNELDUMP_ENC_AES_256_CBC: 878 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 879 goto failed; 880 break; 881 default: 882 goto failed; 883 } 884 885 kdc->kdc_dumpkeysize = dumpkeysize; 886 kdk = kdc->kdc_dumpkey; 887 kdk->kdk_encryption = kdc->kdc_encryption; 888 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 889 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 890 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 891 892 return (kdc); 893 failed: 894 explicit_bzero(kdc, sizeof(*kdc) + dumpkeysize); 895 free(kdc, M_EKCD); 896 return (NULL); 897 } 898 #endif /* EKCD */ 899 900 int 901 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 902 { 903 #ifndef EKCD 904 return (0); 905 #else 906 uint8_t hash[SHA256_DIGEST_LENGTH]; 907 SHA256_CTX ctx; 908 struct kerneldumpkey *kdk; 909 int error; 910 911 error = 0; 912 913 if (kdc == NULL) 914 return (0); 915 916 /* 917 * When a user enters ddb it can write a crash dump multiple times. 
918 * Each time it should be encrypted using a different IV. 919 */ 920 SHA256_Init(&ctx); 921 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 922 SHA256_Final(hash, &ctx); 923 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 924 925 switch (kdc->kdc_encryption) { 926 case KERNELDUMP_ENC_AES_256_CBC: 927 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 928 kdc->kdc_iv) <= 0) { 929 error = EINVAL; 930 goto out; 931 } 932 break; 933 default: 934 error = EINVAL; 935 goto out; 936 } 937 938 kdc->kdc_nextoffset = 0; 939 940 kdk = kdc->kdc_dumpkey; 941 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 942 out: 943 explicit_bzero(hash, sizeof(hash)); 944 return (error); 945 #endif 946 } 947 948 uint32_t 949 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 950 { 951 952 #ifdef EKCD 953 if (kdc == NULL) 954 return (0); 955 return (kdc->kdc_dumpkeysize); 956 #else 957 return (0); 958 #endif 959 } 960 961 /* Registration of dumpers */ 962 int 963 set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, 964 uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, 965 const uint8_t *encryptedkey) 966 { 967 size_t wantcopy; 968 int error; 969 970 error = priv_check(td, PRIV_SETDUMPER); 971 if (error != 0) 972 return (error); 973 974 if (di == NULL) { 975 error = 0; 976 goto cleanup; 977 } 978 if (dumper.dumper != NULL) 979 return (EBUSY); 980 dumper = *di; 981 dumper.blockbuf = NULL; 982 dumper.kdc = NULL; 983 984 if (encryption != KERNELDUMP_ENC_NONE) { 985 #ifdef EKCD 986 dumper.kdc = kerneldumpcrypto_create(di->blocksize, encryption, 987 key, encryptedkeysize, encryptedkey); 988 if (dumper.kdc == NULL) { 989 error = EINVAL; 990 goto cleanup; 991 } 992 #else 993 error = EOPNOTSUPP; 994 goto cleanup; 995 #endif 996 } 997 998 wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); 999 if (wantcopy >= sizeof(dumpdevname)) { 1000 printf("set_dumper: device name truncated from '%s' -> '%s'\n", 1001 devname, dumpdevname); 
1002 } 1003 1004 dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1005 return (0); 1006 cleanup: 1007 #ifdef EKCD 1008 if (dumper.kdc != NULL) { 1009 explicit_bzero(dumper.kdc, sizeof(*dumper.kdc) + 1010 dumper.kdc->kdc_dumpkeysize); 1011 free(dumper.kdc, M_EKCD); 1012 } 1013 #endif 1014 if (dumper.blockbuf != NULL) { 1015 explicit_bzero(dumper.blockbuf, dumper.blocksize); 1016 free(dumper.blockbuf, M_DUMPER); 1017 } 1018 explicit_bzero(&dumper, sizeof(dumper)); 1019 dumpdevname[0] = '\0'; 1020 return (error); 1021 } 1022 1023 static int 1024 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1025 { 1026 1027 if (length != 0 && (offset < di->mediaoffset || 1028 offset - di->mediaoffset + length > di->mediasize)) { 1029 printf("Attempt to write outside dump device boundaries.\n" 1030 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1031 (intmax_t)offset, (intmax_t)di->mediaoffset, 1032 (uintmax_t)length, (intmax_t)di->mediasize); 1033 return (ENOSPC); 1034 } 1035 1036 return (0); 1037 } 1038 1039 #ifdef EKCD 1040 static int 1041 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1042 { 1043 1044 switch (kdc->kdc_encryption) { 1045 case KERNELDUMP_ENC_AES_256_CBC: 1046 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1047 8 * size, buf) <= 0) { 1048 return (EIO); 1049 } 1050 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1051 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1052 return (EIO); 1053 } 1054 break; 1055 default: 1056 return (EINVAL); 1057 } 1058 1059 return (0); 1060 } 1061 1062 /* Encrypt data and call dumper. 
*/ 1063 static int 1064 dump_encrypted_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1065 off_t offset, size_t length) 1066 { 1067 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1068 struct kerneldumpcrypto *kdc; 1069 int error; 1070 size_t nbytes; 1071 off_t nextoffset; 1072 1073 kdc = di->kdc; 1074 1075 error = dump_check_bounds(di, offset, length); 1076 if (error != 0) 1077 return (error); 1078 1079 /* Signal completion. */ 1080 if (virtual == NULL && physical == 0 && offset == 0 && length == 0) { 1081 return (di->dumper(di->priv, virtual, physical, offset, 1082 length)); 1083 } 1084 1085 /* Data have to be aligned to block size. */ 1086 if ((length % di->blocksize) != 0) 1087 return (EINVAL); 1088 1089 /* 1090 * Data have to be written continuously becase we're encrypting using 1091 * CBC mode which has this assumption. 1092 */ 1093 if (kdc->kdc_nextoffset != 0 && kdc->kdc_nextoffset != offset) 1094 return (EINVAL); 1095 1096 nextoffset = offset + (off_t)length; 1097 1098 while (length > 0) { 1099 nbytes = MIN(length, sizeof(buf)); 1100 bcopy(virtual, buf, nbytes); 1101 1102 if (dump_encrypt(kdc, buf, nbytes) != 0) 1103 return (EIO); 1104 1105 error = di->dumper(di->priv, buf, physical, offset, nbytes); 1106 if (error != 0) 1107 return (error); 1108 1109 offset += nbytes; 1110 virtual = (void *)((uint8_t *)virtual + nbytes); 1111 length -= nbytes; 1112 } 1113 1114 kdc->kdc_nextoffset = nextoffset; 1115 1116 return (0); 1117 } 1118 #endif /* EKCD */ 1119 1120 /* Call dumper with bounds checking. 
*/ 1121 static int 1122 dump_raw_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1123 off_t offset, size_t length) 1124 { 1125 int error; 1126 1127 error = dump_check_bounds(di, offset, length); 1128 if (error != 0) 1129 return (error); 1130 1131 return (di->dumper(di->priv, virtual, physical, offset, length)); 1132 } 1133 1134 int 1135 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1136 off_t offset, size_t length) 1137 { 1138 1139 #ifdef EKCD 1140 if (di->kdc != NULL) { 1141 return (dump_encrypted_write(di, virtual, physical, offset, 1142 length)); 1143 } 1144 #endif 1145 1146 return (dump_raw_write(di, virtual, physical, offset, length)); 1147 } 1148 1149 static int 1150 dump_pad(struct dumperinfo *di, void *virtual, size_t length, void **buf, 1151 size_t *size) 1152 { 1153 1154 if (length > di->blocksize) 1155 return (ENOMEM); 1156 1157 *size = di->blocksize; 1158 if (length == di->blocksize) { 1159 *buf = virtual; 1160 } else { 1161 *buf = di->blockbuf; 1162 memcpy(*buf, virtual, length); 1163 memset((uint8_t *)*buf + length, 0, di->blocksize - length); 1164 } 1165 1166 return (0); 1167 } 1168 1169 static int 1170 dump_raw_write_pad(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1171 off_t offset, size_t length, size_t *size) 1172 { 1173 void *buf; 1174 int error; 1175 1176 error = dump_pad(di, virtual, length, &buf, size); 1177 if (error != 0) 1178 return (error); 1179 1180 return (dump_raw_write(di, buf, physical, offset, *size)); 1181 } 1182 1183 int 1184 dump_write_pad(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1185 off_t offset, size_t length, size_t *size) 1186 { 1187 void *buf; 1188 int error; 1189 1190 error = dump_pad(di, virtual, length, &buf, size); 1191 if (error != 0) 1192 return (error); 1193 1194 return (dump_write(di, buf, physical, offset, *size)); 1195 } 1196 1197 int 1198 dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh, 1199 vm_offset_t physical, 
off_t offset) 1200 { 1201 size_t size; 1202 int ret; 1203 1204 ret = dump_raw_write_pad(di, kdh, physical, offset, sizeof(*kdh), 1205 &size); 1206 if (ret == 0 && size != di->blocksize) 1207 ret = EINVAL; 1208 return (ret); 1209 } 1210 1211 int 1212 dump_write_key(struct dumperinfo *di, vm_offset_t physical, off_t offset) 1213 { 1214 #ifndef EKCD 1215 return (0); 1216 #else /* EKCD */ 1217 struct kerneldumpcrypto *kdc; 1218 1219 kdc = di->kdc; 1220 if (kdc == NULL) 1221 return (0); 1222 1223 return (dump_raw_write(di, kdc->kdc_dumpkey, physical, offset, 1224 kdc->kdc_dumpkeysize)); 1225 #endif /* !EKCD */ 1226 } 1227 1228 void 1229 mkdumpheader(struct kerneldumpheader *kdh, char *magic, uint32_t archver, 1230 uint64_t dumplen, uint32_t dumpkeysize, uint32_t blksz) 1231 { 1232 1233 bzero(kdh, sizeof(*kdh)); 1234 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1235 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1236 kdh->version = htod32(KERNELDUMPVERSION); 1237 kdh->architectureversion = htod32(archver); 1238 kdh->dumplength = htod64(dumplen); 1239 kdh->dumptime = htod64(time_second); 1240 kdh->dumpkeysize = htod32(dumpkeysize); 1241 kdh->blocksize = htod32(blksz); 1242 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1243 strlcpy(kdh->versionstring, version, sizeof(kdh->versionstring)); 1244 if (panicstr != NULL) 1245 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1246 kdh->parity = kerneldump_parity(kdh); 1247 } 1248 1249 #ifdef DDB 1250 DB_SHOW_COMMAND(panic, db_show_panic) 1251 { 1252 1253 if (panicstr == NULL) 1254 db_printf("panicstr not set\n"); 1255 else 1256 db_printf("panic: %s\n", panicstr); 1257 } 1258 #endif 1259