1 /*- 2 * Copyright (c) 1986, 1988, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_ddb.h" 41 #include "opt_ekcd.h" 42 #include "opt_kdb.h" 43 #include "opt_panic.h" 44 #include "opt_sched.h" 45 #include "opt_watchdog.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/bio.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/cons.h> 53 #include <sys/eventhandler.h> 54 #include <sys/filedesc.h> 55 #include <sys/jail.h> 56 #include <sys/kdb.h> 57 #include <sys/kernel.h> 58 #include <sys/kerneldump.h> 59 #include <sys/kthread.h> 60 #include <sys/ktr.h> 61 #include <sys/malloc.h> 62 #include <sys/mount.h> 63 #include <sys/priv.h> 64 #include <sys/proc.h> 65 #include <sys/reboot.h> 66 #include <sys/resourcevar.h> 67 #include <sys/rwlock.h> 68 #include <sys/sched.h> 69 #include <sys/smp.h> 70 #include <sys/sysctl.h> 71 #include <sys/sysproto.h> 72 #include <sys/vnode.h> 73 #include <sys/watchdog.h> 74 75 #include <crypto/rijndael/rijndael-api-fst.h> 76 #include <crypto/sha2/sha256.h> 77 78 #include <ddb/ddb.h> 79 80 #include <machine/cpu.h> 81 #include <machine/dump.h> 82 #include <machine/pcb.h> 83 #include <machine/smp.h> 84 85 #include <security/mac/mac_framework.h> 86 87 #include <vm/vm.h> 88 #include <vm/vm_object.h> 89 #include <vm/vm_page.h> 90 #include <vm/vm_pager.h> 91 #include <vm/swap_pager.h> 92 93 #include <sys/signalvar.h> 94 95 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 96 97 #ifndef PANIC_REBOOT_WAIT_TIME 98 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 99 #endif 100 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 101 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, 102 &panic_reboot_wait_time, 0, 103 "Seconds to wait before rebooting after a panic"); 104 105 /* 106 * Note that stdarg.h and the ANSI style va_start macro is used for both 107 * ANSI and traditional C compilers. 108 */ 109 #include <machine/stdarg.h> 110 111 #ifdef KDB 112 #ifdef KDB_UNATTENDED 113 int debugger_on_panic = 0; 114 #else 115 int debugger_on_panic = 1; 116 #endif 117 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 118 CTLFLAG_RWTUN | CTLFLAG_SECURE, 119 &debugger_on_panic, 0, "Run debugger on kernel panic"); 120 121 #ifdef KDB_TRACE 122 static int trace_on_panic = 1; 123 #else 124 static int trace_on_panic = 0; 125 #endif 126 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 127 CTLFLAG_RWTUN | CTLFLAG_SECURE, 128 &trace_on_panic, 0, "Print stack trace on kernel panic"); 129 #endif /* KDB */ 130 131 static int sync_on_panic = 0; 132 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 133 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 134 135 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, 136 "Shutdown environment"); 137 138 #ifndef DIAGNOSTIC 139 static int show_busybufs; 140 #else 141 static int show_busybufs = 1; 142 #endif 143 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 144 &show_busybufs, 0, ""); 145 146 int suspend_blocked = 0; 147 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 148 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 149 150 #ifdef EKCD 151 FEATURE(ekcd, "Encrypted kernel crash dumps support"); 152 153 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); 154 155 struct kerneldumpcrypto { 156 uint8_t kdc_encryption; 157 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; 158 keyInstance kdc_ki; 159 cipherInstance kdc_ci; 160 off_t kdc_nextoffset; 161 uint32_t kdc_dumpkeysize; 162 struct kerneldumpkey kdc_dumpkey[]; 163 }; 164 #endif 165 166 /* 167 * Variable panicstr contains argument to first call to panic; used as flag 168 * to indicate that the kernel has already called panic. 169 */ 170 const char *panicstr; 171 172 int dumping; /* system is dumping */ 173 int rebooting; /* system is rebooting */ 174 static struct dumperinfo dumper; /* our selected dumper */ 175 176 /* Context information for dump-debuggers. */ 177 static struct pcb dumppcb; /* Registers. */ 178 lwpid_t dumptid; /* Thread ID. */ 179 180 static struct cdevsw reroot_cdevsw = { 181 .d_version = D_VERSION, 182 .d_name = "reroot", 183 }; 184 185 static void poweroff_wait(void *, int); 186 static void shutdown_halt(void *junk, int howto); 187 static void shutdown_panic(void *junk, int howto); 188 static void shutdown_reset(void *junk, int howto); 189 static int kern_reroot(void); 190 191 /* register various local shutdown events */ 192 static void 193 shutdown_conf(void *unused) 194 { 195 196 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 197 SHUTDOWN_PRI_FIRST); 198 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 199 SHUTDOWN_PRI_LAST + 100); 200 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 201 SHUTDOWN_PRI_LAST + 100); 202 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 203 SHUTDOWN_PRI_LAST + 200); 204 } 205 206 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 207 208 /* 209 * The only reason this exists is to create the /dev/reroot/ directory, 210 * used by reroot code in init(8) as a mountpoint for tmpfs. 211 */ 212 static void 213 reroot_conf(void *unused) 214 { 215 int error; 216 struct cdev *cdev; 217 218 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 219 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 220 if (error != 0) { 221 printf("%s: failed to create device node, error %d", 222 __func__, error); 223 } 224 } 225 226 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 227 228 /* 229 * The system call that results in a reboot. 230 */ 231 /* ARGSUSED */ 232 int 233 sys_reboot(struct thread *td, struct reboot_args *uap) 234 { 235 int error; 236 237 error = 0; 238 #ifdef MAC 239 error = mac_system_check_reboot(td->td_ucred, uap->opt); 240 #endif 241 if (error == 0) 242 error = priv_check(td, PRIV_REBOOT); 243 if (error == 0) { 244 if (uap->opt & RB_REROOT) { 245 error = kern_reroot(); 246 } else { 247 mtx_lock(&Giant); 248 kern_reboot(uap->opt); 249 mtx_unlock(&Giant); 250 } 251 } 252 return (error); 253 } 254 255 /* 256 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 257 */ 258 void 259 shutdown_nice(int howto) 260 { 261 262 if (initproc != NULL) { 263 /* Send a signal to init(8) and have it shutdown the world. */ 264 PROC_LOCK(initproc); 265 if (howto & RB_POWEROFF) 266 kern_psignal(initproc, SIGUSR2); 267 else if (howto & RB_HALT) 268 kern_psignal(initproc, SIGUSR1); 269 else 270 kern_psignal(initproc, SIGINT); 271 PROC_UNLOCK(initproc); 272 } else { 273 /* No init(8) running, so simply reboot. */ 274 kern_reboot(howto | RB_NOSYNC); 275 } 276 } 277 278 static void 279 print_uptime(void) 280 { 281 int f; 282 struct timespec ts; 283 284 getnanouptime(&ts); 285 printf("Uptime: "); 286 f = 0; 287 if (ts.tv_sec >= 86400) { 288 printf("%ldd", (long)ts.tv_sec / 86400); 289 ts.tv_sec %= 86400; 290 f = 1; 291 } 292 if (f || ts.tv_sec >= 3600) { 293 printf("%ldh", (long)ts.tv_sec / 3600); 294 ts.tv_sec %= 3600; 295 f = 1; 296 } 297 if (f || ts.tv_sec >= 60) { 298 printf("%ldm", (long)ts.tv_sec / 60); 299 ts.tv_sec %= 60; 300 f = 1; 301 } 302 printf("%lds\n", (long)ts.tv_sec); 303 } 304 305 int 306 doadump(boolean_t textdump) 307 { 308 boolean_t coredump; 309 int error; 310 311 error = 0; 312 if (dumping) 313 return (EBUSY); 314 if (dumper.dumper == NULL) 315 return (ENXIO); 316 317 savectx(&dumppcb); 318 dumptid = curthread->td_tid; 319 dumping++; 320 321 coredump = TRUE; 322 #ifdef DDB 323 if (textdump && textdump_pending) { 324 coredump = FALSE; 325 textdump_dumpsys(&dumper); 326 } 327 #endif 328 if (coredump) 329 error = dumpsys(&dumper); 330 331 dumping--; 332 return (error); 333 } 334 335 /* 336 * Shutdown the system cleanly to prepare for reboot, halt, or power off. 337 */ 338 void 339 kern_reboot(int howto) 340 { 341 static int once = 0; 342 343 #if defined(SMP) 344 /* 345 * Bind us to CPU 0 so that all shutdown code runs there. Some 346 * systems don't shutdown properly (i.e., ACPI power off) if we 347 * run on another processor. 348 */ 349 if (!SCHEDULER_STOPPED()) { 350 thread_lock(curthread); 351 sched_bind(curthread, 0); 352 thread_unlock(curthread); 353 KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); 354 } 355 #endif 356 /* We're in the process of rebooting. */ 357 rebooting = 1; 358 359 /* We are out of the debugger now. */ 360 kdb_active = 0; 361 362 /* 363 * Do any callouts that should be done BEFORE syncing the filesystems. 364 */ 365 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 366 367 /* 368 * Now sync filesystems 369 */ 370 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 371 once = 1; 372 bufshutdown(show_busybufs); 373 } 374 375 print_uptime(); 376 377 cngrab(); 378 379 /* 380 * Ok, now do things that assume all filesystem activity has 381 * been completed. 382 */ 383 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 384 385 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 386 doadump(TRUE); 387 388 /* Now that we're going to really halt the system... */ 389 EVENTHANDLER_INVOKE(shutdown_final, howto); 390 391 for(;;) ; /* safety against shutdown_reset not working */ 392 /* NOTREACHED */ 393 } 394 395 /* 396 * The system call that results in changing the rootfs. 397 */ 398 static int 399 kern_reroot(void) 400 { 401 struct vnode *oldrootvnode, *vp; 402 struct mount *mp, *devmp; 403 int error; 404 405 if (curproc != initproc) 406 return (EPERM); 407 408 /* 409 * Mark the filesystem containing currently-running executable 410 * (the temporary copy of init(8)) busy. 411 */ 412 vp = curproc->p_textvp; 413 error = vn_lock(vp, LK_SHARED); 414 if (error != 0) 415 return (error); 416 mp = vp->v_mount; 417 error = vfs_busy(mp, MBF_NOWAIT); 418 if (error != 0) { 419 vfs_ref(mp); 420 VOP_UNLOCK(vp, 0); 421 error = vfs_busy(mp, 0); 422 vn_lock(vp, LK_SHARED | LK_RETRY); 423 vfs_rel(mp); 424 if (error != 0) { 425 VOP_UNLOCK(vp, 0); 426 return (ENOENT); 427 } 428 if (vp->v_iflag & VI_DOOMED) { 429 VOP_UNLOCK(vp, 0); 430 vfs_unbusy(mp); 431 return (ENOENT); 432 } 433 } 434 VOP_UNLOCK(vp, 0); 435 436 /* 437 * Remove the filesystem containing currently-running executable 438 * from the mount list, to prevent it from being unmounted 439 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 440 * 441 * Also preserve /dev - forcibly unmounting it could cause driver 442 * reinitialization. 443 */ 444 445 vfs_ref(rootdevmp); 446 devmp = rootdevmp; 447 rootdevmp = NULL; 448 449 mtx_lock(&mountlist_mtx); 450 TAILQ_REMOVE(&mountlist, mp, mnt_list); 451 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 452 mtx_unlock(&mountlist_mtx); 453 454 oldrootvnode = rootvnode; 455 456 /* 457 * Unmount everything except for the two filesystems preserved above. 458 */ 459 vfs_unmountall(); 460 461 /* 462 * Add /dev back; vfs_mountroot() will move it into its new place. 463 */ 464 mtx_lock(&mountlist_mtx); 465 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 466 mtx_unlock(&mountlist_mtx); 467 rootdevmp = devmp; 468 vfs_rel(rootdevmp); 469 470 /* 471 * Mount the new rootfs. 472 */ 473 vfs_mountroot(); 474 475 /* 476 * Update all references to the old rootvnode. 477 */ 478 mountcheckdirs(oldrootvnode, rootvnode); 479 480 /* 481 * Add the temporary filesystem back and unbusy it. 482 */ 483 mtx_lock(&mountlist_mtx); 484 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 485 mtx_unlock(&mountlist_mtx); 486 vfs_unbusy(mp); 487 488 return (0); 489 } 490 491 /* 492 * If the shutdown was a clean halt, behave accordingly. 493 */ 494 static void 495 shutdown_halt(void *junk, int howto) 496 { 497 498 if (howto & RB_HALT) { 499 printf("\n"); 500 printf("The operating system has halted.\n"); 501 printf("Please press any key to reboot.\n\n"); 502 switch (cngetc()) { 503 case -1: /* No console, just die */ 504 cpu_halt(); 505 /* NOTREACHED */ 506 default: 507 howto &= ~RB_HALT; 508 break; 509 } 510 } 511 } 512 513 /* 514 * Check to see if the system paniced, pause and then reboot 515 * according to the specified delay. 516 */ 517 static void 518 shutdown_panic(void *junk, int howto) 519 { 520 int loop; 521 522 if (howto & RB_DUMP) { 523 if (panic_reboot_wait_time != 0) { 524 if (panic_reboot_wait_time != -1) { 525 printf("Automatic reboot in %d seconds - " 526 "press a key on the console to abort\n", 527 panic_reboot_wait_time); 528 for (loop = panic_reboot_wait_time * 10; 529 loop > 0; --loop) { 530 DELAY(1000 * 100); /* 1/10th second */ 531 /* Did user type a key? */ 532 if (cncheckc() != -1) 533 break; 534 } 535 if (!loop) 536 return; 537 } 538 } else { /* zero time specified - reboot NOW */ 539 return; 540 } 541 printf("--> Press a key on the console to reboot,\n"); 542 printf("--> or switch off the system now.\n"); 543 cngetc(); 544 } 545 } 546 547 /* 548 * Everything done, now reset 549 */ 550 static void 551 shutdown_reset(void *junk, int howto) 552 { 553 554 printf("Rebooting...\n"); 555 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 556 557 /* 558 * Acquiring smp_ipi_mtx here has a double effect: 559 * - it disables interrupts avoiding CPU0 preemption 560 * by fast handlers (thus deadlocking against other CPUs) 561 * - it avoids deadlocks against smp_rendezvous() or, more 562 * generally, threads busy-waiting, with this spinlock held, 563 * and waiting for responses by threads on other CPUs 564 * (ie. smp_tlb_shootdown()). 565 * 566 * For the !SMP case it just needs to handle the former problem. 567 */ 568 #ifdef SMP 569 mtx_lock_spin(&smp_ipi_mtx); 570 #else 571 spinlock_enter(); 572 #endif 573 574 /* cpu_boot(howto); */ /* doesn't do anything at the moment */ 575 cpu_reset(); 576 /* NOTREACHED */ /* assuming reset worked */ 577 } 578 579 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 580 static int kassert_warn_only = 0; 581 #ifdef KDB 582 static int kassert_do_kdb = 0; 583 #endif 584 #ifdef KTR 585 static int kassert_do_ktr = 0; 586 #endif 587 static int kassert_do_log = 1; 588 static int kassert_log_pps_limit = 4; 589 static int kassert_log_mute_at = 0; 590 static int kassert_log_panic_at = 0; 591 static int kassert_warnings = 0; 592 593 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); 594 595 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, CTLFLAG_RWTUN, 596 &kassert_warn_only, 0, 597 "KASSERT triggers a panic (1) or just a warning (0)"); 598 599 #ifdef KDB 600 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, CTLFLAG_RWTUN, 601 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 602 #endif 603 604 #ifdef KTR 605 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, CTLFLAG_RWTUN, 606 &kassert_do_ktr, 0, 607 "KASSERT does a KTR, set this to the KTRMASK you want"); 608 #endif 609 610 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, CTLFLAG_RWTUN, 611 &kassert_do_log, 0, "KASSERT triggers a panic (1) or just a warning (0)"); 612 613 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RWTUN, 614 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 615 616 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, CTLFLAG_RWTUN, 617 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 618 619 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, CTLFLAG_RWTUN, 620 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 621 622 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, CTLFLAG_RWTUN, 623 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 624 625 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 626 627 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 628 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, 629 kassert_sysctl_kassert, "I", "set to trigger a test kassert"); 630 631 static int 632 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 633 { 634 int error, i; 635 636 error = sysctl_wire_old_buffer(req, sizeof(int)); 637 if (error == 0) { 638 i = 0; 639 error = sysctl_handle_int(oidp, &i, 0, req); 640 } 641 if (error != 0 || req->newptr == NULL) 642 return (error); 643 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 644 return (0); 645 } 646 647 /* 648 * Called by KASSERT, this decides if we will panic 649 * or if we will log via printf and/or ktr. 650 */ 651 void 652 kassert_panic(const char *fmt, ...) 653 { 654 static char buf[256]; 655 va_list ap; 656 657 va_start(ap, fmt); 658 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 659 va_end(ap); 660 661 /* 662 * panic if we're not just warning, or if we've exceeded 663 * kassert_log_panic_at warnings. 664 */ 665 if (!kassert_warn_only || 666 (kassert_log_panic_at > 0 && 667 kassert_warnings >= kassert_log_panic_at)) { 668 va_start(ap, fmt); 669 vpanic(fmt, ap); 670 /* NORETURN */ 671 } 672 #ifdef KTR 673 if (kassert_do_ktr) 674 CTR0(ktr_mask, buf); 675 #endif /* KTR */ 676 /* 677 * log if we've not yet met the mute limit. 678 */ 679 if (kassert_do_log && 680 (kassert_log_mute_at == 0 || 681 kassert_warnings < kassert_log_mute_at)) { 682 static struct timeval lasterr; 683 static int curerr; 684 685 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 686 printf("KASSERT failed: %s\n", buf); 687 kdb_backtrace(); 688 } 689 } 690 #ifdef KDB 691 if (kassert_do_kdb) { 692 kdb_enter(KDB_WHY_KASSERT, buf); 693 } 694 #endif 695 atomic_add_int(&kassert_warnings, 1); 696 } 697 #endif 698 699 /* 700 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 701 * and then reboots. If we are called twice, then we avoid trying to sync 702 * the disks as this often leads to recursive panics. 703 */ 704 void 705 panic(const char *fmt, ...) 706 { 707 va_list ap; 708 709 va_start(ap, fmt); 710 vpanic(fmt, ap); 711 } 712 713 void 714 vpanic(const char *fmt, va_list ap) 715 { 716 #ifdef SMP 717 cpuset_t other_cpus; 718 #endif 719 struct thread *td = curthread; 720 int bootopt, newpanic; 721 static char buf[256]; 722 723 spinlock_enter(); 724 725 #ifdef SMP 726 /* 727 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 728 * concurrently entering panic. Only the winner will proceed 729 * further. 730 */ 731 if (panicstr == NULL && !kdb_active) { 732 other_cpus = all_cpus; 733 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 734 stop_cpus_hard(other_cpus); 735 } 736 #endif 737 738 /* 739 * Ensure that the scheduler is stopped while panicking, even if panic 740 * has been entered from kdb. 741 */ 742 td->td_stopsched = 1; 743 744 bootopt = RB_AUTOBOOT; 745 newpanic = 0; 746 if (panicstr) 747 bootopt |= RB_NOSYNC; 748 else { 749 bootopt |= RB_DUMP; 750 panicstr = fmt; 751 newpanic = 1; 752 } 753 754 if (newpanic) { 755 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 756 panicstr = buf; 757 cngrab(); 758 printf("panic: %s\n", buf); 759 } else { 760 printf("panic: "); 761 vprintf(fmt, ap); 762 printf("\n"); 763 } 764 #ifdef SMP 765 printf("cpuid = %d\n", PCPU_GET(cpuid)); 766 #endif 767 printf("time = %jd\n", (intmax_t )time_second); 768 #ifdef KDB 769 if (newpanic && trace_on_panic) 770 kdb_backtrace(); 771 if (debugger_on_panic) 772 kdb_enter(KDB_WHY_PANIC, "panic"); 773 #endif 774 /*thread_lock(td); */ 775 td->td_flags |= TDF_INPANIC; 776 /* thread_unlock(td); */ 777 if (!sync_on_panic) 778 bootopt |= RB_NOSYNC; 779 kern_reboot(bootopt); 780 } 781 782 /* 783 * Support for poweroff delay. 784 * 785 * Please note that setting this delay too short might power off your machine 786 * before the write cache on your hard disk has been flushed, leading to 787 * soft-updates inconsistencies. 788 */ 789 #ifndef POWEROFF_DELAY 790 # define POWEROFF_DELAY 5000 791 #endif 792 static int poweroff_delay = POWEROFF_DELAY; 793 794 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 795 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 796 797 static void 798 poweroff_wait(void *junk, int howto) 799 { 800 801 if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) 802 return; 803 DELAY(poweroff_delay * 1000); 804 } 805 806 /* 807 * Some system processes (e.g. syncer) need to be stopped at appropriate 808 * points in their main loops prior to a system shutdown, so that they 809 * won't interfere with the shutdown process (e.g. by holding a disk buf 810 * to cause sync to fail). For each of these system processes, register 811 * shutdown_kproc() as a handler for one of shutdown events. 812 */ 813 static int kproc_shutdown_wait = 60; 814 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 815 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 816 817 void 818 kproc_shutdown(void *arg, int howto) 819 { 820 struct proc *p; 821 int error; 822 823 if (panicstr) 824 return; 825 826 p = (struct proc *)arg; 827 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 828 kproc_shutdown_wait, p->p_comm); 829 error = kproc_suspend(p, kproc_shutdown_wait * hz); 830 831 if (error == EWOULDBLOCK) 832 printf("timed out\n"); 833 else 834 printf("done\n"); 835 } 836 837 void 838 kthread_shutdown(void *arg, int howto) 839 { 840 struct thread *td; 841 int error; 842 843 if (panicstr) 844 return; 845 846 td = (struct thread *)arg; 847 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 848 kproc_shutdown_wait, td->td_name); 849 error = kthread_suspend(td, kproc_shutdown_wait * hz); 850 851 if (error == EWOULDBLOCK) 852 printf("timed out\n"); 853 else 854 printf("done\n"); 855 } 856 857 static char dumpdevname[sizeof(((struct cdev*)NULL)->si_name)]; 858 SYSCTL_STRING(_kern_shutdown, OID_AUTO, dumpdevname, CTLFLAG_RD, 859 dumpdevname, 0, "Device for kernel dumps"); 860 861 #ifdef EKCD 862 static struct kerneldumpcrypto * 863 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 864 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 865 { 866 struct kerneldumpcrypto *kdc; 867 struct kerneldumpkey *kdk; 868 uint32_t dumpkeysize; 869 870 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 871 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 872 873 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 874 875 kdc->kdc_encryption = encryption; 876 switch (kdc->kdc_encryption) { 877 case KERNELDUMP_ENC_AES_256_CBC: 878 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 879 goto failed; 880 break; 881 default: 882 goto failed; 883 } 884 885 kdc->kdc_dumpkeysize = dumpkeysize; 886 kdk = kdc->kdc_dumpkey; 887 kdk->kdk_encryption = kdc->kdc_encryption; 888 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 889 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 890 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 891 892 return (kdc); 893 failed: 894 explicit_bzero(kdc, sizeof(*kdc) + dumpkeysize); 895 free(kdc, M_EKCD); 896 return (NULL); 897 } 898 899 static int 900 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 901 { 902 uint8_t hash[SHA256_DIGEST_LENGTH]; 903 SHA256_CTX ctx; 904 struct kerneldumpkey *kdk; 905 int error; 906 907 error = 0; 908 909 if (kdc == NULL) 910 return (0); 911 912 /* 913 * When a user enters ddb it can write a crash dump multiple times. 914 * Each time it should be encrypted using a different IV. 915 */ 916 SHA256_Init(&ctx); 917 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 918 SHA256_Final(hash, &ctx); 919 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 920 921 switch (kdc->kdc_encryption) { 922 case KERNELDUMP_ENC_AES_256_CBC: 923 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 924 kdc->kdc_iv) <= 0) { 925 error = EINVAL; 926 goto out; 927 } 928 break; 929 default: 930 error = EINVAL; 931 goto out; 932 } 933 934 kdc->kdc_nextoffset = 0; 935 936 kdk = kdc->kdc_dumpkey; 937 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 938 out: 939 explicit_bzero(hash, sizeof(hash)); 940 return (error); 941 } 942 943 static uint32_t 944 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 945 { 946 947 if (kdc == NULL) 948 return (0); 949 return (kdc->kdc_dumpkeysize); 950 } 951 #endif /* EKCD */ 952 953 /* Registration of dumpers */ 954 int 955 set_dumper(struct dumperinfo *di, const char *devname, struct thread *td, 956 uint8_t encryption, const uint8_t *key, uint32_t encryptedkeysize, 957 const uint8_t *encryptedkey) 958 { 959 size_t wantcopy; 960 int error; 961 962 error = priv_check(td, PRIV_SETDUMPER); 963 if (error != 0) 964 return (error); 965 966 if (di == NULL) { 967 error = 0; 968 goto cleanup; 969 } 970 if (dumper.dumper != NULL) 971 return (EBUSY); 972 dumper = *di; 973 dumper.blockbuf = NULL; 974 dumper.kdc = NULL; 975 976 if (encryption != KERNELDUMP_ENC_NONE) { 977 #ifdef EKCD 978 dumper.kdc = kerneldumpcrypto_create(di->blocksize, encryption, 979 key, encryptedkeysize, encryptedkey); 980 if (dumper.kdc == NULL) { 981 error = EINVAL; 982 goto cleanup; 983 } 984 #else 985 error = EOPNOTSUPP; 986 goto cleanup; 987 #endif 988 } 989 990 wantcopy = strlcpy(dumpdevname, devname, sizeof(dumpdevname)); 991 if (wantcopy >= sizeof(dumpdevname)) { 992 printf("set_dumper: device name truncated from '%s' -> '%s'\n", 993 devname, dumpdevname); 994 } 995 996 dumper.blockbuf = malloc(di->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 997 return (0); 998 cleanup: 999 #ifdef EKCD 1000 if (dumper.kdc != NULL) { 1001 explicit_bzero(dumper.kdc, sizeof(*dumper.kdc) + 1002 dumper.kdc->kdc_dumpkeysize); 1003 free(dumper.kdc, M_EKCD); 1004 } 1005 #endif 1006 if (dumper.blockbuf != NULL) { 1007 explicit_bzero(dumper.blockbuf, dumper.blocksize); 1008 free(dumper.blockbuf, M_DUMPER); 1009 } 1010 explicit_bzero(&dumper, sizeof(dumper)); 1011 dumpdevname[0] = '\0'; 1012 return (error); 1013 } 1014 1015 static int 1016 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1017 { 1018 1019 if (length != 0 && (offset < di->mediaoffset || 1020 offset - di->mediaoffset + length > di->mediasize)) { 1021 printf("Attempt to write outside dump device boundaries.\n" 1022 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1023 (intmax_t)offset, (intmax_t)di->mediaoffset, 1024 (uintmax_t)length, (intmax_t)di->mediasize); 1025 return (ENOSPC); 1026 } 1027 1028 return (0); 1029 } 1030 1031 /* Call dumper with bounds checking. */ 1032 static int 1033 dump_raw_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1034 off_t offset, size_t length) 1035 { 1036 int error; 1037 1038 error = dump_check_bounds(di, offset, length); 1039 if (error != 0) 1040 return (error); 1041 1042 return (di->dumper(di->priv, virtual, physical, offset, length)); 1043 } 1044 1045 #ifdef EKCD 1046 static int 1047 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1048 { 1049 1050 switch (kdc->kdc_encryption) { 1051 case KERNELDUMP_ENC_AES_256_CBC: 1052 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1053 8 * size, buf) <= 0) { 1054 return (EIO); 1055 } 1056 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1057 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1058 return (EIO); 1059 } 1060 break; 1061 default: 1062 return (EINVAL); 1063 } 1064 1065 return (0); 1066 } 1067 1068 /* Encrypt data and call dumper. */ 1069 static int 1070 dump_encrypted_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1071 off_t offset, size_t length) 1072 { 1073 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1074 struct kerneldumpcrypto *kdc; 1075 int error; 1076 size_t nbytes; 1077 off_t nextoffset; 1078 1079 kdc = di->kdc; 1080 1081 error = dump_check_bounds(di, offset, length); 1082 if (error != 0) 1083 return (error); 1084 1085 /* Signal completion. */ 1086 if (virtual == NULL && physical == 0 && offset == 0 && length == 0) { 1087 return (di->dumper(di->priv, virtual, physical, offset, 1088 length)); 1089 } 1090 1091 /* Data have to be aligned to block size. */ 1092 if ((length % di->blocksize) != 0) 1093 return (EINVAL); 1094 1095 /* 1096 * Data have to be written continuously becase we're encrypting using 1097 * CBC mode which has this assumption. 1098 */ 1099 if (kdc->kdc_nextoffset != 0 && kdc->kdc_nextoffset != offset) 1100 return (EINVAL); 1101 1102 nextoffset = offset + (off_t)length; 1103 1104 while (length > 0) { 1105 nbytes = MIN(length, sizeof(buf)); 1106 bcopy(virtual, buf, nbytes); 1107 1108 if (dump_encrypt(kdc, buf, nbytes) != 0) 1109 return (EIO); 1110 1111 error = di->dumper(di->priv, buf, physical, offset, nbytes); 1112 if (error != 0) 1113 return (error); 1114 1115 offset += nbytes; 1116 virtual = (void *)((uint8_t *)virtual + nbytes); 1117 length -= nbytes; 1118 } 1119 1120 kdc->kdc_nextoffset = nextoffset; 1121 1122 return (0); 1123 } 1124 1125 static int 1126 dump_write_key(struct dumperinfo *di, vm_offset_t physical, off_t offset) 1127 { 1128 struct kerneldumpcrypto *kdc; 1129 1130 kdc = di->kdc; 1131 if (kdc == NULL) 1132 return (0); 1133 1134 return (dump_raw_write(di, kdc->kdc_dumpkey, physical, offset, 1135 kdc->kdc_dumpkeysize)); 1136 } 1137 #endif /* EKCD */ 1138 1139 int 1140 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1141 off_t offset, size_t length) 1142 { 1143 1144 #ifdef EKCD 1145 if (di->kdc != NULL) { 1146 return (dump_encrypted_write(di, virtual, physical, offset, 1147 length)); 1148 } 1149 #endif 1150 1151 return (dump_raw_write(di, virtual, physical, offset, length)); 1152 } 1153 1154 static int 1155 dump_write_header(struct dumperinfo *di, struct kerneldumpheader *kdh, 1156 vm_offset_t physical, off_t offset) 1157 { 1158 void *buf; 1159 size_t hdrsz; 1160 1161 hdrsz = sizeof(*kdh); 1162 if (hdrsz > di->blocksize) 1163 return (ENOMEM); 1164 1165 if (hdrsz == di->blocksize) 1166 buf = kdh; 1167 else { 1168 buf = di->blockbuf; 1169 memset(buf, 0, di->blocksize); 1170 memcpy(buf, kdh, hdrsz); 1171 } 1172 1173 return (dump_raw_write(di, buf, physical, offset, di->blocksize)); 1174 } 1175 1176 /* 1177 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1178 * protect us from metadata and metadata from us. 1179 */ 1180 #define SIZEOF_METADATA (64 * 1024) 1181 1182 /* 1183 * Do some preliminary setup for a kernel dump: verify that we have enough space 1184 * on the dump device, write the leading header, and optionally write the crypto 1185 * key. 1186 */ 1187 int 1188 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh, off_t *dumplop) 1189 { 1190 uint64_t dumpsize; 1191 uint32_t keysize; 1192 int error; 1193 1194 #ifdef EKCD 1195 error = kerneldumpcrypto_init(di->kdc); 1196 if (error != 0) 1197 return (error); 1198 keysize = kerneldumpcrypto_dumpkeysize(di->kdc); 1199 #else 1200 keysize = 0; 1201 #endif 1202 1203 dumpsize = dtoh64(kdh->dumplength) + 2 * di->blocksize + keysize; 1204 if (di->mediasize < SIZEOF_METADATA + dumpsize) 1205 return (E2BIG); 1206 1207 *dumplop = di->mediaoffset + di->mediasize - dumpsize; 1208 1209 error = dump_write_header(di, kdh, 0, *dumplop); 1210 if (error != 0) 1211 return (error); 1212 *dumplop += di->blocksize; 1213 1214 #ifdef EKCD 1215 error = dump_write_key(di, 0, *dumplop); 1216 if (error != 0) 1217 return (error); 1218 *dumplop += keysize; 1219 #endif 1220 1221 return (0); 1222 } 1223 1224 /* 1225 * Write the trailing kernel dump header and signal to the lower layers that the 1226 * dump has completed. 1227 */ 1228 int 1229 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh, off_t dumplo) 1230 { 1231 int error; 1232 1233 error = dump_write_header(di, kdh, 0, dumplo); 1234 if (error != 0) 1235 return (error); 1236 1237 (void)dump_write(di, NULL, 0, 0, 0); 1238 return (0); 1239 } 1240 1241 void 1242 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1243 char *magic, uint32_t archver, uint64_t dumplen) 1244 { 1245 size_t dstsize; 1246 1247 bzero(kdh, sizeof(*kdh)); 1248 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1249 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1250 kdh->version = htod32(KERNELDUMPVERSION); 1251 kdh->architectureversion = htod32(archver); 1252 kdh->dumplength = htod64(dumplen); 1253 kdh->dumptime = htod64(time_second); 1254 #ifdef EKCD 1255 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdc)); 1256 #else 1257 kdh->dumpkeysize = 0; 1258 #endif 1259 kdh->blocksize = htod32(di->blocksize); 1260 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1261 dstsize = sizeof(kdh->versionstring); 1262 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1263 kdh->versionstring[dstsize - 2] = '\n'; 1264 if (panicstr != NULL) 1265 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1266 kdh->parity = kerneldump_parity(kdh); 1267 } 1268 1269 #ifdef DDB 1270 DB_SHOW_COMMAND(panic, db_show_panic) 1271 { 1272 1273 if (panicstr == NULL) 1274 db_printf("panicstr not set\n"); 1275 else 1276 db_printf("panic: %s\n", panicstr); 1277 } 1278 #endif 1279