1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_ddb.h" 41 #include "opt_ekcd.h" 42 #include "opt_kdb.h" 43 #include "opt_panic.h" 44 #include "opt_printf.h" 45 #include "opt_sched.h" 46 #include "opt_watchdog.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/bio.h> 51 #include <sys/boottrace.h> 52 #include <sys/buf.h> 53 #include <sys/conf.h> 54 #include <sys/compressor.h> 55 #include <sys/cons.h> 56 #include <sys/disk.h> 57 #include <sys/eventhandler.h> 58 #include <sys/filedesc.h> 59 #include <sys/jail.h> 60 #include <sys/kdb.h> 61 #include <sys/kernel.h> 62 #include <sys/kerneldump.h> 63 #include <sys/kthread.h> 64 #include <sys/ktr.h> 65 #include <sys/malloc.h> 66 #include <sys/mbuf.h> 67 #include <sys/mount.h> 68 #include <sys/priv.h> 69 #include <sys/proc.h> 70 #include <sys/reboot.h> 71 #include <sys/resourcevar.h> 72 #include <sys/rwlock.h> 73 #include <sys/sbuf.h> 74 #include <sys/sched.h> 75 #include <sys/smp.h> 76 #include <sys/sysctl.h> 77 #include <sys/sysproto.h> 78 #include <sys/taskqueue.h> 79 #include <sys/vnode.h> 80 #include <sys/watchdog.h> 81 82 #include <crypto/chacha20/chacha.h> 83 #include <crypto/rijndael/rijndael-api-fst.h> 84 #include <crypto/sha2/sha256.h> 85 86 #include <ddb/ddb.h> 87 88 #include <machine/cpu.h> 89 #include <machine/dump.h> 90 #include <machine/pcb.h> 91 #include <machine/smp.h> 92 93 #include <security/mac/mac_framework.h> 94 95 #include <vm/vm.h> 96 #include <vm/vm_object.h> 97 #include <vm/vm_page.h> 98 #include <vm/vm_pager.h> 99 #include <vm/swap_pager.h> 100 101 #include <sys/signalvar.h> 102 103 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 104 105 #ifndef PANIC_REBOOT_WAIT_TIME 106 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 107 #endif 108 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 109 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, 110 &panic_reboot_wait_time, 0, 111 "Seconds to wait before rebooting after a panic"); 112 static int reboot_wait_time = 0; 113 SYSCTL_INT(_kern, OID_AUTO, reboot_wait_time, CTLFLAG_RWTUN, 114 &reboot_wait_time, 0, 115 "Seconds to wait before rebooting"); 116 117 /* 118 * Note that stdarg.h and the ANSI style va_start macro is used for both 119 * ANSI and traditional C compilers. 120 */ 121 #include <machine/stdarg.h> 122 123 #ifdef KDB 124 #ifdef KDB_UNATTENDED 125 int debugger_on_panic = 0; 126 #else 127 int debugger_on_panic = 1; 128 #endif 129 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 130 CTLFLAG_RWTUN, &debugger_on_panic, 0, 131 "Run debugger on kernel panic"); 132 133 static bool debugger_on_recursive_panic = false; 134 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic, 135 CTLFLAG_RWTUN, &debugger_on_recursive_panic, 0, 136 "Run debugger on recursive kernel panic"); 137 138 int debugger_on_trap = 0; 139 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, 140 CTLFLAG_RWTUN, &debugger_on_trap, 0, 141 "Run debugger on kernel trap before panic"); 142 143 #ifdef KDB_TRACE 144 static int trace_on_panic = 1; 145 static bool trace_all_panics = true; 146 #else 147 static int trace_on_panic = 0; 148 static bool trace_all_panics = false; 149 #endif 150 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 151 CTLFLAG_RWTUN | CTLFLAG_SECURE, 152 &trace_on_panic, 0, "Print stack trace on kernel panic"); 153 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, 154 &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); 155 #endif /* KDB */ 156 157 static int sync_on_panic = 0; 158 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 159 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 160 161 static bool poweroff_on_panic = 0; 162 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, 163 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); 164 165 static bool powercycle_on_panic = 0; 166 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, 167 &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); 168 169 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 170 "Shutdown environment"); 171 172 #ifndef DIAGNOSTIC 173 static int show_busybufs; 174 #else 175 static int show_busybufs = 1; 176 #endif 177 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 178 &show_busybufs, 0, 179 "Show busy buffers during shutdown"); 180 181 int suspend_blocked = 0; 182 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 183 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 184 185 #ifdef EKCD 186 FEATURE(ekcd, "Encrypted kernel crash dumps support"); 187 188 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); 189 190 struct kerneldumpcrypto { 191 uint8_t kdc_encryption; 192 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; 193 union { 194 struct { 195 keyInstance aes_ki; 196 cipherInstance aes_ci; 197 } u_aes; 198 struct chacha_ctx u_chacha; 199 } u; 200 #define kdc_ki u.u_aes.aes_ki 201 #define kdc_ci u.u_aes.aes_ci 202 #define kdc_chacha u.u_chacha 203 uint32_t kdc_dumpkeysize; 204 struct kerneldumpkey kdc_dumpkey[]; 205 }; 206 #endif 207 208 struct kerneldumpcomp { 209 uint8_t kdc_format; 210 struct compressor *kdc_stream; 211 uint8_t *kdc_buf; 212 size_t kdc_resid; 213 }; 214 215 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di, 216 uint8_t compression); 217 static void kerneldumpcomp_destroy(struct dumperinfo *di); 218 static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg); 219 220 static int kerneldump_gzlevel = 6; 221 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, 222 &kerneldump_gzlevel, 0, 223 "Kernel crash dump compression level"); 224 225 /* 226 * Variable panicstr contains argument to first call to panic; used as flag 227 * to indicate that the kernel has already called panic. 228 */ 229 const char *panicstr; 230 bool __read_frequently panicked; 231 232 int __read_mostly dumping; /* system is dumping */ 233 int rebooting; /* system is rebooting */ 234 /* 235 * Used to serialize between sysctl kern.shutdown.dumpdevname and list 236 * modifications via ioctl. 237 */ 238 static struct mtx dumpconf_list_lk; 239 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); 240 241 /* Our selected dumper(s). */ 242 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 243 TAILQ_HEAD_INITIALIZER(dumper_configs); 244 245 /* Context information for dump-debuggers, saved by the dump_savectx() macro. */ 246 struct pcb dumppcb; /* Registers. */ 247 lwpid_t dumptid; /* Thread ID. */ 248 249 static struct cdevsw reroot_cdevsw = { 250 .d_version = D_VERSION, 251 .d_name = "reroot", 252 }; 253 254 static void poweroff_wait(void *, int); 255 static void shutdown_halt(void *junk, int howto); 256 static void shutdown_panic(void *junk, int howto); 257 static void shutdown_reset(void *junk, int howto); 258 static int kern_reroot(void); 259 260 /* register various local shutdown events */ 261 static void 262 shutdown_conf(void *unused) 263 { 264 265 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 266 SHUTDOWN_PRI_FIRST); 267 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 268 SHUTDOWN_PRI_LAST + 100); 269 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 270 SHUTDOWN_PRI_LAST + 100); 271 } 272 273 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 274 275 /* 276 * The only reason this exists is to create the /dev/reroot/ directory, 277 * used by reroot code in init(8) as a mountpoint for tmpfs. 278 */ 279 static void 280 reroot_conf(void *unused) 281 { 282 int error; 283 struct cdev *cdev; 284 285 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 286 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 287 if (error != 0) { 288 printf("%s: failed to create device node, error %d", 289 __func__, error); 290 } 291 } 292 293 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 294 295 /* 296 * The system call that results in a reboot. 297 */ 298 /* ARGSUSED */ 299 int 300 sys_reboot(struct thread *td, struct reboot_args *uap) 301 { 302 int error; 303 304 error = 0; 305 #ifdef MAC 306 error = mac_system_check_reboot(td->td_ucred, uap->opt); 307 #endif 308 if (error == 0) 309 error = priv_check(td, PRIV_REBOOT); 310 if (error == 0) { 311 if (uap->opt & RB_REROOT) 312 error = kern_reroot(); 313 else 314 kern_reboot(uap->opt); 315 } 316 return (error); 317 } 318 319 static void 320 shutdown_nice_task_fn(void *arg, int pending __unused) 321 { 322 int howto; 323 324 howto = (uintptr_t)arg; 325 /* Send a signal to init(8) and have it shutdown the world. */ 326 PROC_LOCK(initproc); 327 if ((howto & RB_POWEROFF) != 0) { 328 BOOTTRACE("SIGUSR2 to init(8)"); 329 kern_psignal(initproc, SIGUSR2); 330 } else if ((howto & RB_POWERCYCLE) != 0) { 331 BOOTTRACE("SIGWINCH to init(8)"); 332 kern_psignal(initproc, SIGWINCH); 333 } else if ((howto & RB_HALT) != 0) { 334 BOOTTRACE("SIGUSR1 to init(8)"); 335 kern_psignal(initproc, SIGUSR1); 336 } else { 337 BOOTTRACE("SIGINT to init(8)"); 338 kern_psignal(initproc, SIGINT); 339 } 340 PROC_UNLOCK(initproc); 341 } 342 343 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 344 &shutdown_nice_task_fn, NULL); 345 346 /* 347 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 348 */ 349 void 350 shutdown_nice(int howto) 351 { 352 353 if (initproc != NULL && !SCHEDULER_STOPPED()) { 354 BOOTTRACE("shutdown initiated"); 355 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 356 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 357 } else { 358 /* 359 * No init(8) running, or scheduler would not allow it 360 * to run, so simply reboot. 361 */ 362 kern_reboot(howto | RB_NOSYNC); 363 } 364 } 365 366 static void 367 print_uptime(void) 368 { 369 int f; 370 struct timespec ts; 371 372 getnanouptime(&ts); 373 printf("Uptime: "); 374 f = 0; 375 if (ts.tv_sec >= 86400) { 376 printf("%ldd", (long)ts.tv_sec / 86400); 377 ts.tv_sec %= 86400; 378 f = 1; 379 } 380 if (f || ts.tv_sec >= 3600) { 381 printf("%ldh", (long)ts.tv_sec / 3600); 382 ts.tv_sec %= 3600; 383 f = 1; 384 } 385 if (f || ts.tv_sec >= 60) { 386 printf("%ldm", (long)ts.tv_sec / 60); 387 ts.tv_sec %= 60; 388 f = 1; 389 } 390 printf("%lds\n", (long)ts.tv_sec); 391 } 392 393 int 394 doadump(boolean_t textdump) 395 { 396 boolean_t coredump; 397 int error; 398 399 error = 0; 400 if (dumping) 401 return (EBUSY); 402 if (TAILQ_EMPTY(&dumper_configs)) 403 return (ENXIO); 404 405 dump_savectx(); 406 dumping++; 407 408 coredump = TRUE; 409 #ifdef DDB 410 if (textdump && textdump_pending) { 411 coredump = FALSE; 412 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 413 } 414 #endif 415 if (coredump) { 416 struct dumperinfo *di; 417 418 TAILQ_FOREACH(di, &dumper_configs, di_next) { 419 error = dumpsys(di); 420 if (error == 0) 421 break; 422 } 423 } 424 425 dumping--; 426 return (error); 427 } 428 429 /* 430 * Trace the shutdown reason. 431 */ 432 static void 433 reboottrace(int howto) 434 { 435 if ((howto & RB_DUMP) != 0) { 436 if ((howto & RB_HALT) != 0) 437 BOOTTRACE("system panic: halting..."); 438 if ((howto & RB_POWEROFF) != 0) 439 BOOTTRACE("system panic: powering off..."); 440 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 441 BOOTTRACE("system panic: rebooting..."); 442 } else { 443 if ((howto & RB_HALT) != 0) 444 BOOTTRACE("system halting..."); 445 if ((howto & RB_POWEROFF) != 0) 446 BOOTTRACE("system powering off..."); 447 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 448 BOOTTRACE("system rebooting..."); 449 } 450 } 451 452 /* 453 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or 454 * power off. 455 */ 456 void 457 kern_reboot(int howto) 458 { 459 static int once = 0; 460 461 if (initproc != NULL && curproc != initproc) 462 BOOTTRACE("kernel shutdown (dirty) started"); 463 else 464 BOOTTRACE("kernel shutdown (clean) started"); 465 466 /* 467 * Normal paths here don't hold Giant, but we can wind up here 468 * unexpectedly with it held. Drop it now so we don't have to 469 * drop and pick it up elsewhere. The paths it is locking will 470 * never be returned to, and it is preferable to preclude 471 * deadlock than to lock against code that won't ever 472 * continue. 473 */ 474 while (mtx_owned(&Giant)) 475 mtx_unlock(&Giant); 476 477 #if defined(SMP) 478 /* 479 * Bind us to the first CPU so that all shutdown code runs there. Some 480 * systems don't shutdown properly (i.e., ACPI power off) if we 481 * run on another processor. 482 */ 483 if (!SCHEDULER_STOPPED()) { 484 thread_lock(curthread); 485 sched_bind(curthread, CPU_FIRST()); 486 thread_unlock(curthread); 487 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 488 ("%s: not running on cpu 0", __func__)); 489 } 490 #endif 491 /* We're in the process of rebooting. */ 492 rebooting = 1; 493 reboottrace(howto); 494 495 /* We are out of the debugger now. */ 496 kdb_active = 0; 497 498 /* 499 * Do any callouts that should be done BEFORE syncing the filesystems. 500 */ 501 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 502 BOOTTRACE("shutdown pre sync complete"); 503 504 /* 505 * Now sync filesystems 506 */ 507 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 508 once = 1; 509 BOOTTRACE("bufshutdown begin"); 510 bufshutdown(show_busybufs); 511 BOOTTRACE("bufshutdown end"); 512 } 513 514 print_uptime(); 515 516 cngrab(); 517 518 /* 519 * Ok, now do things that assume all filesystem activity has 520 * been completed. 521 */ 522 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 523 BOOTTRACE("shutdown post sync complete"); 524 525 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 526 doadump(TRUE); 527 528 /* Now that we're going to really halt the system... */ 529 BOOTTRACE("shutdown final begin"); 530 531 if (shutdown_trace) 532 boottrace_dump_console(); 533 534 EVENTHANDLER_INVOKE(shutdown_final, howto); 535 536 /* 537 * Call this directly so that reset is attempted even if shutdown 538 * handlers are not yet registered. 539 */ 540 shutdown_reset(NULL, howto); 541 542 for(;;) ; /* safety against shutdown_reset not working */ 543 /* NOTREACHED */ 544 } 545 546 /* 547 * The system call that results in changing the rootfs. 548 */ 549 static int 550 kern_reroot(void) 551 { 552 struct vnode *oldrootvnode, *vp; 553 struct mount *mp, *devmp; 554 int error; 555 556 if (curproc != initproc) 557 return (EPERM); 558 559 /* 560 * Mark the filesystem containing currently-running executable 561 * (the temporary copy of init(8)) busy. 562 */ 563 vp = curproc->p_textvp; 564 error = vn_lock(vp, LK_SHARED); 565 if (error != 0) 566 return (error); 567 mp = vp->v_mount; 568 error = vfs_busy(mp, MBF_NOWAIT); 569 if (error != 0) { 570 vfs_ref(mp); 571 VOP_UNLOCK(vp); 572 error = vfs_busy(mp, 0); 573 vn_lock(vp, LK_SHARED | LK_RETRY); 574 vfs_rel(mp); 575 if (error != 0) { 576 VOP_UNLOCK(vp); 577 return (ENOENT); 578 } 579 if (VN_IS_DOOMED(vp)) { 580 VOP_UNLOCK(vp); 581 vfs_unbusy(mp); 582 return (ENOENT); 583 } 584 } 585 VOP_UNLOCK(vp); 586 587 /* 588 * Remove the filesystem containing currently-running executable 589 * from the mount list, to prevent it from being unmounted 590 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 591 * 592 * Also preserve /dev - forcibly unmounting it could cause driver 593 * reinitialization. 594 */ 595 596 vfs_ref(rootdevmp); 597 devmp = rootdevmp; 598 rootdevmp = NULL; 599 600 mtx_lock(&mountlist_mtx); 601 TAILQ_REMOVE(&mountlist, mp, mnt_list); 602 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 603 mtx_unlock(&mountlist_mtx); 604 605 oldrootvnode = rootvnode; 606 607 /* 608 * Unmount everything except for the two filesystems preserved above. 609 */ 610 vfs_unmountall(); 611 612 /* 613 * Add /dev back; vfs_mountroot() will move it into its new place. 614 */ 615 mtx_lock(&mountlist_mtx); 616 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 617 mtx_unlock(&mountlist_mtx); 618 rootdevmp = devmp; 619 vfs_rel(rootdevmp); 620 621 /* 622 * Mount the new rootfs. 623 */ 624 vfs_mountroot(); 625 626 /* 627 * Update all references to the old rootvnode. 628 */ 629 mountcheckdirs(oldrootvnode, rootvnode); 630 631 /* 632 * Add the temporary filesystem back and unbusy it. 633 */ 634 mtx_lock(&mountlist_mtx); 635 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 636 mtx_unlock(&mountlist_mtx); 637 vfs_unbusy(mp); 638 639 return (0); 640 } 641 642 /* 643 * If the shutdown was a clean halt, behave accordingly. 644 */ 645 static void 646 shutdown_halt(void *junk, int howto) 647 { 648 649 if (howto & RB_HALT) { 650 printf("\n"); 651 printf("The operating system has halted.\n"); 652 printf("Please press any key to reboot.\n\n"); 653 654 wdog_kern_pat(WD_TO_NEVER); 655 656 switch (cngetc()) { 657 case -1: /* No console, just die */ 658 cpu_halt(); 659 /* NOTREACHED */ 660 default: 661 break; 662 } 663 } 664 } 665 666 /* 667 * Check to see if the system panicked, pause and then reboot 668 * according to the specified delay. 669 */ 670 static void 671 shutdown_panic(void *junk, int howto) 672 { 673 int loop; 674 675 if (howto & RB_DUMP) { 676 if (panic_reboot_wait_time != 0) { 677 if (panic_reboot_wait_time != -1) { 678 printf("Automatic reboot in %d seconds - " 679 "press a key on the console to abort\n", 680 panic_reboot_wait_time); 681 for (loop = panic_reboot_wait_time * 10; 682 loop > 0; --loop) { 683 DELAY(1000 * 100); /* 1/10th second */ 684 /* Did user type a key? */ 685 if (cncheckc() != -1) 686 break; 687 } 688 if (!loop) 689 return; 690 } 691 } else { /* zero time specified - reboot NOW */ 692 return; 693 } 694 printf("--> Press a key on the console to reboot,\n"); 695 printf("--> or switch off the system now.\n"); 696 cngetc(); 697 } 698 } 699 700 /* 701 * Everything done, now reset 702 */ 703 static void 704 shutdown_reset(void *junk, int howto) 705 { 706 707 printf("Rebooting...\n"); 708 DELAY(reboot_wait_time * 1000000); 709 710 /* 711 * Acquiring smp_ipi_mtx here has a double effect: 712 * - it disables interrupts avoiding CPU0 preemption 713 * by fast handlers (thus deadlocking against other CPUs) 714 * - it avoids deadlocks against smp_rendezvous() or, more 715 * generally, threads busy-waiting, with this spinlock held, 716 * and waiting for responses by threads on other CPUs 717 * (ie. smp_tlb_shootdown()). 718 * 719 * For the !SMP case it just needs to handle the former problem. 720 */ 721 #ifdef SMP 722 mtx_lock_spin(&smp_ipi_mtx); 723 #else 724 spinlock_enter(); 725 #endif 726 727 cpu_reset(); 728 /* NOTREACHED */ /* assuming reset worked */ 729 } 730 731 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 732 static int kassert_warn_only = 0; 733 #ifdef KDB 734 static int kassert_do_kdb = 0; 735 #endif 736 #ifdef KTR 737 static int kassert_do_ktr = 0; 738 #endif 739 static int kassert_do_log = 1; 740 static int kassert_log_pps_limit = 4; 741 static int kassert_log_mute_at = 0; 742 static int kassert_log_panic_at = 0; 743 static int kassert_suppress_in_panic = 0; 744 static int kassert_warnings = 0; 745 746 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 747 "kassert options"); 748 749 #ifdef KASSERT_PANIC_OPTIONAL 750 #define KASSERT_RWTUN CTLFLAG_RWTUN 751 #else 752 #define KASSERT_RWTUN CTLFLAG_RDTUN 753 #endif 754 755 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 756 &kassert_warn_only, 0, 757 "KASSERT triggers a panic (0) or just a warning (1)"); 758 759 #ifdef KDB 760 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 761 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 762 #endif 763 764 #ifdef KTR 765 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 766 &kassert_do_ktr, 0, 767 "KASSERT does a KTR, set this to the KTRMASK you want"); 768 #endif 769 770 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 771 &kassert_do_log, 0, 772 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 773 774 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, 775 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 776 777 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 778 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 779 780 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 781 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 782 783 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 784 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 785 786 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 787 &kassert_suppress_in_panic, 0, 788 "KASSERTs will be suppressed while handling a panic"); 789 #undef KASSERT_RWTUN 790 791 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 792 793 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 794 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0, 795 kassert_sysctl_kassert, "I", 796 "set to trigger a test kassert"); 797 798 static int 799 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 800 { 801 int error, i; 802 803 error = sysctl_wire_old_buffer(req, sizeof(int)); 804 if (error == 0) { 805 i = 0; 806 error = sysctl_handle_int(oidp, &i, 0, req); 807 } 808 if (error != 0 || req->newptr == NULL) 809 return (error); 810 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 811 return (0); 812 } 813 814 #ifdef KASSERT_PANIC_OPTIONAL 815 /* 816 * Called by KASSERT, this decides if we will panic 817 * or if we will log via printf and/or ktr. 818 */ 819 void 820 kassert_panic(const char *fmt, ...) 821 { 822 static char buf[256]; 823 va_list ap; 824 825 va_start(ap, fmt); 826 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 827 va_end(ap); 828 829 /* 830 * If we are suppressing secondary panics, log the warning but do not 831 * re-enter panic/kdb. 832 */ 833 if (KERNEL_PANICKED() && kassert_suppress_in_panic) { 834 if (kassert_do_log) { 835 printf("KASSERT failed: %s\n", buf); 836 #ifdef KDB 837 if (trace_all_panics && trace_on_panic) 838 kdb_backtrace(); 839 #endif 840 } 841 return; 842 } 843 844 /* 845 * panic if we're not just warning, or if we've exceeded 846 * kassert_log_panic_at warnings. 847 */ 848 if (!kassert_warn_only || 849 (kassert_log_panic_at > 0 && 850 kassert_warnings >= kassert_log_panic_at)) { 851 va_start(ap, fmt); 852 vpanic(fmt, ap); 853 /* NORETURN */ 854 } 855 #ifdef KTR 856 if (kassert_do_ktr) 857 CTR0(ktr_mask, buf); 858 #endif /* KTR */ 859 /* 860 * log if we've not yet met the mute limit. 861 */ 862 if (kassert_do_log && 863 (kassert_log_mute_at == 0 || 864 kassert_warnings < kassert_log_mute_at)) { 865 static struct timeval lasterr; 866 static int curerr; 867 868 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 869 printf("KASSERT failed: %s\n", buf); 870 kdb_backtrace(); 871 } 872 } 873 #ifdef KDB 874 if (kassert_do_kdb) { 875 kdb_enter(KDB_WHY_KASSERT, buf); 876 } 877 #endif 878 atomic_add_int(&kassert_warnings, 1); 879 } 880 #endif /* KASSERT_PANIC_OPTIONAL */ 881 #endif 882 883 /* 884 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 885 * and then reboots. If we are called twice, then we avoid trying to sync 886 * the disks as this often leads to recursive panics. 887 */ 888 void 889 panic(const char *fmt, ...) 890 { 891 va_list ap; 892 893 va_start(ap, fmt); 894 vpanic(fmt, ap); 895 } 896 897 void 898 vpanic(const char *fmt, va_list ap) 899 { 900 #ifdef SMP 901 cpuset_t other_cpus; 902 #endif 903 struct thread *td = curthread; 904 int bootopt, newpanic; 905 static char buf[256]; 906 907 spinlock_enter(); 908 909 #ifdef SMP 910 /* 911 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 912 * concurrently entering panic. Only the winner will proceed 913 * further. 914 */ 915 if (panicstr == NULL && !kdb_active) { 916 other_cpus = all_cpus; 917 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 918 stop_cpus_hard(other_cpus); 919 } 920 #endif 921 922 /* 923 * Ensure that the scheduler is stopped while panicking, even if panic 924 * has been entered from kdb. 925 */ 926 td->td_stopsched = 1; 927 928 bootopt = RB_AUTOBOOT; 929 newpanic = 0; 930 if (KERNEL_PANICKED()) 931 bootopt |= RB_NOSYNC; 932 else { 933 bootopt |= RB_DUMP; 934 panicstr = fmt; 935 panicked = true; 936 newpanic = 1; 937 } 938 939 if (newpanic) { 940 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 941 panicstr = buf; 942 cngrab(); 943 printf("panic: %s\n", buf); 944 } else { 945 printf("panic: "); 946 vprintf(fmt, ap); 947 printf("\n"); 948 } 949 #ifdef SMP 950 printf("cpuid = %d\n", PCPU_GET(cpuid)); 951 #endif 952 printf("time = %jd\n", (intmax_t )time_second); 953 #ifdef KDB 954 if ((newpanic || trace_all_panics) && trace_on_panic) 955 kdb_backtrace(); 956 if (debugger_on_panic) 957 kdb_enter(KDB_WHY_PANIC, "panic"); 958 else if (!newpanic && debugger_on_recursive_panic) 959 kdb_enter(KDB_WHY_PANIC, "re-panic"); 960 #endif 961 /*thread_lock(td); */ 962 td->td_flags |= TDF_INPANIC; 963 /* thread_unlock(td); */ 964 if (!sync_on_panic) 965 bootopt |= RB_NOSYNC; 966 if (poweroff_on_panic) 967 bootopt |= RB_POWEROFF; 968 if (powercycle_on_panic) 969 bootopt |= RB_POWERCYCLE; 970 kern_reboot(bootopt); 971 } 972 973 /* 974 * Support for poweroff delay. 975 * 976 * Please note that setting this delay too short might power off your machine 977 * before the write cache on your hard disk has been flushed, leading to 978 * soft-updates inconsistencies. 979 */ 980 #ifndef POWEROFF_DELAY 981 # define POWEROFF_DELAY 5000 982 #endif 983 static int poweroff_delay = POWEROFF_DELAY; 984 985 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 986 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 987 988 static void 989 poweroff_wait(void *junk, int howto) 990 { 991 992 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 993 return; 994 DELAY(poweroff_delay * 1000); 995 } 996 997 /* 998 * Some system processes (e.g. syncer) need to be stopped at appropriate 999 * points in their main loops prior to a system shutdown, so that they 1000 * won't interfere with the shutdown process (e.g. by holding a disk buf 1001 * to cause sync to fail). For each of these system processes, register 1002 * shutdown_kproc() as a handler for one of shutdown events. 1003 */ 1004 static int kproc_shutdown_wait = 60; 1005 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 1006 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 1007 1008 void 1009 kproc_shutdown(void *arg, int howto) 1010 { 1011 struct proc *p; 1012 int error; 1013 1014 if (KERNEL_PANICKED()) 1015 return; 1016 1017 p = (struct proc *)arg; 1018 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 1019 kproc_shutdown_wait, p->p_comm); 1020 error = kproc_suspend(p, kproc_shutdown_wait * hz); 1021 1022 if (error == EWOULDBLOCK) 1023 printf("timed out\n"); 1024 else 1025 printf("done\n"); 1026 } 1027 1028 void 1029 kthread_shutdown(void *arg, int howto) 1030 { 1031 struct thread *td; 1032 int error; 1033 1034 if (KERNEL_PANICKED()) 1035 return; 1036 1037 td = (struct thread *)arg; 1038 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 1039 kproc_shutdown_wait, td->td_name); 1040 error = kthread_suspend(td, kproc_shutdown_wait * hz); 1041 1042 if (error == EWOULDBLOCK) 1043 printf("timed out\n"); 1044 else 1045 printf("done\n"); 1046 } 1047 1048 static int 1049 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 1050 { 1051 char buf[256]; 1052 struct dumperinfo *di; 1053 struct sbuf sb; 1054 int error; 1055 1056 error = sysctl_wire_old_buffer(req, 0); 1057 if (error != 0) 1058 return (error); 1059 1060 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1061 1062 mtx_lock(&dumpconf_list_lk); 1063 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1064 if (di != TAILQ_FIRST(&dumper_configs)) 1065 sbuf_putc(&sb, ','); 1066 sbuf_cat(&sb, di->di_devname); 1067 } 1068 mtx_unlock(&dumpconf_list_lk); 1069 1070 error = sbuf_finish(&sb); 1071 sbuf_delete(&sb); 1072 return (error); 1073 } 1074 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, 1075 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0, 1076 dumpdevname_sysctl_handler, "A", 1077 "Device(s) for kernel dumps"); 1078 1079 static int _dump_append(struct dumperinfo *di, void *virtual, size_t length); 1080 1081 #ifdef EKCD 1082 static struct kerneldumpcrypto * 1083 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1084 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1085 { 1086 struct kerneldumpcrypto *kdc; 1087 struct kerneldumpkey *kdk; 1088 uint32_t dumpkeysize; 1089 1090 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1091 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1092 1093 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1094 1095 kdc->kdc_encryption = encryption; 1096 switch (kdc->kdc_encryption) { 1097 case KERNELDUMP_ENC_AES_256_CBC: 1098 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1099 goto failed; 1100 break; 1101 case KERNELDUMP_ENC_CHACHA20: 1102 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1103 break; 1104 default: 1105 goto failed; 1106 } 1107 1108 kdc->kdc_dumpkeysize = dumpkeysize; 1109 kdk = kdc->kdc_dumpkey; 1110 kdk->kdk_encryption = kdc->kdc_encryption; 1111 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1112 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1113 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1114 1115 return (kdc); 1116 failed: 1117 zfree(kdc, M_EKCD); 1118 return (NULL); 1119 } 1120 1121 static int 1122 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1123 { 1124 uint8_t hash[SHA256_DIGEST_LENGTH]; 1125 SHA256_CTX ctx; 1126 struct kerneldumpkey *kdk; 1127 int error; 1128 1129 error = 0; 1130 1131 if (kdc == NULL) 1132 return (0); 1133 1134 /* 1135 * When a user enters ddb it can write a crash dump multiple times. 1136 * Each time it should be encrypted using a different IV. 1137 */ 1138 SHA256_Init(&ctx); 1139 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1140 SHA256_Final(hash, &ctx); 1141 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1142 1143 switch (kdc->kdc_encryption) { 1144 case KERNELDUMP_ENC_AES_256_CBC: 1145 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1146 kdc->kdc_iv) <= 0) { 1147 error = EINVAL; 1148 goto out; 1149 } 1150 break; 1151 case KERNELDUMP_ENC_CHACHA20: 1152 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1153 break; 1154 default: 1155 error = EINVAL; 1156 goto out; 1157 } 1158 1159 kdk = kdc->kdc_dumpkey; 1160 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1161 out: 1162 explicit_bzero(hash, sizeof(hash)); 1163 return (error); 1164 } 1165 1166 static uint32_t 1167 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1168 { 1169 1170 if (kdc == NULL) 1171 return (0); 1172 return (kdc->kdc_dumpkeysize); 1173 } 1174 #endif /* EKCD */ 1175 1176 static struct kerneldumpcomp * 1177 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1178 { 1179 struct kerneldumpcomp *kdcomp; 1180 int format; 1181 1182 switch (compression) { 1183 case KERNELDUMP_COMP_GZIP: 1184 format = COMPRESS_GZIP; 1185 break; 1186 case KERNELDUMP_COMP_ZSTD: 1187 format = COMPRESS_ZSTD; 1188 break; 1189 default: 1190 return (NULL); 1191 } 1192 1193 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1194 kdcomp->kdc_format = compression; 1195 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1196 format, di->maxiosize, kerneldump_gzlevel, di); 1197 if (kdcomp->kdc_stream == NULL) { 1198 free(kdcomp, M_DUMPER); 1199 return (NULL); 1200 } 1201 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1202 return (kdcomp); 1203 } 1204 1205 static void 1206 kerneldumpcomp_destroy(struct dumperinfo *di) 1207 { 1208 struct kerneldumpcomp *kdcomp; 1209 1210 kdcomp = di->kdcomp; 1211 if (kdcomp == NULL) 1212 return; 1213 compressor_fini(kdcomp->kdc_stream); 1214 zfree(kdcomp->kdc_buf, M_DUMPER); 1215 free(kdcomp, M_DUMPER); 1216 } 1217 1218 /* 1219 * Free a dumper. Must not be present on global list. 1220 */ 1221 void 1222 dumper_destroy(struct dumperinfo *di) 1223 { 1224 1225 if (di == NULL) 1226 return; 1227 1228 zfree(di->blockbuf, M_DUMPER); 1229 kerneldumpcomp_destroy(di); 1230 #ifdef EKCD 1231 zfree(di->kdcrypto, M_EKCD); 1232 #endif 1233 zfree(di, M_DUMPER); 1234 } 1235 1236 /* 1237 * Allocate and set up a new dumper from the provided template. 1238 */ 1239 int 1240 dumper_create(const struct dumperinfo *di_template, const char *devname, 1241 const struct diocskerneldump_arg *kda, struct dumperinfo **dip) 1242 { 1243 struct dumperinfo *newdi; 1244 int error = 0; 1245 1246 if (dip == NULL) 1247 return (EINVAL); 1248 1249 /* Allocate a new dumper */ 1250 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, 1251 M_WAITOK | M_ZERO); 1252 memcpy(newdi, di_template, sizeof(*newdi)); 1253 newdi->blockbuf = NULL; 1254 newdi->kdcrypto = NULL; 1255 newdi->kdcomp = NULL; 1256 strcpy(newdi->di_devname, devname); 1257 1258 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1259 #ifdef EKCD 1260 newdi->kdcrypto = kerneldumpcrypto_create(newdi->blocksize, 1261 kda->kda_encryption, kda->kda_key, 1262 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1263 if (newdi->kdcrypto == NULL) { 1264 error = EINVAL; 1265 goto cleanup; 1266 } 1267 #else 1268 error = EOPNOTSUPP; 1269 goto cleanup; 1270 #endif 1271 } 1272 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1273 #ifdef EKCD 1274 /* 1275 * We can't support simultaneous unpadded block cipher 1276 * encryption and compression because there is no guarantee the 1277 * length of the compressed result is exactly a multiple of the 1278 * cipher block size. 1279 */ 1280 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1281 error = EOPNOTSUPP; 1282 goto cleanup; 1283 } 1284 #endif 1285 newdi->kdcomp = kerneldumpcomp_create(newdi, 1286 kda->kda_compression); 1287 if (newdi->kdcomp == NULL) { 1288 error = EINVAL; 1289 goto cleanup; 1290 } 1291 } 1292 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1293 1294 *dip = newdi; 1295 return (0); 1296 cleanup: 1297 dumper_destroy(newdi); 1298 return (error); 1299 } 1300 1301 /* 1302 * Create a new dumper and register it in the global list. 1303 */ 1304 int 1305 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1306 const struct diocskerneldump_arg *kda) 1307 { 1308 struct dumperinfo *newdi, *listdi; 1309 bool inserted; 1310 uint8_t index; 1311 int error; 1312 1313 index = kda->kda_index; 1314 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1315 index != KDA_REMOVE_ALL); 1316 1317 error = priv_check(curthread, PRIV_SETDUMPER); 1318 if (error != 0) 1319 return (error); 1320 1321 error = dumper_create(di_template, devname, kda, &newdi); 1322 if (error != 0) 1323 return (error); 1324 1325 /* Add the new configuration to the queue */ 1326 mtx_lock(&dumpconf_list_lk); 1327 inserted = false; 1328 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1329 if (index == 0) { 1330 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1331 inserted = true; 1332 break; 1333 } 1334 index--; 1335 } 1336 if (!inserted) 1337 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1338 mtx_unlock(&dumpconf_list_lk); 1339 1340 return (0); 1341 } 1342 1343 #ifdef DDB 1344 void 1345 dumper_ddb_insert(struct dumperinfo *newdi) 1346 { 1347 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1348 } 1349 1350 void 1351 dumper_ddb_remove(struct dumperinfo *di) 1352 { 1353 TAILQ_REMOVE(&dumper_configs, di, di_next); 1354 } 1355 #endif 1356 1357 static bool 1358 dumper_config_match(const struct dumperinfo *di, const char *devname, 1359 const struct diocskerneldump_arg *kda) 1360 { 1361 if (kda->kda_index == KDA_REMOVE_ALL) 1362 return (true); 1363 1364 if (strcmp(di->di_devname, devname) != 0) 1365 return (false); 1366 1367 /* 1368 * Allow wildcard removal of configs matching a device on g_dev_orphan. 1369 */ 1370 if (kda->kda_index == KDA_REMOVE_DEV) 1371 return (true); 1372 1373 if (di->kdcomp != NULL) { 1374 if (di->kdcomp->kdc_format != kda->kda_compression) 1375 return (false); 1376 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1377 return (false); 1378 #ifdef EKCD 1379 if (di->kdcrypto != NULL) { 1380 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1381 return (false); 1382 /* 1383 * Do we care to verify keys match to delete? It seems weird 1384 * to expect multiple fallback dump configurations on the same 1385 * device that only differ in crypto key. 1386 */ 1387 } else 1388 #endif 1389 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1390 return (false); 1391 1392 return (true); 1393 } 1394 1395 /* 1396 * Remove and free the requested dumper(s) from the global list. 1397 */ 1398 int 1399 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1400 { 1401 struct dumperinfo *di, *sdi; 1402 bool found; 1403 int error; 1404 1405 error = priv_check(curthread, PRIV_SETDUMPER); 1406 if (error != 0) 1407 return (error); 1408 1409 /* 1410 * Try to find a matching configuration, and kill it. 1411 * 1412 * NULL 'kda' indicates remove any configuration matching 'devname', 1413 * which may remove multiple configurations in atypical configurations. 1414 */ 1415 found = false; 1416 mtx_lock(&dumpconf_list_lk); 1417 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1418 if (dumper_config_match(di, devname, kda)) { 1419 found = true; 1420 TAILQ_REMOVE(&dumper_configs, di, di_next); 1421 dumper_destroy(di); 1422 } 1423 } 1424 mtx_unlock(&dumpconf_list_lk); 1425 1426 /* Only produce ENOENT if a more targeted match didn't match. */ 1427 if (!found && kda->kda_index == KDA_REMOVE) 1428 return (ENOENT); 1429 return (0); 1430 } 1431 1432 static int 1433 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1434 { 1435 1436 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1437 offset - di->mediaoffset + length > di->mediasize)) { 1438 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1439 printf( 1440 "Compressed dump failed to fit in device boundaries.\n"); 1441 return (E2BIG); 1442 } 1443 1444 printf("Attempt to write outside dump device boundaries.\n" 1445 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1446 (intmax_t)offset, (intmax_t)di->mediaoffset, 1447 (uintmax_t)length, (intmax_t)di->mediasize); 1448 return (ENOSPC); 1449 } 1450 if (length % di->blocksize != 0) { 1451 printf("Attempt to write partial block of length %ju.\n", 1452 (uintmax_t)length); 1453 return (EINVAL); 1454 } 1455 if (offset % di->blocksize != 0) { 1456 printf("Attempt to write at unaligned offset %jd.\n", 1457 (intmax_t)offset); 1458 return (EINVAL); 1459 } 1460 1461 return (0); 1462 } 1463 1464 #ifdef EKCD 1465 static int 1466 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1467 { 1468 1469 switch (kdc->kdc_encryption) { 1470 case KERNELDUMP_ENC_AES_256_CBC: 1471 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1472 8 * size, buf) <= 0) { 1473 return (EIO); 1474 } 1475 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1476 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1477 return (EIO); 1478 } 1479 break; 1480 case KERNELDUMP_ENC_CHACHA20: 1481 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1482 break; 1483 default: 1484 return (EINVAL); 1485 } 1486 1487 return (0); 1488 } 1489 1490 /* Encrypt data and call dumper. */ 1491 static int 1492 dump_encrypted_write(struct dumperinfo *di, void *virtual, off_t offset, 1493 size_t length) 1494 { 1495 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1496 struct kerneldumpcrypto *kdc; 1497 int error; 1498 size_t nbytes; 1499 1500 kdc = di->kdcrypto; 1501 1502 while (length > 0) { 1503 nbytes = MIN(length, sizeof(buf)); 1504 bcopy(virtual, buf, nbytes); 1505 1506 if (dump_encrypt(kdc, buf, nbytes) != 0) 1507 return (EIO); 1508 1509 error = dump_write(di, buf, offset, nbytes); 1510 if (error != 0) 1511 return (error); 1512 1513 offset += nbytes; 1514 virtual = (void *)((uint8_t *)virtual + nbytes); 1515 length -= nbytes; 1516 } 1517 1518 return (0); 1519 } 1520 #endif /* EKCD */ 1521 1522 static int 1523 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1524 { 1525 struct dumperinfo *di; 1526 size_t resid, rlength; 1527 int error; 1528 1529 di = arg; 1530 1531 if (length % di->blocksize != 0) { 1532 /* 1533 * This must be the final write after flushing the compression 1534 * stream. Write as many full blocks as possible and stash the 1535 * residual data in the dumper's block buffer. It will be 1536 * padded and written in dump_finish(). 1537 */ 1538 rlength = rounddown(length, di->blocksize); 1539 if (rlength != 0) { 1540 error = _dump_append(di, base, rlength); 1541 if (error != 0) 1542 return (error); 1543 } 1544 resid = length - rlength; 1545 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1546 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1547 di->kdcomp->kdc_resid = resid; 1548 return (EAGAIN); 1549 } 1550 return (_dump_append(di, base, length)); 1551 } 1552 1553 /* 1554 * Write kernel dump headers at the beginning and end of the dump extent. 1555 * Write the kernel dump encryption key after the leading header if we were 1556 * configured to do so. 1557 */ 1558 static int 1559 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1560 { 1561 #ifdef EKCD 1562 struct kerneldumpcrypto *kdc; 1563 #endif 1564 void *buf; 1565 size_t hdrsz; 1566 uint64_t extent; 1567 uint32_t keysize; 1568 int error; 1569 1570 hdrsz = sizeof(*kdh); 1571 if (hdrsz > di->blocksize) 1572 return (ENOMEM); 1573 1574 #ifdef EKCD 1575 kdc = di->kdcrypto; 1576 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1577 #else 1578 keysize = 0; 1579 #endif 1580 1581 /* 1582 * If the dump device has special handling for headers, let it take care 1583 * of writing them out. 1584 */ 1585 if (di->dumper_hdr != NULL) 1586 return (di->dumper_hdr(di, kdh)); 1587 1588 if (hdrsz == di->blocksize) 1589 buf = kdh; 1590 else { 1591 buf = di->blockbuf; 1592 memset(buf, 0, di->blocksize); 1593 memcpy(buf, kdh, hdrsz); 1594 } 1595 1596 extent = dtoh64(kdh->dumpextent); 1597 #ifdef EKCD 1598 if (kdc != NULL) { 1599 error = dump_write(di, kdc->kdc_dumpkey, 1600 di->mediaoffset + di->mediasize - di->blocksize - extent - 1601 keysize, keysize); 1602 if (error != 0) 1603 return (error); 1604 } 1605 #endif 1606 1607 error = dump_write(di, buf, 1608 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1609 keysize, di->blocksize); 1610 if (error == 0) 1611 error = dump_write(di, buf, di->mediaoffset + di->mediasize - 1612 di->blocksize, di->blocksize); 1613 return (error); 1614 } 1615 1616 /* 1617 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1618 * protect us from metadata and metadata from us. 1619 */ 1620 #define SIZEOF_METADATA (64 * 1024) 1621 1622 /* 1623 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1624 * if requested, and make sure that we have enough space on the dump device. 1625 * 1626 * We set things up so that the dump ends before the last sector of the dump 1627 * device, at which the trailing header is written. 1628 * 1629 * +-----------+------+-----+----------------------------+------+ 1630 * | | lhdr | key | ... kernel dump ... | thdr | 1631 * +-----------+------+-----+----------------------------+------+ 1632 * 1 blk opt <------- dump extent --------> 1 blk 1633 * 1634 * Dumps written using dump_append() start at the beginning of the extent. 1635 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1636 * will not. The true length of the dump is recorded in the leading and trailing 1637 * headers once the dump has been completed. 1638 * 1639 * The dump device may provide a callback, in which case it will initialize 1640 * dumpoff and take care of laying out the headers. 1641 */ 1642 int 1643 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1644 { 1645 #ifdef EKCD 1646 struct kerneldumpcrypto *kdc; 1647 #endif 1648 void *key; 1649 uint64_t dumpextent, span; 1650 uint32_t keysize; 1651 int error; 1652 1653 #ifdef EKCD 1654 /* Send the key before the dump so a partial dump is still usable. */ 1655 kdc = di->kdcrypto; 1656 error = kerneldumpcrypto_init(kdc); 1657 if (error != 0) 1658 return (error); 1659 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1660 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1661 #else 1662 error = 0; 1663 keysize = 0; 1664 key = NULL; 1665 #endif 1666 1667 if (di->dumper_start != NULL) { 1668 error = di->dumper_start(di, key, keysize); 1669 } else { 1670 dumpextent = dtoh64(kdh->dumpextent); 1671 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1672 keysize; 1673 if (di->mediasize < span) { 1674 if (di->kdcomp == NULL) 1675 return (E2BIG); 1676 1677 /* 1678 * We don't yet know how much space the compressed dump 1679 * will occupy, so try to use the whole swap partition 1680 * (minus the first 64KB) in the hope that the 1681 * compressed dump will fit. If that doesn't turn out to 1682 * be enough, the bounds checking in dump_write() 1683 * will catch us and cause the dump to fail. 1684 */ 1685 dumpextent = di->mediasize - span + dumpextent; 1686 kdh->dumpextent = htod64(dumpextent); 1687 } 1688 1689 /* 1690 * The offset at which to begin writing the dump. 1691 */ 1692 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1693 dumpextent; 1694 } 1695 di->origdumpoff = di->dumpoff; 1696 return (error); 1697 } 1698 1699 static int 1700 _dump_append(struct dumperinfo *di, void *virtual, size_t length) 1701 { 1702 int error; 1703 1704 #ifdef EKCD 1705 if (di->kdcrypto != NULL) 1706 error = dump_encrypted_write(di, virtual, di->dumpoff, length); 1707 else 1708 #endif 1709 error = dump_write(di, virtual, di->dumpoff, length); 1710 if (error == 0) 1711 di->dumpoff += length; 1712 return (error); 1713 } 1714 1715 /* 1716 * Write to the dump device starting at dumpoff. When compression is enabled, 1717 * writes to the device will be performed using a callback that gets invoked 1718 * when the compression stream's output buffer is full. 1719 */ 1720 int 1721 dump_append(struct dumperinfo *di, void *virtual, size_t length) 1722 { 1723 void *buf; 1724 1725 if (di->kdcomp != NULL) { 1726 /* Bounce through a buffer to avoid CRC errors. */ 1727 if (length > di->maxiosize) 1728 return (EINVAL); 1729 buf = di->kdcomp->kdc_buf; 1730 memmove(buf, virtual, length); 1731 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1732 } 1733 return (_dump_append(di, virtual, length)); 1734 } 1735 1736 /* 1737 * Write to the dump device at the specified offset. 1738 */ 1739 int 1740 dump_write(struct dumperinfo *di, void *virtual, off_t offset, size_t length) 1741 { 1742 int error; 1743 1744 error = dump_check_bounds(di, offset, length); 1745 if (error != 0) 1746 return (error); 1747 return (di->dumper(di->priv, virtual, offset, length)); 1748 } 1749 1750 /* 1751 * Perform kernel dump finalization: flush the compression stream, if necessary, 1752 * write the leading and trailing kernel dump headers now that we know the true 1753 * length of the dump, and optionally write the encryption key following the 1754 * leading header. 1755 */ 1756 int 1757 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1758 { 1759 int error; 1760 1761 if (di->kdcomp != NULL) { 1762 error = compressor_flush(di->kdcomp->kdc_stream); 1763 if (error == EAGAIN) { 1764 /* We have residual data in di->blockbuf. */ 1765 error = _dump_append(di, di->blockbuf, di->blocksize); 1766 if (error == 0) 1767 /* Compensate for _dump_append()'s adjustment. */ 1768 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1769 di->kdcomp->kdc_resid = 0; 1770 } 1771 if (error != 0) 1772 return (error); 1773 1774 /* 1775 * We now know the size of the compressed dump, so update the 1776 * header accordingly and recompute parity. 1777 */ 1778 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1779 kdh->parity = 0; 1780 kdh->parity = kerneldump_parity(kdh); 1781 1782 compressor_reset(di->kdcomp->kdc_stream); 1783 } 1784 1785 error = dump_write_headers(di, kdh); 1786 if (error != 0) 1787 return (error); 1788 1789 (void)dump_write(di, NULL, 0, 0); 1790 return (0); 1791 } 1792 1793 void 1794 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1795 const char *magic, uint32_t archver, uint64_t dumplen) 1796 { 1797 size_t dstsize; 1798 1799 bzero(kdh, sizeof(*kdh)); 1800 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1801 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1802 kdh->version = htod32(KERNELDUMPVERSION); 1803 kdh->architectureversion = htod32(archver); 1804 kdh->dumplength = htod64(dumplen); 1805 kdh->dumpextent = kdh->dumplength; 1806 kdh->dumptime = htod64(time_second); 1807 #ifdef EKCD 1808 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1809 #else 1810 kdh->dumpkeysize = 0; 1811 #endif 1812 kdh->blocksize = htod32(di->blocksize); 1813 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1814 dstsize = sizeof(kdh->versionstring); 1815 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1816 kdh->versionstring[dstsize - 2] = '\n'; 1817 if (panicstr != NULL) 1818 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1819 if (di->kdcomp != NULL) 1820 kdh->compression = di->kdcomp->kdc_format; 1821 kdh->parity = kerneldump_parity(kdh); 1822 } 1823 1824 #ifdef DDB 1825 DB_SHOW_COMMAND_FLAGS(panic, db_show_panic, DB_CMD_MEMSAFE) 1826 { 1827 1828 if (panicstr == NULL) 1829 db_printf("panicstr not set\n"); 1830 else 1831 db_printf("panic: %s\n", panicstr); 1832 } 1833 #endif 1834