1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_ddb.h" 43 #include "opt_ekcd.h" 44 #include "opt_kdb.h" 45 #include "opt_panic.h" 46 #include "opt_printf.h" 47 #include "opt_sched.h" 48 #include "opt_watchdog.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/bio.h> 53 #include <sys/boottrace.h> 54 #include <sys/buf.h> 55 #include <sys/conf.h> 56 #include <sys/compressor.h> 57 #include <sys/cons.h> 58 #include <sys/disk.h> 59 #include <sys/eventhandler.h> 60 #include <sys/filedesc.h> 61 #include <sys/jail.h> 62 #include <sys/kdb.h> 63 #include <sys/kernel.h> 64 #include <sys/kerneldump.h> 65 #include <sys/kthread.h> 66 #include <sys/ktr.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/mount.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/reboot.h> 73 #include <sys/resourcevar.h> 74 #include <sys/rwlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/sched.h> 77 #include <sys/smp.h> 78 #include <sys/sysctl.h> 79 #include <sys/sysproto.h> 80 #include <sys/taskqueue.h> 81 #include <sys/vnode.h> 82 #include <sys/watchdog.h> 83 84 #include <crypto/chacha20/chacha.h> 85 #include <crypto/rijndael/rijndael-api-fst.h> 86 #include <crypto/sha2/sha256.h> 87 88 #include <ddb/ddb.h> 89 90 #include <machine/cpu.h> 91 #include <machine/dump.h> 92 #include <machine/pcb.h> 93 #include <machine/smp.h> 94 95 #include <security/mac/mac_framework.h> 96 97 #include <vm/vm.h> 98 #include <vm/vm_object.h> 99 #include <vm/vm_page.h> 100 #include <vm/vm_pager.h> 101 #include <vm/swap_pager.h> 102 103 #include <sys/signalvar.h> 104 105 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 106 107 #ifndef PANIC_REBOOT_WAIT_TIME 108 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 109 #endif 110 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 111 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, 
CTLFLAG_RWTUN, 112 &panic_reboot_wait_time, 0, 113 "Seconds to wait before rebooting after a panic"); 114 static int reboot_wait_time = 0; 115 SYSCTL_INT(_kern, OID_AUTO, reboot_wait_time, CTLFLAG_RWTUN, 116 &reboot_wait_time, 0, 117 "Seconds to wait before rebooting"); 118 119 /* 120 * Note that stdarg.h and the ANSI style va_start macro is used for both 121 * ANSI and traditional C compilers. 122 */ 123 #include <machine/stdarg.h> 124 125 #ifdef KDB 126 #ifdef KDB_UNATTENDED 127 int debugger_on_panic = 0; 128 #else 129 int debugger_on_panic = 1; 130 #endif 131 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 132 CTLFLAG_RWTUN, &debugger_on_panic, 0, 133 "Run debugger on kernel panic"); 134 135 static bool debugger_on_recursive_panic = false; 136 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic, 137 CTLFLAG_RWTUN, &debugger_on_recursive_panic, 0, 138 "Run debugger on recursive kernel panic"); 139 140 int debugger_on_trap = 0; 141 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, 142 CTLFLAG_RWTUN, &debugger_on_trap, 0, 143 "Run debugger on kernel trap before panic"); 144 145 #ifdef KDB_TRACE 146 static int trace_on_panic = 1; 147 static bool trace_all_panics = true; 148 #else 149 static int trace_on_panic = 0; 150 static bool trace_all_panics = false; 151 #endif 152 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 153 CTLFLAG_RWTUN | CTLFLAG_SECURE, 154 &trace_on_panic, 0, "Print stack trace on kernel panic"); 155 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, 156 &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); 157 #endif /* KDB */ 158 159 static int sync_on_panic = 0; 160 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 161 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 162 163 static bool poweroff_on_panic = 0; 164 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, 165 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); 166 167 static bool powercycle_on_panic = 0; 
SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
	&powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");

/* Parent node of the kern.shutdown.* knobs declared in this file. */
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Shutdown environment");

/* Passed to bufshutdown() by kern_reboot(); defaults to 1 under DIAGNOSTIC. */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
    &show_busybufs, 0,
    "Show busy buffers during shutdown");

int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
	&suspend_blocked, 0, "Block suspend due to a pending shutdown");

#ifdef EKCD
FEATURE(ekcd, "Encrypted kernel crash dumps support");

MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");

/*
 * Per-dumper encryption state, allocated by kerneldumpcrypto_create() with
 * room for the trailing kdc_dumpkey[] and re-keyed with a fresh IV by
 * kerneldumpcrypto_init().
 */
struct kerneldumpcrypto {
	uint8_t			kdc_encryption;	/* KERNELDUMP_ENC_* selector */
	uint8_t			kdc_iv[KERNELDUMP_IV_MAX_SIZE];
	union {
		struct {
			keyInstance	aes_ki;
			cipherInstance	aes_ci;
		} u_aes;
		struct chacha_ctx	u_chacha;
	} u;
/* Shorthand accessors for the cipher-specific union members. */
#define	kdc_ki	u.u_aes.aes_ki
#define	kdc_ci	u.u_aes.aes_ci
#define	kdc_chacha	u.u_chacha
	uint32_t		kdc_dumpkeysize; /* bytes reserved for kdc_dumpkey */
	struct kerneldumpkey	kdc_dumpkey[];
};
#endif

/*
 * Per-dumper compression state, allocated by kerneldumpcomp_create() and
 * released by kerneldumpcomp_destroy().
 */
struct kerneldumpcomp {
	uint8_t			kdc_format;	/* KERNELDUMP_COMP_* selector */
	struct compressor	*kdc_stream;	/* from compressor_init() */
	uint8_t			*kdc_buf;	/* di->maxiosize bytes */
	size_t			kdc_resid;	/* NOTE(review): users not visible here */
};

static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
		    uint8_t compression);
static void	kerneldumpcomp_destroy(struct dumperinfo *di);
static int	kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);

/* Compression level handed to compressor_init() by kerneldumpcomp_create(). */
static int kerneldump_gzlevel = 6;
SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
    &kerneldump_gzlevel, 0,
    "Kernel crash dump compression level");

/*
 * Variable panicstr contains argument to first call to panic; used as flag
 * to indicate that the kernel has already called panic.
 */
const char *panicstr;
bool __read_frequently panicked;	/* set to true by the first vpanic() */

int __read_mostly dumping;		/* system is dumping */
int rebooting;				/* system is rebooting */
/*
 * Used to serialize between sysctl kern.shutdown.dumpdevname and list
 * modifications via ioctl.
 */
static struct mtx dumpconf_list_lk;
MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF);

/* Our selected dumper(s). */
static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs =
    TAILQ_HEAD_INITIALIZER(dumper_configs);

/* Context information for dump-debuggers, saved by the dump_savectx() macro. */
struct pcb dumppcb;			/* Registers. */
lwpid_t dumptid;			/* Thread ID. */

/* Device switch with no methods; it only backs the node made by reroot_conf(). */
static struct cdevsw reroot_cdevsw = {
     .d_version = D_VERSION,
     .d_name    = "reroot",
};

static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
static void shutdown_panic(void *junk, int howto);
static void shutdown_reset(void *junk, int howto);
static int kern_reroot(void);

/*
 * register various local shutdown events
 *
 * All three handlers hang off shutdown_final: poweroff_wait at
 * SHUTDOWN_PRI_FIRST, shutdown_halt and shutdown_panic at
 * SHUTDOWN_PRI_LAST + 100.  shutdown_reset is not registered here;
 * kern_reboot() calls it directly.
 */
static void
shutdown_conf(void *unused)
{

	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
	    SHUTDOWN_PRI_FIRST);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
	    SHUTDOWN_PRI_LAST + 100);
}

SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);

/*
 * The only reason this exists is to create the /dev/reroot/ directory,
 * used by reroot code in init(8) as a mountpoint for tmpfs.
280 */ 281 static void 282 reroot_conf(void *unused) 283 { 284 int error; 285 struct cdev *cdev; 286 287 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 288 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 289 if (error != 0) { 290 printf("%s: failed to create device node, error %d", 291 __func__, error); 292 } 293 } 294 295 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 296 297 /* 298 * The system call that results in a reboot. 299 */ 300 /* ARGSUSED */ 301 int 302 sys_reboot(struct thread *td, struct reboot_args *uap) 303 { 304 int error; 305 306 error = 0; 307 #ifdef MAC 308 error = mac_system_check_reboot(td->td_ucred, uap->opt); 309 #endif 310 if (error == 0) 311 error = priv_check(td, PRIV_REBOOT); 312 if (error == 0) { 313 if (uap->opt & RB_REROOT) 314 error = kern_reroot(); 315 else 316 kern_reboot(uap->opt); 317 } 318 return (error); 319 } 320 321 static void 322 shutdown_nice_task_fn(void *arg, int pending __unused) 323 { 324 int howto; 325 326 howto = (uintptr_t)arg; 327 /* Send a signal to init(8) and have it shutdown the world. */ 328 PROC_LOCK(initproc); 329 if ((howto & RB_POWEROFF) != 0) { 330 BOOTTRACE("SIGUSR2 to init(8)"); 331 kern_psignal(initproc, SIGUSR2); 332 } else if ((howto & RB_POWERCYCLE) != 0) { 333 BOOTTRACE("SIGWINCH to init(8)"); 334 kern_psignal(initproc, SIGWINCH); 335 } else if ((howto & RB_HALT) != 0) { 336 BOOTTRACE("SIGUSR1 to init(8)"); 337 kern_psignal(initproc, SIGUSR1); 338 } else { 339 BOOTTRACE("SIGINT to init(8)"); 340 kern_psignal(initproc, SIGINT); 341 } 342 PROC_UNLOCK(initproc); 343 } 344 345 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 346 &shutdown_nice_task_fn, NULL); 347 348 /* 349 * Called by events that want to shut down.. 
e.g <CTL><ALT><DEL> on a PC 350 */ 351 void 352 shutdown_nice(int howto) 353 { 354 355 if (initproc != NULL && !SCHEDULER_STOPPED()) { 356 BOOTTRACE("shutdown initiated"); 357 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 358 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 359 } else { 360 /* 361 * No init(8) running, or scheduler would not allow it 362 * to run, so simply reboot. 363 */ 364 kern_reboot(howto | RB_NOSYNC); 365 } 366 } 367 368 static void 369 print_uptime(void) 370 { 371 int f; 372 struct timespec ts; 373 374 getnanouptime(&ts); 375 printf("Uptime: "); 376 f = 0; 377 if (ts.tv_sec >= 86400) { 378 printf("%ldd", (long)ts.tv_sec / 86400); 379 ts.tv_sec %= 86400; 380 f = 1; 381 } 382 if (f || ts.tv_sec >= 3600) { 383 printf("%ldh", (long)ts.tv_sec / 3600); 384 ts.tv_sec %= 3600; 385 f = 1; 386 } 387 if (f || ts.tv_sec >= 60) { 388 printf("%ldm", (long)ts.tv_sec / 60); 389 ts.tv_sec %= 60; 390 f = 1; 391 } 392 printf("%lds\n", (long)ts.tv_sec); 393 } 394 395 int 396 doadump(boolean_t textdump) 397 { 398 boolean_t coredump; 399 int error; 400 401 error = 0; 402 if (dumping) 403 return (EBUSY); 404 if (TAILQ_EMPTY(&dumper_configs)) 405 return (ENXIO); 406 407 dump_savectx(); 408 dumping++; 409 410 coredump = TRUE; 411 #ifdef DDB 412 if (textdump && textdump_pending) { 413 coredump = FALSE; 414 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 415 } 416 #endif 417 if (coredump) { 418 struct dumperinfo *di; 419 420 TAILQ_FOREACH(di, &dumper_configs, di_next) { 421 error = dumpsys(di); 422 if (error == 0) 423 break; 424 } 425 } 426 427 dumping--; 428 return (error); 429 } 430 431 /* 432 * Trace the shutdown reason. 
 */
static void
reboottrace(int howto)
{
	/*
	 * RB_DUMP selects the "system panic:" wording.  RB_HALT and
	 * RB_POWEROFF are tested independently, so both lines are emitted
	 * when both flags are set.
	 */
	if ((howto & RB_DUMP) != 0) {
		if ((howto & RB_HALT) != 0)
			BOOTTRACE("system panic: halting...");
		if ((howto & RB_POWEROFF) != 0)
			BOOTTRACE("system panic: powering off...");
		if ((howto & (RB_HALT|RB_POWEROFF)) == 0)
			BOOTTRACE("system panic: rebooting...");
	} else {
		if ((howto & RB_HALT) != 0)
			BOOTTRACE("system halting...");
		if ((howto & RB_POWEROFF) != 0)
			BOOTTRACE("system powering off...");
		if ((howto & (RB_HALT|RB_POWEROFF)) == 0)
			BOOTTRACE("system rebooting...");
	}
}

/*
 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or
 * power off.
 *
 * 'howto' is a mask of RB_* flags.  The sequence is: drop Giant, bind to
 * the first CPU (SMP), run the shutdown_pre_sync handlers, sync buffers
 * (at most once, and not with RB_NOSYNC), run the shutdown_post_sync
 * handlers, optionally dump, run the shutdown_final handlers and finally
 * reset.  Does not return.
 */
void
kern_reboot(int howto)
{
	/* Guards bufshutdown() so a re-entered reboot does not sync twice. */
	static int once = 0;

	if (initproc != NULL && curproc != initproc)
		BOOTTRACE("kernel shutdown (dirty) started");
	else
		BOOTTRACE("kernel shutdown (clean) started");

	/*
	 * Normal paths here don't hold Giant, but we can wind up here
	 * unexpectedly with it held.  Drop it now so we don't have to
	 * drop and pick it up elsewhere. The paths it is locking will
	 * never be returned to, and it is preferable to preclude
	 * deadlock than to lock against code that won't ever
	 * continue.
	 */
	while (mtx_owned(&Giant))
		mtx_unlock(&Giant);

#if defined(SMP)
	/*
	 * Bind us to the first CPU so that all shutdown code runs there.  Some
	 * systems don't shutdown properly (i.e., ACPI power off) if we
	 * run on another processor.
	 */
	if (!SCHEDULER_STOPPED()) {
		thread_lock(curthread);
		sched_bind(curthread, CPU_FIRST());
		thread_unlock(curthread);
		KASSERT(PCPU_GET(cpuid) == CPU_FIRST(),
		    ("%s: not running on cpu 0", __func__));
	}
#endif
	/* We're in the process of rebooting. */
	rebooting = 1;
	reboottrace(howto);

	/* We are out of the debugger now. */
	kdb_active = 0;

	/*
	 * Do any callouts that should be done BEFORE syncing the filesystems.
	 */
	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
	BOOTTRACE("shutdown pre sync complete");

	/* 
	 * Now sync filesystems
	 */
	if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) {
		once = 1;
		BOOTTRACE("bufshutdown begin");
		bufshutdown(show_busybufs);
		BOOTTRACE("bufshutdown end");
	}

	print_uptime();

	cngrab();

	/*
	 * Ok, now do things that assume all filesystem activity has
	 * been completed.
	 */
	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
	BOOTTRACE("shutdown post sync complete");

	/* Dump only for RB_DUMP without RB_HALT, and never while cold or dumping. */
	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
		doadump(TRUE);

	/* Now that we're going to really halt the system... */
	BOOTTRACE("shutdown final begin");

	if (shutdown_trace)
		boottrace_dump_console();

	EVENTHANDLER_INVOKE(shutdown_final, howto);

	/*
	 * Call this directly so that reset is attempted even if shutdown
	 * handlers are not yet registered.
	 */
	shutdown_reset(NULL, howto);

	for(;;) ;	/* safety against shutdown_reset not working */
	/* NOTREACHED */
}

/*
 * The system call that results in changing the rootfs.
 *
 * Only init(8) may call this (EPERM otherwise).  The filesystem holding the
 * running init binary and the /dev mount are kept aside while everything
 * else is unmounted and a new root is mounted.  Returns 0 on success, the
 * vn_lock() error, or ENOENT if the text vnode's filesystem could not be
 * busied or the vnode was doomed meanwhile.
 */
static int
kern_reroot(void)
{
	struct vnode *oldrootvnode, *vp;
	struct mount *mp, *devmp;
	int error;

	if (curproc != initproc)
		return (EPERM);

	/*
	 * Mark the filesystem containing currently-running executable
	 * (the temporary copy of init(8)) busy.
	 */
	vp = curproc->p_textvp;
	error = vn_lock(vp, LK_SHARED);
	if (error != 0)
		return (error);
	mp = vp->v_mount;
	error = vfs_busy(mp, MBF_NOWAIT);
	if (error != 0) {
		/*
		 * The non-blocking attempt failed.  Hold a reference on the
		 * mount, drop the vnode lock, retry with a blocking
		 * vfs_busy(), then relock the vnode and recheck it.
		 */
		vfs_ref(mp);
		VOP_UNLOCK(vp);
		error = vfs_busy(mp, 0);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		vfs_rel(mp);
		if (error != 0) {
			VOP_UNLOCK(vp);
			return (ENOENT);
		}
		if (VN_IS_DOOMED(vp)) {
			VOP_UNLOCK(vp);
			vfs_unbusy(mp);
			return (ENOENT);
		}
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove the filesystem containing currently-running executable
	 * from the mount list, to prevent it from being unmounted
	 * by vfs_unmountall(), and to avoid confusing vfs_mountroot().
	 *
	 * Also preserve /dev - forcibly unmounting it could cause driver
	 * reinitialization.
	 */

	vfs_ref(rootdevmp);
	devmp = rootdevmp;
	rootdevmp = NULL;

	mtx_lock(&mountlist_mtx);
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	TAILQ_REMOVE(&mountlist, devmp, mnt_list);
	mtx_unlock(&mountlist_mtx);

	/* Remember the old root so references to it can be redirected below. */
	oldrootvnode = rootvnode;

	/*
	 * Unmount everything except for the two filesystems preserved above.
	 */
	vfs_unmountall();

	/*
	 * Add /dev back; vfs_mountroot() will move it into its new place.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	rootdevmp = devmp;
	vfs_rel(rootdevmp);

	/*
	 * Mount the new rootfs.
	 */
	vfs_mountroot();

	/*
	 * Update all references to the old rootvnode.
	 */
	mountcheckdirs(oldrootvnode, rootvnode);

	/*
	 * Add the temporary filesystem back and unbusy it.
	 */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_unbusy(mp);

	return (0);
}

/*
 * If the shutdown was a clean halt, behave accordingly.
646 */ 647 static void 648 shutdown_halt(void *junk, int howto) 649 { 650 651 if (howto & RB_HALT) { 652 printf("\n"); 653 printf("The operating system has halted.\n"); 654 printf("Please press any key to reboot.\n\n"); 655 656 wdog_kern_pat(WD_TO_NEVER); 657 658 switch (cngetc()) { 659 case -1: /* No console, just die */ 660 cpu_halt(); 661 /* NOTREACHED */ 662 default: 663 break; 664 } 665 } 666 } 667 668 /* 669 * Check to see if the system panicked, pause and then reboot 670 * according to the specified delay. 671 */ 672 static void 673 shutdown_panic(void *junk, int howto) 674 { 675 int loop; 676 677 if (howto & RB_DUMP) { 678 if (panic_reboot_wait_time != 0) { 679 if (panic_reboot_wait_time != -1) { 680 printf("Automatic reboot in %d seconds - " 681 "press a key on the console to abort\n", 682 panic_reboot_wait_time); 683 for (loop = panic_reboot_wait_time * 10; 684 loop > 0; --loop) { 685 DELAY(1000 * 100); /* 1/10th second */ 686 /* Did user type a key? */ 687 if (cncheckc() != -1) 688 break; 689 } 690 if (!loop) 691 return; 692 } 693 } else { /* zero time specified - reboot NOW */ 694 return; 695 } 696 printf("--> Press a key on the console to reboot,\n"); 697 printf("--> or switch off the system now.\n"); 698 cngetc(); 699 } 700 } 701 702 /* 703 * Everything done, now reset 704 */ 705 static void 706 shutdown_reset(void *junk, int howto) 707 { 708 709 printf("Rebooting...\n"); 710 DELAY(reboot_wait_time * 1000000); 711 712 /* 713 * Acquiring smp_ipi_mtx here has a double effect: 714 * - it disables interrupts avoiding CPU0 preemption 715 * by fast handlers (thus deadlocking against other CPUs) 716 * - it avoids deadlocks against smp_rendezvous() or, more 717 * generally, threads busy-waiting, with this spinlock held, 718 * and waiting for responses by threads on other CPUs 719 * (ie. smp_tlb_shootdown()). 720 * 721 * For the !SMP case it just needs to handle the former problem. 
722 */ 723 #ifdef SMP 724 mtx_lock_spin(&smp_ipi_mtx); 725 #else 726 spinlock_enter(); 727 #endif 728 729 cpu_reset(); 730 /* NOTREACHED */ /* assuming reset worked */ 731 } 732 733 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 734 static int kassert_warn_only = 0; 735 #ifdef KDB 736 static int kassert_do_kdb = 0; 737 #endif 738 #ifdef KTR 739 static int kassert_do_ktr = 0; 740 #endif 741 static int kassert_do_log = 1; 742 static int kassert_log_pps_limit = 4; 743 static int kassert_log_mute_at = 0; 744 static int kassert_log_panic_at = 0; 745 static int kassert_suppress_in_panic = 0; 746 static int kassert_warnings = 0; 747 748 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 749 "kassert options"); 750 751 #ifdef KASSERT_PANIC_OPTIONAL 752 #define KASSERT_RWTUN CTLFLAG_RWTUN 753 #else 754 #define KASSERT_RWTUN CTLFLAG_RDTUN 755 #endif 756 757 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 758 &kassert_warn_only, 0, 759 "KASSERT triggers a panic (0) or just a warning (1)"); 760 761 #ifdef KDB 762 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 763 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 764 #endif 765 766 #ifdef KTR 767 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 768 &kassert_do_ktr, 0, 769 "KASSERT does a KTR, set this to the KTRMASK you want"); 770 #endif 771 772 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 773 &kassert_do_log, 0, 774 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 775 776 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, 777 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 778 779 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 780 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 781 782 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 783 &kassert_log_pps_limit, 0, "limit number of log messages per 
second"); 784 785 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 786 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 787 788 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 789 &kassert_suppress_in_panic, 0, 790 "KASSERTs will be suppressed while handling a panic"); 791 #undef KASSERT_RWTUN 792 793 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 794 795 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 796 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0, 797 kassert_sysctl_kassert, "I", 798 "set to trigger a test kassert"); 799 800 static int 801 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 802 { 803 int error, i; 804 805 error = sysctl_wire_old_buffer(req, sizeof(int)); 806 if (error == 0) { 807 i = 0; 808 error = sysctl_handle_int(oidp, &i, 0, req); 809 } 810 if (error != 0 || req->newptr == NULL) 811 return (error); 812 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 813 return (0); 814 } 815 816 #ifdef KASSERT_PANIC_OPTIONAL 817 /* 818 * Called by KASSERT, this decides if we will panic 819 * or if we will log via printf and/or ktr. 820 */ 821 void 822 kassert_panic(const char *fmt, ...) 823 { 824 static char buf[256]; 825 va_list ap; 826 827 va_start(ap, fmt); 828 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 829 va_end(ap); 830 831 /* 832 * If we are suppressing secondary panics, log the warning but do not 833 * re-enter panic/kdb. 834 */ 835 if (KERNEL_PANICKED() && kassert_suppress_in_panic) { 836 if (kassert_do_log) { 837 printf("KASSERT failed: %s\n", buf); 838 #ifdef KDB 839 if (trace_all_panics && trace_on_panic) 840 kdb_backtrace(); 841 #endif 842 } 843 return; 844 } 845 846 /* 847 * panic if we're not just warning, or if we've exceeded 848 * kassert_log_panic_at warnings. 
849 */ 850 if (!kassert_warn_only || 851 (kassert_log_panic_at > 0 && 852 kassert_warnings >= kassert_log_panic_at)) { 853 va_start(ap, fmt); 854 vpanic(fmt, ap); 855 /* NORETURN */ 856 } 857 #ifdef KTR 858 if (kassert_do_ktr) 859 CTR0(ktr_mask, buf); 860 #endif /* KTR */ 861 /* 862 * log if we've not yet met the mute limit. 863 */ 864 if (kassert_do_log && 865 (kassert_log_mute_at == 0 || 866 kassert_warnings < kassert_log_mute_at)) { 867 static struct timeval lasterr; 868 static int curerr; 869 870 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 871 printf("KASSERT failed: %s\n", buf); 872 kdb_backtrace(); 873 } 874 } 875 #ifdef KDB 876 if (kassert_do_kdb) { 877 kdb_enter(KDB_WHY_KASSERT, buf); 878 } 879 #endif 880 atomic_add_int(&kassert_warnings, 1); 881 } 882 #endif /* KASSERT_PANIC_OPTIONAL */ 883 #endif 884 885 /* 886 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 887 * and then reboots. If we are called twice, then we avoid trying to sync 888 * the disks as this often leads to recursive panics. 889 */ 890 void 891 panic(const char *fmt, ...) 892 { 893 va_list ap; 894 895 va_start(ap, fmt); 896 vpanic(fmt, ap); 897 } 898 899 void 900 vpanic(const char *fmt, va_list ap) 901 { 902 #ifdef SMP 903 cpuset_t other_cpus; 904 #endif 905 struct thread *td = curthread; 906 int bootopt, newpanic; 907 static char buf[256]; 908 909 spinlock_enter(); 910 911 #ifdef SMP 912 /* 913 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 914 * concurrently entering panic. Only the winner will proceed 915 * further. 916 */ 917 if (panicstr == NULL && !kdb_active) { 918 other_cpus = all_cpus; 919 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 920 stop_cpus_hard(other_cpus); 921 } 922 #endif 923 924 /* 925 * Ensure that the scheduler is stopped while panicking, even if panic 926 * has been entered from kdb. 
927 */ 928 td->td_stopsched = 1; 929 930 bootopt = RB_AUTOBOOT; 931 newpanic = 0; 932 if (KERNEL_PANICKED()) 933 bootopt |= RB_NOSYNC; 934 else { 935 bootopt |= RB_DUMP; 936 panicstr = fmt; 937 panicked = true; 938 newpanic = 1; 939 } 940 941 if (newpanic) { 942 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 943 panicstr = buf; 944 cngrab(); 945 printf("panic: %s\n", buf); 946 } else { 947 printf("panic: "); 948 vprintf(fmt, ap); 949 printf("\n"); 950 } 951 #ifdef SMP 952 printf("cpuid = %d\n", PCPU_GET(cpuid)); 953 #endif 954 printf("time = %jd\n", (intmax_t )time_second); 955 #ifdef KDB 956 if ((newpanic || trace_all_panics) && trace_on_panic) 957 kdb_backtrace(); 958 if (debugger_on_panic) 959 kdb_enter(KDB_WHY_PANIC, "panic"); 960 else if (!newpanic && debugger_on_recursive_panic) 961 kdb_enter(KDB_WHY_PANIC, "re-panic"); 962 #endif 963 /*thread_lock(td); */ 964 td->td_flags |= TDF_INPANIC; 965 /* thread_unlock(td); */ 966 if (!sync_on_panic) 967 bootopt |= RB_NOSYNC; 968 if (poweroff_on_panic) 969 bootopt |= RB_POWEROFF; 970 if (powercycle_on_panic) 971 bootopt |= RB_POWERCYCLE; 972 kern_reboot(bootopt); 973 } 974 975 /* 976 * Support for poweroff delay. 977 * 978 * Please note that setting this delay too short might power off your machine 979 * before the write cache on your hard disk has been flushed, leading to 980 * soft-updates inconsistencies. 981 */ 982 #ifndef POWEROFF_DELAY 983 # define POWEROFF_DELAY 5000 984 #endif 985 static int poweroff_delay = POWEROFF_DELAY; 986 987 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 988 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 989 990 static void 991 poweroff_wait(void *junk, int howto) 992 { 993 994 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 995 return; 996 DELAY(poweroff_delay * 1000); 997 } 998 999 /* 1000 * Some system processes (e.g. 
syncer) need to be stopped at appropriate 1001 * points in their main loops prior to a system shutdown, so that they 1002 * won't interfere with the shutdown process (e.g. by holding a disk buf 1003 * to cause sync to fail). For each of these system processes, register 1004 * shutdown_kproc() as a handler for one of shutdown events. 1005 */ 1006 static int kproc_shutdown_wait = 60; 1007 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 1008 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 1009 1010 void 1011 kproc_shutdown(void *arg, int howto) 1012 { 1013 struct proc *p; 1014 int error; 1015 1016 if (KERNEL_PANICKED()) 1017 return; 1018 1019 p = (struct proc *)arg; 1020 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 1021 kproc_shutdown_wait, p->p_comm); 1022 error = kproc_suspend(p, kproc_shutdown_wait * hz); 1023 1024 if (error == EWOULDBLOCK) 1025 printf("timed out\n"); 1026 else 1027 printf("done\n"); 1028 } 1029 1030 void 1031 kthread_shutdown(void *arg, int howto) 1032 { 1033 struct thread *td; 1034 int error; 1035 1036 if (KERNEL_PANICKED()) 1037 return; 1038 1039 td = (struct thread *)arg; 1040 printf("Waiting (max %d seconds) for system thread `%s' to stop... 
", 1041 kproc_shutdown_wait, td->td_name); 1042 error = kthread_suspend(td, kproc_shutdown_wait * hz); 1043 1044 if (error == EWOULDBLOCK) 1045 printf("timed out\n"); 1046 else 1047 printf("done\n"); 1048 } 1049 1050 static int 1051 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 1052 { 1053 char buf[256]; 1054 struct dumperinfo *di; 1055 struct sbuf sb; 1056 int error; 1057 1058 error = sysctl_wire_old_buffer(req, 0); 1059 if (error != 0) 1060 return (error); 1061 1062 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1063 1064 mtx_lock(&dumpconf_list_lk); 1065 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1066 if (di != TAILQ_FIRST(&dumper_configs)) 1067 sbuf_putc(&sb, ','); 1068 sbuf_cat(&sb, di->di_devname); 1069 } 1070 mtx_unlock(&dumpconf_list_lk); 1071 1072 error = sbuf_finish(&sb); 1073 sbuf_delete(&sb); 1074 return (error); 1075 } 1076 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, 1077 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0, 1078 dumpdevname_sysctl_handler, "A", 1079 "Device(s) for kernel dumps"); 1080 1081 static int _dump_append(struct dumperinfo *di, void *virtual, size_t length); 1082 1083 #ifdef EKCD 1084 static struct kerneldumpcrypto * 1085 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1086 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1087 { 1088 struct kerneldumpcrypto *kdc; 1089 struct kerneldumpkey *kdk; 1090 uint32_t dumpkeysize; 1091 1092 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1093 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1094 1095 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1096 1097 kdc->kdc_encryption = encryption; 1098 switch (kdc->kdc_encryption) { 1099 case KERNELDUMP_ENC_AES_256_CBC: 1100 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1101 goto failed; 1102 break; 1103 case KERNELDUMP_ENC_CHACHA20: 1104 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1105 break; 1106 default: 1107 goto 
failed; 1108 } 1109 1110 kdc->kdc_dumpkeysize = dumpkeysize; 1111 kdk = kdc->kdc_dumpkey; 1112 kdk->kdk_encryption = kdc->kdc_encryption; 1113 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1114 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1115 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1116 1117 return (kdc); 1118 failed: 1119 zfree(kdc, M_EKCD); 1120 return (NULL); 1121 } 1122 1123 static int 1124 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1125 { 1126 uint8_t hash[SHA256_DIGEST_LENGTH]; 1127 SHA256_CTX ctx; 1128 struct kerneldumpkey *kdk; 1129 int error; 1130 1131 error = 0; 1132 1133 if (kdc == NULL) 1134 return (0); 1135 1136 /* 1137 * When a user enters ddb it can write a crash dump multiple times. 1138 * Each time it should be encrypted using a different IV. 1139 */ 1140 SHA256_Init(&ctx); 1141 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1142 SHA256_Final(hash, &ctx); 1143 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1144 1145 switch (kdc->kdc_encryption) { 1146 case KERNELDUMP_ENC_AES_256_CBC: 1147 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1148 kdc->kdc_iv) <= 0) { 1149 error = EINVAL; 1150 goto out; 1151 } 1152 break; 1153 case KERNELDUMP_ENC_CHACHA20: 1154 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1155 break; 1156 default: 1157 error = EINVAL; 1158 goto out; 1159 } 1160 1161 kdk = kdc->kdc_dumpkey; 1162 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1163 out: 1164 explicit_bzero(hash, sizeof(hash)); 1165 return (error); 1166 } 1167 1168 static uint32_t 1169 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1170 { 1171 1172 if (kdc == NULL) 1173 return (0); 1174 return (kdc->kdc_dumpkeysize); 1175 } 1176 #endif /* EKCD */ 1177 1178 static struct kerneldumpcomp * 1179 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1180 { 1181 struct kerneldumpcomp *kdcomp; 1182 int format; 1183 1184 switch (compression) { 1185 case KERNELDUMP_COMP_GZIP: 1186 
format = COMPRESS_GZIP; 1187 break; 1188 case KERNELDUMP_COMP_ZSTD: 1189 format = COMPRESS_ZSTD; 1190 break; 1191 default: 1192 return (NULL); 1193 } 1194 1195 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1196 kdcomp->kdc_format = compression; 1197 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1198 format, di->maxiosize, kerneldump_gzlevel, di); 1199 if (kdcomp->kdc_stream == NULL) { 1200 free(kdcomp, M_DUMPER); 1201 return (NULL); 1202 } 1203 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1204 return (kdcomp); 1205 } 1206 1207 static void 1208 kerneldumpcomp_destroy(struct dumperinfo *di) 1209 { 1210 struct kerneldumpcomp *kdcomp; 1211 1212 kdcomp = di->kdcomp; 1213 if (kdcomp == NULL) 1214 return; 1215 compressor_fini(kdcomp->kdc_stream); 1216 zfree(kdcomp->kdc_buf, M_DUMPER); 1217 free(kdcomp, M_DUMPER); 1218 } 1219 1220 /* 1221 * Free a dumper. Must not be present on global list. 1222 */ 1223 void 1224 dumper_destroy(struct dumperinfo *di) 1225 { 1226 1227 if (di == NULL) 1228 return; 1229 1230 zfree(di->blockbuf, M_DUMPER); 1231 kerneldumpcomp_destroy(di); 1232 #ifdef EKCD 1233 zfree(di->kdcrypto, M_EKCD); 1234 #endif 1235 zfree(di, M_DUMPER); 1236 } 1237 1238 /* 1239 * Allocate and set up a new dumper from the provided template. 
1240 */ 1241 int 1242 dumper_create(const struct dumperinfo *di_template, const char *devname, 1243 const struct diocskerneldump_arg *kda, struct dumperinfo **dip) 1244 { 1245 struct dumperinfo *newdi; 1246 int error = 0; 1247 1248 if (dip == NULL) 1249 return (EINVAL); 1250 1251 /* Allocate a new dumper */ 1252 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, 1253 M_WAITOK | M_ZERO); 1254 memcpy(newdi, di_template, sizeof(*newdi)); 1255 newdi->blockbuf = NULL; 1256 newdi->kdcrypto = NULL; 1257 newdi->kdcomp = NULL; 1258 strcpy(newdi->di_devname, devname); 1259 1260 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1261 #ifdef EKCD 1262 newdi->kdcrypto = kerneldumpcrypto_create(newdi->blocksize, 1263 kda->kda_encryption, kda->kda_key, 1264 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1265 if (newdi->kdcrypto == NULL) { 1266 error = EINVAL; 1267 goto cleanup; 1268 } 1269 #else 1270 error = EOPNOTSUPP; 1271 goto cleanup; 1272 #endif 1273 } 1274 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1275 #ifdef EKCD 1276 /* 1277 * We can't support simultaneous unpadded block cipher 1278 * encryption and compression because there is no guarantee the 1279 * length of the compressed result is exactly a multiple of the 1280 * cipher block size. 1281 */ 1282 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1283 error = EOPNOTSUPP; 1284 goto cleanup; 1285 } 1286 #endif 1287 newdi->kdcomp = kerneldumpcomp_create(newdi, 1288 kda->kda_compression); 1289 if (newdi->kdcomp == NULL) { 1290 error = EINVAL; 1291 goto cleanup; 1292 } 1293 } 1294 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1295 1296 *dip = newdi; 1297 return (0); 1298 cleanup: 1299 dumper_destroy(newdi); 1300 return (error); 1301 } 1302 1303 /* 1304 * Create a new dumper and register it in the global list. 
1305 */ 1306 int 1307 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1308 const struct diocskerneldump_arg *kda) 1309 { 1310 struct dumperinfo *newdi, *listdi; 1311 bool inserted; 1312 uint8_t index; 1313 int error; 1314 1315 index = kda->kda_index; 1316 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1317 index != KDA_REMOVE_ALL); 1318 1319 error = priv_check(curthread, PRIV_SETDUMPER); 1320 if (error != 0) 1321 return (error); 1322 1323 error = dumper_create(di_template, devname, kda, &newdi); 1324 if (error != 0) 1325 return (error); 1326 1327 /* Add the new configuration to the queue */ 1328 mtx_lock(&dumpconf_list_lk); 1329 inserted = false; 1330 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1331 if (index == 0) { 1332 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1333 inserted = true; 1334 break; 1335 } 1336 index--; 1337 } 1338 if (!inserted) 1339 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1340 mtx_unlock(&dumpconf_list_lk); 1341 1342 return (0); 1343 } 1344 1345 #ifdef DDB 1346 void 1347 dumper_ddb_insert(struct dumperinfo *newdi) 1348 { 1349 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1350 } 1351 1352 void 1353 dumper_ddb_remove(struct dumperinfo *di) 1354 { 1355 TAILQ_REMOVE(&dumper_configs, di, di_next); 1356 } 1357 #endif 1358 1359 static bool 1360 dumper_config_match(const struct dumperinfo *di, const char *devname, 1361 const struct diocskerneldump_arg *kda) 1362 { 1363 if (kda->kda_index == KDA_REMOVE_ALL) 1364 return (true); 1365 1366 if (strcmp(di->di_devname, devname) != 0) 1367 return (false); 1368 1369 /* 1370 * Allow wildcard removal of configs matching a device on g_dev_orphan. 
1371 */ 1372 if (kda->kda_index == KDA_REMOVE_DEV) 1373 return (true); 1374 1375 if (di->kdcomp != NULL) { 1376 if (di->kdcomp->kdc_format != kda->kda_compression) 1377 return (false); 1378 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1379 return (false); 1380 #ifdef EKCD 1381 if (di->kdcrypto != NULL) { 1382 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1383 return (false); 1384 /* 1385 * Do we care to verify keys match to delete? It seems weird 1386 * to expect multiple fallback dump configurations on the same 1387 * device that only differ in crypto key. 1388 */ 1389 } else 1390 #endif 1391 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1392 return (false); 1393 1394 return (true); 1395 } 1396 1397 /* 1398 * Remove and free the requested dumper(s) from the global list. 1399 */ 1400 int 1401 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1402 { 1403 struct dumperinfo *di, *sdi; 1404 bool found; 1405 int error; 1406 1407 error = priv_check(curthread, PRIV_SETDUMPER); 1408 if (error != 0) 1409 return (error); 1410 1411 /* 1412 * Try to find a matching configuration, and kill it. 1413 * 1414 * NULL 'kda' indicates remove any configuration matching 'devname', 1415 * which may remove multiple configurations in atypical configurations. 1416 */ 1417 found = false; 1418 mtx_lock(&dumpconf_list_lk); 1419 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1420 if (dumper_config_match(di, devname, kda)) { 1421 found = true; 1422 TAILQ_REMOVE(&dumper_configs, di, di_next); 1423 dumper_destroy(di); 1424 } 1425 } 1426 mtx_unlock(&dumpconf_list_lk); 1427 1428 /* Only produce ENOENT if a more targeted match didn't match. 
*/ 1429 if (!found && kda->kda_index == KDA_REMOVE) 1430 return (ENOENT); 1431 return (0); 1432 } 1433 1434 static int 1435 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1436 { 1437 1438 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1439 offset - di->mediaoffset + length > di->mediasize)) { 1440 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1441 printf( 1442 "Compressed dump failed to fit in device boundaries.\n"); 1443 return (E2BIG); 1444 } 1445 1446 printf("Attempt to write outside dump device boundaries.\n" 1447 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1448 (intmax_t)offset, (intmax_t)di->mediaoffset, 1449 (uintmax_t)length, (intmax_t)di->mediasize); 1450 return (ENOSPC); 1451 } 1452 if (length % di->blocksize != 0) { 1453 printf("Attempt to write partial block of length %ju.\n", 1454 (uintmax_t)length); 1455 return (EINVAL); 1456 } 1457 if (offset % di->blocksize != 0) { 1458 printf("Attempt to write at unaligned offset %jd.\n", 1459 (intmax_t)offset); 1460 return (EINVAL); 1461 } 1462 1463 return (0); 1464 } 1465 1466 #ifdef EKCD 1467 static int 1468 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1469 { 1470 1471 switch (kdc->kdc_encryption) { 1472 case KERNELDUMP_ENC_AES_256_CBC: 1473 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1474 8 * size, buf) <= 0) { 1475 return (EIO); 1476 } 1477 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1478 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1479 return (EIO); 1480 } 1481 break; 1482 case KERNELDUMP_ENC_CHACHA20: 1483 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1484 break; 1485 default: 1486 return (EINVAL); 1487 } 1488 1489 return (0); 1490 } 1491 1492 /* Encrypt data and call dumper. 
*/ 1493 static int 1494 dump_encrypted_write(struct dumperinfo *di, void *virtual, off_t offset, 1495 size_t length) 1496 { 1497 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1498 struct kerneldumpcrypto *kdc; 1499 int error; 1500 size_t nbytes; 1501 1502 kdc = di->kdcrypto; 1503 1504 while (length > 0) { 1505 nbytes = MIN(length, sizeof(buf)); 1506 bcopy(virtual, buf, nbytes); 1507 1508 if (dump_encrypt(kdc, buf, nbytes) != 0) 1509 return (EIO); 1510 1511 error = dump_write(di, buf, offset, nbytes); 1512 if (error != 0) 1513 return (error); 1514 1515 offset += nbytes; 1516 virtual = (void *)((uint8_t *)virtual + nbytes); 1517 length -= nbytes; 1518 } 1519 1520 return (0); 1521 } 1522 #endif /* EKCD */ 1523 1524 static int 1525 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1526 { 1527 struct dumperinfo *di; 1528 size_t resid, rlength; 1529 int error; 1530 1531 di = arg; 1532 1533 if (length % di->blocksize != 0) { 1534 /* 1535 * This must be the final write after flushing the compression 1536 * stream. Write as many full blocks as possible and stash the 1537 * residual data in the dumper's block buffer. It will be 1538 * padded and written in dump_finish(). 1539 */ 1540 rlength = rounddown(length, di->blocksize); 1541 if (rlength != 0) { 1542 error = _dump_append(di, base, rlength); 1543 if (error != 0) 1544 return (error); 1545 } 1546 resid = length - rlength; 1547 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1548 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1549 di->kdcomp->kdc_resid = resid; 1550 return (EAGAIN); 1551 } 1552 return (_dump_append(di, base, length)); 1553 } 1554 1555 /* 1556 * Write kernel dump headers at the beginning and end of the dump extent. 1557 * Write the kernel dump encryption key after the leading header if we were 1558 * configured to do so. 
1559 */ 1560 static int 1561 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1562 { 1563 #ifdef EKCD 1564 struct kerneldumpcrypto *kdc; 1565 #endif 1566 void *buf; 1567 size_t hdrsz; 1568 uint64_t extent; 1569 uint32_t keysize; 1570 int error; 1571 1572 hdrsz = sizeof(*kdh); 1573 if (hdrsz > di->blocksize) 1574 return (ENOMEM); 1575 1576 #ifdef EKCD 1577 kdc = di->kdcrypto; 1578 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1579 #else 1580 keysize = 0; 1581 #endif 1582 1583 /* 1584 * If the dump device has special handling for headers, let it take care 1585 * of writing them out. 1586 */ 1587 if (di->dumper_hdr != NULL) 1588 return (di->dumper_hdr(di, kdh)); 1589 1590 if (hdrsz == di->blocksize) 1591 buf = kdh; 1592 else { 1593 buf = di->blockbuf; 1594 memset(buf, 0, di->blocksize); 1595 memcpy(buf, kdh, hdrsz); 1596 } 1597 1598 extent = dtoh64(kdh->dumpextent); 1599 #ifdef EKCD 1600 if (kdc != NULL) { 1601 error = dump_write(di, kdc->kdc_dumpkey, 1602 di->mediaoffset + di->mediasize - di->blocksize - extent - 1603 keysize, keysize); 1604 if (error != 0) 1605 return (error); 1606 } 1607 #endif 1608 1609 error = dump_write(di, buf, 1610 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1611 keysize, di->blocksize); 1612 if (error == 0) 1613 error = dump_write(di, buf, di->mediaoffset + di->mediasize - 1614 di->blocksize, di->blocksize); 1615 return (error); 1616 } 1617 1618 /* 1619 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1620 * protect us from metadata and metadata from us. 1621 */ 1622 #define SIZEOF_METADATA (64 * 1024) 1623 1624 /* 1625 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1626 * if requested, and make sure that we have enough space on the dump device. 1627 * 1628 * We set things up so that the dump ends before the last sector of the dump 1629 * device, at which the trailing header is written. 
1630 * 1631 * +-----------+------+-----+----------------------------+------+ 1632 * | | lhdr | key | ... kernel dump ... | thdr | 1633 * +-----------+------+-----+----------------------------+------+ 1634 * 1 blk opt <------- dump extent --------> 1 blk 1635 * 1636 * Dumps written using dump_append() start at the beginning of the extent. 1637 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1638 * will not. The true length of the dump is recorded in the leading and trailing 1639 * headers once the dump has been completed. 1640 * 1641 * The dump device may provide a callback, in which case it will initialize 1642 * dumpoff and take care of laying out the headers. 1643 */ 1644 int 1645 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1646 { 1647 #ifdef EKCD 1648 struct kerneldumpcrypto *kdc; 1649 #endif 1650 void *key; 1651 uint64_t dumpextent, span; 1652 uint32_t keysize; 1653 int error; 1654 1655 #ifdef EKCD 1656 /* Send the key before the dump so a partial dump is still usable. */ 1657 kdc = di->kdcrypto; 1658 error = kerneldumpcrypto_init(kdc); 1659 if (error != 0) 1660 return (error); 1661 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1662 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1663 #else 1664 error = 0; 1665 keysize = 0; 1666 key = NULL; 1667 #endif 1668 1669 if (di->dumper_start != NULL) { 1670 error = di->dumper_start(di, key, keysize); 1671 } else { 1672 dumpextent = dtoh64(kdh->dumpextent); 1673 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1674 keysize; 1675 if (di->mediasize < span) { 1676 if (di->kdcomp == NULL) 1677 return (E2BIG); 1678 1679 /* 1680 * We don't yet know how much space the compressed dump 1681 * will occupy, so try to use the whole swap partition 1682 * (minus the first 64KB) in the hope that the 1683 * compressed dump will fit. If that doesn't turn out to 1684 * be enough, the bounds checking in dump_write() 1685 * will catch us and cause the dump to fail. 
1686 */ 1687 dumpextent = di->mediasize - span + dumpextent; 1688 kdh->dumpextent = htod64(dumpextent); 1689 } 1690 1691 /* 1692 * The offset at which to begin writing the dump. 1693 */ 1694 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1695 dumpextent; 1696 } 1697 di->origdumpoff = di->dumpoff; 1698 return (error); 1699 } 1700 1701 static int 1702 _dump_append(struct dumperinfo *di, void *virtual, size_t length) 1703 { 1704 int error; 1705 1706 #ifdef EKCD 1707 if (di->kdcrypto != NULL) 1708 error = dump_encrypted_write(di, virtual, di->dumpoff, length); 1709 else 1710 #endif 1711 error = dump_write(di, virtual, di->dumpoff, length); 1712 if (error == 0) 1713 di->dumpoff += length; 1714 return (error); 1715 } 1716 1717 /* 1718 * Write to the dump device starting at dumpoff. When compression is enabled, 1719 * writes to the device will be performed using a callback that gets invoked 1720 * when the compression stream's output buffer is full. 1721 */ 1722 int 1723 dump_append(struct dumperinfo *di, void *virtual, size_t length) 1724 { 1725 void *buf; 1726 1727 if (di->kdcomp != NULL) { 1728 /* Bounce through a buffer to avoid CRC errors. */ 1729 if (length > di->maxiosize) 1730 return (EINVAL); 1731 buf = di->kdcomp->kdc_buf; 1732 memmove(buf, virtual, length); 1733 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1734 } 1735 return (_dump_append(di, virtual, length)); 1736 } 1737 1738 /* 1739 * Write to the dump device at the specified offset. 
1740 */ 1741 int 1742 dump_write(struct dumperinfo *di, void *virtual, off_t offset, size_t length) 1743 { 1744 int error; 1745 1746 error = dump_check_bounds(di, offset, length); 1747 if (error != 0) 1748 return (error); 1749 return (di->dumper(di->priv, virtual, offset, length)); 1750 } 1751 1752 /* 1753 * Perform kernel dump finalization: flush the compression stream, if necessary, 1754 * write the leading and trailing kernel dump headers now that we know the true 1755 * length of the dump, and optionally write the encryption key following the 1756 * leading header. 1757 */ 1758 int 1759 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1760 { 1761 int error; 1762 1763 if (di->kdcomp != NULL) { 1764 error = compressor_flush(di->kdcomp->kdc_stream); 1765 if (error == EAGAIN) { 1766 /* We have residual data in di->blockbuf. */ 1767 error = _dump_append(di, di->blockbuf, di->blocksize); 1768 if (error == 0) 1769 /* Compensate for _dump_append()'s adjustment. */ 1770 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1771 di->kdcomp->kdc_resid = 0; 1772 } 1773 if (error != 0) 1774 return (error); 1775 1776 /* 1777 * We now know the size of the compressed dump, so update the 1778 * header accordingly and recompute parity. 
1779 */ 1780 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1781 kdh->parity = 0; 1782 kdh->parity = kerneldump_parity(kdh); 1783 1784 compressor_reset(di->kdcomp->kdc_stream); 1785 } 1786 1787 error = dump_write_headers(di, kdh); 1788 if (error != 0) 1789 return (error); 1790 1791 (void)dump_write(di, NULL, 0, 0); 1792 return (0); 1793 } 1794 1795 void 1796 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1797 const char *magic, uint32_t archver, uint64_t dumplen) 1798 { 1799 size_t dstsize; 1800 1801 bzero(kdh, sizeof(*kdh)); 1802 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1803 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1804 kdh->version = htod32(KERNELDUMPVERSION); 1805 kdh->architectureversion = htod32(archver); 1806 kdh->dumplength = htod64(dumplen); 1807 kdh->dumpextent = kdh->dumplength; 1808 kdh->dumptime = htod64(time_second); 1809 #ifdef EKCD 1810 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1811 #else 1812 kdh->dumpkeysize = 0; 1813 #endif 1814 kdh->blocksize = htod32(di->blocksize); 1815 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1816 dstsize = sizeof(kdh->versionstring); 1817 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1818 kdh->versionstring[dstsize - 2] = '\n'; 1819 if (panicstr != NULL) 1820 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1821 if (di->kdcomp != NULL) 1822 kdh->compression = di->kdcomp->kdc_format; 1823 kdh->parity = kerneldump_parity(kdh); 1824 } 1825 1826 #ifdef DDB 1827 DB_SHOW_COMMAND_FLAGS(panic, db_show_panic, DB_CMD_MEMSAFE) 1828 { 1829 1830 if (panicstr == NULL) 1831 db_printf("panicstr not set\n"); 1832 else 1833 db_printf("panic: %s\n", panicstr); 1834 } 1835 #endif 1836