1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_ddb.h" 43 #include "opt_ekcd.h" 44 #include "opt_kdb.h" 45 #include "opt_panic.h" 46 #include "opt_printf.h" 47 #include "opt_sched.h" 48 #include "opt_watchdog.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/bio.h> 53 #include <sys/boottrace.h> 54 #include <sys/buf.h> 55 #include <sys/conf.h> 56 #include <sys/compressor.h> 57 #include <sys/cons.h> 58 #include <sys/disk.h> 59 #include <sys/eventhandler.h> 60 #include <sys/filedesc.h> 61 #include <sys/jail.h> 62 #include <sys/kdb.h> 63 #include <sys/kernel.h> 64 #include <sys/kerneldump.h> 65 #include <sys/kthread.h> 66 #include <sys/ktr.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/mount.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/reboot.h> 73 #include <sys/resourcevar.h> 74 #include <sys/rwlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/sched.h> 77 #include <sys/smp.h> 78 #include <sys/sysctl.h> 79 #include <sys/sysproto.h> 80 #include <sys/taskqueue.h> 81 #include <sys/vnode.h> 82 #include <sys/watchdog.h> 83 84 #include <crypto/chacha20/chacha.h> 85 #include <crypto/rijndael/rijndael-api-fst.h> 86 #include <crypto/sha2/sha256.h> 87 88 #include <ddb/ddb.h> 89 90 #include <machine/cpu.h> 91 #include <machine/dump.h> 92 #include <machine/pcb.h> 93 #include <machine/smp.h> 94 95 #include <security/mac/mac_framework.h> 96 97 #include <vm/vm.h> 98 #include <vm/vm_object.h> 99 #include <vm/vm_page.h> 100 #include <vm/vm_pager.h> 101 #include <vm/swap_pager.h> 102 103 #include <sys/signalvar.h> 104 105 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 106 107 #ifndef PANIC_REBOOT_WAIT_TIME 108 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 109 #endif 110 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 111 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, 112 &panic_reboot_wait_time, 0, 113 "Seconds to wait before rebooting after a panic"); 114 115 /* 116 * Note that stdarg.h and the ANSI style va_start macro is used for both 117 * ANSI and traditional C compilers. 118 */ 119 #include <machine/stdarg.h> 120 121 #ifdef KDB 122 #ifdef KDB_UNATTENDED 123 int debugger_on_panic = 0; 124 #else 125 int debugger_on_panic = 1; 126 #endif 127 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 128 CTLFLAG_RWTUN | CTLFLAG_SECURE, 129 &debugger_on_panic, 0, "Run debugger on kernel panic"); 130 131 static bool debugger_on_recursive_panic = false; 132 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic, 133 CTLFLAG_RWTUN | CTLFLAG_SECURE, 134 &debugger_on_recursive_panic, 0, "Run debugger on recursive kernel panic"); 135 136 int debugger_on_trap = 0; 137 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, 138 CTLFLAG_RWTUN | CTLFLAG_SECURE, 139 &debugger_on_trap, 0, "Run debugger on kernel trap before panic"); 140 141 #ifdef KDB_TRACE 142 static int trace_on_panic = 1; 143 static bool trace_all_panics = true; 144 #else 145 static int trace_on_panic = 0; 146 static bool trace_all_panics = false; 147 #endif 148 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 149 CTLFLAG_RWTUN | CTLFLAG_SECURE, 150 &trace_on_panic, 0, "Print stack trace on kernel panic"); 151 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, 152 &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); 153 #endif /* KDB */ 154 155 static int sync_on_panic = 0; 156 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 157 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 158 159 static bool poweroff_on_panic = 0; 160 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, 161 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); 162 163 static bool powercycle_on_panic = 0; 164 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, 165 &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); 166 167 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 168 "Shutdown environment"); 169 170 #ifndef DIAGNOSTIC 171 static int show_busybufs; 172 #else 173 static int show_busybufs = 1; 174 #endif 175 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 176 &show_busybufs, 0, 177 "Show busy buffers during shutdown"); 178 179 int suspend_blocked = 0; 180 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 181 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 182 183 #ifdef EKCD 184 FEATURE(ekcd, "Encrypted kernel crash dumps support"); 185 186 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); 187 188 struct kerneldumpcrypto { 189 uint8_t kdc_encryption; 190 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; 191 union { 192 struct { 193 keyInstance aes_ki; 194 cipherInstance aes_ci; 195 } u_aes; 196 struct chacha_ctx u_chacha; 197 } u; 198 #define kdc_ki u.u_aes.aes_ki 199 #define kdc_ci u.u_aes.aes_ci 200 #define kdc_chacha u.u_chacha 201 uint32_t kdc_dumpkeysize; 202 struct kerneldumpkey kdc_dumpkey[]; 203 }; 204 #endif 205 206 struct kerneldumpcomp { 207 uint8_t kdc_format; 208 struct compressor *kdc_stream; 209 uint8_t *kdc_buf; 210 size_t kdc_resid; 211 }; 212 213 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di, 214 uint8_t compression); 215 static void kerneldumpcomp_destroy(struct dumperinfo *di); 216 static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg); 217 218 static int kerneldump_gzlevel = 6; 219 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, 220 &kerneldump_gzlevel, 0, 221 "Kernel crash dump compression level"); 222 223 /* 224 * Variable panicstr contains argument to first call to panic; used as flag 225 * to indicate that the kernel has already called panic. 226 */ 227 const char *panicstr; 228 bool __read_frequently panicked; 229 230 int __read_mostly dumping; /* system is dumping */ 231 int rebooting; /* system is rebooting */ 232 /* 233 * Used to serialize between sysctl kern.shutdown.dumpdevname and list 234 * modifications via ioctl. 235 */ 236 static struct mtx dumpconf_list_lk; 237 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); 238 239 /* Our selected dumper(s). */ 240 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 241 TAILQ_HEAD_INITIALIZER(dumper_configs); 242 243 /* Context information for dump-debuggers. */ 244 static struct pcb dumppcb; /* Registers. */ 245 lwpid_t dumptid; /* Thread ID. */ 246 247 static struct cdevsw reroot_cdevsw = { 248 .d_version = D_VERSION, 249 .d_name = "reroot", 250 }; 251 252 static void poweroff_wait(void *, int); 253 static void shutdown_halt(void *junk, int howto); 254 static void shutdown_panic(void *junk, int howto); 255 static void shutdown_reset(void *junk, int howto); 256 static int kern_reroot(void); 257 258 /* register various local shutdown events */ 259 static void 260 shutdown_conf(void *unused) 261 { 262 263 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 264 SHUTDOWN_PRI_FIRST); 265 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 266 SHUTDOWN_PRI_LAST + 100); 267 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 268 SHUTDOWN_PRI_LAST + 100); 269 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 270 SHUTDOWN_PRI_LAST + 200); 271 } 272 273 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 274 275 /* 276 * The only reason this exists is to create the /dev/reroot/ directory, 277 * used by reroot code in init(8) as a mountpoint for tmpfs. 278 */ 279 static void 280 reroot_conf(void *unused) 281 { 282 int error; 283 struct cdev *cdev; 284 285 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 286 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 287 if (error != 0) { 288 printf("%s: failed to create device node, error %d", 289 __func__, error); 290 } 291 } 292 293 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 294 295 /* 296 * The system call that results in a reboot. 297 */ 298 /* ARGSUSED */ 299 int 300 sys_reboot(struct thread *td, struct reboot_args *uap) 301 { 302 int error; 303 304 error = 0; 305 #ifdef MAC 306 error = mac_system_check_reboot(td->td_ucred, uap->opt); 307 #endif 308 if (error == 0) 309 error = priv_check(td, PRIV_REBOOT); 310 if (error == 0) { 311 if (uap->opt & RB_REROOT) 312 error = kern_reroot(); 313 else 314 kern_reboot(uap->opt); 315 } 316 return (error); 317 } 318 319 static void 320 shutdown_nice_task_fn(void *arg, int pending __unused) 321 { 322 int howto; 323 324 howto = (uintptr_t)arg; 325 /* Send a signal to init(8) and have it shutdown the world. */ 326 PROC_LOCK(initproc); 327 if ((howto & RB_POWEROFF) != 0) { 328 BOOTTRACE("SIGUSR2 to init(8)"); 329 kern_psignal(initproc, SIGUSR2); 330 } else if ((howto & RB_POWERCYCLE) != 0) { 331 BOOTTRACE("SIGWINCH to init(8)"); 332 kern_psignal(initproc, SIGWINCH); 333 } else if ((howto & RB_HALT) != 0) { 334 BOOTTRACE("SIGUSR1 to init(8)"); 335 kern_psignal(initproc, SIGUSR1); 336 } else { 337 BOOTTRACE("SIGINT to init(8)"); 338 kern_psignal(initproc, SIGINT); 339 } 340 PROC_UNLOCK(initproc); 341 } 342 343 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 344 &shutdown_nice_task_fn, NULL); 345 346 /* 347 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 348 */ 349 void 350 shutdown_nice(int howto) 351 { 352 353 if (initproc != NULL && !SCHEDULER_STOPPED()) { 354 BOOTTRACE("shutdown initiated"); 355 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 356 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 357 } else { 358 /* 359 * No init(8) running, or scheduler would not allow it 360 * to run, so simply reboot. 361 */ 362 kern_reboot(howto | RB_NOSYNC); 363 } 364 } 365 366 static void 367 print_uptime(void) 368 { 369 int f; 370 struct timespec ts; 371 372 getnanouptime(&ts); 373 printf("Uptime: "); 374 f = 0; 375 if (ts.tv_sec >= 86400) { 376 printf("%ldd", (long)ts.tv_sec / 86400); 377 ts.tv_sec %= 86400; 378 f = 1; 379 } 380 if (f || ts.tv_sec >= 3600) { 381 printf("%ldh", (long)ts.tv_sec / 3600); 382 ts.tv_sec %= 3600; 383 f = 1; 384 } 385 if (f || ts.tv_sec >= 60) { 386 printf("%ldm", (long)ts.tv_sec / 60); 387 ts.tv_sec %= 60; 388 f = 1; 389 } 390 printf("%lds\n", (long)ts.tv_sec); 391 } 392 393 /* 394 * Set up a context that can be extracted from the dump. 395 */ 396 void 397 dump_savectx(void) 398 { 399 400 savectx(&dumppcb); 401 dumptid = curthread->td_tid; 402 } 403 404 int 405 doadump(boolean_t textdump) 406 { 407 boolean_t coredump; 408 int error; 409 410 error = 0; 411 if (dumping) 412 return (EBUSY); 413 if (TAILQ_EMPTY(&dumper_configs)) 414 return (ENXIO); 415 416 dump_savectx(); 417 dumping++; 418 419 coredump = TRUE; 420 #ifdef DDB 421 if (textdump && textdump_pending) { 422 coredump = FALSE; 423 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 424 } 425 #endif 426 if (coredump) { 427 struct dumperinfo *di; 428 429 TAILQ_FOREACH(di, &dumper_configs, di_next) { 430 error = dumpsys(di); 431 if (error == 0) 432 break; 433 } 434 } 435 436 dumping--; 437 return (error); 438 } 439 440 /* 441 * Trace the shutdown reason. 442 */ 443 static void 444 reboottrace(int howto) 445 { 446 if ((howto & RB_DUMP) != 0) { 447 if ((howto & RB_HALT) != 0) 448 BOOTTRACE("system panic: halting..."); 449 if ((howto & RB_POWEROFF) != 0) 450 BOOTTRACE("system panic: powering off..."); 451 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 452 BOOTTRACE("system panic: rebooting..."); 453 } else { 454 if ((howto & RB_HALT) != 0) 455 BOOTTRACE("system halting..."); 456 if ((howto & RB_POWEROFF) != 0) 457 BOOTTRACE("system powering off..."); 458 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 459 BOOTTRACE("system rebooting..."); 460 } 461 } 462 463 /* 464 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or 465 * power off. 466 */ 467 void 468 kern_reboot(int howto) 469 { 470 static int once = 0; 471 472 if (initproc != NULL && curproc != initproc) 473 BOOTTRACE("kernel shutdown (dirty) started"); 474 else 475 BOOTTRACE("kernel shutdown (clean) started"); 476 477 /* 478 * Normal paths here don't hold Giant, but we can wind up here 479 * unexpectedly with it held. Drop it now so we don't have to 480 * drop and pick it up elsewhere. The paths it is locking will 481 * never be returned to, and it is preferable to preclude 482 * deadlock than to lock against code that won't ever 483 * continue. 484 */ 485 while (mtx_owned(&Giant)) 486 mtx_unlock(&Giant); 487 488 #if defined(SMP) 489 /* 490 * Bind us to the first CPU so that all shutdown code runs there. Some 491 * systems don't shutdown properly (i.e., ACPI power off) if we 492 * run on another processor. 493 */ 494 if (!SCHEDULER_STOPPED()) { 495 thread_lock(curthread); 496 sched_bind(curthread, CPU_FIRST()); 497 thread_unlock(curthread); 498 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 499 ("%s: not running on cpu 0", __func__)); 500 } 501 #endif 502 /* We're in the process of rebooting. */ 503 rebooting = 1; 504 reboottrace(howto); 505 506 /* We are out of the debugger now. */ 507 kdb_active = 0; 508 509 /* 510 * Do any callouts that should be done BEFORE syncing the filesystems. 511 */ 512 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 513 BOOTTRACE("shutdown pre sync complete"); 514 515 /* 516 * Now sync filesystems 517 */ 518 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 519 once = 1; 520 BOOTTRACE("bufshutdown begin"); 521 bufshutdown(show_busybufs); 522 BOOTTRACE("bufshutdown end"); 523 } 524 525 print_uptime(); 526 527 cngrab(); 528 529 /* 530 * Ok, now do things that assume all filesystem activity has 531 * been completed. 532 */ 533 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 534 BOOTTRACE("shutdown post sync complete"); 535 536 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 537 doadump(TRUE); 538 539 /* Now that we're going to really halt the system... */ 540 BOOTTRACE("shutdown final begin"); 541 542 if (shutdown_trace) 543 boottrace_dump_console(); 544 545 EVENTHANDLER_INVOKE(shutdown_final, howto); 546 547 for(;;) ; /* safety against shutdown_reset not working */ 548 /* NOTREACHED */ 549 } 550 551 /* 552 * The system call that results in changing the rootfs. 553 */ 554 static int 555 kern_reroot(void) 556 { 557 struct vnode *oldrootvnode, *vp; 558 struct mount *mp, *devmp; 559 int error; 560 561 if (curproc != initproc) 562 return (EPERM); 563 564 /* 565 * Mark the filesystem containing currently-running executable 566 * (the temporary copy of init(8)) busy. 567 */ 568 vp = curproc->p_textvp; 569 error = vn_lock(vp, LK_SHARED); 570 if (error != 0) 571 return (error); 572 mp = vp->v_mount; 573 error = vfs_busy(mp, MBF_NOWAIT); 574 if (error != 0) { 575 vfs_ref(mp); 576 VOP_UNLOCK(vp); 577 error = vfs_busy(mp, 0); 578 vn_lock(vp, LK_SHARED | LK_RETRY); 579 vfs_rel(mp); 580 if (error != 0) { 581 VOP_UNLOCK(vp); 582 return (ENOENT); 583 } 584 if (VN_IS_DOOMED(vp)) { 585 VOP_UNLOCK(vp); 586 vfs_unbusy(mp); 587 return (ENOENT); 588 } 589 } 590 VOP_UNLOCK(vp); 591 592 /* 593 * Remove the filesystem containing currently-running executable 594 * from the mount list, to prevent it from being unmounted 595 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 596 * 597 * Also preserve /dev - forcibly unmounting it could cause driver 598 * reinitialization. 599 */ 600 601 vfs_ref(rootdevmp); 602 devmp = rootdevmp; 603 rootdevmp = NULL; 604 605 mtx_lock(&mountlist_mtx); 606 TAILQ_REMOVE(&mountlist, mp, mnt_list); 607 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 608 mtx_unlock(&mountlist_mtx); 609 610 oldrootvnode = rootvnode; 611 612 /* 613 * Unmount everything except for the two filesystems preserved above. 614 */ 615 vfs_unmountall(); 616 617 /* 618 * Add /dev back; vfs_mountroot() will move it into its new place. 619 */ 620 mtx_lock(&mountlist_mtx); 621 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 622 mtx_unlock(&mountlist_mtx); 623 rootdevmp = devmp; 624 vfs_rel(rootdevmp); 625 626 /* 627 * Mount the new rootfs. 628 */ 629 vfs_mountroot(); 630 631 /* 632 * Update all references to the old rootvnode. 633 */ 634 mountcheckdirs(oldrootvnode, rootvnode); 635 636 /* 637 * Add the temporary filesystem back and unbusy it. 638 */ 639 mtx_lock(&mountlist_mtx); 640 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 641 mtx_unlock(&mountlist_mtx); 642 vfs_unbusy(mp); 643 644 return (0); 645 } 646 647 /* 648 * If the shutdown was a clean halt, behave accordingly. 649 */ 650 static void 651 shutdown_halt(void *junk, int howto) 652 { 653 654 if (howto & RB_HALT) { 655 printf("\n"); 656 printf("The operating system has halted.\n"); 657 printf("Please press any key to reboot.\n\n"); 658 659 wdog_kern_pat(WD_TO_NEVER); 660 661 switch (cngetc()) { 662 case -1: /* No console, just die */ 663 cpu_halt(); 664 /* NOTREACHED */ 665 default: 666 break; 667 } 668 } 669 } 670 671 /* 672 * Check to see if the system panicked, pause and then reboot 673 * according to the specified delay. 674 */ 675 static void 676 shutdown_panic(void *junk, int howto) 677 { 678 int loop; 679 680 if (howto & RB_DUMP) { 681 if (panic_reboot_wait_time != 0) { 682 if (panic_reboot_wait_time != -1) { 683 printf("Automatic reboot in %d seconds - " 684 "press a key on the console to abort\n", 685 panic_reboot_wait_time); 686 for (loop = panic_reboot_wait_time * 10; 687 loop > 0; --loop) { 688 DELAY(1000 * 100); /* 1/10th second */ 689 /* Did user type a key? */ 690 if (cncheckc() != -1) 691 break; 692 } 693 if (!loop) 694 return; 695 } 696 } else { /* zero time specified - reboot NOW */ 697 return; 698 } 699 printf("--> Press a key on the console to reboot,\n"); 700 printf("--> or switch off the system now.\n"); 701 cngetc(); 702 } 703 } 704 705 /* 706 * Everything done, now reset 707 */ 708 static void 709 shutdown_reset(void *junk, int howto) 710 { 711 712 printf("Rebooting...\n"); 713 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 714 715 /* 716 * Acquiring smp_ipi_mtx here has a double effect: 717 * - it disables interrupts avoiding CPU0 preemption 718 * by fast handlers (thus deadlocking against other CPUs) 719 * - it avoids deadlocks against smp_rendezvous() or, more 720 * generally, threads busy-waiting, with this spinlock held, 721 * and waiting for responses by threads on other CPUs 722 * (ie. smp_tlb_shootdown()). 723 * 724 * For the !SMP case it just needs to handle the former problem. 725 */ 726 #ifdef SMP 727 mtx_lock_spin(&smp_ipi_mtx); 728 #else 729 spinlock_enter(); 730 #endif 731 732 cpu_reset(); 733 /* NOTREACHED */ /* assuming reset worked */ 734 } 735 736 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 737 static int kassert_warn_only = 0; 738 #ifdef KDB 739 static int kassert_do_kdb = 0; 740 #endif 741 #ifdef KTR 742 static int kassert_do_ktr = 0; 743 #endif 744 static int kassert_do_log = 1; 745 static int kassert_log_pps_limit = 4; 746 static int kassert_log_mute_at = 0; 747 static int kassert_log_panic_at = 0; 748 static int kassert_suppress_in_panic = 0; 749 static int kassert_warnings = 0; 750 751 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 752 "kassert options"); 753 754 #ifdef KASSERT_PANIC_OPTIONAL 755 #define KASSERT_RWTUN CTLFLAG_RWTUN 756 #else 757 #define KASSERT_RWTUN CTLFLAG_RDTUN 758 #endif 759 760 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 761 &kassert_warn_only, 0, 762 "KASSERT triggers a panic (0) or just a warning (1)"); 763 764 #ifdef KDB 765 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 766 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 767 #endif 768 769 #ifdef KTR 770 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 771 &kassert_do_ktr, 0, 772 "KASSERT does a KTR, set this to the KTRMASK you want"); 773 #endif 774 775 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 776 &kassert_do_log, 0, 777 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 778 779 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, 780 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 781 782 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 783 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 784 785 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 786 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 787 788 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 789 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 790 791 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 792 &kassert_suppress_in_panic, 0, 793 "KASSERTs will be suppressed while handling a panic"); 794 #undef KASSERT_RWTUN 795 796 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 797 798 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 799 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0, 800 kassert_sysctl_kassert, "I", 801 "set to trigger a test kassert"); 802 803 static int 804 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 805 { 806 int error, i; 807 808 error = sysctl_wire_old_buffer(req, sizeof(int)); 809 if (error == 0) { 810 i = 0; 811 error = sysctl_handle_int(oidp, &i, 0, req); 812 } 813 if (error != 0 || req->newptr == NULL) 814 return (error); 815 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 816 return (0); 817 } 818 819 #ifdef KASSERT_PANIC_OPTIONAL 820 /* 821 * Called by KASSERT, this decides if we will panic 822 * or if we will log via printf and/or ktr. 823 */ 824 void 825 kassert_panic(const char *fmt, ...) 826 { 827 static char buf[256]; 828 va_list ap; 829 830 va_start(ap, fmt); 831 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 832 va_end(ap); 833 834 /* 835 * If we are suppressing secondary panics, log the warning but do not 836 * re-enter panic/kdb. 837 */ 838 if (panicstr != NULL && kassert_suppress_in_panic) { 839 if (kassert_do_log) { 840 printf("KASSERT failed: %s\n", buf); 841 #ifdef KDB 842 if (trace_all_panics && trace_on_panic) 843 kdb_backtrace(); 844 #endif 845 } 846 return; 847 } 848 849 /* 850 * panic if we're not just warning, or if we've exceeded 851 * kassert_log_panic_at warnings. 852 */ 853 if (!kassert_warn_only || 854 (kassert_log_panic_at > 0 && 855 kassert_warnings >= kassert_log_panic_at)) { 856 va_start(ap, fmt); 857 vpanic(fmt, ap); 858 /* NORETURN */ 859 } 860 #ifdef KTR 861 if (kassert_do_ktr) 862 CTR0(ktr_mask, buf); 863 #endif /* KTR */ 864 /* 865 * log if we've not yet met the mute limit. 866 */ 867 if (kassert_do_log && 868 (kassert_log_mute_at == 0 || 869 kassert_warnings < kassert_log_mute_at)) { 870 static struct timeval lasterr; 871 static int curerr; 872 873 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 874 printf("KASSERT failed: %s\n", buf); 875 kdb_backtrace(); 876 } 877 } 878 #ifdef KDB 879 if (kassert_do_kdb) { 880 kdb_enter(KDB_WHY_KASSERT, buf); 881 } 882 #endif 883 atomic_add_int(&kassert_warnings, 1); 884 } 885 #endif /* KASSERT_PANIC_OPTIONAL */ 886 #endif 887 888 /* 889 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 890 * and then reboots. If we are called twice, then we avoid trying to sync 891 * the disks as this often leads to recursive panics. 892 */ 893 void 894 panic(const char *fmt, ...) 895 { 896 va_list ap; 897 898 va_start(ap, fmt); 899 vpanic(fmt, ap); 900 } 901 902 void 903 vpanic(const char *fmt, va_list ap) 904 { 905 #ifdef SMP 906 cpuset_t other_cpus; 907 #endif 908 struct thread *td = curthread; 909 int bootopt, newpanic; 910 static char buf[256]; 911 912 spinlock_enter(); 913 914 #ifdef SMP 915 /* 916 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 917 * concurrently entering panic. Only the winner will proceed 918 * further. 919 */ 920 if (panicstr == NULL && !kdb_active) { 921 other_cpus = all_cpus; 922 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 923 stop_cpus_hard(other_cpus); 924 } 925 #endif 926 927 /* 928 * Ensure that the scheduler is stopped while panicking, even if panic 929 * has been entered from kdb. 930 */ 931 td->td_stopsched = 1; 932 933 bootopt = RB_AUTOBOOT; 934 newpanic = 0; 935 if (panicstr) 936 bootopt |= RB_NOSYNC; 937 else { 938 bootopt |= RB_DUMP; 939 panicstr = fmt; 940 panicked = true; 941 newpanic = 1; 942 } 943 944 if (newpanic) { 945 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 946 panicstr = buf; 947 cngrab(); 948 printf("panic: %s\n", buf); 949 } else { 950 printf("panic: "); 951 vprintf(fmt, ap); 952 printf("\n"); 953 } 954 #ifdef SMP 955 printf("cpuid = %d\n", PCPU_GET(cpuid)); 956 #endif 957 printf("time = %jd\n", (intmax_t )time_second); 958 #ifdef KDB 959 if ((newpanic || trace_all_panics) && trace_on_panic) 960 kdb_backtrace(); 961 if (debugger_on_panic) 962 kdb_enter(KDB_WHY_PANIC, "panic"); 963 else if (!newpanic && debugger_on_recursive_panic) 964 kdb_enter(KDB_WHY_PANIC, "re-panic"); 965 #endif 966 /*thread_lock(td); */ 967 td->td_flags |= TDF_INPANIC; 968 /* thread_unlock(td); */ 969 if (!sync_on_panic) 970 bootopt |= RB_NOSYNC; 971 if (poweroff_on_panic) 972 bootopt |= RB_POWEROFF; 973 if (powercycle_on_panic) 974 bootopt |= RB_POWERCYCLE; 975 kern_reboot(bootopt); 976 } 977 978 /* 979 * Support for poweroff delay. 980 * 981 * Please note that setting this delay too short might power off your machine 982 * before the write cache on your hard disk has been flushed, leading to 983 * soft-updates inconsistencies. 984 */ 985 #ifndef POWEROFF_DELAY 986 # define POWEROFF_DELAY 5000 987 #endif 988 static int poweroff_delay = POWEROFF_DELAY; 989 990 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 991 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 992 993 static void 994 poweroff_wait(void *junk, int howto) 995 { 996 997 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 998 return; 999 DELAY(poweroff_delay * 1000); 1000 } 1001 1002 /* 1003 * Some system processes (e.g. syncer) need to be stopped at appropriate 1004 * points in their main loops prior to a system shutdown, so that they 1005 * won't interfere with the shutdown process (e.g. by holding a disk buf 1006 * to cause sync to fail). For each of these system processes, register 1007 * shutdown_kproc() as a handler for one of shutdown events. 1008 */ 1009 static int kproc_shutdown_wait = 60; 1010 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 1011 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 1012 1013 void 1014 kproc_shutdown(void *arg, int howto) 1015 { 1016 struct proc *p; 1017 int error; 1018 1019 if (panicstr) 1020 return; 1021 1022 p = (struct proc *)arg; 1023 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 1024 kproc_shutdown_wait, p->p_comm); 1025 error = kproc_suspend(p, kproc_shutdown_wait * hz); 1026 1027 if (error == EWOULDBLOCK) 1028 printf("timed out\n"); 1029 else 1030 printf("done\n"); 1031 } 1032 1033 void 1034 kthread_shutdown(void *arg, int howto) 1035 { 1036 struct thread *td; 1037 int error; 1038 1039 if (panicstr) 1040 return; 1041 1042 td = (struct thread *)arg; 1043 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 1044 kproc_shutdown_wait, td->td_name); 1045 error = kthread_suspend(td, kproc_shutdown_wait * hz); 1046 1047 if (error == EWOULDBLOCK) 1048 printf("timed out\n"); 1049 else 1050 printf("done\n"); 1051 } 1052 1053 static int 1054 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 1055 { 1056 char buf[256]; 1057 struct dumperinfo *di; 1058 struct sbuf sb; 1059 int error; 1060 1061 error = sysctl_wire_old_buffer(req, 0); 1062 if (error != 0) 1063 return (error); 1064 1065 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1066 1067 mtx_lock(&dumpconf_list_lk); 1068 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1069 if (di != TAILQ_FIRST(&dumper_configs)) 1070 sbuf_putc(&sb, ','); 1071 sbuf_cat(&sb, di->di_devname); 1072 } 1073 mtx_unlock(&dumpconf_list_lk); 1074 1075 error = sbuf_finish(&sb); 1076 sbuf_delete(&sb); 1077 return (error); 1078 } 1079 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, 1080 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0, 1081 dumpdevname_sysctl_handler, "A", 1082 "Device(s) for kernel dumps"); 1083 1084 static int _dump_append(struct dumperinfo *di, void *virtual, size_t length); 1085 1086 #ifdef EKCD 1087 static struct kerneldumpcrypto * 1088 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1089 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1090 { 1091 struct kerneldumpcrypto *kdc; 1092 struct kerneldumpkey *kdk; 1093 uint32_t dumpkeysize; 1094 1095 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1096 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1097 1098 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1099 1100 kdc->kdc_encryption = encryption; 1101 switch (kdc->kdc_encryption) { 1102 case KERNELDUMP_ENC_AES_256_CBC: 1103 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1104 goto failed; 1105 break; 1106 case KERNELDUMP_ENC_CHACHA20: 1107 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1108 break; 1109 default: 1110 goto failed; 1111 } 1112 1113 kdc->kdc_dumpkeysize = dumpkeysize; 1114 kdk = kdc->kdc_dumpkey; 1115 kdk->kdk_encryption = kdc->kdc_encryption; 1116 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1117 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1118 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1119 1120 return (kdc); 1121 failed: 1122 zfree(kdc, M_EKCD); 1123 return (NULL); 1124 } 1125 1126 static int 1127 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1128 { 1129 uint8_t hash[SHA256_DIGEST_LENGTH]; 1130 SHA256_CTX ctx; 1131 struct kerneldumpkey *kdk; 1132 int error; 1133 1134 error = 0; 1135 1136 if (kdc == NULL) 1137 return (0); 1138 1139 /* 1140 * When a user enters ddb it can write a crash dump multiple times. 1141 * Each time it should be encrypted using a different IV. 1142 */ 1143 SHA256_Init(&ctx); 1144 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1145 SHA256_Final(hash, &ctx); 1146 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1147 1148 switch (kdc->kdc_encryption) { 1149 case KERNELDUMP_ENC_AES_256_CBC: 1150 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1151 kdc->kdc_iv) <= 0) { 1152 error = EINVAL; 1153 goto out; 1154 } 1155 break; 1156 case KERNELDUMP_ENC_CHACHA20: 1157 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1158 break; 1159 default: 1160 error = EINVAL; 1161 goto out; 1162 } 1163 1164 kdk = kdc->kdc_dumpkey; 1165 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1166 out: 1167 explicit_bzero(hash, sizeof(hash)); 1168 return (error); 1169 } 1170 1171 static uint32_t 1172 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1173 { 1174 1175 if (kdc == NULL) 1176 return (0); 1177 return (kdc->kdc_dumpkeysize); 1178 } 1179 #endif /* EKCD */ 1180 1181 static struct kerneldumpcomp * 1182 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1183 { 1184 struct kerneldumpcomp *kdcomp; 1185 int format; 1186 1187 switch (compression) { 1188 case KERNELDUMP_COMP_GZIP: 1189 format = COMPRESS_GZIP; 1190 break; 1191 case KERNELDUMP_COMP_ZSTD: 1192 format = COMPRESS_ZSTD; 1193 break; 1194 default: 1195 return (NULL); 1196 } 1197 1198 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1199 kdcomp->kdc_format = compression; 1200 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1201 format, di->maxiosize, kerneldump_gzlevel, di); 1202 if (kdcomp->kdc_stream == NULL) { 1203 free(kdcomp, M_DUMPER); 1204 return (NULL); 1205 } 1206 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1207 return (kdcomp); 1208 } 1209 1210 static void 1211 kerneldumpcomp_destroy(struct dumperinfo *di) 1212 { 1213 struct kerneldumpcomp *kdcomp; 1214 1215 kdcomp = di->kdcomp; 1216 if (kdcomp == NULL) 1217 return; 1218 compressor_fini(kdcomp->kdc_stream); 1219 zfree(kdcomp->kdc_buf, M_DUMPER); 1220 free(kdcomp, M_DUMPER); 1221 } 1222 1223 /* 1224 * Free a dumper. Must not be present on global list. 1225 */ 1226 void 1227 dumper_destroy(struct dumperinfo *di) 1228 { 1229 1230 if (di == NULL) 1231 return; 1232 1233 zfree(di->blockbuf, M_DUMPER); 1234 kerneldumpcomp_destroy(di); 1235 #ifdef EKCD 1236 zfree(di->kdcrypto, M_EKCD); 1237 #endif 1238 zfree(di, M_DUMPER); 1239 } 1240 1241 /* 1242 * Allocate and set up a new dumper from the provided template. 1243 */ 1244 int 1245 dumper_create(const struct dumperinfo *di_template, const char *devname, 1246 const struct diocskerneldump_arg *kda, struct dumperinfo **dip) 1247 { 1248 struct dumperinfo *newdi; 1249 int error = 0; 1250 1251 if (dip == NULL) 1252 return (EINVAL); 1253 1254 /* Allocate a new dumper */ 1255 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, 1256 M_WAITOK | M_ZERO); 1257 memcpy(newdi, di_template, sizeof(*newdi)); 1258 newdi->blockbuf = NULL; 1259 newdi->kdcrypto = NULL; 1260 newdi->kdcomp = NULL; 1261 strcpy(newdi->di_devname, devname); 1262 1263 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1264 #ifdef EKCD 1265 newdi->kdcrypto = kerneldumpcrypto_create(newdi->blocksize, 1266 kda->kda_encryption, kda->kda_key, 1267 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1268 if (newdi->kdcrypto == NULL) { 1269 error = EINVAL; 1270 goto cleanup; 1271 } 1272 #else 1273 error = EOPNOTSUPP; 1274 goto cleanup; 1275 #endif 1276 } 1277 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1278 #ifdef EKCD 1279 /* 1280 * We can't support simultaneous unpadded block cipher 1281 * encryption and compression because there is no guarantee the 1282 * length of the compressed result is exactly a multiple of the 1283 * cipher block size. 1284 */ 1285 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1286 error = EOPNOTSUPP; 1287 goto cleanup; 1288 } 1289 #endif 1290 newdi->kdcomp = kerneldumpcomp_create(newdi, 1291 kda->kda_compression); 1292 if (newdi->kdcomp == NULL) { 1293 error = EINVAL; 1294 goto cleanup; 1295 } 1296 } 1297 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1298 1299 *dip = newdi; 1300 return (0); 1301 cleanup: 1302 dumper_destroy(newdi); 1303 return (error); 1304 } 1305 1306 /* 1307 * Create a new dumper and register it in the global list. 1308 */ 1309 int 1310 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1311 const struct diocskerneldump_arg *kda) 1312 { 1313 struct dumperinfo *newdi, *listdi; 1314 bool inserted; 1315 uint8_t index; 1316 int error; 1317 1318 index = kda->kda_index; 1319 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1320 index != KDA_REMOVE_ALL); 1321 1322 error = priv_check(curthread, PRIV_SETDUMPER); 1323 if (error != 0) 1324 return (error); 1325 1326 error = dumper_create(di_template, devname, kda, &newdi); 1327 if (error != 0) 1328 return (error); 1329 1330 /* Add the new configuration to the queue */ 1331 mtx_lock(&dumpconf_list_lk); 1332 inserted = false; 1333 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1334 if (index == 0) { 1335 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1336 inserted = true; 1337 break; 1338 } 1339 index--; 1340 } 1341 if (!inserted) 1342 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1343 mtx_unlock(&dumpconf_list_lk); 1344 1345 return (0); 1346 } 1347 1348 #ifdef DDB 1349 void 1350 dumper_ddb_insert(struct dumperinfo *newdi) 1351 { 1352 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1353 } 1354 1355 void 1356 dumper_ddb_remove(struct dumperinfo *di) 1357 { 1358 TAILQ_REMOVE(&dumper_configs, di, di_next); 1359 } 1360 #endif 1361 1362 static bool 1363 dumper_config_match(const struct dumperinfo *di, const char *devname, 1364 const struct diocskerneldump_arg *kda) 1365 { 1366 if (kda->kda_index == KDA_REMOVE_ALL) 1367 return (true); 1368 1369 if (strcmp(di->di_devname, devname) != 0) 1370 return (false); 1371 1372 /* 1373 * Allow wildcard removal of configs matching a device on g_dev_orphan. 1374 */ 1375 if (kda->kda_index == KDA_REMOVE_DEV) 1376 return (true); 1377 1378 if (di->kdcomp != NULL) { 1379 if (di->kdcomp->kdc_format != kda->kda_compression) 1380 return (false); 1381 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1382 return (false); 1383 #ifdef EKCD 1384 if (di->kdcrypto != NULL) { 1385 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1386 return (false); 1387 /* 1388 * Do we care to verify keys match to delete? It seems weird 1389 * to expect multiple fallback dump configurations on the same 1390 * device that only differ in crypto key. 1391 */ 1392 } else 1393 #endif 1394 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1395 return (false); 1396 1397 return (true); 1398 } 1399 1400 /* 1401 * Remove and free the requested dumper(s) from the global list. 1402 */ 1403 int 1404 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1405 { 1406 struct dumperinfo *di, *sdi; 1407 bool found; 1408 int error; 1409 1410 error = priv_check(curthread, PRIV_SETDUMPER); 1411 if (error != 0) 1412 return (error); 1413 1414 /* 1415 * Try to find a matching configuration, and kill it. 1416 * 1417 * NULL 'kda' indicates remove any configuration matching 'devname', 1418 * which may remove multiple configurations in atypical configurations. 1419 */ 1420 found = false; 1421 mtx_lock(&dumpconf_list_lk); 1422 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1423 if (dumper_config_match(di, devname, kda)) { 1424 found = true; 1425 TAILQ_REMOVE(&dumper_configs, di, di_next); 1426 dumper_destroy(di); 1427 } 1428 } 1429 mtx_unlock(&dumpconf_list_lk); 1430 1431 /* Only produce ENOENT if a more targeted match didn't match. */ 1432 if (!found && kda->kda_index == KDA_REMOVE) 1433 return (ENOENT); 1434 return (0); 1435 } 1436 1437 static int 1438 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1439 { 1440 1441 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1442 offset - di->mediaoffset + length > di->mediasize)) { 1443 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1444 printf( 1445 "Compressed dump failed to fit in device boundaries.\n"); 1446 return (E2BIG); 1447 } 1448 1449 printf("Attempt to write outside dump device boundaries.\n" 1450 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1451 (intmax_t)offset, (intmax_t)di->mediaoffset, 1452 (uintmax_t)length, (intmax_t)di->mediasize); 1453 return (ENOSPC); 1454 } 1455 if (length % di->blocksize != 0) { 1456 printf("Attempt to write partial block of length %ju.\n", 1457 (uintmax_t)length); 1458 return (EINVAL); 1459 } 1460 if (offset % di->blocksize != 0) { 1461 printf("Attempt to write at unaligned offset %jd.\n", 1462 (intmax_t)offset); 1463 return (EINVAL); 1464 } 1465 1466 return (0); 1467 } 1468 1469 #ifdef EKCD 1470 static int 1471 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1472 { 1473 1474 switch (kdc->kdc_encryption) { 1475 case KERNELDUMP_ENC_AES_256_CBC: 1476 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1477 8 * size, buf) <= 0) { 1478 return (EIO); 1479 } 1480 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1481 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1482 return (EIO); 1483 } 1484 break; 1485 case KERNELDUMP_ENC_CHACHA20: 1486 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1487 break; 1488 default: 1489 return (EINVAL); 1490 } 1491 1492 return (0); 1493 } 1494 1495 /* Encrypt data and call dumper. */ 1496 static int 1497 dump_encrypted_write(struct dumperinfo *di, void *virtual, off_t offset, 1498 size_t length) 1499 { 1500 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1501 struct kerneldumpcrypto *kdc; 1502 int error; 1503 size_t nbytes; 1504 1505 kdc = di->kdcrypto; 1506 1507 while (length > 0) { 1508 nbytes = MIN(length, sizeof(buf)); 1509 bcopy(virtual, buf, nbytes); 1510 1511 if (dump_encrypt(kdc, buf, nbytes) != 0) 1512 return (EIO); 1513 1514 error = dump_write(di, buf, offset, nbytes); 1515 if (error != 0) 1516 return (error); 1517 1518 offset += nbytes; 1519 virtual = (void *)((uint8_t *)virtual + nbytes); 1520 length -= nbytes; 1521 } 1522 1523 return (0); 1524 } 1525 #endif /* EKCD */ 1526 1527 static int 1528 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1529 { 1530 struct dumperinfo *di; 1531 size_t resid, rlength; 1532 int error; 1533 1534 di = arg; 1535 1536 if (length % di->blocksize != 0) { 1537 /* 1538 * This must be the final write after flushing the compression 1539 * stream. Write as many full blocks as possible and stash the 1540 * residual data in the dumper's block buffer. It will be 1541 * padded and written in dump_finish(). 1542 */ 1543 rlength = rounddown(length, di->blocksize); 1544 if (rlength != 0) { 1545 error = _dump_append(di, base, rlength); 1546 if (error != 0) 1547 return (error); 1548 } 1549 resid = length - rlength; 1550 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1551 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1552 di->kdcomp->kdc_resid = resid; 1553 return (EAGAIN); 1554 } 1555 return (_dump_append(di, base, length)); 1556 } 1557 1558 /* 1559 * Write kernel dump headers at the beginning and end of the dump extent. 1560 * Write the kernel dump encryption key after the leading header if we were 1561 * configured to do so. 1562 */ 1563 static int 1564 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1565 { 1566 #ifdef EKCD 1567 struct kerneldumpcrypto *kdc; 1568 #endif 1569 void *buf; 1570 size_t hdrsz; 1571 uint64_t extent; 1572 uint32_t keysize; 1573 int error; 1574 1575 hdrsz = sizeof(*kdh); 1576 if (hdrsz > di->blocksize) 1577 return (ENOMEM); 1578 1579 #ifdef EKCD 1580 kdc = di->kdcrypto; 1581 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1582 #else 1583 keysize = 0; 1584 #endif 1585 1586 /* 1587 * If the dump device has special handling for headers, let it take care 1588 * of writing them out. 1589 */ 1590 if (di->dumper_hdr != NULL) 1591 return (di->dumper_hdr(di, kdh)); 1592 1593 if (hdrsz == di->blocksize) 1594 buf = kdh; 1595 else { 1596 buf = di->blockbuf; 1597 memset(buf, 0, di->blocksize); 1598 memcpy(buf, kdh, hdrsz); 1599 } 1600 1601 extent = dtoh64(kdh->dumpextent); 1602 #ifdef EKCD 1603 if (kdc != NULL) { 1604 error = dump_write(di, kdc->kdc_dumpkey, 1605 di->mediaoffset + di->mediasize - di->blocksize - extent - 1606 keysize, keysize); 1607 if (error != 0) 1608 return (error); 1609 } 1610 #endif 1611 1612 error = dump_write(di, buf, 1613 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1614 keysize, di->blocksize); 1615 if (error == 0) 1616 error = dump_write(di, buf, di->mediaoffset + di->mediasize - 1617 di->blocksize, di->blocksize); 1618 return (error); 1619 } 1620 1621 /* 1622 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1623 * protect us from metadata and metadata from us. 1624 */ 1625 #define SIZEOF_METADATA (64 * 1024) 1626 1627 /* 1628 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1629 * if requested, and make sure that we have enough space on the dump device. 1630 * 1631 * We set things up so that the dump ends before the last sector of the dump 1632 * device, at which the trailing header is written. 1633 * 1634 * +-----------+------+-----+----------------------------+------+ 1635 * | | lhdr | key | ... kernel dump ... | thdr | 1636 * +-----------+------+-----+----------------------------+------+ 1637 * 1 blk opt <------- dump extent --------> 1 blk 1638 * 1639 * Dumps written using dump_append() start at the beginning of the extent. 1640 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1641 * will not. The true length of the dump is recorded in the leading and trailing 1642 * headers once the dump has been completed. 1643 * 1644 * The dump device may provide a callback, in which case it will initialize 1645 * dumpoff and take care of laying out the headers. 1646 */ 1647 int 1648 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1649 { 1650 #ifdef EKCD 1651 struct kerneldumpcrypto *kdc; 1652 #endif 1653 void *key; 1654 uint64_t dumpextent, span; 1655 uint32_t keysize; 1656 int error; 1657 1658 #ifdef EKCD 1659 /* Send the key before the dump so a partial dump is still usable. */ 1660 kdc = di->kdcrypto; 1661 error = kerneldumpcrypto_init(kdc); 1662 if (error != 0) 1663 return (error); 1664 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1665 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1666 #else 1667 error = 0; 1668 keysize = 0; 1669 key = NULL; 1670 #endif 1671 1672 if (di->dumper_start != NULL) { 1673 error = di->dumper_start(di, key, keysize); 1674 } else { 1675 dumpextent = dtoh64(kdh->dumpextent); 1676 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1677 keysize; 1678 if (di->mediasize < span) { 1679 if (di->kdcomp == NULL) 1680 return (E2BIG); 1681 1682 /* 1683 * We don't yet know how much space the compressed dump 1684 * will occupy, so try to use the whole swap partition 1685 * (minus the first 64KB) in the hope that the 1686 * compressed dump will fit. If that doesn't turn out to 1687 * be enough, the bounds checking in dump_write() 1688 * will catch us and cause the dump to fail. 1689 */ 1690 dumpextent = di->mediasize - span + dumpextent; 1691 kdh->dumpextent = htod64(dumpextent); 1692 } 1693 1694 /* 1695 * The offset at which to begin writing the dump. 1696 */ 1697 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1698 dumpextent; 1699 } 1700 di->origdumpoff = di->dumpoff; 1701 return (error); 1702 } 1703 1704 static int 1705 _dump_append(struct dumperinfo *di, void *virtual, size_t length) 1706 { 1707 int error; 1708 1709 #ifdef EKCD 1710 if (di->kdcrypto != NULL) 1711 error = dump_encrypted_write(di, virtual, di->dumpoff, length); 1712 else 1713 #endif 1714 error = dump_write(di, virtual, di->dumpoff, length); 1715 if (error == 0) 1716 di->dumpoff += length; 1717 return (error); 1718 } 1719 1720 /* 1721 * Write to the dump device starting at dumpoff. When compression is enabled, 1722 * writes to the device will be performed using a callback that gets invoked 1723 * when the compression stream's output buffer is full. 1724 */ 1725 int 1726 dump_append(struct dumperinfo *di, void *virtual, size_t length) 1727 { 1728 void *buf; 1729 1730 if (di->kdcomp != NULL) { 1731 /* Bounce through a buffer to avoid CRC errors. */ 1732 if (length > di->maxiosize) 1733 return (EINVAL); 1734 buf = di->kdcomp->kdc_buf; 1735 memmove(buf, virtual, length); 1736 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1737 } 1738 return (_dump_append(di, virtual, length)); 1739 } 1740 1741 /* 1742 * Write to the dump device at the specified offset. 1743 */ 1744 int 1745 dump_write(struct dumperinfo *di, void *virtual, off_t offset, size_t length) 1746 { 1747 int error; 1748 1749 error = dump_check_bounds(di, offset, length); 1750 if (error != 0) 1751 return (error); 1752 return (di->dumper(di->priv, virtual, offset, length)); 1753 } 1754 1755 /* 1756 * Perform kernel dump finalization: flush the compression stream, if necessary, 1757 * write the leading and trailing kernel dump headers now that we know the true 1758 * length of the dump, and optionally write the encryption key following the 1759 * leading header. 1760 */ 1761 int 1762 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1763 { 1764 int error; 1765 1766 if (di->kdcomp != NULL) { 1767 error = compressor_flush(di->kdcomp->kdc_stream); 1768 if (error == EAGAIN) { 1769 /* We have residual data in di->blockbuf. */ 1770 error = _dump_append(di, di->blockbuf, di->blocksize); 1771 if (error == 0) 1772 /* Compensate for _dump_append()'s adjustment. */ 1773 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1774 di->kdcomp->kdc_resid = 0; 1775 } 1776 if (error != 0) 1777 return (error); 1778 1779 /* 1780 * We now know the size of the compressed dump, so update the 1781 * header accordingly and recompute parity. 1782 */ 1783 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1784 kdh->parity = 0; 1785 kdh->parity = kerneldump_parity(kdh); 1786 1787 compressor_reset(di->kdcomp->kdc_stream); 1788 } 1789 1790 error = dump_write_headers(di, kdh); 1791 if (error != 0) 1792 return (error); 1793 1794 (void)dump_write(di, NULL, 0, 0); 1795 return (0); 1796 } 1797 1798 void 1799 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1800 const char *magic, uint32_t archver, uint64_t dumplen) 1801 { 1802 size_t dstsize; 1803 1804 bzero(kdh, sizeof(*kdh)); 1805 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1806 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1807 kdh->version = htod32(KERNELDUMPVERSION); 1808 kdh->architectureversion = htod32(archver); 1809 kdh->dumplength = htod64(dumplen); 1810 kdh->dumpextent = kdh->dumplength; 1811 kdh->dumptime = htod64(time_second); 1812 #ifdef EKCD 1813 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1814 #else 1815 kdh->dumpkeysize = 0; 1816 #endif 1817 kdh->blocksize = htod32(di->blocksize); 1818 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1819 dstsize = sizeof(kdh->versionstring); 1820 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1821 kdh->versionstring[dstsize - 2] = '\n'; 1822 if (panicstr != NULL) 1823 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1824 if (di->kdcomp != NULL) 1825 kdh->compression = di->kdcomp->kdc_format; 1826 kdh->parity = kerneldump_parity(kdh); 1827 } 1828 1829 #ifdef DDB 1830 DB_SHOW_COMMAND(panic, db_show_panic) 1831 { 1832 1833 if (panicstr == NULL) 1834 db_printf("panicstr not set\n"); 1835 else 1836 db_printf("panic: %s\n", panicstr); 1837 } 1838 #endif 1839