1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 #include "opt_ddb.h" 39 #include "opt_ekcd.h" 40 #include "opt_kdb.h" 41 #include "opt_panic.h" 42 #include "opt_printf.h" 43 #include "opt_sched.h" 44 #include "opt_watchdog.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/bio.h> 49 #include <sys/boottrace.h> 50 #include <sys/buf.h> 51 #include <sys/conf.h> 52 #include <sys/compressor.h> 53 #include <sys/cons.h> 54 #include <sys/disk.h> 55 #include <sys/eventhandler.h> 56 #include <sys/filedesc.h> 57 #include <sys/jail.h> 58 #include <sys/kdb.h> 59 #include <sys/kernel.h> 60 #include <sys/kerneldump.h> 61 #include <sys/kthread.h> 62 #include <sys/ktr.h> 63 #include <sys/malloc.h> 64 #include <sys/mbuf.h> 65 #include <sys/mount.h> 66 #include <sys/priv.h> 67 #include <sys/proc.h> 68 #include <sys/reboot.h> 69 #include <sys/resourcevar.h> 70 #include <sys/rwlock.h> 71 #include <sys/sbuf.h> 72 #include <sys/sched.h> 73 #include <sys/smp.h> 74 #include <sys/stdarg.h> 75 #include <sys/sysctl.h> 76 #include <sys/sysproto.h> 77 #include <sys/taskqueue.h> 78 #include <sys/vnode.h> 79 #include <sys/watchdog.h> 80 81 #include <crypto/chacha20/chacha.h> 82 #include <crypto/rijndael/rijndael-api-fst.h> 83 #include <crypto/sha2/sha256.h> 84 85 #include <ddb/ddb.h> 86 87 #include <machine/cpu.h> 88 #include <machine/dump.h> 89 #include <machine/pcb.h> 90 #include <machine/smp.h> 91 92 #include <security/mac/mac_framework.h> 93 94 #include <vm/vm.h> 95 #include <vm/vm_object.h> 96 #include <vm/vm_page.h> 97 #include <vm/vm_pager.h> 98 #include <vm/swap_pager.h> 99 100 #include <sys/signalvar.h> 101 102 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 103 104 #ifndef PANIC_REBOOT_WAIT_TIME 105 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 106 #endif 107 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 108 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, 109 &panic_reboot_wait_time, 0, 110 "Seconds to wait before rebooting after a panic"); 111 static int reboot_wait_time = 0; 112 SYSCTL_INT(_kern, OID_AUTO, reboot_wait_time, CTLFLAG_RWTUN, 113 &reboot_wait_time, 0, 114 "Seconds to wait before rebooting"); 115 116 #ifdef KDB 117 #ifdef KDB_UNATTENDED 118 int debugger_on_panic = 0; 119 #else 120 int debugger_on_panic = 1; 121 #endif 122 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 123 CTLFLAG_RWTUN, &debugger_on_panic, 0, 124 "Run debugger on kernel panic"); 125 126 static bool debugger_on_recursive_panic = false; 127 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic, 128 CTLFLAG_RWTUN, &debugger_on_recursive_panic, 0, 129 "Run debugger on recursive kernel panic"); 130 131 int debugger_on_trap = 0; 132 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, 133 CTLFLAG_RWTUN, &debugger_on_trap, 0, 134 "Run debugger on kernel trap before panic"); 135 136 #ifdef KDB_TRACE 137 static int trace_on_panic = 1; 138 static bool trace_all_panics = true; 139 #else 140 static int trace_on_panic = 0; 141 static bool trace_all_panics = false; 142 #endif 143 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 144 CTLFLAG_RWTUN | CTLFLAG_SECURE, 145 &trace_on_panic, 0, "Print stack trace on kernel panic"); 146 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, 147 &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); 148 #endif /* KDB */ 149 150 static int sync_on_panic = 0; 151 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 152 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 153 154 static bool poweroff_on_panic = 0; 155 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, 156 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); 157 158 static bool powercycle_on_panic = 0; 159 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, 160 &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); 161 162 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 163 "Shutdown environment"); 164 165 #ifndef DIAGNOSTIC 166 static int show_busybufs; 167 #else 168 static int show_busybufs = 1; 169 #endif 170 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 171 &show_busybufs, 0, 172 "Show busy buffers during shutdown"); 173 174 int suspend_blocked = 0; 175 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 176 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 177 178 #ifdef EKCD 179 FEATURE(ekcd, "Encrypted kernel crash dumps support"); 180 181 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); 182 183 struct kerneldumpcrypto { 184 uint8_t kdc_encryption; 185 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; 186 union { 187 struct { 188 keyInstance aes_ki; 189 cipherInstance aes_ci; 190 } u_aes; 191 struct chacha_ctx u_chacha; 192 } u; 193 #define kdc_ki u.u_aes.aes_ki 194 #define kdc_ci u.u_aes.aes_ci 195 #define kdc_chacha u.u_chacha 196 uint32_t kdc_dumpkeysize; 197 struct kerneldumpkey kdc_dumpkey[]; 198 }; 199 #endif 200 201 struct kerneldumpcomp { 202 uint8_t kdc_format; 203 struct compressor *kdc_stream; 204 uint8_t *kdc_buf; 205 size_t kdc_resid; 206 }; 207 208 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di, 209 uint8_t compression); 210 static void kerneldumpcomp_destroy(struct dumperinfo *di); 211 static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg); 212 213 static int kerneldump_gzlevel = 6; 214 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, 215 &kerneldump_gzlevel, 0, 216 "Kernel crash dump compression level"); 217 218 /* 219 * Variable panicstr contains argument to first call to panic; used as flag 220 * to indicate that the kernel has already called panic. 221 */ 222 const char *panicstr __read_mostly; 223 bool scheduler_stopped __read_frequently; 224 225 int dumping __read_mostly; /* system is dumping */ 226 int rebooting __read_mostly; /* system is rebooting */ 227 bool dumped_core __read_mostly; /* system successfully dumped core */ 228 /* 229 * Used to serialize between sysctl kern.shutdown.dumpdevname and list 230 * modifications via ioctl. 231 */ 232 static struct mtx dumpconf_list_lk; 233 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); 234 235 /* Our selected dumper(s). */ 236 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 237 TAILQ_HEAD_INITIALIZER(dumper_configs); 238 239 /* Context information for dump-debuggers, saved by the dump_savectx() macro. */ 240 struct pcb dumppcb; /* Registers. */ 241 lwpid_t dumptid; /* Thread ID. */ 242 243 static struct cdevsw reroot_cdevsw = { 244 .d_version = D_VERSION, 245 .d_name = "reroot", 246 }; 247 248 static void poweroff_wait(void *, int); 249 static void shutdown_halt(void *junk, int howto); 250 static void shutdown_panic(void *junk, int howto); 251 static void shutdown_reset(void *junk, int howto); 252 static int kern_reroot(void); 253 254 /* register various local shutdown events */ 255 static void 256 shutdown_conf(void *unused) 257 { 258 259 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 260 SHUTDOWN_PRI_FIRST); 261 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 262 SHUTDOWN_PRI_LAST + 100); 263 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 264 SHUTDOWN_PRI_LAST + 200); 265 } 266 267 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 268 269 /* 270 * The only reason this exists is to create the /dev/reroot/ directory, 271 * used by reroot code in init(8) as a mountpoint for tmpfs. 272 */ 273 static void 274 reroot_conf(void *unused) 275 { 276 int error; 277 struct cdev *cdev; 278 279 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 280 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 281 if (error != 0) { 282 printf("%s: failed to create device node, error %d", 283 __func__, error); 284 } 285 } 286 287 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 288 289 /* 290 * The system call that results in a reboot. 291 */ 292 /* ARGSUSED */ 293 int 294 sys_reboot(struct thread *td, struct reboot_args *uap) 295 { 296 int error; 297 298 error = 0; 299 #ifdef MAC 300 error = mac_system_check_reboot(td->td_ucred, uap->opt); 301 #endif 302 if (error == 0) 303 error = priv_check(td, PRIV_REBOOT); 304 if (error == 0) { 305 if (uap->opt & RB_REROOT) 306 error = kern_reroot(); 307 else 308 kern_reboot(uap->opt); 309 } 310 return (error); 311 } 312 313 static void 314 shutdown_nice_task_fn(void *arg, int pending __unused) 315 { 316 int howto; 317 318 howto = (uintptr_t)arg; 319 /* Send a signal to init(8) and have it shutdown the world. */ 320 PROC_LOCK(initproc); 321 if ((howto & RB_POWEROFF) != 0) { 322 BOOTTRACE("SIGUSR2 to init(8)"); 323 kern_psignal(initproc, SIGUSR2); 324 } else if ((howto & RB_POWERCYCLE) != 0) { 325 BOOTTRACE("SIGWINCH to init(8)"); 326 kern_psignal(initproc, SIGWINCH); 327 } else if ((howto & RB_HALT) != 0) { 328 BOOTTRACE("SIGUSR1 to init(8)"); 329 kern_psignal(initproc, SIGUSR1); 330 } else { 331 BOOTTRACE("SIGINT to init(8)"); 332 kern_psignal(initproc, SIGINT); 333 } 334 PROC_UNLOCK(initproc); 335 } 336 337 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 338 &shutdown_nice_task_fn, NULL); 339 340 /* 341 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 342 */ 343 void 344 shutdown_nice(int howto) 345 { 346 347 if (initproc != NULL && !SCHEDULER_STOPPED()) { 348 BOOTTRACE("shutdown initiated"); 349 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 350 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 351 } else { 352 /* 353 * No init(8) running, or scheduler would not allow it 354 * to run, so simply reboot. 355 */ 356 kern_reboot(howto | RB_NOSYNC); 357 } 358 } 359 360 static void 361 print_uptime(void) 362 { 363 int f; 364 struct timespec ts; 365 366 getnanouptime(&ts); 367 printf("Uptime: "); 368 f = 0; 369 if (ts.tv_sec >= 86400) { 370 printf("%ldd", (long)ts.tv_sec / 86400); 371 ts.tv_sec %= 86400; 372 f = 1; 373 } 374 if (f || ts.tv_sec >= 3600) { 375 printf("%ldh", (long)ts.tv_sec / 3600); 376 ts.tv_sec %= 3600; 377 f = 1; 378 } 379 if (f || ts.tv_sec >= 60) { 380 printf("%ldm", (long)ts.tv_sec / 60); 381 ts.tv_sec %= 60; 382 f = 1; 383 } 384 printf("%lds\n", (long)ts.tv_sec); 385 } 386 387 int 388 doadump(boolean_t textdump) 389 { 390 boolean_t coredump; 391 int error; 392 393 error = 0; 394 if (dumping) 395 return (EBUSY); 396 if (TAILQ_EMPTY(&dumper_configs)) 397 return (ENXIO); 398 399 dump_savectx(); 400 dumping++; 401 402 coredump = TRUE; 403 #ifdef DDB 404 if (textdump && textdump_pending) { 405 coredump = FALSE; 406 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 407 } 408 #endif 409 if (coredump) { 410 struct dumperinfo *di; 411 412 TAILQ_FOREACH(di, &dumper_configs, di_next) { 413 error = dumpsys(di); 414 if (error == 0) { 415 dumped_core = true; 416 break; 417 } 418 } 419 } 420 421 dumping--; 422 return (error); 423 } 424 425 /* 426 * Trace the shutdown reason. 427 */ 428 static void 429 reboottrace(int howto) 430 { 431 if ((howto & RB_DUMP) != 0) { 432 if ((howto & RB_HALT) != 0) 433 BOOTTRACE("system panic: halting..."); 434 if ((howto & RB_POWEROFF) != 0) 435 BOOTTRACE("system panic: powering off..."); 436 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 437 BOOTTRACE("system panic: rebooting..."); 438 } else { 439 if ((howto & RB_HALT) != 0) 440 BOOTTRACE("system halting..."); 441 if ((howto & RB_POWEROFF) != 0) 442 BOOTTRACE("system powering off..."); 443 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 444 BOOTTRACE("system rebooting..."); 445 } 446 } 447 448 /* 449 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or 450 * power off. 451 */ 452 void 453 kern_reboot(int howto) 454 { 455 static int once = 0; 456 457 if (initproc != NULL && curproc != initproc) 458 BOOTTRACE("kernel shutdown (dirty) started"); 459 else 460 BOOTTRACE("kernel shutdown (clean) started"); 461 462 /* 463 * Normal paths here don't hold Giant, but we can wind up here 464 * unexpectedly with it held. Drop it now so we don't have to 465 * drop and pick it up elsewhere. The paths it is locking will 466 * never be returned to, and it is preferable to preclude 467 * deadlock than to lock against code that won't ever 468 * continue. 469 */ 470 while (!SCHEDULER_STOPPED() && mtx_owned(&Giant)) 471 mtx_unlock(&Giant); 472 473 #if defined(SMP) 474 /* 475 * Bind us to the first CPU so that all shutdown code runs there. Some 476 * systems don't shutdown properly (i.e., ACPI power off) if we 477 * run on another processor. 478 */ 479 if (!SCHEDULER_STOPPED()) { 480 thread_lock(curthread); 481 sched_bind(curthread, CPU_FIRST()); 482 thread_unlock(curthread); 483 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 484 ("%s: not running on cpu 0", __func__)); 485 } 486 #endif 487 /* We're in the process of rebooting. */ 488 rebooting = 1; 489 reboottrace(howto); 490 491 /* 492 * Do any callouts that should be done BEFORE syncing the filesystems. 493 */ 494 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 495 BOOTTRACE("shutdown pre sync complete"); 496 497 /* 498 * Now sync filesystems 499 */ 500 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 501 once = 1; 502 BOOTTRACE("bufshutdown begin"); 503 bufshutdown(show_busybufs); 504 BOOTTRACE("bufshutdown end"); 505 } 506 507 print_uptime(); 508 509 cngrab(); 510 511 /* 512 * Ok, now do things that assume all filesystem activity has 513 * been completed. 514 */ 515 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 516 BOOTTRACE("shutdown post sync complete"); 517 518 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 519 doadump(TRUE); 520 521 /* Now that we're going to really halt the system... */ 522 BOOTTRACE("shutdown final begin"); 523 524 if (shutdown_trace) 525 boottrace_dump_console(); 526 527 EVENTHANDLER_INVOKE(shutdown_final, howto); 528 529 /* 530 * Call this directly so that reset is attempted even if shutdown 531 * handlers are not yet registered. 532 */ 533 shutdown_reset(NULL, howto); 534 535 for(;;) ; /* safety against shutdown_reset not working */ 536 /* NOTREACHED */ 537 } 538 539 /* 540 * The system call that results in changing the rootfs. 541 */ 542 static int 543 kern_reroot(void) 544 { 545 struct vnode *oldrootvnode, *vp; 546 struct mount *mp, *devmp; 547 int error; 548 549 if (curproc != initproc) 550 return (EPERM); 551 552 /* 553 * Mark the filesystem containing currently-running executable 554 * (the temporary copy of init(8)) busy. 555 */ 556 vp = curproc->p_textvp; 557 error = vn_lock(vp, LK_SHARED); 558 if (error != 0) 559 return (error); 560 mp = vp->v_mount; 561 error = vfs_busy(mp, MBF_NOWAIT); 562 if (error != 0) { 563 vfs_ref(mp); 564 VOP_UNLOCK(vp); 565 error = vfs_busy(mp, 0); 566 vn_lock(vp, LK_SHARED | LK_RETRY); 567 vfs_rel(mp); 568 if (error != 0) { 569 VOP_UNLOCK(vp); 570 return (ENOENT); 571 } 572 if (VN_IS_DOOMED(vp)) { 573 VOP_UNLOCK(vp); 574 vfs_unbusy(mp); 575 return (ENOENT); 576 } 577 } 578 VOP_UNLOCK(vp); 579 580 /* 581 * Remove the filesystem containing currently-running executable 582 * from the mount list, to prevent it from being unmounted 583 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 584 * 585 * Also preserve /dev - forcibly unmounting it could cause driver 586 * reinitialization. 587 */ 588 589 vfs_ref(rootdevmp); 590 devmp = rootdevmp; 591 rootdevmp = NULL; 592 593 mtx_lock(&mountlist_mtx); 594 TAILQ_REMOVE(&mountlist, mp, mnt_list); 595 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 596 mtx_unlock(&mountlist_mtx); 597 598 oldrootvnode = rootvnode; 599 600 /* 601 * Unmount everything except for the two filesystems preserved above. 602 */ 603 vfs_unmountall(); 604 605 /* 606 * Add /dev back; vfs_mountroot() will move it into its new place. 607 */ 608 mtx_lock(&mountlist_mtx); 609 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 610 mtx_unlock(&mountlist_mtx); 611 rootdevmp = devmp; 612 vfs_rel(rootdevmp); 613 614 /* 615 * Mount the new rootfs. 616 */ 617 vfs_mountroot(); 618 619 /* 620 * Update all references to the old rootvnode. 621 */ 622 mountcheckdirs(oldrootvnode, rootvnode); 623 624 /* 625 * Add the temporary filesystem back and unbusy it. 626 */ 627 mtx_lock(&mountlist_mtx); 628 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 629 mtx_unlock(&mountlist_mtx); 630 vfs_unbusy(mp); 631 632 return (0); 633 } 634 635 /* 636 * If the shutdown was a clean halt, behave accordingly. 637 */ 638 static void 639 shutdown_halt(void *junk, int howto) 640 { 641 642 if (howto & RB_HALT) { 643 printf("\n"); 644 printf("The operating system has halted.\n"); 645 printf("Please press any key to reboot.\n\n"); 646 647 wdog_kern_pat(WD_TO_NEVER); 648 649 switch (cngetc()) { 650 case -1: /* No console, just die */ 651 cpu_halt(); 652 /* NOTREACHED */ 653 default: 654 break; 655 } 656 } 657 } 658 659 /* 660 * Check to see if the system panicked, pause and then reboot 661 * according to the specified delay. 662 */ 663 static void 664 shutdown_panic(void *junk, int howto) 665 { 666 int loop; 667 668 if (howto & RB_DUMP) { 669 if (panic_reboot_wait_time != 0) { 670 if (panic_reboot_wait_time != -1) { 671 printf("Automatic reboot in %d seconds - " 672 "press a key on the console to abort\n", 673 panic_reboot_wait_time); 674 for (loop = panic_reboot_wait_time * 10; 675 loop > 0; --loop) { 676 DELAY(1000 * 100); /* 1/10th second */ 677 /* Did user type a key? */ 678 if (cncheckc() != -1) 679 break; 680 } 681 if (!loop) 682 return; 683 } 684 } else { /* zero time specified - reboot NOW */ 685 return; 686 } 687 printf("--> Press a key on the console to reboot,\n"); 688 printf("--> or switch off the system now.\n"); 689 cngetc(); 690 } 691 } 692 693 /* 694 * Everything done, now reset 695 */ 696 static void 697 shutdown_reset(void *junk, int howto) 698 { 699 700 printf("Rebooting...\n"); 701 DELAY(reboot_wait_time * 1000000); 702 703 /* 704 * Acquiring smp_ipi_mtx here has a double effect: 705 * - it disables interrupts avoiding CPU0 preemption 706 * by fast handlers (thus deadlocking against other CPUs) 707 * - it avoids deadlocks against smp_rendezvous() or, more 708 * generally, threads busy-waiting, with this spinlock held, 709 * and waiting for responses by threads on other CPUs 710 * (ie. smp_tlb_shootdown()). 711 * 712 * For the !SMP case it just needs to handle the former problem. 713 */ 714 #ifdef SMP 715 mtx_lock_spin(&smp_ipi_mtx); 716 #else 717 spinlock_enter(); 718 #endif 719 720 cpu_reset(); 721 /* NOTREACHED */ /* assuming reset worked */ 722 } 723 724 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 725 static int kassert_warn_only = 0; 726 #ifdef KDB 727 static int kassert_do_kdb = 0; 728 #endif 729 #ifdef KTR 730 static int kassert_do_ktr = 0; 731 #endif 732 static int kassert_do_log = 1; 733 static int kassert_log_pps_limit = 4; 734 static int kassert_log_mute_at = 0; 735 static int kassert_log_panic_at = 0; 736 static int kassert_suppress_in_panic = 0; 737 static int kassert_warnings = 0; 738 739 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 740 "kassert options"); 741 742 #ifdef KASSERT_PANIC_OPTIONAL 743 #define KASSERT_RWTUN CTLFLAG_RWTUN 744 #else 745 #define KASSERT_RWTUN CTLFLAG_RDTUN 746 #endif 747 748 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 749 &kassert_warn_only, 0, 750 "KASSERT triggers a panic (0) or just a warning (1)"); 751 752 #ifdef KDB 753 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 754 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 755 #endif 756 757 #ifdef KTR 758 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 759 &kassert_do_ktr, 0, 760 "KASSERT does a KTR, set this to the KTRMASK you want"); 761 #endif 762 763 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 764 &kassert_do_log, 0, 765 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 766 767 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, 768 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 769 770 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 771 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 772 773 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 774 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 775 776 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 777 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 778 779 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 780 &kassert_suppress_in_panic, 0, 781 "KASSERTs will be suppressed while handling a panic"); 782 #undef KASSERT_RWTUN 783 784 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 785 786 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 787 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0, 788 kassert_sysctl_kassert, "I", 789 "set to trigger a test kassert"); 790 791 static int 792 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 793 { 794 int error, i; 795 796 error = sysctl_wire_old_buffer(req, sizeof(int)); 797 if (error == 0) { 798 i = 0; 799 error = sysctl_handle_int(oidp, &i, 0, req); 800 } 801 if (error != 0 || req->newptr == NULL) 802 return (error); 803 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 804 return (0); 805 } 806 807 #ifdef KASSERT_PANIC_OPTIONAL 808 /* 809 * Called by KASSERT, this decides if we will panic 810 * or if we will log via printf and/or ktr. 811 */ 812 void 813 kassert_panic(const char *fmt, ...) 814 { 815 static char buf[256]; 816 va_list ap; 817 818 va_start(ap, fmt); 819 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 820 va_end(ap); 821 822 /* 823 * If we are suppressing secondary panics, log the warning but do not 824 * re-enter panic/kdb. 825 */ 826 if (KERNEL_PANICKED() && kassert_suppress_in_panic) { 827 if (kassert_do_log) { 828 printf("KASSERT failed: %s\n", buf); 829 #ifdef KDB 830 if (trace_all_panics && trace_on_panic) 831 kdb_backtrace(); 832 #endif 833 } 834 return; 835 } 836 837 /* 838 * panic if we're not just warning, or if we've exceeded 839 * kassert_log_panic_at warnings. 840 */ 841 if (!kassert_warn_only || 842 (kassert_log_panic_at > 0 && 843 kassert_warnings >= kassert_log_panic_at)) { 844 va_start(ap, fmt); 845 vpanic(fmt, ap); 846 /* NORETURN */ 847 } 848 #ifdef KTR 849 if (kassert_do_ktr) 850 CTR0(ktr_mask, buf); 851 #endif /* KTR */ 852 /* 853 * log if we've not yet met the mute limit. 854 */ 855 if (kassert_do_log && 856 (kassert_log_mute_at == 0 || 857 kassert_warnings < kassert_log_mute_at)) { 858 static struct timeval lasterr; 859 static int curerr; 860 861 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 862 printf("KASSERT failed: %s\n", buf); 863 kdb_backtrace(); 864 } 865 } 866 #ifdef KDB 867 if (kassert_do_kdb) { 868 kdb_enter(KDB_WHY_KASSERT, buf); 869 } 870 #endif 871 atomic_add_int(&kassert_warnings, 1); 872 } 873 #endif /* KASSERT_PANIC_OPTIONAL */ 874 #endif 875 876 /* 877 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 878 * and then reboots. If we are called twice, then we avoid trying to sync 879 * the disks as this often leads to recursive panics. 880 */ 881 void 882 panic(const char *fmt, ...) 883 { 884 va_list ap; 885 886 va_start(ap, fmt); 887 vpanic(fmt, ap); 888 } 889 890 void 891 vpanic(const char *fmt, va_list ap) 892 { 893 #ifdef SMP 894 cpuset_t other_cpus; 895 #endif 896 struct thread *td = curthread; 897 int bootopt, newpanic; 898 static char buf[256]; 899 900 /* 901 * 'fmt' must not be NULL as it is put into 'panicstr' which is then 902 * used as a flag to detect if the kernel has panicked. Also, although 903 * vsnprintf() supports a NULL 'fmt' argument, use a more informative 904 * message. 905 */ 906 if (fmt == NULL) 907 fmt = "<no panic string!>"; 908 909 spinlock_enter(); 910 911 #ifdef SMP 912 /* 913 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 914 * concurrently entering panic. Only the winner will proceed 915 * further. 916 */ 917 if (!KERNEL_PANICKED() && !kdb_active) { 918 other_cpus = all_cpus; 919 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 920 stop_cpus_hard(other_cpus); 921 } 922 #endif 923 924 /* 925 * Ensure that the scheduler is stopped while panicking, even if panic 926 * has been entered from kdb. 927 */ 928 scheduler_stopped = true; 929 930 bootopt = RB_AUTOBOOT; 931 newpanic = 0; 932 if (KERNEL_PANICKED()) 933 bootopt |= RB_NOSYNC; 934 else { 935 bootopt |= RB_DUMP; 936 panicstr = fmt; 937 newpanic = 1; 938 } 939 940 /* Unmute when panic */ 941 cn_mute = 0; 942 943 if (newpanic) { 944 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 945 panicstr = buf; 946 cngrab(); 947 printf("panic: %s\n", buf); 948 } else { 949 printf("panic: "); 950 vprintf(fmt, ap); 951 printf("\n"); 952 } 953 #ifdef SMP 954 printf("cpuid = %d\n", PCPU_GET(cpuid)); 955 #endif 956 printf("time = %jd\n", (intmax_t )time_second); 957 #ifdef KDB 958 if ((newpanic || trace_all_panics) && trace_on_panic) 959 kdb_backtrace(); 960 if (debugger_on_panic) 961 kdb_enter(KDB_WHY_PANIC, "panic"); 962 else if (!newpanic && debugger_on_recursive_panic) 963 kdb_enter(KDB_WHY_PANIC, "re-panic"); 964 #endif 965 /*thread_lock(td); */ 966 td->td_flags |= TDF_INPANIC; 967 /* thread_unlock(td); */ 968 if (!sync_on_panic) 969 bootopt |= RB_NOSYNC; 970 if (poweroff_on_panic) 971 bootopt |= RB_POWEROFF; 972 if (powercycle_on_panic) 973 bootopt |= RB_POWERCYCLE; 974 kern_reboot(bootopt); 975 } 976 977 /* 978 * Support for poweroff delay. 979 * 980 * Please note that setting this delay too short might power off your machine 981 * before the write cache on your hard disk has been flushed, leading to 982 * soft-updates inconsistencies. 983 */ 984 #ifndef POWEROFF_DELAY 985 # define POWEROFF_DELAY 5000 986 #endif 987 static int poweroff_delay = POWEROFF_DELAY; 988 989 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 990 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 991 992 static void 993 poweroff_wait(void *junk, int howto) 994 { 995 996 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 997 return; 998 DELAY(poweroff_delay * 1000); 999 } 1000 1001 /* 1002 * Some system processes (e.g. syncer) need to be stopped at appropriate 1003 * points in their main loops prior to a system shutdown, so that they 1004 * won't interfere with the shutdown process (e.g. by holding a disk buf 1005 * to cause sync to fail). For each of these system processes, register 1006 * shutdown_kproc() as a handler for one of shutdown events. 1007 */ 1008 static int kproc_shutdown_wait = 60; 1009 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 1010 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 1011 1012 void 1013 kproc_shutdown(void *arg, int howto) 1014 { 1015 struct proc *p; 1016 int error; 1017 1018 if (SCHEDULER_STOPPED()) 1019 return; 1020 1021 p = (struct proc *)arg; 1022 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 1023 kproc_shutdown_wait, p->p_comm); 1024 error = kproc_suspend(p, kproc_shutdown_wait * hz); 1025 1026 if (error == EWOULDBLOCK) 1027 printf("timed out\n"); 1028 else 1029 printf("done\n"); 1030 } 1031 1032 void 1033 kthread_shutdown(void *arg, int howto) 1034 { 1035 struct thread *td; 1036 int error; 1037 1038 if (SCHEDULER_STOPPED()) 1039 return; 1040 1041 td = (struct thread *)arg; 1042 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 1043 kproc_shutdown_wait, td->td_name); 1044 error = kthread_suspend(td, kproc_shutdown_wait * hz); 1045 1046 if (error == EWOULDBLOCK) 1047 printf("timed out\n"); 1048 else 1049 printf("done\n"); 1050 } 1051 1052 static int 1053 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 1054 { 1055 char buf[256]; 1056 struct dumperinfo *di; 1057 struct sbuf sb; 1058 int error; 1059 1060 error = sysctl_wire_old_buffer(req, 0); 1061 if (error != 0) 1062 return (error); 1063 1064 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1065 1066 mtx_lock(&dumpconf_list_lk); 1067 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1068 if (di != TAILQ_FIRST(&dumper_configs)) 1069 sbuf_putc(&sb, ','); 1070 sbuf_cat(&sb, di->di_devname); 1071 } 1072 mtx_unlock(&dumpconf_list_lk); 1073 1074 error = sbuf_finish(&sb); 1075 sbuf_delete(&sb); 1076 return (error); 1077 } 1078 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, 1079 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0, 1080 dumpdevname_sysctl_handler, "A", 1081 "Device(s) for kernel dumps"); 1082 1083 static int _dump_append(struct dumperinfo *di, void *virtual, size_t length); 1084 1085 #ifdef EKCD 1086 static struct kerneldumpcrypto * 1087 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1088 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1089 { 1090 struct kerneldumpcrypto *kdc; 1091 struct kerneldumpkey *kdk; 1092 uint32_t dumpkeysize; 1093 1094 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1095 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1096 1097 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1098 1099 kdc->kdc_encryption = encryption; 1100 switch (kdc->kdc_encryption) { 1101 case KERNELDUMP_ENC_AES_256_CBC: 1102 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1103 goto failed; 1104 break; 1105 case KERNELDUMP_ENC_CHACHA20: 1106 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1107 break; 1108 default: 1109 goto failed; 1110 } 1111 1112 kdc->kdc_dumpkeysize = dumpkeysize; 1113 kdk = kdc->kdc_dumpkey; 1114 kdk->kdk_encryption = kdc->kdc_encryption; 1115 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1116 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1117 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1118 1119 return (kdc); 1120 failed: 1121 zfree(kdc, M_EKCD); 1122 return (NULL); 1123 } 1124 1125 static int 1126 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1127 { 1128 uint8_t hash[SHA256_DIGEST_LENGTH]; 1129 SHA256_CTX ctx; 1130 struct kerneldumpkey *kdk; 1131 int error; 1132 1133 error = 0; 1134 1135 if (kdc == NULL) 1136 return (0); 1137 1138 /* 1139 * When a user enters ddb it can write a crash dump multiple times. 1140 * Each time it should be encrypted using a different IV. 1141 */ 1142 SHA256_Init(&ctx); 1143 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1144 SHA256_Final(hash, &ctx); 1145 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1146 1147 switch (kdc->kdc_encryption) { 1148 case KERNELDUMP_ENC_AES_256_CBC: 1149 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1150 kdc->kdc_iv) <= 0) { 1151 error = EINVAL; 1152 goto out; 1153 } 1154 break; 1155 case KERNELDUMP_ENC_CHACHA20: 1156 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1157 break; 1158 default: 1159 error = EINVAL; 1160 goto out; 1161 } 1162 1163 kdk = kdc->kdc_dumpkey; 1164 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1165 out: 1166 explicit_bzero(hash, sizeof(hash)); 1167 return (error); 1168 } 1169 1170 static uint32_t 1171 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1172 { 1173 1174 if (kdc == NULL) 1175 return (0); 1176 return (kdc->kdc_dumpkeysize); 1177 } 1178 #endif /* EKCD */ 1179 1180 static struct kerneldumpcomp * 1181 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1182 { 1183 struct kerneldumpcomp *kdcomp; 1184 int format; 1185 1186 switch (compression) { 1187 case KERNELDUMP_COMP_GZIP: 1188 format = COMPRESS_GZIP; 1189 break; 1190 case KERNELDUMP_COMP_ZSTD: 1191 format = COMPRESS_ZSTD; 1192 break; 1193 default: 1194 return (NULL); 1195 } 1196 1197 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1198 kdcomp->kdc_format = compression; 1199 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1200 format, di->maxiosize, kerneldump_gzlevel, di); 1201 if (kdcomp->kdc_stream == NULL) { 1202 free(kdcomp, M_DUMPER); 1203 return (NULL); 1204 } 1205 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1206 return (kdcomp); 1207 } 1208 1209 static void 1210 kerneldumpcomp_destroy(struct dumperinfo *di) 1211 { 1212 struct kerneldumpcomp *kdcomp; 1213 1214 kdcomp = di->kdcomp; 1215 if (kdcomp == NULL) 1216 return; 1217 compressor_fini(kdcomp->kdc_stream); 1218 zfree(kdcomp->kdc_buf, M_DUMPER); 1219 free(kdcomp, M_DUMPER); 1220 } 1221 1222 /* 1223 * Free a dumper. Must not be present on global list. 1224 */ 1225 void 1226 dumper_destroy(struct dumperinfo *di) 1227 { 1228 1229 if (di == NULL) 1230 return; 1231 1232 zfree(di->blockbuf, M_DUMPER); 1233 kerneldumpcomp_destroy(di); 1234 #ifdef EKCD 1235 zfree(di->kdcrypto, M_EKCD); 1236 #endif 1237 zfree(di, M_DUMPER); 1238 } 1239 1240 /* 1241 * Allocate and set up a new dumper from the provided template. 1242 */ 1243 int 1244 dumper_create(const struct dumperinfo *di_template, const char *devname, 1245 const struct diocskerneldump_arg *kda, struct dumperinfo **dip) 1246 { 1247 struct dumperinfo *newdi; 1248 int error = 0; 1249 1250 if (dip == NULL) 1251 return (EINVAL); 1252 1253 /* Allocate a new dumper */ 1254 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, 1255 M_WAITOK | M_ZERO); 1256 memcpy(newdi, di_template, sizeof(*newdi)); 1257 newdi->blockbuf = NULL; 1258 newdi->kdcrypto = NULL; 1259 newdi->kdcomp = NULL; 1260 strcpy(newdi->di_devname, devname); 1261 1262 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1263 #ifdef EKCD 1264 newdi->kdcrypto = kerneldumpcrypto_create(newdi->blocksize, 1265 kda->kda_encryption, kda->kda_key, 1266 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1267 if (newdi->kdcrypto == NULL) { 1268 error = EINVAL; 1269 goto cleanup; 1270 } 1271 #else 1272 error = EOPNOTSUPP; 1273 goto cleanup; 1274 #endif 1275 } 1276 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1277 #ifdef EKCD 1278 /* 1279 * We can't support simultaneous unpadded block cipher 1280 * encryption and compression because there is no guarantee the 1281 * length of the compressed result is exactly a multiple of the 1282 * cipher block size. 1283 */ 1284 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1285 error = EOPNOTSUPP; 1286 goto cleanup; 1287 } 1288 #endif 1289 newdi->kdcomp = kerneldumpcomp_create(newdi, 1290 kda->kda_compression); 1291 if (newdi->kdcomp == NULL) { 1292 error = EINVAL; 1293 goto cleanup; 1294 } 1295 } 1296 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1297 1298 *dip = newdi; 1299 return (0); 1300 cleanup: 1301 dumper_destroy(newdi); 1302 return (error); 1303 } 1304 1305 /* 1306 * Create a new dumper and register it in the global list. 1307 */ 1308 int 1309 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1310 const struct diocskerneldump_arg *kda) 1311 { 1312 struct dumperinfo *newdi, *listdi; 1313 bool inserted; 1314 uint8_t index; 1315 int error; 1316 1317 index = kda->kda_index; 1318 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1319 index != KDA_REMOVE_ALL); 1320 1321 error = priv_check(curthread, PRIV_SETDUMPER); 1322 if (error != 0) 1323 return (error); 1324 1325 error = dumper_create(di_template, devname, kda, &newdi); 1326 if (error != 0) 1327 return (error); 1328 1329 /* Add the new configuration to the queue */ 1330 mtx_lock(&dumpconf_list_lk); 1331 inserted = false; 1332 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1333 if (index == 0) { 1334 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1335 inserted = true; 1336 break; 1337 } 1338 index--; 1339 } 1340 if (!inserted) 1341 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1342 mtx_unlock(&dumpconf_list_lk); 1343 1344 return (0); 1345 } 1346 1347 #ifdef DDB 1348 void 1349 dumper_ddb_insert(struct dumperinfo *newdi) 1350 { 1351 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1352 } 1353 1354 void 1355 dumper_ddb_remove(struct dumperinfo *di) 1356 { 1357 TAILQ_REMOVE(&dumper_configs, di, di_next); 1358 } 1359 #endif 1360 1361 static bool 1362 dumper_config_match(const struct dumperinfo *di, const char *devname, 1363 const struct diocskerneldump_arg *kda) 1364 { 1365 if (kda->kda_index == KDA_REMOVE_ALL) 1366 return (true); 1367 1368 if (strcmp(di->di_devname, devname) != 0) 1369 return (false); 1370 1371 /* 1372 * Allow wildcard removal of configs matching a device on g_dev_orphan. 1373 */ 1374 if (kda->kda_index == KDA_REMOVE_DEV) 1375 return (true); 1376 1377 if (di->kdcomp != NULL) { 1378 if (di->kdcomp->kdc_format != kda->kda_compression) 1379 return (false); 1380 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1381 return (false); 1382 #ifdef EKCD 1383 if (di->kdcrypto != NULL) { 1384 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1385 return (false); 1386 /* 1387 * Do we care to verify keys match to delete? It seems weird 1388 * to expect multiple fallback dump configurations on the same 1389 * device that only differ in crypto key. 1390 */ 1391 } else 1392 #endif 1393 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1394 return (false); 1395 1396 return (true); 1397 } 1398 1399 /* 1400 * Remove and free the requested dumper(s) from the global list. 1401 */ 1402 int 1403 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1404 { 1405 struct dumperinfo *di, *sdi; 1406 bool found; 1407 int error; 1408 1409 error = priv_check(curthread, PRIV_SETDUMPER); 1410 if (error != 0) 1411 return (error); 1412 1413 /* 1414 * Try to find a matching configuration, and kill it. 1415 * 1416 * NULL 'kda' indicates remove any configuration matching 'devname', 1417 * which may remove multiple configurations in atypical configurations. 1418 */ 1419 found = false; 1420 mtx_lock(&dumpconf_list_lk); 1421 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1422 if (dumper_config_match(di, devname, kda)) { 1423 found = true; 1424 TAILQ_REMOVE(&dumper_configs, di, di_next); 1425 dumper_destroy(di); 1426 } 1427 } 1428 mtx_unlock(&dumpconf_list_lk); 1429 1430 /* Only produce ENOENT if a more targeted match didn't match. */ 1431 if (!found && kda->kda_index == KDA_REMOVE) 1432 return (ENOENT); 1433 return (0); 1434 } 1435 1436 static int 1437 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1438 { 1439 1440 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1441 offset - di->mediaoffset + length > di->mediasize)) { 1442 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1443 printf( 1444 "Compressed dump failed to fit in device boundaries.\n"); 1445 return (E2BIG); 1446 } 1447 1448 printf("Attempt to write outside dump device boundaries.\n" 1449 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1450 (intmax_t)offset, (intmax_t)di->mediaoffset, 1451 (uintmax_t)length, (intmax_t)di->mediasize); 1452 return (ENOSPC); 1453 } 1454 if (length % di->blocksize != 0) { 1455 printf("Attempt to write partial block of length %ju.\n", 1456 (uintmax_t)length); 1457 return (EINVAL); 1458 } 1459 if (offset % di->blocksize != 0) { 1460 printf("Attempt to write at unaligned offset %jd.\n", 1461 (intmax_t)offset); 1462 return (EINVAL); 1463 } 1464 1465 return (0); 1466 } 1467 1468 #ifdef EKCD 1469 static int 1470 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1471 { 1472 1473 switch (kdc->kdc_encryption) { 1474 case KERNELDUMP_ENC_AES_256_CBC: 1475 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1476 8 * size, buf) <= 0) { 1477 return (EIO); 1478 } 1479 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1480 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1481 return (EIO); 1482 } 1483 break; 1484 case KERNELDUMP_ENC_CHACHA20: 1485 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1486 break; 1487 default: 1488 return (EINVAL); 1489 } 1490 1491 return (0); 1492 } 1493 1494 /* Encrypt data and call dumper. */ 1495 static int 1496 dump_encrypted_write(struct dumperinfo *di, void *virtual, off_t offset, 1497 size_t length) 1498 { 1499 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1500 struct kerneldumpcrypto *kdc; 1501 int error; 1502 size_t nbytes; 1503 1504 kdc = di->kdcrypto; 1505 1506 while (length > 0) { 1507 nbytes = MIN(length, sizeof(buf)); 1508 bcopy(virtual, buf, nbytes); 1509 1510 if (dump_encrypt(kdc, buf, nbytes) != 0) 1511 return (EIO); 1512 1513 error = dump_write(di, buf, offset, nbytes); 1514 if (error != 0) 1515 return (error); 1516 1517 offset += nbytes; 1518 virtual = (void *)((uint8_t *)virtual + nbytes); 1519 length -= nbytes; 1520 } 1521 1522 return (0); 1523 } 1524 #endif /* EKCD */ 1525 1526 static int 1527 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1528 { 1529 struct dumperinfo *di; 1530 size_t resid, rlength; 1531 int error; 1532 1533 di = arg; 1534 1535 if (length % di->blocksize != 0) { 1536 /* 1537 * This must be the final write after flushing the compression 1538 * stream. Write as many full blocks as possible and stash the 1539 * residual data in the dumper's block buffer. It will be 1540 * padded and written in dump_finish(). 1541 */ 1542 rlength = rounddown(length, di->blocksize); 1543 if (rlength != 0) { 1544 error = _dump_append(di, base, rlength); 1545 if (error != 0) 1546 return (error); 1547 } 1548 resid = length - rlength; 1549 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1550 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1551 di->kdcomp->kdc_resid = resid; 1552 return (EAGAIN); 1553 } 1554 return (_dump_append(di, base, length)); 1555 } 1556 1557 /* 1558 * Write kernel dump headers at the beginning and end of the dump extent. 1559 * Write the kernel dump encryption key after the leading header if we were 1560 * configured to do so. 1561 */ 1562 static int 1563 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1564 { 1565 #ifdef EKCD 1566 struct kerneldumpcrypto *kdc; 1567 #endif 1568 void *buf; 1569 size_t hdrsz; 1570 uint64_t extent; 1571 uint32_t keysize; 1572 int error; 1573 1574 hdrsz = sizeof(*kdh); 1575 if (hdrsz > di->blocksize) 1576 return (ENOMEM); 1577 1578 #ifdef EKCD 1579 kdc = di->kdcrypto; 1580 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1581 #else 1582 keysize = 0; 1583 #endif 1584 1585 /* 1586 * If the dump device has special handling for headers, let it take care 1587 * of writing them out. 1588 */ 1589 if (di->dumper_hdr != NULL) 1590 return (di->dumper_hdr(di, kdh)); 1591 1592 if (hdrsz == di->blocksize) 1593 buf = kdh; 1594 else { 1595 buf = di->blockbuf; 1596 memset(buf, 0, di->blocksize); 1597 memcpy(buf, kdh, hdrsz); 1598 } 1599 1600 extent = dtoh64(kdh->dumpextent); 1601 #ifdef EKCD 1602 if (kdc != NULL) { 1603 error = dump_write(di, kdc->kdc_dumpkey, 1604 di->mediaoffset + di->mediasize - di->blocksize - extent - 1605 keysize, keysize); 1606 if (error != 0) 1607 return (error); 1608 } 1609 #endif 1610 1611 error = dump_write(di, buf, 1612 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1613 keysize, di->blocksize); 1614 if (error == 0) 1615 error = dump_write(di, buf, di->mediaoffset + di->mediasize - 1616 di->blocksize, di->blocksize); 1617 return (error); 1618 } 1619 1620 /* 1621 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1622 * protect us from metadata and metadata from us. 1623 */ 1624 #define SIZEOF_METADATA (64 * 1024) 1625 1626 /* 1627 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1628 * if requested, and make sure that we have enough space on the dump device. 1629 * 1630 * We set things up so that the dump ends before the last sector of the dump 1631 * device, at which the trailing header is written. 1632 * 1633 * +-----------+------+-----+----------------------------+------+ 1634 * | | lhdr | key | ... kernel dump ... | thdr | 1635 * +-----------+------+-----+----------------------------+------+ 1636 * 1 blk opt <------- dump extent --------> 1 blk 1637 * 1638 * Dumps written using dump_append() start at the beginning of the extent. 1639 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1640 * will not. The true length of the dump is recorded in the leading and trailing 1641 * headers once the dump has been completed. 1642 * 1643 * The dump device may provide a callback, in which case it will initialize 1644 * dumpoff and take care of laying out the headers. 1645 */ 1646 int 1647 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1648 { 1649 #ifdef EKCD 1650 struct kerneldumpcrypto *kdc; 1651 #endif 1652 void *key; 1653 uint64_t dumpextent, span; 1654 uint32_t keysize; 1655 int error; 1656 1657 #ifdef EKCD 1658 /* Send the key before the dump so a partial dump is still usable. */ 1659 kdc = di->kdcrypto; 1660 error = kerneldumpcrypto_init(kdc); 1661 if (error != 0) 1662 return (error); 1663 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1664 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1665 #else 1666 error = 0; 1667 keysize = 0; 1668 key = NULL; 1669 #endif 1670 1671 if (di->dumper_start != NULL) { 1672 error = di->dumper_start(di, key, keysize); 1673 } else { 1674 dumpextent = dtoh64(kdh->dumpextent); 1675 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1676 keysize; 1677 if (di->mediasize < span) { 1678 if (di->kdcomp == NULL) 1679 return (E2BIG); 1680 1681 /* 1682 * We don't yet know how much space the compressed dump 1683 * will occupy, so try to use the whole swap partition 1684 * (minus the first 64KB) in the hope that the 1685 * compressed dump will fit. If that doesn't turn out to 1686 * be enough, the bounds checking in dump_write() 1687 * will catch us and cause the dump to fail. 1688 */ 1689 dumpextent = di->mediasize - span + dumpextent; 1690 kdh->dumpextent = htod64(dumpextent); 1691 } 1692 1693 /* 1694 * The offset at which to begin writing the dump. 1695 */ 1696 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1697 dumpextent; 1698 } 1699 di->origdumpoff = di->dumpoff; 1700 return (error); 1701 } 1702 1703 static int 1704 _dump_append(struct dumperinfo *di, void *virtual, size_t length) 1705 { 1706 int error; 1707 1708 #ifdef EKCD 1709 if (di->kdcrypto != NULL) 1710 error = dump_encrypted_write(di, virtual, di->dumpoff, length); 1711 else 1712 #endif 1713 error = dump_write(di, virtual, di->dumpoff, length); 1714 if (error == 0) 1715 di->dumpoff += length; 1716 return (error); 1717 } 1718 1719 /* 1720 * Write to the dump device starting at dumpoff. When compression is enabled, 1721 * writes to the device will be performed using a callback that gets invoked 1722 * when the compression stream's output buffer is full. 1723 */ 1724 int 1725 dump_append(struct dumperinfo *di, void *virtual, size_t length) 1726 { 1727 void *buf; 1728 1729 if (di->kdcomp != NULL) { 1730 /* Bounce through a buffer to avoid CRC errors. */ 1731 if (length > di->maxiosize) 1732 return (EINVAL); 1733 buf = di->kdcomp->kdc_buf; 1734 memmove(buf, virtual, length); 1735 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1736 } 1737 return (_dump_append(di, virtual, length)); 1738 } 1739 1740 /* 1741 * Write to the dump device at the specified offset. 1742 */ 1743 int 1744 dump_write(struct dumperinfo *di, void *virtual, off_t offset, size_t length) 1745 { 1746 int error; 1747 1748 error = dump_check_bounds(di, offset, length); 1749 if (error != 0) 1750 return (error); 1751 return (di->dumper(di->priv, virtual, offset, length)); 1752 } 1753 1754 /* 1755 * Perform kernel dump finalization: flush the compression stream, if necessary, 1756 * write the leading and trailing kernel dump headers now that we know the true 1757 * length of the dump, and optionally write the encryption key following the 1758 * leading header. 1759 */ 1760 int 1761 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1762 { 1763 int error; 1764 1765 if (di->kdcomp != NULL) { 1766 error = compressor_flush(di->kdcomp->kdc_stream); 1767 if (error == EAGAIN) { 1768 /* We have residual data in di->blockbuf. */ 1769 error = _dump_append(di, di->blockbuf, di->blocksize); 1770 if (error == 0) 1771 /* Compensate for _dump_append()'s adjustment. */ 1772 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1773 di->kdcomp->kdc_resid = 0; 1774 } 1775 if (error != 0) 1776 return (error); 1777 1778 /* 1779 * We now know the size of the compressed dump, so update the 1780 * header accordingly and recompute parity. 1781 */ 1782 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1783 kdh->parity = 0; 1784 kdh->parity = kerneldump_parity(kdh); 1785 1786 compressor_reset(di->kdcomp->kdc_stream); 1787 } 1788 1789 error = dump_write_headers(di, kdh); 1790 if (error != 0) 1791 return (error); 1792 1793 (void)dump_write(di, NULL, 0, 0); 1794 return (0); 1795 } 1796 1797 void 1798 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1799 const char *magic, uint32_t archver, uint64_t dumplen) 1800 { 1801 size_t dstsize; 1802 1803 bzero(kdh, sizeof(*kdh)); 1804 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1805 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1806 kdh->version = htod32(KERNELDUMPVERSION); 1807 kdh->architectureversion = htod32(archver); 1808 kdh->dumplength = htod64(dumplen); 1809 kdh->dumpextent = kdh->dumplength; 1810 kdh->dumptime = htod64(time_second); 1811 #ifdef EKCD 1812 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1813 #else 1814 kdh->dumpkeysize = 0; 1815 #endif 1816 kdh->blocksize = htod32(di->blocksize); 1817 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1818 dstsize = sizeof(kdh->versionstring); 1819 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1820 kdh->versionstring[dstsize - 2] = '\n'; 1821 if (panicstr != NULL) 1822 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1823 if (di->kdcomp != NULL) 1824 kdh->compression = di->kdcomp->kdc_format; 1825 kdh->parity = kerneldump_parity(kdh); 1826 } 1827 1828 #ifdef DDB 1829 DB_SHOW_COMMAND_FLAGS(panic, db_show_panic, DB_CMD_MEMSAFE) 1830 { 1831 1832 if (panicstr == NULL) 1833 db_printf("panicstr not set\n"); 1834 else 1835 db_printf("panic: %s\n", panicstr); 1836 } 1837 #endif 1838