1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_ddb.h" 43 #include "opt_ekcd.h" 44 #include "opt_kdb.h" 45 #include "opt_panic.h" 46 #include "opt_printf.h" 47 #include "opt_sched.h" 48 #include "opt_watchdog.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/bio.h> 53 #include <sys/boottrace.h> 54 #include <sys/buf.h> 55 #include <sys/conf.h> 56 #include <sys/compressor.h> 57 #include <sys/cons.h> 58 #include <sys/disk.h> 59 #include <sys/eventhandler.h> 60 #include <sys/filedesc.h> 61 #include <sys/jail.h> 62 #include <sys/kdb.h> 63 #include <sys/kernel.h> 64 #include <sys/kerneldump.h> 65 #include <sys/kthread.h> 66 #include <sys/ktr.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/mount.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/reboot.h> 73 #include <sys/resourcevar.h> 74 #include <sys/rwlock.h> 75 #include <sys/sbuf.h> 76 #include <sys/sched.h> 77 #include <sys/smp.h> 78 #include <sys/sysctl.h> 79 #include <sys/sysproto.h> 80 #include <sys/taskqueue.h> 81 #include <sys/vnode.h> 82 #include <sys/watchdog.h> 83 84 #include <crypto/chacha20/chacha.h> 85 #include <crypto/rijndael/rijndael-api-fst.h> 86 #include <crypto/sha2/sha256.h> 87 88 #include <ddb/ddb.h> 89 90 #include <machine/cpu.h> 91 #include <machine/dump.h> 92 #include <machine/pcb.h> 93 #include <machine/smp.h> 94 95 #include <security/mac/mac_framework.h> 96 97 #include <vm/vm.h> 98 #include <vm/vm_object.h> 99 #include <vm/vm_page.h> 100 #include <vm/vm_pager.h> 101 #include <vm/swap_pager.h> 102 103 #include <sys/signalvar.h> 104 105 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 106 107 #ifndef PANIC_REBOOT_WAIT_TIME 108 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 109 #endif 110 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 111 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, 112 &panic_reboot_wait_time, 0, 113 "Seconds to wait before rebooting after a panic"); 114 115 /* 116 * Note that stdarg.h and the ANSI style va_start macro is used for both 117 * ANSI and traditional C compilers. 118 */ 119 #include <machine/stdarg.h> 120 121 #ifdef KDB 122 #ifdef KDB_UNATTENDED 123 int debugger_on_panic = 0; 124 #else 125 int debugger_on_panic = 1; 126 #endif 127 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 128 CTLFLAG_RWTUN | CTLFLAG_SECURE, 129 &debugger_on_panic, 0, "Run debugger on kernel panic"); 130 131 static bool debugger_on_recursive_panic = false; 132 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic, 133 CTLFLAG_RWTUN | CTLFLAG_SECURE, 134 &debugger_on_recursive_panic, 0, "Run debugger on recursive kernel panic"); 135 136 int debugger_on_trap = 0; 137 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, 138 CTLFLAG_RWTUN | CTLFLAG_SECURE, 139 &debugger_on_trap, 0, "Run debugger on kernel trap before panic"); 140 141 #ifdef KDB_TRACE 142 static int trace_on_panic = 1; 143 static bool trace_all_panics = true; 144 #else 145 static int trace_on_panic = 0; 146 static bool trace_all_panics = false; 147 #endif 148 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 149 CTLFLAG_RWTUN | CTLFLAG_SECURE, 150 &trace_on_panic, 0, "Print stack trace on kernel panic"); 151 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, 152 &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); 153 #endif /* KDB */ 154 155 static int sync_on_panic = 0; 156 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 157 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 158 159 static bool poweroff_on_panic = 0; 160 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, 161 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); 162 163 static bool powercycle_on_panic = 0; 164 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, 165 &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); 166 167 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 168 "Shutdown environment"); 169 170 #ifndef DIAGNOSTIC 171 static int show_busybufs; 172 #else 173 static int show_busybufs = 1; 174 #endif 175 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 176 &show_busybufs, 0, 177 "Show busy buffers during shutdown"); 178 179 int suspend_blocked = 0; 180 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 181 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 182 183 #ifdef EKCD 184 FEATURE(ekcd, "Encrypted kernel crash dumps support"); 185 186 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); 187 188 struct kerneldumpcrypto { 189 uint8_t kdc_encryption; 190 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; 191 union { 192 struct { 193 keyInstance aes_ki; 194 cipherInstance aes_ci; 195 } u_aes; 196 struct chacha_ctx u_chacha; 197 } u; 198 #define kdc_ki u.u_aes.aes_ki 199 #define kdc_ci u.u_aes.aes_ci 200 #define kdc_chacha u.u_chacha 201 uint32_t kdc_dumpkeysize; 202 struct kerneldumpkey kdc_dumpkey[]; 203 }; 204 #endif 205 206 struct kerneldumpcomp { 207 uint8_t kdc_format; 208 struct compressor *kdc_stream; 209 uint8_t *kdc_buf; 210 size_t kdc_resid; 211 }; 212 213 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di, 214 uint8_t compression); 215 static void kerneldumpcomp_destroy(struct dumperinfo *di); 216 static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg); 217 218 static int kerneldump_gzlevel = 6; 219 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, 220 &kerneldump_gzlevel, 0, 221 "Kernel crash dump compression level"); 222 223 /* 224 * Variable panicstr contains argument to first call to panic; used as flag 225 * to indicate that the kernel has already called panic. 226 */ 227 const char *panicstr; 228 bool __read_frequently panicked; 229 230 int __read_mostly dumping; /* system is dumping */ 231 int rebooting; /* system is rebooting */ 232 /* 233 * Used to serialize between sysctl kern.shutdown.dumpdevname and list 234 * modifications via ioctl. 235 */ 236 static struct mtx dumpconf_list_lk; 237 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); 238 239 /* Our selected dumper(s). */ 240 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 241 TAILQ_HEAD_INITIALIZER(dumper_configs); 242 243 /* Context information for dump-debuggers. */ 244 static struct pcb dumppcb; /* Registers. */ 245 lwpid_t dumptid; /* Thread ID. */ 246 247 static struct cdevsw reroot_cdevsw = { 248 .d_version = D_VERSION, 249 .d_name = "reroot", 250 }; 251 252 static void poweroff_wait(void *, int); 253 static void shutdown_halt(void *junk, int howto); 254 static void shutdown_panic(void *junk, int howto); 255 static void shutdown_reset(void *junk, int howto); 256 static int kern_reroot(void); 257 258 /* register various local shutdown events */ 259 static void 260 shutdown_conf(void *unused) 261 { 262 263 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 264 SHUTDOWN_PRI_FIRST); 265 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 266 SHUTDOWN_PRI_LAST + 100); 267 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 268 SHUTDOWN_PRI_LAST + 100); 269 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 270 SHUTDOWN_PRI_LAST + 200); 271 } 272 273 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 274 275 /* 276 * The only reason this exists is to create the /dev/reroot/ directory, 277 * used by reroot code in init(8) as a mountpoint for tmpfs. 278 */ 279 static void 280 reroot_conf(void *unused) 281 { 282 int error; 283 struct cdev *cdev; 284 285 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 286 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 287 if (error != 0) { 288 printf("%s: failed to create device node, error %d", 289 __func__, error); 290 } 291 } 292 293 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 294 295 /* 296 * The system call that results in a reboot. 297 */ 298 /* ARGSUSED */ 299 int 300 sys_reboot(struct thread *td, struct reboot_args *uap) 301 { 302 int error; 303 304 error = 0; 305 #ifdef MAC 306 error = mac_system_check_reboot(td->td_ucred, uap->opt); 307 #endif 308 if (error == 0) 309 error = priv_check(td, PRIV_REBOOT); 310 if (error == 0) { 311 if (uap->opt & RB_REROOT) 312 error = kern_reroot(); 313 else 314 kern_reboot(uap->opt); 315 } 316 return (error); 317 } 318 319 static void 320 shutdown_nice_task_fn(void *arg, int pending __unused) 321 { 322 int howto; 323 324 howto = (uintptr_t)arg; 325 /* Send a signal to init(8) and have it shutdown the world. */ 326 PROC_LOCK(initproc); 327 if ((howto & RB_POWEROFF) != 0) { 328 BOOTTRACE("SIGUSR2 to init(8)"); 329 kern_psignal(initproc, SIGUSR2); 330 } else if ((howto & RB_POWERCYCLE) != 0) { 331 BOOTTRACE("SIGWINCH to init(8)"); 332 kern_psignal(initproc, SIGWINCH); 333 } else if ((howto & RB_HALT) != 0) { 334 BOOTTRACE("SIGUSR1 to init(8)"); 335 kern_psignal(initproc, SIGUSR1); 336 } else { 337 BOOTTRACE("SIGINT to init(8)"); 338 kern_psignal(initproc, SIGINT); 339 } 340 PROC_UNLOCK(initproc); 341 } 342 343 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 344 &shutdown_nice_task_fn, NULL); 345 346 /* 347 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 348 */ 349 void 350 shutdown_nice(int howto) 351 { 352 353 if (initproc != NULL && !SCHEDULER_STOPPED()) { 354 BOOTTRACE("shutdown initiated"); 355 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 356 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 357 } else { 358 /* 359 * No init(8) running, or scheduler would not allow it 360 * to run, so simply reboot. 361 */ 362 kern_reboot(howto | RB_NOSYNC); 363 } 364 } 365 366 static void 367 print_uptime(void) 368 { 369 int f; 370 struct timespec ts; 371 372 getnanouptime(&ts); 373 printf("Uptime: "); 374 f = 0; 375 if (ts.tv_sec >= 86400) { 376 printf("%ldd", (long)ts.tv_sec / 86400); 377 ts.tv_sec %= 86400; 378 f = 1; 379 } 380 if (f || ts.tv_sec >= 3600) { 381 printf("%ldh", (long)ts.tv_sec / 3600); 382 ts.tv_sec %= 3600; 383 f = 1; 384 } 385 if (f || ts.tv_sec >= 60) { 386 printf("%ldm", (long)ts.tv_sec / 60); 387 ts.tv_sec %= 60; 388 f = 1; 389 } 390 printf("%lds\n", (long)ts.tv_sec); 391 } 392 393 /* 394 * Set up a context that can be extracted from the dump. 395 */ 396 void 397 dump_savectx(void) 398 { 399 400 savectx(&dumppcb); 401 dumptid = curthread->td_tid; 402 } 403 404 int 405 doadump(boolean_t textdump) 406 { 407 boolean_t coredump; 408 int error; 409 410 error = 0; 411 if (dumping) 412 return (EBUSY); 413 if (TAILQ_EMPTY(&dumper_configs)) 414 return (ENXIO); 415 416 dump_savectx(); 417 dumping++; 418 419 coredump = TRUE; 420 #ifdef DDB 421 if (textdump && textdump_pending) { 422 coredump = FALSE; 423 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 424 } 425 #endif 426 if (coredump) { 427 struct dumperinfo *di; 428 429 TAILQ_FOREACH(di, &dumper_configs, di_next) { 430 error = dumpsys(di); 431 if (error == 0) 432 break; 433 } 434 } 435 436 dumping--; 437 return (error); 438 } 439 440 /* 441 * Trace the shutdown reason. 442 */ 443 static void 444 reboottrace(int howto) 445 { 446 if ((howto & RB_DUMP) != 0) { 447 if ((howto & RB_HALT) != 0) 448 BOOTTRACE("system panic: halting..."); 449 if ((howto & RB_POWEROFF) != 0) 450 BOOTTRACE("system panic: powering off..."); 451 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 452 BOOTTRACE("system panic: rebooting..."); 453 } else { 454 if ((howto & RB_HALT) != 0) 455 BOOTTRACE("system halting..."); 456 if ((howto & RB_POWEROFF) != 0) 457 BOOTTRACE("system powering off..."); 458 if ((howto & (RB_HALT|RB_POWEROFF)) == 0) 459 BOOTTRACE("system rebooting..."); 460 } 461 } 462 463 /* 464 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or 465 * power off. 466 */ 467 void 468 kern_reboot(int howto) 469 { 470 static int once = 0; 471 472 if (initproc != NULL && curproc != initproc) 473 BOOTTRACE("kernel shutdown (dirty) started"); 474 else 475 BOOTTRACE("kernel shutdown (clean) started"); 476 477 /* 478 * Normal paths here don't hold Giant, but we can wind up here 479 * unexpectedly with it held. Drop it now so we don't have to 480 * drop and pick it up elsewhere. The paths it is locking will 481 * never be returned to, and it is preferable to preclude 482 * deadlock than to lock against code that won't ever 483 * continue. 484 */ 485 while (mtx_owned(&Giant)) 486 mtx_unlock(&Giant); 487 488 #if defined(SMP) 489 /* 490 * Bind us to the first CPU so that all shutdown code runs there. Some 491 * systems don't shutdown properly (i.e., ACPI power off) if we 492 * run on another processor. 493 */ 494 if (!SCHEDULER_STOPPED()) { 495 thread_lock(curthread); 496 sched_bind(curthread, CPU_FIRST()); 497 thread_unlock(curthread); 498 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 499 ("%s: not running on cpu 0", __func__)); 500 } 501 #endif 502 /* We're in the process of rebooting. */ 503 rebooting = 1; 504 reboottrace(howto); 505 506 /* We are out of the debugger now. */ 507 kdb_active = 0; 508 509 /* 510 * Do any callouts that should be done BEFORE syncing the filesystems. 511 */ 512 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 513 BOOTTRACE("shutdown pre sync complete"); 514 515 /* 516 * Now sync filesystems 517 */ 518 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 519 once = 1; 520 BOOTTRACE("bufshutdown begin"); 521 bufshutdown(show_busybufs); 522 BOOTTRACE("bufshutdown end"); 523 } 524 525 print_uptime(); 526 527 cngrab(); 528 529 /* 530 * Ok, now do things that assume all filesystem activity has 531 * been completed. 532 */ 533 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 534 BOOTTRACE("shutdown post sync complete"); 535 536 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 537 doadump(TRUE); 538 539 /* Now that we're going to really halt the system... */ 540 BOOTTRACE("shutdown final begin"); 541 542 if (shutdown_trace) 543 boottrace_dump_console(); 544 545 EVENTHANDLER_INVOKE(shutdown_final, howto); 546 547 for(;;) ; /* safety against shutdown_reset not working */ 548 /* NOTREACHED */ 549 } 550 551 /* 552 * The system call that results in changing the rootfs. 553 */ 554 static int 555 kern_reroot(void) 556 { 557 struct vnode *oldrootvnode, *vp; 558 struct mount *mp, *devmp; 559 int error; 560 561 if (curproc != initproc) 562 return (EPERM); 563 564 /* 565 * Mark the filesystem containing currently-running executable 566 * (the temporary copy of init(8)) busy. 567 */ 568 vp = curproc->p_textvp; 569 error = vn_lock(vp, LK_SHARED); 570 if (error != 0) 571 return (error); 572 mp = vp->v_mount; 573 error = vfs_busy(mp, MBF_NOWAIT); 574 if (error != 0) { 575 vfs_ref(mp); 576 VOP_UNLOCK(vp); 577 error = vfs_busy(mp, 0); 578 vn_lock(vp, LK_SHARED | LK_RETRY); 579 vfs_rel(mp); 580 if (error != 0) { 581 VOP_UNLOCK(vp); 582 return (ENOENT); 583 } 584 if (VN_IS_DOOMED(vp)) { 585 VOP_UNLOCK(vp); 586 vfs_unbusy(mp); 587 return (ENOENT); 588 } 589 } 590 VOP_UNLOCK(vp); 591 592 /* 593 * Remove the filesystem containing currently-running executable 594 * from the mount list, to prevent it from being unmounted 595 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 596 * 597 * Also preserve /dev - forcibly unmounting it could cause driver 598 * reinitialization. 599 */ 600 601 vfs_ref(rootdevmp); 602 devmp = rootdevmp; 603 rootdevmp = NULL; 604 605 mtx_lock(&mountlist_mtx); 606 TAILQ_REMOVE(&mountlist, mp, mnt_list); 607 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 608 mtx_unlock(&mountlist_mtx); 609 610 oldrootvnode = rootvnode; 611 612 /* 613 * Unmount everything except for the two filesystems preserved above. 614 */ 615 vfs_unmountall(); 616 617 /* 618 * Add /dev back; vfs_mountroot() will move it into its new place. 619 */ 620 mtx_lock(&mountlist_mtx); 621 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 622 mtx_unlock(&mountlist_mtx); 623 rootdevmp = devmp; 624 vfs_rel(rootdevmp); 625 626 /* 627 * Mount the new rootfs. 628 */ 629 vfs_mountroot(); 630 631 /* 632 * Update all references to the old rootvnode. 633 */ 634 mountcheckdirs(oldrootvnode, rootvnode); 635 636 /* 637 * Add the temporary filesystem back and unbusy it. 638 */ 639 mtx_lock(&mountlist_mtx); 640 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 641 mtx_unlock(&mountlist_mtx); 642 vfs_unbusy(mp); 643 644 return (0); 645 } 646 647 /* 648 * If the shutdown was a clean halt, behave accordingly. 649 */ 650 static void 651 shutdown_halt(void *junk, int howto) 652 { 653 654 if (howto & RB_HALT) { 655 printf("\n"); 656 printf("The operating system has halted.\n"); 657 printf("Please press any key to reboot.\n\n"); 658 659 wdog_kern_pat(WD_TO_NEVER); 660 661 switch (cngetc()) { 662 case -1: /* No console, just die */ 663 cpu_halt(); 664 /* NOTREACHED */ 665 default: 666 break; 667 } 668 } 669 } 670 671 /* 672 * Check to see if the system panicked, pause and then reboot 673 * according to the specified delay. 674 */ 675 static void 676 shutdown_panic(void *junk, int howto) 677 { 678 int loop; 679 680 if (howto & RB_DUMP) { 681 if (panic_reboot_wait_time != 0) { 682 if (panic_reboot_wait_time != -1) { 683 printf("Automatic reboot in %d seconds - " 684 "press a key on the console to abort\n", 685 panic_reboot_wait_time); 686 for (loop = panic_reboot_wait_time * 10; 687 loop > 0; --loop) { 688 DELAY(1000 * 100); /* 1/10th second */ 689 /* Did user type a key? */ 690 if (cncheckc() != -1) 691 break; 692 } 693 if (!loop) 694 return; 695 } 696 } else { /* zero time specified - reboot NOW */ 697 return; 698 } 699 printf("--> Press a key on the console to reboot,\n"); 700 printf("--> or switch off the system now.\n"); 701 cngetc(); 702 } 703 } 704 705 /* 706 * Everything done, now reset 707 */ 708 static void 709 shutdown_reset(void *junk, int howto) 710 { 711 712 printf("Rebooting...\n"); 713 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 714 715 /* 716 * Acquiring smp_ipi_mtx here has a double effect: 717 * - it disables interrupts avoiding CPU0 preemption 718 * by fast handlers (thus deadlocking against other CPUs) 719 * - it avoids deadlocks against smp_rendezvous() or, more 720 * generally, threads busy-waiting, with this spinlock held, 721 * and waiting for responses by threads on other CPUs 722 * (ie. smp_tlb_shootdown()). 723 * 724 * For the !SMP case it just needs to handle the former problem. 725 */ 726 #ifdef SMP 727 mtx_lock_spin(&smp_ipi_mtx); 728 #else 729 spinlock_enter(); 730 #endif 731 732 cpu_reset(); 733 /* NOTREACHED */ /* assuming reset worked */ 734 } 735 736 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 737 static int kassert_warn_only = 0; 738 #ifdef KDB 739 static int kassert_do_kdb = 0; 740 #endif 741 #ifdef KTR 742 static int kassert_do_ktr = 0; 743 #endif 744 static int kassert_do_log = 1; 745 static int kassert_log_pps_limit = 4; 746 static int kassert_log_mute_at = 0; 747 static int kassert_log_panic_at = 0; 748 static int kassert_suppress_in_panic = 0; 749 static int kassert_warnings = 0; 750 751 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 752 "kassert options"); 753 754 #ifdef KASSERT_PANIC_OPTIONAL 755 #define KASSERT_RWTUN CTLFLAG_RWTUN 756 #else 757 #define KASSERT_RWTUN CTLFLAG_RDTUN 758 #endif 759 760 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 761 &kassert_warn_only, 0, 762 "KASSERT triggers a panic (0) or just a warning (1)"); 763 764 #ifdef KDB 765 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 766 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 767 #endif 768 769 #ifdef KTR 770 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 771 &kassert_do_ktr, 0, 772 "KASSERT does a KTR, set this to the KTRMASK you want"); 773 #endif 774 775 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 776 &kassert_do_log, 0, 777 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 778 779 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, 780 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 781 782 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 783 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 784 785 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 786 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 787 788 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 789 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 790 791 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 792 &kassert_suppress_in_panic, 0, 793 "KASSERTs will be suppressed while handling a panic"); 794 #undef KASSERT_RWTUN 795 796 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 797 798 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 799 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0, 800 kassert_sysctl_kassert, "I", 801 "set to trigger a test kassert"); 802 803 static int 804 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 805 { 806 int error, i; 807 808 error = sysctl_wire_old_buffer(req, sizeof(int)); 809 if (error == 0) { 810 i = 0; 811 error = sysctl_handle_int(oidp, &i, 0, req); 812 } 813 if (error != 0 || req->newptr == NULL) 814 return (error); 815 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 816 return (0); 817 } 818 819 #ifdef KASSERT_PANIC_OPTIONAL 820 /* 821 * Called by KASSERT, this decides if we will panic 822 * or if we will log via printf and/or ktr. 823 */ 824 void 825 kassert_panic(const char *fmt, ...) 826 { 827 static char buf[256]; 828 va_list ap; 829 830 va_start(ap, fmt); 831 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 832 va_end(ap); 833 834 /* 835 * If we are suppressing secondary panics, log the warning but do not 836 * re-enter panic/kdb. 837 */ 838 if (panicstr != NULL && kassert_suppress_in_panic) { 839 if (kassert_do_log) { 840 printf("KASSERT failed: %s\n", buf); 841 #ifdef KDB 842 if (trace_all_panics && trace_on_panic) 843 kdb_backtrace(); 844 #endif 845 } 846 return; 847 } 848 849 /* 850 * panic if we're not just warning, or if we've exceeded 851 * kassert_log_panic_at warnings. 852 */ 853 if (!kassert_warn_only || 854 (kassert_log_panic_at > 0 && 855 kassert_warnings >= kassert_log_panic_at)) { 856 va_start(ap, fmt); 857 vpanic(fmt, ap); 858 /* NORETURN */ 859 } 860 #ifdef KTR 861 if (kassert_do_ktr) 862 CTR0(ktr_mask, buf); 863 #endif /* KTR */ 864 /* 865 * log if we've not yet met the mute limit. 866 */ 867 if (kassert_do_log && 868 (kassert_log_mute_at == 0 || 869 kassert_warnings < kassert_log_mute_at)) { 870 static struct timeval lasterr; 871 static int curerr; 872 873 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 874 printf("KASSERT failed: %s\n", buf); 875 kdb_backtrace(); 876 } 877 } 878 #ifdef KDB 879 if (kassert_do_kdb) { 880 kdb_enter(KDB_WHY_KASSERT, buf); 881 } 882 #endif 883 atomic_add_int(&kassert_warnings, 1); 884 } 885 #endif /* KASSERT_PANIC_OPTIONAL */ 886 #endif 887 888 /* 889 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 890 * and then reboots. If we are called twice, then we avoid trying to sync 891 * the disks as this often leads to recursive panics. 892 */ 893 void 894 panic(const char *fmt, ...) 895 { 896 va_list ap; 897 898 va_start(ap, fmt); 899 vpanic(fmt, ap); 900 } 901 902 void 903 vpanic(const char *fmt, va_list ap) 904 { 905 #ifdef SMP 906 cpuset_t other_cpus; 907 #endif 908 struct thread *td = curthread; 909 int bootopt, newpanic; 910 static char buf[256]; 911 912 spinlock_enter(); 913 914 #ifdef SMP 915 /* 916 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 917 * concurrently entering panic. Only the winner will proceed 918 * further. 919 */ 920 if (panicstr == NULL && !kdb_active) { 921 other_cpus = all_cpus; 922 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 923 stop_cpus_hard(other_cpus); 924 } 925 #endif 926 927 /* 928 * Ensure that the scheduler is stopped while panicking, even if panic 929 * has been entered from kdb. 930 */ 931 td->td_stopsched = 1; 932 933 bootopt = RB_AUTOBOOT; 934 newpanic = 0; 935 if (panicstr) 936 bootopt |= RB_NOSYNC; 937 else { 938 bootopt |= RB_DUMP; 939 panicstr = fmt; 940 panicked = true; 941 newpanic = 1; 942 } 943 944 if (newpanic) { 945 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 946 panicstr = buf; 947 cngrab(); 948 printf("panic: %s\n", buf); 949 } else { 950 printf("panic: "); 951 vprintf(fmt, ap); 952 printf("\n"); 953 } 954 #ifdef SMP 955 printf("cpuid = %d\n", PCPU_GET(cpuid)); 956 #endif 957 printf("time = %jd\n", (intmax_t )time_second); 958 #ifdef KDB 959 if ((newpanic || trace_all_panics) && trace_on_panic) 960 kdb_backtrace(); 961 if (debugger_on_panic) 962 kdb_enter(KDB_WHY_PANIC, "panic"); 963 else if (!newpanic && debugger_on_recursive_panic) 964 kdb_enter(KDB_WHY_PANIC, "re-panic"); 965 #endif 966 /*thread_lock(td); */ 967 td->td_flags |= TDF_INPANIC; 968 /* thread_unlock(td); */ 969 if (!sync_on_panic) 970 bootopt |= RB_NOSYNC; 971 if (poweroff_on_panic) 972 bootopt |= RB_POWEROFF; 973 if (powercycle_on_panic) 974 bootopt |= RB_POWERCYCLE; 975 kern_reboot(bootopt); 976 } 977 978 /* 979 * Support for poweroff delay. 980 * 981 * Please note that setting this delay too short might power off your machine 982 * before the write cache on your hard disk has been flushed, leading to 983 * soft-updates inconsistencies. 984 */ 985 #ifndef POWEROFF_DELAY 986 # define POWEROFF_DELAY 5000 987 #endif 988 static int poweroff_delay = POWEROFF_DELAY; 989 990 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 991 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 992 993 static void 994 poweroff_wait(void *junk, int howto) 995 { 996 997 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 998 return; 999 DELAY(poweroff_delay * 1000); 1000 } 1001 1002 /* 1003 * Some system processes (e.g. syncer) need to be stopped at appropriate 1004 * points in their main loops prior to a system shutdown, so that they 1005 * won't interfere with the shutdown process (e.g. by holding a disk buf 1006 * to cause sync to fail). For each of these system processes, register 1007 * shutdown_kproc() as a handler for one of shutdown events. 1008 */ 1009 static int kproc_shutdown_wait = 60; 1010 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 1011 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 1012 1013 void 1014 kproc_shutdown(void *arg, int howto) 1015 { 1016 struct proc *p; 1017 int error; 1018 1019 if (panicstr) 1020 return; 1021 1022 p = (struct proc *)arg; 1023 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 1024 kproc_shutdown_wait, p->p_comm); 1025 error = kproc_suspend(p, kproc_shutdown_wait * hz); 1026 1027 if (error == EWOULDBLOCK) 1028 printf("timed out\n"); 1029 else 1030 printf("done\n"); 1031 } 1032 1033 void 1034 kthread_shutdown(void *arg, int howto) 1035 { 1036 struct thread *td; 1037 int error; 1038 1039 if (panicstr) 1040 return; 1041 1042 td = (struct thread *)arg; 1043 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 1044 kproc_shutdown_wait, td->td_name); 1045 error = kthread_suspend(td, kproc_shutdown_wait * hz); 1046 1047 if (error == EWOULDBLOCK) 1048 printf("timed out\n"); 1049 else 1050 printf("done\n"); 1051 } 1052 1053 static int 1054 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 1055 { 1056 char buf[256]; 1057 struct dumperinfo *di; 1058 struct sbuf sb; 1059 int error; 1060 1061 error = sysctl_wire_old_buffer(req, 0); 1062 if (error != 0) 1063 return (error); 1064 1065 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1066 1067 mtx_lock(&dumpconf_list_lk); 1068 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1069 if (di != TAILQ_FIRST(&dumper_configs)) 1070 sbuf_putc(&sb, ','); 1071 sbuf_cat(&sb, di->di_devname); 1072 } 1073 mtx_unlock(&dumpconf_list_lk); 1074 1075 error = sbuf_finish(&sb); 1076 sbuf_delete(&sb); 1077 return (error); 1078 } 1079 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, 1080 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0, 1081 dumpdevname_sysctl_handler, "A", 1082 "Device(s) for kernel dumps"); 1083 1084 static int _dump_append(struct dumperinfo *di, void *virtual, 1085 vm_offset_t physical, size_t length); 1086 1087 #ifdef EKCD 1088 static struct kerneldumpcrypto * 1089 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1090 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1091 { 1092 struct kerneldumpcrypto *kdc; 1093 struct kerneldumpkey *kdk; 1094 uint32_t dumpkeysize; 1095 1096 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1097 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1098 1099 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1100 1101 kdc->kdc_encryption = encryption; 1102 switch (kdc->kdc_encryption) { 1103 case KERNELDUMP_ENC_AES_256_CBC: 1104 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1105 goto failed; 1106 break; 1107 case KERNELDUMP_ENC_CHACHA20: 1108 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1109 break; 1110 default: 1111 goto failed; 1112 } 1113 1114 kdc->kdc_dumpkeysize = dumpkeysize; 1115 kdk = kdc->kdc_dumpkey; 1116 kdk->kdk_encryption = kdc->kdc_encryption; 1117 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1118 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1119 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1120 1121 return (kdc); 1122 failed: 1123 zfree(kdc, M_EKCD); 1124 return (NULL); 1125 } 1126 1127 static int 1128 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1129 { 1130 uint8_t hash[SHA256_DIGEST_LENGTH]; 1131 SHA256_CTX ctx; 1132 struct kerneldumpkey *kdk; 1133 int error; 1134 1135 error = 0; 1136 1137 if (kdc == NULL) 1138 return (0); 1139 1140 /* 1141 * When a user enters ddb it can write a crash dump multiple times. 1142 * Each time it should be encrypted using a different IV. 1143 */ 1144 SHA256_Init(&ctx); 1145 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1146 SHA256_Final(hash, &ctx); 1147 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1148 1149 switch (kdc->kdc_encryption) { 1150 case KERNELDUMP_ENC_AES_256_CBC: 1151 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1152 kdc->kdc_iv) <= 0) { 1153 error = EINVAL; 1154 goto out; 1155 } 1156 break; 1157 case KERNELDUMP_ENC_CHACHA20: 1158 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1159 break; 1160 default: 1161 error = EINVAL; 1162 goto out; 1163 } 1164 1165 kdk = kdc->kdc_dumpkey; 1166 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1167 out: 1168 explicit_bzero(hash, sizeof(hash)); 1169 return (error); 1170 } 1171 1172 static uint32_t 1173 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1174 { 1175 1176 if (kdc == NULL) 1177 return (0); 1178 return (kdc->kdc_dumpkeysize); 1179 } 1180 #endif /* EKCD */ 1181 1182 static struct kerneldumpcomp * 1183 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1184 { 1185 struct kerneldumpcomp *kdcomp; 1186 int format; 1187 1188 switch (compression) { 1189 case KERNELDUMP_COMP_GZIP: 1190 format = COMPRESS_GZIP; 1191 break; 1192 case KERNELDUMP_COMP_ZSTD: 1193 format = COMPRESS_ZSTD; 1194 break; 1195 default: 1196 return (NULL); 1197 } 1198 1199 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1200 kdcomp->kdc_format = compression; 1201 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1202 format, di->maxiosize, kerneldump_gzlevel, di); 1203 if (kdcomp->kdc_stream == NULL) { 1204 free(kdcomp, M_DUMPER); 1205 return (NULL); 1206 } 1207 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1208 return (kdcomp); 1209 } 1210 1211 static void 1212 kerneldumpcomp_destroy(struct dumperinfo *di) 1213 { 1214 struct kerneldumpcomp *kdcomp; 1215 1216 kdcomp = di->kdcomp; 1217 if (kdcomp == NULL) 1218 return; 1219 compressor_fini(kdcomp->kdc_stream); 1220 zfree(kdcomp->kdc_buf, M_DUMPER); 1221 free(kdcomp, M_DUMPER); 1222 } 1223 1224 /* 1225 * Free a dumper. Must not be present on global list. 1226 */ 1227 void 1228 dumper_destroy(struct dumperinfo *di) 1229 { 1230 1231 if (di == NULL) 1232 return; 1233 1234 zfree(di->blockbuf, M_DUMPER); 1235 kerneldumpcomp_destroy(di); 1236 #ifdef EKCD 1237 zfree(di->kdcrypto, M_EKCD); 1238 #endif 1239 zfree(di, M_DUMPER); 1240 } 1241 1242 /* 1243 * Allocate and set up a new dumper from the provided template. 1244 */ 1245 int 1246 dumper_create(const struct dumperinfo *di_template, const char *devname, 1247 const struct diocskerneldump_arg *kda, struct dumperinfo **dip) 1248 { 1249 struct dumperinfo *newdi; 1250 int error = 0; 1251 1252 if (dip == NULL) 1253 return (EINVAL); 1254 1255 /* Allocate a new dumper */ 1256 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, 1257 M_WAITOK | M_ZERO); 1258 memcpy(newdi, di_template, sizeof(*newdi)); 1259 newdi->blockbuf = NULL; 1260 newdi->kdcrypto = NULL; 1261 newdi->kdcomp = NULL; 1262 strcpy(newdi->di_devname, devname); 1263 1264 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1265 #ifdef EKCD 1266 newdi->kdcrypto = kerneldumpcrypto_create(newdi->blocksize, 1267 kda->kda_encryption, kda->kda_key, 1268 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1269 if (newdi->kdcrypto == NULL) { 1270 error = EINVAL; 1271 goto cleanup; 1272 } 1273 #else 1274 error = EOPNOTSUPP; 1275 goto cleanup; 1276 #endif 1277 } 1278 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1279 #ifdef EKCD 1280 /* 1281 * We can't support simultaneous unpadded block cipher 1282 * encryption and compression because there is no guarantee the 1283 * length of the compressed result is exactly a multiple of the 1284 * cipher block size. 1285 */ 1286 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1287 error = EOPNOTSUPP; 1288 goto cleanup; 1289 } 1290 #endif 1291 newdi->kdcomp = kerneldumpcomp_create(newdi, 1292 kda->kda_compression); 1293 if (newdi->kdcomp == NULL) { 1294 error = EINVAL; 1295 goto cleanup; 1296 } 1297 } 1298 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1299 1300 *dip = newdi; 1301 return (0); 1302 cleanup: 1303 dumper_destroy(newdi); 1304 return (error); 1305 } 1306 1307 /* 1308 * Create a new dumper and register it in the global list. 1309 */ 1310 int 1311 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1312 const struct diocskerneldump_arg *kda) 1313 { 1314 struct dumperinfo *newdi, *listdi; 1315 bool inserted; 1316 uint8_t index; 1317 int error; 1318 1319 index = kda->kda_index; 1320 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1321 index != KDA_REMOVE_ALL); 1322 1323 error = priv_check(curthread, PRIV_SETDUMPER); 1324 if (error != 0) 1325 return (error); 1326 1327 error = dumper_create(di_template, devname, kda, &newdi); 1328 if (error != 0) 1329 return (error); 1330 1331 /* Add the new configuration to the queue */ 1332 mtx_lock(&dumpconf_list_lk); 1333 inserted = false; 1334 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1335 if (index == 0) { 1336 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1337 inserted = true; 1338 break; 1339 } 1340 index--; 1341 } 1342 if (!inserted) 1343 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1344 mtx_unlock(&dumpconf_list_lk); 1345 1346 return (0); 1347 } 1348 1349 #ifdef DDB 1350 void 1351 dumper_ddb_insert(struct dumperinfo *newdi) 1352 { 1353 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1354 } 1355 1356 void 1357 dumper_ddb_remove(struct dumperinfo *di) 1358 { 1359 TAILQ_REMOVE(&dumper_configs, di, di_next); 1360 } 1361 #endif 1362 1363 static bool 1364 dumper_config_match(const struct dumperinfo *di, const char *devname, 1365 const struct diocskerneldump_arg *kda) 1366 { 1367 if (kda->kda_index == KDA_REMOVE_ALL) 1368 return (true); 1369 1370 if (strcmp(di->di_devname, devname) != 0) 1371 return (false); 1372 1373 /* 1374 * Allow wildcard removal of configs matching a device on g_dev_orphan. 1375 */ 1376 if (kda->kda_index == KDA_REMOVE_DEV) 1377 return (true); 1378 1379 if (di->kdcomp != NULL) { 1380 if (di->kdcomp->kdc_format != kda->kda_compression) 1381 return (false); 1382 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1383 return (false); 1384 #ifdef EKCD 1385 if (di->kdcrypto != NULL) { 1386 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1387 return (false); 1388 /* 1389 * Do we care to verify keys match to delete? It seems weird 1390 * to expect multiple fallback dump configurations on the same 1391 * device that only differ in crypto key. 1392 */ 1393 } else 1394 #endif 1395 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1396 return (false); 1397 1398 return (true); 1399 } 1400 1401 /* 1402 * Remove and free the requested dumper(s) from the global list. 1403 */ 1404 int 1405 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1406 { 1407 struct dumperinfo *di, *sdi; 1408 bool found; 1409 int error; 1410 1411 error = priv_check(curthread, PRIV_SETDUMPER); 1412 if (error != 0) 1413 return (error); 1414 1415 /* 1416 * Try to find a matching configuration, and kill it. 1417 * 1418 * NULL 'kda' indicates remove any configuration matching 'devname', 1419 * which may remove multiple configurations in atypical configurations. 1420 */ 1421 found = false; 1422 mtx_lock(&dumpconf_list_lk); 1423 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1424 if (dumper_config_match(di, devname, kda)) { 1425 found = true; 1426 TAILQ_REMOVE(&dumper_configs, di, di_next); 1427 dumper_destroy(di); 1428 } 1429 } 1430 mtx_unlock(&dumpconf_list_lk); 1431 1432 /* Only produce ENOENT if a more targeted match didn't match. */ 1433 if (!found && kda->kda_index == KDA_REMOVE) 1434 return (ENOENT); 1435 return (0); 1436 } 1437 1438 static int 1439 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1440 { 1441 1442 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1443 offset - di->mediaoffset + length > di->mediasize)) { 1444 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1445 printf( 1446 "Compressed dump failed to fit in device boundaries.\n"); 1447 return (E2BIG); 1448 } 1449 1450 printf("Attempt to write outside dump device boundaries.\n" 1451 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1452 (intmax_t)offset, (intmax_t)di->mediaoffset, 1453 (uintmax_t)length, (intmax_t)di->mediasize); 1454 return (ENOSPC); 1455 } 1456 if (length % di->blocksize != 0) { 1457 printf("Attempt to write partial block of length %ju.\n", 1458 (uintmax_t)length); 1459 return (EINVAL); 1460 } 1461 if (offset % di->blocksize != 0) { 1462 printf("Attempt to write at unaligned offset %jd.\n", 1463 (intmax_t)offset); 1464 return (EINVAL); 1465 } 1466 1467 return (0); 1468 } 1469 1470 #ifdef EKCD 1471 static int 1472 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1473 { 1474 1475 switch (kdc->kdc_encryption) { 1476 case KERNELDUMP_ENC_AES_256_CBC: 1477 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1478 8 * size, buf) <= 0) { 1479 return (EIO); 1480 } 1481 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1482 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1483 return (EIO); 1484 } 1485 break; 1486 case KERNELDUMP_ENC_CHACHA20: 1487 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1488 break; 1489 default: 1490 return (EINVAL); 1491 } 1492 1493 return (0); 1494 } 1495 1496 /* Encrypt data and call dumper. */ 1497 static int 1498 dump_encrypted_write(struct dumperinfo *di, void *virtual, 1499 vm_offset_t physical, off_t offset, size_t length) 1500 { 1501 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1502 struct kerneldumpcrypto *kdc; 1503 int error; 1504 size_t nbytes; 1505 1506 kdc = di->kdcrypto; 1507 1508 while (length > 0) { 1509 nbytes = MIN(length, sizeof(buf)); 1510 bcopy(virtual, buf, nbytes); 1511 1512 if (dump_encrypt(kdc, buf, nbytes) != 0) 1513 return (EIO); 1514 1515 error = dump_write(di, buf, physical, offset, nbytes); 1516 if (error != 0) 1517 return (error); 1518 1519 offset += nbytes; 1520 virtual = (void *)((uint8_t *)virtual + nbytes); 1521 length -= nbytes; 1522 } 1523 1524 return (0); 1525 } 1526 #endif /* EKCD */ 1527 1528 static int 1529 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1530 { 1531 struct dumperinfo *di; 1532 size_t resid, rlength; 1533 int error; 1534 1535 di = arg; 1536 1537 if (length % di->blocksize != 0) { 1538 /* 1539 * This must be the final write after flushing the compression 1540 * stream. Write as many full blocks as possible and stash the 1541 * residual data in the dumper's block buffer. It will be 1542 * padded and written in dump_finish(). 1543 */ 1544 rlength = rounddown(length, di->blocksize); 1545 if (rlength != 0) { 1546 error = _dump_append(di, base, 0, rlength); 1547 if (error != 0) 1548 return (error); 1549 } 1550 resid = length - rlength; 1551 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1552 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1553 di->kdcomp->kdc_resid = resid; 1554 return (EAGAIN); 1555 } 1556 return (_dump_append(di, base, 0, length)); 1557 } 1558 1559 /* 1560 * Write kernel dump headers at the beginning and end of the dump extent. 1561 * Write the kernel dump encryption key after the leading header if we were 1562 * configured to do so. 1563 */ 1564 static int 1565 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1566 { 1567 #ifdef EKCD 1568 struct kerneldumpcrypto *kdc; 1569 #endif 1570 void *buf; 1571 size_t hdrsz; 1572 uint64_t extent; 1573 uint32_t keysize; 1574 int error; 1575 1576 hdrsz = sizeof(*kdh); 1577 if (hdrsz > di->blocksize) 1578 return (ENOMEM); 1579 1580 #ifdef EKCD 1581 kdc = di->kdcrypto; 1582 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1583 #else 1584 keysize = 0; 1585 #endif 1586 1587 /* 1588 * If the dump device has special handling for headers, let it take care 1589 * of writing them out. 1590 */ 1591 if (di->dumper_hdr != NULL) 1592 return (di->dumper_hdr(di, kdh)); 1593 1594 if (hdrsz == di->blocksize) 1595 buf = kdh; 1596 else { 1597 buf = di->blockbuf; 1598 memset(buf, 0, di->blocksize); 1599 memcpy(buf, kdh, hdrsz); 1600 } 1601 1602 extent = dtoh64(kdh->dumpextent); 1603 #ifdef EKCD 1604 if (kdc != NULL) { 1605 error = dump_write(di, kdc->kdc_dumpkey, 0, 1606 di->mediaoffset + di->mediasize - di->blocksize - extent - 1607 keysize, keysize); 1608 if (error != 0) 1609 return (error); 1610 } 1611 #endif 1612 1613 error = dump_write(di, buf, 0, 1614 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1615 keysize, di->blocksize); 1616 if (error == 0) 1617 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - 1618 di->blocksize, di->blocksize); 1619 return (error); 1620 } 1621 1622 /* 1623 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1624 * protect us from metadata and metadata from us. 1625 */ 1626 #define SIZEOF_METADATA (64 * 1024) 1627 1628 /* 1629 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1630 * if requested, and make sure that we have enough space on the dump device. 1631 * 1632 * We set things up so that the dump ends before the last sector of the dump 1633 * device, at which the trailing header is written. 1634 * 1635 * +-----------+------+-----+----------------------------+------+ 1636 * | | lhdr | key | ... kernel dump ... | thdr | 1637 * +-----------+------+-----+----------------------------+------+ 1638 * 1 blk opt <------- dump extent --------> 1 blk 1639 * 1640 * Dumps written using dump_append() start at the beginning of the extent. 1641 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1642 * will not. The true length of the dump is recorded in the leading and trailing 1643 * headers once the dump has been completed. 1644 * 1645 * The dump device may provide a callback, in which case it will initialize 1646 * dumpoff and take care of laying out the headers. 1647 */ 1648 int 1649 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1650 { 1651 #ifdef EKCD 1652 struct kerneldumpcrypto *kdc; 1653 #endif 1654 void *key; 1655 uint64_t dumpextent, span; 1656 uint32_t keysize; 1657 int error; 1658 1659 #ifdef EKCD 1660 /* Send the key before the dump so a partial dump is still usable. */ 1661 kdc = di->kdcrypto; 1662 error = kerneldumpcrypto_init(kdc); 1663 if (error != 0) 1664 return (error); 1665 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1666 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1667 #else 1668 error = 0; 1669 keysize = 0; 1670 key = NULL; 1671 #endif 1672 1673 if (di->dumper_start != NULL) { 1674 error = di->dumper_start(di, key, keysize); 1675 } else { 1676 dumpextent = dtoh64(kdh->dumpextent); 1677 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1678 keysize; 1679 if (di->mediasize < span) { 1680 if (di->kdcomp == NULL) 1681 return (E2BIG); 1682 1683 /* 1684 * We don't yet know how much space the compressed dump 1685 * will occupy, so try to use the whole swap partition 1686 * (minus the first 64KB) in the hope that the 1687 * compressed dump will fit. If that doesn't turn out to 1688 * be enough, the bounds checking in dump_write() 1689 * will catch us and cause the dump to fail. 1690 */ 1691 dumpextent = di->mediasize - span + dumpextent; 1692 kdh->dumpextent = htod64(dumpextent); 1693 } 1694 1695 /* 1696 * The offset at which to begin writing the dump. 1697 */ 1698 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1699 dumpextent; 1700 } 1701 di->origdumpoff = di->dumpoff; 1702 return (error); 1703 } 1704 1705 static int 1706 _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1707 size_t length) 1708 { 1709 int error; 1710 1711 #ifdef EKCD 1712 if (di->kdcrypto != NULL) 1713 error = dump_encrypted_write(di, virtual, physical, di->dumpoff, 1714 length); 1715 else 1716 #endif 1717 error = dump_write(di, virtual, physical, di->dumpoff, length); 1718 if (error == 0) 1719 di->dumpoff += length; 1720 return (error); 1721 } 1722 1723 /* 1724 * Write to the dump device starting at dumpoff. When compression is enabled, 1725 * writes to the device will be performed using a callback that gets invoked 1726 * when the compression stream's output buffer is full. 1727 */ 1728 int 1729 dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1730 size_t length) 1731 { 1732 void *buf; 1733 1734 if (di->kdcomp != NULL) { 1735 /* Bounce through a buffer to avoid CRC errors. */ 1736 if (length > di->maxiosize) 1737 return (EINVAL); 1738 buf = di->kdcomp->kdc_buf; 1739 memmove(buf, virtual, length); 1740 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1741 } 1742 return (_dump_append(di, virtual, physical, length)); 1743 } 1744 1745 /* 1746 * Write to the dump device at the specified offset. 1747 */ 1748 int 1749 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1750 off_t offset, size_t length) 1751 { 1752 int error; 1753 1754 error = dump_check_bounds(di, offset, length); 1755 if (error != 0) 1756 return (error); 1757 return (di->dumper(di->priv, virtual, physical, offset, length)); 1758 } 1759 1760 /* 1761 * Perform kernel dump finalization: flush the compression stream, if necessary, 1762 * write the leading and trailing kernel dump headers now that we know the true 1763 * length of the dump, and optionally write the encryption key following the 1764 * leading header. 1765 */ 1766 int 1767 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1768 { 1769 int error; 1770 1771 if (di->kdcomp != NULL) { 1772 error = compressor_flush(di->kdcomp->kdc_stream); 1773 if (error == EAGAIN) { 1774 /* We have residual data in di->blockbuf. */ 1775 error = _dump_append(di, di->blockbuf, 0, di->blocksize); 1776 if (error == 0) 1777 /* Compensate for _dump_append()'s adjustment. */ 1778 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1779 di->kdcomp->kdc_resid = 0; 1780 } 1781 if (error != 0) 1782 return (error); 1783 1784 /* 1785 * We now know the size of the compressed dump, so update the 1786 * header accordingly and recompute parity. 1787 */ 1788 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1789 kdh->parity = 0; 1790 kdh->parity = kerneldump_parity(kdh); 1791 1792 compressor_reset(di->kdcomp->kdc_stream); 1793 } 1794 1795 error = dump_write_headers(di, kdh); 1796 if (error != 0) 1797 return (error); 1798 1799 (void)dump_write(di, NULL, 0, 0, 0); 1800 return (0); 1801 } 1802 1803 void 1804 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1805 const char *magic, uint32_t archver, uint64_t dumplen) 1806 { 1807 size_t dstsize; 1808 1809 bzero(kdh, sizeof(*kdh)); 1810 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1811 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1812 kdh->version = htod32(KERNELDUMPVERSION); 1813 kdh->architectureversion = htod32(archver); 1814 kdh->dumplength = htod64(dumplen); 1815 kdh->dumpextent = kdh->dumplength; 1816 kdh->dumptime = htod64(time_second); 1817 #ifdef EKCD 1818 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1819 #else 1820 kdh->dumpkeysize = 0; 1821 #endif 1822 kdh->blocksize = htod32(di->blocksize); 1823 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1824 dstsize = sizeof(kdh->versionstring); 1825 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1826 kdh->versionstring[dstsize - 2] = '\n'; 1827 if (panicstr != NULL) 1828 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1829 if (di->kdcomp != NULL) 1830 kdh->compression = di->kdcomp->kdc_format; 1831 kdh->parity = kerneldump_parity(kdh); 1832 } 1833 1834 #ifdef DDB 1835 DB_SHOW_COMMAND(panic, db_show_panic) 1836 { 1837 1838 if (panicstr == NULL) 1839 db_printf("panicstr not set\n"); 1840 else 1841 db_printf("panic: %s\n", panicstr); 1842 } 1843 #endif 1844