1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_ddb.h" 43 #include "opt_ekcd.h" 44 #include "opt_kdb.h" 45 #include "opt_panic.h" 46 #include "opt_printf.h" 47 #include "opt_sched.h" 48 #include "opt_watchdog.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/bio.h> 53 #include <sys/buf.h> 54 #include <sys/conf.h> 55 #include <sys/compressor.h> 56 #include <sys/cons.h> 57 #include <sys/disk.h> 58 #include <sys/eventhandler.h> 59 #include <sys/filedesc.h> 60 #include <sys/jail.h> 61 #include <sys/kdb.h> 62 #include <sys/kernel.h> 63 #include <sys/kerneldump.h> 64 #include <sys/kthread.h> 65 #include <sys/ktr.h> 66 #include <sys/malloc.h> 67 #include <sys/mbuf.h> 68 #include <sys/mount.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/reboot.h> 72 #include <sys/resourcevar.h> 73 #include <sys/rwlock.h> 74 #include <sys/sbuf.h> 75 #include <sys/sched.h> 76 #include <sys/smp.h> 77 #include <sys/sysctl.h> 78 #include <sys/sysproto.h> 79 #include <sys/taskqueue.h> 80 #include <sys/vnode.h> 81 #include <sys/watchdog.h> 82 83 #include <crypto/chacha20/chacha.h> 84 #include <crypto/rijndael/rijndael-api-fst.h> 85 #include <crypto/sha2/sha256.h> 86 87 #include <ddb/ddb.h> 88 89 #include <machine/cpu.h> 90 #include <machine/dump.h> 91 #include <machine/pcb.h> 92 #include <machine/smp.h> 93 94 #include <security/mac/mac_framework.h> 95 96 #include <vm/vm.h> 97 #include <vm/vm_object.h> 98 #include <vm/vm_page.h> 99 #include <vm/vm_pager.h> 100 #include <vm/swap_pager.h> 101 102 #include <sys/signalvar.h> 103 104 static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer"); 105 106 #ifndef PANIC_REBOOT_WAIT_TIME 107 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ 108 #endif 109 static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME; 110 SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN, 111 &panic_reboot_wait_time, 0, 112 "Seconds to wait before rebooting after a panic"); 113 114 /* 115 * Note that stdarg.h and the ANSI style va_start macro is used for both 116 * ANSI and traditional C compilers. 117 */ 118 #include <machine/stdarg.h> 119 120 #ifdef KDB 121 #ifdef KDB_UNATTENDED 122 int debugger_on_panic = 0; 123 #else 124 int debugger_on_panic = 1; 125 #endif 126 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, 127 CTLFLAG_RWTUN | CTLFLAG_SECURE, 128 &debugger_on_panic, 0, "Run debugger on kernel panic"); 129 130 static bool debugger_on_recursive_panic = false; 131 SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic, 132 CTLFLAG_RWTUN | CTLFLAG_SECURE, 133 &debugger_on_recursive_panic, 0, "Run debugger on recursive kernel panic"); 134 135 int debugger_on_trap = 0; 136 SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap, 137 CTLFLAG_RWTUN | CTLFLAG_SECURE, 138 &debugger_on_trap, 0, "Run debugger on kernel trap before panic"); 139 140 #ifdef KDB_TRACE 141 static int trace_on_panic = 1; 142 static bool trace_all_panics = true; 143 #else 144 static int trace_on_panic = 0; 145 static bool trace_all_panics = false; 146 #endif 147 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, 148 CTLFLAG_RWTUN | CTLFLAG_SECURE, 149 &trace_on_panic, 0, "Print stack trace on kernel panic"); 150 SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN, 151 &trace_all_panics, 0, "Print stack traces on secondary kernel panics"); 152 #endif /* KDB */ 153 154 static int sync_on_panic = 0; 155 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN, 156 &sync_on_panic, 0, "Do a sync before rebooting from a panic"); 157 158 static bool poweroff_on_panic = 0; 159 SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN, 160 &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic"); 161 162 static bool powercycle_on_panic = 0; 163 SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN, 164 &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic"); 165 166 static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 167 "Shutdown environment"); 168 169 #ifndef DIAGNOSTIC 170 static int show_busybufs; 171 #else 172 static int show_busybufs = 1; 173 #endif 174 SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW, 175 &show_busybufs, 0, 176 "Show busy buffers during shutdown"); 177 178 int suspend_blocked = 0; 179 SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW, 180 &suspend_blocked, 0, "Block suspend due to a pending shutdown"); 181 182 #ifdef EKCD 183 FEATURE(ekcd, "Encrypted kernel crash dumps support"); 184 185 MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data"); 186 187 struct kerneldumpcrypto { 188 uint8_t kdc_encryption; 189 uint8_t kdc_iv[KERNELDUMP_IV_MAX_SIZE]; 190 union { 191 struct { 192 keyInstance aes_ki; 193 cipherInstance aes_ci; 194 } u_aes; 195 struct chacha_ctx u_chacha; 196 } u; 197 #define kdc_ki u.u_aes.aes_ki 198 #define kdc_ci u.u_aes.aes_ci 199 #define kdc_chacha u.u_chacha 200 uint32_t kdc_dumpkeysize; 201 struct kerneldumpkey kdc_dumpkey[]; 202 }; 203 #endif 204 205 struct kerneldumpcomp { 206 uint8_t kdc_format; 207 struct compressor *kdc_stream; 208 uint8_t *kdc_buf; 209 size_t kdc_resid; 210 }; 211 212 static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di, 213 uint8_t compression); 214 static void kerneldumpcomp_destroy(struct dumperinfo *di); 215 static int kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg); 216 217 static int kerneldump_gzlevel = 6; 218 SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN, 219 &kerneldump_gzlevel, 0, 220 "Kernel crash dump compression level"); 221 222 /* 223 * Variable panicstr contains argument to first call to panic; used as flag 224 * to indicate that the kernel has already called panic. 225 */ 226 const char *panicstr; 227 bool __read_frequently panicked; 228 229 int __read_mostly dumping; /* system is dumping */ 230 int rebooting; /* system is rebooting */ 231 /* 232 * Used to serialize between sysctl kern.shutdown.dumpdevname and list 233 * modifications via ioctl. 234 */ 235 static struct mtx dumpconf_list_lk; 236 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); 237 238 /* Our selected dumper(s). */ 239 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 240 TAILQ_HEAD_INITIALIZER(dumper_configs); 241 242 /* Context information for dump-debuggers. */ 243 static struct pcb dumppcb; /* Registers. */ 244 lwpid_t dumptid; /* Thread ID. */ 245 246 static struct cdevsw reroot_cdevsw = { 247 .d_version = D_VERSION, 248 .d_name = "reroot", 249 }; 250 251 static void poweroff_wait(void *, int); 252 static void shutdown_halt(void *junk, int howto); 253 static void shutdown_panic(void *junk, int howto); 254 static void shutdown_reset(void *junk, int howto); 255 static int kern_reroot(void); 256 257 /* register various local shutdown events */ 258 static void 259 shutdown_conf(void *unused) 260 { 261 262 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 263 SHUTDOWN_PRI_FIRST); 264 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 265 SHUTDOWN_PRI_LAST + 100); 266 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 267 SHUTDOWN_PRI_LAST + 100); 268 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 269 SHUTDOWN_PRI_LAST + 200); 270 } 271 272 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 273 274 /* 275 * The only reason this exists is to create the /dev/reroot/ directory, 276 * used by reroot code in init(8) as a mountpoint for tmpfs. 277 */ 278 static void 279 reroot_conf(void *unused) 280 { 281 int error; 282 struct cdev *cdev; 283 284 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 285 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 286 if (error != 0) { 287 printf("%s: failed to create device node, error %d", 288 __func__, error); 289 } 290 } 291 292 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 293 294 /* 295 * The system call that results in a reboot. 296 */ 297 /* ARGSUSED */ 298 int 299 sys_reboot(struct thread *td, struct reboot_args *uap) 300 { 301 int error; 302 303 error = 0; 304 #ifdef MAC 305 error = mac_system_check_reboot(td->td_ucred, uap->opt); 306 #endif 307 if (error == 0) 308 error = priv_check(td, PRIV_REBOOT); 309 if (error == 0) { 310 if (uap->opt & RB_REROOT) 311 error = kern_reroot(); 312 else 313 kern_reboot(uap->opt); 314 } 315 return (error); 316 } 317 318 static void 319 shutdown_nice_task_fn(void *arg, int pending __unused) 320 { 321 int howto; 322 323 howto = (uintptr_t)arg; 324 /* Send a signal to init(8) and have it shutdown the world. */ 325 PROC_LOCK(initproc); 326 if (howto & RB_POWEROFF) 327 kern_psignal(initproc, SIGUSR2); 328 else if (howto & RB_POWERCYCLE) 329 kern_psignal(initproc, SIGWINCH); 330 else if (howto & RB_HALT) 331 kern_psignal(initproc, SIGUSR1); 332 else 333 kern_psignal(initproc, SIGINT); 334 PROC_UNLOCK(initproc); 335 } 336 337 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 338 &shutdown_nice_task_fn, NULL); 339 340 /* 341 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 342 */ 343 void 344 shutdown_nice(int howto) 345 { 346 347 if (initproc != NULL && !SCHEDULER_STOPPED()) { 348 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 349 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 350 } else { 351 /* 352 * No init(8) running, or scheduler would not allow it 353 * to run, so simply reboot. 354 */ 355 kern_reboot(howto | RB_NOSYNC); 356 } 357 } 358 359 static void 360 print_uptime(void) 361 { 362 int f; 363 struct timespec ts; 364 365 getnanouptime(&ts); 366 printf("Uptime: "); 367 f = 0; 368 if (ts.tv_sec >= 86400) { 369 printf("%ldd", (long)ts.tv_sec / 86400); 370 ts.tv_sec %= 86400; 371 f = 1; 372 } 373 if (f || ts.tv_sec >= 3600) { 374 printf("%ldh", (long)ts.tv_sec / 3600); 375 ts.tv_sec %= 3600; 376 f = 1; 377 } 378 if (f || ts.tv_sec >= 60) { 379 printf("%ldm", (long)ts.tv_sec / 60); 380 ts.tv_sec %= 60; 381 f = 1; 382 } 383 printf("%lds\n", (long)ts.tv_sec); 384 } 385 386 int 387 doadump(boolean_t textdump) 388 { 389 boolean_t coredump; 390 int error; 391 392 error = 0; 393 if (dumping) 394 return (EBUSY); 395 if (TAILQ_EMPTY(&dumper_configs)) 396 return (ENXIO); 397 398 savectx(&dumppcb); 399 dumptid = curthread->td_tid; 400 dumping++; 401 402 coredump = TRUE; 403 #ifdef DDB 404 if (textdump && textdump_pending) { 405 coredump = FALSE; 406 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 407 } 408 #endif 409 if (coredump) { 410 struct dumperinfo *di; 411 412 TAILQ_FOREACH(di, &dumper_configs, di_next) { 413 error = dumpsys(di); 414 if (error == 0) 415 break; 416 } 417 } 418 419 dumping--; 420 return (error); 421 } 422 423 /* 424 * Shutdown the system cleanly to prepare for reboot, halt, or power off. 425 */ 426 void 427 kern_reboot(int howto) 428 { 429 static int once = 0; 430 431 /* 432 * Normal paths here don't hold Giant, but we can wind up here 433 * unexpectedly with it held. Drop it now so we don't have to 434 * drop and pick it up elsewhere. The paths it is locking will 435 * never be returned to, and it is preferable to preclude 436 * deadlock than to lock against code that won't ever 437 * continue. 438 */ 439 while (mtx_owned(&Giant)) 440 mtx_unlock(&Giant); 441 442 #if defined(SMP) 443 /* 444 * Bind us to the first CPU so that all shutdown code runs there. Some 445 * systems don't shutdown properly (i.e., ACPI power off) if we 446 * run on another processor. 447 */ 448 if (!SCHEDULER_STOPPED()) { 449 thread_lock(curthread); 450 sched_bind(curthread, CPU_FIRST()); 451 thread_unlock(curthread); 452 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 453 ("boot: not running on cpu 0")); 454 } 455 #endif 456 /* We're in the process of rebooting. */ 457 rebooting = 1; 458 459 /* We are out of the debugger now. */ 460 kdb_active = 0; 461 462 /* 463 * Do any callouts that should be done BEFORE syncing the filesystems. 464 */ 465 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 466 467 /* 468 * Now sync filesystems 469 */ 470 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 471 once = 1; 472 bufshutdown(show_busybufs); 473 } 474 475 print_uptime(); 476 477 cngrab(); 478 479 /* 480 * Ok, now do things that assume all filesystem activity has 481 * been completed. 482 */ 483 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 484 485 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 486 doadump(TRUE); 487 488 /* Now that we're going to really halt the system... */ 489 EVENTHANDLER_INVOKE(shutdown_final, howto); 490 491 for(;;) ; /* safety against shutdown_reset not working */ 492 /* NOTREACHED */ 493 } 494 495 /* 496 * The system call that results in changing the rootfs. 497 */ 498 static int 499 kern_reroot(void) 500 { 501 struct vnode *oldrootvnode, *vp; 502 struct mount *mp, *devmp; 503 int error; 504 505 if (curproc != initproc) 506 return (EPERM); 507 508 /* 509 * Mark the filesystem containing currently-running executable 510 * (the temporary copy of init(8)) busy. 511 */ 512 vp = curproc->p_textvp; 513 error = vn_lock(vp, LK_SHARED); 514 if (error != 0) 515 return (error); 516 mp = vp->v_mount; 517 error = vfs_busy(mp, MBF_NOWAIT); 518 if (error != 0) { 519 vfs_ref(mp); 520 VOP_UNLOCK(vp); 521 error = vfs_busy(mp, 0); 522 vn_lock(vp, LK_SHARED | LK_RETRY); 523 vfs_rel(mp); 524 if (error != 0) { 525 VOP_UNLOCK(vp); 526 return (ENOENT); 527 } 528 if (VN_IS_DOOMED(vp)) { 529 VOP_UNLOCK(vp); 530 vfs_unbusy(mp); 531 return (ENOENT); 532 } 533 } 534 VOP_UNLOCK(vp); 535 536 /* 537 * Remove the filesystem containing currently-running executable 538 * from the mount list, to prevent it from being unmounted 539 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 540 * 541 * Also preserve /dev - forcibly unmounting it could cause driver 542 * reinitialization. 543 */ 544 545 vfs_ref(rootdevmp); 546 devmp = rootdevmp; 547 rootdevmp = NULL; 548 549 mtx_lock(&mountlist_mtx); 550 TAILQ_REMOVE(&mountlist, mp, mnt_list); 551 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 552 mtx_unlock(&mountlist_mtx); 553 554 oldrootvnode = rootvnode; 555 556 /* 557 * Unmount everything except for the two filesystems preserved above. 558 */ 559 vfs_unmountall(); 560 561 /* 562 * Add /dev back; vfs_mountroot() will move it into its new place. 563 */ 564 mtx_lock(&mountlist_mtx); 565 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 566 mtx_unlock(&mountlist_mtx); 567 rootdevmp = devmp; 568 vfs_rel(rootdevmp); 569 570 /* 571 * Mount the new rootfs. 572 */ 573 vfs_mountroot(); 574 575 /* 576 * Update all references to the old rootvnode. 577 */ 578 mountcheckdirs(oldrootvnode, rootvnode); 579 580 /* 581 * Add the temporary filesystem back and unbusy it. 582 */ 583 mtx_lock(&mountlist_mtx); 584 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 585 mtx_unlock(&mountlist_mtx); 586 vfs_unbusy(mp); 587 588 return (0); 589 } 590 591 /* 592 * If the shutdown was a clean halt, behave accordingly. 593 */ 594 static void 595 shutdown_halt(void *junk, int howto) 596 { 597 598 if (howto & RB_HALT) { 599 printf("\n"); 600 printf("The operating system has halted.\n"); 601 printf("Please press any key to reboot.\n\n"); 602 603 wdog_kern_pat(WD_TO_NEVER); 604 605 switch (cngetc()) { 606 case -1: /* No console, just die */ 607 cpu_halt(); 608 /* NOTREACHED */ 609 default: 610 break; 611 } 612 } 613 } 614 615 /* 616 * Check to see if the system paniced, pause and then reboot 617 * according to the specified delay. 618 */ 619 static void 620 shutdown_panic(void *junk, int howto) 621 { 622 int loop; 623 624 if (howto & RB_DUMP) { 625 if (panic_reboot_wait_time != 0) { 626 if (panic_reboot_wait_time != -1) { 627 printf("Automatic reboot in %d seconds - " 628 "press a key on the console to abort\n", 629 panic_reboot_wait_time); 630 for (loop = panic_reboot_wait_time * 10; 631 loop > 0; --loop) { 632 DELAY(1000 * 100); /* 1/10th second */ 633 /* Did user type a key? */ 634 if (cncheckc() != -1) 635 break; 636 } 637 if (!loop) 638 return; 639 } 640 } else { /* zero time specified - reboot NOW */ 641 return; 642 } 643 printf("--> Press a key on the console to reboot,\n"); 644 printf("--> or switch off the system now.\n"); 645 cngetc(); 646 } 647 } 648 649 /* 650 * Everything done, now reset 651 */ 652 static void 653 shutdown_reset(void *junk, int howto) 654 { 655 656 printf("Rebooting...\n"); 657 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 658 659 /* 660 * Acquiring smp_ipi_mtx here has a double effect: 661 * - it disables interrupts avoiding CPU0 preemption 662 * by fast handlers (thus deadlocking against other CPUs) 663 * - it avoids deadlocks against smp_rendezvous() or, more 664 * generally, threads busy-waiting, with this spinlock held, 665 * and waiting for responses by threads on other CPUs 666 * (ie. smp_tlb_shootdown()). 667 * 668 * For the !SMP case it just needs to handle the former problem. 669 */ 670 #ifdef SMP 671 mtx_lock_spin(&smp_ipi_mtx); 672 #else 673 spinlock_enter(); 674 #endif 675 676 cpu_reset(); 677 /* NOTREACHED */ /* assuming reset worked */ 678 } 679 680 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 681 static int kassert_warn_only = 0; 682 #ifdef KDB 683 static int kassert_do_kdb = 0; 684 #endif 685 #ifdef KTR 686 static int kassert_do_ktr = 0; 687 #endif 688 static int kassert_do_log = 1; 689 static int kassert_log_pps_limit = 4; 690 static int kassert_log_mute_at = 0; 691 static int kassert_log_panic_at = 0; 692 static int kassert_suppress_in_panic = 0; 693 static int kassert_warnings = 0; 694 695 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 696 "kassert options"); 697 698 #ifdef KASSERT_PANIC_OPTIONAL 699 #define KASSERT_RWTUN CTLFLAG_RWTUN 700 #else 701 #define KASSERT_RWTUN CTLFLAG_RDTUN 702 #endif 703 704 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 705 &kassert_warn_only, 0, 706 "KASSERT triggers a panic (0) or just a warning (1)"); 707 708 #ifdef KDB 709 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 710 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 711 #endif 712 713 #ifdef KTR 714 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 715 &kassert_do_ktr, 0, 716 "KASSERT does a KTR, set this to the KTRMASK you want"); 717 #endif 718 719 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 720 &kassert_do_log, 0, 721 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 722 723 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS, 724 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 725 726 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 727 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 728 729 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 730 &kassert_log_pps_limit, 0, "limit number of log messages per second"); 731 732 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 733 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 734 735 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 736 &kassert_suppress_in_panic, 0, 737 "KASSERTs will be suppressed while handling a panic"); 738 #undef KASSERT_RWTUN 739 740 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 741 742 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 743 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0, 744 kassert_sysctl_kassert, "I", 745 "set to trigger a test kassert"); 746 747 static int 748 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 749 { 750 int error, i; 751 752 error = sysctl_wire_old_buffer(req, sizeof(int)); 753 if (error == 0) { 754 i = 0; 755 error = sysctl_handle_int(oidp, &i, 0, req); 756 } 757 if (error != 0 || req->newptr == NULL) 758 return (error); 759 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 760 return (0); 761 } 762 763 #ifdef KASSERT_PANIC_OPTIONAL 764 /* 765 * Called by KASSERT, this decides if we will panic 766 * or if we will log via printf and/or ktr. 767 */ 768 void 769 kassert_panic(const char *fmt, ...) 770 { 771 static char buf[256]; 772 va_list ap; 773 774 va_start(ap, fmt); 775 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 776 va_end(ap); 777 778 /* 779 * If we are suppressing secondary panics, log the warning but do not 780 * re-enter panic/kdb. 781 */ 782 if (panicstr != NULL && kassert_suppress_in_panic) { 783 if (kassert_do_log) { 784 printf("KASSERT failed: %s\n", buf); 785 #ifdef KDB 786 if (trace_all_panics && trace_on_panic) 787 kdb_backtrace(); 788 #endif 789 } 790 return; 791 } 792 793 /* 794 * panic if we're not just warning, or if we've exceeded 795 * kassert_log_panic_at warnings. 796 */ 797 if (!kassert_warn_only || 798 (kassert_log_panic_at > 0 && 799 kassert_warnings >= kassert_log_panic_at)) { 800 va_start(ap, fmt); 801 vpanic(fmt, ap); 802 /* NORETURN */ 803 } 804 #ifdef KTR 805 if (kassert_do_ktr) 806 CTR0(ktr_mask, buf); 807 #endif /* KTR */ 808 /* 809 * log if we've not yet met the mute limit. 810 */ 811 if (kassert_do_log && 812 (kassert_log_mute_at == 0 || 813 kassert_warnings < kassert_log_mute_at)) { 814 static struct timeval lasterr; 815 static int curerr; 816 817 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 818 printf("KASSERT failed: %s\n", buf); 819 kdb_backtrace(); 820 } 821 } 822 #ifdef KDB 823 if (kassert_do_kdb) { 824 kdb_enter(KDB_WHY_KASSERT, buf); 825 } 826 #endif 827 atomic_add_int(&kassert_warnings, 1); 828 } 829 #endif /* KASSERT_PANIC_OPTIONAL */ 830 #endif 831 832 /* 833 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 834 * and then reboots. If we are called twice, then we avoid trying to sync 835 * the disks as this often leads to recursive panics. 836 */ 837 void 838 panic(const char *fmt, ...) 839 { 840 va_list ap; 841 842 va_start(ap, fmt); 843 vpanic(fmt, ap); 844 } 845 846 void 847 vpanic(const char *fmt, va_list ap) 848 { 849 #ifdef SMP 850 cpuset_t other_cpus; 851 #endif 852 struct thread *td = curthread; 853 int bootopt, newpanic; 854 static char buf[256]; 855 856 spinlock_enter(); 857 858 #ifdef SMP 859 /* 860 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 861 * concurrently entering panic. Only the winner will proceed 862 * further. 863 */ 864 if (panicstr == NULL && !kdb_active) { 865 other_cpus = all_cpus; 866 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 867 stop_cpus_hard(other_cpus); 868 } 869 #endif 870 871 /* 872 * Ensure that the scheduler is stopped while panicking, even if panic 873 * has been entered from kdb. 874 */ 875 td->td_stopsched = 1; 876 877 bootopt = RB_AUTOBOOT; 878 newpanic = 0; 879 if (panicstr) 880 bootopt |= RB_NOSYNC; 881 else { 882 bootopt |= RB_DUMP; 883 panicstr = fmt; 884 panicked = true; 885 newpanic = 1; 886 } 887 888 if (newpanic) { 889 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 890 panicstr = buf; 891 cngrab(); 892 printf("panic: %s\n", buf); 893 } else { 894 printf("panic: "); 895 vprintf(fmt, ap); 896 printf("\n"); 897 } 898 #ifdef SMP 899 printf("cpuid = %d\n", PCPU_GET(cpuid)); 900 #endif 901 printf("time = %jd\n", (intmax_t )time_second); 902 #ifdef KDB 903 if ((newpanic || trace_all_panics) && trace_on_panic) 904 kdb_backtrace(); 905 if (debugger_on_panic) 906 kdb_enter(KDB_WHY_PANIC, "panic"); 907 else if (!newpanic && debugger_on_recursive_panic) 908 kdb_enter(KDB_WHY_PANIC, "re-panic"); 909 #endif 910 /*thread_lock(td); */ 911 td->td_flags |= TDF_INPANIC; 912 /* thread_unlock(td); */ 913 if (!sync_on_panic) 914 bootopt |= RB_NOSYNC; 915 if (poweroff_on_panic) 916 bootopt |= RB_POWEROFF; 917 if (powercycle_on_panic) 918 bootopt |= RB_POWERCYCLE; 919 kern_reboot(bootopt); 920 } 921 922 /* 923 * Support for poweroff delay. 924 * 925 * Please note that setting this delay too short might power off your machine 926 * before the write cache on your hard disk has been flushed, leading to 927 * soft-updates inconsistencies. 928 */ 929 #ifndef POWEROFF_DELAY 930 # define POWEROFF_DELAY 5000 931 #endif 932 static int poweroff_delay = POWEROFF_DELAY; 933 934 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 935 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 936 937 static void 938 poweroff_wait(void *junk, int howto) 939 { 940 941 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 942 return; 943 DELAY(poweroff_delay * 1000); 944 } 945 946 /* 947 * Some system processes (e.g. syncer) need to be stopped at appropriate 948 * points in their main loops prior to a system shutdown, so that they 949 * won't interfere with the shutdown process (e.g. by holding a disk buf 950 * to cause sync to fail). For each of these system processes, register 951 * shutdown_kproc() as a handler for one of shutdown events. 952 */ 953 static int kproc_shutdown_wait = 60; 954 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 955 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 956 957 void 958 kproc_shutdown(void *arg, int howto) 959 { 960 struct proc *p; 961 int error; 962 963 if (panicstr) 964 return; 965 966 p = (struct proc *)arg; 967 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 968 kproc_shutdown_wait, p->p_comm); 969 error = kproc_suspend(p, kproc_shutdown_wait * hz); 970 971 if (error == EWOULDBLOCK) 972 printf("timed out\n"); 973 else 974 printf("done\n"); 975 } 976 977 void 978 kthread_shutdown(void *arg, int howto) 979 { 980 struct thread *td; 981 int error; 982 983 if (panicstr) 984 return; 985 986 td = (struct thread *)arg; 987 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 988 kproc_shutdown_wait, td->td_name); 989 error = kthread_suspend(td, kproc_shutdown_wait * hz); 990 991 if (error == EWOULDBLOCK) 992 printf("timed out\n"); 993 else 994 printf("done\n"); 995 } 996 997 static int 998 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 999 { 1000 char buf[256]; 1001 struct dumperinfo *di; 1002 struct sbuf sb; 1003 int error; 1004 1005 error = sysctl_wire_old_buffer(req, 0); 1006 if (error != 0) 1007 return (error); 1008 1009 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1010 1011 mtx_lock(&dumpconf_list_lk); 1012 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1013 if (di != TAILQ_FIRST(&dumper_configs)) 1014 sbuf_putc(&sb, ','); 1015 sbuf_cat(&sb, di->di_devname); 1016 } 1017 mtx_unlock(&dumpconf_list_lk); 1018 1019 error = sbuf_finish(&sb); 1020 sbuf_delete(&sb); 1021 return (error); 1022 } 1023 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, 1024 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0, 1025 dumpdevname_sysctl_handler, "A", 1026 "Device(s) for kernel dumps"); 1027 1028 static int _dump_append(struct dumperinfo *di, void *virtual, 1029 vm_offset_t physical, size_t length); 1030 1031 #ifdef EKCD 1032 static struct kerneldumpcrypto * 1033 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1034 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1035 { 1036 struct kerneldumpcrypto *kdc; 1037 struct kerneldumpkey *kdk; 1038 uint32_t dumpkeysize; 1039 1040 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1041 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1042 1043 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1044 1045 kdc->kdc_encryption = encryption; 1046 switch (kdc->kdc_encryption) { 1047 case KERNELDUMP_ENC_AES_256_CBC: 1048 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1049 goto failed; 1050 break; 1051 case KERNELDUMP_ENC_CHACHA20: 1052 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1053 break; 1054 default: 1055 goto failed; 1056 } 1057 1058 kdc->kdc_dumpkeysize = dumpkeysize; 1059 kdk = kdc->kdc_dumpkey; 1060 kdk->kdk_encryption = kdc->kdc_encryption; 1061 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1062 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1063 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1064 1065 return (kdc); 1066 failed: 1067 zfree(kdc, M_EKCD); 1068 return (NULL); 1069 } 1070 1071 static int 1072 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1073 { 1074 uint8_t hash[SHA256_DIGEST_LENGTH]; 1075 SHA256_CTX ctx; 1076 struct kerneldumpkey *kdk; 1077 int error; 1078 1079 error = 0; 1080 1081 if (kdc == NULL) 1082 return (0); 1083 1084 /* 1085 * When a user enters ddb it can write a crash dump multiple times. 1086 * Each time it should be encrypted using a different IV. 1087 */ 1088 SHA256_Init(&ctx); 1089 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1090 SHA256_Final(hash, &ctx); 1091 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1092 1093 switch (kdc->kdc_encryption) { 1094 case KERNELDUMP_ENC_AES_256_CBC: 1095 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1096 kdc->kdc_iv) <= 0) { 1097 error = EINVAL; 1098 goto out; 1099 } 1100 break; 1101 case KERNELDUMP_ENC_CHACHA20: 1102 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1103 break; 1104 default: 1105 error = EINVAL; 1106 goto out; 1107 } 1108 1109 kdk = kdc->kdc_dumpkey; 1110 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1111 out: 1112 explicit_bzero(hash, sizeof(hash)); 1113 return (error); 1114 } 1115 1116 static uint32_t 1117 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1118 { 1119 1120 if (kdc == NULL) 1121 return (0); 1122 return (kdc->kdc_dumpkeysize); 1123 } 1124 #endif /* EKCD */ 1125 1126 static struct kerneldumpcomp * 1127 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1128 { 1129 struct kerneldumpcomp *kdcomp; 1130 int format; 1131 1132 switch (compression) { 1133 case KERNELDUMP_COMP_GZIP: 1134 format = COMPRESS_GZIP; 1135 break; 1136 case KERNELDUMP_COMP_ZSTD: 1137 format = COMPRESS_ZSTD; 1138 break; 1139 default: 1140 return (NULL); 1141 } 1142 1143 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1144 kdcomp->kdc_format = compression; 1145 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1146 format, di->maxiosize, kerneldump_gzlevel, di); 1147 if (kdcomp->kdc_stream == NULL) { 1148 free(kdcomp, M_DUMPER); 1149 return (NULL); 1150 } 1151 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1152 return (kdcomp); 1153 } 1154 1155 static void 1156 kerneldumpcomp_destroy(struct dumperinfo *di) 1157 { 1158 struct kerneldumpcomp *kdcomp; 1159 1160 kdcomp = di->kdcomp; 1161 if (kdcomp == NULL) 1162 return; 1163 compressor_fini(kdcomp->kdc_stream); 1164 zfree(kdcomp->kdc_buf, M_DUMPER); 1165 free(kdcomp, M_DUMPER); 1166 } 1167 1168 /* 1169 * Must not be present on global list. 1170 */ 1171 static void 1172 free_single_dumper(struct dumperinfo *di) 1173 { 1174 1175 if (di == NULL) 1176 return; 1177 1178 zfree(di->blockbuf, M_DUMPER); 1179 1180 kerneldumpcomp_destroy(di); 1181 1182 #ifdef EKCD 1183 zfree(di->kdcrypto, M_EKCD); 1184 #endif 1185 zfree(di, M_DUMPER); 1186 } 1187 1188 /* Registration of dumpers */ 1189 int 1190 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1191 const struct diocskerneldump_arg *kda) 1192 { 1193 struct dumperinfo *newdi, *listdi; 1194 bool inserted; 1195 uint8_t index; 1196 int error; 1197 1198 index = kda->kda_index; 1199 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1200 index != KDA_REMOVE_ALL); 1201 1202 error = priv_check(curthread, PRIV_SETDUMPER); 1203 if (error != 0) 1204 return (error); 1205 1206 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK 1207 | M_ZERO); 1208 memcpy(newdi, di_template, sizeof(*newdi)); 1209 newdi->blockbuf = NULL; 1210 newdi->kdcrypto = NULL; 1211 newdi->kdcomp = NULL; 1212 strcpy(newdi->di_devname, devname); 1213 1214 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1215 #ifdef EKCD 1216 newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize, 1217 kda->kda_encryption, kda->kda_key, 1218 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1219 if (newdi->kdcrypto == NULL) { 1220 error = EINVAL; 1221 goto cleanup; 1222 } 1223 #else 1224 error = EOPNOTSUPP; 1225 goto cleanup; 1226 #endif 1227 } 1228 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1229 #ifdef EKCD 1230 /* 1231 * We can't support simultaneous unpadded block cipher 1232 * encryption and compression because there is no guarantee the 1233 * length of the compressed result is exactly a multiple of the 1234 * cipher block size. 1235 */ 1236 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1237 error = EOPNOTSUPP; 1238 goto cleanup; 1239 } 1240 #endif 1241 newdi->kdcomp = kerneldumpcomp_create(newdi, 1242 kda->kda_compression); 1243 if (newdi->kdcomp == NULL) { 1244 error = EINVAL; 1245 goto cleanup; 1246 } 1247 } 1248 1249 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1250 1251 /* Add the new configuration to the queue */ 1252 mtx_lock(&dumpconf_list_lk); 1253 inserted = false; 1254 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1255 if (index == 0) { 1256 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1257 inserted = true; 1258 break; 1259 } 1260 index--; 1261 } 1262 if (!inserted) 1263 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1264 mtx_unlock(&dumpconf_list_lk); 1265 1266 return (0); 1267 1268 cleanup: 1269 free_single_dumper(newdi); 1270 return (error); 1271 } 1272 1273 #ifdef DDB 1274 void 1275 dumper_ddb_insert(struct dumperinfo *newdi) 1276 { 1277 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1278 } 1279 1280 void 1281 dumper_ddb_remove(struct dumperinfo *di) 1282 { 1283 TAILQ_REMOVE(&dumper_configs, di, di_next); 1284 } 1285 #endif 1286 1287 static bool 1288 dumper_config_match(const struct dumperinfo *di, const char *devname, 1289 const struct diocskerneldump_arg *kda) 1290 { 1291 if (kda->kda_index == KDA_REMOVE_ALL) 1292 return (true); 1293 1294 if (strcmp(di->di_devname, devname) != 0) 1295 return (false); 1296 1297 /* 1298 * Allow wildcard removal of configs matching a device on g_dev_orphan. 1299 */ 1300 if (kda->kda_index == KDA_REMOVE_DEV) 1301 return (true); 1302 1303 if (di->kdcomp != NULL) { 1304 if (di->kdcomp->kdc_format != kda->kda_compression) 1305 return (false); 1306 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1307 return (false); 1308 #ifdef EKCD 1309 if (di->kdcrypto != NULL) { 1310 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1311 return (false); 1312 /* 1313 * Do we care to verify keys match to delete? It seems weird 1314 * to expect multiple fallback dump configurations on the same 1315 * device that only differ in crypto key. 1316 */ 1317 } else 1318 #endif 1319 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1320 return (false); 1321 1322 return (true); 1323 } 1324 1325 int 1326 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1327 { 1328 struct dumperinfo *di, *sdi; 1329 bool found; 1330 int error; 1331 1332 error = priv_check(curthread, PRIV_SETDUMPER); 1333 if (error != 0) 1334 return (error); 1335 1336 /* 1337 * Try to find a matching configuration, and kill it. 1338 * 1339 * NULL 'kda' indicates remove any configuration matching 'devname', 1340 * which may remove multiple configurations in atypical configurations. 1341 */ 1342 found = false; 1343 mtx_lock(&dumpconf_list_lk); 1344 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1345 if (dumper_config_match(di, devname, kda)) { 1346 found = true; 1347 TAILQ_REMOVE(&dumper_configs, di, di_next); 1348 free_single_dumper(di); 1349 } 1350 } 1351 mtx_unlock(&dumpconf_list_lk); 1352 1353 /* Only produce ENOENT if a more targeted match didn't match. */ 1354 if (!found && kda->kda_index == KDA_REMOVE) 1355 return (ENOENT); 1356 return (0); 1357 } 1358 1359 static int 1360 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1361 { 1362 1363 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1364 offset - di->mediaoffset + length > di->mediasize)) { 1365 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1366 printf( 1367 "Compressed dump failed to fit in device boundaries.\n"); 1368 return (E2BIG); 1369 } 1370 1371 printf("Attempt to write outside dump device boundaries.\n" 1372 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1373 (intmax_t)offset, (intmax_t)di->mediaoffset, 1374 (uintmax_t)length, (intmax_t)di->mediasize); 1375 return (ENOSPC); 1376 } 1377 if (length % di->blocksize != 0) { 1378 printf("Attempt to write partial block of length %ju.\n", 1379 (uintmax_t)length); 1380 return (EINVAL); 1381 } 1382 if (offset % di->blocksize != 0) { 1383 printf("Attempt to write at unaligned offset %jd.\n", 1384 (intmax_t)offset); 1385 return (EINVAL); 1386 } 1387 1388 return (0); 1389 } 1390 1391 #ifdef EKCD 1392 static int 1393 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1394 { 1395 1396 switch (kdc->kdc_encryption) { 1397 case KERNELDUMP_ENC_AES_256_CBC: 1398 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1399 8 * size, buf) <= 0) { 1400 return (EIO); 1401 } 1402 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1403 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1404 return (EIO); 1405 } 1406 break; 1407 case KERNELDUMP_ENC_CHACHA20: 1408 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1409 break; 1410 default: 1411 return (EINVAL); 1412 } 1413 1414 return (0); 1415 } 1416 1417 /* Encrypt data and call dumper. */ 1418 static int 1419 dump_encrypted_write(struct dumperinfo *di, void *virtual, 1420 vm_offset_t physical, off_t offset, size_t length) 1421 { 1422 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1423 struct kerneldumpcrypto *kdc; 1424 int error; 1425 size_t nbytes; 1426 1427 kdc = di->kdcrypto; 1428 1429 while (length > 0) { 1430 nbytes = MIN(length, sizeof(buf)); 1431 bcopy(virtual, buf, nbytes); 1432 1433 if (dump_encrypt(kdc, buf, nbytes) != 0) 1434 return (EIO); 1435 1436 error = dump_write(di, buf, physical, offset, nbytes); 1437 if (error != 0) 1438 return (error); 1439 1440 offset += nbytes; 1441 virtual = (void *)((uint8_t *)virtual + nbytes); 1442 length -= nbytes; 1443 } 1444 1445 return (0); 1446 } 1447 #endif /* EKCD */ 1448 1449 static int 1450 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1451 { 1452 struct dumperinfo *di; 1453 size_t resid, rlength; 1454 int error; 1455 1456 di = arg; 1457 1458 if (length % di->blocksize != 0) { 1459 /* 1460 * This must be the final write after flushing the compression 1461 * stream. Write as many full blocks as possible and stash the 1462 * residual data in the dumper's block buffer. It will be 1463 * padded and written in dump_finish(). 1464 */ 1465 rlength = rounddown(length, di->blocksize); 1466 if (rlength != 0) { 1467 error = _dump_append(di, base, 0, rlength); 1468 if (error != 0) 1469 return (error); 1470 } 1471 resid = length - rlength; 1472 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1473 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1474 di->kdcomp->kdc_resid = resid; 1475 return (EAGAIN); 1476 } 1477 return (_dump_append(di, base, 0, length)); 1478 } 1479 1480 /* 1481 * Write kernel dump headers at the beginning and end of the dump extent. 1482 * Write the kernel dump encryption key after the leading header if we were 1483 * configured to do so. 1484 */ 1485 static int 1486 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1487 { 1488 #ifdef EKCD 1489 struct kerneldumpcrypto *kdc; 1490 #endif 1491 void *buf; 1492 size_t hdrsz; 1493 uint64_t extent; 1494 uint32_t keysize; 1495 int error; 1496 1497 hdrsz = sizeof(*kdh); 1498 if (hdrsz > di->blocksize) 1499 return (ENOMEM); 1500 1501 #ifdef EKCD 1502 kdc = di->kdcrypto; 1503 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1504 #else 1505 keysize = 0; 1506 #endif 1507 1508 /* 1509 * If the dump device has special handling for headers, let it take care 1510 * of writing them out. 1511 */ 1512 if (di->dumper_hdr != NULL) 1513 return (di->dumper_hdr(di, kdh)); 1514 1515 if (hdrsz == di->blocksize) 1516 buf = kdh; 1517 else { 1518 buf = di->blockbuf; 1519 memset(buf, 0, di->blocksize); 1520 memcpy(buf, kdh, hdrsz); 1521 } 1522 1523 extent = dtoh64(kdh->dumpextent); 1524 #ifdef EKCD 1525 if (kdc != NULL) { 1526 error = dump_write(di, kdc->kdc_dumpkey, 0, 1527 di->mediaoffset + di->mediasize - di->blocksize - extent - 1528 keysize, keysize); 1529 if (error != 0) 1530 return (error); 1531 } 1532 #endif 1533 1534 error = dump_write(di, buf, 0, 1535 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1536 keysize, di->blocksize); 1537 if (error == 0) 1538 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - 1539 di->blocksize, di->blocksize); 1540 return (error); 1541 } 1542 1543 /* 1544 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1545 * protect us from metadata and metadata from us. 1546 */ 1547 #define SIZEOF_METADATA (64 * 1024) 1548 1549 /* 1550 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1551 * if requested, and make sure that we have enough space on the dump device. 1552 * 1553 * We set things up so that the dump ends before the last sector of the dump 1554 * device, at which the trailing header is written. 1555 * 1556 * +-----------+------+-----+----------------------------+------+ 1557 * | | lhdr | key | ... kernel dump ... | thdr | 1558 * +-----------+------+-----+----------------------------+------+ 1559 * 1 blk opt <------- dump extent --------> 1 blk 1560 * 1561 * Dumps written using dump_append() start at the beginning of the extent. 1562 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1563 * will not. The true length of the dump is recorded in the leading and trailing 1564 * headers once the dump has been completed. 1565 * 1566 * The dump device may provide a callback, in which case it will initialize 1567 * dumpoff and take care of laying out the headers. 1568 */ 1569 int 1570 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1571 { 1572 #ifdef EKCD 1573 struct kerneldumpcrypto *kdc; 1574 #endif 1575 void *key; 1576 uint64_t dumpextent, span; 1577 uint32_t keysize; 1578 int error; 1579 1580 #ifdef EKCD 1581 /* Send the key before the dump so a partial dump is still usable. */ 1582 kdc = di->kdcrypto; 1583 error = kerneldumpcrypto_init(kdc); 1584 if (error != 0) 1585 return (error); 1586 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1587 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1588 #else 1589 error = 0; 1590 keysize = 0; 1591 key = NULL; 1592 #endif 1593 1594 if (di->dumper_start != NULL) { 1595 error = di->dumper_start(di, key, keysize); 1596 } else { 1597 dumpextent = dtoh64(kdh->dumpextent); 1598 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1599 keysize; 1600 if (di->mediasize < span) { 1601 if (di->kdcomp == NULL) 1602 return (E2BIG); 1603 1604 /* 1605 * We don't yet know how much space the compressed dump 1606 * will occupy, so try to use the whole swap partition 1607 * (minus the first 64KB) in the hope that the 1608 * compressed dump will fit. If that doesn't turn out to 1609 * be enough, the bounds checking in dump_write() 1610 * will catch us and cause the dump to fail. 1611 */ 1612 dumpextent = di->mediasize - span + dumpextent; 1613 kdh->dumpextent = htod64(dumpextent); 1614 } 1615 1616 /* 1617 * The offset at which to begin writing the dump. 1618 */ 1619 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1620 dumpextent; 1621 } 1622 di->origdumpoff = di->dumpoff; 1623 return (error); 1624 } 1625 1626 static int 1627 _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1628 size_t length) 1629 { 1630 int error; 1631 1632 #ifdef EKCD 1633 if (di->kdcrypto != NULL) 1634 error = dump_encrypted_write(di, virtual, physical, di->dumpoff, 1635 length); 1636 else 1637 #endif 1638 error = dump_write(di, virtual, physical, di->dumpoff, length); 1639 if (error == 0) 1640 di->dumpoff += length; 1641 return (error); 1642 } 1643 1644 /* 1645 * Write to the dump device starting at dumpoff. When compression is enabled, 1646 * writes to the device will be performed using a callback that gets invoked 1647 * when the compression stream's output buffer is full. 1648 */ 1649 int 1650 dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1651 size_t length) 1652 { 1653 void *buf; 1654 1655 if (di->kdcomp != NULL) { 1656 /* Bounce through a buffer to avoid CRC errors. */ 1657 if (length > di->maxiosize) 1658 return (EINVAL); 1659 buf = di->kdcomp->kdc_buf; 1660 memmove(buf, virtual, length); 1661 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1662 } 1663 return (_dump_append(di, virtual, physical, length)); 1664 } 1665 1666 /* 1667 * Write to the dump device at the specified offset. 1668 */ 1669 int 1670 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1671 off_t offset, size_t length) 1672 { 1673 int error; 1674 1675 error = dump_check_bounds(di, offset, length); 1676 if (error != 0) 1677 return (error); 1678 return (di->dumper(di->priv, virtual, physical, offset, length)); 1679 } 1680 1681 /* 1682 * Perform kernel dump finalization: flush the compression stream, if necessary, 1683 * write the leading and trailing kernel dump headers now that we know the true 1684 * length of the dump, and optionally write the encryption key following the 1685 * leading header. 1686 */ 1687 int 1688 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1689 { 1690 int error; 1691 1692 if (di->kdcomp != NULL) { 1693 error = compressor_flush(di->kdcomp->kdc_stream); 1694 if (error == EAGAIN) { 1695 /* We have residual data in di->blockbuf. */ 1696 error = _dump_append(di, di->blockbuf, 0, di->blocksize); 1697 if (error == 0) 1698 /* Compensate for _dump_append()'s adjustment. */ 1699 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1700 di->kdcomp->kdc_resid = 0; 1701 } 1702 if (error != 0) 1703 return (error); 1704 1705 /* 1706 * We now know the size of the compressed dump, so update the 1707 * header accordingly and recompute parity. 1708 */ 1709 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1710 kdh->parity = 0; 1711 kdh->parity = kerneldump_parity(kdh); 1712 1713 compressor_reset(di->kdcomp->kdc_stream); 1714 } 1715 1716 error = dump_write_headers(di, kdh); 1717 if (error != 0) 1718 return (error); 1719 1720 (void)dump_write(di, NULL, 0, 0, 0); 1721 return (0); 1722 } 1723 1724 void 1725 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1726 const char *magic, uint32_t archver, uint64_t dumplen) 1727 { 1728 size_t dstsize; 1729 1730 bzero(kdh, sizeof(*kdh)); 1731 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1732 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1733 kdh->version = htod32(KERNELDUMPVERSION); 1734 kdh->architectureversion = htod32(archver); 1735 kdh->dumplength = htod64(dumplen); 1736 kdh->dumpextent = kdh->dumplength; 1737 kdh->dumptime = htod64(time_second); 1738 #ifdef EKCD 1739 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1740 #else 1741 kdh->dumpkeysize = 0; 1742 #endif 1743 kdh->blocksize = htod32(di->blocksize); 1744 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1745 dstsize = sizeof(kdh->versionstring); 1746 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1747 kdh->versionstring[dstsize - 2] = '\n'; 1748 if (panicstr != NULL) 1749 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1750 if (di->kdcomp != NULL) 1751 kdh->compression = di->kdcomp->kdc_format; 1752 kdh->parity = kerneldump_parity(kdh); 1753 } 1754 1755 #ifdef DDB 1756 DB_SHOW_COMMAND(panic, db_show_panic) 1757 { 1758 1759 if (panicstr == NULL) 1760 db_printf("panicstr not set\n"); 1761 else 1762 db_printf("panic: %s\n", panicstr); 1763 } 1764 #endif 1765