/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1986, 1988, 1991, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ekcd.h"
#include "opt_kdb.h"
#include "opt_panic.h"
#include "opt_printf.h"
#include "opt_sched.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/compressor.h>
#include <sys/cons.h>
#include <sys/disk.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/watchdog.h>

#include <crypto/chacha20/chacha.h>
#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha256.h>

#include <ddb/ddb.h>

#include <machine/cpu.h>
#include <machine/dump.h>
#include <machine/pcb.h>
#include <machine/smp.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/signalvar.h>

/*
 * Malloc type used in this file for dumperinfo records, their block
 * buffers and the compression state.
 */
static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");

/*
 * kern.panic_reboot_wait_time: consumed by shutdown_panic().  A value of 0
 * reboots immediately, -1 skips the countdown and waits for a key press,
 * and any other value is the countdown length in seconds.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif
static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
    &panic_reboot_wait_time, 0,
    "Seconds to wait before rebooting after a panic");

/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
#include <machine/stdarg.h>

#ifdef KDB
/*
 * debug.debugger_on_panic: when non-zero, vpanic() enters the debugger.
 * The compiled-in default is 0 with KDB_UNATTENDED and 1 otherwise.
 */
#ifdef KDB_UNATTENDED
static int debugger_on_panic = 0;
#else
static int debugger_on_panic = 1;
#endif
SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_panic, 0, "Run debugger on kernel panic");

/* Not static: this knob is not read anywhere in the visible part of this file. */
int debugger_on_trap = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_trap, 0, "Run debugger on kernel trap before panic");

/*
 * trace_on_panic gates the backtrace printed by vpanic(); trace_all_panics
 * extends it to panics raised while another panic is already in progress.
 * Both default to enabled only when KDB_TRACE is configured.
 */
#ifdef KDB_TRACE
static int trace_on_panic = 1;
static bool trace_all_panics = true;
#else
static int trace_on_panic = 0;
static bool trace_all_panics = false;
#endif
SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &trace_on_panic, 0, "Print stack trace on kernel panic");
SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN,
    &trace_all_panics, 0, "Print stack traces on secondary kernel panics");
#endif /* KDB */

/* When zero, vpanic() adds RB_NOSYNC to the reboot options. */
static int sync_on_panic = 0;
SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
    &sync_on_panic, 0, "Do a sync before rebooting from a panic");

/* When set, vpanic() adds RB_POWEROFF to the reboot options. */
static bool poweroff_on_panic = 0;
SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN,
    &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic");

/* When set, vpanic() adds RB_POWERCYCLE to the reboot options. */
static bool powercycle_on_panic = 0;
SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
    &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");

/* Parent node for the kern.shutdown.* sysctls declared in this file. */
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
    "Shutdown environment");

/*
 * Argument handed to bufshutdown() by kern_reboot(); defaults to 1 only
 * on DIAGNOSTIC kernels.
 */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
    &show_busybufs, 0, "");

int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
    &suspend_blocked, 0, "Block suspend due to a pending shutdown");

#ifdef EKCD
FEATURE(ekcd, "Encrypted kernel crash dumps support");

MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");

/*
 * Per-dumper encryption state.  Allocated by kerneldumpcrypto_create()
 * together with the trailing kdc_dumpkey storage.
 */
struct kerneldumpcrypto {
	/* Cipher selector (KERNELDUMP_ENC_AES_256_CBC or _CHACHA20). */
	uint8_t			kdc_encryption;
	/* Current IV; kerneldumpcrypto_init() replaces it with its SHA-256. */
	uint8_t			kdc_iv[KERNELDUMP_IV_MAX_SIZE];
	/* Cipher context; the member in use depends on kdc_encryption. */
	union {
		struct {
			keyInstance	aes_ki;
			cipherInstance	aes_ci;
		} u_aes;
		struct chacha_ctx	u_chacha;
	} u;
#define	kdc_ki	u.u_aes.aes_ki
#define	kdc_ci	u.u_aes.aes_ci
#define	kdc_chacha	u.u_chacha
	/* Bytes reserved for kdc_dumpkey (rounded up to the block size). */
	uint32_t		kdc_dumpkeysize;
	/* Flexible array member; must remain last. */
	struct kerneldumpkey	kdc_dumpkey[];
};
#endif

/* Per-dumper compression state, built by kerneldumpcomp_create(). */
struct kerneldumpcomp {
	/* KERNELDUMP_COMP_* value the state was created with. */
	uint8_t			kdc_format;
	/* Stream handle returned by compressor_init(). */
	struct compressor	*kdc_stream;
	/* Buffer of di->maxiosize bytes. */
	uint8_t			*kdc_buf;
	/* NOTE(review): presumably the bytes pending in kdc_buf - confirm. */
	size_t			kdc_resid;
};

static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
		    uint8_t compression);
static void	kerneldumpcomp_destroy(struct dumperinfo *di);
static int	kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);

/*
 * Despite the name, this level is passed to compressor_init() for every
 * format kerneldumpcomp_create() accepts, not only gzip.
 */
static int kerneldump_gzlevel = 6;
SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
    &kerneldump_gzlevel, 0,
    "Kernel crash dump compression level");

/*
 * Variable panicstr contains argument to first call to panic; used as flag
 * to indicate that the kernel has already called panic.
 *
 * vpanic() first stores the format string here and then repoints it at a
 * static buffer holding the formatted message.
 */
const char *panicstr;
bool __read_frequently panicked;

int __read_mostly dumping;		/* system is dumping */
int rebooting;				/* system is rebooting */
/*
 * Used to serialize between sysctl kern.shutdown.dumpdevname and list
 * modifications via ioctl.
228 */ 229 static struct mtx dumpconf_list_lk; 230 MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF); 231 232 /* Our selected dumper(s). */ 233 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 234 TAILQ_HEAD_INITIALIZER(dumper_configs); 235 236 /* Context information for dump-debuggers. */ 237 static struct pcb dumppcb; /* Registers. */ 238 lwpid_t dumptid; /* Thread ID. */ 239 240 static struct cdevsw reroot_cdevsw = { 241 .d_version = D_VERSION, 242 .d_name = "reroot", 243 }; 244 245 static void poweroff_wait(void *, int); 246 static void shutdown_halt(void *junk, int howto); 247 static void shutdown_panic(void *junk, int howto); 248 static void shutdown_reset(void *junk, int howto); 249 static int kern_reroot(void); 250 251 /* register various local shutdown events */ 252 static void 253 shutdown_conf(void *unused) 254 { 255 256 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 257 SHUTDOWN_PRI_FIRST); 258 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 259 SHUTDOWN_PRI_LAST + 100); 260 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 261 SHUTDOWN_PRI_LAST + 100); 262 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 263 SHUTDOWN_PRI_LAST + 200); 264 } 265 266 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 267 268 /* 269 * The only reason this exists is to create the /dev/reroot/ directory, 270 * used by reroot code in init(8) as a mountpoint for tmpfs. 271 */ 272 static void 273 reroot_conf(void *unused) 274 { 275 int error; 276 struct cdev *cdev; 277 278 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 279 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 280 if (error != 0) { 281 printf("%s: failed to create device node, error %d", 282 __func__, error); 283 } 284 } 285 286 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 287 288 /* 289 * The system call that results in a reboot. 
290 */ 291 /* ARGSUSED */ 292 int 293 sys_reboot(struct thread *td, struct reboot_args *uap) 294 { 295 int error; 296 297 error = 0; 298 #ifdef MAC 299 error = mac_system_check_reboot(td->td_ucred, uap->opt); 300 #endif 301 if (error == 0) 302 error = priv_check(td, PRIV_REBOOT); 303 if (error == 0) { 304 if (uap->opt & RB_REROOT) 305 error = kern_reroot(); 306 else 307 kern_reboot(uap->opt); 308 } 309 return (error); 310 } 311 312 static void 313 shutdown_nice_task_fn(void *arg, int pending __unused) 314 { 315 int howto; 316 317 howto = (uintptr_t)arg; 318 /* Send a signal to init(8) and have it shutdown the world. */ 319 PROC_LOCK(initproc); 320 if (howto & RB_POWEROFF) 321 kern_psignal(initproc, SIGUSR2); 322 else if (howto & RB_POWERCYCLE) 323 kern_psignal(initproc, SIGWINCH); 324 else if (howto & RB_HALT) 325 kern_psignal(initproc, SIGUSR1); 326 else 327 kern_psignal(initproc, SIGINT); 328 PROC_UNLOCK(initproc); 329 } 330 331 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 332 &shutdown_nice_task_fn, NULL); 333 334 /* 335 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 336 */ 337 void 338 shutdown_nice(int howto) 339 { 340 341 if (initproc != NULL && !SCHEDULER_STOPPED()) { 342 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 343 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 344 } else { 345 /* 346 * No init(8) running, or scheduler would not allow it 347 * to run, so simply reboot. 
348 */ 349 kern_reboot(howto | RB_NOSYNC); 350 } 351 } 352 353 static void 354 print_uptime(void) 355 { 356 int f; 357 struct timespec ts; 358 359 getnanouptime(&ts); 360 printf("Uptime: "); 361 f = 0; 362 if (ts.tv_sec >= 86400) { 363 printf("%ldd", (long)ts.tv_sec / 86400); 364 ts.tv_sec %= 86400; 365 f = 1; 366 } 367 if (f || ts.tv_sec >= 3600) { 368 printf("%ldh", (long)ts.tv_sec / 3600); 369 ts.tv_sec %= 3600; 370 f = 1; 371 } 372 if (f || ts.tv_sec >= 60) { 373 printf("%ldm", (long)ts.tv_sec / 60); 374 ts.tv_sec %= 60; 375 f = 1; 376 } 377 printf("%lds\n", (long)ts.tv_sec); 378 } 379 380 int 381 doadump(boolean_t textdump) 382 { 383 boolean_t coredump; 384 int error; 385 386 error = 0; 387 if (dumping) 388 return (EBUSY); 389 if (TAILQ_EMPTY(&dumper_configs)) 390 return (ENXIO); 391 392 savectx(&dumppcb); 393 dumptid = curthread->td_tid; 394 dumping++; 395 396 coredump = TRUE; 397 #ifdef DDB 398 if (textdump && textdump_pending) { 399 coredump = FALSE; 400 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 401 } 402 #endif 403 if (coredump) { 404 struct dumperinfo *di; 405 406 TAILQ_FOREACH(di, &dumper_configs, di_next) { 407 error = dumpsys(di); 408 if (error == 0) 409 break; 410 } 411 } 412 413 dumping--; 414 return (error); 415 } 416 417 /* 418 * Shutdown the system cleanly to prepare for reboot, halt, or power off. 419 */ 420 void 421 kern_reboot(int howto) 422 { 423 static int once = 0; 424 425 /* 426 * Normal paths here don't hold Giant, but we can wind up here 427 * unexpectedly with it held. Drop it now so we don't have to 428 * drop and pick it up elsewhere. The paths it is locking will 429 * never be returned to, and it is preferable to preclude 430 * deadlock than to lock against code that won't ever 431 * continue. 432 */ 433 while (mtx_owned(&Giant)) 434 mtx_unlock(&Giant); 435 436 #if defined(SMP) 437 /* 438 * Bind us to the first CPU so that all shutdown code runs there. 
Some 439 * systems don't shutdown properly (i.e., ACPI power off) if we 440 * run on another processor. 441 */ 442 if (!SCHEDULER_STOPPED()) { 443 thread_lock(curthread); 444 sched_bind(curthread, CPU_FIRST()); 445 thread_unlock(curthread); 446 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 447 ("boot: not running on cpu 0")); 448 } 449 #endif 450 /* We're in the process of rebooting. */ 451 rebooting = 1; 452 453 /* We are out of the debugger now. */ 454 kdb_active = 0; 455 456 /* 457 * Do any callouts that should be done BEFORE syncing the filesystems. 458 */ 459 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 460 461 /* 462 * Now sync filesystems 463 */ 464 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 465 once = 1; 466 bufshutdown(show_busybufs); 467 } 468 469 print_uptime(); 470 471 cngrab(); 472 473 /* 474 * Ok, now do things that assume all filesystem activity has 475 * been completed. 476 */ 477 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 478 479 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 480 doadump(TRUE); 481 482 /* Now that we're going to really halt the system... */ 483 EVENTHANDLER_INVOKE(shutdown_final, howto); 484 485 for(;;) ; /* safety against shutdown_reset not working */ 486 /* NOTREACHED */ 487 } 488 489 /* 490 * The system call that results in changing the rootfs. 491 */ 492 static int 493 kern_reroot(void) 494 { 495 struct vnode *oldrootvnode, *vp; 496 struct mount *mp, *devmp; 497 int error; 498 499 if (curproc != initproc) 500 return (EPERM); 501 502 /* 503 * Mark the filesystem containing currently-running executable 504 * (the temporary copy of init(8)) busy. 
505 */ 506 vp = curproc->p_textvp; 507 error = vn_lock(vp, LK_SHARED); 508 if (error != 0) 509 return (error); 510 mp = vp->v_mount; 511 error = vfs_busy(mp, MBF_NOWAIT); 512 if (error != 0) { 513 vfs_ref(mp); 514 VOP_UNLOCK(vp); 515 error = vfs_busy(mp, 0); 516 vn_lock(vp, LK_SHARED | LK_RETRY); 517 vfs_rel(mp); 518 if (error != 0) { 519 VOP_UNLOCK(vp); 520 return (ENOENT); 521 } 522 if (VN_IS_DOOMED(vp)) { 523 VOP_UNLOCK(vp); 524 vfs_unbusy(mp); 525 return (ENOENT); 526 } 527 } 528 VOP_UNLOCK(vp); 529 530 /* 531 * Remove the filesystem containing currently-running executable 532 * from the mount list, to prevent it from being unmounted 533 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 534 * 535 * Also preserve /dev - forcibly unmounting it could cause driver 536 * reinitialization. 537 */ 538 539 vfs_ref(rootdevmp); 540 devmp = rootdevmp; 541 rootdevmp = NULL; 542 543 mtx_lock(&mountlist_mtx); 544 TAILQ_REMOVE(&mountlist, mp, mnt_list); 545 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 546 mtx_unlock(&mountlist_mtx); 547 548 oldrootvnode = rootvnode; 549 550 /* 551 * Unmount everything except for the two filesystems preserved above. 552 */ 553 vfs_unmountall(); 554 555 /* 556 * Add /dev back; vfs_mountroot() will move it into its new place. 557 */ 558 mtx_lock(&mountlist_mtx); 559 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 560 mtx_unlock(&mountlist_mtx); 561 rootdevmp = devmp; 562 vfs_rel(rootdevmp); 563 564 /* 565 * Mount the new rootfs. 566 */ 567 vfs_mountroot(); 568 569 /* 570 * Update all references to the old rootvnode. 571 */ 572 mountcheckdirs(oldrootvnode, rootvnode); 573 574 /* 575 * Add the temporary filesystem back and unbusy it. 576 */ 577 mtx_lock(&mountlist_mtx); 578 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 579 mtx_unlock(&mountlist_mtx); 580 vfs_unbusy(mp); 581 582 return (0); 583 } 584 585 /* 586 * If the shutdown was a clean halt, behave accordingly. 
*/
static void
shutdown_halt(void *junk, int howto)
{

	/* Nothing to do unless a halt was requested. */
	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");

		/*
		 * NOTE(review): presumably disarms the watchdog so the
		 * wait for a key below cannot trip it - confirm.
		 */
		wdog_kern_pat(WD_TO_NEVER);

		/* Any key returns, letting later shutdown_final handlers run. */
		switch (cngetc()) {
		case -1:		/* No console, just die */
			cpu_halt();
			/* NOTREACHED */
		default:
			break;
		}
	}
}

/*
 * Check to see if the system panicked, pause and then reboot
 * according to the specified delay.
 *
 * Only acts when RB_DUMP is set in howto.  panic_reboot_wait_time selects
 * the behaviour:
 *   0     - return immediately (reboot now);
 *   -1    - no countdown, wait for a key press;
 *   other - count down in 1/10 second steps; return when the countdown
 *           expires, or wait for a second key press if a key aborted it.
 */
static void
shutdown_panic(void *junk, int howto)
{
	int loop;

	if (howto & RB_DUMP) {
		if (panic_reboot_wait_time != 0) {
			if (panic_reboot_wait_time != -1) {
				printf("Automatic reboot in %d seconds - "
				    "press a key on the console to abort\n",
				    panic_reboot_wait_time);
				for (loop = panic_reboot_wait_time * 10;
				    loop > 0; --loop) {
					DELAY(1000 * 100); /* 1/10th second */
					/* Did user type a key? */
					if (cncheckc() != -1)
						break;
				}
				/* Countdown ran out without a key press. */
				if (!loop)
					return;
			}
		} else { /* zero time specified - reboot NOW */
			return;
		}
		printf("--> Press a key on the console to reboot,\n");
		printf("--> or switch off the system now.\n");
		cngetc();
	}
}

/*
 * Everything done, now reset
 */
static void
shutdown_reset(void *junk, int howto)
{

	printf("Rebooting...\n");
	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */

	/*
	 * Acquiring smp_ipi_mtx here has a double effect:
	 * - it disables interrupts avoiding CPU0 preemption
	 *   by fast handlers (thus deadlocking against other CPUs)
	 * - it avoids deadlocks against smp_rendezvous() or, more
	 *   generally, threads busy-waiting, with this spinlock held,
	 *   and waiting for responses by threads on other CPUs
	 *   (ie. smp_tlb_shootdown()).
	 *
	 * For the !SMP case it just needs to handle the former problem.
	 */
#ifdef SMP
	mtx_lock_spin(&smp_ipi_mtx);
#else
	spinlock_enter();
#endif

	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
	cpu_reset();
	/* NOTREACHED */ /* assuming reset worked */
}

#if defined(WITNESS) || defined(INVARIANT_SUPPORT)
/*
 * State behind the debug.kassert.* sysctls below; consumed by
 * kassert_panic().
 */
static int kassert_warn_only = 0;
#ifdef KDB
static int kassert_do_kdb = 0;
#endif
#ifdef KTR
static int kassert_do_ktr = 0;
#endif
static int kassert_do_log = 1;
static int kassert_log_pps_limit = 4;
static int kassert_log_mute_at = 0;
static int kassert_log_panic_at = 0;
static int kassert_suppress_in_panic = 0;
static int kassert_warnings = 0;

SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options");

/*
 * The knobs are writable at run time only when KASSERT_PANIC_OPTIONAL is
 * configured; otherwise they are read-only tunables.
 */
#ifdef KASSERT_PANIC_OPTIONAL
#define KASSERT_RWTUN	CTLFLAG_RWTUN
#else
#define KASSERT_RWTUN	CTLFLAG_RDTUN
#endif

SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN,
    &kassert_warn_only, 0,
    "KASSERT triggers a panic (0) or just a warning (1)");

#ifdef KDB
SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN,
    &kassert_do_kdb, 0, "KASSERT will enter the debugger");
#endif

#ifdef KTR
SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN,
    &kassert_do_ktr, 0,
    "KASSERT does a KTR, set this to the KTRMASK you want");
#endif

SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN,
    &kassert_do_log, 0,
    "If warn_only is enabled, log (1) or do not log (0) assertion violations");

SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS,
    &kassert_warnings, 0, "number of KASSERTs that have been triggered");

SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN,
    &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");

SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN,
    &kassert_log_pps_limit, 0, "limit number of log messages per second");

SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN,
    &kassert_log_mute_at, 0, "max number of KASSERTS to log");

SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN,
    &kassert_suppress_in_panic, 0,
    "KASSERTs will be suppressed while handling a panic");
#undef KASSERT_RWTUN

static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0,
    kassert_sysctl_kassert, "I", "set to trigger a test kassert");

/*
 * Handler for debug.kassert.kassert.  Reads always report 0; a successful
 * write fires a deliberately failing KASSERT carrying the written value.
 */
static int
kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
{
	int error, i;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error == 0) {
		i = 0;
		error = sysctl_handle_int(oidp, &i, 0, req);
	}
	/* Stop here on error or when this was a read-only request. */
	if (error != 0 || req->newptr == NULL)
		return (error);
	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
	return (0);
}

#ifdef KASSERT_PANIC_OPTIONAL
/*
 * Called by KASSERT, this decides if we will panic
 * or if we will log via printf and/or ktr.
 */
void
kassert_panic(const char *fmt, ...)
{
	/* Static: one formatted-message buffer shared by all callers. */
	static char buf[256];
	va_list ap;

	va_start(ap, fmt);
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * If we are suppressing secondary panics, log the warning but do not
	 * re-enter panic/kdb.
	 */
	if (panicstr != NULL && kassert_suppress_in_panic) {
		if (kassert_do_log) {
			printf("KASSERT failed: %s\n", buf);
#ifdef KDB
			if (trace_all_panics && trace_on_panic)
				kdb_backtrace();
#endif
		}
		return;
	}

	/*
	 * panic if we're not just warning, or if we've exceeded
	 * kassert_log_panic_at warnings.
	 */
	if (!kassert_warn_only ||
	    (kassert_log_panic_at > 0 &&
	     kassert_warnings >= kassert_log_panic_at)) {
		/*
		 * The first va_list was finished with va_end() above, so
		 * start a fresh one for vpanic().
		 */
		va_start(ap, fmt);
		vpanic(fmt, ap);
		/* NORETURN */
	}
#ifdef KTR
	if (kassert_do_ktr)
		CTR0(ktr_mask, buf);
#endif /* KTR */
	/*
	 * log if we've not yet met the mute limit.
	 */
	if (kassert_do_log &&
	    (kassert_log_mute_at == 0 ||
	     kassert_warnings < kassert_log_mute_at)) {
		/* Rate-limiter state for ppsratecheck(). */
		static struct timeval lasterr;
		static int curerr;

		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
			printf("KASSERT failed: %s\n", buf);
			kdb_backtrace();
		}
	}
#ifdef KDB
	if (kassert_do_kdb) {
		kdb_enter(KDB_WHY_KASSERT, buf);
	}
#endif
	/* Count the warning only after the decisions above used the old value. */
	atomic_add_int(&kassert_warnings, 1);
}
#endif /* KASSERT_PANIC_OPTIONAL */
#endif

/*
 * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
 * and then reboots.  If we are called twice, then we avoid trying to sync
 * the disks as this often leads to recursive panics.
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	/* No va_end(): vpanic() ends in kern_reboot() and does not return. */
	va_start(ap, fmt);
	vpanic(fmt, ap);
}

/*
 * va_list flavour of panic().  The first panic formats its message into a
 * static buffer, publishes it through panicstr and requests a dump
 * (RB_DUMP); a panic raised while panicstr is already set prints its
 * message and forces RB_NOSYNC instead.  Ends in kern_reboot().
 */
void
vpanic(const char *fmt, va_list ap)
{
#ifdef SMP
	cpuset_t other_cpus;
#endif
	struct thread *td = curthread;
	int bootopt, newpanic;
	static char buf[256];

	spinlock_enter();

#ifdef SMP
	/*
	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
	 * concurrently entering panic.  Only the winner will proceed
	 * further.
	 */
	if (panicstr == NULL && !kdb_active) {
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
		stop_cpus_hard(other_cpus);
	}
#endif

	/*
	 * Ensure that the scheduler is stopped while panicking, even if panic
	 * has been entered from kdb.
	 */
	td->td_stopsched = 1;

	bootopt = RB_AUTOBOOT;
	newpanic = 0;
	if (panicstr)
		bootopt |= RB_NOSYNC;
	else {
		bootopt |= RB_DUMP;
		/* Set the flag at once; it is repointed at buf below. */
		panicstr = fmt;
		panicked = true;
		newpanic = 1;
	}

	if (newpanic) {
		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
		panicstr = buf;
		cngrab();
		printf("panic: %s\n", buf);
	} else {
		/* buf still holds the first panic's message; print directly. */
		printf("panic: ");
		vprintf(fmt, ap);
		printf("\n");
	}
#ifdef SMP
	printf("cpuid = %d\n", PCPU_GET(cpuid));
#endif
	printf("time = %jd\n", (intmax_t )time_second);
#ifdef KDB
	if ((newpanic || trace_all_panics) && trace_on_panic)
		kdb_backtrace();
	if (debugger_on_panic)
		kdb_enter(KDB_WHY_PANIC, "panic");
#endif
	/*thread_lock(td); */
	td->td_flags |= TDF_INPANIC;
	/* thread_unlock(td); */
	/* Fold the kern.*_on_panic policy knobs into the reboot options. */
	if (!sync_on_panic)
		bootopt |= RB_NOSYNC;
	if (poweroff_on_panic)
		bootopt |= RB_POWEROFF;
	if (powercycle_on_panic)
		bootopt |= RB_POWERCYCLE;
	kern_reboot(bootopt);
}

/*
 * Support for poweroff delay.
 *
 * Please note that setting this delay too short might power off your machine
 * before the write cache on your hard disk has been flushed, leading to
 * soft-updates inconsistencies.
 */
#ifndef POWEROFF_DELAY
# define POWEROFF_DELAY 5000
#endif
static int poweroff_delay = POWEROFF_DELAY;

SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
    &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");

/*
 * shutdown_final handler: for power off / power cycle requests, busy-wait
 * poweroff_delay milliseconds.  A non-positive delay disables the wait.
 */
static void
poweroff_wait(void *junk, int howto)
{

	if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0)
		return;
	/* DELAY() is called with microseconds here, hence the * 1000. */
	DELAY(poweroff_delay * 1000);
}

/*
 * Some system processes (e.g. syncer) need to be stopped at appropriate
 * points in their main loops prior to a system shutdown, so that they
 * won't interfere with the shutdown process (e.g.
by holding a disk buf 941 * to cause sync to fail). For each of these system processes, register 942 * shutdown_kproc() as a handler for one of shutdown events. 943 */ 944 static int kproc_shutdown_wait = 60; 945 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 946 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 947 948 void 949 kproc_shutdown(void *arg, int howto) 950 { 951 struct proc *p; 952 int error; 953 954 if (panicstr) 955 return; 956 957 p = (struct proc *)arg; 958 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 959 kproc_shutdown_wait, p->p_comm); 960 error = kproc_suspend(p, kproc_shutdown_wait * hz); 961 962 if (error == EWOULDBLOCK) 963 printf("timed out\n"); 964 else 965 printf("done\n"); 966 } 967 968 void 969 kthread_shutdown(void *arg, int howto) 970 { 971 struct thread *td; 972 int error; 973 974 if (panicstr) 975 return; 976 977 td = (struct thread *)arg; 978 printf("Waiting (max %d seconds) for system thread `%s' to stop... 
", 979 kproc_shutdown_wait, td->td_name); 980 error = kthread_suspend(td, kproc_shutdown_wait * hz); 981 982 if (error == EWOULDBLOCK) 983 printf("timed out\n"); 984 else 985 printf("done\n"); 986 } 987 988 static int 989 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 990 { 991 char buf[256]; 992 struct dumperinfo *di; 993 struct sbuf sb; 994 int error; 995 996 error = sysctl_wire_old_buffer(req, 0); 997 if (error != 0) 998 return (error); 999 1000 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 1001 1002 mtx_lock(&dumpconf_list_lk); 1003 TAILQ_FOREACH(di, &dumper_configs, di_next) { 1004 if (di != TAILQ_FIRST(&dumper_configs)) 1005 sbuf_putc(&sb, ','); 1006 sbuf_cat(&sb, di->di_devname); 1007 } 1008 mtx_unlock(&dumpconf_list_lk); 1009 1010 error = sbuf_finish(&sb); 1011 sbuf_delete(&sb); 1012 return (error); 1013 } 1014 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, CTLTYPE_STRING | CTLFLAG_RD, 1015 &dumper_configs, 0, dumpdevname_sysctl_handler, "A", 1016 "Device(s) for kernel dumps"); 1017 1018 static int _dump_append(struct dumperinfo *di, void *virtual, 1019 vm_offset_t physical, size_t length); 1020 1021 #ifdef EKCD 1022 static struct kerneldumpcrypto * 1023 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1024 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1025 { 1026 struct kerneldumpcrypto *kdc; 1027 struct kerneldumpkey *kdk; 1028 uint32_t dumpkeysize; 1029 1030 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1031 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1032 1033 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1034 1035 kdc->kdc_encryption = encryption; 1036 switch (kdc->kdc_encryption) { 1037 case KERNELDUMP_ENC_AES_256_CBC: 1038 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1039 goto failed; 1040 break; 1041 case KERNELDUMP_ENC_CHACHA20: 1042 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1043 break; 1044 default: 1045 goto failed; 1046 } 1047 
1048 kdc->kdc_dumpkeysize = dumpkeysize; 1049 kdk = kdc->kdc_dumpkey; 1050 kdk->kdk_encryption = kdc->kdc_encryption; 1051 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1052 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1053 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1054 1055 return (kdc); 1056 failed: 1057 explicit_bzero(kdc, sizeof(*kdc) + dumpkeysize); 1058 free(kdc, M_EKCD); 1059 return (NULL); 1060 } 1061 1062 static int 1063 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1064 { 1065 uint8_t hash[SHA256_DIGEST_LENGTH]; 1066 SHA256_CTX ctx; 1067 struct kerneldumpkey *kdk; 1068 int error; 1069 1070 error = 0; 1071 1072 if (kdc == NULL) 1073 return (0); 1074 1075 /* 1076 * When a user enters ddb it can write a crash dump multiple times. 1077 * Each time it should be encrypted using a different IV. 1078 */ 1079 SHA256_Init(&ctx); 1080 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1081 SHA256_Final(hash, &ctx); 1082 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1083 1084 switch (kdc->kdc_encryption) { 1085 case KERNELDUMP_ENC_AES_256_CBC: 1086 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1087 kdc->kdc_iv) <= 0) { 1088 error = EINVAL; 1089 goto out; 1090 } 1091 break; 1092 case KERNELDUMP_ENC_CHACHA20: 1093 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1094 break; 1095 default: 1096 error = EINVAL; 1097 goto out; 1098 } 1099 1100 kdk = kdc->kdc_dumpkey; 1101 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1102 out: 1103 explicit_bzero(hash, sizeof(hash)); 1104 return (error); 1105 } 1106 1107 static uint32_t 1108 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1109 { 1110 1111 if (kdc == NULL) 1112 return (0); 1113 return (kdc->kdc_dumpkeysize); 1114 } 1115 #endif /* EKCD */ 1116 1117 static struct kerneldumpcomp * 1118 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1119 { 1120 struct kerneldumpcomp *kdcomp; 1121 int format; 1122 1123 switch (compression) { 1124 
case KERNELDUMP_COMP_GZIP: 1125 format = COMPRESS_GZIP; 1126 break; 1127 case KERNELDUMP_COMP_ZSTD: 1128 format = COMPRESS_ZSTD; 1129 break; 1130 default: 1131 return (NULL); 1132 } 1133 1134 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1135 kdcomp->kdc_format = compression; 1136 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1137 format, di->maxiosize, kerneldump_gzlevel, di); 1138 if (kdcomp->kdc_stream == NULL) { 1139 free(kdcomp, M_DUMPER); 1140 return (NULL); 1141 } 1142 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1143 return (kdcomp); 1144 } 1145 1146 static void 1147 kerneldumpcomp_destroy(struct dumperinfo *di) 1148 { 1149 struct kerneldumpcomp *kdcomp; 1150 1151 kdcomp = di->kdcomp; 1152 if (kdcomp == NULL) 1153 return; 1154 compressor_fini(kdcomp->kdc_stream); 1155 explicit_bzero(kdcomp->kdc_buf, di->maxiosize); 1156 free(kdcomp->kdc_buf, M_DUMPER); 1157 free(kdcomp, M_DUMPER); 1158 } 1159 1160 /* 1161 * Must not be present on global list. 
1162 */ 1163 static void 1164 free_single_dumper(struct dumperinfo *di) 1165 { 1166 1167 if (di == NULL) 1168 return; 1169 1170 if (di->blockbuf != NULL) { 1171 explicit_bzero(di->blockbuf, di->blocksize); 1172 free(di->blockbuf, M_DUMPER); 1173 } 1174 1175 kerneldumpcomp_destroy(di); 1176 1177 #ifdef EKCD 1178 if (di->kdcrypto != NULL) { 1179 explicit_bzero(di->kdcrypto, sizeof(*di->kdcrypto) + 1180 di->kdcrypto->kdc_dumpkeysize); 1181 free(di->kdcrypto, M_EKCD); 1182 } 1183 #endif 1184 1185 explicit_bzero(di, sizeof(*di)); 1186 free(di, M_DUMPER); 1187 } 1188 1189 /* Registration of dumpers */ 1190 int 1191 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1192 const struct diocskerneldump_arg *kda) 1193 { 1194 struct dumperinfo *newdi, *listdi; 1195 bool inserted; 1196 uint8_t index; 1197 int error; 1198 1199 index = kda->kda_index; 1200 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1201 index != KDA_REMOVE_ALL); 1202 1203 error = priv_check(curthread, PRIV_SETDUMPER); 1204 if (error != 0) 1205 return (error); 1206 1207 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK 1208 | M_ZERO); 1209 memcpy(newdi, di_template, sizeof(*newdi)); 1210 newdi->blockbuf = NULL; 1211 newdi->kdcrypto = NULL; 1212 newdi->kdcomp = NULL; 1213 strcpy(newdi->di_devname, devname); 1214 1215 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1216 #ifdef EKCD 1217 newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize, 1218 kda->kda_encryption, kda->kda_key, 1219 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1220 if (newdi->kdcrypto == NULL) { 1221 error = EINVAL; 1222 goto cleanup; 1223 } 1224 #else 1225 error = EOPNOTSUPP; 1226 goto cleanup; 1227 #endif 1228 } 1229 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1230 /* 1231 * We can't support simultaneous unpadded block cipher 1232 * encryption and compression because there is no guarantee the 1233 * length of the compressed result is exactly a multiple of the 
1234 * cipher block size. 1235 */ 1236 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1237 error = EOPNOTSUPP; 1238 goto cleanup; 1239 } 1240 newdi->kdcomp = kerneldumpcomp_create(newdi, 1241 kda->kda_compression); 1242 if (newdi->kdcomp == NULL) { 1243 error = EINVAL; 1244 goto cleanup; 1245 } 1246 } 1247 1248 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1249 1250 /* Add the new configuration to the queue */ 1251 mtx_lock(&dumpconf_list_lk); 1252 inserted = false; 1253 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1254 if (index == 0) { 1255 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1256 inserted = true; 1257 break; 1258 } 1259 index--; 1260 } 1261 if (!inserted) 1262 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1263 mtx_unlock(&dumpconf_list_lk); 1264 1265 return (0); 1266 1267 cleanup: 1268 free_single_dumper(newdi); 1269 return (error); 1270 } 1271 1272 #ifdef DDB 1273 void 1274 dumper_ddb_insert(struct dumperinfo *newdi) 1275 { 1276 TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next); 1277 } 1278 1279 void 1280 dumper_ddb_remove(struct dumperinfo *di) 1281 { 1282 TAILQ_REMOVE(&dumper_configs, di, di_next); 1283 } 1284 #endif 1285 1286 static bool 1287 dumper_config_match(const struct dumperinfo *di, const char *devname, 1288 const struct diocskerneldump_arg *kda) 1289 { 1290 if (kda->kda_index == KDA_REMOVE_ALL) 1291 return (true); 1292 1293 if (strcmp(di->di_devname, devname) != 0) 1294 return (false); 1295 1296 /* 1297 * Allow wildcard removal of configs matching a device on g_dev_orphan. 
1298 */ 1299 if (kda->kda_index == KDA_REMOVE_DEV) 1300 return (true); 1301 1302 if (di->kdcomp != NULL) { 1303 if (di->kdcomp->kdc_format != kda->kda_compression) 1304 return (false); 1305 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1306 return (false); 1307 #ifdef EKCD 1308 if (di->kdcrypto != NULL) { 1309 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1310 return (false); 1311 /* 1312 * Do we care to verify keys match to delete? It seems weird 1313 * to expect multiple fallback dump configurations on the same 1314 * device that only differ in crypto key. 1315 */ 1316 } else 1317 #endif 1318 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1319 return (false); 1320 1321 return (true); 1322 } 1323 1324 int 1325 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1326 { 1327 struct dumperinfo *di, *sdi; 1328 bool found; 1329 int error; 1330 1331 error = priv_check(curthread, PRIV_SETDUMPER); 1332 if (error != 0) 1333 return (error); 1334 1335 /* 1336 * Try to find a matching configuration, and kill it. 1337 * 1338 * NULL 'kda' indicates remove any configuration matching 'devname', 1339 * which may remove multiple configurations in atypical configurations. 1340 */ 1341 found = false; 1342 mtx_lock(&dumpconf_list_lk); 1343 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1344 if (dumper_config_match(di, devname, kda)) { 1345 found = true; 1346 TAILQ_REMOVE(&dumper_configs, di, di_next); 1347 free_single_dumper(di); 1348 } 1349 } 1350 mtx_unlock(&dumpconf_list_lk); 1351 1352 /* Only produce ENOENT if a more targeted match didn't match. 
*/ 1353 if (!found && kda->kda_index == KDA_REMOVE) 1354 return (ENOENT); 1355 return (0); 1356 } 1357 1358 static int 1359 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1360 { 1361 1362 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1363 offset - di->mediaoffset + length > di->mediasize)) { 1364 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1365 printf( 1366 "Compressed dump failed to fit in device boundaries.\n"); 1367 return (E2BIG); 1368 } 1369 1370 printf("Attempt to write outside dump device boundaries.\n" 1371 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1372 (intmax_t)offset, (intmax_t)di->mediaoffset, 1373 (uintmax_t)length, (intmax_t)di->mediasize); 1374 return (ENOSPC); 1375 } 1376 if (length % di->blocksize != 0) { 1377 printf("Attempt to write partial block of length %ju.\n", 1378 (uintmax_t)length); 1379 return (EINVAL); 1380 } 1381 if (offset % di->blocksize != 0) { 1382 printf("Attempt to write at unaligned offset %jd.\n", 1383 (intmax_t)offset); 1384 return (EINVAL); 1385 } 1386 1387 return (0); 1388 } 1389 1390 #ifdef EKCD 1391 static int 1392 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1393 { 1394 1395 switch (kdc->kdc_encryption) { 1396 case KERNELDUMP_ENC_AES_256_CBC: 1397 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1398 8 * size, buf) <= 0) { 1399 return (EIO); 1400 } 1401 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1402 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1403 return (EIO); 1404 } 1405 break; 1406 case KERNELDUMP_ENC_CHACHA20: 1407 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1408 break; 1409 default: 1410 return (EINVAL); 1411 } 1412 1413 return (0); 1414 } 1415 1416 /* Encrypt data and call dumper. 
*/ 1417 static int 1418 dump_encrypted_write(struct dumperinfo *di, void *virtual, 1419 vm_offset_t physical, off_t offset, size_t length) 1420 { 1421 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1422 struct kerneldumpcrypto *kdc; 1423 int error; 1424 size_t nbytes; 1425 1426 kdc = di->kdcrypto; 1427 1428 while (length > 0) { 1429 nbytes = MIN(length, sizeof(buf)); 1430 bcopy(virtual, buf, nbytes); 1431 1432 if (dump_encrypt(kdc, buf, nbytes) != 0) 1433 return (EIO); 1434 1435 error = dump_write(di, buf, physical, offset, nbytes); 1436 if (error != 0) 1437 return (error); 1438 1439 offset += nbytes; 1440 virtual = (void *)((uint8_t *)virtual + nbytes); 1441 length -= nbytes; 1442 } 1443 1444 return (0); 1445 } 1446 #endif /* EKCD */ 1447 1448 static int 1449 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1450 { 1451 struct dumperinfo *di; 1452 size_t resid, rlength; 1453 int error; 1454 1455 di = arg; 1456 1457 if (length % di->blocksize != 0) { 1458 /* 1459 * This must be the final write after flushing the compression 1460 * stream. Write as many full blocks as possible and stash the 1461 * residual data in the dumper's block buffer. It will be 1462 * padded and written in dump_finish(). 1463 */ 1464 rlength = rounddown(length, di->blocksize); 1465 if (rlength != 0) { 1466 error = _dump_append(di, base, 0, rlength); 1467 if (error != 0) 1468 return (error); 1469 } 1470 resid = length - rlength; 1471 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1472 di->kdcomp->kdc_resid = resid; 1473 return (EAGAIN); 1474 } 1475 return (_dump_append(di, base, 0, length)); 1476 } 1477 1478 /* 1479 * Write kernel dump headers at the beginning and end of the dump extent. 1480 * Write the kernel dump encryption key after the leading header if we were 1481 * configured to do so. 
1482 */ 1483 static int 1484 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1485 { 1486 #ifdef EKCD 1487 struct kerneldumpcrypto *kdc; 1488 #endif 1489 void *buf, *key; 1490 size_t hdrsz; 1491 uint64_t extent; 1492 uint32_t keysize; 1493 int error; 1494 1495 hdrsz = sizeof(*kdh); 1496 if (hdrsz > di->blocksize) 1497 return (ENOMEM); 1498 1499 #ifdef EKCD 1500 kdc = di->kdcrypto; 1501 key = kdc->kdc_dumpkey; 1502 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1503 #else 1504 key = NULL; 1505 keysize = 0; 1506 #endif 1507 1508 /* 1509 * If the dump device has special handling for headers, let it take care 1510 * of writing them out. 1511 */ 1512 if (di->dumper_hdr != NULL) 1513 return (di->dumper_hdr(di, kdh, key, keysize)); 1514 1515 if (hdrsz == di->blocksize) 1516 buf = kdh; 1517 else { 1518 buf = di->blockbuf; 1519 memset(buf, 0, di->blocksize); 1520 memcpy(buf, kdh, hdrsz); 1521 } 1522 1523 extent = dtoh64(kdh->dumpextent); 1524 #ifdef EKCD 1525 if (kdc != NULL) { 1526 error = dump_write(di, kdc->kdc_dumpkey, 0, 1527 di->mediaoffset + di->mediasize - di->blocksize - extent - 1528 keysize, keysize); 1529 if (error != 0) 1530 return (error); 1531 } 1532 #endif 1533 1534 error = dump_write(di, buf, 0, 1535 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1536 keysize, di->blocksize); 1537 if (error == 0) 1538 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - 1539 di->blocksize, di->blocksize); 1540 return (error); 1541 } 1542 1543 /* 1544 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1545 * protect us from metadata and metadata from us. 1546 */ 1547 #define SIZEOF_METADATA (64 * 1024) 1548 1549 /* 1550 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1551 * if requested, and make sure that we have enough space on the dump device. 
1552 * 1553 * We set things up so that the dump ends before the last sector of the dump 1554 * device, at which the trailing header is written. 1555 * 1556 * +-----------+------+-----+----------------------------+------+ 1557 * | | lhdr | key | ... kernel dump ... | thdr | 1558 * +-----------+------+-----+----------------------------+------+ 1559 * 1 blk opt <------- dump extent --------> 1 blk 1560 * 1561 * Dumps written using dump_append() start at the beginning of the extent. 1562 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1563 * will not. The true length of the dump is recorded in the leading and trailing 1564 * headers once the dump has been completed. 1565 * 1566 * The dump device may provide a callback, in which case it will initialize 1567 * dumpoff and take care of laying out the headers. 1568 */ 1569 int 1570 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1571 { 1572 uint64_t dumpextent, span; 1573 uint32_t keysize; 1574 int error; 1575 1576 #ifdef EKCD 1577 error = kerneldumpcrypto_init(di->kdcrypto); 1578 if (error != 0) 1579 return (error); 1580 keysize = kerneldumpcrypto_dumpkeysize(di->kdcrypto); 1581 #else 1582 error = 0; 1583 keysize = 0; 1584 #endif 1585 1586 if (di->dumper_start != NULL) { 1587 error = di->dumper_start(di); 1588 } else { 1589 dumpextent = dtoh64(kdh->dumpextent); 1590 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1591 keysize; 1592 if (di->mediasize < span) { 1593 if (di->kdcomp == NULL) 1594 return (E2BIG); 1595 1596 /* 1597 * We don't yet know how much space the compressed dump 1598 * will occupy, so try to use the whole swap partition 1599 * (minus the first 64KB) in the hope that the 1600 * compressed dump will fit. If that doesn't turn out to 1601 * be enough, the bounds checking in dump_write() 1602 * will catch us and cause the dump to fail. 
1603 */ 1604 dumpextent = di->mediasize - span + dumpextent; 1605 kdh->dumpextent = htod64(dumpextent); 1606 } 1607 1608 /* 1609 * The offset at which to begin writing the dump. 1610 */ 1611 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1612 dumpextent; 1613 } 1614 di->origdumpoff = di->dumpoff; 1615 return (error); 1616 } 1617 1618 static int 1619 _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1620 size_t length) 1621 { 1622 int error; 1623 1624 #ifdef EKCD 1625 if (di->kdcrypto != NULL) 1626 error = dump_encrypted_write(di, virtual, physical, di->dumpoff, 1627 length); 1628 else 1629 #endif 1630 error = dump_write(di, virtual, physical, di->dumpoff, length); 1631 if (error == 0) 1632 di->dumpoff += length; 1633 return (error); 1634 } 1635 1636 /* 1637 * Write to the dump device starting at dumpoff. When compression is enabled, 1638 * writes to the device will be performed using a callback that gets invoked 1639 * when the compression stream's output buffer is full. 1640 */ 1641 int 1642 dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1643 size_t length) 1644 { 1645 void *buf; 1646 1647 if (di->kdcomp != NULL) { 1648 /* Bounce through a buffer to avoid CRC errors. */ 1649 if (length > di->maxiosize) 1650 return (EINVAL); 1651 buf = di->kdcomp->kdc_buf; 1652 memmove(buf, virtual, length); 1653 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1654 } 1655 return (_dump_append(di, virtual, physical, length)); 1656 } 1657 1658 /* 1659 * Write to the dump device at the specified offset. 
1660 */ 1661 int 1662 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1663 off_t offset, size_t length) 1664 { 1665 int error; 1666 1667 error = dump_check_bounds(di, offset, length); 1668 if (error != 0) 1669 return (error); 1670 return (di->dumper(di->priv, virtual, physical, offset, length)); 1671 } 1672 1673 /* 1674 * Perform kernel dump finalization: flush the compression stream, if necessary, 1675 * write the leading and trailing kernel dump headers now that we know the true 1676 * length of the dump, and optionally write the encryption key following the 1677 * leading header. 1678 */ 1679 int 1680 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1681 { 1682 int error; 1683 1684 if (di->kdcomp != NULL) { 1685 error = compressor_flush(di->kdcomp->kdc_stream); 1686 if (error == EAGAIN) { 1687 /* We have residual data in di->blockbuf. */ 1688 error = dump_write(di, di->blockbuf, 0, di->dumpoff, 1689 di->blocksize); 1690 di->dumpoff += di->kdcomp->kdc_resid; 1691 di->kdcomp->kdc_resid = 0; 1692 } 1693 if (error != 0) 1694 return (error); 1695 1696 /* 1697 * We now know the size of the compressed dump, so update the 1698 * header accordingly and recompute parity. 
1699 */ 1700 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1701 kdh->parity = 0; 1702 kdh->parity = kerneldump_parity(kdh); 1703 1704 compressor_reset(di->kdcomp->kdc_stream); 1705 } 1706 1707 error = dump_write_headers(di, kdh); 1708 if (error != 0) 1709 return (error); 1710 1711 (void)dump_write(di, NULL, 0, 0, 0); 1712 return (0); 1713 } 1714 1715 void 1716 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1717 char *magic, uint32_t archver, uint64_t dumplen) 1718 { 1719 size_t dstsize; 1720 1721 bzero(kdh, sizeof(*kdh)); 1722 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1723 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1724 kdh->version = htod32(KERNELDUMPVERSION); 1725 kdh->architectureversion = htod32(archver); 1726 kdh->dumplength = htod64(dumplen); 1727 kdh->dumpextent = kdh->dumplength; 1728 kdh->dumptime = htod64(time_second); 1729 #ifdef EKCD 1730 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1731 #else 1732 kdh->dumpkeysize = 0; 1733 #endif 1734 kdh->blocksize = htod32(di->blocksize); 1735 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1736 dstsize = sizeof(kdh->versionstring); 1737 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1738 kdh->versionstring[dstsize - 2] = '\n'; 1739 if (panicstr != NULL) 1740 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1741 if (di->kdcomp != NULL) 1742 kdh->compression = di->kdcomp->kdc_format; 1743 kdh->parity = kerneldump_parity(kdh); 1744 } 1745 1746 #ifdef DDB 1747 DB_SHOW_COMMAND(panic, db_show_panic) 1748 { 1749 1750 if (panicstr == NULL) 1751 db_printf("panicstr not set\n"); 1752 else 1753 db_printf("panic: %s\n", panicstr); 1754 } 1755 #endif 1756