1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
*
 *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ekcd.h"
#include "opt_kdb.h"
#include "opt_panic.h"
#include "opt_printf.h"
#include "opt_sched.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/compressor.h>
#include <sys/cons.h>
#include <sys/disk.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/watchdog.h>

#include <crypto/chacha20/chacha.h>
#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha256.h>

#include <ddb/ddb.h>

#include <machine/cpu.h>
#include <machine/dump.h>
#include <machine/pcb.h>
#include <machine/smp.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/signalvar.h>

/* Malloc type used for dumper configurations and their buffers. */
static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");

/*
 * Delay applied by shutdown_panic() before rebooting after a panic.
 * A value of 0 reboots immediately; -1 skips the countdown and waits
 * for a key press on the console instead.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif
static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
    &panic_reboot_wait_time, 0,
    "Seconds to wait before rebooting after a panic");

/*
 * Note that stdarg.h and the ANSI style va_start macro is used for both
 * ANSI and traditional C compilers.
 */
#include <machine/stdarg.h>

/*
 * Debugger-related panic knobs.  They only exist when the kernel is built
 * with KDB; vpanic() and kassert_panic() consult them under the same #ifdef.
 */
#ifdef KDB
#ifdef KDB_UNATTENDED
int debugger_on_panic = 0;
#else
int debugger_on_panic = 1;
#endif
SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_panic, 0, "Run debugger on kernel panic");

static bool debugger_on_recursive_panic = false;
SYSCTL_BOOL(_debug, OID_AUTO, debugger_on_recursive_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_recursive_panic, 0, "Run debugger on recursive kernel panic");

int debugger_on_trap = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_trap, 0, "Run debugger on kernel trap before panic");

/*
 * trace_on_panic enables backtraces at all; trace_all_panics additionally
 * allows them for panics other than the first one (see vpanic()).
 */
#ifdef KDB_TRACE
static int trace_on_panic = 1;
static bool trace_all_panics = true;
#else
static int trace_on_panic = 0;
static bool trace_all_panics = false;
#endif
SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &trace_on_panic, 0, "Print stack trace on kernel panic");
SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN,
    &trace_all_panics, 0, "Print stack traces on secondary kernel panics");
#endif /* KDB */

/*
 * Policy knobs read by vpanic() when it builds the howto flags passed to
 * kern_reboot(): RB_NOSYNC is added unless sync_on_panic is set, and
 * RB_POWEROFF / RB_POWERCYCLE are added when the matching knob is set.
 */
static int sync_on_panic = 0;
SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
    &sync_on_panic, 0, "Do a sync before rebooting from a panic");

static bool poweroff_on_panic = 0;
SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN,
    &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic");

static bool powercycle_on_panic = 0;
SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
    &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");

/* Parent node for the kern.shutdown.* sysctls defined in this file. */
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Shutdown environment");

/* Passed to bufshutdown() by kern_reboot(); defaults to on for DIAGNOSTIC. */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
    &show_busybufs, 0,
    "Show busy buffers during shutdown");

/* NOTE(review): not read or written elsewhere in the visible code. */
int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
    &suspend_blocked, 0, "Block suspend due to a pending shutdown");

#ifdef EKCD
FEATURE(ekcd, "Encrypted kernel crash dumps support");

MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");

/*
 * Per-dumper encryption state, allocated by kerneldumpcrypto_create() with
 * the variable-length key record appended as a flexible array member.
 */
struct kerneldumpcrypto {
	uint8_t			kdc_encryption;	/* KERNELDUMP_ENC_* algorithm */
	/* IV; re-derived by hashing in kerneldumpcrypto_init(). */
	uint8_t			kdc_iv[KERNELDUMP_IV_MAX_SIZE];
	/* Cipher context; which member is live depends on kdc_encryption. */
	union {
		struct {
			keyInstance	aes_ki;
			cipherInstance	aes_ci;
		} u_aes;
		struct chacha_ctx	u_chacha;
	} u;
#define	kdc_ki	u.u_aes.aes_ki
#define	kdc_ci	u.u_aes.aes_ci
#define	kdc_chacha	u.u_chacha
	/* Size of kdc_dumpkey, rounded up to the dumper block size. */
	uint32_t		kdc_dumpkeysize;
	/* Key record: algorithm, IV and the caller-supplied encrypted key. */
	struct kerneldumpkey	kdc_dumpkey[];
};
#endif

/* Per-dumper compression state, allocated by kerneldumpcomp_create(). */
struct kerneldumpcomp {
	uint8_t			kdc_format;	/* KERNELDUMP_COMP_* value */
	struct compressor	*kdc_stream;	/* from compressor_init() */
	uint8_t			*kdc_buf;	/* di->maxiosize bytes */
	/* NOTE(review): kdc_resid is not used in the visible code. */
	size_t			kdc_resid;
};

static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
		    uint8_t compression);
static void	kerneldumpcomp_destroy(struct dumperinfo *di);
static int	kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);

/* Compression level handed to compressor_init() for every format. */
static int kerneldump_gzlevel = 6;
SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
    &kerneldump_gzlevel, 0,
    "Kernel crash dump compression level");

/*
 * Variable panicstr contains argument to first call to panic; used as flag
 * to indicate that the kernel has already called panic.
*/
const char *panicstr;
/* Set to true by vpanic() at the same time panicstr is first set. */
bool __read_frequently panicked;

int __read_mostly dumping;		/* system is dumping */
int rebooting;				/* system is rebooting */
/*
 * Used to serialize between sysctl kern.shutdown.dumpdevname and list
 * modifications via ioctl.
 */
static struct mtx dumpconf_list_lk;
MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF);

/* Our selected dumper(s). */
static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs =
    TAILQ_HEAD_INITIALIZER(dumper_configs);

/* Context information for dump-debuggers. */
static struct pcb dumppcb;		/* Registers. */
lwpid_t dumptid;			/* Thread ID. */

/*
 * Character device switch with no methods set; reroot_conf() uses it only
 * to create the "reroot/reroot" device node.
 */
static struct cdevsw reroot_cdevsw = {
	.d_version = D_VERSION,
	.d_name    = "reroot",
};

static void poweroff_wait(void *, int);
static void shutdown_halt(void *junk, int howto);
static void shutdown_panic(void *junk, int howto);
static void shutdown_reset(void *junk, int howto);
static int kern_reroot(void);

/*
 * Register this file's shutdown_final handlers.  Runs once via the SYSINIT
 * below; the argument is unused.
 *
 * shutdown_halt and shutdown_panic are registered at the same priority
 * (SHUTDOWN_PRI_LAST + 100) and shutdown_reset at SHUTDOWN_PRI_LAST + 200.
 * NOTE(review): presumably lower priority values run first, so that
 * poweroff_wait runs before and shutdown_reset after everything else --
 * confirm against EVENTHANDLER(9).
 */
static void
shutdown_conf(void *unused)
{

	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
	    SHUTDOWN_PRI_FIRST);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
	    SHUTDOWN_PRI_LAST + 100);
	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
	    SHUTDOWN_PRI_LAST + 200);
}

SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL);

/*
 * The only reason this exists is to create the /dev/reroot/ directory,
 * used by reroot code in init(8) as a mountpoint for tmpfs.
277 */ 278 static void 279 reroot_conf(void *unused) 280 { 281 int error; 282 struct cdev *cdev; 283 284 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 285 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 286 if (error != 0) { 287 printf("%s: failed to create device node, error %d", 288 __func__, error); 289 } 290 } 291 292 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 293 294 /* 295 * The system call that results in a reboot. 296 */ 297 /* ARGSUSED */ 298 int 299 sys_reboot(struct thread *td, struct reboot_args *uap) 300 { 301 int error; 302 303 error = 0; 304 #ifdef MAC 305 error = mac_system_check_reboot(td->td_ucred, uap->opt); 306 #endif 307 if (error == 0) 308 error = priv_check(td, PRIV_REBOOT); 309 if (error == 0) { 310 if (uap->opt & RB_REROOT) 311 error = kern_reroot(); 312 else 313 kern_reboot(uap->opt); 314 } 315 return (error); 316 } 317 318 static void 319 shutdown_nice_task_fn(void *arg, int pending __unused) 320 { 321 int howto; 322 323 howto = (uintptr_t)arg; 324 /* Send a signal to init(8) and have it shutdown the world. */ 325 PROC_LOCK(initproc); 326 if (howto & RB_POWEROFF) 327 kern_psignal(initproc, SIGUSR2); 328 else if (howto & RB_POWERCYCLE) 329 kern_psignal(initproc, SIGWINCH); 330 else if (howto & RB_HALT) 331 kern_psignal(initproc, SIGUSR1); 332 else 333 kern_psignal(initproc, SIGINT); 334 PROC_UNLOCK(initproc); 335 } 336 337 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 338 &shutdown_nice_task_fn, NULL); 339 340 /* 341 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 342 */ 343 void 344 shutdown_nice(int howto) 345 { 346 347 if (initproc != NULL && !SCHEDULER_STOPPED()) { 348 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 349 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 350 } else { 351 /* 352 * No init(8) running, or scheduler would not allow it 353 * to run, so simply reboot. 
354 */ 355 kern_reboot(howto | RB_NOSYNC); 356 } 357 } 358 359 static void 360 print_uptime(void) 361 { 362 int f; 363 struct timespec ts; 364 365 getnanouptime(&ts); 366 printf("Uptime: "); 367 f = 0; 368 if (ts.tv_sec >= 86400) { 369 printf("%ldd", (long)ts.tv_sec / 86400); 370 ts.tv_sec %= 86400; 371 f = 1; 372 } 373 if (f || ts.tv_sec >= 3600) { 374 printf("%ldh", (long)ts.tv_sec / 3600); 375 ts.tv_sec %= 3600; 376 f = 1; 377 } 378 if (f || ts.tv_sec >= 60) { 379 printf("%ldm", (long)ts.tv_sec / 60); 380 ts.tv_sec %= 60; 381 f = 1; 382 } 383 printf("%lds\n", (long)ts.tv_sec); 384 } 385 386 int 387 doadump(boolean_t textdump) 388 { 389 boolean_t coredump; 390 int error; 391 392 error = 0; 393 if (dumping) 394 return (EBUSY); 395 if (TAILQ_EMPTY(&dumper_configs)) 396 return (ENXIO); 397 398 savectx(&dumppcb); 399 dumptid = curthread->td_tid; 400 dumping++; 401 402 coredump = TRUE; 403 #ifdef DDB 404 if (textdump && textdump_pending) { 405 coredump = FALSE; 406 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 407 } 408 #endif 409 if (coredump) { 410 struct dumperinfo *di; 411 412 TAILQ_FOREACH(di, &dumper_configs, di_next) { 413 error = dumpsys(di); 414 if (error == 0) 415 break; 416 } 417 } 418 419 dumping--; 420 return (error); 421 } 422 423 /* 424 * kern_reboot(9): Shut down the system cleanly to prepare for reboot, halt, or 425 * power off. 426 */ 427 void 428 kern_reboot(int howto) 429 { 430 static int once = 0; 431 432 /* 433 * Normal paths here don't hold Giant, but we can wind up here 434 * unexpectedly with it held. Drop it now so we don't have to 435 * drop and pick it up elsewhere. The paths it is locking will 436 * never be returned to, and it is preferable to preclude 437 * deadlock than to lock against code that won't ever 438 * continue. 439 */ 440 while (mtx_owned(&Giant)) 441 mtx_unlock(&Giant); 442 443 #if defined(SMP) 444 /* 445 * Bind us to the first CPU so that all shutdown code runs there. 
Some 446 * systems don't shutdown properly (i.e., ACPI power off) if we 447 * run on another processor. 448 */ 449 if (!SCHEDULER_STOPPED()) { 450 thread_lock(curthread); 451 sched_bind(curthread, CPU_FIRST()); 452 thread_unlock(curthread); 453 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 454 ("%s: not running on cpu 0", __func__)); 455 } 456 #endif 457 /* We're in the process of rebooting. */ 458 rebooting = 1; 459 460 /* We are out of the debugger now. */ 461 kdb_active = 0; 462 463 /* 464 * Do any callouts that should be done BEFORE syncing the filesystems. 465 */ 466 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 467 468 /* 469 * Now sync filesystems 470 */ 471 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 472 once = 1; 473 bufshutdown(show_busybufs); 474 } 475 476 print_uptime(); 477 478 cngrab(); 479 480 /* 481 * Ok, now do things that assume all filesystem activity has 482 * been completed. 483 */ 484 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 485 486 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 487 doadump(TRUE); 488 489 /* Now that we're going to really halt the system... */ 490 EVENTHANDLER_INVOKE(shutdown_final, howto); 491 492 for(;;) ; /* safety against shutdown_reset not working */ 493 /* NOTREACHED */ 494 } 495 496 /* 497 * The system call that results in changing the rootfs. 498 */ 499 static int 500 kern_reroot(void) 501 { 502 struct vnode *oldrootvnode, *vp; 503 struct mount *mp, *devmp; 504 int error; 505 506 if (curproc != initproc) 507 return (EPERM); 508 509 /* 510 * Mark the filesystem containing currently-running executable 511 * (the temporary copy of init(8)) busy. 
512 */ 513 vp = curproc->p_textvp; 514 error = vn_lock(vp, LK_SHARED); 515 if (error != 0) 516 return (error); 517 mp = vp->v_mount; 518 error = vfs_busy(mp, MBF_NOWAIT); 519 if (error != 0) { 520 vfs_ref(mp); 521 VOP_UNLOCK(vp); 522 error = vfs_busy(mp, 0); 523 vn_lock(vp, LK_SHARED | LK_RETRY); 524 vfs_rel(mp); 525 if (error != 0) { 526 VOP_UNLOCK(vp); 527 return (ENOENT); 528 } 529 if (VN_IS_DOOMED(vp)) { 530 VOP_UNLOCK(vp); 531 vfs_unbusy(mp); 532 return (ENOENT); 533 } 534 } 535 VOP_UNLOCK(vp); 536 537 /* 538 * Remove the filesystem containing currently-running executable 539 * from the mount list, to prevent it from being unmounted 540 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 541 * 542 * Also preserve /dev - forcibly unmounting it could cause driver 543 * reinitialization. 544 */ 545 546 vfs_ref(rootdevmp); 547 devmp = rootdevmp; 548 rootdevmp = NULL; 549 550 mtx_lock(&mountlist_mtx); 551 TAILQ_REMOVE(&mountlist, mp, mnt_list); 552 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 553 mtx_unlock(&mountlist_mtx); 554 555 oldrootvnode = rootvnode; 556 557 /* 558 * Unmount everything except for the two filesystems preserved above. 559 */ 560 vfs_unmountall(); 561 562 /* 563 * Add /dev back; vfs_mountroot() will move it into its new place. 564 */ 565 mtx_lock(&mountlist_mtx); 566 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 567 mtx_unlock(&mountlist_mtx); 568 rootdevmp = devmp; 569 vfs_rel(rootdevmp); 570 571 /* 572 * Mount the new rootfs. 573 */ 574 vfs_mountroot(); 575 576 /* 577 * Update all references to the old rootvnode. 578 */ 579 mountcheckdirs(oldrootvnode, rootvnode); 580 581 /* 582 * Add the temporary filesystem back and unbusy it. 583 */ 584 mtx_lock(&mountlist_mtx); 585 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 586 mtx_unlock(&mountlist_mtx); 587 vfs_unbusy(mp); 588 589 return (0); 590 } 591 592 /* 593 * If the shutdown was a clean halt, behave accordingly. 
*/
static void
shutdown_halt(void *junk, int howto)
{

	if (howto & RB_HALT) {
		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");

		/* NOTE(review): presumably turns the watchdog off -- confirm. */
		wdog_kern_pat(WD_TO_NEVER);

		switch (cngetc()) {
		case -1:		/* No console, just die */
			cpu_halt();
			/* NOTREACHED */
		default:
			break;
		}
	}
}

/*
 * Check to see if the system panicked, pause and then reboot
 * according to the specified delay.
 *
 * Only acts when RB_DUMP is set in 'howto'.  With panic_reboot_wait_time
 * equal to 0 it returns at once.  With -1 it skips the countdown and goes
 * straight to waiting for a key.  Otherwise it counts down in 1/10 second
 * steps: if the countdown expires it returns so the reboot proceeds; if a
 * key is typed first it stops and waits for another key press.
 */
static void
shutdown_panic(void *junk, int howto)
{
	int loop;

	if (howto & RB_DUMP) {
		if (panic_reboot_wait_time != 0) {
			if (panic_reboot_wait_time != -1) {
				printf("Automatic reboot in %d seconds - "
				       "press a key on the console to abort\n",
					panic_reboot_wait_time);
				for (loop = panic_reboot_wait_time * 10;
				     loop > 0; --loop) {
					DELAY(1000 * 100); /* 1/10th second */
					/* Did user type a key? */
					if (cncheckc() != -1)
						break;
				}
				/* loop == 0: countdown ran out untouched. */
				if (!loop)
					return;
			}
		} else { /* zero time specified - reboot NOW */
			return;
		}
		printf("--> Press a key on the console to reboot,\n");
		printf("--> or switch off the system now.\n");
		cngetc();
	}
}

/*
 * Everything done, now reset
 */
static void
shutdown_reset(void *junk, int howto)
{

	printf("Rebooting...\n");
	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */

	/*
	 * Acquiring smp_ipi_mtx here has a double effect:
	 * - it disables interrupts avoiding CPU0 preemption
	 *   by fast handlers (thus deadlocking against other CPUs)
	 * - it avoids deadlocks against smp_rendezvous() or, more
	 *   generally, threads busy-waiting, with this spinlock held,
	 *   and waiting for responses by threads on other CPUs
	 *   (ie. smp_tlb_shootdown()).
	 *
	 * For the !SMP case it just needs to handle the former problem.
	 */
#ifdef SMP
	mtx_lock_spin(&smp_ipi_mtx);
#else
	spinlock_enter();
#endif

	cpu_reset();
	/* NOTREACHED */ /* assuming reset worked */
}

/*
 * Tunables steering kassert_panic().  Each one is exported below under
 * debug.kassert with its own description string.
 */
#if defined(WITNESS) || defined(INVARIANT_SUPPORT)
static int kassert_warn_only = 0;
#ifdef KDB
static int kassert_do_kdb = 0;
#endif
#ifdef KTR
static int kassert_do_ktr = 0;
#endif
static int kassert_do_log = 1;
static int kassert_log_pps_limit = 4;
static int kassert_log_mute_at = 0;
static int kassert_log_panic_at = 0;
static int kassert_suppress_in_panic = 0;
static int kassert_warnings = 0;

SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "kassert options");

/*
 * The knobs are writable at run time only when KASSERT_PANIC_OPTIONAL is
 * configured; otherwise they use the CTLFLAG_RDTUN access flags.
 */
#ifdef KASSERT_PANIC_OPTIONAL
#define KASSERT_RWTUN	CTLFLAG_RWTUN
#else
#define KASSERT_RWTUN	CTLFLAG_RDTUN
#endif

SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN,
    &kassert_warn_only, 0,
    "KASSERT triggers a panic (0) or just a warning (1)");

#ifdef KDB
SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN,
    &kassert_do_kdb, 0, "KASSERT will enter the debugger");
#endif

#ifdef KTR
/*
 * NOTE(review): declared with SYSCTL_UINT although kassert_do_ktr is an
 * int, and kassert_panic() traces with ktr_mask rather than this value.
 */
SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN,
    &kassert_do_ktr, 0,
    "KASSERT does a KTR, set this to the KTRMASK you want");
#endif

SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN,
    &kassert_do_log, 0,
    "If warn_only is enabled, log (1) or do not log (0) assertion violations");

SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, CTLFLAG_RD | CTLFLAG_STATS,
    &kassert_warnings, 0, "number of KASSERTs that have been triggered");

SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN,
    &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic");

SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN,
    &kassert_log_pps_limit, 0, "limit number of log messages per second");

SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN,
    &kassert_log_mute_at, 0, "max number of KASSERTS to log");

SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN,
    &kassert_suppress_in_panic, 0,
    "KASSERTs will be suppressed while handling a panic");
#undef KASSERT_RWTUN

static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS);

SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, NULL, 0,
    kassert_sysctl_kassert, "I",
    "set to trigger a test kassert");

/*
 * Handler for debug.kassert.kassert.  A read reports 0.  Any successful
 * write fires a deliberately failing KASSERT whose message includes the
 * value written.
 */
static int
kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS)
{
	int error, i;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error == 0) {
		i = 0;
		error = sysctl_handle_int(oidp, &i, 0, req);
	}
	/* Nothing more to do on error or when no new value was supplied. */
	if (error != 0 || req->newptr == NULL)
		return (error);
	KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i));
	return (0);
}

#ifdef KASSERT_PANIC_OPTIONAL
/*
 * Called by KASSERT, this decides if we will panic
 * or if we will log via printf and/or ktr.
 *
 * The message is formatted into a static 256-byte buffer shared by all
 * callers.  Every call that returns through the warning path increments
 * kassert_warnings; the suppressed-in-panic path returns before that.
 */
void
kassert_panic(const char *fmt, ...)
{
	static char buf[256];
	va_list ap;

	va_start(ap, fmt);
	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/*
	 * If we are suppressing secondary panics, log the warning but do not
	 * re-enter panic/kdb.
	 */
	if (panicstr != NULL && kassert_suppress_in_panic) {
		if (kassert_do_log) {
			printf("KASSERT failed: %s\n", buf);
#ifdef KDB
			if (trace_all_panics && trace_on_panic)
				kdb_backtrace();
#endif
		}
		return;
	}

	/*
	 * panic if we're not just warning, or if we've exceeded
	 * kassert_log_panic_at warnings.
	 */
	if (!kassert_warn_only ||
	    (kassert_log_panic_at > 0 &&
	     kassert_warnings >= kassert_log_panic_at)) {
		/* Restart the argument list; it was consumed above. */
		va_start(ap, fmt);
		vpanic(fmt, ap);
		/* NORETURN */
	}
#ifdef KTR
	if (kassert_do_ktr)
		CTR0(ktr_mask, buf);
#endif /* KTR */
	/*
	 * log if we've not yet met the mute limit.
	 */
	if (kassert_do_log &&
	    (kassert_log_mute_at == 0 ||
	     kassert_warnings < kassert_log_mute_at)) {
		static struct timeval lasterr;
		static int curerr;

		/* Rate-limit the log to kassert_log_pps_limit per second. */
		if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) {
			printf("KASSERT failed: %s\n", buf);
			kdb_backtrace();
		}
	}
#ifdef KDB
	if (kassert_do_kdb) {
		kdb_enter(KDB_WHY_KASSERT, buf);
	}
#endif
	atomic_add_int(&kassert_warnings, 1);
}
#endif /* KASSERT_PANIC_OPTIONAL */
#endif

/*
 * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
 * and then reboots.  If we are called twice, then we avoid trying to sync
 * the disks as this often leads to recursive panics.
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpanic(fmt, ap);
}

/*
 * va_list form of panic().
 *
 * On the first panic (panicstr still NULL) the other CPUs are stopped
 * (SMP, unless already inside kdb), the message is formatted into a static
 * buffer that panicstr is pointed at, and RB_DUMP is requested.  On a
 * later panic the message is only printed and RB_NOSYNC is forced.  In
 * both cases the function optionally backtraces and enters the debugger
 * (KDB), marks the thread TDF_INPANIC, applies the sync/poweroff/
 * powercycle policy knobs and ends in kern_reboot().
 */
void
vpanic(const char *fmt, va_list ap)
{
#ifdef SMP
	cpuset_t other_cpus;
#endif
	struct thread *td = curthread;
	int bootopt, newpanic;
	static char buf[256];

	spinlock_enter();

#ifdef SMP
	/*
	 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from
	 * concurrently entering panic.  Only the winner will proceed
	 * further.
	 */
	if (panicstr == NULL && !kdb_active) {
		other_cpus = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
		stop_cpus_hard(other_cpus);
	}
#endif

	/*
	 * Ensure that the scheduler is stopped while panicking, even if panic
	 * has been entered from kdb.
	 */
	td->td_stopsched = 1;

	bootopt = RB_AUTOBOOT;
	newpanic = 0;
	if (panicstr)
		bootopt |= RB_NOSYNC;
	else {
		bootopt |= RB_DUMP;
		/* Set the flag first; replaced by the formatted text below. */
		panicstr = fmt;
		panicked = true;
		newpanic = 1;
	}

	if (newpanic) {
		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
		panicstr = buf;
		cngrab();
		printf("panic: %s\n", buf);
	} else {
		/* Leave buf/panicstr from the first panic untouched. */
		printf("panic: ");
		vprintf(fmt, ap);
		printf("\n");
	}
#ifdef SMP
	printf("cpuid = %d\n", PCPU_GET(cpuid));
#endif
	printf("time = %jd\n", (intmax_t )time_second);
#ifdef KDB
	if ((newpanic || trace_all_panics) && trace_on_panic)
		kdb_backtrace();
	if (debugger_on_panic)
		kdb_enter(KDB_WHY_PANIC, "panic");
	else if (!newpanic && debugger_on_recursive_panic)
		kdb_enter(KDB_WHY_PANIC, "re-panic");
#endif
	/*thread_lock(td); */
	td->td_flags |= TDF_INPANIC;
	/* thread_unlock(td); */
	if (!sync_on_panic)
		bootopt |= RB_NOSYNC;
	if (poweroff_on_panic)
		bootopt |= RB_POWEROFF;
	if (powercycle_on_panic)
		bootopt |= RB_POWERCYCLE;
	kern_reboot(bootopt);
}

/*
 * Support for poweroff delay.
 *
 * Please note that setting this delay too short might power off your machine
 * before the write cache on your hard disk has been flushed, leading to
 * soft-updates inconsistencies.
 */
#ifndef POWEROFF_DELAY
# define POWEROFF_DELAY 5000
#endif
static int poweroff_delay = POWEROFF_DELAY;

SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
    &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)");

/*
 * shutdown_final handler: busy-wait poweroff_delay milliseconds before a
 * power off or power cycle.  Does nothing for other shutdown types or
 * when the delay is not positive.
 */
static void
poweroff_wait(void *junk, int howto)
{

	if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0)
		return;
	/* NOTE(review): the int multiplication is not range-checked. */
	DELAY(poweroff_delay * 1000);
}

/*
 * Some system processes (e.g.
syncer) need to be stopped at appropriate
 * points in their main loops prior to a system shutdown, so that they
 * won't interfere with the shutdown process (e.g. by holding a disk buf
 * to cause sync to fail).  For each of these system processes, register
 * kproc_shutdown() (or kthread_shutdown() for a single thread) as a handler
 * for one of shutdown events.
 */
static int kproc_shutdown_wait = 60;
SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
    &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process");

/*
 * Shutdown event handler that suspends the system process passed as 'arg'
 * (a struct proc *), waiting at most kproc_shutdown_wait seconds.  Does
 * nothing once the kernel has panicked.  'howto' is unused.
 */
void
kproc_shutdown(void *arg, int howto)
{
	struct proc *p;
	int error;

	if (panicstr)
		return;

	p = (struct proc *)arg;
	printf("Waiting (max %d seconds) for system process `%s' to stop... ",
	    kproc_shutdown_wait, p->p_comm);
	error = kproc_suspend(p, kproc_shutdown_wait * hz);

	/* Any result other than EWOULDBLOCK is reported as "done". */
	if (error == EWOULDBLOCK)
		printf("timed out\n");
	else
		printf("done\n");
}

/*
 * Same as kproc_shutdown(), but for a single kernel thread: 'arg' is a
 * struct thread * and kthread_suspend() is used.
 */
void
kthread_shutdown(void *arg, int howto)
{
	struct thread *td;
	int error;

	if (panicstr)
		return;

	td = (struct thread *)arg;
	printf("Waiting (max %d seconds) for system thread `%s' to stop... ",
	    kproc_shutdown_wait, td->td_name);
	error = kthread_suspend(td, kproc_shutdown_wait * hz);

	/* Any result other than EWOULDBLOCK is reported as "done". */
	if (error == EWOULDBLOCK)
		printf("timed out\n");
	else
		printf("done\n");
}

/*
 * Handler for kern.shutdown.dumpdevname: emits the device names of all
 * configured dumpers, in list order, separated by commas.  The list is
 * walked under dumpconf_list_lk.
 */
static int
dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	char buf[256];
	struct dumperinfo *di;
	struct sbuf sb;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req);

	mtx_lock(&dumpconf_list_lk);
	TAILQ_FOREACH(di, &dumper_configs, di_next) {
		/* Comma before every entry except the first. */
		if (di != TAILQ_FIRST(&dumper_configs))
			sbuf_putc(&sb, ',');
		sbuf_cat(&sb, di->di_devname);
	}
	mtx_unlock(&dumpconf_list_lk);

	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}
SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, &dumper_configs, 0,
    dumpdevname_sysctl_handler, "A",
    "Device(s) for kernel dumps");

static int	_dump_append(struct dumperinfo *di, void *virtual,
		    vm_offset_t physical, size_t length);

#ifdef EKCD
/*
 * Allocate and set up encryption state for one dumper.
 *
 * 'encryption' selects AES-256-CBC or ChaCha20 and 'key' is the 256-bit
 * key for it.  'encryptedkey' ('encryptedkeysize' bytes) is copied into
 * the trailing key record, whose total size is rounded up to 'blocksize'.
 * A random IV is generated.  Returns NULL, after zeroing and freeing the
 * state, for an unknown algorithm or when the AES key setup fails.
 */
static struct kerneldumpcrypto *
kerneldumpcrypto_create(size_t blocksize, uint8_t encryption,
    const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey)
{
	struct kerneldumpcrypto *kdc;
	struct kerneldumpkey *kdk;
	uint32_t dumpkeysize;

	dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize);
	kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO);

	arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0);

	kdc->kdc_encryption = encryption;
	switch (kdc->kdc_encryption) {
	case KERNELDUMP_ENC_AES_256_CBC:
		if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0)
			goto failed;
		break;
	case KERNELDUMP_ENC_CHACHA20:
		chacha_keysetup(&kdc->kdc_chacha, key, 256);
		break;
	default:
		goto failed;
	}

	/* Fill in the key record kept behind the crypto state. */
	kdc->kdc_dumpkeysize = dumpkeysize;
	kdk = kdc->kdc_dumpkey;
	kdk->kdk_encryption = kdc->kdc_encryption;
	memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
	kdk->kdk_encryptedkeysize = htod32(encryptedkeysize);
	memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize);

	return (kdc);
failed:
	zfree(kdc, M_EKCD);
	return (NULL);
}

/*
 * Prepare the crypto state for a new dump: derive a fresh IV by hashing
 * the previous one, initialise the cipher with it and record it in the
 * key record.  A NULL 'kdc' (no encryption) is accepted and returns 0.
 * Returns EINVAL when cipher initialisation fails or the algorithm is
 * unknown.
 */
static int
kerneldumpcrypto_init(struct kerneldumpcrypto *kdc)
{
	uint8_t hash[SHA256_DIGEST_LENGTH];
	SHA256_CTX ctx;
	struct kerneldumpkey *kdk;
	int error;

	error = 0;

	if (kdc == NULL)
		return (0);

	/*
	 * When a user enters ddb it can write a crash dump multiple times.
	 * Each time it should be encrypted using a different IV.
	 */
	SHA256_Init(&ctx);
	SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv));
	SHA256_Final(hash, &ctx);
	bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv));

	switch (kdc->kdc_encryption) {
	case KERNELDUMP_ENC_AES_256_CBC:
		if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC,
		    kdc->kdc_iv) <= 0) {
			error = EINVAL;
			goto out;
		}
		break;
	case KERNELDUMP_ENC_CHACHA20:
		chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL);
		break;
	default:
		error = EINVAL;
		goto out;
	}

	kdk = kdc->kdc_dumpkey;
	memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv));
out:
	/* Do not leave the IV-derived hash on the stack. */
	explicit_bzero(hash, sizeof(hash));
	return (error);
}

/* Size of the key record, or 0 when 'kdc' is NULL. */
static uint32_t
kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc)
{

	if (kdc == NULL)
		return (0);
	return (kdc->kdc_dumpkeysize);
}
#endif /* EKCD */

/*
 * Allocate compression state for dumper 'di'.
 *
 * 'compression' must be KERNELDUMP_COMP_GZIP or KERNELDUMP_COMP_ZSTD;
 * any other value, or a compressor_init() failure, yields NULL.  On
 * success the state holds a compressor stream that writes through
 * kerneldumpcomp_write_cb() and a buffer of di->maxiosize bytes.
 */
static struct kerneldumpcomp *
kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression)
{
	struct kerneldumpcomp *kdcomp;
	int format;

	switch (compression) {
	case KERNELDUMP_COMP_GZIP:
		format = COMPRESS_GZIP;
		break;
	case KERNELDUMP_COMP_ZSTD:
		format = COMPRESS_ZSTD;
		break;
	default:
		return (NULL);
	}

	kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO);
	kdcomp->kdc_format = compression;
	kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb,
	    format, di->maxiosize, kerneldump_gzlevel, di);
	if (kdcomp->kdc_stream == NULL) {
		free(kdcomp, M_DUMPER);
		return (NULL);
	}
	kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP);
	return (kdcomp);
}

/*
 * Release the compression state attached to 'di', if any.  di->kdcomp
 * itself is not cleared here.
 */
static void
kerneldumpcomp_destroy(struct dumperinfo *di)
{
	struct kerneldumpcomp *kdcomp;

	kdcomp = di->kdcomp;
	if (kdcomp == NULL)
		return;
	compressor_fini(kdcomp->kdc_stream);
	zfree(kdcomp->kdc_buf, M_DUMPER);
	free(kdcomp, M_DUMPER);
}

/*
 * Free a dumper configuration and everything hanging off it (block
 * buffer, compression state and, with EKCD, crypto state).  NULL is
 * accepted.
 *
 * Must not be present on global list.
 */
static void
free_single_dumper(struct dumperinfo *di)
{

	if (di == NULL)
		return;

	zfree(di->blockbuf, M_DUMPER);

	kerneldumpcomp_destroy(di);

#ifdef EKCD
	zfree(di->kdcrypto, M_EKCD);
#endif
	zfree(di, M_DUMPER);
}

/* Registration of dumpers */
/*
 * Create a dumper configuration from 'di_template' for device 'devname'
 * and insert it into the global list at position kda->kda_index (appended
 * when the list is shorter than that).
 *
 * Requires PRIV_SETDUMPER.  Returns 0 on success, the priv_check() error,
 * EINVAL when crypto or compression setup fails, or EOPNOTSUPP when
 * encryption is requested without EKCD or AES-256-CBC is combined with
 * compression.  On failure the partly built configuration is freed.
 */
int
dumper_insert(const struct dumperinfo *di_template, const char *devname,
    const struct diocskerneldump_arg *kda)
{
	struct dumperinfo *newdi, *listdi;
	bool inserted;
	uint8_t index;
	int error;

	index = kda->kda_index;
	/* The removal pseudo-indices are not valid insert positions. */
	MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV &&
	    index != KDA_REMOVE_ALL);

	error = priv_check(curthread, PRIV_SETDUMPER);
	if (error != 0)
		return (error);

	/* The device name is stored inline after the structure. */
	newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK
	    | M_ZERO);
	memcpy(newdi, di_template, sizeof(*newdi));
	/* Never inherit per-configuration resources from the template. */
	newdi->blockbuf = NULL;
	newdi->kdcrypto = NULL;
	newdi->kdcomp = NULL;
	strcpy(newdi->di_devname, devname);

	if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
#ifdef EKCD
		newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize,
		    kda->kda_encryption, kda->kda_key,
		    kda->kda_encryptedkeysize, kda->kda_encryptedkey);
		if (newdi->kdcrypto == NULL) {
			error = EINVAL;
			goto cleanup;
		}
#else
		error = EOPNOTSUPP;
		goto cleanup;
#endif
	}
	if (kda->kda_compression != KERNELDUMP_COMP_NONE) {
#ifdef EKCD
		/*
		 * We can't support simultaneous unpadded block cipher
		 * encryption and compression because there is no guarantee the
		 * length of the compressed result is exactly a multiple of the
		 * cipher block size.
		 */
		if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) {
			error = EOPNOTSUPP;
			goto cleanup;
		}
#endif
		newdi->kdcomp = kerneldumpcomp_create(newdi,
		    kda->kda_compression);
		if (newdi->kdcomp == NULL) {
			error = EINVAL;
			goto cleanup;
		}
	}

	newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO);

	/* Add the new configuration to the queue */
	mtx_lock(&dumpconf_list_lk);
	inserted = false;
	/* Walk 'index' entries forward, then insert before that entry. */
	TAILQ_FOREACH(listdi, &dumper_configs, di_next) {
		if (index == 0) {
			TAILQ_INSERT_BEFORE(listdi, newdi, di_next);
			inserted = true;
			break;
		}
		index--;
	}
	if (!inserted)
		TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next);
	mtx_unlock(&dumpconf_list_lk);

	return (0);

cleanup:
	free_single_dumper(newdi);
	return (error);
}

#ifdef DDB
/*
 * Put 'newdi' at the head of the dumper list.  No lock is taken and no
 * privilege check is made here.
 */
void
dumper_ddb_insert(struct dumperinfo *newdi)
{
	TAILQ_INSERT_HEAD(&dumper_configs, newdi, di_next);
}

/* Unlink 'di' from the dumper list without freeing it; no lock is taken. */
void
dumper_ddb_remove(struct dumperinfo *di)
{
	TAILQ_REMOVE(&dumper_configs, di, di_next);
}
#endif

/*
 * Decide whether configuration 'di' is selected by a removal request.
 *
 * KDA_REMOVE_ALL matches everything.  Otherwise the device name must be
 * equal; KDA_REMOVE_DEV then matches regardless of other settings, while
 * any other index also requires the compression and encryption settings
 * to agree.  'kda' is dereferenced unconditionally and must not be NULL.
 */
static bool
dumper_config_match(const struct dumperinfo *di, const char *devname,
    const struct diocskerneldump_arg *kda)
{
	if (kda->kda_index == KDA_REMOVE_ALL)
		return (true);

	if (strcmp(di->di_devname, devname) != 0)
		return (false);

	/*
	 * Allow wildcard removal of configs matching a device on g_dev_orphan.
	 */
	if (kda->kda_index == KDA_REMOVE_DEV)
		return (true);

	if (di->kdcomp != NULL) {
		if (di->kdcomp->kdc_format != kda->kda_compression)
			return (false);
	} else if (kda->kda_compression != KERNELDUMP_COMP_NONE)
		return (false);
#ifdef EKCD
	if (di->kdcrypto != NULL) {
		if (di->kdcrypto->kdc_encryption != kda->kda_encryption)
			return (false);
		/*
		 * Do we care to verify keys match to delete?  It seems weird
		 * to expect multiple fallback dump configurations on the same
		 * device that only differ in crypto key.
		 */
	} else
#endif
		/* Unencrypted config: the request must not ask for crypto. */
		if (kda->kda_encryption != KERNELDUMP_ENC_NONE)
			return (false);

	return (true);
}

int
dumper_remove(const char *devname, const struct diocskerneldump_arg *kda)
{
	struct dumperinfo *di, *sdi;
	bool found;
	int error;

	error = priv_check(curthread, PRIV_SETDUMPER);
	if (error != 0)
		return (error);

	/*
	 * Try to find a matching configuration, and kill it.
	 *
	 * NULL 'kda' indicates remove any configuration matching 'devname',
	 * which may remove multiple configurations in atypical configurations.
	 */
	found = false;
	mtx_lock(&dumpconf_list_lk);
	TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) {
		if (dumper_config_match(di, devname, kda)) {
			found = true;
			TAILQ_REMOVE(&dumper_configs, di, di_next);
			free_single_dumper(di);
		}
	}
	mtx_unlock(&dumpconf_list_lk);

	/* Only produce ENOENT if a more targeted match didn't match.
*/ 1355 if (!found && kda->kda_index == KDA_REMOVE) 1356 return (ENOENT); 1357 return (0); 1358 } 1359 1360 static int 1361 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1362 { 1363 1364 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1365 offset - di->mediaoffset + length > di->mediasize)) { 1366 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1367 printf( 1368 "Compressed dump failed to fit in device boundaries.\n"); 1369 return (E2BIG); 1370 } 1371 1372 printf("Attempt to write outside dump device boundaries.\n" 1373 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1374 (intmax_t)offset, (intmax_t)di->mediaoffset, 1375 (uintmax_t)length, (intmax_t)di->mediasize); 1376 return (ENOSPC); 1377 } 1378 if (length % di->blocksize != 0) { 1379 printf("Attempt to write partial block of length %ju.\n", 1380 (uintmax_t)length); 1381 return (EINVAL); 1382 } 1383 if (offset % di->blocksize != 0) { 1384 printf("Attempt to write at unaligned offset %jd.\n", 1385 (intmax_t)offset); 1386 return (EINVAL); 1387 } 1388 1389 return (0); 1390 } 1391 1392 #ifdef EKCD 1393 static int 1394 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1395 { 1396 1397 switch (kdc->kdc_encryption) { 1398 case KERNELDUMP_ENC_AES_256_CBC: 1399 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1400 8 * size, buf) <= 0) { 1401 return (EIO); 1402 } 1403 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1404 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1405 return (EIO); 1406 } 1407 break; 1408 case KERNELDUMP_ENC_CHACHA20: 1409 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1410 break; 1411 default: 1412 return (EINVAL); 1413 } 1414 1415 return (0); 1416 } 1417 1418 /* Encrypt data and call dumper. 
*/ 1419 static int 1420 dump_encrypted_write(struct dumperinfo *di, void *virtual, 1421 vm_offset_t physical, off_t offset, size_t length) 1422 { 1423 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1424 struct kerneldumpcrypto *kdc; 1425 int error; 1426 size_t nbytes; 1427 1428 kdc = di->kdcrypto; 1429 1430 while (length > 0) { 1431 nbytes = MIN(length, sizeof(buf)); 1432 bcopy(virtual, buf, nbytes); 1433 1434 if (dump_encrypt(kdc, buf, nbytes) != 0) 1435 return (EIO); 1436 1437 error = dump_write(di, buf, physical, offset, nbytes); 1438 if (error != 0) 1439 return (error); 1440 1441 offset += nbytes; 1442 virtual = (void *)((uint8_t *)virtual + nbytes); 1443 length -= nbytes; 1444 } 1445 1446 return (0); 1447 } 1448 #endif /* EKCD */ 1449 1450 static int 1451 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1452 { 1453 struct dumperinfo *di; 1454 size_t resid, rlength; 1455 int error; 1456 1457 di = arg; 1458 1459 if (length % di->blocksize != 0) { 1460 /* 1461 * This must be the final write after flushing the compression 1462 * stream. Write as many full blocks as possible and stash the 1463 * residual data in the dumper's block buffer. It will be 1464 * padded and written in dump_finish(). 1465 */ 1466 rlength = rounddown(length, di->blocksize); 1467 if (rlength != 0) { 1468 error = _dump_append(di, base, 0, rlength); 1469 if (error != 0) 1470 return (error); 1471 } 1472 resid = length - rlength; 1473 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1474 bzero((uint8_t *)di->blockbuf + resid, di->blocksize - resid); 1475 di->kdcomp->kdc_resid = resid; 1476 return (EAGAIN); 1477 } 1478 return (_dump_append(di, base, 0, length)); 1479 } 1480 1481 /* 1482 * Write kernel dump headers at the beginning and end of the dump extent. 1483 * Write the kernel dump encryption key after the leading header if we were 1484 * configured to do so. 
1485 */ 1486 static int 1487 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1488 { 1489 #ifdef EKCD 1490 struct kerneldumpcrypto *kdc; 1491 #endif 1492 void *buf; 1493 size_t hdrsz; 1494 uint64_t extent; 1495 uint32_t keysize; 1496 int error; 1497 1498 hdrsz = sizeof(*kdh); 1499 if (hdrsz > di->blocksize) 1500 return (ENOMEM); 1501 1502 #ifdef EKCD 1503 kdc = di->kdcrypto; 1504 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1505 #else 1506 keysize = 0; 1507 #endif 1508 1509 /* 1510 * If the dump device has special handling for headers, let it take care 1511 * of writing them out. 1512 */ 1513 if (di->dumper_hdr != NULL) 1514 return (di->dumper_hdr(di, kdh)); 1515 1516 if (hdrsz == di->blocksize) 1517 buf = kdh; 1518 else { 1519 buf = di->blockbuf; 1520 memset(buf, 0, di->blocksize); 1521 memcpy(buf, kdh, hdrsz); 1522 } 1523 1524 extent = dtoh64(kdh->dumpextent); 1525 #ifdef EKCD 1526 if (kdc != NULL) { 1527 error = dump_write(di, kdc->kdc_dumpkey, 0, 1528 di->mediaoffset + di->mediasize - di->blocksize - extent - 1529 keysize, keysize); 1530 if (error != 0) 1531 return (error); 1532 } 1533 #endif 1534 1535 error = dump_write(di, buf, 0, 1536 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1537 keysize, di->blocksize); 1538 if (error == 0) 1539 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - 1540 di->blocksize, di->blocksize); 1541 return (error); 1542 } 1543 1544 /* 1545 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1546 * protect us from metadata and metadata from us. 1547 */ 1548 #define SIZEOF_METADATA (64 * 1024) 1549 1550 /* 1551 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1552 * if requested, and make sure that we have enough space on the dump device. 1553 * 1554 * We set things up so that the dump ends before the last sector of the dump 1555 * device, at which the trailing header is written. 
1556 * 1557 * +-----------+------+-----+----------------------------+------+ 1558 * | | lhdr | key | ... kernel dump ... | thdr | 1559 * +-----------+------+-----+----------------------------+------+ 1560 * 1 blk opt <------- dump extent --------> 1 blk 1561 * 1562 * Dumps written using dump_append() start at the beginning of the extent. 1563 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1564 * will not. The true length of the dump is recorded in the leading and trailing 1565 * headers once the dump has been completed. 1566 * 1567 * The dump device may provide a callback, in which case it will initialize 1568 * dumpoff and take care of laying out the headers. 1569 */ 1570 int 1571 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1572 { 1573 #ifdef EKCD 1574 struct kerneldumpcrypto *kdc; 1575 #endif 1576 void *key; 1577 uint64_t dumpextent, span; 1578 uint32_t keysize; 1579 int error; 1580 1581 #ifdef EKCD 1582 /* Send the key before the dump so a partial dump is still usable. */ 1583 kdc = di->kdcrypto; 1584 error = kerneldumpcrypto_init(kdc); 1585 if (error != 0) 1586 return (error); 1587 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1588 key = keysize > 0 ? kdc->kdc_dumpkey : NULL; 1589 #else 1590 error = 0; 1591 keysize = 0; 1592 key = NULL; 1593 #endif 1594 1595 if (di->dumper_start != NULL) { 1596 error = di->dumper_start(di, key, keysize); 1597 } else { 1598 dumpextent = dtoh64(kdh->dumpextent); 1599 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1600 keysize; 1601 if (di->mediasize < span) { 1602 if (di->kdcomp == NULL) 1603 return (E2BIG); 1604 1605 /* 1606 * We don't yet know how much space the compressed dump 1607 * will occupy, so try to use the whole swap partition 1608 * (minus the first 64KB) in the hope that the 1609 * compressed dump will fit. If that doesn't turn out to 1610 * be enough, the bounds checking in dump_write() 1611 * will catch us and cause the dump to fail. 
1612 */ 1613 dumpextent = di->mediasize - span + dumpextent; 1614 kdh->dumpextent = htod64(dumpextent); 1615 } 1616 1617 /* 1618 * The offset at which to begin writing the dump. 1619 */ 1620 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1621 dumpextent; 1622 } 1623 di->origdumpoff = di->dumpoff; 1624 return (error); 1625 } 1626 1627 static int 1628 _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1629 size_t length) 1630 { 1631 int error; 1632 1633 #ifdef EKCD 1634 if (di->kdcrypto != NULL) 1635 error = dump_encrypted_write(di, virtual, physical, di->dumpoff, 1636 length); 1637 else 1638 #endif 1639 error = dump_write(di, virtual, physical, di->dumpoff, length); 1640 if (error == 0) 1641 di->dumpoff += length; 1642 return (error); 1643 } 1644 1645 /* 1646 * Write to the dump device starting at dumpoff. When compression is enabled, 1647 * writes to the device will be performed using a callback that gets invoked 1648 * when the compression stream's output buffer is full. 1649 */ 1650 int 1651 dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1652 size_t length) 1653 { 1654 void *buf; 1655 1656 if (di->kdcomp != NULL) { 1657 /* Bounce through a buffer to avoid CRC errors. */ 1658 if (length > di->maxiosize) 1659 return (EINVAL); 1660 buf = di->kdcomp->kdc_buf; 1661 memmove(buf, virtual, length); 1662 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1663 } 1664 return (_dump_append(di, virtual, physical, length)); 1665 } 1666 1667 /* 1668 * Write to the dump device at the specified offset. 
1669 */ 1670 int 1671 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1672 off_t offset, size_t length) 1673 { 1674 int error; 1675 1676 error = dump_check_bounds(di, offset, length); 1677 if (error != 0) 1678 return (error); 1679 return (di->dumper(di->priv, virtual, physical, offset, length)); 1680 } 1681 1682 /* 1683 * Perform kernel dump finalization: flush the compression stream, if necessary, 1684 * write the leading and trailing kernel dump headers now that we know the true 1685 * length of the dump, and optionally write the encryption key following the 1686 * leading header. 1687 */ 1688 int 1689 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1690 { 1691 int error; 1692 1693 if (di->kdcomp != NULL) { 1694 error = compressor_flush(di->kdcomp->kdc_stream); 1695 if (error == EAGAIN) { 1696 /* We have residual data in di->blockbuf. */ 1697 error = _dump_append(di, di->blockbuf, 0, di->blocksize); 1698 if (error == 0) 1699 /* Compensate for _dump_append()'s adjustment. */ 1700 di->dumpoff -= di->blocksize - di->kdcomp->kdc_resid; 1701 di->kdcomp->kdc_resid = 0; 1702 } 1703 if (error != 0) 1704 return (error); 1705 1706 /* 1707 * We now know the size of the compressed dump, so update the 1708 * header accordingly and recompute parity. 
1709 */ 1710 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1711 kdh->parity = 0; 1712 kdh->parity = kerneldump_parity(kdh); 1713 1714 compressor_reset(di->kdcomp->kdc_stream); 1715 } 1716 1717 error = dump_write_headers(di, kdh); 1718 if (error != 0) 1719 return (error); 1720 1721 (void)dump_write(di, NULL, 0, 0, 0); 1722 return (0); 1723 } 1724 1725 void 1726 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1727 const char *magic, uint32_t archver, uint64_t dumplen) 1728 { 1729 size_t dstsize; 1730 1731 bzero(kdh, sizeof(*kdh)); 1732 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1733 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1734 kdh->version = htod32(KERNELDUMPVERSION); 1735 kdh->architectureversion = htod32(archver); 1736 kdh->dumplength = htod64(dumplen); 1737 kdh->dumpextent = kdh->dumplength; 1738 kdh->dumptime = htod64(time_second); 1739 #ifdef EKCD 1740 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1741 #else 1742 kdh->dumpkeysize = 0; 1743 #endif 1744 kdh->blocksize = htod32(di->blocksize); 1745 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1746 dstsize = sizeof(kdh->versionstring); 1747 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1748 kdh->versionstring[dstsize - 2] = '\n'; 1749 if (panicstr != NULL) 1750 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1751 if (di->kdcomp != NULL) 1752 kdh->compression = di->kdcomp->kdc_format; 1753 kdh->parity = kerneldump_parity(kdh); 1754 } 1755 1756 #ifdef DDB 1757 DB_SHOW_COMMAND(panic, db_show_panic) 1758 { 1759 1760 if (panicstr == NULL) 1761 db_printf("panicstr not set\n"); 1762 else 1763 db_printf("panic: %s\n", panicstr); 1764 } 1765 #endif 1766