1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1986, 1988, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ekcd.h"
#include "opt_kdb.h"
#include "opt_panic.h"
#include "opt_printf.h"
#include "opt_sched.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/compressor.h>
#include <sys/cons.h>
#include <sys/disk.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/watchdog.h>

#include <crypto/chacha20/chacha.h>
#include <crypto/rijndael/rijndael-api-fst.h>
#include <crypto/sha2/sha256.h>

#include <ddb/ddb.h>

#include <machine/cpu.h>
#include <machine/dump.h>
#include <machine/pcb.h>
#include <machine/smp.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/signalvar.h>

/* Malloc type used for the dump compression state and its I/O buffer. */
static MALLOC_DEFINE(M_DUMPER, "dumper", "dumper block buffer");

/*
 * kern.panic_reboot_wait_time: consulted by shutdown_panic().
 *   0   reboot immediately, without prompting;
 *  -1   skip the countdown and wait for a key press on the console;
 *  >0   count down that many seconds; a key press aborts the automatic
 *       reboot and falls through to the "press a key" prompt.
 */
#ifndef PANIC_REBOOT_WAIT_TIME
#define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
#endif
static int panic_reboot_wait_time = PANIC_REBOOT_WAIT_TIME;
SYSCTL_INT(_kern, OID_AUTO, panic_reboot_wait_time, CTLFLAG_RWTUN,
    &panic_reboot_wait_time, 0,
    "Seconds to wait before rebooting after a panic");

/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
#include <machine/stdarg.h>

#ifdef KDB
/*
 * debug.debugger_on_panic: when non-zero, vpanic() enters the debugger via
 * kdb_enter().  The compiled-in default depends on KDB_UNATTENDED.
 */
#ifdef KDB_UNATTENDED
static int debugger_on_panic = 0;
#else
static int debugger_on_panic = 1;
#endif
SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_panic, 0, "Run debugger on kernel panic");

/*
 * debug.debugger_on_trap: exported (non-static) and not read in this part
 * of the file; presumably consumed by machine-dependent trap handlers.
 */
int debugger_on_trap = 0;
SYSCTL_INT(_debug, OID_AUTO, debugger_on_trap,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &debugger_on_trap, 0, "Run debugger on kernel trap before panic");

/*
 * debug.trace_on_panic enables kdb_backtrace() from vpanic();
 * debug.trace_all_panics extends that to panics raised while a panic is
 * already in progress.  Both default to "on" when KDB_TRACE is defined.
 */
#ifdef KDB_TRACE
static int trace_on_panic = 1;
static bool trace_all_panics = true;
#else
static int trace_on_panic = 0;
static bool trace_all_panics = false;
#endif
SYSCTL_INT(_debug, OID_AUTO, trace_on_panic,
    CTLFLAG_RWTUN | CTLFLAG_SECURE,
    &trace_on_panic, 0, "Print stack trace on kernel panic");
SYSCTL_BOOL(_debug, OID_AUTO, trace_all_panics, CTLFLAG_RWTUN,
    &trace_all_panics, 0, "Print stack traces on secondary kernel panics");
#endif /* KDB */

/* When zero (the default), vpanic() adds RB_NOSYNC to the reboot flags. */
static int sync_on_panic = 0;
SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RWTUN,
    &sync_on_panic, 0, "Do a sync before rebooting from a panic");

/* When set, vpanic() adds RB_POWEROFF to the reboot flags. */
static bool poweroff_on_panic = 0;
SYSCTL_BOOL(_kern, OID_AUTO, poweroff_on_panic, CTLFLAG_RWTUN,
    &poweroff_on_panic, 0, "Do a power off instead of a reboot on a panic");

/* When set, vpanic() adds RB_POWERCYCLE to the reboot flags. */
static bool powercycle_on_panic = 0;
SYSCTL_BOOL(_kern, OID_AUTO, powercycle_on_panic, CTLFLAG_RWTUN,
    &powercycle_on_panic, 0, "Do a power cycle instead of a reboot on a panic");

/* Parent node for the kern.shutdown.* sysctls defined in this file. */
static SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0,
    "Shutdown environment");

/*
 * Passed to bufshutdown() by kern_reboot(); defaults to 1 only on
 * DIAGNOSTIC kernels.
 */
#ifndef DIAGNOSTIC
static int show_busybufs;
#else
static int show_busybufs = 1;
#endif
SYSCTL_INT(_kern_shutdown, OID_AUTO, show_busybufs, CTLFLAG_RW,
    &show_busybufs, 0, "");

/*
 * kern.suspend_blocked: exported flag; it is not read in this part of the
 * file.
 */
int suspend_blocked = 0;
SYSCTL_INT(_kern, OID_AUTO, suspend_blocked, CTLFLAG_RW,
    &suspend_blocked, 0, "Block suspend due to a pending shutdown");

#ifdef EKCD
FEATURE(ekcd, "Encrypted kernel crash dumps support");

MALLOC_DEFINE(M_EKCD, "ekcd", "Encrypted kernel crash dumps data");

/*
 * State for an encrypted kernel crash dump.  Allocated by
 * kerneldumpcrypto_create() together with the trailing dump key.
 */
struct kerneldumpcrypto {
	/* Cipher selector: one of the KERNELDUMP_ENC_* values. */
	uint8_t			kdc_encryption;
	/* Current IV; re-derived by kerneldumpcrypto_init() for every dump. */
	uint8_t			kdc_iv[KERNELDUMP_IV_MAX_SIZE];
	/* Cipher context; the member in use is selected by kdc_encryption. */
	union {
		struct {
			keyInstance	aes_ki;
			cipherInstance	aes_ci;
		} u_aes;
		struct chacha_ctx	u_chacha;
	} u;
#define	kdc_ki	u.u_aes.aes_ki
#define	kdc_ci	u.u_aes.aes_ci
#define	kdc_chacha	u.u_chacha
	/* Size of kdc_dumpkey in bytes, rounded up to the device block size. */
	uint32_t		kdc_dumpkeysize;
	/* Key header (encryption type, IV, encrypted key) stored with the dump. */
	struct kerneldumpkey	kdc_dumpkey[];
};
#endif

/* State for a compressed kernel crash dump; see kerneldumpcomp_create(). */
struct kerneldumpcomp {
	/* Compression selector: one of the KERNELDUMP_COMP_* values. */
	uint8_t			kdc_format;
	/* Compressor handle returned by compressor_init(). */
	struct compressor	*kdc_stream;
	/* Buffer of di->maxiosize bytes allocated at creation time. */
	uint8_t			*kdc_buf;
	/* NOTE(review): presumably bytes pending in kdc_buf - confirm at use. */
	size_t			kdc_resid;
};

static struct kerneldumpcomp *kerneldumpcomp_create(struct dumperinfo *di,
		    uint8_t compression);
static void	kerneldumpcomp_destroy(struct dumperinfo *di);
static int	kerneldumpcomp_write_cb(void *base, size_t len, off_t off, void *arg);

/*
 * Compression level handed to compressor_init() by kerneldumpcomp_create()
 * (for every supported format, despite the "gz" in the name).
 */
static int kerneldump_gzlevel = 6;
SYSCTL_INT(_kern, OID_AUTO, kerneldump_gzlevel, CTLFLAG_RWTUN,
    &kerneldump_gzlevel, 0,
    "Kernel crash dump compression level");

/*
 * Variable panicstr contains the message of the first call to panic; it is
 * also used as a flag to indicate that the kernel has already called panic.
 */
const char *panicstr;

int dumping;				/* system is dumping */
int rebooting;				/* system is rebooting */
/*
 * Used to serialize between sysctl kern.shutdown.dumpdevname and list
 * modifications via ioctl.
 */
static struct mtx dumpconf_list_lk;
MTX_SYSINIT(dumper_configs, &dumpconf_list_lk, "dumper config list", MTX_DEF);

/* Our selected dumper(s).
*/ 232 static TAILQ_HEAD(dumpconflist, dumperinfo) dumper_configs = 233 TAILQ_HEAD_INITIALIZER(dumper_configs); 234 235 /* Context information for dump-debuggers. */ 236 static struct pcb dumppcb; /* Registers. */ 237 lwpid_t dumptid; /* Thread ID. */ 238 239 static struct cdevsw reroot_cdevsw = { 240 .d_version = D_VERSION, 241 .d_name = "reroot", 242 }; 243 244 static void poweroff_wait(void *, int); 245 static void shutdown_halt(void *junk, int howto); 246 static void shutdown_panic(void *junk, int howto); 247 static void shutdown_reset(void *junk, int howto); 248 static int kern_reroot(void); 249 250 /* register various local shutdown events */ 251 static void 252 shutdown_conf(void *unused) 253 { 254 255 EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, 256 SHUTDOWN_PRI_FIRST); 257 EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, 258 SHUTDOWN_PRI_LAST + 100); 259 EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, 260 SHUTDOWN_PRI_LAST + 100); 261 EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, 262 SHUTDOWN_PRI_LAST + 200); 263 } 264 265 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL); 266 267 /* 268 * The only reason this exists is to create the /dev/reroot/ directory, 269 * used by reroot code in init(8) as a mountpoint for tmpfs. 270 */ 271 static void 272 reroot_conf(void *unused) 273 { 274 int error; 275 struct cdev *cdev; 276 277 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &cdev, 278 &reroot_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "reroot/reroot"); 279 if (error != 0) { 280 printf("%s: failed to create device node, error %d", 281 __func__, error); 282 } 283 } 284 285 SYSINIT(reroot_conf, SI_SUB_DEVFS, SI_ORDER_ANY, reroot_conf, NULL); 286 287 /* 288 * The system call that results in a reboot. 
289 */ 290 /* ARGSUSED */ 291 int 292 sys_reboot(struct thread *td, struct reboot_args *uap) 293 { 294 int error; 295 296 error = 0; 297 #ifdef MAC 298 error = mac_system_check_reboot(td->td_ucred, uap->opt); 299 #endif 300 if (error == 0) 301 error = priv_check(td, PRIV_REBOOT); 302 if (error == 0) { 303 if (uap->opt & RB_REROOT) 304 error = kern_reroot(); 305 else 306 kern_reboot(uap->opt); 307 } 308 return (error); 309 } 310 311 static void 312 shutdown_nice_task_fn(void *arg, int pending __unused) 313 { 314 int howto; 315 316 howto = (uintptr_t)arg; 317 /* Send a signal to init(8) and have it shutdown the world. */ 318 PROC_LOCK(initproc); 319 if (howto & RB_POWEROFF) 320 kern_psignal(initproc, SIGUSR2); 321 else if (howto & RB_POWERCYCLE) 322 kern_psignal(initproc, SIGWINCH); 323 else if (howto & RB_HALT) 324 kern_psignal(initproc, SIGUSR1); 325 else 326 kern_psignal(initproc, SIGINT); 327 PROC_UNLOCK(initproc); 328 } 329 330 static struct task shutdown_nice_task = TASK_INITIALIZER(0, 331 &shutdown_nice_task_fn, NULL); 332 333 /* 334 * Called by events that want to shut down.. e.g <CTL><ALT><DEL> on a PC 335 */ 336 void 337 shutdown_nice(int howto) 338 { 339 340 if (initproc != NULL && !SCHEDULER_STOPPED()) { 341 shutdown_nice_task.ta_context = (void *)(uintptr_t)howto; 342 taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task); 343 } else { 344 /* 345 * No init(8) running, or scheduler would not allow it 346 * to run, so simply reboot. 
347 */ 348 kern_reboot(howto | RB_NOSYNC); 349 } 350 } 351 352 static void 353 print_uptime(void) 354 { 355 int f; 356 struct timespec ts; 357 358 getnanouptime(&ts); 359 printf("Uptime: "); 360 f = 0; 361 if (ts.tv_sec >= 86400) { 362 printf("%ldd", (long)ts.tv_sec / 86400); 363 ts.tv_sec %= 86400; 364 f = 1; 365 } 366 if (f || ts.tv_sec >= 3600) { 367 printf("%ldh", (long)ts.tv_sec / 3600); 368 ts.tv_sec %= 3600; 369 f = 1; 370 } 371 if (f || ts.tv_sec >= 60) { 372 printf("%ldm", (long)ts.tv_sec / 60); 373 ts.tv_sec %= 60; 374 f = 1; 375 } 376 printf("%lds\n", (long)ts.tv_sec); 377 } 378 379 int 380 doadump(boolean_t textdump) 381 { 382 boolean_t coredump; 383 int error; 384 385 error = 0; 386 if (dumping) 387 return (EBUSY); 388 if (TAILQ_EMPTY(&dumper_configs)) 389 return (ENXIO); 390 391 savectx(&dumppcb); 392 dumptid = curthread->td_tid; 393 dumping++; 394 395 coredump = TRUE; 396 #ifdef DDB 397 if (textdump && textdump_pending) { 398 coredump = FALSE; 399 textdump_dumpsys(TAILQ_FIRST(&dumper_configs)); 400 } 401 #endif 402 if (coredump) { 403 struct dumperinfo *di; 404 405 TAILQ_FOREACH(di, &dumper_configs, di_next) { 406 error = dumpsys(di); 407 if (error == 0) 408 break; 409 } 410 } 411 412 dumping--; 413 return (error); 414 } 415 416 /* 417 * Shutdown the system cleanly to prepare for reboot, halt, or power off. 418 */ 419 void 420 kern_reboot(int howto) 421 { 422 static int once = 0; 423 424 /* 425 * Normal paths here don't hold Giant, but we can wind up here 426 * unexpectedly with it held. Drop it now so we don't have to 427 * drop and pick it up elsewhere. The paths it is locking will 428 * never be returned to, and it is preferable to preclude 429 * deadlock than to lock against code that won't ever 430 * continue. 431 */ 432 while (mtx_owned(&Giant)) 433 mtx_unlock(&Giant); 434 435 #if defined(SMP) 436 /* 437 * Bind us to the first CPU so that all shutdown code runs there. 
Some 438 * systems don't shutdown properly (i.e., ACPI power off) if we 439 * run on another processor. 440 */ 441 if (!SCHEDULER_STOPPED()) { 442 thread_lock(curthread); 443 sched_bind(curthread, CPU_FIRST()); 444 thread_unlock(curthread); 445 KASSERT(PCPU_GET(cpuid) == CPU_FIRST(), 446 ("boot: not running on cpu 0")); 447 } 448 #endif 449 /* We're in the process of rebooting. */ 450 rebooting = 1; 451 452 /* We are out of the debugger now. */ 453 kdb_active = 0; 454 455 /* 456 * Do any callouts that should be done BEFORE syncing the filesystems. 457 */ 458 EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); 459 460 /* 461 * Now sync filesystems 462 */ 463 if (!cold && (howto & RB_NOSYNC) == 0 && once == 0) { 464 once = 1; 465 bufshutdown(show_busybufs); 466 } 467 468 print_uptime(); 469 470 cngrab(); 471 472 /* 473 * Ok, now do things that assume all filesystem activity has 474 * been completed. 475 */ 476 EVENTHANDLER_INVOKE(shutdown_post_sync, howto); 477 478 if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 479 doadump(TRUE); 480 481 /* Now that we're going to really halt the system... */ 482 EVENTHANDLER_INVOKE(shutdown_final, howto); 483 484 for(;;) ; /* safety against shutdown_reset not working */ 485 /* NOTREACHED */ 486 } 487 488 /* 489 * The system call that results in changing the rootfs. 490 */ 491 static int 492 kern_reroot(void) 493 { 494 struct vnode *oldrootvnode, *vp; 495 struct mount *mp, *devmp; 496 int error; 497 498 if (curproc != initproc) 499 return (EPERM); 500 501 /* 502 * Mark the filesystem containing currently-running executable 503 * (the temporary copy of init(8)) busy. 
504 */ 505 vp = curproc->p_textvp; 506 error = vn_lock(vp, LK_SHARED); 507 if (error != 0) 508 return (error); 509 mp = vp->v_mount; 510 error = vfs_busy(mp, MBF_NOWAIT); 511 if (error != 0) { 512 vfs_ref(mp); 513 VOP_UNLOCK(vp, 0); 514 error = vfs_busy(mp, 0); 515 vn_lock(vp, LK_SHARED | LK_RETRY); 516 vfs_rel(mp); 517 if (error != 0) { 518 VOP_UNLOCK(vp, 0); 519 return (ENOENT); 520 } 521 if (vp->v_iflag & VI_DOOMED) { 522 VOP_UNLOCK(vp, 0); 523 vfs_unbusy(mp); 524 return (ENOENT); 525 } 526 } 527 VOP_UNLOCK(vp, 0); 528 529 /* 530 * Remove the filesystem containing currently-running executable 531 * from the mount list, to prevent it from being unmounted 532 * by vfs_unmountall(), and to avoid confusing vfs_mountroot(). 533 * 534 * Also preserve /dev - forcibly unmounting it could cause driver 535 * reinitialization. 536 */ 537 538 vfs_ref(rootdevmp); 539 devmp = rootdevmp; 540 rootdevmp = NULL; 541 542 mtx_lock(&mountlist_mtx); 543 TAILQ_REMOVE(&mountlist, mp, mnt_list); 544 TAILQ_REMOVE(&mountlist, devmp, mnt_list); 545 mtx_unlock(&mountlist_mtx); 546 547 oldrootvnode = rootvnode; 548 549 /* 550 * Unmount everything except for the two filesystems preserved above. 551 */ 552 vfs_unmountall(); 553 554 /* 555 * Add /dev back; vfs_mountroot() will move it into its new place. 556 */ 557 mtx_lock(&mountlist_mtx); 558 TAILQ_INSERT_HEAD(&mountlist, devmp, mnt_list); 559 mtx_unlock(&mountlist_mtx); 560 rootdevmp = devmp; 561 vfs_rel(rootdevmp); 562 563 /* 564 * Mount the new rootfs. 565 */ 566 vfs_mountroot(); 567 568 /* 569 * Update all references to the old rootvnode. 570 */ 571 mountcheckdirs(oldrootvnode, rootvnode); 572 573 /* 574 * Add the temporary filesystem back and unbusy it. 575 */ 576 mtx_lock(&mountlist_mtx); 577 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 578 mtx_unlock(&mountlist_mtx); 579 vfs_unbusy(mp); 580 581 return (0); 582 } 583 584 /* 585 * If the shutdown was a clean halt, behave accordingly. 
586 */ 587 static void 588 shutdown_halt(void *junk, int howto) 589 { 590 591 if (howto & RB_HALT) { 592 printf("\n"); 593 printf("The operating system has halted.\n"); 594 printf("Please press any key to reboot.\n\n"); 595 switch (cngetc()) { 596 case -1: /* No console, just die */ 597 cpu_halt(); 598 /* NOTREACHED */ 599 default: 600 break; 601 } 602 } 603 } 604 605 /* 606 * Check to see if the system paniced, pause and then reboot 607 * according to the specified delay. 608 */ 609 static void 610 shutdown_panic(void *junk, int howto) 611 { 612 int loop; 613 614 if (howto & RB_DUMP) { 615 if (panic_reboot_wait_time != 0) { 616 if (panic_reboot_wait_time != -1) { 617 printf("Automatic reboot in %d seconds - " 618 "press a key on the console to abort\n", 619 panic_reboot_wait_time); 620 for (loop = panic_reboot_wait_time * 10; 621 loop > 0; --loop) { 622 DELAY(1000 * 100); /* 1/10th second */ 623 /* Did user type a key? */ 624 if (cncheckc() != -1) 625 break; 626 } 627 if (!loop) 628 return; 629 } 630 } else { /* zero time specified - reboot NOW */ 631 return; 632 } 633 printf("--> Press a key on the console to reboot,\n"); 634 printf("--> or switch off the system now.\n"); 635 cngetc(); 636 } 637 } 638 639 /* 640 * Everything done, now reset 641 */ 642 static void 643 shutdown_reset(void *junk, int howto) 644 { 645 646 printf("Rebooting...\n"); 647 DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ 648 649 /* 650 * Acquiring smp_ipi_mtx here has a double effect: 651 * - it disables interrupts avoiding CPU0 preemption 652 * by fast handlers (thus deadlocking against other CPUs) 653 * - it avoids deadlocks against smp_rendezvous() or, more 654 * generally, threads busy-waiting, with this spinlock held, 655 * and waiting for responses by threads on other CPUs 656 * (ie. smp_tlb_shootdown()). 657 * 658 * For the !SMP case it just needs to handle the former problem. 
659 */ 660 #ifdef SMP 661 mtx_lock_spin(&smp_ipi_mtx); 662 #else 663 spinlock_enter(); 664 #endif 665 666 /* cpu_boot(howto); */ /* doesn't do anything at the moment */ 667 cpu_reset(); 668 /* NOTREACHED */ /* assuming reset worked */ 669 } 670 671 #if defined(WITNESS) || defined(INVARIANT_SUPPORT) 672 static int kassert_warn_only = 0; 673 #ifdef KDB 674 static int kassert_do_kdb = 0; 675 #endif 676 #ifdef KTR 677 static int kassert_do_ktr = 0; 678 #endif 679 static int kassert_do_log = 1; 680 static int kassert_log_pps_limit = 4; 681 static int kassert_log_mute_at = 0; 682 static int kassert_log_panic_at = 0; 683 static int kassert_suppress_in_panic = 0; 684 static int kassert_warnings = 0; 685 686 SYSCTL_NODE(_debug, OID_AUTO, kassert, CTLFLAG_RW, NULL, "kassert options"); 687 688 #ifdef KASSERT_PANIC_OPTIONAL 689 #define KASSERT_RWTUN CTLFLAG_RWTUN 690 #else 691 #define KASSERT_RWTUN CTLFLAG_RDTUN 692 #endif 693 694 SYSCTL_INT(_debug_kassert, OID_AUTO, warn_only, KASSERT_RWTUN, 695 &kassert_warn_only, 0, 696 "KASSERT triggers a panic (0) or just a warning (1)"); 697 698 #ifdef KDB 699 SYSCTL_INT(_debug_kassert, OID_AUTO, do_kdb, KASSERT_RWTUN, 700 &kassert_do_kdb, 0, "KASSERT will enter the debugger"); 701 #endif 702 703 #ifdef KTR 704 SYSCTL_UINT(_debug_kassert, OID_AUTO, do_ktr, KASSERT_RWTUN, 705 &kassert_do_ktr, 0, 706 "KASSERT does a KTR, set this to the KTRMASK you want"); 707 #endif 708 709 SYSCTL_INT(_debug_kassert, OID_AUTO, do_log, KASSERT_RWTUN, 710 &kassert_do_log, 0, 711 "If warn_only is enabled, log (1) or do not log (0) assertion violations"); 712 713 SYSCTL_INT(_debug_kassert, OID_AUTO, warnings, KASSERT_RWTUN, 714 &kassert_warnings, 0, "number of KASSERTs that have been triggered"); 715 716 SYSCTL_INT(_debug_kassert, OID_AUTO, log_panic_at, KASSERT_RWTUN, 717 &kassert_log_panic_at, 0, "max number of KASSERTS before we will panic"); 718 719 SYSCTL_INT(_debug_kassert, OID_AUTO, log_pps_limit, KASSERT_RWTUN, 720 &kassert_log_pps_limit, 0, "limit 
number of log messages per second"); 721 722 SYSCTL_INT(_debug_kassert, OID_AUTO, log_mute_at, KASSERT_RWTUN, 723 &kassert_log_mute_at, 0, "max number of KASSERTS to log"); 724 725 SYSCTL_INT(_debug_kassert, OID_AUTO, suppress_in_panic, KASSERT_RWTUN, 726 &kassert_suppress_in_panic, 0, 727 "KASSERTs will be suppressed while handling a panic"); 728 #undef KASSERT_RWTUN 729 730 static int kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS); 731 732 SYSCTL_PROC(_debug_kassert, OID_AUTO, kassert, 733 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE, NULL, 0, 734 kassert_sysctl_kassert, "I", "set to trigger a test kassert"); 735 736 static int 737 kassert_sysctl_kassert(SYSCTL_HANDLER_ARGS) 738 { 739 int error, i; 740 741 error = sysctl_wire_old_buffer(req, sizeof(int)); 742 if (error == 0) { 743 i = 0; 744 error = sysctl_handle_int(oidp, &i, 0, req); 745 } 746 if (error != 0 || req->newptr == NULL) 747 return (error); 748 KASSERT(0, ("kassert_sysctl_kassert triggered kassert %d", i)); 749 return (0); 750 } 751 752 #ifdef KASSERT_PANIC_OPTIONAL 753 /* 754 * Called by KASSERT, this decides if we will panic 755 * or if we will log via printf and/or ktr. 756 */ 757 void 758 kassert_panic(const char *fmt, ...) 759 { 760 static char buf[256]; 761 va_list ap; 762 763 va_start(ap, fmt); 764 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 765 va_end(ap); 766 767 /* 768 * If we are suppressing secondary panics, log the warning but do not 769 * re-enter panic/kdb. 770 */ 771 if (panicstr != NULL && kassert_suppress_in_panic) { 772 if (kassert_do_log) { 773 printf("KASSERT failed: %s\n", buf); 774 #ifdef KDB 775 if (trace_all_panics && trace_on_panic) 776 kdb_backtrace(); 777 #endif 778 } 779 return; 780 } 781 782 /* 783 * panic if we're not just warning, or if we've exceeded 784 * kassert_log_panic_at warnings. 
785 */ 786 if (!kassert_warn_only || 787 (kassert_log_panic_at > 0 && 788 kassert_warnings >= kassert_log_panic_at)) { 789 va_start(ap, fmt); 790 vpanic(fmt, ap); 791 /* NORETURN */ 792 } 793 #ifdef KTR 794 if (kassert_do_ktr) 795 CTR0(ktr_mask, buf); 796 #endif /* KTR */ 797 /* 798 * log if we've not yet met the mute limit. 799 */ 800 if (kassert_do_log && 801 (kassert_log_mute_at == 0 || 802 kassert_warnings < kassert_log_mute_at)) { 803 static struct timeval lasterr; 804 static int curerr; 805 806 if (ppsratecheck(&lasterr, &curerr, kassert_log_pps_limit)) { 807 printf("KASSERT failed: %s\n", buf); 808 kdb_backtrace(); 809 } 810 } 811 #ifdef KDB 812 if (kassert_do_kdb) { 813 kdb_enter(KDB_WHY_KASSERT, buf); 814 } 815 #endif 816 atomic_add_int(&kassert_warnings, 1); 817 } 818 #endif /* KASSERT_PANIC_OPTIONAL */ 819 #endif 820 821 /* 822 * Panic is called on unresolvable fatal errors. It prints "panic: mesg", 823 * and then reboots. If we are called twice, then we avoid trying to sync 824 * the disks as this often leads to recursive panics. 825 */ 826 void 827 panic(const char *fmt, ...) 828 { 829 va_list ap; 830 831 va_start(ap, fmt); 832 vpanic(fmt, ap); 833 } 834 835 void 836 vpanic(const char *fmt, va_list ap) 837 { 838 #ifdef SMP 839 cpuset_t other_cpus; 840 #endif 841 struct thread *td = curthread; 842 int bootopt, newpanic; 843 static char buf[256]; 844 845 spinlock_enter(); 846 847 #ifdef SMP 848 /* 849 * stop_cpus_hard(other_cpus) should prevent multiple CPUs from 850 * concurrently entering panic. Only the winner will proceed 851 * further. 852 */ 853 if (panicstr == NULL && !kdb_active) { 854 other_cpus = all_cpus; 855 CPU_CLR(PCPU_GET(cpuid), &other_cpus); 856 stop_cpus_hard(other_cpus); 857 } 858 #endif 859 860 /* 861 * Ensure that the scheduler is stopped while panicking, even if panic 862 * has been entered from kdb. 
863 */ 864 td->td_stopsched = 1; 865 866 bootopt = RB_AUTOBOOT; 867 newpanic = 0; 868 if (panicstr) 869 bootopt |= RB_NOSYNC; 870 else { 871 bootopt |= RB_DUMP; 872 panicstr = fmt; 873 newpanic = 1; 874 } 875 876 if (newpanic) { 877 (void)vsnprintf(buf, sizeof(buf), fmt, ap); 878 panicstr = buf; 879 cngrab(); 880 printf("panic: %s\n", buf); 881 } else { 882 printf("panic: "); 883 vprintf(fmt, ap); 884 printf("\n"); 885 } 886 #ifdef SMP 887 printf("cpuid = %d\n", PCPU_GET(cpuid)); 888 #endif 889 printf("time = %jd\n", (intmax_t )time_second); 890 #ifdef KDB 891 if ((newpanic || trace_all_panics) && trace_on_panic) 892 kdb_backtrace(); 893 if (debugger_on_panic) 894 kdb_enter(KDB_WHY_PANIC, "panic"); 895 #endif 896 /*thread_lock(td); */ 897 td->td_flags |= TDF_INPANIC; 898 /* thread_unlock(td); */ 899 if (!sync_on_panic) 900 bootopt |= RB_NOSYNC; 901 if (poweroff_on_panic) 902 bootopt |= RB_POWEROFF; 903 if (powercycle_on_panic) 904 bootopt |= RB_POWERCYCLE; 905 kern_reboot(bootopt); 906 } 907 908 /* 909 * Support for poweroff delay. 910 * 911 * Please note that setting this delay too short might power off your machine 912 * before the write cache on your hard disk has been flushed, leading to 913 * soft-updates inconsistencies. 914 */ 915 #ifndef POWEROFF_DELAY 916 # define POWEROFF_DELAY 5000 917 #endif 918 static int poweroff_delay = POWEROFF_DELAY; 919 920 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, 921 &poweroff_delay, 0, "Delay before poweroff to write disk caches (msec)"); 922 923 static void 924 poweroff_wait(void *junk, int howto) 925 { 926 927 if ((howto & (RB_POWEROFF | RB_POWERCYCLE)) == 0 || poweroff_delay <= 0) 928 return; 929 DELAY(poweroff_delay * 1000); 930 } 931 932 /* 933 * Some system processes (e.g. syncer) need to be stopped at appropriate 934 * points in their main loops prior to a system shutdown, so that they 935 * won't interfere with the shutdown process (e.g. by holding a disk buf 936 * to cause sync to fail). 
For each of these system processes, register 937 * shutdown_kproc() as a handler for one of shutdown events. 938 */ 939 static int kproc_shutdown_wait = 60; 940 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, 941 &kproc_shutdown_wait, 0, "Max wait time (sec) to stop for each process"); 942 943 void 944 kproc_shutdown(void *arg, int howto) 945 { 946 struct proc *p; 947 int error; 948 949 if (panicstr) 950 return; 951 952 p = (struct proc *)arg; 953 printf("Waiting (max %d seconds) for system process `%s' to stop... ", 954 kproc_shutdown_wait, p->p_comm); 955 error = kproc_suspend(p, kproc_shutdown_wait * hz); 956 957 if (error == EWOULDBLOCK) 958 printf("timed out\n"); 959 else 960 printf("done\n"); 961 } 962 963 void 964 kthread_shutdown(void *arg, int howto) 965 { 966 struct thread *td; 967 int error; 968 969 if (panicstr) 970 return; 971 972 td = (struct thread *)arg; 973 printf("Waiting (max %d seconds) for system thread `%s' to stop... ", 974 kproc_shutdown_wait, td->td_name); 975 error = kthread_suspend(td, kproc_shutdown_wait * hz); 976 977 if (error == EWOULDBLOCK) 978 printf("timed out\n"); 979 else 980 printf("done\n"); 981 } 982 983 static int 984 dumpdevname_sysctl_handler(SYSCTL_HANDLER_ARGS) 985 { 986 char buf[256]; 987 struct dumperinfo *di; 988 struct sbuf sb; 989 int error; 990 991 error = sysctl_wire_old_buffer(req, 0); 992 if (error != 0) 993 return (error); 994 995 sbuf_new_for_sysctl(&sb, buf, sizeof(buf), req); 996 997 mtx_lock(&dumpconf_list_lk); 998 TAILQ_FOREACH(di, &dumper_configs, di_next) { 999 if (di != TAILQ_FIRST(&dumper_configs)) 1000 sbuf_putc(&sb, ','); 1001 sbuf_cat(&sb, di->di_devname); 1002 } 1003 mtx_unlock(&dumpconf_list_lk); 1004 1005 error = sbuf_finish(&sb); 1006 sbuf_delete(&sb); 1007 return (error); 1008 } 1009 SYSCTL_PROC(_kern_shutdown, OID_AUTO, dumpdevname, CTLTYPE_STRING | CTLFLAG_RD, 1010 &dumper_configs, 0, dumpdevname_sysctl_handler, "A", 1011 "Device(s) for kernel dumps"); 1012 1013 static 
int _dump_append(struct dumperinfo *di, void *virtual, 1014 vm_offset_t physical, size_t length); 1015 1016 #ifdef EKCD 1017 static struct kerneldumpcrypto * 1018 kerneldumpcrypto_create(size_t blocksize, uint8_t encryption, 1019 const uint8_t *key, uint32_t encryptedkeysize, const uint8_t *encryptedkey) 1020 { 1021 struct kerneldumpcrypto *kdc; 1022 struct kerneldumpkey *kdk; 1023 uint32_t dumpkeysize; 1024 1025 dumpkeysize = roundup2(sizeof(*kdk) + encryptedkeysize, blocksize); 1026 kdc = malloc(sizeof(*kdc) + dumpkeysize, M_EKCD, M_WAITOK | M_ZERO); 1027 1028 arc4rand(kdc->kdc_iv, sizeof(kdc->kdc_iv), 0); 1029 1030 kdc->kdc_encryption = encryption; 1031 switch (kdc->kdc_encryption) { 1032 case KERNELDUMP_ENC_AES_256_CBC: 1033 if (rijndael_makeKey(&kdc->kdc_ki, DIR_ENCRYPT, 256, key) <= 0) 1034 goto failed; 1035 break; 1036 case KERNELDUMP_ENC_CHACHA20: 1037 chacha_keysetup(&kdc->kdc_chacha, key, 256); 1038 break; 1039 default: 1040 goto failed; 1041 } 1042 1043 kdc->kdc_dumpkeysize = dumpkeysize; 1044 kdk = kdc->kdc_dumpkey; 1045 kdk->kdk_encryption = kdc->kdc_encryption; 1046 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1047 kdk->kdk_encryptedkeysize = htod32(encryptedkeysize); 1048 memcpy(kdk->kdk_encryptedkey, encryptedkey, encryptedkeysize); 1049 1050 return (kdc); 1051 failed: 1052 explicit_bzero(kdc, sizeof(*kdc) + dumpkeysize); 1053 free(kdc, M_EKCD); 1054 return (NULL); 1055 } 1056 1057 static int 1058 kerneldumpcrypto_init(struct kerneldumpcrypto *kdc) 1059 { 1060 uint8_t hash[SHA256_DIGEST_LENGTH]; 1061 SHA256_CTX ctx; 1062 struct kerneldumpkey *kdk; 1063 int error; 1064 1065 error = 0; 1066 1067 if (kdc == NULL) 1068 return (0); 1069 1070 /* 1071 * When a user enters ddb it can write a crash dump multiple times. 1072 * Each time it should be encrypted using a different IV. 
1073 */ 1074 SHA256_Init(&ctx); 1075 SHA256_Update(&ctx, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1076 SHA256_Final(hash, &ctx); 1077 bcopy(hash, kdc->kdc_iv, sizeof(kdc->kdc_iv)); 1078 1079 switch (kdc->kdc_encryption) { 1080 case KERNELDUMP_ENC_AES_256_CBC: 1081 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1082 kdc->kdc_iv) <= 0) { 1083 error = EINVAL; 1084 goto out; 1085 } 1086 break; 1087 case KERNELDUMP_ENC_CHACHA20: 1088 chacha_ivsetup(&kdc->kdc_chacha, kdc->kdc_iv, NULL); 1089 break; 1090 default: 1091 error = EINVAL; 1092 goto out; 1093 } 1094 1095 kdk = kdc->kdc_dumpkey; 1096 memcpy(kdk->kdk_iv, kdc->kdc_iv, sizeof(kdk->kdk_iv)); 1097 out: 1098 explicit_bzero(hash, sizeof(hash)); 1099 return (error); 1100 } 1101 1102 static uint32_t 1103 kerneldumpcrypto_dumpkeysize(const struct kerneldumpcrypto *kdc) 1104 { 1105 1106 if (kdc == NULL) 1107 return (0); 1108 return (kdc->kdc_dumpkeysize); 1109 } 1110 #endif /* EKCD */ 1111 1112 static struct kerneldumpcomp * 1113 kerneldumpcomp_create(struct dumperinfo *di, uint8_t compression) 1114 { 1115 struct kerneldumpcomp *kdcomp; 1116 int format; 1117 1118 switch (compression) { 1119 case KERNELDUMP_COMP_GZIP: 1120 format = COMPRESS_GZIP; 1121 break; 1122 case KERNELDUMP_COMP_ZSTD: 1123 format = COMPRESS_ZSTD; 1124 break; 1125 default: 1126 return (NULL); 1127 } 1128 1129 kdcomp = malloc(sizeof(*kdcomp), M_DUMPER, M_WAITOK | M_ZERO); 1130 kdcomp->kdc_format = compression; 1131 kdcomp->kdc_stream = compressor_init(kerneldumpcomp_write_cb, 1132 format, di->maxiosize, kerneldump_gzlevel, di); 1133 if (kdcomp->kdc_stream == NULL) { 1134 free(kdcomp, M_DUMPER); 1135 return (NULL); 1136 } 1137 kdcomp->kdc_buf = malloc(di->maxiosize, M_DUMPER, M_WAITOK | M_NODUMP); 1138 return (kdcomp); 1139 } 1140 1141 static void 1142 kerneldumpcomp_destroy(struct dumperinfo *di) 1143 { 1144 struct kerneldumpcomp *kdcomp; 1145 1146 kdcomp = di->kdcomp; 1147 if (kdcomp == NULL) 1148 return; 1149 compressor_fini(kdcomp->kdc_stream); 1150 
explicit_bzero(kdcomp->kdc_buf, di->maxiosize); 1151 free(kdcomp->kdc_buf, M_DUMPER); 1152 free(kdcomp, M_DUMPER); 1153 } 1154 1155 /* 1156 * Must not be present on global list. 1157 */ 1158 static void 1159 free_single_dumper(struct dumperinfo *di) 1160 { 1161 1162 if (di == NULL) 1163 return; 1164 1165 if (di->blockbuf != NULL) { 1166 explicit_bzero(di->blockbuf, di->blocksize); 1167 free(di->blockbuf, M_DUMPER); 1168 } 1169 1170 kerneldumpcomp_destroy(di); 1171 1172 #ifdef EKCD 1173 if (di->kdcrypto != NULL) { 1174 explicit_bzero(di->kdcrypto, sizeof(*di->kdcrypto) + 1175 di->kdcrypto->kdc_dumpkeysize); 1176 free(di->kdcrypto, M_EKCD); 1177 } 1178 #endif 1179 1180 explicit_bzero(di, sizeof(*di)); 1181 free(di, M_DUMPER); 1182 } 1183 1184 /* Registration of dumpers */ 1185 int 1186 dumper_insert(const struct dumperinfo *di_template, const char *devname, 1187 const struct diocskerneldump_arg *kda) 1188 { 1189 struct dumperinfo *newdi, *listdi; 1190 bool inserted; 1191 uint8_t index; 1192 int error; 1193 1194 index = kda->kda_index; 1195 MPASS(index != KDA_REMOVE && index != KDA_REMOVE_DEV && 1196 index != KDA_REMOVE_ALL); 1197 1198 error = priv_check(curthread, PRIV_SETDUMPER); 1199 if (error != 0) 1200 return (error); 1201 1202 newdi = malloc(sizeof(*newdi) + strlen(devname) + 1, M_DUMPER, M_WAITOK 1203 | M_ZERO); 1204 memcpy(newdi, di_template, sizeof(*newdi)); 1205 newdi->blockbuf = NULL; 1206 newdi->kdcrypto = NULL; 1207 newdi->kdcomp = NULL; 1208 strcpy(newdi->di_devname, devname); 1209 1210 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) { 1211 #ifdef EKCD 1212 newdi->kdcrypto = kerneldumpcrypto_create(di_template->blocksize, 1213 kda->kda_encryption, kda->kda_key, 1214 kda->kda_encryptedkeysize, kda->kda_encryptedkey); 1215 if (newdi->kdcrypto == NULL) { 1216 error = EINVAL; 1217 goto cleanup; 1218 } 1219 #else 1220 error = EOPNOTSUPP; 1221 goto cleanup; 1222 #endif 1223 } 1224 if (kda->kda_compression != KERNELDUMP_COMP_NONE) { 1225 /* 1226 * We can't 
support simultaneous unpadded block cipher 1227 * encryption and compression because there is no guarantee the 1228 * length of the compressed result is exactly a multiple of the 1229 * cipher block size. 1230 */ 1231 if (kda->kda_encryption == KERNELDUMP_ENC_AES_256_CBC) { 1232 error = EOPNOTSUPP; 1233 goto cleanup; 1234 } 1235 newdi->kdcomp = kerneldumpcomp_create(newdi, 1236 kda->kda_compression); 1237 if (newdi->kdcomp == NULL) { 1238 error = EINVAL; 1239 goto cleanup; 1240 } 1241 } 1242 1243 newdi->blockbuf = malloc(newdi->blocksize, M_DUMPER, M_WAITOK | M_ZERO); 1244 1245 /* Add the new configuration to the queue */ 1246 mtx_lock(&dumpconf_list_lk); 1247 inserted = false; 1248 TAILQ_FOREACH(listdi, &dumper_configs, di_next) { 1249 if (index == 0) { 1250 TAILQ_INSERT_BEFORE(listdi, newdi, di_next); 1251 inserted = true; 1252 break; 1253 } 1254 index--; 1255 } 1256 if (!inserted) 1257 TAILQ_INSERT_TAIL(&dumper_configs, newdi, di_next); 1258 mtx_unlock(&dumpconf_list_lk); 1259 1260 return (0); 1261 1262 cleanup: 1263 free_single_dumper(newdi); 1264 return (error); 1265 } 1266 1267 static bool 1268 dumper_config_match(const struct dumperinfo *di, const char *devname, 1269 const struct diocskerneldump_arg *kda) 1270 { 1271 if (kda->kda_index == KDA_REMOVE_ALL) 1272 return (true); 1273 1274 if (strcmp(di->di_devname, devname) != 0) 1275 return (false); 1276 1277 /* 1278 * Allow wildcard removal of configs matching a device on g_dev_orphan. 1279 */ 1280 if (kda->kda_index == KDA_REMOVE_DEV) 1281 return (true); 1282 1283 if (di->kdcomp != NULL) { 1284 if (di->kdcomp->kdc_format != kda->kda_compression) 1285 return (false); 1286 } else if (kda->kda_compression != KERNELDUMP_COMP_NONE) 1287 return (false); 1288 #ifdef EKCD 1289 if (di->kdcrypto != NULL) { 1290 if (di->kdcrypto->kdc_encryption != kda->kda_encryption) 1291 return (false); 1292 /* 1293 * Do we care to verify keys match to delete? 
It seems weird 1294 * to expect multiple fallback dump configurations on the same 1295 * device that only differ in crypto key. 1296 */ 1297 } else 1298 #endif 1299 if (kda->kda_encryption != KERNELDUMP_ENC_NONE) 1300 return (false); 1301 1302 return (true); 1303 } 1304 1305 int 1306 dumper_remove(const char *devname, const struct diocskerneldump_arg *kda) 1307 { 1308 struct dumperinfo *di, *sdi; 1309 bool found; 1310 int error; 1311 1312 error = priv_check(curthread, PRIV_SETDUMPER); 1313 if (error != 0) 1314 return (error); 1315 1316 /* 1317 * Try to find a matching configuration, and kill it. 1318 * 1319 * NULL 'kda' indicates remove any configuration matching 'devname', 1320 * which may remove multiple configurations in atypical configurations. 1321 */ 1322 found = false; 1323 mtx_lock(&dumpconf_list_lk); 1324 TAILQ_FOREACH_SAFE(di, &dumper_configs, di_next, sdi) { 1325 if (dumper_config_match(di, devname, kda)) { 1326 found = true; 1327 TAILQ_REMOVE(&dumper_configs, di, di_next); 1328 free_single_dumper(di); 1329 } 1330 } 1331 mtx_unlock(&dumpconf_list_lk); 1332 1333 /* Only produce ENOENT if a more targeted match didn't match. 
*/ 1334 if (!found && kda->kda_index == KDA_REMOVE) 1335 return (ENOENT); 1336 return (0); 1337 } 1338 1339 static int 1340 dump_check_bounds(struct dumperinfo *di, off_t offset, size_t length) 1341 { 1342 1343 if (di->mediasize > 0 && length != 0 && (offset < di->mediaoffset || 1344 offset - di->mediaoffset + length > di->mediasize)) { 1345 if (di->kdcomp != NULL && offset >= di->mediaoffset) { 1346 printf( 1347 "Compressed dump failed to fit in device boundaries.\n"); 1348 return (E2BIG); 1349 } 1350 1351 printf("Attempt to write outside dump device boundaries.\n" 1352 "offset(%jd), mediaoffset(%jd), length(%ju), mediasize(%jd).\n", 1353 (intmax_t)offset, (intmax_t)di->mediaoffset, 1354 (uintmax_t)length, (intmax_t)di->mediasize); 1355 return (ENOSPC); 1356 } 1357 if (length % di->blocksize != 0) { 1358 printf("Attempt to write partial block of length %ju.\n", 1359 (uintmax_t)length); 1360 return (EINVAL); 1361 } 1362 if (offset % di->blocksize != 0) { 1363 printf("Attempt to write at unaligned offset %jd.\n", 1364 (intmax_t)offset); 1365 return (EINVAL); 1366 } 1367 1368 return (0); 1369 } 1370 1371 #ifdef EKCD 1372 static int 1373 dump_encrypt(struct kerneldumpcrypto *kdc, uint8_t *buf, size_t size) 1374 { 1375 1376 switch (kdc->kdc_encryption) { 1377 case KERNELDUMP_ENC_AES_256_CBC: 1378 if (rijndael_blockEncrypt(&kdc->kdc_ci, &kdc->kdc_ki, buf, 1379 8 * size, buf) <= 0) { 1380 return (EIO); 1381 } 1382 if (rijndael_cipherInit(&kdc->kdc_ci, MODE_CBC, 1383 buf + size - 16 /* IV size for AES-256-CBC */) <= 0) { 1384 return (EIO); 1385 } 1386 break; 1387 case KERNELDUMP_ENC_CHACHA20: 1388 chacha_encrypt_bytes(&kdc->kdc_chacha, buf, buf, size); 1389 break; 1390 default: 1391 return (EINVAL); 1392 } 1393 1394 return (0); 1395 } 1396 1397 /* Encrypt data and call dumper. 
*/ 1398 static int 1399 dump_encrypted_write(struct dumperinfo *di, void *virtual, 1400 vm_offset_t physical, off_t offset, size_t length) 1401 { 1402 static uint8_t buf[KERNELDUMP_BUFFER_SIZE]; 1403 struct kerneldumpcrypto *kdc; 1404 int error; 1405 size_t nbytes; 1406 1407 kdc = di->kdcrypto; 1408 1409 while (length > 0) { 1410 nbytes = MIN(length, sizeof(buf)); 1411 bcopy(virtual, buf, nbytes); 1412 1413 if (dump_encrypt(kdc, buf, nbytes) != 0) 1414 return (EIO); 1415 1416 error = dump_write(di, buf, physical, offset, nbytes); 1417 if (error != 0) 1418 return (error); 1419 1420 offset += nbytes; 1421 virtual = (void *)((uint8_t *)virtual + nbytes); 1422 length -= nbytes; 1423 } 1424 1425 return (0); 1426 } 1427 #endif /* EKCD */ 1428 1429 static int 1430 kerneldumpcomp_write_cb(void *base, size_t length, off_t offset, void *arg) 1431 { 1432 struct dumperinfo *di; 1433 size_t resid, rlength; 1434 int error; 1435 1436 di = arg; 1437 1438 if (length % di->blocksize != 0) { 1439 /* 1440 * This must be the final write after flushing the compression 1441 * stream. Write as many full blocks as possible and stash the 1442 * residual data in the dumper's block buffer. It will be 1443 * padded and written in dump_finish(). 1444 */ 1445 rlength = rounddown(length, di->blocksize); 1446 if (rlength != 0) { 1447 error = _dump_append(di, base, 0, rlength); 1448 if (error != 0) 1449 return (error); 1450 } 1451 resid = length - rlength; 1452 memmove(di->blockbuf, (uint8_t *)base + rlength, resid); 1453 di->kdcomp->kdc_resid = resid; 1454 return (EAGAIN); 1455 } 1456 return (_dump_append(di, base, 0, length)); 1457 } 1458 1459 /* 1460 * Write kernel dump headers at the beginning and end of the dump extent. 1461 * Write the kernel dump encryption key after the leading header if we were 1462 * configured to do so. 
1463 */ 1464 static int 1465 dump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh) 1466 { 1467 #ifdef EKCD 1468 struct kerneldumpcrypto *kdc; 1469 #endif 1470 void *buf, *key; 1471 size_t hdrsz; 1472 uint64_t extent; 1473 uint32_t keysize; 1474 int error; 1475 1476 hdrsz = sizeof(*kdh); 1477 if (hdrsz > di->blocksize) 1478 return (ENOMEM); 1479 1480 #ifdef EKCD 1481 kdc = di->kdcrypto; 1482 key = kdc->kdc_dumpkey; 1483 keysize = kerneldumpcrypto_dumpkeysize(kdc); 1484 #else 1485 key = NULL; 1486 keysize = 0; 1487 #endif 1488 1489 /* 1490 * If the dump device has special handling for headers, let it take care 1491 * of writing them out. 1492 */ 1493 if (di->dumper_hdr != NULL) 1494 return (di->dumper_hdr(di, kdh, key, keysize)); 1495 1496 if (hdrsz == di->blocksize) 1497 buf = kdh; 1498 else { 1499 buf = di->blockbuf; 1500 memset(buf, 0, di->blocksize); 1501 memcpy(buf, kdh, hdrsz); 1502 } 1503 1504 extent = dtoh64(kdh->dumpextent); 1505 #ifdef EKCD 1506 if (kdc != NULL) { 1507 error = dump_write(di, kdc->kdc_dumpkey, 0, 1508 di->mediaoffset + di->mediasize - di->blocksize - extent - 1509 keysize, keysize); 1510 if (error != 0) 1511 return (error); 1512 } 1513 #endif 1514 1515 error = dump_write(di, buf, 0, 1516 di->mediaoffset + di->mediasize - 2 * di->blocksize - extent - 1517 keysize, di->blocksize); 1518 if (error == 0) 1519 error = dump_write(di, buf, 0, di->mediaoffset + di->mediasize - 1520 di->blocksize, di->blocksize); 1521 return (error); 1522 } 1523 1524 /* 1525 * Don't touch the first SIZEOF_METADATA bytes on the dump device. This is to 1526 * protect us from metadata and metadata from us. 1527 */ 1528 #define SIZEOF_METADATA (64 * 1024) 1529 1530 /* 1531 * Do some preliminary setup for a kernel dump: initialize state for encryption, 1532 * if requested, and make sure that we have enough space on the dump device. 
1533 * 1534 * We set things up so that the dump ends before the last sector of the dump 1535 * device, at which the trailing header is written. 1536 * 1537 * +-----------+------+-----+----------------------------+------+ 1538 * | | lhdr | key | ... kernel dump ... | thdr | 1539 * +-----------+------+-----+----------------------------+------+ 1540 * 1 blk opt <------- dump extent --------> 1 blk 1541 * 1542 * Dumps written using dump_append() start at the beginning of the extent. 1543 * Uncompressed dumps will use the entire extent, but compressed dumps typically 1544 * will not. The true length of the dump is recorded in the leading and trailing 1545 * headers once the dump has been completed. 1546 * 1547 * The dump device may provide a callback, in which case it will initialize 1548 * dumpoff and take care of laying out the headers. 1549 */ 1550 int 1551 dump_start(struct dumperinfo *di, struct kerneldumpheader *kdh) 1552 { 1553 uint64_t dumpextent, span; 1554 uint32_t keysize; 1555 int error; 1556 1557 #ifdef EKCD 1558 error = kerneldumpcrypto_init(di->kdcrypto); 1559 if (error != 0) 1560 return (error); 1561 keysize = kerneldumpcrypto_dumpkeysize(di->kdcrypto); 1562 #else 1563 error = 0; 1564 keysize = 0; 1565 #endif 1566 1567 if (di->dumper_start != NULL) { 1568 error = di->dumper_start(di); 1569 } else { 1570 dumpextent = dtoh64(kdh->dumpextent); 1571 span = SIZEOF_METADATA + dumpextent + 2 * di->blocksize + 1572 keysize; 1573 if (di->mediasize < span) { 1574 if (di->kdcomp == NULL) 1575 return (E2BIG); 1576 1577 /* 1578 * We don't yet know how much space the compressed dump 1579 * will occupy, so try to use the whole swap partition 1580 * (minus the first 64KB) in the hope that the 1581 * compressed dump will fit. If that doesn't turn out to 1582 * be enough, the bounds checking in dump_write() 1583 * will catch us and cause the dump to fail. 
1584 */ 1585 dumpextent = di->mediasize - span + dumpextent; 1586 kdh->dumpextent = htod64(dumpextent); 1587 } 1588 1589 /* 1590 * The offset at which to begin writing the dump. 1591 */ 1592 di->dumpoff = di->mediaoffset + di->mediasize - di->blocksize - 1593 dumpextent; 1594 } 1595 di->origdumpoff = di->dumpoff; 1596 return (error); 1597 } 1598 1599 static int 1600 _dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1601 size_t length) 1602 { 1603 int error; 1604 1605 #ifdef EKCD 1606 if (di->kdcrypto != NULL) 1607 error = dump_encrypted_write(di, virtual, physical, di->dumpoff, 1608 length); 1609 else 1610 #endif 1611 error = dump_write(di, virtual, physical, di->dumpoff, length); 1612 if (error == 0) 1613 di->dumpoff += length; 1614 return (error); 1615 } 1616 1617 /* 1618 * Write to the dump device starting at dumpoff. When compression is enabled, 1619 * writes to the device will be performed using a callback that gets invoked 1620 * when the compression stream's output buffer is full. 1621 */ 1622 int 1623 dump_append(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1624 size_t length) 1625 { 1626 void *buf; 1627 1628 if (di->kdcomp != NULL) { 1629 /* Bounce through a buffer to avoid CRC errors. */ 1630 if (length > di->maxiosize) 1631 return (EINVAL); 1632 buf = di->kdcomp->kdc_buf; 1633 memmove(buf, virtual, length); 1634 return (compressor_write(di->kdcomp->kdc_stream, buf, length)); 1635 } 1636 return (_dump_append(di, virtual, physical, length)); 1637 } 1638 1639 /* 1640 * Write to the dump device at the specified offset. 
1641 */ 1642 int 1643 dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, 1644 off_t offset, size_t length) 1645 { 1646 int error; 1647 1648 error = dump_check_bounds(di, offset, length); 1649 if (error != 0) 1650 return (error); 1651 return (di->dumper(di->priv, virtual, physical, offset, length)); 1652 } 1653 1654 /* 1655 * Perform kernel dump finalization: flush the compression stream, if necessary, 1656 * write the leading and trailing kernel dump headers now that we know the true 1657 * length of the dump, and optionally write the encryption key following the 1658 * leading header. 1659 */ 1660 int 1661 dump_finish(struct dumperinfo *di, struct kerneldumpheader *kdh) 1662 { 1663 int error; 1664 1665 if (di->kdcomp != NULL) { 1666 error = compressor_flush(di->kdcomp->kdc_stream); 1667 if (error == EAGAIN) { 1668 /* We have residual data in di->blockbuf. */ 1669 error = dump_write(di, di->blockbuf, 0, di->dumpoff, 1670 di->blocksize); 1671 di->dumpoff += di->kdcomp->kdc_resid; 1672 di->kdcomp->kdc_resid = 0; 1673 } 1674 if (error != 0) 1675 return (error); 1676 1677 /* 1678 * We now know the size of the compressed dump, so update the 1679 * header accordingly and recompute parity. 
1680 */ 1681 kdh->dumplength = htod64(di->dumpoff - di->origdumpoff); 1682 kdh->parity = 0; 1683 kdh->parity = kerneldump_parity(kdh); 1684 1685 compressor_reset(di->kdcomp->kdc_stream); 1686 } 1687 1688 error = dump_write_headers(di, kdh); 1689 if (error != 0) 1690 return (error); 1691 1692 (void)dump_write(di, NULL, 0, 0, 0); 1693 return (0); 1694 } 1695 1696 void 1697 dump_init_header(const struct dumperinfo *di, struct kerneldumpheader *kdh, 1698 char *magic, uint32_t archver, uint64_t dumplen) 1699 { 1700 size_t dstsize; 1701 1702 bzero(kdh, sizeof(*kdh)); 1703 strlcpy(kdh->magic, magic, sizeof(kdh->magic)); 1704 strlcpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture)); 1705 kdh->version = htod32(KERNELDUMPVERSION); 1706 kdh->architectureversion = htod32(archver); 1707 kdh->dumplength = htod64(dumplen); 1708 kdh->dumpextent = kdh->dumplength; 1709 kdh->dumptime = htod64(time_second); 1710 #ifdef EKCD 1711 kdh->dumpkeysize = htod32(kerneldumpcrypto_dumpkeysize(di->kdcrypto)); 1712 #else 1713 kdh->dumpkeysize = 0; 1714 #endif 1715 kdh->blocksize = htod32(di->blocksize); 1716 strlcpy(kdh->hostname, prison0.pr_hostname, sizeof(kdh->hostname)); 1717 dstsize = sizeof(kdh->versionstring); 1718 if (strlcpy(kdh->versionstring, version, dstsize) >= dstsize) 1719 kdh->versionstring[dstsize - 2] = '\n'; 1720 if (panicstr != NULL) 1721 strlcpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring)); 1722 if (di->kdcomp != NULL) 1723 kdh->compression = di->kdcomp->kdc_format; 1724 kdh->parity = kerneldump_parity(kdh); 1725 } 1726 1727 #ifdef DDB 1728 DB_SHOW_COMMAND(panic, db_show_panic) 1729 { 1730 1731 if (panicstr == NULL) 1732 db_printf("panicstr not set\n"); 1733 else 1734 db_printf("panic: %s\n", panicstr); 1735 } 1736 #endif 1737