1 /*- 2 * Copyright (c) 2002 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/conf.h> 33 #include <sys/cons.h> 34 #include <sys/kdb.h> 35 #include <sys/kernel.h> 36 #include <sys/kerneldump.h> 37 #include <sys/malloc.h> 38 #include <sys/msgbuf.h> 39 #include <sys/proc.h> 40 #include <sys/watchdog.h> 41 42 #include <vm/vm.h> 43 #include <vm/vm_param.h> 44 #include <vm/vm_page.h> 45 #include <vm/vm_phys.h> 46 #include <vm/vm_dumpset.h> 47 #include <vm/pmap.h> 48 49 #include <machine/dump.h> 50 #include <machine/elf.h> 51 #include <machine/md_var.h> 52 #include <machine/pcb.h> 53 54 CTASSERT(sizeof(struct kerneldumpheader) == 512); 55 56 #define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE) 57 58 /* Handle buffered writes. */ 59 static size_t fragsz; 60 61 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; 62 63 #if !defined(__powerpc__) 64 void 65 dumpsys_gen_pa_init(void) 66 { 67 int n, idx; 68 69 bzero(dump_map, sizeof(dump_map)); 70 for (n = 0; n < nitems(dump_map); n++) { 71 idx = n * 2; 72 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) 73 break; 74 dump_map[n].pa_start = dump_avail[idx]; 75 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; 76 } 77 } 78 #endif 79 80 struct dump_pa * 81 dumpsys_gen_pa_next(struct dump_pa *mdp) 82 { 83 84 if (mdp == NULL) 85 return (&dump_map[0]); 86 87 mdp++; 88 if (mdp->pa_size == 0) 89 mdp = NULL; 90 return (mdp); 91 } 92 93 void 94 dumpsys_gen_wbinv_all(void) 95 { 96 97 } 98 99 void 100 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, 101 void *va __unused) 102 { 103 104 } 105 106 int 107 dumpsys_gen_write_aux_headers(struct dumperinfo *di) 108 { 109 110 return (0); 111 } 112 113 int 114 dumpsys_buf_seek(struct dumperinfo *di, size_t sz) 115 { 116 static uint8_t buf[DEV_BSIZE]; 117 size_t nbytes; 118 int error; 119 120 bzero(buf, sizeof(buf)); 121 122 while (sz > 0) { 123 nbytes = MIN(sz, sizeof(buf)); 124 125 error = dump_append(di, buf, nbytes); 126 if (error) 127 return (error); 128 sz -= nbytes; 129 } 130 131 return (0); 132 } 133 134 int 135 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) 136 { 137 size_t len; 138 int error; 139 140 while (sz) { 141 len = di->blocksize - fragsz; 142 if (len > sz) 143 len = sz; 144 memcpy((char *)di->blockbuf + fragsz, ptr, len); 145 fragsz += len; 146 ptr += len; 147 sz -= len; 148 if (fragsz == di->blocksize) { 149 error = dump_append(di, di->blockbuf, di->blocksize); 150 if (error) 151 return (error); 152 fragsz = 0; 153 } 154 } 155 return (0); 156 } 157 158 int 159 dumpsys_buf_flush(struct dumperinfo *di) 160 { 161 int error; 162 163 if (fragsz == 0) 164 return (0); 165 166 error = dump_append(di, di->blockbuf, di->blocksize); 167 fragsz = 0; 168 return (error); 169 } 170 171 CTASSERT(PAGE_SHIFT < 20); 172 #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) 173 174 int 175 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) 176 { 177 struct dumperinfo *di = (struct dumperinfo*)arg; 178 vm_paddr_t pa; 179 void *va; 180 uint64_t pgs; 181 size_t counter, sz, chunk; 182 int c, error; 183 u_int maxdumppgs; 184 185 error = 0; /* catch case in which chunk size is 0 */ 186 counter = 0; /* Update twiddle every 16MB */ 187 va = NULL; 188 pgs = mdp->pa_size / PAGE_SIZE; 189 pa = mdp->pa_start; 190 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); 191 if (maxdumppgs == 0) /* seatbelt */ 192 maxdumppgs = 1; 193 194 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), 195 (uintmax_t)pgs); 196 197 dumpsys_wbinv_all(); 198 while (pgs) { 199 chunk = pgs; 200 if (chunk > maxdumppgs) 201 chunk = maxdumppgs; 202 sz = chunk << PAGE_SHIFT; 203 counter += sz; 204 if (counter >> 24) { 205 printf(" %ju", (uintmax_t)PG2MB(pgs)); 206 counter &= (1 << 24) - 1; 207 } 208 209 dumpsys_map_chunk(pa, chunk, &va); 210 wdog_kern_pat(WD_LASTVAL); 211 212 error = dump_append(di, va, sz); 213 dumpsys_unmap_chunk(pa, chunk, va); 214 if (error) 215 break; 216 pgs -= chunk; 217 pa += sz; 218 219 /* Check for user abort. */ 220 c = cncheckc(); 221 if (c == 0x03) 222 return (ECANCELED); 223 if (c != -1) 224 printf(" (CTRL-C to abort) "); 225 } 226 printf(" ... %s\n", (error) ? "fail" : "ok"); 227 return (error); 228 } 229 230 int 231 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) 232 { 233 struct dump_pa *mdp; 234 int error, seqnr; 235 236 seqnr = 0; 237 mdp = dumpsys_pa_next(NULL); 238 while (mdp != NULL) { 239 error = (*cb)(mdp, seqnr++, arg); 240 if (error) 241 return (-error); 242 mdp = dumpsys_pa_next(mdp); 243 } 244 return (seqnr); 245 } 246 247 static off_t fileofs; 248 249 static int 250 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) 251 { 252 struct dumperinfo *di = (struct dumperinfo*)arg; 253 Elf_Phdr phdr; 254 uint64_t size; 255 int error; 256 257 size = mdp->pa_size; 258 bzero(&phdr, sizeof(phdr)); 259 phdr.p_type = PT_LOAD; 260 phdr.p_flags = PF_R; /* XXX */ 261 phdr.p_offset = fileofs; 262 #ifdef __powerpc__ 263 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); 264 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); 265 #else 266 phdr.p_vaddr = mdp->pa_start; 267 phdr.p_paddr = mdp->pa_start; 268 #endif 269 phdr.p_filesz = size; 270 phdr.p_memsz = size; 271 phdr.p_align = PAGE_SIZE; 272 273 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); 274 fileofs += phdr.p_filesz; 275 return (error); 276 } 277 278 static int 279 cb_size(struct dump_pa *mdp, int seqnr, void *arg) 280 { 281 uint64_t *sz; 282 283 sz = (uint64_t *)arg; 284 *sz += (uint64_t)mdp->pa_size; 285 return (0); 286 } 287 288 int 289 dumpsys_generic(struct dumperinfo *di) 290 { 291 static struct kerneldumpheader kdh; 292 Elf_Ehdr ehdr; 293 uint64_t dumpsize; 294 off_t hdrgap; 295 size_t hdrsz; 296 int error; 297 298 #if MINIDUMP_PAGE_TRACKING == 1 299 if (do_minidump) 300 return (minidumpsys(di, false)); 301 #endif 302 303 bzero(&ehdr, sizeof(ehdr)); 304 ehdr.e_ident[EI_MAG0] = ELFMAG0; 305 ehdr.e_ident[EI_MAG1] = ELFMAG1; 306 ehdr.e_ident[EI_MAG2] = ELFMAG2; 307 ehdr.e_ident[EI_MAG3] = ELFMAG3; 308 ehdr.e_ident[EI_CLASS] = ELF_CLASS; 309 #if BYTE_ORDER == LITTLE_ENDIAN 310 ehdr.e_ident[EI_DATA] = ELFDATA2LSB; 311 #else 312 ehdr.e_ident[EI_DATA] = ELFDATA2MSB; 313 #endif 314 ehdr.e_ident[EI_VERSION] = EV_CURRENT; 315 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ 316 ehdr.e_type = ET_CORE; 317 ehdr.e_machine = EM_VALUE; 318 ehdr.e_phoff = sizeof(ehdr); 319 ehdr.e_flags = 0; 320 ehdr.e_ehsize = sizeof(ehdr); 321 ehdr.e_phentsize = sizeof(Elf_Phdr); 322 ehdr.e_shentsize = sizeof(Elf_Shdr); 323 324 dumpsys_pa_init(); 325 326 /* Calculate dump size. */ 327 dumpsize = 0L; 328 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + 329 DUMPSYS_NUM_AUX_HDRS; 330 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; 331 fileofs = MD_ALIGN(hdrsz); 332 dumpsize += fileofs; 333 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize); 334 335 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, 336 dumpsize); 337 338 error = dump_start(di, &kdh); 339 if (error != 0) 340 goto fail; 341 342 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, 343 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); 344 345 /* Dump ELF header */ 346 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); 347 if (error) 348 goto fail; 349 350 /* Dump program headers */ 351 error = dumpsys_foreach_chunk(cb_dumphdr, di); 352 if (error < 0) 353 goto fail; 354 error = dumpsys_write_aux_headers(di); 355 if (error < 0) 356 goto fail; 357 dumpsys_buf_flush(di); 358 359 /* 360 * All headers are written using blocked I/O, so we know the 361 * current offset is (still) block aligned. Skip the alignement 362 * in the file to have the segment contents aligned at page 363 * boundary. 364 */ 365 error = dumpsys_buf_seek(di, (size_t)hdrgap); 366 if (error) 367 goto fail; 368 369 /* Dump memory chunks. */ 370 error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); 371 if (error < 0) 372 goto fail; 373 374 error = dump_finish(di, &kdh); 375 if (error != 0) 376 goto fail; 377 378 printf("\nDump complete\n"); 379 return (0); 380 381 fail: 382 if (error < 0) 383 error = -error; 384 385 if (error == ECANCELED) 386 printf("\nDump aborted\n"); 387 else if (error == E2BIG || error == ENOSPC) 388 printf("\nDump failed. Partition too small.\n"); 389 else 390 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 391 return (error); 392 } 393 394 #if MINIDUMP_PAGE_TRACKING == 1 395 396 /* Minidump progress bar */ 397 static struct { 398 const int min_per; 399 const int max_per; 400 bool visited; 401 } progress_track[10] = { 402 { 0, 10, false}, 403 { 10, 20, false}, 404 { 20, 30, false}, 405 { 30, 40, false}, 406 { 40, 50, false}, 407 { 50, 60, false}, 408 { 60, 70, false}, 409 { 70, 80, false}, 410 { 80, 90, false}, 411 { 90, 100, false} 412 }; 413 414 static uint64_t dumpsys_pb_size; 415 static uint64_t dumpsys_pb_remaining; 416 static uint64_t dumpsys_pb_check; 417 418 /* Reset the progress bar for a dump of dumpsize. */ 419 void 420 dumpsys_pb_init(uint64_t dumpsize) 421 { 422 int i; 423 424 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize; 425 dumpsys_pb_check = 0; 426 427 for (i = 0; i < nitems(progress_track); i++) 428 progress_track[i].visited = false; 429 } 430 431 /* 432 * Update the progress according to the delta bytes that were written out. 433 * Check and print the progress percentage. 434 */ 435 void 436 dumpsys_pb_progress(size_t delta) 437 { 438 int sofar, i; 439 440 dumpsys_pb_remaining -= delta; 441 dumpsys_pb_check += delta; 442 443 /* 444 * To save time while dumping, only loop through progress_track 445 * occasionally. 446 */ 447 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0) 448 return; 449 else 450 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1; 451 452 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size); 453 for (i = 0; i < nitems(progress_track); i++) { 454 if (sofar < progress_track[i].min_per || 455 sofar > progress_track[i].max_per) 456 continue; 457 if (!progress_track[i].visited) { 458 progress_track[i].visited = true; 459 printf("..%d%%", sofar); 460 } 461 break; 462 } 463 } 464 465 int 466 minidumpsys(struct dumperinfo *di, bool livedump) 467 { 468 struct minidumpstate state; 469 struct msgbuf mb_copy; 470 char *msg_ptr; 471 size_t sz; 472 int error; 473 474 if (livedump) { 475 KASSERT(!dumping, ("live dump invoked from incorrect context")); 476 477 /* 478 * Before invoking cpu_minidumpsys() on the live system, we 479 * must snapshot some required global state: the message 480 * buffer, and the page dump bitset. They may be modified at 481 * any moment, so for the sake of the live dump it is best to 482 * have an unchanging snapshot to work with. Both are included 483 * as part of the dump and consumed by userspace tools. 484 * 485 * Other global state important to the minidump code is the 486 * dump_avail array and the kernel's page tables, but snapshots 487 * are not taken of these. For one, dump_avail[] is expected 488 * not to change after boot. Snapshotting the kernel page 489 * tables would involve an additional walk, so this is avoided 490 * too. 491 * 492 * This means live dumps are best effort, and the result may or 493 * may not be usable; there are no guarantees about the 494 * consistency of the dump's contents. Any of the following 495 * (and likely more) may affect the live dump: 496 * 497 * - Data may be modified, freed, or remapped during the 498 * course of the dump, such that the contents written out 499 * are partially or entirely unrecognizable. This means 500 * valid references may point to destroyed/mangled objects, 501 * and vice versa. 502 * 503 * - The dumped context of any threads that ran during the 504 * dump process may be unreliable. 505 * 506 * - The set of kernel page tables included in the dump likely 507 * won't correspond exactly to the copy of the dump bitset. 508 * This means some pages will be dumped without any way to 509 * locate them, and some pages may not have been dumped 510 * despite appearing as if they should. 511 */ 512 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK); 513 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr); 514 state.msgbufp = &mb_copy; 515 516 sz = BITSET_SIZE(vm_page_dump_pages); 517 state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK); 518 BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset); 519 } else { 520 KASSERT(dumping, ("minidump invoked outside of doadump()")); 521 522 /* Use the globals. */ 523 state.msgbufp = msgbufp; 524 state.dump_bitset = vm_page_dump; 525 } 526 527 error = cpu_minidumpsys(di, &state); 528 if (livedump) { 529 free(msg_ptr, M_TEMP); 530 free(state.dump_bitset, M_TEMP); 531 } 532 533 return (error); 534 } 535 #endif /* MINIDUMP_PAGE_TRACKING == 1 */ 536