1 /*- 2 * Copyright (c) 2002 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/conf.h> 31 #include <sys/cons.h> 32 #include <sys/kdb.h> 33 #include <sys/kernel.h> 34 #include <sys/kerneldump.h> 35 #include <sys/malloc.h> 36 #include <sys/msgbuf.h> 37 #include <sys/proc.h> 38 #include <sys/watchdog.h> 39 40 #include <vm/vm.h> 41 #include <vm/vm_param.h> 42 #include <vm/vm_page.h> 43 #include <vm/vm_phys.h> 44 #include <vm/vm_dumpset.h> 45 #include <vm/pmap.h> 46 47 #include <machine/dump.h> 48 #include <machine/elf.h> 49 #include <machine/md_var.h> 50 #include <machine/pcb.h> 51 52 CTASSERT(sizeof(struct kerneldumpheader) == 512); 53 54 #define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE) 55 56 /* Handle buffered writes. */ 57 static size_t fragsz; 58 59 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; 60 61 #if !defined(__powerpc__) 62 void 63 dumpsys_gen_pa_init(void) 64 { 65 int n, idx; 66 67 bzero(dump_map, sizeof(dump_map)); 68 for (n = 0; n < nitems(dump_map); n++) { 69 idx = n * 2; 70 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) 71 break; 72 dump_map[n].pa_start = dump_avail[idx]; 73 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; 74 } 75 } 76 #endif 77 78 struct dump_pa * 79 dumpsys_gen_pa_next(struct dump_pa *mdp) 80 { 81 82 if (mdp == NULL) 83 return (&dump_map[0]); 84 85 mdp++; 86 if (mdp->pa_size == 0) 87 mdp = NULL; 88 return (mdp); 89 } 90 91 void 92 dumpsys_gen_wbinv_all(void) 93 { 94 95 } 96 97 void 98 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, 99 void *va __unused) 100 { 101 102 } 103 104 int 105 dumpsys_gen_write_aux_headers(struct dumperinfo *di) 106 { 107 108 return (0); 109 } 110 111 int 112 dumpsys_buf_seek(struct dumperinfo *di, size_t sz) 113 { 114 static uint8_t buf[DEV_BSIZE]; 115 size_t nbytes; 116 int error; 117 118 bzero(buf, sizeof(buf)); 119 120 while (sz > 0) { 121 nbytes = MIN(sz, sizeof(buf)); 122 123 error = dump_append(di, buf, nbytes); 124 if (error) 125 return (error); 126 sz -= nbytes; 127 } 128 129 return (0); 130 } 131 132 int 133 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) 134 { 135 size_t len; 136 int error; 137 138 while (sz) { 139 len = di->blocksize - fragsz; 140 if (len > sz) 141 len = sz; 142 memcpy((char *)di->blockbuf + fragsz, ptr, len); 143 fragsz += len; 144 ptr += len; 145 sz -= len; 146 if (fragsz == di->blocksize) { 147 error = dump_append(di, di->blockbuf, di->blocksize); 148 if (error) 149 return (error); 150 fragsz = 0; 151 } 152 } 153 return (0); 154 } 155 156 int 157 dumpsys_buf_flush(struct dumperinfo *di) 158 { 159 int error; 160 161 if (fragsz == 0) 162 return (0); 163 164 error = dump_append(di, di->blockbuf, di->blocksize); 165 fragsz = 0; 166 return (error); 167 } 168 169 CTASSERT(PAGE_SHIFT < 20); 170 #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) 171 172 int 173 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) 174 { 175 struct dumperinfo *di = (struct dumperinfo*)arg; 176 vm_paddr_t pa; 177 void *va; 178 uint64_t pgs; 179 size_t counter, sz, chunk; 180 int c, error; 181 u_int maxdumppgs; 182 183 error = 0; /* catch case in which chunk size is 0 */ 184 counter = 0; /* Update twiddle every 16MB */ 185 va = NULL; 186 pgs = mdp->pa_size / PAGE_SIZE; 187 pa = mdp->pa_start; 188 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); 189 if (maxdumppgs == 0) /* seatbelt */ 190 maxdumppgs = 1; 191 192 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), 193 (uintmax_t)pgs); 194 195 dumpsys_wbinv_all(); 196 while (pgs) { 197 chunk = pgs; 198 if (chunk > maxdumppgs) 199 chunk = maxdumppgs; 200 sz = chunk << PAGE_SHIFT; 201 counter += sz; 202 if (counter >> 24) { 203 printf(" %ju", (uintmax_t)PG2MB(pgs)); 204 counter &= (1 << 24) - 1; 205 } 206 207 dumpsys_map_chunk(pa, chunk, &va); 208 wdog_kern_pat(WD_LASTVAL); 209 210 error = dump_append(di, va, sz); 211 dumpsys_unmap_chunk(pa, chunk, va); 212 if (error) 213 break; 214 pgs -= chunk; 215 pa += sz; 216 217 /* Check for user abort. */ 218 c = cncheckc(); 219 if (c == 0x03) 220 return (ECANCELED); 221 if (c != -1) 222 printf(" (CTRL-C to abort) "); 223 } 224 printf(" ... %s\n", (error) ? "fail" : "ok"); 225 return (error); 226 } 227 228 int 229 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) 230 { 231 struct dump_pa *mdp; 232 int error, seqnr; 233 234 seqnr = 0; 235 mdp = dumpsys_pa_next(NULL); 236 while (mdp != NULL) { 237 error = (*cb)(mdp, seqnr++, arg); 238 if (error) 239 return (-error); 240 mdp = dumpsys_pa_next(mdp); 241 } 242 return (seqnr); 243 } 244 245 static off_t fileofs; 246 247 static int 248 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) 249 { 250 struct dumperinfo *di = (struct dumperinfo*)arg; 251 Elf_Phdr phdr; 252 uint64_t size; 253 int error; 254 255 size = mdp->pa_size; 256 bzero(&phdr, sizeof(phdr)); 257 phdr.p_type = PT_LOAD; 258 phdr.p_flags = PF_R; /* XXX */ 259 phdr.p_offset = fileofs; 260 #ifdef __powerpc__ 261 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); 262 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); 263 #else 264 phdr.p_vaddr = mdp->pa_start; 265 phdr.p_paddr = mdp->pa_start; 266 #endif 267 phdr.p_filesz = size; 268 phdr.p_memsz = size; 269 phdr.p_align = PAGE_SIZE; 270 271 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); 272 fileofs += phdr.p_filesz; 273 return (error); 274 } 275 276 static int 277 cb_size(struct dump_pa *mdp, int seqnr, void *arg) 278 { 279 uint64_t *sz; 280 281 sz = (uint64_t *)arg; 282 *sz += (uint64_t)mdp->pa_size; 283 return (0); 284 } 285 286 int 287 dumpsys_generic(struct dumperinfo *di) 288 { 289 static struct kerneldumpheader kdh; 290 Elf_Ehdr ehdr; 291 uint64_t dumpsize; 292 off_t hdrgap; 293 size_t hdrsz; 294 int error; 295 296 #if MINIDUMP_PAGE_TRACKING == 1 297 if (do_minidump) 298 return (minidumpsys(di, false)); 299 #endif 300 301 bzero(&ehdr, sizeof(ehdr)); 302 ehdr.e_ident[EI_MAG0] = ELFMAG0; 303 ehdr.e_ident[EI_MAG1] = ELFMAG1; 304 ehdr.e_ident[EI_MAG2] = ELFMAG2; 305 ehdr.e_ident[EI_MAG3] = ELFMAG3; 306 ehdr.e_ident[EI_CLASS] = ELF_CLASS; 307 #if BYTE_ORDER == LITTLE_ENDIAN 308 ehdr.e_ident[EI_DATA] = ELFDATA2LSB; 309 #else 310 ehdr.e_ident[EI_DATA] = ELFDATA2MSB; 311 #endif 312 ehdr.e_ident[EI_VERSION] = EV_CURRENT; 313 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ 314 ehdr.e_type = ET_CORE; 315 ehdr.e_machine = EM_VALUE; 316 ehdr.e_phoff = sizeof(ehdr); 317 ehdr.e_flags = 0; 318 ehdr.e_ehsize = sizeof(ehdr); 319 ehdr.e_phentsize = sizeof(Elf_Phdr); 320 ehdr.e_shentsize = sizeof(Elf_Shdr); 321 322 dumpsys_pa_init(); 323 324 /* Calculate dump size. */ 325 dumpsize = 0L; 326 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + 327 DUMPSYS_NUM_AUX_HDRS; 328 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; 329 fileofs = MD_ALIGN(hdrsz); 330 dumpsize += fileofs; 331 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize); 332 333 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, 334 dumpsize); 335 336 error = dump_start(di, &kdh); 337 if (error != 0) 338 goto fail; 339 340 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, 341 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); 342 343 /* Dump ELF header */ 344 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); 345 if (error) 346 goto fail; 347 348 /* Dump program headers */ 349 error = dumpsys_foreach_chunk(cb_dumphdr, di); 350 if (error < 0) 351 goto fail; 352 error = dumpsys_write_aux_headers(di); 353 if (error < 0) 354 goto fail; 355 dumpsys_buf_flush(di); 356 357 /* 358 * All headers are written using blocked I/O, so we know the 359 * current offset is (still) block aligned. Skip the alignement 360 * in the file to have the segment contents aligned at page 361 * boundary. 362 */ 363 error = dumpsys_buf_seek(di, (size_t)hdrgap); 364 if (error) 365 goto fail; 366 367 /* Dump memory chunks. */ 368 error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); 369 if (error < 0) 370 goto fail; 371 372 error = dump_finish(di, &kdh); 373 if (error != 0) 374 goto fail; 375 376 printf("\nDump complete\n"); 377 return (0); 378 379 fail: 380 if (error < 0) 381 error = -error; 382 383 if (error == ECANCELED) 384 printf("\nDump aborted\n"); 385 else if (error == E2BIG || error == ENOSPC) 386 printf("\nDump failed. Partition too small.\n"); 387 else 388 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 389 return (error); 390 } 391 392 #if MINIDUMP_PAGE_TRACKING == 1 393 394 /* Minidump progress bar */ 395 static struct { 396 const int min_per; 397 const int max_per; 398 bool visited; 399 } progress_track[10] = { 400 { 0, 10, false}, 401 { 10, 20, false}, 402 { 20, 30, false}, 403 { 30, 40, false}, 404 { 40, 50, false}, 405 { 50, 60, false}, 406 { 60, 70, false}, 407 { 70, 80, false}, 408 { 80, 90, false}, 409 { 90, 100, false} 410 }; 411 412 static uint64_t dumpsys_pb_size; 413 static uint64_t dumpsys_pb_remaining; 414 static uint64_t dumpsys_pb_check; 415 416 /* Reset the progress bar for a dump of dumpsize. */ 417 void 418 dumpsys_pb_init(uint64_t dumpsize) 419 { 420 int i; 421 422 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize; 423 dumpsys_pb_check = 0; 424 425 for (i = 0; i < nitems(progress_track); i++) 426 progress_track[i].visited = false; 427 } 428 429 /* 430 * Update the progress according to the delta bytes that were written out. 431 * Check and print the progress percentage. 432 */ 433 void 434 dumpsys_pb_progress(size_t delta) 435 { 436 int sofar, i; 437 438 dumpsys_pb_remaining -= delta; 439 dumpsys_pb_check += delta; 440 441 /* 442 * To save time while dumping, only loop through progress_track 443 * occasionally. 444 */ 445 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0) 446 return; 447 else 448 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1; 449 450 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size); 451 for (i = 0; i < nitems(progress_track); i++) { 452 if (sofar < progress_track[i].min_per || 453 sofar > progress_track[i].max_per) 454 continue; 455 if (!progress_track[i].visited) { 456 progress_track[i].visited = true; 457 printf("..%d%%", sofar); 458 } 459 break; 460 } 461 } 462 463 int 464 minidumpsys(struct dumperinfo *di, bool livedump) 465 { 466 struct minidumpstate state; 467 struct msgbuf mb_copy; 468 char *msg_ptr; 469 size_t sz; 470 int error; 471 472 if (livedump) { 473 KASSERT(!dumping, ("live dump invoked from incorrect context")); 474 475 /* 476 * Before invoking cpu_minidumpsys() on the live system, we 477 * must snapshot some required global state: the message 478 * buffer, and the page dump bitset. They may be modified at 479 * any moment, so for the sake of the live dump it is best to 480 * have an unchanging snapshot to work with. Both are included 481 * as part of the dump and consumed by userspace tools. 482 * 483 * Other global state important to the minidump code is the 484 * dump_avail array and the kernel's page tables, but snapshots 485 * are not taken of these. For one, dump_avail[] is expected 486 * not to change after boot. Snapshotting the kernel page 487 * tables would involve an additional walk, so this is avoided 488 * too. 489 * 490 * This means live dumps are best effort, and the result may or 491 * may not be usable; there are no guarantees about the 492 * consistency of the dump's contents. Any of the following 493 * (and likely more) may affect the live dump: 494 * 495 * - Data may be modified, freed, or remapped during the 496 * course of the dump, such that the contents written out 497 * are partially or entirely unrecognizable. This means 498 * valid references may point to destroyed/mangled objects, 499 * and vice versa. 500 * 501 * - The dumped context of any threads that ran during the 502 * dump process may be unreliable. 503 * 504 * - The set of kernel page tables included in the dump likely 505 * won't correspond exactly to the copy of the dump bitset. 506 * This means some pages will be dumped without any way to 507 * locate them, and some pages may not have been dumped 508 * despite appearing as if they should. 509 */ 510 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK); 511 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr); 512 state.msgbufp = &mb_copy; 513 514 sz = BITSET_SIZE(vm_page_dump_pages); 515 state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK); 516 BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset); 517 } else { 518 KASSERT(dumping, ("minidump invoked outside of doadump()")); 519 520 /* Use the globals. */ 521 state.msgbufp = msgbufp; 522 state.dump_bitset = vm_page_dump; 523 } 524 525 error = cpu_minidumpsys(di, &state); 526 if (livedump) { 527 free(msg_ptr, M_TEMP); 528 free(state.dump_bitset, M_TEMP); 529 } 530 531 return (error); 532 } 533 #endif /* MINIDUMP_PAGE_TRACKING == 1 */ 534