1 /*- 2 * Copyright (c) 2002 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/conf.h> 30 #include <sys/cons.h> 31 #include <sys/kdb.h> 32 #include <sys/kernel.h> 33 #include <sys/kerneldump.h> 34 #include <sys/malloc.h> 35 #include <sys/msgbuf.h> 36 #include <sys/proc.h> 37 #include <sys/watchdog.h> 38 39 #include <vm/vm.h> 40 #include <vm/vm_param.h> 41 #include <vm/vm_page.h> 42 #include <vm/vm_phys.h> 43 #include <vm/vm_dumpset.h> 44 #include <vm/pmap.h> 45 46 #include <machine/dump.h> 47 #include <machine/elf.h> 48 #include <machine/md_var.h> 49 #include <machine/pcb.h> 50 51 CTASSERT(sizeof(struct kerneldumpheader) == 512); 52 53 #define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE) 54 55 /* Handle buffered writes. */ 56 static size_t fragsz; 57 58 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS]; 59 60 #if !defined(__powerpc__) 61 void 62 dumpsys_gen_pa_init(void) 63 { 64 int n, idx; 65 66 bzero(dump_map, sizeof(dump_map)); 67 for (n = 0; n < nitems(dump_map); n++) { 68 idx = n * 2; 69 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0) 70 break; 71 dump_map[n].pa_start = dump_avail[idx]; 72 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx]; 73 } 74 } 75 #endif 76 77 struct dump_pa * 78 dumpsys_gen_pa_next(struct dump_pa *mdp) 79 { 80 81 if (mdp == NULL) 82 return (&dump_map[0]); 83 84 mdp++; 85 if (mdp->pa_size == 0) 86 mdp = NULL; 87 return (mdp); 88 } 89 90 void 91 dumpsys_gen_wbinv_all(void) 92 { 93 94 } 95 96 void 97 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused, 98 void *va __unused) 99 { 100 101 } 102 103 int 104 dumpsys_gen_write_aux_headers(struct dumperinfo *di) 105 { 106 107 return (0); 108 } 109 110 int 111 dumpsys_buf_seek(struct dumperinfo *di, size_t sz) 112 { 113 static uint8_t buf[DEV_BSIZE]; 114 size_t nbytes; 115 int error; 116 117 bzero(buf, sizeof(buf)); 118 119 while (sz > 0) { 120 nbytes = MIN(sz, sizeof(buf)); 121 122 error = dump_append(di, buf, nbytes); 123 if (error) 124 return (error); 125 sz -= nbytes; 126 } 127 128 return (0); 129 } 130 131 int 132 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz) 133 { 134 size_t len; 135 int error; 136 137 while (sz) { 138 len = di->blocksize - fragsz; 139 if (len > sz) 140 len = sz; 141 memcpy((char *)di->blockbuf + fragsz, ptr, len); 142 fragsz += len; 143 ptr += len; 144 sz -= len; 145 if (fragsz == di->blocksize) { 146 error = dump_append(di, di->blockbuf, di->blocksize); 147 if (error) 148 return (error); 149 fragsz = 0; 150 } 151 } 152 return (0); 153 } 154 155 int 156 dumpsys_buf_flush(struct dumperinfo *di) 157 { 158 int error; 159 160 if (fragsz == 0) 161 return (0); 162 163 error = dump_append(di, di->blockbuf, di->blocksize); 164 fragsz = 0; 165 return (error); 166 } 167 168 CTASSERT(PAGE_SHIFT < 20); 169 #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT)) 170 171 int 172 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg) 173 { 174 struct dumperinfo *di = (struct dumperinfo*)arg; 175 vm_paddr_t pa; 176 void *va; 177 uint64_t pgs; 178 size_t counter, sz, chunk; 179 int c, error; 180 u_int maxdumppgs; 181 182 error = 0; /* catch case in which chunk size is 0 */ 183 counter = 0; /* Update twiddle every 16MB */ 184 va = NULL; 185 pgs = mdp->pa_size / PAGE_SIZE; 186 pa = mdp->pa_start; 187 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS); 188 if (maxdumppgs == 0) /* seatbelt */ 189 maxdumppgs = 1; 190 191 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs), 192 (uintmax_t)pgs); 193 194 dumpsys_wbinv_all(); 195 while (pgs) { 196 chunk = pgs; 197 if (chunk > maxdumppgs) 198 chunk = maxdumppgs; 199 sz = chunk << PAGE_SHIFT; 200 counter += sz; 201 if (counter >> 24) { 202 printf(" %ju", (uintmax_t)PG2MB(pgs)); 203 counter &= (1 << 24) - 1; 204 } 205 206 dumpsys_map_chunk(pa, chunk, &va); 207 wdog_kern_pat(WD_LASTVAL); 208 209 error = dump_append(di, va, sz); 210 dumpsys_unmap_chunk(pa, chunk, va); 211 if (error) 212 break; 213 pgs -= chunk; 214 pa += sz; 215 216 /* Check for user abort. */ 217 c = cncheckc(); 218 if (c == 0x03) 219 return (ECANCELED); 220 if (c != -1) 221 printf(" (CTRL-C to abort) "); 222 } 223 printf(" ... %s\n", (error) ? "fail" : "ok"); 224 return (error); 225 } 226 227 int 228 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg) 229 { 230 struct dump_pa *mdp; 231 int error, seqnr; 232 233 seqnr = 0; 234 mdp = dumpsys_pa_next(NULL); 235 while (mdp != NULL) { 236 error = (*cb)(mdp, seqnr++, arg); 237 if (error) 238 return (-error); 239 mdp = dumpsys_pa_next(mdp); 240 } 241 return (seqnr); 242 } 243 244 static off_t fileofs; 245 246 static int 247 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg) 248 { 249 struct dumperinfo *di = (struct dumperinfo*)arg; 250 Elf_Phdr phdr; 251 uint64_t size; 252 int error; 253 254 size = mdp->pa_size; 255 bzero(&phdr, sizeof(phdr)); 256 phdr.p_type = PT_LOAD; 257 phdr.p_flags = PF_R; /* XXX */ 258 phdr.p_offset = fileofs; 259 #ifdef __powerpc__ 260 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L); 261 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start); 262 #else 263 phdr.p_vaddr = mdp->pa_start; 264 phdr.p_paddr = mdp->pa_start; 265 #endif 266 phdr.p_filesz = size; 267 phdr.p_memsz = size; 268 phdr.p_align = PAGE_SIZE; 269 270 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr)); 271 fileofs += phdr.p_filesz; 272 return (error); 273 } 274 275 static int 276 cb_size(struct dump_pa *mdp, int seqnr, void *arg) 277 { 278 uint64_t *sz; 279 280 sz = (uint64_t *)arg; 281 *sz += (uint64_t)mdp->pa_size; 282 return (0); 283 } 284 285 int 286 dumpsys_generic(struct dumperinfo *di) 287 { 288 static struct kerneldumpheader kdh; 289 Elf_Ehdr ehdr; 290 uint64_t dumpsize; 291 off_t hdrgap; 292 size_t hdrsz; 293 int error; 294 295 #if MINIDUMP_PAGE_TRACKING == 1 296 if (do_minidump) 297 return (minidumpsys(di, false)); 298 #endif 299 300 bzero(&ehdr, sizeof(ehdr)); 301 ehdr.e_ident[EI_MAG0] = ELFMAG0; 302 ehdr.e_ident[EI_MAG1] = ELFMAG1; 303 ehdr.e_ident[EI_MAG2] = ELFMAG2; 304 ehdr.e_ident[EI_MAG3] = ELFMAG3; 305 ehdr.e_ident[EI_CLASS] = ELF_CLASS; 306 #if BYTE_ORDER == LITTLE_ENDIAN 307 ehdr.e_ident[EI_DATA] = ELFDATA2LSB; 308 #else 309 ehdr.e_ident[EI_DATA] = ELFDATA2MSB; 310 #endif 311 ehdr.e_ident[EI_VERSION] = EV_CURRENT; 312 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */ 313 ehdr.e_type = ET_CORE; 314 ehdr.e_machine = EM_VALUE; 315 ehdr.e_phoff = sizeof(ehdr); 316 ehdr.e_flags = 0; 317 ehdr.e_ehsize = sizeof(ehdr); 318 ehdr.e_phentsize = sizeof(Elf_Phdr); 319 ehdr.e_shentsize = sizeof(Elf_Shdr); 320 321 dumpsys_pa_init(); 322 323 /* Calculate dump size. */ 324 dumpsize = 0L; 325 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) + 326 DUMPSYS_NUM_AUX_HDRS; 327 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize; 328 fileofs = MD_ALIGN(hdrsz); 329 dumpsize += fileofs; 330 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize); 331 332 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, 333 dumpsize); 334 335 error = dump_start(di, &kdh); 336 if (error != 0) 337 goto fail; 338 339 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20, 340 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS); 341 342 /* Dump ELF header */ 343 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr)); 344 if (error) 345 goto fail; 346 347 /* Dump program headers */ 348 error = dumpsys_foreach_chunk(cb_dumphdr, di); 349 if (error < 0) 350 goto fail; 351 error = dumpsys_write_aux_headers(di); 352 if (error < 0) 353 goto fail; 354 dumpsys_buf_flush(di); 355 356 /* 357 * All headers are written using blocked I/O, so we know the 358 * current offset is (still) block aligned. Skip the alignement 359 * in the file to have the segment contents aligned at page 360 * boundary. 361 */ 362 error = dumpsys_buf_seek(di, (size_t)hdrgap); 363 if (error) 364 goto fail; 365 366 /* Dump memory chunks. */ 367 error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di); 368 if (error < 0) 369 goto fail; 370 371 error = dump_finish(di, &kdh); 372 if (error != 0) 373 goto fail; 374 375 printf("\nDump complete\n"); 376 return (0); 377 378 fail: 379 if (error < 0) 380 error = -error; 381 382 if (error == ECANCELED) 383 printf("\nDump aborted\n"); 384 else if (error == E2BIG || error == ENOSPC) 385 printf("\nDump failed. Partition too small.\n"); 386 else 387 printf("\n** DUMP FAILED (ERROR %d) **\n", error); 388 return (error); 389 } 390 391 #if MINIDUMP_PAGE_TRACKING == 1 392 393 /* Minidump progress bar */ 394 static struct { 395 const int min_per; 396 const int max_per; 397 bool visited; 398 } progress_track[10] = { 399 { 0, 10, false}, 400 { 10, 20, false}, 401 { 20, 30, false}, 402 { 30, 40, false}, 403 { 40, 50, false}, 404 { 50, 60, false}, 405 { 60, 70, false}, 406 { 70, 80, false}, 407 { 80, 90, false}, 408 { 90, 100, false} 409 }; 410 411 static uint64_t dumpsys_pb_size; 412 static uint64_t dumpsys_pb_remaining; 413 static uint64_t dumpsys_pb_check; 414 415 /* Reset the progress bar for a dump of dumpsize. */ 416 void 417 dumpsys_pb_init(uint64_t dumpsize) 418 { 419 int i; 420 421 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize; 422 dumpsys_pb_check = 0; 423 424 for (i = 0; i < nitems(progress_track); i++) 425 progress_track[i].visited = false; 426 } 427 428 /* 429 * Update the progress according to the delta bytes that were written out. 430 * Check and print the progress percentage. 431 */ 432 void 433 dumpsys_pb_progress(size_t delta) 434 { 435 int sofar, i; 436 437 dumpsys_pb_remaining -= delta; 438 dumpsys_pb_check += delta; 439 440 /* 441 * To save time while dumping, only loop through progress_track 442 * occasionally. 443 */ 444 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0) 445 return; 446 else 447 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1; 448 449 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size); 450 for (i = 0; i < nitems(progress_track); i++) { 451 if (sofar < progress_track[i].min_per || 452 sofar > progress_track[i].max_per) 453 continue; 454 if (!progress_track[i].visited) { 455 progress_track[i].visited = true; 456 printf("..%d%%", sofar); 457 } 458 break; 459 } 460 } 461 462 int 463 minidumpsys(struct dumperinfo *di, bool livedump) 464 { 465 struct minidumpstate state; 466 struct msgbuf mb_copy; 467 char *msg_ptr; 468 int error; 469 470 if (livedump) { 471 KASSERT(!dumping, ("live dump invoked from incorrect context")); 472 473 /* 474 * Before invoking cpu_minidumpsys() on the live system, we 475 * must snapshot some required global state: the message 476 * buffer, and the page dump bitset. They may be modified at 477 * any moment, so for the sake of the live dump it is best to 478 * have an unchanging snapshot to work with. Both are included 479 * as part of the dump and consumed by userspace tools. 480 * 481 * Other global state important to the minidump code is the 482 * dump_avail array and the kernel's page tables, but snapshots 483 * are not taken of these. For one, dump_avail[] is expected 484 * not to change after boot. Snapshotting the kernel page 485 * tables would involve an additional walk, so this is avoided 486 * too. 487 * 488 * This means live dumps are best effort, and the result may or 489 * may not be usable; there are no guarantees about the 490 * consistency of the dump's contents. Any of the following 491 * (and likely more) may affect the live dump: 492 * 493 * - Data may be modified, freed, or remapped during the 494 * course of the dump, such that the contents written out 495 * are partially or entirely unrecognizable. This means 496 * valid references may point to destroyed/mangled objects, 497 * and vice versa. 498 * 499 * - The dumped context of any threads that ran during the 500 * dump process may be unreliable. 501 * 502 * - The set of kernel page tables included in the dump likely 503 * won't correspond exactly to the copy of the dump bitset. 504 * This means some pages will be dumped without any way to 505 * locate them, and some pages may not have been dumped 506 * despite appearing as if they should. 507 */ 508 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK); 509 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr); 510 state.msgbufp = &mb_copy; 511 512 state.dump_bitset = BITSET_ALLOC(vm_page_dump_pages, M_TEMP, 513 M_WAITOK); 514 BIT_COPY_STORE_REL(vm_page_dump_pages, vm_page_dump, 515 state.dump_bitset); 516 } else { 517 KASSERT(dumping, ("minidump invoked outside of doadump()")); 518 519 /* Use the globals. */ 520 state.msgbufp = msgbufp; 521 state.dump_bitset = vm_page_dump; 522 } 523 524 error = cpu_minidumpsys(di, &state); 525 if (livedump) { 526 free(msg_ptr, M_TEMP); 527 BITSET_FREE(state.dump_bitset, M_TEMP); 528 } 529 530 return (error); 531 } 532 #endif /* MINIDUMP_PAGE_TRACKING == 1 */ 533