1 /*-
2 * Copyright (c) 2002 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/conf.h>
30 #include <sys/cons.h>
31 #include <sys/kdb.h>
32 #include <sys/kernel.h>
33 #include <sys/kerneldump.h>
34 #include <sys/malloc.h>
35 #include <sys/msgbuf.h>
36 #include <sys/proc.h>
37 #include <sys/watchdog.h>
38
39 #include <vm/vm.h>
40 #include <vm/vm_param.h>
41 #include <vm/vm_page.h>
42 #include <vm/vm_phys.h>
43 #include <vm/vm_dumpset.h>
44 #include <vm/pmap.h>
45
46 #include <machine/dump.h>
47 #include <machine/elf.h>
48 #include <machine/md_var.h>
49 #include <machine/pcb.h>
50
51 CTASSERT(sizeof(struct kerneldumpheader) == 512);
52
53 #define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE)
54
55 /* Handle buffered writes. */
56 static size_t fragsz;
57
58 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
59
60 #if !defined(__powerpc__)
61 void
dumpsys_gen_pa_init(void)62 dumpsys_gen_pa_init(void)
63 {
64 int n, idx;
65
66 bzero(dump_map, sizeof(dump_map));
67 for (n = 0; n < nitems(dump_map); n++) {
68 idx = n * 2;
69 if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
70 break;
71 dump_map[n].pa_start = dump_avail[idx];
72 dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
73 }
74 }
75 #endif
76
77 struct dump_pa *
dumpsys_gen_pa_next(struct dump_pa * mdp)78 dumpsys_gen_pa_next(struct dump_pa *mdp)
79 {
80
81 if (mdp == NULL)
82 return (&dump_map[0]);
83
84 mdp++;
85 if (mdp->pa_size == 0)
86 mdp = NULL;
87 return (mdp);
88 }
89
/*
 * Generic cache write-back/invalidate hook: intentionally does nothing.
 */
void
dumpsys_gen_wbinv_all(void)
{
	/* Nothing to do. */
}
95
96 void
dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused,size_t chunk __unused,void * va __unused)97 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
98 void *va __unused)
99 {
100
101 }
102
/*
 * Generic auxiliary-header hook: writes nothing through 'di' and always
 * reports success by returning 0.
 */
int
dumpsys_gen_write_aux_headers(struct dumperinfo *di)
{
	/* No auxiliary headers in the generic case. */
	return (0);
}
109
110 int
dumpsys_buf_seek(struct dumperinfo * di,size_t sz)111 dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
112 {
113 static uint8_t buf[DEV_BSIZE];
114 size_t nbytes;
115 int error;
116
117 bzero(buf, sizeof(buf));
118
119 while (sz > 0) {
120 nbytes = MIN(sz, sizeof(buf));
121
122 error = dump_append(di, buf, nbytes);
123 if (error)
124 return (error);
125 sz -= nbytes;
126 }
127
128 return (0);
129 }
130
131 int
dumpsys_buf_write(struct dumperinfo * di,char * ptr,size_t sz)132 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
133 {
134 size_t len;
135 int error;
136
137 while (sz) {
138 len = di->blocksize - fragsz;
139 if (len > sz)
140 len = sz;
141 memcpy((char *)di->blockbuf + fragsz, ptr, len);
142 fragsz += len;
143 ptr += len;
144 sz -= len;
145 if (fragsz == di->blocksize) {
146 error = dump_append(di, di->blockbuf, di->blocksize);
147 if (error)
148 return (error);
149 fragsz = 0;
150 }
151 }
152 return (0);
153 }
154
155 int
dumpsys_buf_flush(struct dumperinfo * di)156 dumpsys_buf_flush(struct dumperinfo *di)
157 {
158 int error;
159
160 if (fragsz == 0)
161 return (0);
162
163 error = dump_append(di, di->blockbuf, di->blocksize);
164 fragsz = 0;
165 return (error);
166 }
167
168 CTASSERT(PAGE_SHIFT < 20);
/*
 * Convert a page count to megabytes, rounding up.  The shift count is
 * (20 - PAGE_SHIFT), which the CTASSERT above keeps positive.  The argument
 * is parenthesised so that expressions such as "a | b" expand correctly.
 */
#define	PG2MB(pgs)	\
	(((pgs) + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
170
171 int
dumpsys_cb_dumpdata(struct dump_pa * mdp,int seqnr,void * arg)172 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
173 {
174 struct dumperinfo *di = (struct dumperinfo*)arg;
175 vm_paddr_t pa;
176 void *va;
177 uint64_t pgs;
178 size_t counter, sz, chunk;
179 int c, error;
180 u_int maxdumppgs;
181
182 error = 0; /* catch case in which chunk size is 0 */
183 counter = 0; /* Update twiddle every 16MB */
184 va = NULL;
185 pgs = mdp->pa_size / PAGE_SIZE;
186 pa = mdp->pa_start;
187 maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
188 if (maxdumppgs == 0) /* seatbelt */
189 maxdumppgs = 1;
190
191 printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
192 (uintmax_t)pgs);
193
194 dumpsys_wbinv_all();
195 while (pgs) {
196 chunk = pgs;
197 if (chunk > maxdumppgs)
198 chunk = maxdumppgs;
199 sz = chunk << PAGE_SHIFT;
200 counter += sz;
201 if (counter >> 24) {
202 printf(" %ju", (uintmax_t)PG2MB(pgs));
203 counter &= (1 << 24) - 1;
204 }
205
206 dumpsys_map_chunk(pa, chunk, &va);
207 wdog_kern_pat(WD_LASTVAL);
208
209 error = dump_append(di, va, sz);
210 dumpsys_unmap_chunk(pa, chunk, va);
211 if (error)
212 break;
213 pgs -= chunk;
214 pa += sz;
215
216 /* Check for user abort. */
217 c = cncheckc();
218 if (c == 0x03)
219 return (ECANCELED);
220 if (c != -1)
221 printf(" (CTRL-C to abort) ");
222 }
223 printf(" ... %s\n", (error) ? "fail" : "ok");
224 return (error);
225 }
226
227 int
dumpsys_foreach_chunk(dumpsys_callback_t cb,void * arg)228 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
229 {
230 struct dump_pa *mdp;
231 int error, seqnr;
232
233 seqnr = 0;
234 mdp = dumpsys_pa_next(NULL);
235 while (mdp != NULL) {
236 error = (*cb)(mdp, seqnr++, arg);
237 if (error)
238 return (-error);
239 mdp = dumpsys_pa_next(mdp);
240 }
241 return (seqnr);
242 }
243
244 static off_t fileofs;
245
246 static int
cb_dumphdr(struct dump_pa * mdp,int seqnr,void * arg)247 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
248 {
249 struct dumperinfo *di = (struct dumperinfo*)arg;
250 Elf_Phdr phdr;
251 uint64_t size;
252 int error;
253
254 size = mdp->pa_size;
255 bzero(&phdr, sizeof(phdr));
256 phdr.p_type = PT_LOAD;
257 phdr.p_flags = PF_R; /* XXX */
258 phdr.p_offset = fileofs;
259 #ifdef __powerpc__
260 phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
261 phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
262 #else
263 phdr.p_vaddr = mdp->pa_start;
264 phdr.p_paddr = mdp->pa_start;
265 #endif
266 phdr.p_filesz = size;
267 phdr.p_memsz = size;
268 phdr.p_align = PAGE_SIZE;
269
270 error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
271 fileofs += phdr.p_filesz;
272 return (error);
273 }
274
275 static int
cb_size(struct dump_pa * mdp,int seqnr,void * arg)276 cb_size(struct dump_pa *mdp, int seqnr, void *arg)
277 {
278 uint64_t *sz;
279
280 sz = (uint64_t *)arg;
281 *sz += (uint64_t)mdp->pa_size;
282 return (0);
283 }
284
285 int
dumpsys_generic(struct dumperinfo * di)286 dumpsys_generic(struct dumperinfo *di)
287 {
288 static struct kerneldumpheader kdh;
289 Elf_Ehdr ehdr;
290 uint64_t dumpsize;
291 off_t hdrgap;
292 size_t hdrsz;
293 int error;
294
295 #if MINIDUMP_PAGE_TRACKING == 1
296 if (do_minidump)
297 return (minidumpsys(di, false));
298 #endif
299
300 bzero(&ehdr, sizeof(ehdr));
301 ehdr.e_ident[EI_MAG0] = ELFMAG0;
302 ehdr.e_ident[EI_MAG1] = ELFMAG1;
303 ehdr.e_ident[EI_MAG2] = ELFMAG2;
304 ehdr.e_ident[EI_MAG3] = ELFMAG3;
305 ehdr.e_ident[EI_CLASS] = ELF_CLASS;
306 #if BYTE_ORDER == LITTLE_ENDIAN
307 ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
308 #else
309 ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
310 #endif
311 ehdr.e_ident[EI_VERSION] = EV_CURRENT;
312 ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */
313 ehdr.e_type = ET_CORE;
314 ehdr.e_machine = EM_VALUE;
315 ehdr.e_phoff = sizeof(ehdr);
316 ehdr.e_flags = 0;
317 ehdr.e_ehsize = sizeof(ehdr);
318 ehdr.e_phentsize = sizeof(Elf_Phdr);
319 ehdr.e_shentsize = sizeof(Elf_Shdr);
320
321 dumpsys_pa_init();
322
323 /* Calculate dump size. */
324 dumpsize = 0L;
325 ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
326 DUMPSYS_NUM_AUX_HDRS;
327 hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
328 fileofs = MD_ALIGN(hdrsz);
329 dumpsize += fileofs;
330 hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
331
332 dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
333 dumpsize);
334
335 error = dump_start(di, &kdh);
336 if (error != 0)
337 goto fail;
338
339 printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
340 ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
341
342 /* Dump ELF header */
343 error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
344 if (error)
345 goto fail;
346
347 /* Dump program headers */
348 error = dumpsys_foreach_chunk(cb_dumphdr, di);
349 if (error < 0)
350 goto fail;
351 error = dumpsys_write_aux_headers(di);
352 if (error < 0)
353 goto fail;
354 dumpsys_buf_flush(di);
355
356 /*
357 * All headers are written using blocked I/O, so we know the
358 * current offset is (still) block aligned. Skip the alignement
359 * in the file to have the segment contents aligned at page
360 * boundary.
361 */
362 error = dumpsys_buf_seek(di, (size_t)hdrgap);
363 if (error)
364 goto fail;
365
366 /* Dump memory chunks. */
367 error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di);
368 if (error < 0)
369 goto fail;
370
371 error = dump_finish(di, &kdh);
372 if (error != 0)
373 goto fail;
374
375 printf("\nDump complete\n");
376 return (0);
377
378 fail:
379 if (error < 0)
380 error = -error;
381
382 if (error == ECANCELED)
383 printf("\nDump aborted\n");
384 else if (error == E2BIG || error == ENOSPC)
385 printf("\nDump failed. Partition too small.\n");
386 else
387 printf("\n** DUMP FAILED (ERROR %d) **\n", error);
388 return (error);
389 }
390
391 #if MINIDUMP_PAGE_TRACKING == 1
392
/*
 * Minidump progress bar.
 *
 * Each entry covers one ten-percent band [min_per, max_per] and remembers
 * whether a progress message has already been printed for that band.
 */
static struct {
	const int min_per;
	const int max_per;
	bool visited;
} progress_track[10] = {
	[0] = { .min_per =  0, .max_per =  10, .visited = false },
	[1] = { .min_per = 10, .max_per =  20, .visited = false },
	[2] = { .min_per = 20, .max_per =  30, .visited = false },
	[3] = { .min_per = 30, .max_per =  40, .visited = false },
	[4] = { .min_per = 40, .max_per =  50, .visited = false },
	[5] = { .min_per = 50, .max_per =  60, .visited = false },
	[6] = { .min_per = 60, .max_per =  70, .visited = false },
	[7] = { .min_per = 70, .max_per =  80, .visited = false },
	[8] = { .min_per = 80, .max_per =  90, .visited = false },
	[9] = { .min_per = 90, .max_per = 100, .visited = false },
};

/* Total number of bytes the progress bar was initialised for. */
static uint64_t dumpsys_pb_size;
/* Bytes not yet accounted for by dumpsys_pb_progress(). */
static uint64_t dumpsys_pb_remaining;
/* Bytes accumulated since progress_track[] was last consulted. */
static uint64_t dumpsys_pb_check;
414
415 /* Reset the progress bar for a dump of dumpsize. */
416 void
dumpsys_pb_init(uint64_t dumpsize)417 dumpsys_pb_init(uint64_t dumpsize)
418 {
419 int i;
420
421 dumpsys_pb_size = dumpsys_pb_remaining = dumpsize;
422 dumpsys_pb_check = 0;
423
424 for (i = 0; i < nitems(progress_track); i++)
425 progress_track[i].visited = false;
426 }
427
428 /*
429 * Update the progress according to the delta bytes that were written out.
430 * Check and print the progress percentage.
431 */
432 void
dumpsys_pb_progress(size_t delta)433 dumpsys_pb_progress(size_t delta)
434 {
435 int sofar, i;
436
437 dumpsys_pb_remaining -= delta;
438 dumpsys_pb_check += delta;
439
440 /*
441 * To save time while dumping, only loop through progress_track
442 * occasionally.
443 */
444 if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0)
445 return;
446 else
447 dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1;
448
449 sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size);
450 for (i = 0; i < nitems(progress_track); i++) {
451 if (sofar < progress_track[i].min_per ||
452 sofar > progress_track[i].max_per)
453 continue;
454 if (!progress_track[i].visited) {
455 progress_track[i].visited = true;
456 printf("..%d%%", sofar);
457 }
458 break;
459 }
460 }
461
462 int
minidumpsys(struct dumperinfo * di,bool livedump)463 minidumpsys(struct dumperinfo *di, bool livedump)
464 {
465 struct minidumpstate state;
466 struct msgbuf mb_copy;
467 char *msg_ptr;
468 int error;
469
470 if (livedump) {
471 KASSERT(!dumping, ("live dump invoked from incorrect context"));
472
473 /*
474 * Before invoking cpu_minidumpsys() on the live system, we
475 * must snapshot some required global state: the message
476 * buffer, and the page dump bitset. They may be modified at
477 * any moment, so for the sake of the live dump it is best to
478 * have an unchanging snapshot to work with. Both are included
479 * as part of the dump and consumed by userspace tools.
480 *
481 * Other global state important to the minidump code is the
482 * dump_avail array and the kernel's page tables, but snapshots
483 * are not taken of these. For one, dump_avail[] is expected
484 * not to change after boot. Snapshotting the kernel page
485 * tables would involve an additional walk, so this is avoided
486 * too.
487 *
488 * This means live dumps are best effort, and the result may or
489 * may not be usable; there are no guarantees about the
490 * consistency of the dump's contents. Any of the following
491 * (and likely more) may affect the live dump:
492 *
493 * - Data may be modified, freed, or remapped during the
494 * course of the dump, such that the contents written out
495 * are partially or entirely unrecognizable. This means
496 * valid references may point to destroyed/mangled objects,
497 * and vice versa.
498 *
499 * - The dumped context of any threads that ran during the
500 * dump process may be unreliable.
501 *
502 * - The set of kernel page tables included in the dump likely
503 * won't correspond exactly to the copy of the dump bitset.
504 * This means some pages will be dumped without any way to
505 * locate them, and some pages may not have been dumped
506 * despite appearing as if they should.
507 */
508 msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
509 msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
510 state.msgbufp = &mb_copy;
511
512 state.dump_bitset = BITSET_ALLOC(vm_page_dump_pages, M_TEMP,
513 M_WAITOK);
514 BIT_COPY_STORE_REL(vm_page_dump_pages, vm_page_dump,
515 state.dump_bitset);
516 } else {
517 KASSERT(dumping, ("minidump invoked outside of doadump()"));
518
519 /* Use the globals. */
520 state.msgbufp = msgbufp;
521 state.dump_bitset = vm_page_dump;
522 }
523
524 error = cpu_minidumpsys(di, &state);
525 if (livedump) {
526 free(msg_ptr, M_TEMP);
527 BITSET_FREE(state.dump_bitset, M_TEMP);
528 }
529
530 return (error);
531 }
532 #endif /* MINIDUMP_PAGE_TRACKING == 1 */
533