xref: /freebsd/sys/kern/kern_dump.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*-
2  * Copyright (c) 2002 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/conf.h>
31 #include <sys/cons.h>
32 #include <sys/kdb.h>
33 #include <sys/kernel.h>
34 #include <sys/kerneldump.h>
35 #include <sys/malloc.h>
36 #include <sys/msgbuf.h>
37 #include <sys/proc.h>
38 #include <sys/watchdog.h>
39 
40 #include <vm/vm.h>
41 #include <vm/vm_param.h>
42 #include <vm/vm_page.h>
43 #include <vm/vm_phys.h>
44 #include <vm/vm_dumpset.h>
45 #include <vm/pmap.h>
46 
47 #include <machine/dump.h>
48 #include <machine/elf.h>
49 #include <machine/md_var.h>
50 #include <machine/pcb.h>
51 
52 CTASSERT(sizeof(struct kerneldumpheader) == 512);
53 
54 #define	MD_ALIGN(x)	roundup2((off_t)(x), PAGE_SIZE)
55 
56 /* Handle buffered writes. */
57 static size_t fragsz;
58 
59 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
60 
61 #if !defined(__powerpc__)
62 void
63 dumpsys_gen_pa_init(void)
64 {
65 	int n, idx;
66 
67 	bzero(dump_map, sizeof(dump_map));
68 	for (n = 0; n < nitems(dump_map); n++) {
69 		idx = n * 2;
70 		if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
71 			break;
72 		dump_map[n].pa_start = dump_avail[idx];
73 		dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
74 	}
75 }
76 #endif
77 
78 struct dump_pa *
79 dumpsys_gen_pa_next(struct dump_pa *mdp)
80 {
81 
82 	if (mdp == NULL)
83 		return (&dump_map[0]);
84 
85 	mdp++;
86 	if (mdp->pa_size == 0)
87 		mdp = NULL;
88 	return (mdp);
89 }
90 
/*
 * Generic dumpsys_gen_wbinv_all() hook.  This implementation performs no
 * work.
 */
void
dumpsys_gen_wbinv_all(void)
{
	/* Intentionally empty. */
}
96 
97 void
98 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
99     void *va __unused)
100 {
101 
102 }
103 
/*
 * Generic dumpsys_gen_write_aux_headers() hook.  Writes nothing through
 * 'di' and always reports success (0).
 */
int
dumpsys_gen_write_aux_headers(struct dumperinfo *di)
{
	/* This version emits no auxiliary headers. */
	return (0);
}
110 
111 int
112 dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
113 {
114 	static uint8_t buf[DEV_BSIZE];
115 	size_t nbytes;
116 	int error;
117 
118 	bzero(buf, sizeof(buf));
119 
120 	while (sz > 0) {
121 		nbytes = MIN(sz, sizeof(buf));
122 
123 		error = dump_append(di, buf, nbytes);
124 		if (error)
125 			return (error);
126 		sz -= nbytes;
127 	}
128 
129 	return (0);
130 }
131 
132 int
133 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
134 {
135 	size_t len;
136 	int error;
137 
138 	while (sz) {
139 		len = di->blocksize - fragsz;
140 		if (len > sz)
141 			len = sz;
142 		memcpy((char *)di->blockbuf + fragsz, ptr, len);
143 		fragsz += len;
144 		ptr += len;
145 		sz -= len;
146 		if (fragsz == di->blocksize) {
147 			error = dump_append(di, di->blockbuf, di->blocksize);
148 			if (error)
149 				return (error);
150 			fragsz = 0;
151 		}
152 	}
153 	return (0);
154 }
155 
156 int
157 dumpsys_buf_flush(struct dumperinfo *di)
158 {
159 	int error;
160 
161 	if (fragsz == 0)
162 		return (0);
163 
164 	error = dump_append(di, di->blockbuf, di->blocksize);
165 	fragsz = 0;
166 	return (error);
167 }
168 
169 CTASSERT(PAGE_SHIFT < 20);
/*
 * Convert a page count to megabytes, rounding up.  The argument is
 * parenthesized so that an arbitrary expression can be passed safely.
 */
#define PG2MB(pgs) \
	(((pgs) + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
171 
172 int
173 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
174 {
175 	struct dumperinfo *di = (struct dumperinfo*)arg;
176 	vm_paddr_t pa;
177 	void *va;
178 	uint64_t pgs;
179 	size_t counter, sz, chunk;
180 	int c, error;
181 	u_int maxdumppgs;
182 
183 	error = 0;	/* catch case in which chunk size is 0 */
184 	counter = 0;	/* Update twiddle every 16MB */
185 	va = NULL;
186 	pgs = mdp->pa_size / PAGE_SIZE;
187 	pa = mdp->pa_start;
188 	maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
189 	if (maxdumppgs == 0)	/* seatbelt */
190 		maxdumppgs = 1;
191 
192 	printf("  chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
193 	    (uintmax_t)pgs);
194 
195 	dumpsys_wbinv_all();
196 	while (pgs) {
197 		chunk = pgs;
198 		if (chunk > maxdumppgs)
199 			chunk = maxdumppgs;
200 		sz = chunk << PAGE_SHIFT;
201 		counter += sz;
202 		if (counter >> 24) {
203 			printf(" %ju", (uintmax_t)PG2MB(pgs));
204 			counter &= (1 << 24) - 1;
205 		}
206 
207 		dumpsys_map_chunk(pa, chunk, &va);
208 		wdog_kern_pat(WD_LASTVAL);
209 
210 		error = dump_append(di, va, sz);
211 		dumpsys_unmap_chunk(pa, chunk, va);
212 		if (error)
213 			break;
214 		pgs -= chunk;
215 		pa += sz;
216 
217 		/* Check for user abort. */
218 		c = cncheckc();
219 		if (c == 0x03)
220 			return (ECANCELED);
221 		if (c != -1)
222 			printf(" (CTRL-C to abort) ");
223 	}
224 	printf(" ... %s\n", (error) ? "fail" : "ok");
225 	return (error);
226 }
227 
228 int
229 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
230 {
231 	struct dump_pa *mdp;
232 	int error, seqnr;
233 
234 	seqnr = 0;
235 	mdp = dumpsys_pa_next(NULL);
236 	while (mdp != NULL) {
237 		error = (*cb)(mdp, seqnr++, arg);
238 		if (error)
239 			return (-error);
240 		mdp = dumpsys_pa_next(mdp);
241 	}
242 	return (seqnr);
243 }
244 
245 static off_t fileofs;
246 
247 static int
248 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
249 {
250 	struct dumperinfo *di = (struct dumperinfo*)arg;
251 	Elf_Phdr phdr;
252 	uint64_t size;
253 	int error;
254 
255 	size = mdp->pa_size;
256 	bzero(&phdr, sizeof(phdr));
257 	phdr.p_type = PT_LOAD;
258 	phdr.p_flags = PF_R;			/* XXX */
259 	phdr.p_offset = fileofs;
260 #ifdef __powerpc__
261 	phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
262 	phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
263 #else
264 	phdr.p_vaddr = mdp->pa_start;
265 	phdr.p_paddr = mdp->pa_start;
266 #endif
267 	phdr.p_filesz = size;
268 	phdr.p_memsz = size;
269 	phdr.p_align = PAGE_SIZE;
270 
271 	error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
272 	fileofs += phdr.p_filesz;
273 	return (error);
274 }
275 
276 static int
277 cb_size(struct dump_pa *mdp, int seqnr, void *arg)
278 {
279 	uint64_t *sz;
280 
281 	sz = (uint64_t *)arg;
282 	*sz += (uint64_t)mdp->pa_size;
283 	return (0);
284 }
285 
286 int
287 dumpsys_generic(struct dumperinfo *di)
288 {
289 	static struct kerneldumpheader kdh;
290 	Elf_Ehdr ehdr;
291 	uint64_t dumpsize;
292 	off_t hdrgap;
293 	size_t hdrsz;
294 	int error;
295 
296 #if MINIDUMP_PAGE_TRACKING == 1
297 	if (do_minidump)
298 		return (minidumpsys(di, false));
299 #endif
300 
301 	bzero(&ehdr, sizeof(ehdr));
302 	ehdr.e_ident[EI_MAG0] = ELFMAG0;
303 	ehdr.e_ident[EI_MAG1] = ELFMAG1;
304 	ehdr.e_ident[EI_MAG2] = ELFMAG2;
305 	ehdr.e_ident[EI_MAG3] = ELFMAG3;
306 	ehdr.e_ident[EI_CLASS] = ELF_CLASS;
307 #if BYTE_ORDER == LITTLE_ENDIAN
308 	ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
309 #else
310 	ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
311 #endif
312 	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
313 	ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;	/* XXX big picture? */
314 	ehdr.e_type = ET_CORE;
315 	ehdr.e_machine = EM_VALUE;
316 	ehdr.e_phoff = sizeof(ehdr);
317 	ehdr.e_flags = 0;
318 	ehdr.e_ehsize = sizeof(ehdr);
319 	ehdr.e_phentsize = sizeof(Elf_Phdr);
320 	ehdr.e_shentsize = sizeof(Elf_Shdr);
321 
322 	dumpsys_pa_init();
323 
324 	/* Calculate dump size. */
325 	dumpsize = 0L;
326 	ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
327 	    DUMPSYS_NUM_AUX_HDRS;
328 	hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
329 	fileofs = MD_ALIGN(hdrsz);
330 	dumpsize += fileofs;
331 	hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
332 
333 	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
334 	    dumpsize);
335 
336 	error = dump_start(di, &kdh);
337 	if (error != 0)
338 		goto fail;
339 
340 	printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
341 	    ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
342 
343 	/* Dump ELF header */
344 	error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
345 	if (error)
346 		goto fail;
347 
348 	/* Dump program headers */
349 	error = dumpsys_foreach_chunk(cb_dumphdr, di);
350 	if (error < 0)
351 		goto fail;
352 	error = dumpsys_write_aux_headers(di);
353 	if (error < 0)
354 		goto fail;
355 	dumpsys_buf_flush(di);
356 
357 	/*
358 	 * All headers are written using blocked I/O, so we know the
359 	 * current offset is (still) block aligned. Skip the alignement
360 	 * in the file to have the segment contents aligned at page
361 	 * boundary.
362 	 */
363 	error = dumpsys_buf_seek(di, (size_t)hdrgap);
364 	if (error)
365 		goto fail;
366 
367 	/* Dump memory chunks. */
368 	error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di);
369 	if (error < 0)
370 		goto fail;
371 
372 	error = dump_finish(di, &kdh);
373 	if (error != 0)
374 		goto fail;
375 
376 	printf("\nDump complete\n");
377 	return (0);
378 
379  fail:
380 	if (error < 0)
381 		error = -error;
382 
383 	if (error == ECANCELED)
384 		printf("\nDump aborted\n");
385 	else if (error == E2BIG || error == ENOSPC)
386 		printf("\nDump failed. Partition too small.\n");
387 	else
388 		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
389 	return (error);
390 }
391 
392 #if MINIDUMP_PAGE_TRACKING == 1
393 
/*
 * Minidump progress bar.
 *
 * One entry per ten-percent band [min_per, max_per].  'visited' records
 * whether a percentage falling into that band has already been printed.
 */
static struct {
	const int min_per;
	const int max_per;
	bool visited;
} progress_track[10] = {
	[0] = { .min_per =  0, .max_per =  10, .visited = false },
	[1] = { .min_per = 10, .max_per =  20, .visited = false },
	[2] = { .min_per = 20, .max_per =  30, .visited = false },
	[3] = { .min_per = 30, .max_per =  40, .visited = false },
	[4] = { .min_per = 40, .max_per =  50, .visited = false },
	[5] = { .min_per = 50, .max_per =  60, .visited = false },
	[6] = { .min_per = 60, .max_per =  70, .visited = false },
	[7] = { .min_per = 70, .max_per =  80, .visited = false },
	[8] = { .min_per = 80, .max_per =  90, .visited = false },
	[9] = { .min_per = 90, .max_per = 100, .visited = false },
};
411 
412 static uint64_t dumpsys_pb_size;
413 static uint64_t dumpsys_pb_remaining;
414 static uint64_t dumpsys_pb_check;
415 
416 /* Reset the progress bar for a dump of dumpsize. */
417 void
418 dumpsys_pb_init(uint64_t dumpsize)
419 {
420 	int i;
421 
422 	dumpsys_pb_size = dumpsys_pb_remaining = dumpsize;
423 	dumpsys_pb_check = 0;
424 
425 	for (i = 0; i < nitems(progress_track); i++)
426 		progress_track[i].visited = false;
427 }
428 
429 /*
430  * Update the progress according to the delta bytes that were written out.
431  * Check and print the progress percentage.
432  */
433 void
434 dumpsys_pb_progress(size_t delta)
435 {
436 	int sofar, i;
437 
438 	dumpsys_pb_remaining -= delta;
439 	dumpsys_pb_check += delta;
440 
441 	/*
442 	 * To save time while dumping, only loop through progress_track
443 	 * occasionally.
444 	 */
445 	if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0)
446 		return;
447 	else
448 		dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1;
449 
450 	sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size);
451 	for (i = 0; i < nitems(progress_track); i++) {
452 		if (sofar < progress_track[i].min_per ||
453 		    sofar > progress_track[i].max_per)
454 			continue;
455 		if (!progress_track[i].visited) {
456 			progress_track[i].visited = true;
457 			printf("..%d%%", sofar);
458 		}
459 		break;
460 	}
461 }
462 
463 int
464 minidumpsys(struct dumperinfo *di, bool livedump)
465 {
466 	struct minidumpstate state;
467 	struct msgbuf mb_copy;
468 	char *msg_ptr;
469 	size_t sz;
470 	int error;
471 
472 	if (livedump) {
473 		KASSERT(!dumping, ("live dump invoked from incorrect context"));
474 
475 		/*
476 		 * Before invoking cpu_minidumpsys() on the live system, we
477 		 * must snapshot some required global state: the message
478 		 * buffer, and the page dump bitset. They may be modified at
479 		 * any moment, so for the sake of the live dump it is best to
480 		 * have an unchanging snapshot to work with. Both are included
481 		 * as part of the dump and consumed by userspace tools.
482 		 *
483 		 * Other global state important to the minidump code is the
484 		 * dump_avail array and the kernel's page tables, but snapshots
485 		 * are not taken of these. For one, dump_avail[] is expected
486 		 * not to change after boot. Snapshotting the kernel page
487 		 * tables would involve an additional walk, so this is avoided
488 		 * too.
489 		 *
490 		 * This means live dumps are best effort, and the result may or
491 		 * may not be usable; there are no guarantees about the
492 		 * consistency of the dump's contents. Any of the following
493 		 * (and likely more) may affect the live dump:
494 		 *
495 		 *  - Data may be modified, freed, or remapped during the
496 		 *    course of the dump, such that the contents written out
497 		 *    are partially or entirely unrecognizable. This means
498 		 *    valid references may point to destroyed/mangled objects,
499 		 *    and vice versa.
500 		 *
501 		 *  - The dumped context of any threads that ran during the
502 		 *    dump process may be unreliable.
503 		 *
504 		 *  - The set of kernel page tables included in the dump likely
505 		 *    won't correspond exactly to the copy of the dump bitset.
506 		 *    This means some pages will be dumped without any way to
507 		 *    locate them, and some pages may not have been dumped
508 		 *    despite appearing as if they should.
509 		 */
510 		msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
511 		msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
512 		state.msgbufp = &mb_copy;
513 
514 		sz = BITSET_SIZE(vm_page_dump_pages);
515 		state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK);
516 		BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset);
517 	} else {
518 		KASSERT(dumping, ("minidump invoked outside of doadump()"));
519 
520 		/* Use the globals. */
521 		state.msgbufp = msgbufp;
522 		state.dump_bitset = vm_page_dump;
523 	}
524 
525 	error = cpu_minidumpsys(di, &state);
526 	if (livedump) {
527 		free(msg_ptr, M_TEMP);
528 		free(state.dump_bitset, M_TEMP);
529 	}
530 
531 	return (error);
532 }
533 #endif /* MINIDUMP_PAGE_TRACKING == 1 */
534