xref: /freebsd/sys/kern/kern_dump.c (revision fdafd315ad0d0f28a11b9fb4476a9ab059c62b92)
1 /*-
2  * Copyright (c) 2002 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/conf.h>
30 #include <sys/cons.h>
31 #include <sys/kdb.h>
32 #include <sys/kernel.h>
33 #include <sys/kerneldump.h>
34 #include <sys/malloc.h>
35 #include <sys/msgbuf.h>
36 #include <sys/proc.h>
37 #include <sys/watchdog.h>
38 
39 #include <vm/vm.h>
40 #include <vm/vm_param.h>
41 #include <vm/vm_page.h>
42 #include <vm/vm_phys.h>
43 #include <vm/vm_dumpset.h>
44 #include <vm/pmap.h>
45 
46 #include <machine/dump.h>
47 #include <machine/elf.h>
48 #include <machine/md_var.h>
49 #include <machine/pcb.h>
50 
/* Compile-time check: struct kerneldumpheader must be exactly 512 bytes. */
51 CTASSERT(sizeof(struct kerneldumpheader) == 512);
52 
/* Round x up to the next multiple of PAGE_SIZE, evaluated as an off_t. */
53 #define	MD_ALIGN(x)	roundup2((off_t)(x), PAGE_SIZE)
54 
55 /*
 * Handle buffered writes: fragsz is the number of bytes that
 * dumpsys_buf_write() has staged in di->blockbuf and that have not yet
 * been passed to dump_append().
 */
56 static size_t fragsz;
57 
/*
 * Table of physical address ranges to dump.  It is filled in by
 * dumpsys_gen_pa_init() and walked with dumpsys_gen_pa_next(); an entry
 * whose pa_size is 0 ends the walk.
 */
58 struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
59 
60 #if !defined(__powerpc__)
61 void
dumpsys_gen_pa_init(void)62 dumpsys_gen_pa_init(void)
63 {
64 	int n, idx;
65 
66 	bzero(dump_map, sizeof(dump_map));
67 	for (n = 0; n < nitems(dump_map); n++) {
68 		idx = n * 2;
69 		if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
70 			break;
71 		dump_map[n].pa_start = dump_avail[idx];
72 		dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
73 	}
74 }
75 #endif
76 
77 struct dump_pa *
dumpsys_gen_pa_next(struct dump_pa * mdp)78 dumpsys_gen_pa_next(struct dump_pa *mdp)
79 {
80 
81 	if (mdp == NULL)
82 		return (&dump_map[0]);
83 
84 	mdp++;
85 	if (mdp->pa_size == 0)
86 		mdp = NULL;
87 	return (mdp);
88 }
89 
/*
 * Generic implementation of the wbinv_all hook: deliberately a no-op.
 */
void
dumpsys_gen_wbinv_all(void)
{
	/* Nothing to do. */
}
95 
96 void
dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused,size_t chunk __unused,void * va __unused)97 dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
98     void *va __unused)
99 {
100 
101 }
102 
/*
 * Generic implementation of the write_aux_headers hook: writes nothing and
 * always reports success (0).  di is not used.
 */
int
dumpsys_gen_write_aux_headers(struct dumperinfo *di)
{
	/* No auxiliary headers to emit. */
	return (0);
}
109 
110 int
dumpsys_buf_seek(struct dumperinfo * di,size_t sz)111 dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
112 {
113 	static uint8_t buf[DEV_BSIZE];
114 	size_t nbytes;
115 	int error;
116 
117 	bzero(buf, sizeof(buf));
118 
119 	while (sz > 0) {
120 		nbytes = MIN(sz, sizeof(buf));
121 
122 		error = dump_append(di, buf, nbytes);
123 		if (error)
124 			return (error);
125 		sz -= nbytes;
126 	}
127 
128 	return (0);
129 }
130 
131 int
dumpsys_buf_write(struct dumperinfo * di,char * ptr,size_t sz)132 dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
133 {
134 	size_t len;
135 	int error;
136 
137 	while (sz) {
138 		len = di->blocksize - fragsz;
139 		if (len > sz)
140 			len = sz;
141 		memcpy((char *)di->blockbuf + fragsz, ptr, len);
142 		fragsz += len;
143 		ptr += len;
144 		sz -= len;
145 		if (fragsz == di->blocksize) {
146 			error = dump_append(di, di->blockbuf, di->blocksize);
147 			if (error)
148 				return (error);
149 			fragsz = 0;
150 		}
151 	}
152 	return (0);
153 }
154 
155 int
dumpsys_buf_flush(struct dumperinfo * di)156 dumpsys_buf_flush(struct dumperinfo *di)
157 {
158 	int error;
159 
160 	if (fragsz == 0)
161 		return (0);
162 
163 	error = dump_append(di, di->blockbuf, di->blocksize);
164 	fragsz = 0;
165 	return (error);
166 }
167 
168 CTASSERT(PAGE_SHIFT < 20);
169 #define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
170 
171 int
dumpsys_cb_dumpdata(struct dump_pa * mdp,int seqnr,void * arg)172 dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
173 {
174 	struct dumperinfo *di = (struct dumperinfo*)arg;
175 	vm_paddr_t pa;
176 	void *va;
177 	uint64_t pgs;
178 	size_t counter, sz, chunk;
179 	int c, error;
180 	u_int maxdumppgs;
181 
182 	error = 0;	/* catch case in which chunk size is 0 */
183 	counter = 0;	/* Update twiddle every 16MB */
184 	va = NULL;
185 	pgs = mdp->pa_size / PAGE_SIZE;
186 	pa = mdp->pa_start;
187 	maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
188 	if (maxdumppgs == 0)	/* seatbelt */
189 		maxdumppgs = 1;
190 
191 	printf("  chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
192 	    (uintmax_t)pgs);
193 
194 	dumpsys_wbinv_all();
195 	while (pgs) {
196 		chunk = pgs;
197 		if (chunk > maxdumppgs)
198 			chunk = maxdumppgs;
199 		sz = chunk << PAGE_SHIFT;
200 		counter += sz;
201 		if (counter >> 24) {
202 			printf(" %ju", (uintmax_t)PG2MB(pgs));
203 			counter &= (1 << 24) - 1;
204 		}
205 
206 		dumpsys_map_chunk(pa, chunk, &va);
207 		wdog_kern_pat(WD_LASTVAL);
208 
209 		error = dump_append(di, va, sz);
210 		dumpsys_unmap_chunk(pa, chunk, va);
211 		if (error)
212 			break;
213 		pgs -= chunk;
214 		pa += sz;
215 
216 		/* Check for user abort. */
217 		c = cncheckc();
218 		if (c == 0x03)
219 			return (ECANCELED);
220 		if (c != -1)
221 			printf(" (CTRL-C to abort) ");
222 	}
223 	printf(" ... %s\n", (error) ? "fail" : "ok");
224 	return (error);
225 }
226 
227 int
dumpsys_foreach_chunk(dumpsys_callback_t cb,void * arg)228 dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
229 {
230 	struct dump_pa *mdp;
231 	int error, seqnr;
232 
233 	seqnr = 0;
234 	mdp = dumpsys_pa_next(NULL);
235 	while (mdp != NULL) {
236 		error = (*cb)(mdp, seqnr++, arg);
237 		if (error)
238 			return (-error);
239 		mdp = dumpsys_pa_next(mdp);
240 	}
241 	return (seqnr);
242 }
243 
244 static off_t fileofs;
245 
246 static int
cb_dumphdr(struct dump_pa * mdp,int seqnr,void * arg)247 cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
248 {
249 	struct dumperinfo *di = (struct dumperinfo*)arg;
250 	Elf_Phdr phdr;
251 	uint64_t size;
252 	int error;
253 
254 	size = mdp->pa_size;
255 	bzero(&phdr, sizeof(phdr));
256 	phdr.p_type = PT_LOAD;
257 	phdr.p_flags = PF_R;			/* XXX */
258 	phdr.p_offset = fileofs;
259 #ifdef __powerpc__
260 	phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
261 	phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
262 #else
263 	phdr.p_vaddr = mdp->pa_start;
264 	phdr.p_paddr = mdp->pa_start;
265 #endif
266 	phdr.p_filesz = size;
267 	phdr.p_memsz = size;
268 	phdr.p_align = PAGE_SIZE;
269 
270 	error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
271 	fileofs += phdr.p_filesz;
272 	return (error);
273 }
274 
275 static int
cb_size(struct dump_pa * mdp,int seqnr,void * arg)276 cb_size(struct dump_pa *mdp, int seqnr, void *arg)
277 {
278 	uint64_t *sz;
279 
280 	sz = (uint64_t *)arg;
281 	*sz += (uint64_t)mdp->pa_size;
282 	return (0);
283 }
284 
285 int
dumpsys_generic(struct dumperinfo * di)286 dumpsys_generic(struct dumperinfo *di)
287 {
288 	static struct kerneldumpheader kdh;
289 	Elf_Ehdr ehdr;
290 	uint64_t dumpsize;
291 	off_t hdrgap;
292 	size_t hdrsz;
293 	int error;
294 
295 #if MINIDUMP_PAGE_TRACKING == 1
296 	if (do_minidump)
297 		return (minidumpsys(di, false));
298 #endif
299 
300 	bzero(&ehdr, sizeof(ehdr));
301 	ehdr.e_ident[EI_MAG0] = ELFMAG0;
302 	ehdr.e_ident[EI_MAG1] = ELFMAG1;
303 	ehdr.e_ident[EI_MAG2] = ELFMAG2;
304 	ehdr.e_ident[EI_MAG3] = ELFMAG3;
305 	ehdr.e_ident[EI_CLASS] = ELF_CLASS;
306 #if BYTE_ORDER == LITTLE_ENDIAN
307 	ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
308 #else
309 	ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
310 #endif
311 	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
312 	ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;	/* XXX big picture? */
313 	ehdr.e_type = ET_CORE;
314 	ehdr.e_machine = EM_VALUE;
315 	ehdr.e_phoff = sizeof(ehdr);
316 	ehdr.e_flags = 0;
317 	ehdr.e_ehsize = sizeof(ehdr);
318 	ehdr.e_phentsize = sizeof(Elf_Phdr);
319 	ehdr.e_shentsize = sizeof(Elf_Shdr);
320 
321 	dumpsys_pa_init();
322 
323 	/* Calculate dump size. */
324 	dumpsize = 0L;
325 	ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
326 	    DUMPSYS_NUM_AUX_HDRS;
327 	hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
328 	fileofs = MD_ALIGN(hdrsz);
329 	dumpsize += fileofs;
330 	hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
331 
332 	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
333 	    dumpsize);
334 
335 	error = dump_start(di, &kdh);
336 	if (error != 0)
337 		goto fail;
338 
339 	printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
340 	    ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
341 
342 	/* Dump ELF header */
343 	error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
344 	if (error)
345 		goto fail;
346 
347 	/* Dump program headers */
348 	error = dumpsys_foreach_chunk(cb_dumphdr, di);
349 	if (error < 0)
350 		goto fail;
351 	error = dumpsys_write_aux_headers(di);
352 	if (error < 0)
353 		goto fail;
354 	dumpsys_buf_flush(di);
355 
356 	/*
357 	 * All headers are written using blocked I/O, so we know the
358 	 * current offset is (still) block aligned. Skip the alignement
359 	 * in the file to have the segment contents aligned at page
360 	 * boundary.
361 	 */
362 	error = dumpsys_buf_seek(di, (size_t)hdrgap);
363 	if (error)
364 		goto fail;
365 
366 	/* Dump memory chunks. */
367 	error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di);
368 	if (error < 0)
369 		goto fail;
370 
371 	error = dump_finish(di, &kdh);
372 	if (error != 0)
373 		goto fail;
374 
375 	printf("\nDump complete\n");
376 	return (0);
377 
378  fail:
379 	if (error < 0)
380 		error = -error;
381 
382 	if (error == ECANCELED)
383 		printf("\nDump aborted\n");
384 	else if (error == E2BIG || error == ENOSPC)
385 		printf("\nDump failed. Partition too small.\n");
386 	else
387 		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
388 	return (error);
389 }
390 
391 #if MINIDUMP_PAGE_TRACKING == 1
392 
/*
 * Minidump progress bar.
 *
 * Each entry covers an inclusive percentage range; visited records whether
 * a progress message has already been printed for that range.
 */
static struct {
	const int min_per;
	const int max_per;
	bool visited;
} progress_track[10] = {
	{ .min_per =  0, .max_per =  10, .visited = false },
	{ .min_per = 10, .max_per =  20, .visited = false },
	{ .min_per = 20, .max_per =  30, .visited = false },
	{ .min_per = 30, .max_per =  40, .visited = false },
	{ .min_per = 40, .max_per =  50, .visited = false },
	{ .min_per = 50, .max_per =  60, .visited = false },
	{ .min_per = 60, .max_per =  70, .visited = false },
	{ .min_per = 70, .max_per =  80, .visited = false },
	{ .min_per = 80, .max_per =  90, .visited = false },
	{ .min_per = 90, .max_per = 100, .visited = false },
};

/* Total number of bytes in the dump being tracked. */
static uint64_t dumpsys_pb_size;
/* Bytes of the dump that have not been reported as written yet. */
static uint64_t dumpsys_pb_remaining;
/* Bytes reported since the progress table was last consulted. */
static uint64_t dumpsys_pb_check;
414 
415 /* Reset the progress bar for a dump of dumpsize. */
416 void
dumpsys_pb_init(uint64_t dumpsize)417 dumpsys_pb_init(uint64_t dumpsize)
418 {
419 	int i;
420 
421 	dumpsys_pb_size = dumpsys_pb_remaining = dumpsize;
422 	dumpsys_pb_check = 0;
423 
424 	for (i = 0; i < nitems(progress_track); i++)
425 		progress_track[i].visited = false;
426 }
427 
428 /*
429  * Update the progress according to the delta bytes that were written out.
430  * Check and print the progress percentage.
431  */
432 void
dumpsys_pb_progress(size_t delta)433 dumpsys_pb_progress(size_t delta)
434 {
435 	int sofar, i;
436 
437 	dumpsys_pb_remaining -= delta;
438 	dumpsys_pb_check += delta;
439 
440 	/*
441 	 * To save time while dumping, only loop through progress_track
442 	 * occasionally.
443 	 */
444 	if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0)
445 		return;
446 	else
447 		dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1;
448 
449 	sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size);
450 	for (i = 0; i < nitems(progress_track); i++) {
451 		if (sofar < progress_track[i].min_per ||
452 		    sofar > progress_track[i].max_per)
453 			continue;
454 		if (!progress_track[i].visited) {
455 			progress_track[i].visited = true;
456 			printf("..%d%%", sofar);
457 		}
458 		break;
459 	}
460 }
461 
462 int
minidumpsys(struct dumperinfo * di,bool livedump)463 minidumpsys(struct dumperinfo *di, bool livedump)
464 {
465 	struct minidumpstate state;
466 	struct msgbuf mb_copy;
467 	char *msg_ptr;
468 	size_t sz;
469 	int error;
470 
471 	if (livedump) {
472 		KASSERT(!dumping, ("live dump invoked from incorrect context"));
473 
474 		/*
475 		 * Before invoking cpu_minidumpsys() on the live system, we
476 		 * must snapshot some required global state: the message
477 		 * buffer, and the page dump bitset. They may be modified at
478 		 * any moment, so for the sake of the live dump it is best to
479 		 * have an unchanging snapshot to work with. Both are included
480 		 * as part of the dump and consumed by userspace tools.
481 		 *
482 		 * Other global state important to the minidump code is the
483 		 * dump_avail array and the kernel's page tables, but snapshots
484 		 * are not taken of these. For one, dump_avail[] is expected
485 		 * not to change after boot. Snapshotting the kernel page
486 		 * tables would involve an additional walk, so this is avoided
487 		 * too.
488 		 *
489 		 * This means live dumps are best effort, and the result may or
490 		 * may not be usable; there are no guarantees about the
491 		 * consistency of the dump's contents. Any of the following
492 		 * (and likely more) may affect the live dump:
493 		 *
494 		 *  - Data may be modified, freed, or remapped during the
495 		 *    course of the dump, such that the contents written out
496 		 *    are partially or entirely unrecognizable. This means
497 		 *    valid references may point to destroyed/mangled objects,
498 		 *    and vice versa.
499 		 *
500 		 *  - The dumped context of any threads that ran during the
501 		 *    dump process may be unreliable.
502 		 *
503 		 *  - The set of kernel page tables included in the dump likely
504 		 *    won't correspond exactly to the copy of the dump bitset.
505 		 *    This means some pages will be dumped without any way to
506 		 *    locate them, and some pages may not have been dumped
507 		 *    despite appearing as if they should.
508 		 */
509 		msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
510 		msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
511 		state.msgbufp = &mb_copy;
512 
513 		sz = BITSET_SIZE(vm_page_dump_pages);
514 		state.dump_bitset = malloc(sz, M_TEMP, M_WAITOK);
515 		BIT_COPY_STORE_REL(sz, vm_page_dump, state.dump_bitset);
516 	} else {
517 		KASSERT(dumping, ("minidump invoked outside of doadump()"));
518 
519 		/* Use the globals. */
520 		state.msgbufp = msgbufp;
521 		state.dump_bitset = vm_page_dump;
522 	}
523 
524 	error = cpu_minidumpsys(di, &state);
525 	if (livedump) {
526 		free(msg_ptr, M_TEMP);
527 		free(state.dump_bitset, M_TEMP);
528 	}
529 
530 	return (error);
531 }
532 #endif /* MINIDUMP_PAGE_TRACKING == 1 */
533