xref: /titanic_44/usr/src/uts/common/os/dumpsubr.c (revision ba7b222e36bac28710a7f43739283302b617e7f5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/vm.h>
31 #include <sys/proc.h>
32 #include <sys/file.h>
33 #include <sys/conf.h>
34 #include <sys/kmem.h>
35 #include <sys/mem.h>
36 #include <sys/mman.h>
37 #include <sys/vnode.h>
38 #include <sys/errno.h>
39 #include <sys/memlist.h>
40 #include <sys/dumphdr.h>
41 #include <sys/dumpadm.h>
42 #include <sys/ksyms.h>
43 #include <sys/compress.h>
44 #include <sys/stream.h>
45 #include <sys/strsun.h>
46 #include <sys/cmn_err.h>
47 #include <sys/bitmap.h>
48 #include <sys/modctl.h>
49 #include <sys/utsname.h>
50 #include <sys/systeminfo.h>
51 #include <sys/vmem.h>
52 #include <sys/log.h>
53 #include <sys/var.h>
54 #include <sys/debug.h>
55 #include <sys/sunddi.h>
56 #include <fs/fs_subr.h>
57 #include <sys/fs/snode.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/dkio.h>
61 #include <sys/vtoc.h>
62 #include <sys/errorq.h>
63 #include <sys/fm/util.h>
64 #include <sys/fs/zfs.h>
65 
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/page.h>
69 #include <vm/seg.h>
70 #include <vm/seg_kmem.h>
71 
/*
 * Dump configuration state.  dumpinit() and dumpfini() assert that dump_lock
 * is held while they change the configured device.
 */
kmutex_t	dump_lock;	/* lock for dump configuration */
dumphdr_t	*dumphdr;	/* dump header */
int		dump_conflags = DUMP_KERNEL; /* dump configuration flags */
vnode_t		*dumpvp;	/* dump device vnode pointer */
u_offset_t	dumpvp_size;	/* size of dump device, in bytes */
static u_offset_t dumpvp_limit;	/* maximum write offset */
char		*dumppath;	/* pathname of dump device */
int		dump_timeout = 120; /* timeout for dumping page during panic */
int		dump_timeleft;	/* portion of dump_timeout remaining */
int		dump_ioerr;	/* dump i/o error */

/*
 * dumpfaildebug defaults to on for DEBUG kernels and off otherwise.
 */
#ifdef DEBUG
int		dumpfaildebug = 1;	/* enter debugger if dump fails */
#else
int		dumpfaildebug = 0;
#endif

/*
 * Private working state used while a dump is being written.
 */
static ulong_t	*dump_bitmap;	/* bitmap for marking pages to dump */
static pgcnt_t	dump_bitmapsize; /* size of bitmap, in bits (one per page) */
static pid_t	*dump_pids;	/* list of process IDs at dump time */
static offset_t	dumpvp_off;	/* current dump device offset */
static char	*dump_cmap;	/* VA for dump compression mapping */
/* i/o staging buffer: next free byte, first byte, one past the last byte */
static char	*dumpbuf_cur, *dumpbuf_start, *dumpbuf_end;
static char	*dump_cbuf;	/* compression buffer */
static char	*dump_uebuf;	/* memory error detection buffer */
static size_t	dumpbuf_size;	/* size of dumpbuf in bytes */
static size_t	dumpbuf_limit = 1UL << 23;	/* 8MB */
static size_t	dump_iosize;	/* device's best transfer size, if any */
static uint64_t	dumpbuf_thresh = 1ULL << 30;	/* 1GB */
/* scaling multiplier applied once physmem reaches dumpbuf_thresh */
static ulong_t	dumpbuf_mult = 8;
102 
103 /*
104  * The dump i/o buffer must be at least one page, at most xfer_size bytes, and
105  * should scale with physmem in between.  The transfer size passed in will
106  * either represent a global default (maxphys) or the best size for the device.
107  * Once the physical memory size exceeds dumpbuf_thresh (1GB by default), we
108  * increase the percentage of physical memory that dumpbuf can consume by a
109  * factor of dumpbuf_mult (8 by default) to improve large memory performance.
110  * The size of the dumpbuf i/o buffer is limited by dumpbuf_limit (8MB by
111  * default) because the dump performance saturates beyond a certain size.
112  */
113 static size_t
114 dumpbuf_iosize(size_t xfer_size)
115 {
116 	pgcnt_t scale = physmem;
117 	size_t iosize;
118 
119 	if (scale >= dumpbuf_thresh / PAGESIZE) {
120 		scale *= dumpbuf_mult; /* increase scaling factor */
121 		iosize = MIN(xfer_size, scale) & PAGEMASK;
122 		if (dumpbuf_limit && iosize > dumpbuf_limit)
123 			iosize = MAX(PAGESIZE, dumpbuf_limit & PAGEMASK);
124 	} else
125 		iosize = MAX(PAGESIZE, MIN(xfer_size, scale) & PAGEMASK);
126 
127 	return (iosize);
128 }
129 
130 static void
131 dumpbuf_resize(void)
132 {
133 	char *old_buf = dumpbuf_start;
134 	size_t old_size = dumpbuf_size;
135 	char *new_buf;
136 	size_t new_size;
137 
138 	ASSERT(MUTEX_HELD(&dump_lock));
139 
140 	if ((new_size = dumpbuf_iosize(MAX(dump_iosize, maxphys))) <= old_size)
141 		return; /* no need to reallocate buffer */
142 
143 	new_buf = kmem_alloc(new_size, KM_SLEEP);
144 	dumpbuf_size = new_size;
145 	dumpbuf_start = new_buf;
146 	dumpbuf_end = new_buf + new_size;
147 	kmem_free(old_buf, old_size);
148 }
149 
150 static void
151 dumphdr_init(void)
152 {
153 	pgcnt_t npages = 0;
154 
155 	ASSERT(MUTEX_HELD(&dump_lock));
156 
157 	if (dumphdr == NULL) {
158 		dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
159 		dumphdr->dump_magic = DUMP_MAGIC;
160 		dumphdr->dump_version = DUMP_VERSION;
161 		dumphdr->dump_wordsize = DUMP_WORDSIZE;
162 		dumphdr->dump_pageshift = PAGESHIFT;
163 		dumphdr->dump_pagesize = PAGESIZE;
164 		dumphdr->dump_utsname = utsname;
165 		(void) strcpy(dumphdr->dump_platform, platform);
166 		dump_cmap = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
167 		dumpbuf_size = dumpbuf_iosize(maxphys);
168 		dumpbuf_start = kmem_alloc(dumpbuf_size, KM_SLEEP);
169 		dumpbuf_end = dumpbuf_start + dumpbuf_size;
170 		dump_cbuf = kmem_alloc(PAGESIZE, KM_SLEEP); /* compress buf */
171 		dump_uebuf = kmem_alloc(PAGESIZE, KM_SLEEP); /* UE buf */
172 		dump_pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
173 	}
174 
175 	npages = num_phys_pages();
176 
177 	if (dump_bitmapsize != npages) {
178 		void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);
179 		kmem_free(dump_bitmap, BT_SIZEOFMAP(dump_bitmapsize));
180 		dump_bitmap = map;
181 		dump_bitmapsize = npages;
182 	}
183 }
184 
185 /*
186  * Establish a new dump device.
187  */
188 int
189 dumpinit(vnode_t *vp, char *name, int justchecking)
190 {
191 	vnode_t *cvp;
192 	vattr_t vattr;
193 	vnode_t *cdev_vp;
194 	int error = 0;
195 
196 	ASSERT(MUTEX_HELD(&dump_lock));
197 
198 	dumphdr_init();
199 
200 	cvp = common_specvp(vp);
201 	if (cvp == dumpvp)
202 		return (0);
203 
204 	/*
205 	 * Determine whether this is a plausible dump device.  We want either:
206 	 * (1) a real device that's not mounted and has a cb_dump routine, or
207 	 * (2) a swapfile on some filesystem that has a vop_dump routine.
208 	 */
209 	if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0)
210 		return (error);
211 
212 	vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV;
213 	if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) {
214 		if (vattr.va_type == VBLK || vattr.va_type == VCHR) {
215 			if (devopsp[getmajor(vattr.va_rdev)]->
216 			    devo_cb_ops->cb_dump == nodev)
217 				error = ENOTSUP;
218 			else if (vfs_devismounted(vattr.va_rdev))
219 				error = EBUSY;
220 			if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip),
221 			    ZFS_DRIVER) == 0 &&
222 			    IS_SWAPVP(common_specvp(cvp)))
223 					error = EBUSY;
224 		} else {
225 			if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) ||
226 			    !IS_SWAPVP(cvp))
227 				error = ENOTSUP;
228 		}
229 	}
230 
231 	if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE)
232 		error = ENOSPC;
233 
234 	if (error || justchecking) {
235 		(void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0,
236 		    kcred, NULL);
237 		return (error);
238 	}
239 
240 	VN_HOLD(cvp);
241 
242 	if (dumpvp != NULL)
243 		dumpfini();	/* unconfigure the old dump device */
244 
245 	dumpvp = cvp;
246 	dumpvp_size = vattr.va_size & -DUMP_OFFSET;
247 	dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP);
248 	(void) strcpy(dumppath, name);
249 	dump_iosize = 0;
250 
251 	/*
252 	 * If the dump device is a block device, attempt to open up the
253 	 * corresponding character device and determine its maximum transfer
254 	 * size.  We use this information to potentially resize dumpbuf to a
255 	 * larger and more optimal size for performing i/o to the dump device.
256 	 */
257 	if (cvp->v_type == VBLK &&
258 	    (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) {
259 		if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
260 			size_t blk_size;
261 			struct dk_cinfo dki;
262 			struct dk_minfo minf;
263 
264 			if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
265 			    (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
266 			    == 0 && minf.dki_lbsize != 0)
267 				blk_size = minf.dki_lbsize;
268 			else
269 				blk_size = DEV_BSIZE;
270 
271 			if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki,
272 			    FKIOCTL, kcred, NULL, NULL) == 0) {
273 				dump_iosize = dki.dki_maxtransfer * blk_size;
274 				dumpbuf_resize();
275 			}
276 			/*
277 			 * If we are working with a zvol then dumpify it
278 			 * if it's not being used as swap.
279 			 */
280 			if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) {
281 				if (IS_SWAPVP(common_specvp(cvp)))
282 					error = EBUSY;
283 				else if ((error = VOP_IOCTL(cdev_vp,
284 				    DKIOCDUMPINIT, NULL, FKIOCTL, kcred,
285 				    NULL, NULL)) != 0)
286 					dumpfini();
287 			}
288 
289 			(void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
290 			    kcred, NULL);
291 		}
292 
293 		VN_RELE(cdev_vp);
294 	}
295 
296 	cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20);
297 
298 	return (error);
299 }
300 
301 void
302 dumpfini(void)
303 {
304 	vattr_t vattr;
305 	boolean_t is_zfs = B_FALSE;
306 	vnode_t *cdev_vp;
307 	ASSERT(MUTEX_HELD(&dump_lock));
308 
309 	kmem_free(dumppath, strlen(dumppath) + 1);
310 
311 	/*
312 	 * Determine if we are using zvols for our dump device
313 	 */
314 	vattr.va_mask = AT_RDEV;
315 	if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) {
316 		is_zfs = (getmajor(vattr.va_rdev) ==
317 		    ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE;
318 	}
319 
320 	/*
321 	 * If we have a zvol dump device then we call into zfs so
322 	 * that it may have a chance to cleanup.
323 	 */
324 	if (is_zfs &&
325 	    (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) {
326 		if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
327 			(void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL,
328 			    kcred, NULL, NULL);
329 			(void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
330 			    kcred, NULL);
331 		}
332 		VN_RELE(cdev_vp);
333 	}
334 
335 	(void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL);
336 
337 	VN_RELE(dumpvp);
338 
339 	dumpvp = NULL;
340 	dumpvp_size = 0;
341 	dumppath = NULL;
342 }
343 
344 static pfn_t
345 dump_bitnum_to_pfn(pgcnt_t bitnum)
346 {
347 	struct memlist *mp;
348 
349 	for (mp = phys_install; mp != NULL; mp = mp->next) {
350 		if (bitnum < (mp->size >> PAGESHIFT))
351 			return ((mp->address >> PAGESHIFT) + bitnum);
352 		bitnum -= mp->size >> PAGESHIFT;
353 	}
354 	return (PFN_INVALID);
355 }
356 
357 static pgcnt_t
358 dump_pfn_to_bitnum(pfn_t pfn)
359 {
360 	struct memlist *mp;
361 	pgcnt_t bitnum = 0;
362 
363 	for (mp = phys_install; mp != NULL; mp = mp->next) {
364 		if (pfn >= (mp->address >> PAGESHIFT) &&
365 		    pfn < ((mp->address + mp->size) >> PAGESHIFT))
366 			return (bitnum + pfn - (mp->address >> PAGESHIFT));
367 		bitnum += mp->size >> PAGESHIFT;
368 	}
369 	return ((pgcnt_t)-1);
370 }
371 
372 static offset_t
373 dumpvp_flush(void)
374 {
375 	size_t size = P2ROUNDUP(dumpbuf_cur - dumpbuf_start, PAGESIZE);
376 	int err;
377 
378 	if (dumpvp_off + size > dumpvp_limit) {
379 		dump_ioerr = ENOSPC;
380 	} else if (size != 0) {
381 		if (panicstr)
382 			err = VOP_DUMP(dumpvp, dumpbuf_start,
383 			    lbtodb(dumpvp_off), btod(size), NULL);
384 		else
385 			err = vn_rdwr(UIO_WRITE, dumpvp, dumpbuf_start, size,
386 			    dumpvp_off, UIO_SYSSPACE, 0, dumpvp_limit,
387 			    kcred, 0);
388 		if (err && dump_ioerr == 0)
389 			dump_ioerr = err;
390 	}
391 	dumpvp_off += size;
392 	dumpbuf_cur = dumpbuf_start;
393 	dump_timeleft = dump_timeout;
394 	return (dumpvp_off);
395 }
396 
397 void
398 dumpvp_write(const void *va, size_t size)
399 {
400 	while (size != 0) {
401 		size_t len = MIN(size, dumpbuf_end - dumpbuf_cur);
402 		if (len == 0) {
403 			(void) dumpvp_flush();
404 		} else {
405 			bcopy(va, dumpbuf_cur, len);
406 			va = (char *)va + len;
407 			dumpbuf_cur += len;
408 			size -= len;
409 		}
410 	}
411 }
412 
/*
 * Adapter handed to ksyms_snapshot() by dumpsys(): forwards 'size' bytes at
 * 'src' to dumpvp_write().  The 'dst' argument is ignored.
 */
/*ARGSUSED*/
static void
dumpvp_ksyms_write(const void *src, void *dst, size_t size)
{
	dumpvp_write(src, size);
}
419 
420 /*
421  * Mark 'pfn' in the bitmap and dump its translation table entry.
422  */
423 void
424 dump_addpage(struct as *as, void *va, pfn_t pfn)
425 {
426 	mem_vtop_t mem_vtop;
427 	pgcnt_t bitnum;
428 
429 	if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
430 		if (!BT_TEST(dump_bitmap, bitnum)) {
431 			dumphdr->dump_npages++;
432 			BT_SET(dump_bitmap, bitnum);
433 		}
434 		dumphdr->dump_nvtop++;
435 		mem_vtop.m_as = as;
436 		mem_vtop.m_va = va;
437 		mem_vtop.m_pfn = pfn;
438 		dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
439 	}
440 	dump_timeleft = dump_timeout;
441 }
442 
443 /*
444  * Mark 'pfn' in the bitmap
445  */
446 void
447 dump_page(pfn_t pfn)
448 {
449 	pgcnt_t bitnum;
450 
451 	if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
452 		if (!BT_TEST(dump_bitmap, bitnum)) {
453 			dumphdr->dump_npages++;
454 			BT_SET(dump_bitmap, bitnum);
455 		}
456 	}
457 	dump_timeleft = dump_timeout;
458 }
459 
460 /*
461  * Dump the <as, va, pfn> information for a given address space.
462  * SEGOP_DUMP() will call dump_addpage() for each page in the segment.
463  */
464 static void
465 dump_as(struct as *as)
466 {
467 	struct seg *seg;
468 
469 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
470 	for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
471 		if (seg->s_as != as)
472 			break;
473 		if (seg->s_ops == NULL)
474 			continue;
475 		SEGOP_DUMP(seg);
476 	}
477 	AS_LOCK_EXIT(as, &as->a_lock);
478 
479 	if (seg != NULL)
480 		cmn_err(CE_WARN, "invalid segment %p in address space %p",
481 		    (void *)seg, (void *)as);
482 }
483 
484 static int
485 dump_process(pid_t pid)
486 {
487 	proc_t *p = sprlock(pid);
488 
489 	if (p == NULL)
490 		return (-1);
491 	if (p->p_as != &kas) {
492 		mutex_exit(&p->p_lock);
493 		dump_as(p->p_as);
494 		mutex_enter(&p->p_lock);
495 	}
496 
497 	sprunlock(p);
498 
499 	return (0);
500 }
501 
502 void
503 dump_ereports(void)
504 {
505 	u_offset_t dumpvp_start;
506 	erpt_dump_t ed;
507 
508 	if (dumpvp == NULL || dumphdr == NULL)
509 		return;
510 
511 	dumpbuf_cur = dumpbuf_start;
512 	dumpvp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE);
513 	dumpvp_start = dumpvp_limit - DUMP_ERPTSIZE;
514 	dumpvp_off = dumpvp_start;
515 
516 	fm_ereport_dump();
517 	if (panicstr)
518 		errorq_dump();
519 
520 	bzero(&ed, sizeof (ed)); /* indicate end of ereports */
521 	dumpvp_write(&ed, sizeof (ed));
522 	(void) dumpvp_flush();
523 
524 	if (!panicstr) {
525 		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
526 		    (size_t)(dumpvp_off - dumpvp_start),
527 		    B_INVAL | B_FORCE, kcred, NULL);
528 	}
529 }
530 
531 void
532 dump_messages(void)
533 {
534 	log_dump_t ld;
535 	mblk_t *mctl, *mdata;
536 	queue_t *q, *qlast;
537 	u_offset_t dumpvp_start;
538 
539 	if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL)
540 		return;
541 
542 	dumpbuf_cur = dumpbuf_start;
543 	dumpvp_limit = dumpvp_size - DUMP_OFFSET;
544 	dumpvp_start = dumpvp_limit - DUMP_LOGSIZE;
545 	dumpvp_off = dumpvp_start;
546 
547 	qlast = NULL;
548 	do {
549 		for (q = log_consq; q->q_next != qlast; q = q->q_next)
550 			continue;
551 		for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) {
552 			dump_timeleft = dump_timeout;
553 			mdata = mctl->b_cont;
554 			ld.ld_magic = LOG_MAGIC;
555 			ld.ld_msgsize = MBLKL(mctl->b_cont);
556 			ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl));
557 			ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata));
558 			dumpvp_write(&ld, sizeof (ld));
559 			dumpvp_write(mctl->b_rptr, MBLKL(mctl));
560 			dumpvp_write(mdata->b_rptr, MBLKL(mdata));
561 		}
562 	} while ((qlast = q) != log_consq);
563 
564 	ld.ld_magic = 0;		/* indicate end of messages */
565 	dumpvp_write(&ld, sizeof (ld));
566 	(void) dumpvp_flush();
567 	if (!panicstr) {
568 		(void) VOP_PUTPAGE(dumpvp, dumpvp_start,
569 		    (size_t)(dumpvp_off - dumpvp_start),
570 		    B_INVAL | B_FORCE, kcred, NULL);
571 	}
572 }
573 
/*
 * Copy one page from 'src' to 'dst' a long at a time under on_trap()
 * protection (OT_DATA_EC).  When the on_trap() branch is taken, the word
 * being copied is replaced in 'dst' by a 0xbadecc marker pattern and the
 * copy carries on with the next word; the first such event is also logged
 * with the physical address involved.
 *
 * NOTE(review): 'w' and 'ueoff' are volatile, presumably so that their
 * values survive the non-local return into the on_trap() branch -- confirm
 * against the on_trap() implementation.
 */
static void
dump_pagecopy(void *src, void *dst)
{
	long *wsrc = (long *)src;
	long *wdst = (long *)dst;
	const ulong_t ncopies = PAGESIZE / sizeof (long);
	volatile int w = 0;		/* index of the word being copied */
	volatile int ueoff = -1;	/* byte offset of first error, or -1 */
	on_trap_data_t otd;

	if (on_trap(&otd, OT_DATA_EC)) {
		/* report only the first error seen in this page */
		if (ueoff == -1) {
			uint64_t pa;

			ueoff = w * sizeof (long);
			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, src))
			    + ueoff;
			cmn_err(CE_WARN, "memory error at PA 0x%08x.%08x",
			    (uint32_t)(pa >> 32), (uint32_t)pa);
		}
		/* substitute a marker for the bad word and step past it */
#ifdef _LP64
		wdst[w++] = 0xbadecc00badecc;
#else
		wdst[w++] = 0xbadecc;
#endif
	}
	/* copy (or resume copying) the remaining words */
	while (w < ncopies) {
		wdst[w] = wsrc[w];
		w++;
	}
	no_trap();
}
606 
/*
 * Dump the system.
 *
 * Writes, in order: the kernel symbol table, the translation map, the pfn
 * table and the compressed contents of every page marked in dump_bitmap,
 * followed by two copies of the dump header (one at dump_start and one in
 * the last DUMP_OFFSET bytes of the device).  When panicking, pending
 * ereports and console messages are saved afterwards.  What gets marked
 * depends on dump_conflags (DUMP_ALL, DUMP_CURPROC, or kernel only).
 * Does nothing but print a message if no dump device is configured.
 */
void
dumpsys(void)
{
	pfn_t pfn;
	pgcnt_t bitnum;
	int npages = 0;			/* pages written so far */
	int percent_done = 0;
	uint32_t csize;
	u_offset_t total_csize = 0;	/* sum of compressed page sizes */
	int compress_ratio;
	proc_t *p;
	pid_t npids, pidx;
	char *content;

	if (dumpvp == NULL || dumphdr == NULL) {
		uprintf("skipping system dump - no dump device configured\n");
		return;
	}
	dumpbuf_cur = dumpbuf_start;

	/*
	 * Calculate the starting block for dump.  If we're dumping on a
	 * swap device, start 1/5 of the way in; otherwise, start at the
	 * beginning.  And never use the first page -- it may be a disk label.
	 */
	if (dumpvp->v_flag & VISSWAP)
		dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
	else
		dumphdr->dump_start = DUMP_OFFSET;

	/* start out assuming a complete, live dump; adjusted below */
	dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE;
	dumphdr->dump_crashtime = gethrestime_sec();
	dumphdr->dump_npages = 0;
	dumphdr->dump_nvtop = 0;
	bzero(dump_bitmap, BT_SIZEOFMAP(dump_bitmapsize));
	dump_timeleft = dump_timeout;

	if (panicstr) {
		dumphdr->dump_flags &= ~DF_LIVE;
		(void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
		(void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
		(void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
		    panicstr, panicargs);
	}

	if (dump_conflags & DUMP_ALL)
		content = "all";
	else if (dump_conflags & DUMP_CURPROC)
		content = "kernel + curproc";
	else
		content = "kernel";
	uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
	    dumphdr->dump_start, content);

	/*
	 * Leave room for the message and ereport save areas and terminal dump
	 * header.
	 */
	dumpvp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET - DUMP_ERPTSIZE;

	/*
	 * Write out the symbol table.  It's no longer compressed,
	 * so its 'size' and 'csize' are equal.
	 */
	dumpvp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
	dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
	    ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);

	/*
	 * Write out the translation map.
	 */
	dumphdr->dump_map = dumpvp_flush();
	dump_as(&kas);
	dumphdr->dump_nvtop += dump_plat_addr();

	/*
	 * call into hat, which may have unmapped pages that also need to
	 * be in the dump
	 */
	hat_dump();

	if (dump_conflags & DUMP_ALL) {
		/* snapshot the pid list under pidlock, then dump each one */
		mutex_enter(&pidlock);

		for (npids = 0, p = practive; p != NULL; p = p->p_next)
			dump_pids[npids++] = p->p_pid;

		mutex_exit(&pidlock);

		for (pidx = 0; pidx < npids; pidx++)
			(void) dump_process(dump_pids[pidx]);

		/* "all" means every page: set every bit in the bitmap */
		for (bitnum = 0; bitnum < dump_bitmapsize; bitnum++) {
			dump_timeleft = dump_timeout;
			BT_SET(dump_bitmap, bitnum);
		}
		dumphdr->dump_npages = dump_bitmapsize;
		dumphdr->dump_flags |= DF_ALL;

	} else if (dump_conflags & DUMP_CURPROC) {
		/*
		 * Determine which pid is to be dumped.  If we're panicking, we
		 * dump the process associated with panic_thread (if any).  If
		 * this is a live dump, we dump the process associated with
		 * curthread.
		 */
		npids = 0;
		if (panicstr) {
			if (panic_thread != NULL &&
			    panic_thread->t_procp != NULL &&
			    panic_thread->t_procp != &p0) {
				dump_pids[npids++] =
				    panic_thread->t_procp->p_pid;
			}
		} else {
			dump_pids[npids++] = curthread->t_procp->p_pid;
		}

		/* fall back to a kernel-only dump if no process was dumped */
		if (npids && dump_process(dump_pids[0]) == 0)
			dumphdr->dump_flags |= DF_CURPROC;
		else
			dumphdr->dump_flags |= DF_KERNEL;

	} else {
		dumphdr->dump_flags |= DF_KERNEL;
	}

	/* low-order bit mask sized from highbit(dump_nvtop - 1) */
	dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;

	/*
	 * Write out the pfn table.
	 */
	dumphdr->dump_pfn = dumpvp_flush();
	for (bitnum = 0; bitnum < dump_bitmapsize; bitnum++) {
		dump_timeleft = dump_timeout;
		if (!BT_TEST(dump_bitmap, bitnum))
			continue;
		pfn = dump_bitnum_to_pfn(bitnum);
		ASSERT(pfn != PFN_INVALID);
		dumpvp_write(&pfn, sizeof (pfn_t));
	}
	dump_plat_pfn();

	/*
	 * Write out all the pages.
	 */
	dumphdr->dump_data = dumpvp_flush();
	for (bitnum = 0; bitnum < dump_bitmapsize; bitnum++) {
		dump_timeleft = dump_timeout;
		if (!BT_TEST(dump_bitmap, bitnum))
			continue;
		pfn = dump_bitnum_to_pfn(bitnum);
		ASSERT(pfn != PFN_INVALID);

		/*
		 * Map in page frame 'pfn', scan it for UE's while copying
		 * the data to dump_uebuf, unmap it, compress dump_uebuf into
		 * dump_cbuf, and write out dump_cbuf.  The UE check ensures
		 * that we don't lose the whole dump because of a latent UE.
		 */
		hat_devload(kas.a_hat, dump_cmap, PAGESIZE, pfn, PROT_READ,
		    HAT_LOAD_NOCONSIST);
		dump_pagecopy(dump_cmap, dump_uebuf);
		hat_unload(kas.a_hat, dump_cmap, PAGESIZE, HAT_UNLOAD);
		csize = (uint32_t)compress(dump_uebuf, dump_cbuf, PAGESIZE);
		/* each page is stored as its compressed size, then the data */
		dumpvp_write(&csize, sizeof (uint32_t));
		dumpvp_write(dump_cbuf, csize);
		if (dump_ioerr) {
			/* give up: record the count of pages written so far */
			dumphdr->dump_flags &= ~DF_COMPLETE;
			dumphdr->dump_npages = npages;
			break;
		}
		total_csize += csize;
		if (++npages * 100LL / dumphdr->dump_npages > percent_done) {
			uprintf("^\r%3d%% done", ++percent_done);
			if (!panicstr)
				delay(1);	/* let the output be sent */
		}
	}
	dumphdr->dump_npages += dump_plat_data(dump_cbuf);

	(void) dumpvp_flush();

	/*
	 * Write out the initial and terminal dump headers.
	 */
	dumpvp_off = dumphdr->dump_start;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	(void) dumpvp_flush();

	/* lift the limit so the terminal header fits at the very end */
	dumpvp_limit = dumpvp_size;
	dumpvp_off = dumpvp_limit - DUMP_OFFSET;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	(void) dumpvp_flush();

	/* ratio scaled by 100; printed below as whole.hundredths */
	compress_ratio = (int)(100LL * npages / (btopr(total_csize + 1)));

	uprintf("\r%3d%% done: %d pages dumped, compression ratio %d.%02d, ",
	    percent_done, npages, compress_ratio / 100, compress_ratio % 100);

	if (dump_ioerr == 0) {
		uprintf("dump succeeded\n");
	} else {
		uprintf("dump failed: error %d\n", dump_ioerr);
		if (panicstr && dumpfaildebug)
			debug_enter("dump failed");
	}

	/*
	 * Write out all undelivered messages.  This has to be the *last*
	 * thing we do because the dump process itself emits messages.
	 */
	if (panicstr) {
		dump_ereports();
		dump_messages();
	}

	delay(2 * hz);	/* let people see the 'done' message */
	/* clear the timeout and error state */
	dump_timeleft = 0;
	dump_ioerr = 0;
}
831 
/*
 * This function is called whenever the memory size, as represented
 * by the phys_install list, changes.
 *
 * Under dump_lock it re-runs dumphdr_init(), which reallocates the page
 * bitmap if the physical page count changed, and then dumpbuf_resize(),
 * which grows the dump i/o buffer if a larger size is now warranted.
 */
void
dump_resize()
{
	mutex_enter(&dump_lock);
	dumphdr_init();
	dumpbuf_resize();
	mutex_exit(&dump_lock);
}
844 
845 /*
846  * This function allows for dynamic resizing of a dump area. It assumes that
847  * the underlying device has update its appropriate size(9P).
848  */
849 int
850 dumpvp_resize()
851 {
852 	int error;
853 	vattr_t vattr;
854 
855 	mutex_enter(&dump_lock);
856 	vattr.va_mask = AT_SIZE;
857 	if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
858 		mutex_exit(&dump_lock);
859 		return (error);
860 	}
861 
862 	if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
863 		mutex_exit(&dump_lock);
864 		return (ENOSPC);
865 	}
866 
867 	dumpvp_size = vattr.va_size & -DUMP_OFFSET;
868 	mutex_exit(&dump_lock);
869 	return (0);
870 }
871