/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2006 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_pmap.h"
#include "opt_watchdog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <sys/sysctl.h>
#include <sys/watchdog.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_dumpset.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/md_var.h>
#include <machine/minidump.h>
#include <machine/vmparam.h>

CTASSERT(sizeof(struct kerneldumpheader) == 512);

static struct kerneldumpheader kdh;

/* Handle chunked writes. */
static size_t fragsz;
static void *dump_va;
static size_t progress, dumpsize, wdog_next;

static int dump_retry_count = 5;
SYSCTL_INT(_machdep, OID_AUTO, dump_retry_count, CTLFLAG_RWTUN,
    &dump_retry_count, 0,
    "Number of times dump has to retry before bailing out");

static int
blk_flush(struct dumperinfo *di)
{
	int error;

	if (fragsz == 0)
		return (0);

	error = dump_append(di, dump_va, fragsz);
	fragsz = 0;
	return (error);
}

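/*
 * blk_write() below takes either a kernel virtual address (ptr) or a
 * physical address (pa), never both.  Physical pages are mapped one at a
 * time into the temporary crash-dump window by pmap_kenter_temporary()
 * and accumulated in dump_va/fragsz; blk_flush() then hands the whole
 * chunk to dump_append() in one call, amortizing the per-write cost of
 * the dump device.  As a sketch (the real chunk limit is
 * min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE), both of which vary by
 * configuration): with 4KB pages and a 64KB chunk, sixteen scattered
 * physical pages coalesce into a single dump_append() call.
 */
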
/* Pat the watchdog approximately every 128MB of the dump. */
#define	WDOG_DUMP_INTERVAL	(128 * 1024 * 1024)

static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
	size_t len;
	int error, i, c;
	u_int maxdumpsz;

	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
	if (maxdumpsz == 0)	/* seatbelt */
		maxdumpsz = PAGE_SIZE;
	error = 0;
	if ((sz % PAGE_SIZE) != 0) {
		printf("size not page aligned\n");
		return (EINVAL);
	}
	if (ptr != NULL && pa != 0) {
		printf("can't have both va and pa!\n");
		return (EINVAL);
	}
	if ((((uintptr_t)pa) % PAGE_SIZE) != 0) {
		printf("address not page aligned %p\n", ptr);
		return (EINVAL);
	}
	if (ptr != NULL) {
		/*
		 * If we're doing a virtual dump, flush any
		 * pre-existing pa pages.
		 */
		error = blk_flush(di);
		if (error)
			return (error);
	}
	while (sz) {
		len = maxdumpsz - fragsz;
		if (len > sz)
			len = sz;
		progress -= len;

		dumpsys_pb_progress(len);
		if (progress <= wdog_next) {
			wdog_kern_pat(WD_LASTVAL);
			if (wdog_next > WDOG_DUMP_INTERVAL)
				wdog_next -= WDOG_DUMP_INTERVAL;
			else
				wdog_next = 0;
		}

		if (ptr) {
			error = dump_append(di, ptr, len);
			if (error)
				return (error);
			ptr += len;
			sz -= len;
		} else {
			for (i = 0; i < len; i += PAGE_SIZE)
				dump_va = pmap_kenter_temporary(pa + i,
				    (i + fragsz) >> PAGE_SHIFT);
			fragsz += len;
			pa += len;
			sz -= len;
			if (fragsz == maxdumpsz) {
				error = blk_flush(di);
				if (error)
					return (error);
			}
		}

		/* Check for user abort. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1)
			printf(" (CTRL-C to abort) ");
	}

	return (0);
}

/* A fake page table page, to avoid having to handle both 4K and 2M pages */
static pd_entry_t fakepd[NPDEPG];

int
cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
{
	uint32_t pmapsize;
	vm_offset_t va, kva_end;
	int error;
	uint64_t *pml4, *pdp, *pd, *pt, pa;
	uint64_t pdpe, pde, pte;
	int ii, j, k, n;
	int retry_count;
	struct minidumphdr mdhdr;
	struct msgbuf *mbp;

	retry_count = 0;
retry:
	retry_count++;

	/* Snapshot the KVA upper bound in case it grows. */
	kva_end = MAX(KERNBASE + nkpt * NBPDR, kernel_vm_end);

	/*
	 * Walk the kernel page table pages, setting the active entries in the
	 * dump bitmap.
	 *
	 * NB: for a live dump, we may be racing with updates to the page
	 * tables, so care must be taken to read each entry only once.
	 */
	pmapsize = 0;
	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; ) {
		/*
		 * We always write a page, even if it is zero.  Each
		 * page written corresponds to 1GB of space.
		 */
		pmapsize += PAGE_SIZE;
		ii = pmap_pml4e_index(va);
		pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
		pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
		pdpe = atomic_load_64(&pdp[pmap_pdpe_index(va)]);
		if ((pdpe & PG_V) == 0) {
			va += NBPDP;
			continue;
		}

		/*
		 * 1GB page is represented as 512 2MB pages in a dump.
		 */
		if ((pdpe & PG_PS) != 0) {
			va += NBPDP;
			pa = pdpe & PG_PS_FRAME;
			for (n = 0; n < NPDEPG * NPTEPG; n++) {
				if (vm_phys_is_dumpable(pa))
					vm_page_dump_add(state->dump_bitset,
					    pa);
				pa += PAGE_SIZE;
			}
			continue;
		}

		pd = (uint64_t *)PHYS_TO_DMAP(pdpe & PG_FRAME);
		for (n = 0; n < NPDEPG; n++, va += NBPDR) {
			pde = atomic_load_64(&pd[pmap_pde_index(va)]);

			if ((pde & PG_V) == 0)
				continue;

			if ((pde & PG_PS) != 0) {
				/* This is an entire 2M page. */
				pa = pde & PG_PS_FRAME;
				for (k = 0; k < NPTEPG; k++) {
					if (vm_phys_is_dumpable(pa))
						vm_page_dump_add(
						    state->dump_bitset, pa);
					pa += PAGE_SIZE;
				}
				continue;
			}

			pa = pde & PG_FRAME;
			/* set bit for this PTE page */
			if (vm_phys_is_dumpable(pa))
				vm_page_dump_add(state->dump_bitset, pa);
			/* and for each valid page in this 2MB block */
			pt = (uint64_t *)PHYS_TO_DMAP(pde & PG_FRAME);
			for (k = 0; k < NPTEPG; k++) {
				pte = atomic_load_64(&pt[k]);
				if ((pte & PG_V) == 0)
					continue;
				pa = pte & PG_FRAME;
				if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
					vm_page_dump_add(state->dump_bitset,
					    pa);
			}
		}
	}

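	/*
	 * The total size must be known before anything is written, since
	 * dump_init_header() and dump_start() need it up front.  The
	 * on-disk layout produced by the write sequence below, with each
	 * component rounded up to a page boundary, is:
	 *
	 *	1. one page holding struct minidumphdr
	 *	2. the kernel message buffer
	 *	3. a copy of dump_avail[]
	 *	4. the page-dump bitmap
	 *	5. the page directory pages (pmapsize bytes)
	 *	6. every physical page marked in the bitmap
	 *
	 * (A sketch of how minidump readers find things, not an API:
	 * component N starts at the sum of the rounded sizes of
	 * components 1..N-1, which the minidump header records.)
	 */
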
	/* Calculate dump size. */
	mbp = state->msgbufp;
	dumpsize = pmapsize;
	dumpsize += round_page(mbp->msg_size);
	dumpsize += round_page(sizeof(dump_avail));
	dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages));
	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
		/* Clear out undumpable pages now if needed */
		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa))
			dumpsize += PAGE_SIZE;
		else
			vm_page_dump_drop(state->dump_bitset, pa);
	}
	dumpsize += PAGE_SIZE;

	wdog_next = progress = dumpsize;
	dumpsys_pb_init(dumpsize);

	/* Initialize mdhdr */
	bzero(&mdhdr, sizeof(mdhdr));
	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
	mdhdr.version = MINIDUMP_VERSION;
	mdhdr.msgbufsize = mbp->msg_size;
	mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages));
	mdhdr.pmapsize = pmapsize;
	mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS;
	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
	mdhdr.dmapend = DMAP_MAX_ADDRESS;
	mdhdr.dumpavailsize = round_page(sizeof(dump_avail));

	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION,
	    dumpsize);

	error = dump_start(di, &kdh);
	if (error != 0)
		goto fail;

	printf("Dumping %llu out of %ju MB:", (long long)dumpsize >> 20,
	    ptoa((uintmax_t)physmem) / 1048576);

	/* Dump my header */
	bzero(&fakepd, sizeof(fakepd));
	bcopy(&mdhdr, &fakepd, sizeof(mdhdr));
	error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
	if (error)
		goto fail;

	/* Dump msgbuf up front */
	error = blk_write(di, mbp->msg_ptr, 0, round_page(mbp->msg_size));
	if (error)
		goto fail;

	/* Dump dump_avail */
	_Static_assert(sizeof(dump_avail) <= sizeof(fakepd),
	    "Large dump_avail not handled");
	bzero(&fakepd, sizeof(fakepd));
	memcpy(fakepd, dump_avail, sizeof(dump_avail));
	error = blk_write(di, (char *)fakepd, 0, PAGE_SIZE);
	if (error)
		goto fail;

	/* Dump bitmap */
	error = blk_write(di, (char *)state->dump_bitset, 0,
	    round_page(BITSET_SIZE(vm_page_dump_pages)));
	if (error)
		goto fail;

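	/*
	 * The page directory pages written below present the kernel map to
	 * the dump reader as a uniform array of 2MB PDEs, one page per 1GB
	 * slot of KVA, so consumers never have to decode 1GB mappings.  An
	 * unmapped slot gets the zeroed fakepd; a 1GB superpage is
	 * synthesized as 512 consecutive 2MB entries, which works because
	 * PDPEs and PDEs share a layout for superpages.  A sketch with
	 * made-up values:
	 *
	 *	pdpe      = 0x00000001400000e3	(1GB page at pa 0x140000000)
	 *	fakepd[0] = 0x00000001400000e3
	 *	fakepd[1] = 0x00000001402000e3	(previous + NBPDR, i.e. 2MB)
	 *	...
	 */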
	/* Dump kernel page directory pages */
	bzero(fakepd, sizeof(fakepd));
	for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += NBPDP) {
		ii = pmap_pml4e_index(va);
		pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii;
		pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
		pdpe = atomic_load_64(&pdp[pmap_pdpe_index(va)]);

		/* We always write a page, even if it is zero */
		if ((pdpe & PG_V) == 0) {
			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* flush, in case we reuse fakepd in the same block */
			error = blk_flush(di);
			if (error)
				goto fail;
			continue;
		}

		/* 1GB page is represented as 512 2MB pages in a dump */
		if ((pdpe & PG_PS) != 0) {
			/* PDPE and PDP have identical layout in this case */
			fakepd[0] = pdpe;
			for (j = 1; j < NPDEPG; j++)
				fakepd[j] = fakepd[j - 1] + NBPDR;
			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
			if (error)
				goto fail;
			/* flush, in case we reuse fakepd in the same block */
			error = blk_flush(di);
			if (error)
				goto fail;
			bzero(fakepd, sizeof(fakepd));
			continue;
		}

		pa = pdpe & PG_FRAME;
		if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) {
			pd = (uint64_t *)PHYS_TO_DMAP(pa);
			error = blk_write(di, (char *)pd, 0, PAGE_SIZE);
		} else {
			/* Malformed pa, write the zeroed fakepd. */
			error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE);
		}
		if (error)
			goto fail;
		error = blk_flush(di);
		if (error)
			goto fail;
	}

	/* Dump memory chunks */
	VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) {
		error = blk_write(di, 0, pa, PAGE_SIZE);
		if (error)
			goto fail;
	}

	error = blk_flush(di);
	if (error)
		goto fail;

	error = dump_finish(di, &kdh);
	if (error != 0)
		goto fail;

	printf("\nDump complete\n");
	return (0);

fail:
	if (error < 0)
		error = -error;

	printf("\n");
	if (error == ENOSPC) {
		printf("Dump map grown while dumping. ");
		if (retry_count < dump_retry_count) {
			printf("Retrying...\n");
			goto retry;
		}
		printf("Dump failed.\n");
	} else if (error == ECANCELED)
		printf("Dump aborted\n");
	else if (error == E2BIG) {
		printf("Dump failed. Partition too small (about %lluMB were "
		    "needed this time).\n", (long long)dumpsize >> 20);
	} else
		printf("** DUMP FAILED (ERROR %d) **\n", error);
	return (error);
}