/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Fill in and write out the cpr state file:
 *	1. Allocate and write headers, ELF and cpr dump header
 *	2. Allocate bitmaps according to phys_install
 *	3. Tag kernel pages into the corresponding bitmap
 *	4. Write bitmaps to the state file
 *	5. Write actual physical page data to the state file
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/vm.h>
#include <sys/memlist.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_inode.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/panic.h>
#include <sys/thread.h>

/* Local defines and variables */
#define	BTOb(bytes)	((bytes) << 3)	/* Bytes to bits, log2(NBBY) */
#define	bTOB(bits)	((bits) >> 3)	/* bits to Bytes, log2(NBBY) */

static uint_t cpr_pages_tobe_dumped;
static uint_t cpr_regular_pgs_dumped;

static int cpr_dump_regular_pages(vnode_t *);
static int cpr_count_upages(int, bitfunc_t);
static int cpr_compress_and_write(vnode_t *, uint_t, pfn_t, pgcnt_t);
int cpr_flush_write(vnode_t *);

int cpr_contig_pages(vnode_t *, int);

void cpr_clear_bitmaps(void);

extern size_t cpr_get_devsize(dev_t);
extern int i_cpr_dump_setup(vnode_t *);
extern int i_cpr_blockzero(char *, char **, int *, vnode_t *);
extern int cpr_test_mode;

ctrm_t cpr_term;

char *cpr_buf, *cpr_buf_end;
int cpr_buf_blocks;		/* size of cpr_buf in blocks */
size_t cpr_buf_size;		/* size of cpr_buf in bytes */
size_t cpr_bitmap_size;
int cpr_nbitmaps;

char *cpr_pagedata;		/* page buffer for compression / tmp copy */
size_t cpr_pagedata_size;	/* page buffer size in bytes */

static char *cpr_wptr;		/* keep track of where to write to next */
static int cpr_file_bn;		/* cpr state-file block offset */
static int cpr_disk_writes_ok;
static size_t cpr_dev_space = 0;

char cpr_pagecopy[CPR_MAXCONTIG * MMU_PAGESIZE];
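
/*
 * Note on the unit macros above (illustrative only, beyond what the
 * code itself uses): BTOb() and bTOB() convert between bytes and bits
 * by shifting by 3, i.e. log2(NBBY) with NBBY == 8.  For example:
 *
 *	BTOb(16)  == 128	16 bytes of bitmap hold 128 bits
 *	bTOB(128) == 16		128 bits fit in 16 bytes
 *
 * bTOB() truncates, so a caller rounding a bit count up to whole
 * bytes would use bTOB(bits + NBBY - 1).
 */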

/*
 * On some platforms bcopy may modify the thread structure
 * during bcopy (e.g., to prevent cpu migration).  If the
 * range we are currently writing out includes our own
 * thread structure then it will be snapshotted by bcopy
 * including those modified members - and the updates made
 * on exit from bcopy will no longer be seen when we later
 * restore the mid-bcopy kthread_t.  So if the range we
 * need to copy overlaps with our thread structure we will
 * use a simple byte copy.
 */
void
cprbcopy(void *from, void *to, size_t bytes)
{
	extern int curthreadremapped;
	caddr_t kthrend;

	kthrend = (caddr_t)curthread + sizeof (kthread_t) - 1;
	if (curthreadremapped || (kthrend >= (caddr_t)from &&
	    kthrend < (caddr_t)from + bytes + sizeof (kthread_t) - 1)) {
		caddr_t src = from, dst = to;

		while (bytes-- > 0)
			*dst++ = *src++;
	} else {
		bcopy(from, to, bytes);
	}
}

/*
 * Allocate pages for buffers used in writing out the statefile
 */
static int
cpr_alloc_bufs(void)
{
	char *allocerr = "Unable to allocate memory for cpr buffer";
	size_t size;

	/*
	 * Set the cpr write buffer size to at least the historic
	 * size (128k) or large enough to store both the early set
	 * of statefile structures (well under 0x800) plus the
	 * bitmaps, and round up to the next pagesize.
	 */
	size = PAGE_ROUNDUP(dbtob(4) + cpr_bitmap_size);
	cpr_buf_size = MAX(size, CPRBUFSZ);
	cpr_buf_blocks = btodb(cpr_buf_size);
	cpr_buf = kmem_alloc(cpr_buf_size, KM_NOSLEEP);
	if (cpr_buf == NULL) {
		cpr_err(CE_WARN, allocerr);
		return (ENOMEM);
	}
	cpr_buf_end = cpr_buf + cpr_buf_size;

	cpr_pagedata_size = mmu_ptob(CPR_MAXCONTIG + 1);
	cpr_pagedata = kmem_alloc(cpr_pagedata_size, KM_NOSLEEP);
	if (cpr_pagedata == NULL) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
		cpr_err(CE_WARN, allocerr);
		return (ENOMEM);
	}

	return (0);
}


/*
 * Set bitmap size in bytes based on phys_install.
 */
void
cpr_set_bitmap_size(void)
{
	struct memlist *pmem;
	size_t size = 0;

	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->next)
		size += pmem->size;
	memlist_read_unlock();
	cpr_bitmap_size = BITMAP_BYTES(size);
}
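
/*
 * Worked example (illustrative; assumes 8K MMU pages): BITMAP_BYTES()
 * reserves one bit per physical page, so a machine with 1GB in
 * phys_install needs
 *
 *	1GB / 8K	= 131072 pages
 *	131072 / NBBY	= 16K of bitmap
 *
 * per map.  cpr_write_header() tags pages into this "regular" bitmap,
 * and each descriptor pairs it with a same-sized "volatile" bitmap
 * (see cpr_clear_bitmaps(), which zeroes both with one bzero).
 */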


/*
 * CPR dump header contains the following information:
 *	1. header magic -- unique to cpr state file
 *	2. kernel return pc & ppn for resume
 *	3. current thread info
 *	4. debug level and test mode
 *	5. number of bitmaps allocated
 *	6. number of page records
 */
static int
cpr_write_header(vnode_t *vp)
{
	extern ushort_t cpr_mach_type;
	struct cpr_dump_desc cdump;
	pgcnt_t bitmap_pages;
	pgcnt_t kpages, vpages, upages;

	cdump.cdd_magic = (uint_t)CPR_DUMP_MAGIC;
	cdump.cdd_version = CPR_VERSION;
	cdump.cdd_machine = cpr_mach_type;
	cdump.cdd_debug = cpr_debug;
	cdump.cdd_test_mode = cpr_test_mode;
	cdump.cdd_bitmaprec = cpr_nbitmaps;

	cpr_clear_bitmaps();

	/*
	 * Remember how many pages we plan to save to the statefile.
	 * This information will be used for sanity checks.
	 * Untag those pages that will not be saved to the statefile.
	 */
	kpages = cpr_count_kpages(REGULAR_BITMAP, cpr_setbit);
	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
	upages = cpr_count_upages(REGULAR_BITMAP, cpr_setbit);
	cdump.cdd_dumppgsize = kpages - vpages + upages;
	cpr_pages_tobe_dumped = cdump.cdd_dumppgsize;
	DEBUG7(errp(
	    "\ncpr_write_header: kpages %ld - vpages %ld + upages %ld = %d\n",
	    kpages, vpages, upages, cdump.cdd_dumppgsize));

	/*
	 * Some pages contain volatile data (cpr_buf and the storage area
	 * for sensitive kpages), which are no longer needed after the
	 * statefile is dumped to disk.  We have already untagged them
	 * from the regular bitmaps.  Now tag them into the volatile
	 * bitmaps.  The pages in the volatile bitmaps will be claimed
	 * during resume, and the resumed kernel will free them.
	 */
	(void) cpr_count_volatile_pages(VOLATILE_BITMAP, cpr_setbit);

	bitmap_pages = mmu_btopr(cpr_bitmap_size);

	/*
	 * Export an accurate statefile size for statefile allocation retry.
	 * statefile_size = all the headers + total pages +
	 * number of pages used by the bitmaps.
	 * Roundup will be done in the file allocation code.
	 */
	STAT->cs_nocomp_statefsz = sizeof (cdd_t) + sizeof (cmd_t) +
	    (sizeof (cbd_t) * cdump.cdd_bitmaprec) +
	    (sizeof (cpd_t) * cdump.cdd_dumppgsize) +
	    mmu_ptob(cdump.cdd_dumppgsize + bitmap_pages);

	/*
	 * If the estimated statefile is not big enough,
	 * retry now to save unnecessary operations.
	 */
	if (!(CPR->c_flags & C_COMPRESSING) &&
	    (STAT->cs_nocomp_statefsz > STAT->cs_est_statefsz)) {
		if (cpr_debug & (LEVEL1 | LEVEL7))
			errp("cpr_write_header: STAT->cs_nocomp_statefsz > "
			    "STAT->cs_est_statefsz\n");
		return (ENOSPC);
	}

	/* now write cpr dump descriptor */
	return (cpr_write(vp, (caddr_t)&cdump, sizeof (cdd_t)));
}


/*
 * CPR dump tail record contains the following information:
 *	1. header magic -- unique to cpr state file
 *	2. all misc info that needs to be passed to cprboot or the
 *	   resumed kernel
 */
static int
cpr_write_terminator(vnode_t *vp)
{
	cpr_term.magic = (uint_t)CPR_TERM_MAGIC;
	cpr_term.va = (cpr_ptr)&cpr_term;
	cpr_term.pfn = (cpr_ext)va_to_pfn(&cpr_term);

	/* count the last one (flush) */
	cpr_term.real_statef_size = STAT->cs_real_statefsz +
	    btod(cpr_wptr - cpr_buf) * DEV_BSIZE;

	DEBUG9(errp("cpr_dump: Real Statefile Size: %d\n",
	    STAT->cs_real_statefsz));

	cpr_tod_get(&cpr_term.tm_shutdown);

	return (cpr_write(vp, (caddr_t)&cpr_term, sizeof (cpr_term)));
}
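
/*
 * Illustration (assuming DEV_BSIZE is 512): the terminator above
 * counts the data still sitting in cpr_buf as whole disk blocks,
 * because cpr_flush_write() rounds the final transfer up to a block
 * boundary.  With 700 residual bytes,
 *
 *	btod(700) * DEV_BSIZE == 2 * 512 == 1024
 *
 * so real_statef_size may exceed the bytes actually buffered by up
 * to DEV_BSIZE - 1.
 */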

/*
 * Write the bitmap descriptor array, followed by the merged bitmaps.
 */
static int
cpr_write_bitmap(vnode_t *vp)
{
	char *rmap, *vmap, *dst, *tail;
	size_t size, bytes;
	cbd_t *dp;
	int err;

	dp = CPR->c_bmda;
	if (err = cpr_write(vp, (caddr_t)dp, cpr_nbitmaps * sizeof (*dp)))
		return (err);

	/*
	 * merge the regular and volatile bitmaps into tmp space
	 * and write to disk
	 */
	for (; dp->cbd_size; dp++) {
		rmap = (char *)dp->cbd_reg_bitmap;
		vmap = (char *)dp->cbd_vlt_bitmap;
		for (size = dp->cbd_size; size; size -= bytes) {
			bytes = min(size, sizeof (cpr_pagecopy));
			tail = &cpr_pagecopy[bytes];
			for (dst = cpr_pagecopy; dst < tail; dst++)
				*dst = *rmap++ | *vmap++;
			if (err = cpr_write(vp, cpr_pagecopy, bytes))
				break;
		}
	}

	return (err);
}


static int
cpr_write_statefile(vnode_t *vp)
{
	uint_t error = 0;
	extern int i_cpr_check_pgs_dumped();
	void flush_windows(void);
	pgcnt_t spages;
	char *str;

	flush_windows();

	/*
	 * To get an accurate view of kas, we need to untag sensitive
	 * pages *before* dumping them because the disk driver makes
	 * allocations and changes kas along the way.  The remaining
	 * pages referenced in the bitmaps are dumped out later as
	 * regular kpages.
	 */
	str = "cpr_write_statefile:";
	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_clrbit);
	DEBUG7(errp("%s untag %ld sens pages\n", str, spages));

	/*
	 * now it's OK to call a driver that makes allocations
	 */
	cpr_disk_writes_ok = 1;

	/*
	 * now write out the clean sensitive kpages
	 * according to the sensitive descriptors
	 */
	error = i_cpr_dump_sensitive_kpages(vp);
	if (error) {
		DEBUG7(errp("%s cpr_dump_sensitive_kpages() failed!\n", str));
		return (error);
	}

	/*
	 * cpr_dump_regular_pages() counts cpr_regular_pgs_dumped
	 */
	error = cpr_dump_regular_pages(vp);
	if (error) {
		DEBUG7(errp("%s cpr_dump_regular_pages() failed!\n", str));
		return (error);
	}

	/*
	 * sanity check to verify the right number of pages were dumped
	 */
	error = i_cpr_check_pgs_dumped(cpr_pages_tobe_dumped,
	    cpr_regular_pgs_dumped);

	if (error) {
		errp("\n%s page count mismatch!\n", str);
#ifdef DEBUG
		if (cpr_test_mode)
			debug_enter(NULL);
#endif
	}

	return (error);
}
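
/*
 * For orientation, the statefile assembled by cpr_dump() below lays
 * out roughly as follows (a sketch based on the write sequence and on
 * the cs_nocomp_statefsz estimate in cpr_write_header(); sizes are
 * pre-compression):
 *
 *	cdd_t			dump descriptor (cpr_write_header)
 *	cmd_t			machdep info (i_cpr_write_machdep)
 *	cbd_t[cdd_bitmaprec]	bitmap descriptors (cpr_write_bitmap)
 *	merged bitmaps		regular | volatile, cbd_size bytes each
 *	{cpd_t + page data}...	sensitive kpages, then regular pages
 *	ctrm_t			terminator (cpr_write_terminator)
 */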


/*
 * Creates the CPR state file; the following sections are
 * written out in sequence:
 *	- the cpr dump header
 *	- the memory usage bitmaps
 *	- the platform dependent info
 *	- the remaining user pages
 *	- the kernel pages
 */
int
cpr_dump(vnode_t *vp)
{
	int error;

	if (cpr_buf == NULL) {
		ASSERT(cpr_pagedata == NULL);
		if (error = cpr_alloc_bufs())
			return (error);
	}
	/* point to top of internal buffer */
	cpr_wptr = cpr_buf;

	/* initialize global variables used by the write operation */
	cpr_file_bn = cpr_statefile_offset();
	cpr_dev_space = 0;

	/* allocate bitmaps */
	if (CPR->c_bmda == NULL) {
		if (error = i_cpr_alloc_bitmaps()) {
			cpr_err(CE_WARN, "cannot allocate bitmaps");
			return (error);
		}
	}

	if (error = i_cpr_prom_pages(CPR_PROM_SAVE))
		return (error);

	if (error = i_cpr_dump_setup(vp))
		return (error);

	/*
	 * set internal cross checking; we don't want to call
	 * a disk driver that makes allocations until after
	 * sensitive pages are saved
	 */
	cpr_disk_writes_ok = 0;

	/*
	 * 1253112: heap corruption due to memory allocation when dumping
	 * statefile.
	 * Theoretically on Sun4u only the kernel data nucleus, kvalloc and
	 * kvseg segments can be contaminated should memory allocations happen
	 * during sddump, which is not supposed to happen after the system
	 * is quiesced.  Let's call the kernel pages that tend to be affected
	 * 'sensitive kpages' here.  To avoid saving inconsistent pages, we
	 * will allocate some storage space to save the clean sensitive pages
	 * aside before statefile dumping takes place.  Since there may not be
	 * much memory left at this stage, the sensitive pages will be
	 * compressed before they are saved into the storage area.
	 */
	if (error = i_cpr_save_sensitive_kpages()) {
		DEBUG7(errp("cpr_dump: save_sensitive_kpages failed!\n"));
		return (error);
	}

	/*
	 * since all cpr allocations are done (space for sensitive kpages,
	 * bitmaps, cpr_buf), kas is stable, and now we can accurately
	 * count regular and sensitive kpages.
	 */
	if (error = cpr_write_header(vp)) {
		DEBUG7(errp("cpr_dump: cpr_write_header() failed!\n"));
		return (error);
	}

	if (error = i_cpr_write_machdep(vp))
		return (error);

	if (error = i_cpr_blockzero(cpr_buf, &cpr_wptr, NULL, NULL))
		return (error);

	if (error = cpr_write_bitmap(vp))
		return (error);

	if (error = cpr_write_statefile(vp)) {
		DEBUG7(errp("cpr_dump: cpr_write_statefile() failed!\n"));
		return (error);
	}

	if (error = cpr_write_terminator(vp))
		return (error);

	if (error = cpr_flush_write(vp))
		return (error);

	if (error = i_cpr_blockzero(cpr_buf, &cpr_wptr, &cpr_file_bn, vp))
		return (error);

	return (0);
}


/*
 * cpr_walk() is called many 100x with a range within kvseg;
 * a page-count from each range is accumulated at arg->pages.
 */
static void
cpr_walk(void *arg, void *base, size_t size)
{
	struct cpr_walkinfo *cwip = arg;

	cwip->pages += cpr_count_pages(base, size,
	    cwip->mapflag, cwip->bitfunc, DBG_DONTSHOWRANGE);
	cwip->size += size;
	cwip->ranges++;
}


/*
 * faster scan of kvseg using vmem_walk() to visit
 * allocated ranges.
 */
pgcnt_t
cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg)
{
	struct cpr_walkinfo cwinfo;

	bzero(&cwinfo, sizeof (cwinfo));
	cwinfo.mapflag = mapflag;
	cwinfo.bitfunc = bitfunc;

	vmem_walk(heap_arena, VMEM_ALLOC, cpr_walk, &cwinfo);

	if (cpr_debug & LEVEL7) {
		errp("walked %d sub-ranges, total pages %ld\n",
		    cwinfo.ranges, mmu_btop(cwinfo.size));
		cpr_show_range(seg->s_base, seg->s_size,
		    mapflag, bitfunc, cwinfo.pages);
	}

	return (cwinfo.pages);
}


/*
 * cpr_walk_kpm() is called for every used area within the large
 * segkpm virtual address window.  A page-count is accumulated at
 * arg->pages.
 */
static void
cpr_walk_kpm(void *arg, void *base, size_t size)
{
	struct cpr_walkinfo *cwip = arg;

	cwip->pages += cpr_count_pages(base, size,
	    cwip->mapflag, cwip->bitfunc, DBG_DONTSHOWRANGE);
	cwip->size += size;
	cwip->ranges++;
}
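
/*
 * Both walkers above follow the callback contract used by vmem_walk()
 * and hat_kpm_walk(): the walker invokes
 *
 *	(*func)(arg, base, size);
 *
 * once per allocated/used range, and all accumulation happens in the
 * caller-owned struct cpr_walkinfo passed through arg.  A sketch of an
 * equivalent walk over a hypothetical private range list (r, list are
 * illustrative names, not part of this file):
 *
 *	for (r = list; r != NULL; r = r->next)
 *		(*func)(arg, r->base, r->size);
 */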

/*
 * faster scan of segkpm using hat_kpm_walk() to visit only used ranges.
 */
/*ARGSUSED*/
static pgcnt_t
cpr_scan_segkpm(int mapflag, bitfunc_t bitfunc, struct seg *seg)
{
	struct cpr_walkinfo cwinfo;

	if (kpm_enable == 0)
		return (0);

	bzero(&cwinfo, sizeof (cwinfo));
	cwinfo.mapflag = mapflag;
	cwinfo.bitfunc = bitfunc;
	hat_kpm_walk(cpr_walk_kpm, &cwinfo);

	if (cpr_debug & LEVEL7) {
		errp("walked %d sub-ranges, total pages %ld\n",
		    cwinfo.ranges, mmu_btop(cwinfo.size));
		cpr_show_range(segkpm->s_base, segkpm->s_size,
		    mapflag, bitfunc, cwinfo.pages);
	}

	return (cwinfo.pages);
}


/*
 * Sparsely filled kernel segments are registered in kseg_table for
 * easier lookup.  See also the block comment for cpr_count_seg_pages.
 */

#define	KSEG_SEG_ADDR	0	/* address of struct seg */
#define	KSEG_PTR_ADDR	1	/* address of pointer to struct seg */

typedef struct {
	struct seg **st_seg;	/* segment pointer or segment address */
	pgcnt_t (*st_fcn)(int, bitfunc_t, struct seg *); /* function to call */
	int st_addrtype;	/* address type in st_seg */
} ksegtbl_entry_t;

ksegtbl_entry_t kseg_table[] = {
	{(struct seg **)&kvseg,	cpr_scan_kvseg,		KSEG_SEG_ADDR},
	{&segkpm,		cpr_scan_segkpm,	KSEG_PTR_ADDR},
	{NULL,			0,			0}
};


/*
 * Compare seg with each entry in kseg_table; when there is a match
 * return the entry pointer, otherwise return NULL.
 */
static ksegtbl_entry_t *
cpr_sparse_seg_check(struct seg *seg)
{
	ksegtbl_entry_t *ste = &kseg_table[0];
	struct seg *tseg;

	for (; ste->st_seg; ste++) {
		tseg = (ste->st_addrtype == KSEG_PTR_ADDR) ?
		    *ste->st_seg : (struct seg *)ste->st_seg;
		if (seg == tseg)
			return (ste);
	}

	return ((ksegtbl_entry_t *)NULL);
}


/*
 * Count pages within each kernel segment; call cpr_sparse_seg_check()
 * to find out whether a sparsely filled segment needs special
 * treatment (e.g. kvseg).
 * Todo: A "SEGOP_CPR" like SEGOP_DUMP should be introduced; the cpr
 * module shouldn't need to know segment details such as whether it is
 * sparsely filled or not (this would make kseg_table obsolete).
 */
pgcnt_t
cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc)
{
	struct seg *segp;
	pgcnt_t pages;
	ksegtbl_entry_t *ste;

	pages = 0;
	for (segp = AS_SEGFIRST(&kas); segp; segp = AS_SEGNEXT(&kas, segp)) {
		if (ste = cpr_sparse_seg_check(segp)) {
			pages += (ste->st_fcn)(mapflag, bitfunc, segp);
		} else {
			pages += cpr_count_pages(segp->s_base,
			    segp->s_size, mapflag, bitfunc, DBG_SHOWRANGE);
		}
	}

	return (pages);
}
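
/*
 * Adding another sparse segment is one kseg_table entry.  A
 * hypothetical example (mysegp and cpr_scan_myseg are illustrative
 * names, not part of this file): a segment known only through a
 * global `struct seg *mysegp` is registered with KSEG_PTR_ADDR so
 * cpr_sparse_seg_check() dereferences the pointer, while a segment
 * declared directly as a global `struct seg`, like kvseg, is
 * registered by its own address with KSEG_SEG_ADDR:
 *
 *	{&mysegp,		cpr_scan_myseg,	KSEG_PTR_ADDR},
 *	{(struct seg **)&kvseg,	cpr_scan_kvseg,	KSEG_SEG_ADDR},
 */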


/*
 * count kernel pages within kas and any special ranges
 */
pgcnt_t
cpr_count_kpages(int mapflag, bitfunc_t bitfunc)
{
	pgcnt_t kas_cnt;

	/*
	 * Some pages need to be taken care of differently.
	 * e.g.: panicbuf pages of sun4m are not in kas but they need
	 * to be saved.  On sun4u, the physical pages of panicbuf are
	 * allocated via prom_retain().
	 */
	kas_cnt = i_cpr_count_special_kpages(mapflag, bitfunc);
	kas_cnt += cpr_count_seg_pages(mapflag, bitfunc);

	DEBUG9(errp("cpr_count_kpages: kas_cnt=%ld\n", kas_cnt));
	DEBUG7(errp("\ncpr_count_kpages: %ld pages, 0x%lx bytes\n",
	    kas_cnt, mmu_ptob(kas_cnt)));
	return (kas_cnt);
}


/*
 * Set the bit corresponding to the arg phys page number;
 * returns 0 when the ppn is valid and the corresponding
 * map bit was clear, otherwise returns 1.
 */
int
cpr_setbit(pfn_t ppn, int mapflag)
{
	char *bitmap;
	cbd_t *dp;
	pfn_t rel;
	int clr;

	for (dp = CPR->c_bmda; dp->cbd_size; dp++) {
		if (PPN_IN_RANGE(ppn, dp)) {
			bitmap = DESC_TO_MAP(dp, mapflag);
			rel = ppn - dp->cbd_spfn;
			if ((clr = isclr(bitmap, rel)) != 0)
				setbit(bitmap, rel);
			return (clr == 0);
		}
	}

	return (1);
}


/*
 * Clear the bit corresponding to the arg phys page number.
 */
int
cpr_clrbit(pfn_t ppn, int mapflag)
{
	char *bitmap;
	cbd_t *dp;
	pfn_t rel;
	int set;

	for (dp = CPR->c_bmda; dp->cbd_size; dp++) {
		if (PPN_IN_RANGE(ppn, dp)) {
			bitmap = DESC_TO_MAP(dp, mapflag);
			rel = ppn - dp->cbd_spfn;
			if ((set = isset(bitmap, rel)) != 0)
				clrbit(bitmap, rel);
			return (set == 0);
		}
	}

	return (1);
}


/* ARGSUSED */
int
cpr_nobit(pfn_t ppn, int mapflag)
{
	return (0);
}


/*
 * Look up the bit corresponding to the arg phys page number.
 */
int
cpr_isset(pfn_t ppn, int mapflag)
{
	char *bitmap;
	cbd_t *dp;
	pfn_t rel;

	for (dp = CPR->c_bmda; dp->cbd_size; dp++) {
		if (PPN_IN_RANGE(ppn, dp)) {
			bitmap = DESC_TO_MAP(dp, mapflag);
			rel = ppn - dp->cbd_spfn;
			return (isset(bitmap, rel));
		}
	}

	return (0);
}
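
/*
 * Bit-function return convention, by example (values illustrative):
 * with a descriptor covering cbd_spfn = 0x800, page 0x803 maps to
 * rel bit 3 of that descriptor's bitmap.  The first
 * cpr_setbit(0x803, REGULAR_BITMAP) finds the bit clear, sets it,
 * and returns 0; callers such as cpr_count_pages() count the page
 * exactly once on that transition.  A second call returns 1, as does
 * any ppn outside every descriptor's range, so duplicate and invalid
 * pages are never double-counted.
 */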


/*
 * Go through all pages and pick up any page not caught during the
 * invalidation stage.  This is also used to save pages with a cow
 * lock or phys page lock held (non-zero p_lckcnt or p_cowcnt).
 */
static int
cpr_count_upages(int mapflag, bitfunc_t bitfunc)
{
	page_t *pp, *page0;
	pgcnt_t dcnt = 0, tcnt = 0;
	pfn_t pfn;

	page0 = pp = page_first();

	do {
#if defined(__sparc)
		extern struct vnode prom_ppages;
		if (pp->p_vnode == NULL || pp->p_vnode == &kvp ||
		    pp->p_vnode == &prom_ppages ||
		    (PP_ISFREE(pp) && PP_ISAGED(pp)))
#else
		if (pp->p_vnode == NULL || pp->p_vnode == &kvp ||
		    (PP_ISFREE(pp) && PP_ISAGED(pp)))
#endif /* __sparc */
			continue;

		pfn = page_pptonum(pp);
		if (pf_is_memory(pfn)) {
			tcnt++;
			if ((*bitfunc)(pfn, mapflag) == 0)
				dcnt++;	/* dirty count */
		}
	} while ((pp = page_next(pp)) != page0);

	STAT->cs_upage2statef = dcnt;
	DEBUG9(errp("cpr_count_upages: dirty=%ld total=%ld\n",
	    dcnt, tcnt));
	DEBUG7(errp("cpr_count_upages: %ld pages, 0x%lx bytes\n",
	    dcnt, mmu_ptob(dcnt)));
	return (dcnt);
}


/*
 * Try compressing pages based on cflag,
 * and for DEBUG kernels, verify the uncompressed data checksum;
 *
 * this routine replaces common code from
 * i_cpr_compress_and_save() and cpr_compress_and_write()
 */
char *
cpr_compress_pages(cpd_t *dp, pgcnt_t pages, int cflag)
{
	size_t nbytes, clen, len;
	uint32_t test_sum;
	char *datap;

	nbytes = mmu_ptob(pages);

	/*
	 * set length to the original uncompressed data size;
	 * always init cpd_flag to zero
	 */
	dp->cpd_length = nbytes;
	dp->cpd_flag = 0;

#ifdef DEBUG
	/*
	 * Make a copy of the uncompressed data so we can checksum it.
	 * Compress that copy so the checksum works at the other end.
	 */
	cprbcopy(CPR->c_mapping_area, cpr_pagecopy, nbytes);
	dp->cpd_usum = checksum32(cpr_pagecopy, nbytes);
	dp->cpd_flag |= CPD_USUM;
	datap = cpr_pagecopy;
#else
	datap = CPR->c_mapping_area;
	dp->cpd_usum = 0;
#endif

	/*
	 * try compressing the raw data to cpr_pagedata;
	 * if there was a size reduction: record the new length,
	 * flag the compression, and point to the compressed data.
	 */
	dp->cpd_csum = 0;
	if (cflag) {
		clen = compress(datap, cpr_pagedata, nbytes);
		if (clen < nbytes) {
			dp->cpd_flag |= CPD_COMPRESS;
			dp->cpd_length = clen;
			datap = cpr_pagedata;
#ifdef DEBUG
			dp->cpd_csum = checksum32(datap, clen);
			dp->cpd_flag |= CPD_CSUM;

			/*
			 * decompress the data back to a scratch area
			 * and compare the new checksum with the original
			 * checksum to verify the compression.
			 */
			bzero(cpr_pagecopy, sizeof (cpr_pagecopy));
			len = decompress(datap, cpr_pagecopy,
			    clen, sizeof (cpr_pagecopy));
			test_sum = checksum32(cpr_pagecopy, len);
			ASSERT(test_sum == dp->cpd_usum);
#endif
		}
	}

	return (datap);
}
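
/*
 * Note (a sketch of the descriptor contract, inferred from the code
 * above): compression is strictly opportunistic.  If compress() fails
 * to shrink the run (clen >= nbytes), cpd_length keeps the raw size,
 * CPD_COMPRESS stays clear, and the caller writes the original page
 * images; the restore side then decompresses only records whose
 * descriptor carries CPD_COMPRESS.  Incompressible data therefore
 * costs no more than its raw size.
 */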


/*
 * 1. Prepare the cpr page descriptor and write it to the file
 * 2. Compress the page data and write it out
 */
static int
cpr_compress_and_write(vnode_t *vp, uint_t va, pfn_t pfn, pgcnt_t npg)
{
	int error = 0;
	char *datap;
	cpd_t cpd;	/* cpr page descriptor */
	extern void i_cpr_mapin(caddr_t, uint_t, pfn_t);
	extern void i_cpr_mapout(caddr_t, uint_t);

	i_cpr_mapin(CPR->c_mapping_area, npg, pfn);

	DEBUG3(errp("mapped-in %ld pages, vaddr 0x%p, pfn 0x%lx\n",
	    npg, CPR->c_mapping_area, pfn));

	/*
	 * Fill the cpr page descriptor.
	 */
	cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
	cpd.cpd_pfn = pfn;
	cpd.cpd_pages = npg;

	STAT->cs_dumped_statefsz += mmu_ptob(npg);

	datap = cpr_compress_pages(&cpd, npg, CPR->c_flags & C_COMPRESSING);

	/* Write the cpr page descriptor */
	error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd_t));

	/* Write the compressed page data, unless the descriptor failed */
	if (!error)
		error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);

	/*
	 * Unmap the pages for tlb and vac flushing
	 */
	i_cpr_mapout(CPR->c_mapping_area, npg);

	if (error) {
		DEBUG1(errp("cpr_compress_and_write: vp 0x%p va 0x%x ",
		    vp, va));
		DEBUG1(errp("pfn 0x%lx blk %d err %d\n",
		    pfn, cpr_file_bn, error));
	} else {
		cpr_regular_pgs_dumped += npg;
	}

	return (error);
}
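
/*
 * Buffer bookkeeping in cpr_write() below, by example (numbers
 * illustrative, assuming a 128K cpr_buf): a small descriptor followed
 * by an 8K page simply advances cpr_wptr; nothing reaches the disk
 * until cpr_wptr hits cpr_buf_end.  Only then is the full buffer
 * handed to VOP_DUMP() as cpr_buf_blocks disk blocks at block offset
 * cpr_file_bn, after checking that
 *
 *	dbtob(cpr_file_bn + cpr_buf_blocks)
 *
 * still fits within the device or file.  A single cpr_write() call
 * larger than the space left in the buffer is split across iterations
 * of the while loop.
 */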
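
/*
 * Run-scan example for cpr_contig_pages() (defined below; bit pattern
 * illustrative): for a descriptor whose regular bitmap starts
 * 1 1 1 0 0 1 ..., the scan finds the run (i=0, j=3) and hands pages
 * spfn..spfn+2 to cpr_compress_and_write() as one chunk, advances i
 * past the clear bit (i = 3 + 1 = 4, since j != CPR_MAXCONTIG), sees
 * bit 4 clear (i becomes 5), and starts the next chunk at bit 5.
 * Runs are also capped at CPR_MAXCONTIG pages so each chunk fits the
 * mapping area used by cpr_compress_and_write().
 */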


int
cpr_write(vnode_t *vp, caddr_t buffer, size_t size)
{
	caddr_t fromp = buffer;
	size_t bytes, wbytes;
	int error;

	if (cpr_dev_space == 0) {
		if (vp->v_type == VBLK) {
			cpr_dev_space = cpr_get_devsize(vp->v_rdev);
			ASSERT(cpr_dev_space);
		} else
			cpr_dev_space = 1;	/* not used in this case */
	}

	/*
	 * Break the write into multiple parts if the request is large.
	 * Calculate the count up to the buf page boundary, then write
	 * it out.  Repeat until done.
	 */
	while (size) {
		bytes = MIN(size, cpr_buf_end - cpr_wptr);
		cprbcopy(fromp, cpr_wptr, bytes);
		cpr_wptr += bytes;
		fromp += bytes;
		size -= bytes;
		if (cpr_wptr < cpr_buf_end)
			return (0);	/* buffer not full yet */
		ASSERT(cpr_wptr == cpr_buf_end);

		wbytes = dbtob(cpr_file_bn + cpr_buf_blocks);
		if (vp->v_type == VBLK) {
			if (wbytes > cpr_dev_space)
				return (ENOSPC);
		} else {
			if (wbytes > VTOI(vp)->i_size)
				return (ENOSPC);
		}

		DEBUG3(errp("cpr_write: frmp=%p wptr=%p cnt=%lx...",
		    fromp, cpr_wptr, bytes));
		/*
		 * cross check, this should not happen!
		 */
		if (cpr_disk_writes_ok == 0) {
			errp("cpr_write: disk write too early!\n");
			return (EINVAL);
		}

		do_polled_io = 1;
		error = VOP_DUMP(vp, cpr_buf, cpr_file_bn, cpr_buf_blocks);
		do_polled_io = 0;
		DEBUG3(errp("done\n"));

		STAT->cs_real_statefsz += cpr_buf_size;

		if (error) {
			cpr_err(CE_WARN, "cpr_write error %d", error);
			return (error);
		}
		cpr_file_bn += cpr_buf_blocks;	/* increment block count */
		cpr_wptr = cpr_buf;		/* back to top of buffer */
	}
	return (0);
}


int
cpr_flush_write(vnode_t *vp)
{
	int nblk;
	int error;

	/*
	 * Calculate the remaining blocks in the buffer, rounded up
	 * to the nearest disk block.
	 */
	nblk = btod(cpr_wptr - cpr_buf);

	do_polled_io = 1;
	error = VOP_DUMP(vp, (caddr_t)cpr_buf, cpr_file_bn, nblk);
	do_polled_io = 0;

	cpr_file_bn += nblk;
	if (error)
		DEBUG2(errp("cpr_flush_write: error (%d)\n", error));
	return (error);
}

void
cpr_clear_bitmaps(void)
{
	cbd_t *dp;

	for (dp = CPR->c_bmda; dp->cbd_size; dp++) {
		/* cbd_size covers one map; the reg and vlt maps are paired */
		bzero((void *)dp->cbd_reg_bitmap,
		    (size_t)dp->cbd_size * 2);
	}
	DEBUG7(errp("\ncleared reg and vlt bitmaps\n"));
}

int
cpr_contig_pages(vnode_t *vp, int flag)
{
	int chunks = 0, error = 0;
	pgcnt_t i, j, totbit;
	pfn_t spfn;
	cbd_t *dp;
	uint_t spin_cnt = 0;
	extern int i_cpr_compress_and_save();

	for (dp = CPR->c_bmda; dp->cbd_size; dp++) {
		spfn = dp->cbd_spfn;
		totbit = BTOb(dp->cbd_size);
		i = 0;	/* beginning of bitmap */
		j = 0;
		while (i < totbit) {
			while ((j < CPR_MAXCONTIG) && ((j + i) < totbit)) {
				if (isset((char *)dp->cbd_reg_bitmap, j+i))
					j++;
				else	/* not contiguous anymore */
					break;
			}

			if (j) {
				chunks++;
				if (flag == SAVE_TO_STORAGE) {
					error = i_cpr_compress_and_save(
					    chunks, spfn + i, j);
					if (error)
						return (error);
				} else if (flag == WRITE_TO_STATEFILE) {
					error = cpr_compress_and_write(vp, 0,
					    spfn + i, j);
					if (error)
						return (error);
					else {
						spin_cnt++;
						if ((spin_cnt & 0x5F) == 1)
							cpr_spinning_bar();
					}
				}
			}

			i += j;
			if (j != CPR_MAXCONTIG) {
				/* stopped on a non-tagged page */
				i++;
			}

			j = 0;
		}
	}

	if (flag == STORAGE_DESC_ALLOC)
		return (chunks);
	else
		return (0);
}


void
cpr_show_range(caddr_t vaddr, size_t size,
    int mapflag, bitfunc_t bitfunc, pgcnt_t count)
{
	char *action, *bname;

	bname = (mapflag == REGULAR_BITMAP) ? "regular" : "volatile";
	if (bitfunc == cpr_setbit)
		action = "tag";
	else if (bitfunc == cpr_clrbit)
		action = "untag";
	else
		action = "none";
	errp("range (0x%p, 0x%p), %s bitmap, %s %ld\n",
	    vaddr, vaddr + size, bname, action, count);
}


pgcnt_t
cpr_count_pages(caddr_t sva, size_t size,
    int mapflag, bitfunc_t bitfunc, int showrange)
{
	caddr_t va, eva;
	pfn_t pfn;
	pgcnt_t count = 0;

	eva = sva + PAGE_ROUNDUP(size);
	for (va = sva; va < eva; va += MMU_PAGESIZE) {
		pfn = va_to_pfn(va);
		if (pfn != PFN_INVALID && pf_is_memory(pfn)) {
			if ((*bitfunc)(pfn, mapflag) == 0)
				count++;
		}
	}

	if ((cpr_debug & LEVEL7) && showrange == DBG_SHOWRANGE)
		cpr_show_range(sva, size, mapflag, bitfunc, count);

	return (count);
}


pgcnt_t
cpr_count_volatile_pages(int mapflag, bitfunc_t bitfunc)
{
	pgcnt_t count = 0;

	if (cpr_buf) {
		count += cpr_count_pages(cpr_buf, cpr_buf_size,
		    mapflag, bitfunc, DBG_SHOWRANGE);
	}
	if (cpr_pagedata) {
		count += cpr_count_pages(cpr_pagedata, cpr_pagedata_size,
		    mapflag, bitfunc, DBG_SHOWRANGE);
	}
	count += i_cpr_count_storage_pages(mapflag, bitfunc);

	DEBUG7(errp("cpr_count_vpages: %ld pages, 0x%lx bytes\n",
	    count, mmu_ptob(count)));
	return (count);
}


static int
cpr_dump_regular_pages(vnode_t *vp)
{
	int error;

	cpr_regular_pgs_dumped = 0;
	error = cpr_contig_pages(vp, WRITE_TO_STATEFILE);
	if (!error)
		DEBUG7(errp("cpr_dump_regular_pages() done.\n"));
	return (error);
}