/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * Fill in and write out the cpr state file
 *	1. Allocate and write headers, ELF and cpr dump header
 *	2. Allocate bitmaps according to phys_install
 *	3. Tag kernel pages into corresponding bitmap
 *	4. Write bitmaps to state file
 *	5. Write actual physical page data to state file
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/vm.h>
#include <sys/memlist.h>
#include <sys/kmem.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_inode.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <vm/page.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/panic.h>
#include <sys/thread.h>
#include <sys/note.h>

/* Local defines and variables */
#define	BTOb(bytes)	((bytes) << 3)		/* Bytes to bits, log2(NBBY) */
#define	bTOB(bits)	((bits) >> 3)		/* bits to Bytes, log2(NBBY) */

#if defined(__sparc)
static uint_t	cpr_pages_tobe_dumped;
static uint_t	cpr_regular_pgs_dumped;
static int	cpr_dump_regular_pages(vnode_t *);
static int	cpr_count_upages(int, bitfunc_t);
static int	cpr_compress_and_write(vnode_t *, uint_t, pfn_t, pgcnt_t);
#endif

int	cpr_flush_write(vnode_t *);
int	cpr_contig_pages(vnode_t *, int);
void	cpr_clear_bitmaps(void);

extern size_t cpr_get_devsize(dev_t);
extern int i_cpr_dump_setup(vnode_t *);
extern int i_cpr_blockzero(char *, char **, int *, vnode_t *);
extern int cpr_test_mode;

int	cpr_setbit(pfn_t, int);
int	cpr_clrbit(pfn_t, int);

ctrm_t	cpr_term;

char	*cpr_buf, *cpr_buf_end;
int	cpr_buf_blocks;		/* size of cpr_buf in blocks */
size_t	cpr_buf_size;		/* size of cpr_buf in bytes */
size_t	cpr_bitmap_size;
int	cpr_nbitmaps;

char	*cpr_pagedata;		/* page buffer for compression / tmp copy */
size_t	cpr_pagedata_size;	/* page buffer size in bytes */

#if defined(__sparc)
static char	*cpr_wptr;	/* keep track of where to write to next */
static int	cpr_file_bn;	/* cpr state-file block offset */
static int	cpr_disk_writes_ok;
static size_t	cpr_dev_space = 0;
#endif

char	cpr_pagecopy[CPR_MAXCONTIG * MMU_PAGESIZE];

#if defined(__sparc)
/*
 * On some platforms bcopy may modify the thread structure
 * during bcopy (e.g., to prevent cpu migration). If the
 * range we are currently writing out includes our own
 * thread structure then it will be snapshotted by bcopy
 * including those modified members - and the updates made
 * on exit from bcopy will no longer be seen when we later
 * restore the mid-bcopy kthread_t. So if the range we
 * need to copy overlaps with our thread structure we will
 * use a simple byte copy.
 */
void
cprbcopy(void *from, void *to, size_t bytes)
{
	extern int curthreadremapped;
	caddr_t kthrend;

	kthrend = (caddr_t)curthread + sizeof (kthread_t) - 1;
	if (curthreadremapped || (kthrend >= (caddr_t)from &&
	    kthrend < (caddr_t)from + bytes + sizeof (kthread_t) - 1)) {
		caddr_t src = from, dst = to;

		while (bytes-- > 0)
			*dst++ = *src++;
	} else {
		bcopy(from, to, bytes);
	}
}

/*
 * Allocate pages for buffers used in writing out the statefile
 */
static int
cpr_alloc_bufs(void)
{
	char *allocerr = "Unable to allocate memory for cpr buffer";
	size_t size;

	/*
	 * set the cpr write buffer size to at least the historic
	 * size (128k) or large enough to store both the early
	 * set of statefile structures (well under 0x800) plus the
	 * bitmaps, and roundup to the next pagesize.
	 */
	size = PAGE_ROUNDUP(dbtob(4) + cpr_bitmap_size);
	cpr_buf_size = MAX(size, CPRBUFSZ);
	cpr_buf_blocks = btodb(cpr_buf_size);
	cpr_buf = kmem_alloc(cpr_buf_size, KM_NOSLEEP);
	if (cpr_buf == NULL) {
		cpr_err(CE_WARN, allocerr);
		return (ENOMEM);
	}
	cpr_buf_end = cpr_buf + cpr_buf_size;

	cpr_pagedata_size = mmu_ptob(CPR_MAXCONTIG + 1);
	cpr_pagedata = kmem_alloc(cpr_pagedata_size, KM_NOSLEEP);
	if (cpr_pagedata == NULL) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
		cpr_err(CE_WARN, allocerr);
		return (ENOMEM);
	}

	return (0);
}

/*
 * Set bitmap size in bytes based on phys_install.
 */
void
cpr_set_bitmap_size(void)
{
	struct memlist *pmem;
	size_t size = 0;

	memlist_read_lock();
	for (pmem = phys_install; pmem; pmem = pmem->next)
		size += pmem->size;
	memlist_read_unlock();
	cpr_bitmap_size = BITMAP_BYTES(size);
}

/*
 * CPR dump header contains the following information:
 *	1. header magic -- unique to cpr state file
 *	2. kernel return pc & ppn for resume
 *	3. current thread info
 *	4. debug level and test mode
 *	5. number of bitmaps allocated
 *	6. number of page records
 */
static int
cpr_write_header(vnode_t *vp)
{
	extern ushort_t cpr_mach_type;
	struct cpr_dump_desc cdump;
	pgcnt_t bitmap_pages;
	pgcnt_t kpages, vpages, upages;
	pgcnt_t cpr_count_kpages(int mapflag, bitfunc_t bitfunc);

	cdump.cdd_magic = (uint_t)CPR_DUMP_MAGIC;
	cdump.cdd_version = CPR_VERSION;
	cdump.cdd_machine = cpr_mach_type;
	cdump.cdd_debug = cpr_debug;
	cdump.cdd_test_mode = cpr_test_mode;
	cdump.cdd_bitmaprec = cpr_nbitmaps;

	cpr_clear_bitmaps();

	/*
	 * Remember how many pages we plan to save to statefile.
	 * This information will be used for sanity checks.
	 * Untag those pages that will not be saved to statefile.
	 */
	kpages = cpr_count_kpages(REGULAR_BITMAP, cpr_setbit);
	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
	upages = cpr_count_upages(REGULAR_BITMAP, cpr_setbit);
	cdump.cdd_dumppgsize = kpages - vpages + upages;
	cpr_pages_tobe_dumped = cdump.cdd_dumppgsize;
	CPR_DEBUG(CPR_DEBUG7,
	    "\ncpr_write_header: kpages %ld - vpages %ld + upages %ld = %d\n",
	    kpages, vpages, upages, cdump.cdd_dumppgsize);

	/*
	 * Some pages contain volatile data (cpr_buf and storage area for
	 * sensitive kpages), which are no longer needed after the statefile
	 * is dumped to disk. We have already untagged them from regular
	 * bitmaps. Now tag them into the volatile bitmaps. The pages in
	 * volatile bitmaps will be claimed during resume, and the resumed
	 * kernel will free them.
	 */
	(void) cpr_count_volatile_pages(VOLATILE_BITMAP, cpr_setbit);

	bitmap_pages = mmu_btopr(cpr_bitmap_size);

	/*
	 * Export accurate statefile size for statefile allocation retry.
	 * statefile_size = all the headers + total pages +
	 * number of pages used by the bitmaps.
	 * Roundup will be done in the file allocation code.
	 */
	STAT->cs_nocomp_statefsz = sizeof (cdd_t) + sizeof (cmd_t) +
	    (sizeof (cbd_t) * cdump.cdd_bitmaprec) +
	    (sizeof (cpd_t) * cdump.cdd_dumppgsize) +
	    mmu_ptob(cdump.cdd_dumppgsize + bitmap_pages);

	/*
	 * If the estimated statefile is not big enough,
	 * go retry now to save unnecessary operations.
	 */
	if (!(CPR->c_flags & C_COMPRESSING) &&
	    (STAT->cs_nocomp_statefsz > STAT->cs_est_statefsz)) {
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf("cpr_write_header: "
			    "STAT->cs_nocomp_statefsz > "
			    "STAT->cs_est_statefsz\n");
		return (ENOSPC);
	}

	/* now write cpr dump descriptor */
	return (cpr_write(vp, (caddr_t)&cdump, sizeof (cdd_t)));
}

/*
 * CPR dump tail record contains the following information:
 *	1. header magic -- unique to cpr state file
 *	2. all misc info that needs to be passed to cprboot or resumed kernel
 */
static int
cpr_write_terminator(vnode_t *vp)
{
	cpr_term.magic = (uint_t)CPR_TERM_MAGIC;
	cpr_term.va = (cpr_ptr)&cpr_term;
	cpr_term.pfn = (cpr_ext)va_to_pfn(&cpr_term);

	/* count the last one (flush) */
	cpr_term.real_statef_size = STAT->cs_real_statefsz +
	    btod(cpr_wptr - cpr_buf) * DEV_BSIZE;

	CPR_DEBUG(CPR_DEBUG9, "cpr_dump: Real Statefile Size: %ld\n",
	    STAT->cs_real_statefsz);

	cpr_tod_get(&cpr_term.tm_shutdown);

	return (cpr_write(vp, (caddr_t)&cpr_term, sizeof (cpr_term)));
}

/*
 * Write bitmap descriptor array, followed by merged bitmaps.
 */
static int
cpr_write_bitmap(vnode_t *vp)
{
	char *rmap, *vmap, *dst, *tail;
	size_t size, bytes;
	cbd_t *dp;
	int err;

	dp = CPR->c_bmda;
	if (err = cpr_write(vp, (caddr_t)dp, cpr_nbitmaps * sizeof (*dp)))
		return (err);

	/*
	 * merge regular and volatile bitmaps into tmp space
	 * and write to disk
	 */
	for (; dp->cbd_size; dp++) {
		rmap = (char *)dp->cbd_reg_bitmap;
		vmap = (char *)dp->cbd_vlt_bitmap;
		for (size = dp->cbd_size; size; size -= bytes) {
			bytes = min(size, sizeof (cpr_pagecopy));
			tail = &cpr_pagecopy[bytes];
			for (dst = cpr_pagecopy; dst < tail; dst++)
				*dst = *rmap++ | *vmap++;
			if (err = cpr_write(vp, cpr_pagecopy, bytes))
				break;
		}
	}

	return (err);
}

static int
cpr_write_statefile(vnode_t *vp)
{
	uint_t error = 0;
	extern int i_cpr_check_pgs_dumped();
	void flush_windows(void);
	pgcnt_t spages;
	char *str;

	flush_windows();

	/*
	 * to get an accurate view of kas, we need to untag sensitive
	 * pages *before* dumping them because the disk driver makes
	 * allocations and changes kas along the way. The remaining
	 * pages referenced in the bitmaps are dumped out later as
	 * regular kpages.
	 */
	str = "cpr_write_statefile:";
	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_clrbit);
	CPR_DEBUG(CPR_DEBUG7, "%s untag %ld sens pages\n", str, spages);

	/*
	 * now it's OK to call a driver that makes allocations
	 */
	cpr_disk_writes_ok = 1;

	/*
	 * now write out the clean sensitive kpages
	 * according to the sensitive descriptors
	 */
	error = i_cpr_dump_sensitive_kpages(vp);
	if (error) {
		CPR_DEBUG(CPR_DEBUG7,
		    "%s cpr_dump_sensitive_kpages() failed!\n", str);
		return (error);
	}

	/*
	 * cpr_dump_regular_pages() counts cpr_regular_pgs_dumped
	 */
	error = cpr_dump_regular_pages(vp);
	if (error) {
		CPR_DEBUG(CPR_DEBUG7,
		    "%s cpr_dump_regular_pages() failed!\n", str);
		return (error);
	}

	/*
	 * sanity check to verify the right number of pages were dumped
	 */
	error = i_cpr_check_pgs_dumped(cpr_pages_tobe_dumped,
	    cpr_regular_pgs_dumped);

	if (error) {
		prom_printf("\n%s page count mismatch!\n", str);
#ifdef DEBUG
		if (cpr_test_mode)
			debug_enter(NULL);
#endif
	}

	return (error);
}
#endif

/*
 * creates the CPR state file, the following sections are
 * written out in sequence:
 *	- writes the cpr dump header
 *	- writes the memory usage bitmaps
 *	- writes the platform dependent info
 *	- writes the remaining user pages
 *	- writes the kernel pages
 */
#if defined(__x86)
_NOTE(ARGSUSED(0))
#endif
int
cpr_dump(vnode_t *vp)
{
#if defined(__sparc)
	int error;

	if (cpr_buf == NULL) {
		ASSERT(cpr_pagedata == NULL);
		if (error = cpr_alloc_bufs())
			return (error);
	}

	/* point to top of internal buffer */
	cpr_wptr = cpr_buf;

	/* initialize global variables used by the write operation */
	cpr_file_bn = cpr_statefile_offset();
	cpr_dev_space = 0;

	/* allocate bitmaps */
	if (CPR->c_bmda == NULL) {
		if (error = i_cpr_alloc_bitmaps()) {
			cpr_err(CE_WARN, "cannot allocate bitmaps");
			return (error);
		}
	}

	if (error = i_cpr_prom_pages(CPR_PROM_SAVE))
		return (error);

	if (error = i_cpr_dump_setup(vp))
		return (error);

	/*
	 * set internal cross checking; we don't want to call
	 * a disk driver that makes allocations until after
	 * sensitive pages are saved
	 */
	cpr_disk_writes_ok = 0;

	/*
	 * 1253112: heap corruption due to memory allocation when dumping
	 * statefile.
	 * Theoretically on Sun4u only the kernel data nucleus, kvalloc and
	 * kvseg segments can be contaminated should memory allocations happen
	 * during sddump, which is not supposed to happen after the system
	 * is quiesced. Let's call the kernel pages that tend to be affected
	 * 'sensitive kpages' here. To avoid saving inconsistent pages, we
	 * will allocate some storage space to save the clean sensitive pages
	 * aside before statefile dumping takes place. Since there may not be
	 * much memory left at this stage, the sensitive pages will be
	 * compressed before they are saved into the storage area.
	 */
	if (error = i_cpr_save_sensitive_kpages()) {
		CPR_DEBUG(CPR_DEBUG7,
		    "cpr_dump: save_sensitive_kpages failed!\n");
		return (error);
	}

	/*
	 * since all cpr allocations are done (space for sensitive kpages,
	 * bitmaps, cpr_buf), kas is stable, and now we can accurately
	 * count regular and sensitive kpages.
*/ if (error = cpr_write_header(vp)) { CPR_DEBUG(CPR_DEBUG7, "cpr_dump: cpr_write_header() failed!\n"); return (error); } if (error = i_cpr_write_machdep(vp)) return (error); if (error = i_cpr_blockzero(cpr_buf, &cpr_wptr, NULL, NULL)) return (error); if (error = cpr_write_bitmap(vp)) return (error); if (error = cpr_write_statefile(vp)) { CPR_DEBUG(CPR_DEBUG7, "cpr_dump: cpr_write_statefile() failed!\n"); return (error); } if (error = cpr_write_terminator(vp)) return (error); if (error = cpr_flush_write(vp)) return (error); if (error = i_cpr_blockzero(cpr_buf, &cpr_wptr, &cpr_file_bn, vp)) return (error); #endif return (0); } #if defined(__sparc) /* * cpr_xwalk() is called many 100x with a range within kvseg or kvseg_reloc; * a page-count from each range is accumulated at arg->pages. */ static void cpr_xwalk(void *arg, void *base, size_t size) { struct cpr_walkinfo *cwip = arg; cwip->pages += cpr_count_pages(base, size, cwip->mapflag, cwip->bitfunc, DBG_DONTSHOWRANGE); cwip->size += size; cwip->ranges++; } /* * cpr_walk() is called many 100x with a range within kvseg or kvseg_reloc; * a page-count from each range is accumulated at arg->pages. */ static void cpr_walk(void *arg, void *base, size_t size) { caddr_t addr = base; caddr_t addr_end = addr + size; /* * If we are about to start walking the range of addresses we * carved out of the kernel heap for the large page heap walk * heap_lp_arena to find what segments are actually populated */ if (SEGKMEM_USE_LARGEPAGES && addr == heap_lp_base && addr_end == heap_lp_end && vmem_size(heap_lp_arena, VMEM_ALLOC) < size) { vmem_walk(heap_lp_arena, VMEM_ALLOC, cpr_xwalk, arg); } else { cpr_xwalk(arg, base, size); } } /* * faster scan of kvseg using vmem_walk() to visit * allocated ranges. */ pgcnt_t cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg) { struct cpr_walkinfo cwinfo; bzero(&cwinfo, sizeof (cwinfo)); cwinfo.mapflag = mapflag; cwinfo.bitfunc = bitfunc; vmem_walk(heap_arena, VMEM_ALLOC, cpr_walk, &cwinfo); if (cpr_debug & CPR_DEBUG7) { prom_printf("walked %d sub-ranges, total pages %ld\n", cwinfo.ranges, mmu_btop(cwinfo.size)); cpr_show_range(seg->s_base, seg->s_size, mapflag, bitfunc, cwinfo.pages); } return (cwinfo.pages); } /* * cpr_walk_kpm() is called for every used area within the large * segkpm virtual address window. A page-count is accumulated at * arg->pages. */ static void cpr_walk_kpm(void *arg, void *base, size_t size) { struct cpr_walkinfo *cwip = arg; cwip->pages += cpr_count_pages(base, size, cwip->mapflag, cwip->bitfunc, DBG_DONTSHOWRANGE); cwip->size += size; cwip->ranges++; } /* * faster scan of segkpm using hat_kpm_walk() to visit only used ranges. */ /*ARGSUSED*/ static pgcnt_t cpr_scan_segkpm(int mapflag, bitfunc_t bitfunc, struct seg *seg) { struct cpr_walkinfo cwinfo; if (kpm_enable == 0) return (0); bzero(&cwinfo, sizeof (cwinfo)); cwinfo.mapflag = mapflag; cwinfo.bitfunc = bitfunc; hat_kpm_walk(cpr_walk_kpm, &cwinfo); if (cpr_debug & CPR_DEBUG7) { prom_printf("walked %d sub-ranges, total pages %ld\n", cwinfo.ranges, mmu_btop(cwinfo.size)); cpr_show_range(segkpm->s_base, segkpm->s_size, mapflag, bitfunc, cwinfo.pages); } return (cwinfo.pages); } /* * Sparsely filled kernel segments are registered in kseg_table for * easier lookup. See also block comment for cpr_count_seg_pages. 
*/ #define KSEG_SEG_ADDR 0 /* address of struct seg */ #define KSEG_PTR_ADDR 1 /* address of pointer to struct seg */ typedef struct { struct seg **st_seg; /* segment pointer or segment address */ pgcnt_t (*st_fcn)(int, bitfunc_t, struct seg *); /* function to call */ int st_addrtype; /* address type in st_seg */ } ksegtbl_entry_t; ksegtbl_entry_t kseg_table[] = { {(struct seg **)&kvseg, cpr_scan_kvseg, KSEG_SEG_ADDR}, {&segkpm, cpr_scan_segkpm, KSEG_PTR_ADDR}, {NULL, 0, 0} }; /* * Compare seg with each entry in kseg_table; when there is a match * return the entry pointer, otherwise return NULL. */ static ksegtbl_entry_t * cpr_sparse_seg_check(struct seg *seg) { ksegtbl_entry_t *ste = &kseg_table[0]; struct seg *tseg; for (; ste->st_seg; ste++) { tseg = (ste->st_addrtype == KSEG_PTR_ADDR) ? *ste->st_seg : (struct seg *)ste->st_seg; if (seg == tseg) return (ste); } return ((ksegtbl_entry_t *)NULL); } /* * Count pages within each kernel segment; call cpr_sparse_seg_check() * to find out whether a sparsely filled segment needs special * treatment (e.g. kvseg). * Todo: A "SEGOP_CPR" like SEGOP_DUMP should be introduced, the cpr * module shouldn't need to know segment details like if it is * sparsely filled or not (makes kseg_table obsolete). */ pgcnt_t cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc) { struct seg *segp; pgcnt_t pages; ksegtbl_entry_t *ste; pages = 0; for (segp = AS_SEGFIRST(&kas); segp; segp = AS_SEGNEXT(&kas, segp)) { if (ste = cpr_sparse_seg_check(segp)) { pages += (ste->st_fcn)(mapflag, bitfunc, segp); } else { pages += cpr_count_pages(segp->s_base, segp->s_size, mapflag, bitfunc, DBG_SHOWRANGE); } } return (pages); } /* * count kernel pages within kas and any special ranges */ pgcnt_t cpr_count_kpages(int mapflag, bitfunc_t bitfunc) { pgcnt_t kas_cnt; /* * Some pages need to be taken care of differently. * eg: panicbuf pages of sun4m are not in kas but they need * to be saved. On sun4u, the physical pages of panicbuf are * allocated via prom_retain(). */ kas_cnt = i_cpr_count_special_kpages(mapflag, bitfunc); kas_cnt += cpr_count_seg_pages(mapflag, bitfunc); CPR_DEBUG(CPR_DEBUG9, "cpr_count_kpages: kas_cnt=%ld\n", kas_cnt); CPR_DEBUG(CPR_DEBUG7, "\ncpr_count_kpages: %ld pages, 0x%lx bytes\n", kas_cnt, mmu_ptob(kas_cnt)); return (kas_cnt); } /* * Set a bit corresponding to the arg phys page number; * returns 0 when the ppn is valid and the corresponding * map bit was clear, otherwise returns 1. */ int cpr_setbit(pfn_t ppn, int mapflag) { char *bitmap; cbd_t *dp; pfn_t rel; int clr; for (dp = CPR->c_bmda; dp->cbd_size; dp++) { if (PPN_IN_RANGE(ppn, dp)) { bitmap = DESC_TO_MAP(dp, mapflag); rel = ppn - dp->cbd_spfn; if ((clr = isclr(bitmap, rel)) != 0) setbit(bitmap, rel); return (clr == 0); } } return (1); } /* * Clear a bit corresponding to the arg phys page number. */ int cpr_clrbit(pfn_t ppn, int mapflag) { char *bitmap; cbd_t *dp; pfn_t rel; int set; for (dp = CPR->c_bmda; dp->cbd_size; dp++) { if (PPN_IN_RANGE(ppn, dp)) { bitmap = DESC_TO_MAP(dp, mapflag); rel = ppn - dp->cbd_spfn; if ((set = isset(bitmap, rel)) != 0) clrbit(bitmap, rel); return (set == 0); } } return (1); } /* ARGSUSED */ int cpr_nobit(pfn_t ppn, int mapflag) { return (0); } /* * Lookup a bit corresponding to the arg phys page number. 
*/ int cpr_isset(pfn_t ppn, int mapflag) { char *bitmap; cbd_t *dp; pfn_t rel; for (dp = CPR->c_bmda; dp->cbd_size; dp++) { if (PPN_IN_RANGE(ppn, dp)) { bitmap = DESC_TO_MAP(dp, mapflag); rel = ppn - dp->cbd_spfn; return (isset(bitmap, rel)); } } return (0); } /* * Go thru all pages and pick up any page not caught during the invalidation * stage. This is also used to save pages with cow lock or phys page lock held * (none zero p_lckcnt or p_cowcnt) */ static int cpr_count_upages(int mapflag, bitfunc_t bitfunc) { page_t *pp, *page0; pgcnt_t dcnt = 0, tcnt = 0; pfn_t pfn; page0 = pp = page_first(); do { #if defined(__sparc) extern struct vnode prom_ppages; if (pp->p_vnode == NULL || PP_ISKAS(pp) || pp->p_vnode == &prom_ppages || PP_ISFREE(pp) && PP_ISAGED(pp)) #else if (pp->p_vnode == NULL || PP_ISKAS(pp) || PP_ISFREE(pp) && PP_ISAGED(pp)) #endif /* __sparc */ continue; pfn = page_pptonum(pp); if (pf_is_memory(pfn)) { tcnt++; if ((*bitfunc)(pfn, mapflag) == 0) dcnt++; /* dirty count */ } } while ((pp = page_next(pp)) != page0); STAT->cs_upage2statef = dcnt; CPR_DEBUG(CPR_DEBUG9, "cpr_count_upages: dirty=%ld total=%ld\n", dcnt, tcnt); CPR_DEBUG(CPR_DEBUG7, "cpr_count_upages: %ld pages, 0x%lx bytes\n", dcnt, mmu_ptob(dcnt)); return (dcnt); } /* * try compressing pages based on cflag, * and for DEBUG kernels, verify uncompressed data checksum; * * this routine replaces common code from * i_cpr_compress_and_save() and cpr_compress_and_write() */ char * cpr_compress_pages(cpd_t *dp, pgcnt_t pages, int cflag) { size_t nbytes, clen, len; uint32_t test_sum; char *datap; nbytes = mmu_ptob(pages); /* * set length to the original uncompressed data size; * always init cpd_flag to zero */ dp->cpd_length = nbytes; dp->cpd_flag = 0; #ifdef DEBUG /* * Make a copy of the uncompressed data so we can checksum it. * Compress that copy so the checksum works at the other end */ cprbcopy(CPR->c_mapping_area, cpr_pagecopy, nbytes); dp->cpd_usum = checksum32(cpr_pagecopy, nbytes); dp->cpd_flag |= CPD_USUM; datap = cpr_pagecopy; #else datap = CPR->c_mapping_area; dp->cpd_usum = 0; #endif /* * try compressing the raw data to cpr_pagedata; * if there was a size reduction: record the new length, * flag the compression, and point to the compressed data. */ dp->cpd_csum = 0; if (cflag) { clen = compress(datap, cpr_pagedata, nbytes); if (clen < nbytes) { dp->cpd_flag |= CPD_COMPRESS; dp->cpd_length = clen; datap = cpr_pagedata; #ifdef DEBUG dp->cpd_csum = checksum32(datap, clen); dp->cpd_flag |= CPD_CSUM; /* * decompress the data back to a scratch area * and compare the new checksum with the original * checksum to verify the compression. */ bzero(cpr_pagecopy, sizeof (cpr_pagecopy)); len = decompress(datap, cpr_pagecopy, clen, sizeof (cpr_pagecopy)); test_sum = checksum32(cpr_pagecopy, len); ASSERT(test_sum == dp->cpd_usum); #endif } } return (datap); } /* * 1. Prepare cpr page descriptor and write it to file * 2. Compress page data and write it out */ static int cpr_compress_and_write(vnode_t *vp, uint_t va, pfn_t pfn, pgcnt_t npg) { int error = 0; char *datap; cpd_t cpd; /* cpr page descriptor */ extern void i_cpr_mapin(caddr_t, uint_t, pfn_t); extern void i_cpr_mapout(caddr_t, uint_t); i_cpr_mapin(CPR->c_mapping_area, npg, pfn); CPR_DEBUG(CPR_DEBUG3, "mapped-in %ld pages, vaddr 0x%p, pfn 0x%lx\n", npg, CPR->c_mapping_area, pfn); /* * Fill cpr page descriptor. 
*/ cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC; cpd.cpd_pfn = pfn; cpd.cpd_pages = npg; STAT->cs_dumped_statefsz += mmu_ptob(npg); datap = cpr_compress_pages(&cpd, npg, CPR->c_flags & C_COMPRESSING); /* Write cpr page descriptor */ error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd_t)); /* Write compressed page data */ error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length); /* * Unmap the pages for tlb and vac flushing */ i_cpr_mapout(CPR->c_mapping_area, npg); if (error) { CPR_DEBUG(CPR_DEBUG1, "cpr_compress_and_write: vp 0x%p va 0x%x ", vp, va); CPR_DEBUG(CPR_DEBUG1, "pfn 0x%lx blk %d err %d\n", pfn, cpr_file_bn, error); } else { cpr_regular_pgs_dumped += npg; } return (error); } int cpr_write(vnode_t *vp, caddr_t buffer, size_t size) { caddr_t fromp = buffer; size_t bytes, wbytes; int error; if (cpr_dev_space == 0) { if (vp->v_type == VBLK) { cpr_dev_space = cpr_get_devsize(vp->v_rdev); ASSERT(cpr_dev_space); } else cpr_dev_space = 1; /* not used in this case */ } /* * break the write into multiple part if request is large, * calculate count up to buf page boundary, then write it out. * repeat until done. */ while (size) { bytes = MIN(size, cpr_buf_end - cpr_wptr); cprbcopy(fromp, cpr_wptr, bytes); cpr_wptr += bytes; fromp += bytes; size -= bytes; if (cpr_wptr < cpr_buf_end) return (0); /* buffer not full yet */ ASSERT(cpr_wptr == cpr_buf_end); wbytes = dbtob(cpr_file_bn + cpr_buf_blocks); if (vp->v_type == VBLK) { if (wbytes > cpr_dev_space) return (ENOSPC); } else { if (wbytes > VTOI(vp)->i_size) return (ENOSPC); } CPR_DEBUG(CPR_DEBUG3, "cpr_write: frmp=%p wptr=%p cnt=%lx...", fromp, cpr_wptr, bytes); /* * cross check, this should not happen! */ if (cpr_disk_writes_ok == 0) { prom_printf("cpr_write: disk write too early!\n"); return (EINVAL); } do_polled_io = 1; error = VOP_DUMP(vp, cpr_buf, cpr_file_bn, cpr_buf_blocks, NULL); do_polled_io = 0; CPR_DEBUG(CPR_DEBUG3, "done\n"); STAT->cs_real_statefsz += cpr_buf_size; if (error) { cpr_err(CE_WARN, "cpr_write error %d", error); return (error); } cpr_file_bn += cpr_buf_blocks; /* Increment block count */ cpr_wptr = cpr_buf; /* back to top of buffer */ } return (0); } int cpr_flush_write(vnode_t *vp) { int nblk; int error; /* * Calculate remaining blocks in buffer, rounded up to nearest * disk block */ nblk = btod(cpr_wptr - cpr_buf); do_polled_io = 1; error = VOP_DUMP(vp, (caddr_t)cpr_buf, cpr_file_bn, nblk, NULL); do_polled_io = 0; cpr_file_bn += nblk; if (error) CPR_DEBUG(CPR_DEBUG2, "cpr_flush_write: error (%d)\n", error); return (error); } void cpr_clear_bitmaps(void) { cbd_t *dp; for (dp = CPR->c_bmda; dp->cbd_size; dp++) { bzero((void *)dp->cbd_reg_bitmap, (size_t)dp->cbd_size * 2); } CPR_DEBUG(CPR_DEBUG7, "\ncleared reg and vlt bitmaps\n"); } int cpr_contig_pages(vnode_t *vp, int flag) { int chunks = 0, error = 0; pgcnt_t i, j, totbit; pfn_t spfn; cbd_t *dp; uint_t spin_cnt = 0; extern int i_cpr_compress_and_save(); for (dp = CPR->c_bmda; dp->cbd_size; dp++) { spfn = dp->cbd_spfn; totbit = BTOb(dp->cbd_size); i = 0; /* Beginning of bitmap */ j = 0; while (i < totbit) { while ((j < CPR_MAXCONTIG) && ((j + i) < totbit)) { if (isset((char *)dp->cbd_reg_bitmap, j+i)) j++; else /* not contiguous anymore */ break; } if (j) { chunks++; if (flag == SAVE_TO_STORAGE) { error = i_cpr_compress_and_save( chunks, spfn + i, j); if (error) return (error); } else if (flag == WRITE_TO_STATEFILE) { error = cpr_compress_and_write(vp, 0, spfn + i, j); if (error) return (error); else { spin_cnt++; if ((spin_cnt & 0x5F) == 1) cpr_spinning_bar(); } } } i += j; if 
(j != CPR_MAXCONTIG) { /* Stopped on a non-tagged page */ i++; } j = 0; } } if (flag == STORAGE_DESC_ALLOC) return (chunks); else return (0); } void cpr_show_range(caddr_t vaddr, size_t size, int mapflag, bitfunc_t bitfunc, pgcnt_t count) { char *action, *bname; bname = (mapflag == REGULAR_BITMAP) ? "regular" : "volatile"; if (bitfunc == cpr_setbit) action = "tag"; else if (bitfunc == cpr_clrbit) action = "untag"; else action = "none"; prom_printf("range (0x%p, 0x%p), %s bitmap, %s %ld\n", vaddr, vaddr + size, bname, action, count); } pgcnt_t cpr_count_pages(caddr_t sva, size_t size, int mapflag, bitfunc_t bitfunc, int showrange) { caddr_t va, eva; pfn_t pfn; pgcnt_t count = 0; eva = sva + PAGE_ROUNDUP(size); for (va = sva; va < eva; va += MMU_PAGESIZE) { pfn = va_to_pfn(va); if (pfn != PFN_INVALID && pf_is_memory(pfn)) { if ((*bitfunc)(pfn, mapflag) == 0) count++; } } if ((cpr_debug & CPR_DEBUG7) && showrange == DBG_SHOWRANGE) cpr_show_range(sva, size, mapflag, bitfunc, count); return (count); } pgcnt_t cpr_count_volatile_pages(int mapflag, bitfunc_t bitfunc) { pgcnt_t count = 0; if (cpr_buf) { count += cpr_count_pages(cpr_buf, cpr_buf_size, mapflag, bitfunc, DBG_SHOWRANGE); } if (cpr_pagedata) { count += cpr_count_pages(cpr_pagedata, cpr_pagedata_size, mapflag, bitfunc, DBG_SHOWRANGE); } count += i_cpr_count_storage_pages(mapflag, bitfunc); CPR_DEBUG(CPR_DEBUG7, "cpr_count_vpages: %ld pages, 0x%lx bytes\n", count, mmu_ptob(count)); return (count); } static int cpr_dump_regular_pages(vnode_t *vp) { int error; cpr_regular_pgs_dumped = 0; error = cpr_contig_pages(vp, WRITE_TO_STATEFILE); if (!error) CPR_DEBUG(CPR_DEBUG7, "cpr_dump_regular_pages() done.\n"); return (error); } #endif
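
/*
 * Illustrative note on cpr_contig_pages(); the bit pattern below is an
 * invented example, not data from a real dump.
 *
 * For each bitmap descriptor the routine scans cbd_reg_bitmap and emits
 * one chunk per run of consecutively tagged pages. Suppose the bits,
 * relative to cbd_spfn, are 1 1 1 0 1 1 0 ... and CPR_MAXCONTIG is larger
 * than any run:
 *
 *	pass 1:	i = 0; j grows to 3 (bits 0..2 set, bit 3 clear)
 *		-> chunk of 3 pages starting at cbd_spfn + 0
 *		i += j (i = 3); since j != CPR_MAXCONTIG, i++ skips the
 *		clear bit that ended the run (i = 4)
 *	pass 2:	j grows to 2 (bits 4..5 set, bit 6 clear)
 *		-> chunk of 2 pages starting at cbd_spfn + 4
 *
 * A run is also cut off once j reaches CPR_MAXCONTIG, so no chunk exceeds
 * the CPR_MAXCONTIG-page staging buffers (cpr_pagecopy, cpr_pagedata);
 * in that case i is not bumped past a clear bit and the next pass simply
 * continues the same run.
 */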