17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 580d34432Sfrankho * Common Development and Distribution License (the "License"). 680d34432Sfrankho * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*d3d50737SRafael Vanoni * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/systm.h> 277c478bd9Sstevel@tonic-gate #include <sys/types.h> 287c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 297c478bd9Sstevel@tonic-gate #include <sys/errno.h> 307c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 317c478bd9Sstevel@tonic-gate #include <sys/debug.h> 327c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 337c478bd9Sstevel@tonic-gate #include <sys/conf.h> 347c478bd9Sstevel@tonic-gate #include <sys/proc.h> 357c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 367c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 377c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 387c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_filio.h> 397c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 407c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 417c478bd9Sstevel@tonic-gate #include <sys/inttypes.h> 427c478bd9Sstevel@tonic-gate #include <sys/callb.h> 437c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate /* 467c478bd9Sstevel@tonic-gate * Kernel threads for logging 477c478bd9Sstevel@tonic-gate * Currently only one for rolling the log (one per log). 487c478bd9Sstevel@tonic-gate */ 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate #define LUFS_DEFAULT_NUM_ROLL_BUFS 16 517c478bd9Sstevel@tonic-gate #define LUFS_DEFAULT_MIN_ROLL_BUFS 4 527c478bd9Sstevel@tonic-gate #define LUFS_DEFAULT_MAX_ROLL_BUFS 64 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate /* 557c478bd9Sstevel@tonic-gate * Macros 567c478bd9Sstevel@tonic-gate */ 577c478bd9Sstevel@tonic-gate #define logmap_need_roll(logmap) ((logmap)->mtm_nme > logmap_maxnme) 587c478bd9Sstevel@tonic-gate #define ldl_empty(ul) ((ul)->un_head_lof == (ul)->un_tail_lof) 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate /* 617c478bd9Sstevel@tonic-gate * Tunables 627c478bd9Sstevel@tonic-gate */ 637c478bd9Sstevel@tonic-gate uint32_t lufs_num_roll_bufs = LUFS_DEFAULT_NUM_ROLL_BUFS; 647c478bd9Sstevel@tonic-gate uint32_t lufs_min_roll_bufs = LUFS_DEFAULT_MIN_ROLL_BUFS; 657c478bd9Sstevel@tonic-gate uint32_t lufs_max_roll_bufs = LUFS_DEFAULT_MAX_ROLL_BUFS; 667c478bd9Sstevel@tonic-gate long logmap_maxnme = 1536; 677c478bd9Sstevel@tonic-gate int trans_roll_tics = 0; 687c478bd9Sstevel@tonic-gate uint64_t trans_roll_new_delta = 0; 697c478bd9Sstevel@tonic-gate uint64_t lrr_wait = 0; 707c478bd9Sstevel@tonic-gate /* 717c478bd9Sstevel@tonic-gate * Key for thread specific data for the roll thread to 727c478bd9Sstevel@tonic-gate * bypass snapshot throttling 737c478bd9Sstevel@tonic-gate */ 747c478bd9Sstevel@tonic-gate uint_t bypass_snapshot_throttle_key; 757c478bd9Sstevel@tonic-gate 767c478bd9Sstevel@tonic-gate /* 777c478bd9Sstevel@tonic-gate * externs 787c478bd9Sstevel@tonic-gate */ 797c478bd9Sstevel@tonic-gate extern kmutex_t ml_scan; 807c478bd9Sstevel@tonic-gate extern kcondvar_t ml_scan_cv; 817c478bd9Sstevel@tonic-gate extern int maxphys; 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate static void 847c478bd9Sstevel@tonic-gate trans_roll_wait(mt_map_t *logmap, callb_cpr_t *cprinfop) 857c478bd9Sstevel@tonic-gate { 867c478bd9Sstevel@tonic-gate mutex_enter(&logmap->mtm_mutex); 877c478bd9Sstevel@tonic-gate logmap->mtm_ref = 0; 887c478bd9Sstevel@tonic-gate if (logmap->mtm_flags & MTM_FORCE_ROLL) { 897c478bd9Sstevel@tonic-gate cv_broadcast(&logmap->mtm_from_roll_cv); 907c478bd9Sstevel@tonic-gate } 917c478bd9Sstevel@tonic-gate logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLLING); 927c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(cprinfop); 93*d3d50737SRafael Vanoni (void) cv_reltimedwait(&logmap->mtm_to_roll_cv, &logmap->mtm_mutex, 94*d3d50737SRafael Vanoni trans_roll_tics, TR_CLOCK_TICK); 957c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(cprinfop, &logmap->mtm_mutex); 967c478bd9Sstevel@tonic-gate logmap->mtm_flags |= MTM_ROLLING; 977c478bd9Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 987c478bd9Sstevel@tonic-gate } 997c478bd9Sstevel@tonic-gate 1007c478bd9Sstevel@tonic-gate /* 1017c478bd9Sstevel@tonic-gate * returns the number of 8K buffers to use for rolling the log 1027c478bd9Sstevel@tonic-gate */ 1037c478bd9Sstevel@tonic-gate static uint32_t 1047c478bd9Sstevel@tonic-gate log_roll_buffers() 1057c478bd9Sstevel@tonic-gate { 1067c478bd9Sstevel@tonic-gate /* 1077c478bd9Sstevel@tonic-gate * sanity validate the tunable lufs_num_roll_bufs 1087c478bd9Sstevel@tonic-gate */ 1097c478bd9Sstevel@tonic-gate if (lufs_num_roll_bufs < lufs_min_roll_bufs) { 1107c478bd9Sstevel@tonic-gate return (lufs_min_roll_bufs); 1117c478bd9Sstevel@tonic-gate } 1127c478bd9Sstevel@tonic-gate if (lufs_num_roll_bufs > lufs_max_roll_bufs) { 1137c478bd9Sstevel@tonic-gate return (lufs_max_roll_bufs); 1147c478bd9Sstevel@tonic-gate } 1157c478bd9Sstevel@tonic-gate return (lufs_num_roll_bufs); 1167c478bd9Sstevel@tonic-gate } 1177c478bd9Sstevel@tonic-gate 1187c478bd9Sstevel@tonic-gate /* 1197c478bd9Sstevel@tonic-gate * Find something to roll, then if we don't have cached roll buffers 1207c478bd9Sstevel@tonic-gate * covering all the deltas in that MAPBLOCK then read the master 1217c478bd9Sstevel@tonic-gate * and overlay the deltas. 1227c478bd9Sstevel@tonic-gate * returns; 1237c478bd9Sstevel@tonic-gate * 0 if sucessful 1247c478bd9Sstevel@tonic-gate * 1 on finding nothing to roll 1257c478bd9Sstevel@tonic-gate * 2 on error 1267c478bd9Sstevel@tonic-gate */ 1277c478bd9Sstevel@tonic-gate int 1287c478bd9Sstevel@tonic-gate log_roll_read(ml_unit_t *ul, rollbuf_t *rbs, int nmblk, caddr_t roll_bufs, 1297c478bd9Sstevel@tonic-gate int *retnbuf) 1307c478bd9Sstevel@tonic-gate { 1317c478bd9Sstevel@tonic-gate offset_t mof; 1327c478bd9Sstevel@tonic-gate buf_t *bp; 1337c478bd9Sstevel@tonic-gate rollbuf_t *rbp; 1347c478bd9Sstevel@tonic-gate mt_map_t *logmap = ul->un_logmap; 1357c478bd9Sstevel@tonic-gate daddr_t mblkno; 1367c478bd9Sstevel@tonic-gate int i; 1377c478bd9Sstevel@tonic-gate int error; 1387c478bd9Sstevel@tonic-gate int nbuf; 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate /* 1417c478bd9Sstevel@tonic-gate * Make sure there is really something to roll 1427c478bd9Sstevel@tonic-gate */ 1437c478bd9Sstevel@tonic-gate mof = 0; 1447c478bd9Sstevel@tonic-gate if (!logmap_next_roll(logmap, &mof)) { 1457c478bd9Sstevel@tonic-gate return (1); 1467c478bd9Sstevel@tonic-gate } 1477c478bd9Sstevel@tonic-gate 1487c478bd9Sstevel@tonic-gate /* 1497c478bd9Sstevel@tonic-gate * build some master blocks + deltas to roll forward 1507c478bd9Sstevel@tonic-gate */ 1517c478bd9Sstevel@tonic-gate rw_enter(&logmap->mtm_rwlock, RW_READER); 1527c478bd9Sstevel@tonic-gate nbuf = 0; 1537c478bd9Sstevel@tonic-gate do { 1547c478bd9Sstevel@tonic-gate mof = mof & (offset_t)MAPBLOCKMASK; 1557c478bd9Sstevel@tonic-gate mblkno = lbtodb(mof); 1567c478bd9Sstevel@tonic-gate 1577c478bd9Sstevel@tonic-gate /* 1587c478bd9Sstevel@tonic-gate * Check for the case of a new delta to a set up buffer 1597c478bd9Sstevel@tonic-gate */ 1607c478bd9Sstevel@tonic-gate for (i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) { 1617c478bd9Sstevel@tonic-gate if (P2ALIGN(rbp->rb_bh.b_blkno, 1627c478bd9Sstevel@tonic-gate MAPBLOCKSIZE / DEV_BSIZE) == mblkno) { 1637c478bd9Sstevel@tonic-gate TNF_PROBE_0(trans_roll_new_delta, "lufs", 1647c478bd9Sstevel@tonic-gate /* CSTYLED */); 1657c478bd9Sstevel@tonic-gate trans_roll_new_delta++; 1667c478bd9Sstevel@tonic-gate /* Flush out the current set of buffers */ 1677c478bd9Sstevel@tonic-gate goto flush_bufs; 1687c478bd9Sstevel@tonic-gate } 1697c478bd9Sstevel@tonic-gate } 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * Work out what to roll next. If it isn't cached then read 1737c478bd9Sstevel@tonic-gate * it asynchronously from the master. 1747c478bd9Sstevel@tonic-gate */ 1757c478bd9Sstevel@tonic-gate bp = &rbp->rb_bh; 1767c478bd9Sstevel@tonic-gate bp->b_blkno = mblkno; 1777c478bd9Sstevel@tonic-gate bp->b_flags = B_READ; 1787c478bd9Sstevel@tonic-gate bp->b_un.b_addr = roll_bufs + (nbuf << MAPBLOCKSHIFT); 1797c478bd9Sstevel@tonic-gate bp->b_bufsize = MAPBLOCKSIZE; 1807c478bd9Sstevel@tonic-gate if (top_read_roll(rbp, ul)) { 1817c478bd9Sstevel@tonic-gate /* logmap deltas were in use */ 1827c478bd9Sstevel@tonic-gate if (nbuf == 0) { 1837c478bd9Sstevel@tonic-gate /* 1847c478bd9Sstevel@tonic-gate * On first buffer wait for the logmap user 1857c478bd9Sstevel@tonic-gate * to finish by grabbing the logmap lock 1867c478bd9Sstevel@tonic-gate * exclusively rather than spinning 1877c478bd9Sstevel@tonic-gate */ 1887c478bd9Sstevel@tonic-gate rw_exit(&logmap->mtm_rwlock); 1897c478bd9Sstevel@tonic-gate lrr_wait++; 1907c478bd9Sstevel@tonic-gate rw_enter(&logmap->mtm_rwlock, RW_WRITER); 1917c478bd9Sstevel@tonic-gate rw_exit(&logmap->mtm_rwlock); 1927c478bd9Sstevel@tonic-gate return (1); 1937c478bd9Sstevel@tonic-gate } 1947c478bd9Sstevel@tonic-gate /* we have at least one buffer - flush it */ 1957c478bd9Sstevel@tonic-gate goto flush_bufs; 1967c478bd9Sstevel@tonic-gate } 1977c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_INVAL) == 0) { 1987c478bd9Sstevel@tonic-gate nbuf++; 1997c478bd9Sstevel@tonic-gate } 2007c478bd9Sstevel@tonic-gate mof += MAPBLOCKSIZE; 2017c478bd9Sstevel@tonic-gate } while ((nbuf < nmblk) && logmap_next_roll(logmap, &mof)); 2027c478bd9Sstevel@tonic-gate 2037c478bd9Sstevel@tonic-gate /* 2047c478bd9Sstevel@tonic-gate * If there was nothing to roll cycle back 2057c478bd9Sstevel@tonic-gate */ 2067c478bd9Sstevel@tonic-gate if (nbuf == 0) { 2077c478bd9Sstevel@tonic-gate rw_exit(&logmap->mtm_rwlock); 2087c478bd9Sstevel@tonic-gate return (1); 2097c478bd9Sstevel@tonic-gate } 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate flush_bufs: 2127c478bd9Sstevel@tonic-gate /* 2137c478bd9Sstevel@tonic-gate * For each buffer, if it isn't cached then wait for the read to 2147c478bd9Sstevel@tonic-gate * finish and overlay the deltas. 2157c478bd9Sstevel@tonic-gate */ 2167c478bd9Sstevel@tonic-gate for (error = 0, i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) { 2177c478bd9Sstevel@tonic-gate if (!rbp->rb_crb) { 2187c478bd9Sstevel@tonic-gate bp = &rbp->rb_bh; 2197c478bd9Sstevel@tonic-gate if (trans_not_wait(bp)) { 2207c478bd9Sstevel@tonic-gate ldl_seterror(ul, 2217c478bd9Sstevel@tonic-gate "Error reading master during ufs log roll"); 2227c478bd9Sstevel@tonic-gate error = 1; 2237c478bd9Sstevel@tonic-gate } 2247c478bd9Sstevel@tonic-gate /* 2257c478bd9Sstevel@tonic-gate * sync read the data from the log 2267c478bd9Sstevel@tonic-gate */ 2277c478bd9Sstevel@tonic-gate if (ldl_read(ul, bp->b_un.b_addr, 2287c478bd9Sstevel@tonic-gate ldbtob(bp->b_blkno) & (offset_t)MAPBLOCKMASK, 2297c478bd9Sstevel@tonic-gate MAPBLOCKSIZE, rbp->rb_age)) { 2307c478bd9Sstevel@tonic-gate error = 1; 2317c478bd9Sstevel@tonic-gate } 2327c478bd9Sstevel@tonic-gate } 2337c478bd9Sstevel@tonic-gate 2347c478bd9Sstevel@tonic-gate /* 2357c478bd9Sstevel@tonic-gate * reset the age bit in the age list 2367c478bd9Sstevel@tonic-gate */ 2377c478bd9Sstevel@tonic-gate logmap_list_put_roll(logmap, rbp->rb_age); 2387c478bd9Sstevel@tonic-gate 2397c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 2407c478bd9Sstevel@tonic-gate error = 1; 2417c478bd9Sstevel@tonic-gate } 2427c478bd9Sstevel@tonic-gate } 2437c478bd9Sstevel@tonic-gate rw_exit(&logmap->mtm_rwlock); 2447c478bd9Sstevel@tonic-gate if (error) 2457c478bd9Sstevel@tonic-gate return (2); 2467c478bd9Sstevel@tonic-gate *retnbuf = nbuf; 2477c478bd9Sstevel@tonic-gate return (0); 2487c478bd9Sstevel@tonic-gate } 2497c478bd9Sstevel@tonic-gate 2507c478bd9Sstevel@tonic-gate /* 2517c478bd9Sstevel@tonic-gate * Write out a cached roll buffer 2527c478bd9Sstevel@tonic-gate */ 2537c478bd9Sstevel@tonic-gate void 2547c478bd9Sstevel@tonic-gate log_roll_write_crb(ufsvfs_t *ufsvfsp, rollbuf_t *rbp) 2557c478bd9Sstevel@tonic-gate { 2567c478bd9Sstevel@tonic-gate crb_t *crb = rbp->rb_crb; 2577c478bd9Sstevel@tonic-gate buf_t *bp = &rbp->rb_bh; 2587c478bd9Sstevel@tonic-gate 2597c478bd9Sstevel@tonic-gate bp->b_blkno = lbtodb(crb->c_mof); 2607c478bd9Sstevel@tonic-gate bp->b_un.b_addr = crb->c_buf; 2617c478bd9Sstevel@tonic-gate bp->b_bcount = crb->c_nb; 2627c478bd9Sstevel@tonic-gate bp->b_bufsize = crb->c_nb; 2637c478bd9Sstevel@tonic-gate ASSERT((crb->c_nb & DEV_BMASK) == 0); 2647c478bd9Sstevel@tonic-gate bp->b_flags = B_WRITE; 2657c478bd9Sstevel@tonic-gate logstats.ls_rwrites.value.ui64++; 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate /* if snapshots are enabled, call it */ 2687c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_snapshot) { 2697c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 2707c478bd9Sstevel@tonic-gate } else { 2717c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 2727c478bd9Sstevel@tonic-gate } 2737c478bd9Sstevel@tonic-gate } 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate /* 2767c478bd9Sstevel@tonic-gate * Write out a set of non cached roll buffers 2777c478bd9Sstevel@tonic-gate */ 2787c478bd9Sstevel@tonic-gate void 2797c478bd9Sstevel@tonic-gate log_roll_write_bufs(ufsvfs_t *ufsvfsp, rollbuf_t *rbp) 2807c478bd9Sstevel@tonic-gate { 2817c478bd9Sstevel@tonic-gate buf_t *bp = &rbp->rb_bh; 2827c478bd9Sstevel@tonic-gate buf_t *bp2; 2837c478bd9Sstevel@tonic-gate rbsecmap_t secmap = rbp->rb_secmap; 2847c478bd9Sstevel@tonic-gate int j, k; 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate ASSERT(secmap); 2877c478bd9Sstevel@tonic-gate ASSERT((bp->b_flags & B_INVAL) == 0); 2887c478bd9Sstevel@tonic-gate 2897c478bd9Sstevel@tonic-gate do { /* for each contiguous block of sectors */ 2907c478bd9Sstevel@tonic-gate /* find start of next sector to write */ 2917c478bd9Sstevel@tonic-gate for (j = 0; j < 16; ++j) { 2927c478bd9Sstevel@tonic-gate if (secmap & UINT16_C(1)) 2937c478bd9Sstevel@tonic-gate break; 2947c478bd9Sstevel@tonic-gate secmap >>= 1; 2957c478bd9Sstevel@tonic-gate } 2967c478bd9Sstevel@tonic-gate bp->b_un.b_addr += (j << DEV_BSHIFT); 2977c478bd9Sstevel@tonic-gate bp->b_blkno += j; 2987c478bd9Sstevel@tonic-gate 2997c478bd9Sstevel@tonic-gate /* calculate number of sectors */ 3007c478bd9Sstevel@tonic-gate secmap >>= 1; 3017c478bd9Sstevel@tonic-gate j++; 3027c478bd9Sstevel@tonic-gate for (k = 1; j < 16; ++j) { 3037c478bd9Sstevel@tonic-gate if ((secmap & UINT16_C(1)) == 0) 3047c478bd9Sstevel@tonic-gate break; 3057c478bd9Sstevel@tonic-gate secmap >>= 1; 3067c478bd9Sstevel@tonic-gate k++; 3077c478bd9Sstevel@tonic-gate } 3087c478bd9Sstevel@tonic-gate bp->b_bcount = k << DEV_BSHIFT; 3097c478bd9Sstevel@tonic-gate bp->b_flags = B_WRITE; 3107c478bd9Sstevel@tonic-gate logstats.ls_rwrites.value.ui64++; 3117c478bd9Sstevel@tonic-gate 3127c478bd9Sstevel@tonic-gate /* if snapshots are enabled, call it */ 3137c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_snapshot) 3147c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, bp); 3157c478bd9Sstevel@tonic-gate else 3167c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 3177c478bd9Sstevel@tonic-gate if (secmap) { 3187c478bd9Sstevel@tonic-gate /* 3197c478bd9Sstevel@tonic-gate * Allocate another buf_t to handle 3207c478bd9Sstevel@tonic-gate * the next write in this MAPBLOCK 3217c478bd9Sstevel@tonic-gate * Chain them via b_list. 3227c478bd9Sstevel@tonic-gate */ 3237c478bd9Sstevel@tonic-gate bp2 = kmem_alloc(sizeof (buf_t), KM_SLEEP); 3247c478bd9Sstevel@tonic-gate bp->b_list = bp2; 3257c478bd9Sstevel@tonic-gate bioinit(bp2); 3267c478bd9Sstevel@tonic-gate bp2->b_iodone = trans_not_done; 3277c478bd9Sstevel@tonic-gate bp2->b_bufsize = MAPBLOCKSIZE; 3287c478bd9Sstevel@tonic-gate bp2->b_edev = bp->b_edev; 3297c478bd9Sstevel@tonic-gate bp2->b_un.b_addr = 3307c478bd9Sstevel@tonic-gate bp->b_un.b_addr + bp->b_bcount; 3317c478bd9Sstevel@tonic-gate bp2->b_blkno = bp->b_blkno + k; 3327c478bd9Sstevel@tonic-gate bp = bp2; 3337c478bd9Sstevel@tonic-gate } 3347c478bd9Sstevel@tonic-gate } while (secmap); 3357c478bd9Sstevel@tonic-gate } 3367c478bd9Sstevel@tonic-gate 3377c478bd9Sstevel@tonic-gate /* 3387c478bd9Sstevel@tonic-gate * Asynchronously roll the deltas, using the sector map 3397c478bd9Sstevel@tonic-gate * in each rollbuf_t. 3407c478bd9Sstevel@tonic-gate */ 3417c478bd9Sstevel@tonic-gate int 3427c478bd9Sstevel@tonic-gate log_roll_write(ml_unit_t *ul, rollbuf_t *rbs, int nbuf) 3437c478bd9Sstevel@tonic-gate { 3447c478bd9Sstevel@tonic-gate 3457c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ul->un_ufsvfs; 3467c478bd9Sstevel@tonic-gate rollbuf_t *rbp; 3477c478bd9Sstevel@tonic-gate buf_t *bp, *bp2; 3487c478bd9Sstevel@tonic-gate rollbuf_t *head, *prev, *rbp2; 3497c478bd9Sstevel@tonic-gate 3507c478bd9Sstevel@tonic-gate /* 3517c478bd9Sstevel@tonic-gate * Order the buffers by blkno 3527c478bd9Sstevel@tonic-gate */ 3537c478bd9Sstevel@tonic-gate ASSERT(nbuf > 0); 3547c478bd9Sstevel@tonic-gate #ifdef lint 3557c478bd9Sstevel@tonic-gate prev = rbs; 3567c478bd9Sstevel@tonic-gate #endif 3577c478bd9Sstevel@tonic-gate for (head = rbs, rbp = rbs + 1; rbp < rbs + nbuf; rbp++) { 3587c478bd9Sstevel@tonic-gate for (rbp2 = head; rbp2; prev = rbp2, rbp2 = rbp2->rb_next) { 3597c478bd9Sstevel@tonic-gate if (rbp->rb_bh.b_blkno < rbp2->rb_bh.b_blkno) { 3607c478bd9Sstevel@tonic-gate if (rbp2 == head) { 3617c478bd9Sstevel@tonic-gate rbp->rb_next = head; 3627c478bd9Sstevel@tonic-gate head = rbp; 3637c478bd9Sstevel@tonic-gate } else { 3647c478bd9Sstevel@tonic-gate prev->rb_next = rbp; 3657c478bd9Sstevel@tonic-gate rbp->rb_next = rbp2; 3667c478bd9Sstevel@tonic-gate } 3677c478bd9Sstevel@tonic-gate break; 3687c478bd9Sstevel@tonic-gate } 3697c478bd9Sstevel@tonic-gate } 3707c478bd9Sstevel@tonic-gate if (rbp2 == NULL) { 3717c478bd9Sstevel@tonic-gate prev->rb_next = rbp; 3727c478bd9Sstevel@tonic-gate rbp->rb_next = NULL; 3737c478bd9Sstevel@tonic-gate } 3747c478bd9Sstevel@tonic-gate } 3757c478bd9Sstevel@tonic-gate 3767c478bd9Sstevel@tonic-gate /* 3777c478bd9Sstevel@tonic-gate * issue the in-order writes 3787c478bd9Sstevel@tonic-gate */ 3797c478bd9Sstevel@tonic-gate for (rbp = head; rbp; rbp = rbp2) { 3807c478bd9Sstevel@tonic-gate if (rbp->rb_crb) { 3817c478bd9Sstevel@tonic-gate log_roll_write_crb(ufsvfsp, rbp); 3827c478bd9Sstevel@tonic-gate } else { 3837c478bd9Sstevel@tonic-gate log_roll_write_bufs(ufsvfsp, rbp); 3847c478bd9Sstevel@tonic-gate } 3857c478bd9Sstevel@tonic-gate /* null out the rb_next link for next set of rolling */ 3867c478bd9Sstevel@tonic-gate rbp2 = rbp->rb_next; 3877c478bd9Sstevel@tonic-gate rbp->rb_next = NULL; 3887c478bd9Sstevel@tonic-gate } 3897c478bd9Sstevel@tonic-gate 3907c478bd9Sstevel@tonic-gate /* 3917c478bd9Sstevel@tonic-gate * wait for all the writes to finish 3927c478bd9Sstevel@tonic-gate */ 3937c478bd9Sstevel@tonic-gate for (rbp = rbs; rbp < rbs + nbuf; rbp++) { 3947c478bd9Sstevel@tonic-gate bp = &rbp->rb_bh; 3957c478bd9Sstevel@tonic-gate if (trans_not_wait(bp)) { 3967c478bd9Sstevel@tonic-gate ldl_seterror(ul, 3977c478bd9Sstevel@tonic-gate "Error writing master during ufs log roll"); 3987c478bd9Sstevel@tonic-gate } 3997c478bd9Sstevel@tonic-gate 4007c478bd9Sstevel@tonic-gate /* 4017c478bd9Sstevel@tonic-gate * Now wait for all the "cloned" buffer writes (if any) 4027c478bd9Sstevel@tonic-gate * and free those headers 4037c478bd9Sstevel@tonic-gate */ 4047c478bd9Sstevel@tonic-gate bp2 = bp->b_list; 4057c478bd9Sstevel@tonic-gate bp->b_list = NULL; 4067c478bd9Sstevel@tonic-gate while (bp2) { 4077c478bd9Sstevel@tonic-gate if (trans_not_wait(bp2)) { 4087c478bd9Sstevel@tonic-gate ldl_seterror(ul, 4097c478bd9Sstevel@tonic-gate "Error writing master during ufs log roll"); 4107c478bd9Sstevel@tonic-gate } 4117c478bd9Sstevel@tonic-gate bp = bp2; 4127c478bd9Sstevel@tonic-gate bp2 = bp2->b_list; 4137c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (buf_t)); 4147c478bd9Sstevel@tonic-gate } 4157c478bd9Sstevel@tonic-gate } 4167c478bd9Sstevel@tonic-gate 4177c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) 4187c478bd9Sstevel@tonic-gate return (1); 4197c478bd9Sstevel@tonic-gate return (0); 4207c478bd9Sstevel@tonic-gate } 4217c478bd9Sstevel@tonic-gate 4227c478bd9Sstevel@tonic-gate void 4237c478bd9Sstevel@tonic-gate trans_roll(ml_unit_t *ul) 4247c478bd9Sstevel@tonic-gate { 4257c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 4267c478bd9Sstevel@tonic-gate mt_map_t *logmap = ul->un_logmap; 4277c478bd9Sstevel@tonic-gate rollbuf_t *rbs; 4287c478bd9Sstevel@tonic-gate rollbuf_t *rbp; 4297c478bd9Sstevel@tonic-gate buf_t *bp; 4307c478bd9Sstevel@tonic-gate caddr_t roll_bufs; 4317c478bd9Sstevel@tonic-gate uint32_t nmblk; 4327c478bd9Sstevel@tonic-gate int i; 4337c478bd9Sstevel@tonic-gate int doingforceroll; 4347c478bd9Sstevel@tonic-gate int nbuf; 4357c478bd9Sstevel@tonic-gate 4367c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &logmap->mtm_mutex, callb_generic_cpr, 4377c478bd9Sstevel@tonic-gate "trans_roll"); 4387c478bd9Sstevel@tonic-gate 4397c478bd9Sstevel@tonic-gate /* 4407c478bd9Sstevel@tonic-gate * We do not want the roll thread's writes to be 4417c478bd9Sstevel@tonic-gate * throttled by the snapshot. 4427c478bd9Sstevel@tonic-gate * If they are throttled then we can have a deadlock 4437c478bd9Sstevel@tonic-gate * between the roll thread and the snapshot taskq thread: 4447c478bd9Sstevel@tonic-gate * roll thread wants the throttling semaphore and 4457c478bd9Sstevel@tonic-gate * the snapshot taskq thread cannot release the semaphore 4467c478bd9Sstevel@tonic-gate * because it is writing to the log and the log is full. 4477c478bd9Sstevel@tonic-gate */ 4487c478bd9Sstevel@tonic-gate 4497c478bd9Sstevel@tonic-gate (void) tsd_set(bypass_snapshot_throttle_key, (void*)1); 4507c478bd9Sstevel@tonic-gate 4517c478bd9Sstevel@tonic-gate /* 4527c478bd9Sstevel@tonic-gate * setup some roll parameters 4537c478bd9Sstevel@tonic-gate */ 4547c478bd9Sstevel@tonic-gate if (trans_roll_tics == 0) 4557c478bd9Sstevel@tonic-gate trans_roll_tics = 5 * hz; 4567c478bd9Sstevel@tonic-gate nmblk = log_roll_buffers(); 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate /* 4597c478bd9Sstevel@tonic-gate * allocate the buffers and buffer headers 4607c478bd9Sstevel@tonic-gate */ 4617c478bd9Sstevel@tonic-gate roll_bufs = kmem_alloc(nmblk * MAPBLOCKSIZE, KM_SLEEP); 4627c478bd9Sstevel@tonic-gate rbs = kmem_alloc(nmblk * sizeof (rollbuf_t), KM_SLEEP); 4637c478bd9Sstevel@tonic-gate 4647c478bd9Sstevel@tonic-gate /* 4657c478bd9Sstevel@tonic-gate * initialize the buffer headers 4667c478bd9Sstevel@tonic-gate */ 4677c478bd9Sstevel@tonic-gate for (i = 0, rbp = rbs; i < nmblk; ++i, ++rbp) { 4687c478bd9Sstevel@tonic-gate rbp->rb_next = NULL; 4697c478bd9Sstevel@tonic-gate bp = &rbp->rb_bh; 4707c478bd9Sstevel@tonic-gate bioinit(bp); 4717c478bd9Sstevel@tonic-gate bp->b_edev = ul->un_dev; 4727c478bd9Sstevel@tonic-gate bp->b_iodone = trans_not_done; 4737c478bd9Sstevel@tonic-gate bp->b_bufsize = MAPBLOCKSIZE; 4747c478bd9Sstevel@tonic-gate } 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate doingforceroll = 0; 4777c478bd9Sstevel@tonic-gate 4787c478bd9Sstevel@tonic-gate again: 4797c478bd9Sstevel@tonic-gate /* 4807c478bd9Sstevel@tonic-gate * LOOP FOREVER 4817c478bd9Sstevel@tonic-gate */ 4827c478bd9Sstevel@tonic-gate 4837c478bd9Sstevel@tonic-gate /* 4847c478bd9Sstevel@tonic-gate * exit on demand 4857c478bd9Sstevel@tonic-gate */ 4867c478bd9Sstevel@tonic-gate mutex_enter(&logmap->mtm_mutex); 4877c478bd9Sstevel@tonic-gate if ((ul->un_flags & LDL_ERROR) || (logmap->mtm_flags & MTM_ROLL_EXIT)) { 4887c478bd9Sstevel@tonic-gate kmem_free(rbs, nmblk * sizeof (rollbuf_t)); 4897c478bd9Sstevel@tonic-gate kmem_free(roll_bufs, nmblk * MAPBLOCKSIZE); 4907c478bd9Sstevel@tonic-gate logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_RUNNING | 4917c478bd9Sstevel@tonic-gate MTM_ROLL_EXIT | MTM_ROLLING); 4927c478bd9Sstevel@tonic-gate cv_broadcast(&logmap->mtm_from_roll_cv); 4937c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 4947c478bd9Sstevel@tonic-gate thread_exit(); 4957c478bd9Sstevel@tonic-gate /* NOTREACHED */ 4967c478bd9Sstevel@tonic-gate } 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate /* 4997c478bd9Sstevel@tonic-gate * MT_SCAN debug mode 5007c478bd9Sstevel@tonic-gate * don't roll except in FORCEROLL situations 5017c478bd9Sstevel@tonic-gate */ 5027c478bd9Sstevel@tonic-gate if (logmap->mtm_debug & MT_SCAN) 5037c478bd9Sstevel@tonic-gate if ((logmap->mtm_flags & MTM_FORCE_ROLL) == 0) { 5047c478bd9Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 5057c478bd9Sstevel@tonic-gate trans_roll_wait(logmap, &cprinfo); 5067c478bd9Sstevel@tonic-gate goto again; 5077c478bd9Sstevel@tonic-gate } 5087c478bd9Sstevel@tonic-gate ASSERT(logmap->mtm_trimlof == 0); 5097c478bd9Sstevel@tonic-gate 5107c478bd9Sstevel@tonic-gate /* 5117c478bd9Sstevel@tonic-gate * If we've finished a force roll cycle then wakeup any 5127c478bd9Sstevel@tonic-gate * waiters. 5137c478bd9Sstevel@tonic-gate */ 5147c478bd9Sstevel@tonic-gate if (doingforceroll) { 5157c478bd9Sstevel@tonic-gate doingforceroll = 0; 5167c478bd9Sstevel@tonic-gate logmap->mtm_flags &= ~MTM_FORCE_ROLL; 5177c478bd9Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 5187c478bd9Sstevel@tonic-gate cv_broadcast(&logmap->mtm_from_roll_cv); 5197c478bd9Sstevel@tonic-gate } else { 5207c478bd9Sstevel@tonic-gate mutex_exit(&logmap->mtm_mutex); 5217c478bd9Sstevel@tonic-gate } 5227c478bd9Sstevel@tonic-gate 5237c478bd9Sstevel@tonic-gate /* 5247c478bd9Sstevel@tonic-gate * If someone wants us to roll something; then do it 5257c478bd9Sstevel@tonic-gate */ 5267c478bd9Sstevel@tonic-gate if (logmap->mtm_flags & MTM_FORCE_ROLL) { 5277c478bd9Sstevel@tonic-gate doingforceroll = 1; 5287c478bd9Sstevel@tonic-gate goto rollsomething; 5297c478bd9Sstevel@tonic-gate } 5307c478bd9Sstevel@tonic-gate 5317c478bd9Sstevel@tonic-gate /* 5327c478bd9Sstevel@tonic-gate * Log is busy, check if logmap is getting full. 5337c478bd9Sstevel@tonic-gate */ 5347c478bd9Sstevel@tonic-gate if (logmap_need_roll(logmap)) { 5357c478bd9Sstevel@tonic-gate goto rollsomething; 5367c478bd9Sstevel@tonic-gate } 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate /* 5397c478bd9Sstevel@tonic-gate * Check if the log is idle and is not empty 5407c478bd9Sstevel@tonic-gate */ 5417c478bd9Sstevel@tonic-gate if (!logmap->mtm_ref && !ldl_empty(ul)) { 5427c478bd9Sstevel@tonic-gate goto rollsomething; 5437c478bd9Sstevel@tonic-gate } 5447c478bd9Sstevel@tonic-gate 5457c478bd9Sstevel@tonic-gate /* 5467c478bd9Sstevel@tonic-gate * Log is busy, check if its getting full 5477c478bd9Sstevel@tonic-gate */ 5487c478bd9Sstevel@tonic-gate if (ldl_need_roll(ul)) { 5497c478bd9Sstevel@tonic-gate goto rollsomething; 5507c478bd9Sstevel@tonic-gate } 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate /* 5537c478bd9Sstevel@tonic-gate * nothing to do; wait a bit and then start over 5547c478bd9Sstevel@tonic-gate */ 5557c478bd9Sstevel@tonic-gate trans_roll_wait(logmap, &cprinfo); 5567c478bd9Sstevel@tonic-gate goto again; 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate /* 5597c478bd9Sstevel@tonic-gate * ROLL SOMETHING 5607c478bd9Sstevel@tonic-gate */ 5617c478bd9Sstevel@tonic-gate 5627c478bd9Sstevel@tonic-gate rollsomething: 5637c478bd9Sstevel@tonic-gate /* 5647c478bd9Sstevel@tonic-gate * Use the cached roll buffers, or read the master 5657c478bd9Sstevel@tonic-gate * and overlay the deltas 5667c478bd9Sstevel@tonic-gate */ 5677c478bd9Sstevel@tonic-gate switch (log_roll_read(ul, rbs, nmblk, roll_bufs, &nbuf)) { 5687c478bd9Sstevel@tonic-gate case 1: trans_roll_wait(logmap, &cprinfo); 5697c478bd9Sstevel@tonic-gate /* FALLTHROUGH */ 5707c478bd9Sstevel@tonic-gate case 2: goto again; 5717c478bd9Sstevel@tonic-gate /* default case is success */ 5727c478bd9Sstevel@tonic-gate } 5737c478bd9Sstevel@tonic-gate 5747c478bd9Sstevel@tonic-gate /* 5757c478bd9Sstevel@tonic-gate * Asynchronously write out the deltas 5767c478bd9Sstevel@tonic-gate */ 5777c478bd9Sstevel@tonic-gate if (log_roll_write(ul, rbs, nbuf)) 5787c478bd9Sstevel@tonic-gate goto again; 5797c478bd9Sstevel@tonic-gate 5807c478bd9Sstevel@tonic-gate /* 5817c478bd9Sstevel@tonic-gate * free up the deltas in the logmap 5827c478bd9Sstevel@tonic-gate */ 5837c478bd9Sstevel@tonic-gate for (i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) { 5847c478bd9Sstevel@tonic-gate bp = &rbp->rb_bh; 5857c478bd9Sstevel@tonic-gate logmap_remove_roll(logmap, 5867c478bd9Sstevel@tonic-gate ldbtob(bp->b_blkno) & (offset_t)MAPBLOCKMASK, MAPBLOCKSIZE); 5877c478bd9Sstevel@tonic-gate } 5887c478bd9Sstevel@tonic-gate 5897c478bd9Sstevel@tonic-gate /* 5907c478bd9Sstevel@tonic-gate * free up log space; if possible 5917c478bd9Sstevel@tonic-gate */ 5927c478bd9Sstevel@tonic-gate logmap_sethead(logmap, ul); 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate /* 5957c478bd9Sstevel@tonic-gate * LOOP 5967c478bd9Sstevel@tonic-gate */ 5977c478bd9Sstevel@tonic-gate goto again; 5987c478bd9Sstevel@tonic-gate } 599