17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 580d34432Sfrankho * Common Development and Distribution License (the "License"). 680d34432Sfrankho * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22*d3d50737SRafael Vanoni * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/systm.h> 277c478bd9Sstevel@tonic-gate #include <sys/types.h> 287c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 297c478bd9Sstevel@tonic-gate #include <sys/errno.h> 307c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 317c478bd9Sstevel@tonic-gate #include <sys/debug.h> 327c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 337c478bd9Sstevel@tonic-gate #include <sys/conf.h> 347c478bd9Sstevel@tonic-gate #include <sys/proc.h> 357c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 367c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 377c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 387c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_filio.h> 397c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 407c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 417c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 427c478bd9Sstevel@tonic-gate 437c478bd9Sstevel@tonic-gate extern int maxphys; 447c478bd9Sstevel@tonic-gate extern uint_t bypass_snapshot_throttle_key; 457c478bd9Sstevel@tonic-gate 467c478bd9Sstevel@tonic-gate extern struct kmem_cache *lufs_sv; 477c478bd9Sstevel@tonic-gate extern struct kmem_cache *lufs_bp; 487c478bd9Sstevel@tonic-gate 497c478bd9Sstevel@tonic-gate static void 507c478bd9Sstevel@tonic-gate makebusy(ml_unit_t *ul, buf_t *bp) 517c478bd9Sstevel@tonic-gate { 527c478bd9Sstevel@tonic-gate sema_p(&bp->b_sem); 537c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_ERROR) == 0) 547c478bd9Sstevel@tonic-gate return; 557c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ) 567c478bd9Sstevel@tonic-gate ldl_seterror(ul, "Error reading ufs log"); 577c478bd9Sstevel@tonic-gate else 587c478bd9Sstevel@tonic-gate ldl_seterror(ul, "Error writing ufs log"); 597c478bd9Sstevel@tonic-gate } 607c478bd9Sstevel@tonic-gate 617c478bd9Sstevel@tonic-gate static int 627c478bd9Sstevel@tonic-gate logdone(buf_t *bp) 637c478bd9Sstevel@tonic-gate { 647c478bd9Sstevel@tonic-gate bp->b_flags |= B_DONE; 657c478bd9Sstevel@tonic-gate 667c478bd9Sstevel@tonic-gate if (bp->b_flags & B_WRITE) 677c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 687c478bd9Sstevel@tonic-gate else 697c478bd9Sstevel@tonic-gate /* wakeup the thread waiting on this buf */ 707c478bd9Sstevel@tonic-gate sema_v(&bp->b_io); 717c478bd9Sstevel@tonic-gate return (0); 727c478bd9Sstevel@tonic-gate } 737c478bd9Sstevel@tonic-gate 747c478bd9Sstevel@tonic-gate static int 757c478bd9Sstevel@tonic-gate ldl_strategy_done(buf_t *cb) 767c478bd9Sstevel@tonic-gate { 777c478bd9Sstevel@tonic-gate lufs_save_t *sv; 787c478bd9Sstevel@tonic-gate lufs_buf_t *lbp; 797c478bd9Sstevel@tonic-gate buf_t *bp; 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&cb->b_sem)); 827c478bd9Sstevel@tonic-gate ASSERT((cb->b_flags & B_DONE) == 0); 837c478bd9Sstevel@tonic-gate 847c478bd9Sstevel@tonic-gate /* 857c478bd9Sstevel@tonic-gate * Compute address of the ``save'' struct 867c478bd9Sstevel@tonic-gate */ 877c478bd9Sstevel@tonic-gate lbp = (lufs_buf_t *)cb; 887c478bd9Sstevel@tonic-gate sv = (lufs_save_t *)lbp->lb_ptr; 897c478bd9Sstevel@tonic-gate 907c478bd9Sstevel@tonic-gate if (cb->b_flags & B_ERROR) 917c478bd9Sstevel@tonic-gate sv->sv_error = 1; 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate /* 947c478bd9Sstevel@tonic-gate * If this is the last request, release the resources and 957c478bd9Sstevel@tonic-gate * ``done'' the original buffer header. 967c478bd9Sstevel@tonic-gate */ 977c478bd9Sstevel@tonic-gate if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) { 987c478bd9Sstevel@tonic-gate kmem_cache_free(lufs_bp, lbp); 997c478bd9Sstevel@tonic-gate return (1); 1007c478bd9Sstevel@tonic-gate } 1017c478bd9Sstevel@tonic-gate /* Propagate any errors back to the original buffer header */ 1027c478bd9Sstevel@tonic-gate bp = sv->sv_bp; 1037c478bd9Sstevel@tonic-gate if (sv->sv_error) 1047c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 1057c478bd9Sstevel@tonic-gate kmem_cache_free(lufs_bp, lbp); 1067c478bd9Sstevel@tonic-gate kmem_cache_free(lufs_sv, sv); 1077c478bd9Sstevel@tonic-gate 1087c478bd9Sstevel@tonic-gate biodone(bp); 1097c478bd9Sstevel@tonic-gate return (0); 1107c478bd9Sstevel@tonic-gate } 1117c478bd9Sstevel@tonic-gate 1127c478bd9Sstevel@tonic-gate /* 1137c478bd9Sstevel@tonic-gate * Map the log logical block number to a physical disk block number 1147c478bd9Sstevel@tonic-gate */ 1157c478bd9Sstevel@tonic-gate static int 1167c478bd9Sstevel@tonic-gate map_frag( 1177c478bd9Sstevel@tonic-gate ml_unit_t *ul, 1187c478bd9Sstevel@tonic-gate daddr_t lblkno, 1197c478bd9Sstevel@tonic-gate size_t bcount, 1207c478bd9Sstevel@tonic-gate daddr_t *pblkno, 1217c478bd9Sstevel@tonic-gate size_t *pbcount) 1227c478bd9Sstevel@tonic-gate { 1237c478bd9Sstevel@tonic-gate ic_extent_t *ext = ul->un_ebp->ic_extents; 1247c478bd9Sstevel@tonic-gate uint32_t e = ul->un_ebp->ic_nextents; 1257c478bd9Sstevel@tonic-gate uint32_t s = 0; 1267c478bd9Sstevel@tonic-gate uint32_t i = e >> 1; 1277c478bd9Sstevel@tonic-gate uint32_t lasti = i; 1287c478bd9Sstevel@tonic-gate uint32_t bno_off; 1297c478bd9Sstevel@tonic-gate 1307c478bd9Sstevel@tonic-gate again: 1317c478bd9Sstevel@tonic-gate if (ext[i].ic_lbno <= lblkno) { 1327c478bd9Sstevel@tonic-gate if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) { 1337c478bd9Sstevel@tonic-gate /* FOUND IT */ 1347c478bd9Sstevel@tonic-gate bno_off = lblkno - (uint32_t)ext[i].ic_lbno; 1357c478bd9Sstevel@tonic-gate *pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off)); 1367c478bd9Sstevel@tonic-gate *pblkno = ext[i].ic_pbno + bno_off; 1377c478bd9Sstevel@tonic-gate return (0); 1387c478bd9Sstevel@tonic-gate } else 1397c478bd9Sstevel@tonic-gate s = i; 1407c478bd9Sstevel@tonic-gate } else 1417c478bd9Sstevel@tonic-gate e = i; 1427c478bd9Sstevel@tonic-gate i = s + ((e - s) >> 1); 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate if (i == lasti) { 1457c478bd9Sstevel@tonic-gate *pbcount = bcount; 1467c478bd9Sstevel@tonic-gate return (ENOENT); 1477c478bd9Sstevel@tonic-gate } 1487c478bd9Sstevel@tonic-gate lasti = i; 1497c478bd9Sstevel@tonic-gate 1507c478bd9Sstevel@tonic-gate goto again; 1517c478bd9Sstevel@tonic-gate } 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate /* 1547c478bd9Sstevel@tonic-gate * The log is a set of extents (which typically will be only one, but 1557c478bd9Sstevel@tonic-gate * may be more if the disk was close to full when the log was created) 1567c478bd9Sstevel@tonic-gate * and hence the logical offsets into the log 1577c478bd9Sstevel@tonic-gate * have to be translated into their real device locations before 1587c478bd9Sstevel@tonic-gate * calling the device's strategy routine. The translation may result 1597c478bd9Sstevel@tonic-gate * in several IO requests if this request spans extents. 1607c478bd9Sstevel@tonic-gate */ 1617c478bd9Sstevel@tonic-gate void 1627c478bd9Sstevel@tonic-gate ldl_strategy(ml_unit_t *ul, buf_t *pb) 1637c478bd9Sstevel@tonic-gate { 1647c478bd9Sstevel@tonic-gate lufs_save_t *sv; 1657c478bd9Sstevel@tonic-gate lufs_buf_t *lbp; 1667c478bd9Sstevel@tonic-gate buf_t *cb; 1677c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ul->un_ufsvfs; 1687c478bd9Sstevel@tonic-gate daddr_t lblkno, pblkno; 1697c478bd9Sstevel@tonic-gate size_t nb_left, pbcount; 1707c478bd9Sstevel@tonic-gate off_t offset; 1717c478bd9Sstevel@tonic-gate dev_t dev = ul->un_dev; 1727c478bd9Sstevel@tonic-gate int error; 1737c478bd9Sstevel@tonic-gate int read = pb->b_flags & B_READ; 1747c478bd9Sstevel@tonic-gate 1757c478bd9Sstevel@tonic-gate /* 1767c478bd9Sstevel@tonic-gate * Allocate and initialise the save stucture, 1777c478bd9Sstevel@tonic-gate */ 1787c478bd9Sstevel@tonic-gate sv = kmem_cache_alloc(lufs_sv, KM_SLEEP); 1797c478bd9Sstevel@tonic-gate sv->sv_error = 0; 1807c478bd9Sstevel@tonic-gate sv->sv_bp = pb; 1817c478bd9Sstevel@tonic-gate nb_left = pb->b_bcount; 1827c478bd9Sstevel@tonic-gate sv->sv_nb_left = nb_left; 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate lblkno = pb->b_blkno; 1857c478bd9Sstevel@tonic-gate offset = 0; 1867c478bd9Sstevel@tonic-gate 1877c478bd9Sstevel@tonic-gate do { 1887c478bd9Sstevel@tonic-gate error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount); 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP); 1917c478bd9Sstevel@tonic-gate bioinit(&lbp->lb_buf); 1927c478bd9Sstevel@tonic-gate lbp->lb_ptr = sv; 1937c478bd9Sstevel@tonic-gate 1947c478bd9Sstevel@tonic-gate cb = bioclone(pb, offset, pbcount, dev, 1957c478bd9Sstevel@tonic-gate pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP); 1967c478bd9Sstevel@tonic-gate 1977c478bd9Sstevel@tonic-gate offset += pbcount; 1987c478bd9Sstevel@tonic-gate lblkno += btodb(pbcount); 1997c478bd9Sstevel@tonic-gate nb_left -= pbcount; 2007c478bd9Sstevel@tonic-gate 2017c478bd9Sstevel@tonic-gate if (error) { 2027c478bd9Sstevel@tonic-gate cb->b_flags |= B_ERROR; 2037c478bd9Sstevel@tonic-gate cb->b_resid = cb->b_bcount; 2047c478bd9Sstevel@tonic-gate biodone(cb); 2057c478bd9Sstevel@tonic-gate } else { 2067c478bd9Sstevel@tonic-gate if (read) { 2077c478bd9Sstevel@tonic-gate logstats.ls_ldlreads.value.ui64++; 208*d3d50737SRafael Vanoni ufsvfsp->vfs_iotstamp = ddi_get_lbolt(); 2097c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 2107c478bd9Sstevel@tonic-gate } else { 2117c478bd9Sstevel@tonic-gate logstats.ls_ldlwrites.value.ui64++; 2127c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_OUBLK, 1); 2137c478bd9Sstevel@tonic-gate } 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate /* 2167c478bd9Sstevel@tonic-gate * write through the snapshot driver if necessary 2177c478bd9Sstevel@tonic-gate * We do not want this write to be throttled because 2187c478bd9Sstevel@tonic-gate * we are holding the un_log mutex here. If we 2197c478bd9Sstevel@tonic-gate * are throttled in fssnap_translate, the fssnap_taskq 2207c478bd9Sstevel@tonic-gate * thread which can wake us up can get blocked on 2217c478bd9Sstevel@tonic-gate * the un_log mutex resulting in a deadlock. 2227c478bd9Sstevel@tonic-gate */ 2237c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_snapshot) { 22480d34432Sfrankho (void) tsd_set(bypass_snapshot_throttle_key, 2257c478bd9Sstevel@tonic-gate (void *)1); 2267c478bd9Sstevel@tonic-gate fssnap_strategy(&ufsvfsp->vfs_snapshot, cb); 2277c478bd9Sstevel@tonic-gate 22880d34432Sfrankho (void) tsd_set(bypass_snapshot_throttle_key, 2297c478bd9Sstevel@tonic-gate (void *)0); 2307c478bd9Sstevel@tonic-gate } else { 2317c478bd9Sstevel@tonic-gate (void) bdev_strategy(cb); 2327c478bd9Sstevel@tonic-gate } 2337c478bd9Sstevel@tonic-gate } 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate } while (nb_left); 2367c478bd9Sstevel@tonic-gate } 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate static void 2397c478bd9Sstevel@tonic-gate writelog(ml_unit_t *ul, buf_t *bp) 2407c478bd9Sstevel@tonic-gate { 2417c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate /* 2447c478bd9Sstevel@tonic-gate * This is really an B_ASYNC write but we want Presto to 2457c478bd9Sstevel@tonic-gate * cache this write. The iodone routine, logdone, processes 2467c478bd9Sstevel@tonic-gate * the buf correctly. 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate bp->b_flags = B_WRITE; 2497c478bd9Sstevel@tonic-gate bp->b_edev = ul->un_dev; 2507c478bd9Sstevel@tonic-gate bp->b_iodone = logdone; 2517c478bd9Sstevel@tonic-gate 2527c478bd9Sstevel@tonic-gate /* 2537c478bd9Sstevel@tonic-gate * return EIO for every IO if in hard error state 2547c478bd9Sstevel@tonic-gate */ 2557c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 2567c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 2577c478bd9Sstevel@tonic-gate bp->b_error = EIO; 2587c478bd9Sstevel@tonic-gate biodone(bp); 2597c478bd9Sstevel@tonic-gate return; 2607c478bd9Sstevel@tonic-gate } 2617c478bd9Sstevel@tonic-gate 2627c478bd9Sstevel@tonic-gate ldl_strategy(ul, bp); 2637c478bd9Sstevel@tonic-gate } 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate static void 2667c478bd9Sstevel@tonic-gate readlog(ml_unit_t *ul, buf_t *bp) 2677c478bd9Sstevel@tonic-gate { 2687c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 2697c478bd9Sstevel@tonic-gate ASSERT(bp->b_bcount); 2707c478bd9Sstevel@tonic-gate 2717c478bd9Sstevel@tonic-gate bp->b_flags = B_READ; 2727c478bd9Sstevel@tonic-gate bp->b_edev = ul->un_dev; 2737c478bd9Sstevel@tonic-gate bp->b_iodone = logdone; 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate /* all IO returns errors when in error state */ 2767c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 2777c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 2787c478bd9Sstevel@tonic-gate bp->b_error = EIO; 2797c478bd9Sstevel@tonic-gate biodone(bp); 2807c478bd9Sstevel@tonic-gate (void) trans_wait(bp); 2817c478bd9Sstevel@tonic-gate return; 2827c478bd9Sstevel@tonic-gate } 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate ldl_strategy(ul, bp); 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate if (trans_wait(bp)) 2877c478bd9Sstevel@tonic-gate ldl_seterror(ul, "Error reading ufs log"); 2887c478bd9Sstevel@tonic-gate } 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate /* 2917c478bd9Sstevel@tonic-gate * NOTE: writers are single threaded thru the log layer. 2927c478bd9Sstevel@tonic-gate * This means we can safely reference and change the cb and bp fields 2937c478bd9Sstevel@tonic-gate * that ldl_read does not reference w/o holding the cb_rwlock or 2947c478bd9Sstevel@tonic-gate * the bp makebusy lock. 2957c478bd9Sstevel@tonic-gate */ 2967c478bd9Sstevel@tonic-gate static void 2977c478bd9Sstevel@tonic-gate push_dirty_bp(ml_unit_t *ul, buf_t *bp) 2987c478bd9Sstevel@tonic-gate { 2997c478bd9Sstevel@tonic-gate buf_t *newbp; 3007c478bd9Sstevel@tonic-gate cirbuf_t *cb = &ul->un_wrbuf; 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty); 3037c478bd9Sstevel@tonic-gate ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0); 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate /* 3067c478bd9Sstevel@tonic-gate * async write the buf 3077c478bd9Sstevel@tonic-gate */ 3087c478bd9Sstevel@tonic-gate writelog(ul, bp); 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate /* 3117c478bd9Sstevel@tonic-gate * no longer filling any buf 3127c478bd9Sstevel@tonic-gate */ 3137c478bd9Sstevel@tonic-gate cb->cb_dirty = NULL; 3147c478bd9Sstevel@tonic-gate 3157c478bd9Sstevel@tonic-gate /* 3167c478bd9Sstevel@tonic-gate * no extra buffer space; all done 3177c478bd9Sstevel@tonic-gate */ 3187c478bd9Sstevel@tonic-gate if (bp->b_bcount == bp->b_bufsize) 3197c478bd9Sstevel@tonic-gate return; 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate /* 3227c478bd9Sstevel@tonic-gate * give extra buffer space to a new bp 3237c478bd9Sstevel@tonic-gate * try to take buf off of free list 3247c478bd9Sstevel@tonic-gate */ 3257c478bd9Sstevel@tonic-gate if ((newbp = cb->cb_free) != NULL) { 3267c478bd9Sstevel@tonic-gate cb->cb_free = newbp->b_forw; 3277c478bd9Sstevel@tonic-gate } else { 3287c478bd9Sstevel@tonic-gate newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP); 3297c478bd9Sstevel@tonic-gate sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL); 3307c478bd9Sstevel@tonic-gate sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL); 3317c478bd9Sstevel@tonic-gate } 3327c478bd9Sstevel@tonic-gate newbp->b_flags = 0; 3337c478bd9Sstevel@tonic-gate newbp->b_bcount = 0; 3347c478bd9Sstevel@tonic-gate newbp->b_file = NULL; 3357c478bd9Sstevel@tonic-gate newbp->b_offset = -1; 3367c478bd9Sstevel@tonic-gate newbp->b_bufsize = bp->b_bufsize - bp->b_bcount; 3377c478bd9Sstevel@tonic-gate newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount; 3387c478bd9Sstevel@tonic-gate bp->b_bufsize = bp->b_bcount; 3397c478bd9Sstevel@tonic-gate 3407c478bd9Sstevel@tonic-gate /* 3417c478bd9Sstevel@tonic-gate * lock out readers and put new buf at LRU position 3427c478bd9Sstevel@tonic-gate */ 3437c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 3447c478bd9Sstevel@tonic-gate newbp->b_forw = bp->b_forw; 3457c478bd9Sstevel@tonic-gate newbp->b_back = bp; 3467c478bd9Sstevel@tonic-gate bp->b_forw->b_back = newbp; 3477c478bd9Sstevel@tonic-gate bp->b_forw = newbp; 3487c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 3497c478bd9Sstevel@tonic-gate } 3507c478bd9Sstevel@tonic-gate 3517c478bd9Sstevel@tonic-gate static void 3527c478bd9Sstevel@tonic-gate inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb) 3537c478bd9Sstevel@tonic-gate { 3547c478bd9Sstevel@tonic-gate buf_t *bp; 3557c478bd9Sstevel@tonic-gate off_t elof = lof + nb; 3567c478bd9Sstevel@tonic-gate off_t buflof; 3577c478bd9Sstevel@tonic-gate off_t bufelof; 3587c478bd9Sstevel@tonic-gate 3597c478bd9Sstevel@tonic-gate /* 3607c478bd9Sstevel@tonic-gate * discard all bufs that overlap the range (lof, lof + nb) 3617c478bd9Sstevel@tonic-gate */ 3627c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 3637c478bd9Sstevel@tonic-gate bp = cb->cb_bp; 3647c478bd9Sstevel@tonic-gate do { 3657c478bd9Sstevel@tonic-gate if (bp == cb->cb_dirty || bp->b_bcount == 0) { 3667c478bd9Sstevel@tonic-gate bp = bp->b_forw; 3677c478bd9Sstevel@tonic-gate continue; 3687c478bd9Sstevel@tonic-gate } 3697c478bd9Sstevel@tonic-gate buflof = dbtob(bp->b_blkno); 3707c478bd9Sstevel@tonic-gate bufelof = buflof + bp->b_bcount; 3717c478bd9Sstevel@tonic-gate if ((buflof < lof && bufelof <= lof) || 3727c478bd9Sstevel@tonic-gate (buflof >= elof && bufelof > elof)) { 3737c478bd9Sstevel@tonic-gate bp = bp->b_forw; 3747c478bd9Sstevel@tonic-gate continue; 3757c478bd9Sstevel@tonic-gate } 3767c478bd9Sstevel@tonic-gate makebusy(ul, bp); 3777c478bd9Sstevel@tonic-gate bp->b_flags = 0; 3787c478bd9Sstevel@tonic-gate bp->b_bcount = 0; 3797c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 3807c478bd9Sstevel@tonic-gate bp = bp->b_forw; 3817c478bd9Sstevel@tonic-gate } while (bp != cb->cb_bp); 3827c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 3837c478bd9Sstevel@tonic-gate } 3847c478bd9Sstevel@tonic-gate 3857c478bd9Sstevel@tonic-gate /* 3867c478bd9Sstevel@tonic-gate * NOTE: writers are single threaded thru the log layer. 3877c478bd9Sstevel@tonic-gate * This means we can safely reference and change the cb and bp fields 3887c478bd9Sstevel@tonic-gate * that ldl_read does not reference w/o holding the cb_rwlock or 3897c478bd9Sstevel@tonic-gate * the bp makebusy lock. 3907c478bd9Sstevel@tonic-gate */ 3917c478bd9Sstevel@tonic-gate static buf_t * 3927c478bd9Sstevel@tonic-gate get_write_bp(ml_unit_t *ul) 3937c478bd9Sstevel@tonic-gate { 3947c478bd9Sstevel@tonic-gate cirbuf_t *cb = &ul->un_wrbuf; 3957c478bd9Sstevel@tonic-gate buf_t *bp; 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate /* 3987c478bd9Sstevel@tonic-gate * cb_dirty is the buffer we are currently filling; if any 3997c478bd9Sstevel@tonic-gate */ 4007c478bd9Sstevel@tonic-gate if ((bp = cb->cb_dirty) != NULL) { 4017c478bd9Sstevel@tonic-gate makebusy(ul, bp); 4027c478bd9Sstevel@tonic-gate return (bp); 4037c478bd9Sstevel@tonic-gate } 4047c478bd9Sstevel@tonic-gate /* 4057c478bd9Sstevel@tonic-gate * discard any bp that overlaps the current tail since we are 4067c478bd9Sstevel@tonic-gate * about to overwrite it. 4077c478bd9Sstevel@tonic-gate */ 4087c478bd9Sstevel@tonic-gate inval_range(ul, cb, ul->un_tail_lof, 1); 4097c478bd9Sstevel@tonic-gate 4107c478bd9Sstevel@tonic-gate /* 4117c478bd9Sstevel@tonic-gate * steal LRU buf 4127c478bd9Sstevel@tonic-gate */ 4137c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 4147c478bd9Sstevel@tonic-gate bp = cb->cb_bp->b_forw; 4157c478bd9Sstevel@tonic-gate makebusy(ul, bp); 4167c478bd9Sstevel@tonic-gate 4177c478bd9Sstevel@tonic-gate cb->cb_dirty = bp; 4187c478bd9Sstevel@tonic-gate cb->cb_bp = bp; 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate bp->b_flags = 0; 4217c478bd9Sstevel@tonic-gate bp->b_bcount = 0; 4227c478bd9Sstevel@tonic-gate bp->b_blkno = btodb(ul->un_tail_lof); 4237c478bd9Sstevel@tonic-gate ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof); 4247c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 4257c478bd9Sstevel@tonic-gate 4267c478bd9Sstevel@tonic-gate /* 4277c478bd9Sstevel@tonic-gate * NOTE: 4287c478bd9Sstevel@tonic-gate * 1. un_tail_lof never addresses >= un_eol_lof 4297c478bd9Sstevel@tonic-gate * 2. b_blkno + btodb(b_bufsize) may > un_eol_lof 4307c478bd9Sstevel@tonic-gate * this case is handled in storebuf 4317c478bd9Sstevel@tonic-gate */ 4327c478bd9Sstevel@tonic-gate return (bp); 4337c478bd9Sstevel@tonic-gate } 4347c478bd9Sstevel@tonic-gate 4357c478bd9Sstevel@tonic-gate void 4367c478bd9Sstevel@tonic-gate alloc_wrbuf(cirbuf_t *cb, size_t bufsize) 4377c478bd9Sstevel@tonic-gate { 4387c478bd9Sstevel@tonic-gate int i; 4397c478bd9Sstevel@tonic-gate buf_t *bp; 4407c478bd9Sstevel@tonic-gate 4417c478bd9Sstevel@tonic-gate /* 4427c478bd9Sstevel@tonic-gate * Clear previous allocation 4437c478bd9Sstevel@tonic-gate */ 4447c478bd9Sstevel@tonic-gate if (cb->cb_nb) 4457c478bd9Sstevel@tonic-gate free_cirbuf(cb); 4467c478bd9Sstevel@tonic-gate 4477c478bd9Sstevel@tonic-gate bzero(cb, sizeof (*cb)); 4487c478bd9Sstevel@tonic-gate rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL); 4497c478bd9Sstevel@tonic-gate 4507c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 4517c478bd9Sstevel@tonic-gate 4527c478bd9Sstevel@tonic-gate /* 4537c478bd9Sstevel@tonic-gate * preallocate 3 bp's and put them on the free list. 4547c478bd9Sstevel@tonic-gate */ 4557c478bd9Sstevel@tonic-gate for (i = 0; i < 3; ++i) { 4567c478bd9Sstevel@tonic-gate bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP); 4577c478bd9Sstevel@tonic-gate sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL); 4587c478bd9Sstevel@tonic-gate sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL); 4597c478bd9Sstevel@tonic-gate bp->b_offset = -1; 4607c478bd9Sstevel@tonic-gate bp->b_forw = cb->cb_free; 4617c478bd9Sstevel@tonic-gate cb->cb_free = bp; 4627c478bd9Sstevel@tonic-gate } 4637c478bd9Sstevel@tonic-gate 4647c478bd9Sstevel@tonic-gate cb->cb_va = kmem_alloc(bufsize, KM_SLEEP); 4657c478bd9Sstevel@tonic-gate cb->cb_nb = bufsize; 4667c478bd9Sstevel@tonic-gate 4677c478bd9Sstevel@tonic-gate /* 4687c478bd9Sstevel@tonic-gate * first bp claims entire write buffer 4697c478bd9Sstevel@tonic-gate */ 4707c478bd9Sstevel@tonic-gate bp = cb->cb_free; 4717c478bd9Sstevel@tonic-gate cb->cb_free = bp->b_forw; 4727c478bd9Sstevel@tonic-gate 4737c478bd9Sstevel@tonic-gate bp->b_forw = bp; 4747c478bd9Sstevel@tonic-gate bp->b_back = bp; 4757c478bd9Sstevel@tonic-gate cb->cb_bp = bp; 4767c478bd9Sstevel@tonic-gate bp->b_un.b_addr = cb->cb_va; 4777c478bd9Sstevel@tonic-gate bp->b_bufsize = cb->cb_nb; 4787c478bd9Sstevel@tonic-gate 4797c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 4807c478bd9Sstevel@tonic-gate } 4817c478bd9Sstevel@tonic-gate 4827c478bd9Sstevel@tonic-gate void 4837c478bd9Sstevel@tonic-gate alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize) 4847c478bd9Sstevel@tonic-gate { 4857c478bd9Sstevel@tonic-gate caddr_t va; 4867c478bd9Sstevel@tonic-gate size_t nb; 4877c478bd9Sstevel@tonic-gate buf_t *bp; 4887c478bd9Sstevel@tonic-gate 4897c478bd9Sstevel@tonic-gate /* 4907c478bd9Sstevel@tonic-gate * Clear previous allocation 4917c478bd9Sstevel@tonic-gate */ 4927c478bd9Sstevel@tonic-gate if (cb->cb_nb) 4937c478bd9Sstevel@tonic-gate free_cirbuf(cb); 4947c478bd9Sstevel@tonic-gate 4957c478bd9Sstevel@tonic-gate bzero(cb, sizeof (*cb)); 4967c478bd9Sstevel@tonic-gate rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL); 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 4997c478bd9Sstevel@tonic-gate 5007c478bd9Sstevel@tonic-gate cb->cb_va = kmem_alloc(bufsize, KM_SLEEP); 5017c478bd9Sstevel@tonic-gate cb->cb_nb = bufsize; 5027c478bd9Sstevel@tonic-gate 5037c478bd9Sstevel@tonic-gate /* 5047c478bd9Sstevel@tonic-gate * preallocate N bufs that are hard-sized to blksize 5057c478bd9Sstevel@tonic-gate * in other words, the read buffer pool is a linked list 5067c478bd9Sstevel@tonic-gate * of statically sized bufs. 5077c478bd9Sstevel@tonic-gate */ 5087c478bd9Sstevel@tonic-gate va = cb->cb_va; 5097c478bd9Sstevel@tonic-gate while ((nb = bufsize) != 0) { 5107c478bd9Sstevel@tonic-gate if (nb > blksize) 5117c478bd9Sstevel@tonic-gate nb = blksize; 5127c478bd9Sstevel@tonic-gate bp = kmem_alloc(sizeof (buf_t), KM_SLEEP); 5137c478bd9Sstevel@tonic-gate bzero(bp, sizeof (buf_t)); 5147c478bd9Sstevel@tonic-gate sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL); 5157c478bd9Sstevel@tonic-gate sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL); 5167c478bd9Sstevel@tonic-gate bp->b_un.b_addr = va; 5177c478bd9Sstevel@tonic-gate bp->b_bufsize = nb; 5187c478bd9Sstevel@tonic-gate if (cb->cb_bp) { 5197c478bd9Sstevel@tonic-gate bp->b_forw = cb->cb_bp->b_forw; 5207c478bd9Sstevel@tonic-gate bp->b_back = cb->cb_bp; 5217c478bd9Sstevel@tonic-gate cb->cb_bp->b_forw->b_back = bp; 5227c478bd9Sstevel@tonic-gate cb->cb_bp->b_forw = bp; 5237c478bd9Sstevel@tonic-gate } else 5247c478bd9Sstevel@tonic-gate bp->b_forw = bp->b_back = bp; 5257c478bd9Sstevel@tonic-gate cb->cb_bp = bp; 5267c478bd9Sstevel@tonic-gate bufsize -= nb; 5277c478bd9Sstevel@tonic-gate va += nb; 5287c478bd9Sstevel@tonic-gate } 5297c478bd9Sstevel@tonic-gate 5307c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 5317c478bd9Sstevel@tonic-gate } 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate void 5347c478bd9Sstevel@tonic-gate free_cirbuf(cirbuf_t *cb) 5357c478bd9Sstevel@tonic-gate { 5367c478bd9Sstevel@tonic-gate buf_t *bp; 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate if (cb->cb_nb == 0) 5397c478bd9Sstevel@tonic-gate return; 5407c478bd9Sstevel@tonic-gate 5417c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 5427c478bd9Sstevel@tonic-gate ASSERT(cb->cb_dirty == NULL); 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate /* 5457c478bd9Sstevel@tonic-gate * free the active bufs 5467c478bd9Sstevel@tonic-gate */ 5477c478bd9Sstevel@tonic-gate while ((bp = cb->cb_bp) != NULL) { 5487c478bd9Sstevel@tonic-gate if (bp == bp->b_forw) 5497c478bd9Sstevel@tonic-gate cb->cb_bp = NULL; 5507c478bd9Sstevel@tonic-gate else 5517c478bd9Sstevel@tonic-gate cb->cb_bp = bp->b_forw; 5527c478bd9Sstevel@tonic-gate bp->b_back->b_forw = bp->b_forw; 5537c478bd9Sstevel@tonic-gate bp->b_forw->b_back = bp->b_back; 5547c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_sem); 5557c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_io); 5567c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (buf_t)); 5577c478bd9Sstevel@tonic-gate } 5587c478bd9Sstevel@tonic-gate 5597c478bd9Sstevel@tonic-gate /* 5607c478bd9Sstevel@tonic-gate * free the free bufs 5617c478bd9Sstevel@tonic-gate */ 5627c478bd9Sstevel@tonic-gate while ((bp = cb->cb_free) != NULL) { 5637c478bd9Sstevel@tonic-gate cb->cb_free = bp->b_forw; 5647c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_sem); 5657c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_io); 5667c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (buf_t)); 5677c478bd9Sstevel@tonic-gate } 5687c478bd9Sstevel@tonic-gate kmem_free(cb->cb_va, cb->cb_nb); 5697c478bd9Sstevel@tonic-gate cb->cb_va = NULL; 5707c478bd9Sstevel@tonic-gate cb->cb_nb = 0; 5717c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 5727c478bd9Sstevel@tonic-gate rw_destroy(&cb->cb_rwlock); 5737c478bd9Sstevel@tonic-gate } 5747c478bd9Sstevel@tonic-gate 5757c478bd9Sstevel@tonic-gate static int 5767c478bd9Sstevel@tonic-gate within_range(off_t lof, daddr_t blkno, ulong_t bcount) 5777c478bd9Sstevel@tonic-gate { 5787c478bd9Sstevel@tonic-gate off_t blof = dbtob(blkno); 5797c478bd9Sstevel@tonic-gate 5807c478bd9Sstevel@tonic-gate return ((lof >= blof) && (lof < (blof + bcount))); 5817c478bd9Sstevel@tonic-gate } 5827c478bd9Sstevel@tonic-gate 5837c478bd9Sstevel@tonic-gate static buf_t * 5847c478bd9Sstevel@tonic-gate find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof) 5857c478bd9Sstevel@tonic-gate { 5867c478bd9Sstevel@tonic-gate buf_t *bp; 5877c478bd9Sstevel@tonic-gate 5887c478bd9Sstevel@tonic-gate /* 5897c478bd9Sstevel@tonic-gate * find a buf that contains the offset lof 5907c478bd9Sstevel@tonic-gate */ 5917c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_READER); 5927c478bd9Sstevel@tonic-gate bp = cb->cb_bp; 5937c478bd9Sstevel@tonic-gate do { 5947c478bd9Sstevel@tonic-gate if (bp->b_bcount && 5957c478bd9Sstevel@tonic-gate within_range(lof, bp->b_blkno, bp->b_bcount)) { 5967c478bd9Sstevel@tonic-gate makebusy(ul, bp); 5977c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 5987c478bd9Sstevel@tonic-gate return (bp); 5997c478bd9Sstevel@tonic-gate } 6007c478bd9Sstevel@tonic-gate bp = bp->b_forw; 6017c478bd9Sstevel@tonic-gate } while (bp != cb->cb_bp); 6027c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 6037c478bd9Sstevel@tonic-gate 6047c478bd9Sstevel@tonic-gate return (NULL); 6057c478bd9Sstevel@tonic-gate } 6067c478bd9Sstevel@tonic-gate 6077c478bd9Sstevel@tonic-gate static off_t 6087c478bd9Sstevel@tonic-gate find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof) 6097c478bd9Sstevel@tonic-gate { 6107c478bd9Sstevel@tonic-gate buf_t *bp, *bpend; 6117c478bd9Sstevel@tonic-gate off_t rlof; 6127c478bd9Sstevel@tonic-gate 6137c478bd9Sstevel@tonic-gate /* 6147c478bd9Sstevel@tonic-gate * we mustn't: 6157c478bd9Sstevel@tonic-gate * o read past eol 6167c478bd9Sstevel@tonic-gate * o read past the tail 6177c478bd9Sstevel@tonic-gate * o read data that may be being written. 6187c478bd9Sstevel@tonic-gate */ 6197c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_READER); 6207c478bd9Sstevel@tonic-gate bpend = bp = cb->cb_bp->b_forw; 6217c478bd9Sstevel@tonic-gate rlof = ul->un_tail_lof; 6227c478bd9Sstevel@tonic-gate do { 6237c478bd9Sstevel@tonic-gate if (bp->b_bcount) { 6247c478bd9Sstevel@tonic-gate rlof = dbtob(bp->b_blkno); 6257c478bd9Sstevel@tonic-gate break; 6267c478bd9Sstevel@tonic-gate } 6277c478bd9Sstevel@tonic-gate bp = bp->b_forw; 6287c478bd9Sstevel@tonic-gate } while (bp != bpend); 6297c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 6307c478bd9Sstevel@tonic-gate 6317c478bd9Sstevel@tonic-gate if (lof <= rlof) 6327c478bd9Sstevel@tonic-gate /* lof is prior to the range represented by the write buf */ 6337c478bd9Sstevel@tonic-gate return (rlof); 6347c478bd9Sstevel@tonic-gate else 6357c478bd9Sstevel@tonic-gate /* lof follows the range represented by the write buf */ 6367c478bd9Sstevel@tonic-gate return ((off_t)ul->un_eol_lof); 6377c478bd9Sstevel@tonic-gate } 6387c478bd9Sstevel@tonic-gate 6397c478bd9Sstevel@tonic-gate static buf_t * 6407c478bd9Sstevel@tonic-gate get_read_bp(ml_unit_t *ul, off_t lof) 6417c478bd9Sstevel@tonic-gate { 6427c478bd9Sstevel@tonic-gate cirbuf_t *cb; 6437c478bd9Sstevel@tonic-gate buf_t *bp; 6447c478bd9Sstevel@tonic-gate off_t rlof; 6457c478bd9Sstevel@tonic-gate 6467c478bd9Sstevel@tonic-gate /* 6477c478bd9Sstevel@tonic-gate * retrieve as much data as possible from the incore buffers 6487c478bd9Sstevel@tonic-gate */ 6497c478bd9Sstevel@tonic-gate if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) { 6507c478bd9Sstevel@tonic-gate logstats.ls_lreadsinmem.value.ui64++; 6517c478bd9Sstevel@tonic-gate return (bp); 6527c478bd9Sstevel@tonic-gate } 6537c478bd9Sstevel@tonic-gate if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) { 6547c478bd9Sstevel@tonic-gate logstats.ls_lreadsinmem.value.ui64++; 6557c478bd9Sstevel@tonic-gate return (bp); 6567c478bd9Sstevel@tonic-gate } 6577c478bd9Sstevel@tonic-gate 6587c478bd9Sstevel@tonic-gate /* 6597c478bd9Sstevel@tonic-gate * steal the LRU buf 6607c478bd9Sstevel@tonic-gate */ 6617c478bd9Sstevel@tonic-gate cb = &ul->un_rdbuf; 6627c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 6637c478bd9Sstevel@tonic-gate bp = cb->cb_bp->b_forw; 6647c478bd9Sstevel@tonic-gate makebusy(ul, bp); 6657c478bd9Sstevel@tonic-gate bp->b_flags = 0; 6667c478bd9Sstevel@tonic-gate bp->b_bcount = 0; 6677c478bd9Sstevel@tonic-gate cb->cb_bp = bp; 6687c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 6697c478bd9Sstevel@tonic-gate 6707c478bd9Sstevel@tonic-gate /* 6717c478bd9Sstevel@tonic-gate * don't read past the tail or the end-of-log 6727c478bd9Sstevel@tonic-gate */ 6737c478bd9Sstevel@tonic-gate bp->b_blkno = btodb(lof); 6747c478bd9Sstevel@tonic-gate lof = dbtob(bp->b_blkno); 6757c478bd9Sstevel@tonic-gate rlof = find_read_lof(ul, &ul->un_wrbuf, lof); 6767c478bd9Sstevel@tonic-gate bp->b_bcount = MIN(bp->b_bufsize, rlof - lof); 6777c478bd9Sstevel@tonic-gate readlog(ul, bp); 6787c478bd9Sstevel@tonic-gate return (bp); 6797c478bd9Sstevel@tonic-gate } 6807c478bd9Sstevel@tonic-gate 6817c478bd9Sstevel@tonic-gate /* 6827c478bd9Sstevel@tonic-gate * NOTE: writers are single threaded thru the log layer. 6837c478bd9Sstevel@tonic-gate * This means we can safely reference and change the cb and bp fields 6847c478bd9Sstevel@tonic-gate * that ldl_read does not reference w/o holding the cb_rwlock or 6857c478bd9Sstevel@tonic-gate * the bp makebusy lock. 6867c478bd9Sstevel@tonic-gate */ 6877c478bd9Sstevel@tonic-gate static int 6887c478bd9Sstevel@tonic-gate extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp) 6897c478bd9Sstevel@tonic-gate { 6907c478bd9Sstevel@tonic-gate buf_t *bpforw = bp->b_forw; 6917c478bd9Sstevel@tonic-gate 6927c478bd9Sstevel@tonic-gate ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty); 6937c478bd9Sstevel@tonic-gate 6947c478bd9Sstevel@tonic-gate /* 6957c478bd9Sstevel@tonic-gate * there is no `next' bp; do nothing 6967c478bd9Sstevel@tonic-gate */ 6977c478bd9Sstevel@tonic-gate if (bpforw == bp) 6987c478bd9Sstevel@tonic-gate return (0); 6997c478bd9Sstevel@tonic-gate 7007c478bd9Sstevel@tonic-gate /* 7017c478bd9Sstevel@tonic-gate * buffer space is not adjacent; do nothing 7027c478bd9Sstevel@tonic-gate */ 7037c478bd9Sstevel@tonic-gate if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr) 7047c478bd9Sstevel@tonic-gate return (0); 7057c478bd9Sstevel@tonic-gate 7067c478bd9Sstevel@tonic-gate /* 7077c478bd9Sstevel@tonic-gate * locking protocol requires giving up any bp locks before 7087c478bd9Sstevel@tonic-gate * acquiring cb_rwlock. This is okay because we hold 7097c478bd9Sstevel@tonic-gate * un_log_mutex. 7107c478bd9Sstevel@tonic-gate */ 7117c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 7127c478bd9Sstevel@tonic-gate 7137c478bd9Sstevel@tonic-gate /* 7147c478bd9Sstevel@tonic-gate * lock out ldl_read 7157c478bd9Sstevel@tonic-gate */ 7167c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 7177c478bd9Sstevel@tonic-gate 7187c478bd9Sstevel@tonic-gate /* 7197c478bd9Sstevel@tonic-gate * wait for current IO to finish w/next bp; if necessary 7207c478bd9Sstevel@tonic-gate */ 7217c478bd9Sstevel@tonic-gate makebusy(ul, bpforw); 7227c478bd9Sstevel@tonic-gate 7237c478bd9Sstevel@tonic-gate /* 7247c478bd9Sstevel@tonic-gate * free the next bp and steal its space 7257c478bd9Sstevel@tonic-gate */ 7267c478bd9Sstevel@tonic-gate bp->b_forw = bpforw->b_forw; 7277c478bd9Sstevel@tonic-gate bpforw->b_forw->b_back = bp; 7287c478bd9Sstevel@tonic-gate bp->b_bufsize += bpforw->b_bufsize; 7297c478bd9Sstevel@tonic-gate sema_v(&bpforw->b_sem); 7307c478bd9Sstevel@tonic-gate bpforw->b_forw = cb->cb_free; 7317c478bd9Sstevel@tonic-gate cb->cb_free = bpforw; 7327c478bd9Sstevel@tonic-gate makebusy(ul, bp); 7337c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 7347c478bd9Sstevel@tonic-gate 7357c478bd9Sstevel@tonic-gate return (1); 7367c478bd9Sstevel@tonic-gate } 7377c478bd9Sstevel@tonic-gate 7387c478bd9Sstevel@tonic-gate static size_t 7397c478bd9Sstevel@tonic-gate storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb) 7407c478bd9Sstevel@tonic-gate { 7417c478bd9Sstevel@tonic-gate size_t copy_nb; 7427c478bd9Sstevel@tonic-gate size_t nb_in_sec; 7437c478bd9Sstevel@tonic-gate sect_trailer_t *st; 7447c478bd9Sstevel@tonic-gate size_t nb_left = nb; 7457c478bd9Sstevel@tonic-gate cirbuf_t *cb = &ul->un_wrbuf; 7467c478bd9Sstevel@tonic-gate 7477c478bd9Sstevel@tonic-gate again: 7487c478bd9Sstevel@tonic-gate nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount); 7497c478bd9Sstevel@tonic-gate copy_nb = MIN(nb_left, nb_in_sec); 7507c478bd9Sstevel@tonic-gate 7517c478bd9Sstevel@tonic-gate ASSERT(copy_nb); 7527c478bd9Sstevel@tonic-gate 7537c478bd9Sstevel@tonic-gate bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb); 7547c478bd9Sstevel@tonic-gate bp->b_bcount += copy_nb; 7557c478bd9Sstevel@tonic-gate va += copy_nb; 7567c478bd9Sstevel@tonic-gate nb_left -= copy_nb; 7577c478bd9Sstevel@tonic-gate ul->un_tail_lof += copy_nb; 7587c478bd9Sstevel@tonic-gate 7597c478bd9Sstevel@tonic-gate if ((nb_in_sec -= copy_nb) == 0) { 7607c478bd9Sstevel@tonic-gate st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount); 7617c478bd9Sstevel@tonic-gate 7627c478bd9Sstevel@tonic-gate st->st_tid = ul->un_logmap->mtm_tid; 7637c478bd9Sstevel@tonic-gate st->st_ident = ul->un_tail_ident++; 7647c478bd9Sstevel@tonic-gate bp->b_bcount += sizeof (sect_trailer_t); 7657c478bd9Sstevel@tonic-gate ul->un_tail_lof += sizeof (sect_trailer_t); 7667c478bd9Sstevel@tonic-gate /* 7677c478bd9Sstevel@tonic-gate * log wrapped; async write this bp 7687c478bd9Sstevel@tonic-gate */ 7697c478bd9Sstevel@tonic-gate if (ul->un_tail_lof == ul->un_eol_lof) { 7707c478bd9Sstevel@tonic-gate ul->un_tail_lof = ul->un_bol_lof; 7717c478bd9Sstevel@tonic-gate push_dirty_bp(ul, bp); 7727c478bd9Sstevel@tonic-gate return (nb - nb_left); 7737c478bd9Sstevel@tonic-gate } 7747c478bd9Sstevel@tonic-gate /* 7757c478bd9Sstevel@tonic-gate * out of bp space; get more or async write buf 7767c478bd9Sstevel@tonic-gate */ 7777c478bd9Sstevel@tonic-gate if (bp->b_bcount == bp->b_bufsize) { 7787c478bd9Sstevel@tonic-gate if (!extend_write_bp(ul, cb, bp)) { 7797c478bd9Sstevel@tonic-gate push_dirty_bp(ul, bp); 7807c478bd9Sstevel@tonic-gate return (nb - nb_left); 7817c478bd9Sstevel@tonic-gate } 7827c478bd9Sstevel@tonic-gate } 7837c478bd9Sstevel@tonic-gate } 7847c478bd9Sstevel@tonic-gate if (nb_left) 7857c478bd9Sstevel@tonic-gate goto again; 7867c478bd9Sstevel@tonic-gate 7877c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 7887c478bd9Sstevel@tonic-gate return (nb); 7897c478bd9Sstevel@tonic-gate } 7907c478bd9Sstevel@tonic-gate 7917c478bd9Sstevel@tonic-gate static void 7927c478bd9Sstevel@tonic-gate fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me) 7937c478bd9Sstevel@tonic-gate { 7947c478bd9Sstevel@tonic-gate offset_t src_mof = me->me_mof; 7957c478bd9Sstevel@tonic-gate size_t src_nb = me->me_nb; 7967c478bd9Sstevel@tonic-gate 7977c478bd9Sstevel@tonic-gate if (src_mof > dst_mof) { 7987c478bd9Sstevel@tonic-gate ASSERT(src_mof < (dst_mof + dst_nb)); 7997c478bd9Sstevel@tonic-gate dst_va += (src_mof - dst_mof); 8007c478bd9Sstevel@tonic-gate dst_nb -= (src_mof - dst_mof); 8017c478bd9Sstevel@tonic-gate } else { 8027c478bd9Sstevel@tonic-gate ASSERT(dst_mof < (src_mof + src_nb)); 8037c478bd9Sstevel@tonic-gate src_nb -= (dst_mof - src_mof); 8047c478bd9Sstevel@tonic-gate } 8057c478bd9Sstevel@tonic-gate 8067c478bd9Sstevel@tonic-gate src_nb = MIN(src_nb, dst_nb); 8077c478bd9Sstevel@tonic-gate ASSERT(src_nb); 8087c478bd9Sstevel@tonic-gate bzero(dst_va, src_nb); 8097c478bd9Sstevel@tonic-gate } 8107c478bd9Sstevel@tonic-gate 8117c478bd9Sstevel@tonic-gate /* 8127c478bd9Sstevel@tonic-gate * dst_va == NULL means don't copy anything 8137c478bd9Sstevel@tonic-gate */ 8147c478bd9Sstevel@tonic-gate static ulong_t 8157c478bd9Sstevel@tonic-gate fetchbuf( 8167c478bd9Sstevel@tonic-gate ml_unit_t *ul, 8177c478bd9Sstevel@tonic-gate buf_t *bp, 8187c478bd9Sstevel@tonic-gate caddr_t dst_va, 8197c478bd9Sstevel@tonic-gate size_t dst_nb, 8207c478bd9Sstevel@tonic-gate off_t *dst_lofp) 8217c478bd9Sstevel@tonic-gate { 8227c478bd9Sstevel@tonic-gate caddr_t copy_va; 8237c478bd9Sstevel@tonic-gate size_t copy_nb; 8247c478bd9Sstevel@tonic-gate size_t nb_sec; 8257c478bd9Sstevel@tonic-gate off_t dst_lof = *dst_lofp; 8267c478bd9Sstevel@tonic-gate ulong_t sav_dst_nb = dst_nb; 8277c478bd9Sstevel@tonic-gate ulong_t src_nb = bp->b_bcount; 8287c478bd9Sstevel@tonic-gate off_t src_lof = dbtob(bp->b_blkno); 8297c478bd9Sstevel@tonic-gate off_t src_elof = src_lof + src_nb; 8307c478bd9Sstevel@tonic-gate caddr_t src_va = bp->b_un.b_addr; 8317c478bd9Sstevel@tonic-gate 8327c478bd9Sstevel@tonic-gate /* 8337c478bd9Sstevel@tonic-gate * copy from bp to dst_va 8347c478bd9Sstevel@tonic-gate */ 8357c478bd9Sstevel@tonic-gate while (dst_nb) { 8367c478bd9Sstevel@tonic-gate /* 8377c478bd9Sstevel@tonic-gate * compute address within bp 8387c478bd9Sstevel@tonic-gate */ 8397c478bd9Sstevel@tonic-gate copy_va = src_va + (dst_lof - src_lof); 8407c478bd9Sstevel@tonic-gate 8417c478bd9Sstevel@tonic-gate /* 8427c478bd9Sstevel@tonic-gate * adjust copy size to amount of data in bp 8437c478bd9Sstevel@tonic-gate */ 8447c478bd9Sstevel@tonic-gate copy_nb = MIN(dst_nb, src_elof - dst_lof); 8457c478bd9Sstevel@tonic-gate 8467c478bd9Sstevel@tonic-gate /* 8477c478bd9Sstevel@tonic-gate * adjust copy size to amount of data in sector 8487c478bd9Sstevel@tonic-gate */ 8497c478bd9Sstevel@tonic-gate nb_sec = NB_LEFT_IN_SECTOR(dst_lof); 8507c478bd9Sstevel@tonic-gate copy_nb = MIN(copy_nb, nb_sec); 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate /* 8537c478bd9Sstevel@tonic-gate * dst_va == NULL means don't do copy (see logseek()) 8547c478bd9Sstevel@tonic-gate */ 8557c478bd9Sstevel@tonic-gate if (dst_va) { 8567c478bd9Sstevel@tonic-gate bcopy(copy_va, dst_va, copy_nb); 8577c478bd9Sstevel@tonic-gate dst_va += copy_nb; 8587c478bd9Sstevel@tonic-gate } 8597c478bd9Sstevel@tonic-gate dst_lof += copy_nb; 8607c478bd9Sstevel@tonic-gate dst_nb -= copy_nb; 8617c478bd9Sstevel@tonic-gate nb_sec -= copy_nb; 8627c478bd9Sstevel@tonic-gate 8637c478bd9Sstevel@tonic-gate /* 8647c478bd9Sstevel@tonic-gate * advance over sector trailer 8657c478bd9Sstevel@tonic-gate */ 8667c478bd9Sstevel@tonic-gate if (nb_sec == 0) 8677c478bd9Sstevel@tonic-gate dst_lof += sizeof (sect_trailer_t); 8687c478bd9Sstevel@tonic-gate 8697c478bd9Sstevel@tonic-gate /* 8707c478bd9Sstevel@tonic-gate * exhausted buffer 8717c478bd9Sstevel@tonic-gate * return current lof for next read 8727c478bd9Sstevel@tonic-gate */ 8737c478bd9Sstevel@tonic-gate if (dst_lof == src_elof) { 8747c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 8757c478bd9Sstevel@tonic-gate if (dst_lof == ul->un_eol_lof) 8767c478bd9Sstevel@tonic-gate dst_lof = ul->un_bol_lof; 8777c478bd9Sstevel@tonic-gate *dst_lofp = dst_lof; 8787c478bd9Sstevel@tonic-gate return (sav_dst_nb - dst_nb); 8797c478bd9Sstevel@tonic-gate } 8807c478bd9Sstevel@tonic-gate } 8817c478bd9Sstevel@tonic-gate 8827c478bd9Sstevel@tonic-gate /* 8837c478bd9Sstevel@tonic-gate * copy complete - return current lof 8847c478bd9Sstevel@tonic-gate */ 8857c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 8867c478bd9Sstevel@tonic-gate *dst_lofp = dst_lof; 8877c478bd9Sstevel@tonic-gate return (sav_dst_nb); 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate 8907c478bd9Sstevel@tonic-gate void 8917c478bd9Sstevel@tonic-gate ldl_round_commit(ml_unit_t *ul) 8927c478bd9Sstevel@tonic-gate { 8937c478bd9Sstevel@tonic-gate int wrapped; 8947c478bd9Sstevel@tonic-gate buf_t *bp; 8957c478bd9Sstevel@tonic-gate sect_trailer_t *st; 8967c478bd9Sstevel@tonic-gate size_t bcount; 8977c478bd9Sstevel@tonic-gate cirbuf_t *cb = &ul->un_wrbuf; 8987c478bd9Sstevel@tonic-gate 8997c478bd9Sstevel@tonic-gate /* 9007c478bd9Sstevel@tonic-gate * if nothing to write; then do nothing 9017c478bd9Sstevel@tonic-gate */ 9027c478bd9Sstevel@tonic-gate if ((bp = cb->cb_dirty) == NULL) 9037c478bd9Sstevel@tonic-gate return; 9047c478bd9Sstevel@tonic-gate makebusy(ul, bp); 9057c478bd9Sstevel@tonic-gate 9067c478bd9Sstevel@tonic-gate /* 9077c478bd9Sstevel@tonic-gate * round up to sector boundary and set new tail 9087c478bd9Sstevel@tonic-gate * don't readjust st_ident if buf is already rounded 9097c478bd9Sstevel@tonic-gate */ 9107c478bd9Sstevel@tonic-gate bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE); 9117c478bd9Sstevel@tonic-gate if (bcount == bp->b_bcount) { 9127c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 9137c478bd9Sstevel@tonic-gate return; 9147c478bd9Sstevel@tonic-gate } 9157c478bd9Sstevel@tonic-gate bp->b_bcount = bcount; 9167c478bd9Sstevel@tonic-gate ul->un_tail_lof = dbtob(bp->b_blkno) + bcount; 9177c478bd9Sstevel@tonic-gate wrapped = 0; 9187c478bd9Sstevel@tonic-gate if (ul->un_tail_lof == ul->un_eol_lof) { 9197c478bd9Sstevel@tonic-gate ul->un_tail_lof = ul->un_bol_lof; 9207c478bd9Sstevel@tonic-gate ++wrapped; 9217c478bd9Sstevel@tonic-gate } 9227c478bd9Sstevel@tonic-gate ASSERT(ul->un_tail_lof != ul->un_head_lof); 9237c478bd9Sstevel@tonic-gate 9247c478bd9Sstevel@tonic-gate /* 9257c478bd9Sstevel@tonic-gate * fix up the sector trailer 9267c478bd9Sstevel@tonic-gate */ 9277c478bd9Sstevel@tonic-gate /* LINTED */ 9287c478bd9Sstevel@tonic-gate st = (sect_trailer_t *) 9297c478bd9Sstevel@tonic-gate ((bp->b_un.b_addr + bcount) - sizeof (*st)); 9307c478bd9Sstevel@tonic-gate st->st_tid = ul->un_logmap->mtm_tid; 9317c478bd9Sstevel@tonic-gate st->st_ident = ul->un_tail_ident++; 9327c478bd9Sstevel@tonic-gate 9337c478bd9Sstevel@tonic-gate /* 9347c478bd9Sstevel@tonic-gate * if tail wrapped or we have exhausted this buffer 9357c478bd9Sstevel@tonic-gate * async write the buffer 9367c478bd9Sstevel@tonic-gate */ 9377c478bd9Sstevel@tonic-gate if (wrapped || bcount == bp->b_bufsize) 9387c478bd9Sstevel@tonic-gate push_dirty_bp(ul, bp); 9397c478bd9Sstevel@tonic-gate else 9407c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 9417c478bd9Sstevel@tonic-gate } 9427c478bd9Sstevel@tonic-gate 9437c478bd9Sstevel@tonic-gate void 9447c478bd9Sstevel@tonic-gate ldl_push_commit(ml_unit_t *ul) 9457c478bd9Sstevel@tonic-gate { 9467c478bd9Sstevel@tonic-gate buf_t *bp; 9477c478bd9Sstevel@tonic-gate cirbuf_t *cb = &ul->un_wrbuf; 9487c478bd9Sstevel@tonic-gate 9497c478bd9Sstevel@tonic-gate /* 9507c478bd9Sstevel@tonic-gate * if nothing to write; then do nothing 9517c478bd9Sstevel@tonic-gate */ 9527c478bd9Sstevel@tonic-gate if ((bp = cb->cb_dirty) == NULL) 9537c478bd9Sstevel@tonic-gate return; 9547c478bd9Sstevel@tonic-gate makebusy(ul, bp); 9557c478bd9Sstevel@tonic-gate push_dirty_bp(ul, bp); 9567c478bd9Sstevel@tonic-gate } 9577c478bd9Sstevel@tonic-gate 9587c478bd9Sstevel@tonic-gate int 9597c478bd9Sstevel@tonic-gate ldl_need_commit(ml_unit_t *ul) 9607c478bd9Sstevel@tonic-gate { 9617c478bd9Sstevel@tonic-gate return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2))); 9627c478bd9Sstevel@tonic-gate } 9637c478bd9Sstevel@tonic-gate 9647c478bd9Sstevel@tonic-gate int 9657c478bd9Sstevel@tonic-gate ldl_has_space(ml_unit_t *ul, mapentry_t *me) 9667c478bd9Sstevel@tonic-gate { 9677c478bd9Sstevel@tonic-gate off_t nfb; 9687c478bd9Sstevel@tonic-gate off_t nb; 9697c478bd9Sstevel@tonic-gate 9707c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 9717c478bd9Sstevel@tonic-gate 9727c478bd9Sstevel@tonic-gate /* 9737c478bd9Sstevel@tonic-gate * Add up the size used by the deltas 9747c478bd9Sstevel@tonic-gate * round nb up to a sector length plus an extra sector 9757c478bd9Sstevel@tonic-gate * w/o the extra sector we couldn't distinguish 9767c478bd9Sstevel@tonic-gate * a full log (head == tail) from an empty log (head == tail) 9777c478bd9Sstevel@tonic-gate */ 9787c478bd9Sstevel@tonic-gate for (nb = DEV_BSIZE; me; me = me->me_hash) { 9797c478bd9Sstevel@tonic-gate nb += sizeof (struct delta); 9807c478bd9Sstevel@tonic-gate if (me->me_dt != DT_CANCEL) 9817c478bd9Sstevel@tonic-gate nb += me->me_nb; 9827c478bd9Sstevel@tonic-gate } 9837c478bd9Sstevel@tonic-gate nb = P2ROUNDUP(nb, DEV_BSIZE); 9847c478bd9Sstevel@tonic-gate 9857c478bd9Sstevel@tonic-gate if (ul->un_head_lof <= ul->un_tail_lof) 9867c478bd9Sstevel@tonic-gate nfb = (ul->un_head_lof - ul->un_bol_lof) + 9877c478bd9Sstevel@tonic-gate (ul->un_eol_lof - ul->un_tail_lof); 9887c478bd9Sstevel@tonic-gate else 9897c478bd9Sstevel@tonic-gate nfb = ul->un_head_lof - ul->un_tail_lof; 9907c478bd9Sstevel@tonic-gate 9917c478bd9Sstevel@tonic-gate return (nb < nfb); 9927c478bd9Sstevel@tonic-gate } 9937c478bd9Sstevel@tonic-gate 9947c478bd9Sstevel@tonic-gate void 9957c478bd9Sstevel@tonic-gate ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me) 9967c478bd9Sstevel@tonic-gate { 9977c478bd9Sstevel@tonic-gate buf_t *bp; 9987c478bd9Sstevel@tonic-gate caddr_t va; 9997c478bd9Sstevel@tonic-gate size_t nb; 10007c478bd9Sstevel@tonic-gate size_t actual; 10017c478bd9Sstevel@tonic-gate 10027c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 10037c478bd9Sstevel@tonic-gate 10047c478bd9Sstevel@tonic-gate /* Write the delta */ 10057c478bd9Sstevel@tonic-gate 10067c478bd9Sstevel@tonic-gate nb = sizeof (struct delta); 10077c478bd9Sstevel@tonic-gate va = (caddr_t)&me->me_delta; 10087c478bd9Sstevel@tonic-gate bp = get_write_bp(ul); 10097c478bd9Sstevel@tonic-gate 10107c478bd9Sstevel@tonic-gate while (nb) { 10117c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 10127c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 10137c478bd9Sstevel@tonic-gate return; 10147c478bd9Sstevel@tonic-gate } 10157c478bd9Sstevel@tonic-gate actual = storebuf(ul, bp, va, nb); 10167c478bd9Sstevel@tonic-gate ASSERT(actual); 10177c478bd9Sstevel@tonic-gate va += actual; 10187c478bd9Sstevel@tonic-gate nb -= actual; 10197c478bd9Sstevel@tonic-gate if (nb) 10207c478bd9Sstevel@tonic-gate bp = get_write_bp(ul); 10217c478bd9Sstevel@tonic-gate } 10227c478bd9Sstevel@tonic-gate 10237c478bd9Sstevel@tonic-gate /* If a commit, cancel, or 0's; we're almost done */ 10247c478bd9Sstevel@tonic-gate switch (me->me_dt) { 10257c478bd9Sstevel@tonic-gate case DT_COMMIT: 10267c478bd9Sstevel@tonic-gate case DT_CANCEL: 10277c478bd9Sstevel@tonic-gate case DT_ABZERO: 10287c478bd9Sstevel@tonic-gate /* roll needs to know where the next delta will go */ 10297c478bd9Sstevel@tonic-gate me->me_lof = ul->un_tail_lof; 10307c478bd9Sstevel@tonic-gate return; 10317c478bd9Sstevel@tonic-gate default: 10327c478bd9Sstevel@tonic-gate break; 10337c478bd9Sstevel@tonic-gate } 10347c478bd9Sstevel@tonic-gate 10357c478bd9Sstevel@tonic-gate /* Now write the data */ 10367c478bd9Sstevel@tonic-gate 10377c478bd9Sstevel@tonic-gate ASSERT(me->me_nb != 0); 10387c478bd9Sstevel@tonic-gate 10397c478bd9Sstevel@tonic-gate nb = me->me_nb; 10407c478bd9Sstevel@tonic-gate va = (me->me_mof - bufmof) + bufp; 10417c478bd9Sstevel@tonic-gate bp = get_write_bp(ul); 10427c478bd9Sstevel@tonic-gate 10437c478bd9Sstevel@tonic-gate /* Save where we will put the data */ 10447c478bd9Sstevel@tonic-gate me->me_lof = ul->un_tail_lof; 10457c478bd9Sstevel@tonic-gate 10467c478bd9Sstevel@tonic-gate while (nb) { 10477c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 10487c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 10497c478bd9Sstevel@tonic-gate return; 10507c478bd9Sstevel@tonic-gate } 10517c478bd9Sstevel@tonic-gate actual = storebuf(ul, bp, va, nb); 10527c478bd9Sstevel@tonic-gate ASSERT(actual); 10537c478bd9Sstevel@tonic-gate va += actual; 10547c478bd9Sstevel@tonic-gate nb -= actual; 10557c478bd9Sstevel@tonic-gate if (nb) 10567c478bd9Sstevel@tonic-gate bp = get_write_bp(ul); 10577c478bd9Sstevel@tonic-gate } 10587c478bd9Sstevel@tonic-gate } 10597c478bd9Sstevel@tonic-gate 10607c478bd9Sstevel@tonic-gate void 10617c478bd9Sstevel@tonic-gate ldl_waito(ml_unit_t *ul) 10627c478bd9Sstevel@tonic-gate { 10637c478bd9Sstevel@tonic-gate buf_t *bp; 10647c478bd9Sstevel@tonic-gate cirbuf_t *cb = &ul->un_wrbuf; 10657c478bd9Sstevel@tonic-gate 10667c478bd9Sstevel@tonic-gate rw_enter(&cb->cb_rwlock, RW_WRITER); 10677c478bd9Sstevel@tonic-gate /* 10687c478bd9Sstevel@tonic-gate * wait on them 10697c478bd9Sstevel@tonic-gate */ 10707c478bd9Sstevel@tonic-gate bp = cb->cb_bp; 10717c478bd9Sstevel@tonic-gate do { 10727c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_DONE) == 0) { 10737c478bd9Sstevel@tonic-gate makebusy(ul, bp); 10747c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 10757c478bd9Sstevel@tonic-gate } 10767c478bd9Sstevel@tonic-gate bp = bp->b_forw; 10777c478bd9Sstevel@tonic-gate } while (bp != cb->cb_bp); 10787c478bd9Sstevel@tonic-gate rw_exit(&cb->cb_rwlock); 10797c478bd9Sstevel@tonic-gate } 10807c478bd9Sstevel@tonic-gate 10817c478bd9Sstevel@tonic-gate /* 10827c478bd9Sstevel@tonic-gate * seek nb bytes from location lof 10837c478bd9Sstevel@tonic-gate */ 10847c478bd9Sstevel@tonic-gate static int 10857c478bd9Sstevel@tonic-gate logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp) 10867c478bd9Sstevel@tonic-gate { 10877c478bd9Sstevel@tonic-gate buf_t *bp; 10887c478bd9Sstevel@tonic-gate ulong_t actual; 10897c478bd9Sstevel@tonic-gate 10907c478bd9Sstevel@tonic-gate while (nb) { 10917c478bd9Sstevel@tonic-gate bp = get_read_bp(ul, lof); 10927c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 10937c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 10947c478bd9Sstevel@tonic-gate return (EIO); 10957c478bd9Sstevel@tonic-gate } 10967c478bd9Sstevel@tonic-gate actual = fetchbuf(ul, bp, NULL, nb, &lof); 10977c478bd9Sstevel@tonic-gate ASSERT(actual); 10987c478bd9Sstevel@tonic-gate nb -= actual; 10997c478bd9Sstevel@tonic-gate } 11007c478bd9Sstevel@tonic-gate *lofp = lof; 11017c478bd9Sstevel@tonic-gate ASSERT(nb == 0); 11027c478bd9Sstevel@tonic-gate return (0); 11037c478bd9Sstevel@tonic-gate } 11047c478bd9Sstevel@tonic-gate 11057c478bd9Sstevel@tonic-gate int 11067c478bd9Sstevel@tonic-gate ldl_read( 11077c478bd9Sstevel@tonic-gate ml_unit_t *ul, /* Log unit */ 11087c478bd9Sstevel@tonic-gate caddr_t va, /* address of buffer to read into */ 11097c478bd9Sstevel@tonic-gate offset_t mof, /* mof of buffer */ 11107c478bd9Sstevel@tonic-gate off_t nb, /* length of buffer */ 11117c478bd9Sstevel@tonic-gate mapentry_t *me) /* Map entry list */ 11127c478bd9Sstevel@tonic-gate { 11137c478bd9Sstevel@tonic-gate buf_t *bp; 11147c478bd9Sstevel@tonic-gate crb_t *crb; 11157c478bd9Sstevel@tonic-gate caddr_t rva; /* address to read into */ 11167c478bd9Sstevel@tonic-gate size_t rnb; /* # of bytes to read */ 11177c478bd9Sstevel@tonic-gate off_t lof; /* log device offset to read from */ 11187c478bd9Sstevel@tonic-gate off_t skip; 11197c478bd9Sstevel@tonic-gate ulong_t actual; 11207c478bd9Sstevel@tonic-gate int error; 11217c478bd9Sstevel@tonic-gate caddr_t eva = va + nb; /* end of buffer */ 11227c478bd9Sstevel@tonic-gate 11237c478bd9Sstevel@tonic-gate for (; me; me = me->me_agenext) { 11247c478bd9Sstevel@tonic-gate ASSERT(me->me_dt != DT_CANCEL); 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate /* 11277c478bd9Sstevel@tonic-gate * check for an cached roll buffer 11287c478bd9Sstevel@tonic-gate */ 11297c478bd9Sstevel@tonic-gate crb = me->me_crb; 11307c478bd9Sstevel@tonic-gate if (crb) { 11317c478bd9Sstevel@tonic-gate if (mof > crb->c_mof) { 11327c478bd9Sstevel@tonic-gate /* 11337c478bd9Sstevel@tonic-gate * This mapentry overlaps with the beginning of 11347c478bd9Sstevel@tonic-gate * the supplied buffer 11357c478bd9Sstevel@tonic-gate */ 11367c478bd9Sstevel@tonic-gate skip = mof - crb->c_mof; 11377c478bd9Sstevel@tonic-gate bcopy(crb->c_buf + skip, va, 11387c478bd9Sstevel@tonic-gate MIN(nb, crb->c_nb - skip)); 11397c478bd9Sstevel@tonic-gate } else { 11407c478bd9Sstevel@tonic-gate /* 11417c478bd9Sstevel@tonic-gate * This mapentry starts at or after 11427c478bd9Sstevel@tonic-gate * the supplied buffer. 11437c478bd9Sstevel@tonic-gate */ 11447c478bd9Sstevel@tonic-gate skip = crb->c_mof - mof; 11457c478bd9Sstevel@tonic-gate bcopy(crb->c_buf, va + skip, 11467c478bd9Sstevel@tonic-gate MIN(crb->c_nb, nb - skip)); 11477c478bd9Sstevel@tonic-gate } 11487c478bd9Sstevel@tonic-gate logstats.ls_lreadsinmem.value.ui64++; 11497c478bd9Sstevel@tonic-gate continue; 11507c478bd9Sstevel@tonic-gate } 11517c478bd9Sstevel@tonic-gate 11527c478bd9Sstevel@tonic-gate /* 11537c478bd9Sstevel@tonic-gate * check for a delta full of zeroes - there's no log data 11547c478bd9Sstevel@tonic-gate */ 11557c478bd9Sstevel@tonic-gate if (me->me_dt == DT_ABZERO) { 11567c478bd9Sstevel@tonic-gate fetchzeroes(va, mof, nb, me); 11577c478bd9Sstevel@tonic-gate continue; 11587c478bd9Sstevel@tonic-gate } 11597c478bd9Sstevel@tonic-gate 11607c478bd9Sstevel@tonic-gate if (mof > me->me_mof) { 11617c478bd9Sstevel@tonic-gate rnb = (size_t)(mof - me->me_mof); 11627c478bd9Sstevel@tonic-gate error = logseek(ul, me->me_lof, rnb, &lof); 11637c478bd9Sstevel@tonic-gate if (error) 11647c478bd9Sstevel@tonic-gate return (EIO); 11657c478bd9Sstevel@tonic-gate rva = va; 11667c478bd9Sstevel@tonic-gate rnb = me->me_nb - rnb; 11677c478bd9Sstevel@tonic-gate rnb = ((rva + rnb) > eva) ? eva - rva : rnb; 11687c478bd9Sstevel@tonic-gate } else { 11697c478bd9Sstevel@tonic-gate lof = me->me_lof; 11707c478bd9Sstevel@tonic-gate rva = (me->me_mof - mof) + va; 11717c478bd9Sstevel@tonic-gate rnb = ((rva + me->me_nb) > eva) ? eva - rva : me->me_nb; 11727c478bd9Sstevel@tonic-gate } 11737c478bd9Sstevel@tonic-gate 11747c478bd9Sstevel@tonic-gate while (rnb) { 11757c478bd9Sstevel@tonic-gate bp = get_read_bp(ul, lof); 11767c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 11777c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 11787c478bd9Sstevel@tonic-gate return (EIO); 11797c478bd9Sstevel@tonic-gate } 11807c478bd9Sstevel@tonic-gate ASSERT(((me->me_flags & ME_ROLL) == 0) || 11817c478bd9Sstevel@tonic-gate (bp != ul->un_wrbuf.cb_dirty)); 11827c478bd9Sstevel@tonic-gate actual = fetchbuf(ul, bp, rva, rnb, &lof); 11837c478bd9Sstevel@tonic-gate ASSERT(actual); 11847c478bd9Sstevel@tonic-gate rva += actual; 11857c478bd9Sstevel@tonic-gate rnb -= actual; 11867c478bd9Sstevel@tonic-gate } 11877c478bd9Sstevel@tonic-gate } 11887c478bd9Sstevel@tonic-gate return (0); 11897c478bd9Sstevel@tonic-gate } 11907c478bd9Sstevel@tonic-gate 11917c478bd9Sstevel@tonic-gate void 11927c478bd9Sstevel@tonic-gate ldl_savestate(ml_unit_t *ul) 11937c478bd9Sstevel@tonic-gate { 11947c478bd9Sstevel@tonic-gate int error; 11957c478bd9Sstevel@tonic-gate buf_t *bp = ul->un_bp; 11967c478bd9Sstevel@tonic-gate ml_odunit_t *ud = (void *)bp->b_un.b_addr; 11977c478bd9Sstevel@tonic-gate ml_odunit_t *ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE); 11987c478bd9Sstevel@tonic-gate 11997c478bd9Sstevel@tonic-gate #if DEBUG 12007c478bd9Sstevel@tonic-gate /* 12017c478bd9Sstevel@tonic-gate * Scan test is running; don't update intermediate state 12027c478bd9Sstevel@tonic-gate */ 12037c478bd9Sstevel@tonic-gate if (ul->un_logmap && ul->un_logmap->mtm_trimlof) 12047c478bd9Sstevel@tonic-gate return; 12057c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 12067c478bd9Sstevel@tonic-gate 12077c478bd9Sstevel@tonic-gate mutex_enter(&ul->un_state_mutex); 12087c478bd9Sstevel@tonic-gate bcopy(&ul->un_ondisk, ud, sizeof (*ud)); 12097c478bd9Sstevel@tonic-gate ud->od_chksum = ud->od_head_ident + ud->od_tail_ident; 12107c478bd9Sstevel@tonic-gate bcopy(ud, ud2, sizeof (*ud)); 12117c478bd9Sstevel@tonic-gate 12127c478bd9Sstevel@tonic-gate /* If a snapshot is enabled write through the shapshot driver. */ 12137c478bd9Sstevel@tonic-gate if (ul->un_ufsvfs->vfs_snapshot) 12147c478bd9Sstevel@tonic-gate UFS_BWRITE2(ul->un_ufsvfs, bp); 12157c478bd9Sstevel@tonic-gate else 12167c478bd9Sstevel@tonic-gate BWRITE2(bp); 12177c478bd9Sstevel@tonic-gate logstats.ls_ldlwrites.value.ui64++; 12187c478bd9Sstevel@tonic-gate error = bp->b_flags & B_ERROR; 12197c478bd9Sstevel@tonic-gate mutex_exit(&ul->un_state_mutex); 12207c478bd9Sstevel@tonic-gate if (error) 12217c478bd9Sstevel@tonic-gate ldl_seterror(ul, "Error writing ufs log state"); 12227c478bd9Sstevel@tonic-gate } 12237c478bd9Sstevel@tonic-gate 12247c478bd9Sstevel@tonic-gate /* 12257c478bd9Sstevel@tonic-gate * The head will be set to (new_lof - header) since ldl_sethead is 12267c478bd9Sstevel@tonic-gate * called with the new_lof of the data portion of a delta. 12277c478bd9Sstevel@tonic-gate */ 12287c478bd9Sstevel@tonic-gate void 12297c478bd9Sstevel@tonic-gate ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid) 12307c478bd9Sstevel@tonic-gate { 12317c478bd9Sstevel@tonic-gate off_t nb; 12327c478bd9Sstevel@tonic-gate off_t new_lof; 12337c478bd9Sstevel@tonic-gate uint32_t new_ident; 12347c478bd9Sstevel@tonic-gate daddr_t beg_blkno; 12357c478bd9Sstevel@tonic-gate daddr_t end_blkno; 12367c478bd9Sstevel@tonic-gate 12377c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 12387c478bd9Sstevel@tonic-gate 12397c478bd9Sstevel@tonic-gate if (data_lof == -1) { 12407c478bd9Sstevel@tonic-gate /* log is empty */ 12414f3979a5SWolfgang Schremser new_ident = lufs_hd_genid(ul); 12427c478bd9Sstevel@tonic-gate new_lof = ul->un_tail_lof; 12437c478bd9Sstevel@tonic-gate 12447c478bd9Sstevel@tonic-gate } else { 12457c478bd9Sstevel@tonic-gate /* compute header's lof */ 12467c478bd9Sstevel@tonic-gate new_ident = ul->un_head_ident; 12477c478bd9Sstevel@tonic-gate new_lof = data_lof - sizeof (struct delta); 12487c478bd9Sstevel@tonic-gate 12497c478bd9Sstevel@tonic-gate /* whoops, header spans sectors; subtract out sector trailer */ 12507c478bd9Sstevel@tonic-gate if (btodb(new_lof) != btodb(data_lof)) 12517c478bd9Sstevel@tonic-gate new_lof -= sizeof (sect_trailer_t); 12527c478bd9Sstevel@tonic-gate 12537c478bd9Sstevel@tonic-gate /* whoops, header wrapped the log; go to last sector */ 12547c478bd9Sstevel@tonic-gate if (new_lof < ul->un_bol_lof) { 12557c478bd9Sstevel@tonic-gate /* sector offset */ 12567c478bd9Sstevel@tonic-gate new_lof -= dbtob(btodb(new_lof)); 12577c478bd9Sstevel@tonic-gate /* add to last sector's lof */ 12587c478bd9Sstevel@tonic-gate new_lof += (ul->un_eol_lof - DEV_BSIZE); 12597c478bd9Sstevel@tonic-gate } 12607c478bd9Sstevel@tonic-gate ul->un_head_tid = tid; 12617c478bd9Sstevel@tonic-gate } 12627c478bd9Sstevel@tonic-gate 12637c478bd9Sstevel@tonic-gate /* 12647c478bd9Sstevel@tonic-gate * check for nop 12657c478bd9Sstevel@tonic-gate */ 12667c478bd9Sstevel@tonic-gate if (new_lof == ul->un_head_lof) 12677c478bd9Sstevel@tonic-gate return; 12687c478bd9Sstevel@tonic-gate 12697c478bd9Sstevel@tonic-gate /* 12707c478bd9Sstevel@tonic-gate * invalidate the affected bufs and calculate new ident 12717c478bd9Sstevel@tonic-gate */ 12727c478bd9Sstevel@tonic-gate if (new_lof > ul->un_head_lof) { 12737c478bd9Sstevel@tonic-gate nb = new_lof - ul->un_head_lof; 12747c478bd9Sstevel@tonic-gate inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb); 12757c478bd9Sstevel@tonic-gate inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb); 12767c478bd9Sstevel@tonic-gate 12777c478bd9Sstevel@tonic-gate end_blkno = btodb(new_lof); 12787c478bd9Sstevel@tonic-gate beg_blkno = btodb(ul->un_head_lof); 12797c478bd9Sstevel@tonic-gate new_ident += (end_blkno - beg_blkno); 12807c478bd9Sstevel@tonic-gate } else { 12817c478bd9Sstevel@tonic-gate nb = ul->un_eol_lof - ul->un_head_lof; 12827c478bd9Sstevel@tonic-gate inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb); 12837c478bd9Sstevel@tonic-gate inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb); 12847c478bd9Sstevel@tonic-gate 12857c478bd9Sstevel@tonic-gate end_blkno = btodb(ul->un_eol_lof); 12867c478bd9Sstevel@tonic-gate beg_blkno = btodb(ul->un_head_lof); 12877c478bd9Sstevel@tonic-gate new_ident += (end_blkno - beg_blkno); 12887c478bd9Sstevel@tonic-gate 12897c478bd9Sstevel@tonic-gate nb = new_lof - ul->un_bol_lof; 12907c478bd9Sstevel@tonic-gate inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb); 12917c478bd9Sstevel@tonic-gate inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb); 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate end_blkno = btodb(new_lof); 12947c478bd9Sstevel@tonic-gate beg_blkno = btodb(ul->un_bol_lof); 12957c478bd9Sstevel@tonic-gate new_ident += (end_blkno - beg_blkno); 12967c478bd9Sstevel@tonic-gate } 12977c478bd9Sstevel@tonic-gate /* 12987c478bd9Sstevel@tonic-gate * don't update the head if there has been an error 12997c478bd9Sstevel@tonic-gate */ 13007c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) 13017c478bd9Sstevel@tonic-gate return; 13027c478bd9Sstevel@tonic-gate 13037c478bd9Sstevel@tonic-gate /* Fix up the head and ident */ 13047c478bd9Sstevel@tonic-gate ASSERT(new_lof >= ul->un_bol_lof); 13057c478bd9Sstevel@tonic-gate ul->un_head_lof = new_lof; 13067c478bd9Sstevel@tonic-gate ul->un_head_ident = new_ident; 13077c478bd9Sstevel@tonic-gate if (data_lof == -1) { 13087c478bd9Sstevel@tonic-gate ul->un_tail_ident = ul->un_head_ident; 13097c478bd9Sstevel@tonic-gate } 13107c478bd9Sstevel@tonic-gate 13117c478bd9Sstevel@tonic-gate 13127c478bd9Sstevel@tonic-gate /* Commit to the database */ 13137c478bd9Sstevel@tonic-gate ldl_savestate(ul); 13147c478bd9Sstevel@tonic-gate 13157c478bd9Sstevel@tonic-gate ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) || 13167c478bd9Sstevel@tonic-gate ldl_sethead_debug(ul)); 13177c478bd9Sstevel@tonic-gate } 13187c478bd9Sstevel@tonic-gate 13197c478bd9Sstevel@tonic-gate /* 13207c478bd9Sstevel@tonic-gate * The tail will be set to the sector following lof+nb 13217c478bd9Sstevel@tonic-gate * lof + nb == size of the last delta + commit record 13227c478bd9Sstevel@tonic-gate * this function is called once after the log scan has completed. 13237c478bd9Sstevel@tonic-gate */ 13247c478bd9Sstevel@tonic-gate void 13257c478bd9Sstevel@tonic-gate ldl_settail(ml_unit_t *ul, off_t lof, size_t nb) 13267c478bd9Sstevel@tonic-gate { 13277c478bd9Sstevel@tonic-gate off_t new_lof; 13287c478bd9Sstevel@tonic-gate uint32_t new_ident; 13297c478bd9Sstevel@tonic-gate daddr_t beg_blkno; 13307c478bd9Sstevel@tonic-gate daddr_t end_blkno; 13317c478bd9Sstevel@tonic-gate 13327c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&ul->un_log_mutex)); 13337c478bd9Sstevel@tonic-gate 13347c478bd9Sstevel@tonic-gate if (lof == -1) { 13357c478bd9Sstevel@tonic-gate ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)); 13367c478bd9Sstevel@tonic-gate ul->un_head_lof = ul->un_tail_lof; 13374f3979a5SWolfgang Schremser ul->un_head_ident = lufs_hd_genid(ul); 13387c478bd9Sstevel@tonic-gate ul->un_tail_ident = ul->un_head_ident; 13397c478bd9Sstevel@tonic-gate 13407c478bd9Sstevel@tonic-gate /* Commit to the database */ 13417c478bd9Sstevel@tonic-gate ldl_savestate(ul); 13427c478bd9Sstevel@tonic-gate 13437c478bd9Sstevel@tonic-gate return; 13447c478bd9Sstevel@tonic-gate } 13457c478bd9Sstevel@tonic-gate 13467c478bd9Sstevel@tonic-gate /* 13477c478bd9Sstevel@tonic-gate * new_lof is the offset of the sector following the last commit 13487c478bd9Sstevel@tonic-gate */ 13497c478bd9Sstevel@tonic-gate (void) logseek(ul, lof, nb, &new_lof); 13507c478bd9Sstevel@tonic-gate ASSERT(new_lof != dbtob(btodb(ul->un_head_lof))); 13517c478bd9Sstevel@tonic-gate 13527c478bd9Sstevel@tonic-gate /* 13537c478bd9Sstevel@tonic-gate * calculate new ident 13547c478bd9Sstevel@tonic-gate */ 13557c478bd9Sstevel@tonic-gate if (new_lof > ul->un_head_lof) { 13567c478bd9Sstevel@tonic-gate end_blkno = btodb(new_lof); 13577c478bd9Sstevel@tonic-gate beg_blkno = btodb(ul->un_head_lof); 13587c478bd9Sstevel@tonic-gate new_ident = ul->un_head_ident + (end_blkno - beg_blkno); 13597c478bd9Sstevel@tonic-gate } else { 13607c478bd9Sstevel@tonic-gate end_blkno = btodb(ul->un_eol_lof); 13617c478bd9Sstevel@tonic-gate beg_blkno = btodb(ul->un_head_lof); 13627c478bd9Sstevel@tonic-gate new_ident = ul->un_head_ident + (end_blkno - beg_blkno); 13637c478bd9Sstevel@tonic-gate 13647c478bd9Sstevel@tonic-gate end_blkno = btodb(new_lof); 13657c478bd9Sstevel@tonic-gate beg_blkno = btodb(ul->un_bol_lof); 13667c478bd9Sstevel@tonic-gate new_ident += (end_blkno - beg_blkno); 13677c478bd9Sstevel@tonic-gate } 13687c478bd9Sstevel@tonic-gate 13697c478bd9Sstevel@tonic-gate /* Fix up the tail and ident */ 13707c478bd9Sstevel@tonic-gate ul->un_tail_lof = new_lof; 13717c478bd9Sstevel@tonic-gate ul->un_tail_ident = new_ident; 13727c478bd9Sstevel@tonic-gate 13737c478bd9Sstevel@tonic-gate /* Commit to the database */ 13747c478bd9Sstevel@tonic-gate ldl_savestate(ul); 13757c478bd9Sstevel@tonic-gate } 13767c478bd9Sstevel@tonic-gate 13777c478bd9Sstevel@tonic-gate /* 13787c478bd9Sstevel@tonic-gate * LOGSCAN STUFF 13797c478bd9Sstevel@tonic-gate */ 13807c478bd9Sstevel@tonic-gate static int 13817c478bd9Sstevel@tonic-gate ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof) 13827c478bd9Sstevel@tonic-gate { 13837c478bd9Sstevel@tonic-gate ulong_t ident; 13847c478bd9Sstevel@tonic-gate size_t nblk, i; 13857c478bd9Sstevel@tonic-gate sect_trailer_t *st; 13867c478bd9Sstevel@tonic-gate 13877c478bd9Sstevel@tonic-gate /* 13887c478bd9Sstevel@tonic-gate * compute ident for first sector in the buffer 13897c478bd9Sstevel@tonic-gate */ 13907c478bd9Sstevel@tonic-gate ident = ul->un_head_ident; 13917c478bd9Sstevel@tonic-gate if (bp->b_blkno >= btodb(ul->un_head_lof)) { 13927c478bd9Sstevel@tonic-gate ident += (bp->b_blkno - btodb(ul->un_head_lof)); 13937c478bd9Sstevel@tonic-gate } else { 13947c478bd9Sstevel@tonic-gate ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof)); 13957c478bd9Sstevel@tonic-gate ident += (bp->b_blkno - btodb(ul->un_bol_lof)); 13967c478bd9Sstevel@tonic-gate } 13977c478bd9Sstevel@tonic-gate /* 13987c478bd9Sstevel@tonic-gate * truncate the buffer down to the last valid sector 13997c478bd9Sstevel@tonic-gate */ 14007c478bd9Sstevel@tonic-gate nblk = btodb(bp->b_bcount); 14017c478bd9Sstevel@tonic-gate bp->b_bcount = 0; 14027c478bd9Sstevel@tonic-gate /* LINTED */ 14037c478bd9Sstevel@tonic-gate st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE); 14047c478bd9Sstevel@tonic-gate for (i = 0; i < nblk; ++i) { 14057c478bd9Sstevel@tonic-gate if (st->st_ident != ident) 14067c478bd9Sstevel@tonic-gate break; 14077c478bd9Sstevel@tonic-gate 14087c478bd9Sstevel@tonic-gate /* remember last valid tid for ldl_logscan_error() */ 14097c478bd9Sstevel@tonic-gate ul->un_tid = st->st_tid; 14107c478bd9Sstevel@tonic-gate 14117c478bd9Sstevel@tonic-gate /* LINTED */ 14127c478bd9Sstevel@tonic-gate st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE); 14137c478bd9Sstevel@tonic-gate ++ident; 14147c478bd9Sstevel@tonic-gate bp->b_bcount += DEV_BSIZE; 14157c478bd9Sstevel@tonic-gate } 14167c478bd9Sstevel@tonic-gate /* 14177c478bd9Sstevel@tonic-gate * make sure that lof is still within range 14187c478bd9Sstevel@tonic-gate */ 14197c478bd9Sstevel@tonic-gate return (within_range(lof, bp->b_blkno, bp->b_bcount)); 14207c478bd9Sstevel@tonic-gate } 14217c478bd9Sstevel@tonic-gate 14227c478bd9Sstevel@tonic-gate ulong_t 14237c478bd9Sstevel@tonic-gate ldl_logscan_nbcommit(off_t lof) 14247c478bd9Sstevel@tonic-gate { 14257c478bd9Sstevel@tonic-gate /* 14267c478bd9Sstevel@tonic-gate * lof is the offset following the commit header. However, 14277c478bd9Sstevel@tonic-gate * if the commit header fell on the end-of-sector, then lof 14287c478bd9Sstevel@tonic-gate * has already been advanced to the beginning of the next 14297c478bd9Sstevel@tonic-gate * sector. So do nothing. Otherwise, return the remaining 14307c478bd9Sstevel@tonic-gate * bytes in the sector. 14317c478bd9Sstevel@tonic-gate */ 14327c478bd9Sstevel@tonic-gate if ((lof & (DEV_BSIZE - 1)) == 0) 14337c478bd9Sstevel@tonic-gate return (0); 14347c478bd9Sstevel@tonic-gate return (NB_LEFT_IN_SECTOR(lof)); 14357c478bd9Sstevel@tonic-gate } 14367c478bd9Sstevel@tonic-gate 14377c478bd9Sstevel@tonic-gate int 14387c478bd9Sstevel@tonic-gate ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va) 14397c478bd9Sstevel@tonic-gate { 14407c478bd9Sstevel@tonic-gate buf_t *bp; 14417c478bd9Sstevel@tonic-gate ulong_t actual; 14427c478bd9Sstevel@tonic-gate 14437c478bd9Sstevel@tonic-gate ASSERT(ul->un_head_lof != ul->un_tail_lof); 14447c478bd9Sstevel@tonic-gate 14457c478bd9Sstevel@tonic-gate /* 14467c478bd9Sstevel@tonic-gate * Check the log data doesn't go out of bounds 14477c478bd9Sstevel@tonic-gate */ 14487c478bd9Sstevel@tonic-gate if (ul->un_head_lof < ul->un_tail_lof) { 14497c478bd9Sstevel@tonic-gate if (!WITHIN(*lofp, nb, ul->un_head_lof, 14507c478bd9Sstevel@tonic-gate (ul->un_tail_lof - ul->un_head_lof))) { 14517c478bd9Sstevel@tonic-gate return (EIO); 14527c478bd9Sstevel@tonic-gate } 14537c478bd9Sstevel@tonic-gate } else { 14547c478bd9Sstevel@tonic-gate if (OVERLAP(*lofp, nb, ul->un_tail_lof, 14557c478bd9Sstevel@tonic-gate (ul->un_head_lof - ul->un_tail_lof))) { 14567c478bd9Sstevel@tonic-gate return (EIO); 14577c478bd9Sstevel@tonic-gate } 14587c478bd9Sstevel@tonic-gate } 14597c478bd9Sstevel@tonic-gate 14607c478bd9Sstevel@tonic-gate while (nb) { 14617c478bd9Sstevel@tonic-gate bp = get_read_bp(ul, *lofp); 14627c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 14637c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 14647c478bd9Sstevel@tonic-gate return (EIO); 14657c478bd9Sstevel@tonic-gate } 14667c478bd9Sstevel@tonic-gate /* 14677c478bd9Sstevel@tonic-gate * out-of-seq idents means partial transaction 14687c478bd9Sstevel@tonic-gate * panic, non-corrupting powerfail, ... 14697c478bd9Sstevel@tonic-gate */ 14707c478bd9Sstevel@tonic-gate if (!ldl_logscan_ident(ul, bp, *lofp)) { 14717c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 14727c478bd9Sstevel@tonic-gate return (EIO); 14737c478bd9Sstevel@tonic-gate } 14747c478bd9Sstevel@tonic-gate /* 14757c478bd9Sstevel@tonic-gate * copy the header into the caller's buf 14767c478bd9Sstevel@tonic-gate */ 14777c478bd9Sstevel@tonic-gate actual = fetchbuf(ul, bp, va, nb, lofp); 14787c478bd9Sstevel@tonic-gate if (va) 14797c478bd9Sstevel@tonic-gate va += actual; 14807c478bd9Sstevel@tonic-gate nb -= actual; 14817c478bd9Sstevel@tonic-gate } 14827c478bd9Sstevel@tonic-gate return (0); 14837c478bd9Sstevel@tonic-gate } 14847c478bd9Sstevel@tonic-gate 14857c478bd9Sstevel@tonic-gate void 14867c478bd9Sstevel@tonic-gate ldl_logscan_begin(ml_unit_t *ul) 14877c478bd9Sstevel@tonic-gate { 14887c478bd9Sstevel@tonic-gate size_t bufsize; 14897c478bd9Sstevel@tonic-gate 14907c478bd9Sstevel@tonic-gate ASSERT(ul->un_wrbuf.cb_dirty == NULL); 14917c478bd9Sstevel@tonic-gate 14927c478bd9Sstevel@tonic-gate /* 14937c478bd9Sstevel@tonic-gate * logscan has begun 14947c478bd9Sstevel@tonic-gate */ 14957c478bd9Sstevel@tonic-gate ul->un_flags |= LDL_SCAN; 14967c478bd9Sstevel@tonic-gate 14977c478bd9Sstevel@tonic-gate /* 14987c478bd9Sstevel@tonic-gate * reset the circular bufs 14997c478bd9Sstevel@tonic-gate */ 15007c478bd9Sstevel@tonic-gate bufsize = ldl_bufsize(ul); 15017c478bd9Sstevel@tonic-gate alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize); 15027c478bd9Sstevel@tonic-gate alloc_wrbuf(&ul->un_wrbuf, bufsize); 15037c478bd9Sstevel@tonic-gate 15047c478bd9Sstevel@tonic-gate /* 15057c478bd9Sstevel@tonic-gate * set the tail to reflect a full log 15067c478bd9Sstevel@tonic-gate */ 15077c478bd9Sstevel@tonic-gate ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE; 15087c478bd9Sstevel@tonic-gate 15097c478bd9Sstevel@tonic-gate if (ul->un_tail_lof < ul->un_bol_lof) 15107c478bd9Sstevel@tonic-gate ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE; 15117c478bd9Sstevel@tonic-gate if (ul->un_tail_lof >= ul->un_eol_lof) 15127c478bd9Sstevel@tonic-gate ul->un_tail_lof = ul->un_bol_lof; 15137c478bd9Sstevel@tonic-gate 15147c478bd9Sstevel@tonic-gate /* 15157c478bd9Sstevel@tonic-gate * un_tid is used during error processing; it is initialized to 15167c478bd9Sstevel@tonic-gate * the tid of the delta at un_head_lof; 15177c478bd9Sstevel@tonic-gate */ 15187c478bd9Sstevel@tonic-gate ul->un_tid = ul->un_head_tid; 15197c478bd9Sstevel@tonic-gate } 15207c478bd9Sstevel@tonic-gate 15217c478bd9Sstevel@tonic-gate void 15227c478bd9Sstevel@tonic-gate ldl_logscan_end(ml_unit_t *ul) 15237c478bd9Sstevel@tonic-gate { 15247c478bd9Sstevel@tonic-gate size_t bufsize; 15257c478bd9Sstevel@tonic-gate 15267c478bd9Sstevel@tonic-gate /* 15277c478bd9Sstevel@tonic-gate * reset the circular bufs 15287c478bd9Sstevel@tonic-gate */ 15297c478bd9Sstevel@tonic-gate bufsize = ldl_bufsize(ul); 15307c478bd9Sstevel@tonic-gate alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE); 15317c478bd9Sstevel@tonic-gate alloc_wrbuf(&ul->un_wrbuf, bufsize); 15327c478bd9Sstevel@tonic-gate 15337c478bd9Sstevel@tonic-gate /* 15347c478bd9Sstevel@tonic-gate * Done w/scan 15357c478bd9Sstevel@tonic-gate */ 15367c478bd9Sstevel@tonic-gate ul->un_flags &= ~LDL_SCAN; 15377c478bd9Sstevel@tonic-gate } 15387c478bd9Sstevel@tonic-gate 15397c478bd9Sstevel@tonic-gate int 15407c478bd9Sstevel@tonic-gate ldl_need_roll(ml_unit_t *ul) 15417c478bd9Sstevel@tonic-gate { 15427c478bd9Sstevel@tonic-gate off_t busybytes; 15437c478bd9Sstevel@tonic-gate off_t head; 15447c478bd9Sstevel@tonic-gate off_t tail; 15457c478bd9Sstevel@tonic-gate off_t bol; 15467c478bd9Sstevel@tonic-gate off_t eol; 15477c478bd9Sstevel@tonic-gate off_t nb; 15487c478bd9Sstevel@tonic-gate 15497c478bd9Sstevel@tonic-gate /* 15507c478bd9Sstevel@tonic-gate * snapshot the log state 15517c478bd9Sstevel@tonic-gate */ 15527c478bd9Sstevel@tonic-gate head = ul->un_head_lof; 15537c478bd9Sstevel@tonic-gate tail = ul->un_tail_lof; 15547c478bd9Sstevel@tonic-gate bol = ul->un_bol_lof; 15557c478bd9Sstevel@tonic-gate eol = ul->un_eol_lof; 15567c478bd9Sstevel@tonic-gate nb = ul->un_logsize; 15577c478bd9Sstevel@tonic-gate 15587c478bd9Sstevel@tonic-gate /* 15597c478bd9Sstevel@tonic-gate * compute number of busy (inuse) bytes 15607c478bd9Sstevel@tonic-gate */ 15617c478bd9Sstevel@tonic-gate if (head <= tail) 15627c478bd9Sstevel@tonic-gate busybytes = tail - head; 15637c478bd9Sstevel@tonic-gate else 15647c478bd9Sstevel@tonic-gate busybytes = (eol - head) + (tail - bol); 15657c478bd9Sstevel@tonic-gate 15667c478bd9Sstevel@tonic-gate /* 15677c478bd9Sstevel@tonic-gate * return TRUE if > 75% full 15687c478bd9Sstevel@tonic-gate */ 15697c478bd9Sstevel@tonic-gate return (busybytes > (nb - (nb >> 2))); 15707c478bd9Sstevel@tonic-gate } 15717c478bd9Sstevel@tonic-gate 15727c478bd9Sstevel@tonic-gate void 15737c478bd9Sstevel@tonic-gate ldl_seterror(ml_unit_t *ul, char *why) 15747c478bd9Sstevel@tonic-gate { 15757c478bd9Sstevel@tonic-gate /* 15767c478bd9Sstevel@tonic-gate * already in error state; do nothing 15777c478bd9Sstevel@tonic-gate */ 15787c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) 15797c478bd9Sstevel@tonic-gate return; 15807c478bd9Sstevel@tonic-gate 15817c478bd9Sstevel@tonic-gate ul->un_flags |= LDL_ERROR; /* incore */ 15827c478bd9Sstevel@tonic-gate ul->un_badlog = 1; /* ondisk (cleared by fsck) */ 15837c478bd9Sstevel@tonic-gate 15847c478bd9Sstevel@tonic-gate /* 15857c478bd9Sstevel@tonic-gate * Commit to state sectors 15867c478bd9Sstevel@tonic-gate */ 15877c478bd9Sstevel@tonic-gate uniqtime(&ul->un_timestamp); 15887c478bd9Sstevel@tonic-gate ldl_savestate(ul); 15897c478bd9Sstevel@tonic-gate 15907c478bd9Sstevel@tonic-gate /* Pretty print */ 15917c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "%s", why); 15927c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "ufs log for %s changed state to Error", 15937c478bd9Sstevel@tonic-gate ul->un_ufsvfs->vfs_fs->fs_fsmnt); 15947c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "Please umount(1M) %s and run fsck(1M)", 15957c478bd9Sstevel@tonic-gate ul->un_ufsvfs->vfs_fs->fs_fsmnt); 15967c478bd9Sstevel@tonic-gate 15977c478bd9Sstevel@tonic-gate /* 15987c478bd9Sstevel@tonic-gate * If we aren't in the middle of scan (aka snarf); tell ufs 15997c478bd9Sstevel@tonic-gate * to hard lock itself. 16007c478bd9Sstevel@tonic-gate */ 16017c478bd9Sstevel@tonic-gate if ((ul->un_flags & LDL_SCAN) == 0) 16027c478bd9Sstevel@tonic-gate ufs_trans_onerror(); 16037c478bd9Sstevel@tonic-gate } 16047c478bd9Sstevel@tonic-gate 16057c478bd9Sstevel@tonic-gate size_t 16067c478bd9Sstevel@tonic-gate ldl_bufsize(ml_unit_t *ul) 16077c478bd9Sstevel@tonic-gate { 16087c478bd9Sstevel@tonic-gate size_t bufsize; 16097c478bd9Sstevel@tonic-gate extern uint32_t ldl_minbufsize; 16107c478bd9Sstevel@tonic-gate 16117c478bd9Sstevel@tonic-gate /* 16127c478bd9Sstevel@tonic-gate * initial guess is the maxtransfer value for this log device 16137c478bd9Sstevel@tonic-gate * increase if too small 16147c478bd9Sstevel@tonic-gate * decrease if too large 16157c478bd9Sstevel@tonic-gate */ 16167c478bd9Sstevel@tonic-gate bufsize = dbtob(btod(ul->un_maxtransfer)); 16177c478bd9Sstevel@tonic-gate if (bufsize < ldl_minbufsize) 16187c478bd9Sstevel@tonic-gate bufsize = ldl_minbufsize; 16197c478bd9Sstevel@tonic-gate if (bufsize > maxphys) 16207c478bd9Sstevel@tonic-gate bufsize = maxphys; 16217c478bd9Sstevel@tonic-gate if (bufsize > ul->un_maxtransfer) 16227c478bd9Sstevel@tonic-gate bufsize = ul->un_maxtransfer; 16237c478bd9Sstevel@tonic-gate return (bufsize); 16247c478bd9Sstevel@tonic-gate } 1625