17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 580d34432Sfrankho * Common Development and Distribution License (the "License"). 680d34432Sfrankho * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22e7da395aSOwen Roberts * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #include <sys/systm.h> 277c478bd9Sstevel@tonic-gate #include <sys/types.h> 287c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 297c478bd9Sstevel@tonic-gate #include <sys/buf.h> 307c478bd9Sstevel@tonic-gate #include <sys/errno.h> 317c478bd9Sstevel@tonic-gate #include <sys/fssnap_if.h> 327c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 337c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_filio.h> 347c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 357c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 367c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h> 377c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 387c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h> 397c478bd9Sstevel@tonic-gate #include <sys/debug.h> 40bc69f433Saguzovsk #include <sys/atomic.h> 417c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 427c478bd9Sstevel@tonic-gate #include <sys/inttypes.h> 437c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 447c478bd9Sstevel@tonic-gate #include <sys/mntent.h> 457c478bd9Sstevel@tonic-gate #include <sys/conf.h> 467c478bd9Sstevel@tonic-gate #include <sys/param.h> 477c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 487c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 494f3979a5SWolfgang Schremser #include <sys/sdt.h> 504f3979a5SWolfgang Schremser 514f3979a5SWolfgang Schremser #define LUFS_GENID_PRIME UINT64_C(4294967291) 524f3979a5SWolfgang Schremser #define LUFS_GENID_BASE UINT64_C(311) 534f3979a5SWolfgang Schremser #define LUFS_NEXT_ID(id) ((uint32_t)(((id) * LUFS_GENID_BASE) % \ 544f3979a5SWolfgang Schremser LUFS_GENID_PRIME)) 557c478bd9Sstevel@tonic-gate 5614c932c0Sbatschul extern kmutex_t ufs_scan_lock; 5714c932c0Sbatschul 587c478bd9Sstevel@tonic-gate static kmutex_t log_mutex; /* general purpose log layer lock */ 597c478bd9Sstevel@tonic-gate kmutex_t ml_scan; /* Scan thread syncronization */ 607c478bd9Sstevel@tonic-gate kcondvar_t ml_scan_cv; /* Scan thread syncronization */ 617c478bd9Sstevel@tonic-gate 627c478bd9Sstevel@tonic-gate struct kmem_cache *lufs_sv; 637c478bd9Sstevel@tonic-gate struct kmem_cache *lufs_bp; 647c478bd9Sstevel@tonic-gate 657c478bd9Sstevel@tonic-gate /* Tunables */ 667c478bd9Sstevel@tonic-gate uint_t ldl_maxlogsize = LDL_MAXLOGSIZE; 677c478bd9Sstevel@tonic-gate uint_t ldl_minlogsize = LDL_MINLOGSIZE; 68e7da395aSOwen Roberts uint_t ldl_softlogcap = LDL_SOFTLOGCAP; 697c478bd9Sstevel@tonic-gate uint32_t ldl_divisor = LDL_DIVISOR; 707c478bd9Sstevel@tonic-gate uint32_t ldl_mintransfer = LDL_MINTRANSFER; 717c478bd9Sstevel@tonic-gate uint32_t ldl_maxtransfer = LDL_MAXTRANSFER; 727c478bd9Sstevel@tonic-gate uint32_t ldl_minbufsize = LDL_MINBUFSIZE; 73e7da395aSOwen Roberts uint32_t ldl_cgsizereq = 0; 747c478bd9Sstevel@tonic-gate 754f3979a5SWolfgang Schremser /* Generation of header ids */ 76698b7835SMilan Cermak static kmutex_t genid_mutex; 77698b7835SMilan Cermak static uint32_t last_loghead_ident = UINT32_C(0); 787c478bd9Sstevel@tonic-gate 797c478bd9Sstevel@tonic-gate /* 807c478bd9Sstevel@tonic-gate * Logging delta and roll statistics 817c478bd9Sstevel@tonic-gate */ 827c478bd9Sstevel@tonic-gate struct delta_kstats { 837c478bd9Sstevel@tonic-gate kstat_named_t ds_superblock_deltas; 847c478bd9Sstevel@tonic-gate kstat_named_t ds_bitmap_deltas; 857c478bd9Sstevel@tonic-gate kstat_named_t ds_suminfo_deltas; 867c478bd9Sstevel@tonic-gate kstat_named_t ds_allocblk_deltas; 877c478bd9Sstevel@tonic-gate kstat_named_t ds_ab0_deltas; 887c478bd9Sstevel@tonic-gate kstat_named_t ds_dir_deltas; 897c478bd9Sstevel@tonic-gate kstat_named_t ds_inode_deltas; 907c478bd9Sstevel@tonic-gate kstat_named_t ds_fbiwrite_deltas; 917c478bd9Sstevel@tonic-gate kstat_named_t ds_quota_deltas; 927c478bd9Sstevel@tonic-gate kstat_named_t ds_shadow_deltas; 937c478bd9Sstevel@tonic-gate 947c478bd9Sstevel@tonic-gate kstat_named_t ds_superblock_rolled; 957c478bd9Sstevel@tonic-gate kstat_named_t ds_bitmap_rolled; 967c478bd9Sstevel@tonic-gate kstat_named_t ds_suminfo_rolled; 977c478bd9Sstevel@tonic-gate kstat_named_t ds_allocblk_rolled; 987c478bd9Sstevel@tonic-gate kstat_named_t ds_ab0_rolled; 997c478bd9Sstevel@tonic-gate kstat_named_t ds_dir_rolled; 1007c478bd9Sstevel@tonic-gate kstat_named_t ds_inode_rolled; 1017c478bd9Sstevel@tonic-gate kstat_named_t ds_fbiwrite_rolled; 1027c478bd9Sstevel@tonic-gate kstat_named_t ds_quota_rolled; 1037c478bd9Sstevel@tonic-gate kstat_named_t ds_shadow_rolled; 1047c478bd9Sstevel@tonic-gate } dkstats = { 1057c478bd9Sstevel@tonic-gate { "superblock_deltas", KSTAT_DATA_UINT64 }, 1067c478bd9Sstevel@tonic-gate { "bitmap_deltas", KSTAT_DATA_UINT64 }, 1077c478bd9Sstevel@tonic-gate { "suminfo_deltas", KSTAT_DATA_UINT64 }, 1087c478bd9Sstevel@tonic-gate { "allocblk_deltas", KSTAT_DATA_UINT64 }, 1097c478bd9Sstevel@tonic-gate { "ab0_deltas", KSTAT_DATA_UINT64 }, 1107c478bd9Sstevel@tonic-gate { "dir_deltas", KSTAT_DATA_UINT64 }, 1117c478bd9Sstevel@tonic-gate { "inode_deltas", KSTAT_DATA_UINT64 }, 1127c478bd9Sstevel@tonic-gate { "fbiwrite_deltas", KSTAT_DATA_UINT64 }, 1137c478bd9Sstevel@tonic-gate { "quota_deltas", KSTAT_DATA_UINT64 }, 1147c478bd9Sstevel@tonic-gate { "shadow_deltas", KSTAT_DATA_UINT64 }, 1157c478bd9Sstevel@tonic-gate 1167c478bd9Sstevel@tonic-gate { "superblock_rolled", KSTAT_DATA_UINT64 }, 1177c478bd9Sstevel@tonic-gate { "bitmap_rolled", KSTAT_DATA_UINT64 }, 1187c478bd9Sstevel@tonic-gate { "suminfo_rolled", KSTAT_DATA_UINT64 }, 1197c478bd9Sstevel@tonic-gate { "allocblk_rolled", KSTAT_DATA_UINT64 }, 1207c478bd9Sstevel@tonic-gate { "ab0_rolled", KSTAT_DATA_UINT64 }, 1217c478bd9Sstevel@tonic-gate { "dir_rolled", KSTAT_DATA_UINT64 }, 1227c478bd9Sstevel@tonic-gate { "inode_rolled", KSTAT_DATA_UINT64 }, 1237c478bd9Sstevel@tonic-gate { "fbiwrite_rolled", KSTAT_DATA_UINT64 }, 1247c478bd9Sstevel@tonic-gate { "quota_rolled", KSTAT_DATA_UINT64 }, 1257c478bd9Sstevel@tonic-gate { "shadow_rolled", KSTAT_DATA_UINT64 } 1267c478bd9Sstevel@tonic-gate }; 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate uint64_t delta_stats[DT_MAX]; 1297c478bd9Sstevel@tonic-gate uint64_t roll_stats[DT_MAX]; 1307c478bd9Sstevel@tonic-gate 1317c478bd9Sstevel@tonic-gate /* 1327c478bd9Sstevel@tonic-gate * General logging kstats 1337c478bd9Sstevel@tonic-gate */ 1347c478bd9Sstevel@tonic-gate struct logstats logstats = { 1357c478bd9Sstevel@tonic-gate { "master_reads", KSTAT_DATA_UINT64 }, 1367c478bd9Sstevel@tonic-gate { "master_writes", KSTAT_DATA_UINT64 }, 1377c478bd9Sstevel@tonic-gate { "log_reads_inmem", KSTAT_DATA_UINT64 }, 1387c478bd9Sstevel@tonic-gate { "log_reads", KSTAT_DATA_UINT64 }, 1397c478bd9Sstevel@tonic-gate { "log_writes", KSTAT_DATA_UINT64 }, 1407c478bd9Sstevel@tonic-gate { "log_master_reads", KSTAT_DATA_UINT64 }, 1417c478bd9Sstevel@tonic-gate { "log_roll_reads", KSTAT_DATA_UINT64 }, 1427c478bd9Sstevel@tonic-gate { "log_roll_writes", KSTAT_DATA_UINT64 } 1437c478bd9Sstevel@tonic-gate }; 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate int 1467c478bd9Sstevel@tonic-gate trans_not_done(struct buf *cb) 1477c478bd9Sstevel@tonic-gate { 1487c478bd9Sstevel@tonic-gate sema_v(&cb->b_io); 1497c478bd9Sstevel@tonic-gate return (0); 1507c478bd9Sstevel@tonic-gate } 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate static void 1537c478bd9Sstevel@tonic-gate trans_wait_panic(struct buf *cb) 1547c478bd9Sstevel@tonic-gate { 1557c478bd9Sstevel@tonic-gate while ((cb->b_flags & B_DONE) == 0) 1567c478bd9Sstevel@tonic-gate drv_usecwait(10); 1577c478bd9Sstevel@tonic-gate } 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate int 1607c478bd9Sstevel@tonic-gate trans_not_wait(struct buf *cb) 1617c478bd9Sstevel@tonic-gate { 1627c478bd9Sstevel@tonic-gate /* 1637c478bd9Sstevel@tonic-gate * In case of panic, busy wait for completion 1647c478bd9Sstevel@tonic-gate */ 1657c478bd9Sstevel@tonic-gate if (panicstr) 1667c478bd9Sstevel@tonic-gate trans_wait_panic(cb); 1677c478bd9Sstevel@tonic-gate else 1687c478bd9Sstevel@tonic-gate sema_p(&cb->b_io); 1697c478bd9Sstevel@tonic-gate 1707c478bd9Sstevel@tonic-gate return (geterror(cb)); 1717c478bd9Sstevel@tonic-gate } 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate int 1747c478bd9Sstevel@tonic-gate trans_wait(struct buf *cb) 1757c478bd9Sstevel@tonic-gate { 1767c478bd9Sstevel@tonic-gate /* 1777c478bd9Sstevel@tonic-gate * In case of panic, busy wait for completion and run md daemon queues 1787c478bd9Sstevel@tonic-gate */ 1797c478bd9Sstevel@tonic-gate if (panicstr) 1807c478bd9Sstevel@tonic-gate trans_wait_panic(cb); 1817c478bd9Sstevel@tonic-gate return (biowait(cb)); 1827c478bd9Sstevel@tonic-gate } 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate static void 1857c478bd9Sstevel@tonic-gate setsum(int32_t *sp, int32_t *lp, int nb) 1867c478bd9Sstevel@tonic-gate { 1877c478bd9Sstevel@tonic-gate int32_t csum = 0; 1887c478bd9Sstevel@tonic-gate 1897c478bd9Sstevel@tonic-gate *sp = 0; 1907c478bd9Sstevel@tonic-gate nb /= sizeof (int32_t); 1917c478bd9Sstevel@tonic-gate while (nb--) 1927c478bd9Sstevel@tonic-gate csum += *lp++; 1937c478bd9Sstevel@tonic-gate *sp = csum; 1947c478bd9Sstevel@tonic-gate } 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate static int 1977c478bd9Sstevel@tonic-gate checksum(int32_t *sp, int32_t *lp, int nb) 1987c478bd9Sstevel@tonic-gate { 1997c478bd9Sstevel@tonic-gate int32_t ssum = *sp; 2007c478bd9Sstevel@tonic-gate 2017c478bd9Sstevel@tonic-gate setsum(sp, lp, nb); 2027c478bd9Sstevel@tonic-gate if (ssum != *sp) { 2037c478bd9Sstevel@tonic-gate *sp = ssum; 2047c478bd9Sstevel@tonic-gate return (0); 2057c478bd9Sstevel@tonic-gate } 2067c478bd9Sstevel@tonic-gate return (1); 2077c478bd9Sstevel@tonic-gate } 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate void 2107c478bd9Sstevel@tonic-gate lufs_unsnarf(ufsvfs_t *ufsvfsp) 2117c478bd9Sstevel@tonic-gate { 2127c478bd9Sstevel@tonic-gate ml_unit_t *ul; 2137c478bd9Sstevel@tonic-gate mt_map_t *mtm; 2147c478bd9Sstevel@tonic-gate 2157c478bd9Sstevel@tonic-gate ul = ufsvfsp->vfs_log; 2167c478bd9Sstevel@tonic-gate if (ul == NULL) 2177c478bd9Sstevel@tonic-gate return; 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate mtm = ul->un_logmap; 2207c478bd9Sstevel@tonic-gate 2217c478bd9Sstevel@tonic-gate /* 2227c478bd9Sstevel@tonic-gate * Wait for a pending top_issue_sync which is 2237c478bd9Sstevel@tonic-gate * dispatched (via taskq_dispatch()) but hasnt completed yet. 2247c478bd9Sstevel@tonic-gate */ 2257c478bd9Sstevel@tonic-gate 2267c478bd9Sstevel@tonic-gate mutex_enter(&mtm->mtm_lock); 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate while (mtm->mtm_taskq_sync_count != 0) { 2297c478bd9Sstevel@tonic-gate cv_wait(&mtm->mtm_cv, &mtm->mtm_lock); 2307c478bd9Sstevel@tonic-gate } 2317c478bd9Sstevel@tonic-gate 2327c478bd9Sstevel@tonic-gate mutex_exit(&mtm->mtm_lock); 2337c478bd9Sstevel@tonic-gate 2347c478bd9Sstevel@tonic-gate /* Roll committed transactions */ 2357c478bd9Sstevel@tonic-gate logmap_roll_dev(ul); 2367c478bd9Sstevel@tonic-gate 2377c478bd9Sstevel@tonic-gate /* Kill the roll thread */ 2387c478bd9Sstevel@tonic-gate logmap_kill_roll(ul); 2397c478bd9Sstevel@tonic-gate 2407c478bd9Sstevel@tonic-gate /* release saved alloction info */ 2417c478bd9Sstevel@tonic-gate if (ul->un_ebp) 2427c478bd9Sstevel@tonic-gate kmem_free(ul->un_ebp, ul->un_nbeb); 2437c478bd9Sstevel@tonic-gate 2447c478bd9Sstevel@tonic-gate /* release circular bufs */ 2457c478bd9Sstevel@tonic-gate free_cirbuf(&ul->un_rdbuf); 2467c478bd9Sstevel@tonic-gate free_cirbuf(&ul->un_wrbuf); 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate /* release maps */ 2497c478bd9Sstevel@tonic-gate if (ul->un_logmap) 2507c478bd9Sstevel@tonic-gate ul->un_logmap = map_put(ul->un_logmap); 2517c478bd9Sstevel@tonic-gate if (ul->un_deltamap) 2527c478bd9Sstevel@tonic-gate ul->un_deltamap = map_put(ul->un_deltamap); 2537c478bd9Sstevel@tonic-gate if (ul->un_matamap) 2547c478bd9Sstevel@tonic-gate ul->un_matamap = map_put(ul->un_matamap); 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate mutex_destroy(&ul->un_log_mutex); 2577c478bd9Sstevel@tonic-gate mutex_destroy(&ul->un_state_mutex); 2587c478bd9Sstevel@tonic-gate 2597c478bd9Sstevel@tonic-gate /* release state buffer MUST BE LAST!! (contains our ondisk data) */ 2607c478bd9Sstevel@tonic-gate if (ul->un_bp) 2617c478bd9Sstevel@tonic-gate brelse(ul->un_bp); 2627c478bd9Sstevel@tonic-gate kmem_free(ul, sizeof (*ul)); 2637c478bd9Sstevel@tonic-gate 2647c478bd9Sstevel@tonic-gate ufsvfsp->vfs_log = NULL; 2657c478bd9Sstevel@tonic-gate } 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate int 2687c478bd9Sstevel@tonic-gate lufs_snarf(ufsvfs_t *ufsvfsp, struct fs *fs, int ronly) 2697c478bd9Sstevel@tonic-gate { 2707c478bd9Sstevel@tonic-gate buf_t *bp, *tbp; 2717c478bd9Sstevel@tonic-gate ml_unit_t *ul; 2727c478bd9Sstevel@tonic-gate extent_block_t *ebp; 2737c478bd9Sstevel@tonic-gate ic_extent_block_t *nebp; 2747c478bd9Sstevel@tonic-gate size_t nb; 2757c478bd9Sstevel@tonic-gate daddr_t bno; /* in disk blocks */ 2767c478bd9Sstevel@tonic-gate int i; 2777c478bd9Sstevel@tonic-gate 2787c478bd9Sstevel@tonic-gate /* LINTED: warning: logical expression always true: op "||" */ 2797c478bd9Sstevel@tonic-gate ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE); 2807c478bd9Sstevel@tonic-gate 2817c478bd9Sstevel@tonic-gate /* 2827c478bd9Sstevel@tonic-gate * Get the allocation table 2837c478bd9Sstevel@tonic-gate * During a remount the superblock pointed to by the ufsvfsp 2847c478bd9Sstevel@tonic-gate * is out of date. Hence the need for the ``new'' superblock 2857c478bd9Sstevel@tonic-gate * pointer, fs, passed in as a parameter. 2867c478bd9Sstevel@tonic-gate */ 2877c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, fs->fs_logbno), 2887c478bd9Sstevel@tonic-gate fs->fs_bsize); 2897c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 2907c478bd9Sstevel@tonic-gate brelse(bp); 2917c478bd9Sstevel@tonic-gate return (EIO); 2927c478bd9Sstevel@tonic-gate } 2937c478bd9Sstevel@tonic-gate ebp = (void *)bp->b_un.b_addr; 2947c478bd9Sstevel@tonic-gate if (!checksum(&ebp->chksum, (int32_t *)bp->b_un.b_addr, 2957c478bd9Sstevel@tonic-gate fs->fs_bsize)) { 2967c478bd9Sstevel@tonic-gate brelse(bp); 2977c478bd9Sstevel@tonic-gate return (ENODEV); 2987c478bd9Sstevel@tonic-gate } 2997c478bd9Sstevel@tonic-gate 3007c478bd9Sstevel@tonic-gate /* 3017c478bd9Sstevel@tonic-gate * It is possible to get log blocks with all zeros. 3027c478bd9Sstevel@tonic-gate * We should also check for nextents to be zero in such case. 3037c478bd9Sstevel@tonic-gate */ 3047c478bd9Sstevel@tonic-gate if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) { 3057c478bd9Sstevel@tonic-gate brelse(bp); 3067c478bd9Sstevel@tonic-gate return (EDOM); 3077c478bd9Sstevel@tonic-gate } 3087c478bd9Sstevel@tonic-gate /* 3097c478bd9Sstevel@tonic-gate * Put allocation into memory. This requires conversion between 3107c478bd9Sstevel@tonic-gate * on the ondisk format of the extent (type extent_t) and the 3117c478bd9Sstevel@tonic-gate * in-core format of the extent (type ic_extent_t). The 3127c478bd9Sstevel@tonic-gate * difference is the in-core form of the extent block stores 3137c478bd9Sstevel@tonic-gate * the physical offset of the extent in disk blocks, which 3147c478bd9Sstevel@tonic-gate * can require more than a 32-bit field. 3157c478bd9Sstevel@tonic-gate */ 3167c478bd9Sstevel@tonic-gate nb = (size_t)(sizeof (ic_extent_block_t) + 3177c478bd9Sstevel@tonic-gate ((ebp->nextents - 1) * sizeof (ic_extent_t))); 3187c478bd9Sstevel@tonic-gate nebp = kmem_alloc(nb, KM_SLEEP); 3197c478bd9Sstevel@tonic-gate nebp->ic_nextents = ebp->nextents; 3207c478bd9Sstevel@tonic-gate nebp->ic_nbytes = ebp->nbytes; 3217c478bd9Sstevel@tonic-gate nebp->ic_nextbno = ebp->nextbno; 3227c478bd9Sstevel@tonic-gate for (i = 0; i < ebp->nextents; i++) { 3237c478bd9Sstevel@tonic-gate nebp->ic_extents[i].ic_lbno = ebp->extents[i].lbno; 3247c478bd9Sstevel@tonic-gate nebp->ic_extents[i].ic_nbno = ebp->extents[i].nbno; 3257c478bd9Sstevel@tonic-gate nebp->ic_extents[i].ic_pbno = 3267c478bd9Sstevel@tonic-gate logbtodb(fs, ebp->extents[i].pbno); 3277c478bd9Sstevel@tonic-gate } 3287c478bd9Sstevel@tonic-gate brelse(bp); 3297c478bd9Sstevel@tonic-gate 3307c478bd9Sstevel@tonic-gate /* 3317c478bd9Sstevel@tonic-gate * Get the log state 3327c478bd9Sstevel@tonic-gate */ 3337c478bd9Sstevel@tonic-gate bno = nebp->ic_extents[0].ic_pbno; 3347c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, DEV_BSIZE); 3357c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 3367c478bd9Sstevel@tonic-gate brelse(bp); 3377c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno + 1, DEV_BSIZE); 3387c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 3397c478bd9Sstevel@tonic-gate brelse(bp); 3407c478bd9Sstevel@tonic-gate kmem_free(nebp, nb); 3417c478bd9Sstevel@tonic-gate return (EIO); 3427c478bd9Sstevel@tonic-gate } 3437c478bd9Sstevel@tonic-gate } 3447c478bd9Sstevel@tonic-gate 3457c478bd9Sstevel@tonic-gate /* 3467c478bd9Sstevel@tonic-gate * Put ondisk struct into an anonymous buffer 3477c478bd9Sstevel@tonic-gate * This buffer will contain the memory for the ml_odunit struct 3487c478bd9Sstevel@tonic-gate */ 3497c478bd9Sstevel@tonic-gate tbp = ngeteblk(dbtob(LS_SECTORS)); 3507c478bd9Sstevel@tonic-gate tbp->b_edev = bp->b_edev; 3517c478bd9Sstevel@tonic-gate tbp->b_dev = bp->b_dev; 3527c478bd9Sstevel@tonic-gate tbp->b_blkno = bno; 3537c478bd9Sstevel@tonic-gate bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE); 3547c478bd9Sstevel@tonic-gate bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE); 3557c478bd9Sstevel@tonic-gate bp->b_flags |= (B_STALE | B_AGE); 3567c478bd9Sstevel@tonic-gate brelse(bp); 3577c478bd9Sstevel@tonic-gate bp = tbp; 3587c478bd9Sstevel@tonic-gate 3597c478bd9Sstevel@tonic-gate /* 3607c478bd9Sstevel@tonic-gate * Verify the log state 3617c478bd9Sstevel@tonic-gate * 3627c478bd9Sstevel@tonic-gate * read/only mounts w/bad logs are allowed. umount will 3637c478bd9Sstevel@tonic-gate * eventually roll the bad log until the first IO error. 3647c478bd9Sstevel@tonic-gate * fsck will then repair the file system. 3657c478bd9Sstevel@tonic-gate * 3667c478bd9Sstevel@tonic-gate * read/write mounts with bad logs are not allowed. 3677c478bd9Sstevel@tonic-gate * 3687c478bd9Sstevel@tonic-gate */ 3697c478bd9Sstevel@tonic-gate ul = (ml_unit_t *)kmem_zalloc(sizeof (*ul), KM_SLEEP); 3707c478bd9Sstevel@tonic-gate bcopy(bp->b_un.b_addr, &ul->un_ondisk, sizeof (ml_odunit_t)); 3717c478bd9Sstevel@tonic-gate if ((ul->un_chksum != ul->un_head_ident + ul->un_tail_ident) || 3727c478bd9Sstevel@tonic-gate (ul->un_version != LUFS_VERSION_LATEST) || 3737c478bd9Sstevel@tonic-gate (!ronly && ul->un_badlog)) { 3747c478bd9Sstevel@tonic-gate kmem_free(ul, sizeof (*ul)); 3757c478bd9Sstevel@tonic-gate brelse(bp); 3767c478bd9Sstevel@tonic-gate kmem_free(nebp, nb); 3777c478bd9Sstevel@tonic-gate return (EIO); 3787c478bd9Sstevel@tonic-gate } 3797c478bd9Sstevel@tonic-gate /* 3807c478bd9Sstevel@tonic-gate * Initialize the incore-only fields 3817c478bd9Sstevel@tonic-gate */ 3827c478bd9Sstevel@tonic-gate if (ronly) 3837c478bd9Sstevel@tonic-gate ul->un_flags |= LDL_NOROLL; 3847c478bd9Sstevel@tonic-gate ul->un_bp = bp; 3857c478bd9Sstevel@tonic-gate ul->un_ufsvfs = ufsvfsp; 3867c478bd9Sstevel@tonic-gate ul->un_dev = ufsvfsp->vfs_dev; 3877c478bd9Sstevel@tonic-gate ul->un_ebp = nebp; 3887c478bd9Sstevel@tonic-gate ul->un_nbeb = nb; 3897c478bd9Sstevel@tonic-gate ul->un_maxresv = btodb(ul->un_logsize) * LDL_USABLE_BSIZE; 3907c478bd9Sstevel@tonic-gate ul->un_deltamap = map_get(ul, deltamaptype, DELTAMAP_NHASH); 3917c478bd9Sstevel@tonic-gate ul->un_logmap = map_get(ul, logmaptype, LOGMAP_NHASH); 3927c478bd9Sstevel@tonic-gate if (ul->un_debug & MT_MATAMAP) 3937c478bd9Sstevel@tonic-gate ul->un_matamap = map_get(ul, matamaptype, DELTAMAP_NHASH); 3947c478bd9Sstevel@tonic-gate mutex_init(&ul->un_log_mutex, NULL, MUTEX_DEFAULT, NULL); 3957c478bd9Sstevel@tonic-gate mutex_init(&ul->un_state_mutex, NULL, MUTEX_DEFAULT, NULL); 39614c932c0Sbatschul 39714c932c0Sbatschul /* 39814c932c0Sbatschul * Aquire the ufs_scan_lock before linking the mtm data 39914c932c0Sbatschul * structure so that we keep ufs_sync() and ufs_update() away 40014c932c0Sbatschul * when they execute the ufs_scan_inodes() run while we're in 40114c932c0Sbatschul * progress of enabling/disabling logging. 40214c932c0Sbatschul */ 40314c932c0Sbatschul mutex_enter(&ufs_scan_lock); 4047c478bd9Sstevel@tonic-gate ufsvfsp->vfs_log = ul; 4057c478bd9Sstevel@tonic-gate 4067c478bd9Sstevel@tonic-gate /* remember the state of the log before the log scan */ 4077c478bd9Sstevel@tonic-gate logmap_logscan(ul); 40814c932c0Sbatschul mutex_exit(&ufs_scan_lock); 4097c478bd9Sstevel@tonic-gate 4107c478bd9Sstevel@tonic-gate /* 4117c478bd9Sstevel@tonic-gate * Error during scan 4127c478bd9Sstevel@tonic-gate * 4137c478bd9Sstevel@tonic-gate * If this is a read/only mount; ignore the error. 4147c478bd9Sstevel@tonic-gate * At a later time umount/fsck will repair the fs. 4157c478bd9Sstevel@tonic-gate * 4167c478bd9Sstevel@tonic-gate */ 4177c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 4187c478bd9Sstevel@tonic-gate if (!ronly) { 41914c932c0Sbatschul /* 42014c932c0Sbatschul * Aquire the ufs_scan_lock before de-linking 42114c932c0Sbatschul * the mtm data structure so that we keep ufs_sync() 42214c932c0Sbatschul * and ufs_update() away when they execute the 42314c932c0Sbatschul * ufs_scan_inodes() run while we're in progress of 42414c932c0Sbatschul * enabling/disabling logging. 42514c932c0Sbatschul */ 42614c932c0Sbatschul mutex_enter(&ufs_scan_lock); 4277c478bd9Sstevel@tonic-gate lufs_unsnarf(ufsvfsp); 42814c932c0Sbatschul mutex_exit(&ufs_scan_lock); 4297c478bd9Sstevel@tonic-gate return (EIO); 4307c478bd9Sstevel@tonic-gate } 4317c478bd9Sstevel@tonic-gate ul->un_flags &= ~LDL_ERROR; 4327c478bd9Sstevel@tonic-gate } 4337c478bd9Sstevel@tonic-gate if (!ronly) 4347c478bd9Sstevel@tonic-gate logmap_start_roll(ul); 4357c478bd9Sstevel@tonic-gate return (0); 4367c478bd9Sstevel@tonic-gate } 4377c478bd9Sstevel@tonic-gate 4384f3979a5SWolfgang Schremser uint32_t 4394f3979a5SWolfgang Schremser lufs_hd_genid(const ml_unit_t *up) 4404f3979a5SWolfgang Schremser { 4414f3979a5SWolfgang Schremser uint32_t id; 4424f3979a5SWolfgang Schremser 4434f3979a5SWolfgang Schremser mutex_enter(&genid_mutex); 4444f3979a5SWolfgang Schremser 4454f3979a5SWolfgang Schremser /* 4464f3979a5SWolfgang Schremser * The formula below implements an exponential, modular sequence. 4474f3979a5SWolfgang Schremser * 4484f3979a5SWolfgang Schremser * ID(N) = (SEED * (BASE^N)) % PRIME 4494f3979a5SWolfgang Schremser * 4504f3979a5SWolfgang Schremser * The numbers will be pseudo random. They depend on SEED, BASE, PRIME, 4514f3979a5SWolfgang Schremser * but will sweep through almost all of the range 1....PRIME-1. 4524f3979a5SWolfgang Schremser * Most importantly they will not repeat for PRIME-2 (4294967289) 4534f3979a5SWolfgang Schremser * repetitions. If they would repeat that could possibly cause hangs, 4544f3979a5SWolfgang Schremser * panics at mount/umount and failed mount operations. 4554f3979a5SWolfgang Schremser */ 4564f3979a5SWolfgang Schremser id = LUFS_NEXT_ID(last_loghead_ident); 4574f3979a5SWolfgang Schremser 4584f3979a5SWolfgang Schremser /* Checking if new identity used already */ 4594f3979a5SWolfgang Schremser if (up != NULL && up->un_head_ident == id) { 4604f3979a5SWolfgang Schremser DTRACE_PROBE1(head_ident_collision, uint32_t, id); 4614f3979a5SWolfgang Schremser 4624f3979a5SWolfgang Schremser /* 4634f3979a5SWolfgang Schremser * The following preserves the algorithm for the fix for 4644f3979a5SWolfgang Schremser * "panic: free: freeing free frag, dev:0x2000000018, blk:34605, 4654f3979a5SWolfgang Schremser * cg:26, ino:148071,". 4664f3979a5SWolfgang Schremser * If the header identities un_head_ident are equal to the 4674f3979a5SWolfgang Schremser * present element in the sequence, the next element of the 4684f3979a5SWolfgang Schremser * sequence is returned instead. 4694f3979a5SWolfgang Schremser */ 4704f3979a5SWolfgang Schremser id = LUFS_NEXT_ID(id); 4714f3979a5SWolfgang Schremser } 4724f3979a5SWolfgang Schremser 4734f3979a5SWolfgang Schremser last_loghead_ident = id; 4744f3979a5SWolfgang Schremser 4754f3979a5SWolfgang Schremser mutex_exit(&genid_mutex); 4764f3979a5SWolfgang Schremser 4774f3979a5SWolfgang Schremser return (id); 4784f3979a5SWolfgang Schremser } 4794f3979a5SWolfgang Schremser 4804f3979a5SWolfgang Schremser static void 4814f3979a5SWolfgang Schremser lufs_genid_init(void) 4824f3979a5SWolfgang Schremser { 4834f3979a5SWolfgang Schremser uint64_t seed; 4844f3979a5SWolfgang Schremser 4854f3979a5SWolfgang Schremser /* Initialization */ 4864f3979a5SWolfgang Schremser mutex_init(&genid_mutex, NULL, MUTEX_DEFAULT, NULL); 4874f3979a5SWolfgang Schremser 4884f3979a5SWolfgang Schremser /* Seed the algorithm */ 4894f3979a5SWolfgang Schremser do { 4904f3979a5SWolfgang Schremser timestruc_t tv; 4914f3979a5SWolfgang Schremser 4924f3979a5SWolfgang Schremser gethrestime(&tv); 4934f3979a5SWolfgang Schremser 4944f3979a5SWolfgang Schremser seed = (tv.tv_nsec << 3); 4954f3979a5SWolfgang Schremser seed ^= tv.tv_sec; 4964f3979a5SWolfgang Schremser 4974f3979a5SWolfgang Schremser last_loghead_ident = (uint32_t)(seed % LUFS_GENID_PRIME); 4984f3979a5SWolfgang Schremser } while (last_loghead_ident == UINT32_C(0)); 4994f3979a5SWolfgang Schremser } 5004f3979a5SWolfgang Schremser 5017c478bd9Sstevel@tonic-gate static int 5027c478bd9Sstevel@tonic-gate lufs_initialize( 5037c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp, 5047c478bd9Sstevel@tonic-gate daddr_t bno, 5057c478bd9Sstevel@tonic-gate size_t nb, 5067c478bd9Sstevel@tonic-gate struct fiolog *flp) 5077c478bd9Sstevel@tonic-gate { 5087c478bd9Sstevel@tonic-gate ml_odunit_t *ud, *ud2; 5097c478bd9Sstevel@tonic-gate buf_t *bp; 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate /* LINTED: warning: logical expression always true: op "||" */ 5127c478bd9Sstevel@tonic-gate ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE); 5137c478bd9Sstevel@tonic-gate ASSERT(nb >= ldl_minlogsize); 5147c478bd9Sstevel@tonic-gate 5157c478bd9Sstevel@tonic-gate bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, dbtob(LS_SECTORS)); 5167c478bd9Sstevel@tonic-gate bzero(bp->b_un.b_addr, bp->b_bcount); 5177c478bd9Sstevel@tonic-gate 5187c478bd9Sstevel@tonic-gate ud = (void *)bp->b_un.b_addr; 5197c478bd9Sstevel@tonic-gate ud->od_version = LUFS_VERSION_LATEST; 5207c478bd9Sstevel@tonic-gate ud->od_maxtransfer = MIN(ufsvfsp->vfs_iotransz, ldl_maxtransfer); 5217c478bd9Sstevel@tonic-gate if (ud->od_maxtransfer < ldl_mintransfer) 5227c478bd9Sstevel@tonic-gate ud->od_maxtransfer = ldl_mintransfer; 5237c478bd9Sstevel@tonic-gate ud->od_devbsize = DEV_BSIZE; 5247c478bd9Sstevel@tonic-gate 5257c478bd9Sstevel@tonic-gate ud->od_requestsize = flp->nbytes_actual; 5267c478bd9Sstevel@tonic-gate ud->od_statesize = dbtob(LS_SECTORS); 5277c478bd9Sstevel@tonic-gate ud->od_logsize = nb - ud->od_statesize; 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate ud->od_statebno = INT32_C(0); 5307c478bd9Sstevel@tonic-gate 5314f3979a5SWolfgang Schremser ud->od_head_ident = lufs_hd_genid(NULL); 5327c478bd9Sstevel@tonic-gate ud->od_tail_ident = ud->od_head_ident; 5337c478bd9Sstevel@tonic-gate ud->od_chksum = ud->od_head_ident + ud->od_tail_ident; 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate ud->od_bol_lof = dbtob(ud->od_statebno) + ud->od_statesize; 5367c478bd9Sstevel@tonic-gate ud->od_eol_lof = ud->od_bol_lof + ud->od_logsize; 5377c478bd9Sstevel@tonic-gate ud->od_head_lof = ud->od_bol_lof; 5387c478bd9Sstevel@tonic-gate ud->od_tail_lof = ud->od_bol_lof; 5397c478bd9Sstevel@tonic-gate 5407c478bd9Sstevel@tonic-gate ASSERT(lufs_initialize_debug(ud)); 5417c478bd9Sstevel@tonic-gate 5427c478bd9Sstevel@tonic-gate ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE); 5437c478bd9Sstevel@tonic-gate bcopy(ud, ud2, sizeof (*ud)); 5447c478bd9Sstevel@tonic-gate 5457c478bd9Sstevel@tonic-gate UFS_BWRITE2(ufsvfsp, bp); 5467c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 5477c478bd9Sstevel@tonic-gate brelse(bp); 5487c478bd9Sstevel@tonic-gate return (EIO); 5497c478bd9Sstevel@tonic-gate } 5507c478bd9Sstevel@tonic-gate brelse(bp); 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate return (0); 5537c478bd9Sstevel@tonic-gate } 5547c478bd9Sstevel@tonic-gate 5557c478bd9Sstevel@tonic-gate /* 5567c478bd9Sstevel@tonic-gate * Free log space 5577c478bd9Sstevel@tonic-gate * Assumes the file system is write locked and is not logging 5587c478bd9Sstevel@tonic-gate */ 5597c478bd9Sstevel@tonic-gate static int 5607c478bd9Sstevel@tonic-gate lufs_free(struct ufsvfs *ufsvfsp) 5617c478bd9Sstevel@tonic-gate { 5627c478bd9Sstevel@tonic-gate int error = 0, i, j; 5637c478bd9Sstevel@tonic-gate buf_t *bp = NULL; 5647c478bd9Sstevel@tonic-gate extent_t *ep; 5657c478bd9Sstevel@tonic-gate extent_block_t *ebp; 5667c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 5677c478bd9Sstevel@tonic-gate daddr_t fno; 5687c478bd9Sstevel@tonic-gate int32_t logbno; 5697c478bd9Sstevel@tonic-gate long nfno; 5707c478bd9Sstevel@tonic-gate inode_t *ip = NULL; 5717c478bd9Sstevel@tonic-gate char clean; 5727c478bd9Sstevel@tonic-gate 5737c478bd9Sstevel@tonic-gate /* 5747c478bd9Sstevel@tonic-gate * Nothing to free 5757c478bd9Sstevel@tonic-gate */ 5767c478bd9Sstevel@tonic-gate if (fs->fs_logbno == 0) 5777c478bd9Sstevel@tonic-gate return (0); 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate /* 5807c478bd9Sstevel@tonic-gate * Mark the file system as FSACTIVE and no log but honor the 5817c478bd9Sstevel@tonic-gate * current value of fs_reclaim. The reclaim thread could have 5827c478bd9Sstevel@tonic-gate * been active when lufs_disable() was called and if fs_reclaim 5837c478bd9Sstevel@tonic-gate * is reset to zero here it could lead to lost inodes. 5847c478bd9Sstevel@tonic-gate */ 5857c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 5867c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 5877c478bd9Sstevel@tonic-gate clean = fs->fs_clean; 5887c478bd9Sstevel@tonic-gate logbno = fs->fs_logbno; 5897c478bd9Sstevel@tonic-gate fs->fs_clean = FSACTIVE; 5907c478bd9Sstevel@tonic-gate fs->fs_logbno = INT32_C(0); 5917c478bd9Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 5927c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 5937c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 5947c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) { 5957c478bd9Sstevel@tonic-gate error = EIO; 5967c478bd9Sstevel@tonic-gate fs->fs_clean = clean; 5977c478bd9Sstevel@tonic-gate fs->fs_logbno = logbno; 5987c478bd9Sstevel@tonic-gate goto errout; 5997c478bd9Sstevel@tonic-gate } 6007c478bd9Sstevel@tonic-gate 6017c478bd9Sstevel@tonic-gate /* 6027c478bd9Sstevel@tonic-gate * fetch the allocation block 6037c478bd9Sstevel@tonic-gate * superblock -> one block of extents -> log data 6047c478bd9Sstevel@tonic-gate */ 6057c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, logbno), 6067c478bd9Sstevel@tonic-gate fs->fs_bsize); 6077c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 6087c478bd9Sstevel@tonic-gate error = EIO; 6097c478bd9Sstevel@tonic-gate goto errout; 6107c478bd9Sstevel@tonic-gate } 6117c478bd9Sstevel@tonic-gate 6127c478bd9Sstevel@tonic-gate /* 6137c478bd9Sstevel@tonic-gate * Free up the allocated space (dummy inode needed for free()) 6147c478bd9Sstevel@tonic-gate */ 6157c478bd9Sstevel@tonic-gate ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO); 6167c478bd9Sstevel@tonic-gate ebp = (void *)bp->b_un.b_addr; 6177c478bd9Sstevel@tonic-gate for (i = 0, ep = &ebp->extents[0]; i < ebp->nextents; ++i, ++ep) { 6187c478bd9Sstevel@tonic-gate fno = logbtofrag(fs, ep->pbno); 6197c478bd9Sstevel@tonic-gate nfno = dbtofsb(fs, ep->nbno); 6207c478bd9Sstevel@tonic-gate for (j = 0; j < nfno; j += fs->fs_frag, fno += fs->fs_frag) 6217c478bd9Sstevel@tonic-gate free(ip, fno, fs->fs_bsize, 0); 6227c478bd9Sstevel@tonic-gate } 6237c478bd9Sstevel@tonic-gate free(ip, logbtofrag(fs, logbno), fs->fs_bsize, 0); 6247c478bd9Sstevel@tonic-gate brelse(bp); 6257c478bd9Sstevel@tonic-gate bp = NULL; 6267c478bd9Sstevel@tonic-gate 6277c478bd9Sstevel@tonic-gate /* 6287c478bd9Sstevel@tonic-gate * Push the metadata dirtied during the allocations 6297c478bd9Sstevel@tonic-gate */ 6307c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 6317c478bd9Sstevel@tonic-gate sbupdate(ufsvfsp->vfs_vfs); 6327c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 6337c478bd9Sstevel@tonic-gate bflush(ufsvfsp->vfs_dev); 6347c478bd9Sstevel@tonic-gate error = bfinval(ufsvfsp->vfs_dev, 0); 6357c478bd9Sstevel@tonic-gate if (error) 6367c478bd9Sstevel@tonic-gate goto errout; 6377c478bd9Sstevel@tonic-gate 6387c478bd9Sstevel@tonic-gate /* 6397c478bd9Sstevel@tonic-gate * Free the dummy inode 6407c478bd9Sstevel@tonic-gate */ 6417c478bd9Sstevel@tonic-gate ufs_free_inode(ip); 6427c478bd9Sstevel@tonic-gate 6437c478bd9Sstevel@tonic-gate return (0); 6447c478bd9Sstevel@tonic-gate 6457c478bd9Sstevel@tonic-gate errout: 6467c478bd9Sstevel@tonic-gate /* 6477c478bd9Sstevel@tonic-gate * Free up all resources 6487c478bd9Sstevel@tonic-gate */ 6497c478bd9Sstevel@tonic-gate if (bp) 6507c478bd9Sstevel@tonic-gate brelse(bp); 6517c478bd9Sstevel@tonic-gate if (ip) 6527c478bd9Sstevel@tonic-gate ufs_free_inode(ip); 6537c478bd9Sstevel@tonic-gate return (error); 6547c478bd9Sstevel@tonic-gate } 6557c478bd9Sstevel@tonic-gate 6567c478bd9Sstevel@tonic-gate /* 6577c478bd9Sstevel@tonic-gate * Allocate log space 6587c478bd9Sstevel@tonic-gate * Assumes the file system is write locked and is not logging 6597c478bd9Sstevel@tonic-gate */ 6607c478bd9Sstevel@tonic-gate static int 661e7da395aSOwen Roberts lufs_alloc(struct ufsvfs *ufsvfsp, struct fiolog *flp, size_t minb, cred_t *cr) 6627c478bd9Sstevel@tonic-gate { 6637c478bd9Sstevel@tonic-gate int error = 0; 6647c478bd9Sstevel@tonic-gate buf_t *bp = NULL; 6657c478bd9Sstevel@tonic-gate extent_t *ep, *nep; 6667c478bd9Sstevel@tonic-gate extent_block_t *ebp; 6677c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 6687c478bd9Sstevel@tonic-gate daddr_t fno; /* in frags */ 6697c478bd9Sstevel@tonic-gate daddr_t bno; /* in disk blocks */ 6707c478bd9Sstevel@tonic-gate int32_t logbno = INT32_C(0); /* will be fs_logbno */ 6717c478bd9Sstevel@tonic-gate struct inode *ip = NULL; 6727c478bd9Sstevel@tonic-gate size_t nb = flp->nbytes_actual; 6737c478bd9Sstevel@tonic-gate size_t tb = 0; 6747c478bd9Sstevel@tonic-gate 6757c478bd9Sstevel@tonic-gate /* 6767c478bd9Sstevel@tonic-gate * Mark the file system as FSACTIVE 6777c478bd9Sstevel@tonic-gate */ 6787c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 6797c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 6807c478bd9Sstevel@tonic-gate fs->fs_clean = FSACTIVE; 6817c478bd9Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 6827c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 6837c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 6847c478bd9Sstevel@tonic-gate 6857c478bd9Sstevel@tonic-gate /* 6867c478bd9Sstevel@tonic-gate * Allocate the allocation block (need dummy shadow inode; 6877c478bd9Sstevel@tonic-gate * we use a shadow inode so the quota sub-system ignores 6887c478bd9Sstevel@tonic-gate * the block allocations.) 6897c478bd9Sstevel@tonic-gate * superblock -> one block of extents -> log data 6907c478bd9Sstevel@tonic-gate */ 6917c478bd9Sstevel@tonic-gate ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO); 6927c478bd9Sstevel@tonic-gate ip->i_mode = IFSHAD; /* make the dummy a shadow inode */ 6937c478bd9Sstevel@tonic-gate rw_enter(&ip->i_contents, RW_WRITER); 694e7da395aSOwen Roberts fno = contigpref(ufsvfsp, nb + fs->fs_bsize, minb); 6957c478bd9Sstevel@tonic-gate error = alloc(ip, fno, fs->fs_bsize, &fno, cr); 6967c478bd9Sstevel@tonic-gate if (error) 6977c478bd9Sstevel@tonic-gate goto errout; 6987c478bd9Sstevel@tonic-gate bno = fsbtodb(fs, fno); 6997c478bd9Sstevel@tonic-gate 7007c478bd9Sstevel@tonic-gate bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, fs->fs_bsize); 7017c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 7027c478bd9Sstevel@tonic-gate error = EIO; 7037c478bd9Sstevel@tonic-gate goto errout; 7047c478bd9Sstevel@tonic-gate } 7057c478bd9Sstevel@tonic-gate 7067c478bd9Sstevel@tonic-gate ebp = (void *)bp->b_un.b_addr; 7077c478bd9Sstevel@tonic-gate ebp->type = LUFS_EXTENTS; 7087c478bd9Sstevel@tonic-gate ebp->nextbno = UINT32_C(0); 7097c478bd9Sstevel@tonic-gate ebp->nextents = UINT32_C(0); 7107c478bd9Sstevel@tonic-gate ebp->chksum = INT32_C(0); 7117c478bd9Sstevel@tonic-gate if (fs->fs_magic == FS_MAGIC) 7127c478bd9Sstevel@tonic-gate logbno = bno; 7137c478bd9Sstevel@tonic-gate else 7147c478bd9Sstevel@tonic-gate logbno = dbtofsb(fs, bno); 7157c478bd9Sstevel@tonic-gate 7167c478bd9Sstevel@tonic-gate /* 7177c478bd9Sstevel@tonic-gate * Initialize the first extent 7187c478bd9Sstevel@tonic-gate */ 7197c478bd9Sstevel@tonic-gate ep = &ebp->extents[0]; 7207c478bd9Sstevel@tonic-gate error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr); 7217c478bd9Sstevel@tonic-gate if (error) 7227c478bd9Sstevel@tonic-gate goto errout; 7237c478bd9Sstevel@tonic-gate bno = fsbtodb(fs, fno); 7247c478bd9Sstevel@tonic-gate 7257c478bd9Sstevel@tonic-gate ep->lbno = UINT32_C(0); 7267c478bd9Sstevel@tonic-gate if (fs->fs_magic == FS_MAGIC) 7277c478bd9Sstevel@tonic-gate ep->pbno = (uint32_t)bno; 7287c478bd9Sstevel@tonic-gate else 7297c478bd9Sstevel@tonic-gate ep->pbno = (uint32_t)fno; 7307c478bd9Sstevel@tonic-gate ep->nbno = (uint32_t)fsbtodb(fs, fs->fs_frag); 7317c478bd9Sstevel@tonic-gate ebp->nextents = UINT32_C(1); 7327c478bd9Sstevel@tonic-gate tb = fs->fs_bsize; 7337c478bd9Sstevel@tonic-gate nb -= fs->fs_bsize; 7347c478bd9Sstevel@tonic-gate 7357c478bd9Sstevel@tonic-gate while (nb) { 7367c478bd9Sstevel@tonic-gate error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr); 7377c478bd9Sstevel@tonic-gate if (error) { 738e7da395aSOwen Roberts if (tb < minb) 7397c478bd9Sstevel@tonic-gate goto errout; 7407c478bd9Sstevel@tonic-gate error = 0; 7417c478bd9Sstevel@tonic-gate break; 7427c478bd9Sstevel@tonic-gate } 7437c478bd9Sstevel@tonic-gate bno = fsbtodb(fs, fno); 7447c478bd9Sstevel@tonic-gate if ((daddr_t)((logbtodb(fs, ep->pbno) + ep->nbno) == bno)) 7457c478bd9Sstevel@tonic-gate ep->nbno += (uint32_t)(fsbtodb(fs, fs->fs_frag)); 7467c478bd9Sstevel@tonic-gate else { 7477c478bd9Sstevel@tonic-gate nep = ep + 1; 7487c478bd9Sstevel@tonic-gate if ((caddr_t)(nep + 1) > 7497c478bd9Sstevel@tonic-gate (bp->b_un.b_addr + fs->fs_bsize)) { 7507c478bd9Sstevel@tonic-gate free(ip, fno, fs->fs_bsize, 0); 7517c478bd9Sstevel@tonic-gate break; 7527c478bd9Sstevel@tonic-gate } 7537c478bd9Sstevel@tonic-gate nep->lbno = ep->lbno + ep->nbno; 7547c478bd9Sstevel@tonic-gate if (fs->fs_magic == FS_MAGIC) 7557c478bd9Sstevel@tonic-gate nep->pbno = (uint32_t)bno; 7567c478bd9Sstevel@tonic-gate else 7577c478bd9Sstevel@tonic-gate nep->pbno = (uint32_t)fno; 7587c478bd9Sstevel@tonic-gate nep->nbno = (uint32_t)(fsbtodb(fs, fs->fs_frag)); 7597c478bd9Sstevel@tonic-gate ebp->nextents++; 7607c478bd9Sstevel@tonic-gate ep = nep; 7617c478bd9Sstevel@tonic-gate } 7627c478bd9Sstevel@tonic-gate tb += fs->fs_bsize; 7637c478bd9Sstevel@tonic-gate nb -= fs->fs_bsize; 7647c478bd9Sstevel@tonic-gate } 765e7da395aSOwen Roberts 766e7da395aSOwen Roberts if (tb < minb) { /* Failed to reach minimum log size */ 767e7da395aSOwen Roberts error = ENOSPC; 768e7da395aSOwen Roberts goto errout; 769e7da395aSOwen Roberts } 770e7da395aSOwen Roberts 7717c478bd9Sstevel@tonic-gate ebp->nbytes = (uint32_t)tb; 7727c478bd9Sstevel@tonic-gate setsum(&ebp->chksum, (int32_t *)bp->b_un.b_addr, fs->fs_bsize); 7737c478bd9Sstevel@tonic-gate UFS_BWRITE2(ufsvfsp, bp); 7747c478bd9Sstevel@tonic-gate if (bp->b_flags & B_ERROR) { 7757c478bd9Sstevel@tonic-gate error = EIO; 7767c478bd9Sstevel@tonic-gate goto errout; 7777c478bd9Sstevel@tonic-gate } 7787c478bd9Sstevel@tonic-gate /* 7797c478bd9Sstevel@tonic-gate * Initialize the first two sectors of the log 7807c478bd9Sstevel@tonic-gate */ 7817c478bd9Sstevel@tonic-gate error = lufs_initialize(ufsvfsp, logbtodb(fs, ebp->extents[0].pbno), 7827c478bd9Sstevel@tonic-gate tb, flp); 7837c478bd9Sstevel@tonic-gate if (error) 7847c478bd9Sstevel@tonic-gate goto errout; 7857c478bd9Sstevel@tonic-gate 7867c478bd9Sstevel@tonic-gate /* 7877c478bd9Sstevel@tonic-gate * We are done initializing the allocation block and the log 7887c478bd9Sstevel@tonic-gate */ 7897c478bd9Sstevel@tonic-gate brelse(bp); 7907c478bd9Sstevel@tonic-gate bp = NULL; 7917c478bd9Sstevel@tonic-gate 7927c478bd9Sstevel@tonic-gate /* 7937c478bd9Sstevel@tonic-gate * Update the superblock and push the dirty metadata 7947c478bd9Sstevel@tonic-gate */ 7957c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 7967c478bd9Sstevel@tonic-gate sbupdate(ufsvfsp->vfs_vfs); 7977c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 7987c478bd9Sstevel@tonic-gate bflush(ufsvfsp->vfs_dev); 7997c478bd9Sstevel@tonic-gate error = bfinval(ufsvfsp->vfs_dev, 1); 8007c478bd9Sstevel@tonic-gate if (error) 8017c478bd9Sstevel@tonic-gate goto errout; 8027c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) { 8037c478bd9Sstevel@tonic-gate error = EIO; 8047c478bd9Sstevel@tonic-gate goto errout; 8057c478bd9Sstevel@tonic-gate } 8067c478bd9Sstevel@tonic-gate 8077c478bd9Sstevel@tonic-gate /* 8087c478bd9Sstevel@tonic-gate * Everything is safely on disk; update log space pointer in sb 8097c478bd9Sstevel@tonic-gate */ 8107c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 8117c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 8127c478bd9Sstevel@tonic-gate fs->fs_logbno = (uint32_t)logbno; 8137c478bd9Sstevel@tonic-gate ufs_sbwrite(ufsvfsp); 8147c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 8157c478bd9Sstevel@tonic-gate ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 8167c478bd9Sstevel@tonic-gate 8177c478bd9Sstevel@tonic-gate /* 8187c478bd9Sstevel@tonic-gate * Free the dummy inode 8197c478bd9Sstevel@tonic-gate */ 8207c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 8217c478bd9Sstevel@tonic-gate ufs_free_inode(ip); 8227c478bd9Sstevel@tonic-gate 8237c478bd9Sstevel@tonic-gate /* inform user of real log size */ 8247c478bd9Sstevel@tonic-gate flp->nbytes_actual = tb; 8257c478bd9Sstevel@tonic-gate return (0); 8267c478bd9Sstevel@tonic-gate 8277c478bd9Sstevel@tonic-gate errout: 8287c478bd9Sstevel@tonic-gate /* 8297c478bd9Sstevel@tonic-gate * Free all resources 8307c478bd9Sstevel@tonic-gate */ 8317c478bd9Sstevel@tonic-gate if (bp) 8327c478bd9Sstevel@tonic-gate brelse(bp); 8337c478bd9Sstevel@tonic-gate if (logbno) { 8347c478bd9Sstevel@tonic-gate fs->fs_logbno = logbno; 8357c478bd9Sstevel@tonic-gate (void) lufs_free(ufsvfsp); 8367c478bd9Sstevel@tonic-gate } 8377c478bd9Sstevel@tonic-gate if (ip) { 8387c478bd9Sstevel@tonic-gate rw_exit(&ip->i_contents); 8397c478bd9Sstevel@tonic-gate ufs_free_inode(ip); 8407c478bd9Sstevel@tonic-gate } 8417c478bd9Sstevel@tonic-gate return (error); 8427c478bd9Sstevel@tonic-gate } 8437c478bd9Sstevel@tonic-gate 8447c478bd9Sstevel@tonic-gate /* 8457c478bd9Sstevel@tonic-gate * Disable logging 8467c478bd9Sstevel@tonic-gate */ 8477c478bd9Sstevel@tonic-gate int 8487c478bd9Sstevel@tonic-gate lufs_disable(vnode_t *vp, struct fiolog *flp) 8497c478bd9Sstevel@tonic-gate { 8507c478bd9Sstevel@tonic-gate int error = 0; 8517c478bd9Sstevel@tonic-gate inode_t *ip = VTOI(vp); 8527c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ip->i_ufsvfs; 8537c478bd9Sstevel@tonic-gate struct fs *fs = ufsvfsp->vfs_fs; 8547c478bd9Sstevel@tonic-gate struct lockfs lf; 8557c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 8567c478bd9Sstevel@tonic-gate 8577c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ENONE; 8587c478bd9Sstevel@tonic-gate 8597c478bd9Sstevel@tonic-gate /* 8607c478bd9Sstevel@tonic-gate * Logging is already disabled; done 8617c478bd9Sstevel@tonic-gate */ 8627c478bd9Sstevel@tonic-gate if (fs->fs_logbno == 0 || ufsvfsp->vfs_log == NULL) 8637c478bd9Sstevel@tonic-gate return (0); 8647c478bd9Sstevel@tonic-gate 8657c478bd9Sstevel@tonic-gate /* 8667c478bd9Sstevel@tonic-gate * Readonly file system 8677c478bd9Sstevel@tonic-gate */ 8687c478bd9Sstevel@tonic-gate if (fs->fs_ronly) { 8697c478bd9Sstevel@tonic-gate flp->error = FIOLOG_EROFS; 8707c478bd9Sstevel@tonic-gate return (0); 8717c478bd9Sstevel@tonic-gate } 8727c478bd9Sstevel@tonic-gate 8737c478bd9Sstevel@tonic-gate /* 8747c478bd9Sstevel@tonic-gate * File system must be write locked to disable logging 8757c478bd9Sstevel@tonic-gate */ 8767c478bd9Sstevel@tonic-gate error = ufs_fiolfss(vp, &lf); 8777c478bd9Sstevel@tonic-gate if (error) { 8787c478bd9Sstevel@tonic-gate return (error); 8797c478bd9Sstevel@tonic-gate } 8807c478bd9Sstevel@tonic-gate if (!LOCKFS_IS_ULOCK(&lf)) { 8817c478bd9Sstevel@tonic-gate flp->error = FIOLOG_EULOCK; 8827c478bd9Sstevel@tonic-gate return (0); 8837c478bd9Sstevel@tonic-gate } 8847c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_WLOCK; 8857c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 8867c478bd9Sstevel@tonic-gate lf.lf_comment = NULL; 8877c478bd9Sstevel@tonic-gate error = ufs_fiolfs(vp, &lf, 1); 8887c478bd9Sstevel@tonic-gate if (error) { 8897c478bd9Sstevel@tonic-gate flp->error = FIOLOG_EWLOCK; 8907c478bd9Sstevel@tonic-gate return (0); 8917c478bd9Sstevel@tonic-gate } 8927c478bd9Sstevel@tonic-gate 8937c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_log == NULL || fs->fs_logbno == 0) 8947c478bd9Sstevel@tonic-gate goto errout; 8957c478bd9Sstevel@tonic-gate 8967c478bd9Sstevel@tonic-gate /* 8977c478bd9Sstevel@tonic-gate * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT 8987c478bd9Sstevel@tonic-gate */ 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate /* 9017c478bd9Sstevel@tonic-gate * Disable logging: 9027c478bd9Sstevel@tonic-gate * Suspend the reclaim thread and force the delete thread to exit. 9037c478bd9Sstevel@tonic-gate * When a nologging mount has completed there may still be 9047c478bd9Sstevel@tonic-gate * work for reclaim to do so just suspend this thread until 9057c478bd9Sstevel@tonic-gate * it's [deadlock-] safe for it to continue. The delete 9067c478bd9Sstevel@tonic-gate * thread won't be needed as ufs_iinactive() calls 9077c478bd9Sstevel@tonic-gate * ufs_delete() when logging is disabled. 9087c478bd9Sstevel@tonic-gate * Freeze and drain reader ops. 9097c478bd9Sstevel@tonic-gate * Commit any outstanding reader transactions (ufs_flush). 9107c478bd9Sstevel@tonic-gate * Set the ``unmounted'' bit in the ufstrans struct. 9117c478bd9Sstevel@tonic-gate * If debug, remove metadata from matamap. 9127c478bd9Sstevel@tonic-gate * Disable matamap processing. 9137c478bd9Sstevel@tonic-gate * NULL the trans ops table. 9147c478bd9Sstevel@tonic-gate * Free all of the incore structs related to logging. 9157c478bd9Sstevel@tonic-gate * Allow reader ops. 9167c478bd9Sstevel@tonic-gate */ 9177c478bd9Sstevel@tonic-gate ufs_thread_suspend(&ufsvfsp->vfs_reclaim); 9187c478bd9Sstevel@tonic-gate ufs_thread_exit(&ufsvfsp->vfs_delete); 9197c478bd9Sstevel@tonic-gate 9207c478bd9Sstevel@tonic-gate vfs_lock_wait(ufsvfsp->vfs_vfs); 9217c478bd9Sstevel@tonic-gate ulp = &ufsvfsp->vfs_ulockfs; 9227c478bd9Sstevel@tonic-gate mutex_enter(&ulp->ul_lock); 923*1a5e258fSJosef 'Jeff' Sipek atomic_inc_ulong(&ufs_quiesce_pend); 9247c478bd9Sstevel@tonic-gate (void) ufs_quiesce(ulp); 9257c478bd9Sstevel@tonic-gate 9267c478bd9Sstevel@tonic-gate (void) ufs_flush(ufsvfsp->vfs_vfs); 9277c478bd9Sstevel@tonic-gate 9287c478bd9Sstevel@tonic-gate TRANS_MATA_UMOUNT(ufsvfsp); 9297c478bd9Sstevel@tonic-gate ufsvfsp->vfs_domatamap = 0; 9307c478bd9Sstevel@tonic-gate 9317c478bd9Sstevel@tonic-gate /* 9327c478bd9Sstevel@tonic-gate * Free all of the incore structs 93314c932c0Sbatschul * Aquire the ufs_scan_lock before de-linking the mtm data 93414c932c0Sbatschul * structure so that we keep ufs_sync() and ufs_update() away 93514c932c0Sbatschul * when they execute the ufs_scan_inodes() run while we're in 93614c932c0Sbatschul * progress of enabling/disabling logging. 9377c478bd9Sstevel@tonic-gate */ 93814c932c0Sbatschul mutex_enter(&ufs_scan_lock); 9397c478bd9Sstevel@tonic-gate (void) lufs_unsnarf(ufsvfsp); 94014c932c0Sbatschul mutex_exit(&ufs_scan_lock); 9417c478bd9Sstevel@tonic-gate 942*1a5e258fSJosef 'Jeff' Sipek atomic_dec_ulong(&ufs_quiesce_pend); 9437c478bd9Sstevel@tonic-gate mutex_exit(&ulp->ul_lock); 9447c478bd9Sstevel@tonic-gate vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0); 9457c478bd9Sstevel@tonic-gate vfs_unlock(ufsvfsp->vfs_vfs); 9467c478bd9Sstevel@tonic-gate 9477c478bd9Sstevel@tonic-gate fs->fs_rolled = FS_ALL_ROLLED; 9487c478bd9Sstevel@tonic-gate ufsvfsp->vfs_nolog_si = 0; 9497c478bd9Sstevel@tonic-gate 9507c478bd9Sstevel@tonic-gate /* 9517c478bd9Sstevel@tonic-gate * Free the log space and mark the superblock as FSACTIVE 9527c478bd9Sstevel@tonic-gate */ 9537c478bd9Sstevel@tonic-gate (void) lufs_free(ufsvfsp); 9547c478bd9Sstevel@tonic-gate 9557c478bd9Sstevel@tonic-gate /* 9567c478bd9Sstevel@tonic-gate * Allow the reclaim thread to continue. 9577c478bd9Sstevel@tonic-gate */ 9587c478bd9Sstevel@tonic-gate ufs_thread_continue(&ufsvfsp->vfs_reclaim); 9597c478bd9Sstevel@tonic-gate 9607c478bd9Sstevel@tonic-gate /* 9617c478bd9Sstevel@tonic-gate * Unlock the file system 9627c478bd9Sstevel@tonic-gate */ 9637c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_ULOCK; 9647c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 9657c478bd9Sstevel@tonic-gate error = ufs_fiolfs(vp, &lf, 1); 9667c478bd9Sstevel@tonic-gate if (error) 9677c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ENOULOCK; 9687c478bd9Sstevel@tonic-gate 9697c478bd9Sstevel@tonic-gate return (0); 9707c478bd9Sstevel@tonic-gate 9717c478bd9Sstevel@tonic-gate errout: 9727c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_ULOCK; 9737c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 9747c478bd9Sstevel@tonic-gate (void) ufs_fiolfs(vp, &lf, 1); 9757c478bd9Sstevel@tonic-gate return (error); 9767c478bd9Sstevel@tonic-gate } 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate /* 9797c478bd9Sstevel@tonic-gate * Enable logging 9807c478bd9Sstevel@tonic-gate */ 9817c478bd9Sstevel@tonic-gate int 9827c478bd9Sstevel@tonic-gate lufs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr) 9837c478bd9Sstevel@tonic-gate { 9847c478bd9Sstevel@tonic-gate int error; 9857c478bd9Sstevel@tonic-gate int reclaim; 9867c478bd9Sstevel@tonic-gate inode_t *ip = VTOI(vp); 9877c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = ip->i_ufsvfs; 9887c478bd9Sstevel@tonic-gate struct fs *fs; 9897c478bd9Sstevel@tonic-gate ml_unit_t *ul; 9907c478bd9Sstevel@tonic-gate struct lockfs lf; 9917c478bd9Sstevel@tonic-gate struct ulockfs *ulp; 9927c478bd9Sstevel@tonic-gate vfs_t *vfsp = ufsvfsp->vfs_vfs; 9937c478bd9Sstevel@tonic-gate uint64_t tmp_nbytes_actual; 994e7da395aSOwen Roberts uint64_t cg_minlogsize; 995e7da395aSOwen Roberts uint32_t cgsize; 996e7da395aSOwen Roberts static int minlogsizewarn = 0; 997e7da395aSOwen Roberts static int maxlogsizewarn = 0; 9987c478bd9Sstevel@tonic-gate 9997c478bd9Sstevel@tonic-gate /* 10007c478bd9Sstevel@tonic-gate * Check if logging is already enabled 10017c478bd9Sstevel@tonic-gate */ 10027c478bd9Sstevel@tonic-gate if (ufsvfsp->vfs_log) { 10037c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ETRANS; 10047c478bd9Sstevel@tonic-gate /* for root ensure logging option is set */ 10057c478bd9Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); 10067c478bd9Sstevel@tonic-gate return (0); 10077c478bd9Sstevel@tonic-gate } 10087c478bd9Sstevel@tonic-gate fs = ufsvfsp->vfs_fs; 10097c478bd9Sstevel@tonic-gate 10107c478bd9Sstevel@tonic-gate /* 10117c478bd9Sstevel@tonic-gate * Come back here to recheck if we had to disable the log. 10127c478bd9Sstevel@tonic-gate */ 10137c478bd9Sstevel@tonic-gate recheck: 10147c478bd9Sstevel@tonic-gate error = 0; 10157c478bd9Sstevel@tonic-gate reclaim = 0; 10167c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ENONE; 10177c478bd9Sstevel@tonic-gate 10187c478bd9Sstevel@tonic-gate /* 1019e7da395aSOwen Roberts * The size of the ufs log is determined using the following rules: 1020e7da395aSOwen Roberts * 1021e7da395aSOwen Roberts * 1) If no size is requested the log size is calculated as a 1022e7da395aSOwen Roberts * ratio of the total file system size. By default this is 1023e7da395aSOwen Roberts * 1MB of log per 1GB of file system. This calculation is then 1024e7da395aSOwen Roberts * capped at the log size specified by ldl_softlogcap. 1025e7da395aSOwen Roberts * 2) The log size requested may then be increased based on the 1026e7da395aSOwen Roberts * number of cylinder groups contained in the file system. 1027e7da395aSOwen Roberts * To prevent a hang the log has to be large enough to contain a 1028e7da395aSOwen Roberts * single transaction that alters every cylinder group in the file 1029e7da395aSOwen Roberts * system. This is calculated as cg_minlogsize. 1030e7da395aSOwen Roberts * 3) Finally a check is made that the log size requested is within 1031e7da395aSOwen Roberts * the limits of ldl_minlogsize and ldl_maxlogsize. 1032e7da395aSOwen Roberts */ 1033e7da395aSOwen Roberts 1034e7da395aSOwen Roberts /* 10357c478bd9Sstevel@tonic-gate * Adjust requested log size 10367c478bd9Sstevel@tonic-gate */ 10377c478bd9Sstevel@tonic-gate flp->nbytes_actual = flp->nbytes_requested; 10387c478bd9Sstevel@tonic-gate if (flp->nbytes_actual == 0) { 10397c478bd9Sstevel@tonic-gate tmp_nbytes_actual = 10407c478bd9Sstevel@tonic-gate (((uint64_t)fs->fs_size) / ldl_divisor) << fs->fs_fshift; 10417c478bd9Sstevel@tonic-gate flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX); 1042e7da395aSOwen Roberts /* 1043e7da395aSOwen Roberts * The 1MB per 1GB log size allocation only applies up to 1044e7da395aSOwen Roberts * ldl_softlogcap size of log. 1045e7da395aSOwen Roberts */ 1046e7da395aSOwen Roberts flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_softlogcap); 10477c478bd9Sstevel@tonic-gate } 1048e7da395aSOwen Roberts 1049e7da395aSOwen Roberts cgsize = ldl_cgsizereq ? ldl_cgsizereq : LDL_CGSIZEREQ(fs); 1050e7da395aSOwen Roberts 1051e7da395aSOwen Roberts /* 1052e7da395aSOwen Roberts * Determine the log size required based on the number of cylinder 1053e7da395aSOwen Roberts * groups in the file system. The log has to be at least this size 1054e7da395aSOwen Roberts * to prevent possible hangs due to log space exhaustion. 1055e7da395aSOwen Roberts */ 1056e7da395aSOwen Roberts cg_minlogsize = cgsize * fs->fs_ncg; 1057e7da395aSOwen Roberts 1058e7da395aSOwen Roberts /* 1059e7da395aSOwen Roberts * Ensure that the minimum log size isn't so small that it could lead 1060e7da395aSOwen Roberts * to a full log hang. 1061e7da395aSOwen Roberts */ 1062e7da395aSOwen Roberts if (ldl_minlogsize < LDL_MINLOGSIZE) { 1063e7da395aSOwen Roberts ldl_minlogsize = LDL_MINLOGSIZE; 1064e7da395aSOwen Roberts if (!minlogsizewarn) { 1065e7da395aSOwen Roberts cmn_err(CE_WARN, "ldl_minlogsize too small, increasing " 1066e7da395aSOwen Roberts "to 0x%x", LDL_MINLOGSIZE); 1067e7da395aSOwen Roberts minlogsizewarn = 1; 1068e7da395aSOwen Roberts } 1069e7da395aSOwen Roberts } 1070e7da395aSOwen Roberts 1071e7da395aSOwen Roberts /* 1072e7da395aSOwen Roberts * Ensure that the maximum log size isn't greater than INT_MAX as the 1073e7da395aSOwen Roberts * logical log offset fields would overflow. 1074e7da395aSOwen Roberts */ 1075e7da395aSOwen Roberts if (ldl_maxlogsize > INT_MAX) { 1076e7da395aSOwen Roberts ldl_maxlogsize = INT_MAX; 1077e7da395aSOwen Roberts if (!maxlogsizewarn) { 1078e7da395aSOwen Roberts cmn_err(CE_WARN, "ldl_maxlogsize too large, reducing " 1079e7da395aSOwen Roberts "to 0x%x", INT_MAX); 1080e7da395aSOwen Roberts maxlogsizewarn = 1; 1081e7da395aSOwen Roberts } 1082e7da395aSOwen Roberts } 1083e7da395aSOwen Roberts 1084e7da395aSOwen Roberts if (cg_minlogsize > ldl_maxlogsize) { 1085e7da395aSOwen Roberts cmn_err(CE_WARN, 1086e7da395aSOwen Roberts "%s: reducing calculated log size from 0x%x to " 1087e7da395aSOwen Roberts "ldl_maxlogsize (0x%x).", fs->fs_fsmnt, (int)cg_minlogsize, 1088e7da395aSOwen Roberts ldl_maxlogsize); 1089e7da395aSOwen Roberts } 1090e7da395aSOwen Roberts 1091e7da395aSOwen Roberts cg_minlogsize = MAX(cg_minlogsize, ldl_minlogsize); 1092e7da395aSOwen Roberts cg_minlogsize = MIN(cg_minlogsize, ldl_maxlogsize); 1093e7da395aSOwen Roberts 1094e7da395aSOwen Roberts flp->nbytes_actual = MAX(flp->nbytes_actual, cg_minlogsize); 10957c478bd9Sstevel@tonic-gate flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize); 10967c478bd9Sstevel@tonic-gate flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize); 10977c478bd9Sstevel@tonic-gate flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual); 10987c478bd9Sstevel@tonic-gate 10997c478bd9Sstevel@tonic-gate /* 11007c478bd9Sstevel@tonic-gate * logging is enabled and the log is the right size; done 11017c478bd9Sstevel@tonic-gate */ 11027c478bd9Sstevel@tonic-gate ul = ufsvfsp->vfs_log; 11037c478bd9Sstevel@tonic-gate if (ul && fs->fs_logbno && (flp->nbytes_actual == ul->un_requestsize)) 11047c478bd9Sstevel@tonic-gate return (0); 11057c478bd9Sstevel@tonic-gate 11067c478bd9Sstevel@tonic-gate /* 11077c478bd9Sstevel@tonic-gate * Readonly file system 11087c478bd9Sstevel@tonic-gate */ 11097c478bd9Sstevel@tonic-gate if (fs->fs_ronly) { 11107c478bd9Sstevel@tonic-gate flp->error = FIOLOG_EROFS; 11117c478bd9Sstevel@tonic-gate return (0); 11127c478bd9Sstevel@tonic-gate } 11137c478bd9Sstevel@tonic-gate 11147c478bd9Sstevel@tonic-gate /* 11157c478bd9Sstevel@tonic-gate * File system must be write locked to enable logging 11167c478bd9Sstevel@tonic-gate */ 11177c478bd9Sstevel@tonic-gate error = ufs_fiolfss(vp, &lf); 11187c478bd9Sstevel@tonic-gate if (error) { 11197c478bd9Sstevel@tonic-gate return (error); 11207c478bd9Sstevel@tonic-gate } 11217c478bd9Sstevel@tonic-gate if (!LOCKFS_IS_ULOCK(&lf)) { 11227c478bd9Sstevel@tonic-gate flp->error = FIOLOG_EULOCK; 11237c478bd9Sstevel@tonic-gate return (0); 11247c478bd9Sstevel@tonic-gate } 11257c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_WLOCK; 11267c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 11277c478bd9Sstevel@tonic-gate lf.lf_comment = NULL; 11287c478bd9Sstevel@tonic-gate error = ufs_fiolfs(vp, &lf, 1); 11297c478bd9Sstevel@tonic-gate if (error) { 11307c478bd9Sstevel@tonic-gate flp->error = FIOLOG_EWLOCK; 11317c478bd9Sstevel@tonic-gate return (0); 11327c478bd9Sstevel@tonic-gate } 11337c478bd9Sstevel@tonic-gate 11347c478bd9Sstevel@tonic-gate /* 113514c932c0Sbatschul * Grab appropriate locks to synchronize with the rest 113614c932c0Sbatschul * of the system 113714c932c0Sbatschul */ 113814c932c0Sbatschul vfs_lock_wait(vfsp); 113914c932c0Sbatschul ulp = &ufsvfsp->vfs_ulockfs; 114014c932c0Sbatschul mutex_enter(&ulp->ul_lock); 114114c932c0Sbatschul 114214c932c0Sbatschul /* 11437c478bd9Sstevel@tonic-gate * File system must be fairly consistent to enable logging 11447c478bd9Sstevel@tonic-gate */ 11457c478bd9Sstevel@tonic-gate if (fs->fs_clean != FSLOG && 11467c478bd9Sstevel@tonic-gate fs->fs_clean != FSACTIVE && 11477c478bd9Sstevel@tonic-gate fs->fs_clean != FSSTABLE && 11487c478bd9Sstevel@tonic-gate fs->fs_clean != FSCLEAN) { 11497c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ECLEAN; 11507c478bd9Sstevel@tonic-gate goto unlockout; 11517c478bd9Sstevel@tonic-gate } 11527c478bd9Sstevel@tonic-gate 11537c478bd9Sstevel@tonic-gate /* 11547c478bd9Sstevel@tonic-gate * A write-locked file system is only active if there are 11557c478bd9Sstevel@tonic-gate * open deleted files; so remember to set FS_RECLAIM later. 11567c478bd9Sstevel@tonic-gate */ 11577c478bd9Sstevel@tonic-gate if (fs->fs_clean == FSACTIVE) 11587c478bd9Sstevel@tonic-gate reclaim = FS_RECLAIM; 11597c478bd9Sstevel@tonic-gate 11607c478bd9Sstevel@tonic-gate /* 11617c478bd9Sstevel@tonic-gate * Logging is already enabled; must be changing the log's size 11627c478bd9Sstevel@tonic-gate */ 11637c478bd9Sstevel@tonic-gate if (fs->fs_logbno && ufsvfsp->vfs_log) { 11647c478bd9Sstevel@tonic-gate /* 11657c478bd9Sstevel@tonic-gate * Before we can disable logging, we must give up our 11667c478bd9Sstevel@tonic-gate * lock. As a consequence of unlocking and disabling the 11677c478bd9Sstevel@tonic-gate * log, the fs structure may change. Because of this, when 11687c478bd9Sstevel@tonic-gate * disabling is complete, we will go back to recheck to 11697c478bd9Sstevel@tonic-gate * repeat all of the checks that we performed to get to 11707c478bd9Sstevel@tonic-gate * this point. Disabling sets fs->fs_logbno to 0, so this 11717c478bd9Sstevel@tonic-gate * will not put us into an infinite loop. 11727c478bd9Sstevel@tonic-gate */ 117314c932c0Sbatschul mutex_exit(&ulp->ul_lock); 117414c932c0Sbatschul vfs_unlock(vfsp); 117514c932c0Sbatschul 11767c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_ULOCK; 11777c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 11787c478bd9Sstevel@tonic-gate error = ufs_fiolfs(vp, &lf, 1); 11797c478bd9Sstevel@tonic-gate if (error) { 11807c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ENOULOCK; 11817c478bd9Sstevel@tonic-gate return (0); 11827c478bd9Sstevel@tonic-gate } 11837c478bd9Sstevel@tonic-gate error = lufs_disable(vp, flp); 11847c478bd9Sstevel@tonic-gate if (error || (flp->error != FIOLOG_ENONE)) 11857c478bd9Sstevel@tonic-gate return (0); 11867c478bd9Sstevel@tonic-gate goto recheck; 11877c478bd9Sstevel@tonic-gate } 11887c478bd9Sstevel@tonic-gate 1189e7da395aSOwen Roberts error = lufs_alloc(ufsvfsp, flp, cg_minlogsize, cr); 11907c478bd9Sstevel@tonic-gate if (error) 11917c478bd9Sstevel@tonic-gate goto errout; 11927c478bd9Sstevel@tonic-gate 11937c478bd9Sstevel@tonic-gate /* 11947c478bd9Sstevel@tonic-gate * Create all of the incore structs 11957c478bd9Sstevel@tonic-gate */ 11967c478bd9Sstevel@tonic-gate error = lufs_snarf(ufsvfsp, fs, 0); 11977c478bd9Sstevel@tonic-gate if (error) 11987c478bd9Sstevel@tonic-gate goto errout; 11997c478bd9Sstevel@tonic-gate 12007c478bd9Sstevel@tonic-gate /* 12017c478bd9Sstevel@tonic-gate * DON'T ``GOTO ERROUT'' PAST THIS POINT 12027c478bd9Sstevel@tonic-gate */ 12037c478bd9Sstevel@tonic-gate 12047c478bd9Sstevel@tonic-gate /* 12057c478bd9Sstevel@tonic-gate * Pretend we were just mounted with logging enabled 12067c478bd9Sstevel@tonic-gate * Get the ops vector 12077c478bd9Sstevel@tonic-gate * If debug, record metadata locations with log subsystem 12087c478bd9Sstevel@tonic-gate * Start the delete thread 12097c478bd9Sstevel@tonic-gate * Start the reclaim thread, if necessary 12107c478bd9Sstevel@tonic-gate */ 12117c478bd9Sstevel@tonic-gate vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); 12127c478bd9Sstevel@tonic-gate 12137c478bd9Sstevel@tonic-gate TRANS_DOMATAMAP(ufsvfsp); 12147c478bd9Sstevel@tonic-gate TRANS_MATA_MOUNT(ufsvfsp); 12157c478bd9Sstevel@tonic-gate TRANS_MATA_SI(ufsvfsp, fs); 12167c478bd9Sstevel@tonic-gate ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp); 12177c478bd9Sstevel@tonic-gate if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) { 12187c478bd9Sstevel@tonic-gate fs->fs_reclaim &= ~FS_RECLAIM; 12197c478bd9Sstevel@tonic-gate fs->fs_reclaim |= FS_RECLAIMING; 12207c478bd9Sstevel@tonic-gate ufs_thread_start(&ufsvfsp->vfs_reclaim, 12217c478bd9Sstevel@tonic-gate ufs_thread_reclaim, vfsp); 12227c478bd9Sstevel@tonic-gate } else 12237c478bd9Sstevel@tonic-gate fs->fs_reclaim |= reclaim; 12247c478bd9Sstevel@tonic-gate 12257c478bd9Sstevel@tonic-gate mutex_exit(&ulp->ul_lock); 12267c478bd9Sstevel@tonic-gate vfs_unlock(vfsp); 12277c478bd9Sstevel@tonic-gate 12287c478bd9Sstevel@tonic-gate /* 12297c478bd9Sstevel@tonic-gate * Unlock the file system 12307c478bd9Sstevel@tonic-gate */ 12317c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_ULOCK; 12327c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 12337c478bd9Sstevel@tonic-gate error = ufs_fiolfs(vp, &lf, 1); 12347c478bd9Sstevel@tonic-gate if (error) { 12357c478bd9Sstevel@tonic-gate flp->error = FIOLOG_ENOULOCK; 12367c478bd9Sstevel@tonic-gate return (0); 12377c478bd9Sstevel@tonic-gate } 12387c478bd9Sstevel@tonic-gate 12397c478bd9Sstevel@tonic-gate /* 12407c478bd9Sstevel@tonic-gate * There's nothing in the log yet (we've just allocated it) 12417c478bd9Sstevel@tonic-gate * so directly write out the super block. 12427c478bd9Sstevel@tonic-gate * Note, we have to force this sb out to disk 12437c478bd9Sstevel@tonic-gate * (not just to the log) so that if we crash we know we are logging 12447c478bd9Sstevel@tonic-gate */ 12457c478bd9Sstevel@tonic-gate mutex_enter(&ufsvfsp->vfs_lock); 12467c478bd9Sstevel@tonic-gate fs->fs_clean = FSLOG; 12477c478bd9Sstevel@tonic-gate fs->fs_rolled = FS_NEED_ROLL; /* Mark the fs as unrolled */ 12487c478bd9Sstevel@tonic-gate UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp); 12497c478bd9Sstevel@tonic-gate mutex_exit(&ufsvfsp->vfs_lock); 12507c478bd9Sstevel@tonic-gate 12517c478bd9Sstevel@tonic-gate return (0); 12527c478bd9Sstevel@tonic-gate 12537c478bd9Sstevel@tonic-gate errout: 125414c932c0Sbatschul /* 125514c932c0Sbatschul * Aquire the ufs_scan_lock before de-linking the mtm data 125614c932c0Sbatschul * structure so that we keep ufs_sync() and ufs_update() away 125714c932c0Sbatschul * when they execute the ufs_scan_inodes() run while we're in 125814c932c0Sbatschul * progress of enabling/disabling logging. 125914c932c0Sbatschul */ 126014c932c0Sbatschul mutex_enter(&ufs_scan_lock); 12617c478bd9Sstevel@tonic-gate (void) lufs_unsnarf(ufsvfsp); 126214c932c0Sbatschul mutex_exit(&ufs_scan_lock); 126314c932c0Sbatschul 12647c478bd9Sstevel@tonic-gate (void) lufs_free(ufsvfsp); 12657c478bd9Sstevel@tonic-gate unlockout: 126614c932c0Sbatschul mutex_exit(&ulp->ul_lock); 126714c932c0Sbatschul vfs_unlock(vfsp); 126814c932c0Sbatschul 12697c478bd9Sstevel@tonic-gate lf.lf_lock = LOCKFS_ULOCK; 12707c478bd9Sstevel@tonic-gate lf.lf_flags = 0; 12717c478bd9Sstevel@tonic-gate (void) ufs_fiolfs(vp, &lf, 1); 12727c478bd9Sstevel@tonic-gate return (error); 12737c478bd9Sstevel@tonic-gate } 12747c478bd9Sstevel@tonic-gate 12757c478bd9Sstevel@tonic-gate void 12767c478bd9Sstevel@tonic-gate lufs_read_strategy(ml_unit_t *ul, buf_t *bp) 12777c478bd9Sstevel@tonic-gate { 12787c478bd9Sstevel@tonic-gate mt_map_t *logmap = ul->un_logmap; 12797c478bd9Sstevel@tonic-gate offset_t mof = ldbtob(bp->b_blkno); 12807c478bd9Sstevel@tonic-gate off_t nb = bp->b_bcount; 12817c478bd9Sstevel@tonic-gate mapentry_t *age; 12827c478bd9Sstevel@tonic-gate char *va; 12837c478bd9Sstevel@tonic-gate int (*saviodone)(); 12847c478bd9Sstevel@tonic-gate int entire_range; 12857c478bd9Sstevel@tonic-gate 12867c478bd9Sstevel@tonic-gate /* 12877c478bd9Sstevel@tonic-gate * get a linked list of overlapping deltas 12887c478bd9Sstevel@tonic-gate * returns with &mtm->mtm_rwlock held 12897c478bd9Sstevel@tonic-gate */ 12907c478bd9Sstevel@tonic-gate entire_range = logmap_list_get(logmap, mof, nb, &age); 12917c478bd9Sstevel@tonic-gate 12927c478bd9Sstevel@tonic-gate /* 12937c478bd9Sstevel@tonic-gate * no overlapping deltas were found; read master 12947c478bd9Sstevel@tonic-gate */ 12957c478bd9Sstevel@tonic-gate if (age == NULL) { 12967c478bd9Sstevel@tonic-gate rw_exit(&logmap->mtm_rwlock); 12977c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 12987c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 12997c478bd9Sstevel@tonic-gate bp->b_error = EIO; 13007c478bd9Sstevel@tonic-gate biodone(bp); 13017c478bd9Sstevel@tonic-gate } else { 1302d3d50737SRafael Vanoni ul->un_ufsvfs->vfs_iotstamp = ddi_get_lbolt(); 13037c478bd9Sstevel@tonic-gate logstats.ls_lreads.value.ui64++; 13047c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13057c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 13067c478bd9Sstevel@tonic-gate } 13077c478bd9Sstevel@tonic-gate return; 13087c478bd9Sstevel@tonic-gate } 13097c478bd9Sstevel@tonic-gate 13107c478bd9Sstevel@tonic-gate va = bp_mapin_common(bp, VM_SLEEP); 13117c478bd9Sstevel@tonic-gate /* 13127c478bd9Sstevel@tonic-gate * if necessary, sync read the data from master 13137c478bd9Sstevel@tonic-gate * errors are returned in bp 13147c478bd9Sstevel@tonic-gate */ 13157c478bd9Sstevel@tonic-gate if (!entire_range) { 13167c478bd9Sstevel@tonic-gate saviodone = bp->b_iodone; 13177c478bd9Sstevel@tonic-gate bp->b_iodone = trans_not_done; 13187c478bd9Sstevel@tonic-gate logstats.ls_mreads.value.ui64++; 13197c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 13207c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 13217c478bd9Sstevel@tonic-gate if (trans_not_wait(bp)) 13227c478bd9Sstevel@tonic-gate ldl_seterror(ul, "Error reading master"); 13237c478bd9Sstevel@tonic-gate bp->b_iodone = saviodone; 13247c478bd9Sstevel@tonic-gate } 13257c478bd9Sstevel@tonic-gate 13267c478bd9Sstevel@tonic-gate /* 13277c478bd9Sstevel@tonic-gate * sync read the data from the log 13287c478bd9Sstevel@tonic-gate * errors are returned inline 13297c478bd9Sstevel@tonic-gate */ 13307c478bd9Sstevel@tonic-gate if (ldl_read(ul, va, mof, nb, age)) { 13317c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 13327c478bd9Sstevel@tonic-gate bp->b_error = EIO; 13337c478bd9Sstevel@tonic-gate } 13347c478bd9Sstevel@tonic-gate 13357c478bd9Sstevel@tonic-gate /* 13367c478bd9Sstevel@tonic-gate * unlist the deltas 13377c478bd9Sstevel@tonic-gate */ 13387c478bd9Sstevel@tonic-gate logmap_list_put(logmap, age); 13397c478bd9Sstevel@tonic-gate 13407c478bd9Sstevel@tonic-gate /* 13417c478bd9Sstevel@tonic-gate * all done 13427c478bd9Sstevel@tonic-gate */ 13437c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 13447c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 13457c478bd9Sstevel@tonic-gate bp->b_error = EIO; 13467c478bd9Sstevel@tonic-gate } 13477c478bd9Sstevel@tonic-gate biodone(bp); 13487c478bd9Sstevel@tonic-gate } 13497c478bd9Sstevel@tonic-gate 13507c478bd9Sstevel@tonic-gate void 13517c478bd9Sstevel@tonic-gate lufs_write_strategy(ml_unit_t *ul, buf_t *bp) 13527c478bd9Sstevel@tonic-gate { 13537c478bd9Sstevel@tonic-gate offset_t mof = ldbtob(bp->b_blkno); 13547c478bd9Sstevel@tonic-gate off_t nb = bp->b_bcount; 13557c478bd9Sstevel@tonic-gate char *va; 13567c478bd9Sstevel@tonic-gate mapentry_t *me; 13577c478bd9Sstevel@tonic-gate 13587c478bd9Sstevel@tonic-gate ASSERT((nb & DEV_BMASK) == 0); 13597c478bd9Sstevel@tonic-gate ul->un_logmap->mtm_ref = 1; 13607c478bd9Sstevel@tonic-gate 13617c478bd9Sstevel@tonic-gate /* 13627c478bd9Sstevel@tonic-gate * if there are deltas, move into log 13637c478bd9Sstevel@tonic-gate */ 13647c478bd9Sstevel@tonic-gate me = deltamap_remove(ul->un_deltamap, mof, nb); 13657c478bd9Sstevel@tonic-gate if (me) { 13667c478bd9Sstevel@tonic-gate 13677c478bd9Sstevel@tonic-gate va = bp_mapin_common(bp, VM_SLEEP); 13687c478bd9Sstevel@tonic-gate 13697c478bd9Sstevel@tonic-gate ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) || 13707c478bd9Sstevel@tonic-gate (ul->un_matamap == NULL)|| 13717c478bd9Sstevel@tonic-gate matamap_within(ul->un_matamap, mof, nb)); 13727c478bd9Sstevel@tonic-gate 13737c478bd9Sstevel@tonic-gate /* 13747c478bd9Sstevel@tonic-gate * move to logmap 13757c478bd9Sstevel@tonic-gate */ 13767c478bd9Sstevel@tonic-gate if (ufs_crb_enable) { 13777c478bd9Sstevel@tonic-gate logmap_add_buf(ul, va, mof, me, 13787c478bd9Sstevel@tonic-gate bp->b_un.b_addr, nb); 13797c478bd9Sstevel@tonic-gate } else { 13807c478bd9Sstevel@tonic-gate logmap_add(ul, va, mof, me); 13817c478bd9Sstevel@tonic-gate } 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 13847c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 13857c478bd9Sstevel@tonic-gate bp->b_error = EIO; 13867c478bd9Sstevel@tonic-gate } 13877c478bd9Sstevel@tonic-gate biodone(bp); 13887c478bd9Sstevel@tonic-gate return; 13897c478bd9Sstevel@tonic-gate } 13907c478bd9Sstevel@tonic-gate if (ul->un_flags & LDL_ERROR) { 13917c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 13927c478bd9Sstevel@tonic-gate bp->b_error = EIO; 13937c478bd9Sstevel@tonic-gate biodone(bp); 13947c478bd9Sstevel@tonic-gate return; 13957c478bd9Sstevel@tonic-gate } 13967c478bd9Sstevel@tonic-gate 13977c478bd9Sstevel@tonic-gate /* 13987c478bd9Sstevel@tonic-gate * Check that we are not updating metadata, or if so then via B_PHYS. 13997c478bd9Sstevel@tonic-gate */ 14007c478bd9Sstevel@tonic-gate ASSERT((ul->un_matamap == NULL) || 14017c478bd9Sstevel@tonic-gate !(matamap_overlap(ul->un_matamap, mof, nb) && 14027c478bd9Sstevel@tonic-gate ((bp->b_flags & B_PHYS) == 0))); 14037c478bd9Sstevel@tonic-gate 1404d3d50737SRafael Vanoni ul->un_ufsvfs->vfs_iotstamp = ddi_get_lbolt(); 14057c478bd9Sstevel@tonic-gate logstats.ls_lwrites.value.ui64++; 14067c478bd9Sstevel@tonic-gate 14077c478bd9Sstevel@tonic-gate /* If snapshots are enabled, write through the snapshot driver */ 14087c478bd9Sstevel@tonic-gate if (ul->un_ufsvfs->vfs_snapshot) 14097c478bd9Sstevel@tonic-gate fssnap_strategy(&ul->un_ufsvfs->vfs_snapshot, bp); 14107c478bd9Sstevel@tonic-gate else 14117c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 14127c478bd9Sstevel@tonic-gate 14137c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_OUBLK, 1); 14147c478bd9Sstevel@tonic-gate } 14157c478bd9Sstevel@tonic-gate 14167c478bd9Sstevel@tonic-gate void 14177c478bd9Sstevel@tonic-gate lufs_strategy(ml_unit_t *ul, buf_t *bp) 14187c478bd9Sstevel@tonic-gate { 14197c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ) 14207c478bd9Sstevel@tonic-gate lufs_read_strategy(ul, bp); 14217c478bd9Sstevel@tonic-gate else 14227c478bd9Sstevel@tonic-gate lufs_write_strategy(ul, bp); 14237c478bd9Sstevel@tonic-gate } 14247c478bd9Sstevel@tonic-gate 14257c478bd9Sstevel@tonic-gate /* ARGSUSED */ 14267c478bd9Sstevel@tonic-gate static int 14277c478bd9Sstevel@tonic-gate delta_stats_update(kstat_t *ksp, int rw) 14287c478bd9Sstevel@tonic-gate { 14297c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) { 14307c478bd9Sstevel@tonic-gate delta_stats[DT_SB] = dkstats.ds_superblock_deltas.value.ui64; 14317c478bd9Sstevel@tonic-gate delta_stats[DT_CG] = dkstats.ds_bitmap_deltas.value.ui64; 14327c478bd9Sstevel@tonic-gate delta_stats[DT_SI] = dkstats.ds_suminfo_deltas.value.ui64; 14337c478bd9Sstevel@tonic-gate delta_stats[DT_AB] = dkstats.ds_allocblk_deltas.value.ui64; 14347c478bd9Sstevel@tonic-gate delta_stats[DT_ABZERO] = dkstats.ds_ab0_deltas.value.ui64; 14357c478bd9Sstevel@tonic-gate delta_stats[DT_DIR] = dkstats.ds_dir_deltas.value.ui64; 14367c478bd9Sstevel@tonic-gate delta_stats[DT_INODE] = dkstats.ds_inode_deltas.value.ui64; 14377c478bd9Sstevel@tonic-gate delta_stats[DT_FBI] = dkstats.ds_fbiwrite_deltas.value.ui64; 14387c478bd9Sstevel@tonic-gate delta_stats[DT_QR] = dkstats.ds_quota_deltas.value.ui64; 14397c478bd9Sstevel@tonic-gate delta_stats[DT_SHAD] = dkstats.ds_shadow_deltas.value.ui64; 14407c478bd9Sstevel@tonic-gate 14417c478bd9Sstevel@tonic-gate roll_stats[DT_SB] = dkstats.ds_superblock_rolled.value.ui64; 14427c478bd9Sstevel@tonic-gate roll_stats[DT_CG] = dkstats.ds_bitmap_rolled.value.ui64; 14437c478bd9Sstevel@tonic-gate roll_stats[DT_SI] = dkstats.ds_suminfo_rolled.value.ui64; 14447c478bd9Sstevel@tonic-gate roll_stats[DT_AB] = dkstats.ds_allocblk_rolled.value.ui64; 14457c478bd9Sstevel@tonic-gate roll_stats[DT_ABZERO] = dkstats.ds_ab0_rolled.value.ui64; 14467c478bd9Sstevel@tonic-gate roll_stats[DT_DIR] = dkstats.ds_dir_rolled.value.ui64; 14477c478bd9Sstevel@tonic-gate roll_stats[DT_INODE] = dkstats.ds_inode_rolled.value.ui64; 14487c478bd9Sstevel@tonic-gate roll_stats[DT_FBI] = dkstats.ds_fbiwrite_rolled.value.ui64; 14497c478bd9Sstevel@tonic-gate roll_stats[DT_QR] = dkstats.ds_quota_rolled.value.ui64; 14507c478bd9Sstevel@tonic-gate roll_stats[DT_SHAD] = dkstats.ds_shadow_rolled.value.ui64; 14517c478bd9Sstevel@tonic-gate } else { 14527c478bd9Sstevel@tonic-gate dkstats.ds_superblock_deltas.value.ui64 = delta_stats[DT_SB]; 14537c478bd9Sstevel@tonic-gate dkstats.ds_bitmap_deltas.value.ui64 = delta_stats[DT_CG]; 14547c478bd9Sstevel@tonic-gate dkstats.ds_suminfo_deltas.value.ui64 = delta_stats[DT_SI]; 14557c478bd9Sstevel@tonic-gate dkstats.ds_allocblk_deltas.value.ui64 = delta_stats[DT_AB]; 14567c478bd9Sstevel@tonic-gate dkstats.ds_ab0_deltas.value.ui64 = delta_stats[DT_ABZERO]; 14577c478bd9Sstevel@tonic-gate dkstats.ds_dir_deltas.value.ui64 = delta_stats[DT_DIR]; 14587c478bd9Sstevel@tonic-gate dkstats.ds_inode_deltas.value.ui64 = delta_stats[DT_INODE]; 14597c478bd9Sstevel@tonic-gate dkstats.ds_fbiwrite_deltas.value.ui64 = delta_stats[DT_FBI]; 14607c478bd9Sstevel@tonic-gate dkstats.ds_quota_deltas.value.ui64 = delta_stats[DT_QR]; 14617c478bd9Sstevel@tonic-gate dkstats.ds_shadow_deltas.value.ui64 = delta_stats[DT_SHAD]; 14627c478bd9Sstevel@tonic-gate 14637c478bd9Sstevel@tonic-gate dkstats.ds_superblock_rolled.value.ui64 = roll_stats[DT_SB]; 14647c478bd9Sstevel@tonic-gate dkstats.ds_bitmap_rolled.value.ui64 = roll_stats[DT_CG]; 14657c478bd9Sstevel@tonic-gate dkstats.ds_suminfo_rolled.value.ui64 = roll_stats[DT_SI]; 14667c478bd9Sstevel@tonic-gate dkstats.ds_allocblk_rolled.value.ui64 = roll_stats[DT_AB]; 14677c478bd9Sstevel@tonic-gate dkstats.ds_ab0_rolled.value.ui64 = roll_stats[DT_ABZERO]; 14687c478bd9Sstevel@tonic-gate dkstats.ds_dir_rolled.value.ui64 = roll_stats[DT_DIR]; 14697c478bd9Sstevel@tonic-gate dkstats.ds_inode_rolled.value.ui64 = roll_stats[DT_INODE]; 14707c478bd9Sstevel@tonic-gate dkstats.ds_fbiwrite_rolled.value.ui64 = roll_stats[DT_FBI]; 14717c478bd9Sstevel@tonic-gate dkstats.ds_quota_rolled.value.ui64 = roll_stats[DT_QR]; 14727c478bd9Sstevel@tonic-gate dkstats.ds_shadow_rolled.value.ui64 = roll_stats[DT_SHAD]; 14737c478bd9Sstevel@tonic-gate } 14747c478bd9Sstevel@tonic-gate return (0); 14757c478bd9Sstevel@tonic-gate } 14767c478bd9Sstevel@tonic-gate 14777c478bd9Sstevel@tonic-gate extern size_t ufs_crb_limit; 14787c478bd9Sstevel@tonic-gate extern int ufs_max_crb_divisor; 14797c478bd9Sstevel@tonic-gate 14807c478bd9Sstevel@tonic-gate void 14817c478bd9Sstevel@tonic-gate lufs_init(void) 14827c478bd9Sstevel@tonic-gate { 14837c478bd9Sstevel@tonic-gate kstat_t *ksp; 14847c478bd9Sstevel@tonic-gate 14857c478bd9Sstevel@tonic-gate /* Create kmem caches */ 14867c478bd9Sstevel@tonic-gate lufs_sv = kmem_cache_create("lufs_save", sizeof (lufs_save_t), 0, 14877c478bd9Sstevel@tonic-gate NULL, NULL, NULL, NULL, NULL, 0); 14887c478bd9Sstevel@tonic-gate lufs_bp = kmem_cache_create("lufs_bufs", sizeof (lufs_buf_t), 0, 14897c478bd9Sstevel@tonic-gate NULL, NULL, NULL, NULL, NULL, 0); 14907c478bd9Sstevel@tonic-gate 14917c478bd9Sstevel@tonic-gate mutex_init(&log_mutex, NULL, MUTEX_DEFAULT, NULL); 14927c478bd9Sstevel@tonic-gate 14937c478bd9Sstevel@tonic-gate _init_top(); 14947c478bd9Sstevel@tonic-gate 1495683b2949SRichard Lowe if (bio_lufs_strategy == NULL) 14967c478bd9Sstevel@tonic-gate bio_lufs_strategy = (void (*) (void *, buf_t *)) lufs_strategy; 14977c478bd9Sstevel@tonic-gate 14987c478bd9Sstevel@tonic-gate /* 14997c478bd9Sstevel@tonic-gate * Initialise general logging and delta kstats 15007c478bd9Sstevel@tonic-gate */ 15017c478bd9Sstevel@tonic-gate ksp = kstat_create("ufs_log", 0, "logstats", "ufs", KSTAT_TYPE_NAMED, 15027c478bd9Sstevel@tonic-gate sizeof (logstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); 15037c478bd9Sstevel@tonic-gate if (ksp) { 15047c478bd9Sstevel@tonic-gate ksp->ks_data = (void *) &logstats; 15057c478bd9Sstevel@tonic-gate kstat_install(ksp); 15067c478bd9Sstevel@tonic-gate } 15077c478bd9Sstevel@tonic-gate 15087c478bd9Sstevel@tonic-gate ksp = kstat_create("ufs_log", 0, "deltastats", "ufs", KSTAT_TYPE_NAMED, 15097c478bd9Sstevel@tonic-gate sizeof (dkstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); 15107c478bd9Sstevel@tonic-gate if (ksp) { 15117c478bd9Sstevel@tonic-gate ksp->ks_data = (void *) &dkstats; 15127c478bd9Sstevel@tonic-gate ksp->ks_update = delta_stats_update; 15137c478bd9Sstevel@tonic-gate kstat_install(ksp); 15147c478bd9Sstevel@tonic-gate } 15157c478bd9Sstevel@tonic-gate 15164f3979a5SWolfgang Schremser /* Initialize generation of logging ids */ 15174f3979a5SWolfgang Schremser lufs_genid_init(); 15184f3979a5SWolfgang Schremser 15197c478bd9Sstevel@tonic-gate /* 15207c478bd9Sstevel@tonic-gate * Set up the maximum amount of kmem that the crbs (system wide) 15217c478bd9Sstevel@tonic-gate * can use. 15227c478bd9Sstevel@tonic-gate */ 15237c478bd9Sstevel@tonic-gate ufs_crb_limit = kmem_maxavail() / ufs_max_crb_divisor; 15247c478bd9Sstevel@tonic-gate } 1525