1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/systm.h> 27 #include <sys/types.h> 28 #include <sys/vnode.h> 29 #include <sys/buf.h> 30 #include <sys/errno.h> 31 #include <sys/fssnap_if.h> 32 #include <sys/fs/ufs_inode.h> 33 #include <sys/fs/ufs_filio.h> 34 #include <sys/sysmacros.h> 35 #include <sys/modctl.h> 36 #include <sys/fs/ufs_log.h> 37 #include <sys/fs/ufs_bio.h> 38 #include <sys/fs/ufs_fsdir.h> 39 #include <sys/debug.h> 40 #include <sys/atomic.h> 41 #include <sys/kmem.h> 42 #include <sys/inttypes.h> 43 #include <sys/vfs.h> 44 #include <sys/mntent.h> 45 #include <sys/conf.h> 46 #include <sys/param.h> 47 #include <sys/kstat.h> 48 #include <sys/cmn_err.h> 49 #include <sys/sdt.h> 50 51 #define LUFS_GENID_PRIME UINT64_C(4294967291) 52 #define LUFS_GENID_BASE UINT64_C(311) 53 #define LUFS_NEXT_ID(id) ((uint32_t)(((id) * LUFS_GENID_BASE) % \ 54 LUFS_GENID_PRIME)) 55 56 extern kmutex_t ufs_scan_lock; 57 58 static kmutex_t log_mutex; /* general purpose log layer lock */ 59 kmutex_t ml_scan; /* Scan thread syncronization */ 60 kcondvar_t ml_scan_cv; /* Scan thread syncronization */ 61 62 struct kmem_cache *lufs_sv; 63 struct kmem_cache *lufs_bp; 64 65 /* Tunables */ 66 uint_t ldl_maxlogsize = LDL_MAXLOGSIZE; 67 uint_t ldl_minlogsize = LDL_MINLOGSIZE; 68 uint32_t ldl_divisor = LDL_DIVISOR; 69 uint32_t ldl_mintransfer = LDL_MINTRANSFER; 70 uint32_t ldl_maxtransfer = LDL_MAXTRANSFER; 71 uint32_t ldl_minbufsize = LDL_MINBUFSIZE; 72 73 /* Generation of header ids */ 74 static kmutex_t genid_mutex; 75 static uint32_t last_loghead_ident = UINT32_C(0); 76 77 /* 78 * Logging delta and roll statistics 79 */ 80 struct delta_kstats { 81 kstat_named_t ds_superblock_deltas; 82 kstat_named_t ds_bitmap_deltas; 83 kstat_named_t ds_suminfo_deltas; 84 kstat_named_t ds_allocblk_deltas; 85 kstat_named_t ds_ab0_deltas; 86 kstat_named_t ds_dir_deltas; 87 kstat_named_t ds_inode_deltas; 88 kstat_named_t ds_fbiwrite_deltas; 89 kstat_named_t ds_quota_deltas; 90 kstat_named_t ds_shadow_deltas; 91 92 kstat_named_t ds_superblock_rolled; 93 kstat_named_t ds_bitmap_rolled; 94 kstat_named_t ds_suminfo_rolled; 95 kstat_named_t ds_allocblk_rolled; 96 kstat_named_t ds_ab0_rolled; 97 kstat_named_t ds_dir_rolled; 98 kstat_named_t ds_inode_rolled; 99 kstat_named_t ds_fbiwrite_rolled; 100 kstat_named_t ds_quota_rolled; 101 kstat_named_t ds_shadow_rolled; 102 } dkstats = { 103 { "superblock_deltas", KSTAT_DATA_UINT64 }, 104 { "bitmap_deltas", KSTAT_DATA_UINT64 }, 105 { "suminfo_deltas", KSTAT_DATA_UINT64 }, 106 { "allocblk_deltas", KSTAT_DATA_UINT64 }, 107 { "ab0_deltas", KSTAT_DATA_UINT64 }, 108 { "dir_deltas", KSTAT_DATA_UINT64 }, 109 { "inode_deltas", KSTAT_DATA_UINT64 }, 110 { "fbiwrite_deltas", KSTAT_DATA_UINT64 }, 111 { "quota_deltas", KSTAT_DATA_UINT64 }, 112 { "shadow_deltas", KSTAT_DATA_UINT64 }, 113 114 { "superblock_rolled", KSTAT_DATA_UINT64 }, 115 { "bitmap_rolled", KSTAT_DATA_UINT64 }, 116 { "suminfo_rolled", KSTAT_DATA_UINT64 }, 117 { "allocblk_rolled", KSTAT_DATA_UINT64 }, 118 { "ab0_rolled", KSTAT_DATA_UINT64 }, 119 { "dir_rolled", KSTAT_DATA_UINT64 }, 120 { "inode_rolled", KSTAT_DATA_UINT64 }, 121 { "fbiwrite_rolled", KSTAT_DATA_UINT64 }, 122 { "quota_rolled", KSTAT_DATA_UINT64 }, 123 { "shadow_rolled", KSTAT_DATA_UINT64 } 124 }; 125 126 uint64_t delta_stats[DT_MAX]; 127 uint64_t roll_stats[DT_MAX]; 128 129 /* 130 * General logging kstats 131 */ 132 struct logstats logstats = { 133 { "master_reads", KSTAT_DATA_UINT64 }, 134 { "master_writes", KSTAT_DATA_UINT64 }, 135 { "log_reads_inmem", KSTAT_DATA_UINT64 }, 136 { "log_reads", KSTAT_DATA_UINT64 }, 137 { "log_writes", KSTAT_DATA_UINT64 }, 138 { "log_master_reads", KSTAT_DATA_UINT64 }, 139 { "log_roll_reads", KSTAT_DATA_UINT64 }, 140 { "log_roll_writes", KSTAT_DATA_UINT64 } 141 }; 142 143 int 144 trans_not_done(struct buf *cb) 145 { 146 sema_v(&cb->b_io); 147 return (0); 148 } 149 150 static void 151 trans_wait_panic(struct buf *cb) 152 { 153 while ((cb->b_flags & B_DONE) == 0) 154 drv_usecwait(10); 155 } 156 157 int 158 trans_not_wait(struct buf *cb) 159 { 160 /* 161 * In case of panic, busy wait for completion 162 */ 163 if (panicstr) 164 trans_wait_panic(cb); 165 else 166 sema_p(&cb->b_io); 167 168 return (geterror(cb)); 169 } 170 171 int 172 trans_wait(struct buf *cb) 173 { 174 /* 175 * In case of panic, busy wait for completion and run md daemon queues 176 */ 177 if (panicstr) 178 trans_wait_panic(cb); 179 return (biowait(cb)); 180 } 181 182 static void 183 setsum(int32_t *sp, int32_t *lp, int nb) 184 { 185 int32_t csum = 0; 186 187 *sp = 0; 188 nb /= sizeof (int32_t); 189 while (nb--) 190 csum += *lp++; 191 *sp = csum; 192 } 193 194 static int 195 checksum(int32_t *sp, int32_t *lp, int nb) 196 { 197 int32_t ssum = *sp; 198 199 setsum(sp, lp, nb); 200 if (ssum != *sp) { 201 *sp = ssum; 202 return (0); 203 } 204 return (1); 205 } 206 207 void 208 lufs_unsnarf(ufsvfs_t *ufsvfsp) 209 { 210 ml_unit_t *ul; 211 mt_map_t *mtm; 212 213 ul = ufsvfsp->vfs_log; 214 if (ul == NULL) 215 return; 216 217 mtm = ul->un_logmap; 218 219 /* 220 * Wait for a pending top_issue_sync which is 221 * dispatched (via taskq_dispatch()) but hasnt completed yet. 222 */ 223 224 mutex_enter(&mtm->mtm_lock); 225 226 while (mtm->mtm_taskq_sync_count != 0) { 227 cv_wait(&mtm->mtm_cv, &mtm->mtm_lock); 228 } 229 230 mutex_exit(&mtm->mtm_lock); 231 232 /* Roll committed transactions */ 233 logmap_roll_dev(ul); 234 235 /* Kill the roll thread */ 236 logmap_kill_roll(ul); 237 238 /* release saved alloction info */ 239 if (ul->un_ebp) 240 kmem_free(ul->un_ebp, ul->un_nbeb); 241 242 /* release circular bufs */ 243 free_cirbuf(&ul->un_rdbuf); 244 free_cirbuf(&ul->un_wrbuf); 245 246 /* release maps */ 247 if (ul->un_logmap) 248 ul->un_logmap = map_put(ul->un_logmap); 249 if (ul->un_deltamap) 250 ul->un_deltamap = map_put(ul->un_deltamap); 251 if (ul->un_matamap) 252 ul->un_matamap = map_put(ul->un_matamap); 253 254 mutex_destroy(&ul->un_log_mutex); 255 mutex_destroy(&ul->un_state_mutex); 256 257 /* release state buffer MUST BE LAST!! (contains our ondisk data) */ 258 if (ul->un_bp) 259 brelse(ul->un_bp); 260 kmem_free(ul, sizeof (*ul)); 261 262 ufsvfsp->vfs_log = NULL; 263 } 264 265 int 266 lufs_snarf(ufsvfs_t *ufsvfsp, struct fs *fs, int ronly) 267 { 268 buf_t *bp, *tbp; 269 ml_unit_t *ul; 270 extent_block_t *ebp; 271 ic_extent_block_t *nebp; 272 size_t nb; 273 daddr_t bno; /* in disk blocks */ 274 int i; 275 276 /* LINTED: warning: logical expression always true: op "||" */ 277 ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE); 278 279 /* 280 * Get the allocation table 281 * During a remount the superblock pointed to by the ufsvfsp 282 * is out of date. Hence the need for the ``new'' superblock 283 * pointer, fs, passed in as a parameter. 284 */ 285 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, fs->fs_logbno), 286 fs->fs_bsize); 287 if (bp->b_flags & B_ERROR) { 288 brelse(bp); 289 return (EIO); 290 } 291 ebp = (void *)bp->b_un.b_addr; 292 if (!checksum(&ebp->chksum, (int32_t *)bp->b_un.b_addr, 293 fs->fs_bsize)) { 294 brelse(bp); 295 return (ENODEV); 296 } 297 298 /* 299 * It is possible to get log blocks with all zeros. 300 * We should also check for nextents to be zero in such case. 301 */ 302 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) { 303 brelse(bp); 304 return (EDOM); 305 } 306 /* 307 * Put allocation into memory. This requires conversion between 308 * on the ondisk format of the extent (type extent_t) and the 309 * in-core format of the extent (type ic_extent_t). The 310 * difference is the in-core form of the extent block stores 311 * the physical offset of the extent in disk blocks, which 312 * can require more than a 32-bit field. 313 */ 314 nb = (size_t)(sizeof (ic_extent_block_t) + 315 ((ebp->nextents - 1) * sizeof (ic_extent_t))); 316 nebp = kmem_alloc(nb, KM_SLEEP); 317 nebp->ic_nextents = ebp->nextents; 318 nebp->ic_nbytes = ebp->nbytes; 319 nebp->ic_nextbno = ebp->nextbno; 320 for (i = 0; i < ebp->nextents; i++) { 321 nebp->ic_extents[i].ic_lbno = ebp->extents[i].lbno; 322 nebp->ic_extents[i].ic_nbno = ebp->extents[i].nbno; 323 nebp->ic_extents[i].ic_pbno = 324 logbtodb(fs, ebp->extents[i].pbno); 325 } 326 brelse(bp); 327 328 /* 329 * Get the log state 330 */ 331 bno = nebp->ic_extents[0].ic_pbno; 332 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, DEV_BSIZE); 333 if (bp->b_flags & B_ERROR) { 334 brelse(bp); 335 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno + 1, DEV_BSIZE); 336 if (bp->b_flags & B_ERROR) { 337 brelse(bp); 338 kmem_free(nebp, nb); 339 return (EIO); 340 } 341 } 342 343 /* 344 * Put ondisk struct into an anonymous buffer 345 * This buffer will contain the memory for the ml_odunit struct 346 */ 347 tbp = ngeteblk(dbtob(LS_SECTORS)); 348 tbp->b_edev = bp->b_edev; 349 tbp->b_dev = bp->b_dev; 350 tbp->b_blkno = bno; 351 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE); 352 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE); 353 bp->b_flags |= (B_STALE | B_AGE); 354 brelse(bp); 355 bp = tbp; 356 357 /* 358 * Verify the log state 359 * 360 * read/only mounts w/bad logs are allowed. umount will 361 * eventually roll the bad log until the first IO error. 362 * fsck will then repair the file system. 363 * 364 * read/write mounts with bad logs are not allowed. 365 * 366 */ 367 ul = (ml_unit_t *)kmem_zalloc(sizeof (*ul), KM_SLEEP); 368 bcopy(bp->b_un.b_addr, &ul->un_ondisk, sizeof (ml_odunit_t)); 369 if ((ul->un_chksum != ul->un_head_ident + ul->un_tail_ident) || 370 (ul->un_version != LUFS_VERSION_LATEST) || 371 (!ronly && ul->un_badlog)) { 372 kmem_free(ul, sizeof (*ul)); 373 brelse(bp); 374 kmem_free(nebp, nb); 375 return (EIO); 376 } 377 /* 378 * Initialize the incore-only fields 379 */ 380 if (ronly) 381 ul->un_flags |= LDL_NOROLL; 382 ul->un_bp = bp; 383 ul->un_ufsvfs = ufsvfsp; 384 ul->un_dev = ufsvfsp->vfs_dev; 385 ul->un_ebp = nebp; 386 ul->un_nbeb = nb; 387 ul->un_maxresv = btodb(ul->un_logsize) * LDL_USABLE_BSIZE; 388 ul->un_deltamap = map_get(ul, deltamaptype, DELTAMAP_NHASH); 389 ul->un_logmap = map_get(ul, logmaptype, LOGMAP_NHASH); 390 if (ul->un_debug & MT_MATAMAP) 391 ul->un_matamap = map_get(ul, matamaptype, DELTAMAP_NHASH); 392 mutex_init(&ul->un_log_mutex, NULL, MUTEX_DEFAULT, NULL); 393 mutex_init(&ul->un_state_mutex, NULL, MUTEX_DEFAULT, NULL); 394 395 /* 396 * Aquire the ufs_scan_lock before linking the mtm data 397 * structure so that we keep ufs_sync() and ufs_update() away 398 * when they execute the ufs_scan_inodes() run while we're in 399 * progress of enabling/disabling logging. 400 */ 401 mutex_enter(&ufs_scan_lock); 402 ufsvfsp->vfs_log = ul; 403 404 /* remember the state of the log before the log scan */ 405 logmap_logscan(ul); 406 mutex_exit(&ufs_scan_lock); 407 408 /* 409 * Error during scan 410 * 411 * If this is a read/only mount; ignore the error. 412 * At a later time umount/fsck will repair the fs. 413 * 414 */ 415 if (ul->un_flags & LDL_ERROR) { 416 if (!ronly) { 417 /* 418 * Aquire the ufs_scan_lock before de-linking 419 * the mtm data structure so that we keep ufs_sync() 420 * and ufs_update() away when they execute the 421 * ufs_scan_inodes() run while we're in progress of 422 * enabling/disabling logging. 423 */ 424 mutex_enter(&ufs_scan_lock); 425 lufs_unsnarf(ufsvfsp); 426 mutex_exit(&ufs_scan_lock); 427 return (EIO); 428 } 429 ul->un_flags &= ~LDL_ERROR; 430 } 431 if (!ronly) 432 logmap_start_roll(ul); 433 return (0); 434 } 435 436 uint32_t 437 lufs_hd_genid(const ml_unit_t *up) 438 { 439 uint32_t id; 440 441 mutex_enter(&genid_mutex); 442 443 /* 444 * The formula below implements an exponential, modular sequence. 445 * 446 * ID(N) = (SEED * (BASE^N)) % PRIME 447 * 448 * The numbers will be pseudo random. They depend on SEED, BASE, PRIME, 449 * but will sweep through almost all of the range 1....PRIME-1. 450 * Most importantly they will not repeat for PRIME-2 (4294967289) 451 * repetitions. If they would repeat that could possibly cause hangs, 452 * panics at mount/umount and failed mount operations. 453 */ 454 id = LUFS_NEXT_ID(last_loghead_ident); 455 456 /* Checking if new identity used already */ 457 if (up != NULL && up->un_head_ident == id) { 458 DTRACE_PROBE1(head_ident_collision, uint32_t, id); 459 460 /* 461 * The following preserves the algorithm for the fix for 462 * "panic: free: freeing free frag, dev:0x2000000018, blk:34605, 463 * cg:26, ino:148071,". 464 * If the header identities un_head_ident are equal to the 465 * present element in the sequence, the next element of the 466 * sequence is returned instead. 467 */ 468 id = LUFS_NEXT_ID(id); 469 } 470 471 last_loghead_ident = id; 472 473 mutex_exit(&genid_mutex); 474 475 return (id); 476 } 477 478 static void 479 lufs_genid_init(void) 480 { 481 uint64_t seed; 482 483 /* Initialization */ 484 mutex_init(&genid_mutex, NULL, MUTEX_DEFAULT, NULL); 485 486 /* Seed the algorithm */ 487 do { 488 timestruc_t tv; 489 490 gethrestime(&tv); 491 492 seed = (tv.tv_nsec << 3); 493 seed ^= tv.tv_sec; 494 495 last_loghead_ident = (uint32_t)(seed % LUFS_GENID_PRIME); 496 } while (last_loghead_ident == UINT32_C(0)); 497 } 498 499 static int 500 lufs_initialize( 501 ufsvfs_t *ufsvfsp, 502 daddr_t bno, 503 size_t nb, 504 struct fiolog *flp) 505 { 506 ml_odunit_t *ud, *ud2; 507 buf_t *bp; 508 509 /* LINTED: warning: logical expression always true: op "||" */ 510 ASSERT(sizeof (ml_odunit_t) < DEV_BSIZE); 511 ASSERT(nb >= ldl_minlogsize); 512 513 bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, dbtob(LS_SECTORS)); 514 bzero(bp->b_un.b_addr, bp->b_bcount); 515 516 ud = (void *)bp->b_un.b_addr; 517 ud->od_version = LUFS_VERSION_LATEST; 518 ud->od_maxtransfer = MIN(ufsvfsp->vfs_iotransz, ldl_maxtransfer); 519 if (ud->od_maxtransfer < ldl_mintransfer) 520 ud->od_maxtransfer = ldl_mintransfer; 521 ud->od_devbsize = DEV_BSIZE; 522 523 ud->od_requestsize = flp->nbytes_actual; 524 ud->od_statesize = dbtob(LS_SECTORS); 525 ud->od_logsize = nb - ud->od_statesize; 526 527 ud->od_statebno = INT32_C(0); 528 529 ud->od_head_ident = lufs_hd_genid(NULL); 530 ud->od_tail_ident = ud->od_head_ident; 531 ud->od_chksum = ud->od_head_ident + ud->od_tail_ident; 532 533 ud->od_bol_lof = dbtob(ud->od_statebno) + ud->od_statesize; 534 ud->od_eol_lof = ud->od_bol_lof + ud->od_logsize; 535 ud->od_head_lof = ud->od_bol_lof; 536 ud->od_tail_lof = ud->od_bol_lof; 537 538 ASSERT(lufs_initialize_debug(ud)); 539 540 ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE); 541 bcopy(ud, ud2, sizeof (*ud)); 542 543 UFS_BWRITE2(ufsvfsp, bp); 544 if (bp->b_flags & B_ERROR) { 545 brelse(bp); 546 return (EIO); 547 } 548 brelse(bp); 549 550 return (0); 551 } 552 553 /* 554 * Free log space 555 * Assumes the file system is write locked and is not logging 556 */ 557 static int 558 lufs_free(struct ufsvfs *ufsvfsp) 559 { 560 int error = 0, i, j; 561 buf_t *bp = NULL; 562 extent_t *ep; 563 extent_block_t *ebp; 564 struct fs *fs = ufsvfsp->vfs_fs; 565 daddr_t fno; 566 int32_t logbno; 567 long nfno; 568 inode_t *ip = NULL; 569 char clean; 570 571 /* 572 * Nothing to free 573 */ 574 if (fs->fs_logbno == 0) 575 return (0); 576 577 /* 578 * Mark the file system as FSACTIVE and no log but honor the 579 * current value of fs_reclaim. The reclaim thread could have 580 * been active when lufs_disable() was called and if fs_reclaim 581 * is reset to zero here it could lead to lost inodes. 582 */ 583 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 584 mutex_enter(&ufsvfsp->vfs_lock); 585 clean = fs->fs_clean; 586 logbno = fs->fs_logbno; 587 fs->fs_clean = FSACTIVE; 588 fs->fs_logbno = INT32_C(0); 589 ufs_sbwrite(ufsvfsp); 590 mutex_exit(&ufsvfsp->vfs_lock); 591 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 592 if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) { 593 error = EIO; 594 fs->fs_clean = clean; 595 fs->fs_logbno = logbno; 596 goto errout; 597 } 598 599 /* 600 * fetch the allocation block 601 * superblock -> one block of extents -> log data 602 */ 603 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, logbtodb(fs, logbno), 604 fs->fs_bsize); 605 if (bp->b_flags & B_ERROR) { 606 error = EIO; 607 goto errout; 608 } 609 610 /* 611 * Free up the allocated space (dummy inode needed for free()) 612 */ 613 ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO); 614 ebp = (void *)bp->b_un.b_addr; 615 for (i = 0, ep = &ebp->extents[0]; i < ebp->nextents; ++i, ++ep) { 616 fno = logbtofrag(fs, ep->pbno); 617 nfno = dbtofsb(fs, ep->nbno); 618 for (j = 0; j < nfno; j += fs->fs_frag, fno += fs->fs_frag) 619 free(ip, fno, fs->fs_bsize, 0); 620 } 621 free(ip, logbtofrag(fs, logbno), fs->fs_bsize, 0); 622 brelse(bp); 623 bp = NULL; 624 625 /* 626 * Push the metadata dirtied during the allocations 627 */ 628 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 629 sbupdate(ufsvfsp->vfs_vfs); 630 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 631 bflush(ufsvfsp->vfs_dev); 632 error = bfinval(ufsvfsp->vfs_dev, 0); 633 if (error) 634 goto errout; 635 636 /* 637 * Free the dummy inode 638 */ 639 ufs_free_inode(ip); 640 641 return (0); 642 643 errout: 644 /* 645 * Free up all resources 646 */ 647 if (bp) 648 brelse(bp); 649 if (ip) 650 ufs_free_inode(ip); 651 return (error); 652 } 653 654 /* 655 * Allocate log space 656 * Assumes the file system is write locked and is not logging 657 */ 658 static int 659 lufs_alloc(struct ufsvfs *ufsvfsp, struct fiolog *flp, cred_t *cr) 660 { 661 int error = 0; 662 buf_t *bp = NULL; 663 extent_t *ep, *nep; 664 extent_block_t *ebp; 665 struct fs *fs = ufsvfsp->vfs_fs; 666 daddr_t fno; /* in frags */ 667 daddr_t bno; /* in disk blocks */ 668 int32_t logbno = INT32_C(0); /* will be fs_logbno */ 669 struct inode *ip = NULL; 670 size_t nb = flp->nbytes_actual; 671 size_t tb = 0; 672 673 /* 674 * Mark the file system as FSACTIVE 675 */ 676 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 677 mutex_enter(&ufsvfsp->vfs_lock); 678 fs->fs_clean = FSACTIVE; 679 ufs_sbwrite(ufsvfsp); 680 mutex_exit(&ufsvfsp->vfs_lock); 681 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 682 683 /* 684 * Allocate the allocation block (need dummy shadow inode; 685 * we use a shadow inode so the quota sub-system ignores 686 * the block allocations.) 687 * superblock -> one block of extents -> log data 688 */ 689 ip = ufs_alloc_inode(ufsvfsp, UFSROOTINO); 690 ip->i_mode = IFSHAD; /* make the dummy a shadow inode */ 691 rw_enter(&ip->i_contents, RW_WRITER); 692 fno = contigpref(ufsvfsp, nb + fs->fs_bsize); 693 error = alloc(ip, fno, fs->fs_bsize, &fno, cr); 694 if (error) 695 goto errout; 696 bno = fsbtodb(fs, fno); 697 698 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, bno, fs->fs_bsize); 699 if (bp->b_flags & B_ERROR) { 700 error = EIO; 701 goto errout; 702 } 703 704 ebp = (void *)bp->b_un.b_addr; 705 ebp->type = LUFS_EXTENTS; 706 ebp->nextbno = UINT32_C(0); 707 ebp->nextents = UINT32_C(0); 708 ebp->chksum = INT32_C(0); 709 if (fs->fs_magic == FS_MAGIC) 710 logbno = bno; 711 else 712 logbno = dbtofsb(fs, bno); 713 714 /* 715 * Initialize the first extent 716 */ 717 ep = &ebp->extents[0]; 718 error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr); 719 if (error) 720 goto errout; 721 bno = fsbtodb(fs, fno); 722 723 ep->lbno = UINT32_C(0); 724 if (fs->fs_magic == FS_MAGIC) 725 ep->pbno = (uint32_t)bno; 726 else 727 ep->pbno = (uint32_t)fno; 728 ep->nbno = (uint32_t)fsbtodb(fs, fs->fs_frag); 729 ebp->nextents = UINT32_C(1); 730 tb = fs->fs_bsize; 731 nb -= fs->fs_bsize; 732 733 while (nb) { 734 error = alloc(ip, fno + fs->fs_frag, fs->fs_bsize, &fno, cr); 735 if (error) { 736 if (tb < ldl_minlogsize) 737 goto errout; 738 error = 0; 739 break; 740 } 741 bno = fsbtodb(fs, fno); 742 if ((daddr_t)((logbtodb(fs, ep->pbno) + ep->nbno) == bno)) 743 ep->nbno += (uint32_t)(fsbtodb(fs, fs->fs_frag)); 744 else { 745 nep = ep + 1; 746 if ((caddr_t)(nep + 1) > 747 (bp->b_un.b_addr + fs->fs_bsize)) { 748 free(ip, fno, fs->fs_bsize, 0); 749 break; 750 } 751 nep->lbno = ep->lbno + ep->nbno; 752 if (fs->fs_magic == FS_MAGIC) 753 nep->pbno = (uint32_t)bno; 754 else 755 nep->pbno = (uint32_t)fno; 756 nep->nbno = (uint32_t)(fsbtodb(fs, fs->fs_frag)); 757 ebp->nextents++; 758 ep = nep; 759 } 760 tb += fs->fs_bsize; 761 nb -= fs->fs_bsize; 762 } 763 ebp->nbytes = (uint32_t)tb; 764 setsum(&ebp->chksum, (int32_t *)bp->b_un.b_addr, fs->fs_bsize); 765 UFS_BWRITE2(ufsvfsp, bp); 766 if (bp->b_flags & B_ERROR) { 767 error = EIO; 768 goto errout; 769 } 770 /* 771 * Initialize the first two sectors of the log 772 */ 773 error = lufs_initialize(ufsvfsp, logbtodb(fs, ebp->extents[0].pbno), 774 tb, flp); 775 if (error) 776 goto errout; 777 778 /* 779 * We are done initializing the allocation block and the log 780 */ 781 brelse(bp); 782 bp = NULL; 783 784 /* 785 * Update the superblock and push the dirty metadata 786 */ 787 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 788 sbupdate(ufsvfsp->vfs_vfs); 789 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 790 bflush(ufsvfsp->vfs_dev); 791 error = bfinval(ufsvfsp->vfs_dev, 1); 792 if (error) 793 goto errout; 794 if (ufsvfsp->vfs_bufp->b_flags & B_ERROR) { 795 error = EIO; 796 goto errout; 797 } 798 799 /* 800 * Everything is safely on disk; update log space pointer in sb 801 */ 802 ufsvfsp->vfs_ulockfs.ul_sbowner = curthread; 803 mutex_enter(&ufsvfsp->vfs_lock); 804 fs->fs_logbno = (uint32_t)logbno; 805 ufs_sbwrite(ufsvfsp); 806 mutex_exit(&ufsvfsp->vfs_lock); 807 ufsvfsp->vfs_ulockfs.ul_sbowner = (kthread_id_t)-1; 808 809 /* 810 * Free the dummy inode 811 */ 812 rw_exit(&ip->i_contents); 813 ufs_free_inode(ip); 814 815 /* inform user of real log size */ 816 flp->nbytes_actual = tb; 817 return (0); 818 819 errout: 820 /* 821 * Free all resources 822 */ 823 if (bp) 824 brelse(bp); 825 if (logbno) { 826 fs->fs_logbno = logbno; 827 (void) lufs_free(ufsvfsp); 828 } 829 if (ip) { 830 rw_exit(&ip->i_contents); 831 ufs_free_inode(ip); 832 } 833 return (error); 834 } 835 836 /* 837 * Disable logging 838 */ 839 int 840 lufs_disable(vnode_t *vp, struct fiolog *flp) 841 { 842 int error = 0; 843 inode_t *ip = VTOI(vp); 844 ufsvfs_t *ufsvfsp = ip->i_ufsvfs; 845 struct fs *fs = ufsvfsp->vfs_fs; 846 struct lockfs lf; 847 struct ulockfs *ulp; 848 849 flp->error = FIOLOG_ENONE; 850 851 /* 852 * Logging is already disabled; done 853 */ 854 if (fs->fs_logbno == 0 || ufsvfsp->vfs_log == NULL) 855 return (0); 856 857 /* 858 * Readonly file system 859 */ 860 if (fs->fs_ronly) { 861 flp->error = FIOLOG_EROFS; 862 return (0); 863 } 864 865 /* 866 * File system must be write locked to disable logging 867 */ 868 error = ufs_fiolfss(vp, &lf); 869 if (error) { 870 return (error); 871 } 872 if (!LOCKFS_IS_ULOCK(&lf)) { 873 flp->error = FIOLOG_EULOCK; 874 return (0); 875 } 876 lf.lf_lock = LOCKFS_WLOCK; 877 lf.lf_flags = 0; 878 lf.lf_comment = NULL; 879 error = ufs_fiolfs(vp, &lf, 1); 880 if (error) { 881 flp->error = FIOLOG_EWLOCK; 882 return (0); 883 } 884 885 if (ufsvfsp->vfs_log == NULL || fs->fs_logbno == 0) 886 goto errout; 887 888 /* 889 * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT 890 */ 891 892 /* 893 * Disable logging: 894 * Suspend the reclaim thread and force the delete thread to exit. 895 * When a nologging mount has completed there may still be 896 * work for reclaim to do so just suspend this thread until 897 * it's [deadlock-] safe for it to continue. The delete 898 * thread won't be needed as ufs_iinactive() calls 899 * ufs_delete() when logging is disabled. 900 * Freeze and drain reader ops. 901 * Commit any outstanding reader transactions (ufs_flush). 902 * Set the ``unmounted'' bit in the ufstrans struct. 903 * If debug, remove metadata from matamap. 904 * Disable matamap processing. 905 * NULL the trans ops table. 906 * Free all of the incore structs related to logging. 907 * Allow reader ops. 908 */ 909 ufs_thread_suspend(&ufsvfsp->vfs_reclaim); 910 ufs_thread_exit(&ufsvfsp->vfs_delete); 911 912 vfs_lock_wait(ufsvfsp->vfs_vfs); 913 ulp = &ufsvfsp->vfs_ulockfs; 914 mutex_enter(&ulp->ul_lock); 915 atomic_add_long(&ufs_quiesce_pend, 1); 916 (void) ufs_quiesce(ulp); 917 918 (void) ufs_flush(ufsvfsp->vfs_vfs); 919 920 TRANS_MATA_UMOUNT(ufsvfsp); 921 ufsvfsp->vfs_domatamap = 0; 922 923 /* 924 * Free all of the incore structs 925 * Aquire the ufs_scan_lock before de-linking the mtm data 926 * structure so that we keep ufs_sync() and ufs_update() away 927 * when they execute the ufs_scan_inodes() run while we're in 928 * progress of enabling/disabling logging. 929 */ 930 mutex_enter(&ufs_scan_lock); 931 (void) lufs_unsnarf(ufsvfsp); 932 mutex_exit(&ufs_scan_lock); 933 934 atomic_add_long(&ufs_quiesce_pend, -1); 935 mutex_exit(&ulp->ul_lock); 936 vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0); 937 vfs_unlock(ufsvfsp->vfs_vfs); 938 939 fs->fs_rolled = FS_ALL_ROLLED; 940 ufsvfsp->vfs_nolog_si = 0; 941 942 /* 943 * Free the log space and mark the superblock as FSACTIVE 944 */ 945 (void) lufs_free(ufsvfsp); 946 947 /* 948 * Allow the reclaim thread to continue. 949 */ 950 ufs_thread_continue(&ufsvfsp->vfs_reclaim); 951 952 /* 953 * Unlock the file system 954 */ 955 lf.lf_lock = LOCKFS_ULOCK; 956 lf.lf_flags = 0; 957 error = ufs_fiolfs(vp, &lf, 1); 958 if (error) 959 flp->error = FIOLOG_ENOULOCK; 960 961 return (0); 962 963 errout: 964 lf.lf_lock = LOCKFS_ULOCK; 965 lf.lf_flags = 0; 966 (void) ufs_fiolfs(vp, &lf, 1); 967 return (error); 968 } 969 970 /* 971 * Enable logging 972 */ 973 int 974 lufs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr) 975 { 976 int error; 977 int reclaim; 978 inode_t *ip = VTOI(vp); 979 ufsvfs_t *ufsvfsp = ip->i_ufsvfs; 980 struct fs *fs; 981 ml_unit_t *ul; 982 struct lockfs lf; 983 struct ulockfs *ulp; 984 vfs_t *vfsp = ufsvfsp->vfs_vfs; 985 uint64_t tmp_nbytes_actual; 986 987 /* 988 * Check if logging is already enabled 989 */ 990 if (ufsvfsp->vfs_log) { 991 flp->error = FIOLOG_ETRANS; 992 /* for root ensure logging option is set */ 993 vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); 994 return (0); 995 } 996 fs = ufsvfsp->vfs_fs; 997 998 /* 999 * Come back here to recheck if we had to disable the log. 1000 */ 1001 recheck: 1002 error = 0; 1003 reclaim = 0; 1004 flp->error = FIOLOG_ENONE; 1005 1006 /* 1007 * Adjust requested log size 1008 */ 1009 flp->nbytes_actual = flp->nbytes_requested; 1010 if (flp->nbytes_actual == 0) { 1011 tmp_nbytes_actual = 1012 (((uint64_t)fs->fs_size) / ldl_divisor) << fs->fs_fshift; 1013 flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX); 1014 } 1015 flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize); 1016 flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize); 1017 flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual); 1018 1019 /* 1020 * logging is enabled and the log is the right size; done 1021 */ 1022 ul = ufsvfsp->vfs_log; 1023 if (ul && fs->fs_logbno && (flp->nbytes_actual == ul->un_requestsize)) 1024 return (0); 1025 1026 /* 1027 * Readonly file system 1028 */ 1029 if (fs->fs_ronly) { 1030 flp->error = FIOLOG_EROFS; 1031 return (0); 1032 } 1033 1034 /* 1035 * File system must be write locked to enable logging 1036 */ 1037 error = ufs_fiolfss(vp, &lf); 1038 if (error) { 1039 return (error); 1040 } 1041 if (!LOCKFS_IS_ULOCK(&lf)) { 1042 flp->error = FIOLOG_EULOCK; 1043 return (0); 1044 } 1045 lf.lf_lock = LOCKFS_WLOCK; 1046 lf.lf_flags = 0; 1047 lf.lf_comment = NULL; 1048 error = ufs_fiolfs(vp, &lf, 1); 1049 if (error) { 1050 flp->error = FIOLOG_EWLOCK; 1051 return (0); 1052 } 1053 1054 /* 1055 * Grab appropriate locks to synchronize with the rest 1056 * of the system 1057 */ 1058 vfs_lock_wait(vfsp); 1059 ulp = &ufsvfsp->vfs_ulockfs; 1060 mutex_enter(&ulp->ul_lock); 1061 1062 /* 1063 * File system must be fairly consistent to enable logging 1064 */ 1065 if (fs->fs_clean != FSLOG && 1066 fs->fs_clean != FSACTIVE && 1067 fs->fs_clean != FSSTABLE && 1068 fs->fs_clean != FSCLEAN) { 1069 flp->error = FIOLOG_ECLEAN; 1070 goto unlockout; 1071 } 1072 1073 /* 1074 * A write-locked file system is only active if there are 1075 * open deleted files; so remember to set FS_RECLAIM later. 1076 */ 1077 if (fs->fs_clean == FSACTIVE) 1078 reclaim = FS_RECLAIM; 1079 1080 /* 1081 * Logging is already enabled; must be changing the log's size 1082 */ 1083 if (fs->fs_logbno && ufsvfsp->vfs_log) { 1084 /* 1085 * Before we can disable logging, we must give up our 1086 * lock. As a consequence of unlocking and disabling the 1087 * log, the fs structure may change. Because of this, when 1088 * disabling is complete, we will go back to recheck to 1089 * repeat all of the checks that we performed to get to 1090 * this point. Disabling sets fs->fs_logbno to 0, so this 1091 * will not put us into an infinite loop. 1092 */ 1093 mutex_exit(&ulp->ul_lock); 1094 vfs_unlock(vfsp); 1095 1096 lf.lf_lock = LOCKFS_ULOCK; 1097 lf.lf_flags = 0; 1098 error = ufs_fiolfs(vp, &lf, 1); 1099 if (error) { 1100 flp->error = FIOLOG_ENOULOCK; 1101 return (0); 1102 } 1103 error = lufs_disable(vp, flp); 1104 if (error || (flp->error != FIOLOG_ENONE)) 1105 return (0); 1106 goto recheck; 1107 } 1108 1109 error = lufs_alloc(ufsvfsp, flp, cr); 1110 if (error) 1111 goto errout; 1112 1113 /* 1114 * Create all of the incore structs 1115 */ 1116 error = lufs_snarf(ufsvfsp, fs, 0); 1117 if (error) 1118 goto errout; 1119 1120 /* 1121 * DON'T ``GOTO ERROUT'' PAST THIS POINT 1122 */ 1123 1124 /* 1125 * Pretend we were just mounted with logging enabled 1126 * Get the ops vector 1127 * If debug, record metadata locations with log subsystem 1128 * Start the delete thread 1129 * Start the reclaim thread, if necessary 1130 */ 1131 vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); 1132 1133 TRANS_DOMATAMAP(ufsvfsp); 1134 TRANS_MATA_MOUNT(ufsvfsp); 1135 TRANS_MATA_SI(ufsvfsp, fs); 1136 ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp); 1137 if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) { 1138 fs->fs_reclaim &= ~FS_RECLAIM; 1139 fs->fs_reclaim |= FS_RECLAIMING; 1140 ufs_thread_start(&ufsvfsp->vfs_reclaim, 1141 ufs_thread_reclaim, vfsp); 1142 } else 1143 fs->fs_reclaim |= reclaim; 1144 1145 mutex_exit(&ulp->ul_lock); 1146 vfs_unlock(vfsp); 1147 1148 /* 1149 * Unlock the file system 1150 */ 1151 lf.lf_lock = LOCKFS_ULOCK; 1152 lf.lf_flags = 0; 1153 error = ufs_fiolfs(vp, &lf, 1); 1154 if (error) { 1155 flp->error = FIOLOG_ENOULOCK; 1156 return (0); 1157 } 1158 1159 /* 1160 * There's nothing in the log yet (we've just allocated it) 1161 * so directly write out the super block. 1162 * Note, we have to force this sb out to disk 1163 * (not just to the log) so that if we crash we know we are logging 1164 */ 1165 mutex_enter(&ufsvfsp->vfs_lock); 1166 fs->fs_clean = FSLOG; 1167 fs->fs_rolled = FS_NEED_ROLL; /* Mark the fs as unrolled */ 1168 UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp); 1169 mutex_exit(&ufsvfsp->vfs_lock); 1170 1171 return (0); 1172 1173 errout: 1174 /* 1175 * Aquire the ufs_scan_lock before de-linking the mtm data 1176 * structure so that we keep ufs_sync() and ufs_update() away 1177 * when they execute the ufs_scan_inodes() run while we're in 1178 * progress of enabling/disabling logging. 1179 */ 1180 mutex_enter(&ufs_scan_lock); 1181 (void) lufs_unsnarf(ufsvfsp); 1182 mutex_exit(&ufs_scan_lock); 1183 1184 (void) lufs_free(ufsvfsp); 1185 unlockout: 1186 mutex_exit(&ulp->ul_lock); 1187 vfs_unlock(vfsp); 1188 1189 lf.lf_lock = LOCKFS_ULOCK; 1190 lf.lf_flags = 0; 1191 (void) ufs_fiolfs(vp, &lf, 1); 1192 return (error); 1193 } 1194 1195 void 1196 lufs_read_strategy(ml_unit_t *ul, buf_t *bp) 1197 { 1198 mt_map_t *logmap = ul->un_logmap; 1199 offset_t mof = ldbtob(bp->b_blkno); 1200 off_t nb = bp->b_bcount; 1201 mapentry_t *age; 1202 char *va; 1203 int (*saviodone)(); 1204 int entire_range; 1205 1206 /* 1207 * get a linked list of overlapping deltas 1208 * returns with &mtm->mtm_rwlock held 1209 */ 1210 entire_range = logmap_list_get(logmap, mof, nb, &age); 1211 1212 /* 1213 * no overlapping deltas were found; read master 1214 */ 1215 if (age == NULL) { 1216 rw_exit(&logmap->mtm_rwlock); 1217 if (ul->un_flags & LDL_ERROR) { 1218 bp->b_flags |= B_ERROR; 1219 bp->b_error = EIO; 1220 biodone(bp); 1221 } else { 1222 ul->un_ufsvfs->vfs_iotstamp = lbolt; 1223 logstats.ls_lreads.value.ui64++; 1224 (void) bdev_strategy(bp); 1225 lwp_stat_update(LWP_STAT_INBLK, 1); 1226 } 1227 return; 1228 } 1229 1230 va = bp_mapin_common(bp, VM_SLEEP); 1231 /* 1232 * if necessary, sync read the data from master 1233 * errors are returned in bp 1234 */ 1235 if (!entire_range) { 1236 saviodone = bp->b_iodone; 1237 bp->b_iodone = trans_not_done; 1238 logstats.ls_mreads.value.ui64++; 1239 (void) bdev_strategy(bp); 1240 lwp_stat_update(LWP_STAT_INBLK, 1); 1241 if (trans_not_wait(bp)) 1242 ldl_seterror(ul, "Error reading master"); 1243 bp->b_iodone = saviodone; 1244 } 1245 1246 /* 1247 * sync read the data from the log 1248 * errors are returned inline 1249 */ 1250 if (ldl_read(ul, va, mof, nb, age)) { 1251 bp->b_flags |= B_ERROR; 1252 bp->b_error = EIO; 1253 } 1254 1255 /* 1256 * unlist the deltas 1257 */ 1258 logmap_list_put(logmap, age); 1259 1260 /* 1261 * all done 1262 */ 1263 if (ul->un_flags & LDL_ERROR) { 1264 bp->b_flags |= B_ERROR; 1265 bp->b_error = EIO; 1266 } 1267 biodone(bp); 1268 } 1269 1270 void 1271 lufs_write_strategy(ml_unit_t *ul, buf_t *bp) 1272 { 1273 offset_t mof = ldbtob(bp->b_blkno); 1274 off_t nb = bp->b_bcount; 1275 char *va; 1276 mapentry_t *me; 1277 1278 ASSERT((nb & DEV_BMASK) == 0); 1279 ul->un_logmap->mtm_ref = 1; 1280 1281 /* 1282 * if there are deltas, move into log 1283 */ 1284 me = deltamap_remove(ul->un_deltamap, mof, nb); 1285 if (me) { 1286 1287 va = bp_mapin_common(bp, VM_SLEEP); 1288 1289 ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) || 1290 (ul->un_matamap == NULL)|| 1291 matamap_within(ul->un_matamap, mof, nb)); 1292 1293 /* 1294 * move to logmap 1295 */ 1296 if (ufs_crb_enable) { 1297 logmap_add_buf(ul, va, mof, me, 1298 bp->b_un.b_addr, nb); 1299 } else { 1300 logmap_add(ul, va, mof, me); 1301 } 1302 1303 if (ul->un_flags & LDL_ERROR) { 1304 bp->b_flags |= B_ERROR; 1305 bp->b_error = EIO; 1306 } 1307 biodone(bp); 1308 return; 1309 } 1310 if (ul->un_flags & LDL_ERROR) { 1311 bp->b_flags |= B_ERROR; 1312 bp->b_error = EIO; 1313 biodone(bp); 1314 return; 1315 } 1316 1317 /* 1318 * Check that we are not updating metadata, or if so then via B_PHYS. 1319 */ 1320 ASSERT((ul->un_matamap == NULL) || 1321 !(matamap_overlap(ul->un_matamap, mof, nb) && 1322 ((bp->b_flags & B_PHYS) == 0))); 1323 1324 ul->un_ufsvfs->vfs_iotstamp = lbolt; 1325 logstats.ls_lwrites.value.ui64++; 1326 1327 /* If snapshots are enabled, write through the snapshot driver */ 1328 if (ul->un_ufsvfs->vfs_snapshot) 1329 fssnap_strategy(&ul->un_ufsvfs->vfs_snapshot, bp); 1330 else 1331 (void) bdev_strategy(bp); 1332 1333 lwp_stat_update(LWP_STAT_OUBLK, 1); 1334 } 1335 1336 void 1337 lufs_strategy(ml_unit_t *ul, buf_t *bp) 1338 { 1339 if (bp->b_flags & B_READ) 1340 lufs_read_strategy(ul, bp); 1341 else 1342 lufs_write_strategy(ul, bp); 1343 } 1344 1345 /* ARGSUSED */ 1346 static int 1347 delta_stats_update(kstat_t *ksp, int rw) 1348 { 1349 if (rw == KSTAT_WRITE) { 1350 delta_stats[DT_SB] = dkstats.ds_superblock_deltas.value.ui64; 1351 delta_stats[DT_CG] = dkstats.ds_bitmap_deltas.value.ui64; 1352 delta_stats[DT_SI] = dkstats.ds_suminfo_deltas.value.ui64; 1353 delta_stats[DT_AB] = dkstats.ds_allocblk_deltas.value.ui64; 1354 delta_stats[DT_ABZERO] = dkstats.ds_ab0_deltas.value.ui64; 1355 delta_stats[DT_DIR] = dkstats.ds_dir_deltas.value.ui64; 1356 delta_stats[DT_INODE] = dkstats.ds_inode_deltas.value.ui64; 1357 delta_stats[DT_FBI] = dkstats.ds_fbiwrite_deltas.value.ui64; 1358 delta_stats[DT_QR] = dkstats.ds_quota_deltas.value.ui64; 1359 delta_stats[DT_SHAD] = dkstats.ds_shadow_deltas.value.ui64; 1360 1361 roll_stats[DT_SB] = dkstats.ds_superblock_rolled.value.ui64; 1362 roll_stats[DT_CG] = dkstats.ds_bitmap_rolled.value.ui64; 1363 roll_stats[DT_SI] = dkstats.ds_suminfo_rolled.value.ui64; 1364 roll_stats[DT_AB] = dkstats.ds_allocblk_rolled.value.ui64; 1365 roll_stats[DT_ABZERO] = dkstats.ds_ab0_rolled.value.ui64; 1366 roll_stats[DT_DIR] = dkstats.ds_dir_rolled.value.ui64; 1367 roll_stats[DT_INODE] = dkstats.ds_inode_rolled.value.ui64; 1368 roll_stats[DT_FBI] = dkstats.ds_fbiwrite_rolled.value.ui64; 1369 roll_stats[DT_QR] = dkstats.ds_quota_rolled.value.ui64; 1370 roll_stats[DT_SHAD] = dkstats.ds_shadow_rolled.value.ui64; 1371 } else { 1372 dkstats.ds_superblock_deltas.value.ui64 = delta_stats[DT_SB]; 1373 dkstats.ds_bitmap_deltas.value.ui64 = delta_stats[DT_CG]; 1374 dkstats.ds_suminfo_deltas.value.ui64 = delta_stats[DT_SI]; 1375 dkstats.ds_allocblk_deltas.value.ui64 = delta_stats[DT_AB]; 1376 dkstats.ds_ab0_deltas.value.ui64 = delta_stats[DT_ABZERO]; 1377 dkstats.ds_dir_deltas.value.ui64 = delta_stats[DT_DIR]; 1378 dkstats.ds_inode_deltas.value.ui64 = delta_stats[DT_INODE]; 1379 dkstats.ds_fbiwrite_deltas.value.ui64 = delta_stats[DT_FBI]; 1380 dkstats.ds_quota_deltas.value.ui64 = delta_stats[DT_QR]; 1381 dkstats.ds_shadow_deltas.value.ui64 = delta_stats[DT_SHAD]; 1382 1383 dkstats.ds_superblock_rolled.value.ui64 = roll_stats[DT_SB]; 1384 dkstats.ds_bitmap_rolled.value.ui64 = roll_stats[DT_CG]; 1385 dkstats.ds_suminfo_rolled.value.ui64 = roll_stats[DT_SI]; 1386 dkstats.ds_allocblk_rolled.value.ui64 = roll_stats[DT_AB]; 1387 dkstats.ds_ab0_rolled.value.ui64 = roll_stats[DT_ABZERO]; 1388 dkstats.ds_dir_rolled.value.ui64 = roll_stats[DT_DIR]; 1389 dkstats.ds_inode_rolled.value.ui64 = roll_stats[DT_INODE]; 1390 dkstats.ds_fbiwrite_rolled.value.ui64 = roll_stats[DT_FBI]; 1391 dkstats.ds_quota_rolled.value.ui64 = roll_stats[DT_QR]; 1392 dkstats.ds_shadow_rolled.value.ui64 = roll_stats[DT_SHAD]; 1393 } 1394 return (0); 1395 } 1396 1397 extern size_t ufs_crb_limit; 1398 extern int ufs_max_crb_divisor; 1399 1400 void 1401 lufs_init(void) 1402 { 1403 kstat_t *ksp; 1404 1405 /* Create kmem caches */ 1406 lufs_sv = kmem_cache_create("lufs_save", sizeof (lufs_save_t), 0, 1407 NULL, NULL, NULL, NULL, NULL, 0); 1408 lufs_bp = kmem_cache_create("lufs_bufs", sizeof (lufs_buf_t), 0, 1409 NULL, NULL, NULL, NULL, NULL, 0); 1410 1411 mutex_init(&log_mutex, NULL, MUTEX_DEFAULT, NULL); 1412 1413 _init_top(); 1414 1415 if (&bio_lufs_strategy != NULL) 1416 bio_lufs_strategy = (void (*) (void *, buf_t *)) lufs_strategy; 1417 1418 /* 1419 * Initialise general logging and delta kstats 1420 */ 1421 ksp = kstat_create("ufs_log", 0, "logstats", "ufs", KSTAT_TYPE_NAMED, 1422 sizeof (logstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); 1423 if (ksp) { 1424 ksp->ks_data = (void *) &logstats; 1425 kstat_install(ksp); 1426 } 1427 1428 ksp = kstat_create("ufs_log", 0, "deltastats", "ufs", KSTAT_TYPE_NAMED, 1429 sizeof (dkstats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); 1430 if (ksp) { 1431 ksp->ks_data = (void *) &dkstats; 1432 ksp->ks_update = delta_stats_update; 1433 kstat_install(ksp); 1434 } 1435 1436 /* Initialize generation of logging ids */ 1437 lufs_genid_init(); 1438 1439 /* 1440 * Set up the maximum amount of kmem that the crbs (system wide) 1441 * can use. 1442 */ 1443 ufs_crb_limit = kmem_maxavail() / ufs_max_crb_divisor; 1444 } 1445