/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fssnap_if.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>
#include <sys/atomic.h>

extern int maxphys;
extern uint_t bypass_snapshot_throttle_key;

extern struct kmem_cache *lufs_sv;
extern struct kmem_cache *lufs_bp;

static void
makebusy(ml_unit_t *ul, buf_t *bp)
{
	sema_p(&bp->b_sem);
	if ((bp->b_flags & B_ERROR) == 0)
		return;
	if (bp->b_flags & B_READ)
		ldl_seterror(ul, "Error reading ufs log");
	else
		ldl_seterror(ul, "Error writing ufs log");
}

static int
logdone(buf_t *bp)
{
	bp->b_flags |= B_DONE;

	if (bp->b_flags & B_WRITE)
		sema_v(&bp->b_sem);
	else
		/* wakeup the thread waiting on this buf */
		sema_v(&bp->b_io);
	return (0);
}

static int
ldl_strategy_done(buf_t *cb)
{
	lufs_save_t *sv;
	lufs_buf_t *lbp;
	buf_t *bp;

	ASSERT(SEMA_HELD(&cb->b_sem));
	ASSERT((cb->b_flags & B_DONE) == 0);

	/*
	 * Compute address of the ``save'' struct
	 */
	lbp = (lufs_buf_t *)cb;
	sv = (lufs_save_t *)lbp->lb_ptr;

	if (cb->b_flags & B_ERROR)
		sv->sv_error = 1;

	/*
	 * If this is the last request, release the resources and
	 * ``done'' the original buffer header.
	 */
	if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) {
		kmem_cache_free(lufs_bp, lbp);
		return (1);
	}
	/* Propagate any errors back to the original buffer header */
	bp = sv->sv_bp;
	if (sv->sv_error)
		bp->b_flags |= B_ERROR;
	kmem_cache_free(lufs_bp, lbp);
	kmem_cache_free(lufs_sv, sv);

	biodone(bp);
	return (0);
}

/*
 * Map the log logical block number to a physical disk block number
 */
static int
map_frag(
	ml_unit_t *ul,
	daddr_t lblkno,
	size_t bcount,
	daddr_t *pblkno,
	size_t *pbcount)
{
	ic_extent_t *ext = ul->un_ebp->ic_extents;
	uint32_t e = ul->un_ebp->ic_nextents;
	uint32_t s = 0;
	uint32_t i = e >> 1;
	uint32_t lasti = i;
	uint32_t bno_off;

again:
	if (ext[i].ic_lbno <= lblkno) {
		if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) {
			/* FOUND IT */
			bno_off = lblkno - (uint32_t)ext[i].ic_lbno;
			*pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off));
			*pblkno = ext[i].ic_pbno + bno_off;
			return (0);
		} else
			s = i;
	} else
		e = i;
	i = s + ((e - s) >> 1);

	if (i == lasti) {
		*pbcount = bcount;
		return (ENOENT);
	}
	lasti = i;

	goto again;
}

/*
 * The log is a set of extents (which typically will be only one, but
 * may be more if the disk was close to full when the log was created)
 * and hence the logical offsets into the log
 * have to be translated into their real device locations before
 * calling the device's strategy routine. The translation may result
 * in several IO requests if this request spans extents.
 */
void
ldl_strategy(ml_unit_t *ul, buf_t *pb)
{
	lufs_save_t *sv;
	lufs_buf_t *lbp;
	buf_t *cb;
	ufsvfs_t *ufsvfsp = ul->un_ufsvfs;
	daddr_t lblkno, pblkno;
	size_t nb_left, pbcount;
	off_t offset;
	dev_t dev = ul->un_dev;
	int error;
	int read = pb->b_flags & B_READ;

	/*
	 * Allocate and initialise the save structure,
	 */
	sv = kmem_cache_alloc(lufs_sv, KM_SLEEP);
	sv->sv_error = 0;
	sv->sv_bp = pb;
	nb_left = pb->b_bcount;
	sv->sv_nb_left = nb_left;

	lblkno = pb->b_blkno;
	offset = 0;

	do {
		error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount);

		lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP);
		bioinit(&lbp->lb_buf);
		lbp->lb_ptr = sv;

		cb = bioclone(pb, offset, pbcount, dev,
		    pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP);

		offset += pbcount;
		lblkno += btodb(pbcount);
		nb_left -= pbcount;

		if (error) {
			cb->b_flags |= B_ERROR;
			cb->b_resid = cb->b_bcount;
			biodone(cb);
		} else {
			if (read) {
				logstats.ls_ldlreads.value.ui64++;
				ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
				lwp_stat_update(LWP_STAT_INBLK, 1);
			} else {
				logstats.ls_ldlwrites.value.ui64++;
				lwp_stat_update(LWP_STAT_OUBLK, 1);
			}

			/*
			 * write through the snapshot driver if necessary
			 * We do not want this write to be throttled because
			 * we are holding the un_log mutex here. If we
			 * are throttled in fssnap_translate, the fssnap_taskq
			 * thread which can wake us up can get blocked on
			 * the un_log mutex resulting in a deadlock.
			 */
			if (ufsvfsp->vfs_snapshot) {
				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)1);
				fssnap_strategy(&ufsvfsp->vfs_snapshot, cb);

				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)0);
			} else {
				(void) bdev_strategy(cb);
			}
		}

	} while (nb_left);
}

static void
writelog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * This is really a B_ASYNC write but we want Presto to
	 * cache this write. The iodone routine, logdone, processes
	 * the buf correctly.
	 */
	bp->b_flags = B_WRITE;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/*
	 * return EIO for every IO if in hard error state
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	ldl_strategy(ul, bp);
}

static void
readlog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT(bp->b_bcount);

	bp->b_flags = B_READ;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/* all IO returns errors when in error state */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		(void) trans_wait(bp);
		return;
	}

	ldl_strategy(ul, bp);

	if (trans_wait(bp))
		ldl_seterror(ul, "Error reading ufs log");
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static void
push_dirty_bp(ml_unit_t *ul, buf_t *bp)
{
	buf_t *newbp;
	cirbuf_t *cb = &ul->un_wrbuf;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
	ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0);

	/*
	 * async write the buf
	 */
	writelog(ul, bp);

	/*
	 * no longer filling any buf
	 */
	cb->cb_dirty = NULL;

	/*
	 * no extra buffer space; all done
	 */
	if (bp->b_bcount == bp->b_bufsize)
		return;

	/*
	 * give extra buffer space to a new bp
	 *	try to take buf off of free list
	 */
	if ((newbp = cb->cb_free) != NULL) {
		cb->cb_free = newbp->b_forw;
	} else {
		newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	}
	newbp->b_flags = 0;
	newbp->b_bcount = 0;
	newbp->b_file = NULL;
	newbp->b_offset = -1;
	newbp->b_bufsize = bp->b_bufsize - bp->b_bcount;
	newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount;
	bp->b_bufsize = bp->b_bcount;

	/*
	 * lock out readers and put new buf at LRU position
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	newbp->b_forw = bp->b_forw;
	newbp->b_back = bp;
	bp->b_forw->b_back = newbp;
	bp->b_forw = newbp;
	rw_exit(&cb->cb_rwlock);
}

static void
inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb)
{
	buf_t *bp;
	off_t elof = lof + nb;
	off_t buflof;
	off_t bufelof;

	/*
	 * discard all bufs that overlap the range (lof, lof + nb)
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp;
	do {
		if (bp == cb->cb_dirty || bp->b_bcount == 0) {
			bp = bp->b_forw;
			continue;
		}
		buflof = dbtob(bp->b_blkno);
		bufelof = buflof + bp->b_bcount;
		if ((buflof < lof && bufelof <= lof) ||
		    (buflof >= elof &&
		    bufelof > elof)) {
			bp = bp->b_forw;
			continue;
		}
		makebusy(ul, bp);
		bp->b_flags = 0;
		bp->b_bcount = 0;
		sema_v(&bp->b_sem);
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static buf_t *
get_write_bp(ml_unit_t *ul)
{
	cirbuf_t *cb = &ul->un_wrbuf;
	buf_t *bp;

	/*
	 * cb_dirty is the buffer we are currently filling; if any
	 */
	if ((bp = cb->cb_dirty) != NULL) {
		makebusy(ul, bp);
		return (bp);
	}
	/*
	 * discard any bp that overlaps the current tail since we are
	 * about to overwrite it.
	 */
	inval_range(ul, cb, ul->un_tail_lof, 1);

	/*
	 * steal LRU buf
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);

	cb->cb_dirty = bp;
	cb->cb_bp = bp;

	bp->b_flags = 0;
	bp->b_bcount = 0;
	bp->b_blkno = btodb(ul->un_tail_lof);
	ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof);
	rw_exit(&cb->cb_rwlock);

	/*
	 * NOTE:
	 *	1. un_tail_lof never addresses >= un_eol_lof
	 *	2. b_blkno + btodb(b_bufsize) may > un_eol_lof
	 *	   this case is handled in storebuf
	 */
	return (bp);
}

void
alloc_wrbuf(cirbuf_t *cb, size_t bufsize)
{
	int i;
	buf_t *bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * preallocate 3 bp's and put them on the free list.
	 */
	for (i = 0; i < 3; ++i) {
		bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_offset = -1;
		bp->b_forw = cb->cb_free;
		cb->cb_free = bp;
	}

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * first bp claims entire write buffer
	 */
	bp = cb->cb_free;
	cb->cb_free = bp->b_forw;

	bp->b_forw = bp;
	bp->b_back = bp;
	cb->cb_bp = bp;
	bp->b_un.b_addr = cb->cb_va;
	bp->b_bufsize = cb->cb_nb;

	rw_exit(&cb->cb_rwlock);
}

void
alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize)
{
	caddr_t va;
	size_t nb;
	buf_t *bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * preallocate N bufs that are hard-sized to blksize
	 * in other words, the read buffer pool is a linked list
	 * of statically sized bufs.
	 */
	va = cb->cb_va;
	while ((nb = bufsize) != 0) {
		if (nb > blksize)
			nb = blksize;
		bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
		bzero(bp, sizeof (buf_t));
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_un.b_addr = va;
		bp->b_bufsize = nb;
		if (cb->cb_bp) {
			bp->b_forw = cb->cb_bp->b_forw;
			bp->b_back = cb->cb_bp;
			cb->cb_bp->b_forw->b_back = bp;
			cb->cb_bp->b_forw = bp;
		} else
			bp->b_forw = bp->b_back = bp;
		cb->cb_bp = bp;
		bufsize -= nb;
		va += nb;
	}

	rw_exit(&cb->cb_rwlock);
}

void
free_cirbuf(cirbuf_t *cb)
{
	buf_t *bp;

	if (cb->cb_nb == 0)
		return;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	ASSERT(cb->cb_dirty == NULL);

	/*
	 * free the active bufs
	 */
	while ((bp = cb->cb_bp) != NULL) {
		if (bp == bp->b_forw)
			cb->cb_bp = NULL;
		else
			cb->cb_bp = bp->b_forw;
		bp->b_back->b_forw = bp->b_forw;
		bp->b_forw->b_back = bp->b_back;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}

	/*
	 * free the free bufs
	 */
	while ((bp = cb->cb_free) != NULL) {
		cb->cb_free = bp->b_forw;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}
	kmem_free(cb->cb_va, cb->cb_nb);
	cb->cb_va = NULL;
	cb->cb_nb = 0;
	rw_exit(&cb->cb_rwlock);
	rw_destroy(&cb->cb_rwlock);
}

static int
within_range(off_t lof, daddr_t blkno, ulong_t bcount)
{
	off_t blof = dbtob(blkno);

	return ((lof >= blof) && (lof < (blof + bcount)));
}

static buf_t *
find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t *bp;

	/*
	 * find a buf that contains the offset lof
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bp = cb->cb_bp;
	do {
		if (bp->b_bcount &&
		    within_range(lof, bp->b_blkno, bp->b_bcount)) {
			makebusy(ul, bp);
			rw_exit(&cb->cb_rwlock);
			return (bp);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);

	return (NULL);
}

static off_t
find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t *bp, *bpend;
	off_t rlof;

	/*
	 * we mustn't:
	 *	o read past eol
	 *	o read past the tail
	 *	o read data that may be being written.
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bpend = bp = cb->cb_bp->b_forw;
	rlof = ul->un_tail_lof;
	do {
		if (bp->b_bcount) {
			rlof = dbtob(bp->b_blkno);
			break;
		}
		bp = bp->b_forw;
	} while (bp != bpend);
	rw_exit(&cb->cb_rwlock);

	if (lof <= rlof)
		/* lof is prior to the range represented by the write buf */
		return (rlof);
	else
		/* lof follows the range represented by the write buf */
		return ((off_t)ul->un_eol_lof);
}

static buf_t *
get_read_bp(ml_unit_t *ul, off_t lof)
{
	cirbuf_t *cb;
	buf_t *bp;
	off_t rlof;

	/*
	 * retrieve as much data as possible from the incore buffers
	 */
	if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}
	if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}

	/*
	 * steal the LRU buf
	 */
	cb = &ul->un_rdbuf;
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);
	bp->b_flags = 0;
	bp->b_bcount = 0;
	cb->cb_bp = bp;
	rw_exit(&cb->cb_rwlock);

	/*
	 * don't read past the tail or the end-of-log
	 */
	bp->b_blkno = btodb(lof);
	lof = dbtob(bp->b_blkno);
	rlof = find_read_lof(ul, &ul->un_wrbuf, lof);
	bp->b_bcount = MIN(bp->b_bufsize, rlof - lof);
	readlog(ul, bp);
	return (bp);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static int
extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp)
{
	buf_t *bpforw = bp->b_forw;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);

	/*
	 * there is no `next' bp; do nothing
	 */
	if (bpforw == bp)
		return (0);

	/*
	 * buffer space is not adjacent; do nothing
	 */
	if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr)
		return (0);

	/*
	 * locking protocol requires giving up any bp locks before
	 * acquiring cb_rwlock. This is okay because we hold
	 * un_log_mutex.
	 */
	sema_v(&bp->b_sem);

	/*
	 * lock out ldl_read
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * wait for current IO to finish w/next bp; if necessary
	 */
	makebusy(ul, bpforw);

	/*
	 * free the next bp and steal its space
	 */
	bp->b_forw = bpforw->b_forw;
	bpforw->b_forw->b_back = bp;
	bp->b_bufsize += bpforw->b_bufsize;
	sema_v(&bpforw->b_sem);
	bpforw->b_forw = cb->cb_free;
	cb->cb_free = bpforw;
	makebusy(ul, bp);
	rw_exit(&cb->cb_rwlock);

	return (1);
}

static size_t
storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb)
{
	size_t copy_nb;
	size_t nb_in_sec;
	sect_trailer_t *st;
	size_t nb_left = nb;
	cirbuf_t *cb = &ul->un_wrbuf;

again:
	nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount);
	copy_nb = MIN(nb_left, nb_in_sec);

	ASSERT(copy_nb);

	bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb);
	bp->b_bcount += copy_nb;
	va += copy_nb;
	nb_left -= copy_nb;
	ul->un_tail_lof += copy_nb;

	if ((nb_in_sec -= copy_nb) == 0) {
		st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount);

		st->st_tid = ul->un_logmap->mtm_tid;
		st->st_ident = ul->un_tail_ident++;
		bp->b_bcount += sizeof (sect_trailer_t);
		ul->un_tail_lof += sizeof (sect_trailer_t);
		/*
		 * log wrapped; async write this bp
		 */
		if (ul->un_tail_lof == ul->un_eol_lof) {
			ul->un_tail_lof = ul->un_bol_lof;
			push_dirty_bp(ul, bp);
			return (nb - nb_left);
		}
		/*
		 * out of bp space; get more or async write buf
		 */
		if (bp->b_bcount == bp->b_bufsize) {
			if (!extend_write_bp(ul, cb, bp)) {
				push_dirty_bp(ul, bp);
				return (nb - nb_left);
			}
		}
	}
	if (nb_left)
		goto again;

	sema_v(&bp->b_sem);
	return (nb);
}

static void
fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me)
{
	offset_t src_mof = me->me_mof;
	size_t src_nb = me->me_nb;

	if (src_mof > dst_mof) {
		ASSERT(src_mof < (dst_mof + dst_nb));
		dst_va += (src_mof - dst_mof);
		dst_nb -= (src_mof - dst_mof);
	} else {
		ASSERT(dst_mof < (src_mof + src_nb));
		src_nb -= (dst_mof - src_mof);
	}

	src_nb = MIN(src_nb, dst_nb);
	ASSERT(src_nb);
	bzero(dst_va, src_nb);
}

/*
 * dst_va == NULL means don't copy anything
 */
static ulong_t
fetchbuf(
	ml_unit_t *ul,
	buf_t *bp,
	caddr_t dst_va,
	size_t dst_nb,
	off_t *dst_lofp)
{
	caddr_t copy_va;
	size_t copy_nb;
	size_t nb_sec;
	off_t dst_lof = *dst_lofp;
	ulong_t sav_dst_nb = dst_nb;
	ulong_t src_nb = bp->b_bcount;
	off_t src_lof = dbtob(bp->b_blkno);
	off_t src_elof = src_lof + src_nb;
	caddr_t src_va = bp->b_un.b_addr;

	/*
	 * copy from bp to dst_va
	 */
	while (dst_nb) {
		/*
		 * compute address within bp
		 */
		copy_va = src_va + (dst_lof - src_lof);

		/*
		 * adjust copy size to amount of data in bp
		 */
		copy_nb = MIN(dst_nb, src_elof - dst_lof);

		/*
		 * adjust copy size to amount of data in sector
		 */
		nb_sec = NB_LEFT_IN_SECTOR(dst_lof);
		copy_nb = MIN(copy_nb, nb_sec);

		/*
		 * dst_va == NULL means don't do copy (see logseek())
		 */
		if (dst_va) {
			bcopy(copy_va, dst_va, copy_nb);
			dst_va += copy_nb;
		}
		dst_lof += copy_nb;
		dst_nb -= copy_nb;
		nb_sec -= copy_nb;

		/*
		 * advance over sector trailer
		 */
		if (nb_sec == 0)
			dst_lof +=
			    sizeof (sect_trailer_t);

		/*
		 * exhausted buffer
		 *	return current lof for next read
		 */
		if (dst_lof == src_elof) {
			sema_v(&bp->b_sem);
			if (dst_lof == ul->un_eol_lof)
				dst_lof = ul->un_bol_lof;
			*dst_lofp = dst_lof;
			return (sav_dst_nb - dst_nb);
		}
	}

	/*
	 * copy complete - return current lof
	 */
	sema_v(&bp->b_sem);
	*dst_lofp = dst_lof;
	return (sav_dst_nb);
}

void
ldl_round_commit(ml_unit_t *ul)
{
	int wrapped;
	buf_t *bp;
	sect_trailer_t *st;
	size_t bcount;
	cirbuf_t *cb = &ul->un_wrbuf;

	/*
	 * if nothing to write; then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);

	/*
	 * round up to sector boundary and set new tail
	 *	don't readjust st_ident if buf is already rounded
	 */
	bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE);
	if (bcount == bp->b_bcount) {
		sema_v(&bp->b_sem);
		return;
	}
	bp->b_bcount = bcount;
	ul->un_tail_lof = dbtob(bp->b_blkno) + bcount;
	wrapped = 0;
	if (ul->un_tail_lof == ul->un_eol_lof) {
		ul->un_tail_lof = ul->un_bol_lof;
		++wrapped;
	}
	ASSERT(ul->un_tail_lof != ul->un_head_lof);

	/*
	 * fix up the sector trailer
	 */
	/* LINTED */
	st = (sect_trailer_t *)
	    ((bp->b_un.b_addr + bcount) - sizeof (*st));
	st->st_tid = ul->un_logmap->mtm_tid;
	st->st_ident = ul->un_tail_ident++;

	/*
	 * if tail wrapped or we have exhausted this buffer
	 *	async write the buffer
	 */
	if (wrapped || bcount == bp->b_bufsize)
		push_dirty_bp(ul, bp);
	else
		sema_v(&bp->b_sem);
}

void
ldl_push_commit(ml_unit_t *ul)
{
	buf_t *bp;
	cirbuf_t *cb = &ul->un_wrbuf;

	/*
	 * if nothing to write; then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);
	push_dirty_bp(ul, bp);
}

int
ldl_need_commit(ml_unit_t *ul)
{
	return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2)));
}

int
ldl_has_space(ml_unit_t *ul, mapentry_t *me)
{
	off_t nfb;
	off_t nb;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/*
	 * Add up the size used by the deltas
	 * round nb up to a sector length plus an extra sector
	 * w/o the extra sector we couldn't distinguish
	 * a full log (head == tail) from an empty log (head == tail)
	 */
	for (nb = DEV_BSIZE; me; me = me->me_hash) {
		nb += sizeof (struct delta);
		if (me->me_dt != DT_CANCEL)
			nb += me->me_nb;
	}
	nb = P2ROUNDUP(nb, DEV_BSIZE);

	if (ul->un_head_lof <= ul->un_tail_lof)
		nfb = (ul->un_head_lof - ul->un_bol_lof) +
		    (ul->un_eol_lof - ul->un_tail_lof);
	else
		nfb = ul->un_head_lof - ul->un_tail_lof;

	return (nb < nfb);
}

void
ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me)
{
	buf_t *bp;
	caddr_t va;
	size_t nb;
	size_t actual;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/* Write the delta */

	nb = sizeof (struct delta);
	va = (caddr_t)&me->me_delta;
	bp = get_write_bp(ul);

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}

	/* If a commit, cancel, or 0's; we're almost done */
	switch (me->me_dt) {
	case DT_COMMIT:
	case
	    DT_CANCEL:
	case DT_ABZERO:
		/* roll needs to know where the next delta will go */
		me->me_lof = ul->un_tail_lof;
		return;
	default:
		break;
	}

	/* Now write the data */

	ASSERT(me->me_nb != 0);

	nb = me->me_nb;
	va = (me->me_mof - bufmof) + bufp;
	bp = get_write_bp(ul);

	/* Save where we will put the data */
	me->me_lof = ul->un_tail_lof;

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}
}

void
ldl_waito(ml_unit_t *ul)
{
	buf_t *bp;
	cirbuf_t *cb = &ul->un_wrbuf;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	/*
	 * wait on them
	 */
	bp = cb->cb_bp;
	do {
		if ((bp->b_flags & B_DONE) == 0) {
			makebusy(ul, bp);
			sema_v(&bp->b_sem);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * seek nb bytes from location lof
 */
static int
logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp)
{
	buf_t *bp;
	ulong_t actual;

	while (nb) {
		bp = get_read_bp(ul, lof);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		actual = fetchbuf(ul, bp, NULL, nb, &lof);
		ASSERT(actual);
		nb -= actual;
	}
	*lofp = lof;
	ASSERT(nb == 0);
	return (0);
}

int
ldl_read(
	ml_unit_t *ul,		/* Log unit */
	caddr_t va,		/* address of buffer to read into */
	offset_t mof,		/* mof of buffer */
	off_t nb,		/* length of buffer */
	mapentry_t *me)		/* Map entry list */
{
	buf_t *bp;
	crb_t *crb;
	caddr_t rva;		/* address to read into */
	size_t rnb;		/* # of bytes to read */
	off_t lof;		/* log device offset to read from */
	off_t skip;
	ulong_t actual;
	int error;
	caddr_t eva = va + nb;	/* end of buffer */

	for (; me; me = me->me_agenext) {
		ASSERT(me->me_dt != DT_CANCEL);

		/*
		 * check for a cached roll buffer
		 */
		crb = me->me_crb;
		if (crb) {
			if (mof > crb->c_mof) {
				/*
				 * This mapentry overlaps with the beginning of
				 * the supplied buffer
				 */
				skip = mof - crb->c_mof;
				bcopy(crb->c_buf + skip, va,
				    MIN(nb, crb->c_nb - skip));
			} else {
				/*
				 * This mapentry starts at or after
				 * the supplied buffer.
				 */
				skip = crb->c_mof - mof;
				bcopy(crb->c_buf, va + skip,
				    MIN(crb->c_nb, nb - skip));
			}
			logstats.ls_lreadsinmem.value.ui64++;
			continue;
		}

		/*
		 * check for a delta full of zeroes - there's no log data
		 */
		if (me->me_dt == DT_ABZERO) {
			fetchzeroes(va, mof, nb, me);
			continue;
		}

		if (mof > me->me_mof) {
			rnb = (size_t)(mof - me->me_mof);
			error = logseek(ul, me->me_lof, rnb, &lof);
			if (error)
				return (EIO);
			rva = va;
			rnb = me->me_nb - rnb;
			rnb = ((rva + rnb) > eva) ? eva - rva : rnb;
		} else {
			lof = me->me_lof;
			rva = (me->me_mof - mof) + va;
			rnb = ((rva + me->me_nb) > eva) ?
			    eva - rva : me->me_nb;
		}

		while (rnb) {
			bp = get_read_bp(ul, lof);
			if (bp->b_flags & B_ERROR) {
				sema_v(&bp->b_sem);
				return (EIO);
			}
			ASSERT(((me->me_flags & ME_ROLL) == 0) ||
			    (bp != ul->un_wrbuf.cb_dirty));
			actual = fetchbuf(ul, bp, rva, rnb, &lof);
			ASSERT(actual);
			rva += actual;
			rnb -= actual;
		}
	}
	return (0);
}

void
ldl_savestate(ml_unit_t *ul)
{
	int error;
	buf_t *bp = ul->un_bp;
	ml_odunit_t *ud = (void *)bp->b_un.b_addr;
	ml_odunit_t *ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE);

#if DEBUG
	/*
	 * Scan test is running; don't update intermediate state
	 */
	if (ul->un_logmap && ul->un_logmap->mtm_trimlof)
		return;
#endif /* DEBUG */

	mutex_enter(&ul->un_state_mutex);
	bcopy(&ul->un_ondisk, ud, sizeof (*ud));
	ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
	bcopy(ud, ud2, sizeof (*ud));

	/* If a snapshot is enabled, write through the snapshot driver. */
	if (ul->un_ufsvfs->vfs_snapshot)
		UFS_BWRITE2(ul->un_ufsvfs, bp);
	else
		BWRITE2(bp);
	logstats.ls_ldlwrites.value.ui64++;
	error = bp->b_flags & B_ERROR;
	mutex_exit(&ul->un_state_mutex);
	if (error)
		ldl_seterror(ul, "Error writing ufs log state");
}

/*
 * The head will be set to (new_lof - header) since ldl_sethead is
 * called with the new_lof of the data portion of a delta.
 */
void
ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid)
{
	off_t nb;
	off_t new_lof;
	uint32_t new_ident;
	daddr_t beg_blkno;
	daddr_t end_blkno;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (data_lof == -1) {
		/* log is empty */
		new_ident = lufs_hd_genid(ul);
		new_lof = ul->un_tail_lof;

	} else {
		/* compute header's lof */
		new_ident = ul->un_head_ident;
		new_lof = data_lof - sizeof (struct delta);

		/* whoops, header spans sectors; subtract out sector trailer */
		if (btodb(new_lof) != btodb(data_lof))
			new_lof -= sizeof (sect_trailer_t);

		/* whoops, header wrapped the log; go to last sector */
		if (new_lof < ul->un_bol_lof) {
			/* sector offset */
			new_lof -= dbtob(btodb(new_lof));
			/* add to last sector's lof */
			new_lof += (ul->un_eol_lof - DEV_BSIZE);
		}
		ul->un_head_tid = tid;
	}

	/*
	 * check for nop
	 */
	if (new_lof == ul->un_head_lof)
		return;

	/*
	 * invalidate the affected bufs and calculate new ident
	 */
	if (new_lof > ul->un_head_lof) {
		nb = new_lof - ul->un_head_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident += (end_blkno - beg_blkno);
	} else {
		nb = ul->un_eol_lof - ul->un_head_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);

		end_blkno = btodb(ul->un_eol_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident += (end_blkno - beg_blkno);

		nb = new_lof - ul->un_bol_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_bol_lof);
		new_ident += (end_blkno - beg_blkno);
	}
	/*
	 * don't
	 * update the head if there has been an error
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	/* Fix up the head and ident */
	ASSERT(new_lof >= ul->un_bol_lof);
	ul->un_head_lof = new_lof;
	ul->un_head_ident = new_ident;
	if (data_lof == -1) {
		ul->un_tail_ident = ul->un_head_ident;
	}

	/* Commit to the database */
	ldl_savestate(ul);

	ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) ||
	    ldl_sethead_debug(ul));
}

/*
 * The tail will be set to the sector following lof+nb
 *	lof + nb == size of the last delta + commit record
 *	this function is called once after the log scan has completed.
 */
void
ldl_settail(ml_unit_t *ul, off_t lof, size_t nb)
{
	off_t new_lof;
	uint32_t new_ident;
	daddr_t beg_blkno;
	daddr_t end_blkno;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (lof == -1) {
		ul->un_tail_lof = dbtob(btodb(ul->un_head_lof));
		ul->un_head_lof = ul->un_tail_lof;
		ul->un_head_ident = lufs_hd_genid(ul);
		ul->un_tail_ident = ul->un_head_ident;

		/* Commit to the database */
		ldl_savestate(ul);

		return;
	}

	/*
	 * new_lof is the offset of the sector following the last commit
	 */
	(void) logseek(ul, lof, nb, &new_lof);
	ASSERT(new_lof != dbtob(btodb(ul->un_head_lof)));

	/*
	 * calculate new ident
	 */
	if (new_lof > ul->un_head_lof) {
		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
	} else {
		end_blkno = btodb(ul->un_eol_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_bol_lof);
		new_ident += (end_blkno - beg_blkno);
	}

	/* Fix up the tail and ident */
	ul->un_tail_lof = new_lof;
	ul->un_tail_ident = new_ident;

	/* Commit to the database */
	ldl_savestate(ul);
}

/*
 * LOGSCAN STUFF
 */
static int
ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof)
{
	ulong_t ident;
	size_t nblk, i;
	sect_trailer_t *st;

	/*
	 * compute ident for first sector in the buffer
	 */
	ident = ul->un_head_ident;
	if (bp->b_blkno >= btodb(ul->un_head_lof)) {
		ident += (bp->b_blkno - btodb(ul->un_head_lof));
	} else {
		ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof));
		ident += (bp->b_blkno - btodb(ul->un_bol_lof));
	}
	/*
	 * truncate the buffer down to the last valid sector
	 */
	nblk = btodb(bp->b_bcount);
	bp->b_bcount = 0;
	/* LINTED */
	st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE);
	for (i = 0; i < nblk; ++i) {
		if (st->st_ident != ident)
			break;

		/* remember last valid tid for ldl_logscan_error() */
		ul->un_tid = st->st_tid;

		/* LINTED */
		st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE);
		++ident;
		bp->b_bcount += DEV_BSIZE;
	}
	/*
	 * make sure that lof is still within range
	 */
	return (within_range(lof, bp->b_blkno, bp->b_bcount));
}

ulong_t
ldl_logscan_nbcommit(off_t lof)
{
	/*
	 * lof is the offset following the commit header. However,
	 * if the commit header fell on the end-of-sector, then lof
	 * has already been advanced to the beginning of the next
	 * sector.
	 * So do nothing. Otherwise, return the remaining
	 * bytes in the sector.
	 */
	if ((lof & (DEV_BSIZE - 1)) == 0)
		return (0);
	return (NB_LEFT_IN_SECTOR(lof));
}

int
ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va)
{
	buf_t *bp;
	ulong_t actual;

	ASSERT(ul->un_head_lof != ul->un_tail_lof);

	/*
	 * Check the log data doesn't go out of bounds
	 */
	if (ul->un_head_lof < ul->un_tail_lof) {
		if (!WITHIN(*lofp, nb, ul->un_head_lof,
		    (ul->un_tail_lof - ul->un_head_lof))) {
			return (EIO);
		}
	} else {
		if (OVERLAP(*lofp, nb, ul->un_tail_lof,
		    (ul->un_head_lof - ul->un_tail_lof))) {
			return (EIO);
		}
	}

	while (nb) {
		bp = get_read_bp(ul, *lofp);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		/*
		 * out-of-seq idents means partial transaction
		 *	panic, non-corrupting powerfail, ...
		 */
		if (!ldl_logscan_ident(ul, bp, *lofp)) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		/*
		 * copy the header into the caller's buf
		 */
		actual = fetchbuf(ul, bp, va, nb, lofp);
		if (va)
			va += actual;
		nb -= actual;
	}
	return (0);
}

void
ldl_logscan_begin(ml_unit_t *ul)
{
	size_t bufsize;

	ASSERT(ul->un_wrbuf.cb_dirty == NULL);

	/*
	 * logscan has begun
	 */
	ul->un_flags |= LDL_SCAN;

	/*
	 * reset the circular bufs
	 */
	bufsize = ldl_bufsize(ul);
	alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize);
	alloc_wrbuf(&ul->un_wrbuf, bufsize);

	/*
	 * set the tail to reflect a full log
	 */
	ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE;

	if (ul->un_tail_lof < ul->un_bol_lof)
		ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE;
	if (ul->un_tail_lof >= ul->un_eol_lof)
		ul->un_tail_lof = ul->un_bol_lof;

	/*
	 * un_tid is used during error processing; it is initialized to
	 * the tid of the delta at un_head_lof;
	 */
	ul->un_tid = ul->un_head_tid;
}

void
ldl_logscan_end(ml_unit_t *ul)
{
	size_t bufsize;

	/*
	 * reset the circular bufs
	 */
	bufsize = ldl_bufsize(ul);
	alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
	alloc_wrbuf(&ul->un_wrbuf, bufsize);

	/*
	 * Done w/scan
	 */
	ul->un_flags &= ~LDL_SCAN;
}

int
ldl_need_roll(ml_unit_t *ul)
{
	off_t busybytes;
	off_t head;
	off_t tail;
	off_t bol;
	off_t eol;
	off_t nb;

	/*
	 * snapshot the log state
	 */
	head = ul->un_head_lof;
	tail = ul->un_tail_lof;
	bol = ul->un_bol_lof;
	eol = ul->un_eol_lof;
	nb = ul->un_logsize;

	/*
	 * compute number of busy (inuse) bytes
	 */
	if (head <= tail)
		busybytes = tail - head;
	else
		busybytes = (eol - head) + (tail - bol);

	/*
	 * return TRUE if > 75% full
	 */
	return (busybytes > (nb - (nb >> 2)));
}

void
ldl_seterror(ml_unit_t *ul, char *why)
{
	/*
	 * already in error state; do nothing
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	ul->un_flags |= LDL_ERROR;	/* incore */
	ul->un_badlog = 1;		/* ondisk (cleared by fsck) */

	/*
	 * Commit to state sectors
	 */
	uniqtime(&ul->un_timestamp);
	ldl_savestate(ul);

	/* Pretty print */
	cmn_err(CE_WARN, "%s", why);
	cmn_err(CE_WARN, "ufs log for %s changed state to Error",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);
	cmn_err(CE_WARN, "Please umount(8) %s and run fsck(8)",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);

	/*
	 * If we aren't in the middle of scan (aka snarf); tell ufs
	 * to hard lock itself.
	 */
	if ((ul->un_flags & LDL_SCAN) == 0)
		ufs_trans_onerror();
}

size_t
ldl_bufsize(ml_unit_t *ul)
{
	size_t bufsize;
	extern uint32_t ldl_minbufsize;

	/*
	 * initial guess is the maxtransfer value for this log device
	 *	increase if too small
	 *	decrease if too large
	 */
	bufsize = dbtob(btod(ul->un_maxtransfer));
	if (bufsize < ldl_minbufsize)
		bufsize = ldl_minbufsize;
	if (bufsize > maxphys)
		bufsize = maxphys;
	if (bufsize > ul->un_maxtransfer)
		bufsize = ul->un_maxtransfer;
	return (bufsize);
}