/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fssnap_if.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>
#include <sys/atomic.h>

extern int		maxphys;
extern uint_t		bypass_snapshot_throttle_key;

extern struct kmem_cache	*lufs_sv;
extern struct kmem_cache	*lufs_bp;

static void
makebusy(ml_unit_t *ul, buf_t *bp)
{
	sema_p(&bp->b_sem);
	if ((bp->b_flags & B_ERROR) == 0)
		return;
	if (bp->b_flags & B_READ)
		ldl_seterror(ul, "Error reading ufs log");
	else
		ldl_seterror(ul, "Error writing ufs log");
}

static int
logdone(buf_t *bp)
{
	bp->b_flags |= B_DONE;

	if (bp->b_flags & B_WRITE)
		sema_v(&bp->b_sem);
	else
		/* wakeup the thread waiting on this buf */
		sema_v(&bp->b_io);
	return (0);
}

static int
ldl_strategy_done(buf_t *cb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*bp;

	ASSERT(SEMA_HELD(&cb->b_sem));
	ASSERT((cb->b_flags & B_DONE) == 0);

	/*
	 * Compute address of the ``save'' struct
	 */
	lbp = (lufs_buf_t *)cb;
	sv = (lufs_save_t *)lbp->lb_ptr;

	if (cb->b_flags & B_ERROR)
		sv->sv_error = 1;

	/*
	 * If this is the last request, release the resources and
	 * ``done'' the original buffer header.
	 */
	if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) {
		kmem_cache_free(lufs_bp, lbp);
		return (1);
	}
	/* Propagate any errors back to the original buffer header */
	bp = sv->sv_bp;
	if (sv->sv_error)
		bp->b_flags |= B_ERROR;
	kmem_cache_free(lufs_bp, lbp);
	kmem_cache_free(lufs_sv, sv);

	biodone(bp);
	return (0);
}

/*
 * Map the log logical block number to a physical disk block number
 */
static int
map_frag(
	ml_unit_t	*ul,
	daddr_t		lblkno,
	size_t		bcount,
	daddr_t		*pblkno,
	size_t		*pbcount)
{
	ic_extent_t	*ext = ul->un_ebp->ic_extents;
	uint32_t	e = ul->un_ebp->ic_nextents;
	uint32_t	s = 0;
	uint32_t	i = e >> 1;
	uint32_t	lasti = i;
	uint32_t	bno_off;

again:
	if (ext[i].ic_lbno <= lblkno) {
		if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) {
			/* FOUND IT */
			bno_off = lblkno - (uint32_t)ext[i].ic_lbno;
			*pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off));
			*pblkno = ext[i].ic_pbno + bno_off;
			return (0);
		} else
			s = i;
	} else
		e = i;
	i = s + ((e - s) >> 1);

	if (i == lasti) {
		*pbcount = bcount;
		return (ENOENT);
	}
	lasti = i;

	goto again;
}

/*
 * The log is a set of extents (which typically will be only one, but
 * may be more if the disk was close to full when the log was created)
 * and hence the logical offsets into the log
 * have to be translated into their real device locations before
 * calling the device's strategy routine. The translation may result
 * in several IO requests if this request spans extents.
 */
void
ldl_strategy(ml_unit_t *ul, buf_t *pb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*cb;
	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
	daddr_t		lblkno, pblkno;
	size_t		nb_left, pbcount;
	off_t		offset;
	dev_t		dev = ul->un_dev;
	int		error;
	int		read = pb->b_flags & B_READ;

	/*
	 * Allocate and initialise the save structure.
	 */
	sv = kmem_cache_alloc(lufs_sv, KM_SLEEP);
	sv->sv_error = 0;
	sv->sv_bp = pb;
	nb_left = pb->b_bcount;
	sv->sv_nb_left = nb_left;

	lblkno = pb->b_blkno;
	offset = 0;

	do {
		error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount);

		lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP);
		bioinit(&lbp->lb_buf);
		lbp->lb_ptr = sv;

		cb = bioclone(pb, offset, pbcount, dev,
		    pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP);

		offset += pbcount;
		lblkno += btodb(pbcount);
		nb_left -= pbcount;

		if (error) {
			cb->b_flags |= B_ERROR;
			cb->b_resid = cb->b_bcount;
			biodone(cb);
		} else {
			if (read) {
				logstats.ls_ldlreads.value.ui64++;
				ufsvfsp->vfs_iotstamp = lbolt;
				lwp_stat_update(LWP_STAT_INBLK, 1);
			} else {
				logstats.ls_ldlwrites.value.ui64++;
				lwp_stat_update(LWP_STAT_OUBLK, 1);
			}

			/*
			 * write through the snapshot driver if necessary
			 * We do not want this write to be throttled because
			 * we are holding the un_log mutex here. If we
			 * are throttled in fssnap_translate, the fssnap_taskq
			 * thread which can wake us up can get blocked on
			 * the un_log mutex resulting in a deadlock.
			 */
			if (ufsvfsp->vfs_snapshot) {
				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)1);
				fssnap_strategy(&ufsvfsp->vfs_snapshot, cb);

				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)0);
			} else {
				(void) bdev_strategy(cb);
			}
		}

	} while (nb_left);
}

static void
writelog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * This is really a B_ASYNC write but we want Presto to
	 * cache this write.  The iodone routine, logdone, processes
	 * the buf correctly.
	 */
	bp->b_flags = B_WRITE;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/*
	 * return EIO for every IO if in hard error state
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	ldl_strategy(ul, bp);
}

static void
readlog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT(bp->b_bcount);

	bp->b_flags = B_READ;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/* all IO returns errors when in error state */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		(void) trans_wait(bp);
		return;
	}

	ldl_strategy(ul, bp);

	if (trans_wait(bp))
		ldl_seterror(ul, "Error reading ufs log");
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static void
push_dirty_bp(ml_unit_t *ul, buf_t *bp)
{
	buf_t		*newbp;
	cirbuf_t	*cb = &ul->un_wrbuf;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
	ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0);

	/*
	 * async write the buf
	 */
	writelog(ul, bp);

	/*
	 * no longer filling any buf
	 */
	cb->cb_dirty = NULL;

	/*
	 * no extra buffer space; all done
	 */
	if (bp->b_bcount == bp->b_bufsize)
		return;

	/*
	 * give extra buffer space to a new bp
	 *	try to take buf off of free list
	 */
	if ((newbp = cb->cb_free) != NULL) {
		cb->cb_free = newbp->b_forw;
	} else {
		newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	}
	newbp->b_flags = 0;
	newbp->b_bcount = 0;
	newbp->b_file = NULL;
	newbp->b_offset = -1;
	newbp->b_bufsize = bp->b_bufsize - bp->b_bcount;
	newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount;
	bp->b_bufsize = bp->b_bcount;

	/*
	 * lock out readers and put new buf at LRU position
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	newbp->b_forw = bp->b_forw;
	newbp->b_back = bp;
	bp->b_forw->b_back = newbp;
	bp->b_forw = newbp;
	rw_exit(&cb->cb_rwlock);
}

static void
inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb)
{
	buf_t	*bp;
	off_t	elof = lof + nb;
	off_t	buflof;
	off_t	bufelof;

	/*
	 * discard all bufs that overlap the range (lof, lof + nb)
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp;
	do {
		if (bp == cb->cb_dirty || bp->b_bcount == 0) {
			bp = bp->b_forw;
			continue;
		}
		buflof = dbtob(bp->b_blkno);
		bufelof = buflof + bp->b_bcount;
		if ((buflof < lof && bufelof <= lof) ||
		    (buflof >= elof && bufelof > elof)) {
			bp = bp->b_forw;
			continue;
		}
		makebusy(ul, bp);
		bp->b_flags = 0;
		bp->b_bcount = 0;
		sema_v(&bp->b_sem);
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static buf_t *
get_write_bp(ml_unit_t *ul)
{
	cirbuf_t	*cb = &ul->un_wrbuf;
	buf_t		*bp;

	/*
	 * cb_dirty is the buffer we are currently filling; if any
	 */
	if ((bp = cb->cb_dirty) != NULL) {
		makebusy(ul, bp);
		return (bp);
	}
	/*
	 * discard any bp that overlaps the current tail since we are
	 * about to overwrite it.
	 */
	inval_range(ul, cb, ul->un_tail_lof, 1);

	/*
	 * steal LRU buf
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);

	cb->cb_dirty = bp;
	cb->cb_bp = bp;

	bp->b_flags = 0;
	bp->b_bcount = 0;
	bp->b_blkno = btodb(ul->un_tail_lof);
	ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof);
	rw_exit(&cb->cb_rwlock);

	/*
	 * NOTE:
	 *	1. un_tail_lof never addresses >= un_eol_lof
	 *	2. b_blkno + btodb(b_bufsize) may > un_eol_lof
	 *	   this case is handled in storebuf
	 */
	return (bp);
}

void
alloc_wrbuf(cirbuf_t *cb, size_t bufsize)
{
	int	i;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * preallocate 3 bp's and put them on the free list.
	 */
	for (i = 0; i < 3; ++i) {
		bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_offset = -1;
		bp->b_forw = cb->cb_free;
		cb->cb_free = bp;
	}

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * first bp claims entire write buffer
	 */
	bp = cb->cb_free;
	cb->cb_free = bp->b_forw;

	bp->b_forw = bp;
	bp->b_back = bp;
	cb->cb_bp = bp;
	bp->b_un.b_addr = cb->cb_va;
	bp->b_bufsize = cb->cb_nb;

	rw_exit(&cb->cb_rwlock);
}

void
alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize)
{
	caddr_t	va;
	size_t	nb;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * preallocate N bufs that are hard-sized to blksize
	 * in other words, the read buffer pool is a linked list
	 * of statically sized bufs.
	 */
	va = cb->cb_va;
	while ((nb = bufsize) != 0) {
		if (nb > blksize)
			nb = blksize;
		bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
		bzero(bp, sizeof (buf_t));
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_un.b_addr = va;
		bp->b_bufsize = nb;
		if (cb->cb_bp) {
			bp->b_forw = cb->cb_bp->b_forw;
			bp->b_back = cb->cb_bp;
			cb->cb_bp->b_forw->b_back = bp;
			cb->cb_bp->b_forw = bp;
		} else
			bp->b_forw = bp->b_back = bp;
		cb->cb_bp = bp;
		bufsize -= nb;
		va += nb;
	}

	rw_exit(&cb->cb_rwlock);
}

void
free_cirbuf(cirbuf_t *cb)
{
	buf_t	*bp;

	if (cb->cb_nb == 0)
		return;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	ASSERT(cb->cb_dirty == NULL);

	/*
	 * free the active bufs
	 */
	while ((bp = cb->cb_bp) != NULL) {
		if (bp == bp->b_forw)
			cb->cb_bp = NULL;
		else
			cb->cb_bp = bp->b_forw;
		bp->b_back->b_forw = bp->b_forw;
		bp->b_forw->b_back = bp->b_back;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}

	/*
	 * free the free bufs
	 */
	while ((bp = cb->cb_free) != NULL) {
		cb->cb_free = bp->b_forw;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}
	kmem_free(cb->cb_va, cb->cb_nb);
	cb->cb_va = NULL;
	cb->cb_nb = 0;
	rw_exit(&cb->cb_rwlock);
	rw_destroy(&cb->cb_rwlock);
}

static int
within_range(off_t lof, daddr_t blkno, ulong_t bcount)
{
	off_t	blof = dbtob(blkno);

	return ((lof >= blof) && (lof < (blof + bcount)));
}

static buf_t *
find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t	*bp;

	/*
	 * find a buf that contains the offset lof
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bp = cb->cb_bp;
	do {
		if (bp->b_bcount &&
		    within_range(lof, bp->b_blkno, bp->b_bcount)) {
			makebusy(ul, bp);
			rw_exit(&cb->cb_rwlock);
			return (bp);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);

	return (NULL);
}

static off_t
find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t	*bp, *bpend;
	off_t	rlof;

	/*
	 * we mustn't:
	 *	o read past eol
	 *	o read past the tail
	 *	o read data that may be being written.
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bpend = bp = cb->cb_bp->b_forw;
	rlof = ul->un_tail_lof;
	do {
		if (bp->b_bcount) {
			rlof = dbtob(bp->b_blkno);
			break;
		}
		bp = bp->b_forw;
	} while (bp != bpend);
	rw_exit(&cb->cb_rwlock);

	if (lof <= rlof)
		/* lof is prior to the range represented by the write buf */
		return (rlof);
	else
		/* lof follows the range represented by the write buf */
		return ((off_t)ul->un_eol_lof);
}

static buf_t *
get_read_bp(ml_unit_t *ul, off_t lof)
{
	cirbuf_t	*cb;
	buf_t		*bp;
	off_t		rlof;

	/*
	 * retrieve as much data as possible from the incore buffers
	 */
	if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}
	if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}

	/*
	 * steal the LRU buf
	 */
	cb = &ul->un_rdbuf;
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);
	bp->b_flags = 0;
	bp->b_bcount = 0;
	cb->cb_bp = bp;
	rw_exit(&cb->cb_rwlock);

	/*
	 * don't read past the tail or the end-of-log
	 */
	bp->b_blkno = btodb(lof);
	lof = dbtob(bp->b_blkno);
	rlof = find_read_lof(ul, &ul->un_wrbuf, lof);
	bp->b_bcount = MIN(bp->b_bufsize, rlof - lof);
	readlog(ul, bp);
	return (bp);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static int
extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp)
{
	buf_t	*bpforw = bp->b_forw;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);

	/*
	 * there is no `next' bp; do nothing
	 */
	if (bpforw == bp)
		return (0);

	/*
	 * buffer space is not adjacent; do nothing
	 */
	if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr)
		return (0);

	/*
	 * locking protocol requires giving up any bp locks before
	 * acquiring cb_rwlock.  This is okay because we hold
	 * un_log_mutex.
	 */
	sema_v(&bp->b_sem);

	/*
	 * lock out ldl_read
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * wait for current IO to finish w/next bp; if necessary
	 */
	makebusy(ul, bpforw);

	/*
	 * free the next bp and steal its space
	 */
	bp->b_forw = bpforw->b_forw;
	bpforw->b_forw->b_back = bp;
	bp->b_bufsize += bpforw->b_bufsize;
	sema_v(&bpforw->b_sem);
	bpforw->b_forw = cb->cb_free;
	cb->cb_free = bpforw;
	makebusy(ul, bp);
	rw_exit(&cb->cb_rwlock);

	return (1);
}

static size_t
storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb)
{
	size_t		copy_nb;
	size_t		nb_in_sec;
	sect_trailer_t	*st;
	size_t		nb_left = nb;
	cirbuf_t	*cb = &ul->un_wrbuf;

again:
	nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount);
	copy_nb = MIN(nb_left, nb_in_sec);

	ASSERT(copy_nb);

	bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb);
	bp->b_bcount += copy_nb;
	va += copy_nb;
	nb_left -= copy_nb;
	ul->un_tail_lof += copy_nb;

	if ((nb_in_sec -= copy_nb) == 0) {
		st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount);

		st->st_tid = ul->un_logmap->mtm_tid;
		st->st_ident = ul->un_tail_ident++;
		bp->b_bcount += sizeof (sect_trailer_t);
		ul->un_tail_lof += sizeof (sect_trailer_t);
		/*
		 * log wrapped; async write this bp
		 */
		if (ul->un_tail_lof == ul->un_eol_lof) {
			ul->un_tail_lof = ul->un_bol_lof;
			push_dirty_bp(ul, bp);
			return (nb - nb_left);
		}
		/*
		 * out of bp space; get more or async write buf
		 */
		if (bp->b_bcount == bp->b_bufsize) {
			if (!extend_write_bp(ul, cb, bp)) {
				push_dirty_bp(ul, bp);
				return (nb - nb_left);
			}
		}
	}
	if (nb_left)
		goto again;

	sema_v(&bp->b_sem);
	return (nb);
}

static void
fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me)
{
	offset_t	src_mof = me->me_mof;
	size_t		src_nb = me->me_nb;

	if (src_mof > dst_mof) {
		ASSERT(src_mof < (dst_mof + dst_nb));
		dst_va += (src_mof - dst_mof);
		dst_nb -= (src_mof - dst_mof);
	} else {
		ASSERT(dst_mof < (src_mof + src_nb));
		src_nb -= (dst_mof - src_mof);
	}

	src_nb = MIN(src_nb, dst_nb);
	ASSERT(src_nb);
	bzero(dst_va, src_nb);
}

/*
 * dst_va == NULL means don't copy anything
 */
static ulong_t
fetchbuf(
	ml_unit_t *ul,
	buf_t *bp,
	caddr_t dst_va,
	size_t dst_nb,
	off_t *dst_lofp)
{
	caddr_t	copy_va;
	size_t	copy_nb;
	size_t	nb_sec;
	off_t	dst_lof = *dst_lofp;
	ulong_t	sav_dst_nb = dst_nb;
	ulong_t	src_nb = bp->b_bcount;
	off_t	src_lof = dbtob(bp->b_blkno);
	off_t	src_elof = src_lof + src_nb;
	caddr_t	src_va = bp->b_un.b_addr;

	/*
	 * copy from bp to dst_va
	 */
	while (dst_nb) {
		/*
		 * compute address within bp
		 */
		copy_va = src_va + (dst_lof - src_lof);

		/*
		 * adjust copy size to amount of data in bp
		 */
		copy_nb = MIN(dst_nb, src_elof - dst_lof);

		/*
		 * adjust copy size to amount of data in sector
		 */
		nb_sec = NB_LEFT_IN_SECTOR(dst_lof);
		copy_nb = MIN(copy_nb, nb_sec);

		/*
		 * dst_va == NULL means don't do copy (see logseek())
		 */
		if (dst_va) {
			bcopy(copy_va, dst_va, copy_nb);
			dst_va += copy_nb;
		}
		dst_lof += copy_nb;
		dst_nb -= copy_nb;
		nb_sec -= copy_nb;

		/*
		 * advance over sector trailer
		 */
		if (nb_sec == 0)
			dst_lof += sizeof (sect_trailer_t);

		/*
		 * exhausted buffer
		 *	return current lof for next read
		 */
		if (dst_lof == src_elof) {
			sema_v(&bp->b_sem);
			if (dst_lof == ul->un_eol_lof)
				dst_lof = ul->un_bol_lof;
			*dst_lofp = dst_lof;
			return (sav_dst_nb - dst_nb);
		}
	}

	/*
	 * copy complete - return current lof
	 */
	sema_v(&bp->b_sem);
	*dst_lofp = dst_lof;
	return (sav_dst_nb);
}

void
ldl_round_commit(ml_unit_t *ul)
{
	int		wrapped;
	buf_t		*bp;
	sect_trailer_t	*st;
	size_t		bcount;
	cirbuf_t	*cb = &ul->un_wrbuf;

	/*
	 * if nothing to write; then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);

	/*
	 * round up to sector boundary and set new tail
	 *	don't readjust st_ident if buf is already rounded
	 */
	bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE);
	if (bcount == bp->b_bcount) {
		sema_v(&bp->b_sem);
		return;
	}
	bp->b_bcount = bcount;
	ul->un_tail_lof = dbtob(bp->b_blkno) + bcount;
	wrapped = 0;
	if (ul->un_tail_lof == ul->un_eol_lof) {
		ul->un_tail_lof = ul->un_bol_lof;
		++wrapped;
	}
	ASSERT(ul->un_tail_lof != ul->un_head_lof);

	/*
	 * fix up the sector trailer
	 */
	/* LINTED */
	st = (sect_trailer_t *)
	    ((bp->b_un.b_addr + bcount) - sizeof (*st));
	st->st_tid = ul->un_logmap->mtm_tid;
	st->st_ident = ul->un_tail_ident++;

	/*
	 * if tail wrapped or we have exhausted this buffer
	 *	async write the buffer
	 */
	if (wrapped || bcount == bp->b_bufsize)
		push_dirty_bp(ul, bp);
	else
		sema_v(&bp->b_sem);
}

void
ldl_push_commit(ml_unit_t *ul)
{
	buf_t		*bp;
	cirbuf_t	*cb = &ul->un_wrbuf;

	/*
	 * if nothing to write; then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);
	push_dirty_bp(ul, bp);
}

int
ldl_need_commit(ml_unit_t *ul)
{
	return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2)));
}

int
ldl_has_space(ml_unit_t *ul, mapentry_t *me)
{
	off_t	nfb;
	off_t	nb;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/*
	 * Add up the size used by the deltas
	 * round nb up to a sector length plus an extra sector
	 * w/o the extra sector we couldn't distinguish
	 * a full log (head == tail) from an empty log (head == tail)
	 */
	for (nb = DEV_BSIZE; me; me = me->me_hash) {
		nb += sizeof (struct delta);
		if (me->me_dt != DT_CANCEL)
			nb += me->me_nb;
	}
	nb = P2ROUNDUP(nb, DEV_BSIZE);

	if (ul->un_head_lof <= ul->un_tail_lof)
		nfb = (ul->un_head_lof - ul->un_bol_lof) +
		    (ul->un_eol_lof - ul->un_tail_lof);
	else
		nfb = ul->un_head_lof - ul->un_tail_lof;

	return (nb < nfb);
}

void
ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me)
{
	buf_t	*bp;
	caddr_t	va;
	size_t	nb;
	size_t	actual;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/* Write the delta */

	nb = sizeof (struct delta);
	va = (caddr_t)&me->me_delta;
	bp = get_write_bp(ul);

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}

	/* If a commit, cancel, or 0's; we're almost done */
	switch (me->me_dt) {
	case DT_COMMIT:
	case DT_CANCEL:
	case DT_ABZERO:
		/* roll needs to know where the next delta will go */
		me->me_lof = ul->un_tail_lof;
		return;
	default:
		break;
	}

	/* Now write the data */

	ASSERT(me->me_nb != 0);

	nb = me->me_nb;
	va = (me->me_mof - bufmof) + bufp;
	bp = get_write_bp(ul);

	/* Save where we will put the data */
	me->me_lof = ul->un_tail_lof;

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}
}

void
ldl_waito(ml_unit_t *ul)
{
	buf_t		*bp;
	cirbuf_t	*cb = &ul->un_wrbuf;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	/*
	 * wait on them
	 */
	bp = cb->cb_bp;
	do {
		if ((bp->b_flags & B_DONE) == 0) {
			makebusy(ul, bp);
			sema_v(&bp->b_sem);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * seek nb bytes from location lof
 */
static int
logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp)
{
	buf_t	*bp;
	ulong_t	actual;

	while (nb) {
		bp = get_read_bp(ul, lof);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		actual = fetchbuf(ul, bp, NULL, nb, &lof);
		ASSERT(actual);
		nb -= actual;
	}
	*lofp = lof;
	ASSERT(nb == 0);
	return (0);
}

int
ldl_read(
	ml_unit_t *ul,		/* Log unit */
	caddr_t va,		/* address of buffer to read into */
	offset_t mof,		/* mof of buffer */
	off_t nb,		/* length of buffer */
	mapentry_t *me)		/* Map entry list */
{
	buf_t	*bp;
	crb_t	*crb;
	caddr_t	rva;			/* address to read into */
	size_t	rnb;			/* # of bytes to read */
	off_t	lof;			/* log device offset to read from */
	off_t	skip;
	ulong_t	actual;
	int	error;
	caddr_t	eva = va + nb;		/* end of buffer */

	for (; me; me = me->me_agenext) {
		ASSERT(me->me_dt != DT_CANCEL);

		/*
		 * check for a cached roll buffer
		 */
		crb = me->me_crb;
		if (crb) {
			if (mof > crb->c_mof) {
				/*
				 * This mapentry overlaps with the beginning of
				 * the supplied buffer
				 */
				skip = mof - crb->c_mof;
				bcopy(crb->c_buf + skip, va,
				    MIN(nb, crb->c_nb - skip));
			} else {
				/*
				 * This mapentry starts at or after
				 * the supplied buffer.
				 */
				skip = crb->c_mof - mof;
				bcopy(crb->c_buf, va + skip,
				    MIN(crb->c_nb, nb - skip));
			}
			logstats.ls_lreadsinmem.value.ui64++;
			continue;
		}

		/*
		 * check for a delta full of zeroes - there's no log data
		 */
		if (me->me_dt == DT_ABZERO) {
			fetchzeroes(va, mof, nb, me);
			continue;
		}

		if (mof > me->me_mof) {
			rnb = (size_t)(mof - me->me_mof);
			error = logseek(ul, me->me_lof, rnb, &lof);
			if (error)
				return (EIO);
			rva = va;
			rnb = me->me_nb - rnb;
			rnb = ((rva + rnb) > eva) ? eva - rva : rnb;
		} else {
			lof = me->me_lof;
			rva = (me->me_mof - mof) + va;
			rnb = ((rva + me->me_nb) > eva) ? eva - rva : me->me_nb;
		}

		while (rnb) {
			bp = get_read_bp(ul, lof);
			if (bp->b_flags & B_ERROR) {
				sema_v(&bp->b_sem);
				return (EIO);
			}
			ASSERT(((me->me_flags & ME_ROLL) == 0) ||
			    (bp != ul->un_wrbuf.cb_dirty));
			actual = fetchbuf(ul, bp, rva, rnb, &lof);
			ASSERT(actual);
			rva += actual;
			rnb -= actual;
		}
	}
	return (0);
}

void
ldl_savestate(ml_unit_t *ul)
{
	int		error;
	buf_t		*bp = ul->un_bp;
	ml_odunit_t	*ud = (void *)bp->b_un.b_addr;
	ml_odunit_t	*ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE);

#if	DEBUG
	/*
	 * Scan test is running; don't update intermediate state
	 */
	if (ul->un_logmap && ul->un_logmap->mtm_trimlof)
		return;
#endif	/* DEBUG */

	mutex_enter(&ul->un_state_mutex);
	bcopy(&ul->un_ondisk, ud, sizeof (*ud));
	ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
	bcopy(ud, ud2, sizeof (*ud));

	/* If a snapshot is enabled write through the snapshot driver. */
	if (ul->un_ufsvfs->vfs_snapshot)
		UFS_BWRITE2(ul->un_ufsvfs, bp);
	else
		BWRITE2(bp);
	logstats.ls_ldlwrites.value.ui64++;
	error = bp->b_flags & B_ERROR;
	mutex_exit(&ul->un_state_mutex);
	if (error)
		ldl_seterror(ul, "Error writing ufs log state");
}

/*
 * The head will be set to (new_lof - header) since ldl_sethead is
 * called with the new_lof of the data portion of a delta.
 */
void
ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid)
{
	off_t		nb;
	off_t		new_lof;
	uint32_t	new_ident;
	daddr_t		beg_blkno;
	daddr_t		end_blkno;
	struct timeval	tv;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (data_lof == -1) {
		/* log is empty */
		uniqtime(&tv);
		if (tv.tv_usec == ul->un_head_ident) {
			tv.tv_usec++;
		}
		last_loghead_ident = tv.tv_usec;
		new_ident = tv.tv_usec;
		new_lof = ul->un_tail_lof;

	} else {
		/* compute header's lof */
		new_ident = ul->un_head_ident;
		new_lof = data_lof - sizeof (struct delta);

		/* whoops, header spans sectors; subtract out sector trailer */
		if (btodb(new_lof) != btodb(data_lof))
			new_lof -= sizeof (sect_trailer_t);

		/* whoops, header wrapped the log; go to last sector */
		if (new_lof < ul->un_bol_lof) {
			/* sector offset */
			new_lof -= dbtob(btodb(new_lof));
			/* add to last sector's lof */
			new_lof += (ul->un_eol_lof - DEV_BSIZE);
		}
		ul->un_head_tid = tid;
	}

	/*
	 * check for nop
	 */
	if (new_lof == ul->un_head_lof)
		return;

	/*
	 * invalidate the affected bufs and calculate new ident
	 */
	if (new_lof > ul->un_head_lof) {
		nb = new_lof - ul->un_head_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident += (end_blkno - beg_blkno);
	} else {
		nb = ul->un_eol_lof - ul->un_head_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);

		end_blkno = btodb(ul->un_eol_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident += (end_blkno - beg_blkno);

		nb = new_lof - ul->un_bol_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_bol_lof);
		new_ident += (end_blkno - beg_blkno);
	}
	/*
	 * don't update the head if there has been an error
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	/* Fix up the head and ident */
	ASSERT(new_lof >= ul->un_bol_lof);
	ul->un_head_lof = new_lof;
	ul->un_head_ident = new_ident;
	if (data_lof == -1) {
		ul->un_tail_ident = ul->un_head_ident;
	}

	/* Commit to the database */
	ldl_savestate(ul);

	ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) ||
	    ldl_sethead_debug(ul));
}

/*
 * The tail will be set to the sector following lof+nb
 *	lof + nb == size of the last delta + commit record
 *	this function is called once after the log scan has completed.
 */
void
ldl_settail(ml_unit_t *ul, off_t lof, size_t nb)
{
	off_t		new_lof;
	uint32_t	new_ident;
	daddr_t		beg_blkno;
	daddr_t		end_blkno;
	struct timeval	tv;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (lof == -1) {
		uniqtime(&tv);
		if (tv.tv_usec == ul->un_head_ident) {
			tv.tv_usec++;
		}
		last_loghead_ident = tv.tv_usec;
		ul->un_tail_lof = dbtob(btodb(ul->un_head_lof));
		ul->un_head_lof = ul->un_tail_lof;
		ul->un_head_ident = tv.tv_usec;
		ul->un_tail_ident = ul->un_head_ident;

		/* Commit to the database */
		ldl_savestate(ul);

		return;
	}

	/*
	 * new_lof is the offset of the sector following the last commit
	 */
	(void) logseek(ul, lof, nb, &new_lof);
	ASSERT(new_lof != dbtob(btodb(ul->un_head_lof)));

	/*
	 * calculate new ident
	 */
	if (new_lof > ul->un_head_lof) {
		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
	} else {
		end_blkno = btodb(ul->un_eol_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_bol_lof);
		new_ident += (end_blkno - beg_blkno);
	}

	/* Fix up the tail and ident */
	ul->un_tail_lof = new_lof;
	ul->un_tail_ident = new_ident;

	/* Commit to the database */
	ldl_savestate(ul);
}

/*
 * LOGSCAN STUFF
 */
static int
ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof)
{
	ulong_t		ident;
	size_t		nblk, i;
	sect_trailer_t	*st;

	/*
	 * compute ident for first sector in the buffer
	 */
	ident = ul->un_head_ident;
	if (bp->b_blkno >= btodb(ul->un_head_lof)) {
		ident += (bp->b_blkno - btodb(ul->un_head_lof));
	} else {
		ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof));
		ident += (bp->b_blkno - btodb(ul->un_bol_lof));
	}
	/*
	 * truncate the buffer down to the last valid sector
	 */
	nblk = btodb(bp->b_bcount);
	bp->b_bcount = 0;
	/* LINTED */
	st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE);
	for (i = 0; i < nblk; ++i) {
		if (st->st_ident != ident)
			break;

		/* remember last valid tid for ldl_logscan_error() */
		ul->un_tid = st->st_tid;

		/* LINTED */
		st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE);
		++ident;
		bp->b_bcount += DEV_BSIZE;
	}
	/*
	 * make sure that lof is still within range
	 */
	return (within_range(lof, bp->b_blkno, bp->b_bcount));
}

ulong_t
ldl_logscan_nbcommit(off_t lof)
{
	/*
	 * lof is the offset following the commit header.  However,
	 * if the commit header fell on the end-of-sector, then lof
	 * has already been advanced to the beginning of the next
	 * sector.  So do nothing.  Otherwise, return the remaining
	 * bytes in the sector.
	 */
	if ((lof & (DEV_BSIZE - 1)) == 0)
		return (0);
	return (NB_LEFT_IN_SECTOR(lof));
}

int
ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va)
{
	buf_t	*bp;
	ulong_t	actual;

	ASSERT(ul->un_head_lof != ul->un_tail_lof);

	/*
	 * Check the log data doesn't go out of bounds
	 */
	if (ul->un_head_lof < ul->un_tail_lof) {
		if (!WITHIN(*lofp, nb, ul->un_head_lof,
		    (ul->un_tail_lof - ul->un_head_lof))) {
			return (EIO);
		}
	} else {
		if (OVERLAP(*lofp, nb, ul->un_tail_lof,
		    (ul->un_head_lof - ul->un_tail_lof))) {
			return (EIO);
		}
	}

	while (nb) {
		bp = get_read_bp(ul, *lofp);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		/*
		 * out-of-seq idents means partial transaction
		 *	panic, non-corrupting powerfail, ...
		 */
		if (!ldl_logscan_ident(ul, bp, *lofp)) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		/*
		 * copy the header into the caller's buf
		 */
		actual = fetchbuf(ul, bp, va, nb, lofp);
		if (va)
			va += actual;
		nb -= actual;
	}
	return (0);
}

void
ldl_logscan_begin(ml_unit_t *ul)
{
	size_t	bufsize;

	ASSERT(ul->un_wrbuf.cb_dirty == NULL);

	/*
	 * logscan has begun
	 */
	ul->un_flags |= LDL_SCAN;

	/*
	 * reset the circular bufs
	 */
	bufsize = ldl_bufsize(ul);
	alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize);
	alloc_wrbuf(&ul->un_wrbuf, bufsize);

	/*
	 * set the tail to reflect a full log
	 */
	ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE;

	if (ul->un_tail_lof < ul->un_bol_lof)
		ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE;
	if (ul->un_tail_lof >= ul->un_eol_lof)
		ul->un_tail_lof = ul->un_bol_lof;

	/*
	 * un_tid is used during error processing; it is initialized to
	 * the tid of the delta at un_head_lof;
	 */
	ul->un_tid = ul->un_head_tid;
}

void
ldl_logscan_end(ml_unit_t *ul)
{
	size_t	bufsize;

	/*
	 * reset the circular bufs
	 */
	bufsize = ldl_bufsize(ul);
	alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
	alloc_wrbuf(&ul->un_wrbuf, bufsize);

	/*
	 * Done w/scan
	 */
	ul->un_flags &= ~LDL_SCAN;
}

int
ldl_need_roll(ml_unit_t *ul)
{
	off_t	busybytes;
	off_t	head;
	off_t	tail;
	off_t	bol;
	off_t	eol;
	off_t	nb;

	/*
	 * snapshot the log state
	 */
	head = ul->un_head_lof;
	tail = ul->un_tail_lof;
	bol = ul->un_bol_lof;
	eol = ul->un_eol_lof;
	nb = ul->un_logsize;

	/*
	 * compute number of busy (inuse) bytes
	 */
	if (head <= tail)
		busybytes = tail - head;
	else
		busybytes = (eol - head) + (tail - bol);

	/*
	 * return TRUE if > 75% full
	 */
	return (busybytes > (nb - (nb >> 2)));
}

void
ldl_seterror(ml_unit_t *ul, char *why)
{
	/*
	 * already in error state; do nothing
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	ul->un_flags |= LDL_ERROR;	/* incore */
	ul->un_badlog = 1;		/* ondisk (cleared by fsck) */

	/*
	 * Commit to state sectors
	 */
	uniqtime(&ul->un_timestamp);
	ldl_savestate(ul);

	/* Pretty print */
	cmn_err(CE_WARN, "%s", why);
	cmn_err(CE_WARN, "ufs log for %s changed state to Error",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);
	cmn_err(CE_WARN, "Please umount(1M) %s and run fsck(1M)",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);

	/*
	 * If we aren't in the middle of scan (aka snarf); tell ufs
	 * to hard lock itself.
	 */
	if ((ul->un_flags & LDL_SCAN) == 0)
		ufs_trans_onerror();
}

size_t
ldl_bufsize(ml_unit_t *ul)
{
	size_t		bufsize;
	extern uint32_t	ldl_minbufsize;

	/*
	 * initial guess is the maxtransfer value for this log device
	 *	increase if too small
	 *	decrease if too large
	 */
	bufsize = dbtob(btod(ul->un_maxtransfer));
	if (bufsize < ldl_minbufsize)
		bufsize = ldl_minbufsize;
	if (bufsize > maxphys)
		bufsize = maxphys;
	if (bufsize > ul->un_maxtransfer)
		bufsize = ul->un_maxtransfer;
	return (bufsize);
}