/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fssnap_if.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>
#include <sys/atomic.h>

extern int	maxphys;
extern uint_t	bypass_snapshot_throttle_key;

extern struct kmem_cache	*lufs_sv;
extern struct kmem_cache	*lufs_bp;

static void
makebusy(ml_unit_t *ul, buf_t *bp)
{
	sema_p(&bp->b_sem);
	if ((bp->b_flags & B_ERROR) == 0)
		return;
	if (bp->b_flags & B_READ)
		ldl_seterror(ul, "Error reading ufs log");
	else
		ldl_seterror(ul, "Error writing ufs log");
}

static int
logdone(buf_t *bp)
{
	bp->b_flags |= B_DONE;

	if (bp->b_flags & B_WRITE)
		sema_v(&bp->b_sem);
	else
		/* wakeup the thread waiting on this buf */
		sema_v(&bp->b_io);
	return (0);
}

static int
ldl_strategy_done(buf_t *cb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*bp;

	ASSERT(SEMA_HELD(&cb->b_sem));
	ASSERT((cb->b_flags & B_DONE) == 0);

	/*
	 * Compute address of the ``save'' struct
	 */
	lbp = (lufs_buf_t *)cb;
	sv = (lufs_save_t *)lbp->lb_ptr;

	if (cb->b_flags & B_ERROR)
		sv->sv_error = 1;

	/*
	 * If this is the last request, release the resources and
	 * ``done'' the original buffer header.
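	 * sv_nb_left was initialized to the parent buf's b_bcount; each
	 * completing child buf subtracts its own b_bcount, so only the
	 * final completion falls through to free the save struct and
	 * biodone() the parent.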
	 */
	if (atomic_add_long_nv(&sv->sv_nb_left, -cb->b_bcount)) {
		kmem_cache_free(lufs_bp, lbp);
		return (1);
	}
	/* Propagate any errors back to the original buffer header */
	bp = sv->sv_bp;
	if (sv->sv_error)
		bp->b_flags |= B_ERROR;
	kmem_cache_free(lufs_bp, lbp);
	kmem_cache_free(lufs_sv, sv);

	biodone(bp);
	return (0);
}

/*
 * Map the log logical block number to a physical disk block number
 */
static int
map_frag(
	ml_unit_t	*ul,
	daddr_t		lblkno,
	size_t		bcount,
	daddr_t		*pblkno,
	size_t		*pbcount)
{
	ic_extent_t	*ext = ul->un_ebp->ic_extents;
	uint32_t	e = ul->un_ebp->ic_nextents;
	uint32_t	s = 0;
	uint32_t	i = e >> 1;
	uint32_t	lasti = i;
	uint32_t	bno_off;

again:
	if (ext[i].ic_lbno <= lblkno) {
		if ((ext[i].ic_lbno + ext[i].ic_nbno) > lblkno) {
			/* FOUND IT */
			bno_off = lblkno - (uint32_t)ext[i].ic_lbno;
			*pbcount = MIN(bcount, dbtob(ext[i].ic_nbno - bno_off));
			*pblkno = ext[i].ic_pbno + bno_off;
			return (0);
		} else
			s = i;
	} else
		e = i;
	i = s + ((e - s) >> 1);

	if (i == lasti) {
		*pbcount = bcount;
		return (ENOENT);
	}
	lasti = i;

	goto again;
}

/*
 * The log is a set of extents (which typically will be only one, but
 * may be more if the disk was close to full when the log was created)
 * and hence the logical offsets into the log have to be translated
 * into their real device locations before calling the device's
 * strategy routine. The translation may result in several IO requests
 * if this request spans extents.
 */
void
ldl_strategy(ml_unit_t *ul, buf_t *pb)
{
	lufs_save_t	*sv;
	lufs_buf_t	*lbp;
	buf_t		*cb;
	ufsvfs_t	*ufsvfsp = ul->un_ufsvfs;
	daddr_t		lblkno, pblkno;
	size_t		nb_left, pbcount;
	off_t		offset;
	dev_t		dev = ul->un_dev;
	int		error;
	int		read = pb->b_flags & B_READ;

	/*
	 * Allocate and initialise the save structure.
	 */
	sv = kmem_cache_alloc(lufs_sv, KM_SLEEP);
	sv->sv_error = 0;
	sv->sv_bp = pb;
	nb_left = pb->b_bcount;
	sv->sv_nb_left = nb_left;

	lblkno = pb->b_blkno;
	offset = 0;

	do {
		error = map_frag(ul, lblkno, nb_left, &pblkno, &pbcount);

		lbp = kmem_cache_alloc(lufs_bp, KM_SLEEP);
		bioinit(&lbp->lb_buf);
		lbp->lb_ptr = sv;

		cb = bioclone(pb, offset, pbcount, dev,
		    pblkno, ldl_strategy_done, &lbp->lb_buf, KM_SLEEP);

		offset += pbcount;
		lblkno += btodb(pbcount);
		nb_left -= pbcount;

		if (error) {
			cb->b_flags |= B_ERROR;
			cb->b_resid = cb->b_bcount;
			biodone(cb);
		} else {
			if (read) {
				logstats.ls_ldlreads.value.ui64++;
				ufsvfsp->vfs_iotstamp = lbolt;
				lwp_stat_update(LWP_STAT_INBLK, 1);
			} else {
				logstats.ls_ldlwrites.value.ui64++;
				lwp_stat_update(LWP_STAT_OUBLK, 1);
			}

			/*
			 * write through the snapshot driver if necessary
			 * We do not want this write to be throttled because
			 * we are holding the un_log mutex here. If we
			 * are throttled in fssnap_translate, the fssnap_taskq
			 * thread which can wake us up can get blocked on
			 * the un_log mutex resulting in a deadlock.
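			 * The bypass_snapshot_throttle_key TSD flag is set
			 * around the call so the snapshot code skips its
			 * throttle for just this request.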
			 */
			if (ufsvfsp->vfs_snapshot) {
				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)1);
				fssnap_strategy(&ufsvfsp->vfs_snapshot, cb);

				(void) tsd_set(bypass_snapshot_throttle_key,
				    (void *)0);
			} else {
				(void) bdev_strategy(cb);
			}
		}

	} while (nb_left);
}

static void
writelog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * This is really a B_ASYNC write but we want Presto to
	 * cache this write. The iodone routine, logdone, processes
	 * the buf correctly.
	 */
	bp->b_flags = B_WRITE;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/*
	 * return EIO for every IO if in hard error state
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	ldl_strategy(ul, bp);
}

static void
readlog(ml_unit_t *ul, buf_t *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT(bp->b_bcount);

	bp->b_flags = B_READ;
	bp->b_edev = ul->un_dev;
	bp->b_iodone = logdone;

	/* all IO returns errors when in error state */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		(void) trans_wait(bp);
		return;
	}

	ldl_strategy(ul, bp);

	if (trans_wait(bp))
		ldl_seterror(ul, "Error reading ufs log");
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static void
push_dirty_bp(ml_unit_t *ul, buf_t *bp)
{
	buf_t		*newbp;
	cirbuf_t	*cb = &ul->un_wrbuf;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);
	ASSERT((bp->b_bcount & (DEV_BSIZE-1)) == 0);

	/*
	 * async write the buf
	 */
	writelog(ul, bp);

	/*
	 * no longer filling any buf
	 */
	cb->cb_dirty = NULL;

	/*
	 * no extra buffer space; all done
	 */
	if (bp->b_bcount == bp->b_bufsize)
		return;

	/*
	 * give extra buffer space to a new bp
	 *	try to take buf off of free list
	 */
	if ((newbp = cb->cb_free) != NULL) {
		cb->cb_free = newbp->b_forw;
	} else {
		newbp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&newbp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&newbp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	}
	newbp->b_flags = 0;
	newbp->b_bcount = 0;
	newbp->b_file = NULL;
	newbp->b_offset = -1;
	newbp->b_bufsize = bp->b_bufsize - bp->b_bcount;
	newbp->b_un.b_addr = bp->b_un.b_addr + bp->b_bcount;
	bp->b_bufsize = bp->b_bcount;

	/*
	 * lock out readers and put new buf at LRU position
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	newbp->b_forw = bp->b_forw;
	newbp->b_back = bp;
	bp->b_forw->b_back = newbp;
	bp->b_forw = newbp;
	rw_exit(&cb->cb_rwlock);
}

static void
inval_range(ml_unit_t *ul, cirbuf_t *cb, off_t lof, off_t nb)
{
	buf_t	*bp;
	off_t	elof = lof + nb;
	off_t	buflof;
	off_t	bufelof;

	/*
	 * discard all bufs that overlap the range (lof, lof + nb)
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp;
	do {
		if (bp == cb->cb_dirty || bp->b_bcount == 0) {
			bp = bp->b_forw;
			continue;
		}
		buflof = dbtob(bp->b_blkno);
		bufelof = buflof + bp->b_bcount;
		if ((buflof < lof && bufelof <= lof) ||
		    (buflof >= elof && bufelof > elof)) {
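			/* buf lies entirely outside the range; leave it */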
			bp = bp->b_forw;
			continue;
		}
		makebusy(ul, bp);
		bp->b_flags = 0;
		bp->b_bcount = 0;
		sema_v(&bp->b_sem);
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static buf_t *
get_write_bp(ml_unit_t *ul)
{
	cirbuf_t	*cb = &ul->un_wrbuf;
	buf_t		*bp;

	/*
	 * cb_dirty is the buffer we are currently filling, if any
	 */
	if ((bp = cb->cb_dirty) != NULL) {
		makebusy(ul, bp);
		return (bp);
	}
	/*
	 * discard any bp that overlaps the current tail since we are
	 * about to overwrite it.
	 */
	inval_range(ul, cb, ul->un_tail_lof, 1);

	/*
	 * steal LRU buf
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);

	cb->cb_dirty = bp;
	cb->cb_bp = bp;

	bp->b_flags = 0;
	bp->b_bcount = 0;
	bp->b_blkno = btodb(ul->un_tail_lof);
	ASSERT(dbtob(bp->b_blkno) == ul->un_tail_lof);
	rw_exit(&cb->cb_rwlock);

	/*
	 * NOTE:
	 *	1. un_tail_lof never addresses >= un_eol_lof
	 *	2. b_blkno + btodb(b_bufsize) may > un_eol_lof
	 *	   this case is handled in storebuf
	 */
	return (bp);
}

void
alloc_wrbuf(cirbuf_t *cb, size_t bufsize)
{
	int	i;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * preallocate 3 bp's and put them on the free list.
	 */
	for (i = 0; i < 3; ++i) {
		bp = kmem_zalloc(sizeof (buf_t), KM_SLEEP);
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_offset = -1;
		bp->b_forw = cb->cb_free;
		cb->cb_free = bp;
	}

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * first bp claims entire write buffer
	 */
	bp = cb->cb_free;
	cb->cb_free = bp->b_forw;

	bp->b_forw = bp;
	bp->b_back = bp;
	cb->cb_bp = bp;
	bp->b_un.b_addr = cb->cb_va;
	bp->b_bufsize = cb->cb_nb;

	rw_exit(&cb->cb_rwlock);
}

void
alloc_rdbuf(cirbuf_t *cb, size_t bufsize, size_t blksize)
{
	caddr_t	va;
	size_t	nb;
	buf_t	*bp;

	/*
	 * Clear previous allocation
	 */
	if (cb->cb_nb)
		free_cirbuf(cb);

	bzero(cb, sizeof (*cb));
	rw_init(&cb->cb_rwlock, NULL, RW_DRIVER, NULL);

	rw_enter(&cb->cb_rwlock, RW_WRITER);

	cb->cb_va = kmem_alloc(bufsize, KM_SLEEP);
	cb->cb_nb = bufsize;

	/*
	 * preallocate N bufs that are hard-sized to blksize
	 * in other words, the read buffer pool is a linked list
	 * of statically sized bufs.
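	 * When bufsize is not an exact multiple of blksize, the final
	 * buf simply receives whatever remainder is left.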
	 */
	va = cb->cb_va;
	while ((nb = bufsize) != 0) {
		if (nb > blksize)
			nb = blksize;
		bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
		bzero(bp, sizeof (buf_t));
		sema_init(&bp->b_sem, 1, NULL, SEMA_DEFAULT, NULL);
		sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
		bp->b_un.b_addr = va;
		bp->b_bufsize = nb;
		if (cb->cb_bp) {
			bp->b_forw = cb->cb_bp->b_forw;
			bp->b_back = cb->cb_bp;
			cb->cb_bp->b_forw->b_back = bp;
			cb->cb_bp->b_forw = bp;
		} else
			bp->b_forw = bp->b_back = bp;
		cb->cb_bp = bp;
		bufsize -= nb;
		va += nb;
	}

	rw_exit(&cb->cb_rwlock);
}

void
free_cirbuf(cirbuf_t *cb)
{
	buf_t	*bp;

	if (cb->cb_nb == 0)
		return;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	ASSERT(cb->cb_dirty == NULL);

	/*
	 * free the active bufs
	 */
	while ((bp = cb->cb_bp) != NULL) {
		if (bp == bp->b_forw)
			cb->cb_bp = NULL;
		else
			cb->cb_bp = bp->b_forw;
		bp->b_back->b_forw = bp->b_forw;
		bp->b_forw->b_back = bp->b_back;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}

	/*
	 * free the free bufs
	 */
	while ((bp = cb->cb_free) != NULL) {
		cb->cb_free = bp->b_forw;
		sema_destroy(&bp->b_sem);
		sema_destroy(&bp->b_io);
		kmem_free(bp, sizeof (buf_t));
	}
	kmem_free(cb->cb_va, cb->cb_nb);
	cb->cb_va = NULL;
	cb->cb_nb = 0;
	rw_exit(&cb->cb_rwlock);
	rw_destroy(&cb->cb_rwlock);
}

static int
within_range(off_t lof, daddr_t blkno, ulong_t bcount)
{
	off_t	blof = dbtob(blkno);

	return ((lof >= blof) && (lof < (blof + bcount)));
}

static buf_t *
find_bp(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t	*bp;

	/*
	 * find a buf that contains the offset lof
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bp = cb->cb_bp;
	do {
		if (bp->b_bcount &&
		    within_range(lof, bp->b_blkno, bp->b_bcount)) {
			makebusy(ul, bp);
			rw_exit(&cb->cb_rwlock);
			return (bp);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);

	return (NULL);
}

static off_t
find_read_lof(ml_unit_t *ul, cirbuf_t *cb, off_t lof)
{
	buf_t	*bp, *bpend;
	off_t	rlof;

	/*
	 * we mustn't:
	 *	o read past eol
	 *	o read past the tail
	 *	o read data that may be in the process of being written.
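	 * The limit returned is the start of the oldest write buf that
	 * still holds data (or the tail if none does); if lof already
	 * lies beyond that point, reads may extend to end-of-log instead.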
	 */
	rw_enter(&cb->cb_rwlock, RW_READER);
	bpend = bp = cb->cb_bp->b_forw;
	rlof = ul->un_tail_lof;
	do {
		if (bp->b_bcount) {
			rlof = dbtob(bp->b_blkno);
			break;
		}
		bp = bp->b_forw;
	} while (bp != bpend);
	rw_exit(&cb->cb_rwlock);

	if (lof <= rlof)
		/* lof is prior to the range represented by the write buf */
		return (rlof);
	else
		/* lof follows the range represented by the write buf */
		return ((off_t)ul->un_eol_lof);
}

static buf_t *
get_read_bp(ml_unit_t *ul, off_t lof)
{
	cirbuf_t	*cb;
	buf_t		*bp;
	off_t		rlof;

	/*
	 * retrieve as much data as possible from the incore buffers
	 */
	if ((bp = find_bp(ul, &ul->un_wrbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}
	if ((bp = find_bp(ul, &ul->un_rdbuf, lof)) != NULL) {
		logstats.ls_lreadsinmem.value.ui64++;
		return (bp);
	}

	/*
	 * steal the LRU buf
	 */
	cb = &ul->un_rdbuf;
	rw_enter(&cb->cb_rwlock, RW_WRITER);
	bp = cb->cb_bp->b_forw;
	makebusy(ul, bp);
	bp->b_flags = 0;
	bp->b_bcount = 0;
	cb->cb_bp = bp;
	rw_exit(&cb->cb_rwlock);

	/*
	 * don't read past the tail or the end-of-log
	 */
	bp->b_blkno = btodb(lof);
	lof = dbtob(bp->b_blkno);
	rlof = find_read_lof(ul, &ul->un_wrbuf, lof);
	bp->b_bcount = MIN(bp->b_bufsize, rlof - lof);
	readlog(ul, bp);
	return (bp);
}

/*
 * NOTE: writers are single threaded thru the log layer.
 * This means we can safely reference and change the cb and bp fields
 * that ldl_read does not reference w/o holding the cb_rwlock or
 * the bp makebusy lock.
 */
static int
extend_write_bp(ml_unit_t *ul, cirbuf_t *cb, buf_t *bp)
{
	buf_t	*bpforw = bp->b_forw;

	ASSERT(bp == cb->cb_bp && bp == cb->cb_dirty);

	/*
	 * there is no `next' bp; do nothing
	 */
	if (bpforw == bp)
		return (0);

	/*
	 * buffer space is not adjacent; do nothing
	 */
	if ((bp->b_un.b_addr + bp->b_bufsize) != bpforw->b_un.b_addr)
		return (0);

	/*
	 * locking protocol requires giving up any bp locks before
	 * acquiring cb_rwlock. This is okay because we hold
	 * un_log_mutex.
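	 * (un_log_mutex keeps all other writers out of the log layer, so
	 * cb_dirty cannot change while b_sem is briefly dropped here.)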
	 */
	sema_v(&bp->b_sem);

	/*
	 * lock out ldl_read
	 */
	rw_enter(&cb->cb_rwlock, RW_WRITER);

	/*
	 * wait for current IO to finish w/next bp; if necessary
	 */
	makebusy(ul, bpforw);

	/*
	 * free the next bp and steal its space
	 */
	bp->b_forw = bpforw->b_forw;
	bpforw->b_forw->b_back = bp;
	bp->b_bufsize += bpforw->b_bufsize;
	sema_v(&bpforw->b_sem);
	bpforw->b_forw = cb->cb_free;
	cb->cb_free = bpforw;
	makebusy(ul, bp);
	rw_exit(&cb->cb_rwlock);

	return (1);
}

static size_t
storebuf(ml_unit_t *ul, buf_t *bp, caddr_t va, size_t nb)
{
	size_t		copy_nb;
	size_t		nb_in_sec;
	sect_trailer_t	*st;
	size_t		nb_left = nb;
	cirbuf_t	*cb = &ul->un_wrbuf;

again:
	nb_in_sec = NB_LEFT_IN_SECTOR(bp->b_bcount);
	copy_nb = MIN(nb_left, nb_in_sec);

	ASSERT(copy_nb);

	bcopy(va, bp->b_un.b_addr + bp->b_bcount, copy_nb);
	bp->b_bcount += copy_nb;
	va += copy_nb;
	nb_left -= copy_nb;
	ul->un_tail_lof += copy_nb;

	if ((nb_in_sec -= copy_nb) == 0) {
		st = (sect_trailer_t *)(bp->b_un.b_addr + bp->b_bcount);

		st->st_tid = ul->un_logmap->mtm_tid;
		st->st_ident = ul->un_tail_ident++;
		bp->b_bcount += sizeof (sect_trailer_t);
		ul->un_tail_lof += sizeof (sect_trailer_t);
		/*
		 * log wrapped; async write this bp
		 */
		if (ul->un_tail_lof == ul->un_eol_lof) {
			ul->un_tail_lof = ul->un_bol_lof;
			push_dirty_bp(ul, bp);
			return (nb - nb_left);
		}
		/*
		 * out of bp space; get more or async write buf
		 */
		if (bp->b_bcount == bp->b_bufsize) {
			if (!extend_write_bp(ul, cb, bp)) {
				push_dirty_bp(ul, bp);
				return (nb - nb_left);
			}
		}
	}
	if (nb_left)
		goto again;

	sema_v(&bp->b_sem);
	return (nb);
}

static void
fetchzeroes(caddr_t dst_va, offset_t dst_mof, ulong_t dst_nb, mapentry_t *me)
{
	offset_t	src_mof = me->me_mof;
	size_t		src_nb = me->me_nb;

	if (src_mof > dst_mof) {
		ASSERT(src_mof < (dst_mof + dst_nb));
		dst_va += (src_mof - dst_mof);
		dst_nb -= (src_mof - dst_mof);
	} else {
		ASSERT(dst_mof < (src_mof + src_nb));
		src_nb -= (dst_mof - src_mof);
	}

	src_nb = MIN(src_nb, dst_nb);
	ASSERT(src_nb);
	bzero(dst_va, src_nb);
}

/*
 * dst_va == NULL means don't copy anything
 */
static ulong_t
fetchbuf(
	ml_unit_t	*ul,
	buf_t		*bp,
	caddr_t		dst_va,
	size_t		dst_nb,
	off_t		*dst_lofp)
{
	caddr_t	copy_va;
	size_t	copy_nb;
	size_t	nb_sec;
	off_t	dst_lof = *dst_lofp;
	ulong_t	sav_dst_nb = dst_nb;
	ulong_t	src_nb = bp->b_bcount;
	off_t	src_lof = dbtob(bp->b_blkno);
	off_t	src_elof = src_lof + src_nb;
	caddr_t	src_va = bp->b_un.b_addr;

	/*
	 * copy from bp to dst_va
	 */
	while (dst_nb) {
		/*
		 * compute address within bp
		 */
		copy_va = src_va + (dst_lof - src_lof);

		/*
		 * adjust copy size to amount of data in bp
		 */
		copy_nb = MIN(dst_nb, src_elof - dst_lof);

		/*
		 * adjust copy size to amount of data in sector
		 */
		nb_sec = NB_LEFT_IN_SECTOR(dst_lof);
		copy_nb = MIN(copy_nb, nb_sec);

		/*
		 * dst_va == NULL means don't do copy (see logseek())
		 */
		if (dst_va) {
			bcopy(copy_va, dst_va, copy_nb);
			dst_va += copy_nb;
		}
		dst_lof += copy_nb;
		dst_nb -= copy_nb;
		nb_sec -= copy_nb;

		/*
		 * advance over sector trailer
		 */
		if (nb_sec == 0)
			dst_lof += sizeof (sect_trailer_t);

		/*
		 * exhausted buffer
		 *	return current lof for next read
		 */
		if (dst_lof == src_elof) {
			sema_v(&bp->b_sem);
			if (dst_lof == ul->un_eol_lof)
				dst_lof = ul->un_bol_lof;
			*dst_lofp = dst_lof;
			return (sav_dst_nb - dst_nb);
		}
	}

	/*
	 * copy complete - return current lof
	 */
	sema_v(&bp->b_sem);
	*dst_lofp = dst_lof;
	return (sav_dst_nb);
}

void
ldl_round_commit(ml_unit_t *ul)
{
	int		wrapped;
	buf_t		*bp;
	sect_trailer_t	*st;
	size_t		bcount;
	cirbuf_t	*cb = &ul->un_wrbuf;

	/*
	 * if nothing to write; then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);

	/*
	 * round up to sector boundary and set new tail
	 *	don't readjust st_ident if buf is already rounded
	 */
	bcount = P2ROUNDUP(bp->b_bcount, DEV_BSIZE);
	if (bcount == bp->b_bcount) {
		sema_v(&bp->b_sem);
		return;
	}
	bp->b_bcount = bcount;
	ul->un_tail_lof = dbtob(bp->b_blkno) + bcount;
	wrapped = 0;
	if (ul->un_tail_lof == ul->un_eol_lof) {
		ul->un_tail_lof = ul->un_bol_lof;
		++wrapped;
	}
	ASSERT(ul->un_tail_lof != ul->un_head_lof);

	/*
	 * fix up the sector trailer
	 */
	/* LINTED */
	st = (sect_trailer_t *)
	    ((bp->b_un.b_addr + bcount) - sizeof (*st));
	st->st_tid = ul->un_logmap->mtm_tid;
	st->st_ident = ul->un_tail_ident++;

	/*
	 * if tail wrapped or we have exhausted this buffer
	 *	async write the buffer
	 */
	if (wrapped || bcount == bp->b_bufsize)
		push_dirty_bp(ul, bp);
	else
		sema_v(&bp->b_sem);
}

void
ldl_push_commit(ml_unit_t *ul)
{
	buf_t		*bp;
	cirbuf_t	*cb = &ul->un_wrbuf;

	/*
	 * if nothing to write; then do nothing
	 */
	if ((bp = cb->cb_dirty) == NULL)
		return;
	makebusy(ul, bp);
	push_dirty_bp(ul, bp);
}

int
ldl_need_commit(ml_unit_t *ul)
{
	return (ul->un_resv > (ul->un_maxresv - (ul->un_maxresv>>2)));
}

int
ldl_has_space(ml_unit_t *ul, mapentry_t *me)
{
	off_t	nfb;
	off_t	nb;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/*
	 * Add up the size used by the deltas
	 * round nb up to a sector length plus an extra sector
	 * w/o the extra sector we couldn't distinguish
	 * a full log (head == tail) from an empty log (head == tail)
	 */
	for (nb = DEV_BSIZE; me; me = me->me_hash) {
		nb += sizeof (struct delta);
		if (me->me_dt != DT_CANCEL)
			nb += me->me_nb;
	}
	nb = P2ROUNDUP(nb, DEV_BSIZE);

	if (ul->un_head_lof <= ul->un_tail_lof)
		nfb = (ul->un_head_lof - ul->un_bol_lof) +
		    (ul->un_eol_lof - ul->un_tail_lof);
	else
		nfb = ul->un_head_lof - ul->un_tail_lof;

	return (nb < nfb);
}

void
ldl_write(ml_unit_t *ul, caddr_t bufp, offset_t bufmof, struct mapentry *me)
{
	buf_t	*bp;
	caddr_t	va;
	size_t	nb;
	size_t	actual;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	/* Write the delta */

	nb = sizeof (struct delta);
	va = (caddr_t)&me->me_delta;
	bp = get_write_bp(ul);

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}

	/* If a commit, cancel, or 0's; we're almost done */
	switch (me->me_dt) {
	case DT_COMMIT:
	case DT_CANCEL:
	case DT_ABZERO:
		/* roll needs to know where the next delta will go */
		me->me_lof = ul->un_tail_lof;
		return;
	default:
		break;
	}

	/* Now write the data */

	ASSERT(me->me_nb != 0);

	nb = me->me_nb;
	va = (me->me_mof - bufmof) + bufp;
	bp = get_write_bp(ul);

	/* Save where we will put the data */
	me->me_lof = ul->un_tail_lof;

	while (nb) {
		if (ul->un_flags & LDL_ERROR) {
			sema_v(&bp->b_sem);
			return;
		}
		actual = storebuf(ul, bp, va, nb);
		ASSERT(actual);
		va += actual;
		nb -= actual;
		if (nb)
			bp = get_write_bp(ul);
	}
}

void
ldl_waito(ml_unit_t *ul)
{
	buf_t		*bp;
	cirbuf_t	*cb = &ul->un_wrbuf;

	rw_enter(&cb->cb_rwlock, RW_WRITER);
	/*
	 * wait on them
	 */
	bp = cb->cb_bp;
	do {
		if ((bp->b_flags & B_DONE) == 0) {
			makebusy(ul, bp);
			sema_v(&bp->b_sem);
		}
		bp = bp->b_forw;
	} while (bp != cb->cb_bp);
	rw_exit(&cb->cb_rwlock);
}

/*
 * seek nb bytes from location lof
 */
static int
logseek(ml_unit_t *ul, off_t lof, size_t nb, off_t *lofp)
{
	buf_t	*bp;
	ulong_t	actual;

	while (nb) {
		bp = get_read_bp(ul, lof);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		actual = fetchbuf(ul, bp, NULL, nb, &lof);
		ASSERT(actual);
		nb -= actual;
	}
	*lofp = lof;
	ASSERT(nb == 0);
	return (0);
}

int
ldl_read(
	ml_unit_t	*ul,	/* Log unit */
	caddr_t		va,	/* address of buffer to read into */
	offset_t	mof,	/* mof of buffer */
	off_t		nb,	/* length of buffer */
	mapentry_t	*me)	/* Map entry list */
{
	buf_t	*bp;
	crb_t	*crb;
	caddr_t	rva;			/* address to read into */
	size_t	rnb;			/* # of bytes to read */
	off_t	lof;			/* log device offset to read from */
	off_t	skip;
	ulong_t	actual;
	int	error;
	caddr_t	eva = va + nb;		/* end of buffer */

	for (; me; me = me->me_agenext) {
		ASSERT(me->me_dt != DT_CANCEL);

		/*
		 * check for a cached roll buffer
		 */
		crb = me->me_crb;
		if (crb) {
			if (mof > crb->c_mof) {
				/*
				 * This mapentry overlaps with the beginning of
				 * the supplied buffer
				 */
				skip = mof - crb->c_mof;
				bcopy(crb->c_buf + skip, va,
				    MIN(nb, crb->c_nb - skip));
			} else {
				/*
				 * This mapentry starts at or after
				 * the supplied buffer.
				 */
				skip = crb->c_mof - mof;
				bcopy(crb->c_buf, va + skip,
				    MIN(crb->c_nb, nb - skip));
			}
			logstats.ls_lreadsinmem.value.ui64++;
			continue;
		}

		/*
		 * check for a delta full of zeroes - there's no log data
		 */
		if (me->me_dt == DT_ABZERO) {
			fetchzeroes(va, mof, nb, me);
			continue;
		}

		if (mof > me->me_mof) {
			rnb = (size_t)(mof - me->me_mof);
			error = logseek(ul, me->me_lof, rnb, &lof);
			if (error)
				return (EIO);
			rva = va;
			rnb = me->me_nb - rnb;
			rnb = ((rva + rnb) > eva) ? eva - rva : rnb;
		} else {
			lof = me->me_lof;
			rva = (me->me_mof - mof) + va;
			rnb = ((rva + me->me_nb) > eva) ? eva - rva : me->me_nb;
		}

		while (rnb) {
			bp = get_read_bp(ul, lof);
			if (bp->b_flags & B_ERROR) {
				sema_v(&bp->b_sem);
				return (EIO);
			}
			ASSERT(((me->me_flags & ME_ROLL) == 0) ||
			    (bp != ul->un_wrbuf.cb_dirty));
			actual = fetchbuf(ul, bp, rva, rnb, &lof);
			ASSERT(actual);
			rva += actual;
			rnb -= actual;
		}
	}
	return (0);
}

void
ldl_savestate(ml_unit_t *ul)
{
	int		error;
	buf_t		*bp = ul->un_bp;
	ml_odunit_t	*ud = (void *)bp->b_un.b_addr;
	ml_odunit_t	*ud2 = (void *)(bp->b_un.b_addr + DEV_BSIZE);

#if	DEBUG
	/*
	 * Scan test is running; don't update intermediate state
	 */
	if (ul->un_logmap && ul->un_logmap->mtm_trimlof)
		return;
#endif	/* DEBUG */

	mutex_enter(&ul->un_state_mutex);
	bcopy(&ul->un_ondisk, ud, sizeof (*ud));
	ud->od_chksum = ud->od_head_ident + ud->od_tail_ident;
	bcopy(ud, ud2, sizeof (*ud));

	/* If a snapshot is enabled write through the snapshot driver. */
	if (ul->un_ufsvfs->vfs_snapshot)
		UFS_BWRITE2(ul->un_ufsvfs, bp);
	else
		BWRITE2(bp);
	logstats.ls_ldlwrites.value.ui64++;
	error = bp->b_flags & B_ERROR;
	mutex_exit(&ul->un_state_mutex);
	if (error)
		ldl_seterror(ul, "Error writing ufs log state");
}

/*
 * The head will be set to (new_lof - header) since ldl_sethead is
 * called with the new_lof of the data portion of a delta.
 */
void
ldl_sethead(ml_unit_t *ul, off_t data_lof, uint32_t tid)
{
	off_t		nb;
	off_t		new_lof;
	uint32_t	new_ident;
	daddr_t		beg_blkno;
	daddr_t		end_blkno;
	struct timeval	tv;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (data_lof == -1) {
		/* log is empty */
		uniqtime(&tv);
		if (tv.tv_usec == ul->un_head_ident) {
			tv.tv_usec++;
		}
		last_loghead_ident = tv.tv_usec;
		new_ident = tv.tv_usec;
		new_lof = ul->un_tail_lof;

	} else {
		/* compute header's lof */
		new_ident = ul->un_head_ident;
		new_lof = data_lof - sizeof (struct delta);

		/* whoops, header spans sectors; subtract out sector trailer */
		if (btodb(new_lof) != btodb(data_lof))
			new_lof -= sizeof (sect_trailer_t);

		/* whoops, header wrapped the log; go to last sector */
		if (new_lof < ul->un_bol_lof) {
			/* sector offset */
			new_lof -= dbtob(btodb(new_lof));
			/* add to last sector's lof */
			new_lof += (ul->un_eol_lof - DEV_BSIZE);
		}
		ul->un_head_tid = tid;
	}

	/*
	 * check for nop
	 */
	if (new_lof == ul->un_head_lof)
		return;

	/*
	 * invalidate the affected bufs and calculate new ident
	 */
	if (new_lof > ul->un_head_lof) {
		nb = new_lof - ul->un_head_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident += (end_blkno - beg_blkno);
	} else {
		nb = ul->un_eol_lof - ul->un_head_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_head_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_head_lof, nb);

		end_blkno = btodb(ul->un_eol_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident += (end_blkno - beg_blkno);

		nb = new_lof - ul->un_bol_lof;
		inval_range(ul, &ul->un_wrbuf, ul->un_bol_lof, nb);
		inval_range(ul, &ul->un_rdbuf, ul->un_bol_lof, nb);

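		/* ... plus the sectors from the start of the log up to the new head */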
		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_bol_lof);
		new_ident += (end_blkno - beg_blkno);
	}
	/*
	 * don't update the head if there has been an error
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	/* Fix up the head and ident */
	ASSERT(new_lof >= ul->un_bol_lof);
	ul->un_head_lof = new_lof;
	ul->un_head_ident = new_ident;
	if (data_lof == -1) {
		ul->un_tail_ident = ul->un_head_ident;
	}

	/* Commit to the database */
	ldl_savestate(ul);

	ASSERT(((ul->un_logmap->mtm_debug & MT_SCAN) == 0) ||
	    ldl_sethead_debug(ul));
}

/*
 * The tail will be set to the sector following lof+nb
 *	lof + nb == size of the last delta + commit record
 *	this function is called once after the log scan has completed.
 */
void
ldl_settail(ml_unit_t *ul, off_t lof, size_t nb)
{
	off_t		new_lof;
	uint32_t	new_ident;
	daddr_t		beg_blkno;
	daddr_t		end_blkno;
	struct timeval	tv;

	ASSERT(MUTEX_HELD(&ul->un_log_mutex));

	if (lof == -1) {
		uniqtime(&tv);
		if (tv.tv_usec == ul->un_head_ident) {
			tv.tv_usec++;
		}
		last_loghead_ident = tv.tv_usec;
		ul->un_tail_lof = dbtob(btodb(ul->un_head_lof));
		ul->un_head_lof = ul->un_tail_lof;
		ul->un_head_ident = tv.tv_usec;
		ul->un_tail_ident = ul->un_head_ident;

		/* Commit to the database */
		ldl_savestate(ul);

		return;
	}

	/*
	 * new_lof is the offset of the sector following the last commit
	 */
	(void) logseek(ul, lof, nb, &new_lof);
	ASSERT(new_lof != dbtob(btodb(ul->un_head_lof)));

	/*
	 * calculate new ident
	 */
	if (new_lof > ul->un_head_lof) {
		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);
	} else {
		end_blkno = btodb(ul->un_eol_lof);
		beg_blkno = btodb(ul->un_head_lof);
		new_ident = ul->un_head_ident + (end_blkno - beg_blkno);

		end_blkno = btodb(new_lof);
		beg_blkno = btodb(ul->un_bol_lof);
		new_ident += (end_blkno - beg_blkno);
	}

	/* Fix up the tail and ident */
	ul->un_tail_lof = new_lof;
	ul->un_tail_ident = new_ident;

	/* Commit to the database */
	ldl_savestate(ul);
}

/*
 * LOGSCAN STUFF
 */
static int
ldl_logscan_ident(ml_unit_t *ul, buf_t *bp, off_t lof)
{
	ulong_t		ident;
	size_t		nblk, i;
	sect_trailer_t	*st;

	/*
	 * compute ident for first sector in the buffer
	 */
	ident = ul->un_head_ident;
	if (bp->b_blkno >= btodb(ul->un_head_lof)) {
		ident += (bp->b_blkno - btodb(ul->un_head_lof));
	} else {
		ident += (btodb(ul->un_eol_lof) - btodb(ul->un_head_lof));
		ident += (bp->b_blkno - btodb(ul->un_bol_lof));
	}
	/*
	 * truncate the buffer down to the last valid sector
	 */
	nblk = btodb(bp->b_bcount);
	bp->b_bcount = 0;
	/* LINTED */
	st = (sect_trailer_t *)(bp->b_un.b_addr + LDL_USABLE_BSIZE);
	for (i = 0; i < nblk; ++i) {
		if (st->st_ident != ident)
			break;

		/* remember last valid tid for ldl_logscan_error() */
		ul->un_tid = st->st_tid;

		/* LINTED */
		st = (sect_trailer_t *)(((caddr_t)st) + DEV_BSIZE);
		++ident;
		bp->b_bcount += DEV_BSIZE;
	}
	/*
	 * make sure that lof is still within range
	 */
	return (within_range(lof, bp->b_blkno, bp->b_bcount));
}

ulong_t
ldl_logscan_nbcommit(off_t lof)
{
	/*
	 * lof is the offset following the commit header. However,
	 * if the commit header fell on the end-of-sector, then lof
	 * has already been advanced to the beginning of the next
	 * sector. So do nothing. Otherwise, return the remaining
	 * bytes in the sector.
	 */
	if ((lof & (DEV_BSIZE - 1)) == 0)
		return (0);
	return (NB_LEFT_IN_SECTOR(lof));
}

int
ldl_logscan_read(ml_unit_t *ul, off_t *lofp, size_t nb, caddr_t va)
{
	buf_t	*bp;
	ulong_t	actual;

	ASSERT(ul->un_head_lof != ul->un_tail_lof);

	/*
	 * Check the log data doesn't go out of bounds
	 */
	if (ul->un_head_lof < ul->un_tail_lof) {
		if (!WITHIN(*lofp, nb, ul->un_head_lof,
		    (ul->un_tail_lof - ul->un_head_lof))) {
			return (EIO);
		}
	} else {
		if (OVERLAP(*lofp, nb, ul->un_tail_lof,
		    (ul->un_head_lof - ul->un_tail_lof))) {
			return (EIO);
		}
	}

	while (nb) {
		bp = get_read_bp(ul, *lofp);
		if (bp->b_flags & B_ERROR) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		/*
		 * out-of-seq idents means partial transaction
		 *	panic, non-corrupting powerfail, ...
		 */
		if (!ldl_logscan_ident(ul, bp, *lofp)) {
			sema_v(&bp->b_sem);
			return (EIO);
		}
		/*
		 * copy the header into the caller's buf
		 */
		actual = fetchbuf(ul, bp, va, nb, lofp);
		if (va)
			va += actual;
		nb -= actual;
	}
	return (0);
}

void
ldl_logscan_begin(ml_unit_t *ul)
{
	size_t	bufsize;

	ASSERT(ul->un_wrbuf.cb_dirty == NULL);

	/*
	 * logscan has begun
	 */
	ul->un_flags |= LDL_SCAN;

	/*
	 * reset the circular bufs
	 */
	bufsize = ldl_bufsize(ul);
	alloc_rdbuf(&ul->un_rdbuf, bufsize, bufsize);
	alloc_wrbuf(&ul->un_wrbuf, bufsize);

	/*
	 * set the tail to reflect a full log
	 */
	ul->un_tail_lof = dbtob(btodb(ul->un_head_lof)) - DEV_BSIZE;

	if (ul->un_tail_lof < ul->un_bol_lof)
		ul->un_tail_lof = ul->un_eol_lof - DEV_BSIZE;
	if (ul->un_tail_lof >= ul->un_eol_lof)
		ul->un_tail_lof = ul->un_bol_lof;

	/*
	 * un_tid is used during error processing; it is initialized to
	 * the tid of the delta at un_head_lof;
	 */
	ul->un_tid = ul->un_head_tid;
}

void
ldl_logscan_end(ml_unit_t *ul)
{
	size_t	bufsize;

	/*
	 * reset the circular bufs
	 */
	bufsize = ldl_bufsize(ul);
	alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
	alloc_wrbuf(&ul->un_wrbuf, bufsize);

	/*
	 * Done w/scan
	 */
	ul->un_flags &= ~LDL_SCAN;
}

int
ldl_need_roll(ml_unit_t *ul)
{
	off_t	busybytes;
	off_t	head;
	off_t	tail;
	off_t	bol;
	off_t	eol;
	off_t	nb;

	/*
	 * snapshot the log state
	 */
	head = ul->un_head_lof;
	tail = ul->un_tail_lof;
	bol = ul->un_bol_lof;
	eol = ul->un_eol_lof;
	nb = ul->un_logsize;

	/*
	 * compute number of busy (inuse) bytes
	 */
	if (head <= tail)
		busybytes = tail - head;
	else
		busybytes = (eol - head) + (tail - bol);

	/*
	 * return TRUE if > 75% full
	 */
	return (busybytes > (nb - (nb >> 2)));
}

void
ldl_seterror(ml_unit_t *ul, char *why)
{
	/*
	 * already in error state; do nothing
	 */
	if (ul->un_flags & LDL_ERROR)
		return;

	ul->un_flags |= LDL_ERROR;	/* incore */
	ul->un_badlog = 1;		/* ondisk (cleared by fsck) */

	/*
	 * Commit to state sectors
	 */
	uniqtime(&ul->un_timestamp);
	ldl_savestate(ul);

	/* Pretty print */
	cmn_err(CE_WARN, "%s", why);
	cmn_err(CE_WARN, "ufs log for %s changed state to Error",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);
	cmn_err(CE_WARN, "Please umount(1M) %s and run fsck(1M)",
	    ul->un_ufsvfs->vfs_fs->fs_fsmnt);

	/*
	 * If we aren't in the middle of scan (aka snarf); tell ufs
	 * to hard lock itself.
	 */
	if ((ul->un_flags & LDL_SCAN) == 0)
		ufs_trans_onerror();
}

size_t
ldl_bufsize(ml_unit_t *ul)
{
	size_t		bufsize;
	extern uint32_t	ldl_minbufsize;

	/*
	 * initial guess is the maxtransfer value for this log device
	 *	increase if too small
	 *	decrease if too large
	 */
	bufsize = dbtob(btod(ul->un_maxtransfer));
	if (bufsize < ldl_minbufsize)
		bufsize = ldl_minbufsize;
	if (bufsize > maxphys)
		bufsize = maxphys;
	if (bufsize > ul->un_maxtransfer)
		bufsize = ul->un_maxtransfer;
	return (bufsize);
}