/*
 * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA 94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */

#include "xfs.h"
#include "xfs_macros.h"
#include "xfs_types.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_rw.h"


STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
		xfs_daddr_t, int);
STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
		xfs_daddr_t, int);


/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it is already locked
 * within the transaction, just increment its lock recursion count
 * and return a pointer to it.
 *
 * Use the fast path function xfs_trans_buf_item_match() or the buffer
 * cache routine incore_match() to find the buffer
 * if it is already owned by this transaction.
 *
 * If we don't already own the buffer, use get_buf() to get it.
 * If it doesn't yet have an associated xfs_buf_log_item structure,
 * then allocate one and add the item to this transaction.
 *
 * If the transaction pointer is NULL, make this just a normal
 * get_buf() call.
 */
xfs_buf_t *
xfs_trans_get_buf(xfs_trans_t	*tp,
		  xfs_buftarg_t	*target_dev,
		  xfs_daddr_t	blkno,
		  int		len,
		  uint		flags)
{
	xfs_buf_t		*bp;
	xfs_buf_log_item_t	*bip;

	if (flags == 0)
		flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;

	/*
	 * Default to a normal get_buf() call if the tp is NULL.
	 */
	if (tp == NULL) {
		bp = xfs_buf_get_flags(target_dev, blkno, len,
				       flags | BUF_BUSY);
		return(bp);
	}

	/*
	 * If we find the buffer in the cache with this transaction
	 * pointer in its b_fsprivate2 field, then we know we already
	 * have it locked.  In this case we just increment the lock
	 * recursion count and return the buffer to the caller.
	 */
	if (tp->t_items.lic_next == NULL) {
		bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
	} else {
		bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
	}
	if (bp != NULL) {
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
		if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
			xfs_buftrace("TRANS GET RECUR SHUT", bp);
			XFS_BUF_SUPER_STALE(bp);
		}
		/*
		 * If the buffer is stale then it was binval'ed
		 * since last read.  This doesn't matter since the
		 * caller isn't allowed to use the data anyway.
		 */
		else if (XFS_BUF_ISSTALE(bp)) {
			xfs_buftrace("TRANS GET RECUR STALE", bp);
			ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
		}
		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
		ASSERT(bip != NULL);
		ASSERT(atomic_read(&bip->bli_refcount) > 0);
		bip->bli_recur++;
		xfs_buftrace("TRANS GET RECUR", bp);
		xfs_buf_item_trace("GET RECUR", bip);
		return (bp);
	}

	/*
	 * We always specify the BUF_BUSY flag within a transaction so
	 * that get_buf does not try to push out a delayed write buffer
	 * which might cause another transaction to take place (if the
	 * buffer was delayed alloc).  Such recursive transactions can
	 * easily deadlock with our current transaction as well as cause
	 * us to run out of stack space.
	 */
	bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY);
	if (bp == NULL) {
		return NULL;
	}

	ASSERT(!XFS_BUF_GETERROR(bp));

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);

	/*
	 * Set the recursion count for the buffer within this transaction
	 * to 0.
	 */
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	bip->bli_recur = 0;

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buftrace("TRANS GET", bp);
	xfs_buf_item_trace("GET", bip);
	return (bp);
}
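
/*
 * A minimal usage sketch, assuming a caller that already holds an
 * active transaction "tp"; "target", "blkno", "len", "first" and
 * "last" are hypothetical placeholders:
 *
 *	bp = xfs_trans_get_buf(tp, target, blkno, len, 0);
 *	if (bp != NULL) {
 *		... modify the buffer contents ...
 *		xfs_trans_log_buf(tp, bp, first, last);
 *	}
 *
 * Passing 0 for flags defaults to XFS_BUF_LOCK | XFS_BUF_MAPPED,
 * as implemented above.
 */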

/*
 * Get and lock the superblock buffer of this file system for the
 * given transaction.
 *
 * We don't need to use incore_match() here, because the superblock
 * buffer is a private buffer which we keep a pointer to in the
 * mount structure.
 */
xfs_buf_t *
xfs_trans_getsb(xfs_trans_t	*tp,
		struct xfs_mount *mp,
		int		flags)
{
	xfs_buf_t		*bp;
	xfs_buf_log_item_t	*bip;

	/*
	 * Default to just trying to lock the superblock buffer
	 * if tp is NULL.
	 */
	if (tp == NULL) {
		return (xfs_getsb(mp, flags));
	}

	/*
	 * If the superblock buffer already has this transaction
	 * pointer in its b_fsprivate2 field, then we know we already
	 * have it locked.  In this case we just increment the lock
	 * recursion count and return the buffer to the caller.
	 */
	bp = mp->m_sb_bp;
	if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
		ASSERT(bip != NULL);
		ASSERT(atomic_read(&bip->bli_refcount) > 0);
		bip->bli_recur++;
		xfs_buf_item_trace("GETSB RECUR", bip);
		return (bp);
	}

	bp = xfs_getsb(mp, flags);
	if (bp == NULL) {
		return NULL;
	}

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, mp);

	/*
	 * Set the recursion count for the buffer within this transaction
	 * to 0.
	 */
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	bip->bli_recur = 0;

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buf_item_trace("GETSB", bip);
	return (bp);
}
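
/*
 * A minimal usage sketch, assuming a caller updating superblock
 * fields within transaction "tp"; "first" and "last" are
 * hypothetical byte offsets into the superblock buffer:
 *
 *	bp = xfs_trans_getsb(tp, mp, 0);
 *	... modify the superblock contents ...
 *	xfs_trans_log_buf(tp, bp, first, last);
 */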

#ifdef DEBUG
xfs_buftarg_t *xfs_error_target;
int	xfs_do_error;
int	xfs_req_num;
int	xfs_error_mod = 33;
#endif

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it has not yet been
 * read in, read it from disk.  If it is already locked
 * within the transaction and already read in, just increment its
 * lock recursion count and return a pointer to it.
 *
 * Use the fast path function xfs_trans_buf_item_match() or the buffer
 * cache routine incore_match() to find the buffer
 * if it is already owned by this transaction.
 *
 * If we don't already own the buffer, use read_buf() to get it.
 * If it doesn't yet have an associated xfs_buf_log_item structure,
 * then allocate one and add the item to this transaction.
 *
 * If the transaction pointer is NULL, make this just a normal
 * read_buf() call.
 */
int
xfs_trans_read_buf(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_buftarg_t	*target,
	xfs_daddr_t	blkno,
	int		len,
	uint		flags,
	xfs_buf_t	**bpp)
{
	xfs_buf_t		*bp;
	xfs_buf_log_item_t	*bip;
	int			error;

	if (flags == 0)
		flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;

	/*
	 * Default to a normal get_buf() call if the tp is NULL.
	 */
	if (tp == NULL) {
		bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
		if (!bp)
			return XFS_ERROR(ENOMEM);

		if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
			xfs_ioerror_alert("xfs_trans_read_buf", mp,
					  bp, blkno);
			error = XFS_BUF_GETERROR(bp);
			xfs_buf_relse(bp);
			return error;
		}
#ifdef DEBUG
		if (xfs_do_error && (bp != NULL)) {
			if (xfs_error_target == target) {
				if (((xfs_req_num++) % xfs_error_mod) == 0) {
					xfs_buf_relse(bp);
					printk("Returning error!\n");
					return XFS_ERROR(EIO);
				}
			}
		}
#endif
		if (XFS_FORCED_SHUTDOWN(mp))
			goto shutdown_abort;
		*bpp = bp;
		return 0;
	}

	/*
	 * If we find the buffer in the cache with this transaction
	 * pointer in its b_fsprivate2 field, then we know we already
	 * have it locked.  If it is already read in we just increment
	 * the lock recursion count and return the buffer to the caller.
	 * If the buffer is not yet read in, then we read it in, increment
	 * the lock recursion count, and return it to the caller.
	 */
	if (tp->t_items.lic_next == NULL) {
		bp = xfs_trans_buf_item_match(tp, target, blkno, len);
	} else {
		bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
	}
	if (bp != NULL) {
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
		ASSERT((XFS_BUF_ISERROR(bp)) == 0);
		if (!(XFS_BUF_ISDONE(bp))) {
			xfs_buftrace("READ_BUF_INCORE !DONE", bp);
			ASSERT(!XFS_BUF_ISASYNC(bp));
			XFS_BUF_READ(bp);
			xfsbdstrat(tp->t_mountp, bp);
			xfs_iowait(bp);
			if (XFS_BUF_GETERROR(bp) != 0) {
				xfs_ioerror_alert("xfs_trans_read_buf", mp,
						  bp, blkno);
				error = XFS_BUF_GETERROR(bp);
				xfs_buf_relse(bp);
				/*
				 * We can gracefully recover from most
				 * read errors.  Ones we can't are those
				 * that happen after the transaction's
				 * already dirty.
				 */
				if (tp->t_flags & XFS_TRANS_DIRTY)
					xfs_force_shutdown(tp->t_mountp,
							XFS_METADATA_IO_ERROR);
				return error;
			}
		}
		/*
		 * We never locked this buf ourselves, so we shouldn't
		 * brelse it either.  Just get out.
		 */
		if (XFS_FORCED_SHUTDOWN(mp)) {
			xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp);
			*bpp = NULL;
			return XFS_ERROR(EIO);
		}

		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
		bip->bli_recur++;

		ASSERT(atomic_read(&bip->bli_refcount) > 0);
		xfs_buf_item_trace("READ RECUR", bip);
		*bpp = bp;
		return 0;
	}

	/*
	 * We always specify the BUF_BUSY flag within a transaction so
	 * that get_buf does not try to push out a delayed write buffer
	 * which might cause another transaction to take place (if the
	 * buffer was delayed alloc).  Such recursive transactions can
	 * easily deadlock with our current transaction as well as cause
	 * us to run out of stack space.
	 */
	bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
	if (bp == NULL) {
		*bpp = NULL;
		return 0;
	}
	if (XFS_BUF_GETERROR(bp) != 0) {
		XFS_BUF_SUPER_STALE(bp);
		xfs_buftrace("READ ERROR", bp);
		error = XFS_BUF_GETERROR(bp);

		xfs_ioerror_alert("xfs_trans_read_buf", mp,
				  bp, blkno);
		if (tp->t_flags & XFS_TRANS_DIRTY)
			xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR);
		xfs_buf_relse(bp);
		return error;
	}
#ifdef DEBUG
	if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) {
		if (xfs_error_target == target) {
			if (((xfs_req_num++) % xfs_error_mod) == 0) {
				xfs_force_shutdown(tp->t_mountp,
						   XFS_METADATA_IO_ERROR);
				xfs_buf_relse(bp);
				printk("Returning error in trans!\n");
				return XFS_ERROR(EIO);
			}
		}
	}
#endif
	if (XFS_FORCED_SHUTDOWN(mp))
		goto shutdown_abort;

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);

	/*
	 * Set the recursion count for the buffer within this transaction
	 * to 0.
	 */
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	bip->bli_recur = 0;

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buftrace("TRANS READ", bp);
	xfs_buf_item_trace("READ", bip);
	*bpp = bp;
	return 0;

shutdown_abort:
	/*
	 * the theory here is that buffer is good but we're
	 * bailing out because the filesystem is being forcibly
	 * shut down.  So we should leave the b_flags alone since
	 * the buffer's not staled and just get out.
	 */
#if defined(DEBUG)
	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
#endif
	ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
						(XFS_B_STALE|XFS_B_DELWRI));

	xfs_buftrace("READ_BUF XFSSHUTDN", bp);
	xfs_buf_relse(bp);
	*bpp = NULL;
	return XFS_ERROR(EIO);
}
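
/*
 * A minimal usage sketch, assuming a caller reading metadata within
 * transaction "tp"; "target", "blkno" and "len" are hypothetical:
 *
 *	error = xfs_trans_read_buf(mp, tp, target, blkno, len, 0, &bp);
 *	if (error)
 *		return error;
 *	... examine bp, and xfs_trans_log_buf() any modified range ...
 *
 * Note that a 0 return with *bpp == NULL is possible (see the
 * xfs_buf_read_flags() failure path above), so callers should check
 * both.
 */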

/*
 * Release the buffer bp which was previously acquired with one of the
 * xfs_trans_... buffer allocation routines if the buffer has not
 * been modified within this transaction.  If the buffer is modified
 * within this transaction, do decrement the recursion count but do
 * not release the buffer even if the count goes to 0.  If the buffer is not
 * modified within the transaction, decrement the recursion count and
 * release the buffer if the recursion count goes to 0.
 *
 * If the buffer is to be released and it was not modified before
 * this transaction began, then free the buf_log_item associated with it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * brelse() call.
 */
void
xfs_trans_brelse(xfs_trans_t	*tp,
		 xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;
	xfs_log_item_t		*lip;
	xfs_log_item_desc_t	*lidp;

	/*
	 * Default to a normal brelse() call if the tp is NULL.
	 */
	if (tp == NULL) {
		ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
		/*
		 * If there's a buf log item attached to the buffer,
		 * then let the AIL know that the buffer is being
		 * unlocked.
		 */
		if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
			lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
			if (lip->li_type == XFS_LI_BUF) {
				bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
				xfs_trans_unlocked_item(
						bip->bli_item.li_mountp,
						lip);
			}
		}
		xfs_buf_relse(bp);
		return;
	}

	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	/*
	 * Find the item descriptor pointing to this buffer's
	 * log item.  It must be there.
	 */
	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
	ASSERT(lidp != NULL);

	/*
	 * If the release is just for a recursive lock,
	 * then decrement the count and return.
	 */
	if (bip->bli_recur > 0) {
		bip->bli_recur--;
		xfs_buf_item_trace("RELSE RECUR", bip);
		return;
	}

	/*
	 * If the buffer is dirty within this transaction, we can't
	 * release it until we commit.
	 */
	if (lidp->lid_flags & XFS_LID_DIRTY) {
		xfs_buf_item_trace("RELSE DIRTY", bip);
		return;
	}

	/*
	 * If the buffer has been invalidated, then we can't release
	 * it until the transaction commits to disk unless it is re-dirtied
	 * as part of this transaction.  This prevents us from pulling
	 * the item from the AIL before we should.
	 */
	if (bip->bli_flags & XFS_BLI_STALE) {
		xfs_buf_item_trace("RELSE STALE", bip);
		return;
	}

	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	xfs_buf_item_trace("RELSE", bip);

	/*
	 * Free up the log item descriptor tracking the released item.
	 */
	xfs_trans_free_item(tp, lidp);

	/*
	 * Clear the hold flag in the buf log item if it is set.
	 * We wouldn't want the next user of the buffer to
	 * get confused.
	 */
	if (bip->bli_flags & XFS_BLI_HOLD) {
		bip->bli_flags &= ~XFS_BLI_HOLD;
	}

	/*
	 * Drop our reference to the buf log item.
	 */
	atomic_dec(&bip->bli_refcount);

	/*
	 * If the buf item is not tracking data in the log, then
	 * we must free it before releasing the buffer back to the
	 * free pool.  Before releasing the buffer to the free pool,
	 * clear the transaction pointer in b_fsprivate2 to dissolve
	 * its relation to this transaction.
	 */
	if (!xfs_buf_item_dirty(bip)) {
/***
		ASSERT(bp->b_pincount == 0);
***/
		ASSERT(atomic_read(&bip->bli_refcount) == 0);
		ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
		ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
		xfs_buf_item_relse(bp);
		bip = NULL;
	}
	XFS_BUF_SET_FSPRIVATE2(bp, NULL);

	/*
	 * If we've still got a buf log item on the buffer, then
	 * tell the AIL that the buffer is being unlocked.
	 */
	if (bip != NULL) {
		xfs_trans_unlocked_item(bip->bli_item.li_mountp,
					(xfs_log_item_t*)bip);
	}

	xfs_buf_relse(bp);
	return;
}
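
/*
 * A sketch of the common read-and-release pattern, assuming the
 * buffer was only examined and never logged; names are hypothetical:
 *
 *	error = xfs_trans_read_buf(mp, tp, target, blkno, len, 0, &bp);
 *	if (!error && bp != NULL) {
 *		... read-only examination of bp ...
 *		xfs_trans_brelse(tp, bp);
 *	}
 *
 * Had the buffer been logged in this transaction, the release would
 * be deferred until commit, as described above.
 */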

/*
 * Add the locked buffer to the transaction.
 * The buffer must be locked, and it cannot be associated with any
 * transaction.
 *
 * If the buffer does not yet have a buf log item associated with it,
 * then allocate one for it.  Then add the buf item to the transaction.
 */
void
xfs_trans_bjoin(xfs_trans_t	*tp,
		xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * in xfs_trans_get_buf() and friends above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buf_item_trace("BJOIN", bip);
}
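
/*
 * A minimal sketch, assuming a buffer obtained outside any
 * transaction is to be handed to one; names are hypothetical:
 *
 *	bp = xfs_buf_get_flags(target, blkno, len, XFS_BUF_LOCK);
 *	...
 *	xfs_trans_bjoin(tp, bp);
 *
 * After the call, bp's b_fsprivate2 points at tp, so the matching
 * routines in this file will find it.
 */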

/*
 * Mark the buffer as not needing to be unlocked when the buf item's
 * IOP_UNLOCK() routine is called.  The buffer must already be locked
 * and associated with the given transaction.
 */
/* ARGSUSED */
void
xfs_trans_bhold(xfs_trans_t	*tp,
		xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	bip->bli_flags |= XFS_BLI_HOLD;
	xfs_buf_item_trace("BHOLD", bip);
}
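
/*
 * A sketch of the usual hold pattern, assuming the caller wants to
 * keep the buffer locked across a commit and join it to a follow-up
 * transaction; "ntp" is a hypothetical new transaction:
 *
 *	xfs_trans_bhold(tp, bp);
 *	... commit tp; bp remains locked ...
 *	xfs_trans_bjoin(ntp, bp);
 */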

/*
 * This is called to mark bytes first through last inclusive of the given
 * buffer as needing to be logged when the transaction is committed.
 * The buffer must already be associated with the given transaction.
 *
 * First and last are numbers relative to the beginning of this buffer,
 * so the first byte in the buffer is numbered 0 regardless of the
 * value of b_blkno.
 */
void
xfs_trans_log_buf(xfs_trans_t	*tp,
		  xfs_buf_t	*bp,
		  uint		first,
		  uint		last)
{
	xfs_buf_log_item_t	*bip;
	xfs_log_item_desc_t	*lidp;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
	ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
	ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
	       (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));

	/*
	 * Mark the buffer as needing to be written out eventually,
	 * and set its iodone function to remove the buffer's buf log
	 * item from the AIL and free it when the buffer is flushed
	 * to disk.  See xfs_buf_attach_iodone() for more details
	 * on li_cb and xfs_buf_iodone_callbacks().
	 * If we end up aborting this transaction, we trap this buffer
	 * inside the b_bdstrat callback so that this won't get written to
	 * disk.
	 */
	XFS_BUF_DELAYWRITE(bp);
	XFS_BUF_DONE(bp);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
	bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;

	/*
	 * If we invalidated the buffer within this transaction, then
	 * cancel the invalidation now that we're dirtying the buffer
	 * again.  There are no races with the code in xfs_buf_item_unpin(),
	 * because we have a reference to the buffer this entire time.
	 */
	if (bip->bli_flags & XFS_BLI_STALE) {
		xfs_buf_item_trace("BLOG UNSTALE", bip);
		bip->bli_flags &= ~XFS_BLI_STALE;
		ASSERT(XFS_BUF_ISSTALE(bp));
		XFS_BUF_UNSTALE(bp);
		bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL;
	}

	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
	ASSERT(lidp != NULL);

	tp->t_flags |= XFS_TRANS_DIRTY;
	lidp->lid_flags |= XFS_LID_DIRTY;
	lidp->lid_flags &= ~XFS_LID_BUF_STALE;
	bip->bli_flags |= XFS_BLI_LOGGED;
	xfs_buf_item_log(bip, first, last);
	xfs_buf_item_trace("BLOG", bip);
}
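
/*
 * A minimal sketch, assuming a caller that modified a range of the
 * buffer and that the XFS_BUF_PTR() data accessor is available;
 * "offset", "count" and "data" are hypothetical:
 *
 *	memcpy(XFS_BUF_PTR(bp) + offset, data, count);
 *	xfs_trans_log_buf(tp, bp, offset, offset + count - 1);
 *
 * Note that "last" is inclusive, hence the "- 1" on the end offset.
 */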

/*
 * This is called to invalidate a buffer that is being used within
 * a transaction.  Typically this is because the blocks in the
 * buffer are being freed, so we need to prevent it from being
 * written out when we're done.  Allowing it to be written again
 * might overwrite data in the free blocks if they are reallocated
 * to a file.
 *
 * We prevent the buffer from being written out by clearing the
 * B_DELWRI flag.  We can't always get rid of the buf log item at
 * this point, though, because the buffer may still be pinned by
 * another transaction.  If that is the case, then we'll wait until
 * the buffer is committed to disk for the last time (we can tell
 * by the ref count) and free it in xfs_buf_item_unpin().  Until it
 * is cleaned up we will keep the buffer locked so that the buffer
 * and buf log item are not reused.
 */
void
xfs_trans_binval(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_log_item_desc_t	*lidp;
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
	ASSERT(lidp != NULL);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	if (bip->bli_flags & XFS_BLI_STALE) {
		/*
		 * If the buffer is already invalidated, then
		 * just return.
		 */
		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
		ASSERT(XFS_BUF_ISSTALE(bp));
		ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
		ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF));
		ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
		ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
		ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
		xfs_buftrace("XFS_BINVAL RECUR", bp);
		xfs_buf_item_trace("BINVAL RECUR", bip);
		return;
	}

	/*
	 * Clear the dirty bit in the buffer and set the STALE flag
	 * in the buf log item.  The STALE flag will be used in
	 * xfs_buf_item_unpin() to determine if it should clean up
	 * when the last reference to the buf item is given up.
	 * We set the XFS_BLI_CANCEL flag in the buf log format structure
	 * and log the buf item.  This will be used at recovery time
	 * to determine that copies of the buffer in the log before
	 * this should not be replayed.
	 * We mark the item descriptor and the transaction dirty so
	 * that we'll hold the buffer until after the commit.
	 *
	 * Since we're invalidating the buffer, we also clear the state
	 * about which parts of the buffer have been logged.  We also
	 * clear the flag indicating that this is an inode buffer since
	 * the data in the buffer will no longer be valid.
	 *
	 * We set the stale bit in the buffer as well since we're getting
	 * rid of it.
	 */
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_STALE(bp);
	bip->bli_flags |= XFS_BLI_STALE;
	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
	bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
	bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
	memset((char *)(bip->bli_format.blf_data_map), 0,
	       (bip->bli_format.blf_map_size * sizeof(uint)));
	lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
	tp->t_flags |= XFS_TRANS_DIRTY;
	xfs_buftrace("XFS_BINVAL", bp);
	xfs_buf_item_trace("BINVAL", bip);
}
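
/*
 * A minimal sketch, assuming a caller freeing the extent that backs
 * the buffer:
 *
 *	... free the blocks covered by bp ...
 *	xfs_trans_binval(tp, bp);
 *
 * If the same range is re-dirtied within this transaction,
 * xfs_trans_log_buf() above cancels the invalidation again.
 */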

/*
 * This call is used to indicate that the buffer contains on-disk
 * inodes which must be handled specially during recovery.  They
 * require special handling because only the di_next_unlinked from
 * the inodes in the buffer should be recovered.  The rest of the
 * data in the buffer is logged via the inodes themselves.
 *
 * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
 * format structure so that we'll know what to do at recovery time.
 */
/* ARGSUSED */
void
xfs_trans_inode_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
}

/*
 * This call is used to indicate that the buffer is going to
 * be staled and was an inode buffer.  This means it gets
 * special processing during unpin - where any inodes
 * associated with the buffer should be removed from the AIL.
 * There is also special processing during recovery: any replay
 * of the inodes in the buffer needs to be prevented as the
 * buffer may have been reused.
 */
void
xfs_trans_stale_inode_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_flags |= XFS_BLI_STALE_INODE;
	bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))
		xfs_buf_iodone;
}
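
/*
 * A sketch of how these calls are typically combined when an inode
 * cluster buffer is freed (the ordering here is illustrative):
 *
 *	xfs_trans_stale_inode_buf(tp, bp);
 *	xfs_trans_binval(tp, bp);
 */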

/*
 * Mark the buffer as being one which contains newly allocated
 * inodes.  We need to make sure that even if this buffer is
 * relogged as an 'inode buf' we still recover all of the inode
 * images in the face of a crash.  This works in coordination with
 * xfs_buf_item_committed() to ensure that the buffer remains in the
 * AIL at its original location even after it has been relogged.
 */
/* ARGSUSED */
void
xfs_trans_inode_alloc_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
}

/*
 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
 * dquots.  However, unlike in inode buffer recovery, dquot buffers get
 * recovered in their entirety.  (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag).
 * The only thing that makes dquot buffers different from regular
 * buffers is that we must not replay dquot bufs when recovering
 * if a _corresponding_ quotaoff has happened.  We also have to distinguish
 * between usr dquot bufs and grp dquot bufs, because usr and grp quotas
 * can be turned off independently.
 */
/* ARGSUSED */
void
xfs_trans_dquot_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp,
	uint		type)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
	ASSERT(type == XFS_BLI_UDQUOT_BUF ||
	       type == XFS_BLI_PDQUOT_BUF ||
	       type == XFS_BLI_GDQUOT_BUF);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_format.blf_flags |= type;
}
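
/*
 * A minimal sketch, assuming a caller that has just read a user
 * quota buffer into transaction "tp":
 *
 *	xfs_trans_dquot_buf(tp, bp, XFS_BLI_UDQUOT_BUF);
 */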

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.  Only check the first, embedded
 * chunk, since we don't want to spend all day scanning large transactions.
 */
STATIC xfs_buf_t *
xfs_trans_buf_item_match(
	xfs_trans_t	*tp,
	xfs_buftarg_t	*target,
	xfs_daddr_t	blkno,
	int		len)
{
	xfs_log_item_chunk_t	*licp;
	xfs_log_item_desc_t	*lidp;
	xfs_buf_log_item_t	*blip;
	xfs_buf_t		*bp;
	int			i;

	bp = NULL;
	len = BBTOB(len);
	licp = &tp->t_items;
	if (!XFS_LIC_ARE_ALL_FREE(licp)) {
		for (i = 0; i < licp->lic_unused; i++) {
			/*
			 * Skip unoccupied slots.
			 */
			if (XFS_LIC_ISFREE(licp, i)) {
				continue;
			}

			lidp = XFS_LIC_SLOT(licp, i);
			blip = (xfs_buf_log_item_t *)lidp->lid_item;
			if (blip->bli_item.li_type != XFS_LI_BUF) {
				continue;
			}

			bp = blip->bli_buf;
			if ((XFS_BUF_TARGET(bp) == target) &&
			    (XFS_BUF_ADDR(bp) == blkno) &&
			    (XFS_BUF_COUNT(bp) == len)) {
				/*
				 * We found it.  Break out and
				 * return the pointer to the buffer.
				 */
				break;
			} else {
				bp = NULL;
			}
		}
	}
	return bp;
}

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.  Check all the chunks, we
 * want to be thorough.
 */
STATIC xfs_buf_t *
xfs_trans_buf_item_match_all(
	xfs_trans_t	*tp,
	xfs_buftarg_t	*target,
	xfs_daddr_t	blkno,
	int		len)
{
	xfs_log_item_chunk_t	*licp;
	xfs_log_item_desc_t	*lidp;
	xfs_buf_log_item_t	*blip;
	xfs_buf_t		*bp;
	int			i;

	bp = NULL;
	len = BBTOB(len);
	for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
		if (XFS_LIC_ARE_ALL_FREE(licp)) {
			ASSERT(licp == &tp->t_items);
			ASSERT(licp->lic_next == NULL);
			return NULL;
		}
		for (i = 0; i < licp->lic_unused; i++) {
			/*
			 * Skip unoccupied slots.
			 */
			if (XFS_LIC_ISFREE(licp, i)) {
				continue;
			}

			lidp = XFS_LIC_SLOT(licp, i);
			blip = (xfs_buf_log_item_t *)lidp->lid_item;
			if (blip->bli_item.li_type != XFS_LI_BUF) {
				continue;
			}

			bp = blip->bli_buf;
			if ((XFS_BUF_TARGET(bp) == target) &&
			    (XFS_BUF_ADDR(bp) == blkno) &&
			    (XFS_BUF_COUNT(bp) == len)) {
				/*
				 * We found it.  Break out and
				 * return the pointer to the buffer.
				 */
				return bp;
			}
		}
	}
	return NULL;
}