/*
 * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA 94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */

#include "xfs.h"
#include "xfs_macros.h"
#include "xfs_types.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_rw.h"

STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
                xfs_daddr_t, int);
STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
                xfs_daddr_t, int);

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it is already locked
 * within the transaction, just increment its lock recursion count
 * and return a pointer to it.
 *
 * Use the fast path function xfs_trans_buf_item_match() or the buffer
 * cache routine incore_match() to find the buffer
 * if it is already owned by this transaction.
 *
 * If we don't already own the buffer, use get_buf() to get it.
 * If it doesn't yet have an associated xfs_buf_log_item structure,
 * then allocate one and add the item to this transaction.
 *
 * If the transaction pointer is NULL, make this just a normal
 * get_buf() call.
 */
xfs_buf_t *
xfs_trans_get_buf(xfs_trans_t   *tp,
                  xfs_buftarg_t *target_dev,
                  xfs_daddr_t   blkno,
                  int           len,
                  uint          flags)
{
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;

        if (flags == 0)
                flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;

        /*
         * Default to a normal get_buf() call if the tp is NULL.
         */
        if (tp == NULL) {
                bp = xfs_buf_get_flags(target_dev, blkno, len,
                                       flags | BUF_BUSY);
                return(bp);
        }

        /*
         * If we find the buffer in the cache with this transaction
         * pointer in its b_fsprivate2 field, then we know we already
         * have it locked.  In this case we just increment the lock
         * recursion count and return the buffer to the caller.
         */
        if (tp->t_items.lic_next == NULL) {
                bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
        } else {
                bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
        }
        if (bp != NULL) {
                ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
                if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
                        xfs_buftrace("TRANS GET RECUR SHUT", bp);
                        XFS_BUF_SUPER_STALE(bp);
                }
                /*
                 * If the buffer is stale then it was binval'ed
                 * since last read.  This doesn't matter since the
                 * caller isn't allowed to use the data anyway.
                 */
                else if (XFS_BUF_ISSTALE(bp)) {
                        xfs_buftrace("TRANS GET RECUR STALE", bp);
                        ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
                }
                ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
                ASSERT(bip != NULL);
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                bip->bli_recur++;
                xfs_buftrace("TRANS GET RECUR", bp);
                xfs_buf_item_trace("GET RECUR", bip);
                return (bp);
        }

        /*
         * We always specify the BUF_BUSY flag within a transaction so
         * that get_buf does not try to push out a delayed write buffer
         * which might cause another transaction to take place (if the
         * buffer was delayed alloc).  Such recursive transactions can
         * easily deadlock with our current transaction as well as cause
         * us to run out of stack space.
         */
        bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY);
        if (bp == NULL) {
                return NULL;
        }

        ASSERT(!XFS_BUF_GETERROR(bp));

        /*
         * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
         * it doesn't have one yet, then allocate one and initialize it.
         * The checks to see if one is there are in xfs_buf_item_init().
         */
        xfs_buf_item_init(bp, tp->t_mountp);

        /*
         * Set the recursion count for the buffer within this transaction
         * to 0.
         */
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
        bip->bli_recur = 0;

        /*
         * Take a reference for this transaction on the buf item.
         */
        atomic_inc(&bip->bli_refcount);

        /*
         * Get a log_item_desc to point at the new item.
         */
        (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);

        /*
         * Initialize b_fsprivate2 so we can find it with incore_match()
         * above.
         */
        XFS_BUF_SET_FSPRIVATE2(bp, tp);

        xfs_buftrace("TRANS GET", bp);
        xfs_buf_item_trace("GET", bip);
        return (bp);
}
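
/*
 * Illustrative (hypothetical) caller of xfs_trans_get_buf(): a sketch
 * of how a transaction typically obtains, modifies, and logs a block.
 * The transaction setup, block number, and one-block length are
 * assumptions for the example, not code from this file.
 *
 *      xfs_buf_t       *bp;
 *
 *      bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
 *                             XFS_FSB_TO_BB(mp, 1), 0);
 *      if (bp == NULL)
 *              return XFS_ERROR(EIO);
 *      ...modify the buffer contents...
 *      xfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);
 */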

/*
 * Get and lock the superblock buffer of this file system for the
 * given transaction.
 *
 * We don't need to use incore_match() here, because the superblock
 * buffer is a private buffer which we keep a pointer to in the
 * mount structure.
 */
xfs_buf_t *
xfs_trans_getsb(xfs_trans_t     *tp,
                struct xfs_mount *mp,
                int             flags)
{
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;

        /*
         * Default to just trying to lock the superblock buffer
         * if tp is NULL.
         */
        if (tp == NULL) {
                return (xfs_getsb(mp, flags));
        }

        /*
         * If the superblock buffer already has this transaction
         * pointer in its b_fsprivate2 field, then we know we already
         * have it locked.  In this case we just increment the lock
         * recursion count and return the buffer to the caller.
         */
        bp = mp->m_sb_bp;
        if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
                bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
                ASSERT(bip != NULL);
                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                bip->bli_recur++;
                xfs_buf_item_trace("GETSB RECUR", bip);
                return (bp);
        }

        bp = xfs_getsb(mp, flags);
        if (bp == NULL) {
                return NULL;
        }

        /*
         * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
         * it doesn't have one yet, then allocate one and initialize it.
         * The checks to see if one is there are in xfs_buf_item_init().
         */
        xfs_buf_item_init(bp, mp);

        /*
         * Set the recursion count for the buffer within this transaction
         * to 0.
         */
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
        bip->bli_recur = 0;

        /*
         * Take a reference for this transaction on the buf item.
         */
        atomic_inc(&bip->bli_refcount);

        /*
         * Get a log_item_desc to point at the new item.
         */
        (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);

        /*
         * Initialize b_fsprivate2 so we can find it with incore_match()
         * above.
         */
        XFS_BUF_SET_FSPRIVATE2(bp, tp);

        xfs_buf_item_trace("GETSB", bip);
        return (bp);
}
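
/*
 * Illustrative (hypothetical) use of xfs_trans_getsb(): a sketch of a
 * transaction that locks the superblock buffer before logging a change
 * to it.  The byte range logged and the surrounding transaction are
 * assumptions for the example.
 *
 *      xfs_buf_t       *sbp;
 *
 *      sbp = xfs_trans_getsb(tp, mp, 0);
 *      ...copy the updated in-core superblock fields out to sbp...
 *      xfs_trans_log_buf(tp, sbp, first, last);
 */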

#ifdef DEBUG
xfs_buftarg_t   *xfs_error_target;
int             xfs_do_error;
int             xfs_req_num;
int             xfs_error_mod = 33;
#endif
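
/*
 * Error injection knobs, DEBUG builds only: when xfs_do_error is set
 * and xfs_error_target points at a buffer target, xfs_trans_read_buf()
 * below fails every xfs_error_mod'th read request to that target with
 * EIO (in the transactional case after forcing a shutdown), so that
 * transaction error handling can be exercised.
 */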

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it has not yet been
 * read in, read it from disk.  If it is already locked
 * within the transaction and already read in, just increment its
 * lock recursion count and return a pointer to it.
 *
 * Use the fast path function xfs_trans_buf_item_match() or the buffer
 * cache routine incore_match() to find the buffer
 * if it is already owned by this transaction.
 *
 * If we don't already own the buffer, use read_buf() to get it.
 * If it doesn't yet have an associated xfs_buf_log_item structure,
 * then allocate one and add the item to this transaction.
 *
 * If the transaction pointer is NULL, make this just a normal
 * read_buf() call.
 */
int
xfs_trans_read_buf(
        xfs_mount_t     *mp,
        xfs_trans_t     *tp,
        xfs_buftarg_t   *target,
        xfs_daddr_t     blkno,
        int             len,
        uint            flags,
        xfs_buf_t       **bpp)
{
        xfs_buf_t               *bp;
        xfs_buf_log_item_t      *bip;
        int                     error;

        if (flags == 0)
                flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;

        /*
         * Default to a normal read_buf() call if the tp is NULL.
         */
        if (tp == NULL) {
                bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
                if (!bp)
                        return XFS_ERROR(ENOMEM);

                if (XFS_BUF_GETERROR(bp) != 0) {
                        xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                          bp, blkno);
                        error = XFS_BUF_GETERROR(bp);
                        xfs_buf_relse(bp);
                        return error;
                }
#ifdef DEBUG
                if (xfs_do_error) {
                        if (xfs_error_target == target) {
                                if (((xfs_req_num++) % xfs_error_mod) == 0) {
                                        xfs_buf_relse(bp);
                                        printk("Returning error!\n");
                                        return XFS_ERROR(EIO);
                                }
                        }
                }
#endif
                if (XFS_FORCED_SHUTDOWN(mp))
                        goto shutdown_abort;
                *bpp = bp;
                return 0;
        }

        /*
         * If we find the buffer in the cache with this transaction
         * pointer in its b_fsprivate2 field, then we know we already
         * have it locked.  If it is already read in we just increment
         * the lock recursion count and return the buffer to the caller.
         * If the buffer is not yet read in, then we read it in, increment
         * the lock recursion count, and return it to the caller.
         */
        if (tp->t_items.lic_next == NULL) {
                bp = xfs_trans_buf_item_match(tp, target, blkno, len);
        } else {
                bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
        }
        if (bp != NULL) {
                ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
                ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
                ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
                ASSERT((XFS_BUF_ISERROR(bp)) == 0);
                if (!(XFS_BUF_ISDONE(bp))) {
                        xfs_buftrace("READ_BUF_INCORE !DONE", bp);
                        ASSERT(!XFS_BUF_ISASYNC(bp));
                        XFS_BUF_READ(bp);
                        xfsbdstrat(tp->t_mountp, bp);
                        xfs_iowait(bp);
                        if (XFS_BUF_GETERROR(bp) != 0) {
                                xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                                  bp, blkno);
                                error = XFS_BUF_GETERROR(bp);
                                xfs_buf_relse(bp);
                                /*
                                 * We can gracefully recover from most
                                 * read errors.  Ones we can't are those
                                 * that happen after the transaction's
                                 * already dirty.
                                 */
                                if (tp->t_flags & XFS_TRANS_DIRTY)
                                        xfs_force_shutdown(tp->t_mountp,
                                                    XFS_METADATA_IO_ERROR);
                                return error;
                        }
                }
                /*
                 * We never locked this buf ourselves, so we shouldn't
                 * brelse it either.  Just get out.
                 */
                if (XFS_FORCED_SHUTDOWN(mp)) {
                        xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp);
                        *bpp = NULL;
                        return XFS_ERROR(EIO);
                }

                bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
                bip->bli_recur++;

                ASSERT(atomic_read(&bip->bli_refcount) > 0);
                xfs_buf_item_trace("READ RECUR", bip);
                *bpp = bp;
                return 0;
        }

        /*
         * We always specify the BUF_BUSY flag within a transaction so
         * that get_buf does not try to push out a delayed write buffer
         * which might cause another transaction to take place (if the
         * buffer was delayed alloc).  Such recursive transactions can
         * easily deadlock with our current transaction as well as cause
         * us to run out of stack space.
         */
        bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
        if (bp == NULL) {
                *bpp = NULL;
                return 0;
        }
        if (XFS_BUF_GETERROR(bp) != 0) {
                XFS_BUF_SUPER_STALE(bp);
                xfs_buftrace("READ ERROR", bp);
                error = XFS_BUF_GETERROR(bp);

                xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                  bp, blkno);
                if (tp->t_flags & XFS_TRANS_DIRTY)
                        xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR);
                xfs_buf_relse(bp);
                return error;
        }
#ifdef DEBUG
        if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) {
                if (xfs_error_target == target) {
                        if (((xfs_req_num++) % xfs_error_mod) == 0) {
                                xfs_force_shutdown(tp->t_mountp,
                                                   XFS_METADATA_IO_ERROR);
                                xfs_buf_relse(bp);
                                printk("Returning error in trans!\n");
                                return XFS_ERROR(EIO);
                        }
                }
        }
#endif
        if (XFS_FORCED_SHUTDOWN(mp))
                goto shutdown_abort;

        /*
         * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
         * it doesn't have one yet, then allocate one and initialize it.
         * The checks to see if one is there are in xfs_buf_item_init().
         */
        xfs_buf_item_init(bp, tp->t_mountp);

        /*
         * Set the recursion count for the buffer within this transaction
         * to 0.
         */
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
        bip->bli_recur = 0;

        /*
         * Take a reference for this transaction on the buf item.
         */
        atomic_inc(&bip->bli_refcount);

        /*
         * Get a log_item_desc to point at the new item.
         */
        (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);

        /*
         * Initialize b_fsprivate2 so we can find it with incore_match()
         * above.
         */
        XFS_BUF_SET_FSPRIVATE2(bp, tp);

        xfs_buftrace("TRANS READ", bp);
        xfs_buf_item_trace("READ", bip);
        *bpp = bp;
        return 0;

shutdown_abort:
        /*
         * The theory here is that the buffer is good but we're
         * bailing out because the filesystem is being forcibly
         * shut down.  So we should leave the b_flags alone since
         * the buffer's not staled and just get out.
         */
#if defined(DEBUG)
        if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
                cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
#endif
        ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
                                     (XFS_B_STALE|XFS_B_DELWRI));

        xfs_buftrace("READ_BUF XFSSHUTDN", bp);
        xfs_buf_relse(bp);
        *bpp = NULL;
        return XFS_ERROR(EIO);
}
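
/*
 * Illustrative (hypothetical) caller of xfs_trans_read_buf(): a sketch
 * of reading one filesystem block of metadata within a transaction.
 * The block number and the decision to treat a NULL buffer as EIO are
 * assumptions for the example.
 *
 *      xfs_buf_t       *bp;
 *      int             error;
 *
 *      error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno,
 *                                 XFS_FSB_TO_BB(mp, 1), 0, &bp);
 *      if (error)
 *              return error;
 *      if (bp == NULL)
 *              return XFS_ERROR(EIO);
 *      ...use the buffer, then xfs_trans_log_buf() or xfs_trans_brelse()...
 */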

/*
 * Release the buffer bp which was previously acquired with one of the
 * xfs_trans_... buffer allocation routines if the buffer has not
 * been modified within this transaction.  If the buffer is modified
 * within this transaction, do decrement the recursion count but do
 * not release the buffer even if the count goes to 0.  If the buffer is not
 * modified within the transaction, decrement the recursion count and
 * release the buffer if the recursion count goes to 0.
 *
 * If the buffer is to be released and it was not modified before
 * this transaction began, then free the buf_log_item associated with it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * brelse() call.
 */
void
xfs_trans_brelse(xfs_trans_t    *tp,
                 xfs_buf_t      *bp)
{
        xfs_buf_log_item_t      *bip;
        xfs_log_item_t          *lip;
        xfs_log_item_desc_t     *lidp;

        /*
         * Default to a normal brelse() call if the tp is NULL.
         */
        if (tp == NULL) {
                ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
                /*
                 * If there's a buf log item attached to the buffer,
                 * then let the AIL know that the buffer is being
                 * unlocked.
                 */
                if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
                        lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
                        if (lip->li_type == XFS_LI_BUF) {
                                bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
                                xfs_trans_unlocked_item(
                                                bip->bli_item.li_mountp,
                                                lip);
                        }
                }
                xfs_buf_relse(bp);
                return;
        }

        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        /*
         * Find the item descriptor pointing to this buffer's
         * log item.  It must be there.
         */
        lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
        ASSERT(lidp != NULL);

        /*
         * If the release is just for a recursive lock,
         * then decrement the count and return.
         */
        if (bip->bli_recur > 0) {
                bip->bli_recur--;
                xfs_buf_item_trace("RELSE RECUR", bip);
                return;
        }

        /*
         * If the buffer is dirty within this transaction, we can't
         * release it until we commit.
         */
        if (lidp->lid_flags & XFS_LID_DIRTY) {
                xfs_buf_item_trace("RELSE DIRTY", bip);
                return;
        }

        /*
         * If the buffer has been invalidated, then we can't release
         * it until the transaction commits to disk unless it is re-dirtied
         * as part of this transaction.  This prevents us from pulling
         * the item from the AIL before we should.
         */
        if (bip->bli_flags & XFS_BLI_STALE) {
                xfs_buf_item_trace("RELSE STALE", bip);
                return;
        }

        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
        xfs_buf_item_trace("RELSE", bip);

        /*
         * Free up the log item descriptor tracking the released item.
         */
        xfs_trans_free_item(tp, lidp);

        /*
         * Clear the hold flag in the buf log item if it is set.
         * We wouldn't want the next user of the buffer to
         * get confused.
         */
        if (bip->bli_flags & XFS_BLI_HOLD) {
                bip->bli_flags &= ~XFS_BLI_HOLD;
        }

        /*
         * Drop our reference to the buf log item.
         */
        atomic_dec(&bip->bli_refcount);

        /*
         * If the buf item is not tracking data in the log, then
         * we must free it before releasing the buffer back to the
         * free pool.  Before releasing the buffer to the free pool,
         * clear the transaction pointer in b_fsprivate2 to dissolve
         * its relation to this transaction.
         */
        if (!xfs_buf_item_dirty(bip)) {
                /***
                ASSERT(bp->b_pincount == 0);
                ***/
                ASSERT(atomic_read(&bip->bli_refcount) == 0);
                ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
                ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
                xfs_buf_item_relse(bp);
                bip = NULL;
        }
        XFS_BUF_SET_FSPRIVATE2(bp, NULL);

        /*
         * If we've still got a buf log item on the buffer, then
         * tell the AIL that the buffer is being unlocked.
         */
        if (bip != NULL) {
                xfs_trans_unlocked_item(bip->bli_item.li_mountp,
                                        (xfs_log_item_t *)bip);
        }

        xfs_buf_relse(bp);
        return;
}
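
/*
 * Illustrative (hypothetical) release pattern: a sketch of a caller
 * that reads a buffer, finds nothing to change, and gives it back.
 * needs_update() stands in for the caller's own check and is not a
 * real function.
 *
 *      error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno,
 *                                 XFS_FSB_TO_BB(mp, 1), 0, &bp);
 *      if (error)
 *              return error;
 *      if (!needs_update(bp)) {
 *              xfs_trans_brelse(tp, bp);
 *              return 0;
 *      }
 */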

/*
 * Add the locked buffer to the transaction.
 * The buffer must be locked, and it cannot be associated with any
 * transaction.
 *
 * If the buffer does not yet have a buf log item associated with it,
 * then allocate one for it.  Then add the buf item to the transaction.
 */
void
xfs_trans_bjoin(xfs_trans_t     *tp,
                xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);

        /*
         * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
         * it doesn't have one yet, then allocate one and initialize it.
         * The checks to see if one is there are in xfs_buf_item_init().
         */
        xfs_buf_item_init(bp, tp->t_mountp);
        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));

        /*
         * Take a reference for this transaction on the buf item.
         */
        atomic_inc(&bip->bli_refcount);

        /*
         * Get a log_item_desc to point at the new item.
         */
        (void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);

        /*
         * Initialize b_fsprivate2 so we can find it with incore_match()
         * in xfs_trans_get_buf() and friends above.
         */
        XFS_BUF_SET_FSPRIVATE2(bp, tp);

        xfs_buf_item_trace("BJOIN", bip);
}
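
/*
 * Illustrative (hypothetical) use of xfs_trans_bjoin(): a sketch of a
 * buffer that was locked outside any transaction and is then handed to
 * one.  The locking context is an assumption for the example.
 *
 *      bp = xfs_buf_get_flags(mp->m_ddev_targp, blkno,
 *                             XFS_FSB_TO_BB(mp, 1),
 *                             XFS_BUF_LOCK | XFS_BUF_MAPPED);
 *      ...decide the change belongs in transaction tp...
 *      xfs_trans_bjoin(tp, bp);
 *      (from here on the buffer is released through the transaction,
 *       e.g. by xfs_trans_brelse() or at commit, not by a bare
 *       xfs_buf_relse())
 */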

/*
 * Mark the buffer as not needing to be unlocked when the buf item's
 * IOP_UNLOCK() routine is called.  The buffer must already be locked
 * and associated with the given transaction.
 */
/* ARGSUSED */
void
xfs_trans_bhold(xfs_trans_t     *tp,
                xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(atomic_read(&bip->bli_refcount) > 0);
        bip->bli_flags |= XFS_BLI_HOLD;
        xfs_buf_item_trace("BHOLD", bip);
}

/*
 * Cancel the previous buffer hold request made on this buffer
 * for this transaction.
 */
void
xfs_trans_bhold_release(xfs_trans_t     *tp,
                        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
        ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
        ASSERT(atomic_read(&bip->bli_refcount) > 0);
        ASSERT(bip->bli_flags & XFS_BLI_HOLD);
        bip->bli_flags &= ~XFS_BLI_HOLD;
        xfs_buf_item_trace("BHOLD RELEASE", bip);
}

/*
 * This is called to mark bytes first through last inclusive of the given
 * buffer as needing to be logged when the transaction is committed.
 * The buffer must already be associated with the given transaction.
 *
 * First and last are numbers relative to the beginning of this buffer,
 * so the first byte in the buffer is numbered 0 regardless of the
 * value of b_blkno.
 */
void
xfs_trans_log_buf(xfs_trans_t   *tp,
                  xfs_buf_t     *bp,
                  uint          first,
                  uint          last)
{
        xfs_buf_log_item_t      *bip;
        xfs_log_item_desc_t     *lidp;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
        ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
        ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
               (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));

        /*
         * Mark the buffer as needing to be written out eventually,
         * and set its iodone function to remove the buffer's buf log
         * item from the AIL and free it when the buffer is flushed
         * to disk.  See xfs_buf_attach_iodone() for more details
         * on li_cb and xfs_buf_iodone_callbacks().
         * If we end up aborting this transaction, we trap this buffer
         * inside the b_bdstrat callback so that this won't get written to
         * disk.
         */
        XFS_BUF_DELAYWRITE(bp);
        XFS_BUF_DONE(bp);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);
        XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
        bip->bli_item.li_cb = (void(*)(xfs_buf_t *, xfs_log_item_t *))xfs_buf_iodone;

        /*
         * If we invalidated the buffer within this transaction, then
         * cancel the invalidation now that we're dirtying the buffer
         * again.  There are no races with the code in xfs_buf_item_unpin(),
         * because we have a reference to the buffer this entire time.
         */
        if (bip->bli_flags & XFS_BLI_STALE) {
                xfs_buf_item_trace("BLOG UNSTALE", bip);
                bip->bli_flags &= ~XFS_BLI_STALE;
                ASSERT(XFS_BUF_ISSTALE(bp));
                XFS_BUF_UNSTALE(bp);
                bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL;
        }

        lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
        ASSERT(lidp != NULL);

        tp->t_flags |= XFS_TRANS_DIRTY;
        lidp->lid_flags |= XFS_LID_DIRTY;
        lidp->lid_flags &= ~XFS_LID_BUF_STALE;
        bip->bli_flags |= XFS_BLI_LOGGED;
        xfs_buf_item_log(bip, first, last);
        xfs_buf_item_trace("BLOG", bip);
}
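
/*
 * Illustrative (hypothetical) byte-range logging: a sketch of logging
 * only the header of a block after modifying it.  struct
 * hypothetical_hdr is invented for the example; note that first and
 * last are inclusive byte offsets within the buffer.
 *
 *      struct hypothetical_hdr        *hdr;
 *
 *      hdr = (struct hypothetical_hdr *)XFS_BUF_PTR(bp);
 *      hdr->h_count++;
 *      xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
 */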

/*
 * This is called to invalidate a buffer that is being used within
 * a transaction.  Typically this is because the blocks in the
 * buffer are being freed, so we need to prevent it from being
 * written out when we're done.  Allowing it to be written again
 * might overwrite data in the free blocks if they are reallocated
 * to a file.
 *
 * We prevent the buffer from being written out by clearing the
 * B_DELWRI flag.  We can't always get rid of the buf log item at
 * this point, though, because the buffer may still be pinned by
 * another transaction.  If that is the case, then we'll wait until
 * the buffer is committed to disk for the last time (we can tell
 * by the ref count) and free it in xfs_buf_item_unpin().  Until it
 * is cleaned up we will keep the buffer locked so that the buffer
 * and buf log item are not reused.
 */
void
xfs_trans_binval(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_log_item_desc_t     *lidp;
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
        ASSERT(lidp != NULL);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        if (bip->bli_flags & XFS_BLI_STALE) {
                /*
                 * If the buffer is already invalidated, then
                 * just return.
                 */
                ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
                ASSERT(XFS_BUF_ISSTALE(bp));
                ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
                ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF));
                ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
                ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
                ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
                xfs_buftrace("XFS_BINVAL RECUR", bp);
                xfs_buf_item_trace("BINVAL RECUR", bip);
                return;
        }

        /*
         * Clear the dirty bit in the buffer and set the STALE flag
         * in the buf log item.  The STALE flag will be used in
         * xfs_buf_item_unpin() to determine if it should clean up
         * when the last reference to the buf item is given up.
         * We set the XFS_BLI_CANCEL flag in the buf log format structure
         * and log the buf item.  This will be used at recovery time
         * to determine that copies of the buffer in the log before
         * this should not be replayed.
         * We mark the item descriptor and the transaction dirty so
         * that we'll hold the buffer until after the commit.
         *
         * Since we're invalidating the buffer, we also clear the state
         * about which parts of the buffer have been logged.  We also
         * clear the flag indicating that this is an inode buffer since
         * the data in the buffer will no longer be valid.
         *
         * We set the stale bit in the buffer as well since we're getting
         * rid of it.
         */
        XFS_BUF_UNDELAYWRITE(bp);
        XFS_BUF_STALE(bp);
        bip->bli_flags |= XFS_BLI_STALE;
        bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
        bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
        bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
        memset((char *)(bip->bli_format.blf_data_map), 0,
               (bip->bli_format.blf_map_size * sizeof(uint)));
        lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
        tp->t_flags |= XFS_TRANS_DIRTY;
        xfs_buftrace("XFS_BINVAL", bp);
        xfs_buf_item_trace("BINVAL", bip);
}
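
/*
 * Illustrative (hypothetical) invalidation: a sketch of freeing the
 * extent backing a buffer within a transaction and then invalidating
 * the buffer so its stale contents can never be written over the
 * freed (and possibly reallocated) blocks.  The free step is an
 * assumption for the example.
 *
 *      ...free the extent covering bp's blocks within tp...
 *      xfs_trans_binval(tp, bp);
 */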

/*
 * This call is used to indicate that the buffer contains on-disk
 * inodes which must be handled specially during recovery.  They
 * require special handling because only the di_next_unlinked from
 * the inodes in the buffer should be recovered.  The rest of the
 * data in the buffer is logged via the inodes themselves.
 *
 * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
 * format structure so that we'll know what to do at recovery time.
 */
/* ARGSUSED */
void
xfs_trans_inode_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
}

/*
 * This call is used to indicate that the buffer is going to
 * be staled and was an inode buffer.  This means it gets
 * special processing during unpin, where any inodes
 * associated with the buffer should be removed from the AIL.
 * There is also special processing during recovery: any replay
 * of the inodes in the buffer needs to be prevented, as the
 * buffer may have been reused.
 */
void
xfs_trans_stale_inode_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_STALE_INODE;
        bip->bli_item.li_cb = (void(*)(xfs_buf_t *, xfs_log_item_t *))
                xfs_buf_iodone;
}

/*
 * Mark the buffer as being one which contains newly allocated
 * inodes.  We need to make sure that even if this buffer is
 * relogged as an 'inode buf' we still recover all of the inode
 * images in the face of a crash.  This works in coordination with
 * xfs_buf_item_committed() to ensure that the buffer remains in the
 * AIL at its original location even after it has been relogged.
 */
/* ARGSUSED */
void
xfs_trans_inode_alloc_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
}

/*
 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
 * dquots.  However, unlike in inode buffer recovery, dquot buffers get
 * recovered in their entirety.  (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag).
 * The only thing that makes dquot buffers different from regular
 * buffers is that we must not replay dquot bufs when recovering
 * if a _corresponding_ quotaoff has happened.  We also have to distinguish
 * between usr dquot bufs and grp dquot bufs, because usr and grp quotas
 * can be turned off independently.
 */
/* ARGSUSED */
void
xfs_trans_dquot_buf(
        xfs_trans_t     *tp,
        xfs_buf_t       *bp,
        uint            type)
{
        xfs_buf_log_item_t      *bip;

        ASSERT(XFS_BUF_ISBUSY(bp));
        ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
        ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
        ASSERT(type == XFS_BLI_UDQUOT_BUF ||
               type == XFS_BLI_PDQUOT_BUF ||
               type == XFS_BLI_GDQUOT_BUF);

        bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
        ASSERT(atomic_read(&bip->bli_refcount) > 0);

        bip->bli_format.blf_flags |= type;
}
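
/*
 * Illustrative (hypothetical) use of the buffer-type calls above: a
 * sketch of initializing an inode cluster buffer so that recovery
 * replays every inode image in it.  The cluster size and the init
 * step are assumptions for the example.
 *
 *      bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
 *                             XFS_FSB_TO_BB(mp, cluster_fsbs), 0);
 *      ...write initial on-disk inode cores into bp...
 *      xfs_trans_inode_alloc_buf(tp, bp);
 *      xfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);
 */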

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.  Only check the first, embedded
 * chunk, since we don't want to spend all day scanning large transactions.
 */
STATIC xfs_buf_t *
xfs_trans_buf_item_match(
        xfs_trans_t     *tp,
        xfs_buftarg_t   *target,
        xfs_daddr_t     blkno,
        int             len)
{
        xfs_log_item_chunk_t    *licp;
        xfs_log_item_desc_t     *lidp;
        xfs_buf_log_item_t      *blip;
        xfs_buf_t               *bp;
        int                     i;

        bp = NULL;
        len = BBTOB(len);
        licp = &tp->t_items;
        if (!XFS_LIC_ARE_ALL_FREE(licp)) {
                for (i = 0; i < licp->lic_unused; i++) {
                        /*
                         * Skip unoccupied slots.
                         */
                        if (XFS_LIC_ISFREE(licp, i)) {
                                continue;
                        }

                        lidp = XFS_LIC_SLOT(licp, i);
                        blip = (xfs_buf_log_item_t *)lidp->lid_item;
                        if (blip->bli_item.li_type != XFS_LI_BUF) {
                                continue;
                        }

                        bp = blip->bli_buf;
                        if ((XFS_BUF_TARGET(bp) == target) &&
                            (XFS_BUF_ADDR(bp) == blkno) &&
                            (XFS_BUF_COUNT(bp) == len)) {
                                /*
                                 * We found it.  Break out and
                                 * return the pointer to the buffer.
                                 */
                                break;
                        } else {
                                bp = NULL;
                        }
                }
        }
        return bp;
}

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.  Check all the chunks; we
 * want to be thorough.
 */
STATIC xfs_buf_t *
xfs_trans_buf_item_match_all(
        xfs_trans_t     *tp,
        xfs_buftarg_t   *target,
        xfs_daddr_t     blkno,
        int             len)
{
        xfs_log_item_chunk_t    *licp;
        xfs_log_item_desc_t     *lidp;
        xfs_buf_log_item_t      *blip;
        xfs_buf_t               *bp;
        int                     i;

        bp = NULL;
        len = BBTOB(len);
        for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
                if (XFS_LIC_ARE_ALL_FREE(licp)) {
                        ASSERT(licp == &tp->t_items);
                        ASSERT(licp->lic_next == NULL);
                        return NULL;
                }
                for (i = 0; i < licp->lic_unused; i++) {
                        /*
                         * Skip unoccupied slots.
                         */
                        if (XFS_LIC_ISFREE(licp, i)) {
                                continue;
                        }

                        lidp = XFS_LIC_SLOT(licp, i);
                        blip = (xfs_buf_log_item_t *)lidp->lid_item;
                        if (blip->bli_item.li_type != XFS_LI_BUF) {
                                continue;
                        }

                        bp = blip->bli_buf;
                        if ((XFS_BUF_TARGET(bp) == target) &&
                            (XFS_BUF_ADDR(bp) == blkno) &&
                            (XFS_BUF_COUNT(bp) == len)) {
                                /*
                                 * We found it.  Return the pointer
                                 * to the buffer.
                                 */
                                return bp;
                        }
                }
        }
        return NULL;
}