1 /* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * Copyright (c) 2013 Red Hat, Inc. 4 * All Rights Reserved. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it would be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19 #include "xfs.h" 20 #include "xfs_fs.h" 21 #include "xfs_shared.h" 22 #include "xfs_format.h" 23 #include "xfs_log_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_bit.h" 26 #include "xfs_mount.h" 27 #include "xfs_da_format.h" 28 #include "xfs_da_btree.h" 29 #include "xfs_dir2.h" 30 #include "xfs_dir2_priv.h" 31 #include "xfs_inode.h" 32 #include "xfs_trans.h" 33 #include "xfs_inode_item.h" 34 #include "xfs_alloc.h" 35 #include "xfs_bmap.h" 36 #include "xfs_attr.h" 37 #include "xfs_attr_leaf.h" 38 #include "xfs_error.h" 39 #include "xfs_trace.h" 40 #include "xfs_cksum.h" 41 #include "xfs_buf_item.h" 42 43 /* 44 * xfs_da_btree.c 45 * 46 * Routines to implement directories as Btrees of hashed names. 47 */ 48 49 /*======================================================================== 50 * Function prototypes for the kernel. 51 *========================================================================*/ 52 53 /* 54 * Routines used for growing the Btree. 55 */ 56 STATIC int xfs_da3_root_split(xfs_da_state_t *state, 57 xfs_da_state_blk_t *existing_root, 58 xfs_da_state_blk_t *new_child); 59 STATIC int xfs_da3_node_split(xfs_da_state_t *state, 60 xfs_da_state_blk_t *existing_blk, 61 xfs_da_state_blk_t *split_blk, 62 xfs_da_state_blk_t *blk_to_add, 63 int treelevel, 64 int *result); 65 STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state, 66 xfs_da_state_blk_t *node_blk_1, 67 xfs_da_state_blk_t *node_blk_2); 68 STATIC void xfs_da3_node_add(xfs_da_state_t *state, 69 xfs_da_state_blk_t *old_node_blk, 70 xfs_da_state_blk_t *new_node_blk); 71 72 /* 73 * Routines used for shrinking the Btree. 74 */ 75 STATIC int xfs_da3_root_join(xfs_da_state_t *state, 76 xfs_da_state_blk_t *root_blk); 77 STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval); 78 STATIC void xfs_da3_node_remove(xfs_da_state_t *state, 79 xfs_da_state_blk_t *drop_blk); 80 STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state, 81 xfs_da_state_blk_t *src_node_blk, 82 xfs_da_state_blk_t *dst_node_blk); 83 84 /* 85 * Utility routines. 86 */ 87 STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, 88 xfs_da_state_blk_t *drop_blk, 89 xfs_da_state_blk_t *save_blk); 90 91 92 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ 93 94 /* 95 * Allocate a dir-state structure. 96 * We don't put them on the stack since they're large. 97 */ 98 xfs_da_state_t * 99 xfs_da_state_alloc(void) 100 { 101 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS); 102 } 103 104 /* 105 * Kill the altpath contents of a da-state structure. 106 */ 107 STATIC void 108 xfs_da_state_kill_altpath(xfs_da_state_t *state) 109 { 110 int i; 111 112 for (i = 0; i < state->altpath.active; i++) 113 state->altpath.blk[i].bp = NULL; 114 state->altpath.active = 0; 115 } 116 117 /* 118 * Free a da-state structure. 119 */ 120 void 121 xfs_da_state_free(xfs_da_state_t *state) 122 { 123 xfs_da_state_kill_altpath(state); 124 #ifdef DEBUG 125 memset((char *)state, 0, sizeof(*state)); 126 #endif /* DEBUG */ 127 kmem_zone_free(xfs_da_state_zone, state); 128 } 129 130 static bool 131 xfs_da3_node_verify( 132 struct xfs_buf *bp) 133 { 134 struct xfs_mount *mp = bp->b_target->bt_mount; 135 struct xfs_da_intnode *hdr = bp->b_addr; 136 struct xfs_da3_icnode_hdr ichdr; 137 const struct xfs_dir_ops *ops; 138 139 ops = xfs_dir_get_ops(mp, NULL); 140 141 ops->node_hdr_from_disk(&ichdr, hdr); 142 143 if (xfs_sb_version_hascrc(&mp->m_sb)) { 144 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 145 146 if (ichdr.magic != XFS_DA3_NODE_MAGIC) 147 return false; 148 149 if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid)) 150 return false; 151 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 152 return false; 153 } else { 154 if (ichdr.magic != XFS_DA_NODE_MAGIC) 155 return false; 156 } 157 if (ichdr.level == 0) 158 return false; 159 if (ichdr.level > XFS_DA_NODE_MAXDEPTH) 160 return false; 161 if (ichdr.count == 0) 162 return false; 163 164 /* 165 * we don't know if the node is for and attribute or directory tree, 166 * so only fail if the count is outside both bounds 167 */ 168 if (ichdr.count > mp->m_dir_geo->node_ents && 169 ichdr.count > mp->m_attr_geo->node_ents) 170 return false; 171 172 /* XXX: hash order check? */ 173 174 return true; 175 } 176 177 static void 178 xfs_da3_node_write_verify( 179 struct xfs_buf *bp) 180 { 181 struct xfs_mount *mp = bp->b_target->bt_mount; 182 struct xfs_buf_log_item *bip = bp->b_fspriv; 183 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 184 185 if (!xfs_da3_node_verify(bp)) { 186 xfs_buf_ioerror(bp, -EFSCORRUPTED); 187 xfs_verifier_error(bp); 188 return; 189 } 190 191 if (!xfs_sb_version_hascrc(&mp->m_sb)) 192 return; 193 194 if (bip) 195 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 196 197 xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); 198 } 199 200 /* 201 * leaf/node format detection on trees is sketchy, so a node read can be done on 202 * leaf level blocks when detection identifies the tree as a node format tree 203 * incorrectly. In this case, we need to swap the verifier to match the correct 204 * format of the block being read. 205 */ 206 static void 207 xfs_da3_node_read_verify( 208 struct xfs_buf *bp) 209 { 210 struct xfs_da_blkinfo *info = bp->b_addr; 211 212 switch (be16_to_cpu(info->magic)) { 213 case XFS_DA3_NODE_MAGIC: 214 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { 215 xfs_buf_ioerror(bp, -EFSBADCRC); 216 break; 217 } 218 /* fall through */ 219 case XFS_DA_NODE_MAGIC: 220 if (!xfs_da3_node_verify(bp)) { 221 xfs_buf_ioerror(bp, -EFSCORRUPTED); 222 break; 223 } 224 return; 225 case XFS_ATTR_LEAF_MAGIC: 226 case XFS_ATTR3_LEAF_MAGIC: 227 bp->b_ops = &xfs_attr3_leaf_buf_ops; 228 bp->b_ops->verify_read(bp); 229 return; 230 case XFS_DIR2_LEAFN_MAGIC: 231 case XFS_DIR3_LEAFN_MAGIC: 232 bp->b_ops = &xfs_dir3_leafn_buf_ops; 233 bp->b_ops->verify_read(bp); 234 return; 235 default: 236 break; 237 } 238 239 /* corrupt block */ 240 xfs_verifier_error(bp); 241 } 242 243 const struct xfs_buf_ops xfs_da3_node_buf_ops = { 244 .verify_read = xfs_da3_node_read_verify, 245 .verify_write = xfs_da3_node_write_verify, 246 }; 247 248 int 249 xfs_da3_node_read( 250 struct xfs_trans *tp, 251 struct xfs_inode *dp, 252 xfs_dablk_t bno, 253 xfs_daddr_t mappedbno, 254 struct xfs_buf **bpp, 255 int which_fork) 256 { 257 int err; 258 259 err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, 260 which_fork, &xfs_da3_node_buf_ops); 261 if (!err && tp) { 262 struct xfs_da_blkinfo *info = (*bpp)->b_addr; 263 int type; 264 265 switch (be16_to_cpu(info->magic)) { 266 case XFS_DA_NODE_MAGIC: 267 case XFS_DA3_NODE_MAGIC: 268 type = XFS_BLFT_DA_NODE_BUF; 269 break; 270 case XFS_ATTR_LEAF_MAGIC: 271 case XFS_ATTR3_LEAF_MAGIC: 272 type = XFS_BLFT_ATTR_LEAF_BUF; 273 break; 274 case XFS_DIR2_LEAFN_MAGIC: 275 case XFS_DIR3_LEAFN_MAGIC: 276 type = XFS_BLFT_DIR_LEAFN_BUF; 277 break; 278 default: 279 type = 0; 280 ASSERT(0); 281 break; 282 } 283 xfs_trans_buf_set_type(tp, *bpp, type); 284 } 285 return err; 286 } 287 288 /*======================================================================== 289 * Routines used for growing the Btree. 290 *========================================================================*/ 291 292 /* 293 * Create the initial contents of an intermediate node. 294 */ 295 int 296 xfs_da3_node_create( 297 struct xfs_da_args *args, 298 xfs_dablk_t blkno, 299 int level, 300 struct xfs_buf **bpp, 301 int whichfork) 302 { 303 struct xfs_da_intnode *node; 304 struct xfs_trans *tp = args->trans; 305 struct xfs_mount *mp = tp->t_mountp; 306 struct xfs_da3_icnode_hdr ichdr = {0}; 307 struct xfs_buf *bp; 308 int error; 309 struct xfs_inode *dp = args->dp; 310 311 trace_xfs_da_node_create(args); 312 ASSERT(level <= XFS_DA_NODE_MAXDEPTH); 313 314 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); 315 if (error) 316 return error; 317 bp->b_ops = &xfs_da3_node_buf_ops; 318 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); 319 node = bp->b_addr; 320 321 if (xfs_sb_version_hascrc(&mp->m_sb)) { 322 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 323 324 ichdr.magic = XFS_DA3_NODE_MAGIC; 325 hdr3->info.blkno = cpu_to_be64(bp->b_bn); 326 hdr3->info.owner = cpu_to_be64(args->dp->i_ino); 327 uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid); 328 } else { 329 ichdr.magic = XFS_DA_NODE_MAGIC; 330 } 331 ichdr.level = level; 332 333 dp->d_ops->node_hdr_to_disk(node, &ichdr); 334 xfs_trans_log_buf(tp, bp, 335 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); 336 337 *bpp = bp; 338 return 0; 339 } 340 341 /* 342 * Split a leaf node, rebalance, then possibly split 343 * intermediate nodes, rebalance, etc. 344 */ 345 int /* error */ 346 xfs_da3_split( 347 struct xfs_da_state *state) 348 { 349 struct xfs_da_state_blk *oldblk; 350 struct xfs_da_state_blk *newblk; 351 struct xfs_da_state_blk *addblk; 352 struct xfs_da_intnode *node; 353 struct xfs_buf *bp; 354 int max; 355 int action = 0; 356 int error; 357 int i; 358 359 trace_xfs_da_split(state->args); 360 361 /* 362 * Walk back up the tree splitting/inserting/adjusting as necessary. 363 * If we need to insert and there isn't room, split the node, then 364 * decide which fragment to insert the new block from below into. 365 * Note that we may split the root this way, but we need more fixup. 366 */ 367 max = state->path.active - 1; 368 ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH)); 369 ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC || 370 state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC); 371 372 addblk = &state->path.blk[max]; /* initial dummy value */ 373 for (i = max; (i >= 0) && addblk; state->path.active--, i--) { 374 oldblk = &state->path.blk[i]; 375 newblk = &state->altpath.blk[i]; 376 377 /* 378 * If a leaf node then 379 * Allocate a new leaf node, then rebalance across them. 380 * else if an intermediate node then 381 * We split on the last layer, must we split the node? 382 */ 383 switch (oldblk->magic) { 384 case XFS_ATTR_LEAF_MAGIC: 385 error = xfs_attr3_leaf_split(state, oldblk, newblk); 386 if ((error != 0) && (error != -ENOSPC)) { 387 return error; /* GROT: attr is inconsistent */ 388 } 389 if (!error) { 390 addblk = newblk; 391 break; 392 } 393 /* 394 * Entry wouldn't fit, split the leaf again. 395 */ 396 state->extravalid = 1; 397 if (state->inleaf) { 398 state->extraafter = 0; /* before newblk */ 399 trace_xfs_attr_leaf_split_before(state->args); 400 error = xfs_attr3_leaf_split(state, oldblk, 401 &state->extrablk); 402 } else { 403 state->extraafter = 1; /* after newblk */ 404 trace_xfs_attr_leaf_split_after(state->args); 405 error = xfs_attr3_leaf_split(state, newblk, 406 &state->extrablk); 407 } 408 if (error) 409 return error; /* GROT: attr inconsistent */ 410 addblk = newblk; 411 break; 412 case XFS_DIR2_LEAFN_MAGIC: 413 error = xfs_dir2_leafn_split(state, oldblk, newblk); 414 if (error) 415 return error; 416 addblk = newblk; 417 break; 418 case XFS_DA_NODE_MAGIC: 419 error = xfs_da3_node_split(state, oldblk, newblk, addblk, 420 max - i, &action); 421 addblk->bp = NULL; 422 if (error) 423 return error; /* GROT: dir is inconsistent */ 424 /* 425 * Record the newly split block for the next time thru? 426 */ 427 if (action) 428 addblk = newblk; 429 else 430 addblk = NULL; 431 break; 432 } 433 434 /* 435 * Update the btree to show the new hashval for this child. 436 */ 437 xfs_da3_fixhashpath(state, &state->path); 438 } 439 if (!addblk) 440 return 0; 441 442 /* 443 * Split the root node. 444 */ 445 ASSERT(state->path.active == 0); 446 oldblk = &state->path.blk[0]; 447 error = xfs_da3_root_split(state, oldblk, addblk); 448 if (error) { 449 addblk->bp = NULL; 450 return error; /* GROT: dir is inconsistent */ 451 } 452 453 /* 454 * Update pointers to the node which used to be block 0 and 455 * just got bumped because of the addition of a new root node. 456 * There might be three blocks involved if a double split occurred, 457 * and the original block 0 could be at any position in the list. 458 * 459 * Note: the magic numbers and sibling pointers are in the same 460 * physical place for both v2 and v3 headers (by design). Hence it 461 * doesn't matter which version of the xfs_da_intnode structure we use 462 * here as the result will be the same using either structure. 463 */ 464 node = oldblk->bp->b_addr; 465 if (node->hdr.info.forw) { 466 if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { 467 bp = addblk->bp; 468 } else { 469 ASSERT(state->extravalid); 470 bp = state->extrablk.bp; 471 } 472 node = bp->b_addr; 473 node->hdr.info.back = cpu_to_be32(oldblk->blkno); 474 xfs_trans_log_buf(state->args->trans, bp, 475 XFS_DA_LOGRANGE(node, &node->hdr.info, 476 sizeof(node->hdr.info))); 477 } 478 node = oldblk->bp->b_addr; 479 if (node->hdr.info.back) { 480 if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { 481 bp = addblk->bp; 482 } else { 483 ASSERT(state->extravalid); 484 bp = state->extrablk.bp; 485 } 486 node = bp->b_addr; 487 node->hdr.info.forw = cpu_to_be32(oldblk->blkno); 488 xfs_trans_log_buf(state->args->trans, bp, 489 XFS_DA_LOGRANGE(node, &node->hdr.info, 490 sizeof(node->hdr.info))); 491 } 492 addblk->bp = NULL; 493 return 0; 494 } 495 496 /* 497 * Split the root. We have to create a new root and point to the two 498 * parts (the split old root) that we just created. Copy block zero to 499 * the EOF, extending the inode in process. 500 */ 501 STATIC int /* error */ 502 xfs_da3_root_split( 503 struct xfs_da_state *state, 504 struct xfs_da_state_blk *blk1, 505 struct xfs_da_state_blk *blk2) 506 { 507 struct xfs_da_intnode *node; 508 struct xfs_da_intnode *oldroot; 509 struct xfs_da_node_entry *btree; 510 struct xfs_da3_icnode_hdr nodehdr; 511 struct xfs_da_args *args; 512 struct xfs_buf *bp; 513 struct xfs_inode *dp; 514 struct xfs_trans *tp; 515 struct xfs_dir2_leaf *leaf; 516 xfs_dablk_t blkno; 517 int level; 518 int error; 519 int size; 520 521 trace_xfs_da_root_split(state->args); 522 523 /* 524 * Copy the existing (incorrect) block from the root node position 525 * to a free space somewhere. 526 */ 527 args = state->args; 528 error = xfs_da_grow_inode(args, &blkno); 529 if (error) 530 return error; 531 532 dp = args->dp; 533 tp = args->trans; 534 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork); 535 if (error) 536 return error; 537 node = bp->b_addr; 538 oldroot = blk1->bp->b_addr; 539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { 541 struct xfs_da3_icnode_hdr nodehdr; 542 543 dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); 544 btree = dp->d_ops->node_tree_p(oldroot); 545 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); 546 level = nodehdr.level; 547 548 /* 549 * we are about to copy oldroot to bp, so set up the type 550 * of bp while we know exactly what it will be. 551 */ 552 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); 553 } else { 554 struct xfs_dir3_icleaf_hdr leafhdr; 555 struct xfs_dir2_leaf_entry *ents; 556 557 leaf = (xfs_dir2_leaf_t *)oldroot; 558 dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); 559 ents = dp->d_ops->leaf_ents_p(leaf); 560 561 ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || 562 leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); 563 size = (int)((char *)&ents[leafhdr.count] - (char *)leaf); 564 level = 0; 565 566 /* 567 * we are about to copy oldroot to bp, so set up the type 568 * of bp while we know exactly what it will be. 569 */ 570 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); 571 } 572 573 /* 574 * we can copy most of the information in the node from one block to 575 * another, but for CRC enabled headers we have to make sure that the 576 * block specific identifiers are kept intact. We update the buffer 577 * directly for this. 578 */ 579 memcpy(node, oldroot, size); 580 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || 581 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { 582 struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node; 583 584 node3->hdr.info.blkno = cpu_to_be64(bp->b_bn); 585 } 586 xfs_trans_log_buf(tp, bp, 0, size - 1); 587 588 bp->b_ops = blk1->bp->b_ops; 589 xfs_trans_buf_copy_type(bp, blk1->bp); 590 blk1->bp = bp; 591 blk1->blkno = blkno; 592 593 /* 594 * Set up the new root node. 595 */ 596 error = xfs_da3_node_create(args, 597 (args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0, 598 level + 1, &bp, args->whichfork); 599 if (error) 600 return error; 601 602 node = bp->b_addr; 603 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 604 btree = dp->d_ops->node_tree_p(node); 605 btree[0].hashval = cpu_to_be32(blk1->hashval); 606 btree[0].before = cpu_to_be32(blk1->blkno); 607 btree[1].hashval = cpu_to_be32(blk2->hashval); 608 btree[1].before = cpu_to_be32(blk2->blkno); 609 nodehdr.count = 2; 610 dp->d_ops->node_hdr_to_disk(node, &nodehdr); 611 612 #ifdef DEBUG 613 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 614 oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { 615 ASSERT(blk1->blkno >= args->geo->leafblk && 616 blk1->blkno < args->geo->freeblk); 617 ASSERT(blk2->blkno >= args->geo->leafblk && 618 blk2->blkno < args->geo->freeblk); 619 } 620 #endif 621 622 /* Header is already logged by xfs_da_node_create */ 623 xfs_trans_log_buf(tp, bp, 624 XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2)); 625 626 return 0; 627 } 628 629 /* 630 * Split the node, rebalance, then add the new entry. 631 */ 632 STATIC int /* error */ 633 xfs_da3_node_split( 634 struct xfs_da_state *state, 635 struct xfs_da_state_blk *oldblk, 636 struct xfs_da_state_blk *newblk, 637 struct xfs_da_state_blk *addblk, 638 int treelevel, 639 int *result) 640 { 641 struct xfs_da_intnode *node; 642 struct xfs_da3_icnode_hdr nodehdr; 643 xfs_dablk_t blkno; 644 int newcount; 645 int error; 646 int useextra; 647 struct xfs_inode *dp = state->args->dp; 648 649 trace_xfs_da_node_split(state->args); 650 651 node = oldblk->bp->b_addr; 652 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 653 654 /* 655 * With V2 dirs the extra block is data or freespace. 656 */ 657 useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK; 658 newcount = 1 + useextra; 659 /* 660 * Do we have to split the node? 661 */ 662 if (nodehdr.count + newcount > state->args->geo->node_ents) { 663 /* 664 * Allocate a new node, add to the doubly linked chain of 665 * nodes, then move some of our excess entries into it. 666 */ 667 error = xfs_da_grow_inode(state->args, &blkno); 668 if (error) 669 return error; /* GROT: dir is inconsistent */ 670 671 error = xfs_da3_node_create(state->args, blkno, treelevel, 672 &newblk->bp, state->args->whichfork); 673 if (error) 674 return error; /* GROT: dir is inconsistent */ 675 newblk->blkno = blkno; 676 newblk->magic = XFS_DA_NODE_MAGIC; 677 xfs_da3_node_rebalance(state, oldblk, newblk); 678 error = xfs_da3_blk_link(state, oldblk, newblk); 679 if (error) 680 return error; 681 *result = 1; 682 } else { 683 *result = 0; 684 } 685 686 /* 687 * Insert the new entry(s) into the correct block 688 * (updating last hashval in the process). 689 * 690 * xfs_da3_node_add() inserts BEFORE the given index, 691 * and as a result of using node_lookup_int() we always 692 * point to a valid entry (not after one), but a split 693 * operation always results in a new block whose hashvals 694 * FOLLOW the current block. 695 * 696 * If we had double-split op below us, then add the extra block too. 697 */ 698 node = oldblk->bp->b_addr; 699 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 700 if (oldblk->index <= nodehdr.count) { 701 oldblk->index++; 702 xfs_da3_node_add(state, oldblk, addblk); 703 if (useextra) { 704 if (state->extraafter) 705 oldblk->index++; 706 xfs_da3_node_add(state, oldblk, &state->extrablk); 707 state->extravalid = 0; 708 } 709 } else { 710 newblk->index++; 711 xfs_da3_node_add(state, newblk, addblk); 712 if (useextra) { 713 if (state->extraafter) 714 newblk->index++; 715 xfs_da3_node_add(state, newblk, &state->extrablk); 716 state->extravalid = 0; 717 } 718 } 719 720 return 0; 721 } 722 723 /* 724 * Balance the btree elements between two intermediate nodes, 725 * usually one full and one empty. 726 * 727 * NOTE: if blk2 is empty, then it will get the upper half of blk1. 728 */ 729 STATIC void 730 xfs_da3_node_rebalance( 731 struct xfs_da_state *state, 732 struct xfs_da_state_blk *blk1, 733 struct xfs_da_state_blk *blk2) 734 { 735 struct xfs_da_intnode *node1; 736 struct xfs_da_intnode *node2; 737 struct xfs_da_intnode *tmpnode; 738 struct xfs_da_node_entry *btree1; 739 struct xfs_da_node_entry *btree2; 740 struct xfs_da_node_entry *btree_s; 741 struct xfs_da_node_entry *btree_d; 742 struct xfs_da3_icnode_hdr nodehdr1; 743 struct xfs_da3_icnode_hdr nodehdr2; 744 struct xfs_trans *tp; 745 int count; 746 int tmp; 747 int swap = 0; 748 struct xfs_inode *dp = state->args->dp; 749 750 trace_xfs_da_node_rebalance(state->args); 751 752 node1 = blk1->bp->b_addr; 753 node2 = blk2->bp->b_addr; 754 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); 755 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); 756 btree1 = dp->d_ops->node_tree_p(node1); 757 btree2 = dp->d_ops->node_tree_p(node2); 758 759 /* 760 * Figure out how many entries need to move, and in which direction. 761 * Swap the nodes around if that makes it simpler. 762 */ 763 if (nodehdr1.count > 0 && nodehdr2.count > 0 && 764 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || 765 (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) < 766 be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) { 767 tmpnode = node1; 768 node1 = node2; 769 node2 = tmpnode; 770 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); 771 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); 772 btree1 = dp->d_ops->node_tree_p(node1); 773 btree2 = dp->d_ops->node_tree_p(node2); 774 swap = 1; 775 } 776 777 count = (nodehdr1.count - nodehdr2.count) / 2; 778 if (count == 0) 779 return; 780 tp = state->args->trans; 781 /* 782 * Two cases: high-to-low and low-to-high. 783 */ 784 if (count > 0) { 785 /* 786 * Move elements in node2 up to make a hole. 787 */ 788 tmp = nodehdr2.count; 789 if (tmp > 0) { 790 tmp *= (uint)sizeof(xfs_da_node_entry_t); 791 btree_s = &btree2[0]; 792 btree_d = &btree2[count]; 793 memmove(btree_d, btree_s, tmp); 794 } 795 796 /* 797 * Move the req'd B-tree elements from high in node1 to 798 * low in node2. 799 */ 800 nodehdr2.count += count; 801 tmp = count * (uint)sizeof(xfs_da_node_entry_t); 802 btree_s = &btree1[nodehdr1.count - count]; 803 btree_d = &btree2[0]; 804 memcpy(btree_d, btree_s, tmp); 805 nodehdr1.count -= count; 806 } else { 807 /* 808 * Move the req'd B-tree elements from low in node2 to 809 * high in node1. 810 */ 811 count = -count; 812 tmp = count * (uint)sizeof(xfs_da_node_entry_t); 813 btree_s = &btree2[0]; 814 btree_d = &btree1[nodehdr1.count]; 815 memcpy(btree_d, btree_s, tmp); 816 nodehdr1.count += count; 817 818 xfs_trans_log_buf(tp, blk1->bp, 819 XFS_DA_LOGRANGE(node1, btree_d, tmp)); 820 821 /* 822 * Move elements in node2 down to fill the hole. 823 */ 824 tmp = nodehdr2.count - count; 825 tmp *= (uint)sizeof(xfs_da_node_entry_t); 826 btree_s = &btree2[count]; 827 btree_d = &btree2[0]; 828 memmove(btree_d, btree_s, tmp); 829 nodehdr2.count -= count; 830 } 831 832 /* 833 * Log header of node 1 and all current bits of node 2. 834 */ 835 dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); 836 xfs_trans_log_buf(tp, blk1->bp, 837 XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); 838 839 dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); 840 xfs_trans_log_buf(tp, blk2->bp, 841 XFS_DA_LOGRANGE(node2, &node2->hdr, 842 dp->d_ops->node_hdr_size + 843 (sizeof(btree2[0]) * nodehdr2.count))); 844 845 /* 846 * Record the last hashval from each block for upward propagation. 847 * (note: don't use the swapped node pointers) 848 */ 849 if (swap) { 850 node1 = blk1->bp->b_addr; 851 node2 = blk2->bp->b_addr; 852 dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); 853 dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); 854 btree1 = dp->d_ops->node_tree_p(node1); 855 btree2 = dp->d_ops->node_tree_p(node2); 856 } 857 blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); 858 blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); 859 860 /* 861 * Adjust the expected index for insertion. 862 */ 863 if (blk1->index >= nodehdr1.count) { 864 blk2->index = blk1->index - nodehdr1.count; 865 blk1->index = nodehdr1.count + 1; /* make it invalid */ 866 } 867 } 868 869 /* 870 * Add a new entry to an intermediate node. 871 */ 872 STATIC void 873 xfs_da3_node_add( 874 struct xfs_da_state *state, 875 struct xfs_da_state_blk *oldblk, 876 struct xfs_da_state_blk *newblk) 877 { 878 struct xfs_da_intnode *node; 879 struct xfs_da3_icnode_hdr nodehdr; 880 struct xfs_da_node_entry *btree; 881 int tmp; 882 struct xfs_inode *dp = state->args->dp; 883 884 trace_xfs_da_node_add(state->args); 885 886 node = oldblk->bp->b_addr; 887 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 888 btree = dp->d_ops->node_tree_p(node); 889 890 ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); 891 ASSERT(newblk->blkno != 0); 892 if (state->args->whichfork == XFS_DATA_FORK) 893 ASSERT(newblk->blkno >= state->args->geo->leafblk && 894 newblk->blkno < state->args->geo->freeblk); 895 896 /* 897 * We may need to make some room before we insert the new node. 898 */ 899 tmp = 0; 900 if (oldblk->index < nodehdr.count) { 901 tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree); 902 memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp); 903 } 904 btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval); 905 btree[oldblk->index].before = cpu_to_be32(newblk->blkno); 906 xfs_trans_log_buf(state->args->trans, oldblk->bp, 907 XFS_DA_LOGRANGE(node, &btree[oldblk->index], 908 tmp + sizeof(*btree))); 909 910 nodehdr.count += 1; 911 dp->d_ops->node_hdr_to_disk(node, &nodehdr); 912 xfs_trans_log_buf(state->args->trans, oldblk->bp, 913 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); 914 915 /* 916 * Copy the last hash value from the oldblk to propagate upwards. 917 */ 918 oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); 919 } 920 921 /*======================================================================== 922 * Routines used for shrinking the Btree. 923 *========================================================================*/ 924 925 /* 926 * Deallocate an empty leaf node, remove it from its parent, 927 * possibly deallocating that block, etc... 928 */ 929 int 930 xfs_da3_join( 931 struct xfs_da_state *state) 932 { 933 struct xfs_da_state_blk *drop_blk; 934 struct xfs_da_state_blk *save_blk; 935 int action = 0; 936 int error; 937 938 trace_xfs_da_join(state->args); 939 940 drop_blk = &state->path.blk[ state->path.active-1 ]; 941 save_blk = &state->altpath.blk[ state->path.active-1 ]; 942 ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC); 943 ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC || 944 drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); 945 946 /* 947 * Walk back up the tree joining/deallocating as necessary. 948 * When we stop dropping blocks, break out. 949 */ 950 for ( ; state->path.active >= 2; drop_blk--, save_blk--, 951 state->path.active--) { 952 /* 953 * See if we can combine the block with a neighbor. 954 * (action == 0) => no options, just leave 955 * (action == 1) => coalesce, then unlink 956 * (action == 2) => block empty, unlink it 957 */ 958 switch (drop_blk->magic) { 959 case XFS_ATTR_LEAF_MAGIC: 960 error = xfs_attr3_leaf_toosmall(state, &action); 961 if (error) 962 return error; 963 if (action == 0) 964 return 0; 965 xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); 966 break; 967 case XFS_DIR2_LEAFN_MAGIC: 968 error = xfs_dir2_leafn_toosmall(state, &action); 969 if (error) 970 return error; 971 if (action == 0) 972 return 0; 973 xfs_dir2_leafn_unbalance(state, drop_blk, save_blk); 974 break; 975 case XFS_DA_NODE_MAGIC: 976 /* 977 * Remove the offending node, fixup hashvals, 978 * check for a toosmall neighbor. 979 */ 980 xfs_da3_node_remove(state, drop_blk); 981 xfs_da3_fixhashpath(state, &state->path); 982 error = xfs_da3_node_toosmall(state, &action); 983 if (error) 984 return error; 985 if (action == 0) 986 return 0; 987 xfs_da3_node_unbalance(state, drop_blk, save_blk); 988 break; 989 } 990 xfs_da3_fixhashpath(state, &state->altpath); 991 error = xfs_da3_blk_unlink(state, drop_blk, save_blk); 992 xfs_da_state_kill_altpath(state); 993 if (error) 994 return error; 995 error = xfs_da_shrink_inode(state->args, drop_blk->blkno, 996 drop_blk->bp); 997 drop_blk->bp = NULL; 998 if (error) 999 return error; 1000 } 1001 /* 1002 * We joined all the way to the top. If it turns out that 1003 * we only have one entry in the root, make the child block 1004 * the new root. 1005 */ 1006 xfs_da3_node_remove(state, drop_blk); 1007 xfs_da3_fixhashpath(state, &state->path); 1008 error = xfs_da3_root_join(state, &state->path.blk[0]); 1009 return error; 1010 } 1011 1012 #ifdef DEBUG 1013 static void 1014 xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) 1015 { 1016 __be16 magic = blkinfo->magic; 1017 1018 if (level == 1) { 1019 ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1020 magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || 1021 magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || 1022 magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); 1023 } else { 1024 ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 1025 magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); 1026 } 1027 ASSERT(!blkinfo->forw); 1028 ASSERT(!blkinfo->back); 1029 } 1030 #else /* !DEBUG */ 1031 #define xfs_da_blkinfo_onlychild_validate(blkinfo, level) 1032 #endif /* !DEBUG */ 1033 1034 /* 1035 * We have only one entry in the root. Copy the only remaining child of 1036 * the old root to block 0 as the new root node. 1037 */ 1038 STATIC int 1039 xfs_da3_root_join( 1040 struct xfs_da_state *state, 1041 struct xfs_da_state_blk *root_blk) 1042 { 1043 struct xfs_da_intnode *oldroot; 1044 struct xfs_da_args *args; 1045 xfs_dablk_t child; 1046 struct xfs_buf *bp; 1047 struct xfs_da3_icnode_hdr oldroothdr; 1048 struct xfs_da_node_entry *btree; 1049 int error; 1050 struct xfs_inode *dp = state->args->dp; 1051 1052 trace_xfs_da_root_join(state->args); 1053 1054 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); 1055 1056 args = state->args; 1057 oldroot = root_blk->bp->b_addr; 1058 dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); 1059 ASSERT(oldroothdr.forw == 0); 1060 ASSERT(oldroothdr.back == 0); 1061 1062 /* 1063 * If the root has more than one child, then don't do anything. 1064 */ 1065 if (oldroothdr.count > 1) 1066 return 0; 1067 1068 /* 1069 * Read in the (only) child block, then copy those bytes into 1070 * the root block's buffer and free the original child block. 1071 */ 1072 btree = dp->d_ops->node_tree_p(oldroot); 1073 child = be32_to_cpu(btree[0].before); 1074 ASSERT(child != 0); 1075 error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, 1076 args->whichfork); 1077 if (error) 1078 return error; 1079 xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); 1080 1081 /* 1082 * This could be copying a leaf back into the root block in the case of 1083 * there only being a single leaf block left in the tree. Hence we have 1084 * to update the b_ops pointer as well to match the buffer type change 1085 * that could occur. For dir3 blocks we also need to update the block 1086 * number in the buffer header. 1087 */ 1088 memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); 1089 root_blk->bp->b_ops = bp->b_ops; 1090 xfs_trans_buf_copy_type(root_blk->bp, bp); 1091 if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { 1092 struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; 1093 da3->blkno = cpu_to_be64(root_blk->bp->b_bn); 1094 } 1095 xfs_trans_log_buf(args->trans, root_blk->bp, 0, 1096 args->geo->blksize - 1); 1097 error = xfs_da_shrink_inode(args, child, bp); 1098 return error; 1099 } 1100 1101 /* 1102 * Check a node block and its neighbors to see if the block should be 1103 * collapsed into one or the other neighbor. Always keep the block 1104 * with the smaller block number. 1105 * If the current block is over 50% full, don't try to join it, return 0. 1106 * If the block is empty, fill in the state structure and return 2. 1107 * If it can be collapsed, fill in the state structure and return 1. 1108 * If nothing can be done, return 0. 1109 */ 1110 STATIC int 1111 xfs_da3_node_toosmall( 1112 struct xfs_da_state *state, 1113 int *action) 1114 { 1115 struct xfs_da_intnode *node; 1116 struct xfs_da_state_blk *blk; 1117 struct xfs_da_blkinfo *info; 1118 xfs_dablk_t blkno; 1119 struct xfs_buf *bp; 1120 struct xfs_da3_icnode_hdr nodehdr; 1121 int count; 1122 int forward; 1123 int error; 1124 int retval; 1125 int i; 1126 struct xfs_inode *dp = state->args->dp; 1127 1128 trace_xfs_da_node_toosmall(state->args); 1129 1130 /* 1131 * Check for the degenerate case of the block being over 50% full. 1132 * If so, it's not worth even looking to see if we might be able 1133 * to coalesce with a sibling. 1134 */ 1135 blk = &state->path.blk[ state->path.active-1 ]; 1136 info = blk->bp->b_addr; 1137 node = (xfs_da_intnode_t *)info; 1138 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1139 if (nodehdr.count > (state->args->geo->node_ents >> 1)) { 1140 *action = 0; /* blk over 50%, don't try to join */ 1141 return 0; /* blk over 50%, don't try to join */ 1142 } 1143 1144 /* 1145 * Check for the degenerate case of the block being empty. 1146 * If the block is empty, we'll simply delete it, no need to 1147 * coalesce it with a sibling block. We choose (arbitrarily) 1148 * to merge with the forward block unless it is NULL. 1149 */ 1150 if (nodehdr.count == 0) { 1151 /* 1152 * Make altpath point to the block we want to keep and 1153 * path point to the block we want to drop (this one). 1154 */ 1155 forward = (info->forw != 0); 1156 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1157 error = xfs_da3_path_shift(state, &state->altpath, forward, 1158 0, &retval); 1159 if (error) 1160 return error; 1161 if (retval) { 1162 *action = 0; 1163 } else { 1164 *action = 2; 1165 } 1166 return 0; 1167 } 1168 1169 /* 1170 * Examine each sibling block to see if we can coalesce with 1171 * at least 25% free space to spare. We need to figure out 1172 * whether to merge with the forward or the backward block. 1173 * We prefer coalescing with the lower numbered sibling so as 1174 * to shrink a directory over time. 1175 */ 1176 count = state->args->geo->node_ents; 1177 count -= state->args->geo->node_ents >> 2; 1178 count -= nodehdr.count; 1179 1180 /* start with smaller blk num */ 1181 forward = nodehdr.forw < nodehdr.back; 1182 for (i = 0; i < 2; forward = !forward, i++) { 1183 struct xfs_da3_icnode_hdr thdr; 1184 if (forward) 1185 blkno = nodehdr.forw; 1186 else 1187 blkno = nodehdr.back; 1188 if (blkno == 0) 1189 continue; 1190 error = xfs_da3_node_read(state->args->trans, dp, 1191 blkno, -1, &bp, state->args->whichfork); 1192 if (error) 1193 return error; 1194 1195 node = bp->b_addr; 1196 dp->d_ops->node_hdr_from_disk(&thdr, node); 1197 xfs_trans_brelse(state->args->trans, bp); 1198 1199 if (count - thdr.count >= 0) 1200 break; /* fits with at least 25% to spare */ 1201 } 1202 if (i >= 2) { 1203 *action = 0; 1204 return 0; 1205 } 1206 1207 /* 1208 * Make altpath point to the block we want to keep (the lower 1209 * numbered block) and path point to the block we want to drop. 1210 */ 1211 memcpy(&state->altpath, &state->path, sizeof(state->path)); 1212 if (blkno < blk->blkno) { 1213 error = xfs_da3_path_shift(state, &state->altpath, forward, 1214 0, &retval); 1215 } else { 1216 error = xfs_da3_path_shift(state, &state->path, forward, 1217 0, &retval); 1218 } 1219 if (error) 1220 return error; 1221 if (retval) { 1222 *action = 0; 1223 return 0; 1224 } 1225 *action = 1; 1226 return 0; 1227 } 1228 1229 /* 1230 * Pick up the last hashvalue from an intermediate node. 1231 */ 1232 STATIC uint 1233 xfs_da3_node_lasthash( 1234 struct xfs_inode *dp, 1235 struct xfs_buf *bp, 1236 int *count) 1237 { 1238 struct xfs_da_intnode *node; 1239 struct xfs_da_node_entry *btree; 1240 struct xfs_da3_icnode_hdr nodehdr; 1241 1242 node = bp->b_addr; 1243 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1244 if (count) 1245 *count = nodehdr.count; 1246 if (!nodehdr.count) 1247 return 0; 1248 btree = dp->d_ops->node_tree_p(node); 1249 return be32_to_cpu(btree[nodehdr.count - 1].hashval); 1250 } 1251 1252 /* 1253 * Walk back up the tree adjusting hash values as necessary, 1254 * when we stop making changes, return. 1255 */ 1256 void 1257 xfs_da3_fixhashpath( 1258 struct xfs_da_state *state, 1259 struct xfs_da_state_path *path) 1260 { 1261 struct xfs_da_state_blk *blk; 1262 struct xfs_da_intnode *node; 1263 struct xfs_da_node_entry *btree; 1264 xfs_dahash_t lasthash=0; 1265 int level; 1266 int count; 1267 struct xfs_inode *dp = state->args->dp; 1268 1269 trace_xfs_da_fixhashpath(state->args); 1270 1271 level = path->active-1; 1272 blk = &path->blk[ level ]; 1273 switch (blk->magic) { 1274 case XFS_ATTR_LEAF_MAGIC: 1275 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count); 1276 if (count == 0) 1277 return; 1278 break; 1279 case XFS_DIR2_LEAFN_MAGIC: 1280 lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); 1281 if (count == 0) 1282 return; 1283 break; 1284 case XFS_DA_NODE_MAGIC: 1285 lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); 1286 if (count == 0) 1287 return; 1288 break; 1289 } 1290 for (blk--, level--; level >= 0; blk--, level--) { 1291 struct xfs_da3_icnode_hdr nodehdr; 1292 1293 node = blk->bp->b_addr; 1294 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1295 btree = dp->d_ops->node_tree_p(node); 1296 if (be32_to_cpu(btree[blk->index].hashval) == lasthash) 1297 break; 1298 blk->hashval = lasthash; 1299 btree[blk->index].hashval = cpu_to_be32(lasthash); 1300 xfs_trans_log_buf(state->args->trans, blk->bp, 1301 XFS_DA_LOGRANGE(node, &btree[blk->index], 1302 sizeof(*btree))); 1303 1304 lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval); 1305 } 1306 } 1307 1308 /* 1309 * Remove an entry from an intermediate node. 1310 */ 1311 STATIC void 1312 xfs_da3_node_remove( 1313 struct xfs_da_state *state, 1314 struct xfs_da_state_blk *drop_blk) 1315 { 1316 struct xfs_da_intnode *node; 1317 struct xfs_da3_icnode_hdr nodehdr; 1318 struct xfs_da_node_entry *btree; 1319 int index; 1320 int tmp; 1321 struct xfs_inode *dp = state->args->dp; 1322 1323 trace_xfs_da_node_remove(state->args); 1324 1325 node = drop_blk->bp->b_addr; 1326 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1327 ASSERT(drop_blk->index < nodehdr.count); 1328 ASSERT(drop_blk->index >= 0); 1329 1330 /* 1331 * Copy over the offending entry, or just zero it out. 1332 */ 1333 index = drop_blk->index; 1334 btree = dp->d_ops->node_tree_p(node); 1335 if (index < nodehdr.count - 1) { 1336 tmp = nodehdr.count - index - 1; 1337 tmp *= (uint)sizeof(xfs_da_node_entry_t); 1338 memmove(&btree[index], &btree[index + 1], tmp); 1339 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1340 XFS_DA_LOGRANGE(node, &btree[index], tmp)); 1341 index = nodehdr.count - 1; 1342 } 1343 memset(&btree[index], 0, sizeof(xfs_da_node_entry_t)); 1344 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1345 XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); 1346 nodehdr.count -= 1; 1347 dp->d_ops->node_hdr_to_disk(node, &nodehdr); 1348 xfs_trans_log_buf(state->args->trans, drop_blk->bp, 1349 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); 1350 1351 /* 1352 * Copy the last hash value from the block to propagate upwards. 1353 */ 1354 drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval); 1355 } 1356 1357 /* 1358 * Unbalance the elements between two intermediate nodes, 1359 * move all Btree elements from one node into another. 1360 */ 1361 STATIC void 1362 xfs_da3_node_unbalance( 1363 struct xfs_da_state *state, 1364 struct xfs_da_state_blk *drop_blk, 1365 struct xfs_da_state_blk *save_blk) 1366 { 1367 struct xfs_da_intnode *drop_node; 1368 struct xfs_da_intnode *save_node; 1369 struct xfs_da_node_entry *drop_btree; 1370 struct xfs_da_node_entry *save_btree; 1371 struct xfs_da3_icnode_hdr drop_hdr; 1372 struct xfs_da3_icnode_hdr save_hdr; 1373 struct xfs_trans *tp; 1374 int sindex; 1375 int tmp; 1376 struct xfs_inode *dp = state->args->dp; 1377 1378 trace_xfs_da_node_unbalance(state->args); 1379 1380 drop_node = drop_blk->bp->b_addr; 1381 save_node = save_blk->bp->b_addr; 1382 dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); 1383 dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); 1384 drop_btree = dp->d_ops->node_tree_p(drop_node); 1385 save_btree = dp->d_ops->node_tree_p(save_node); 1386 tp = state->args->trans; 1387 1388 /* 1389 * If the dying block has lower hashvals, then move all the 1390 * elements in the remaining block up to make a hole. 1391 */ 1392 if ((be32_to_cpu(drop_btree[0].hashval) < 1393 be32_to_cpu(save_btree[0].hashval)) || 1394 (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) < 1395 be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) { 1396 /* XXX: check this - is memmove dst correct? */ 1397 tmp = save_hdr.count * sizeof(xfs_da_node_entry_t); 1398 memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp); 1399 1400 sindex = 0; 1401 xfs_trans_log_buf(tp, save_blk->bp, 1402 XFS_DA_LOGRANGE(save_node, &save_btree[0], 1403 (save_hdr.count + drop_hdr.count) * 1404 sizeof(xfs_da_node_entry_t))); 1405 } else { 1406 sindex = save_hdr.count; 1407 xfs_trans_log_buf(tp, save_blk->bp, 1408 XFS_DA_LOGRANGE(save_node, &save_btree[sindex], 1409 drop_hdr.count * sizeof(xfs_da_node_entry_t))); 1410 } 1411 1412 /* 1413 * Move all the B-tree elements from drop_blk to save_blk. 1414 */ 1415 tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t); 1416 memcpy(&save_btree[sindex], &drop_btree[0], tmp); 1417 save_hdr.count += drop_hdr.count; 1418 1419 dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); 1420 xfs_trans_log_buf(tp, save_blk->bp, 1421 XFS_DA_LOGRANGE(save_node, &save_node->hdr, 1422 dp->d_ops->node_hdr_size)); 1423 1424 /* 1425 * Save the last hashval in the remaining block for upward propagation. 1426 */ 1427 save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval); 1428 } 1429 1430 /*======================================================================== 1431 * Routines used for finding things in the Btree. 1432 *========================================================================*/ 1433 1434 /* 1435 * Walk down the Btree looking for a particular filename, filling 1436 * in the state structure as we go. 1437 * 1438 * We will set the state structure to point to each of the elements 1439 * in each of the nodes where either the hashval is or should be. 1440 * 1441 * We support duplicate hashval's so for each entry in the current 1442 * node that could contain the desired hashval, descend. This is a 1443 * pruned depth-first tree search. 1444 */ 1445 int /* error */ 1446 xfs_da3_node_lookup_int( 1447 struct xfs_da_state *state, 1448 int *result) 1449 { 1450 struct xfs_da_state_blk *blk; 1451 struct xfs_da_blkinfo *curr; 1452 struct xfs_da_intnode *node; 1453 struct xfs_da_node_entry *btree; 1454 struct xfs_da3_icnode_hdr nodehdr; 1455 struct xfs_da_args *args; 1456 xfs_dablk_t blkno; 1457 xfs_dahash_t hashval; 1458 xfs_dahash_t btreehashval; 1459 int probe; 1460 int span; 1461 int max; 1462 int error; 1463 int retval; 1464 struct xfs_inode *dp = state->args->dp; 1465 1466 args = state->args; 1467 1468 /* 1469 * Descend thru the B-tree searching each level for the right 1470 * node to use, until the right hashval is found. 1471 */ 1472 blkno = (args->whichfork == XFS_DATA_FORK)? args->geo->leafblk : 0; 1473 for (blk = &state->path.blk[0], state->path.active = 1; 1474 state->path.active <= XFS_DA_NODE_MAXDEPTH; 1475 blk++, state->path.active++) { 1476 /* 1477 * Read the next node down in the tree. 1478 */ 1479 blk->blkno = blkno; 1480 error = xfs_da3_node_read(args->trans, args->dp, blkno, 1481 -1, &blk->bp, args->whichfork); 1482 if (error) { 1483 blk->blkno = 0; 1484 state->path.active--; 1485 return error; 1486 } 1487 curr = blk->bp->b_addr; 1488 blk->magic = be16_to_cpu(curr->magic); 1489 1490 if (blk->magic == XFS_ATTR_LEAF_MAGIC || 1491 blk->magic == XFS_ATTR3_LEAF_MAGIC) { 1492 blk->magic = XFS_ATTR_LEAF_MAGIC; 1493 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1494 break; 1495 } 1496 1497 if (blk->magic == XFS_DIR2_LEAFN_MAGIC || 1498 blk->magic == XFS_DIR3_LEAFN_MAGIC) { 1499 blk->magic = XFS_DIR2_LEAFN_MAGIC; 1500 blk->hashval = xfs_dir2_leafn_lasthash(args->dp, 1501 blk->bp, NULL); 1502 break; 1503 } 1504 1505 blk->magic = XFS_DA_NODE_MAGIC; 1506 1507 1508 /* 1509 * Search an intermediate node for a match. 1510 */ 1511 node = blk->bp->b_addr; 1512 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1513 btree = dp->d_ops->node_tree_p(node); 1514 1515 max = nodehdr.count; 1516 blk->hashval = be32_to_cpu(btree[max - 1].hashval); 1517 1518 /* 1519 * Binary search. (note: small blocks will skip loop) 1520 */ 1521 probe = span = max / 2; 1522 hashval = args->hashval; 1523 while (span > 4) { 1524 span /= 2; 1525 btreehashval = be32_to_cpu(btree[probe].hashval); 1526 if (btreehashval < hashval) 1527 probe += span; 1528 else if (btreehashval > hashval) 1529 probe -= span; 1530 else 1531 break; 1532 } 1533 ASSERT((probe >= 0) && (probe < max)); 1534 ASSERT((span <= 4) || 1535 (be32_to_cpu(btree[probe].hashval) == hashval)); 1536 1537 /* 1538 * Since we may have duplicate hashval's, find the first 1539 * matching hashval in the node. 1540 */ 1541 while (probe > 0 && 1542 be32_to_cpu(btree[probe].hashval) >= hashval) { 1543 probe--; 1544 } 1545 while (probe < max && 1546 be32_to_cpu(btree[probe].hashval) < hashval) { 1547 probe++; 1548 } 1549 1550 /* 1551 * Pick the right block to descend on. 1552 */ 1553 if (probe == max) { 1554 blk->index = max - 1; 1555 blkno = be32_to_cpu(btree[max - 1].before); 1556 } else { 1557 blk->index = probe; 1558 blkno = be32_to_cpu(btree[probe].before); 1559 } 1560 } 1561 1562 /* 1563 * A leaf block that ends in the hashval that we are interested in 1564 * (final hashval == search hashval) means that the next block may 1565 * contain more entries with the same hashval, shift upward to the 1566 * next leaf and keep searching. 1567 */ 1568 for (;;) { 1569 if (blk->magic == XFS_DIR2_LEAFN_MAGIC) { 1570 retval = xfs_dir2_leafn_lookup_int(blk->bp, args, 1571 &blk->index, state); 1572 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1573 retval = xfs_attr3_leaf_lookup_int(blk->bp, args); 1574 blk->index = args->index; 1575 args->blkno = blk->blkno; 1576 } else { 1577 ASSERT(0); 1578 return -EFSCORRUPTED; 1579 } 1580 if (((retval == -ENOENT) || (retval == -ENOATTR)) && 1581 (blk->hashval == args->hashval)) { 1582 error = xfs_da3_path_shift(state, &state->path, 1, 1, 1583 &retval); 1584 if (error) 1585 return error; 1586 if (retval == 0) { 1587 continue; 1588 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1589 /* path_shift() gives ENOENT */ 1590 retval = -ENOATTR; 1591 } 1592 } 1593 break; 1594 } 1595 *result = retval; 1596 return 0; 1597 } 1598 1599 /*======================================================================== 1600 * Utility routines. 1601 *========================================================================*/ 1602 1603 /* 1604 * Compare two intermediate nodes for "order". 1605 */ 1606 STATIC int 1607 xfs_da3_node_order( 1608 struct xfs_inode *dp, 1609 struct xfs_buf *node1_bp, 1610 struct xfs_buf *node2_bp) 1611 { 1612 struct xfs_da_intnode *node1; 1613 struct xfs_da_intnode *node2; 1614 struct xfs_da_node_entry *btree1; 1615 struct xfs_da_node_entry *btree2; 1616 struct xfs_da3_icnode_hdr node1hdr; 1617 struct xfs_da3_icnode_hdr node2hdr; 1618 1619 node1 = node1_bp->b_addr; 1620 node2 = node2_bp->b_addr; 1621 dp->d_ops->node_hdr_from_disk(&node1hdr, node1); 1622 dp->d_ops->node_hdr_from_disk(&node2hdr, node2); 1623 btree1 = dp->d_ops->node_tree_p(node1); 1624 btree2 = dp->d_ops->node_tree_p(node2); 1625 1626 if (node1hdr.count > 0 && node2hdr.count > 0 && 1627 ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || 1628 (be32_to_cpu(btree2[node2hdr.count - 1].hashval) < 1629 be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) { 1630 return 1; 1631 } 1632 return 0; 1633 } 1634 1635 /* 1636 * Link a new block into a doubly linked list of blocks (of whatever type). 1637 */ 1638 int /* error */ 1639 xfs_da3_blk_link( 1640 struct xfs_da_state *state, 1641 struct xfs_da_state_blk *old_blk, 1642 struct xfs_da_state_blk *new_blk) 1643 { 1644 struct xfs_da_blkinfo *old_info; 1645 struct xfs_da_blkinfo *new_info; 1646 struct xfs_da_blkinfo *tmp_info; 1647 struct xfs_da_args *args; 1648 struct xfs_buf *bp; 1649 int before = 0; 1650 int error; 1651 struct xfs_inode *dp = state->args->dp; 1652 1653 /* 1654 * Set up environment. 1655 */ 1656 args = state->args; 1657 ASSERT(args != NULL); 1658 old_info = old_blk->bp->b_addr; 1659 new_info = new_blk->bp->b_addr; 1660 ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC || 1661 old_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1662 old_blk->magic == XFS_ATTR_LEAF_MAGIC); 1663 1664 switch (old_blk->magic) { 1665 case XFS_ATTR_LEAF_MAGIC: 1666 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); 1667 break; 1668 case XFS_DIR2_LEAFN_MAGIC: 1669 before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); 1670 break; 1671 case XFS_DA_NODE_MAGIC: 1672 before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); 1673 break; 1674 } 1675 1676 /* 1677 * Link blocks in appropriate order. 1678 */ 1679 if (before) { 1680 /* 1681 * Link new block in before existing block. 1682 */ 1683 trace_xfs_da_link_before(args); 1684 new_info->forw = cpu_to_be32(old_blk->blkno); 1685 new_info->back = old_info->back; 1686 if (old_info->back) { 1687 error = xfs_da3_node_read(args->trans, dp, 1688 be32_to_cpu(old_info->back), 1689 -1, &bp, args->whichfork); 1690 if (error) 1691 return error; 1692 ASSERT(bp != NULL); 1693 tmp_info = bp->b_addr; 1694 ASSERT(tmp_info->magic == old_info->magic); 1695 ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno); 1696 tmp_info->forw = cpu_to_be32(new_blk->blkno); 1697 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1698 } 1699 old_info->back = cpu_to_be32(new_blk->blkno); 1700 } else { 1701 /* 1702 * Link new block in after existing block. 1703 */ 1704 trace_xfs_da_link_after(args); 1705 new_info->forw = old_info->forw; 1706 new_info->back = cpu_to_be32(old_blk->blkno); 1707 if (old_info->forw) { 1708 error = xfs_da3_node_read(args->trans, dp, 1709 be32_to_cpu(old_info->forw), 1710 -1, &bp, args->whichfork); 1711 if (error) 1712 return error; 1713 ASSERT(bp != NULL); 1714 tmp_info = bp->b_addr; 1715 ASSERT(tmp_info->magic == old_info->magic); 1716 ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); 1717 tmp_info->back = cpu_to_be32(new_blk->blkno); 1718 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1719 } 1720 old_info->forw = cpu_to_be32(new_blk->blkno); 1721 } 1722 1723 xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); 1724 xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); 1725 return 0; 1726 } 1727 1728 /* 1729 * Unlink a block from a doubly linked list of blocks. 1730 */ 1731 STATIC int /* error */ 1732 xfs_da3_blk_unlink( 1733 struct xfs_da_state *state, 1734 struct xfs_da_state_blk *drop_blk, 1735 struct xfs_da_state_blk *save_blk) 1736 { 1737 struct xfs_da_blkinfo *drop_info; 1738 struct xfs_da_blkinfo *save_info; 1739 struct xfs_da_blkinfo *tmp_info; 1740 struct xfs_da_args *args; 1741 struct xfs_buf *bp; 1742 int error; 1743 1744 /* 1745 * Set up environment. 1746 */ 1747 args = state->args; 1748 ASSERT(args != NULL); 1749 save_info = save_blk->bp->b_addr; 1750 drop_info = drop_blk->bp->b_addr; 1751 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || 1752 save_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1753 save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1754 ASSERT(save_blk->magic == drop_blk->magic); 1755 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || 1756 (be32_to_cpu(save_info->back) == drop_blk->blkno)); 1757 ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || 1758 (be32_to_cpu(drop_info->back) == save_blk->blkno)); 1759 1760 /* 1761 * Unlink the leaf block from the doubly linked chain of leaves. 1762 */ 1763 if (be32_to_cpu(save_info->back) == drop_blk->blkno) { 1764 trace_xfs_da_unlink_back(args); 1765 save_info->back = drop_info->back; 1766 if (drop_info->back) { 1767 error = xfs_da3_node_read(args->trans, args->dp, 1768 be32_to_cpu(drop_info->back), 1769 -1, &bp, args->whichfork); 1770 if (error) 1771 return error; 1772 ASSERT(bp != NULL); 1773 tmp_info = bp->b_addr; 1774 ASSERT(tmp_info->magic == save_info->magic); 1775 ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); 1776 tmp_info->forw = cpu_to_be32(save_blk->blkno); 1777 xfs_trans_log_buf(args->trans, bp, 0, 1778 sizeof(*tmp_info) - 1); 1779 } 1780 } else { 1781 trace_xfs_da_unlink_forward(args); 1782 save_info->forw = drop_info->forw; 1783 if (drop_info->forw) { 1784 error = xfs_da3_node_read(args->trans, args->dp, 1785 be32_to_cpu(drop_info->forw), 1786 -1, &bp, args->whichfork); 1787 if (error) 1788 return error; 1789 ASSERT(bp != NULL); 1790 tmp_info = bp->b_addr; 1791 ASSERT(tmp_info->magic == save_info->magic); 1792 ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); 1793 tmp_info->back = cpu_to_be32(save_blk->blkno); 1794 xfs_trans_log_buf(args->trans, bp, 0, 1795 sizeof(*tmp_info) - 1); 1796 } 1797 } 1798 1799 xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); 1800 return 0; 1801 } 1802 1803 /* 1804 * Move a path "forward" or "!forward" one block at the current level. 1805 * 1806 * This routine will adjust a "path" to point to the next block 1807 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the 1808 * Btree, including updating pointers to the intermediate nodes between 1809 * the new bottom and the root. 1810 */ 1811 int /* error */ 1812 xfs_da3_path_shift( 1813 struct xfs_da_state *state, 1814 struct xfs_da_state_path *path, 1815 int forward, 1816 int release, 1817 int *result) 1818 { 1819 struct xfs_da_state_blk *blk; 1820 struct xfs_da_blkinfo *info; 1821 struct xfs_da_intnode *node; 1822 struct xfs_da_args *args; 1823 struct xfs_da_node_entry *btree; 1824 struct xfs_da3_icnode_hdr nodehdr; 1825 xfs_dablk_t blkno = 0; 1826 int level; 1827 int error; 1828 struct xfs_inode *dp = state->args->dp; 1829 1830 trace_xfs_da_path_shift(state->args); 1831 1832 /* 1833 * Roll up the Btree looking for the first block where our 1834 * current index is not at the edge of the block. Note that 1835 * we skip the bottom layer because we want the sibling block. 1836 */ 1837 args = state->args; 1838 ASSERT(args != NULL); 1839 ASSERT(path != NULL); 1840 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1841 level = (path->active-1) - 1; /* skip bottom layer in path */ 1842 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1843 node = blk->bp->b_addr; 1844 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1845 btree = dp->d_ops->node_tree_p(node); 1846 1847 if (forward && (blk->index < nodehdr.count - 1)) { 1848 blk->index++; 1849 blkno = be32_to_cpu(btree[blk->index].before); 1850 break; 1851 } else if (!forward && (blk->index > 0)) { 1852 blk->index--; 1853 blkno = be32_to_cpu(btree[blk->index].before); 1854 break; 1855 } 1856 } 1857 if (level < 0) { 1858 *result = -ENOENT; /* we're out of our tree */ 1859 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 1860 return 0; 1861 } 1862 1863 /* 1864 * Roll down the edge of the subtree until we reach the 1865 * same depth we were at originally. 1866 */ 1867 for (blk++, level++; level < path->active; blk++, level++) { 1868 /* 1869 * Release the old block. 1870 * (if it's dirty, trans won't actually let go) 1871 */ 1872 if (release) 1873 xfs_trans_brelse(args->trans, blk->bp); 1874 1875 /* 1876 * Read the next child block. 1877 */ 1878 blk->blkno = blkno; 1879 error = xfs_da3_node_read(args->trans, dp, blkno, -1, 1880 &blk->bp, args->whichfork); 1881 if (error) 1882 return error; 1883 info = blk->bp->b_addr; 1884 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 1885 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || 1886 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1887 info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || 1888 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || 1889 info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); 1890 1891 1892 /* 1893 * Note: we flatten the magic number to a single type so we 1894 * don't have to compare against crc/non-crc types elsewhere. 1895 */ 1896 switch (be16_to_cpu(info->magic)) { 1897 case XFS_DA_NODE_MAGIC: 1898 case XFS_DA3_NODE_MAGIC: 1899 blk->magic = XFS_DA_NODE_MAGIC; 1900 node = (xfs_da_intnode_t *)info; 1901 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1902 btree = dp->d_ops->node_tree_p(node); 1903 blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); 1904 if (forward) 1905 blk->index = 0; 1906 else 1907 blk->index = nodehdr.count - 1; 1908 blkno = be32_to_cpu(btree[blk->index].before); 1909 break; 1910 case XFS_ATTR_LEAF_MAGIC: 1911 case XFS_ATTR3_LEAF_MAGIC: 1912 blk->magic = XFS_ATTR_LEAF_MAGIC; 1913 ASSERT(level == path->active-1); 1914 blk->index = 0; 1915 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1916 break; 1917 case XFS_DIR2_LEAFN_MAGIC: 1918 case XFS_DIR3_LEAFN_MAGIC: 1919 blk->magic = XFS_DIR2_LEAFN_MAGIC; 1920 ASSERT(level == path->active-1); 1921 blk->index = 0; 1922 blk->hashval = xfs_dir2_leafn_lasthash(args->dp, 1923 blk->bp, NULL); 1924 break; 1925 default: 1926 ASSERT(0); 1927 break; 1928 } 1929 } 1930 *result = 0; 1931 return 0; 1932 } 1933 1934 1935 /*======================================================================== 1936 * Utility routines. 1937 *========================================================================*/ 1938 1939 /* 1940 * Implement a simple hash on a character string. 1941 * Rotate the hash value by 7 bits, then XOR each character in. 1942 * This is implemented with some source-level loop unrolling. 1943 */ 1944 xfs_dahash_t 1945 xfs_da_hashname(const __uint8_t *name, int namelen) 1946 { 1947 xfs_dahash_t hash; 1948 1949 /* 1950 * Do four characters at a time as long as we can. 1951 */ 1952 for (hash = 0; namelen >= 4; namelen -= 4, name += 4) 1953 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ 1954 (name[3] << 0) ^ rol32(hash, 7 * 4); 1955 1956 /* 1957 * Now do the rest of the characters. 1958 */ 1959 switch (namelen) { 1960 case 3: 1961 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ 1962 rol32(hash, 7 * 3); 1963 case 2: 1964 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); 1965 case 1: 1966 return (name[0] << 0) ^ rol32(hash, 7 * 1); 1967 default: /* case 0: */ 1968 return hash; 1969 } 1970 } 1971 1972 enum xfs_dacmp 1973 xfs_da_compname( 1974 struct xfs_da_args *args, 1975 const unsigned char *name, 1976 int len) 1977 { 1978 return (args->namelen == len && memcmp(args->name, name, len) == 0) ? 1979 XFS_CMP_EXACT : XFS_CMP_DIFFERENT; 1980 } 1981 1982 static xfs_dahash_t 1983 xfs_default_hashname( 1984 struct xfs_name *name) 1985 { 1986 return xfs_da_hashname(name->name, name->len); 1987 } 1988 1989 const struct xfs_nameops xfs_default_nameops = { 1990 .hashname = xfs_default_hashname, 1991 .compname = xfs_da_compname 1992 }; 1993 1994 int 1995 xfs_da_grow_inode_int( 1996 struct xfs_da_args *args, 1997 xfs_fileoff_t *bno, 1998 int count) 1999 { 2000 struct xfs_trans *tp = args->trans; 2001 struct xfs_inode *dp = args->dp; 2002 int w = args->whichfork; 2003 xfs_rfsblock_t nblks = dp->i_d.di_nblocks; 2004 struct xfs_bmbt_irec map, *mapp; 2005 int nmap, error, got, i, mapi; 2006 2007 /* 2008 * Find a spot in the file space to put the new block. 2009 */ 2010 error = xfs_bmap_first_unused(tp, dp, count, bno, w); 2011 if (error) 2012 return error; 2013 2014 /* 2015 * Try mapping it in one filesystem block. 2016 */ 2017 nmap = 1; 2018 ASSERT(args->firstblock != NULL); 2019 error = xfs_bmapi_write(tp, dp, *bno, count, 2020 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 2021 args->firstblock, args->total, &map, &nmap, 2022 args->flist); 2023 if (error) 2024 return error; 2025 2026 ASSERT(nmap <= 1); 2027 if (nmap == 1) { 2028 mapp = ↦ 2029 mapi = 1; 2030 } else if (nmap == 0 && count > 1) { 2031 xfs_fileoff_t b; 2032 int c; 2033 2034 /* 2035 * If we didn't get it and the block might work if fragmented, 2036 * try without the CONTIG flag. Loop until we get it all. 2037 */ 2038 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); 2039 for (b = *bno, mapi = 0; b < *bno + count; ) { 2040 nmap = MIN(XFS_BMAP_MAX_NMAP, count); 2041 c = (int)(*bno + count - b); 2042 error = xfs_bmapi_write(tp, dp, b, c, 2043 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2044 args->firstblock, args->total, 2045 &mapp[mapi], &nmap, args->flist); 2046 if (error) 2047 goto out_free_map; 2048 if (nmap < 1) 2049 break; 2050 mapi += nmap; 2051 b = mapp[mapi - 1].br_startoff + 2052 mapp[mapi - 1].br_blockcount; 2053 } 2054 } else { 2055 mapi = 0; 2056 mapp = NULL; 2057 } 2058 2059 /* 2060 * Count the blocks we got, make sure it matches the total. 2061 */ 2062 for (i = 0, got = 0; i < mapi; i++) 2063 got += mapp[i].br_blockcount; 2064 if (got != count || mapp[0].br_startoff != *bno || 2065 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 2066 *bno + count) { 2067 error = -ENOSPC; 2068 goto out_free_map; 2069 } 2070 2071 /* account for newly allocated blocks in reserved blocks total */ 2072 args->total -= dp->i_d.di_nblocks - nblks; 2073 2074 out_free_map: 2075 if (mapp != &map) 2076 kmem_free(mapp); 2077 return error; 2078 } 2079 2080 /* 2081 * Add a block to the btree ahead of the file. 2082 * Return the new block number to the caller. 2083 */ 2084 int 2085 xfs_da_grow_inode( 2086 struct xfs_da_args *args, 2087 xfs_dablk_t *new_blkno) 2088 { 2089 xfs_fileoff_t bno; 2090 int error; 2091 2092 trace_xfs_da_grow_inode(args); 2093 2094 bno = args->geo->leafblk; 2095 error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); 2096 if (!error) 2097 *new_blkno = (xfs_dablk_t)bno; 2098 return error; 2099 } 2100 2101 /* 2102 * Ick. We need to always be able to remove a btree block, even 2103 * if there's no space reservation because the filesystem is full. 2104 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. 2105 * It swaps the target block with the last block in the file. The 2106 * last block in the file can always be removed since it can't cause 2107 * a bmap btree split to do that. 2108 */ 2109 STATIC int 2110 xfs_da3_swap_lastblock( 2111 struct xfs_da_args *args, 2112 xfs_dablk_t *dead_blknop, 2113 struct xfs_buf **dead_bufp) 2114 { 2115 struct xfs_da_blkinfo *dead_info; 2116 struct xfs_da_blkinfo *sib_info; 2117 struct xfs_da_intnode *par_node; 2118 struct xfs_da_intnode *dead_node; 2119 struct xfs_dir2_leaf *dead_leaf2; 2120 struct xfs_da_node_entry *btree; 2121 struct xfs_da3_icnode_hdr par_hdr; 2122 struct xfs_inode *dp; 2123 struct xfs_trans *tp; 2124 struct xfs_mount *mp; 2125 struct xfs_buf *dead_buf; 2126 struct xfs_buf *last_buf; 2127 struct xfs_buf *sib_buf; 2128 struct xfs_buf *par_buf; 2129 xfs_dahash_t dead_hash; 2130 xfs_fileoff_t lastoff; 2131 xfs_dablk_t dead_blkno; 2132 xfs_dablk_t last_blkno; 2133 xfs_dablk_t sib_blkno; 2134 xfs_dablk_t par_blkno; 2135 int error; 2136 int w; 2137 int entno; 2138 int level; 2139 int dead_level; 2140 2141 trace_xfs_da_swap_lastblock(args); 2142 2143 dead_buf = *dead_bufp; 2144 dead_blkno = *dead_blknop; 2145 tp = args->trans; 2146 dp = args->dp; 2147 w = args->whichfork; 2148 ASSERT(w == XFS_DATA_FORK); 2149 mp = dp->i_mount; 2150 lastoff = args->geo->freeblk; 2151 error = xfs_bmap_last_before(tp, dp, &lastoff, w); 2152 if (error) 2153 return error; 2154 if (unlikely(lastoff == 0)) { 2155 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, 2156 mp); 2157 return -EFSCORRUPTED; 2158 } 2159 /* 2160 * Read the last block in the btree space. 2161 */ 2162 last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; 2163 error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); 2164 if (error) 2165 return error; 2166 /* 2167 * Copy the last block into the dead buffer and log it. 2168 */ 2169 memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); 2170 xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); 2171 dead_info = dead_buf->b_addr; 2172 /* 2173 * Get values from the moved block. 2174 */ 2175 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 2176 dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { 2177 struct xfs_dir3_icleaf_hdr leafhdr; 2178 struct xfs_dir2_leaf_entry *ents; 2179 2180 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 2181 dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); 2182 ents = dp->d_ops->leaf_ents_p(dead_leaf2); 2183 dead_level = 0; 2184 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); 2185 } else { 2186 struct xfs_da3_icnode_hdr deadhdr; 2187 2188 dead_node = (xfs_da_intnode_t *)dead_info; 2189 dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); 2190 btree = dp->d_ops->node_tree_p(dead_node); 2191 dead_level = deadhdr.level; 2192 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); 2193 } 2194 sib_buf = par_buf = NULL; 2195 /* 2196 * If the moved block has a left sibling, fix up the pointers. 2197 */ 2198 if ((sib_blkno = be32_to_cpu(dead_info->back))) { 2199 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); 2200 if (error) 2201 goto done; 2202 sib_info = sib_buf->b_addr; 2203 if (unlikely( 2204 be32_to_cpu(sib_info->forw) != last_blkno || 2205 sib_info->magic != dead_info->magic)) { 2206 XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", 2207 XFS_ERRLEVEL_LOW, mp); 2208 error = -EFSCORRUPTED; 2209 goto done; 2210 } 2211 sib_info->forw = cpu_to_be32(dead_blkno); 2212 xfs_trans_log_buf(tp, sib_buf, 2213 XFS_DA_LOGRANGE(sib_info, &sib_info->forw, 2214 sizeof(sib_info->forw))); 2215 sib_buf = NULL; 2216 } 2217 /* 2218 * If the moved block has a right sibling, fix up the pointers. 2219 */ 2220 if ((sib_blkno = be32_to_cpu(dead_info->forw))) { 2221 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); 2222 if (error) 2223 goto done; 2224 sib_info = sib_buf->b_addr; 2225 if (unlikely( 2226 be32_to_cpu(sib_info->back) != last_blkno || 2227 sib_info->magic != dead_info->magic)) { 2228 XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", 2229 XFS_ERRLEVEL_LOW, mp); 2230 error = -EFSCORRUPTED; 2231 goto done; 2232 } 2233 sib_info->back = cpu_to_be32(dead_blkno); 2234 xfs_trans_log_buf(tp, sib_buf, 2235 XFS_DA_LOGRANGE(sib_info, &sib_info->back, 2236 sizeof(sib_info->back))); 2237 sib_buf = NULL; 2238 } 2239 par_blkno = args->geo->leafblk; 2240 level = -1; 2241 /* 2242 * Walk down the tree looking for the parent of the moved block. 2243 */ 2244 for (;;) { 2245 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); 2246 if (error) 2247 goto done; 2248 par_node = par_buf->b_addr; 2249 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); 2250 if (level >= 0 && level != par_hdr.level + 1) { 2251 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 2252 XFS_ERRLEVEL_LOW, mp); 2253 error = -EFSCORRUPTED; 2254 goto done; 2255 } 2256 level = par_hdr.level; 2257 btree = dp->d_ops->node_tree_p(par_node); 2258 for (entno = 0; 2259 entno < par_hdr.count && 2260 be32_to_cpu(btree[entno].hashval) < dead_hash; 2261 entno++) 2262 continue; 2263 if (entno == par_hdr.count) { 2264 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", 2265 XFS_ERRLEVEL_LOW, mp); 2266 error = -EFSCORRUPTED; 2267 goto done; 2268 } 2269 par_blkno = be32_to_cpu(btree[entno].before); 2270 if (level == dead_level + 1) 2271 break; 2272 xfs_trans_brelse(tp, par_buf); 2273 par_buf = NULL; 2274 } 2275 /* 2276 * We're in the right parent block. 2277 * Look for the right entry. 2278 */ 2279 for (;;) { 2280 for (; 2281 entno < par_hdr.count && 2282 be32_to_cpu(btree[entno].before) != last_blkno; 2283 entno++) 2284 continue; 2285 if (entno < par_hdr.count) 2286 break; 2287 par_blkno = par_hdr.forw; 2288 xfs_trans_brelse(tp, par_buf); 2289 par_buf = NULL; 2290 if (unlikely(par_blkno == 0)) { 2291 XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", 2292 XFS_ERRLEVEL_LOW, mp); 2293 error = -EFSCORRUPTED; 2294 goto done; 2295 } 2296 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); 2297 if (error) 2298 goto done; 2299 par_node = par_buf->b_addr; 2300 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); 2301 if (par_hdr.level != level) { 2302 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 2303 XFS_ERRLEVEL_LOW, mp); 2304 error = -EFSCORRUPTED; 2305 goto done; 2306 } 2307 btree = dp->d_ops->node_tree_p(par_node); 2308 entno = 0; 2309 } 2310 /* 2311 * Update the parent entry pointing to the moved block. 2312 */ 2313 btree[entno].before = cpu_to_be32(dead_blkno); 2314 xfs_trans_log_buf(tp, par_buf, 2315 XFS_DA_LOGRANGE(par_node, &btree[entno].before, 2316 sizeof(btree[entno].before))); 2317 *dead_blknop = last_blkno; 2318 *dead_bufp = last_buf; 2319 return 0; 2320 done: 2321 if (par_buf) 2322 xfs_trans_brelse(tp, par_buf); 2323 if (sib_buf) 2324 xfs_trans_brelse(tp, sib_buf); 2325 xfs_trans_brelse(tp, last_buf); 2326 return error; 2327 } 2328 2329 /* 2330 * Remove a btree block from a directory or attribute. 2331 */ 2332 int 2333 xfs_da_shrink_inode( 2334 xfs_da_args_t *args, 2335 xfs_dablk_t dead_blkno, 2336 struct xfs_buf *dead_buf) 2337 { 2338 xfs_inode_t *dp; 2339 int done, error, w, count; 2340 xfs_trans_t *tp; 2341 2342 trace_xfs_da_shrink_inode(args); 2343 2344 dp = args->dp; 2345 w = args->whichfork; 2346 tp = args->trans; 2347 count = args->geo->fsbcount; 2348 for (;;) { 2349 /* 2350 * Remove extents. If we get ENOSPC for a dir we have to move 2351 * the last block to the place we want to kill. 2352 */ 2353 error = xfs_bunmapi(tp, dp, dead_blkno, count, 2354 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2355 0, args->firstblock, args->flist, &done); 2356 if (error == -ENOSPC) { 2357 if (w != XFS_DATA_FORK) 2358 break; 2359 error = xfs_da3_swap_lastblock(args, &dead_blkno, 2360 &dead_buf); 2361 if (error) 2362 break; 2363 } else { 2364 break; 2365 } 2366 } 2367 xfs_trans_binval(tp, dead_buf); 2368 return error; 2369 } 2370 2371 /* 2372 * See if the mapping(s) for this btree block are valid, i.e. 2373 * don't contain holes, are logically contiguous, and cover the whole range. 2374 */ 2375 STATIC int 2376 xfs_da_map_covers_blocks( 2377 int nmap, 2378 xfs_bmbt_irec_t *mapp, 2379 xfs_dablk_t bno, 2380 int count) 2381 { 2382 int i; 2383 xfs_fileoff_t off; 2384 2385 for (i = 0, off = bno; i < nmap; i++) { 2386 if (mapp[i].br_startblock == HOLESTARTBLOCK || 2387 mapp[i].br_startblock == DELAYSTARTBLOCK) { 2388 return 0; 2389 } 2390 if (off != mapp[i].br_startoff) { 2391 return 0; 2392 } 2393 off += mapp[i].br_blockcount; 2394 } 2395 return off == bno + count; 2396 } 2397 2398 /* 2399 * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. 2400 * 2401 * For the single map case, it is assumed that the caller has provided a pointer 2402 * to a valid xfs_buf_map. For the multiple map case, this function will 2403 * allocate the xfs_buf_map to hold all the maps and replace the caller's single 2404 * map pointer with the allocated map. 2405 */ 2406 static int 2407 xfs_buf_map_from_irec( 2408 struct xfs_mount *mp, 2409 struct xfs_buf_map **mapp, 2410 int *nmaps, 2411 struct xfs_bmbt_irec *irecs, 2412 int nirecs) 2413 { 2414 struct xfs_buf_map *map; 2415 int i; 2416 2417 ASSERT(*nmaps == 1); 2418 ASSERT(nirecs >= 1); 2419 2420 if (nirecs > 1) { 2421 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), 2422 KM_SLEEP | KM_NOFS); 2423 if (!map) 2424 return -ENOMEM; 2425 *mapp = map; 2426 } 2427 2428 *nmaps = nirecs; 2429 map = *mapp; 2430 for (i = 0; i < *nmaps; i++) { 2431 ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && 2432 irecs[i].br_startblock != HOLESTARTBLOCK); 2433 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); 2434 map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); 2435 } 2436 return 0; 2437 } 2438 2439 /* 2440 * Map the block we are given ready for reading. There are three possible return 2441 * values: 2442 * -1 - will be returned if we land in a hole and mappedbno == -2 so the 2443 * caller knows not to execute a subsequent read. 2444 * 0 - if we mapped the block successfully 2445 * >0 - positive error number if there was an error. 2446 */ 2447 static int 2448 xfs_dabuf_map( 2449 struct xfs_inode *dp, 2450 xfs_dablk_t bno, 2451 xfs_daddr_t mappedbno, 2452 int whichfork, 2453 struct xfs_buf_map **map, 2454 int *nmaps) 2455 { 2456 struct xfs_mount *mp = dp->i_mount; 2457 int nfsb; 2458 int error = 0; 2459 struct xfs_bmbt_irec irec; 2460 struct xfs_bmbt_irec *irecs = &irec; 2461 int nirecs; 2462 2463 ASSERT(map && *map); 2464 ASSERT(*nmaps == 1); 2465 2466 if (whichfork == XFS_DATA_FORK) 2467 nfsb = mp->m_dir_geo->fsbcount; 2468 else 2469 nfsb = mp->m_attr_geo->fsbcount; 2470 2471 /* 2472 * Caller doesn't have a mapping. -2 means don't complain 2473 * if we land in a hole. 2474 */ 2475 if (mappedbno == -1 || mappedbno == -2) { 2476 /* 2477 * Optimize the one-block case. 2478 */ 2479 if (nfsb != 1) 2480 irecs = kmem_zalloc(sizeof(irec) * nfsb, 2481 KM_SLEEP | KM_NOFS); 2482 2483 nirecs = nfsb; 2484 error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, 2485 &nirecs, xfs_bmapi_aflag(whichfork)); 2486 if (error) 2487 goto out; 2488 } else { 2489 irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); 2490 irecs->br_startoff = (xfs_fileoff_t)bno; 2491 irecs->br_blockcount = nfsb; 2492 irecs->br_state = 0; 2493 nirecs = 1; 2494 } 2495 2496 if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { 2497 error = mappedbno == -2 ? -1 : -EFSCORRUPTED; 2498 if (unlikely(error == -EFSCORRUPTED)) { 2499 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 2500 int i; 2501 xfs_alert(mp, "%s: bno %lld dir: inode %lld", 2502 __func__, (long long)bno, 2503 (long long)dp->i_ino); 2504 for (i = 0; i < *nmaps; i++) { 2505 xfs_alert(mp, 2506 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", 2507 i, 2508 (long long)irecs[i].br_startoff, 2509 (long long)irecs[i].br_startblock, 2510 (long long)irecs[i].br_blockcount, 2511 irecs[i].br_state); 2512 } 2513 } 2514 XFS_ERROR_REPORT("xfs_da_do_buf(1)", 2515 XFS_ERRLEVEL_LOW, mp); 2516 } 2517 goto out; 2518 } 2519 error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); 2520 out: 2521 if (irecs != &irec) 2522 kmem_free(irecs); 2523 return error; 2524 } 2525 2526 /* 2527 * Get a buffer for the dir/attr block. 2528 */ 2529 int 2530 xfs_da_get_buf( 2531 struct xfs_trans *trans, 2532 struct xfs_inode *dp, 2533 xfs_dablk_t bno, 2534 xfs_daddr_t mappedbno, 2535 struct xfs_buf **bpp, 2536 int whichfork) 2537 { 2538 struct xfs_buf *bp; 2539 struct xfs_buf_map map; 2540 struct xfs_buf_map *mapp; 2541 int nmap; 2542 int error; 2543 2544 *bpp = NULL; 2545 mapp = ↦ 2546 nmap = 1; 2547 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, 2548 &mapp, &nmap); 2549 if (error) { 2550 /* mapping a hole is not an error, but we don't continue */ 2551 if (error == -1) 2552 error = 0; 2553 goto out_free; 2554 } 2555 2556 bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, 2557 mapp, nmap, 0); 2558 error = bp ? bp->b_error : -EIO; 2559 if (error) { 2560 if (bp) 2561 xfs_trans_brelse(trans, bp); 2562 goto out_free; 2563 } 2564 2565 *bpp = bp; 2566 2567 out_free: 2568 if (mapp != &map) 2569 kmem_free(mapp); 2570 2571 return error; 2572 } 2573 2574 /* 2575 * Get a buffer for the dir/attr block, fill in the contents. 2576 */ 2577 int 2578 xfs_da_read_buf( 2579 struct xfs_trans *trans, 2580 struct xfs_inode *dp, 2581 xfs_dablk_t bno, 2582 xfs_daddr_t mappedbno, 2583 struct xfs_buf **bpp, 2584 int whichfork, 2585 const struct xfs_buf_ops *ops) 2586 { 2587 struct xfs_buf *bp; 2588 struct xfs_buf_map map; 2589 struct xfs_buf_map *mapp; 2590 int nmap; 2591 int error; 2592 2593 *bpp = NULL; 2594 mapp = ↦ 2595 nmap = 1; 2596 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, 2597 &mapp, &nmap); 2598 if (error) { 2599 /* mapping a hole is not an error, but we don't continue */ 2600 if (error == -1) 2601 error = 0; 2602 goto out_free; 2603 } 2604 2605 error = xfs_trans_read_buf_map(dp->i_mount, trans, 2606 dp->i_mount->m_ddev_targp, 2607 mapp, nmap, 0, &bp, ops); 2608 if (error) 2609 goto out_free; 2610 2611 if (whichfork == XFS_ATTR_FORK) 2612 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); 2613 else 2614 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); 2615 *bpp = bp; 2616 out_free: 2617 if (mapp != &map) 2618 kmem_free(mapp); 2619 2620 return error; 2621 } 2622 2623 /* 2624 * Readahead the dir/attr block. 2625 */ 2626 xfs_daddr_t 2627 xfs_da_reada_buf( 2628 struct xfs_inode *dp, 2629 xfs_dablk_t bno, 2630 xfs_daddr_t mappedbno, 2631 int whichfork, 2632 const struct xfs_buf_ops *ops) 2633 { 2634 struct xfs_buf_map map; 2635 struct xfs_buf_map *mapp; 2636 int nmap; 2637 int error; 2638 2639 mapp = ↦ 2640 nmap = 1; 2641 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, 2642 &mapp, &nmap); 2643 if (error) { 2644 /* mapping a hole is not an error, but we don't continue */ 2645 if (error == -1) 2646 error = 0; 2647 goto out_free; 2648 } 2649 2650 mappedbno = mapp[0].bm_bn; 2651 xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); 2652 2653 out_free: 2654 if (mapp != &map) 2655 kmem_free(mapp); 2656 2657 if (error) 2658 return -1; 2659 return mappedbno; 2660 } 2661