// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2013 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_errortag.h"
#include "xfs_health.h"

/*
 * xfs_da_btree.c
 *
 * Routines to implement directories as Btrees of hashed names.
 */

/*========================================================================
 * Function prototypes for the kernel.
 *========================================================================*/

/*
 * Routines used for growing the Btree.
 */
STATIC int xfs_da3_root_split(xfs_da_state_t *state,
			      xfs_da_state_blk_t *existing_root,
			      xfs_da_state_blk_t *new_child);
STATIC int xfs_da3_node_split(xfs_da_state_t *state,
			      xfs_da_state_blk_t *existing_blk,
			      xfs_da_state_blk_t *split_blk,
			      xfs_da_state_blk_t *blk_to_add,
			      int treelevel,
			      int *result);
STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
				   xfs_da_state_blk_t *node_blk_1,
				   xfs_da_state_blk_t *node_blk_2);
STATIC void xfs_da3_node_add(xfs_da_state_t *state,
			     xfs_da_state_blk_t *old_node_blk,
			     xfs_da_state_blk_t *new_node_blk);

/*
 * Routines used for shrinking the Btree.
 */
STATIC int xfs_da3_root_join(xfs_da_state_t *state,
			     xfs_da_state_blk_t *root_blk);
STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
				xfs_da_state_blk_t *drop_blk);
STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
				   xfs_da_state_blk_t *src_node_blk,
				   xfs_da_state_blk_t *dst_node_blk);

/*
 * Utility routines.
 */
STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
			      xfs_da_state_blk_t *drop_blk,
			      xfs_da_state_blk_t *save_blk);


struct kmem_cache	*xfs_da_state_cache;	/* anchor for dir/attr state */

/*
 * Allocate a dir-state structure.
 * We don't put them on the stack since they're large.
 */
struct xfs_da_state *
xfs_da_state_alloc(
	struct xfs_da_args	*args)
{
	struct xfs_da_state	*state;

	state = kmem_cache_zalloc(xfs_da_state_cache,
			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
	state->args = args;
	state->mp = args->dp->i_mount;
	return state;
}

/*
 * Kill the altpath contents of a da-state structure.
 */
STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t *state)
{
	int	i;

	for (i = 0; i < state->altpath.active; i++)
		state->altpath.blk[i].bp = NULL;
	state->altpath.active = 0;
}
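
/*
 * Example (illustrative sketch, not part of the original source): a
 * typical caller pairs xfs_da_state_alloc() with xfs_da_state_free()
 * around a btree operation, roughly:
 *
 *	struct xfs_da_state *state = xfs_da_state_alloc(args);
 *	int result, error;
 *
 *	error = xfs_da3_node_lookup_int(state, &result);
 *	...
 *	xfs_da_state_free(state);
 *
 * The allocation cannot fail (__GFP_NOFAIL), so no NULL check is needed.
 */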

/*
 * Free a da-state structure.
 */
void
xfs_da_state_free(xfs_da_state_t *state)
{
	xfs_da_state_kill_altpath(state);
#ifdef DEBUG
	memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
	kmem_cache_free(xfs_da_state_cache, state);
}

void
xfs_da_state_reset(
	struct xfs_da_state	*state,
	struct xfs_da_args	*args)
{
	xfs_da_state_kill_altpath(state);
	memset(state, 0, sizeof(struct xfs_da_state));
	state->args = args;
	state->mp = state->args->dp->i_mount;
}

static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
{
	if (whichfork == XFS_DATA_FORK)
		return mp->m_dir_geo->fsbcount;
	return mp->m_attr_geo->fsbcount;
}

void
xfs_da3_node_hdr_from_disk(
	struct xfs_mount	*mp,
	struct xfs_da3_icnode_hdr *to,
	struct xfs_da_intnode	*from)
{
	if (xfs_has_crc(mp)) {
		struct xfs_da3_intnode	*from3 = (struct xfs_da3_intnode *)from;

		to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
		to->back = be32_to_cpu(from3->hdr.info.hdr.back);
		to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
		to->count = be16_to_cpu(from3->hdr.__count);
		to->level = be16_to_cpu(from3->hdr.__level);
		to->btree = from3->__btree;
		ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
	} else {
		to->forw = be32_to_cpu(from->hdr.info.forw);
		to->back = be32_to_cpu(from->hdr.info.back);
		to->magic = be16_to_cpu(from->hdr.info.magic);
		to->count = be16_to_cpu(from->hdr.__count);
		to->level = be16_to_cpu(from->hdr.__level);
		to->btree = from->__btree;
		ASSERT(to->magic == XFS_DA_NODE_MAGIC);
	}
}

void
xfs_da3_node_hdr_to_disk(
	struct xfs_mount	*mp,
	struct xfs_da_intnode	*to,
	struct xfs_da3_icnode_hdr *from)
{
	if (xfs_has_crc(mp)) {
		struct xfs_da3_intnode	*to3 = (struct xfs_da3_intnode *)to;

		ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
		to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
		to3->hdr.info.hdr.back = cpu_to_be32(from->back);
		to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
		to3->hdr.__count = cpu_to_be16(from->count);
		to3->hdr.__level = cpu_to_be16(from->level);
	} else {
		ASSERT(from->magic == XFS_DA_NODE_MAGIC);
		to->hdr.info.forw = cpu_to_be32(from->forw);
		to->hdr.info.back = cpu_to_be32(from->back);
		to->hdr.info.magic = cpu_to_be16(from->magic);
		to->hdr.__count = cpu_to_be16(from->count);
		to->hdr.__level = cpu_to_be16(from->level);
	}
}
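
/*
 * Example (illustrative sketch): the usual pattern is to decode the
 * on-disk header into the in-core form, modify it, then write it back:
 *
 *	struct xfs_da3_icnode_hdr ichdr;
 *
 *	xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
 *	ichdr.count++;		(after adding an entry)
 *	xfs_da3_node_hdr_to_disk(mp, bp->b_addr, &ichdr);
 *
 * The same in-core structure covers both the v4 (xfs_da_intnode) and
 * v5/CRC (xfs_da3_intnode) on-disk layouts.
 */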

/*
 * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
 * accessible on v5 filesystems. This header format is common across da node,
 * attr leaf and dir leaf blocks.
 */
xfs_failaddr_t
xfs_da3_blkinfo_verify(
	struct xfs_buf		*bp,
	struct xfs_da3_blkinfo	*hdr3)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_da_blkinfo	*hdr = &hdr3->hdr;

	if (!xfs_verify_magic16(bp, hdr->magic))
		return __this_address;

	if (xfs_has_crc(mp)) {
		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (be64_to_cpu(hdr3->blkno) != xfs_buf_daddr(bp))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
			return __this_address;
	}

	return NULL;
}

static xfs_failaddr_t
xfs_da3_node_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_da_intnode	*hdr = bp->b_addr;
	struct xfs_da3_icnode_hdr ichdr;
	xfs_failaddr_t		fa;

	xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);

	fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
	if (fa)
		return fa;

	if (ichdr.level == 0)
		return __this_address;
	if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
		return __this_address;
	if (ichdr.count == 0)
		return __this_address;

	/*
	 * we don't know if the node is for an attribute or directory tree,
	 * so only fail if the count is outside both bounds
	 */
	if (ichdr.count > mp->m_dir_geo->node_ents &&
	    ichdr.count > mp->m_attr_geo->node_ents)
		return __this_address;

	/* XXX: hash order check? */

	return NULL;
}

xfs_failaddr_t
xfs_da3_node_header_check(
	struct xfs_buf		*bp,
	xfs_ino_t		owner)
{
	struct xfs_mount	*mp = bp->b_mount;

	if (xfs_has_crc(mp)) {
		struct xfs_da3_blkinfo *hdr3 = bp->b_addr;

		if (hdr3->hdr.magic != cpu_to_be16(XFS_DA3_NODE_MAGIC))
			return __this_address;

		if (be64_to_cpu(hdr3->owner) != owner)
			return __this_address;
	}

	return NULL;
}

xfs_failaddr_t
xfs_da3_header_check(
	struct xfs_buf		*bp,
	xfs_ino_t		owner)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_da_blkinfo	*hdr = bp->b_addr;

	if (!xfs_has_crc(mp))
		return NULL;

	switch (hdr->magic) {
	case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
		return xfs_attr3_leaf_header_check(bp, owner);
	case cpu_to_be16(XFS_DA3_NODE_MAGIC):
		return xfs_da3_node_header_check(bp, owner);
	case cpu_to_be16(XFS_DIR3_LEAF1_MAGIC):
	case cpu_to_be16(XFS_DIR3_LEAFN_MAGIC):
		return xfs_dir3_leaf_header_check(bp, owner);
	}

	ASSERT(0);
	return NULL;
}

static void
xfs_da3_node_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_da3_node_hdr	*hdr3 = bp->b_addr;
	xfs_failaddr_t		fa;

	fa = xfs_da3_node_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (!xfs_has_crc(mp))
		return;

	if (bip)
		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}

/*
 * leaf/node format detection on trees is sketchy, so a node read can be done on
 * leaf level blocks when detection identifies the tree as a node format tree
 * incorrectly. In this case, we need to swap the verifier to match the correct
 * format of the block being read.
 */
static void
xfs_da3_node_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;
	xfs_failaddr_t		fa;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA3_NODE_MAGIC:
		if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
			xfs_verifier_error(bp, -EFSBADCRC,
					__this_address);
			break;
		}
		fallthrough;
	case XFS_DA_NODE_MAGIC:
		fa = xfs_da3_node_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	default:
		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
		break;
	}
}

/* Verify the structure of a da3 block. */
static xfs_failaddr_t
xfs_da3_node_verify_struct(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA3_NODE_MAGIC:
	case XFS_DA_NODE_MAGIC:
		return xfs_da3_node_verify(bp);
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		return bp->b_ops->verify_struct(bp);
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		return bp->b_ops->verify_struct(bp);
	default:
		return __this_address;
	}
}

const struct xfs_buf_ops xfs_da3_node_buf_ops = {
	.name = "xfs_da3_node",
	.magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
		     cpu_to_be16(XFS_DA3_NODE_MAGIC) },
	.verify_read = xfs_da3_node_read_verify,
	.verify_write = xfs_da3_node_write_verify,
	.verify_struct = xfs_da3_node_verify_struct,
};

static int
xfs_da3_node_set_type(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	int			whichfork,
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA_NODE_MAGIC:
	case XFS_DA3_NODE_MAGIC:
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
		return 0;
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
		return 0;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
		return 0;
	default:
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
				info, sizeof(*info));
		xfs_trans_brelse(tp, bp);
		xfs_dirattr_mark_sick(dp, whichfork);
		return -EFSCORRUPTED;
	}
}

int
xfs_da3_node_read(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	int			error;

	error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
			&xfs_da3_node_buf_ops);
	if (error || !*bpp || !tp)
		return error;
	return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
}
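
/*
 * Example (illustrative sketch): callers read a dablk and release it via
 * the owning transaction, e.g. to peek at a child block:
 *
 *	struct xfs_buf *bp;
 *	int error;
 *
 *	error = xfs_da3_node_read(args->trans, args->dp, blkno, &bp,
 *			args->whichfork);
 *	if (error)
 *		return error;
 *	...
 *	xfs_trans_brelse(args->trans, bp);
 *
 * The verifier attached above may replace bp->b_ops if the block turns
 * out to be a leaf rather than a node.
 */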

int
xfs_da3_node_read_mapped(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_mount	*mp = dp->i_mount;
	int			error;

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
			XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
			bpp, &xfs_da3_node_buf_ops);
	if (xfs_metadata_is_sick(error))
		xfs_dirattr_mark_sick(dp, whichfork);
	if (error || !*bpp)
		return error;

	if (whichfork == XFS_ATTR_FORK)
		xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
	else
		xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);

	if (!tp)
		return 0;
	return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
}

/*
 * Copy src directory/attr leaf/node buffer to the dst.
 * For v5 file systems make sure the right blkno is stamped in.
 */
void
xfs_da_buf_copy(
	struct xfs_buf	*dst,
	struct xfs_buf	*src,
	size_t		size)
{
	struct xfs_da3_blkinfo *da3 = dst->b_addr;

	memcpy(dst->b_addr, src->b_addr, size);
	dst->b_ops = src->b_ops;
	xfs_trans_buf_copy_type(dst, src);
	if (xfs_has_crc(dst->b_mount))
		da3->blkno = cpu_to_be64(xfs_buf_daddr(dst));
}

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/

/*
 * Create the initial contents of an intermediate node.
 */
int
xfs_da3_node_create(
	struct xfs_da_args	*args,
	xfs_dablk_t		blkno,
	int			level,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_da_intnode	*node;
	struct xfs_trans	*tp = args->trans;
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_da3_icnode_hdr ichdr = {0};
	struct xfs_buf		*bp;
	int			error;
	struct xfs_inode	*dp = args->dp;

	trace_xfs_da_node_create(args);
	ASSERT(level <= XFS_DA_NODE_MAXDEPTH);

	error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
	if (error)
		return error;
	bp->b_ops = &xfs_da3_node_buf_ops;
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	node = bp->b_addr;

	if (xfs_has_crc(mp)) {
		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;

		memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
		ichdr.magic = XFS_DA3_NODE_MAGIC;
		hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr3->info.owner = cpu_to_be64(args->owner);
		uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
	} else {
		ichdr.magic = XFS_DA_NODE_MAGIC;
	}
	ichdr.level = level;

	xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));

	*bpp = bp;
	return 0;
}
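
/*
 * Example (illustrative sketch): a freshly created node has its level set
 * but count == 0; callers populate it afterwards, e.g. in
 * xfs_da3_root_split() below:
 *
 *	error = xfs_da3_node_create(args, blkno, level + 1, &bp,
 *			args->whichfork);
 *	if (error)
 *		return error;
 *	(then fill in btree[] entries and write the header back)
 *
 * Entries are added either directly (root split) or one at a time via
 * xfs_da3_node_add().
 */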

/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 */
int							/* error */
xfs_da3_split(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*oldblk;
	struct xfs_da_state_blk	*newblk;
	struct xfs_da_state_blk	*addblk;
	struct xfs_da_intnode	*node;
	int			max;
	int			action = 0;
	int			error;
	int			i;

	trace_xfs_da_split(state->args);

	if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
		return -EIO;

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != -ENOSPC)) {
				return error;	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again. The new
			 * extrablk will be consumed by xfs_da3_node_split if
			 * the node is split.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				trace_xfs_attr_leaf_split_before(state->args);
				error = xfs_attr3_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				trace_xfs_attr_leaf_split_after(state->args);
				error = xfs_attr3_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return error;	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da3_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			addblk->bp = NULL;
			if (error)
				return error;	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da3_fixhashpath(state, &state->path);
	}
	if (!addblk)
		return 0;

	/*
	 * xfs_da3_node_split() should have consumed any extra blocks we added
	 * during a double leaf split in the attr fork. This is guaranteed as
	 * we can't be here if the attr fork only has a single leaf block.
	 */
	ASSERT(state->extravalid == 0 ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da3_root_split(state, oldblk, addblk);
	if (error)
		goto out;

	/*
	 * Update pointers to the node which used to be block 0 and just got
	 * bumped because of the addition of a new root node. Note that the
	 * original block 0 could be at any position in the list of blocks in
	 * the tree.
	 *
	 * Note: the magic numbers and sibling pointers are in the same physical
	 * place for both v2 and v3 headers (by design). Hence it doesn't matter
	 * which version of the xfs_da_intnode structure we use here as the
	 * result will be the same using either structure.
	 */
	node = oldblk->bp->b_addr;
	if (node->hdr.info.forw) {
		if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
			xfs_buf_mark_corrupt(oldblk->bp);
			xfs_da_mark_sick(state->args);
			error = -EFSCORRUPTED;
			goto out;
		}
		node = addblk->bp->b_addr;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
				  XFS_DA_LOGRANGE(node, &node->hdr.info,
				  sizeof(node->hdr.info)));
	}
	node = oldblk->bp->b_addr;
	if (node->hdr.info.back) {
		if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
			xfs_buf_mark_corrupt(oldblk->bp);
			xfs_da_mark_sick(state->args);
			error = -EFSCORRUPTED;
			goto out;
		}
		node = addblk->bp->b_addr;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
				  XFS_DA_LOGRANGE(node, &node->hdr.info,
				  sizeof(node->hdr.info)));
	}
out:
	addblk->bp = NULL;
	return error;
}

/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in the process.
 */
STATIC int						/* error */
xfs_da3_root_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_dir2_leaf	*leaf;
	xfs_dablk_t		blkno;
	int			level;
	int			error;
	int			size;

	trace_xfs_da_root_split(state->args);

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return error;

	dp = args->dp;
	tp = args->trans;
	error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
	if (error)
		return error;
	node = bp->b_addr;
	oldroot = blk1->bp->b_addr;
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
		struct xfs_da3_icnode_hdr icnodehdr;

		xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
		btree = icnodehdr.btree;
		size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
		level = icnodehdr.level;
	} else {
		struct xfs_dir3_icleaf_hdr leafhdr;

		leaf = (xfs_dir2_leaf_t *)oldroot;
		xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);

		ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
		       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
		size = (int)((char *)&leafhdr.ents[leafhdr.count] -
			(char *)leaf);
		level = 0;
	}

	/*
	 * Copy old root to new buffer and log it.
	 */
	xfs_da_buf_copy(bp, blk1->bp, size);
	xfs_trans_log_buf(tp, bp, 0, size - 1);

	/*
	 * Update blk1 to point to new buffer.
	 */
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 */
	error = xfs_da3_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
		level + 1, &bp, args->whichfork);
	if (error)
		return error;

	node = bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
	btree = nodehdr.btree;
	btree[0].hashval = cpu_to_be32(blk1->hashval);
	btree[0].before = cpu_to_be32(blk1->blkno);
	btree[1].hashval = cpu_to_be32(blk2->hashval);
	btree[1].before = cpu_to_be32(blk2->blkno);
	nodehdr.count = 2;
	xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);

#ifdef DEBUG
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		ASSERT(blk1->blkno >= args->geo->leafblk &&
		       blk1->blkno < args->geo->freeblk);
		ASSERT(blk2->blkno >= args->geo->leafblk &&
		       blk2->blkno < args->geo->freeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));

	return 0;
}
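
/*
 * Illustrative sketch of the result of a root split: the root stays at a
 * fixed dablk (args->geo->leafblk for directories, 0 for attrs) and gains
 * exactly two children, the relocated old root and its new sibling:
 *
 *	        [ new root: level N+1, count 2 ]
 *	         /                        \
 *	[ old root copy (blk1) ]    [ new sibling (blk2) ]
 *
 * btree[0] carries blk1's last hashval, btree[1] carries blk2's, so the
 * hashval ordering invariant of intermediate nodes is preserved.
 */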

/*
 * Split the node, rebalance, then add the new entry.
 */
STATIC int						/* error */
xfs_da3_node_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*oldblk,
	struct xfs_da_state_blk	*newblk,
	struct xfs_da_state_blk	*addblk,
	int			treelevel,
	int			*result)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	xfs_dablk_t		blkno;
	int			newcount;
	int			error;
	int			useextra;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_split(state->args);

	node = oldblk->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);

	/*
	 * With V2 dirs the extra block is data or freespace.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if (nodehdr.count + newcount > state->args->geo->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return error;	/* GROT: dir is inconsistent */

		error = xfs_da3_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return error;	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da3_node_rebalance(state, oldblk, newblk);
		error = xfs_da3_blk_link(state, oldblk, newblk);
		if (error)
			return error;
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da3_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
	if (oldblk->index <= nodehdr.count) {
		oldblk->index++;
		xfs_da3_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da3_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da3_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da3_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return 0;
}

/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 */
STATIC void
xfs_da3_node_rebalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node1;
	struct xfs_da_intnode	*node2;
	struct xfs_da_node_entry *btree1;
	struct xfs_da_node_entry *btree2;
	struct xfs_da_node_entry *btree_s;
	struct xfs_da_node_entry *btree_d;
	struct xfs_da3_icnode_hdr nodehdr1;
	struct xfs_da3_icnode_hdr nodehdr2;
	struct xfs_trans	*tp;
	int			count;
	int			tmp;
	int			swap = 0;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_rebalance(state->args);

	node1 = blk1->bp->b_addr;
	node2 = blk2->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
	btree1 = nodehdr1.btree;
	btree2 = nodehdr2.btree;

	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 */
	if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
	     (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
			be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
		swap(node1, node2);
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
		btree1 = nodehdr1.btree;
		btree2 = nodehdr2.btree;
		swap = 1;
	}

	count = (nodehdr1.count - nodehdr2.count) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		tmp = nodehdr2.count;
		if (tmp > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &btree2[0];
			btree_d = &btree2[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		nodehdr2.count += count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree1[nodehdr1.count - count];
		btree_d = &btree2[0];
		memcpy(btree_d, btree_s, tmp);
		nodehdr1.count -= count;
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree2[0];
		btree_d = &btree1[nodehdr1.count];
		memcpy(btree_d, btree_s, tmp);
		nodehdr1.count += count;

		xfs_trans_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = nodehdr2.count - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree2[count];
		btree_d = &btree2[0];
		memmove(btree_d, btree_s, tmp);
		nodehdr2.count -= count;
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
	xfs_trans_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr,
				state->args->geo->node_hdr_size));

	xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
	xfs_trans_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
				state->args->geo->node_hdr_size +
				(sizeof(btree2[0]) * nodehdr2.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	if (swap) {
		node1 = blk1->bp->b_addr;
		node2 = blk2->bp->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
		btree1 = nodehdr1.btree;
		btree2 = nodehdr2.btree;
	}
	blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
	blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);

	/*
	 * Adjust the expected index for insertion.
	 */
	if (blk1->index >= nodehdr1.count) {
		blk2->index = blk1->index - nodehdr1.count;
		blk1->index = nodehdr1.count + 1;	/* make it invalid */
	}
}

/*
 * Add a new entry to an intermediate node.
 */
STATIC void
xfs_da3_node_add(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*oldblk,
	struct xfs_da_state_blk	*newblk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_add(state->args);

	node = oldblk->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
	btree = nodehdr.btree;

	ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK)
		ASSERT(newblk->blkno >= state->args->geo->leafblk &&
		       newblk->blkno < state->args->geo->freeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 */
	tmp = 0;
	if (oldblk->index < nodehdr.count) {
		tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
		memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
	}
	btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
	btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &btree[oldblk->index],
				tmp + sizeof(*btree)));

	nodehdr.count += 1;
	xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr,
				state->args->geo->node_hdr_size));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
}
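
/*
 * Worked example (illustrative): with entries { (hash 10, blk A),
 * (hash 30, blk B) } and oldblk->index == 1, xfs_da3_node_add() shifts
 * (30, B) up one slot and writes newblk's (hashval, before) pair into
 * slot 1, giving { (10, A), (new, ...), (30, B) } with count == 3.
 * The rebalance/split callers above arrange the index so this keeps the
 * entries sorted by hashval.
 */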

/*========================================================================
 * Routines used for shrinking the Btree.
 *========================================================================*/

/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 */
int
xfs_da3_join(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*drop_blk;
	struct xfs_da_state_blk	*save_blk;
	int			action = 0;
	int			error;

	trace_xfs_da_join(state->args);

	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da3_node_remove(state, drop_blk);
			xfs_da3_fixhashpath(state, &state->path);
			error = xfs_da3_node_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_da3_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		xfs_da3_fixhashpath(state, &state->altpath);
		error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return error;
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return error;
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da3_node_remove(state, drop_blk);
	xfs_da3_fixhashpath(state, &state->path);
	error = xfs_da3_root_join(state, &state->path.blk[0]);
	return error;
}

#ifdef DEBUG
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
	__be16	magic = blkinfo->magic;

	if (level == 1) {
		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
	} else {
		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
	}
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
}
#else	/* !DEBUG */
#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif	/* !DEBUG */
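
/*
 * Example (illustrative): the join loop above works on matched pairs of
 * path entries. After xfs_da3_node_toosmall() picks a neighbor, the
 * relationship is:
 *
 *	state->path.blk[i]	-> block being emptied/dropped (drop_blk)
 *	state->altpath.blk[i]	-> surviving neighbor (save_blk)
 *
 * so unbalance moves entries drop_blk -> save_blk, blk_unlink removes
 * drop_blk from the sibling chain, and xfs_da_shrink_inode() frees it.
 */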

/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 */
STATIC int
xfs_da3_root_join(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*root_blk)
{
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_args	*args;
	xfs_dablk_t		child;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr oldroothdr;
	int			error;
	struct xfs_inode	*dp = state->args->dp;
	xfs_failaddr_t		fa;

	trace_xfs_da_root_join(state->args);

	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);

	args = state->args;
	oldroot = root_blk->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
	ASSERT(oldroothdr.forw == 0);
	ASSERT(oldroothdr.back == 0);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (oldroothdr.count > 1)
		return 0;

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = be32_to_cpu(oldroothdr.btree[0].before);
	ASSERT(child != 0);
	error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
	if (error)
		return error;
	fa = xfs_da3_header_check(bp, args->owner);
	if (fa) {
		__xfs_buf_mark_corrupt(bp, fa);
		xfs_trans_brelse(args->trans, bp);
		xfs_da_mark_sick(args);
		return -EFSCORRUPTED;
	}
	xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);

	/*
	 * Copy child to root buffer and log it.
	 */
	xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize);
	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
			args->geo->blksize - 1);
	/*
	 * Now we can drop the child buffer.
	 */
	error = xfs_da_shrink_inode(args, child, bp);
	return error;
}

/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 */
STATIC int
xfs_da3_node_toosmall(
	struct xfs_da_state	*state,
	int			*action)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	xfs_dablk_t		blkno;
	struct xfs_buf		*bp;
	xfs_failaddr_t		fa;
	struct xfs_da3_icnode_hdr nodehdr;
	int			count;
	int			forward;
	int			error;
	int			retval;
	int			i;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_toosmall(state->args);

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->b_addr;
	node = (xfs_da_intnode_t *)info;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
	if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return 0;	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (nodehdr.count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da3_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return error;
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return 0;
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	count  = state->args->geo->node_ents;
	count -= state->args->geo->node_ents >> 2;
	count -= nodehdr.count;

	/* start with smaller blk num */
	forward = nodehdr.forw < nodehdr.back;
	for (i = 0; i < 2; forward = !forward, i++) {
		struct xfs_da3_icnode_hdr thdr;
		if (forward)
			blkno = nodehdr.forw;
		else
			blkno = nodehdr.back;
		if (blkno == 0)
			continue;
		error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
				state->args->whichfork);
		if (error)
			return error;
		fa = xfs_da3_node_header_check(bp, state->args->owner);
		if (fa) {
			__xfs_buf_mark_corrupt(bp, fa);
			xfs_trans_brelse(state->args->trans, bp);
			xfs_da_mark_sick(state->args);
			return -EFSCORRUPTED;
		}

		node = bp->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
		xfs_trans_brelse(state->args->trans, bp);

		if (count - thdr.count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		*action = 0;
		return 0;
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da3_path_shift(state, &state->altpath, forward,
						 0, &retval);
	} else {
		error = xfs_da3_path_shift(state, &state->path, forward,
						 0, &retval);
	}
	if (error)
		return error;
	if (retval) {
		*action = 0;
		return 0;
	}
	*action = 1;
	return 0;
}

/*
 * Pick up the last hashvalue from an intermediate node.
 */
STATIC uint
xfs_da3_node_lasthash(
	struct xfs_inode	*dp,
	struct xfs_buf		*bp,
	int			*count)
{
	struct xfs_da3_icnode_hdr nodehdr;

	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
	if (count)
		*count = nodehdr.count;
	if (!nodehdr.count)
		return 0;
	return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
}
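
/*
 * Worked example (illustrative numbers): with node_ents == 64 the
 * coalesce budget in xfs_da3_node_toosmall() is
 *
 *	count = 64 - (64 >> 2) - nodehdr.count = 48 - nodehdr.count
 *
 * so a block holding 20 entries can merge with a sibling holding up to
 * 28 entries; the combined block then still has 25% (16 slots) free.
 */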

/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 */
void
xfs_da3_fixhashpath(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	xfs_dahash_t		lasthash=0;
	int			level;
	int			count;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_fixhashpath(state->args);

	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	for (blk--, level--; level >= 0; blk--, level--) {
		struct xfs_da3_icnode_hdr nodehdr;

		node = blk->bp->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
		btree = nodehdr.btree;
		if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree[blk->index].hashval = cpu_to_be32(lasthash);
		xfs_trans_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, &btree[blk->index],
						  sizeof(*btree)));

		lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
	}
}

/*
 * Remove an entry from an intermediate node.
 */
STATIC void
xfs_da3_node_remove(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			index;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_remove(state->args);

	node = drop_blk->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
	ASSERT(drop_blk->index < nodehdr.count);
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	index = drop_blk->index;
	btree = nodehdr.btree;
	if (index < nodehdr.count - 1) {
		tmp  = nodehdr.count - index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(&btree[index], &btree[index + 1], tmp);
		xfs_trans_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, &btree[index], tmp));
		index = nodehdr.count - 1;
	}
	memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
	nodehdr.count -= 1;
	xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
}

/*
 * Unbalance the elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 */
STATIC void
xfs_da3_node_unbalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_intnode	*drop_node;
	struct xfs_da_intnode	*save_node;
	struct xfs_da_node_entry *drop_btree;
	struct xfs_da_node_entry *save_btree;
	struct xfs_da3_icnode_hdr drop_hdr;
	struct xfs_da3_icnode_hdr save_hdr;
	struct xfs_trans	*tp;
	int			sindex;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_unbalance(state->args);

	drop_node = drop_blk->bp->b_addr;
	save_node = save_blk->bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
	xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
	drop_btree = drop_hdr.btree;
	save_btree = save_hdr.btree;
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_btree[0].hashval) <
			be32_to_cpu(save_btree[0].hashval)) ||
	    (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
			be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
		/* XXX: check this - is memmove dst correct? */
		tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
		memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);

		sindex = 0;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[0],
				(save_hdr.count + drop_hdr.count) *
						sizeof(xfs_da_node_entry_t)));
	} else {
		sindex = save_hdr.count;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
				drop_hdr.count * sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(&save_btree[sindex], &drop_btree[0], tmp);
	save_hdr.count += drop_hdr.count;

	xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
	xfs_trans_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
				state->args->geo->node_hdr_size));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
}

/*========================================================================
 * Routines used for finding things in the Btree.
 *========================================================================*/

/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 */
int							/* error */
xfs_da3_node_lookup_int(
	struct xfs_da_state	*state,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*curr;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	xfs_failaddr_t		fa;
	xfs_dablk_t		blkno;
	xfs_dahash_t		hashval;
	xfs_dahash_t		btreehashval;
	int			probe;
	int			span;
	int			max;
	int			error;
	int			retval;
	unsigned int		expected_level = 0;
	uint16_t		magic;
	struct xfs_inode	*dp = state->args->dp;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = args->geo->leafblk;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da3_node_read(args->trans, args->dp, blkno,
					&blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return error;
		}
		curr = blk->bp->b_addr;
		magic = be16_to_cpu(curr->magic);

		if (magic == XFS_ATTR_LEAF_MAGIC ||
		    magic == XFS_ATTR3_LEAF_MAGIC) {
			fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(blk->bp, fa);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		}

		if (magic == XFS_DIR2_LEAFN_MAGIC ||
		    magic == XFS_DIR3_LEAFN_MAGIC) {
			fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(blk->bp, fa);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
							      blk->bp, NULL);
			break;
		}

		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
			xfs_buf_mark_corrupt(blk->bp);
			xfs_da_mark_sick(args);
			return -EFSCORRUPTED;
		}

		fa = xfs_da3_node_header_check(blk->bp, args->owner);
		if (fa) {
			__xfs_buf_mark_corrupt(blk->bp, fa);
			xfs_da_mark_sick(args);
			return -EFSCORRUPTED;
		}

		blk->magic = XFS_DA_NODE_MAGIC;

		/*
		 * Search an intermediate node for a match.
		 */
		node = blk->bp->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
		btree = nodehdr.btree;

		/* Tree taller than we can handle; bail out! */
		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
			xfs_buf_mark_corrupt(blk->bp);
			xfs_da_mark_sick(args);
			return -EFSCORRUPTED;
		}

		/* Check the level from the root. */
		if (blkno == args->geo->leafblk)
			expected_level = nodehdr.level - 1;
		else if (expected_level != nodehdr.level) {
			xfs_buf_mark_corrupt(blk->bp);
			xfs_da_mark_sick(args);
			return -EFSCORRUPTED;
		} else
			expected_level--;

		max = nodehdr.count;
		blk->hashval = be32_to_cpu(btree[max - 1].hashval);

		/*
		 * Binary search.  (note: small blocks will skip loop)
		 */
		probe = span = max / 2;
		hashval = args->hashval;
		while (span > 4) {
			span /= 2;
			btreehashval = be32_to_cpu(btree[probe].hashval);
			if (btreehashval < hashval)
				probe += span;
			else if (btreehashval > hashval)
				probe -= span;
			else
				break;
		}
		ASSERT((probe >= 0) && (probe < max));
		ASSERT((span <= 4) ||
			(be32_to_cpu(btree[probe].hashval) == hashval));

		/*
		 * Since we may have duplicate hashval's, find the first
		 * matching hashval in the node.
		 */
		while (probe > 0 &&
		       be32_to_cpu(btree[probe].hashval) >= hashval) {
			probe--;
		}
		while (probe < max &&
		       be32_to_cpu(btree[probe].hashval) < hashval) {
			probe++;
		}

		/*
		 * Pick the right block to descend on.
		 */
		if (probe == max) {
			blk->index = max - 1;
			blkno = be32_to_cpu(btree[max - 1].before);
		} else {
			blk->index = probe;
			blkno = be32_to_cpu(btree[probe].before);
		}

		/* We can't point back to the root. */
		if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
			xfs_da_mark_sick(args);
			return -EFSCORRUPTED;
		}
	}

	if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
		xfs_da_mark_sick(args);
		return -EFSCORRUPTED;
	}

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			xfs_da_mark_sick(args);
			return -EFSCORRUPTED;
		}
		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da3_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return error;
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = -ENOATTR;
			}
		}
		break;
	}
	*result = retval;
	return 0;
}

/*========================================================================
 * Utility routines.
 *========================================================================*/
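
/*
 * Example (illustrative): two names can hash to the same value, e.g.
 * with entries for hash H spanning a leaf boundary:
 *
 *	leaf A: ... H H		(lasthash == H)
 *	leaf B: H ...
 *
 * If the lookup lands in leaf A and misses, blk->hashval == args->hashval
 * triggers xfs_da3_path_shift() to walk into leaf B and retry, so all
 * candidates for H are examined.
 */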

/*
 * Compare two intermediate nodes for "order".
 */
STATIC int
xfs_da3_node_order(
	struct xfs_inode *dp,
	struct xfs_buf	*node1_bp,
	struct xfs_buf	*node2_bp)
{
	struct xfs_da_intnode	*node1;
	struct xfs_da_intnode	*node2;
	struct xfs_da_node_entry *btree1;
	struct xfs_da_node_entry *btree2;
	struct xfs_da3_icnode_hdr node1hdr;
	struct xfs_da3_icnode_hdr node2hdr;

	node1 = node1_bp->b_addr;
	node2 = node2_bp->b_addr;
	xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
	xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
	btree1 = node1hdr.btree;
	btree2 = node2hdr.btree;

	if (node1hdr.count > 0 && node2hdr.count > 0 &&
	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
	     (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
	      be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
		return 1;
	}
	return 0;
}

/*
 * Link a new block into a doubly linked list of blocks (of whatever type).
 */
int							/* error */
xfs_da3_blk_link(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*old_blk,
	struct xfs_da_state_blk	*new_blk)
{
	struct xfs_da_blkinfo	*old_info;
	struct xfs_da_blkinfo	*new_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	xfs_failaddr_t		fa;
	int			before = 0;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->b_addr;
	new_info = new_blk->bp->b_addr;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);

	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		trace_xfs_da_link_before(args);
		new_info->forw = cpu_to_be32(old_blk->blkno);
		new_info->back = old_info->back;
		if (old_info->back) {
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->back),
						&bp, args->whichfork);
			if (error)
				return error;
			fa = xfs_da3_header_check(bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(bp, fa);
				xfs_trans_brelse(args->trans, bp);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
			tmp_info->forw = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->back = cpu_to_be32(new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		trace_xfs_da_link_after(args);
		new_info->forw = old_info->forw;
		new_info->back = cpu_to_be32(old_blk->blkno);
		if (old_info->forw) {
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->forw),
						&bp, args->whichfork);
			if (error)
				return error;
			fa = xfs_da3_header_check(bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(bp, fa);
				xfs_trans_brelse(args->trans, bp);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
			tmp_info->back = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
					  sizeof(*tmp_info) - 1);
		}
		old_info->forw = cpu_to_be32(new_blk->blkno);
	}

	xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return 0;
}

/*
 * Unlink a block from a doubly linked list of blocks.
 */
STATIC int						/* error */
xfs_da3_blk_unlink(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_blkinfo	*drop_info;
	struct xfs_da_blkinfo	*save_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	xfs_failaddr_t		fa;
	int			error;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->b_addr;
	drop_info = drop_blk->bp->b_addr;
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == drop_blk->magic);
	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
	       (be32_to_cpu(drop_info->back) == save_blk->blkno));
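
	/*
	 * Illustrative sketch (not part of the original source): with blocks
	 * chained A <-> drop <-> save, dropping "drop" must rewrite two
	 * pointers so that the chain reads A <-> save:
	 *
	 *	save_info->back = drop_info->back;	// save now points at A
	 *	A->forw = save_blk->blkno;		// A now points at save
	 *
	 * The mirror image applies when "drop" sits on the other side of
	 * "save".  The code below does exactly this, reading the far
	 * sibling (A) through xfs_da3_node_read() and logging every block
	 * it modifies.
	 */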
	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
		trace_xfs_da_unlink_back(args);
		save_info->back = drop_info->back;
		if (drop_info->back) {
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->back),
						&bp, args->whichfork);
			if (error)
				return error;
			fa = xfs_da3_header_check(bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(bp, fa);
				xfs_trans_brelse(args->trans, bp);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
			tmp_info->forw = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
					  sizeof(*tmp_info) - 1);
		}
	} else {
		trace_xfs_da_unlink_forward(args);
		save_info->forw = drop_info->forw;
		if (drop_info->forw) {
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->forw),
						&bp, args->whichfork);
			if (error)
				return error;
			fa = xfs_da3_header_check(bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(bp, fa);
				xfs_trans_brelse(args->trans, bp);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
			tmp_info->back = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
					  sizeof(*tmp_info) - 1);
		}
	}

	xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return 0;
}

/*
 * Move a path "forward" or "!forward" one block at the current level.
 *
 * This routine will adjust a "path" to point to the next block
 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
 * Btree, including updating pointers to the intermediate nodes between
 * the new bottom and the root.
 */
int							/* error */
xfs_da3_path_shift(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path,
	int			forward,
	int			release,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	struct xfs_da_args	*args;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_buf		*bp;
	xfs_failaddr_t		fa;
	xfs_dablk_t		blkno = 0;
	int			level;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_path_shift(state->args);
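
	/*
	 * Illustrative example (not in the original source): with a
	 * three-level path {root, node, leaf} where the leaf is the last
	 * child of its parent node, shifting forward walks up to the first
	 * ancestor whose index is not at its edge, bumps that index, and
	 * then walks back down the near edge of the new subtree until a
	 * block at the original depth is reached.  Only if every ancestor
	 * is already at its edge does the shift report -ENOENT below.
	 */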

	/*
	 * Roll up the Btree looking for the first block where our
	 * current index is not at the edge of the block.  Note that
	 * we skip the bottom layer because we want the sibling block.
	 */
	args = state->args;
	ASSERT(args != NULL);
	ASSERT(path != NULL);
	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	level = (path->active - 1) - 1;	/* skip bottom layer in path */
	for (; level >= 0; level--) {
		blk = &path->blk[level];
		xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
					   blk->bp->b_addr);

		if (forward && (blk->index < nodehdr.count - 1)) {
			blk->index++;
			blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
			break;
		} else if (!forward && (blk->index > 0)) {
			blk->index--;
			blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
			break;
		}
	}
	if (level < 0) {
		*result = -ENOENT;	/* we're out of our tree */
		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
		return 0;
	}

	/*
	 * Roll down the edge of the subtree until we reach the
	 * same depth we were at originally.
	 */
	for (blk++, level++; level < path->active; blk++, level++) {
		/*
		 * Read the next child block into a local buffer.
		 */
		error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
					  args->whichfork);
		if (error)
			return error;

		/*
		 * Release the old block (if it's dirty, the trans doesn't
		 * actually let go) and swap the local buffer into the path
		 * structure.  This ensures failure of the above read doesn't
		 * set a NULL buffer in an active slot in the path.
		 */
		if (release)
			xfs_trans_brelse(args->trans, blk->bp);
		blk->blkno = blkno;
		blk->bp = bp;

		info = blk->bp->b_addr;
		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
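
		/*
		 * Illustrative mapping (not in the original source) of how
		 * the switch below flattens on-disk magics to in-core ones:
		 *
		 *	XFS_DA_NODE_MAGIC / XFS_DA3_NODE_MAGIC
		 *					-> XFS_DA_NODE_MAGIC
		 *	XFS_DIR2_LEAFN_MAGIC / XFS_DIR3_LEAFN_MAGIC
		 *					-> XFS_DIR2_LEAFN_MAGIC
		 *	XFS_ATTR_LEAF_MAGIC / XFS_ATTR3_LEAF_MAGIC
		 *					-> XFS_ATTR_LEAF_MAGIC
		 */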
		/*
		 * Note: we flatten the magic number to a single type so we
		 * don't have to compare against crc/non-crc types elsewhere.
		 */
		switch (be16_to_cpu(info->magic)) {
		case XFS_DA_NODE_MAGIC:
		case XFS_DA3_NODE_MAGIC:
			fa = xfs_da3_node_header_check(blk->bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(blk->bp, fa);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			blk->magic = XFS_DA_NODE_MAGIC;
			xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
						   bp->b_addr);
			btree = nodehdr.btree;
			blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
			if (forward)
				blk->index = 0;
			else
				blk->index = nodehdr.count - 1;
			blkno = be32_to_cpu(btree[blk->index].before);
			break;
		case XFS_ATTR_LEAF_MAGIC:
		case XFS_ATTR3_LEAF_MAGIC:
			fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(blk->bp, fa);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			ASSERT(level == path->active - 1);
			blk->index = 0;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
		case XFS_DIR3_LEAFN_MAGIC:
			fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
			if (fa) {
				__xfs_buf_mark_corrupt(blk->bp, fa);
				xfs_da_mark_sick(args);
				return -EFSCORRUPTED;
			}
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			ASSERT(level == path->active - 1);
			blk->index = 0;
			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
							      blk->bp, NULL);
			break;
		default:
			ASSERT(0);
			break;
		}
	}
	*result = 0;
	return 0;
}


/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Implement a simple hash on a character string.
 * Rotate the hash value by 7 bits, then XOR each character in.
 * This is implemented with some source-level loop unrolling.
 */
xfs_dahash_t
xfs_da_hashname(const uint8_t *name, int namelen)
{
	xfs_dahash_t	hash;

	/*
	 * Do four characters at a time as long as we can.
	 */
	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
		       (name[3] << 0) ^ rol32(hash, 7 * 4);

	/*
	 * Now do the rest of the characters.
	 */
	switch (namelen) {
	case 3:
		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
		       rol32(hash, 7 * 3);
	case 2:
		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
	case 1:
		return (name[0] << 0) ^ rol32(hash, 7 * 1);
	default: /* case 0: */
		return hash;
	}
}

enum xfs_dacmp
xfs_da_compname(
	struct xfs_da_args	*args,
	const unsigned char	*name,
	int			len)
{
	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
}
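
/*
 * Worked example for xfs_da_hashname() (illustrative, not part of the
 * original source).  For the two-byte name "ab" the four-byte loop is
 * skipped entirely and the switch computes:
 *
 *	('a' << 7) ^ ('b' << 0) ^ rol32(0, 14)
 *	  = 0x3080  ^ 0x62      ^ 0
 *	  = 0x30e2
 *
 * For "abcd" the loop runs exactly once with hash == 0:
 *
 *	(0x61 << 21) ^ (0x62 << 14) ^ (0x63 << 7) ^ 0x64 = 0x0c38b1e4
 *
 * For names longer than four bytes, each further four-byte chunk rotates
 * the accumulated hash left by 28 bits before XORing, spreading shared
 * prefixes across the full 32-bit hash space.
 */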

int
xfs_da_grow_inode_int(
	struct xfs_da_args	*args,
	xfs_fileoff_t		*bno,
	int			count)
{
	struct xfs_trans	*tp = args->trans;
	struct xfs_inode	*dp = args->dp;
	int			w = args->whichfork;
	xfs_rfsblock_t		nblks = dp->i_nblocks;
	struct xfs_bmbt_irec	map, *mapp = &map;
	int			nmap, error, got, i, mapi = 1;

	/*
	 * Find a spot in the file space to put the new block.
	 */
	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
	if (error)
		return error;

	/*
	 * Try mapping it in one filesystem block.
	 */
	nmap = 1;
	error = xfs_bmapi_write(tp, dp, *bno, count,
			xfs_bmapi_aflag(w) | XFS_BMAPI_METADATA | XFS_BMAPI_CONTIG,
			args->total, &map, &nmap);
	if (error == -ENOSPC && count > 1) {
		xfs_fileoff_t	b;
		int		c;

		/*
		 * If we didn't get it and the block might work if fragmented,
		 * try without the CONTIG flag.  Loop until we get it all.
		 */
		mapp = kmalloc(sizeof(*mapp) * count,
			       GFP_KERNEL | __GFP_NOFAIL);
		for (b = *bno, mapi = 0; b < *bno + count; ) {
			c = (int)(*bno + count - b);
			nmap = min(XFS_BMAP_MAX_NMAP, c);
			error = xfs_bmapi_write(tp, dp, b, c,
					xfs_bmapi_aflag(w) | XFS_BMAPI_METADATA,
					args->total, &mapp[mapi], &nmap);
			if (error)
				goto out_free_map;
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	}
	if (error)
		goto out_free_map;

	/*
	 * Count the blocks we got, make sure it matches the total.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != *bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    *bno + count) {
		error = -ENOSPC;
		goto out_free_map;
	}

	/* account for newly allocated blocks in reserved blocks total */
	args->total -= dp->i_nblocks - nblks;

out_free_map:
	if (mapp != &map)
		kfree(mapp);
	return error;
}

/*
 * Add a block to the btree ahead of the file.
 * Return the new block number to the caller.
 */
int
xfs_da_grow_inode(
	struct xfs_da_args	*args,
	xfs_dablk_t		*new_blkno)
{
	xfs_fileoff_t		bno;
	int			error;

	trace_xfs_da_grow_inode(args);

	bno = args->geo->leafblk;
	error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
	if (!error)
		*new_blkno = (xfs_dablk_t)bno;
	return error;
}
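
/*
 * Illustrative note (not in the original source): on a 4k-block
 * filesystem with 8k directory blocks, xfs_da_grow_inode() asks
 * xfs_da_grow_inode_int() for fsbcount == 2 blocks.  The first
 * xfs_bmapi_write() call tries to place both in a single contiguous
 * extent; if free space is too fragmented (-ENOSPC with count > 1), the
 * fallback loop maps the range piecewise, possibly one extent per block,
 * and the "got != count" check then verifies that the pieces cover
 * exactly [*bno, *bno + count).
 */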

/*
 * Ick.  We need to always be able to remove a btree block, even
 * if there's no space reservation because the filesystem is full.
 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
 * It swaps the target block with the last block in the file.  The
 * last block in the file can always be removed, since removing it
 * can't cause a bmap btree split.
 */
STATIC int
xfs_da3_swap_lastblock(
	struct xfs_da_args	*args,
	xfs_dablk_t		*dead_blknop,
	struct xfs_buf		**dead_bufp)
{
	struct xfs_da_blkinfo	*dead_info;
	struct xfs_da_blkinfo	*sib_info;
	struct xfs_da_intnode	*par_node;
	struct xfs_da_intnode	*dead_node;
	struct xfs_dir2_leaf	*dead_leaf2;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr par_hdr;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_mount	*mp;
	struct xfs_buf		*dead_buf;
	struct xfs_buf		*last_buf;
	struct xfs_buf		*sib_buf;
	struct xfs_buf		*par_buf;
	xfs_failaddr_t		fa;
	xfs_dahash_t		dead_hash;
	xfs_fileoff_t		lastoff;
	xfs_dablk_t		dead_blkno;
	xfs_dablk_t		last_blkno;
	xfs_dablk_t		sib_blkno;
	xfs_dablk_t		par_blkno;
	int			error;
	int			w;
	int			entno;
	int			level;
	int			dead_level;

	trace_xfs_da_swap_lastblock(args);

	dead_buf = *dead_bufp;
	dead_blkno = *dead_blknop;
	tp = args->trans;
	dp = args->dp;
	w = args->whichfork;
	ASSERT(w == XFS_DATA_FORK);
	mp = dp->i_mount;
	lastoff = args->geo->freeblk;
	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
		xfs_da_mark_sick(args);
		return -EFSCORRUPTED;
	}
	/*
	 * Read the last block in the btree space.
	 */
	last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
	error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
	if (error)
		return error;
	fa = xfs_da3_header_check(last_buf, args->owner);
	if (fa) {
		__xfs_buf_mark_corrupt(last_buf, fa);
		xfs_trans_brelse(tp, last_buf);
		xfs_da_mark_sick(args);
		return -EFSCORRUPTED;
	}

	/*
	 * Copy the last block into the dead buffer and log it.
	 */
	xfs_da_buf_copy(dead_buf, last_buf, args->geo->blksize);
	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
	dead_info = dead_buf->b_addr;

	/*
	 * Get values from the moved block.
	 */
	if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_dir3_icleaf_hdr leafhdr;
		struct xfs_dir2_leaf_entry *ents;

		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
		xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
					    dead_leaf2);
		ents = leafhdr.ents;
		dead_level = 0;
		dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
	} else {
		struct xfs_da3_icnode_hdr deadhdr;

		dead_node = (xfs_da_intnode_t *)dead_info;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
		btree = deadhdr.btree;
		dead_level = deadhdr.level;
		dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
	}
	sib_buf = par_buf = NULL;
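
	/*
	 * Illustrative sketch (not part of the original source) of the swap
	 * performed below.  Suppose the dead block is D and the last block
	 * in the da space is L, chained P <-> L <-> N:
	 *
	 *	1. L's contents are copied over D (done above);
	 *	2. P->forw and N->back are repointed from L's blkno to D's;
	 *	3. the parent node entry whose "before" named L is
	 *	   rewritten to name D.
	 *
	 * Afterwards L's block number is handed back to the caller for
	 * xfs_bunmapi() to remove, which is always safe at end of file.
	 */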
	/*
	 * If the moved block has a left sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
		error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
		if (error)
			goto done;
		fa = xfs_da3_header_check(sib_buf, args->owner);
		if (fa) {
			__xfs_buf_mark_corrupt(sib_buf, fa);
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info = sib_buf->b_addr;
		if (XFS_IS_CORRUPT(mp,
				   be32_to_cpu(sib_info->forw) != last_blkno ||
				   sib_info->magic != dead_info->magic)) {
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info->forw = cpu_to_be32(dead_blkno);
		xfs_trans_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
					sizeof(sib_info->forw)));
		sib_buf = NULL;
	}
	/*
	 * If the moved block has a right sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
		error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
		if (error)
			goto done;
		fa = xfs_da3_header_check(sib_buf, args->owner);
		if (fa) {
			__xfs_buf_mark_corrupt(sib_buf, fa);
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info = sib_buf->b_addr;
		if (XFS_IS_CORRUPT(mp,
				   be32_to_cpu(sib_info->back) != last_blkno ||
				   sib_info->magic != dead_info->magic)) {
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info->back = cpu_to_be32(dead_blkno);
		xfs_trans_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
					sizeof(sib_info->back)));
		sib_buf = NULL;
	}
	par_blkno = args->geo->leafblk;
	level = -1;
	/*
	 * Walk down the tree looking for the parent of the moved block.
	 */
	for (;;) {
		error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
		if (error)
			goto done;
		fa = xfs_da3_node_header_check(par_buf, args->owner);
		if (fa) {
			__xfs_buf_mark_corrupt(par_buf, fa);
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		par_node = par_buf->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
		if (XFS_IS_CORRUPT(mp,
				   level >= 0 && level != par_hdr.level + 1)) {
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		level = par_hdr.level;
		btree = par_hdr.btree;
		for (entno = 0;
		     entno < par_hdr.count &&
		     be32_to_cpu(btree[entno].hashval) < dead_hash;
		     entno++)
			continue;
		if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		par_blkno = be32_to_cpu(btree[entno].before);
		if (level == dead_level + 1)
			break;
		xfs_trans_brelse(tp, par_buf);
		par_buf = NULL;
	}
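	/*
	 * Illustrative note (not in the original source): the hash walk
	 * above only guarantees that we landed in the first parent-level
	 * node that could reference the moved block, because duplicate
	 * hashvals may span sibling nodes.  The scan below therefore
	 * searches that node's "before" pointers for last_blkno and, if it
	 * runs off the end, follows par_hdr.forw to the next sibling at
	 * the same level and keeps looking.
	 */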
	/*
	 * We're in the right parent block.
	 * Look for the right entry.
	 */
	for (;;) {
		for (;
		     entno < par_hdr.count &&
		     be32_to_cpu(btree[entno].before) != last_blkno;
		     entno++)
			continue;
		if (entno < par_hdr.count)
			break;
		par_blkno = par_hdr.forw;
		xfs_trans_brelse(tp, par_buf);
		par_buf = NULL;
		if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
		if (error)
			goto done;
		fa = xfs_da3_node_header_check(par_buf, args->owner);
		if (fa) {
			__xfs_buf_mark_corrupt(par_buf, fa);
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		par_node = par_buf->b_addr;
		xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
		if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
			xfs_da_mark_sick(args);
			error = -EFSCORRUPTED;
			goto done;
		}
		btree = par_hdr.btree;
		entno = 0;
	}
	/*
	 * Update the parent entry pointing to the moved block.
	 */
	btree[entno].before = cpu_to_be32(dead_blkno);
	xfs_trans_log_buf(tp, par_buf,
		XFS_DA_LOGRANGE(par_node, &btree[entno].before,
				sizeof(btree[entno].before)));
	*dead_blknop = last_blkno;
	*dead_bufp = last_buf;
	return 0;
done:
	if (par_buf)
		xfs_trans_brelse(tp, par_buf);
	if (sib_buf)
		xfs_trans_brelse(tp, sib_buf);
	xfs_trans_brelse(tp, last_buf);
	return error;
}

/*
 * Remove a btree block from a directory or attribute.
 */
int
xfs_da_shrink_inode(
	struct xfs_da_args	*args,
	xfs_dablk_t		dead_blkno,
	struct xfs_buf		*dead_buf)
{
	struct xfs_inode	*dp;
	int			done, error, w, count;
	struct xfs_trans	*tp;

	trace_xfs_da_shrink_inode(args);

	dp = args->dp;
	w = args->whichfork;
	tp = args->trans;
	count = args->geo->fsbcount;
	for (;;) {
		/*
		 * Remove extents.  If we get ENOSPC for a dir we have to move
		 * the last block to the place we want to kill.
		 */
		error = xfs_bunmapi(tp, dp, dead_blkno, count,
				    xfs_bmapi_aflag(w), 0, &done);
		if (error == -ENOSPC) {
			if (w != XFS_DATA_FORK)
				break;
			error = xfs_da3_swap_lastblock(args, &dead_blkno,
						       &dead_buf);
			if (error)
				break;
		} else {
			break;
		}
	}
	xfs_trans_binval(tp, dead_buf);
	return error;
}
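
/*
 * Illustrative note (not in the original source): xfs_dabuf_map() below
 * exists because a single da block can span several filesystem blocks,
 * e.g. an 8k directory block on a 4k filesystem has nfsb == 2.  Those
 * fsblocks need not be adjacent on disk, so the bmapi lookup may return
 * multiple extents, in which case the buffer must be built from a
 * multi-entry xfs_buf_map array rather than the caller's single
 * on-stack map.
 */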

static int
xfs_dabuf_map(
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	unsigned int		flags,
	int			whichfork,
	struct xfs_buf_map	**mapp,
	int			*nmaps)
{
	struct xfs_mount	*mp = dp->i_mount;
	int			nfsb = xfs_dabuf_nfsb(mp, whichfork);
	struct xfs_bmbt_irec	irec, *irecs = &irec;
	struct xfs_buf_map	*map = *mapp;
	xfs_fileoff_t		off = bno;
	int			error = 0, nirecs, i;

	if (nfsb > 1)
		irecs = kzalloc(sizeof(irec) * nfsb,
				GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);

	nirecs = nfsb;
	error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
			       xfs_bmapi_aflag(whichfork));
	if (error)
		goto out_free_irecs;

	/*
	 * Use the caller provided map for the single map case, else
	 * allocate a larger one that must be freed by the caller.
	 */
	if (nirecs > 1) {
		map = kzalloc(nirecs * sizeof(struct xfs_buf_map),
			      GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
		if (!map) {
			error = -ENOMEM;
			goto out_free_irecs;
		}
		*mapp = map;
	}

	for (i = 0; i < nirecs; i++) {
		if (irecs[i].br_startblock == HOLESTARTBLOCK ||
		    irecs[i].br_startblock == DELAYSTARTBLOCK)
			goto invalid_mapping;
		if (off != irecs[i].br_startoff)
			goto invalid_mapping;

		map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
		map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
		off += irecs[i].br_blockcount;
	}

	if (off != bno + nfsb)
		goto invalid_mapping;

	*nmaps = nirecs;
out_free_irecs:
	if (irecs != &irec)
		kfree(irecs);
	return error;

invalid_mapping:
	/* Caller ok with no mapping. */
	if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
		xfs_dirattr_mark_sick(dp, whichfork);
		error = -EFSCORRUPTED;
		if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
			xfs_alert(mp, "%s: bno %u inode %llu",
					__func__, bno, dp->i_ino);

			for (i = 0; i < nirecs; i++) {
				xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
					i, irecs[i].br_startoff,
					irecs[i].br_startblock,
					irecs[i].br_blockcount,
					irecs[i].br_state);
			}
		}
	} else {
		*nmaps = 0;
	}
	goto out_free_irecs;
}

/*
 * Get a buffer for the dir/attr block.
 */
int
xfs_da_get_buf(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_buf		*bp;
	struct xfs_buf_map	map, *mapp = &map;
	int			nmap = 1;
	int			error;

	*bpp = NULL;
	error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
	if (error || nmap == 0)
		goto out_free;

	error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
	if (error)
		goto out_free;

	*bpp = bp;

out_free:
	if (mapp != &map)
		kfree(mapp);

	return error;
}

/*
 * Get a buffer for the dir/attr block, fill in the contents.
 */
int
xfs_da_read_buf(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	unsigned int		flags,
	struct xfs_buf		**bpp,
	int			whichfork,
	const struct xfs_buf_ops *ops)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_buf		*bp;
	struct xfs_buf_map	map, *mapp = &map;
	int			nmap = 1;
	int			error;

	*bpp = NULL;
	error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
	if (error || !nmap)
		goto out_free;

	error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
				       &bp, ops);
	if (xfs_metadata_is_sick(error))
		xfs_dirattr_mark_sick(dp, whichfork);
	if (error)
		goto out_free;

	if (whichfork == XFS_ATTR_FORK)
		xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
	else
		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
	*bpp = bp;
out_free:
	if (mapp != &map)
		kfree(mapp);

	return error;
}
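
/*
 * Usage sketch for xfs_da_read_buf() (illustrative only; the buffer ops
 * and block number chosen here are assumptions, not taken from this
 * file):
 *
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_da_read_buf(tp, dp, mp->m_dir_geo->leafblk, 0, &bp,
 *				XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
 *	if (error)
 *		return error;
 *	// ... examine bp->b_addr under the transaction ...
 *	xfs_trans_brelse(tp, bp);
 *
 * Passing a NULL ops pointer skips verification on read, which is why
 * callers normally go through typed wrappers such as xfs_da3_node_read().
 */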

/*
 * Readahead the dir/attr block.
 */
int
xfs_da_reada_buf(
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	unsigned int		flags,
	int			whichfork,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf_map	map;
	struct xfs_buf_map	*mapp;
	int			nmap;
	int			error;

	mapp = &map;
	nmap = 1;
	error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
	if (error || !nmap)
		goto out_free;

	xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);

out_free:
	if (mapp != &map)
		kfree(mapp);

	return error;
}
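
/*
 * Illustrative note (not in the original source): readahead is purely
 * advisory.  A directory scan might prime the next leaf block with
 *
 *	xfs_da_reada_buf(dp, next_bno, XFS_DABUF_MAP_HOLE_OK,
 *			 XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
 *
 * where next_bno is a hypothetical block number the caller expects to
 * visit soon.  Any error can be ignored: if the speculative read fails
 * or the mapping turns out to be a hole, a subsequent xfs_da_read_buf()
 * simply takes the cache miss.
 */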