1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 /* Now we have all buffers that must be used in balancing of the tree */ 6 /* Further calculations can not cause schedule(), and thus the buffer */ 7 /* tree will be stable until the balancing will be finished */ 8 /* balance the tree according to the analysis made before, */ 9 /* and using buffers obtained after all above. */ 10 11 /** 12 ** balance_leaf_when_delete 13 ** balance_leaf 14 ** do_balance 15 ** 16 **/ 17 18 #include <asm/uaccess.h> 19 #include <linux/time.h> 20 #include <linux/reiserfs_fs.h> 21 #include <linux/buffer_head.h> 22 #include <linux/kernel.h> 23 24 static inline void buffer_info_init_left(struct tree_balance *tb, 25 struct buffer_info *bi) 26 { 27 bi->tb = tb; 28 bi->bi_bh = tb->L[0]; 29 bi->bi_parent = tb->FL[0]; 30 bi->bi_position = get_left_neighbor_position(tb, 0); 31 } 32 33 static inline void buffer_info_init_right(struct tree_balance *tb, 34 struct buffer_info *bi) 35 { 36 bi->tb = tb; 37 bi->bi_bh = tb->R[0]; 38 bi->bi_parent = tb->FR[0]; 39 bi->bi_position = get_right_neighbor_position(tb, 0); 40 } 41 42 static inline void buffer_info_init_tbS0(struct tree_balance *tb, 43 struct buffer_info *bi) 44 { 45 bi->tb = tb; 46 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); 47 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); 48 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); 49 } 50 51 static inline void buffer_info_init_bh(struct tree_balance *tb, 52 struct buffer_info *bi, 53 struct buffer_head *bh) 54 { 55 bi->tb = tb; 56 bi->bi_bh = bh; 57 bi->bi_parent = NULL; 58 bi->bi_position = 0; 59 } 60 61 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 62 struct buffer_head *bh, int flag) 63 { 64 journal_mark_dirty(tb->transaction_handle, 65 tb->transaction_handle->t_super, bh); 66 } 67 68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 70 71 /* summary: 72 if deleting something ( tb->insert_size[0] < 0 ) 73 return(balance_leaf_when_delete()); (flag d handled here) 74 else 75 if lnum is larger than 0 we put items into the left node 76 if rnum is larger than 0 we put items into the right node 77 if snum1 is larger than 0 we put items into the new node s1 78 if snum2 is larger than 0 we put items into the new node s2 79 Note that all *num* count new items being created. 80 81 It would be easier to read balance_leaf() if each of these summary 82 lines was a separate procedure rather than being inlined. I think 83 that there are many passages here and in balance_leaf_when_delete() in 84 which two calls to one procedure can replace two passages, and it 85 might save cache space and improve software maintenance costs to do so. 86 87 Vladimir made the perceptive comment that we should offload most of 88 the decision making in this function into fix_nodes/check_balance, and 89 then create some sort of structure in tb that says what actions should 90 be performed by do_balance. 91 92 -Hans */ 93 94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0 95 * 96 * lnum, rnum can have values >= -1 97 * -1 means that the neighbor must be joined with S 98 * 0 means that nothing should be done with the neighbor 99 * >0 means to shift entirely or partly the specified number of items to the neighbor 100 */ 101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag) 102 { 103 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 104 int item_pos = PATH_LAST_POSITION(tb->tb_path); 105 int pos_in_item = tb->tb_path->pos_in_item; 106 struct buffer_info bi; 107 int n; 108 struct item_head *ih; 109 110 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, 111 "vs- 12000: level: wrong FR %z", tb->FR[0]); 112 RFALSE(tb->blknum[0] > 1, 113 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); 114 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), 115 "PAP-12010: tree can not be empty"); 116 117 ih = B_N_PITEM_HEAD(tbS0, item_pos); 118 buffer_info_init_tbS0(tb, &bi); 119 120 /* Delete or truncate the item */ 121 122 switch (flag) { 123 case M_DELETE: /* delete item in S[0] */ 124 125 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], 126 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 127 -tb->insert_size[0], ih); 128 129 leaf_delete_items(&bi, 0, item_pos, 1, -1); 130 131 if (!item_pos && tb->CFL[0]) { 132 if (B_NR_ITEMS(tbS0)) { 133 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 134 0); 135 } else { 136 if (!PATH_H_POSITION(tb->tb_path, 1)) 137 replace_key(tb, tb->CFL[0], tb->lkey[0], 138 PATH_H_PPARENT(tb->tb_path, 139 0), 0); 140 } 141 } 142 143 RFALSE(!item_pos && !tb->CFL[0], 144 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], 145 tb->L[0]); 146 147 break; 148 149 case M_CUT:{ /* cut item in S[0] */ 150 if (is_direntry_le_ih(ih)) { 151 152 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ 153 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ 154 tb->insert_size[0] = -1; 155 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 156 -tb->insert_size[0]); 157 158 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], 159 "PAP-12030: can not change delimiting key. CFL[0]=%p", 160 tb->CFL[0]); 161 162 if (!item_pos && !pos_in_item && tb->CFL[0]) { 163 replace_key(tb, tb->CFL[0], tb->lkey[0], 164 tbS0, 0); 165 } 166 } else { 167 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 168 -tb->insert_size[0]); 169 170 RFALSE(!ih_item_len(ih), 171 "PAP-12035: cut must leave non-zero dynamic length of item"); 172 } 173 break; 174 } 175 176 default: 177 print_cur_tb("12040"); 178 reiserfs_panic(tb->tb_sb, "PAP-12040", 179 "unexpected mode: %s(%d)", 180 (flag == 181 M_PASTE) ? "PASTE" : ((flag == 182 M_INSERT) ? "INSERT" : 183 "UNKNOWN"), flag); 184 } 185 186 /* the rule is that no shifting occurs unless by shifting a node can be freed */ 187 n = B_NR_ITEMS(tbS0); 188 if (tb->lnum[0]) { /* L[0] takes part in balancing */ 189 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ 190 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ 191 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { 192 /* all contents of all the 3 buffers will be in L[0] */ 193 if (PATH_H_POSITION(tb->tb_path, 1) == 0 194 && 1 < B_NR_ITEMS(tb->FR[0])) 195 replace_key(tb, tb->CFL[0], 196 tb->lkey[0], 197 tb->FR[0], 1); 198 199 leaf_move_items(LEAF_FROM_S_TO_L, tb, n, 200 -1, NULL); 201 leaf_move_items(LEAF_FROM_R_TO_L, tb, 202 B_NR_ITEMS(tb->R[0]), 203 -1, NULL); 204 205 reiserfs_invalidate_buffer(tb, tbS0); 206 reiserfs_invalidate_buffer(tb, 207 tb->R[0]); 208 209 return 0; 210 } 211 /* all contents of all the 3 buffers will be in R[0] */ 212 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, 213 NULL); 214 leaf_move_items(LEAF_FROM_L_TO_R, tb, 215 B_NR_ITEMS(tb->L[0]), -1, NULL); 216 217 /* right_delimiting_key is correct in R[0] */ 218 replace_key(tb, tb->CFR[0], tb->rkey[0], 219 tb->R[0], 0); 220 221 reiserfs_invalidate_buffer(tb, tbS0); 222 reiserfs_invalidate_buffer(tb, tb->L[0]); 223 224 return -1; 225 } 226 227 RFALSE(tb->rnum[0] != 0, 228 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 229 /* all contents of L[0] and S[0] will be in L[0] */ 230 leaf_shift_left(tb, n, -1); 231 232 reiserfs_invalidate_buffer(tb, tbS0); 233 234 return 0; 235 } 236 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ 237 238 RFALSE((tb->lnum[0] + tb->rnum[0] < n) || 239 (tb->lnum[0] + tb->rnum[0] > n + 1), 240 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", 241 tb->rnum[0], tb->lnum[0], n); 242 RFALSE((tb->lnum[0] + tb->rnum[0] == n) && 243 (tb->lbytes != -1 || tb->rbytes != -1), 244 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", 245 tb->rbytes, tb->lbytes); 246 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && 247 (tb->lbytes < 1 || tb->rbytes != -1), 248 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", 249 tb->rbytes, tb->lbytes); 250 251 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 252 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 253 254 reiserfs_invalidate_buffer(tb, tbS0); 255 256 return 0; 257 } 258 259 if (tb->rnum[0] == -1) { 260 /* all contents of R[0] and S[0] will be in R[0] */ 261 leaf_shift_right(tb, n, -1); 262 reiserfs_invalidate_buffer(tb, tbS0); 263 return 0; 264 } 265 266 RFALSE(tb->rnum[0], 267 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); 268 return 0; 269 } 270 271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ 272 const char *body, /* body of inserted item or bytes to paste */ 273 int flag, /* i - insert, d - delete, c - cut, p - paste 274 (see comment to do_balance) */ 275 struct item_head *insert_key, /* in our processing of one level we sometimes determine what 276 must be inserted into the next higher level. This insertion 277 consists of a key or two keys and their corresponding 278 pointers */ 279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ 280 ) 281 { 282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] 284 of the affected item */ 285 struct buffer_info bi; 286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 287 int snum[2]; /* number of items that will be placed 288 into S_new (includes partially shifted 289 items) */ 290 int sbytes[2]; /* if an item is partially shifted into S_new then 291 if it is a directory item 292 it is the number of entries from the item that are shifted into S_new 293 else 294 it is the number of bytes from the item that are shifted into S_new 295 */ 296 int n, i; 297 int ret_val; 298 int pos_in_item; 299 int zeros_num; 300 301 PROC_INFO_INC(tb->tb_sb, balance_at[0]); 302 303 /* Make balance in case insert_size[0] < 0 */ 304 if (tb->insert_size[0] < 0) 305 return balance_leaf_when_delete(tb, flag); 306 307 zeros_num = 0; 308 if (flag == M_INSERT && !body) 309 zeros_num = ih_item_len(ih); 310 311 pos_in_item = tb->tb_path->pos_in_item; 312 /* for indirect item pos_in_item is measured in unformatted node 313 pointers. Recalculate to bytes */ 314 if (flag != M_INSERT 315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) 316 pos_in_item *= UNFM_P_SIZE; 317 318 if (tb->lnum[0] > 0) { 319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 320 if (item_pos < tb->lnum[0]) { 321 /* new item or it part falls to L[0], shift it too */ 322 n = B_NR_ITEMS(tb->L[0]); 323 324 switch (flag) { 325 case M_INSERT: /* insert item into L[0] */ 326 327 if (item_pos == tb->lnum[0] - 1 328 && tb->lbytes != -1) { 329 /* part of new item falls into L[0] */ 330 int new_item_len; 331 int version; 332 333 ret_val = 334 leaf_shift_left(tb, tb->lnum[0] - 1, 335 -1); 336 337 /* Calculate item length to insert to S[0] */ 338 new_item_len = 339 ih_item_len(ih) - tb->lbytes; 340 /* Calculate and check item length to insert to L[0] */ 341 put_ih_item_len(ih, 342 ih_item_len(ih) - 343 new_item_len); 344 345 RFALSE(ih_item_len(ih) <= 0, 346 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 347 ih_item_len(ih)); 348 349 /* Insert new item into L[0] */ 350 buffer_info_init_left(tb, &bi); 351 leaf_insert_into_buf(&bi, 352 n + item_pos - 353 ret_val, ih, body, 354 zeros_num > 355 ih_item_len(ih) ? 356 ih_item_len(ih) : 357 zeros_num); 358 359 version = ih_version(ih); 360 361 /* Calculate key component, item length and body to insert into S[0] */ 362 set_le_ih_k_offset(ih, 363 le_ih_k_offset(ih) + 364 (tb-> 365 lbytes << 366 (is_indirect_le_ih 367 (ih) ? tb->tb_sb-> 368 s_blocksize_bits - 369 UNFM_P_SHIFT : 370 0))); 371 372 put_ih_item_len(ih, new_item_len); 373 if (tb->lbytes > zeros_num) { 374 body += 375 (tb->lbytes - zeros_num); 376 zeros_num = 0; 377 } else 378 zeros_num -= tb->lbytes; 379 380 RFALSE(ih_item_len(ih) <= 0, 381 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 382 ih_item_len(ih)); 383 } else { 384 /* new item in whole falls into L[0] */ 385 /* Shift lnum[0]-1 items to L[0] */ 386 ret_val = 387 leaf_shift_left(tb, tb->lnum[0] - 1, 388 tb->lbytes); 389 /* Insert new item into L[0] */ 390 buffer_info_init_left(tb, &bi); 391 leaf_insert_into_buf(&bi, 392 n + item_pos - 393 ret_val, ih, body, 394 zeros_num); 395 tb->insert_size[0] = 0; 396 zeros_num = 0; 397 } 398 break; 399 400 case M_PASTE: /* append item in L[0] */ 401 402 if (item_pos == tb->lnum[0] - 1 403 && tb->lbytes != -1) { 404 /* we must shift the part of the appended item */ 405 if (is_direntry_le_ih 406 (B_N_PITEM_HEAD(tbS0, item_pos))) { 407 408 RFALSE(zeros_num, 409 "PAP-12090: invalid parameter in case of a directory"); 410 /* directory item */ 411 if (tb->lbytes > pos_in_item) { 412 /* new directory entry falls into L[0] */ 413 struct item_head 414 *pasted; 415 int l_pos_in_item = 416 pos_in_item; 417 418 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ 419 ret_val = 420 leaf_shift_left(tb, 421 tb-> 422 lnum 423 [0], 424 tb-> 425 lbytes 426 - 427 1); 428 if (ret_val 429 && !item_pos) { 430 pasted = 431 B_N_PITEM_HEAD 432 (tb->L[0], 433 B_NR_ITEMS 434 (tb-> 435 L[0]) - 436 1); 437 l_pos_in_item += 438 I_ENTRY_COUNT 439 (pasted) - 440 (tb-> 441 lbytes - 442 1); 443 } 444 445 /* Append given directory entry to directory item */ 446 buffer_info_init_left(tb, &bi); 447 leaf_paste_in_buffer 448 (&bi, 449 n + item_pos - 450 ret_val, 451 l_pos_in_item, 452 tb->insert_size[0], 453 body, zeros_num); 454 455 /* previous string prepared space for pasting new entry, following string pastes this entry */ 456 457 /* when we have merge directory item, pos_in_item has been changed too */ 458 459 /* paste new directory entry. 1 is entry number */ 460 leaf_paste_entries(&bi, 461 n + 462 item_pos 463 - 464 ret_val, 465 l_pos_in_item, 466 1, 467 (struct 468 reiserfs_de_head 469 *) 470 body, 471 body 472 + 473 DEH_SIZE, 474 tb-> 475 insert_size 476 [0] 477 ); 478 tb->insert_size[0] = 0; 479 } else { 480 /* new directory item doesn't fall into L[0] */ 481 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ 482 leaf_shift_left(tb, 483 tb-> 484 lnum[0], 485 tb-> 486 lbytes); 487 } 488 /* Calculate new position to append in item body */ 489 pos_in_item -= tb->lbytes; 490 } else { 491 /* regular object */ 492 RFALSE(tb->lbytes <= 0, 493 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", 494 tb->lbytes); 495 RFALSE(pos_in_item != 496 ih_item_len 497 (B_N_PITEM_HEAD 498 (tbS0, item_pos)), 499 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", 500 ih_item_len 501 (B_N_PITEM_HEAD 502 (tbS0, item_pos)), 503 pos_in_item); 504 505 if (tb->lbytes >= pos_in_item) { 506 /* appended item will be in L[0] in whole */ 507 int l_n; 508 509 /* this bytes number must be appended to the last item of L[h] */ 510 l_n = 511 tb->lbytes - 512 pos_in_item; 513 514 /* Calculate new insert_size[0] */ 515 tb->insert_size[0] -= 516 l_n; 517 518 RFALSE(tb-> 519 insert_size[0] <= 520 0, 521 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", 522 tb-> 523 insert_size[0]); 524 ret_val = 525 leaf_shift_left(tb, 526 tb-> 527 lnum 528 [0], 529 ih_item_len 530 (B_N_PITEM_HEAD 531 (tbS0, 532 item_pos))); 533 /* Append to body of item in L[0] */ 534 buffer_info_init_left(tb, &bi); 535 leaf_paste_in_buffer 536 (&bi, 537 n + item_pos - 538 ret_val, 539 ih_item_len 540 (B_N_PITEM_HEAD 541 (tb->L[0], 542 n + item_pos - 543 ret_val)), l_n, 544 body, 545 zeros_num > 546 l_n ? l_n : 547 zeros_num); 548 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ 549 { 550 int version; 551 int temp_l = 552 l_n; 553 554 RFALSE 555 (ih_item_len 556 (B_N_PITEM_HEAD 557 (tbS0, 558 0)), 559 "PAP-12106: item length must be 0"); 560 RFALSE 561 (comp_short_le_keys 562 (B_N_PKEY 563 (tbS0, 0), 564 B_N_PKEY 565 (tb->L[0], 566 n + 567 item_pos 568 - 569 ret_val)), 570 "PAP-12107: items must be of the same file"); 571 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { 572 temp_l = 573 l_n 574 << 575 (tb-> 576 tb_sb-> 577 s_blocksize_bits 578 - 579 UNFM_P_SHIFT); 580 } 581 /* update key of first item in S0 */ 582 version = 583 ih_version 584 (B_N_PITEM_HEAD 585 (tbS0, 0)); 586 set_le_key_k_offset 587 (version, 588 B_N_PKEY 589 (tbS0, 0), 590 le_key_k_offset 591 (version, 592 B_N_PKEY 593 (tbS0, 594 0)) + 595 temp_l); 596 /* update left delimiting key */ 597 set_le_key_k_offset 598 (version, 599 B_N_PDELIM_KEY 600 (tb-> 601 CFL[0], 602 tb-> 603 lkey[0]), 604 le_key_k_offset 605 (version, 606 B_N_PDELIM_KEY 607 (tb-> 608 CFL[0], 609 tb-> 610 lkey[0])) 611 + temp_l); 612 } 613 614 /* Calculate new body, position in item and insert_size[0] */ 615 if (l_n > zeros_num) { 616 body += 617 (l_n - 618 zeros_num); 619 zeros_num = 0; 620 } else 621 zeros_num -= 622 l_n; 623 pos_in_item = 0; 624 625 RFALSE 626 (comp_short_le_keys 627 (B_N_PKEY(tbS0, 0), 628 B_N_PKEY(tb->L[0], 629 B_NR_ITEMS 630 (tb-> 631 L[0]) - 632 1)) 633 || 634 !op_is_left_mergeable 635 (B_N_PKEY(tbS0, 0), 636 tbS0->b_size) 637 || 638 !op_is_left_mergeable 639 (B_N_PDELIM_KEY 640 (tb->CFL[0], 641 tb->lkey[0]), 642 tbS0->b_size), 643 "PAP-12120: item must be merge-able with left neighboring item"); 644 } else { /* only part of the appended item will be in L[0] */ 645 646 /* Calculate position in item for append in S[0] */ 647 pos_in_item -= 648 tb->lbytes; 649 650 RFALSE(pos_in_item <= 0, 651 "PAP-12125: no place for paste. pos_in_item=%d", 652 pos_in_item); 653 654 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 655 leaf_shift_left(tb, 656 tb-> 657 lnum[0], 658 tb-> 659 lbytes); 660 } 661 } 662 } else { /* appended item will be in L[0] in whole */ 663 664 struct item_head *pasted; 665 666 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ 667 /* then increment pos_in_item by the size of the last item in L[0] */ 668 pasted = 669 B_N_PITEM_HEAD(tb->L[0], 670 n - 1); 671 if (is_direntry_le_ih(pasted)) 672 pos_in_item += 673 ih_entry_count 674 (pasted); 675 else 676 pos_in_item += 677 ih_item_len(pasted); 678 } 679 680 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 681 ret_val = 682 leaf_shift_left(tb, tb->lnum[0], 683 tb->lbytes); 684 /* Append to body of item in L[0] */ 685 buffer_info_init_left(tb, &bi); 686 leaf_paste_in_buffer(&bi, 687 n + item_pos - 688 ret_val, 689 pos_in_item, 690 tb->insert_size[0], 691 body, zeros_num); 692 693 /* if appended item is directory, paste entry */ 694 pasted = 695 B_N_PITEM_HEAD(tb->L[0], 696 n + item_pos - 697 ret_val); 698 if (is_direntry_le_ih(pasted)) 699 leaf_paste_entries(&bi, 700 n + 701 item_pos - 702 ret_val, 703 pos_in_item, 704 1, 705 (struct 706 reiserfs_de_head 707 *)body, 708 body + 709 DEH_SIZE, 710 tb-> 711 insert_size 712 [0] 713 ); 714 /* if appended item is indirect item, put unformatted node into un list */ 715 if (is_indirect_le_ih(pasted)) 716 set_ih_free_space(pasted, 0); 717 tb->insert_size[0] = 0; 718 zeros_num = 0; 719 } 720 break; 721 default: /* cases d and t */ 722 reiserfs_panic(tb->tb_sb, "PAP-12130", 723 "lnum > 0: unexpected mode: " 724 " %s(%d)", 725 (flag == 726 M_DELETE) ? "DELETE" : ((flag == 727 M_CUT) 728 ? "CUT" 729 : 730 "UNKNOWN"), 731 flag); 732 } 733 } else { 734 /* new item doesn't fall into L[0] */ 735 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 736 } 737 } 738 739 /* tb->lnum[0] > 0 */ 740 /* Calculate new item position */ 741 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); 742 743 if (tb->rnum[0] > 0) { 744 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 745 n = B_NR_ITEMS(tbS0); 746 switch (flag) { 747 748 case M_INSERT: /* insert item */ 749 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ 750 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ 751 loff_t old_key_comp, old_len, 752 r_zeros_number; 753 const char *r_body; 754 int version; 755 loff_t offset; 756 757 leaf_shift_right(tb, tb->rnum[0] - 1, 758 -1); 759 760 version = ih_version(ih); 761 /* Remember key component and item length */ 762 old_key_comp = le_ih_k_offset(ih); 763 old_len = ih_item_len(ih); 764 765 /* Calculate key component and item length to insert into R[0] */ 766 offset = 767 le_ih_k_offset(ih) + 768 ((old_len - 769 tb-> 770 rbytes) << (is_indirect_le_ih(ih) 771 ? tb->tb_sb-> 772 s_blocksize_bits - 773 UNFM_P_SHIFT : 0)); 774 set_le_ih_k_offset(ih, offset); 775 put_ih_item_len(ih, tb->rbytes); 776 /* Insert part of the item into R[0] */ 777 buffer_info_init_right(tb, &bi); 778 if ((old_len - tb->rbytes) > zeros_num) { 779 r_zeros_number = 0; 780 r_body = 781 body + (old_len - 782 tb->rbytes) - 783 zeros_num; 784 } else { 785 r_body = body; 786 r_zeros_number = 787 zeros_num - (old_len - 788 tb->rbytes); 789 zeros_num -= r_zeros_number; 790 } 791 792 leaf_insert_into_buf(&bi, 0, ih, r_body, 793 r_zeros_number); 794 795 /* Replace right delimiting key by first key in R[0] */ 796 replace_key(tb, tb->CFR[0], tb->rkey[0], 797 tb->R[0], 0); 798 799 /* Calculate key component and item length to insert into S[0] */ 800 set_le_ih_k_offset(ih, old_key_comp); 801 put_ih_item_len(ih, 802 old_len - tb->rbytes); 803 804 tb->insert_size[0] -= tb->rbytes; 805 806 } else { /* whole new item falls into R[0] */ 807 808 /* Shift rnum[0]-1 items to R[0] */ 809 ret_val = 810 leaf_shift_right(tb, 811 tb->rnum[0] - 1, 812 tb->rbytes); 813 /* Insert new item into R[0] */ 814 buffer_info_init_right(tb, &bi); 815 leaf_insert_into_buf(&bi, 816 item_pos - n + 817 tb->rnum[0] - 1, 818 ih, body, 819 zeros_num); 820 821 if (item_pos - n + tb->rnum[0] - 1 == 0) { 822 replace_key(tb, tb->CFR[0], 823 tb->rkey[0], 824 tb->R[0], 0); 825 826 } 827 zeros_num = tb->insert_size[0] = 0; 828 } 829 } else { /* new item or part of it doesn't fall into R[0] */ 830 831 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 832 } 833 break; 834 835 case M_PASTE: /* append item */ 836 837 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ 838 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ 839 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ 840 int entry_count; 841 842 RFALSE(zeros_num, 843 "PAP-12145: invalid parameter in case of a directory"); 844 entry_count = 845 I_ENTRY_COUNT(B_N_PITEM_HEAD 846 (tbS0, 847 item_pos)); 848 if (entry_count - tb->rbytes < 849 pos_in_item) 850 /* new directory entry falls into R[0] */ 851 { 852 int paste_entry_position; 853 854 RFALSE(tb->rbytes - 1 >= 855 entry_count 856 || !tb-> 857 insert_size[0], 858 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", 859 tb->rbytes, 860 entry_count); 861 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ 862 leaf_shift_right(tb, 863 tb-> 864 rnum 865 [0], 866 tb-> 867 rbytes 868 - 1); 869 /* Paste given directory entry to directory item */ 870 paste_entry_position = 871 pos_in_item - 872 entry_count + 873 tb->rbytes - 1; 874 buffer_info_init_right(tb, &bi); 875 leaf_paste_in_buffer 876 (&bi, 0, 877 paste_entry_position, 878 tb->insert_size[0], 879 body, zeros_num); 880 /* paste entry */ 881 leaf_paste_entries(&bi, 882 0, 883 paste_entry_position, 884 1, 885 (struct 886 reiserfs_de_head 887 *) 888 body, 889 body 890 + 891 DEH_SIZE, 892 tb-> 893 insert_size 894 [0] 895 ); 896 897 if (paste_entry_position 898 == 0) { 899 /* change delimiting keys */ 900 replace_key(tb, 901 tb-> 902 CFR 903 [0], 904 tb-> 905 rkey 906 [0], 907 tb-> 908 R 909 [0], 910 0); 911 } 912 913 tb->insert_size[0] = 0; 914 pos_in_item++; 915 } else { /* new directory entry doesn't fall into R[0] */ 916 917 leaf_shift_right(tb, 918 tb-> 919 rnum 920 [0], 921 tb-> 922 rbytes); 923 } 924 } else { /* regular object */ 925 926 int n_shift, n_rem, 927 r_zeros_number; 928 const char *r_body; 929 930 /* Calculate number of bytes which must be shifted from appended item */ 931 if ((n_shift = 932 tb->rbytes - 933 tb->insert_size[0]) < 0) 934 n_shift = 0; 935 936 RFALSE(pos_in_item != 937 ih_item_len 938 (B_N_PITEM_HEAD 939 (tbS0, item_pos)), 940 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d", 941 pos_in_item, 942 ih_item_len 943 (B_N_PITEM_HEAD 944 (tbS0, item_pos))); 945 946 leaf_shift_right(tb, 947 tb->rnum[0], 948 n_shift); 949 /* Calculate number of bytes which must remain in body after appending to R[0] */ 950 if ((n_rem = 951 tb->insert_size[0] - 952 tb->rbytes) < 0) 953 n_rem = 0; 954 955 { 956 int version; 957 unsigned long temp_rem = 958 n_rem; 959 960 version = 961 ih_version 962 (B_N_PITEM_HEAD 963 (tb->R[0], 0)); 964 if (is_indirect_le_key 965 (version, 966 B_N_PKEY(tb->R[0], 967 0))) { 968 temp_rem = 969 n_rem << 970 (tb->tb_sb-> 971 s_blocksize_bits 972 - 973 UNFM_P_SHIFT); 974 } 975 set_le_key_k_offset 976 (version, 977 B_N_PKEY(tb->R[0], 978 0), 979 le_key_k_offset 980 (version, 981 B_N_PKEY(tb->R[0], 982 0)) + 983 temp_rem); 984 set_le_key_k_offset 985 (version, 986 B_N_PDELIM_KEY(tb-> 987 CFR 988 [0], 989 tb-> 990 rkey 991 [0]), 992 le_key_k_offset 993 (version, 994 B_N_PDELIM_KEY 995 (tb->CFR[0], 996 tb->rkey[0])) + 997 temp_rem); 998 } 999 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; 1000 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 1001 do_balance_mark_internal_dirty 1002 (tb, tb->CFR[0], 0); 1003 1004 /* Append part of body into R[0] */ 1005 buffer_info_init_right(tb, &bi); 1006 if (n_rem > zeros_num) { 1007 r_zeros_number = 0; 1008 r_body = 1009 body + n_rem - 1010 zeros_num; 1011 } else { 1012 r_body = body; 1013 r_zeros_number = 1014 zeros_num - n_rem; 1015 zeros_num -= 1016 r_zeros_number; 1017 } 1018 1019 leaf_paste_in_buffer(&bi, 0, 1020 n_shift, 1021 tb-> 1022 insert_size 1023 [0] - 1024 n_rem, 1025 r_body, 1026 r_zeros_number); 1027 1028 if (is_indirect_le_ih 1029 (B_N_PITEM_HEAD 1030 (tb->R[0], 0))) { 1031 #if 0 1032 RFALSE(n_rem, 1033 "PAP-12160: paste more than one unformatted node pointer"); 1034 #endif 1035 set_ih_free_space 1036 (B_N_PITEM_HEAD 1037 (tb->R[0], 0), 0); 1038 } 1039 tb->insert_size[0] = n_rem; 1040 if (!n_rem) 1041 pos_in_item++; 1042 } 1043 } else { /* pasted item in whole falls into R[0] */ 1044 1045 struct item_head *pasted; 1046 1047 ret_val = 1048 leaf_shift_right(tb, tb->rnum[0], 1049 tb->rbytes); 1050 /* append item in R[0] */ 1051 if (pos_in_item >= 0) { 1052 buffer_info_init_right(tb, &bi); 1053 leaf_paste_in_buffer(&bi, 1054 item_pos - 1055 n + 1056 tb-> 1057 rnum[0], 1058 pos_in_item, 1059 tb-> 1060 insert_size 1061 [0], body, 1062 zeros_num); 1063 } 1064 1065 /* paste new entry, if item is directory item */ 1066 pasted = 1067 B_N_PITEM_HEAD(tb->R[0], 1068 item_pos - n + 1069 tb->rnum[0]); 1070 if (is_direntry_le_ih(pasted) 1071 && pos_in_item >= 0) { 1072 leaf_paste_entries(&bi, 1073 item_pos - 1074 n + 1075 tb->rnum[0], 1076 pos_in_item, 1077 1, 1078 (struct 1079 reiserfs_de_head 1080 *)body, 1081 body + 1082 DEH_SIZE, 1083 tb-> 1084 insert_size 1085 [0] 1086 ); 1087 if (!pos_in_item) { 1088 1089 RFALSE(item_pos - n + 1090 tb->rnum[0], 1091 "PAP-12165: directory item must be first item of node when pasting is in 0th position"); 1092 1093 /* update delimiting keys */ 1094 replace_key(tb, 1095 tb->CFR[0], 1096 tb->rkey[0], 1097 tb->R[0], 1098 0); 1099 } 1100 } 1101 1102 if (is_indirect_le_ih(pasted)) 1103 set_ih_free_space(pasted, 0); 1104 zeros_num = tb->insert_size[0] = 0; 1105 } 1106 } else { /* new item doesn't fall into R[0] */ 1107 1108 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 1109 } 1110 break; 1111 default: /* cases d and t */ 1112 reiserfs_panic(tb->tb_sb, "PAP-12175", 1113 "rnum > 0: unexpected mode: %s(%d)", 1114 (flag == 1115 M_DELETE) ? "DELETE" : ((flag == 1116 M_CUT) ? "CUT" 1117 : "UNKNOWN"), 1118 flag); 1119 } 1120 1121 } 1122 1123 /* tb->rnum[0] > 0 */ 1124 RFALSE(tb->blknum[0] > 3, 1125 "PAP-12180: blknum can not be %d. It must be <= 3", 1126 tb->blknum[0]); 1127 RFALSE(tb->blknum[0] < 0, 1128 "PAP-12185: blknum can not be %d. It must be >= 0", 1129 tb->blknum[0]); 1130 1131 /* if while adding to a node we discover that it is possible to split 1132 it in two, and merge the left part into the left neighbor and the 1133 right part into the right neighbor, eliminating the node */ 1134 if (tb->blknum[0] == 0) { /* node S[0] is empty now */ 1135 1136 RFALSE(!tb->lnum[0] || !tb->rnum[0], 1137 "PAP-12190: lnum and rnum must not be zero"); 1138 /* if insertion was done before 0-th position in R[0], right 1139 delimiting key of the tb->L[0]'s and left delimiting key are 1140 not set correctly */ 1141 if (tb->CFL[0]) { 1142 if (!tb->CFR[0]) 1143 reiserfs_panic(tb->tb_sb, "vs-12195", 1144 "CFR not initialized"); 1145 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), 1146 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); 1147 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); 1148 } 1149 1150 reiserfs_invalidate_buffer(tb, tbS0); 1151 return 0; 1152 } 1153 1154 /* Fill new nodes that appear in place of S[0] */ 1155 1156 /* I am told that this copying is because we need an array to enable 1157 the looping code. -Hans */ 1158 snum[0] = tb->s1num, snum[1] = tb->s2num; 1159 sbytes[0] = tb->s1bytes; 1160 sbytes[1] = tb->s2bytes; 1161 for (i = tb->blknum[0] - 2; i >= 0; i--) { 1162 1163 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, 1164 snum[i]); 1165 1166 /* here we shift from S to S_new nodes */ 1167 1168 S_new[i] = get_FEB(tb); 1169 1170 /* initialized block type and tree level */ 1171 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); 1172 1173 n = B_NR_ITEMS(tbS0); 1174 1175 switch (flag) { 1176 case M_INSERT: /* insert item */ 1177 1178 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ 1179 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ 1180 int old_key_comp, old_len, 1181 r_zeros_number; 1182 const char *r_body; 1183 int version; 1184 1185 /* Move snum[i]-1 items from S[0] to S_new[i] */ 1186 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1187 snum[i] - 1, -1, 1188 S_new[i]); 1189 /* Remember key component and item length */ 1190 version = ih_version(ih); 1191 old_key_comp = le_ih_k_offset(ih); 1192 old_len = ih_item_len(ih); 1193 1194 /* Calculate key component and item length to insert into S_new[i] */ 1195 set_le_ih_k_offset(ih, 1196 le_ih_k_offset(ih) + 1197 ((old_len - 1198 sbytes[i]) << 1199 (is_indirect_le_ih 1200 (ih) ? tb->tb_sb-> 1201 s_blocksize_bits - 1202 UNFM_P_SHIFT : 1203 0))); 1204 1205 put_ih_item_len(ih, sbytes[i]); 1206 1207 /* Insert part of the item into S_new[i] before 0-th item */ 1208 buffer_info_init_bh(tb, &bi, S_new[i]); 1209 1210 if ((old_len - sbytes[i]) > zeros_num) { 1211 r_zeros_number = 0; 1212 r_body = 1213 body + (old_len - 1214 sbytes[i]) - 1215 zeros_num; 1216 } else { 1217 r_body = body; 1218 r_zeros_number = 1219 zeros_num - (old_len - 1220 sbytes[i]); 1221 zeros_num -= r_zeros_number; 1222 } 1223 1224 leaf_insert_into_buf(&bi, 0, ih, r_body, 1225 r_zeros_number); 1226 1227 /* Calculate key component and item length to insert into S[i] */ 1228 set_le_ih_k_offset(ih, old_key_comp); 1229 put_ih_item_len(ih, 1230 old_len - sbytes[i]); 1231 tb->insert_size[0] -= sbytes[i]; 1232 } else { /* whole new item falls into S_new[i] */ 1233 1234 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ 1235 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1236 snum[i] - 1, sbytes[i], 1237 S_new[i]); 1238 1239 /* Insert new item into S_new[i] */ 1240 buffer_info_init_bh(tb, &bi, S_new[i]); 1241 leaf_insert_into_buf(&bi, 1242 item_pos - n + 1243 snum[i] - 1, ih, 1244 body, zeros_num); 1245 1246 zeros_num = tb->insert_size[0] = 0; 1247 } 1248 } 1249 1250 else { /* new item or it part don't falls into S_new[i] */ 1251 1252 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1253 snum[i], sbytes[i], S_new[i]); 1254 } 1255 break; 1256 1257 case M_PASTE: /* append item */ 1258 1259 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ 1260 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ 1261 struct item_head *aux_ih; 1262 1263 RFALSE(ih, "PAP-12210: ih must be 0"); 1264 1265 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); 1266 if (is_direntry_le_ih(aux_ih)) { 1267 /* we append to directory item */ 1268 1269 int entry_count; 1270 1271 entry_count = 1272 ih_entry_count(aux_ih); 1273 1274 if (entry_count - sbytes[i] < 1275 pos_in_item 1276 && pos_in_item <= 1277 entry_count) { 1278 /* new directory entry falls into S_new[i] */ 1279 1280 RFALSE(!tb-> 1281 insert_size[0], 1282 "PAP-12215: insert_size is already 0"); 1283 RFALSE(sbytes[i] - 1 >= 1284 entry_count, 1285 "PAP-12220: there are no so much entries (%d), only %d", 1286 sbytes[i] - 1, 1287 entry_count); 1288 1289 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ 1290 leaf_move_items 1291 (LEAF_FROM_S_TO_SNEW, 1292 tb, snum[i], 1293 sbytes[i] - 1, 1294 S_new[i]); 1295 /* Paste given directory entry to directory item */ 1296 buffer_info_init_bh(tb, &bi, S_new[i]); 1297 leaf_paste_in_buffer 1298 (&bi, 0, 1299 pos_in_item - 1300 entry_count + 1301 sbytes[i] - 1, 1302 tb->insert_size[0], 1303 body, zeros_num); 1304 /* paste new directory entry */ 1305 leaf_paste_entries(&bi, 1306 0, 1307 pos_in_item 1308 - 1309 entry_count 1310 + 1311 sbytes 1312 [i] - 1313 1, 1, 1314 (struct 1315 reiserfs_de_head 1316 *) 1317 body, 1318 body 1319 + 1320 DEH_SIZE, 1321 tb-> 1322 insert_size 1323 [0] 1324 ); 1325 tb->insert_size[0] = 0; 1326 pos_in_item++; 1327 } else { /* new directory entry doesn't fall into S_new[i] */ 1328 leaf_move_items 1329 (LEAF_FROM_S_TO_SNEW, 1330 tb, snum[i], 1331 sbytes[i], 1332 S_new[i]); 1333 } 1334 } else { /* regular object */ 1335 1336 int n_shift, n_rem, 1337 r_zeros_number; 1338 const char *r_body; 1339 1340 RFALSE(pos_in_item != 1341 ih_item_len 1342 (B_N_PITEM_HEAD 1343 (tbS0, item_pos)) 1344 || tb->insert_size[0] <= 1345 0, 1346 "PAP-12225: item too short or insert_size <= 0"); 1347 1348 /* Calculate number of bytes which must be shifted from appended item */ 1349 n_shift = 1350 sbytes[i] - 1351 tb->insert_size[0]; 1352 if (n_shift < 0) 1353 n_shift = 0; 1354 leaf_move_items 1355 (LEAF_FROM_S_TO_SNEW, tb, 1356 snum[i], n_shift, 1357 S_new[i]); 1358 1359 /* Calculate number of bytes which must remain in body after append to S_new[i] */ 1360 n_rem = 1361 tb->insert_size[0] - 1362 sbytes[i]; 1363 if (n_rem < 0) 1364 n_rem = 0; 1365 /* Append part of body into S_new[0] */ 1366 buffer_info_init_bh(tb, &bi, S_new[i]); 1367 if (n_rem > zeros_num) { 1368 r_zeros_number = 0; 1369 r_body = 1370 body + n_rem - 1371 zeros_num; 1372 } else { 1373 r_body = body; 1374 r_zeros_number = 1375 zeros_num - n_rem; 1376 zeros_num -= 1377 r_zeros_number; 1378 } 1379 1380 leaf_paste_in_buffer(&bi, 0, 1381 n_shift, 1382 tb-> 1383 insert_size 1384 [0] - 1385 n_rem, 1386 r_body, 1387 r_zeros_number); 1388 { 1389 struct item_head *tmp; 1390 1391 tmp = 1392 B_N_PITEM_HEAD(S_new 1393 [i], 1394 0); 1395 if (is_indirect_le_ih 1396 (tmp)) { 1397 set_ih_free_space 1398 (tmp, 0); 1399 set_le_ih_k_offset 1400 (tmp, 1401 le_ih_k_offset 1402 (tmp) + 1403 (n_rem << 1404 (tb-> 1405 tb_sb-> 1406 s_blocksize_bits 1407 - 1408 UNFM_P_SHIFT))); 1409 } else { 1410 set_le_ih_k_offset 1411 (tmp, 1412 le_ih_k_offset 1413 (tmp) + 1414 n_rem); 1415 } 1416 } 1417 1418 tb->insert_size[0] = n_rem; 1419 if (!n_rem) 1420 pos_in_item++; 1421 } 1422 } else 1423 /* item falls wholly into S_new[i] */ 1424 { 1425 int leaf_mi; 1426 struct item_head *pasted; 1427 1428 #ifdef CONFIG_REISERFS_CHECK 1429 struct item_head *ih_check = 1430 B_N_PITEM_HEAD(tbS0, item_pos); 1431 1432 if (!is_direntry_le_ih(ih_check) 1433 && (pos_in_item != ih_item_len(ih_check) 1434 || tb->insert_size[0] <= 0)) 1435 reiserfs_panic(tb->tb_sb, 1436 "PAP-12235", 1437 "pos_in_item " 1438 "must be equal " 1439 "to ih_item_len"); 1440 #endif /* CONFIG_REISERFS_CHECK */ 1441 1442 leaf_mi = 1443 leaf_move_items(LEAF_FROM_S_TO_SNEW, 1444 tb, snum[i], 1445 sbytes[i], 1446 S_new[i]); 1447 1448 RFALSE(leaf_mi, 1449 "PAP-12240: unexpected value returned by leaf_move_items (%d)", 1450 leaf_mi); 1451 1452 /* paste into item */ 1453 buffer_info_init_bh(tb, &bi, S_new[i]); 1454 leaf_paste_in_buffer(&bi, 1455 item_pos - n + 1456 snum[i], 1457 pos_in_item, 1458 tb->insert_size[0], 1459 body, zeros_num); 1460 1461 pasted = 1462 B_N_PITEM_HEAD(S_new[i], 1463 item_pos - n + 1464 snum[i]); 1465 if (is_direntry_le_ih(pasted)) { 1466 leaf_paste_entries(&bi, 1467 item_pos - 1468 n + snum[i], 1469 pos_in_item, 1470 1, 1471 (struct 1472 reiserfs_de_head 1473 *)body, 1474 body + 1475 DEH_SIZE, 1476 tb-> 1477 insert_size 1478 [0] 1479 ); 1480 } 1481 1482 /* if we paste to indirect item update ih_free_space */ 1483 if (is_indirect_le_ih(pasted)) 1484 set_ih_free_space(pasted, 0); 1485 zeros_num = tb->insert_size[0] = 0; 1486 } 1487 } 1488 1489 else { /* pasted item doesn't fall into S_new[i] */ 1490 1491 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1492 snum[i], sbytes[i], S_new[i]); 1493 } 1494 break; 1495 default: /* cases d and t */ 1496 reiserfs_panic(tb->tb_sb, "PAP-12245", 1497 "blknum > 2: unexpected mode: %s(%d)", 1498 (flag == 1499 M_DELETE) ? "DELETE" : ((flag == 1500 M_CUT) ? "CUT" 1501 : "UNKNOWN"), 1502 flag); 1503 } 1504 1505 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); 1506 insert_ptr[i] = S_new[i]; 1507 1508 RFALSE(!buffer_journaled(S_new[i]) 1509 || buffer_journal_dirty(S_new[i]) 1510 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", 1511 i, S_new[i]); 1512 } 1513 1514 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1515 affected item which remains in S */ 1516 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ 1517 1518 switch (flag) { 1519 case M_INSERT: /* insert item into S[0] */ 1520 buffer_info_init_tbS0(tb, &bi); 1521 leaf_insert_into_buf(&bi, item_pos, ih, body, 1522 zeros_num); 1523 1524 /* If we insert the first key change the delimiting key */ 1525 if (item_pos == 0) { 1526 if (tb->CFL[0]) /* can be 0 in reiserfsck */ 1527 replace_key(tb, tb->CFL[0], tb->lkey[0], 1528 tbS0, 0); 1529 1530 } 1531 break; 1532 1533 case M_PASTE:{ /* append item in S[0] */ 1534 struct item_head *pasted; 1535 1536 pasted = B_N_PITEM_HEAD(tbS0, item_pos); 1537 /* when directory, may be new entry already pasted */ 1538 if (is_direntry_le_ih(pasted)) { 1539 if (pos_in_item >= 0 && 1540 pos_in_item <= 1541 ih_entry_count(pasted)) { 1542 1543 RFALSE(!tb->insert_size[0], 1544 "PAP-12260: insert_size is 0 already"); 1545 1546 /* prepare space */ 1547 buffer_info_init_tbS0(tb, &bi); 1548 leaf_paste_in_buffer(&bi, 1549 item_pos, 1550 pos_in_item, 1551 tb-> 1552 insert_size 1553 [0], body, 1554 zeros_num); 1555 1556 /* paste entry */ 1557 leaf_paste_entries(&bi, 1558 item_pos, 1559 pos_in_item, 1560 1, 1561 (struct 1562 reiserfs_de_head 1563 *)body, 1564 body + 1565 DEH_SIZE, 1566 tb-> 1567 insert_size 1568 [0] 1569 ); 1570 if (!item_pos && !pos_in_item) { 1571 RFALSE(!tb->CFL[0] 1572 || !tb->L[0], 1573 "PAP-12270: CFL[0]/L[0] must be specified"); 1574 if (tb->CFL[0]) { 1575 replace_key(tb, 1576 tb-> 1577 CFL 1578 [0], 1579 tb-> 1580 lkey 1581 [0], 1582 tbS0, 1583 0); 1584 1585 } 1586 } 1587 tb->insert_size[0] = 0; 1588 } 1589 } else { /* regular object */ 1590 if (pos_in_item == ih_item_len(pasted)) { 1591 1592 RFALSE(tb->insert_size[0] <= 0, 1593 "PAP-12275: insert size must not be %d", 1594 tb->insert_size[0]); 1595 buffer_info_init_tbS0(tb, &bi); 1596 leaf_paste_in_buffer(&bi, 1597 item_pos, 1598 pos_in_item, 1599 tb-> 1600 insert_size 1601 [0], body, 1602 zeros_num); 1603 1604 if (is_indirect_le_ih(pasted)) { 1605 #if 0 1606 RFALSE(tb-> 1607 insert_size[0] != 1608 UNFM_P_SIZE, 1609 "PAP-12280: insert_size for indirect item must be %d, not %d", 1610 UNFM_P_SIZE, 1611 tb-> 1612 insert_size[0]); 1613 #endif 1614 set_ih_free_space 1615 (pasted, 0); 1616 } 1617 tb->insert_size[0] = 0; 1618 } 1619 #ifdef CONFIG_REISERFS_CHECK 1620 else { 1621 if (tb->insert_size[0]) { 1622 print_cur_tb("12285"); 1623 reiserfs_panic(tb-> 1624 tb_sb, 1625 "PAP-12285", 1626 "insert_size " 1627 "must be 0 " 1628 "(%d)", 1629 tb->insert_size[0]); 1630 } 1631 } 1632 #endif /* CONFIG_REISERFS_CHECK */ 1633 1634 } 1635 } /* case M_PASTE: */ 1636 } 1637 } 1638 #ifdef CONFIG_REISERFS_CHECK 1639 if (flag == M_PASTE && tb->insert_size[0]) { 1640 print_cur_tb("12290"); 1641 reiserfs_panic(tb->tb_sb, 1642 "PAP-12290", "insert_size is still not 0 (%d)", 1643 tb->insert_size[0]); 1644 } 1645 #endif /* CONFIG_REISERFS_CHECK */ 1646 return 0; 1647 } /* Leaf level of the tree is balanced (end of balance_leaf) */ 1648 1649 /* Make empty node */ 1650 void make_empty_node(struct buffer_info *bi) 1651 { 1652 struct block_head *blkh; 1653 1654 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); 1655 1656 blkh = B_BLK_HEAD(bi->bi_bh); 1657 set_blkh_nr_item(blkh, 0); 1658 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh)); 1659 1660 if (bi->bi_parent) 1661 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ 1662 } 1663 1664 /* Get first empty buffer */ 1665 struct buffer_head *get_FEB(struct tree_balance *tb) 1666 { 1667 int i; 1668 struct buffer_info bi; 1669 1670 for (i = 0; i < MAX_FEB_SIZE; i++) 1671 if (tb->FEB[i] != NULL) 1672 break; 1673 1674 if (i == MAX_FEB_SIZE) 1675 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty"); 1676 1677 buffer_info_init_bh(tb, &bi, tb->FEB[i]); 1678 make_empty_node(&bi); 1679 set_buffer_uptodate(tb->FEB[i]); 1680 tb->used[i] = tb->FEB[i]; 1681 tb->FEB[i] = NULL; 1682 1683 return tb->used[i]; 1684 } 1685 1686 /* This is now used because reiserfs_free_block has to be able to 1687 ** schedule. 1688 */ 1689 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) 1690 { 1691 int i; 1692 1693 if (buffer_dirty(bh)) 1694 reiserfs_warning(tb->tb_sb, "reiserfs-12320", 1695 "called with dirty buffer"); 1696 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) 1697 if (!tb->thrown[i]) { 1698 tb->thrown[i] = bh; 1699 get_bh(bh); /* free_thrown puts this */ 1700 return; 1701 } 1702 reiserfs_warning(tb->tb_sb, "reiserfs-12321", 1703 "too many thrown buffers"); 1704 } 1705 1706 static void free_thrown(struct tree_balance *tb) 1707 { 1708 int i; 1709 b_blocknr_t blocknr; 1710 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) { 1711 if (tb->thrown[i]) { 1712 blocknr = tb->thrown[i]->b_blocknr; 1713 if (buffer_dirty(tb->thrown[i])) 1714 reiserfs_warning(tb->tb_sb, "reiserfs-12322", 1715 "called with dirty buffer %d", 1716 blocknr); 1717 brelse(tb->thrown[i]); /* incremented in store_thrown */ 1718 reiserfs_free_block(tb->transaction_handle, NULL, 1719 blocknr, 0); 1720 } 1721 } 1722 } 1723 1724 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) 1725 { 1726 struct block_head *blkh; 1727 blkh = B_BLK_HEAD(bh); 1728 set_blkh_level(blkh, FREE_LEVEL); 1729 set_blkh_nr_item(blkh, 0); 1730 1731 clear_buffer_dirty(bh); 1732 store_thrown(tb, bh); 1733 } 1734 1735 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ 1736 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, 1737 struct buffer_head *src, int n_src) 1738 { 1739 1740 RFALSE(dest == NULL || src == NULL, 1741 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", 1742 src, dest); 1743 RFALSE(!B_IS_KEYS_LEVEL(dest), 1744 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf", 1745 dest); 1746 RFALSE(n_dest < 0 || n_src < 0, 1747 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); 1748 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), 1749 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", 1750 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); 1751 1752 if (B_IS_ITEMS_LEVEL(src)) 1753 /* source buffer contains leaf node */ 1754 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src), 1755 KEY_SIZE); 1756 else 1757 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src), 1758 KEY_SIZE); 1759 1760 do_balance_mark_internal_dirty(tb, dest, 0); 1761 } 1762 1763 int get_left_neighbor_position(struct tree_balance *tb, int h) 1764 { 1765 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); 1766 1767 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL, 1768 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", 1769 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h)); 1770 1771 if (Sh_position == 0) 1772 return B_NR_ITEMS(tb->FL[h]); 1773 else 1774 return Sh_position - 1; 1775 } 1776 1777 int get_right_neighbor_position(struct tree_balance *tb, int h) 1778 { 1779 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); 1780 1781 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL, 1782 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", 1783 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); 1784 1785 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h))) 1786 return 0; 1787 else 1788 return Sh_position + 1; 1789 } 1790 1791 #ifdef CONFIG_REISERFS_CHECK 1792 1793 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); 1794 static void check_internal_node(struct super_block *s, struct buffer_head *bh, 1795 char *mes) 1796 { 1797 struct disk_child *dc; 1798 int i; 1799 1800 RFALSE(!bh, "PAP-12336: bh == 0"); 1801 1802 if (!bh || !B_IS_IN_TREE(bh)) 1803 return; 1804 1805 RFALSE(!buffer_dirty(bh) && 1806 !(buffer_journaled(bh) || buffer_journal_dirty(bh)), 1807 "PAP-12337: buffer (%b) must be dirty", bh); 1808 dc = B_N_CHILD(bh, 0); 1809 1810 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { 1811 if (!is_reusable(s, dc_block_number(dc), 1)) { 1812 print_cur_tb(mes); 1813 reiserfs_panic(s, "PAP-12338", 1814 "invalid child pointer %y in %b", 1815 dc, bh); 1816 } 1817 } 1818 } 1819 1820 static int locked_or_not_in_tree(struct tree_balance *tb, 1821 struct buffer_head *bh, char *which) 1822 { 1823 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || 1824 !B_IS_IN_TREE(bh)) { 1825 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh); 1826 return 1; 1827 } 1828 return 0; 1829 } 1830 1831 static int check_before_balancing(struct tree_balance *tb) 1832 { 1833 int retval = 0; 1834 1835 if (REISERFS_SB(tb->tb_sb)->cur_tb) { 1836 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " 1837 "occurred based on cur_tb not being null at " 1838 "this point in code. do_balance cannot properly " 1839 "handle concurrent tree accesses on a same " 1840 "mount point."); 1841 } 1842 1843 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have 1844 prepped all of these for us). */ 1845 if (tb->lnum[0]) { 1846 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); 1847 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); 1848 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]"); 1849 check_leaf(tb->L[0]); 1850 } 1851 if (tb->rnum[0]) { 1852 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]"); 1853 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]"); 1854 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]"); 1855 check_leaf(tb->R[0]); 1856 } 1857 retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path), 1858 "S[0]"); 1859 check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); 1860 1861 return retval; 1862 } 1863 1864 static void check_after_balance_leaf(struct tree_balance *tb) 1865 { 1866 if (tb->lnum[0]) { 1867 if (B_FREE_SPACE(tb->L[0]) != 1868 MAX_CHILD_SIZE(tb->L[0]) - 1869 dc_size(B_N_CHILD 1870 (tb->FL[0], get_left_neighbor_position(tb, 0)))) { 1871 print_cur_tb("12221"); 1872 reiserfs_panic(tb->tb_sb, "PAP-12355", 1873 "shift to left was incorrect"); 1874 } 1875 } 1876 if (tb->rnum[0]) { 1877 if (B_FREE_SPACE(tb->R[0]) != 1878 MAX_CHILD_SIZE(tb->R[0]) - 1879 dc_size(B_N_CHILD 1880 (tb->FR[0], get_right_neighbor_position(tb, 0)))) { 1881 print_cur_tb("12222"); 1882 reiserfs_panic(tb->tb_sb, "PAP-12360", 1883 "shift to right was incorrect"); 1884 } 1885 } 1886 if (PATH_H_PBUFFER(tb->tb_path, 1) && 1887 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) != 1888 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - 1889 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), 1890 PATH_H_POSITION(tb->tb_path, 1)))))) { 1891 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)); 1892 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - 1893 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), 1894 PATH_H_POSITION(tb->tb_path, 1895 1)))); 1896 print_cur_tb("12223"); 1897 reiserfs_warning(tb->tb_sb, "reiserfs-12363", 1898 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " 1899 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d", 1900 left, 1901 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)), 1902 PATH_H_PBUFFER(tb->tb_path, 1), 1903 PATH_H_POSITION(tb->tb_path, 1), 1904 dc_size(B_N_CHILD 1905 (PATH_H_PBUFFER(tb->tb_path, 1), 1906 PATH_H_POSITION(tb->tb_path, 1))), 1907 right); 1908 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect"); 1909 } 1910 } 1911 1912 static void check_leaf_level(struct tree_balance *tb) 1913 { 1914 check_leaf(tb->L[0]); 1915 check_leaf(tb->R[0]); 1916 check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); 1917 } 1918 1919 static void check_internal_levels(struct tree_balance *tb) 1920 { 1921 int h; 1922 1923 /* check all internal nodes */ 1924 for (h = 1; tb->insert_size[h]; h++) { 1925 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h), 1926 "BAD BUFFER ON PATH"); 1927 if (tb->lnum[h]) 1928 check_internal_node(tb->tb_sb, tb->L[h], "BAD L"); 1929 if (tb->rnum[h]) 1930 check_internal_node(tb->tb_sb, tb->R[h], "BAD R"); 1931 } 1932 1933 } 1934 1935 #endif 1936 1937 /* Now we have all of the buffers that must be used in balancing of 1938 the tree. We rely on the assumption that schedule() will not occur 1939 while do_balance works. ( Only interrupt handlers are acceptable.) 1940 We balance the tree according to the analysis made before this, 1941 using buffers already obtained. For SMP support it will someday be 1942 necessary to add ordered locking of tb. */ 1943 1944 /* Some interesting rules of balancing: 1945 1946 we delete a maximum of two nodes per level per balancing: we never 1947 delete R, when we delete two of three nodes L, S, R then we move 1948 them into R. 1949 1950 we only delete L if we are deleting two nodes, if we delete only 1951 one node we delete S 1952 1953 if we shift leaves then we shift as much as we can: this is a 1954 deliberate policy of extremism in node packing which results in 1955 higher average utilization after repeated random balance operations 1956 at the cost of more memory copies and more balancing as a result of 1957 small insertions to full nodes. 1958 1959 if we shift internal nodes we try to evenly balance the node 1960 utilization, with consequent less balancing at the cost of lower 1961 utilization. 1962 1963 one could argue that the policy for directories in leaves should be 1964 that of internal nodes, but we will wait until another day to 1965 evaluate this.... It would be nice to someday measure and prove 1966 these assumptions as to what is optimal.... 1967 1968 */ 1969 1970 static inline void do_balance_starts(struct tree_balance *tb) 1971 { 1972 /* use print_cur_tb() to see initial state of struct 1973 tree_balance */ 1974 1975 /* store_print_tb (tb); */ 1976 1977 /* do not delete, just comment it out */ 1978 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, 1979 "check");*/ 1980 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); 1981 #ifdef CONFIG_REISERFS_CHECK 1982 REISERFS_SB(tb->tb_sb)->cur_tb = tb; 1983 #endif 1984 } 1985 1986 static inline void do_balance_completed(struct tree_balance *tb) 1987 { 1988 1989 #ifdef CONFIG_REISERFS_CHECK 1990 check_leaf_level(tb); 1991 check_internal_levels(tb); 1992 REISERFS_SB(tb->tb_sb)->cur_tb = NULL; 1993 #endif 1994 1995 /* reiserfs_free_block is no longer schedule safe. So, we need to 1996 ** put the buffers we want freed on the thrown list during do_balance, 1997 ** and then free them now 1998 */ 1999 2000 REISERFS_SB(tb->tb_sb)->s_do_balance++; 2001 2002 /* release all nodes hold to perform the balancing */ 2003 unfix_nodes(tb); 2004 2005 free_thrown(tb); 2006 } 2007 2008 void do_balance(struct tree_balance *tb, /* tree_balance structure */ 2009 struct item_head *ih, /* item header of inserted item */ 2010 const char *body, /* body of inserted item or bytes to paste */ 2011 int flag) 2012 { /* i - insert, d - delete 2013 c - cut, p - paste 2014 2015 Cut means delete part of an item 2016 (includes removing an entry from a 2017 directory). 2018 2019 Delete means delete whole item. 2020 2021 Insert means add a new item into the 2022 tree. 2023 2024 Paste means to append to the end of an 2025 existing file or to insert a directory 2026 entry. */ 2027 int child_pos, /* position of a child node in its parent */ 2028 h; /* level of the tree being processed */ 2029 struct item_head insert_key[2]; /* in our processing of one level 2030 we sometimes determine what 2031 must be inserted into the next 2032 higher level. This insertion 2033 consists of a key or two keys 2034 and their corresponding 2035 pointers */ 2036 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next 2037 level */ 2038 2039 tb->tb_mode = flag; 2040 tb->need_balance_dirty = 0; 2041 2042 if (FILESYSTEM_CHANGED_TB(tb)) { 2043 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has " 2044 "changed"); 2045 } 2046 /* if we have no real work to do */ 2047 if (!tb->insert_size[0]) { 2048 reiserfs_warning(tb->tb_sb, "PAP-12350", 2049 "insert_size == 0, mode == %c", flag); 2050 unfix_nodes(tb); 2051 return; 2052 } 2053 2054 atomic_inc(&(fs_generation(tb->tb_sb))); 2055 do_balance_starts(tb); 2056 2057 /* balance leaf returns 0 except if combining L R and S into 2058 one node. see balance_internal() for explanation of this 2059 line of code. */ 2060 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + 2061 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); 2062 2063 #ifdef CONFIG_REISERFS_CHECK 2064 check_after_balance_leaf(tb); 2065 #endif 2066 2067 /* Balance internal level of the tree. */ 2068 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) 2069 child_pos = 2070 balance_internal(tb, h, child_pos, insert_key, insert_ptr); 2071 2072 do_balance_completed(tb); 2073 2074 } 2075