/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * alloc.c
 *
 * Extent allocs and frees
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "journal.h"
#include "localalloc.h"
#include "suballoc.h"
#include "sysfile.h"
#include "file.h"
#include "super.h"
#include "uptodate.h"

#include "buffer_head_io.h"

static int ocfs2_extent_contig(struct inode *inode,
			       struct ocfs2_extent_rec *ext,
			       u64 blkno);

static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
				     struct ocfs2_journal_handle *handle,
				     struct inode *inode,
				     int wanted,
				     struct ocfs2_alloc_context *meta_ac,
				     struct buffer_head *bhs[]);

static int ocfs2_add_branch(struct ocfs2_super *osb,
			    struct ocfs2_journal_handle *handle,
			    struct inode *inode,
			    struct buffer_head *fe_bh,
			    struct buffer_head *eb_bh,
			    struct buffer_head *last_eb_bh,
			    struct ocfs2_alloc_context *meta_ac);

static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
				  struct ocfs2_journal_handle *handle,
				  struct inode *inode,
				  struct buffer_head *fe_bh,
				  struct ocfs2_alloc_context *meta_ac,
				  struct buffer_head **ret_new_eb_bh);

static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
				  struct ocfs2_journal_handle *handle,
				  struct inode *inode,
				  struct buffer_head *fe_bh,
				  u64 blkno,
				  u32 new_clusters);

static int ocfs2_find_branch_target(struct ocfs2_super *osb,
				    struct inode *inode,
				    struct buffer_head *fe_bh,
				    struct buffer_head **target_bh);

static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
				       struct inode *inode,
				       struct ocfs2_dinode *fe,
				       u32 new_i_clusters,
				       struct buffer_head *old_last_eb,
				       struct buffer_head **new_last_eb);

static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);

static int ocfs2_extent_contig(struct inode *inode,
			       struct ocfs2_extent_rec *ext,
			       u64 blkno)
{
	return blkno == (le64_to_cpu(ext->e_blkno) +
			 ocfs2_clusters_to_blocks(inode->i_sb,
						  le32_to_cpu(ext->e_clusters)));
}

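/*
 * Illustration of the check above (made-up numbers): with eight
 * blocks per cluster, a record with e_blkno == 8000 and
 * e_clusters == 2 covers blocks 8000-8015, so only an allocation
 * starting at block 8016 counts as contiguous with it.
 */
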
/*
 * How many free extents have we got before we need more metadata?
 */
int ocfs2_num_free_extents(struct ocfs2_super *osb,
			   struct inode *inode,
			   struct ocfs2_dinode *fe)
{
	int retval;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_block *eb;
	struct buffer_head *eb_bh = NULL;

	mlog_entry_void();

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		retval = -EIO;
		goto bail;
	}

	if (fe->i_last_eb_blk) {
		retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
					  &eb_bh, OCFS2_BH_CACHED, inode);
		if (retval < 0) {
			mlog_errno(retval);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;
	} else
		el = &fe->id2.i_list;

	BUG_ON(el->l_tree_depth != 0);

	retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
bail:
	if (eb_bh)
		brelse(eb_bh);

	mlog_exit(retval);
	return retval;
}

/* Expects the array to already be allocated.
 *
 * Sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
 * l_count for you.
 */
static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
				     struct ocfs2_journal_handle *handle,
				     struct inode *inode,
				     int wanted,
				     struct ocfs2_alloc_context *meta_ac,
				     struct buffer_head *bhs[])
{
	int count, status, i;
	u16 suballoc_bit_start;
	u32 num_got;
	u64 first_blkno;
	struct ocfs2_extent_block *eb;

	mlog_entry_void();

	count = 0;
	while (count < wanted) {
		status = ocfs2_claim_metadata(osb,
					      handle,
					      meta_ac,
					      wanted - count,
					      &suballoc_bit_start,
					      &num_got,
					      &first_blkno);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		for (i = count; i < (num_got + count); i++) {
			bhs[i] = sb_getblk(osb->sb, first_blkno);
			if (bhs[i] == NULL) {
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
			ocfs2_set_new_buffer_uptodate(inode, bhs[i]);

			status = ocfs2_journal_access(handle, inode, bhs[i],
						      OCFS2_JOURNAL_ACCESS_CREATE);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}

			memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
			eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
			/* Ok, setup the minimal stuff here. */
			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
			eb->h_blkno = cpu_to_le64(first_blkno);
			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);

#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
			/* we always use slot zero's suballocator */
			eb->h_suballoc_slot = 0;
#else
			eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
#endif
			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
			eb->h_list.l_count =
				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));

			suballoc_bit_start++;
			first_blkno++;

			/* We'll also be dirtied by the caller, so
			 * this isn't absolutely necessary. */
			status = ocfs2_journal_dirty(handle, bhs[i]);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}

		count += num_got;
	}

	status = 0;
bail:
	if (status < 0) {
		for (i = 0; i < wanted; i++) {
			if (bhs[i])
				brelse(bhs[i]);
			bhs[i] = NULL;
		}
	}
	mlog_exit(status);
	return status;
}

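/*
 * Sketch of what ocfs2_add_branch() below builds for a depth-2 tree.
 * Each new block holds a single empty record pointing one level
 * further down, and the new depth-0 block becomes i_last_eb_blk:
 *
 *	dinode (or the eb passed in)
 *	     \
 *	      new eb, depth 1
 *	           \
 *	            new eb, depth 0 (leaf)
 */
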
/*
 * Add an entire tree branch to our inode. eb_bh is the extent block
 * to start at, if we don't want to start the branch at the dinode
 * structure.
 *
 * last_eb_bh is required as we have to update its next_leaf pointer
 * for the new last extent block.
 *
 * the new branch will be 'empty' in the sense that every block will
 * contain a single record with e_clusters == 0.
 */
static int ocfs2_add_branch(struct ocfs2_super *osb,
			    struct ocfs2_journal_handle *handle,
			    struct inode *inode,
			    struct buffer_head *fe_bh,
			    struct buffer_head *eb_bh,
			    struct buffer_head *last_eb_bh,
			    struct ocfs2_alloc_context *meta_ac)
{
	int status, new_blocks, i;
	u64 next_blkno, new_last_eb_blk;
	struct buffer_head *bh;
	struct buffer_head **new_eb_bhs = NULL;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *eb_el;
	struct ocfs2_extent_list *el;

	mlog_entry_void();

	BUG_ON(!last_eb_bh);

	fe = (struct ocfs2_dinode *) fe_bh->b_data;

	if (eb_bh) {
		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;
	} else
		el = &fe->id2.i_list;

	/* we never add a branch to a leaf. */
	BUG_ON(!el->l_tree_depth);

	new_blocks = le16_to_cpu(el->l_tree_depth);

	/* allocate the number of new eb blocks we need */
	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
			     GFP_KERNEL);
	if (!new_eb_bhs) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks,
					   meta_ac, new_eb_bhs);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* Note: new_eb_bhs[new_blocks - 1] is the block which will be
	 * linked with the rest of the tree.
	 * conversely, new_eb_bhs[0] is the new bottommost leaf.
	 *
	 * when we leave the loop, new_last_eb_blk will point to the
	 * newest leaf, and next_blkno will point to the topmost extent
	 * block. */
	next_blkno = new_last_eb_blk = 0;
	for (i = 0; i < new_blocks; i++) {
		bh = new_eb_bhs[i];
		eb = (struct ocfs2_extent_block *) bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
		eb_el = &eb->h_list;

		status = ocfs2_journal_access(handle, inode, bh,
					      OCFS2_JOURNAL_ACCESS_CREATE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		eb->h_next_leaf_blk = 0;
		eb_el->l_tree_depth = cpu_to_le16(i);
		eb_el->l_next_free_rec = cpu_to_le16(1);
		eb_el->l_recs[0].e_cpos = fe->i_clusters;
		eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
		eb_el->l_recs[0].e_clusters = cpu_to_le32(0);
		if (!eb_el->l_tree_depth)
			new_last_eb_blk = le64_to_cpu(eb->h_blkno);

		status = ocfs2_journal_dirty(handle, bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		next_blkno = le64_to_cpu(eb->h_blkno);
	}

	/* This is a bit hairy. We want to update up to three blocks
	 * here without leaving any of them in an inconsistent state
	 * in case of error. We don't have to worry about
	 * journal_dirty erroring as it won't unless we've aborted the
	 * handle (in which case we would never be here) so reserving
	 * the write with journal_access is all we need to do. */
	status = ocfs2_journal_access(handle, inode, last_eb_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (eb_bh) {
		status = ocfs2_journal_access(handle, inode, eb_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Link the new branch into the rest of the tree (el will
	 * either be on the fe, or the extent block passed in). */
	i = le16_to_cpu(el->l_next_free_rec);
	el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
	el->l_recs[i].e_cpos = fe->i_clusters;
	el->l_recs[i].e_clusters = 0;
	le16_add_cpu(&el->l_next_free_rec, 1);

	/* fe needs a new last extent block pointer, as does the
	 * next_leaf on the previously last-extent-block. */
	fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);

	eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
	eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);

	status = ocfs2_journal_dirty(handle, last_eb_bh);
	if (status < 0)
		mlog_errno(status);
	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0)
		mlog_errno(status);
	if (eb_bh) {
		status = ocfs2_journal_dirty(handle, eb_bh);
		if (status < 0)
			mlog_errno(status);
	}

	status = 0;
bail:
	if (new_eb_bhs) {
		for (i = 0; i < new_blocks; i++)
			if (new_eb_bhs[i])
				brelse(new_eb_bhs[i]);
		kfree(new_eb_bhs);
	}

	mlog_exit(status);
	return status;
}

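/*
 * Sketch of a depth shift on a depth-0 dinode list: the root list is
 * copied into a freshly allocated extent block, and the dinode is
 * left with a single record covering all i_clusters:
 *
 *	before:	dinode { rec0, rec1, ... }	depth 0
 *	after:	dinode { rec -> new eb }	depth 1
 *		new eb { rec0, rec1, ... }	depth 0
 */
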
/*
 * Adds another level to the allocation tree. Returns the new extent
 * block buffer so the caller can add a branch to it after this call.
 */
static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
				  struct ocfs2_journal_handle *handle,
				  struct inode *inode,
				  struct buffer_head *fe_bh,
				  struct ocfs2_alloc_context *meta_ac,
				  struct buffer_head **ret_new_eb_bh)
{
	int status, i;
	struct buffer_head *new_eb_bh = NULL;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *fe_el;
	struct ocfs2_extent_list *eb_el;

	mlog_entry_void();

	status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac,
					   &new_eb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
		OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
		status = -EIO;
		goto bail;
	}

	eb_el = &eb->h_list;
	fe = (struct ocfs2_dinode *) fe_bh->b_data;
	fe_el = &fe->id2.i_list;

	status = ocfs2_journal_access(handle, inode, new_eb_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* copy the fe data into the new extent block */
	eb_el->l_tree_depth = fe_el->l_tree_depth;
	eb_el->l_next_free_rec = fe_el->l_next_free_rec;
	for (i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
		eb_el->l_recs[i].e_cpos = fe_el->l_recs[i].e_cpos;
		eb_el->l_recs[i].e_clusters = fe_el->l_recs[i].e_clusters;
		eb_el->l_recs[i].e_blkno = fe_el->l_recs[i].e_blkno;
	}

	status = ocfs2_journal_dirty(handle, new_eb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	/* update fe now */
	le16_add_cpu(&fe_el->l_tree_depth, 1);
	fe_el->l_recs[0].e_cpos = 0;
	fe_el->l_recs[0].e_blkno = eb->h_blkno;
	fe_el->l_recs[0].e_clusters = fe->i_clusters;
	for (i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) {
		fe_el->l_recs[i].e_cpos = 0;
		fe_el->l_recs[i].e_clusters = 0;
		fe_el->l_recs[i].e_blkno = 0;
	}
	fe_el->l_next_free_rec = cpu_to_le16(1);

	/* If this is our 1st tree depth shift, then last_eb_blk
	 * becomes the allocated extent block */
	if (fe_el->l_tree_depth == cpu_to_le16(1))
		fe->i_last_eb_blk = eb->h_blkno;

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	*ret_new_eb_bh = new_eb_bh;
	new_eb_bh = NULL;
	status = 0;
bail:
	if (new_eb_bh)
		brelse(new_eb_bh);

	mlog_exit(status);
	return status;
}

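/*
 * The leaf update in ocfs2_do_insert_extent() below has three cases,
 * in order of preference:
 *
 * 1) the new range is contiguous with the last record - just grow
 *    that record's e_clusters,
 * 2) the last record is an empty one at eof - fill it in,
 * 3) otherwise, append a fresh record.
 */
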
/*
 * Expects the tree to already have room in the rightmost leaf for the
 * extent. Updates all the extent blocks (and the dinode) on the way
 * down.
 */
static int ocfs2_do_insert_extent(struct ocfs2_super *osb,
				  struct ocfs2_journal_handle *handle,
				  struct inode *inode,
				  struct buffer_head *fe_bh,
				  u64 start_blk,
				  u32 new_clusters)
{
	int status, i, num_bhs = 0;
	u64 next_blkno;
	u16 next_free;
	struct buffer_head **eb_bhs = NULL;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	mlog_entry_void();

	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	fe = (struct ocfs2_dinode *) fe_bh->b_data;
	el = &fe->id2.i_list;
	if (el->l_tree_depth) {
		/* This is another operation where we want to be
		 * careful about our tree updates. An error here means
		 * none of the previous changes we made should roll
		 * forward. As a result, we have to record the buffers
		 * for this part of the tree in an array and reserve a
		 * journal write to them before making any changes. */
		num_bhs = le16_to_cpu(fe->id2.i_list.l_tree_depth);
		eb_bhs = kcalloc(num_bhs, sizeof(struct buffer_head *),
				 GFP_KERNEL);
		if (!eb_bhs) {
			status = -ENOMEM;
			mlog_errno(status);
			goto bail;
		}

		i = 0;
		while (el->l_tree_depth) {
			next_free = le16_to_cpu(el->l_next_free_rec);
			if (next_free == 0) {
				ocfs2_error(inode->i_sb,
					    "Dinode %llu has a bad extent list",
					    (unsigned long long)OCFS2_I(inode)->ip_blkno);
				status = -EIO;
				goto bail;
			}
			next_blkno = le64_to_cpu(el->l_recs[next_free - 1].e_blkno);

			BUG_ON(i >= num_bhs);
			status = ocfs2_read_block(osb, next_blkno, &eb_bhs[i],
						  OCFS2_BH_CACHED, inode);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
			eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
			if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
								 eb);
				status = -EIO;
				goto bail;
			}

			status = ocfs2_journal_access(handle, inode, eb_bhs[i],
						      OCFS2_JOURNAL_ACCESS_WRITE);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}

			el = &eb->h_list;
			i++;
			/* When we leave this loop, eb_bhs[num_bhs - 1] will
			 * hold the bottom-most leaf extent block. */
		}
		BUG_ON(el->l_tree_depth);

		el = &fe->id2.i_list;
		/* If we have tree depth, then the fe update is
		 * trivial, and we want to switch el out for the
		 * bottom-most leaf in order to update it with the
		 * actual extent data below. */
		next_free = le16_to_cpu(el->l_next_free_rec);
		if (next_free == 0) {
			ocfs2_error(inode->i_sb,
				    "Dinode %llu has a bad extent list",
				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
			status = -EIO;
			goto bail;
		}
		le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
			     new_clusters);
		/* (num_bhs - 1) to avoid the leaf */
		for (i = 0; i < (num_bhs - 1); i++) {
			eb = (struct ocfs2_extent_block *) eb_bhs[i]->b_data;
			el = &eb->h_list;

			/* finally, make our actual change to the
			 * intermediate extent blocks. */
			next_free = le16_to_cpu(el->l_next_free_rec);
			le32_add_cpu(&el->l_recs[next_free - 1].e_clusters,
				     new_clusters);

			status = ocfs2_journal_dirty(handle, eb_bhs[i]);
			if (status < 0)
				mlog_errno(status);
		}
		BUG_ON(i != (num_bhs - 1));
		/* note that the leaf block wasn't touched in
		 * the loop above */
		eb = (struct ocfs2_extent_block *) eb_bhs[num_bhs - 1]->b_data;
		el = &eb->h_list;
		BUG_ON(el->l_tree_depth);
	}

	/* yay, we can finally add the actual extent now! */
	i = le16_to_cpu(el->l_next_free_rec) - 1;
	if (le16_to_cpu(el->l_next_free_rec) &&
	    ocfs2_extent_contig(inode, &el->l_recs[i], start_blk)) {
		le32_add_cpu(&el->l_recs[i].e_clusters, new_clusters);
	} else if (le16_to_cpu(el->l_next_free_rec) &&
		   (le32_to_cpu(el->l_recs[i].e_clusters) == 0)) {
		/* having an empty extent at eof is legal. */
		if (el->l_recs[i].e_cpos != fe->i_clusters) {
			ocfs2_error(inode->i_sb,
				    "Dinode %llu trailing extent is bad: "
				    "cpos (%u) != number of clusters (%u)",
				    (unsigned long long)OCFS2_I(inode)->ip_blkno,
				    le32_to_cpu(el->l_recs[i].e_cpos),
				    le32_to_cpu(fe->i_clusters));
			status = -EIO;
			goto bail;
		}
		el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
		el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
	} else {
		/* No contiguous record, or no empty record at eof, so
		 * we add a new one. */

		BUG_ON(le16_to_cpu(el->l_next_free_rec) >=
		       le16_to_cpu(el->l_count));
		i = le16_to_cpu(el->l_next_free_rec);

		el->l_recs[i].e_blkno = cpu_to_le64(start_blk);
		el->l_recs[i].e_clusters = cpu_to_le32(new_clusters);
		el->l_recs[i].e_cpos = fe->i_clusters;
		le16_add_cpu(&el->l_next_free_rec, 1);
	}

	/*
	 * extent_map errors are not fatal, so they are ignored outside
	 * of flushing the thing.
	 */
	status = ocfs2_extent_map_append(inode, &el->l_recs[i],
					 new_clusters);
	if (status) {
		mlog_errno(status);
		ocfs2_extent_map_drop(inode, le32_to_cpu(fe->i_clusters));
	}

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0)
		mlog_errno(status);
	if (fe->id2.i_list.l_tree_depth) {
		status = ocfs2_journal_dirty(handle, eb_bhs[num_bhs - 1]);
		if (status < 0)
			mlog_errno(status);
	}

	status = 0;
bail:
	if (eb_bhs) {
		for (i = 0; i < num_bhs; i++)
			if (eb_bhs[i])
				brelse(eb_bhs[i]);
		kfree(eb_bhs);
	}

	mlog_exit(status);
	return status;
}

/*
 * Should only be called when there is no space left in any of the
 * leaf nodes. What we want to do is find the lowest tree depth
 * non-leaf extent block with room for new records. There are three
 * valid results of this search:
 *
 * 1) a lowest extent block is found, then we pass it back in
 *    *target_bh and return '0'
 *
 * 2) the search fails to find anything, but the dinode has room. We
 *    pass NULL back in *target_bh, but still return '0'
 *
 * 3) the search fails to find anything AND the dinode is full, in
 *    which case we return > 0
 *
 * return status < 0 indicates an error.
 */
static int ocfs2_find_branch_target(struct ocfs2_super *osb,
				    struct inode *inode,
				    struct buffer_head *fe_bh,
				    struct buffer_head **target_bh)
{
	int status = 0, i;
	u64 blkno;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *bh = NULL;
	struct buffer_head *lowest_bh = NULL;

	mlog_entry_void();

	*target_bh = NULL;

	fe = (struct ocfs2_dinode *) fe_bh->b_data;
	el = &fe->id2.i_list;

	while (le16_to_cpu(el->l_tree_depth) > 1) {
		if (le16_to_cpu(el->l_next_free_rec) == 0) {
			ocfs2_error(inode->i_sb, "Dinode %llu has empty "
				    "extent list (next_free_rec == 0)",
				    (unsigned long long)OCFS2_I(inode)->ip_blkno);
			status = -EIO;
			goto bail;
		}
		i = le16_to_cpu(el->l_next_free_rec) - 1;
		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
		if (!blkno) {
			ocfs2_error(inode->i_sb, "Dinode %llu has extent "
				    "list where extent # %d has no physical "
				    "block start",
				    (unsigned long long)OCFS2_I(inode)->ip_blkno, i);
			status = -EIO;
			goto bail;
		}

		if (bh) {
			brelse(bh);
			bh = NULL;
		}

		status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED,
					  inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		eb = (struct ocfs2_extent_block *) bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
		el = &eb->h_list;

		if (le16_to_cpu(el->l_next_free_rec) <
		    le16_to_cpu(el->l_count)) {
			if (lowest_bh)
				brelse(lowest_bh);
			lowest_bh = bh;
			get_bh(lowest_bh);
		}
	}

	/* If we didn't find one and the fe doesn't have any room,
	 * then return '1' */
	if (!lowest_bh
	    && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
		status = 1;

	*target_bh = lowest_bh;
bail:
	if (bh)
		brelse(bh);

	mlog_exit(status);
	return status;
}

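/*
 * Rough flow of ocfs2_insert_extent() below: try a cheap in-place add
 * in the rightmost leaf first; failing that, find a branch target
 * with ocfs2_find_branch_target(), shift the tree depth if even the
 * dinode is full, add a new branch with ocfs2_add_branch(), and only
 * then do the actual record insert.
 */
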
/* the caller needs to update fe->i_clusters */
int ocfs2_insert_extent(struct ocfs2_super *osb,
			struct ocfs2_journal_handle *handle,
			struct inode *inode,
			struct buffer_head *fe_bh,
			u64 start_blk,
			u32 new_clusters,
			struct ocfs2_alloc_context *meta_ac)
{
	int status, i, shift;
	struct buffer_head *last_eb_bh = NULL;
	struct buffer_head *bh = NULL;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	mlog_entry_void();

	mlog(0, "add %u clusters starting at block %llu to inode %llu\n",
	     new_clusters, (unsigned long long)start_blk,
	     (unsigned long long)OCFS2_I(inode)->ip_blkno);

	fe = (struct ocfs2_dinode *) fe_bh->b_data;
	el = &fe->id2.i_list;

	if (el->l_tree_depth) {
		/* jump to end of tree */
		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
					  &last_eb_bh, OCFS2_BH_CACHED, inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		el = &eb->h_list;
	}

	/* Can we allocate without adding/shifting tree bits? */
	i = le16_to_cpu(el->l_next_free_rec) - 1;
	if (le16_to_cpu(el->l_next_free_rec) == 0
	    || (le16_to_cpu(el->l_next_free_rec) < le16_to_cpu(el->l_count))
	    || le32_to_cpu(el->l_recs[i].e_clusters) == 0
	    || ocfs2_extent_contig(inode, &el->l_recs[i], start_blk))
		goto out_add;

	mlog(0, "ocfs2_allocate_extent: couldn't do a simple add, traversing "
	     "tree now.\n");

	shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
	if (shift < 0) {
		status = shift;
		mlog_errno(status);
		goto bail;
	}

	/* We traveled all the way to the bottom of the allocation tree
	 * and didn't find room for any more extents - we need to add
	 * another tree level */
	if (shift) {
		/* if we hit a leaf, we'd better be empty :) */
		BUG_ON(le16_to_cpu(el->l_next_free_rec) !=
		       le16_to_cpu(el->l_count));
		BUG_ON(bh);
		mlog(0, "ocfs2_allocate_extent: need to shift tree depth "
		     "(current = %u)\n",
		     le16_to_cpu(fe->id2.i_list.l_tree_depth));

		/* ocfs2_shift_tree_depth will return us a buffer with
		 * the new extent block (so we can pass that to
		 * ocfs2_add_branch). */
		status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
						meta_ac, &bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		/* Special case: we have room now if we shifted from
		 * tree_depth 0 */
		if (fe->id2.i_list.l_tree_depth == cpu_to_le16(1))
			goto out_add;
	}

	/* call ocfs2_add_branch to add the final part of the tree with
	 * the new data. */
	mlog(0, "ocfs2_allocate_extent: add branch. bh = %p\n", bh);
	status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
				  meta_ac);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

out_add:
	/* Finally, we can add clusters. */
	status = ocfs2_do_insert_extent(osb, handle, inode, fe_bh,
					start_blk, new_clusters);
	if (status < 0)
		mlog_errno(status);

bail:
	if (bh)
		brelse(bh);

	if (last_eb_bh)
		brelse(last_eb_bh);

	mlog_exit(status);
	return status;
}

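/*
 * The truncate log: rather than freeing clusters back to the global
 * bitmap as we truncate, we log them in this slot's truncate log
 * inode and batch the actual frees in __ocfs2_flush_truncate_log(),
 * which the delayed work below kicks off periodically.
 */
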
static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
{
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;

	mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
			"slot %d, invalid truncate log parameters: used = "
			"%u, count = %u\n", osb->slot_num,
			le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
	return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
}

static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
					   unsigned int new_start)
{
	unsigned int tail_index;
	unsigned int current_tail;

	/* No records, nothing to coalesce */
	if (!le16_to_cpu(tl->tl_used))
		return 0;

	tail_index = le16_to_cpu(tl->tl_used) - 1;
	current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
	current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);

	return current_tail == new_start;
}

static int ocfs2_truncate_log_append(struct ocfs2_super *osb,
				     struct ocfs2_journal_handle *handle,
				     u64 start_blk,
				     unsigned int num_clusters)
{
	int status, index;
	unsigned int start_cluster, tl_count;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	mlog_entry("start_blk = %llu, num_clusters = %u\n",
		   (unsigned long long)start_blk, num_clusters);

	BUG_ON(mutex_trylock(&tl_inode->i_mutex));

	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
		status = -EIO;
		goto bail;
	}

	tl_count = le16_to_cpu(tl->tl_count);
	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
			tl_count == 0,
			"Truncate record count on #%llu invalid: "
			"wanted %u, actual %u\n",
			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
			ocfs2_truncate_recs_per_inode(osb->sb),
			le16_to_cpu(tl->tl_count));

	/* Caller should have known to flush before calling us. */
	index = le16_to_cpu(tl->tl_used);
	if (index >= tl_count) {
		status = -ENOSPC;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access(handle, tl_inode, tl_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	mlog(0, "Log truncate of %u clusters starting at cluster %u to "
	     "%llu (index = %d)\n", num_clusters, start_cluster,
	     (unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index);

	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
		/*
		 * Move index back to the record we are coalescing with.
		 * ocfs2_truncate_log_can_coalesce() guarantees nonzero
		 */
		index--;

		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
		mlog(0, "Coalesce with index %u (start = %u, clusters = %u)\n",
		     index, le32_to_cpu(tl->tl_recs[index].t_start),
		     num_clusters);
	} else {
		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
		tl->tl_used = cpu_to_le16(index + 1);
	}
	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);

	status = ocfs2_journal_dirty(handle, tl_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

bail:
	mlog_exit(status);
	return status;
}

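/*
 * Coalescing example for the append path above (made-up numbers): a
 * tail record of { t_start = 100, t_clusters = 4 } ends at cluster
 * 104, so logging 4 more clusters starting at cluster 104 simply
 * bumps t_clusters to 8 instead of burning a new record.
 */
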
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
					 struct ocfs2_journal_handle *handle,
					 struct inode *data_alloc_inode,
					 struct buffer_head *data_alloc_bh)
{
	int status = 0;
	int i;
	unsigned int num_clusters;
	u64 start_blk;
	struct ocfs2_truncate_rec rec;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct buffer_head *tl_bh = osb->osb_tl_bh;

	mlog_entry_void();

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	i = le16_to_cpu(tl->tl_used) - 1;
	while (i >= 0) {
		/* Caller has given us at least enough credits to
		 * update the truncate log dinode */
		status = ocfs2_journal_access(handle, tl_inode, tl_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		tl->tl_used = cpu_to_le16(i);

		status = ocfs2_journal_dirty(handle, tl_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		/* TODO: Perhaps we can calculate the bulk of the
		 * credits up front rather than extending like
		 * this. */
		status = ocfs2_extend_trans(handle,
					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		rec = tl->tl_recs[i];
		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
						     le32_to_cpu(rec.t_start));
		num_clusters = le32_to_cpu(rec.t_clusters);

		/* if start_blk is not set, we ignore the record as
		 * invalid. */
		if (start_blk) {
			mlog(0, "free record %d, start = %u, clusters = %u\n",
			     i, le32_to_cpu(rec.t_start), num_clusters);

			status = ocfs2_free_clusters(handle, data_alloc_inode,
						     data_alloc_bh, start_blk,
						     num_clusters);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		i--;
	}

bail:
	mlog_exit(status);
	return status;
}

/* Expects you to already be holding tl_inode->i_mutex */
static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
	int status;
	unsigned int num_to_flush;
	struct ocfs2_journal_handle *handle = NULL;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct inode *data_alloc_inode = NULL;
	struct buffer_head *tl_bh = osb->osb_tl_bh;
	struct buffer_head *data_alloc_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	mlog_entry_void();

	BUG_ON(mutex_trylock(&tl_inode->i_mutex));

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
		status = -EIO;
		goto bail;
	}

	num_to_flush = le16_to_cpu(tl->tl_used);
	mlog(0, "Flush %u records from truncate log #%llu\n",
	     num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
	if (!num_to_flush) {
		status = 0;
		goto bail;
	}

	handle = ocfs2_alloc_handle(osb);
	if (!handle) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	data_alloc_inode = ocfs2_get_system_file_inode(osb,
						       GLOBAL_BITMAP_SYSTEM_INODE,
						       OCFS2_INVALID_SLOT);
	if (!data_alloc_inode) {
		status = -EINVAL;
		mlog(ML_ERROR, "Could not get bitmap inode!\n");
		goto bail;
	}

	ocfs2_handle_add_inode(handle, data_alloc_inode);
	status = ocfs2_meta_lock(data_alloc_inode, handle, &data_alloc_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	handle = ocfs2_start_trans(osb, handle, OCFS2_TRUNCATE_LOG_UPDATE);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
					       data_alloc_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

bail:
	if (handle)
		ocfs2_commit_trans(handle);

	if (data_alloc_inode)
		iput(data_alloc_inode);

	if (data_alloc_bh)
		brelse(data_alloc_bh);

	mlog_exit(status);
	return status;
}

int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
	int status;
	struct inode *tl_inode = osb->osb_tl_inode;

	mutex_lock(&tl_inode->i_mutex);
	status = __ocfs2_flush_truncate_log(osb);
	mutex_unlock(&tl_inode->i_mutex);

	return status;
}

static void ocfs2_truncate_log_worker(void *data)
{
	int status;
	struct ocfs2_super *osb = data;

	mlog_entry_void();

	status = ocfs2_flush_truncate_log(osb);
	if (status < 0)
		mlog_errno(status);

	mlog_exit(status);
}

#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
				       int cancel)
{
	if (osb->osb_tl_inode) {
		/* We want to push off log flushes while truncates are
		 * still running. */
		if (cancel)
			cancel_delayed_work(&osb->osb_truncate_log_wq);

		queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
				   OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
	}
}

static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
				       int slot_num,
				       struct inode **tl_inode,
				       struct buffer_head **tl_bh)
{
	int status;
	struct inode *inode = NULL;
	struct buffer_head *bh = NULL;

	inode = ocfs2_get_system_file_inode(osb,
					    TRUNCATE_LOG_SYSTEM_INODE,
					    slot_num);
	if (!inode) {
		status = -EINVAL;
		mlog(ML_ERROR, "Could not load truncate log inode!\n");
		goto bail;
	}

	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
				  OCFS2_BH_CACHED, inode);
	if (status < 0) {
		iput(inode);
		mlog_errno(status);
		goto bail;
	}

	*tl_inode = inode;
	*tl_bh = bh;
bail:
	mlog_exit(status);
	return status;
}

/* called during the 1st stage of node recovery. we stamp a clean
 * truncate log and pass back a copy for processing later. if the
 * truncate log does not require processing, *tl_copy is set to
 * NULL. */
int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
				      int slot_num,
				      struct ocfs2_dinode **tl_copy)
{
	int status;
	struct inode *tl_inode = NULL;
	struct buffer_head *tl_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_truncate_log *tl;

	*tl_copy = NULL;

	mlog(0, "recover truncate log from slot %d\n", slot_num);

	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	di = (struct ocfs2_dinode *) tl_bh->b_data;
	tl = &di->id2.i_dealloc;
	if (!OCFS2_IS_VALID_DINODE(di)) {
		OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
		status = -EIO;
		goto bail;
	}

	if (le16_to_cpu(tl->tl_used)) {
		mlog(0, "We'll have %u logs to recover\n",
		     le16_to_cpu(tl->tl_used));

		*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
		if (!(*tl_copy)) {
			status = -ENOMEM;
			mlog_errno(status);
			goto bail;
		}

		/* Assuming the write-out below goes well, this copy
		 * will be passed back to recovery for processing. */
		memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);

		/* All we need to do to clear the truncate log is set
		 * tl_used. */
		tl->tl_used = 0;

		status = ocfs2_write_block(osb, tl_bh, tl_inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

bail:
	if (tl_inode)
		iput(tl_inode);
	if (tl_bh)
		brelse(tl_bh);

	if (status < 0 && (*tl_copy)) {
		kfree(*tl_copy);
		*tl_copy = NULL;
	}

	mlog_exit(status);
	return status;
}

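/*
 * Recovery usage sketch: the recovering node stamps the dead slot's
 * log clean with ocfs2_begin_truncate_log_recovery() above, then
 * hands the returned copy to ocfs2_complete_truncate_log_recovery()
 * below once it can start transactions. The copy is kmalloc()'d
 * above and not freed on success, so freeing it falls to the caller.
 */
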
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
					 struct ocfs2_dinode *tl_copy)
{
	int status = 0;
	int i;
	unsigned int clusters, num_recs, start_cluster;
	u64 start_blk;
	struct ocfs2_journal_handle *handle;
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_truncate_log *tl;

	mlog_entry_void();

	if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
		mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
		return -EINVAL;
	}

	tl = &tl_copy->id2.i_dealloc;
	num_recs = le16_to_cpu(tl->tl_used);
	mlog(0, "cleanup %u records from %llu\n", num_recs,
	     (unsigned long long)le64_to_cpu(tl_copy->i_blkno));

	mutex_lock(&tl_inode->i_mutex);
	for (i = 0; i < num_recs; i++) {
		if (ocfs2_truncate_log_needs_flush(osb)) {
			status = __ocfs2_flush_truncate_log(osb);
			if (status < 0) {
				mlog_errno(status);
				goto bail_up;
			}
		}

		handle = ocfs2_start_trans(osb, NULL,
					   OCFS2_TRUNCATE_LOG_UPDATE);
		if (IS_ERR(handle)) {
			status = PTR_ERR(handle);
			mlog_errno(status);
			goto bail_up;
		}

		clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
		start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
		start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);

		status = ocfs2_truncate_log_append(osb, handle,
						   start_blk, clusters);
		ocfs2_commit_trans(handle);
		if (status < 0) {
			mlog_errno(status);
			goto bail_up;
		}
	}

bail_up:
	mutex_unlock(&tl_inode->i_mutex);

	mlog_exit(status);
	return status;
}

void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
{
	int status;
	struct inode *tl_inode = osb->osb_tl_inode;

	mlog_entry_void();

	if (tl_inode) {
		cancel_delayed_work(&osb->osb_truncate_log_wq);
		flush_workqueue(ocfs2_wq);

		status = ocfs2_flush_truncate_log(osb);
		if (status < 0)
			mlog_errno(status);

		brelse(osb->osb_tl_bh);
		iput(osb->osb_tl_inode);
	}

	mlog_exit_void();
}

int ocfs2_truncate_log_init(struct ocfs2_super *osb)
{
	int status;
	struct inode *tl_inode = NULL;
	struct buffer_head *tl_bh = NULL;

	mlog_entry_void();

	status = ocfs2_get_truncate_log_info(osb,
					     osb->slot_num,
					     &tl_inode,
					     &tl_bh);
	if (status < 0)
		mlog_errno(status);

	/* ocfs2_truncate_log_shutdown keys on the existence of
	 * osb->osb_tl_inode so we don't set any of the osb variables
	 * until we're sure all is well. */
	INIT_WORK(&osb->osb_truncate_log_wq, ocfs2_truncate_log_worker, osb);
	osb->osb_tl_bh = tl_bh;
	osb->osb_tl_inode = tl_inode;

	mlog_exit(status);
	return status;
}

/* This function will figure out whether the currently last extent
 * block will be deleted, and if it will, what the new last extent
 * block will be so we can update its h_next_leaf_blk field, as well
 * as the dinode's i_last_eb_blk */
static int ocfs2_find_new_last_ext_blk(struct ocfs2_super *osb,
				       struct inode *inode,
				       struct ocfs2_dinode *fe,
				       u32 new_i_clusters,
				       struct buffer_head *old_last_eb,
				       struct buffer_head **new_last_eb)
{
	int i, status = 0;
	u64 block = 0;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *bh = NULL;

	*new_last_eb = NULL;

	if (!OCFS2_IS_VALID_DINODE(fe)) {
		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
		status = -EIO;
		goto bail;
	}

	/* we have no tree, so of course, no last_eb. */
	if (!fe->id2.i_list.l_tree_depth)
		goto bail;

	/* trunc to zero special case - this makes tree_depth = 0
	 * regardless of what it is. */
	if (!new_i_clusters)
		goto bail;

	eb = (struct ocfs2_extent_block *) old_last_eb->b_data;
	el = &(eb->h_list);
	BUG_ON(!el->l_next_free_rec);

	/* Make sure that this block will actually be empty after we
	 * clear away the data. */
	if (le32_to_cpu(el->l_recs[0].e_cpos) < new_i_clusters)
		goto bail;

	/* Ok, at this point, we know that last_eb will definitely
	 * change, so let's traverse the tree and find the second to
	 * last extent block. */
	el = &(fe->id2.i_list);
	/* go down the tree, */
	do {
		for (i = (le16_to_cpu(el->l_next_free_rec) - 1); i >= 0; i--) {
			if (le32_to_cpu(el->l_recs[i].e_cpos) <
			    new_i_clusters) {
				block = le64_to_cpu(el->l_recs[i].e_blkno);
				break;
			}
		}
		BUG_ON(i < 0);

		if (bh) {
			brelse(bh);
			bh = NULL;
		}

		status = ocfs2_read_block(osb, block, &bh, OCFS2_BH_CACHED,
					  inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) bh->b_data;
		el = &eb->h_list;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
	} while (el->l_tree_depth);

	*new_last_eb = bh;
	get_bh(*new_last_eb);
	mlog(0, "returning block %llu\n",
	     (unsigned long long)le64_to_cpu(eb->h_blkno));
bail:
	if (bh)
		brelse(bh);

	return status;
}

static int ocfs2_do_truncate(struct ocfs2_super *osb,
			     unsigned int clusters_to_del,
			     struct inode *inode,
			     struct buffer_head *fe_bh,
			     struct buffer_head *old_last_eb_bh,
			     struct ocfs2_journal_handle *handle,
			     struct ocfs2_truncate_context *tc)
{
	int status, i, depth;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_block *last_eb = NULL;
	struct ocfs2_extent_list *el;
	struct buffer_head *eb_bh = NULL;
	struct buffer_head *last_eb_bh = NULL;
	u64 next_eb = 0;
	u64 delete_blk = 0;

	fe = (struct ocfs2_dinode *) fe_bh->b_data;

	status = ocfs2_find_new_last_ext_blk(osb,
					     inode,
					     fe,
					     le32_to_cpu(fe->i_clusters) -
					     clusters_to_del,
					     old_last_eb_bh,
					     &last_eb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	if (last_eb_bh)
		last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;

	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	el = &(fe->id2.i_list);

	spin_lock(&OCFS2_I(inode)->ip_lock);
	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
				      clusters_to_del;
	spin_unlock(&OCFS2_I(inode)->ip_lock);
	le32_add_cpu(&fe->i_clusters, -clusters_to_del);
	fe->i_mtime = cpu_to_le64(CURRENT_TIME.tv_sec);
	fe->i_mtime_nsec = cpu_to_le32(CURRENT_TIME.tv_nsec);

	i = le16_to_cpu(el->l_next_free_rec) - 1;

	BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
	le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);
	/* tree depth zero, we can just delete the clusters, otherwise
	 * we need to record the offset of the next level extent block
	 * as we may overwrite it. */
	if (!el->l_tree_depth)
		delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
			+ ocfs2_clusters_to_blocks(osb->sb,
					le32_to_cpu(el->l_recs[i].e_clusters));
	else
		next_eb = le64_to_cpu(el->l_recs[i].e_blkno);

	if (!el->l_recs[i].e_clusters) {
		/* if we deleted the whole extent record, then clear
		 * out the other fields and update the extent
		 * list. For depth > 0 trees, we've already recorded
		 * the extent block in 'next_eb' */
		el->l_recs[i].e_cpos = 0;
		el->l_recs[i].e_blkno = 0;
		BUG_ON(!el->l_next_free_rec);
		le16_add_cpu(&el->l_next_free_rec, -1);
	}

	depth = le16_to_cpu(el->l_tree_depth);
	if (!fe->i_clusters) {
		/* trunc to zero is a special case. */
		el->l_tree_depth = 0;
		fe->i_last_eb_blk = 0;
	} else if (last_eb)
		fe->i_last_eb_blk = last_eb->h_blkno;

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	if (last_eb) {
		/* If there will be a new last extent block, then by
		 * definition, there cannot be any leaves to the right
		 * of it. */
		status = ocfs2_journal_access(handle, inode, last_eb_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		last_eb->h_next_leaf_blk = 0;
		status = ocfs2_journal_dirty(handle, last_eb_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* if our tree depth > 0, update all the tree blocks below us. */
	while (depth) {
		mlog(0, "traveling tree (depth = %d, next_eb = %llu)\n",
		     depth, (unsigned long long)next_eb);
		status = ocfs2_read_block(osb, next_eb, &eb_bh,
					  OCFS2_BH_CACHED, inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
		el = &(eb->h_list);

		status = ocfs2_journal_access(handle, inode, eb_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
		BUG_ON(depth != (le16_to_cpu(el->l_tree_depth) + 1));

		i = le16_to_cpu(el->l_next_free_rec) - 1;

		mlog(0, "extent block %llu, before: record %d: "
		     "(%u, %u, %llu), next = %u\n",
		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
		     le32_to_cpu(el->l_recs[i].e_cpos),
		     le32_to_cpu(el->l_recs[i].e_clusters),
		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
		     le16_to_cpu(el->l_next_free_rec));

		BUG_ON(le32_to_cpu(el->l_recs[i].e_clusters) < clusters_to_del);
		le32_add_cpu(&el->l_recs[i].e_clusters, -clusters_to_del);

		next_eb = le64_to_cpu(el->l_recs[i].e_blkno);
		/* bottom-most block requires us to delete data. */
		if (!el->l_tree_depth)
			delete_blk = le64_to_cpu(el->l_recs[i].e_blkno)
				+ ocfs2_clusters_to_blocks(osb->sb,
					le32_to_cpu(el->l_recs[i].e_clusters));
		if (!el->l_recs[i].e_clusters) {
			el->l_recs[i].e_cpos = 0;
			el->l_recs[i].e_blkno = 0;
			BUG_ON(!el->l_next_free_rec);
			le16_add_cpu(&el->l_next_free_rec, -1);
		}
		mlog(0, "extent block %llu, after: record %d: "
		     "(%u, %u, %llu), next = %u\n",
		     (unsigned long long)le64_to_cpu(eb->h_blkno), i,
		     le32_to_cpu(el->l_recs[i].e_cpos),
		     le32_to_cpu(el->l_recs[i].e_clusters),
		     (unsigned long long)le64_to_cpu(el->l_recs[i].e_blkno),
		     le16_to_cpu(el->l_next_free_rec));

		status = ocfs2_journal_dirty(handle, eb_bh);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}

		if (!el->l_next_free_rec) {
			mlog(0, "deleting this extent block.\n");

			ocfs2_remove_from_cache(inode, eb_bh);

			BUG_ON(el->l_recs[0].e_clusters);
			BUG_ON(el->l_recs[0].e_cpos);
			BUG_ON(el->l_recs[0].e_blkno);
			if (eb->h_suballoc_slot == 0) {
				/*
				 * This code only understands how to
				 * lock the suballocator in slot 0,
				 * which is fine because allocation is
				 * only ever done out of that
				 * suballocator too. A future version
				 * might change that however, so avoid
				 * a free if we don't know how to
				 * handle it. This way an fs incompat
				 * bit will not be necessary.
				 */
				status = ocfs2_free_extent_block(handle,
								 tc->tc_ext_alloc_inode,
								 tc->tc_ext_alloc_bh,
								 eb);
				if (status < 0) {
					mlog_errno(status);
					goto bail;
				}
			}
		}
		brelse(eb_bh);
		eb_bh = NULL;
		depth--;
	}

	BUG_ON(!delete_blk);
	status = ocfs2_truncate_log_append(osb, handle, delete_blk,
					   clusters_to_del);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	status = 0;
bail:
	if (!status)
		ocfs2_extent_map_trunc(inode, le32_to_cpu(fe->i_clusters));
	else
		ocfs2_extent_map_drop(inode, 0);
	mlog_exit(status);
	return status;
}

/*
 * It is expected that, by the time you call this function,
 * inode->i_size and fe->i_size have been adjusted.
 *
 * WARNING: This will kfree the truncate context
 */
int ocfs2_commit_truncate(struct ocfs2_super *osb,
			  struct inode *inode,
			  struct buffer_head *fe_bh,
			  struct ocfs2_truncate_context *tc)
{
	int status, i, credits, tl_sem = 0;
	u32 clusters_to_del, target_i_clusters;
	u64 last_eb = 0;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *last_eb_bh;
	struct ocfs2_journal_handle *handle = NULL;
	struct inode *tl_inode = osb->osb_tl_inode;

	mlog_entry_void();

	down_write(&OCFS2_I(inode)->ip_alloc_sem);

	target_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
						     i_size_read(inode));

	last_eb_bh = tc->tc_last_eb_bh;
	tc->tc_last_eb_bh = NULL;

	fe = (struct ocfs2_dinode *) fe_bh->b_data;

	if (fe->id2.i_list.l_tree_depth) {
		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		el = &eb->h_list;
	} else
		el = &fe->id2.i_list;
	last_eb = le64_to_cpu(fe->i_last_eb_blk);
start:
	mlog(0, "ocfs2_commit_truncate: fe->i_clusters = %u, "
	     "last_eb = %llu, fe->i_last_eb_blk = %llu, "
	     "fe->id2.i_list.l_tree_depth = %u last_eb_bh = %p\n",
	     le32_to_cpu(fe->i_clusters), (unsigned long long)last_eb,
	     (unsigned long long)le64_to_cpu(fe->i_last_eb_blk),
	     le16_to_cpu(fe->id2.i_list.l_tree_depth), last_eb_bh);

	if (last_eb != le64_to_cpu(fe->i_last_eb_blk)) {
		mlog(0, "last_eb changed!\n");
		BUG_ON(!fe->id2.i_list.l_tree_depth);
		last_eb = le64_to_cpu(fe->i_last_eb_blk);
		/* i_last_eb_blk may have changed, read it if
		 * necessary. We don't have to worry about the
		 * truncate to zero case here (where there becomes no
		 * last_eb) because we never loop back after our work
		 * is done. */
		if (last_eb_bh) {
			brelse(last_eb_bh);
			last_eb_bh = NULL;
		}

		status = ocfs2_read_block(osb, last_eb,
					  &last_eb_bh, OCFS2_BH_CACHED,
					  inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
			status = -EIO;
			goto bail;
		}
		el = &(eb->h_list);
	}

	/* by now, el will point to the extent list on the bottommost
	 * portion of this tree. */
	i = le16_to_cpu(el->l_next_free_rec) - 1;
	if (le32_to_cpu(el->l_recs[i].e_cpos) >= target_i_clusters)
		clusters_to_del = le32_to_cpu(el->l_recs[i].e_clusters);
	else
		clusters_to_del = (le32_to_cpu(el->l_recs[i].e_clusters) +
				   le32_to_cpu(el->l_recs[i].e_cpos)) -
				  target_i_clusters;

	mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del);

	mutex_lock(&tl_inode->i_mutex);
	tl_sem = 1;
	/* We need at least one free truncate log record for this
	 * delete; if the log is full, flush it to empty it out. */
	if (ocfs2_truncate_log_needs_flush(osb)) {
		status = __ocfs2_flush_truncate_log(osb);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
	}

	credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
						fe, el);
	handle = ocfs2_start_trans(osb, NULL, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	if (status < 0)
		mlog_errno(status);

	status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh,
				   last_eb_bh, handle, tc);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	mutex_unlock(&tl_inode->i_mutex);
	tl_sem = 0;

	ocfs2_commit_trans(handle);
	handle = NULL;

	BUG_ON(le32_to_cpu(fe->i_clusters) < target_i_clusters);
	if (le32_to_cpu(fe->i_clusters) > target_i_clusters)
		goto start;
bail:
	up_write(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_schedule_truncate_log_flush(osb, 1);

	if (tl_sem)
		mutex_unlock(&tl_inode->i_mutex);

	if (handle)
		ocfs2_commit_trans(handle);

	if (last_eb_bh)
		brelse(last_eb_bh);

	/* This will drop the ext_alloc cluster lock for us */
	ocfs2_free_truncate_context(tc);

	mlog_exit(status);
	return status;
}

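/*
 * Putting the two halves together, a truncate looks roughly like
 * (error handling omitted):
 *
 *	ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
 *	... shrink inode->i_size / fe->i_size ...
 *	ocfs2_commit_truncate(osb, inode, fe_bh, tc);
 *
 * ocfs2_commit_truncate() loops, lopping off the rightmost extent
 * record each pass, until i_clusters reaches the target, and it
 * frees the truncate context on the way out.
 */
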
/*
 * Expects the inode to already be locked. This will figure out which
 * inodes need to be locked and will put them on the returned truncate
 * context.
 */
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
			   struct inode *inode,
			   struct buffer_head *fe_bh,
			   struct ocfs2_truncate_context **tc)
{
	int status, metadata_delete;
	unsigned int new_i_clusters;
	struct ocfs2_dinode *fe;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct buffer_head *last_eb_bh = NULL;
	struct inode *ext_alloc_inode = NULL;
	struct buffer_head *ext_alloc_bh = NULL;

	mlog_entry_void();

	*tc = NULL;

	new_i_clusters = ocfs2_clusters_for_bytes(osb->sb,
						  i_size_read(inode));
	fe = (struct ocfs2_dinode *) fe_bh->b_data;

	mlog(0, "fe->i_clusters = %u, new_i_clusters = %u, fe->i_size = %llu\n",
	     le32_to_cpu(fe->i_clusters), new_i_clusters,
	     (unsigned long long)le64_to_cpu(fe->i_size));

	if (le32_to_cpu(fe->i_clusters) <= new_i_clusters) {
		ocfs2_error(inode->i_sb, "Dinode %llu has cluster count "
			    "%u and size %llu whereas struct inode has "
			    "cluster count %u and size %llu which caused an "
			    "invalid truncate to %u clusters.",
			    (unsigned long long)le64_to_cpu(fe->i_blkno),
			    le32_to_cpu(fe->i_clusters),
			    (unsigned long long)le64_to_cpu(fe->i_size),
			    OCFS2_I(inode)->ip_clusters, i_size_read(inode),
			    new_i_clusters);
		mlog_meta_lvb(ML_ERROR, &OCFS2_I(inode)->ip_meta_lockres);
		status = -EIO;
		goto bail;
	}

	*tc = kcalloc(1, sizeof(struct ocfs2_truncate_context), GFP_KERNEL);
	if (!(*tc)) {
		status = -ENOMEM;
		mlog_errno(status);
		goto bail;
	}

	metadata_delete = 0;
	if (fe->id2.i_list.l_tree_depth) {
		/* If we have a tree, then the truncate may result in
		 * metadata deletes. Figure this out from the
		 * rightmost leaf block. */
		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
					  &last_eb_bh, OCFS2_BH_CACHED, inode);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);

			brelse(last_eb_bh);
			status = -EIO;
			goto bail;
		}
		el = &(eb->h_list);
		if (le32_to_cpu(el->l_recs[0].e_cpos) >= new_i_clusters)
			metadata_delete = 1;
	}

	(*tc)->tc_last_eb_bh = last_eb_bh;

	if (metadata_delete) {
		mlog(0, "Will have to delete metadata for this trunc. "
		     "locking allocator.\n");
		ext_alloc_inode = ocfs2_get_system_file_inode(osb,
					EXTENT_ALLOC_SYSTEM_INODE, 0);
		if (!ext_alloc_inode) {
			status = -ENOMEM;
			mlog_errno(status);
			goto bail;
		}

		mutex_lock(&ext_alloc_inode->i_mutex);
		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;

		status = ocfs2_meta_lock(ext_alloc_inode,
					 NULL,
					 &ext_alloc_bh,
					 1);
		if (status < 0) {
			mlog_errno(status);
			goto bail;
		}
		(*tc)->tc_ext_alloc_bh = ext_alloc_bh;
		(*tc)->tc_ext_alloc_locked = 1;
	}

	status = 0;
bail:
	if (status < 0) {
		if (*tc)
			ocfs2_free_truncate_context(*tc);
		*tc = NULL;
	}
	mlog_exit(status);
	return status;
}

" 2003 "locking allocator.\n"); 2004 ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); 2005 if (!ext_alloc_inode) { 2006 status = -ENOMEM; 2007 mlog_errno(status); 2008 goto bail; 2009 } 2010 2011 mutex_lock(&ext_alloc_inode->i_mutex); 2012 (*tc)->tc_ext_alloc_inode = ext_alloc_inode; 2013 2014 status = ocfs2_meta_lock(ext_alloc_inode, 2015 NULL, 2016 &ext_alloc_bh, 2017 1); 2018 if (status < 0) { 2019 mlog_errno(status); 2020 goto bail; 2021 } 2022 (*tc)->tc_ext_alloc_bh = ext_alloc_bh; 2023 (*tc)->tc_ext_alloc_locked = 1; 2024 } 2025 2026 status = 0; 2027 bail: 2028 if (status < 0) { 2029 if (*tc) 2030 ocfs2_free_truncate_context(*tc); 2031 *tc = NULL; 2032 } 2033 mlog_exit_void(); 2034 return status; 2035 } 2036 2037 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 2038 { 2039 if (tc->tc_ext_alloc_inode) { 2040 if (tc->tc_ext_alloc_locked) 2041 ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); 2042 2043 mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); 2044 iput(tc->tc_ext_alloc_inode); 2045 } 2046 2047 if (tc->tc_ext_alloc_bh) 2048 brelse(tc->tc_ext_alloc_bh); 2049 2050 if (tc->tc_last_eb_bh) 2051 brelse(tc->tc_last_eb_bh); 2052 2053 kfree(tc); 2054 } 2055