// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include <linux/ktime.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};

static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @folio: The folio.
 *
 * Returns: errno
 */
static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
				u64 block, struct folio *folio)
{
	struct inode *inode = &ip->i_inode;

	if (!folio_test_uptodate(folio)) {
		void *kaddr = kmap_local_folio(folio, 0);
		u64 dsize = i_size_read(inode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, folio_size(folio) - dsize);
		kunmap_local(kaddr);

		folio_mark_uptodate(folio);
	}

	if (gfs2_is_jdata(ip)) {
		struct buffer_head *bh = folio_buffers(folio);

		if (!bh)
			bh = create_empty_buffers(folio,
				BIT(inode->i_blkbits), BIT(BH_Uptodate));

		if (!buffer_mapped(bh))
			map_bh(bh, inode->i_sb, block);

		set_buffer_uptodate(bh);
		gfs2_trans_add_data(ip->i_gl, bh);
	} else {
		folio_mark_dirty(folio);
		gfs2_ordered_add_inode(ip);
	}

	return 0;
}

static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_folio(ip, dibh, block, folio);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
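		/*
		 * The first block pointer lives directly behind the
		 * struct gfs2_dinode header, hence (di + 1); point it at
		 * the block that now holds the former inline data.
		 */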
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
	return error;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct folio *folio;
	int error;

	down_write(&ip->i_rw_mutex);
	folio = filemap_grab_folio(inode->i_mapping, 0);
	error = PTR_ERR(folio);
	if (IS_ERR(folio))
		goto out;
	error = __gfs2_unstuff_inode(ip, folio);
	folio_unlock(folio);
	folio_put(folio);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}

/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 * This routine returns a struct metapath structure that defines a path
 * through the metadata of inode "ip" to get to block "block".
 *
 * Example:
 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
 * filesystem with a blocksize of 4096.
 *
 * find_metapath() would return a struct metapath structure set to:
 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 * That means that in order to get to the block containing the byte at
 * offset 101342453, we would load the indirect block pointed to by pointer
 * 0 in the dinode. We would then load the indirect block pointed to by
 * pointer 48 in that indirect block. We would then load the data block
 * pointed to by pointer 165 in that indirect block.
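 *
 * As a worked example of the arithmetic (with a 4096-byte block holding
 * 512 pointers): offset 101342453 falls into logical block
 * 101342453 / 4096 = 24741, and 24741 = 0 * 512 * 512 + 48 * 512 + 165,
 * which is where the mp_list values 0, 48, and 165 above come from.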
 *
 *    ----------------------------------------
 *    | Dinode |                             |
 *    |        |                            4|
 *    |        |0 1 2 3 4 5                 9|
 *    |        |                            6|
 *    ----------------------------------------
 *                       |
 *                       |
 *                       V
 *    ----------------------------------------
 *    | Indirect Block                       |
 *    |                                     5|
 *    |            4 4 4 4 4 5 5            1|
 *    |0           5 6 7 8 9 0 1            2|
 *    ----------------------------------------
 *                                |
 *                                |
 *                                V
 *    ----------------------------------------
 *    | Indirect Block                       |
 *    |                      1 1 1 1 1      5|
 *    |                      6 6 6 6 6      1|
 *    |0                     3 4 5 6 7      2|
 *    ----------------------------------------
 *                                |
 *                                |
 *                                V
 *    ----------------------------------------
 *    | Data block containing offset         |
 *    |            101342453                 |
 *    |                                      |
 *    |                                      |
 *    ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}

static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
	if (mp->mp_list[0] == 0)
		return 2;
	return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
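 *
 * For example, with mp_list[1] == 48, metapointer(1, mp) returns the
 * address of pointer slot 48 inside the first indirect block buffer
 * mp->mp_bh[1].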
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
	__be64 *p = metaptr1(height, mp);
	return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META |
					  REQ_PRIO, rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	return mp->mp_aheight - x - 1;
}

static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
	sector_t factor = 1, block = 0;
	int hgt;

	for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
		if (hgt < mp->mp_aheight)
			block += mp->mp_list[hgt] * factor;
		factor *= sdp->sd_inptrs;
	}
	return block;
}

static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return
 * @eob: Set to 1 if we hit "end of block"
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while (be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}

enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
			      u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp. Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
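		/*
		 * Height 0 is the dinode, which holds sd_diptrs pointers;
		 * every deeper height is an indirect block holding
		 * sd_inptrs pointers.  If the rest of this block would
		 * overshoot max_len, only hand the walker the pointers
		 * that cover max_len.
		 */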
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (hgt) {
			if (mp->mp_list[hgt] >= sdp->sd_inptrs)
				goto lower_metapath;
		} else {
			if (mp->mp_list[hgt] >= sdp->sd_diptrs)
				break;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}

static inline void gfs2_indirect_init(struct metapath *mp,
				      struct gfs2_glock *gl, unsigned int i,
				      unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
				 ((i > 1) ? sizeof(struct gfs2_meta_header) :
				  sizeof(struct gfs2_dinode)));
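	/*
	 * ptr starts at the first pointer slot of the parent buffer:
	 * pointers follow the dinode header at the top level (i == 1) and
	 * the metadata header in interior blocks; it is advanced to slot
	 * @offset below.
	 */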
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
}

enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};

/**
 * __gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after __gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
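 *
 * The state machine below runs its phases in this order: first
 * ALLOC_GROW_HEIGHT adds new layers above the current tree, then
 * ALLOC_GROW_DEPTH builds the missing branch down to the bottom layer,
 * and finally ALLOC_DATA fills in the data blocks themselves, falling
 * through from one phase to the next while allocated blocks remain.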
 *
 * Returns: errno on error
 */

static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			      struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for (i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			fallthrough;	/* To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			fallthrough;	/* To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}

#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * __gfs2_iomap_get, before and after unstuffing. The size we return
	 * the first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else __gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}

/**
 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					iomap->offset = pos;
					iomap->length = length;
					goto hole_found;
				}
			}
			iomap->length = size;
		}
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
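	/*
	 * A height greater than the inode's current height means the block
	 * lies entirely beyond the allocated metadata tree, so it must be
	 * a hole (or, for writes, an allocation).
	 */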
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - iomap->offset;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
hole_found:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	goto out;
}

static struct folio *
gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
	struct inode *inode = iter->inode;
	unsigned int blockmask = i_blocksize(inode) - 1;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocks;
	struct folio *folio;
	int status;

	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
	if (status)
		return ERR_PTR(status);

	folio = iomap_get_folio(iter, pos, len);
	if (IS_ERR(folio))
		gfs2_trans_end(sdp);
	return folio;
}

static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
				 unsigned copied, struct folio *folio)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (!gfs2_is_stuffed(ip))
		gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
					copied);

	folio_unlock(folio);
	folio_put(folio);

	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

	gfs2_trans_end(sdp);
}

static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
	.get_folio = gfs2_iomap_get_folio,
	.put_folio = gfs2_iomap_put_folio,
};

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);
		ap.target = data_blocks + ind_blocks;
		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			return ret;

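		/*
		 * Reserve space in a resource group before opening the
		 * transaction; rblocks below sizes the transaction to
		 * cover the dinode, the indirect blocks, and (for jdata)
		 * the data blocks as well.
		 */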
		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;

		rblocks = RES_DINODE + ind_blocks;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks;
		if (ind_blocks || data_blocks)
			rblocks += RES_STATFS + RES_QUOTA;
		if (inode == sdp->sd_rindex)
			rblocks += 2 * RES_STATFS;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip);
			if (ret)
				goto out_trans_end;
			release_metapath(mp);
			ret = __gfs2_iomap_get(inode, iomap->offset,
					       iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = __gfs2_iomap_alloc(inode, iomap, mp);
			if (ret) {
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

		gfs2_trans_end(sdp);
	}

	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->folio_ops = &gfs2_iomap_folio_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return ret;
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	if (gfs2_is_jdata(ip))
		iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
	if (ret)
		goto out_unlock;

	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT) {
			/*
			 * Silently fall back to buffered I/O for stuffed files
			 * or if we've got a hole (see gfs2_file_direct_write).
			 */
			if (iomap->type != IOMAP_MAPPED)
				ret = -ENOTBLK;
			goto out_unlock;
		}
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			goto out_unlock;
		break;
	default:
		goto out_unlock;
	}

	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT)
			return 0;
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			return 0;
		break;
	default:
		return 0;
	}

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
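		/*
		 * Keep any block that the short write partially filled:
		 * round_up() moves the start of the hole to the first
		 * block boundary at or after the written bytes.
		 */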
		loff_t hstart = round_up(pos + written, i_blocksize(inode));
		loff_t hend = iomap->offset + iomap->length;

		if (hstart < hend) {
			truncate_pagecache_range(inode, hstart, hend - 1);
			punch_hole(ip, hstart, hend - hstart);
		}
	}

	if (unlikely(!written))
		return 0;

	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (!create)
		ret = gfs2_iomap_get(inode, pos, length, &iomap);
	else
		ret = gfs2_iomap_alloc(inode, pos, length, &iomap);
	if (ret)
		goto out;

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}

int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
		    unsigned int *extlen)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_get(inode, lblock << blkbits, *extlen << blkbits,
			     &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	return 0;
}

int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
		      unsigned int *extlen, bool *new)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits,
			       &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	*new = iomap.flags & IOMAP_F_NEW;
	return 0;
}

/*
 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
 * uses iomap write to perform its actions, which begin their own transactions
 * (iomap_begin, get_folio, etc.)
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	BUG_ON(current->journal_info);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
}

#define GFS2_JTRUNC_REVOKES 8192

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}

static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (!gfs2_is_stuffed(ip)) {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				return error;
		}
	}
	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip))
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	else
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;

	i_size_write(inode, newsize);
	inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}
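
/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks, without
 *                  allocating any blocks that are missing
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @iomap: The iomap structure
 *
 * Returns: errno
 */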
int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
		   struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, 0, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
		     struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = __gfs2_iomap_alloc(inode, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 LM_FLAG_NODE_SCOPE, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			inode_set_mtime_to_ts(&ip->i_inode,
					      inode_set_ctime_current(&ip->i_inode));
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: See punch_hole().
 * @end_aligned: See punch_hole().
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}

enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};

static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}

static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}

/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position. This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc. The dinode is rewritten in every transaction to
 * guarantee integrity.
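 *
 * Calling this with @length == 0 deallocates everything from @offset to
 * the end of the allocated metadata, which is how do_shrink() and
 * gfs2_file_dealloc() implement truncation.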
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 maxsize = sdp->sd_heightsize[ip->i_height];
	struct metapath mp = {};
	struct buffer_head *dibh, *bh;
	struct gfs2_holder rd_gh;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
	__u16 start_list[GFS2_MAX_META_HEIGHT];
	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
	unsigned int start_aligned, end_aligned;
	unsigned int strip_h = ip->i_height - 1;
	u32 btotal = 0;
	int ret, state;
	int mp_h; /* metapath buffers are read in to this height */
	u64 prev_bnr = 0;
	__be64 *start, *end;

	if (offset >= maxsize) {
		/*
		 * The starting point lies beyond the allocated metadata;
		 * there are no blocks to deallocate.
		 */
		return 0;
	}

	/*
	 * The start position of the hole is defined by lblock, start_list, and
	 * start_aligned. The end position of the hole is defined by lend,
	 * end_list, and end_aligned.
	 *
	 * start_aligned and end_aligned define down to which height the start
	 * and end positions are aligned to the metadata tree (i.e., the
	 * position is a multiple of the metadata granularity at the height
	 * above). This determines at which heights additional meta pointers
	 * need to be preserved for the remaining data.
	 */

	if (length) {
		u64 end_offset = offset + length;
		u64 lend;

		/*
		 * Clip the end at the maximum file size for the given height:
		 * that's how far the metadata goes; files bigger than that
		 * will have additional layers of indirection.
		 */
		if (end_offset > maxsize)
			end_offset = maxsize;
		lend = end_offset >> bsize_shift;

		if (lblock >= lend)
			return 0;

		find_metapath(sdp, lend, &mp, ip->i_height);
		end_list = __end_list;
		memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));

		for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
			if (end_list[mp_h])
				break;
		}
		end_aligned = mp_h;
	}

	find_metapath(sdp, lblock, &mp, ip->i_height);
	memcpy(start_list, mp.mp_list, sizeof(start_list));

	for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
		if (start_list[mp_h])
			break;
	}
	start_aligned = mp_h;

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		return ret;

	mp.mp_bh[0] = dibh;
	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_metapath;

	/* issue read-ahead on metadata */
	for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
		metapointer_range(&mp, mp_h, start_list, start_aligned,
				  end_list, end_aligned, &start, &end);
		gfs2_metapath_ra(ip->i_gl, start, end);
	}

	if (mp.mp_aheight == ip->i_height)
		state = DEALLOC_MP_FULL; /* We have a complete metapath */
	else
		state = DEALLOC_FILL_MP; /* deal with partial metapath */

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_metapath;

	ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (ret)
		goto out_metapath;
	gfs2_holder_mark_uninitialized(&rd_gh);

	mp_h = strip_h;

	while (state != DEALLOC_DONE) {
		switch (state) {
		/* Truncate a full metapath at the given strip height.
		 * Note that strip_h == mp_h in order to be in this state. */
		case DEALLOC_MP_FULL:
			bh = mp.mp_bh[mp_h];
			gfs2_assert_withdraw(sdp, bh);
			if (gfs2_assert_withdraw(sdp,
						 prev_bnr != bh->b_blocknr)) {
				fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, "
					 "s_h:%u, mp_h:%u\n",
					 (unsigned long long)ip->i_no_addr,
					 prev_bnr, ip->i_height, strip_h, mp_h);
			}
			prev_bnr = bh->b_blocknr;

			if (gfs2_metatype_check(sdp, bh,
						(mp_h ? GFS2_METATYPE_IN :
							GFS2_METATYPE_DI))) {
				ret = -EIO;
				goto out;
			}

			/*
			 * Below, passing end_aligned as 0 gives us the
			 * metapointer range excluding the end point: the end
			 * point is the first metapath we must not deallocate!
			 */

			metapointer_range(&mp, mp_h, start_list, start_aligned,
					  end_list, 0 /* end_aligned */,
					  &start, &end);
			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
						 start, end,
						 mp_h != ip->i_height - 1,
						 &btotal);

			/* If we hit an error or just swept dinode buffer,
			   just exit. */
			if (ret || !mp_h) {
				state = DEALLOC_DONE;
				break;
			}
			state = DEALLOC_MP_LOWER;
			break;

		/* lower the metapath strip height */
		case DEALLOC_MP_LOWER:
			/* We're done with the current buffer, so release it,
			   unless it's the dinode buffer. Then back up to the
			   previous pointer. */
			if (mp_h) {
				brelse(mp.mp_bh[mp_h]);
				mp.mp_bh[mp_h] = NULL;
			}
			/* If we can't get any lower in height, we've stripped
			   off all we can. Next step is to back up and start
			   stripping the previous level of metadata. */
			if (mp_h == 0) {
				strip_h--;
				memcpy(mp.mp_list, start_list, sizeof(start_list));
				mp_h = strip_h;
				state = DEALLOC_FILL_MP;
				break;
			}
			mp.mp_list[mp_h] = 0;
			mp_h--; /* search one metadata height down */
			mp.mp_list[mp_h]++;
			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
				break;
			/* Here we've found a part of the metapath that is not
			 * allocated. We need to search at that height for the
			 * next non-null pointer. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
				state = DEALLOC_FILL_MP;
				mp_h++;
			}
			/* No more non-null pointers at this height. Back up
			   to the previous height and try again. */
			break; /* loop around in the same state */

		/* Fill the metapath with buffers to the given height. */
		case DEALLOC_FILL_MP:
			/* Fill the buffers out to the current height. */
			ret = fillup_metapath(ip, &mp, mp_h);
			if (ret < 0)
				goto out;

			/* On the first pass, issue read-ahead on metadata. */
			if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
				unsigned int height = mp.mp_aheight - 1;

				/* No read-ahead for data blocks. */
				if (mp.mp_aheight - 1 == strip_h)
					height--;

				for (; height >= mp.mp_aheight - ret; height--) {
					metapointer_range(&mp, height,
							  start_list, start_aligned,
							  end_list, end_aligned,
							  &start, &end);
					gfs2_metapath_ra(ip->i_gl, start, end);
				}
			}

			/* If buffers found for the entire strip height */
			if (mp.mp_aheight - 1 == strip_h) {
				state = DEALLOC_MP_FULL;
				break;
			}
			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
				mp_h = mp.mp_aheight - 1;

			/* If we find a non-null block pointer, crawl a bit
			   higher up in the metapath and try again, otherwise
			   we need to look lower for a new starting point. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
				mp_h++;
			else
				state = DEALLOC_MP_LOWER;
			break;
		}
	}

	if (btotal) {
		if (current->journal_info == NULL) {
			ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
					       RES_QUOTA, 0);
			if (ret)
				goto out;
			down_write(&ip->i_rw_mutex);
		}
		gfs2_statfs_change(sdp, 0, +btotal, 0);
		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
				  ip->i_inode.i_gid);
		inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
		gfs2_trans_add_meta(ip->i_gl, dibh);
		gfs2_dinode_out(ip, dibh->b_data);
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
	}

out:
	if (gfs2_holder_initialized(&rd_gh))
		gfs2_glock_dq_uninit(&rd_gh);
	if (current->journal_info) {
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
		cond_resched();
	}
	gfs2_quota_unhold(ip);
out_metapath:
	release_metapath(&mp);
	return ret;
}

static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}

/**
 * do_shrink - make a file smaller
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * Called with an exclusive lock on @inode. The @newsize must
 * be equal to or smaller than the current inode size.
 *
 * Returns: errno
 */

static int do_shrink(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	error = trunc_start(inode, newsize);
	if (error < 0)
		return error;
	if (gfs2_is_stuffed(ip))
		return 0;

	error = punch_hole(ip, newsize, 0);
	if (error == 0)
		error = trunc_end(ip);

	return error;
}

/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.

/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_alloc_parms ap = { .target = 1, };
        struct buffer_head *dibh;
        int error;
        int unstuff = 0;

        if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
                error = gfs2_quota_lock_check(ip, &ap);
                if (error)
                        return error;

                error = gfs2_inplace_reserve(ip, &ap);
                if (error)
                        goto do_grow_qunlock;
                unstuff = 1;
        }

        error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
                                 (unstuff &&
                                  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
                                 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
                                  0 : RES_QUOTA), 0);
        if (error)
                goto do_grow_release;

        if (unstuff) {
                error = gfs2_unstuff_dinode(ip);
                if (error)
                        goto do_end_trans;
        }

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                goto do_end_trans;

        truncate_setsize(inode, size);
        inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode));
        gfs2_trans_add_meta(ip->i_gl, dibh);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);

do_end_trans:
        gfs2_trans_end(sdp);
do_grow_release:
        if (unstuff) {
                gfs2_inplace_release(ip);
do_grow_qunlock:
                gfs2_quota_unlock(ip);
        }
        return error;
}
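
/*
 * The unwind labels in do_grow() are deliberately nested: the
 * do_grow_qunlock label sits inside the "if (unstuff)" block, so the
 * quota lock and the block reservation are only released on paths
 * where they were actually taken.
 */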

/**
 * gfs2_setattr_size - make a file a given size
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * The file size can grow, shrink, or stay the same size. This
 * is called holding i_rwsem and an exclusive glock on the inode
 * in question.
 *
 * Returns: errno
 */

int gfs2_setattr_size(struct inode *inode, u64 newsize)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        int ret;

        BUG_ON(!S_ISREG(inode->i_mode));

        ret = inode_newsize_ok(inode, newsize);
        if (ret)
                return ret;

        inode_dio_wait(inode);

        ret = gfs2_qa_get(ip);
        if (ret)
                goto out;

        if (newsize >= inode->i_size) {
                ret = do_grow(inode, newsize);
                goto out;
        }

        ret = do_shrink(inode, newsize);
out:
        gfs2_rs_delete(ip);
        gfs2_qa_put(ip);
        return ret;
}

int gfs2_truncatei_resume(struct gfs2_inode *ip)
{
        int error;

        error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
        if (!error)
                error = trunc_end(ip);
        return error;
}

int gfs2_file_dealloc(struct gfs2_inode *ip)
{
        return punch_hole(ip, 0, 0);
}

/**
 * gfs2_free_journal_extents - Free cached journal bmap info
 * @jd: The journal
 *
 */

void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
{
        struct gfs2_journal_extent *jext;

        while (!list_empty(&jd->extent_list)) {
                jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
                list_del(&jext->list);
                kfree(jext);
        }
}

/**
 * gfs2_add_jextent - Add or merge a new extent to extent cache
 * @jd: The journal descriptor
 * @lblock: The logical block at start of new extent
 * @dblock: The physical block at start of new extent
 * @blocks: Size of extent in fs blocks
 *
 * Returns: 0 on success or -ENOMEM
 */

static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
{
        struct gfs2_journal_extent *jext;

        if (!list_empty(&jd->extent_list)) {
                jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
                if ((jext->dblock + jext->blocks) == dblock) {
                        jext->blocks += blocks;
                        return 0;
                }
        }

        jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
        if (jext == NULL)
                return -ENOMEM;
        jext->dblock = dblock;
        jext->lblock = lblock;
        jext->blocks = blocks;
        list_add_tail(&jext->list, &jd->extent_list);
        jd->nr_extents++;
        return 0;
}
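
/*
 * Note that gfs2_add_jextent() only ever merges with the tail of the
 * extent list.  gfs2_map_journal_extents() below walks the journal in
 * logical block order, so a new mapping can only ever extend the most
 * recently added extent.
 */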

/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because mkfs.gfs2
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * users have created their own journals by mounting the metafs and
 * laying them out manually.  But it's still possible.  These journals
 * might have several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
        u64 lblock = 0;
        u64 lblock_stop;
        struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
        struct buffer_head bh;
        unsigned int shift = sdp->sd_sb.sb_bsize_shift;
        u64 size;
        int rc;
        ktime_t start, end;

        start = ktime_get();
        lblock_stop = i_size_read(jd->jd_inode) >> shift;
        size = (lblock_stop - lblock) << shift;
        jd->nr_extents = 0;
        WARN_ON(!list_empty(&jd->extent_list));

        do {
                bh.b_state = 0;
                bh.b_blocknr = 0;
                bh.b_size = size;
                rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
                if (rc || !buffer_mapped(&bh)) {
                        /* A hole in the journal carries no error code of
                           its own; report it as an I/O error. */
                        if (!rc)
                                rc = -EIO;
                        goto fail;
                }
                rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
                if (rc)
                        goto fail;
                size -= bh.b_size;
                lblock += (bh.b_size >> ip->i_inode.i_blkbits);
        } while (size > 0);

        end = ktime_get();
        fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
                jd->nr_extents, ktime_ms_delta(end, start));
        return 0;

fail:
        fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
                rc, jd->jd_jid,
                (unsigned long long)(i_size_read(jd->jd_inode) - size),
                jd->nr_extents);
        fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
                rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
                bh.b_state, (unsigned long long)bh.b_size);
        gfs2_free_journal_extents(jd);
        return rc;
}

/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 *
 * Returns: 1 if an alloc is required, 0 otherwise
 */

int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
                              unsigned int len)
{
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct buffer_head bh;
        unsigned int shift;
        u64 lblock, lblock_stop, size;
        u64 end_of_file;

        if (!len)
                return 0;

        if (gfs2_is_stuffed(ip)) {
                if (offset + len > gfs2_max_stuffed_size(ip))
                        return 1;
                return 0;
        }

        shift = sdp->sd_sb.sb_bsize_shift;
        BUG_ON(gfs2_is_dir(ip));
        end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
        lblock = offset >> shift;
        lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
        if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
                return 1;

        size = (lblock_stop - lblock) << shift;
        do {
                bh.b_state = 0;
                bh.b_size = size;
                /* If gfs2_block_map() fails, the buffer stays unmapped
                   and we fall through to the allocation-required case. */
                gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
                if (!buffer_mapped(&bh))
                        return 1;
                size -= bh.b_size;
                lblock += (bh.b_size >> ip->i_inode.i_blkbits);
        } while (size > 0);

        return 0;
}
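
/*
 * For a stuffed (in-dinode) file there are no data blocks to zero;
 * stuffed_zero_range() clears the byte range directly in the dinode
 * block, past the dinode header, after clamping it to the inode size.
 */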

static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct buffer_head *dibh;
        int error;

        if (offset >= inode->i_size)
                return 0;
        if (offset + length > inode->i_size)
                length = inode->i_size - offset;

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                return error;
        gfs2_trans_add_meta(ip->i_gl, dibh);
        memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
               length);
        brelse(dibh);
        return 0;
}

static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
                                         loff_t length)
{
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
        int error;

        while (length) {
                struct gfs2_trans *tr;
                loff_t chunk;
                unsigned int offs;

                chunk = length;
                if (chunk > max_chunk)
                        chunk = max_chunk;

                offs = offset & ~PAGE_MASK;
                if (offs && chunk > PAGE_SIZE)
                        chunk = offs + ((chunk - offs) & PAGE_MASK);

                /* The end of the range passed here is inclusive. */
                truncate_pagecache_range(inode, offset, offset + chunk - 1);
                offset += chunk;
                length -= chunk;

                tr = current->journal_info;
                if (!test_bit(TR_TOUCHED, &tr->tr_flags))
                        continue;

                gfs2_trans_end(sdp);
                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
                if (error)
                        return error;
        }
        return 0;
}

int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
        struct inode *inode = file_inode(file);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        unsigned int blocksize = i_blocksize(inode);
        loff_t start, end;
        int error;

        if (!gfs2_is_stuffed(ip)) {
                unsigned int start_off, end_len;

                start_off = offset & (blocksize - 1);
                end_len = (offset + length) & (blocksize - 1);
                if (start_off) {
                        unsigned int len = length;

                        if (length > blocksize - start_off)
                                len = blocksize - start_off;
                        error = gfs2_block_zero_range(inode, offset, len);
                        if (error)
                                goto out;
                        if (start_off + length < blocksize)
                                end_len = 0;
                }
                if (end_len) {
                        error = gfs2_block_zero_range(inode,
                                offset + length - end_len, end_len);
                        if (error)
                                goto out;
                }
        }

        start = round_down(offset, blocksize);
        end = round_up(offset + length, blocksize) - 1;
        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return error;

        if (gfs2_is_jdata(ip))
                error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
                                         GFS2_JTRUNC_REVOKES);
        else
                error = gfs2_trans_begin(sdp, RES_DINODE, 0);
        if (error)
                return error;

        if (gfs2_is_stuffed(ip)) {
                error = stuffed_zero_range(inode, offset, length);
                if (error)
                        goto out;
        }

        if (gfs2_is_jdata(ip)) {
                BUG_ON(!current->journal_info);
                error = gfs2_journaled_truncate_range(inode, offset, length);
                if (error)
                        goto out;
        } else
                truncate_pagecache_range(inode, offset, offset + length - 1);

        file_update_time(file);
        mark_inode_dirty(inode);

        if (current->journal_info)
                gfs2_trans_end(sdp);

        if (!gfs2_is_stuffed(ip))
                error = punch_hole(ip, offset, length);

out:
        if (current->journal_info)
                gfs2_trans_end(sdp);
        return error;
}

static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
                           loff_t offset)
{
        int ret;

        if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
                return -EIO;

        if (offset >= wpc->iomap.offset &&
            offset < wpc->iomap.offset + wpc->iomap.length)
                return 0;

        memset(&wpc->iomap, 0, sizeof(wpc->iomap));
        ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
        return ret;
}

const struct iomap_writeback_ops gfs2_writeback_ops = {
        .map_blocks             = gfs2_map_blocks,
};
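
/*
 * gfs2_writeback_ops is handed to the generic iomap writeback code by
 * the address space operations (see aops.c).  Whenever writeback moves
 * past the extent cached in the writepage context, gfs2_map_blocks()
 * refreshes wpc->iomap with a new mapping from gfs2_iomap_get().
 */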