// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include <linux/ktime.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/*
 * struct metapath - a path through an inode's metadata tree
 *
 * mp_bh[h] is the buffer at height h (mp_bh[0] is the dinode buffer) and
 * mp_list[h] is the index of the pointer to follow within that buffer.
 *
 * This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; /* buffer per height; [0] = dinode */
	__u16 mp_list[GFS2_MAX_META_HEIGHT]; /* pointer index per height */
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};

/* Forward declaration; the definition is not part of this section. */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @folio: The folio.
51 * 52 * Returns: errno 53 */ 54 static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh, 55 u64 block, struct folio *folio) 56 { 57 struct inode *inode = &ip->i_inode; 58 59 if (!folio_test_uptodate(folio)) { 60 void *kaddr = kmap_local_folio(folio, 0); 61 u64 dsize = i_size_read(inode); 62 63 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); 64 memset(kaddr + dsize, 0, folio_size(folio) - dsize); 65 kunmap_local(kaddr); 66 67 folio_mark_uptodate(folio); 68 } 69 70 if (gfs2_is_jdata(ip)) { 71 struct buffer_head *bh = folio_buffers(folio); 72 73 if (!bh) 74 bh = create_empty_buffers(folio, 75 BIT(inode->i_blkbits), BIT(BH_Uptodate)); 76 77 if (!buffer_mapped(bh)) 78 map_bh(bh, inode->i_sb, block); 79 80 set_buffer_uptodate(bh); 81 gfs2_trans_add_data(ip->i_gl, bh); 82 } else { 83 folio_mark_dirty(folio); 84 gfs2_ordered_add_inode(ip); 85 } 86 87 return 0; 88 } 89 90 static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio) 91 { 92 struct buffer_head *bh, *dibh; 93 struct gfs2_dinode *di; 94 u64 block = 0; 95 int isdir = gfs2_is_dir(ip); 96 int error; 97 98 error = gfs2_meta_inode_buffer(ip, &dibh); 99 if (error) 100 return error; 101 102 if (i_size_read(&ip->i_inode)) { 103 /* Get a free block, fill it with the stuffed data, 104 and write it out to disk */ 105 106 unsigned int n = 1; 107 error = gfs2_alloc_blocks(ip, &block, &n, 0); 108 if (error) 109 goto out_brelse; 110 if (isdir) { 111 gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1); 112 error = gfs2_dir_get_new_buffer(ip, block, &bh); 113 if (error) 114 goto out_brelse; 115 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header), 116 dibh, sizeof(struct gfs2_dinode)); 117 brelse(bh); 118 } else { 119 error = gfs2_unstuffer_folio(ip, dibh, block, folio); 120 if (error) 121 goto out_brelse; 122 } 123 } 124 125 /* Set up the pointer to the new block */ 126 127 gfs2_trans_add_meta(ip->i_gl, dibh); 128 di = (struct gfs2_dinode *)dibh->b_data; 129 
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 130 131 if (i_size_read(&ip->i_inode)) { 132 *(__be64 *)(di + 1) = cpu_to_be64(block); 133 gfs2_add_inode_blocks(&ip->i_inode, 1); 134 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 135 } 136 137 ip->i_height = 1; 138 di->di_height = cpu_to_be16(1); 139 140 out_brelse: 141 brelse(dibh); 142 return error; 143 } 144 145 /** 146 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big 147 * @ip: The GFS2 inode to unstuff 148 * 149 * This routine unstuffs a dinode and returns it to a "normal" state such 150 * that the height can be grown in the traditional way. 151 * 152 * Returns: errno 153 */ 154 155 int gfs2_unstuff_dinode(struct gfs2_inode *ip) 156 { 157 struct inode *inode = &ip->i_inode; 158 struct folio *folio; 159 int error; 160 161 down_write(&ip->i_rw_mutex); 162 folio = filemap_grab_folio(inode->i_mapping, 0); 163 error = PTR_ERR(folio); 164 if (IS_ERR(folio)) 165 goto out; 166 error = __gfs2_unstuff_inode(ip, folio); 167 folio_unlock(folio); 168 folio_put(folio); 169 out: 170 up_write(&ip->i_rw_mutex); 171 return error; 172 } 173 174 /** 175 * find_metapath - Find path through the metadata tree 176 * @sdp: The superblock 177 * @block: The disk block to look up 178 * @mp: The metapath to return the result in 179 * @height: The pre-calculated height of the metadata tree 180 * 181 * This routine returns a struct metapath structure that defines a path 182 * through the metadata of inode "ip" to get to block "block". 183 * 184 * Example: 185 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a 186 * filesystem with a blocksize of 4096. 187 * 188 * find_metapath() would return a struct metapath structure set to: 189 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165. 
190 * 191 * That means that in order to get to the block containing the byte at 192 * offset 101342453, we would load the indirect block pointed to by pointer 193 * 0 in the dinode. We would then load the indirect block pointed to by 194 * pointer 48 in that indirect block. We would then load the data block 195 * pointed to by pointer 165 in that indirect block. 196 * 197 * ---------------------------------------- 198 * | Dinode | | 199 * | | 4| 200 * | |0 1 2 3 4 5 9| 201 * | | 6| 202 * ---------------------------------------- 203 * | 204 * | 205 * V 206 * ---------------------------------------- 207 * | Indirect Block | 208 * | 5| 209 * | 4 4 4 4 4 5 5 1| 210 * |0 5 6 7 8 9 0 1 2| 211 * ---------------------------------------- 212 * | 213 * | 214 * V 215 * ---------------------------------------- 216 * | Indirect Block | 217 * | 1 1 1 1 1 5| 218 * | 6 6 6 6 6 1| 219 * |0 3 4 5 6 7 2| 220 * ---------------------------------------- 221 * | 222 * | 223 * V 224 * ---------------------------------------- 225 * | Data block containing offset | 226 * | 101342453 | 227 * | | 228 * | | 229 * ---------------------------------------- 230 * 231 */ 232 233 static void find_metapath(const struct gfs2_sbd *sdp, u64 block, 234 struct metapath *mp, unsigned int height) 235 { 236 unsigned int i; 237 238 mp->mp_fheight = height; 239 for (i = height; i--;) 240 mp->mp_list[i] = do_div(block, sdp->sd_inptrs); 241 } 242 243 static inline unsigned int metapath_branch_start(const struct metapath *mp) 244 { 245 if (mp->mp_list[0] == 0) 246 return 2; 247 return 1; 248 } 249 250 /** 251 * metaptr1 - Return the first possible metadata pointer in a metapath buffer 252 * @height: The metadata height (0 = dinode) 253 * @mp: The metapath 254 */ 255 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp) 256 { 257 struct buffer_head *bh = mp->mp_bh[height]; 258 if (height == 0) 259 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode))); 260 return ((__be64 
*)(bh->b_data + sizeof(struct gfs2_meta_header))); 261 } 262 263 /** 264 * metapointer - Return pointer to start of metadata in a buffer 265 * @height: The metadata height (0 = dinode) 266 * @mp: The metapath 267 * 268 * Return a pointer to the block number of the next height of the metadata 269 * tree given a buffer containing the pointer to the current height of the 270 * metadata tree. 271 */ 272 273 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) 274 { 275 __be64 *p = metaptr1(height, mp); 276 return p + mp->mp_list[height]; 277 } 278 279 static inline const __be64 *metaend(unsigned int height, const struct metapath *mp) 280 { 281 const struct buffer_head *bh = mp->mp_bh[height]; 282 return (const __be64 *)(bh->b_data + bh->b_size); 283 } 284 285 static void clone_metapath(struct metapath *clone, struct metapath *mp) 286 { 287 unsigned int hgt; 288 289 *clone = *mp; 290 for (hgt = 0; hgt < mp->mp_aheight; hgt++) 291 get_bh(clone->mp_bh[hgt]); 292 } 293 294 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) 295 { 296 const __be64 *t; 297 298 for (t = start; t < end; t++) { 299 struct buffer_head *rabh; 300 301 if (!*t) 302 continue; 303 304 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE); 305 if (trylock_buffer(rabh)) { 306 if (!buffer_uptodate(rabh)) { 307 bh_submit(rabh, 308 REQ_OP_READ | REQ_RAHEAD | REQ_META | 309 REQ_PRIO, 310 bh_end_read); 311 } else { 312 unlock_buffer(rabh); 313 } 314 } 315 put_bh(rabh); 316 } 317 } 318 319 static inline struct buffer_head * 320 metapath_dibh(struct metapath *mp) 321 { 322 return mp->mp_bh[0]; 323 } 324 325 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, 326 unsigned int x, unsigned int h) 327 { 328 for (; x < h; x++) { 329 __be64 *ptr = metapointer(x, mp); 330 u64 dblock = be64_to_cpu(*ptr); 331 int ret; 332 333 if (!dblock) 334 break; 335 ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]); 336 if (ret) 337 
return ret; 338 } 339 mp->mp_aheight = x + 1; 340 return 0; 341 } 342 343 /** 344 * lookup_metapath - Walk the metadata tree to a specific point 345 * @ip: The inode 346 * @mp: The metapath 347 * 348 * Assumes that the inode's buffer has already been looked up and 349 * hooked onto mp->mp_bh[0] and that the metapath has been initialised 350 * by find_metapath(). 351 * 352 * If this function encounters part of the tree which has not been 353 * allocated, it returns the current height of the tree at the point 354 * at which it found the unallocated block. Blocks which are found are 355 * added to the mp->mp_bh[] list. 356 * 357 * Returns: error 358 */ 359 360 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) 361 { 362 return __fillup_metapath(ip, mp, 0, ip->i_height - 1); 363 } 364 365 /** 366 * fillup_metapath - fill up buffers for the metadata path to a specific height 367 * @ip: The inode 368 * @mp: The metapath 369 * @h: The height to which it should be mapped 370 * 371 * Similar to lookup_metapath, but does lookups for a range of heights 372 * 373 * Returns: error or the number of buffers filled 374 */ 375 376 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) 377 { 378 unsigned int x = 0; 379 int ret; 380 381 if (h) { 382 /* find the first buffer we need to look up. 
*/ 383 for (x = h - 1; x > 0; x--) { 384 if (mp->mp_bh[x]) 385 break; 386 } 387 } 388 ret = __fillup_metapath(ip, mp, x, h); 389 if (ret) 390 return ret; 391 return mp->mp_aheight - x - 1; 392 } 393 394 static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp) 395 { 396 sector_t factor = 1, block = 0; 397 int hgt; 398 399 for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) { 400 if (hgt < mp->mp_aheight) 401 block += mp->mp_list[hgt] * factor; 402 factor *= sdp->sd_inptrs; 403 } 404 return block; 405 } 406 407 static void release_metapath(struct metapath *mp) 408 { 409 int i; 410 411 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { 412 if (mp->mp_bh[i] == NULL) 413 break; 414 brelse(mp->mp_bh[i]); 415 mp->mp_bh[i] = NULL; 416 } 417 } 418 419 /** 420 * gfs2_extent_length - Returns length of an extent of blocks 421 * @bh: The metadata block 422 * @ptr: Current position in @bh 423 * @eob: Set to 1 if we hit "end of block" 424 * 425 * Returns: The length of the extent (minimum of one block) 426 */ 427 428 static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, int *eob) 429 { 430 const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); 431 const __be64 *first = ptr; 432 u64 d = be64_to_cpu(*ptr); 433 434 *eob = 0; 435 do { 436 ptr++; 437 if (ptr >= end) 438 break; 439 d++; 440 } while(be64_to_cpu(*ptr) == d); 441 if (ptr >= end) 442 *eob = 1; 443 return ptr - first; 444 } 445 446 enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE }; 447 448 /* 449 * gfs2_metadata_walker - walk an indirect block 450 * @mp: Metapath to indirect block 451 * @ptrs: Number of pointers to look at 452 * 453 * When returning WALK_FOLLOW, the walker must update @mp to point at the right 454 * indirect block to follow. 
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * @mp is modified during the walk: pointer indexes are advanced or reset and
 * buffers of heights that have been left are released.
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
		u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	/* Number of blocks covered by one pointer at the current height. */
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp. Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		/* Height 0 is the dinode, which holds sd_diptrs pointers. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		/* Do not let the walker look beyond @max_len. */
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			/* Only the pointers skipped by the walker count. */
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (hgt) {
			/* Ran off the end of this indirect block: go up again. */
			if (mp->mp_list[hgt] >= sdp->sd_inptrs)
				goto lower_metapath;
		} else {
			/* Ran off the end of the dinode: nothing left to walk. */
			if (mp->mp_list[hgt] >= sdp->sd_diptrs)
				break;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		/* ret is the number of heights descended; rescale factor. */
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

/*
 * gfs2_hole_walker - metadata walker that looks for the first allocated pointer
 * @mp: Metapath to the indirect block to scan
 * @ptrs: Number of pointers to look at
 *
 * Scans @ptrs pointers at the deepest height present in @mp.  On finding a
 * non-zero pointer, mp->mp_list[] is advanced to it and the walker returns
 * WALK_STOP when that height is the full height of the metapath, or
 * WALK_FOLLOW otherwise.  Returns WALK_CONTINUE if all pointers are zero.
 */
static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function walks a clone of @mp; @mp itself is left unchanged.
582 * 583 * Returns: errno on error 584 */ 585 static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len, 586 struct metapath *mp, struct iomap *iomap) 587 { 588 struct metapath clone; 589 u64 hole_size; 590 int ret; 591 592 clone_metapath(&clone, mp); 593 ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker); 594 if (ret < 0) 595 goto out; 596 597 if (ret == 1) 598 hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock; 599 else 600 hole_size = len; 601 iomap->length = hole_size << inode->i_blkbits; 602 ret = 0; 603 604 out: 605 release_metapath(&clone); 606 return ret; 607 } 608 609 static inline void gfs2_indirect_init(struct metapath *mp, 610 struct gfs2_glock *gl, unsigned int i, 611 unsigned offset, u64 bn) 612 { 613 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + 614 ((i > 1) ? sizeof(struct gfs2_meta_header) : 615 sizeof(struct gfs2_dinode))); 616 BUG_ON(i < 1); 617 BUG_ON(mp->mp_bh[i] != NULL); 618 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 619 gfs2_trans_add_meta(gl, mp->mp_bh[i]); 620 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 621 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 622 ptr += offset; 623 *ptr = cpu_to_be64(bn); 624 } 625 626 enum alloc_state { 627 ALLOC_DATA = 0, 628 ALLOC_GROW_DEPTH = 1, 629 ALLOC_GROW_HEIGHT = 2, 630 /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ 631 }; 632 633 /** 634 * __gfs2_iomap_alloc - Build a metadata tree of the requested height 635 * @inode: The GFS2 inode 636 * @iomap: The iomap structure 637 * @mp: The metapath, with proper height information calculated 638 * 639 * In this routine we may have to alloc: 640 * i) Indirect blocks to grow the metadata tree height 641 * ii) Indirect blocks to fill in lower part of the metadata tree 642 * iii) Data blocks 643 * 644 * This function is called after __gfs2_iomap_get, which works out the 645 * total number of blocks which we need via gfs2_alloc_size. 
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			      struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = metapath_dibh(mp);
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	/* Pick the initial state and count the indirect blocks needed. */
	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	/*
	 * Each pass allocates one extent of up to the remaining number of
	 * blocks and consumes it in state order.  The loop ends once data
	 * blocks have been assigned, i.e. once iomap->addr has been set.
	 */
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Remember the dinode's current first pointer. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				/*
				 * Move the dinode's pointers into the last new
				 * indirect block and keep only the first
				 * pointer in the dinode.
				 */
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				/* Drop the buffers from branch_start downwards. */
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			fallthrough;	/* To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			fallthrough;	/* To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			/* The rest of this extent becomes the data blocks. */
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}

/* Set by __gfs2_iomap_get() when a mapped extent runs to the end of its block. */
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * __gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else __gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		/*
		 * The bottom block does not exist yet: limit @size to the
		 * number of pointers from the target index to its end.
		 */
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	/* Count the unallocated pointers in front of the next allocated one. */
	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}

/**
 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * The dinode buffer is stored in mp->mp_bh[0] and is not released here, on
 * success or on failure of a later step.  The result is reported in @iomap
 * as an inline, mapped or hole mapping.
 *
 * Returns: errno
 */
static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			/* The write does not fit into the dinode. */
			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					iomap->offset = pos;
					iomap->length = length;
					goto hole_found;
				}
			}
			iomap->length = size;
		}
		/* Inline mapping: the data follows the dinode header. */
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	/* Convert the byte range into a block-aligned range of blocks. */
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	/* Find the tree height needed to address @lblock. */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	/* The path is incomplete or ends in an unallocated pointer. */
	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	/* Nothing is mapped at @lblock: size the hole for the request type. */
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - iomap->offset;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out; /* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
hole_found:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	goto out;
}

/*
 * gfs2_iomap_get_folio - iomap ->get_folio hook
 * @iter: The iomap iterator
 * @pos: Position of the write in bytes
 * @len: Length of the write in bytes
 *
 * For inodes that are neither jdata nor stuffed this just calls
 * iomap_get_folio().  Otherwise a transaction reserving RES_DINODE plus one
 * block per block touched by the write is started first; it is ended again
 * if getting the folio fails.
 *
 * Returns: the folio, or an ERR_PTR() on failure
 */
static struct folio *
gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
	struct inode *inode = iter->inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int blockmask = i_blocksize(inode) - 1;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocks;
	struct folio *folio;
	int status;

	if (!gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip))
		return iomap_get_folio(iter, pos, len);

	/* Number of blocks spanned by [pos, pos + len). */
	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
	if (status)
		return ERR_PTR(status);

	folio = iomap_get_folio(iter, pos, len);
	if (IS_ERR(folio))
		gfs2_trans_end(sdp);
	return folio;
}

/*
 * gfs2_iomap_put_folio - iomap ->put_folio hook
 * @inode: The inode
 * @pos: Position of the write in bytes
 * @copied: Number of bytes copied into the folio
 * @folio: The folio to release
 *
 * Adds the written buffers of an unstuffed jdata inode to the transaction,
 * unlocks and releases the folio, and ends the transaction for jdata or
 * stuffed inodes.
 */
static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
				 unsigned copied, struct folio *folio)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip))
		gfs2_trans_add_databufs(ip->i_gl, folio,
					offset_in_folio(folio, pos),
					copied);

	folio_unlock(folio);
	folio_put(folio);

	if (gfs2_is_jdata(ip) || gfs2_is_stuffed(ip)) {
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
		gfs2_trans_end(sdp);
	}
}

/* Folio hooks handed to iomap for buffered writes. */
const struct iomap_write_ops gfs2_iomap_write_ops = {
	.get_folio = gfs2_iomap_get_folio,
	.put_folio = gfs2_iomap_put_folio,
};

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip,
iomap->length, &data_blocks, 1035 &ind_blocks); 1036 ap.target = data_blocks + ind_blocks; 1037 ret = gfs2_quota_lock_check(ip, &ap); 1038 if (ret) 1039 return ret; 1040 1041 ret = gfs2_inplace_reserve(ip, &ap); 1042 if (ret) 1043 goto out_qunlock; 1044 1045 rblocks = RES_DINODE + ind_blocks; 1046 if (gfs2_is_jdata(ip)) 1047 rblocks += data_blocks; 1048 if (ind_blocks || data_blocks) 1049 rblocks += RES_STATFS + RES_QUOTA; 1050 if (inode == sdp->sd_rindex) 1051 rblocks += 2 * RES_STATFS; 1052 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); 1053 1054 ret = gfs2_trans_begin(sdp, rblocks, 1055 iomap->length >> inode->i_blkbits); 1056 if (ret) 1057 goto out_trans_fail; 1058 1059 if (unstuff) { 1060 ret = gfs2_unstuff_dinode(ip); 1061 if (ret) 1062 goto out_trans_end; 1063 release_metapath(mp); 1064 ret = __gfs2_iomap_get(inode, iomap->offset, 1065 iomap->length, flags, iomap, mp); 1066 if (ret) 1067 goto out_trans_end; 1068 } 1069 1070 if (iomap->type == IOMAP_HOLE) { 1071 ret = __gfs2_iomap_alloc(inode, iomap, mp); 1072 if (ret) { 1073 gfs2_trans_end(sdp); 1074 gfs2_inplace_release(ip); 1075 punch_hole(ip, iomap->offset, iomap->length); 1076 goto out_qunlock; 1077 } 1078 } 1079 1080 tr = current->journal_info; 1081 if (tr->tr_num_buf_new) 1082 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1083 1084 gfs2_trans_end(sdp); 1085 } 1086 1087 return 0; 1088 1089 out_trans_end: 1090 gfs2_trans_end(sdp); 1091 out_trans_fail: 1092 gfs2_inplace_release(ip); 1093 out_qunlock: 1094 gfs2_quota_unlock(ip); 1095 return ret; 1096 } 1097 1098 static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, 1099 unsigned flags, struct iomap *iomap, 1100 struct iomap *srcmap) 1101 { 1102 struct gfs2_inode *ip = GFS2_I(inode); 1103 struct metapath mp = { .mp_aheight = 1, }; 1104 int ret; 1105 1106 if (gfs2_is_jdata(ip)) 1107 iomap->flags |= IOMAP_F_BUFFER_HEAD; 1108 1109 trace_gfs2_iomap_start(ip, pos, length, flags); 1110 ret = __gfs2_iomap_get(inode, pos, length, 
flags, iomap, &mp); 1111 if (ret) 1112 goto out_unlock; 1113 1114 switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) { 1115 case IOMAP_WRITE: 1116 if (flags & IOMAP_DIRECT) { 1117 /* 1118 * Silently fall back to buffered I/O for stuffed files 1119 * or if we've got a hole (see gfs2_file_direct_write). 1120 */ 1121 if (iomap->type != IOMAP_MAPPED) 1122 ret = -ENOTBLK; 1123 goto out_unlock; 1124 } 1125 break; 1126 case IOMAP_ZERO: 1127 if (iomap->type == IOMAP_HOLE) 1128 goto out_unlock; 1129 break; 1130 default: 1131 goto out; 1132 } 1133 1134 ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); 1135 if (ret) 1136 goto out_unlock; 1137 1138 out: 1139 if (iomap->type == IOMAP_INLINE) { 1140 iomap->private = metapath_dibh(&mp); 1141 get_bh(iomap->private); 1142 } 1143 1144 out_unlock: 1145 release_metapath(&mp); 1146 trace_gfs2_iomap_end(ip, iomap, ret); 1147 return ret; 1148 } 1149 1150 static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, 1151 ssize_t written, unsigned flags, struct iomap *iomap) 1152 { 1153 struct gfs2_inode *ip = GFS2_I(inode); 1154 struct gfs2_sbd *sdp = GFS2_SB(inode); 1155 1156 if (iomap->private) 1157 brelse(iomap->private); 1158 1159 switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) { 1160 case IOMAP_WRITE: 1161 if (flags & IOMAP_DIRECT) 1162 return 0; 1163 break; 1164 case IOMAP_ZERO: 1165 if (iomap->type == IOMAP_HOLE) 1166 return 0; 1167 break; 1168 default: 1169 return 0; 1170 } 1171 1172 if (!gfs2_is_stuffed(ip)) 1173 gfs2_ordered_add_inode(ip); 1174 1175 if (inode == sdp->sd_rindex) 1176 adjust_fs_space(inode); 1177 1178 gfs2_inplace_release(ip); 1179 1180 if (ip->i_qadata && ip->i_qadata->qa_qd_num) 1181 gfs2_quota_unlock(ip); 1182 1183 if (length != written && (iomap->flags & IOMAP_F_NEW)) { 1184 /* Deallocate blocks that were just allocated. 
*/ 1185 loff_t hstart = round_up(pos + written, i_blocksize(inode)); 1186 loff_t hend = iomap->offset + iomap->length; 1187 1188 if (hstart < hend) { 1189 truncate_pagecache_range(inode, hstart, hend - 1); 1190 punch_hole(ip, hstart, hend - hstart); 1191 } 1192 } 1193 1194 if (unlikely(!written)) 1195 return 0; 1196 1197 if (iomap->flags & IOMAP_F_SIZE_CHANGED) 1198 mark_inode_dirty(inode); 1199 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 1200 return 0; 1201 } 1202 1203 const struct iomap_ops gfs2_iomap_ops = { 1204 .iomap_begin = gfs2_iomap_begin, 1205 .iomap_end = gfs2_iomap_end, 1206 }; 1207 1208 /** 1209 * gfs2_block_map - Map one or more blocks of an inode to a disk block 1210 * @inode: The inode 1211 * @lblock: The logical block number 1212 * @bh_map: The bh to be mapped 1213 * @create: True if its ok to alloc blocks to satify the request 1214 * 1215 * The size of the requested mapping is defined in bh_map->b_size. 1216 * 1217 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged 1218 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and 1219 * bh_map->b_size to indicate the size of the mapping when @lblock and 1220 * successive blocks are mapped, up to the requested size. 1221 * 1222 * Sets buffer_boundary() if a read of metadata will be required 1223 * before the next block can be mapped. Sets buffer_new() if new 1224 * blocks were allocated. 
1225 * 1226 * Returns: errno 1227 */ 1228 1229 int gfs2_block_map(struct inode *inode, sector_t lblock, 1230 struct buffer_head *bh_map, int create) 1231 { 1232 struct gfs2_inode *ip = GFS2_I(inode); 1233 loff_t pos = (loff_t)lblock << inode->i_blkbits; 1234 loff_t length = bh_map->b_size; 1235 struct iomap iomap = { }; 1236 int ret; 1237 1238 clear_buffer_mapped(bh_map); 1239 clear_buffer_new(bh_map); 1240 clear_buffer_boundary(bh_map); 1241 trace_gfs2_bmap(ip, bh_map, lblock, create, 1); 1242 1243 if (!create) 1244 ret = gfs2_iomap_get(inode, pos, length, &iomap); 1245 else 1246 ret = gfs2_iomap_alloc(inode, pos, length, &iomap); 1247 if (ret) 1248 goto out; 1249 1250 if (iomap.length > bh_map->b_size) { 1251 iomap.length = bh_map->b_size; 1252 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY; 1253 } 1254 if (iomap.addr != IOMAP_NULL_ADDR) 1255 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits); 1256 bh_map->b_size = iomap.length; 1257 if (iomap.flags & IOMAP_F_GFS2_BOUNDARY) 1258 set_buffer_boundary(bh_map); 1259 if (iomap.flags & IOMAP_F_NEW) 1260 set_buffer_new(bh_map); 1261 1262 out: 1263 trace_gfs2_bmap(ip, bh_map, lblock, create, ret); 1264 return ret; 1265 } 1266 1267 int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock, 1268 unsigned int *extlen) 1269 { 1270 unsigned int blkbits = inode->i_blkbits; 1271 struct iomap iomap = { }; 1272 unsigned int len; 1273 int ret; 1274 1275 ret = gfs2_iomap_get(inode, lblock << blkbits, *extlen << blkbits, 1276 &iomap); 1277 if (ret) 1278 return ret; 1279 if (iomap.type != IOMAP_MAPPED) 1280 return -EIO; 1281 *dblock = iomap.addr >> blkbits; 1282 len = iomap.length >> blkbits; 1283 if (len < *extlen) 1284 *extlen = len; 1285 return 0; 1286 } 1287 1288 int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock, 1289 unsigned int *extlen, bool *new) 1290 { 1291 unsigned int blkbits = inode->i_blkbits; 1292 struct iomap iomap = { }; 1293 unsigned int len; 1294 int ret; 1295 1296 ret = 
gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits, 1297 &iomap); 1298 if (ret) 1299 return ret; 1300 if (iomap.type != IOMAP_MAPPED) 1301 return -EIO; 1302 *dblock = iomap.addr >> blkbits; 1303 len = iomap.length >> blkbits; 1304 if (len < *extlen) 1305 *extlen = len; 1306 *new = iomap.flags & IOMAP_F_NEW; 1307 return 0; 1308 } 1309 1310 /* 1311 * NOTE: Never call gfs2_block_zero_range with an open transaction because it 1312 * uses iomap write to perform its actions, which begin their own transactions 1313 * (iomap_begin, get_folio, etc.) 1314 */ 1315 static int gfs2_block_zero_range(struct inode *inode, loff_t from, loff_t length) 1316 { 1317 BUG_ON(current->journal_info); 1318 if (from >= inode->i_size) 1319 return 0; 1320 length = min(length, inode->i_size - from); 1321 return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops, 1322 &gfs2_iomap_write_ops, NULL); 1323 } 1324 1325 #define GFS2_JTRUNC_REVOKES 8192 1326 1327 /** 1328 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files 1329 * @inode: The inode being truncated 1330 * @oldsize: The original (larger) size 1331 * @newsize: The new smaller size 1332 * 1333 * With jdata files, we have to journal a revoke for each block which is 1334 * truncated. As a result, we need to split this into separate transactions 1335 * if the number of pages being truncated gets too large. 
1336 */ 1337 1338 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) 1339 { 1340 struct gfs2_sbd *sdp = GFS2_SB(inode); 1341 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 1342 u64 chunk; 1343 int error; 1344 1345 while (oldsize != newsize) { 1346 struct gfs2_trans *tr; 1347 unsigned int offs; 1348 1349 chunk = oldsize - newsize; 1350 if (chunk > max_chunk) 1351 chunk = max_chunk; 1352 1353 offs = oldsize & ~PAGE_MASK; 1354 if (offs && chunk > PAGE_SIZE) 1355 chunk = offs + ((chunk - offs) & PAGE_MASK); 1356 1357 truncate_pagecache(inode, oldsize - chunk); 1358 oldsize -= chunk; 1359 1360 tr = current->journal_info; 1361 if (!test_bit(TR_TOUCHED, &tr->tr_flags)) 1362 continue; 1363 1364 gfs2_trans_end(sdp); 1365 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 1366 if (error) 1367 return error; 1368 } 1369 1370 return 0; 1371 } 1372 1373 static int trunc_start(struct inode *inode, u64 newsize) 1374 { 1375 struct gfs2_inode *ip = GFS2_I(inode); 1376 struct gfs2_sbd *sdp = GFS2_SB(inode); 1377 struct buffer_head *dibh = NULL; 1378 int journaled = gfs2_is_jdata(ip); 1379 u64 oldsize = inode->i_size; 1380 int error; 1381 1382 if (!gfs2_is_stuffed(ip)) { 1383 unsigned int blocksize = i_blocksize(inode); 1384 unsigned int offs = newsize & (blocksize - 1); 1385 if (offs) { 1386 error = gfs2_block_zero_range(inode, newsize, 1387 blocksize - offs); 1388 if (error) 1389 return error; 1390 } 1391 } 1392 if (journaled) 1393 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); 1394 else 1395 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1396 if (error) 1397 return error; 1398 1399 error = gfs2_meta_inode_buffer(ip, &dibh); 1400 if (error) 1401 goto out; 1402 1403 gfs2_trans_add_meta(ip->i_gl, dibh); 1404 1405 if (gfs2_is_stuffed(ip)) 1406 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1407 else 1408 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; 1409 1410 i_size_write(inode, 
newsize); 1411 inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); 1412 gfs2_dinode_out(ip, dibh->b_data); 1413 1414 if (journaled) 1415 error = gfs2_journaled_truncate(inode, oldsize, newsize); 1416 else 1417 truncate_pagecache(inode, newsize); 1418 1419 out: 1420 brelse(dibh); 1421 if (current->journal_info) 1422 gfs2_trans_end(sdp); 1423 return error; 1424 } 1425 1426 int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, 1427 struct iomap *iomap) 1428 { 1429 struct metapath mp = { .mp_aheight = 1, }; 1430 int ret; 1431 1432 ret = __gfs2_iomap_get(inode, pos, length, 0, iomap, &mp); 1433 release_metapath(&mp); 1434 return ret; 1435 } 1436 1437 int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length, 1438 struct iomap *iomap) 1439 { 1440 struct metapath mp = { .mp_aheight = 1, }; 1441 int ret; 1442 1443 ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp); 1444 if (!ret && iomap->type == IOMAP_HOLE) 1445 ret = __gfs2_iomap_alloc(inode, iomap, &mp); 1446 release_metapath(&mp); 1447 return ret; 1448 } 1449 1450 /** 1451 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein 1452 * @ip: inode 1453 * @rd_gh: holder of resource group glock 1454 * @bh: buffer head to sweep 1455 * @start: starting point in bh 1456 * @end: end point in bh 1457 * @meta: true if bh points to metadata (rather than data) 1458 * @btotal: place to keep count of total blocks freed 1459 * 1460 * We sweep a metadata buffer (provided by the metapath) for blocks we need to 1461 * free, and free them all. However, we do it one rgrp at a time. If this 1462 * block has references to multiple rgrps, we break it into individual 1463 * transactions. This allows other processes to use the rgrps while we're 1464 * focused on a single one, for better concurrency / performance. 1465 * At every transaction boundary, we rewrite the inode into the journal. 
1466 * That way the bitmaps are kept consistent with the inode and we can recover 1467 * if we're interrupted by power-outages. 1468 * 1469 * Returns: 0, or return code if an error occurred. 1470 * *btotal has the total number of blocks freed 1471 */ 1472 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, 1473 struct buffer_head *bh, __be64 *start, __be64 *end, 1474 bool meta, u32 *btotal) 1475 { 1476 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1477 struct gfs2_rgrpd *rgd; 1478 struct gfs2_trans *tr; 1479 __be64 *p; 1480 int blks_outside_rgrp; 1481 u64 bn, bstart, isize_blks; 1482 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */ 1483 int ret = 0; 1484 bool buf_in_tr = false; /* buffer was added to transaction */ 1485 1486 more_rgrps: 1487 rgd = NULL; 1488 if (gfs2_holder_initialized(rd_gh)) { 1489 rgd = gfs2_glock2rgrp(rd_gh->gh_gl); 1490 gfs2_assert_withdraw(sdp, 1491 gfs2_glock_is_locked_by_me(rd_gh->gh_gl)); 1492 } 1493 blks_outside_rgrp = 0; 1494 bstart = 0; 1495 blen = 0; 1496 1497 for (p = start; p < end; p++) { 1498 if (!*p) 1499 continue; 1500 bn = be64_to_cpu(*p); 1501 1502 if (rgd) { 1503 if (!rgrp_contains_block(rgd, bn)) { 1504 blks_outside_rgrp++; 1505 continue; 1506 } 1507 } else { 1508 rgd = gfs2_blk2rgrpd(sdp, bn, true); 1509 if (unlikely(!rgd)) { 1510 ret = -EIO; 1511 goto out; 1512 } 1513 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1514 LM_FLAG_NODE_SCOPE, rd_gh); 1515 if (ret) 1516 goto out; 1517 1518 /* Must be done with the rgrp glock held: */ 1519 if (gfs2_rs_active(&ip->i_res) && 1520 rgd == ip->i_res.rs_rgd) 1521 gfs2_rs_deltree(&ip->i_res); 1522 } 1523 1524 /* The size of our transactions will be unknown until we 1525 actually process all the metadata blocks that relate to 1526 the rgrp. So we estimate. We know it can't be more than 1527 the dinode's i_blocks and we don't want to exceed the 1528 journal flush threshold, sd_log_thresh2. 
*/ 1529 if (current->journal_info == NULL) { 1530 unsigned int jblocks_rqsted, revokes; 1531 1532 jblocks_rqsted = rgd->rd_length + RES_DINODE + 1533 RES_INDIRECT; 1534 isize_blks = gfs2_get_inode_blocks(&ip->i_inode); 1535 if (isize_blks > atomic_read(&sdp->sd_log_thresh2)) 1536 jblocks_rqsted += 1537 atomic_read(&sdp->sd_log_thresh2); 1538 else 1539 jblocks_rqsted += isize_blks; 1540 revokes = jblocks_rqsted; 1541 if (meta) 1542 revokes += end - start; 1543 else if (ip->i_diskflags & GFS2_DIF_EXHASH) 1544 revokes += sdp->sd_inptrs; 1545 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes); 1546 if (ret) 1547 goto out_unlock; 1548 down_write(&ip->i_rw_mutex); 1549 } 1550 /* check if we will exceed the transaction blocks requested */ 1551 tr = current->journal_info; 1552 if (tr->tr_num_buf_new + RES_STATFS + 1553 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) { 1554 /* We set blks_outside_rgrp to ensure the loop will 1555 be repeated for the same rgrp, but with a new 1556 transaction. */ 1557 blks_outside_rgrp++; 1558 /* This next part is tricky. If the buffer was added 1559 to the transaction, we've already set some block 1560 pointers to 0, so we better follow through and free 1561 them, or we will introduce corruption (so break). 1562 This may be impossible, or at least rare, but I 1563 decided to cover the case regardless. 1564 1565 If the buffer was not added to the transaction 1566 (this call), doing so would exceed our transaction 1567 size, so we need to end the transaction and start a 1568 new one (so goto). 
*/ 1569 1570 if (buf_in_tr) 1571 break; 1572 goto out_unlock; 1573 } 1574 1575 gfs2_trans_add_meta(ip->i_gl, bh); 1576 buf_in_tr = true; 1577 *p = 0; 1578 if (bstart + blen == bn) { 1579 blen++; 1580 continue; 1581 } 1582 if (bstart) { 1583 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta); 1584 (*btotal) += blen; 1585 gfs2_add_inode_blocks(&ip->i_inode, -blen); 1586 } 1587 bstart = bn; 1588 blen = 1; 1589 } 1590 if (bstart) { 1591 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta); 1592 (*btotal) += blen; 1593 gfs2_add_inode_blocks(&ip->i_inode, -blen); 1594 } 1595 out_unlock: 1596 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks 1597 outside the rgrp we just processed, 1598 do it all over again. */ 1599 if (current->journal_info) { 1600 struct buffer_head *dibh; 1601 1602 ret = gfs2_meta_inode_buffer(ip, &dibh); 1603 if (ret) 1604 goto out; 1605 1606 /* Every transaction boundary, we rewrite the dinode 1607 to keep its di_blocks current in case of failure. */ 1608 inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); 1609 gfs2_trans_add_meta(ip->i_gl, dibh); 1610 gfs2_dinode_out(ip, dibh->b_data); 1611 brelse(dibh); 1612 up_write(&ip->i_rw_mutex); 1613 gfs2_trans_end(sdp); 1614 buf_in_tr = false; 1615 } 1616 gfs2_glock_dq_uninit(rd_gh); 1617 cond_resched(); 1618 goto more_rgrps; 1619 } 1620 out: 1621 return ret; 1622 } 1623 1624 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h) 1625 { 1626 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0]))) 1627 return false; 1628 return true; 1629 } 1630 1631 /** 1632 * find_nonnull_ptr - find a non-null pointer given a metapath and height 1633 * @sdp: The superblock 1634 * @mp: starting metapath 1635 * @h: desired height to search 1636 * @end_list: See punch_hole(). 1637 * @end_aligned: See punch_hole(). 1638 * 1639 * Assumes the metapath is valid (with buffers) out to height h. 
1640 * Returns: true if a non-null pointer was found in the metapath buffer 1641 * false if all remaining pointers are NULL in the buffer 1642 */ 1643 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp, 1644 unsigned int h, 1645 __u16 *end_list, unsigned int end_aligned) 1646 { 1647 struct buffer_head *bh = mp->mp_bh[h]; 1648 __be64 *first, *ptr, *end; 1649 1650 first = metaptr1(h, mp); 1651 ptr = first + mp->mp_list[h]; 1652 end = (__be64 *)(bh->b_data + bh->b_size); 1653 if (end_list && mp_eq_to_hgt(mp, end_list, h)) { 1654 bool keep_end = h < end_aligned; 1655 end = first + end_list[h] + keep_end; 1656 } 1657 1658 while (ptr < end) { 1659 if (*ptr) { /* if we have a non-null pointer */ 1660 mp->mp_list[h] = ptr - first; 1661 h++; 1662 if (h < GFS2_MAX_META_HEIGHT) 1663 mp->mp_list[h] = 0; 1664 return true; 1665 } 1666 ptr++; 1667 } 1668 return false; 1669 } 1670 1671 enum dealloc_states { 1672 DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */ 1673 DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */ 1674 DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. 
*/ 1675 DEALLOC_DONE = 3, /* process complete */ 1676 }; 1677 1678 static inline void 1679 metapointer_range(struct metapath *mp, int height, 1680 __u16 *start_list, unsigned int start_aligned, 1681 __u16 *end_list, unsigned int end_aligned, 1682 __be64 **start, __be64 **end) 1683 { 1684 struct buffer_head *bh = mp->mp_bh[height]; 1685 __be64 *first; 1686 1687 first = metaptr1(height, mp); 1688 *start = first; 1689 if (mp_eq_to_hgt(mp, start_list, height)) { 1690 bool keep_start = height < start_aligned; 1691 *start = first + start_list[height] + keep_start; 1692 } 1693 *end = (__be64 *)(bh->b_data + bh->b_size); 1694 if (end_list && mp_eq_to_hgt(mp, end_list, height)) { 1695 bool keep_end = height < end_aligned; 1696 *end = first + end_list[height] + keep_end; 1697 } 1698 } 1699 1700 static inline bool walk_done(struct gfs2_sbd *sdp, 1701 struct metapath *mp, int height, 1702 __u16 *end_list, unsigned int end_aligned) 1703 { 1704 __u16 end; 1705 1706 if (end_list) { 1707 bool keep_end = height < end_aligned; 1708 if (!mp_eq_to_hgt(mp, end_list, height)) 1709 return false; 1710 end = end_list[height] + keep_end; 1711 } else 1712 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs; 1713 return mp->mp_list[height] >= end; 1714 } 1715 1716 /** 1717 * punch_hole - deallocate blocks in a file 1718 * @ip: inode to truncate 1719 * @offset: the start of the hole 1720 * @length: the size of the hole (or 0 for truncate) 1721 * 1722 * Punch a hole into a file or truncate a file at a given position. This 1723 * function operates in whole blocks (@offset and @length are rounded 1724 * accordingly); partially filled blocks must be cleared otherwise. 1725 * 1726 * This function works from the bottom up, and from the right to the left. In 1727 * other words, it strips off the highest layer (data) before stripping any of 1728 * the metadata. Doing it this way is best in case the operation is interrupted 1729 * by power failure, etc. 
The dinode is rewritten in every transaction to 1730 * guarantee integrity. 1731 */ 1732 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) 1733 { 1734 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1735 u64 maxsize = sdp->sd_heightsize[ip->i_height]; 1736 struct metapath mp = {}; 1737 struct buffer_head *dibh, *bh; 1738 struct gfs2_holder rd_gh; 1739 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; 1740 unsigned int bsize = 1 << bsize_shift; 1741 u64 lblock = (offset + bsize - 1) >> bsize_shift; 1742 __u16 start_list[GFS2_MAX_META_HEIGHT]; 1743 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; 1744 unsigned int start_aligned, end_aligned; 1745 unsigned int strip_h = ip->i_height - 1; 1746 u32 btotal = 0; 1747 int ret, state; 1748 int mp_h; /* metapath buffers are read in to this height */ 1749 u64 prev_bnr = 0; 1750 __be64 *start, *end; 1751 1752 if (offset + bsize - 1 >= maxsize) { 1753 /* 1754 * The starting point lies beyond the allocated metadata; 1755 * there are no blocks to deallocate. 1756 */ 1757 return 0; 1758 } 1759 1760 /* 1761 * The start position of the hole is defined by lblock, start_list, and 1762 * start_aligned. The end position of the hole is defined by lend, 1763 * end_list, and end_aligned. 1764 * 1765 * start_aligned and end_aligned define down to which height the start 1766 * and end positions are aligned to the metadata tree (i.e., the 1767 * position is a multiple of the metadata granularity at the height 1768 * above). This determines at which heights additional meta pointers 1769 * needs to be preserved for the remaining data. 1770 */ 1771 1772 if (length) { 1773 u64 end_offset = offset + length; 1774 u64 lend; 1775 1776 /* 1777 * Clip the end at the maximum file size for the given height: 1778 * that's how far the metadata goes; files bigger than that 1779 * will have additional layers of indirection. 
1780 */ 1781 if (end_offset > maxsize) 1782 end_offset = maxsize; 1783 lend = end_offset >> bsize_shift; 1784 1785 if (lblock >= lend) 1786 return 0; 1787 1788 find_metapath(sdp, lend, &mp, ip->i_height); 1789 end_list = __end_list; 1790 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list)); 1791 1792 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { 1793 if (end_list[mp_h]) 1794 break; 1795 } 1796 end_aligned = mp_h; 1797 } 1798 1799 find_metapath(sdp, lblock, &mp, ip->i_height); 1800 memcpy(start_list, mp.mp_list, sizeof(start_list)); 1801 1802 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) { 1803 if (start_list[mp_h]) 1804 break; 1805 } 1806 start_aligned = mp_h; 1807 1808 ret = gfs2_meta_inode_buffer(ip, &dibh); 1809 if (ret) 1810 return ret; 1811 1812 mp.mp_bh[0] = dibh; 1813 ret = lookup_metapath(ip, &mp); 1814 if (ret) 1815 goto out_metapath; 1816 1817 /* issue read-ahead on metadata */ 1818 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) { 1819 metapointer_range(&mp, mp_h, start_list, start_aligned, 1820 end_list, end_aligned, &start, &end); 1821 gfs2_metapath_ra(ip->i_gl, start, end); 1822 } 1823 1824 if (mp.mp_aheight == ip->i_height) 1825 state = DEALLOC_MP_FULL; /* We have a complete metapath */ 1826 else 1827 state = DEALLOC_FILL_MP; /* deal with partial metapath */ 1828 1829 ret = gfs2_rindex_update(sdp); 1830 if (ret) 1831 goto out_metapath; 1832 1833 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); 1834 if (ret) 1835 goto out_metapath; 1836 gfs2_holder_mark_uninitialized(&rd_gh); 1837 1838 mp_h = strip_h; 1839 1840 while (state != DEALLOC_DONE) { 1841 switch (state) { 1842 /* Truncate a full metapath at the given strip height. 1843 * Note that strip_h == mp_h in order to be in this state. 
*/ 1844 case DEALLOC_MP_FULL: 1845 bh = mp.mp_bh[mp_h]; 1846 gfs2_assert_withdraw(sdp, bh); 1847 if (gfs2_assert_withdraw(sdp, 1848 prev_bnr != bh->b_blocknr)) { 1849 fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, " 1850 "s_h:%u, mp_h:%u\n", 1851 (unsigned long long)ip->i_no_addr, 1852 prev_bnr, ip->i_height, strip_h, mp_h); 1853 } 1854 prev_bnr = bh->b_blocknr; 1855 1856 if (gfs2_metatype_check(sdp, bh, 1857 (mp_h ? GFS2_METATYPE_IN : 1858 GFS2_METATYPE_DI))) { 1859 ret = -EIO; 1860 goto out; 1861 } 1862 1863 /* 1864 * Below, passing end_aligned as 0 gives us the 1865 * metapointer range excluding the end point: the end 1866 * point is the first metapath we must not deallocate! 1867 */ 1868 1869 metapointer_range(&mp, mp_h, start_list, start_aligned, 1870 end_list, 0 /* end_aligned */, 1871 &start, &end); 1872 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h], 1873 start, end, 1874 mp_h != ip->i_height - 1, 1875 &btotal); 1876 1877 /* If we hit an error or just swept dinode buffer, 1878 just exit. */ 1879 if (ret || !mp_h) { 1880 state = DEALLOC_DONE; 1881 break; 1882 } 1883 state = DEALLOC_MP_LOWER; 1884 break; 1885 1886 /* lower the metapath strip height */ 1887 case DEALLOC_MP_LOWER: 1888 /* We're done with the current buffer, so release it, 1889 unless it's the dinode buffer. Then back up to the 1890 previous pointer. */ 1891 if (mp_h) { 1892 brelse(mp.mp_bh[mp_h]); 1893 mp.mp_bh[mp_h] = NULL; 1894 } 1895 /* If we can't get any lower in height, we've stripped 1896 off all we can. Next step is to back up and start 1897 stripping the previous level of metadata. 
*/ 1898 if (mp_h == 0) { 1899 strip_h--; 1900 memcpy(mp.mp_list, start_list, sizeof(start_list)); 1901 mp_h = strip_h; 1902 state = DEALLOC_FILL_MP; 1903 break; 1904 } 1905 mp.mp_list[mp_h] = 0; 1906 mp_h--; /* search one metadata height down */ 1907 mp.mp_list[mp_h]++; 1908 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned)) 1909 break; 1910 /* Here we've found a part of the metapath that is not 1911 * allocated. We need to search at that height for the 1912 * next non-null pointer. */ 1913 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) { 1914 state = DEALLOC_FILL_MP; 1915 mp_h++; 1916 } 1917 /* No more non-null pointers at this height. Back up 1918 to the previous height and try again. */ 1919 break; /* loop around in the same state */ 1920 1921 /* Fill the metapath with buffers to the given height. */ 1922 case DEALLOC_FILL_MP: 1923 /* Fill the buffers out to the current height. */ 1924 ret = fillup_metapath(ip, &mp, mp_h); 1925 if (ret < 0) 1926 goto out; 1927 1928 /* On the first pass, issue read-ahead on metadata. */ 1929 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { 1930 unsigned int height = mp.mp_aheight - 1; 1931 1932 /* No read-ahead for data blocks. */ 1933 if (mp.mp_aheight - 1 == strip_h) 1934 height--; 1935 1936 for (; height >= mp.mp_aheight - ret; height--) { 1937 metapointer_range(&mp, height, 1938 start_list, start_aligned, 1939 end_list, end_aligned, 1940 &start, &end); 1941 gfs2_metapath_ra(ip->i_gl, start, end); 1942 } 1943 } 1944 1945 /* If buffers found for the entire strip height */ 1946 if (mp.mp_aheight - 1 == strip_h) { 1947 state = DEALLOC_MP_FULL; 1948 break; 1949 } 1950 if (mp.mp_aheight < ip->i_height) /* We have a partial height */ 1951 mp_h = mp.mp_aheight - 1; 1952 1953 /* If we find a non-null block pointer, crawl a bit 1954 higher up in the metapath and try again, otherwise 1955 we need to look lower for a new starting point. 
*/ 1956 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) 1957 mp_h++; 1958 else 1959 state = DEALLOC_MP_LOWER; 1960 break; 1961 } 1962 } 1963 1964 if (btotal) { 1965 if (current->journal_info == NULL) { 1966 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + 1967 RES_QUOTA, 0); 1968 if (ret) 1969 goto out; 1970 down_write(&ip->i_rw_mutex); 1971 } 1972 gfs2_statfs_change(sdp, 0, +btotal, 0); 1973 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, 1974 ip->i_inode.i_gid); 1975 inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); 1976 gfs2_trans_add_meta(ip->i_gl, dibh); 1977 gfs2_dinode_out(ip, dibh->b_data); 1978 up_write(&ip->i_rw_mutex); 1979 gfs2_trans_end(sdp); 1980 } 1981 1982 out: 1983 if (gfs2_holder_initialized(&rd_gh)) 1984 gfs2_glock_dq_uninit(&rd_gh); 1985 if (current->journal_info) { 1986 up_write(&ip->i_rw_mutex); 1987 gfs2_trans_end(sdp); 1988 cond_resched(); 1989 } 1990 gfs2_quota_unhold(ip); 1991 out_metapath: 1992 release_metapath(&mp); 1993 return ret; 1994 } 1995 1996 static int trunc_end(struct gfs2_inode *ip) 1997 { 1998 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1999 struct buffer_head *dibh; 2000 int error; 2001 2002 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 2003 if (error) 2004 return error; 2005 2006 down_write(&ip->i_rw_mutex); 2007 2008 error = gfs2_meta_inode_buffer(ip, &dibh); 2009 if (error) 2010 goto out; 2011 2012 if (!i_size_read(&ip->i_inode)) { 2013 ip->i_height = 0; 2014 ip->i_goal = ip->i_no_addr; 2015 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 2016 gfs2_ordered_del_inode(ip); 2017 } 2018 inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); 2019 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 2020 2021 gfs2_trans_add_meta(ip->i_gl, dibh); 2022 gfs2_dinode_out(ip, dibh->b_data); 2023 brelse(dibh); 2024 2025 out: 2026 up_write(&ip->i_rw_mutex); 2027 gfs2_trans_end(sdp); 2028 return error; 2029 } 2030 2031 /** 2032 * do_shrink - make a file smaller 
2033 * @inode: the inode 2034 * @newsize: the size to make the file 2035 * 2036 * Called with an exclusive lock on @inode. The @size must 2037 * be equal to or smaller than the current inode size. 2038 * 2039 * Returns: errno 2040 */ 2041 2042 static int do_shrink(struct inode *inode, u64 newsize) 2043 { 2044 struct gfs2_inode *ip = GFS2_I(inode); 2045 int error; 2046 2047 error = trunc_start(inode, newsize); 2048 if (error < 0) 2049 return error; 2050 if (gfs2_is_stuffed(ip)) 2051 return 0; 2052 2053 error = punch_hole(ip, newsize, 0); 2054 if (error == 0) 2055 error = trunc_end(ip); 2056 2057 return error; 2058 } 2059 2060 /** 2061 * do_grow - Touch and update inode size 2062 * @inode: The inode 2063 * @size: The new size 2064 * 2065 * This function updates the timestamps on the inode and 2066 * may also increase the size of the inode. This function 2067 * must not be called with @size any smaller than the current 2068 * inode size. 2069 * 2070 * Although it is not strictly required to unstuff files here, 2071 * earlier versions of GFS2 have a bug in the stuffed file reading 2072 * code which will result in a buffer overrun if the size is larger 2073 * than the max stuffed file size. In order to prevent this from 2074 * occurring, such files are unstuffed, but in other cases we can 2075 * just update the inode size directly. 
2076 * 2077 * Returns: 0 on success, or -ve on error 2078 */ 2079 2080 static int do_grow(struct inode *inode, u64 size) 2081 { 2082 struct gfs2_inode *ip = GFS2_I(inode); 2083 struct gfs2_sbd *sdp = GFS2_SB(inode); 2084 struct gfs2_alloc_parms ap = { .target = 1, }; 2085 struct buffer_head *dibh; 2086 int error; 2087 int unstuff = 0; 2088 2089 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) { 2090 error = gfs2_quota_lock_check(ip, &ap); 2091 if (error) 2092 return error; 2093 2094 error = gfs2_inplace_reserve(ip, &ap); 2095 if (error) 2096 goto do_grow_qunlock; 2097 unstuff = 1; 2098 } 2099 2100 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + 2101 (unstuff && 2102 gfs2_is_jdata(ip) ? RES_JDATA : 0) + 2103 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 2104 0 : RES_QUOTA), 0); 2105 if (error) 2106 goto do_grow_release; 2107 2108 if (unstuff) { 2109 error = gfs2_unstuff_dinode(ip); 2110 if (error) 2111 goto do_end_trans; 2112 } 2113 2114 error = gfs2_meta_inode_buffer(ip, &dibh); 2115 if (error) 2116 goto do_end_trans; 2117 2118 truncate_setsize(inode, size); 2119 inode_set_mtime_to_ts(&ip->i_inode, inode_set_ctime_current(&ip->i_inode)); 2120 gfs2_trans_add_meta(ip->i_gl, dibh); 2121 gfs2_dinode_out(ip, dibh->b_data); 2122 brelse(dibh); 2123 2124 do_end_trans: 2125 gfs2_trans_end(sdp); 2126 do_grow_release: 2127 if (unstuff) { 2128 gfs2_inplace_release(ip); 2129 do_grow_qunlock: 2130 gfs2_quota_unlock(ip); 2131 } 2132 return error; 2133 } 2134 2135 /** 2136 * gfs2_setattr_size - make a file a given size 2137 * @inode: the inode 2138 * @newsize: the size to make the file 2139 * 2140 * The file size can grow, shrink, or stay the same size. This 2141 * is called holding i_rwsem and an exclusive glock on the inode 2142 * in question. 
2143 * 2144 * Returns: errno 2145 */ 2146 2147 int gfs2_setattr_size(struct inode *inode, u64 newsize) 2148 { 2149 struct gfs2_inode *ip = GFS2_I(inode); 2150 int ret; 2151 2152 BUG_ON(!S_ISREG(inode->i_mode)); 2153 2154 ret = inode_newsize_ok(inode, newsize); 2155 if (ret) 2156 return ret; 2157 2158 inode_dio_wait(inode); 2159 2160 ret = gfs2_qa_get(ip); 2161 if (ret) 2162 goto out; 2163 2164 if (newsize >= inode->i_size) { 2165 ret = do_grow(inode, newsize); 2166 goto out; 2167 } 2168 2169 ret = do_shrink(inode, newsize); 2170 out: 2171 gfs2_rs_delete(ip); 2172 gfs2_qa_put(ip); 2173 return ret; 2174 } 2175 2176 int gfs2_truncatei_resume(struct gfs2_inode *ip) 2177 { 2178 int error; 2179 error = punch_hole(ip, i_size_read(&ip->i_inode), 0); 2180 if (!error) 2181 error = trunc_end(ip); 2182 return error; 2183 } 2184 2185 int gfs2_file_dealloc(struct gfs2_inode *ip) 2186 { 2187 return punch_hole(ip, 0, 0); 2188 } 2189 2190 /** 2191 * gfs2_free_journal_extents - Free cached journal bmap info 2192 * @jd: The journal 2193 * 2194 */ 2195 2196 void gfs2_free_journal_extents(struct gfs2_jdesc *jd) 2197 { 2198 struct gfs2_journal_extent *jext; 2199 2200 while(!list_empty(&jd->extent_list)) { 2201 jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list); 2202 list_del(&jext->list); 2203 kfree(jext); 2204 } 2205 } 2206 2207 /** 2208 * gfs2_add_jextent - Add or merge a new extent to extent cache 2209 * @jd: The journal descriptor 2210 * @lblock: The logical block at start of new extent 2211 * @dblock: The physical block at start of new extent 2212 * @blocks: Size of extent in fs blocks 2213 * 2214 * Returns: 0 on success or -ENOMEM 2215 */ 2216 2217 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) 2218 { 2219 struct gfs2_journal_extent *jext; 2220 2221 if (!list_empty(&jd->extent_list)) { 2222 jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list); 2223 if ((jext->dblock + jext->blocks) == dblock) { 
2224 jext->blocks += blocks; 2225 return 0; 2226 } 2227 } 2228 2229 jext = kzalloc_obj(struct gfs2_journal_extent, GFP_NOFS); 2230 if (jext == NULL) 2231 return -ENOMEM; 2232 jext->dblock = dblock; 2233 jext->lblock = lblock; 2234 jext->blocks = blocks; 2235 list_add_tail(&jext->list, &jd->extent_list); 2236 jd->nr_extents++; 2237 return 0; 2238 } 2239 2240 /** 2241 * gfs2_map_journal_extents - Cache journal bmap info 2242 * @sdp: The super block 2243 * @jd: The journal to map 2244 * 2245 * Create a reusable "extent" mapping from all logical 2246 * blocks to all physical blocks for the given journal. This will save 2247 * us time when writing journal blocks. Most journals will have only one 2248 * extent that maps all their logical blocks. That's because gfs2.mkfs 2249 * arranges the journal blocks sequentially to maximize performance. 2250 * So the extent would map the first block for the entire file length. 2251 * However, gfs2_jadd can happen while file activity is happening, so 2252 * those journals may not be sequential. Less likely is the case where 2253 * the users created their own journals by mounting the metafs and 2254 * laying it out. But it's still possible. These journals might have 2255 * several extents. 
2256 * 2257 * Returns: 0 on success, or error on failure 2258 */ 2259 2260 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) 2261 { 2262 u64 lblock = 0; 2263 u64 lblock_stop; 2264 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 2265 struct buffer_head bh; 2266 unsigned int shift = sdp->sd_sb.sb_bsize_shift; 2267 u64 size; 2268 int rc; 2269 ktime_t start, end; 2270 2271 start = ktime_get(); 2272 lblock_stop = i_size_read(jd->jd_inode) >> shift; 2273 size = (lblock_stop - lblock) << shift; 2274 jd->nr_extents = 0; 2275 WARN_ON(!list_empty(&jd->extent_list)); 2276 2277 do { 2278 bh.b_state = 0; 2279 bh.b_blocknr = 0; 2280 bh.b_size = size; 2281 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); 2282 if (rc || !buffer_mapped(&bh)) 2283 goto fail; 2284 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); 2285 if (rc) 2286 goto fail; 2287 size -= bh.b_size; 2288 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 2289 } while(size > 0); 2290 2291 end = ktime_get(); 2292 fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid, 2293 jd->nr_extents, ktime_ms_delta(end, start)); 2294 return 0; 2295 2296 fail: 2297 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", 2298 rc, jd->jd_jid, 2299 (unsigned long long)(i_size_read(jd->jd_inode) - size), 2300 jd->nr_extents); 2301 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", 2302 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, 2303 bh.b_state, (unsigned long long)bh.b_size); 2304 gfs2_free_journal_extents(jd); 2305 return rc; 2306 } 2307 2308 /** 2309 * gfs2_write_alloc_required - figure out if a write will require an allocation 2310 * @ip: the file being written to 2311 * @offset: the offset to write to 2312 * @len: the number of bytes being written 2313 * 2314 * Returns: 1 if an alloc is required, 0 otherwise 2315 */ 2316 2317 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, 2318 unsigned int len) 
2319 { 2320 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2321 struct buffer_head bh; 2322 unsigned int shift; 2323 u64 lblock, lblock_stop, size; 2324 u64 end_of_file; 2325 2326 if (!len) 2327 return 0; 2328 2329 if (gfs2_is_stuffed(ip)) { 2330 if (offset + len > gfs2_max_stuffed_size(ip)) 2331 return 1; 2332 return 0; 2333 } 2334 2335 shift = sdp->sd_sb.sb_bsize_shift; 2336 BUG_ON(gfs2_is_dir(ip)); 2337 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; 2338 lblock = offset >> shift; 2339 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; 2340 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex)) 2341 return 1; 2342 2343 size = (lblock_stop - lblock) << shift; 2344 do { 2345 bh.b_state = 0; 2346 bh.b_size = size; 2347 gfs2_block_map(&ip->i_inode, lblock, &bh, 0); 2348 if (!buffer_mapped(&bh)) 2349 return 1; 2350 size -= bh.b_size; 2351 lblock += (bh.b_size >> ip->i_inode.i_blkbits); 2352 } while(size > 0); 2353 2354 return 0; 2355 } 2356 2357 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length) 2358 { 2359 struct gfs2_inode *ip = GFS2_I(inode); 2360 struct buffer_head *dibh; 2361 int error; 2362 2363 if (offset >= inode->i_size) 2364 return 0; 2365 if (offset + length > inode->i_size) 2366 length = inode->i_size - offset; 2367 2368 error = gfs2_meta_inode_buffer(ip, &dibh); 2369 if (error) 2370 return error; 2371 gfs2_trans_add_meta(ip->i_gl, dibh); 2372 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0, 2373 length); 2374 brelse(dibh); 2375 return 0; 2376 } 2377 2378 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset, 2379 loff_t length) 2380 { 2381 struct gfs2_sbd *sdp = GFS2_SB(inode); 2382 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; 2383 int error; 2384 2385 while (length) { 2386 struct gfs2_trans *tr; 2387 loff_t chunk; 2388 unsigned int offs; 2389 2390 chunk = length; 2391 if (chunk > max_chunk) 2392 chunk = max_chunk; 
2393 2394 offs = offset & ~PAGE_MASK; 2395 if (offs && chunk > PAGE_SIZE) 2396 chunk = offs + ((chunk - offs) & PAGE_MASK); 2397 2398 truncate_pagecache_range(inode, offset, chunk); 2399 offset += chunk; 2400 length -= chunk; 2401 2402 tr = current->journal_info; 2403 if (!test_bit(TR_TOUCHED, &tr->tr_flags)) 2404 continue; 2405 2406 gfs2_trans_end(sdp); 2407 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); 2408 if (error) 2409 return error; 2410 } 2411 return 0; 2412 } 2413 2414 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length) 2415 { 2416 struct inode *inode = file_inode(file); 2417 struct gfs2_inode *ip = GFS2_I(inode); 2418 struct gfs2_sbd *sdp = GFS2_SB(inode); 2419 unsigned int blocksize = i_blocksize(inode); 2420 loff_t start, end; 2421 int error; 2422 2423 if (!gfs2_is_stuffed(ip)) { 2424 unsigned int start_off, end_len; 2425 2426 start_off = offset & (blocksize - 1); 2427 end_len = (offset + length) & (blocksize - 1); 2428 if (start_off) { 2429 unsigned int len = length; 2430 if (length > blocksize - start_off) 2431 len = blocksize - start_off; 2432 error = gfs2_block_zero_range(inode, offset, len); 2433 if (error) 2434 goto out; 2435 if (start_off + length < blocksize) 2436 end_len = 0; 2437 } 2438 if (end_len) { 2439 error = gfs2_block_zero_range(inode, 2440 offset + length - end_len, end_len); 2441 if (error) 2442 goto out; 2443 } 2444 } 2445 2446 start = round_down(offset, blocksize); 2447 end = round_up(offset + length, blocksize) - 1; 2448 error = filemap_write_and_wait_range(inode->i_mapping, start, end); 2449 if (error) 2450 return error; 2451 2452 if (gfs2_is_jdata(ip)) 2453 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA, 2454 GFS2_JTRUNC_REVOKES); 2455 else 2456 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 2457 if (error) 2458 return error; 2459 2460 if (gfs2_is_stuffed(ip)) { 2461 error = stuffed_zero_range(inode, offset, length); 2462 if (error) 2463 goto out; 2464 } 2465 2466 if 
(gfs2_is_jdata(ip)) { 2467 BUG_ON(!current->journal_info); 2468 gfs2_journaled_truncate_range(inode, offset, length); 2469 } else 2470 truncate_pagecache_range(inode, offset, offset + length - 1); 2471 2472 file_update_time(file); 2473 mark_inode_dirty(inode); 2474 2475 if (current->journal_info) 2476 gfs2_trans_end(sdp); 2477 2478 if (!gfs2_is_stuffed(ip)) 2479 error = punch_hole(ip, offset, length); 2480 2481 out: 2482 if (current->journal_info) 2483 gfs2_trans_end(sdp); 2484 return error; 2485 } 2486 2487 static ssize_t gfs2_writeback_range(struct iomap_writepage_ctx *wpc, 2488 struct folio *folio, u64 offset, unsigned int len, u64 end_pos) 2489 { 2490 if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(wpc->inode)))) 2491 return -EIO; 2492 2493 if (offset < wpc->iomap.offset || 2494 offset >= wpc->iomap.offset + wpc->iomap.length) { 2495 int ret; 2496 2497 memset(&wpc->iomap, 0, sizeof(wpc->iomap)); 2498 ret = gfs2_iomap_get(wpc->inode, offset, INT_MAX, &wpc->iomap); 2499 if (ret) 2500 return ret; 2501 } 2502 2503 return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); 2504 } 2505 2506 const struct iomap_writeback_ops gfs2_writeback_ops = { 2507 .writeback_range = gfs2_writeback_range, 2508 .writeback_submit = iomap_ioend_writeback_submit, 2509 }; 2510