1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/completion.h> 13 #include <linux/buffer_head.h> 14 #include <linux/fs.h> 15 #include <linux/gfs2_ondisk.h> 16 #include <linux/prefetch.h> 17 #include <linux/blkdev.h> 18 #include <linux/rbtree.h> 19 20 #include "gfs2.h" 21 #include "incore.h" 22 #include "glock.h" 23 #include "glops.h" 24 #include "lops.h" 25 #include "meta_io.h" 26 #include "quota.h" 27 #include "rgrp.h" 28 #include "super.h" 29 #include "trans.h" 30 #include "util.h" 31 #include "log.h" 32 #include "inode.h" 33 #include "trace_gfs2.h" 34 35 #define BFITNOENT ((u32)~0) 36 #define NO_BLOCK ((u64)~0) 37 38 #if BITS_PER_LONG == 32 39 #define LBITMASK (0x55555555UL) 40 #define LBITSKIP55 (0x55555555UL) 41 #define LBITSKIP00 (0x00000000UL) 42 #else 43 #define LBITMASK (0x5555555555555555UL) 44 #define LBITSKIP55 (0x5555555555555555UL) 45 #define LBITSKIP00 (0x0000000000000000UL) 46 #endif 47 48 /* 49 * These routines are used by the resource group routines (rgrp.c) 50 * to keep track of block allocation. Each block is represented by two 51 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. 52 * 53 * 0 = Free 54 * 1 = Used (not metadata) 55 * 2 = Unlinked (still in use) inode 56 * 3 = Used (metadata) 57 */ 58 59 static const char valid_change[16] = { 60 /* current */ 61 /* n */ 0, 1, 1, 1, 62 /* e */ 1, 0, 0, 0, 63 /* w */ 0, 0, 0, 1, 64 1, 0, 0, 0 65 }; 66 67 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, 68 const struct gfs2_inode *ip, bool nowrap); 69 70 71 /** 72 * gfs2_setbit - Set a bit in the bitmaps 73 * @rbm: The position of the bit to set 74 * @do_clone: Also set the clone bitmap, if it exists 75 * @new_state: the new state of the block 76 * 77 */ 78 79 static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, 80 unsigned char new_state) 81 { 82 unsigned char *byte1, *byte2, *end, cur_state; 83 unsigned int buflen = rbm->bi->bi_len; 84 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 85 86 byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); 87 end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; 88 89 BUG_ON(byte1 >= end); 90 91 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 92 93 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 94 printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " 95 "new_state=%d\n", rbm->offset, cur_state, new_state); 96 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", 97 (unsigned long long)rbm->rgd->rd_addr, 98 rbm->bi->bi_start); 99 printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", 100 rbm->bi->bi_offset, rbm->bi->bi_len); 101 dump_stack(); 102 gfs2_consist_rgrpd(rbm->rgd); 103 return; 104 } 105 *byte1 ^= (cur_state ^ new_state) << bit; 106 107 if (do_clone && rbm->bi->bi_clone) { 108 byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); 109 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 110 *byte2 ^= (cur_state ^ new_state) << bit; 111 } 112 } 113 114 /** 115 * gfs2_testbit - test a bit in the bitmaps 116 * @rbm: The bit to test 117 * 118 * Returns: The two bit block state of the requested bit 119 */ 120 121 static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) 122 { 123 const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; 124 const u8 *byte; 125 unsigned int bit; 126 127 byte = buffer + (rbm->offset / GFS2_NBBY); 128 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; 129 130 return (*byte >> bit) & GFS2_BIT_MASK; 131 } 132 133 /** 134 * gfs2_bit_search 135 * @ptr: Pointer to bitmap data 136 * @mask: Mask to use (normally 0x55555.... but adjusted for search start) 137 * @state: The state we are searching for 138 * 139 * We xor the bitmap data with a patter which is the bitwise opposite 140 * of what we are looking for, this gives rise to a pattern of ones 141 * wherever there is a match. Since we have two bits per entry, we 142 * take this pattern, shift it down by one place and then and it with 143 * the original. All the even bit positions (0,2,4, etc) then represent 144 * successful matches, so we mask with 0x55555..... to remove the unwanted 145 * odd bit positions. 146 * 147 * This allows searching of a whole u64 at once (32 blocks) with a 148 * single test (on 64 bit arches). 149 */ 150 151 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) 152 { 153 u64 tmp; 154 static const u64 search[] = { 155 [0] = 0xffffffffffffffffULL, 156 [1] = 0xaaaaaaaaaaaaaaaaULL, 157 [2] = 0x5555555555555555ULL, 158 [3] = 0x0000000000000000ULL, 159 }; 160 tmp = le64_to_cpu(*ptr) ^ search[state]; 161 tmp &= (tmp >> 1); 162 tmp &= mask; 163 return tmp; 164 } 165 166 /** 167 * rs_cmp - multi-block reservation range compare 168 * @blk: absolute file system block number of the new reservation 169 * @len: number of blocks in the new reservation 170 * @rs: existing reservation to compare against 171 * 172 * returns: 1 if the block range is beyond the reach of the reservation 173 * -1 if the block range is before the start of the reservation 174 * 0 if the block range overlaps with the reservation 175 */ 176 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) 177 { 178 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); 179 180 if (blk >= startblk + rs->rs_free) 181 return 1; 182 if (blk + len - 1 < startblk) 183 return -1; 184 return 0; 185 } 186 187 /** 188 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 189 * a block in a given allocation state. 190 * @buf: the buffer that holds the bitmaps 191 * @len: the length (in bytes) of the buffer 192 * @goal: start search at this block's bit-pair (within @buffer) 193 * @state: GFS2_BLKST_XXX the state of the block we're looking for. 194 * 195 * Scope of @goal and returned block number is only within this bitmap buffer, 196 * not entire rgrp or filesystem. @buffer will be offset from the actual 197 * beginning of a bitmap block buffer, skipping any header structures, but 198 * headers are always a multiple of 64 bits long so that the buffer is 199 * always aligned to a 64 bit boundary. 200 * 201 * The size of the buffer is in bytes, but is it assumed that it is 202 * always ok to read a complete multiple of 64 bits at the end 203 * of the block in case the end is no aligned to a natural boundary. 204 * 205 * Return: the block number (bitmap buffer scope) that was found 206 */ 207 208 static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, 209 u32 goal, u8 state) 210 { 211 u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1); 212 const __le64 *ptr = ((__le64 *)buf) + (goal >> 5); 213 const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64))); 214 u64 tmp; 215 u64 mask = 0x5555555555555555ULL; 216 u32 bit; 217 218 /* Mask off bits we don't care about at the start of the search */ 219 mask <<= spoint; 220 tmp = gfs2_bit_search(ptr, mask, state); 221 ptr++; 222 while(tmp == 0 && ptr < end) { 223 tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state); 224 ptr++; 225 } 226 /* Mask off any bits which are more than len bytes from the start */ 227 if (ptr == end && (len & (sizeof(u64) - 1))) 228 tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1)))); 229 /* Didn't find anything, so return */ 230 if (tmp == 0) 231 return BFITNOENT; 232 ptr--; 233 bit = __ffs64(tmp); 234 bit /= 2; /* two bits per entry in the bitmap */ 235 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 236 } 237 238 /** 239 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number 240 * @rbm: The rbm with rgd already set correctly 241 * @block: The block number (filesystem relative) 242 * 243 * This sets the bi and offset members of an rbm based on a 244 * resource group and a filesystem relative block number. The 245 * resource group must be set in the rbm on entry, the bi and 246 * offset members will be set by this function. 247 * 248 * Returns: 0 on success, or an error code 249 */ 250 251 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 252 { 253 u64 rblock = block - rbm->rgd->rd_data0; 254 u32 goal = (u32)rblock; 255 int x; 256 257 if (WARN_ON_ONCE(rblock > UINT_MAX)) 258 return -EINVAL; 259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 260 return -E2BIG; 261 262 for (x = 0; x < rbm->rgd->rd_length; x++) { 263 rbm->bi = rbm->rgd->rd_bits + x; 264 if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { 265 rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); 266 break; 267 } 268 } 269 270 return 0; 271 } 272 273 /** 274 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned 275 * @rbm: Position to search (value/result) 276 * @n_unaligned: Number of unaligned blocks to check 277 * @len: Decremented for each block found (terminate on zero) 278 * 279 * Returns: true if a non-free block is encountered 280 */ 281 282 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) 283 { 284 u64 block; 285 u32 n; 286 u8 res; 287 288 for (n = 0; n < n_unaligned; n++) { 289 res = gfs2_testbit(rbm); 290 if (res != GFS2_BLKST_FREE) 291 return true; 292 (*len)--; 293 if (*len == 0) 294 return true; 295 block = gfs2_rbm_to_block(rbm); 296 if (gfs2_rbm_from_block(rbm, block + 1)) 297 return true; 298 } 299 300 return false; 301 } 302 303 /** 304 * gfs2_free_extlen - Return extent length of free blocks 305 * @rbm: Starting position 306 * @len: Max length to check 307 * 308 * Starting at the block specified by the rbm, see how many free blocks 309 * there are, not reading more than len blocks ahead. This can be done 310 * using memchr_inv when the blocks are byte aligned, but has to be done 311 * on a block by block basis in case of unaligned blocks. Also this 312 * function can cope with bitmap boundaries (although it must stop on 313 * a resource group boundary) 314 * 315 * Returns: Number of free blocks in the extent 316 */ 317 318 static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) 319 { 320 struct gfs2_rbm rbm = *rrbm; 321 u32 n_unaligned = rbm.offset & 3; 322 u32 size = len; 323 u32 bytes; 324 u32 chunk_size; 325 u8 *ptr, *start, *end; 326 u64 block; 327 328 if (n_unaligned && 329 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) 330 goto out; 331 332 n_unaligned = len & 3; 333 /* Start is now byte aligned */ 334 while (len > 3) { 335 start = rbm.bi->bi_bh->b_data; 336 if (rbm.bi->bi_clone) 337 start = rbm.bi->bi_clone; 338 end = start + rbm.bi->bi_bh->b_size; 339 start += rbm.bi->bi_offset; 340 BUG_ON(rbm.offset & 3); 341 start += (rbm.offset / GFS2_NBBY); 342 bytes = min_t(u32, len / GFS2_NBBY, (end - start)); 343 ptr = memchr_inv(start, 0, bytes); 344 chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); 345 chunk_size *= GFS2_NBBY; 346 BUG_ON(len < chunk_size); 347 len -= chunk_size; 348 block = gfs2_rbm_to_block(&rbm); 349 gfs2_rbm_from_block(&rbm, block + chunk_size); 350 n_unaligned = 3; 351 if (ptr) 352 break; 353 n_unaligned = len & 3; 354 } 355 356 /* Deal with any bits left over at the end */ 357 if (n_unaligned) 358 gfs2_unaligned_extlen(&rbm, n_unaligned, &len); 359 out: 360 return size - len; 361 } 362 363 /** 364 * gfs2_bitcount - count the number of bits in a certain state 365 * @rgd: the resource group descriptor 366 * @buffer: the buffer that holds the bitmaps 367 * @buflen: the length (in bytes) of the buffer 368 * @state: the state of the block we're looking for 369 * 370 * Returns: The number of bits 371 */ 372 373 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, 374 unsigned int buflen, u8 state) 375 { 376 const u8 *byte = buffer; 377 const u8 *end = buffer + buflen; 378 const u8 state1 = state << 2; 379 const u8 state2 = state << 4; 380 const u8 state3 = state << 6; 381 u32 count = 0; 382 383 for (; byte < end; byte++) { 384 if (((*byte) & 0x03) == state) 385 count++; 386 if (((*byte) & 0x0C) == state1) 387 count++; 388 if (((*byte) & 0x30) == state2) 389 count++; 390 if (((*byte) & 0xC0) == state3) 391 count++; 392 } 393 394 return count; 395 } 396 397 /** 398 * gfs2_rgrp_verify - Verify that a resource group is consistent 399 * @rgd: the rgrp 400 * 401 */ 402 403 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) 404 { 405 struct gfs2_sbd *sdp = rgd->rd_sbd; 406 struct gfs2_bitmap *bi = NULL; 407 u32 length = rgd->rd_length; 408 u32 count[4], tmp; 409 int buf, x; 410 411 memset(count, 0, 4 * sizeof(u32)); 412 413 /* Count # blocks in each of 4 possible allocation states */ 414 for (buf = 0; buf < length; buf++) { 415 bi = rgd->rd_bits + buf; 416 for (x = 0; x < 4; x++) 417 count[x] += gfs2_bitcount(rgd, 418 bi->bi_bh->b_data + 419 bi->bi_offset, 420 bi->bi_len, x); 421 } 422 423 if (count[0] != rgd->rd_free) { 424 if (gfs2_consist_rgrpd(rgd)) 425 fs_err(sdp, "free data mismatch: %u != %u\n", 426 count[0], rgd->rd_free); 427 return; 428 } 429 430 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 431 if (count[1] != tmp) { 432 if (gfs2_consist_rgrpd(rgd)) 433 fs_err(sdp, "used data mismatch: %u != %u\n", 434 count[1], tmp); 435 return; 436 } 437 438 if (count[2] + count[3] != rgd->rd_dinodes) { 439 if (gfs2_consist_rgrpd(rgd)) 440 fs_err(sdp, "used metadata mismatch: %u != %u\n", 441 count[2] + count[3], rgd->rd_dinodes); 442 return; 443 } 444 } 445 446 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) 447 { 448 u64 first = rgd->rd_data0; 449 u64 last = first + rgd->rd_data; 450 return first <= block && block < last; 451 } 452 453 /** 454 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 455 * @sdp: The GFS2 superblock 456 * @blk: The data block number 457 * @exact: True if this needs to be an exact match 458 * 459 * Returns: The resource group, or NULL if not found 460 */ 461 462 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) 463 { 464 struct rb_node *n, *next; 465 struct gfs2_rgrpd *cur; 466 467 spin_lock(&sdp->sd_rindex_spin); 468 n = sdp->sd_rindex_tree.rb_node; 469 while (n) { 470 cur = rb_entry(n, struct gfs2_rgrpd, rd_node); 471 next = NULL; 472 if (blk < cur->rd_addr) 473 next = n->rb_left; 474 else if (blk >= cur->rd_data0 + cur->rd_data) 475 next = n->rb_right; 476 if (next == NULL) { 477 spin_unlock(&sdp->sd_rindex_spin); 478 if (exact) { 479 if (blk < cur->rd_addr) 480 return NULL; 481 if (blk >= cur->rd_data0 + cur->rd_data) 482 return NULL; 483 } 484 return cur; 485 } 486 n = next; 487 } 488 spin_unlock(&sdp->sd_rindex_spin); 489 490 return NULL; 491 } 492 493 /** 494 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem 495 * @sdp: The GFS2 superblock 496 * 497 * Returns: The first rgrp in the filesystem 498 */ 499 500 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) 501 { 502 const struct rb_node *n; 503 struct gfs2_rgrpd *rgd; 504 505 spin_lock(&sdp->sd_rindex_spin); 506 n = rb_first(&sdp->sd_rindex_tree); 507 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 508 spin_unlock(&sdp->sd_rindex_spin); 509 510 return rgd; 511 } 512 513 /** 514 * gfs2_rgrpd_get_next - get the next RG 515 * @rgd: the resource group descriptor 516 * 517 * Returns: The next rgrp 518 */ 519 520 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) 521 { 522 struct gfs2_sbd *sdp = rgd->rd_sbd; 523 const struct rb_node *n; 524 525 spin_lock(&sdp->sd_rindex_spin); 526 n = rb_next(&rgd->rd_node); 527 if (n == NULL) 528 n = rb_first(&sdp->sd_rindex_tree); 529 530 if (unlikely(&rgd->rd_node == n)) { 531 spin_unlock(&sdp->sd_rindex_spin); 532 return NULL; 533 } 534 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 535 spin_unlock(&sdp->sd_rindex_spin); 536 return rgd; 537 } 538 539 void gfs2_free_clones(struct gfs2_rgrpd *rgd) 540 { 541 int x; 542 543 for (x = 0; x < rgd->rd_length; x++) { 544 struct gfs2_bitmap *bi = rgd->rd_bits + x; 545 kfree(bi->bi_clone); 546 bi->bi_clone = NULL; 547 } 548 } 549 550 /** 551 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode 552 * @ip: the inode for this reservation 553 */ 554 int gfs2_rs_alloc(struct gfs2_inode *ip) 555 { 556 int error = 0; 557 struct gfs2_blkreserv *res; 558 559 if (ip->i_res) 560 return 0; 561 562 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 563 if (!res) 564 error = -ENOMEM; 565 566 RB_CLEAR_NODE(&res->rs_node); 567 568 down_write(&ip->i_rw_mutex); 569 if (ip->i_res) 570 kmem_cache_free(gfs2_rsrv_cachep, res); 571 else 572 ip->i_res = res; 573 up_write(&ip->i_rw_mutex); 574 return error; 575 } 576 577 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 578 { 579 gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", 580 (unsigned long long)rs->rs_inum, 581 (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), 582 rs->rs_rbm.offset, rs->rs_free); 583 } 584 585 /** 586 * __rs_deltree - remove a multi-block reservation from the rgd tree 587 * @rs: The reservation to remove 588 * 589 */ 590 static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) 591 { 592 struct gfs2_rgrpd *rgd; 593 594 if (!gfs2_rs_active(rs)) 595 return; 596 597 rgd = rs->rs_rbm.rgd; 598 trace_gfs2_rs(rs, TRACE_RS_TREEDEL); 599 rb_erase(&rs->rs_node, &rgd->rd_rstree); 600 RB_CLEAR_NODE(&rs->rs_node); 601 602 if (rs->rs_free) { 603 /* return reserved blocks to the rgrp and the ip */ 604 BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); 605 rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; 606 rs->rs_free = 0; 607 clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); 608 smp_mb__after_clear_bit(); 609 } 610 } 611 612 /** 613 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree 614 * @rs: The reservation to remove 615 * 616 */ 617 void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) 618 { 619 struct gfs2_rgrpd *rgd; 620 621 rgd = rs->rs_rbm.rgd; 622 if (rgd) { 623 spin_lock(&rgd->rd_rsspin); 624 __rs_deltree(ip, rs); 625 spin_unlock(&rgd->rd_rsspin); 626 } 627 } 628 629 /** 630 * gfs2_rs_delete - delete a multi-block reservation 631 * @ip: The inode for this reservation 632 * 633 */ 634 void gfs2_rs_delete(struct gfs2_inode *ip) 635 { 636 down_write(&ip->i_rw_mutex); 637 if (ip->i_res) { 638 gfs2_rs_deltree(ip, ip->i_res); 639 BUG_ON(ip->i_res->rs_free); 640 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 641 ip->i_res = NULL; 642 } 643 up_write(&ip->i_rw_mutex); 644 } 645 646 /** 647 * return_all_reservations - return all reserved blocks back to the rgrp. 648 * @rgd: the rgrp that needs its space back 649 * 650 * We previously reserved a bunch of blocks for allocation. Now we need to 651 * give them back. This leave the reservation structures in tact, but removes 652 * all of their corresponding "no-fly zones". 653 */ 654 static void return_all_reservations(struct gfs2_rgrpd *rgd) 655 { 656 struct rb_node *n; 657 struct gfs2_blkreserv *rs; 658 659 spin_lock(&rgd->rd_rsspin); 660 while ((n = rb_first(&rgd->rd_rstree))) { 661 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 662 __rs_deltree(NULL, rs); 663 } 664 spin_unlock(&rgd->rd_rsspin); 665 } 666 667 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) 668 { 669 struct rb_node *n; 670 struct gfs2_rgrpd *rgd; 671 struct gfs2_glock *gl; 672 673 while ((n = rb_first(&sdp->sd_rindex_tree))) { 674 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 675 gl = rgd->rd_gl; 676 677 rb_erase(n, &sdp->sd_rindex_tree); 678 679 if (gl) { 680 spin_lock(&gl->gl_spin); 681 gl->gl_object = NULL; 682 spin_unlock(&gl->gl_spin); 683 gfs2_glock_add_to_lru(gl); 684 gfs2_glock_put(gl); 685 } 686 687 gfs2_free_clones(rgd); 688 kfree(rgd->rd_bits); 689 return_all_reservations(rgd); 690 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 691 } 692 } 693 694 static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) 695 { 696 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); 697 printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); 698 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); 699 printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); 700 printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); 701 } 702 703 /** 704 * gfs2_compute_bitstructs - Compute the bitmap sizes 705 * @rgd: The resource group descriptor 706 * 707 * Calculates bitmap descriptors, one for each block that contains bitmap data 708 * 709 * Returns: errno 710 */ 711 712 static int compute_bitstructs(struct gfs2_rgrpd *rgd) 713 { 714 struct gfs2_sbd *sdp = rgd->rd_sbd; 715 struct gfs2_bitmap *bi; 716 u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */ 717 u32 bytes_left, bytes; 718 int x; 719 720 if (!length) 721 return -EINVAL; 722 723 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS); 724 if (!rgd->rd_bits) 725 return -ENOMEM; 726 727 bytes_left = rgd->rd_bitbytes; 728 729 for (x = 0; x < length; x++) { 730 bi = rgd->rd_bits + x; 731 732 bi->bi_flags = 0; 733 /* small rgrp; bitmap stored completely in header block */ 734 if (length == 1) { 735 bytes = bytes_left; 736 bi->bi_offset = sizeof(struct gfs2_rgrp); 737 bi->bi_start = 0; 738 bi->bi_len = bytes; 739 /* header block */ 740 } else if (x == 0) { 741 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); 742 bi->bi_offset = sizeof(struct gfs2_rgrp); 743 bi->bi_start = 0; 744 bi->bi_len = bytes; 745 /* last block */ 746 } else if (x + 1 == length) { 747 bytes = bytes_left; 748 bi->bi_offset = sizeof(struct gfs2_meta_header); 749 bi->bi_start = rgd->rd_bitbytes - bytes_left; 750 bi->bi_len = bytes; 751 /* other blocks */ 752 } else { 753 bytes = sdp->sd_sb.sb_bsize - 754 sizeof(struct gfs2_meta_header); 755 bi->bi_offset = sizeof(struct gfs2_meta_header); 756 bi->bi_start = rgd->rd_bitbytes - bytes_left; 757 bi->bi_len = bytes; 758 } 759 760 bytes_left -= bytes; 761 } 762 763 if (bytes_left) { 764 gfs2_consist_rgrpd(rgd); 765 return -EIO; 766 } 767 bi = rgd->rd_bits + (length - 1); 768 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) { 769 if (gfs2_consist_rgrpd(rgd)) { 770 gfs2_rindex_print(rgd); 771 fs_err(sdp, "start=%u len=%u offset=%u\n", 772 bi->bi_start, bi->bi_len, bi->bi_offset); 773 } 774 return -EIO; 775 } 776 777 return 0; 778 } 779 780 /** 781 * gfs2_ri_total - Total up the file system space, according to the rindex. 782 * @sdp: the filesystem 783 * 784 */ 785 u64 gfs2_ri_total(struct gfs2_sbd *sdp) 786 { 787 u64 total_data = 0; 788 struct inode *inode = sdp->sd_rindex; 789 struct gfs2_inode *ip = GFS2_I(inode); 790 char buf[sizeof(struct gfs2_rindex)]; 791 int error, rgrps; 792 793 for (rgrps = 0;; rgrps++) { 794 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 795 796 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 797 break; 798 error = gfs2_internal_read(ip, buf, &pos, 799 sizeof(struct gfs2_rindex)); 800 if (error != sizeof(struct gfs2_rindex)) 801 break; 802 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 803 } 804 return total_data; 805 } 806 807 static int rgd_insert(struct gfs2_rgrpd *rgd) 808 { 809 struct gfs2_sbd *sdp = rgd->rd_sbd; 810 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 811 812 /* Figure out where to put new node */ 813 while (*newn) { 814 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, 815 rd_node); 816 817 parent = *newn; 818 if (rgd->rd_addr < cur->rd_addr) 819 newn = &((*newn)->rb_left); 820 else if (rgd->rd_addr > cur->rd_addr) 821 newn = &((*newn)->rb_right); 822 else 823 return -EEXIST; 824 } 825 826 rb_link_node(&rgd->rd_node, parent, newn); 827 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 828 sdp->sd_rgrps++; 829 return 0; 830 } 831 832 /** 833 * read_rindex_entry - Pull in a new resource index entry from the disk 834 * @ip: Pointer to the rindex inode 835 * 836 * Returns: 0 on success, > 0 on EOF, error code otherwise 837 */ 838 839 static int read_rindex_entry(struct gfs2_inode *ip) 840 { 841 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 842 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 843 struct gfs2_rindex buf; 844 int error; 845 struct gfs2_rgrpd *rgd; 846 847 if (pos >= i_size_read(&ip->i_inode)) 848 return 1; 849 850 error = gfs2_internal_read(ip, (char *)&buf, &pos, 851 sizeof(struct gfs2_rindex)); 852 853 if (error != sizeof(struct gfs2_rindex)) 854 return (error == 0) ? 1 : error; 855 856 rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); 857 error = -ENOMEM; 858 if (!rgd) 859 return error; 860 861 rgd->rd_sbd = sdp; 862 rgd->rd_addr = be64_to_cpu(buf.ri_addr); 863 rgd->rd_length = be32_to_cpu(buf.ri_length); 864 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 865 rgd->rd_data = be32_to_cpu(buf.ri_data); 866 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 867 spin_lock_init(&rgd->rd_rsspin); 868 869 error = compute_bitstructs(rgd); 870 if (error) 871 goto fail; 872 873 error = gfs2_glock_get(sdp, rgd->rd_addr, 874 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 875 if (error) 876 goto fail; 877 878 rgd->rd_gl->gl_object = rgd; 879 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 880 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 881 if (rgd->rd_data > sdp->sd_max_rg_data) 882 sdp->sd_max_rg_data = rgd->rd_data; 883 spin_lock(&sdp->sd_rindex_spin); 884 error = rgd_insert(rgd); 885 spin_unlock(&sdp->sd_rindex_spin); 886 if (!error) 887 return 0; 888 889 error = 0; /* someone else read in the rgrp; free it and ignore it */ 890 gfs2_glock_put(rgd->rd_gl); 891 892 fail: 893 kfree(rgd->rd_bits); 894 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 895 return error; 896 } 897 898 /** 899 * gfs2_ri_update - Pull in a new resource index from the disk 900 * @ip: pointer to the rindex inode 901 * 902 * Returns: 0 on successful update, error code otherwise 903 */ 904 905 static int gfs2_ri_update(struct gfs2_inode *ip) 906 { 907 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 908 int error; 909 910 do { 911 error = read_rindex_entry(ip); 912 } while (error == 0); 913 914 if (error < 0) 915 return error; 916 917 sdp->sd_rindex_uptodate = 1; 918 return 0; 919 } 920 921 /** 922 * gfs2_rindex_update - Update the rindex if required 923 * @sdp: The GFS2 superblock 924 * 925 * We grab a lock on the rindex inode to make sure that it doesn't 926 * change whilst we are performing an operation. We keep this lock 927 * for quite long periods of time compared to other locks. This 928 * doesn't matter, since it is shared and it is very, very rarely 929 * accessed in the exclusive mode (i.e. only when expanding the filesystem). 930 * 931 * This makes sure that we're using the latest copy of the resource index 932 * special file, which might have been updated if someone expanded the 933 * filesystem (via gfs2_grow utility), which adds new resource groups. 934 * 935 * Returns: 0 on succeess, error code otherwise 936 */ 937 938 int gfs2_rindex_update(struct gfs2_sbd *sdp) 939 { 940 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 941 struct gfs2_glock *gl = ip->i_gl; 942 struct gfs2_holder ri_gh; 943 int error = 0; 944 int unlock_required = 0; 945 946 /* Read new copy from disk if we don't have the latest */ 947 if (!sdp->sd_rindex_uptodate) { 948 if (!gfs2_glock_is_locked_by_me(gl)) { 949 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 950 if (error) 951 return error; 952 unlock_required = 1; 953 } 954 if (!sdp->sd_rindex_uptodate) 955 error = gfs2_ri_update(ip); 956 if (unlock_required) 957 gfs2_glock_dq_uninit(&ri_gh); 958 } 959 960 return error; 961 } 962 963 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 964 { 965 const struct gfs2_rgrp *str = buf; 966 u32 rg_flags; 967 968 rg_flags = be32_to_cpu(str->rg_flags); 969 rg_flags &= ~GFS2_RDF_MASK; 970 rgd->rd_flags &= GFS2_RDF_MASK; 971 rgd->rd_flags |= rg_flags; 972 rgd->rd_free = be32_to_cpu(str->rg_free); 973 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 974 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 975 } 976 977 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 978 { 979 struct gfs2_rgrp *str = buf; 980 981 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 982 str->rg_free = cpu_to_be32(rgd->rd_free); 983 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 984 str->__pad = cpu_to_be32(0); 985 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 986 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 987 } 988 989 static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) 990 { 991 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 992 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; 993 994 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || 995 rgl->rl_dinodes != str->rg_dinodes || 996 rgl->rl_igeneration != str->rg_igeneration) 997 return 0; 998 return 1; 999 } 1000 1001 static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) 1002 { 1003 const struct gfs2_rgrp *str = buf; 1004 1005 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); 1006 rgl->rl_flags = str->rg_flags; 1007 rgl->rl_free = str->rg_free; 1008 rgl->rl_dinodes = str->rg_dinodes; 1009 rgl->rl_igeneration = str->rg_igeneration; 1010 rgl->__pad = 0UL; 1011 } 1012 1013 static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) 1014 { 1015 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; 1016 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; 1017 rgl->rl_unlinked = cpu_to_be32(unlinked); 1018 } 1019 1020 static u32 count_unlinked(struct gfs2_rgrpd *rgd) 1021 { 1022 struct gfs2_bitmap *bi; 1023 const u32 length = rgd->rd_length; 1024 const u8 *buffer = NULL; 1025 u32 i, goal, count = 0; 1026 1027 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { 1028 goal = 0; 1029 buffer = bi->bi_bh->b_data + bi->bi_offset; 1030 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1031 while (goal < bi->bi_len * GFS2_NBBY) { 1032 goal = gfs2_bitfit(buffer, bi->bi_len, goal, 1033 GFS2_BLKST_UNLINKED); 1034 if (goal == BFITNOENT) 1035 break; 1036 count++; 1037 goal++; 1038 } 1039 } 1040 1041 return count; 1042 } 1043 1044 1045 /** 1046 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps 1047 * @rgd: the struct gfs2_rgrpd describing the RG to read in 1048 * 1049 * Read in all of a Resource Group's header and bitmap blocks. 1050 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. 1051 * 1052 * Returns: errno 1053 */ 1054 1055 int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) 1056 { 1057 struct gfs2_sbd *sdp = rgd->rd_sbd; 1058 struct gfs2_glock *gl = rgd->rd_gl; 1059 unsigned int length = rgd->rd_length; 1060 struct gfs2_bitmap *bi; 1061 unsigned int x, y; 1062 int error; 1063 1064 if (rgd->rd_bits[0].bi_bh != NULL) 1065 return 0; 1066 1067 for (x = 0; x < length; x++) { 1068 bi = rgd->rd_bits + x; 1069 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); 1070 if (error) 1071 goto fail; 1072 } 1073 1074 for (y = length; y--;) { 1075 bi = rgd->rd_bits + y; 1076 error = gfs2_meta_wait(sdp, bi->bi_bh); 1077 if (error) 1078 goto fail; 1079 if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB : 1080 GFS2_METATYPE_RG)) { 1081 error = -EIO; 1082 goto fail; 1083 } 1084 } 1085 1086 if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { 1087 for (x = 0; x < length; x++) 1088 clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); 1089 gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); 1090 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1091 rgd->rd_free_clone = rgd->rd_free; 1092 } 1093 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { 1094 rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); 1095 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, 1096 rgd->rd_bits[0].bi_bh->b_data); 1097 } 1098 else if (sdp->sd_args.ar_rgrplvb) { 1099 if (!gfs2_rgrp_lvb_valid(rgd)){ 1100 gfs2_consist_rgrpd(rgd); 1101 error = -EIO; 1102 goto fail; 1103 } 1104 if (rgd->rd_rgl->rl_unlinked == 0) 1105 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1106 } 1107 return 0; 1108 1109 fail: 1110 while (x--) { 1111 bi = rgd->rd_bits + x; 1112 brelse(bi->bi_bh); 1113 bi->bi_bh = NULL; 1114 gfs2_assert_warn(sdp, !bi->bi_clone); 1115 } 1116 1117 return error; 1118 } 1119 1120 int update_rgrp_lvb(struct gfs2_rgrpd *rgd) 1121 { 1122 u32 rl_flags; 1123 1124 if (rgd->rd_flags & GFS2_RDF_UPTODATE) 1125 return 0; 1126 1127 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) 1128 return gfs2_rgrp_bh_get(rgd); 1129 1130 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); 1131 rl_flags &= ~GFS2_RDF_MASK; 1132 rgd->rd_flags &= GFS2_RDF_MASK; 1133 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1134 if (rgd->rd_rgl->rl_unlinked == 0) 1135 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1136 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); 1137 rgd->rd_free_clone = rgd->rd_free; 1138 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); 1139 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); 1140 return 0; 1141 } 1142 1143 int gfs2_rgrp_go_lock(struct gfs2_holder *gh) 1144 { 1145 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1146 struct gfs2_sbd *sdp = rgd->rd_sbd; 1147 1148 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) 1149 return 0; 1150 return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); 1151 } 1152 1153 /** 1154 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() 1155 * @gh: The glock holder for the resource group 1156 * 1157 */ 1158 1159 void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) 1160 { 1161 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; 1162 int x, length = rgd->rd_length; 1163 1164 for (x = 0; x < length; x++) { 1165 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1166 if (bi->bi_bh) { 1167 brelse(bi->bi_bh); 1168 bi->bi_bh = NULL; 1169 } 1170 } 1171 1172 } 1173 1174 int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 1175 struct buffer_head *bh, 1176 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) 1177 { 1178 struct super_block *sb = sdp->sd_vfs; 1179 struct block_device *bdev = sb->s_bdev; 1180 const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / 1181 bdev_logical_block_size(sb->s_bdev); 1182 u64 blk; 1183 sector_t start = 0; 1184 sector_t nr_sects = 0; 1185 int rv; 1186 unsigned int x; 1187 u32 trimmed = 0; 1188 u8 diff; 1189 1190 for (x = 0; x < bi->bi_len; x++) { 1191 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data; 1192 clone += bi->bi_offset; 1193 clone += x; 1194 if (bh) { 1195 const u8 *orig = bh->b_data + bi->bi_offset + x; 1196 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 1197 } else { 1198 diff = ~(*clone | (*clone >> 1)); 1199 } 1200 diff &= 0x55; 1201 if (diff == 0) 1202 continue; 1203 blk = offset + ((bi->bi_start + x) * GFS2_NBBY); 1204 blk *= sects_per_blk; /* convert to sectors */ 1205 while(diff) { 1206 if (diff & 1) { 1207 if (nr_sects == 0) 1208 goto start_new_extent; 1209 if ((start + nr_sects) != blk) { 1210 if (nr_sects >= minlen) { 1211 rv = blkdev_issue_discard(bdev, 1212 start, nr_sects, 1213 GFP_NOFS, 0); 1214 if (rv) 1215 goto fail; 1216 trimmed += nr_sects; 1217 } 1218 nr_sects = 0; 1219 start_new_extent: 1220 start = blk; 1221 } 1222 nr_sects += sects_per_blk; 1223 } 1224 diff >>= 2; 1225 blk += sects_per_blk; 1226 } 1227 } 1228 if (nr_sects >= minlen) { 1229 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 1230 if (rv) 1231 goto fail; 1232 trimmed += nr_sects; 1233 } 1234 if (ptrimmed) 1235 *ptrimmed = trimmed; 1236 return 0; 1237 1238 fail: 1239 if (sdp->sd_args.ar_discard) 1240 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 1241 sdp->sd_args.ar_discard = 0; 1242 return -EIO; 1243 } 1244 1245 /** 1246 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem 1247 * @filp: Any file on the filesystem 1248 * @argp: Pointer to the arguments (also used to pass result) 1249 * 1250 * Returns: 0 on success, otherwise error code 1251 */ 1252 1253 int gfs2_fitrim(struct file *filp, void __user *argp) 1254 { 1255 struct inode *inode = filp->f_dentry->d_inode; 1256 struct gfs2_sbd *sdp = GFS2_SB(inode); 1257 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1258 struct buffer_head *bh; 1259 struct gfs2_rgrpd *rgd; 1260 struct gfs2_rgrpd *rgd_end; 1261 struct gfs2_holder gh; 1262 struct fstrim_range r; 1263 int ret = 0; 1264 u64 amt; 1265 u64 trimmed = 0; 1266 unsigned int x; 1267 1268 if (!capable(CAP_SYS_ADMIN)) 1269 return -EPERM; 1270 1271 if (!blk_queue_discard(q)) 1272 return -EOPNOTSUPP; 1273 1274 if (argp == NULL) { 1275 r.start = 0; 1276 r.len = ULLONG_MAX; 1277 r.minlen = 0; 1278 } else if (copy_from_user(&r, argp, sizeof(r))) 1279 return -EFAULT; 1280 1281 ret = gfs2_rindex_update(sdp); 1282 if (ret) 1283 return ret; 1284 1285 rgd = gfs2_blk2rgrpd(sdp, r.start, 0); 1286 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); 1287 1288 while (1) { 1289 1290 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); 1291 if (ret) 1292 goto out; 1293 1294 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { 1295 /* Trim each bitmap in the rgrp */ 1296 for (x = 0; x < rgd->rd_length; x++) { 1297 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1298 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); 1299 if (ret) { 1300 gfs2_glock_dq_uninit(&gh); 1301 goto out; 1302 } 1303 trimmed += amt; 1304 } 1305 1306 /* Mark rgrp as having been trimmed */ 1307 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); 1308 if (ret == 0) { 1309 bh = rgd->rd_bits[0].bi_bh; 1310 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1311 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1312 gfs2_rgrp_out(rgd, bh->b_data); 1313 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1314 gfs2_trans_end(sdp); 1315 } 1316 } 1317 gfs2_glock_dq_uninit(&gh); 1318 1319 if (rgd == rgd_end) 1320 break; 1321 1322 rgd = gfs2_rgrpd_get_next(rgd); 1323 } 1324 1325 out: 1326 r.len = trimmed << 9; 1327 if (argp && copy_to_user(argp, &r, sizeof(r))) 1328 return -EFAULT; 1329 1330 return ret; 1331 } 1332 1333 /** 1334 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1335 * @ip: the inode structure 1336 * 1337 */ 1338 static void rs_insert(struct gfs2_inode *ip) 1339 { 1340 struct rb_node **newn, *parent = NULL; 1341 int rc; 1342 struct gfs2_blkreserv *rs = ip->i_res; 1343 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; 1344 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); 1345 1346 BUG_ON(gfs2_rs_active(rs)); 1347 1348 spin_lock(&rgd->rd_rsspin); 1349 newn = &rgd->rd_rstree.rb_node; 1350 while (*newn) { 1351 struct gfs2_blkreserv *cur = 1352 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1353 1354 parent = *newn; 1355 rc = rs_cmp(fsblock, rs->rs_free, cur); 1356 if (rc > 0) 1357 newn = &((*newn)->rb_right); 1358 else if (rc < 0) 1359 newn = &((*newn)->rb_left); 1360 else { 1361 spin_unlock(&rgd->rd_rsspin); 1362 WARN_ON(1); 1363 return; 1364 } 1365 } 1366 1367 rb_link_node(&rs->rs_node, parent, newn); 1368 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1369 1370 /* Do our rgrp accounting for the reservation */ 1371 rgd->rd_reserved += rs->rs_free; /* blocks reserved */ 1372 spin_unlock(&rgd->rd_rsspin); 1373 trace_gfs2_rs(rs, TRACE_RS_INSERT); 1374 } 1375 1376 /** 1377 * rg_mblk_search - find a group of multiple free blocks to form a reservation 1378 * @rgd: the resource group descriptor 1379 * @ip: pointer to the inode for which we're reserving blocks 1380 * @requested: number of blocks required for this allocation 1381 * 1382 */ 1383 1384 static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, 1385 unsigned requested) 1386 { 1387 struct gfs2_rbm rbm = { .rgd = rgd, }; 1388 u64 goal; 1389 struct gfs2_blkreserv *rs = ip->i_res; 1390 u32 extlen; 1391 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; 1392 int ret; 1393 1394 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); 1395 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); 1396 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) 1397 return; 1398 1399 /* Find bitmap block that contains bits for goal block */ 1400 if (rgrp_contains_block(rgd, ip->i_goal)) 1401 goal = ip->i_goal; 1402 else 1403 goal = rgd->rd_last_alloc + rgd->rd_data0; 1404 1405 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) 1406 return; 1407 1408 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); 1409 if (ret == 0) { 1410 rs->rs_rbm = rbm; 1411 rs->rs_free = extlen; 1412 rs->rs_inum = ip->i_no_addr; 1413 rs_insert(ip); 1414 } 1415 } 1416 1417 /** 1418 * gfs2_next_unreserved_block - Return next block that is not reserved 1419 * @rgd: The resource group 1420 * @block: The starting block 1421 * @length: The required length 1422 * @ip: Ignore any reservations for this inode 1423 * 1424 * If the block does not appear in any reservation, then return the 1425 * block number unchanged. If it does appear in the reservation, then 1426 * keep looking through the tree of reservations in order to find the 1427 * first block number which is not reserved. 1428 */ 1429 1430 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, 1431 u32 length, 1432 const struct gfs2_inode *ip) 1433 { 1434 struct gfs2_blkreserv *rs; 1435 struct rb_node *n; 1436 int rc; 1437 1438 spin_lock(&rgd->rd_rsspin); 1439 n = rgd->rd_rstree.rb_node; 1440 while (n) { 1441 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1442 rc = rs_cmp(block, length, rs); 1443 if (rc < 0) 1444 n = n->rb_left; 1445 else if (rc > 0) 1446 n = n->rb_right; 1447 else 1448 break; 1449 } 1450 1451 if (n) { 1452 while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { 1453 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; 1454 n = n->rb_right; 1455 if (n == NULL) 1456 break; 1457 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1458 } 1459 } 1460 1461 spin_unlock(&rgd->rd_rsspin); 1462 return block; 1463 } 1464 1465 /** 1466 * gfs2_reservation_check_and_update - Check for reservations during block alloc 1467 * @rbm: The current position in the resource group 1468 * @ip: The inode for which we are searching for blocks 1469 * @minext: The minimum extent length 1470 * 1471 * This checks the current position in the rgrp to see whether there is 1472 * a reservation covering this block. If not then this function is a 1473 * no-op. If there is, then the position is moved to the end of the 1474 * contiguous reservation(s) so that we are pointing at the first 1475 * non-reserved block. 1476 * 1477 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error 1478 */ 1479 1480 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, 1481 const struct gfs2_inode *ip, 1482 u32 minext) 1483 { 1484 u64 block = gfs2_rbm_to_block(rbm); 1485 u32 extlen = 1; 1486 u64 nblock; 1487 int ret; 1488 1489 /* 1490 * If we have a minimum extent length, then skip over any extent 1491 * which is less than the min extent length in size. 1492 */ 1493 if (minext) { 1494 extlen = gfs2_free_extlen(rbm, minext); 1495 nblock = block + extlen; 1496 if (extlen < minext) 1497 goto fail; 1498 } 1499 1500 /* 1501 * Check the extent which has been found against the reservations 1502 * and skip if parts of it are already reserved 1503 */ 1504 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); 1505 if (nblock == block) 1506 return 0; 1507 fail: 1508 ret = gfs2_rbm_from_block(rbm, nblock); 1509 if (ret < 0) 1510 return ret; 1511 return 1; 1512 } 1513 1514 /** 1515 * gfs2_rbm_find - Look for blocks of a particular state 1516 * @rbm: Value/result starting position and final position 1517 * @state: The state which we want to find 1518 * @minext: The requested extent length (0 for a single block) 1519 * @ip: If set, check for reservations 1520 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping 1521 * around until we've reached the starting point. 1522 * 1523 * Side effects: 1524 * - If looking for free blocks, we set GBF_FULL on each bitmap which 1525 * has no free blocks in it. 1526 * 1527 * Returns: 0 on success, -ENOSPC if there is no block of the requested state 1528 */ 1529 1530 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, 1531 const struct gfs2_inode *ip, bool nowrap) 1532 { 1533 struct buffer_head *bh; 1534 struct gfs2_bitmap *initial_bi; 1535 u32 initial_offset; 1536 u32 offset; 1537 u8 *buffer; 1538 int index; 1539 int n = 0; 1540 int iters = rbm->rgd->rd_length; 1541 int ret; 1542 1543 /* If we are not starting at the beginning of a bitmap, then we 1544 * need to add one to the bitmap count to ensure that we search 1545 * the starting bitmap twice. 1546 */ 1547 if (rbm->offset != 0) 1548 iters++; 1549 1550 while(1) { 1551 if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && 1552 (state == GFS2_BLKST_FREE)) 1553 goto next_bitmap; 1554 1555 bh = rbm->bi->bi_bh; 1556 buffer = bh->b_data + rbm->bi->bi_offset; 1557 WARN_ON(!buffer_uptodate(bh)); 1558 if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) 1559 buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; 1560 initial_offset = rbm->offset; 1561 offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); 1562 if (offset == BFITNOENT) 1563 goto bitmap_full; 1564 rbm->offset = offset; 1565 if (ip == NULL) 1566 return 0; 1567 1568 initial_bi = rbm->bi; 1569 ret = gfs2_reservation_check_and_update(rbm, ip, minext); 1570 if (ret == 0) 1571 return 0; 1572 if (ret > 0) { 1573 n += (rbm->bi - initial_bi); 1574 goto next_iter; 1575 } 1576 if (ret == -E2BIG) { 1577 index = 0; 1578 rbm->offset = 0; 1579 n += (rbm->bi - initial_bi); 1580 goto res_covered_end_of_rgrp; 1581 } 1582 return ret; 1583 1584 bitmap_full: /* Mark bitmap as full and fall through */ 1585 if ((state == GFS2_BLKST_FREE) && initial_offset == 0) 1586 set_bit(GBF_FULL, &rbm->bi->bi_flags); 1587 1588 next_bitmap: /* Find next bitmap in the rgrp */ 1589 rbm->offset = 0; 1590 index = rbm->bi - rbm->rgd->rd_bits; 1591 index++; 1592 if (index == rbm->rgd->rd_length) 1593 index = 0; 1594 res_covered_end_of_rgrp: 1595 rbm->bi = &rbm->rgd->rd_bits[index]; 1596 if ((index == 0) && nowrap) 1597 break; 1598 n++; 1599 next_iter: 1600 if (n >= iters) 1601 break; 1602 } 1603 1604 return -ENOSPC; 1605 } 1606 1607 /** 1608 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1609 * @rgd: The rgrp 1610 * @last_unlinked: block address of the last dinode we unlinked 1611 * @skip: block address we should explicitly not unlink 1612 * 1613 * Returns: 0 if no error 1614 * The inode, if one has been found, in inode. 1615 */ 1616 1617 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1618 { 1619 u64 block; 1620 struct gfs2_sbd *sdp = rgd->rd_sbd; 1621 struct gfs2_glock *gl; 1622 struct gfs2_inode *ip; 1623 int error; 1624 int found = 0; 1625 struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; 1626 1627 while (1) { 1628 down_write(&sdp->sd_log_flush_lock); 1629 error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); 1630 up_write(&sdp->sd_log_flush_lock); 1631 if (error == -ENOSPC) 1632 break; 1633 if (WARN_ON_ONCE(error)) 1634 break; 1635 1636 block = gfs2_rbm_to_block(&rbm); 1637 if (gfs2_rbm_from_block(&rbm, block + 1)) 1638 break; 1639 if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) 1640 continue; 1641 if (block == skip) 1642 continue; 1643 *last_unlinked = block; 1644 1645 error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); 1646 if (error) 1647 continue; 1648 1649 /* If the inode is already in cache, we can ignore it here 1650 * because the existing inode disposal code will deal with 1651 * it when all refs have gone away. Accessing gl_object like 1652 * this is not safe in general. Here it is ok because we do 1653 * not dereference the pointer, and we only need an approx 1654 * answer to whether it is NULL or not. 1655 */ 1656 ip = gl->gl_object; 1657 1658 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 1659 gfs2_glock_put(gl); 1660 else 1661 found++; 1662 1663 /* Limit reclaim to sensible number of tasks */ 1664 if (found > NR_CPUS) 1665 return; 1666 } 1667 1668 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1669 return; 1670 } 1671 1672 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1673 { 1674 struct gfs2_rgrpd *rgd = *pos; 1675 1676 rgd = gfs2_rgrpd_get_next(rgd); 1677 if (rgd == NULL) 1678 rgd = gfs2_rgrpd_get_next(NULL); 1679 *pos = rgd; 1680 if (rgd != begin) /* If we didn't wrap */ 1681 return true; 1682 return false; 1683 } 1684 1685 /** 1686 * gfs2_inplace_reserve - Reserve space in the filesystem 1687 * @ip: the inode to reserve space for 1688 * @requested: the number of blocks to be reserved 1689 * 1690 * Returns: errno 1691 */ 1692 1693 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1694 { 1695 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1696 struct gfs2_rgrpd *begin = NULL; 1697 struct gfs2_blkreserv *rs = ip->i_res; 1698 int error = 0, rg_locked, flags = LM_FLAG_TRY; 1699 u64 last_unlinked = NO_BLOCK; 1700 int loops = 0; 1701 1702 if (sdp->sd_args.ar_rgrplvb) 1703 flags |= GL_SKIP; 1704 if (gfs2_assert_warn(sdp, requested)) 1705 return -EINVAL; 1706 if (gfs2_rs_active(rs)) { 1707 begin = rs->rs_rbm.rgd; 1708 flags = 0; /* Yoda: Do or do not. There is no try */ 1709 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 1710 rs->rs_rbm.rgd = begin = ip->i_rgd; 1711 } else { 1712 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1713 } 1714 if (rs->rs_rbm.rgd == NULL) 1715 return -EBADSLT; 1716 1717 while (loops < 3) { 1718 rg_locked = 1; 1719 1720 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1721 rg_locked = 0; 1722 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1723 LM_ST_EXCLUSIVE, flags, 1724 &rs->rs_rgd_gh); 1725 if (error == GLR_TRYFAILED) 1726 goto next_rgrp; 1727 if (unlikely(error)) 1728 return error; 1729 if (sdp->sd_args.ar_rgrplvb) { 1730 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1731 if (unlikely(error)) { 1732 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1733 return error; 1734 } 1735 } 1736 } 1737 1738 /* Skip unuseable resource groups */ 1739 if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1740 goto skip_rgrp; 1741 1742 if (sdp->sd_args.ar_rgrplvb) 1743 gfs2_rgrp_bh_get(rs->rs_rbm.rgd); 1744 1745 /* Get a reservation if we don't already have one */ 1746 if (!gfs2_rs_active(rs)) 1747 rg_mblk_search(rs->rs_rbm.rgd, ip, requested); 1748 1749 /* Skip rgrps when we can't get a reservation on first pass */ 1750 if (!gfs2_rs_active(rs) && (loops < 1)) 1751 goto check_rgrp; 1752 1753 /* If rgrp has enough free space, use it */ 1754 if (rs->rs_rbm.rgd->rd_free_clone >= requested) { 1755 ip->i_rgd = rs->rs_rbm.rgd; 1756 return 0; 1757 } 1758 1759 /* Drop reservation, if we couldn't use reserved rgrp */ 1760 if (gfs2_rs_active(rs)) 1761 gfs2_rs_deltree(ip, rs); 1762 check_rgrp: 1763 /* Check for unlinked inodes which can be reclaimed */ 1764 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) 1765 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, 1766 ip->i_no_addr); 1767 skip_rgrp: 1768 /* Unlock rgrp if required */ 1769 if (!rg_locked) 1770 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1771 next_rgrp: 1772 /* Find the next rgrp, and continue looking */ 1773 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1774 continue; 1775 1776 /* If we've scanned all the rgrps, but found no free blocks 1777 * then this checks for some less likely conditions before 1778 * trying again. 1779 */ 1780 flags &= ~LM_FLAG_TRY; 1781 loops++; 1782 /* Check that fs hasn't grown if writing to rindex */ 1783 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { 1784 error = gfs2_ri_update(ip); 1785 if (error) 1786 return error; 1787 } 1788 /* Flushing the log may release space */ 1789 if (loops == 2) 1790 gfs2_log_flush(sdp, NULL); 1791 } 1792 1793 return -ENOSPC; 1794 } 1795 1796 /** 1797 * gfs2_inplace_release - release an inplace reservation 1798 * @ip: the inode the reservation was taken out on 1799 * 1800 * Release a reservation made by gfs2_inplace_reserve(). 1801 */ 1802 1803 void gfs2_inplace_release(struct gfs2_inode *ip) 1804 { 1805 struct gfs2_blkreserv *rs = ip->i_res; 1806 1807 if (rs->rs_rgd_gh.gh_gl) 1808 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1809 } 1810 1811 /** 1812 * gfs2_get_block_type - Check a block in a RG is of given type 1813 * @rgd: the resource group holding the block 1814 * @block: the block number 1815 * 1816 * Returns: The block type (GFS2_BLKST_*) 1817 */ 1818 1819 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1820 { 1821 struct gfs2_rbm rbm = { .rgd = rgd, }; 1822 int ret; 1823 1824 ret = gfs2_rbm_from_block(&rbm, block); 1825 WARN_ON_ONCE(ret != 0); 1826 1827 return gfs2_testbit(&rbm); 1828 } 1829 1830 1831 /** 1832 * gfs2_alloc_extent - allocate an extent from a given bitmap 1833 * @rbm: the resource group information 1834 * @dinode: TRUE if the first block we allocate is for a dinode 1835 * @n: The extent length (value/result) 1836 * 1837 * Add the bitmap buffer to the transaction. 1838 * Set the found bits to @new_state to change block's allocation state. 1839 */ 1840 static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, 1841 unsigned int *n) 1842 { 1843 struct gfs2_rbm pos = { .rgd = rbm->rgd, }; 1844 const unsigned int elen = *n; 1845 u64 block; 1846 int ret; 1847 1848 *n = 1; 1849 block = gfs2_rbm_to_block(rbm); 1850 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1851 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1852 block++; 1853 while (*n < elen) { 1854 ret = gfs2_rbm_from_block(&pos, block); 1855 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1856 break; 1857 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1858 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1859 (*n)++; 1860 block++; 1861 } 1862 } 1863 1864 /** 1865 * rgblk_free - Change alloc state of given block(s) 1866 * @sdp: the filesystem 1867 * @bstart: the start of a run of blocks to free 1868 * @blen: the length of the block run (all must lie within ONE RG!) 1869 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1870 * 1871 * Returns: Resource group containing the block(s) 1872 */ 1873 1874 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1875 u32 blen, unsigned char new_state) 1876 { 1877 struct gfs2_rbm rbm; 1878 1879 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1880 if (!rbm.rgd) { 1881 if (gfs2_consist(sdp)) 1882 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1883 return NULL; 1884 } 1885 1886 while (blen--) { 1887 gfs2_rbm_from_block(&rbm, bstart); 1888 bstart++; 1889 if (!rbm.bi->bi_clone) { 1890 rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, 1891 GFP_NOFS | __GFP_NOFAIL); 1892 memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, 1893 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 1894 rbm.bi->bi_len); 1895 } 1896 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 1897 gfs2_setbit(&rbm, false, new_state); 1898 } 1899 1900 return rbm.rgd; 1901 } 1902 1903 /** 1904 * gfs2_rgrp_dump - print out an rgrp 1905 * @seq: The iterator 1906 * @gl: The glock in question 1907 * 1908 */ 1909 1910 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1911 { 1912 struct gfs2_rgrpd *rgd = gl->gl_object; 1913 struct gfs2_blkreserv *trs; 1914 const struct rb_node *n; 1915 1916 if (rgd == NULL) 1917 return 0; 1918 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", 1919 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1920 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, 1921 rgd->rd_reserved); 1922 spin_lock(&rgd->rd_rsspin); 1923 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { 1924 trs = rb_entry(n, struct gfs2_blkreserv, rs_node); 1925 dump_rs(seq, trs); 1926 } 1927 spin_unlock(&rgd->rd_rsspin); 1928 return 0; 1929 } 1930 1931 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 1932 { 1933 struct gfs2_sbd *sdp = rgd->rd_sbd; 1934 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 1935 (unsigned long long)rgd->rd_addr); 1936 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 1937 gfs2_rgrp_dump(NULL, rgd->rd_gl); 1938 rgd->rd_flags |= GFS2_RDF_ERROR; 1939 } 1940 1941 /** 1942 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation 1943 * @ip: The inode we have just allocated blocks for 1944 * @rbm: The start of the allocated blocks 1945 * @len: The extent length 1946 * 1947 * Adjusts a reservation after an allocation has taken place. If the 1948 * reservation does not match the allocation, or if it is now empty 1949 * then it is removed. 1950 */ 1951 1952 static void gfs2_adjust_reservation(struct gfs2_inode *ip, 1953 const struct gfs2_rbm *rbm, unsigned len) 1954 { 1955 struct gfs2_blkreserv *rs = ip->i_res; 1956 struct gfs2_rgrpd *rgd = rbm->rgd; 1957 unsigned rlen; 1958 u64 block; 1959 int ret; 1960 1961 spin_lock(&rgd->rd_rsspin); 1962 if (gfs2_rs_active(rs)) { 1963 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { 1964 block = gfs2_rbm_to_block(rbm); 1965 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); 1966 rlen = min(rs->rs_free, len); 1967 rs->rs_free -= rlen; 1968 rgd->rd_reserved -= rlen; 1969 trace_gfs2_rs(rs, TRACE_RS_CLAIM); 1970 if (rs->rs_free && !ret) 1971 goto out; 1972 } 1973 __rs_deltree(ip, rs); 1974 } 1975 out: 1976 spin_unlock(&rgd->rd_rsspin); 1977 } 1978 1979 /** 1980 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 1981 * @ip: the inode to allocate the block for 1982 * @bn: Used to return the starting block number 1983 * @nblocks: requested number of blocks/extent length (value/result) 1984 * @dinode: 1 if we're allocating a dinode block, else 0 1985 * @generation: the generation number of the inode 1986 * 1987 * Returns: 0 or error 1988 */ 1989 1990 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, 1991 bool dinode, u64 *generation) 1992 { 1993 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1994 struct buffer_head *dibh; 1995 struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; 1996 unsigned int ndata; 1997 u64 goal; 1998 u64 block; /* block, within the file system scope */ 1999 int error; 2000 2001 if (gfs2_rs_active(ip->i_res)) 2002 goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); 2003 else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) 2004 goal = ip->i_goal; 2005 else 2006 goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; 2007 2008 gfs2_rbm_from_block(&rbm, goal); 2009 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); 2010 2011 if (error == -ENOSPC) { 2012 gfs2_rbm_from_block(&rbm, goal); 2013 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); 2014 } 2015 2016 /* Since all blocks are reserved in advance, this shouldn't happen */ 2017 if (error) { 2018 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", 2019 (unsigned long long)ip->i_no_addr, error, *nblocks, 2020 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); 2021 goto rgrp_error; 2022 } 2023 2024 gfs2_alloc_extent(&rbm, dinode, nblocks); 2025 block = gfs2_rbm_to_block(&rbm); 2026 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; 2027 if (gfs2_rs_active(ip->i_res)) 2028 gfs2_adjust_reservation(ip, &rbm, *nblocks); 2029 ndata = *nblocks; 2030 if (dinode) 2031 ndata--; 2032 2033 if (!dinode) { 2034 ip->i_goal = block + ndata - 1; 2035 error = gfs2_meta_inode_buffer(ip, &dibh); 2036 if (error == 0) { 2037 struct gfs2_dinode *di = 2038 (struct gfs2_dinode *)dibh->b_data; 2039 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2040 di->di_goal_meta = di->di_goal_data = 2041 cpu_to_be64(ip->i_goal); 2042 brelse(dibh); 2043 } 2044 } 2045 if (rbm.rgd->rd_free < *nblocks) { 2046 printk(KERN_WARNING "nblocks=%u\n", *nblocks); 2047 goto rgrp_error; 2048 } 2049 2050 rbm.rgd->rd_free -= *nblocks; 2051 if (dinode) { 2052 rbm.rgd->rd_dinodes++; 2053 *generation = rbm.rgd->rd_igeneration++; 2054 if (*generation == 0) 2055 *generation = rbm.rgd->rd_igeneration++; 2056 } 2057 2058 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2059 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2060 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2061 2062 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2063 if (dinode) 2064 gfs2_trans_add_unrevoke(sdp, block, 1); 2065 2066 /* 2067 * This needs reviewing to see why we cannot do the quota change 2068 * at this point in the dinode case. 2069 */ 2070 if (ndata) 2071 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 2072 ip->i_inode.i_gid); 2073 2074 rbm.rgd->rd_free_clone -= *nblocks; 2075 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, 2076 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 2077 *bn = block; 2078 return 0; 2079 2080 rgrp_error: 2081 gfs2_rgrp_error(rbm.rgd); 2082 return -EIO; 2083 } 2084 2085 /** 2086 * __gfs2_free_blocks - free a contiguous run of block(s) 2087 * @ip: the inode these blocks are being freed from 2088 * @bstart: first block of a run of contiguous blocks 2089 * @blen: the length of the block run 2090 * @meta: 1 if the blocks represent metadata 2091 * 2092 */ 2093 2094 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) 2095 { 2096 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2097 struct gfs2_rgrpd *rgd; 2098 2099 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 2100 if (!rgd) 2101 return; 2102 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2103 rgd->rd_free += blen; 2104 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2105 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2106 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2107 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2108 2109 /* Directories keep their data in the metadata address space */ 2110 if (meta || ip->i_depth) 2111 gfs2_meta_wipe(ip, bstart, blen); 2112 } 2113 2114 /** 2115 * gfs2_free_meta - free a contiguous run of data block(s) 2116 * @ip: the inode these blocks are being freed from 2117 * @bstart: first block of a run of contiguous blocks 2118 * @blen: the length of the block run 2119 * 2120 */ 2121 2122 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 2123 { 2124 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2125 2126 __gfs2_free_blocks(ip, bstart, blen, 1); 2127 gfs2_statfs_change(sdp, 0, +blen, 0); 2128 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 2129 } 2130 2131 void gfs2_unlink_di(struct inode *inode) 2132 { 2133 struct gfs2_inode *ip = GFS2_I(inode); 2134 struct gfs2_sbd *sdp = GFS2_SB(inode); 2135 struct gfs2_rgrpd *rgd; 2136 u64 blkno = ip->i_no_addr; 2137 2138 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 2139 if (!rgd) 2140 return; 2141 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2142 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2143 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2144 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2145 update_rgrp_lvb_unlinked(rgd, 1); 2146 } 2147 2148 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 2149 { 2150 struct gfs2_sbd *sdp = rgd->rd_sbd; 2151 struct gfs2_rgrpd *tmp_rgd; 2152 2153 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 2154 if (!tmp_rgd) 2155 return; 2156 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 2157 2158 if (!rgd->rd_dinodes) 2159 gfs2_consist_rgrpd(rgd); 2160 rgd->rd_dinodes--; 2161 rgd->rd_free++; 2162 2163 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2164 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2165 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2166 update_rgrp_lvb_unlinked(rgd, -1); 2167 2168 gfs2_statfs_change(sdp, 0, +1, -1); 2169 } 2170 2171 2172 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 2173 { 2174 gfs2_free_uninit_di(rgd, ip->i_no_addr); 2175 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); 2176 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 2177 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 2178 } 2179 2180 /** 2181 * gfs2_check_blk_type - Check the type of a block 2182 * @sdp: The superblock 2183 * @no_addr: The block number to check 2184 * @type: The block type we are looking for 2185 * 2186 * Returns: 0 if the block type matches the expected type 2187 * -ESTALE if it doesn't match 2188 * or -ve errno if something went wrong while checking 2189 */ 2190 2191 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 2192 { 2193 struct gfs2_rgrpd *rgd; 2194 struct gfs2_holder rgd_gh; 2195 int error = -EINVAL; 2196 2197 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); 2198 if (!rgd) 2199 goto fail; 2200 2201 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 2202 if (error) 2203 goto fail; 2204 2205 if (gfs2_get_block_type(rgd, no_addr) != type) 2206 error = -ESTALE; 2207 2208 gfs2_glock_dq_uninit(&rgd_gh); 2209 fail: 2210 return error; 2211 } 2212 2213 /** 2214 * gfs2_rlist_add - add a RG to a list of RGs 2215 * @ip: the inode 2216 * @rlist: the list of resource groups 2217 * @block: the block 2218 * 2219 * Figure out what RG a block belongs to and add that RG to the list 2220 * 2221 * FIXME: Don't use NOFAIL 2222 * 2223 */ 2224 2225 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 2226 u64 block) 2227 { 2228 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 2229 struct gfs2_rgrpd *rgd; 2230 struct gfs2_rgrpd **tmp; 2231 unsigned int new_space; 2232 unsigned int x; 2233 2234 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 2235 return; 2236 2237 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 2238 rgd = ip->i_rgd; 2239 else 2240 rgd = gfs2_blk2rgrpd(sdp, block, 1); 2241 if (!rgd) { 2242 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 2243 return; 2244 } 2245 ip->i_rgd = rgd; 2246 2247 for (x = 0; x < rlist->rl_rgrps; x++) 2248 if (rlist->rl_rgd[x] == rgd) 2249 return; 2250 2251 if (rlist->rl_rgrps == rlist->rl_space) { 2252 new_space = rlist->rl_space + 10; 2253 2254 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 2255 GFP_NOFS | __GFP_NOFAIL); 2256 2257 if (rlist->rl_rgd) { 2258 memcpy(tmp, rlist->rl_rgd, 2259 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 2260 kfree(rlist->rl_rgd); 2261 } 2262 2263 rlist->rl_space = new_space; 2264 rlist->rl_rgd = tmp; 2265 } 2266 2267 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 2268 } 2269 2270 /** 2271 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 2272 * and initialize an array of glock holders for them 2273 * @rlist: the list of resource groups 2274 * @state: the lock state to acquire the RG lock in 2275 * 2276 * FIXME: Don't use NOFAIL 2277 * 2278 */ 2279 2280 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 2281 { 2282 unsigned int x; 2283 2284 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), 2285 GFP_NOFS | __GFP_NOFAIL); 2286 for (x = 0; x < rlist->rl_rgrps; x++) 2287 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 2288 state, 0, 2289 &rlist->rl_ghs[x]); 2290 } 2291 2292 /** 2293 * gfs2_rlist_free - free a resource group list 2294 * @list: the list of resource groups 2295 * 2296 */ 2297 2298 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 2299 { 2300 unsigned int x; 2301 2302 kfree(rlist->rl_rgd); 2303 2304 if (rlist->rl_ghs) { 2305 for (x = 0; x < rlist->rl_rgrps; x++) 2306 gfs2_holder_uninit(&rlist->rl_ghs[x]); 2307 kfree(rlist->rl_ghs); 2308 rlist->rl_ghs = NULL; 2309 } 2310 } 2311 2312