1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/spinlock.h> 12 #include <linux/completion.h> 13 #include <linux/buffer_head.h> 14 #include <linux/fs.h> 15 #include <linux/gfs2_ondisk.h> 16 #include <linux/prefetch.h> 17 #include <linux/blkdev.h> 18 #include <linux/rbtree.h> 19 20 #include "gfs2.h" 21 #include "incore.h" 22 #include "glock.h" 23 #include "glops.h" 24 #include "lops.h" 25 #include "meta_io.h" 26 #include "quota.h" 27 #include "rgrp.h" 28 #include "super.h" 29 #include "trans.h" 30 #include "util.h" 31 #include "log.h" 32 #include "inode.h" 33 #include "trace_gfs2.h" 34 35 #define BFITNOENT ((u32)~0) 36 #define NO_BLOCK ((u64)~0) 37 38 #if BITS_PER_LONG == 32 39 #define LBITMASK (0x55555555UL) 40 #define LBITSKIP55 (0x55555555UL) 41 #define LBITSKIP00 (0x00000000UL) 42 #else 43 #define LBITMASK (0x5555555555555555UL) 44 #define LBITSKIP55 (0x5555555555555555UL) 45 #define LBITSKIP00 (0x0000000000000000UL) 46 #endif 47 48 /* 49 * These routines are used by the resource group routines (rgrp.c) 50 * to keep track of block allocation. Each block is represented by two 51 * bits. So, each byte represents GFS2_NBBY (i.e. 4) blocks. 
52 * 53 * 0 = Free 54 * 1 = Used (not metadata) 55 * 2 = Unlinked (still in use) inode 56 * 3 = Used (metadata) 57 */ 58 59 static const char valid_change[16] = { 60 /* current */ 61 /* n */ 0, 1, 1, 1, 62 /* e */ 1, 0, 0, 0, 63 /* w */ 0, 0, 0, 1, 64 1, 0, 0, 0 65 }; 66 67 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 68 unsigned char old_state, 69 struct gfs2_bitmap **rbi); 70 71 /** 72 * gfs2_setbit - Set a bit in the bitmaps 73 * @rgd: the resource group descriptor 74 * @buf2: the clone buffer that holds the bitmaps 75 * @bi: the bitmap structure 76 * @block: the block to set 77 * @new_state: the new state of the block 78 * 79 */ 80 81 static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, 82 struct gfs2_bitmap *bi, u32 block, 83 unsigned char new_state) 84 { 85 unsigned char *byte1, *byte2, *end, cur_state; 86 unsigned int buflen = bi->bi_len; 87 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 88 89 byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); 90 end = bi->bi_bh->b_data + bi->bi_offset + buflen; 91 92 BUG_ON(byte1 >= end); 93 94 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 95 96 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 97 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " 98 "new_state=%d\n", 99 (unsigned long long)block, cur_state, new_state); 100 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", 101 (unsigned long long)rgd->rd_addr, 102 (unsigned long)bi->bi_start); 103 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", 104 (unsigned long)bi->bi_offset, 105 (unsigned long)bi->bi_len); 106 dump_stack(); 107 gfs2_consist_rgrpd(rgd); 108 return; 109 } 110 *byte1 ^= (cur_state ^ new_state) << bit; 111 112 if (buf2) { 113 byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); 114 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 115 *byte2 ^= (cur_state ^ new_state) << bit; 116 } 117 } 118 119 /** 120 * gfs2_testbit - test a bit in the bitmaps 121 * @rgd: the 
resource group descriptor 122 * @buffer: the buffer that holds the bitmaps 123 * @buflen: the length (in bytes) of the buffer 124 * @block: the block to read 125 * 126 */ 127 128 static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, 129 const unsigned char *buffer, 130 unsigned int buflen, u32 block) 131 { 132 const unsigned char *byte, *end; 133 unsigned char cur_state; 134 unsigned int bit; 135 136 byte = buffer + (block / GFS2_NBBY); 137 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 138 end = buffer + buflen; 139 140 gfs2_assert(rgd->rd_sbd, byte < end); 141 142 cur_state = (*byte >> bit) & GFS2_BIT_MASK; 143 144 return cur_state; 145 } 146 147 /** 148 * gfs2_bit_search 149 * @ptr: Pointer to bitmap data 150 * @mask: Mask to use (normally 0x55555.... but adjusted for search start) 151 * @state: The state we are searching for 152 * 153 * We xor the bitmap data with a patter which is the bitwise opposite 154 * of what we are looking for, this gives rise to a pattern of ones 155 * wherever there is a match. Since we have two bits per entry, we 156 * take this pattern, shift it down by one place and then and it with 157 * the original. All the even bit positions (0,2,4, etc) then represent 158 * successful matches, so we mask with 0x55555..... to remove the unwanted 159 * odd bit positions. 160 * 161 * This allows searching of a whole u64 at once (32 blocks) with a 162 * single test (on 64 bit arches). 163 */ 164 165 static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) 166 { 167 u64 tmp; 168 static const u64 search[] = { 169 [0] = 0xffffffffffffffffULL, 170 [1] = 0xaaaaaaaaaaaaaaaaULL, 171 [2] = 0x5555555555555555ULL, 172 [3] = 0x0000000000000000ULL, 173 }; 174 tmp = le64_to_cpu(*ptr) ^ search[state]; 175 tmp &= (tmp >> 1); 176 tmp &= mask; 177 return tmp; 178 } 179 180 /** 181 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 182 * a block in a given allocation state. 
 * @buf: the buffer that holds the bitmaps
 * @len: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buf)
 * @state: GFS2_BLKST_XXX the state of the block we're looking for.
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buf will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures, but
 * headers are always a multiple of 64 bits long so that the buffer is
 * always aligned to a 64 bit boundary.
 *
 * The size of the buffer is in bytes, but it is assumed that it is
 * always ok to read a complete multiple of 64 bits at the end
 * of the block in case the end is not aligned to a natural boundary.
 *
 * Return: the block number (bitmap buffer scope) that was found, or
 *         BFITNOENT if no bit-pair in @state was found
 */

static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
		       u32 goal, u8 state)
{
	/* Bit offset of @goal's bit-pair inside its 64-bit word (0..62) */
	u32 spoint = (goal << 1) & ((8*sizeof(u64)) - 1);
	/* 32 bit-pairs per u64, so goal >> 5 selects the word to start in */
	const __le64 *ptr = ((__le64 *)buf) + (goal >> 5);
	/* One past the last word, with @len rounded up to a whole u64 */
	const __le64 *end = (__le64 *)(buf + ALIGN(len, sizeof(u64)));
	u64 tmp;
	u64 mask = 0x5555555555555555ULL;
	u32 bit;

	BUG_ON(state > 3);

	/* Mask off bits we don't care about at the start of the search */
	mask <<= spoint;
	tmp = gfs2_bit_search(ptr, mask, state);
	ptr++;
	/* Remaining words are searched in full, one u64 at a time */
	while(tmp == 0 && ptr < end) {
		tmp = gfs2_bit_search(ptr, 0x5555555555555555ULL, state);
		ptr++;
	}
	/* Mask off any bits which are more than len bytes from the start */
	if (ptr == end && (len & (sizeof(u64) - 1)))
		tmp &= (((u64)~0) >> (64 - 8*(len & (sizeof(u64) - 1))));
	/* Didn't find anything, so return */
	if (tmp == 0)
		return BFITNOENT;
	/* ptr was advanced past the word that matched; step back onto it */
	ptr--;
	bit = __ffs64(tmp);
	bit /= 2;	/* two bits per entry in the bitmap */
	/* Byte offset of the word times entries per byte, plus entry index */
	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
}

/**
 * gfs2_bitcount - count the number
of bits in a certain state 235 * @rgd: the resource group descriptor 236 * @buffer: the buffer that holds the bitmaps 237 * @buflen: the length (in bytes) of the buffer 238 * @state: the state of the block we're looking for 239 * 240 * Returns: The number of bits 241 */ 242 243 static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, 244 unsigned int buflen, u8 state) 245 { 246 const u8 *byte = buffer; 247 const u8 *end = buffer + buflen; 248 const u8 state1 = state << 2; 249 const u8 state2 = state << 4; 250 const u8 state3 = state << 6; 251 u32 count = 0; 252 253 for (; byte < end; byte++) { 254 if (((*byte) & 0x03) == state) 255 count++; 256 if (((*byte) & 0x0C) == state1) 257 count++; 258 if (((*byte) & 0x30) == state2) 259 count++; 260 if (((*byte) & 0xC0) == state3) 261 count++; 262 } 263 264 return count; 265 } 266 267 /** 268 * gfs2_rgrp_verify - Verify that a resource group is consistent 269 * @rgd: the rgrp 270 * 271 */ 272 273 void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) 274 { 275 struct gfs2_sbd *sdp = rgd->rd_sbd; 276 struct gfs2_bitmap *bi = NULL; 277 u32 length = rgd->rd_length; 278 u32 count[4], tmp; 279 int buf, x; 280 281 memset(count, 0, 4 * sizeof(u32)); 282 283 /* Count # blocks in each of 4 possible allocation states */ 284 for (buf = 0; buf < length; buf++) { 285 bi = rgd->rd_bits + buf; 286 for (x = 0; x < 4; x++) 287 count[x] += gfs2_bitcount(rgd, 288 bi->bi_bh->b_data + 289 bi->bi_offset, 290 bi->bi_len, x); 291 } 292 293 if (count[0] != rgd->rd_free) { 294 if (gfs2_consist_rgrpd(rgd)) 295 fs_err(sdp, "free data mismatch: %u != %u\n", 296 count[0], rgd->rd_free); 297 return; 298 } 299 300 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 301 if (count[1] != tmp) { 302 if (gfs2_consist_rgrpd(rgd)) 303 fs_err(sdp, "used data mismatch: %u != %u\n", 304 count[1], tmp); 305 return; 306 } 307 308 if (count[2] + count[3] != rgd->rd_dinodes) { 309 if (gfs2_consist_rgrpd(rgd)) 310 fs_err(sdp, "used metadata mismatch: %u != %u\n", 311 
count[2] + count[3], rgd->rd_dinodes); 312 return; 313 } 314 } 315 316 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) 317 { 318 u64 first = rgd->rd_data0; 319 u64 last = first + rgd->rd_data; 320 return first <= block && block < last; 321 } 322 323 /** 324 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number 325 * @sdp: The GFS2 superblock 326 * @blk: The data block number 327 * @exact: True if this needs to be an exact match 328 * 329 * Returns: The resource group, or NULL if not found 330 */ 331 332 struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) 333 { 334 struct rb_node *n, *next; 335 struct gfs2_rgrpd *cur; 336 337 spin_lock(&sdp->sd_rindex_spin); 338 n = sdp->sd_rindex_tree.rb_node; 339 while (n) { 340 cur = rb_entry(n, struct gfs2_rgrpd, rd_node); 341 next = NULL; 342 if (blk < cur->rd_addr) 343 next = n->rb_left; 344 else if (blk >= cur->rd_data0 + cur->rd_data) 345 next = n->rb_right; 346 if (next == NULL) { 347 spin_unlock(&sdp->sd_rindex_spin); 348 if (exact) { 349 if (blk < cur->rd_addr) 350 return NULL; 351 if (blk >= cur->rd_data0 + cur->rd_data) 352 return NULL; 353 } 354 return cur; 355 } 356 n = next; 357 } 358 spin_unlock(&sdp->sd_rindex_spin); 359 360 return NULL; 361 } 362 363 /** 364 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem 365 * @sdp: The GFS2 superblock 366 * 367 * Returns: The first rgrp in the filesystem 368 */ 369 370 struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) 371 { 372 const struct rb_node *n; 373 struct gfs2_rgrpd *rgd; 374 375 spin_lock(&sdp->sd_rindex_spin); 376 n = rb_first(&sdp->sd_rindex_tree); 377 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 378 spin_unlock(&sdp->sd_rindex_spin); 379 380 return rgd; 381 } 382 383 /** 384 * gfs2_rgrpd_get_next - get the next RG 385 * @rgd: the resource group descriptor 386 * 387 * Returns: The next rgrp 388 */ 389 390 struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd 
*rgd) 391 { 392 struct gfs2_sbd *sdp = rgd->rd_sbd; 393 const struct rb_node *n; 394 395 spin_lock(&sdp->sd_rindex_spin); 396 n = rb_next(&rgd->rd_node); 397 if (n == NULL) 398 n = rb_first(&sdp->sd_rindex_tree); 399 400 if (unlikely(&rgd->rd_node == n)) { 401 spin_unlock(&sdp->sd_rindex_spin); 402 return NULL; 403 } 404 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 405 spin_unlock(&sdp->sd_rindex_spin); 406 return rgd; 407 } 408 409 void gfs2_free_clones(struct gfs2_rgrpd *rgd) 410 { 411 int x; 412 413 for (x = 0; x < rgd->rd_length; x++) { 414 struct gfs2_bitmap *bi = rgd->rd_bits + x; 415 kfree(bi->bi_clone); 416 bi->bi_clone = NULL; 417 } 418 } 419 420 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) 421 { 422 struct rb_node *n; 423 struct gfs2_rgrpd *rgd; 424 struct gfs2_glock *gl; 425 426 while ((n = rb_first(&sdp->sd_rindex_tree))) { 427 rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); 428 gl = rgd->rd_gl; 429 430 rb_erase(n, &sdp->sd_rindex_tree); 431 432 if (gl) { 433 spin_lock(&gl->gl_spin); 434 gl->gl_object = NULL; 435 spin_unlock(&gl->gl_spin); 436 gfs2_glock_add_to_lru(gl); 437 gfs2_glock_put(gl); 438 } 439 440 gfs2_free_clones(rgd); 441 kfree(rgd->rd_bits); 442 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 443 } 444 } 445 446 static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) 447 { 448 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); 449 printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); 450 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); 451 printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); 452 printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); 453 } 454 455 /** 456 * gfs2_compute_bitstructs - Compute the bitmap sizes 457 * @rgd: The resource group descriptor 458 * 459 * Calculates bitmap descriptors, one for each block that contains bitmap data 460 * 461 * Returns: errno 462 */ 463 464 static int compute_bitstructs(struct gfs2_rgrpd *rgd) 465 { 466 struct gfs2_sbd *sdp = rgd->rd_sbd; 467 
struct gfs2_bitmap *bi; 468 u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */ 469 u32 bytes_left, bytes; 470 int x; 471 472 if (!length) 473 return -EINVAL; 474 475 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_NOFS); 476 if (!rgd->rd_bits) 477 return -ENOMEM; 478 479 bytes_left = rgd->rd_bitbytes; 480 481 for (x = 0; x < length; x++) { 482 bi = rgd->rd_bits + x; 483 484 bi->bi_flags = 0; 485 /* small rgrp; bitmap stored completely in header block */ 486 if (length == 1) { 487 bytes = bytes_left; 488 bi->bi_offset = sizeof(struct gfs2_rgrp); 489 bi->bi_start = 0; 490 bi->bi_len = bytes; 491 /* header block */ 492 } else if (x == 0) { 493 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); 494 bi->bi_offset = sizeof(struct gfs2_rgrp); 495 bi->bi_start = 0; 496 bi->bi_len = bytes; 497 /* last block */ 498 } else if (x + 1 == length) { 499 bytes = bytes_left; 500 bi->bi_offset = sizeof(struct gfs2_meta_header); 501 bi->bi_start = rgd->rd_bitbytes - bytes_left; 502 bi->bi_len = bytes; 503 /* other blocks */ 504 } else { 505 bytes = sdp->sd_sb.sb_bsize - 506 sizeof(struct gfs2_meta_header); 507 bi->bi_offset = sizeof(struct gfs2_meta_header); 508 bi->bi_start = rgd->rd_bitbytes - bytes_left; 509 bi->bi_len = bytes; 510 } 511 512 bytes_left -= bytes; 513 } 514 515 if (bytes_left) { 516 gfs2_consist_rgrpd(rgd); 517 return -EIO; 518 } 519 bi = rgd->rd_bits + (length - 1); 520 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) { 521 if (gfs2_consist_rgrpd(rgd)) { 522 gfs2_rindex_print(rgd); 523 fs_err(sdp, "start=%u len=%u offset=%u\n", 524 bi->bi_start, bi->bi_len, bi->bi_offset); 525 } 526 return -EIO; 527 } 528 529 return 0; 530 } 531 532 /** 533 * gfs2_ri_total - Total up the file system space, according to the rindex. 
534 * @sdp: the filesystem 535 * 536 */ 537 u64 gfs2_ri_total(struct gfs2_sbd *sdp) 538 { 539 u64 total_data = 0; 540 struct inode *inode = sdp->sd_rindex; 541 struct gfs2_inode *ip = GFS2_I(inode); 542 char buf[sizeof(struct gfs2_rindex)]; 543 int error, rgrps; 544 545 for (rgrps = 0;; rgrps++) { 546 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 547 548 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode)) 549 break; 550 error = gfs2_internal_read(ip, buf, &pos, 551 sizeof(struct gfs2_rindex)); 552 if (error != sizeof(struct gfs2_rindex)) 553 break; 554 total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data); 555 } 556 return total_data; 557 } 558 559 static int rgd_insert(struct gfs2_rgrpd *rgd) 560 { 561 struct gfs2_sbd *sdp = rgd->rd_sbd; 562 struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; 563 564 /* Figure out where to put new node */ 565 while (*newn) { 566 struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, 567 rd_node); 568 569 parent = *newn; 570 if (rgd->rd_addr < cur->rd_addr) 571 newn = &((*newn)->rb_left); 572 else if (rgd->rd_addr > cur->rd_addr) 573 newn = &((*newn)->rb_right); 574 else 575 return -EEXIST; 576 } 577 578 rb_link_node(&rgd->rd_node, parent, newn); 579 rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); 580 sdp->sd_rgrps++; 581 return 0; 582 } 583 584 /** 585 * read_rindex_entry - Pull in a new resource index entry from the disk 586 * @ip: Pointer to the rindex inode 587 * 588 * Returns: 0 on success, > 0 on EOF, error code otherwise 589 */ 590 591 static int read_rindex_entry(struct gfs2_inode *ip) 592 { 593 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 594 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); 595 struct gfs2_rindex buf; 596 int error; 597 struct gfs2_rgrpd *rgd; 598 599 if (pos >= i_size_read(&ip->i_inode)) 600 return 1; 601 602 error = gfs2_internal_read(ip, (char *)&buf, &pos, 603 sizeof(struct gfs2_rindex)); 604 605 if (error != sizeof(struct gfs2_rindex)) 606 
return (error == 0) ? 1 : error; 607 608 rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); 609 error = -ENOMEM; 610 if (!rgd) 611 return error; 612 613 rgd->rd_sbd = sdp; 614 rgd->rd_addr = be64_to_cpu(buf.ri_addr); 615 rgd->rd_length = be32_to_cpu(buf.ri_length); 616 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 617 rgd->rd_data = be32_to_cpu(buf.ri_data); 618 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 619 620 error = compute_bitstructs(rgd); 621 if (error) 622 goto fail; 623 624 error = gfs2_glock_get(sdp, rgd->rd_addr, 625 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); 626 if (error) 627 goto fail; 628 629 rgd->rd_gl->gl_object = rgd; 630 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 631 if (rgd->rd_data > sdp->sd_max_rg_data) 632 sdp->sd_max_rg_data = rgd->rd_data; 633 spin_lock(&sdp->sd_rindex_spin); 634 error = rgd_insert(rgd); 635 spin_unlock(&sdp->sd_rindex_spin); 636 if (!error) 637 return 0; 638 639 error = 0; /* someone else read in the rgrp; free it and ignore it */ 640 gfs2_glock_put(rgd->rd_gl); 641 642 fail: 643 kfree(rgd->rd_bits); 644 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 645 return error; 646 } 647 648 /** 649 * gfs2_ri_update - Pull in a new resource index from the disk 650 * @ip: pointer to the rindex inode 651 * 652 * Returns: 0 on successful update, error code otherwise 653 */ 654 655 static int gfs2_ri_update(struct gfs2_inode *ip) 656 { 657 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 658 int error; 659 660 do { 661 error = read_rindex_entry(ip); 662 } while (error == 0); 663 664 if (error < 0) 665 return error; 666 667 sdp->sd_rindex_uptodate = 1; 668 return 0; 669 } 670 671 /** 672 * gfs2_rindex_update - Update the rindex if required 673 * @sdp: The GFS2 superblock 674 * 675 * We grab a lock on the rindex inode to make sure that it doesn't 676 * change whilst we are performing an operation. We keep this lock 677 * for quite long periods of time compared to other locks. 
This 678 * doesn't matter, since it is shared and it is very, very rarely 679 * accessed in the exclusive mode (i.e. only when expanding the filesystem). 680 * 681 * This makes sure that we're using the latest copy of the resource index 682 * special file, which might have been updated if someone expanded the 683 * filesystem (via gfs2_grow utility), which adds new resource groups. 684 * 685 * Returns: 0 on succeess, error code otherwise 686 */ 687 688 int gfs2_rindex_update(struct gfs2_sbd *sdp) 689 { 690 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); 691 struct gfs2_glock *gl = ip->i_gl; 692 struct gfs2_holder ri_gh; 693 int error = 0; 694 int unlock_required = 0; 695 696 /* Read new copy from disk if we don't have the latest */ 697 if (!sdp->sd_rindex_uptodate) { 698 if (!gfs2_glock_is_locked_by_me(gl)) { 699 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh); 700 if (error) 701 return error; 702 unlock_required = 1; 703 } 704 if (!sdp->sd_rindex_uptodate) 705 error = gfs2_ri_update(ip); 706 if (unlock_required) 707 gfs2_glock_dq_uninit(&ri_gh); 708 } 709 710 return error; 711 } 712 713 static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) 714 { 715 const struct gfs2_rgrp *str = buf; 716 u32 rg_flags; 717 718 rg_flags = be32_to_cpu(str->rg_flags); 719 rg_flags &= ~GFS2_RDF_MASK; 720 rgd->rd_flags &= GFS2_RDF_MASK; 721 rgd->rd_flags |= rg_flags; 722 rgd->rd_free = be32_to_cpu(str->rg_free); 723 rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); 724 rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); 725 } 726 727 static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) 728 { 729 struct gfs2_rgrp *str = buf; 730 731 str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); 732 str->rg_free = cpu_to_be32(rgd->rd_free); 733 str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); 734 str->__pad = cpu_to_be32(0); 735 str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration); 736 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 737 } 738 739 /** 
 * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
 * @gh: The glock holder for the resource group
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * The buffers are released again by gfs2_rgrp_go_unlock().
 *
 * Returns: 0 on success, the error from reading or waiting for a block,
 *          or -EIO if a block fails the metadata type check
 */

int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	/* Start the reads for the header block and every bitmap block */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
		if (error)
			goto fail;
	}

	/*
	 * Wait for the blocks in reverse order and check their type:
	 * block 0 is the rgrp header, the others are bitmap blocks.
	 */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_wait(sdp, bi->bi_bh);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh, y ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* First read since the rgrp was invalidated: refresh in-core state */
	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
		for (x = 0; x < length; x++)
			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
		rgd->rd_free_clone = rgd->rd_free;
	}

	return 0;

fail:
	/* x counts the buffers obtained so far; release exactly those */
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}

	return error;
}

/**
 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_go_lock()
 * @gh: The glock holder for the resource group
 *
 */

void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
{
	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
	int x, length = rgd->rd_length;

	for (x = 0; x < length; x++) {
		struct gfs2_bitmap *bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
	}

}

/**
 * gfs2_rgrp_send_discards - Issue discards for blocks described by a bitmap
 * @sdp: the filesystem
 * @offset: block number corresponding to the first entry of the rgrp's
 *          bitmaps (gfs2_fitrim() passes rd_data0)
 * @bh: optional buffer with another copy of the bitmap block; may be NULL
 * @bi: the bitmap to scan
 * @minlen: extents shorter than this are not discarded; it is compared
 *          against nr_sects, i.e. the same unit that is discarded
 * @ptrimmed: if not NULL, receives the total amount discarded (same unit)
 *
 * With @bh == NULL every entry that is free (state 0) in the clone bitmap,
 * or in the bitmap buffer if there is no clone, is a discard candidate.
 * With @bh given, only entries that are free in @bh and not free in the
 * clone/bitmap are candidates.  Adjacent candidates are merged into extents.
 *
 * On a discard error, discards are switched off for the filesystem
 * (sd_args.ar_discard is cleared).
 *
 * NOTE(review): sects_per_blk is derived from the device's logical block
 * size, while gfs2_fitrim() converts the trimmed total with "<< 9".
 * Confirm that the unit handed to blkdev_issue_discard() is what it
 * expects on devices whose logical block size is not 512 bytes.
 *
 * Returns: 0 on success, -EIO if a discard request failed
 */

int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
			     struct buffer_head *bh,
			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
{
	struct super_block *sb = sdp->sd_vfs;
	struct block_device *bdev = sb->s_bdev;
	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
					   bdev_logical_block_size(sb->s_bdev);
	u64 blk;
	sector_t start = 0;
	sector_t nr_sects = 0;
	int rv;
	unsigned int x;
	u32 trimmed = 0;
	u8 diff;

	/* One bitmap byte (four block entries) per iteration */
	for (x = 0; x < bi->bi_len; x++) {
		const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
		clone += bi->bi_offset;
		clone += x;
		if (bh) {
			const u8 *orig = bh->b_data + bi->bi_offset + x;
			/* low bit of each pair: free in orig, not free in clone */
			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
		} else {
			/* low bit of each pair: free in clone */
			diff = ~(*clone | (*clone >> 1));
		}
		/* Keep only the low bit of every two-bit entry */
		diff &= 0x55;
		if (diff == 0)
			continue;
		blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
		blk *= sects_per_blk; /* convert to sectors */
		while(diff) {
			if (diff & 1) {
				if (nr_sects == 0)
					goto start_new_extent;
				/* Not contiguous: flush the pending extent */
				if ((start + nr_sects) != blk) {
					if (nr_sects >= minlen) {
						rv = blkdev_issue_discard(bdev,
							start, nr_sects,
							GFP_NOFS, 0);
						if (rv)
							goto fail;
						trimmed += nr_sects;
					}
					nr_sects = 0;
start_new_extent:
					start = blk;
				}
				nr_sects += sects_per_blk;
			}
			diff >>= 2;
			blk += sects_per_blk;
		}
	}
	/* Flush the extent still pending at the end of the bitmap */
	if (nr_sects >= minlen) {
		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
		if (rv)
			goto fail;
		trimmed += nr_sects;
	}
	if (ptrimmed)
		*ptrimmed = trimmed;
	return 0;

fail:
	if (sdp->sd_args.ar_discard)
		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
	sdp->sd_args.ar_discard = 0;
	return -EIO;
}

/**
 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
 * @filp: Any file on the filesystem
 * @argp: Pointer to the arguments
(also used to pass result) 893 * 894 * Returns: 0 on success, otherwise error code 895 */ 896 897 int gfs2_fitrim(struct file *filp, void __user *argp) 898 { 899 struct inode *inode = filp->f_dentry->d_inode; 900 struct gfs2_sbd *sdp = GFS2_SB(inode); 901 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 902 struct buffer_head *bh; 903 struct gfs2_rgrpd *rgd; 904 struct gfs2_rgrpd *rgd_end; 905 struct gfs2_holder gh; 906 struct fstrim_range r; 907 int ret = 0; 908 u64 amt; 909 u64 trimmed = 0; 910 unsigned int x; 911 912 if (!capable(CAP_SYS_ADMIN)) 913 return -EPERM; 914 915 if (!blk_queue_discard(q)) 916 return -EOPNOTSUPP; 917 918 if (argp == NULL) { 919 r.start = 0; 920 r.len = ULLONG_MAX; 921 r.minlen = 0; 922 } else if (copy_from_user(&r, argp, sizeof(r))) 923 return -EFAULT; 924 925 ret = gfs2_rindex_update(sdp); 926 if (ret) 927 return ret; 928 929 rgd = gfs2_blk2rgrpd(sdp, r.start, 0); 930 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); 931 932 while (1) { 933 934 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); 935 if (ret) 936 goto out; 937 938 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { 939 /* Trim each bitmap in the rgrp */ 940 for (x = 0; x < rgd->rd_length; x++) { 941 struct gfs2_bitmap *bi = rgd->rd_bits + x; 942 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt); 943 if (ret) { 944 gfs2_glock_dq_uninit(&gh); 945 goto out; 946 } 947 trimmed += amt; 948 } 949 950 /* Mark rgrp as having been trimmed */ 951 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); 952 if (ret == 0) { 953 bh = rgd->rd_bits[0].bi_bh; 954 rgd->rd_flags |= GFS2_RGF_TRIMMED; 955 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 956 gfs2_rgrp_out(rgd, bh->b_data); 957 gfs2_trans_end(sdp); 958 } 959 } 960 gfs2_glock_dq_uninit(&gh); 961 962 if (rgd == rgd_end) 963 break; 964 965 rgd = gfs2_rgrpd_get_next(rgd); 966 } 967 968 out: 969 r.len = trimmed << 9; 970 if (argp && copy_to_user(argp, &r, sizeof(r))) 971 return -EFAULT; 972 973 return ret; 974 } 
975 976 /** 977 * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode 978 * @ip: the incore GFS2 inode structure 979 * 980 * Returns: the struct gfs2_qadata 981 */ 982 983 struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) 984 { 985 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 986 int error; 987 BUG_ON(ip->i_qadata != NULL); 988 ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); 989 error = gfs2_rindex_update(sdp); 990 if (error) 991 fs_warn(sdp, "rindex update returns %d\n", error); 992 return ip->i_qadata; 993 } 994 995 /** 996 * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode 997 * @ip: the incore GFS2 inode structure 998 * 999 * Returns: the struct gfs2_qadata 1000 */ 1001 1002 static int gfs2_blkrsv_get(struct gfs2_inode *ip) 1003 { 1004 BUG_ON(ip->i_res != NULL); 1005 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 1006 if (!ip->i_res) 1007 return -ENOMEM; 1008 return 0; 1009 } 1010 1011 /** 1012 * try_rgrp_fit - See if a given reservation will fit in a given RG 1013 * @rgd: the RG data 1014 * @ip: the inode 1015 * 1016 * If there's room for the requested blocks to be allocated from the RG: 1017 * 1018 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 1019 */ 1020 1021 static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) 1022 { 1023 const struct gfs2_blkreserv *rs = ip->i_res; 1024 1025 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1026 return 0; 1027 if (rgd->rd_free_clone >= rs->rs_requested) 1028 return 1; 1029 return 0; 1030 } 1031 1032 static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) 1033 { 1034 return (bi->bi_start * GFS2_NBBY) + blk; 1035 } 1036 1037 /** 1038 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 1039 * @rgd: The rgrp 1040 * @last_unlinked: block address of the last dinode we unlinked 1041 * @skip: block address we should explicitly not unlink 1042 * 1043 * Returns: 0 if no 
error 1044 * The inode, if one has been found, in inode. 1045 */ 1046 1047 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1048 { 1049 u32 goal = 0, block; 1050 u64 no_addr; 1051 struct gfs2_sbd *sdp = rgd->rd_sbd; 1052 struct gfs2_glock *gl; 1053 struct gfs2_inode *ip; 1054 int error; 1055 int found = 0; 1056 struct gfs2_bitmap *bi; 1057 1058 while (goal < rgd->rd_data) { 1059 down_write(&sdp->sd_log_flush_lock); 1060 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); 1061 up_write(&sdp->sd_log_flush_lock); 1062 if (block == BFITNOENT) 1063 break; 1064 1065 block = gfs2_bi2rgd_blk(bi, block); 1066 /* rgblk_search can return a block < goal, so we need to 1067 keep it marching forward. */ 1068 no_addr = block + rgd->rd_data0; 1069 goal = max(block + 1, goal + 1); 1070 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 1071 continue; 1072 if (no_addr == skip) 1073 continue; 1074 *last_unlinked = no_addr; 1075 1076 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); 1077 if (error) 1078 continue; 1079 1080 /* If the inode is already in cache, we can ignore it here 1081 * because the existing inode disposal code will deal with 1082 * it when all refs have gone away. Accessing gl_object like 1083 * this is not safe in general. Here it is ok because we do 1084 * not dereference the pointer, and we only need an approx 1085 * answer to whether it is NULL or not. 
1086 */ 1087 ip = gl->gl_object; 1088 1089 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) 1090 gfs2_glock_put(gl); 1091 else 1092 found++; 1093 1094 /* Limit reclaim to sensible number of tasks */ 1095 if (found > NR_CPUS) 1096 return; 1097 } 1098 1099 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1100 return; 1101 } 1102 1103 /** 1104 * get_local_rgrp - Choose and lock a rgrp for allocation 1105 * @ip: the inode to reserve space for 1106 * @last_unlinked: the last unlinked block 1107 * 1108 * Try to acquire rgrp in way which avoids contending with others. 1109 * 1110 * Returns: errno 1111 */ 1112 1113 static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) 1114 { 1115 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1116 struct gfs2_rgrpd *rgd, *begin = NULL; 1117 struct gfs2_blkreserv *rs = ip->i_res; 1118 int error, rg_locked, flags = LM_FLAG_TRY; 1119 int loops = 0; 1120 1121 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1122 rgd = begin = ip->i_rgd; 1123 else 1124 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1125 1126 if (rgd == NULL) 1127 return -EBADSLT; 1128 1129 while (loops < 3) { 1130 rg_locked = 0; 1131 1132 if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { 1133 rg_locked = 1; 1134 error = 0; 1135 } else { 1136 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1137 flags, &rs->rs_rgd_gh); 1138 } 1139 switch (error) { 1140 case 0: 1141 if (try_rgrp_fit(rgd, ip)) { 1142 ip->i_rgd = rgd; 1143 return 0; 1144 } 1145 if (rgd->rd_flags & GFS2_RDF_CHECK) 1146 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1147 if (!rg_locked) 1148 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1149 /* fall through */ 1150 case GLR_TRYFAILED: 1151 rgd = gfs2_rgrpd_get_next(rgd); 1152 if (rgd == begin) { 1153 flags = 0; 1154 loops++; 1155 } 1156 break; 1157 default: 1158 return error; 1159 } 1160 } 1161 1162 return -ENOSPC; 1163 } 1164 1165 static void gfs2_blkrsv_put(struct gfs2_inode *ip) 1166 { 1167 BUG_ON(ip->i_res == NULL); 1168 
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 1169 ip->i_res = NULL; 1170 } 1171 1172 /** 1173 * gfs2_inplace_reserve - Reserve space in the filesystem 1174 * @ip: the inode to reserve space for 1175 * @requested: the number of blocks to be reserved 1176 * 1177 * Returns: errno 1178 */ 1179 1180 int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1181 { 1182 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1183 struct gfs2_blkreserv *rs; 1184 int error; 1185 u64 last_unlinked = NO_BLOCK; 1186 int tries = 0; 1187 1188 error = gfs2_blkrsv_get(ip); 1189 if (error) 1190 return error; 1191 1192 rs = ip->i_res; 1193 rs->rs_requested = requested; 1194 if (gfs2_assert_warn(sdp, requested)) { 1195 error = -EINVAL; 1196 goto out; 1197 } 1198 1199 do { 1200 error = get_local_rgrp(ip, &last_unlinked); 1201 if (error != -ENOSPC) 1202 break; 1203 /* Check that fs hasn't grown if writing to rindex */ 1204 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { 1205 error = gfs2_ri_update(ip); 1206 if (error) 1207 break; 1208 continue; 1209 } 1210 /* Flushing the log may release space */ 1211 gfs2_log_flush(sdp, NULL); 1212 } while (tries++ < 3); 1213 1214 out: 1215 if (error) 1216 gfs2_blkrsv_put(ip); 1217 return error; 1218 } 1219 1220 /** 1221 * gfs2_inplace_release - release an inplace reservation 1222 * @ip: the inode the reservation was taken out on 1223 * 1224 * Release a reservation made by gfs2_inplace_reserve(). 
1225 */ 1226 1227 void gfs2_inplace_release(struct gfs2_inode *ip) 1228 { 1229 struct gfs2_blkreserv *rs = ip->i_res; 1230 1231 if (rs->rs_rgd_gh.gh_gl) 1232 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1233 gfs2_blkrsv_put(ip); 1234 } 1235 1236 /** 1237 * gfs2_get_block_type - Check a block in a RG is of given type 1238 * @rgd: the resource group holding the block 1239 * @block: the block number 1240 * 1241 * Returns: The block type (GFS2_BLKST_*) 1242 */ 1243 1244 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1245 { 1246 struct gfs2_bitmap *bi = NULL; 1247 u32 length, rgrp_block, buf_block; 1248 unsigned int buf; 1249 unsigned char type; 1250 1251 length = rgd->rd_length; 1252 rgrp_block = block - rgd->rd_data0; 1253 1254 for (buf = 0; buf < length; buf++) { 1255 bi = rgd->rd_bits + buf; 1256 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1257 break; 1258 } 1259 1260 gfs2_assert(rgd->rd_sbd, buf < length); 1261 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; 1262 1263 type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1264 bi->bi_len, buf_block); 1265 1266 return type; 1267 } 1268 1269 /** 1270 * rgblk_search - find a block in @state 1271 * @rgd: the resource group descriptor 1272 * @goal: the goal block within the RG (start here to search for avail block) 1273 * @state: GFS2_BLKST_XXX the before-allocation state to find 1274 * @rbi: address of the pointer to the bitmap containing the block found 1275 * 1276 * Walk rgrp's bitmap to find bits that represent a block in @state. 1277 * 1278 * This function never fails, because we wouldn't call it unless we 1279 * know (from reservation results, etc.) that a block is available. 1280 * 1281 * Scope of @goal is just within rgrp, not the whole filesystem. 1282 * Scope of @returned block is just within bitmap, not the whole filesystem. 
1283 * 1284 * Returns: the block number found relative to the bitmap rbi 1285 */ 1286 1287 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, 1288 struct gfs2_bitmap **rbi) 1289 { 1290 struct gfs2_bitmap *bi = NULL; 1291 const u32 length = rgd->rd_length; 1292 u32 biblk = BFITNOENT; 1293 unsigned int buf, x; 1294 const u8 *buffer = NULL; 1295 1296 *rbi = NULL; 1297 /* Find bitmap block that contains bits for goal block */ 1298 for (buf = 0; buf < length; buf++) { 1299 bi = rgd->rd_bits + buf; 1300 /* Convert scope of "goal" from rgrp-wide to within found bit block */ 1301 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { 1302 goal -= bi->bi_start * GFS2_NBBY; 1303 goto do_search; 1304 } 1305 } 1306 buf = 0; 1307 goal = 0; 1308 1309 do_search: 1310 /* Search (up to entire) bitmap in this rgrp for allocatable block. 1311 "x <= length", instead of "x < length", because we typically start 1312 the search in the middle of a bit block, but if we can't find an 1313 allocatable block anywhere else, we want to be able wrap around and 1314 search in the first part of our first-searched bit block. */ 1315 for (x = 0; x <= length; x++) { 1316 bi = rgd->rd_bits + buf; 1317 1318 if (test_bit(GBF_FULL, &bi->bi_flags) && 1319 (state == GFS2_BLKST_FREE)) 1320 goto skip; 1321 1322 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1323 bitmaps, so we must search the originals for that. 
*/ 1324 buffer = bi->bi_bh->b_data + bi->bi_offset; 1325 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1326 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1327 buffer = bi->bi_clone + bi->bi_offset; 1328 1329 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); 1330 if (biblk != BFITNOENT) 1331 break; 1332 1333 if ((goal == 0) && (state == GFS2_BLKST_FREE)) 1334 set_bit(GBF_FULL, &bi->bi_flags); 1335 1336 /* Try next bitmap block (wrap back to rgrp header if at end) */ 1337 skip: 1338 buf++; 1339 buf %= length; 1340 goal = 0; 1341 } 1342 1343 if (biblk != BFITNOENT) 1344 *rbi = bi; 1345 1346 return biblk; 1347 } 1348 1349 /** 1350 * gfs2_alloc_extent - allocate an extent from a given bitmap 1351 * @rgd: the resource group descriptor 1352 * @bi: the bitmap within the rgrp 1353 * @blk: the block within the bitmap 1354 * @dinode: TRUE if the first block we allocate is for a dinode 1355 * @n: The extent length 1356 * 1357 * Add the found bitmap buffer to the transaction. 1358 * Set the found bits to @new_state to change block's allocation state. 1359 * Returns: starting block number of the extent (fs scope) 1360 */ 1361 static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, 1362 u32 blk, bool dinode, unsigned int *n) 1363 { 1364 const unsigned int elen = *n; 1365 u32 goal; 1366 const u8 *buffer = NULL; 1367 1368 *n = 0; 1369 buffer = bi->bi_bh->b_data + bi->bi_offset; 1370 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1371 gfs2_setbit(rgd, bi->bi_clone, bi, blk, 1372 dinode ? 
GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1373 (*n)++; 1374 goal = blk; 1375 while (*n < elen) { 1376 goal++; 1377 if (goal >= (bi->bi_len * GFS2_NBBY)) 1378 break; 1379 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != 1380 GFS2_BLKST_FREE) 1381 break; 1382 gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); 1383 (*n)++; 1384 } 1385 blk = gfs2_bi2rgd_blk(bi, blk); 1386 rgd->rd_last_alloc = blk + *n - 1; 1387 return rgd->rd_data0 + blk; 1388 } 1389 1390 /** 1391 * rgblk_free - Change alloc state of given block(s) 1392 * @sdp: the filesystem 1393 * @bstart: the start of a run of blocks to free 1394 * @blen: the length of the block run (all must lie within ONE RG!) 1395 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1396 * 1397 * Returns: Resource group containing the block(s) 1398 */ 1399 1400 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1401 u32 blen, unsigned char new_state) 1402 { 1403 struct gfs2_rgrpd *rgd; 1404 struct gfs2_bitmap *bi = NULL; 1405 u32 length, rgrp_blk, buf_blk; 1406 unsigned int buf; 1407 1408 rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1409 if (!rgd) { 1410 if (gfs2_consist(sdp)) 1411 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1412 return NULL; 1413 } 1414 1415 length = rgd->rd_length; 1416 1417 rgrp_blk = bstart - rgd->rd_data0; 1418 1419 while (blen--) { 1420 for (buf = 0; buf < length; buf++) { 1421 bi = rgd->rd_bits + buf; 1422 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1423 break; 1424 } 1425 1426 gfs2_assert(rgd->rd_sbd, buf < length); 1427 1428 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; 1429 rgrp_blk++; 1430 1431 if (!bi->bi_clone) { 1432 bi->bi_clone = kmalloc(bi->bi_bh->b_size, 1433 GFP_NOFS | __GFP_NOFAIL); 1434 memcpy(bi->bi_clone + bi->bi_offset, 1435 bi->bi_bh->b_data + bi->bi_offset, 1436 bi->bi_len); 1437 } 1438 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1439 gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); 1440 } 1441 1442 return rgd; 1443 } 1444 1445 /** 
1446 * gfs2_rgrp_dump - print out an rgrp 1447 * @seq: The iterator 1448 * @gl: The glock in question 1449 * 1450 */ 1451 1452 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1453 { 1454 const struct gfs2_rgrpd *rgd = gl->gl_object; 1455 if (rgd == NULL) 1456 return 0; 1457 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", 1458 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1459 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); 1460 return 0; 1461 } 1462 1463 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) 1464 { 1465 struct gfs2_sbd *sdp = rgd->rd_sbd; 1466 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 1467 (unsigned long long)rgd->rd_addr); 1468 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); 1469 gfs2_rgrp_dump(NULL, rgd->rd_gl); 1470 rgd->rd_flags |= GFS2_RDF_ERROR; 1471 } 1472 1473 /** 1474 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 1475 * @ip: the inode to allocate the block for 1476 * @bn: Used to return the starting block number 1477 * @ndata: requested number of blocks/extent length (value/result) 1478 * @dinode: 1 if we're allocating a dinode block, else 0 1479 * @generation: the generation number of the inode 1480 * 1481 * Returns: 0 or error 1482 */ 1483 1484 int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, 1485 bool dinode, u64 *generation) 1486 { 1487 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1488 struct buffer_head *dibh; 1489 struct gfs2_rgrpd *rgd; 1490 unsigned int ndata; 1491 u32 goal, blk; /* block, within the rgrp scope */ 1492 u64 block; /* block, within the file system scope */ 1493 int error; 1494 struct gfs2_bitmap *bi; 1495 1496 /* Only happens if there is a bug in gfs2, return something distinctive 1497 * to ensure that it is noticed. 
1498 */ 1499 if (ip->i_res == NULL) 1500 return -ECANCELED; 1501 1502 rgd = ip->i_rgd; 1503 1504 if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) 1505 goal = ip->i_goal - rgd->rd_data0; 1506 else 1507 goal = rgd->rd_last_alloc; 1508 1509 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); 1510 1511 /* Since all blocks are reserved in advance, this shouldn't happen */ 1512 if (blk == BFITNOENT) 1513 goto rgrp_error; 1514 1515 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); 1516 ndata = *nblocks; 1517 if (dinode) 1518 ndata--; 1519 1520 if (!dinode) { 1521 ip->i_goal = block + ndata - 1; 1522 error = gfs2_meta_inode_buffer(ip, &dibh); 1523 if (error == 0) { 1524 struct gfs2_dinode *di = 1525 (struct gfs2_dinode *)dibh->b_data; 1526 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1527 di->di_goal_meta = di->di_goal_data = 1528 cpu_to_be64(ip->i_goal); 1529 brelse(dibh); 1530 } 1531 } 1532 if (rgd->rd_free < *nblocks) 1533 goto rgrp_error; 1534 1535 rgd->rd_free -= *nblocks; 1536 if (dinode) { 1537 rgd->rd_dinodes++; 1538 *generation = rgd->rd_igeneration++; 1539 if (*generation == 0) 1540 *generation = rgd->rd_igeneration++; 1541 } 1542 1543 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1544 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1545 1546 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 1547 if (dinode) 1548 gfs2_trans_add_unrevoke(sdp, block, 1); 1549 1550 /* 1551 * This needs reviewing to see why we cannot do the quota change 1552 * at this point in the dinode case. 1553 */ 1554 if (ndata) 1555 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 1556 ip->i_inode.i_gid); 1557 1558 rgd->rd_free_clone -= *nblocks; 1559 trace_gfs2_block_alloc(ip, rgd, block, *nblocks, 1560 dinode ? 
GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1561 *bn = block; 1562 return 0; 1563 1564 rgrp_error: 1565 gfs2_rgrp_error(rgd); 1566 return -EIO; 1567 } 1568 1569 /** 1570 * __gfs2_free_blocks - free a contiguous run of block(s) 1571 * @ip: the inode these blocks are being freed from 1572 * @bstart: first block of a run of contiguous blocks 1573 * @blen: the length of the block run 1574 * @meta: 1 if the blocks represent metadata 1575 * 1576 */ 1577 1578 void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) 1579 { 1580 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1581 struct gfs2_rgrpd *rgd; 1582 1583 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); 1584 if (!rgd) 1585 return; 1586 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 1587 rgd->rd_free += blen; 1588 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 1589 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1590 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1591 1592 /* Directories keep their data in the metadata address space */ 1593 if (meta || ip->i_depth) 1594 gfs2_meta_wipe(ip, bstart, blen); 1595 } 1596 1597 /** 1598 * gfs2_free_meta - free a contiguous run of data block(s) 1599 * @ip: the inode these blocks are being freed from 1600 * @bstart: first block of a run of contiguous blocks 1601 * @blen: the length of the block run 1602 * 1603 */ 1604 1605 void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 1606 { 1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1608 1609 __gfs2_free_blocks(ip, bstart, blen, 1); 1610 gfs2_statfs_change(sdp, 0, +blen, 0); 1611 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1612 } 1613 1614 void gfs2_unlink_di(struct inode *inode) 1615 { 1616 struct gfs2_inode *ip = GFS2_I(inode); 1617 struct gfs2_sbd *sdp = GFS2_SB(inode); 1618 struct gfs2_rgrpd *rgd; 1619 u64 blkno = ip->i_no_addr; 1620 1621 rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED); 1622 if (!rgd) 1623 return; 1624 
trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 1625 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1626 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1627 } 1628 1629 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 1630 { 1631 struct gfs2_sbd *sdp = rgd->rd_sbd; 1632 struct gfs2_rgrpd *tmp_rgd; 1633 1634 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); 1635 if (!tmp_rgd) 1636 return; 1637 gfs2_assert_withdraw(sdp, rgd == tmp_rgd); 1638 1639 if (!rgd->rd_dinodes) 1640 gfs2_consist_rgrpd(rgd); 1641 rgd->rd_dinodes--; 1642 rgd->rd_free++; 1643 1644 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1645 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1646 1647 gfs2_statfs_change(sdp, 0, +1, -1); 1648 } 1649 1650 1651 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1652 { 1653 gfs2_free_uninit_di(rgd, ip->i_no_addr); 1654 trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); 1655 gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); 1656 gfs2_meta_wipe(ip, ip->i_no_addr, 1); 1657 } 1658 1659 /** 1660 * gfs2_check_blk_type - Check the type of a block 1661 * @sdp: The superblock 1662 * @no_addr: The block number to check 1663 * @type: The block type we are looking for 1664 * 1665 * Returns: 0 if the block type matches the expected type 1666 * -ESTALE if it doesn't match 1667 * or -ve errno if something went wrong while checking 1668 */ 1669 1670 int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) 1671 { 1672 struct gfs2_rgrpd *rgd; 1673 struct gfs2_holder rgd_gh; 1674 int error = -EINVAL; 1675 1676 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); 1677 if (!rgd) 1678 goto fail; 1679 1680 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); 1681 if (error) 1682 goto fail; 1683 1684 if (gfs2_get_block_type(rgd, no_addr) != type) 1685 error = -ESTALE; 1686 1687 gfs2_glock_dq_uninit(&rgd_gh); 1688 fail: 1689 return error; 1690 } 1691 1692 /** 
1693 * gfs2_rlist_add - add a RG to a list of RGs 1694 * @ip: the inode 1695 * @rlist: the list of resource groups 1696 * @block: the block 1697 * 1698 * Figure out what RG a block belongs to and add that RG to the list 1699 * 1700 * FIXME: Don't use NOFAIL 1701 * 1702 */ 1703 1704 void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, 1705 u64 block) 1706 { 1707 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1708 struct gfs2_rgrpd *rgd; 1709 struct gfs2_rgrpd **tmp; 1710 unsigned int new_space; 1711 unsigned int x; 1712 1713 if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) 1714 return; 1715 1716 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 1717 rgd = ip->i_rgd; 1718 else 1719 rgd = gfs2_blk2rgrpd(sdp, block, 1); 1720 if (!rgd) { 1721 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 1722 return; 1723 } 1724 ip->i_rgd = rgd; 1725 1726 for (x = 0; x < rlist->rl_rgrps; x++) 1727 if (rlist->rl_rgd[x] == rgd) 1728 return; 1729 1730 if (rlist->rl_rgrps == rlist->rl_space) { 1731 new_space = rlist->rl_space + 10; 1732 1733 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), 1734 GFP_NOFS | __GFP_NOFAIL); 1735 1736 if (rlist->rl_rgd) { 1737 memcpy(tmp, rlist->rl_rgd, 1738 rlist->rl_space * sizeof(struct gfs2_rgrpd *)); 1739 kfree(rlist->rl_rgd); 1740 } 1741 1742 rlist->rl_space = new_space; 1743 rlist->rl_rgd = tmp; 1744 } 1745 1746 rlist->rl_rgd[rlist->rl_rgrps++] = rgd; 1747 } 1748 1749 /** 1750 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate 1751 * and initialize an array of glock holders for them 1752 * @rlist: the list of resource groups 1753 * @state: the lock state to acquire the RG lock in 1754 * 1755 * FIXME: Don't use NOFAIL 1756 * 1757 */ 1758 1759 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) 1760 { 1761 unsigned int x; 1762 1763 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), 1764 GFP_NOFS | __GFP_NOFAIL); 1765 for (x = 0; x < 
rlist->rl_rgrps; x++) 1766 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, 1767 state, 0, 1768 &rlist->rl_ghs[x]); 1769 } 1770 1771 /** 1772 * gfs2_rlist_free - free a resource group list 1773 * @list: the list of resource groups 1774 * 1775 */ 1776 1777 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) 1778 { 1779 unsigned int x; 1780 1781 kfree(rlist->rl_rgd); 1782 1783 if (rlist->rl_ghs) { 1784 for (x = 0; x < rlist->rl_rgrps; x++) 1785 gfs2_holder_uninit(&rlist->rl_ghs[x]); 1786 kfree(rlist->rl_ghs); 1787 } 1788 } 1789 1790