1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 /* 11 * Implements Extendible Hashing as described in: 12 * "Extendible Hashing" by Fagin, et al in 13 * __ACM Trans. on Database Systems__, Sept 1979. 14 * 15 * 16 * Here's the layout of dirents which is essentially the same as that of ext2 17 * within a single block. The field de_name_len is the number of bytes 18 * actually required for the name (no null terminator). The field de_rec_len 19 * is the number of bytes allocated to the dirent. The offset of the next 20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is 21 * deleted, the preceding dirent inherits its allocated space, ie 22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained 23 * by adding de_rec_len to the current dirent, this essentially causes the 24 * deleted dirent to get jumped over when iterating through all the dirents. 25 * 26 * When deleting the first dirent in a block, there is no previous dirent so 27 * the field de_ino is set to zero to designate it as deleted. When allocating 28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the 29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first 30 * dirent is allocated. Otherwise it must go through all the 'used' dirents 31 * searching for one in which the amount of total space minus the amount of 32 * used space will provide enough space for the new dirent. 33 * 34 * There are two types of blocks in which dirents reside. In a stuffed dinode, 35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of 36 * the block. 
In leaves, they begin at offset sizeof(struct gfs2_leaf) from the 37 * beginning of the leaf block. The dirents reside in leaves when 38 * 39 * dip->i_diskflags & GFS2_DIF_EXHASH is true 40 * 41 * Otherwise, the dirents are "linear", within a single stuffed dinode block. 42 * 43 * When the dirents are in leaves, the actual contents of the directory file are 44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The 45 * dirents are NOT in the directory file itself. There can be more than one 46 * block pointer in the array that points to the same leaf. In fact, when a 47 * directory is first converted from linear to exhash, all of the pointers 48 * point to the same leaf. 49 * 50 * When a leaf is completely full, the size of the hash table can be 51 * doubled unless it is already at the maximum size which is hard coded into 52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, 53 * but never before the maximum hash table size has been reached. 
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/sort.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/vmalloc.h>

#include "gfs2.h"
#include "incore.h"
#include "dir.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "bmap.h"
#include "util.h"

#define IS_LEAF     1 /* Hashed (leaf) directory */
#define IS_DINODE   2 /* Linear (stuffed dinode block) directory */

#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */

/* A directory file offset is the dirent hash shifted down one bit (and
   vice versa) so that offsets stay non-negative when used as loff_t. */
#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))

struct qstr gfs2_qdot __read_mostly;
struct qstr gfs2_qdotdot __read_mostly;

/* Callback for gfs2_dirent_scan: return non-zero to stop the scan,
   negative to report an error. */
typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque);

/**
 * gfs2_dir_get_new_buffer - Get and initialize a new journaled-data block
 * @ip: The directory inode
 * @block: The disk block number
 * @bhp: Filled in with the new buffer head
 *
 * The buffer is added to the current transaction, stamped with the
 * journaled-data metatype, and has its tail cleared past the meta header.
 *
 * Returns: 0 (cannot currently fail)
 */
int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
			    struct buffer_head **bhp)
{
	struct buffer_head *bh;

	bh = gfs2_meta_new(ip->i_gl, block);
	gfs2_trans_add_meta(ip->i_gl, bh);
	gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
	gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
	*bhp = bh;
	return 0;
}

/**
 * gfs2_dir_get_existing_buffer - Read an existing journaled-data block
 * @ip: The directory inode
 * @block: The disk block number
 * @bhp: Filled in with the buffer head on success
 *
 * Returns: 0 on success, -EIO on a metatype mismatch, or the read error
 */
static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
					struct buffer_head **bhp)
{
	struct buffer_head *bh;
	int error;

	error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh);
	if (error)
		return error;
	if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
		brelse(bh);
		return -EIO;
	}
	*bhp = bh;
	return 0;
}

/**
 * gfs2_dir_write_stuffed - Write directory data into a stuffed dinode
 * @ip: The directory inode (caller has verified it is stuffed)
 * @buf: The data to write
 * @offset: Byte offset within the directory data area
 * @size: Number of bytes to write
 *
 * Also updates i_size and the m/ctime stamps in the dinode.
 *
 * Returns: @size on success, or a negative errno
 */
static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
				  unsigned int offset, unsigned int size)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	/* Directory data lives immediately after the on-disk dinode. */
	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
	if (ip->i_inode.i_size < offset + size)
		i_size_write(&ip->i_inode, offset + size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_dinode_out(ip, dibh->b_data);

	brelse(dibh);

	return size;
}



/**
 * gfs2_dir_write_data - Write directory information to the inode
 * @ip: The GFS2 inode
 * @buf: The buffer containing information to be written
 * @offset: The file offset to start writing at
 * @size: The amount of data to write
 *
 * Returns: The number of bytes correctly written or error code
 */
static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
			       u64 offset, unsigned int size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	u64 lblock, dblock;
	u32 extlen = 0;
	unsigned int o;
	int copied = 0;
	int error = 0;
	int new = 0;

	if (!size)
		return 0;

	/* Fast path: the whole write fits inside the stuffed dinode. */
	if (gfs2_is_stuffed(ip) &&
	    offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
		return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
					      size);

	/* Directory data is always journaled data. */
	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
		return -EINVAL;

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			return error;
	}

	/* Split the offset into a logical block number plus an intra-block
	   offset (past the per-block meta header). */
	lblock = offset;
	o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);

	while (copied < size) {
		unsigned int amount;
		struct buffer_head *bh;

		amount = size - copied;
		if (amount > sdp->sd_sb.sb_bsize - o)
			amount = sdp->sd_sb.sb_bsize - o;

		if (!extlen) {
			/* Map (allocating if necessary) the next extent. */
			new = 1;
			error = gfs2_extent_map(&ip->i_inode, lblock, &new,
						&dblock, &extlen);
			if (error)
				goto fail;
			error = -EIO;
			if (gfs2_assert_withdraw(sdp, dblock))
				goto fail;
		}

		/* A newly allocated block, or one we are going to overwrite
		   completely, need not be read from disk first. */
		if (amount == sdp->sd_jbsize || new)
			error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
		else
			error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);

		if (error)
			goto fail;

		gfs2_trans_add_meta(ip->i_gl, bh);
		memcpy(bh->b_data + o, buf, amount);
		brelse(bh);

		buf += amount;
		copied += amount;
		lblock++;
		dblock++;
		extlen--;

		o = sizeof(struct gfs2_meta_header);
	}

out:
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (ip->i_inode.i_size < offset + copied)
		i_size_write(&ip->i_inode, offset + copied);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

	return copied;
fail:
	/* Partial progress is still reported as a (short) byte count;
	   callers compare the return value against the requested size. */
	if (copied)
		goto out;
	return error;
}

/**
 * gfs2_dir_read_stuffed - Read directory data out of a stuffed dinode
 * @ip: The directory inode
 * @buf: Destination buffer
 * @size: Number of bytes to read (from offset 0)
 *
 * Returns: @size on success, or a negative errno
 */
static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
				 unsigned int size)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
		brelse(dibh);
	}

	return (error) ? error : size;
}


/**
 * gfs2_dir_read_data - Read data from a directory inode
 * @ip: The GFS2 Inode
 * @buf: The buffer to place result into
 * @size: Amount of data to transfer
 *
 * Returns: The amount of data actually copied or the error
 */
static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
			      unsigned int size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 lblock, dblock;
	u32 extlen = 0;
	unsigned int o;
	int copied = 0;
	int error = 0;

	if (gfs2_is_stuffed(ip))
		return gfs2_dir_read_stuffed(ip, buf, size);

	if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
		return -EINVAL;

	lblock = 0;
	o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);

	while (copied < size) {
		unsigned int amount;
		struct buffer_head *bh;
		int new;

		amount = size - copied;
		if (amount > sdp->sd_sb.sb_bsize - o)
			amount = sdp->sd_sb.sb_bsize - o;

		if (!extlen) {
			new = 0;
			error = gfs2_extent_map(&ip->i_inode, lblock, &new,
						&dblock, &extlen);
			if (error || !dblock)
				goto fail;
			BUG_ON(extlen < 1);
			/* Kick off read-ahead for the whole extent. */
			bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
		} else {
			error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
			if (error)
				goto fail;
		}
		error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
		if (error) {
			brelse(bh);
			goto fail;
		}
		dblock++;
		extlen--;
		memcpy(buf, bh->b_data + o, amount);
		brelse(bh);
		buf += (amount/sizeof(__be64));
		copied += amount;
		lblock++;
		o = sizeof(struct gfs2_meta_header);
	}

	return copied;
fail:
	return (copied) ? copied : error;
}

/**
 * gfs2_dir_get_hash_table - Get pointer to the dir hash table
 * @ip: The inode in question
 *
 * The table is read from disk once and then cached in ip->i_hash_cache
 * until gfs2_dir_hash_inval() is called.
 *
 * Returns: The hash table or an error
 */

static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	int ret;
	u32 hsize;
	__be64 *hc;

	BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));

	hc = ip->i_hash_cache;
	if (hc)
		return hc;

	/* An exhash directory's i_size must equal the table size exactly. */
	hsize = 1 << ip->i_depth;
	hsize *= sizeof(__be64);
	if (hsize != i_size_read(&ip->i_inode)) {
		gfs2_consist_inode(ip);
		return ERR_PTR(-EIO);
	}

	/* Try kmalloc first, falling back to vmalloc for large tables. */
	hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
	if (hc == NULL)
		hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL);

	if (hc == NULL)
		return ERR_PTR(-ENOMEM);

	ret = gfs2_dir_read_data(ip, hc, hsize);
	if (ret < 0) {
		kvfree(hc);
		return ERR_PTR(ret);
	}

	/* Another reader may have installed a cache while we read; if so,
	   keep theirs and free ours. */
	spin_lock(&inode->i_lock);
	if (likely(!ip->i_hash_cache)) {
		ip->i_hash_cache = hc;
		hc = NULL;
	}
	spin_unlock(&inode->i_lock);
	kvfree(hc);

	return ip->i_hash_cache;
}

/**
 * gfs2_dir_hash_inval - Invalidate dir hash
 * @ip: The directory inode
 *
 * Must be called with an exclusive glock, or during glock invalidation.
 */
void gfs2_dir_hash_inval(struct gfs2_inode *ip)
{
	__be64 *hc = ip->i_hash_cache;
	ip->i_hash_cache = NULL;
	kvfree(hc);
}

/* A "sentinel" dirent marks deleted space at the front of a block:
   its inode number fields are zero. */
static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
{
	return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
}

/* Core name match: returns @ret on a match, 0 otherwise. The hash is
   compared first as a cheap reject before the memcmp. */
static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
				     const struct qstr *name, int ret)
{
	if (!gfs2_dirent_sentinel(dent) &&
	    be32_to_cpu(dent->de_hash) == name->hash &&
	    be16_to_cpu(dent->de_name_len) == name->len &&
	    memcmp(dent+1, name->name, name->len) == 0)
		return ret;
	return 0;
}

/* Scan callback: stop at the matching dirent. */
static int gfs2_dirent_find(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	return __gfs2_dirent_find(dent, name, 1);
}

/* Scan callback: like gfs2_dirent_find, but scan result 2 makes
   gfs2_dirent_scan return the PREVIOUS dirent instead of the match. */
static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	return __gfs2_dirent_find(dent, name, 2);
}

/*
 * name->name holds ptr to start of block.
 * name->len holds size of block.
 */
static int gfs2_dirent_last(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	const char *start = name->name;
	const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
	/* The last dirent's record runs exactly to the end of the block. */
	if (name->len == (end - start))
		return 1;
	return 0;
}

/* Scan callback: match the first dirent with enough unused space to hold
   a new entry for @name. A sentinel's space is considered entirely free. */
static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
				  const struct qstr *name,
				  void *opaque)
{
	unsigned required = GFS2_DIRENT_SIZE(name->len);
	unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
	unsigned totlen = be16_to_cpu(dent->de_rec_len);

	if (gfs2_dirent_sentinel(dent))
		actual = 0;
	if (totlen - actual >= required)
		return 1;
	return 0;
}

/* State for gfs2_dirent_gather: accumulates live dirent pointers. */
struct dirent_gather {
	const struct gfs2_dirent **pdent;
	unsigned offset;	/* next free slot in pdent */
};

/* Scan callback: collect every live (non-sentinel) dirent; never stops
   the scan early. */
static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
			      const struct qstr *name,
			      void *opaque)
{
	struct dirent_gather *g = opaque;
	if (!gfs2_dirent_sentinel(dent)) {
		g->pdent[g->offset++] = dent;
	}
	return 0;
}

/*
 * Other possible things to check:
 * - Inode located within filesystem size (and on valid block)
 * - Valid directory entry type
 * Not sure how heavy-weight we want to make this... could also check
 * hash is correct for example, but that would take a lot of extra time.
 * For now the most important thing is to check that the various sizes
 * are correct.
 */
static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
			     unsigned int size, unsigned int len, int first)
{
	const char *msg = "gfs2_dirent too small";
	if (unlikely(size < sizeof(struct gfs2_dirent)))
		goto error;
	msg = "gfs2_dirent misaligned";
	if (unlikely(offset & 0x7))
		goto error;
	msg = "gfs2_dirent points beyond end of block";
	if (unlikely(offset + size > len))
		goto error;
	msg = "zero inode number";
	if (unlikely(!first && gfs2_dirent_sentinel(dent)))
		goto error;
	msg = "name length is greater than space in dirent";
	if (!gfs2_dirent_sentinel(dent) &&
	    unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
		     size))
		goto error;
	return 0;
error:
	pr_warn("%s: %s (%s)\n",
		__func__, msg, first ? "first in block" : "not first in block");
	return -EIO;
}

/* Offset of the first dirent within a block, keyed off the block's
   metatype (leaf vs. dinode). Returns -1 on an unexpected type. */
static int gfs2_dirent_offset(const void *buf)
{
	const struct gfs2_meta_header *h = buf;
	int offset;

	BUG_ON(buf == NULL);

	switch(be32_to_cpu(h->mh_type)) {
	case GFS2_METATYPE_LF:
		offset = sizeof(struct gfs2_leaf);
		break;
	case GFS2_METATYPE_DI:
		offset = sizeof(struct gfs2_dinode);
		break;
	default:
		goto wrong_type;
	}
	return offset;
wrong_type:
	pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type));
	return -1;
}

/* Walk every dirent in @buf applying @scan until it returns non-zero.
   Scan results: 0 = no match (return NULL), 1 = return the current
   dirent, 2 = return the previous dirent, <0 = error. Any structural
   inconsistency marks the inode bad and returns ERR_PTR(-EIO). */
static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
					    unsigned int len, gfs2_dscan_t scan,
					    const struct qstr *name,
					    void *opaque)
{
	struct gfs2_dirent *dent, *prev;
	unsigned offset;
	unsigned size;
	int ret = 0;

	ret = gfs2_dirent_offset(buf);
	if (ret < 0)
		goto consist_inode;

	offset = ret;
	prev = NULL;
	dent = buf + offset;
	size = be16_to_cpu(dent->de_rec_len);
	if (gfs2_check_dirent(dent, offset, size, len, 1))
		goto consist_inode;
	do {
		ret = scan(dent, name, opaque);
		if (ret)
			break;
		offset += size;
		if (offset == len)
			break;
		prev = dent;
		dent = buf + offset;
		size = be16_to_cpu(dent->de_rec_len);
		if (gfs2_check_dirent(dent, offset, size, len, 0))
			goto consist_inode;
	} while(1);

	switch(ret) {
	case 0:
		return NULL;
	case 1:
		return dent;
	case 2:
		return prev ? prev : dent;
	default:
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}

consist_inode:
	gfs2_consist_inode(GFS2_I(inode));
	return ERR_PTR(-EIO);
}

/* Validate a dirent's record length against the end of its block.
   Returns the record length, -ENOENT when this is the last dirent in
   the block, or -EIO (after marking the inode inconsistent) if the
   record length is corrupt. */
static int dirent_check_reclen(struct gfs2_inode *dip,
			       const struct gfs2_dirent *d, const void *end_p)
{
	const void *ptr = d;
	u16 rec_len = be16_to_cpu(d->de_rec_len);

	if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
		goto broken;
	ptr += rec_len;
	if (ptr < end_p)
		return rec_len;
	if (ptr == end_p)
		return -ENOENT;
broken:
	gfs2_consist_inode(dip);
	return -EIO;
}

/**
 * dirent_next - Next dirent
 * @dip: the directory
 * @bh: The buffer
 * @dent: Pointer to list of dirents
 *
 * Returns: 0 on success, error code otherwise
 */

static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
		       struct gfs2_dirent **dent)
{
	struct gfs2_dirent *cur = *dent, *tmp;
	char *bh_end = bh->b_data + bh->b_size;
	int ret;

	ret = dirent_check_reclen(dip, cur, bh_end);
	if (ret < 0)
		return ret;

	tmp = (void *)cur + ret;
	ret = dirent_check_reclen(dip, tmp, bh_end);
	if (ret == -EIO)
		return ret;

	/* Only the first dent could ever have de_inum.no_addr == 0 */
	if (gfs2_dirent_sentinel(tmp)) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	*dent = tmp;
	return 0;
}

/**
 * dirent_del - Delete a dirent
 * @dip: The GFS2 inode
 * @bh: The buffer
 * @prev: The previous dirent
 * @cur: The current dirent
 *
 */

static void
dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
	   struct gfs2_dirent *prev, struct gfs2_dirent *cur)
{
	u16 cur_rec_len, prev_rec_len;

	/* The entry being deleted must be a live one. */
	if (gfs2_dirent_sentinel(cur)) {
		gfs2_consist_inode(dip);
		return;
	}

	gfs2_trans_add_meta(dip->i_gl, bh);

	/* If there is no prev entry, this is the first entry in the block.
	   The de_rec_len is already as big as it needs to be.  Just zero
	   out the inode number and return.  */

	if (!prev) {
		cur->de_inum.no_addr = 0;
		cur->de_inum.no_formal_ino = 0;
		return;
	}

	/* Combine this dentry with the previous one: prev inherits cur's
	   allocated space, so cur gets skipped during iteration. */

	prev_rec_len = be16_to_cpu(prev->de_rec_len);
	cur_rec_len = be16_to_cpu(cur->de_rec_len);

	if ((char *)prev + prev_rec_len != (char *)cur)
		gfs2_consist_inode(dip);
	if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
		gfs2_consist_inode(dip);

	prev_rec_len += cur_rec_len;
	prev->de_rec_len = cpu_to_be16(prev_rec_len);
}

/*
 * Takes a dent from which to grab space as an argument. Returns the
 * newly created dent.
683 */ 684 static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, 685 struct gfs2_dirent *dent, 686 const struct qstr *name, 687 struct buffer_head *bh) 688 { 689 struct gfs2_inode *ip = GFS2_I(inode); 690 struct gfs2_dirent *ndent; 691 unsigned offset = 0, totlen; 692 693 if (!gfs2_dirent_sentinel(dent)) 694 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 695 totlen = be16_to_cpu(dent->de_rec_len); 696 BUG_ON(offset + name->len > totlen); 697 gfs2_trans_add_meta(ip->i_gl, bh); 698 ndent = (struct gfs2_dirent *)((char *)dent + offset); 699 dent->de_rec_len = cpu_to_be16(offset); 700 gfs2_qstr2dirent(name, totlen - offset, ndent); 701 return ndent; 702 } 703 704 static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, 705 struct buffer_head *bh, 706 const struct qstr *name) 707 { 708 struct gfs2_dirent *dent; 709 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 710 gfs2_dirent_find_space, name, NULL); 711 if (!dent || IS_ERR(dent)) 712 return dent; 713 return gfs2_init_dirent(inode, dent, name, bh); 714 } 715 716 static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, 717 struct buffer_head **bhp) 718 { 719 int error; 720 721 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); 722 if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { 723 /* pr_info("block num=%llu\n", leaf_no); */ 724 error = -EIO; 725 } 726 727 return error; 728 } 729 730 /** 731 * get_leaf_nr - Get a leaf number associated with the index 732 * @dip: The GFS2 inode 733 * @index: 734 * @leaf_out: 735 * 736 * Returns: 0 on success, error code otherwise 737 */ 738 739 static int get_leaf_nr(struct gfs2_inode *dip, u32 index, 740 u64 *leaf_out) 741 { 742 __be64 *hash; 743 744 hash = gfs2_dir_get_hash_table(dip); 745 if (IS_ERR(hash)) 746 return PTR_ERR(hash); 747 *leaf_out = be64_to_cpu(*(hash + index)); 748 return 0; 749 } 750 751 static int get_first_leaf(struct gfs2_inode *dip, u32 index, 752 struct buffer_head **bh_out) 753 
{
	u64 leaf_no;
	int error;

	error = get_leaf_nr(dip, index, &leaf_no);
	if (!error)
		error = get_leaf(dip, leaf_no, bh_out);

	return error;
}

/* Search a directory for @name using the @scan callback. For an exhash
   directory the hash table selects a leaf chain to walk; otherwise the
   stuffed dinode block is scanned directly. On success *pbh holds the
   block containing the returned dirent (caller must brelse it). */
static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
					      const struct qstr *name,
					      gfs2_dscan_t scan,
					      struct buffer_head **pbh)
{
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	struct gfs2_inode *ip = GFS2_I(inode);
	int error;

	if (ip->i_diskflags & GFS2_DIF_EXHASH) {
		struct gfs2_leaf *leaf;
		unsigned hsize = 1 << ip->i_depth;
		unsigned index;
		u64 ln;
		if (hsize * sizeof(u64) != i_size_read(inode)) {
			gfs2_consist_inode(ip);
			return ERR_PTR(-EIO);
		}

		/* The top i_depth bits of the name hash index the table. */
		index = name->hash >> (32 - ip->i_depth);
		error = get_first_leaf(ip, index, &bh);
		if (error)
			return ERR_PTR(error);
		do {
			dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
						scan, name, NULL);
			if (dent)
				goto got_dent;
			/* Not in this leaf; follow the overflow chain. */
			leaf = (struct gfs2_leaf *)bh->b_data;
			ln = be64_to_cpu(leaf->lf_next);
			brelse(bh);
			if (!ln)
				break;

			error = get_leaf(ip, ln, &bh);
		} while(!error);

		return error ? ERR_PTR(error) : NULL;
	}


	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		return ERR_PTR(error);
	dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
got_dent:
	if (unlikely(dent == NULL || IS_ERR(dent))) {
		brelse(bh);
		bh = NULL;
	}
	*pbh = bh;
	return dent;
}

/* Allocate and initialize a new empty leaf block at depth @depth. On
   success the leaf is returned and *pbh holds its (transaction-added)
   buffer; returns NULL on allocation failure. */
static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned int n = 1;
	u64 bn;
	int error;
	struct buffer_head *bh;
	struct gfs2_leaf *leaf;
	struct gfs2_dirent *dent;
	struct qstr name = { .name = "" };
	struct timespec tv = CURRENT_TIME;

	error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
	if (error)
		return NULL;
	bh = gfs2_meta_new(ip->i_gl, bn);
	if (!bh)
		return NULL;

	gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
	gfs2_trans_add_meta(ip->i_gl, bh);
	gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
	leaf = (struct gfs2_leaf *)bh->b_data;
	leaf->lf_depth = cpu_to_be16(depth);
	leaf->lf_entries = 0;
	leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
	leaf->lf_next = 0;
	leaf->lf_inode = cpu_to_be64(ip->i_no_addr);
	leaf->lf_dist = cpu_to_be32(1);
	leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
	leaf->lf_sec = cpu_to_be64(tv.tv_sec);
	memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2));
	/* A single empty (sentinel) dirent spans the whole new leaf. */
	dent = (struct gfs2_dirent *)(leaf+1);
	gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
	*pbh = bh;
	return leaf;
}

/**
 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
 * @dip: The GFS2 inode
 *
 * Returns: 0 on success, error code otherwise
 */

static int dir_make_exhash(struct inode *inode)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_dirent *dent;
	struct qstr args;
	struct
buffer_head *bh, *dibh;
	struct gfs2_leaf *leaf;
	int y;
	u32 x;
	__be64 *lp;
	u64 bn;
	int error;

	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (error)
		return error;

	/*  Turn over a new leaf  */

	leaf = new_leaf(inode, &bh, 0);
	if (!leaf)
		return -ENOSPC;
	bn = bh->b_blocknr;

	gfs2_assert(sdp, dip->i_entries < (1 << 16));
	leaf->lf_entries = cpu_to_be16(dip->i_entries);

	/*  Copy dirents from the dinode into the new leaf  */

	gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
			      sizeof(struct gfs2_dinode));

	/*  Find last entry  */

	x = 0;
	args.len = bh->b_size - sizeof(struct gfs2_dinode) +
		   sizeof(struct gfs2_leaf);
	args.name = bh->b_data;
	dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size,
				gfs2_dirent_last, &args, NULL);
	if (!dent) {
		brelse(bh);
		brelse(dibh);
		return -EIO;
	}
	if (IS_ERR(dent)) {
		brelse(bh);
		brelse(dibh);
		return PTR_ERR(dent);
	}

	/*  Adjust the last dirent's record length to fill the leaf, which
	    is larger than the dinode data area by the header difference.
	    (Remember that dent still points to the last entry.)  */

	dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
				       sizeof(struct gfs2_dinode) -
				       sizeof(struct gfs2_leaf));

	brelse(bh);

	/*  We're done with the new leaf block, now setup the new
	    hash table.  */

	gfs2_trans_add_meta(dip->i_gl, dibh);
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));

	/* Initially every hash table slot points at the one new leaf. */
	for (x = sdp->sd_hash_ptrs; x--; lp++)
		*lp = cpu_to_be64(bn);

	i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
	gfs2_add_inode_blocks(&dip->i_inode, 1);
	dip->i_diskflags |= GFS2_DIF_EXHASH;

	/* i_depth = log2(sd_hash_ptrs) */
	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
	dip->i_depth = y;

	gfs2_dinode_out(dip, dibh->b_data);

	brelse(dibh);

	return 0;
}

/**
 * dir_split_leaf - Split a leaf block into two
 * @dip: The GFS2 inode
 * @index:
 * @leaf_no:
 *
 * Returns: 0 on success, error code on failure
 */

static int dir_split_leaf(struct inode *inode, const struct qstr *name)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	struct buffer_head *nbh, *obh, *dibh;
	struct gfs2_leaf *nleaf, *oleaf;
	struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
	u32 start, len, half_len, divider;
	u64 bn, leaf_no;
	__be64 *lp;
	u32 index;
	int x, moved = 0;
	int error;

	index = name->hash >> (32 - dip->i_depth);
	error = get_leaf_nr(dip, index, &leaf_no);
	if (error)
		return error;

	/*  Get the old leaf block  */
	error = get_leaf(dip, leaf_no, &obh);
	if (error)
		return error;

	oleaf = (struct gfs2_leaf *)obh->b_data;
	/* A leaf already at full depth cannot be split any further. */
	if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) {
		brelse(obh);
		return 1; /* can't split */
	}

	gfs2_trans_add_meta(dip->i_gl, obh);

	nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
	if (!nleaf) {
		brelse(obh);
		return -ENOSPC;
	}
	bn = nbh->b_blocknr;

	/*  Compute the start and len of leaf pointers in the hash table.  */
	len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
	half_len = len >> 1;
	if (!half_len) {
		pr_warn("i_depth %u lf_depth %u index %u\n",
			dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
		gfs2_consist_inode(dip);
		error = -EIO;
		goto fail_brelse;
	}

	start = (index & ~(len - 1));

	/* Change the pointers.
	   Don't bother distinguishing stuffed from non-stuffed.
	   This code is complicated enough already. */
	lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS);
	if (!lp) {
		error = -ENOMEM;
		goto fail_brelse;
	}

	/*  Change the pointers: second half of the range now maps to the
	    new leaf  */
	for (x = 0; x < half_len; x++)
		lp[x] = cpu_to_be64(bn);

	/* The cached hash table is about to become stale. */
	gfs2_dir_hash_inval(dip);

	error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
				    half_len * sizeof(u64));
	if (error != half_len * sizeof(u64)) {
		if (error >= 0)
			error = -EIO;
		goto fail_lpfree;
	}

	kfree(lp);

	/*  Compute the divider: entries hashing below it move to the new
	    leaf  */
	divider = (start + half_len) << (32 - dip->i_depth);

	/*  Copy the entries  */
	dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf));

	do {
		next = dent;
		if (dirent_next(dip, obh, &next))
			next = NULL;

		if (!gfs2_dirent_sentinel(dent) &&
		    be32_to_cpu(dent->de_hash) < divider) {
			struct qstr str;
			str.name = (char*)(dent+1);
			str.len = be16_to_cpu(dent->de_name_len);
			str.hash = be32_to_cpu(dent->de_hash);
			new = gfs2_dirent_alloc(inode, nbh, &str);
			if (IS_ERR(new)) {
				error = PTR_ERR(new);
				break;
			}

			new->de_inum = dent->de_inum; /* No endian worries */
			new->de_type = dent->de_type; /* No endian worries */
			be16_add_cpu(&nleaf->lf_entries, 1);

			dirent_del(dip, obh, prev, dent);

			if (!oleaf->lf_entries)
				gfs2_consist_inode(dip);
			be16_add_cpu(&oleaf->lf_entries, -1);

			/* The deleted first dirent became a sentinel; it
			   now acts as "prev" for later iterations. */
			if (!prev)
				prev = dent;

moved = 1; 1072 } else { 1073 prev = dent; 1074 } 1075 dent = next; 1076 } while (dent); 1077 1078 oleaf->lf_depth = nleaf->lf_depth; 1079 1080 error = gfs2_meta_inode_buffer(dip, &dibh); 1081 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1082 gfs2_trans_add_meta(dip->i_gl, dibh); 1083 gfs2_add_inode_blocks(&dip->i_inode, 1); 1084 gfs2_dinode_out(dip, dibh->b_data); 1085 brelse(dibh); 1086 } 1087 1088 brelse(obh); 1089 brelse(nbh); 1090 1091 return error; 1092 1093 fail_lpfree: 1094 kfree(lp); 1095 1096 fail_brelse: 1097 brelse(obh); 1098 brelse(nbh); 1099 return error; 1100 } 1101 1102 /** 1103 * dir_double_exhash - Double size of ExHash table 1104 * @dip: The GFS2 dinode 1105 * 1106 * Returns: 0 on success, error code on failure 1107 */ 1108 1109 static int dir_double_exhash(struct gfs2_inode *dip) 1110 { 1111 struct buffer_head *dibh; 1112 u32 hsize; 1113 u32 hsize_bytes; 1114 __be64 *hc; 1115 __be64 *hc2, *h; 1116 int x; 1117 int error = 0; 1118 1119 hsize = 1 << dip->i_depth; 1120 hsize_bytes = hsize * sizeof(__be64); 1121 1122 hc = gfs2_dir_get_hash_table(dip); 1123 if (IS_ERR(hc)) 1124 return PTR_ERR(hc); 1125 1126 hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); 1127 if (hc2 == NULL) 1128 hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); 1129 1130 if (!hc2) 1131 return -ENOMEM; 1132 1133 h = hc2; 1134 error = gfs2_meta_inode_buffer(dip, &dibh); 1135 if (error) 1136 goto out_kfree; 1137 1138 for (x = 0; x < hsize; x++) { 1139 *h++ = *hc; 1140 *h++ = *hc; 1141 hc++; 1142 } 1143 1144 error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); 1145 if (error != (hsize_bytes * 2)) 1146 goto fail; 1147 1148 gfs2_dir_hash_inval(dip); 1149 dip->i_hash_cache = hc2; 1150 dip->i_depth++; 1151 gfs2_dinode_out(dip, dibh->b_data); 1152 brelse(dibh); 1153 return 0; 1154 1155 fail: 1156 /* Replace original hash table & size */ 1157 gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); 1158 i_size_write(&dip->i_inode, hsize_bytes); 1159 
gfs2_dinode_out(dip, dibh->b_data); 1160 brelse(dibh); 1161 out_kfree: 1162 kvfree(hc2); 1163 return error; 1164 } 1165 1166 /** 1167 * compare_dents - compare directory entries by hash value 1168 * @a: first dent 1169 * @b: second dent 1170 * 1171 * When comparing the hash entries of @a to @b: 1172 * gt: returns 1 1173 * lt: returns -1 1174 * eq: returns 0 1175 */ 1176 1177 static int compare_dents(const void *a, const void *b) 1178 { 1179 const struct gfs2_dirent *dent_a, *dent_b; 1180 u32 hash_a, hash_b; 1181 int ret = 0; 1182 1183 dent_a = *(const struct gfs2_dirent **)a; 1184 hash_a = be32_to_cpu(dent_a->de_hash); 1185 1186 dent_b = *(const struct gfs2_dirent **)b; 1187 hash_b = be32_to_cpu(dent_b->de_hash); 1188 1189 if (hash_a > hash_b) 1190 ret = 1; 1191 else if (hash_a < hash_b) 1192 ret = -1; 1193 else { 1194 unsigned int len_a = be16_to_cpu(dent_a->de_name_len); 1195 unsigned int len_b = be16_to_cpu(dent_b->de_name_len); 1196 1197 if (len_a > len_b) 1198 ret = 1; 1199 else if (len_a < len_b) 1200 ret = -1; 1201 else 1202 ret = memcmp(dent_a + 1, dent_b + 1, len_a); 1203 } 1204 1205 return ret; 1206 } 1207 1208 /** 1209 * do_filldir_main - read out directory entries 1210 * @dip: The GFS2 inode 1211 * @ctx: what to feed the entries to 1212 * @darr: an array of struct gfs2_dirent pointers to read 1213 * @entries: the number of entries in darr 1214 * @copied: pointer to int that's non-zero if a entry has been copied out 1215 * 1216 * Jump through some hoops to make sure that if there are hash collsions, 1217 * they are read out at the beginning of a buffer. We want to minimize 1218 * the possibility that they will fall into different readdir buffers or 1219 * that someone will want to seek to that location. 
 *
 * Returns: errno, >0 if the actor tells you to stop
 */

static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx,
			   const struct gfs2_dirent **darr, u32 entries,
			   int *copied)
{
	const struct gfs2_dirent *dent, *dent_next;
	u64 off, off_next;
	unsigned int x, y;
	int run = 0;	/* non-zero while inside a run of equal-hash entries */

	/* Sort by (hash, name length, name) so colliding hashes are adjacent */
	sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);

	dent_next = darr[0];
	off_next = be32_to_cpu(dent_next->de_hash);
	off_next = gfs2_disk_hash2offset(off_next);

	for (x = 0, y = 1; x < entries; x++, y++) {
		dent = dent_next;
		off = off_next;

		if (y < entries) {
			/* Peek at the next entry to detect hash collisions */
			dent_next = darr[y];
			off_next = be32_to_cpu(dent_next->de_hash);
			off_next = gfs2_disk_hash2offset(off_next);

			/* Skip entries before the current readdir position */
			if (off < ctx->pos)
				continue;
			ctx->pos = off;

			if (off_next == off) {
				/* A collision run is starting; if we already
				   copied something, stop here so the whole
				   run lands at the start of the next buffer */
				if (*copied && !run)
					return 1;
				run = 1;
			} else
				run = 0;
		} else {
			/* Last entry: no successor to compare against */
			if (off < ctx->pos)
				continue;
			ctx->pos = off;
		}

		if (!dir_emit(ctx, (const char *)(dent + 1),
				be16_to_cpu(dent->de_name_len),
				be64_to_cpu(dent->de_inum.no_addr),
				be16_to_cpu(dent->de_type)))
			return 1;

		*copied = 1;
	}

	/* Increment the ctx->pos by one, so the next time we come into the
	   do_filldir fxn, we get the next entry instead of the last one in the
	   current leaf */

	ctx->pos++;

	return 0;
}

/**
 * gfs2_alloc_sort_buffer - allocate scratch space for sorting dirents
 * @size: number of bytes required
 *
 * kmalloc when the size allows it, otherwise fall back to vmalloc.
 * Callers free the result with kvfree().
 */
static void *gfs2_alloc_sort_buffer(unsigned size)
{
	void *ptr = NULL;

	if (size < KMALLOC_MAX_SIZE)
		ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
	if (!ptr)
		ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
	return ptr;
}

/**
 * gfs2_dir_read_leaf - read the entries of one leaf chain into @ctx
 * @inode: the directory inode
 * @ctx: what to feed the entries to
 * @copied: set non-zero once an entry has been emitted
 * @depth: returns the depth of the first leaf in the chain
 * @leaf_no: block number of the first leaf in the chain
 *
 * Two passes over the chain: the first counts leaves and entries so a
 * correctly sized buffer can be allocated, the second gathers dirent
 * pointers for sorting/emission by do_filldir_main().
 */
static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
			      int *copied, unsigned *depth,
			      u64 leaf_no)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp =
		GFS2_SB(inode);
	struct buffer_head *bh;
	struct gfs2_leaf *lf;
	unsigned entries = 0, entries2 = 0;
	unsigned leaves = 0;
	const struct gfs2_dirent **darr, *dent;
	struct dirent_gather g;
	struct buffer_head **larr;
	int leaf = 0;	/* number of buffer heads retained in larr */
	int error, i;
	u64 lfn = leaf_no;

	/* Pass 1: walk the lf_next chain counting leaves and entries */
	do {
		error = get_leaf(ip, lfn, &bh);
		if (error)
			goto out;
		lf = (struct gfs2_leaf *)bh->b_data;
		if (leaves == 0)
			*depth = be16_to_cpu(lf->lf_depth);
		entries += be16_to_cpu(lf->lf_entries);
		leaves++;
		lfn = be64_to_cpu(lf->lf_next);
		brelse(bh);
	} while(lfn);

	if (!entries)
		return 0;

	error = -ENOMEM;
	/*
	 * The extra 99 entries are not normally used, but are a buffer
	 * zone in case the number of entries in the leaf is corrupt.
	 * 99 is the maximum number of entries that can fit in a single
	 * leaf block.
	 */
	/* Layout: larr[0..leaves) holds buffer heads, followed by the
	   dirent pointer array (darr) used for sorting */
	larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
	if (!larr)
		goto out;
	darr = (const struct gfs2_dirent **)(larr + leaves);
	g.pdent = darr;
	g.offset = 0;
	lfn = leaf_no;

	/* Pass 2: gather dirent pointers; keep each contributing bh pinned
	   until the entries have been emitted */
	do {
		error = get_leaf(ip, lfn, &bh);
		if (error)
			goto out_free;
		lf = (struct gfs2_leaf *)bh->b_data;
		lfn = be64_to_cpu(lf->lf_next);
		if (lf->lf_entries) {
			entries2 += be16_to_cpu(lf->lf_entries);
			dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
						gfs2_dirent_gather, NULL, &g);
			error = PTR_ERR(dent);
			if (IS_ERR(dent))
				goto out_free;
			/* Cross-check the leaf's entry count against what
			   the scan actually found */
			if (entries2 != g.offset) {
				fs_warn(sdp, "Number of entries corrupt in dir "
					"leaf %llu, entries2 (%u) != "
					"g.offset (%u)\n",
					(unsigned long long)bh->b_blocknr,
					entries2, g.offset);

				error = -EIO;
				goto out_free;
			}
			error = 0;
			larr[leaf++] = bh;
		} else {
			brelse(bh);
		}
	} while(lfn);

	BUG_ON(entries2 != entries);
	error = do_filldir_main(ip,
				ctx, darr, entries, copied);
out_free:
	/* Release the buffer heads pinned during the gather pass */
	for(i = 0; i < leaf; i++)
		brelse(larr[i]);
	kvfree(larr);
out:
	return error;
}

/**
 * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
 *
 * Note: we can't calculate each index like dir_e_read can because we don't
 * have the leaf, and therefore we don't have the depth, and therefore we
 * don't have the length. So we have to just read enough ahead to make up
 * for the loss of information.
 */
static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
			       struct file_ra_state *f_ra)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_glock *gl = ip->i_gl;
	struct buffer_head *bh;
	u64 blocknr = 0, last;
	unsigned count;

	/* First check if we've already read-ahead for the whole range. */
	if (index + MAX_RA_BLOCKS < f_ra->start)
		return;

	f_ra->start = max((pgoff_t)index, f_ra->start);
	for (count = 0; count < MAX_RA_BLOCKS; count++) {
		if (f_ra->start >= hsize) /* if exceeded the hash table */
			break;

		/* Consecutive identical pointers alias the same leaf;
		   only issue one read for each distinct block */
		last = blocknr;
		blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
		f_ra->start++;
		if (blocknr == last)
			continue;

		bh = gfs2_getbuf(gl, blocknr, 1);
		if (trylock_buffer(bh)) {
			if (buffer_uptodate(bh)) {
				unlock_buffer(bh);
				brelse(bh);
				continue;
			}
			/* NOTE(review): the bh reference taken above is
			   presumably dropped by end_buffer_read_sync when
			   the read completes — confirm against buffer.c */
			bh->b_end_io = end_buffer_read_sync;
			submit_bh(READA | REQ_META, bh);
			continue;
		}
		/* Someone else holds the buffer lock; skip it */
		brelse(bh);
	}
}

/**
 * dir_e_read - Reads the entries from a directory into a filldir buffer
 * @dip: dinode pointer
 * @ctx: actor to feed the entries to
 *
 * Returns: errno
 */

static int dir_e_read(struct inode *inode, struct dir_context *ctx,
		      struct file_ra_state *f_ra)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	u32 hsize, len = 0;
	u32 hash, index;
	__be64 *lp;
	int copied = 0;
	int error = 0;
	unsigned depth = 0;

	hsize = 1 << dip->i_depth;
	/* Resume from the hash encoded in the current readdir position */
	hash = gfs2_dir_offset2hash(ctx->pos);
	index = hash >> (32 - dip->i_depth);

	/* No cached hash table means no read-ahead state either */
	if (dip->i_hash_cache == NULL)
		f_ra->start = 0;
	lp = gfs2_dir_get_hash_table(dip);
	if (IS_ERR(lp))
		return PTR_ERR(lp);

	gfs2_dir_readahead(inode, hsize, index, f_ra);

	while (index < hsize) {
		error = gfs2_dir_read_leaf(inode, ctx,
					   &copied, &depth,
					   be64_to_cpu(lp[index]));
		if (error)
			break;

		/* 2^(i_depth - leaf depth) table slots alias this leaf;
		   jump past all of them to the next distinct leaf chain */
		len = 1 << (dip->i_depth - depth);
		index = (index & ~(len - 1)) + len;
	}

	/* A positive return means the actor asked us to stop — not an error */
	if (error > 0)
		error = 0;
	return error;
}

/**
 * gfs2_dir_read - Read directory entries, linear or exhash
 * @inode: the directory inode
 * @ctx: what to feed the entries to
 * @f_ra: read-ahead state, used only for exhash directories
 *
 * Dispatches to dir_e_read() for exhash directories; otherwise the
 * dirents live in the stuffed dinode block and are gathered, sorted
 * and emitted directly from here.
 *
 * Returns: errno
 */
int gfs2_dir_read(struct inode *inode, struct dir_context *ctx,
		  struct file_ra_state *f_ra)
{
	struct gfs2_inode *dip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct dirent_gather g;
	const struct gfs2_dirent **darr, *dent;
	struct buffer_head *dibh;
	int copied = 0;
	int error;

	if (!dip->i_entries)
		return 0;

	if (dip->i_diskflags & GFS2_DIF_EXHASH)
		return dir_e_read(inode, ctx, f_ra);

	/* Non-exhash directories must be stuffed in the dinode block */
	if (!gfs2_is_stuffed(dip)) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (error)
		return error;

	error = -ENOMEM;
	/* 96 is max number of dirents which can be stuffed into an inode */
	darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS);
	if (darr) {
		g.pdent = darr;
		g.offset = 0;
		dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
					gfs2_dirent_gather, NULL, &g);
		if (IS_ERR(dent)) {
			error = PTR_ERR(dent);
			goto out;
		}
		/* Dinode's entry count must match what the scan found */
		if (dip->i_entries != g.offset) {
			fs_warn(sdp, "Number of entries corrupt in dir %llu, "
				"ip->i_entries (%u) != g.offset (%u)\n",
				(unsigned long long)dip->i_no_addr,
				dip->i_entries,
				g.offset);
			error = -EIO;
			goto out;
		}
		error = do_filldir_main(dip, ctx, darr,
					dip->i_entries, &copied);
out:
		kfree(darr);
	}

	/* A positive return means the actor asked us to stop — not an error */
	if (error > 0)
		error = 0;

	brelse(dibh);

	return error;
}

/**
 * gfs2_dir_search - Search a directory
 * @dip: The GFS2 dir inode
 * @name: The name we are looking up
 * @fail_on_exist: Fail if the name exists rather than looking it up
 *
 * This routine searches a directory for a file or another directory.
 * Assumes a glock is held on dip.
 *
 * Returns: errno
 */

struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
			      bool fail_on_exist)
{
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	u64 addr, formal_ino;
	u16 dtype;

	dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
	if (dent) {
		if (IS_ERR(dent))
			return ERR_CAST(dent);
		/* Copy out what we need before dropping the buffer */
		dtype = be16_to_cpu(dent->de_type);
		addr = be64_to_cpu(dent->de_inum.no_addr);
		formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino);
		brelse(bh);
		if (fail_on_exist)
			return ERR_PTR(-EEXIST);
		return gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0);
	}
	return ERR_PTR(-ENOENT);
}

/**
 * gfs2_dir_check - check that a directory entry matches an inode
 * @dir: the directory to search
 * @name: the entry name to look up
 * @ip: inode to verify against, or NULL to only check existence
 *
 * Returns: 0 if the entry exists (and, when @ip is given, matches its
 * block address, formal inode number and type), -ENOENT if absent,
 * -EIO on a type mismatch (filesystem inconsistency)
 */
int gfs2_dir_check(struct inode *dir, const struct qstr *name,
		   const struct gfs2_inode *ip)
{
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	int ret = -ENOENT;

	dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
	if (dent) {
		if (IS_ERR(dent))
			return PTR_ERR(dent);
		if (ip) {
			if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
				goto out;
			if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
			    ip->i_no_formal_ino)
				goto out;
			/* Address and inode number match but type doesn't:
			   the on-disk directory is inconsistent */
			if (unlikely(IF2DT(ip->i_inode.i_mode) !=
				     be16_to_cpu(dent->de_type))) {
				gfs2_consist_inode(GFS2_I(dir));
				ret = -EIO;
				goto out;
			}
		}
		ret = 0;
out:
		brelse(bh);
	}
	return ret;
}

/**
 * dir_new_leaf - Add a new leaf onto hash chain
 * @inode: The directory
 * @name: The name we are adding
 *
 * This adds a new dir leaf onto an existing leaf when there is not
 * enough space to add a new dir entry. This is a last resort after
 * we've expanded the hash table to max size and also split existing
 * leaf blocks, so it will only occur for very large directories.
 *
 * The dist parameter is set to 1 for leaf blocks directly attached
 * to the hash table, 2 for one layer of indirection, 3 for two layers
 * etc. We are thus able to tell the difference between an old leaf
 * with dist set to zero (i.e. "don't know") and a new one where we
 * set this information for debug/fsck purposes.
 *
 * Returns: 0 on success, or -ve on error
 */

static int dir_new_leaf(struct inode *inode, const struct qstr *name)
{
	struct buffer_head *bh, *obh;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_leaf *leaf, *oleaf;
	u32 dist = 1;
	int error;
	u32 index;
	u64 bn;

	index = name->hash >> (32 - ip->i_depth);
	error = get_first_leaf(ip, index, &obh);
	if (error)
		return error;
	/* Walk to the last leaf in the chain, counting hops for lf_dist */
	do {
		dist++;
		oleaf = (struct gfs2_leaf *)obh->b_data;
		bn = be64_to_cpu(oleaf->lf_next);
		if (!bn)
			break;
		brelse(obh);
		error = get_leaf(ip, bn, &obh);
		if (error)
			return error;
	} while(1);

	/* The old tail leaf will be modified (lf_next), so journal it */
	gfs2_trans_add_meta(ip->i_gl, obh);

	leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
	if (!leaf) {
		brelse(obh);
		return -ENOSPC;
	}
	leaf->lf_dist = cpu_to_be32(dist);
	/* Link the new leaf onto the end of the chain */
	oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
	brelse(bh);
	brelse(obh);

	/* Account for the new block in the dinode */
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		return error;
	gfs2_trans_add_meta(ip->i_gl, bh);
	gfs2_add_inode_blocks(&ip->i_inode, 1);
	gfs2_dinode_out(ip, bh->b_data);
	brelse(bh);
	return 0;
}

/**
 * gfs2_inode_ra_len - read-ahead hint for a dirent
 * @ip: the inode being linked into the directory
 *
 * Returns 1 if the inode's extended-attribute block is the block
 * immediately following the inode itself (so a reader can pull both
 * in one go), otherwise 0.
 */
static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip)
{
	u64 where = ip->i_no_addr + 1;
	if (ip->i_eattr == where)
		return 1;
	return 0;
}

/**
 * gfs2_dir_add - Add new filename into directory
 * @inode: The directory inode
 * @name: The new name
 * @nip: The GFS2 inode to be linked in to the directory
 * @da: The directory addition info
 *
 * If the call to gfs2_diradd_alloc_required resulted in there being
 * no need to allocate any new directory blocks, then it will contain
 * a pointer to the directory entry and the bh in which it resides. We
 * can use that without having to repeat the search. If there was no
 * free space, then we must now create more space.
 *
 * Returns: 0 on success, error code on failure
 */

int gfs2_dir_add(struct inode *inode, const struct qstr *name,
		 const struct gfs2_inode *nip, struct gfs2_diradd *da)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *bh = da->bh;
	struct gfs2_dirent *dent = da->dent;
	struct timespec tv;
	struct gfs2_leaf *leaf;
	int error;

	while(1) {
		/* No saved location from gfs2_diradd_alloc_required:
		   search for free space now */
		if (da->bh == NULL) {
			dent = gfs2_dirent_search(inode, name,
						  gfs2_dirent_find_space, &bh);
		}
		if (dent) {
			if (IS_ERR(dent))
				return PTR_ERR(dent);
			/* Found room: fill in the new entry */
			dent = gfs2_init_dirent(inode, dent, name, bh);
			gfs2_inum_out(nip, dent);
			dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
			dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
			tv = CURRENT_TIME;
			if (ip->i_diskflags & GFS2_DIF_EXHASH) {
				leaf = (struct gfs2_leaf *)bh->b_data;
				be16_add_cpu(&leaf->lf_entries, 1);
				leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
				leaf->lf_sec = cpu_to_be64(tv.tv_sec);
			}
			/* The saved location has been consumed */
			da->dent = NULL;
			da->bh = NULL;
			brelse(bh);
			ip->i_entries++;
			ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv;
			if (S_ISDIR(nip->i_inode.i_mode))
				inc_nlink(&ip->i_inode);
			mark_inode_dirty(inode);
			error = 0;
			break;
		}
		/* No space found: grow the directory and retry, escalating
		   through linear->exhash conversion, leaf split, hash table
		   doubling, and finally chaining a new leaf */
		if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
			error = dir_make_exhash(inode);
			if (error)
				break;
			continue;
		}
		error = dir_split_leaf(inode, name);
		if (error == 0)
			continue;
		if (error < 0)
			break;
		/* split returned >0: leaf couldn't be split at this depth */
		if (ip->i_depth < GFS2_DIR_MAX_DEPTH) {
			error = dir_double_exhash(ip);
			if (error)
				break;
			error = dir_split_leaf(inode, name);
			if (error < 0)
				break;
			if (error == 0)
				continue;
		}
		error = dir_new_leaf(inode, name);
		if (!error)
			continue;
		error = -ENOSPC;
		break;
	}
	return error;
}


/**
 * gfs2_dir_del - Delete a directory entry
 * @dip: The GFS2 inode
 * @dentry: The directory entry to delete
 *
 * Returns: 0 on success, error code on failure
 */

int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
{
	const struct qstr *name = &dentry->d_name;
	struct gfs2_dirent *dent, *prev = NULL;
	struct buffer_head *bh;
	struct timespec tv = CURRENT_TIME;

	/* Returns _either_ the entry (if its first in block) or the
	   previous entry otherwise */
	dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
	if (!dent) {
		gfs2_consist_inode(dip);
		return -EIO;
	}
	if (IS_ERR(dent)) {
		gfs2_consist_inode(dip);
		return PTR_ERR(dent);
	}
	/* If not first in block, adjust pointers accordingly */
	if (gfs2_dirent_find(dent, name, NULL) == 0) {
		prev = dent;
		dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
	}

	dirent_del(dip, bh, prev, dent);
	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
		/* Keep the leaf's entry count and timestamps up to date */
		struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
		u16
		    entries = be16_to_cpu(leaf->lf_entries);
		if (!entries)
			gfs2_consist_inode(dip);
		leaf->lf_entries = cpu_to_be16(--entries);
		leaf->lf_nsec = cpu_to_be32(tv.tv_nsec);
		leaf->lf_sec = cpu_to_be64(tv.tv_sec);
	}
	brelse(bh);

	if (!dip->i_entries)
		gfs2_consist_inode(dip);
	dip->i_entries--;
	dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv;
	if (d_is_dir(dentry))
		drop_nlink(&dip->i_inode);
	mark_inode_dirty(&dip->i_inode);

	return 0;
}

/**
 * gfs2_dir_mvino - Change inode number of directory entry
 * @dip: The GFS2 inode
 * @filename: the name of the entry to repoint
 * @nip: the inode the entry should now refer to
 * @new_type: the new de_type for the entry
 *
 * This routine changes the inode number of a directory entry. It's used
 * by rename to change ".." when a directory is moved.
 * Assumes a glock is held on dvp.
 *
 * Returns: errno
 */

int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
		   const struct gfs2_inode *nip, unsigned int new_type)
{
	struct buffer_head *bh;
	struct gfs2_dirent *dent;
	int error;

	dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
	if (!dent) {
		gfs2_consist_inode(dip);
		return -EIO;
	}
	if (IS_ERR(dent))
		return PTR_ERR(dent);

	gfs2_trans_add_meta(dip->i_gl, bh);
	gfs2_inum_out(nip, dent);
	dent->de_type = cpu_to_be16(new_type);

	/* For exhash dirs the dent lives in a leaf; swap to the dinode
	   buffer so the timestamp update below lands in the dinode */
	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
		brelse(bh);
		error = gfs2_meta_inode_buffer(dip, &bh);
		if (error)
			return error;
		gfs2_trans_add_meta(dip->i_gl, bh);
	}

	dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
	gfs2_dinode_out(dip, bh->b_data);
	brelse(bh);
	return 0;
}

/**
 * leaf_dealloc - Deallocate a directory leaf
 * @dip: the directory
 * @index: the hash table offset in the directory
 * @len: the number of pointers to this leaf
 * @leaf_no: the leaf
 *	     number
 * @leaf_bh: buffer_head for the starting leaf
 * @last_dealloc: 1 if this is the final dealloc for the leaf, else 0
 *
 * Returns: errno
 */

static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
			u64 leaf_no, struct buffer_head *leaf_bh,
			int last_dealloc)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct gfs2_leaf *tmp_leaf;
	struct gfs2_rgrp_list rlist;
	struct buffer_head *bh, *dibh;
	u64 blk, nblk;
	unsigned int rg_blocks = 0, l_blocks = 0;
	char *ht;	/* zero-filled region used to clear the hash table slots */
	unsigned int x, size = len * sizeof(u64);
	int error;

	error = gfs2_rindex_update(sdp);
	if (error)
		return error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));

	/* kzalloc, falling back to zeroed vmalloc; freed with kvfree() */
	ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN);
	if (ht == NULL)
		ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO,
			       PAGE_KERNEL);
	if (!ht)
		return -ENOMEM;

	error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (error)
		goto out;

	/* Count the number of leaves */
	bh = leaf_bh;

	/* First walk of the lf_next chain: collect the resource groups
	   covering each leaf block.  leaf_bh is caller-owned, so only
	   release buffers we read ourselves (blk != leaf_no). */
	for (blk = leaf_no; blk; blk = nblk) {
		if (blk != leaf_no) {
			error = get_leaf(dip, blk, &bh);
			if (error)
				goto out_rlist;
		}
		tmp_leaf = (struct gfs2_leaf *)bh->b_data;
		nblk = be64_to_cpu(tmp_leaf->lf_next);
		if (blk != leaf_no)
			brelse(bh);

		gfs2_rlist_add(dip, &rlist, blk);
		l_blocks++;
	}

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE);

	/* Total up rgrp header blocks for the transaction reservation */
	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist;

	error = gfs2_trans_begin(sdp,
			rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
			RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
	if (error)
		goto
			out_rg_gunlock;

	bh = leaf_bh;

	/* Second walk of the chain: actually free each leaf block */
	for (blk = leaf_no; blk; blk = nblk) {
		if (blk != leaf_no) {
			error = get_leaf(dip, blk, &bh);
			if (error)
				goto out_end_trans;
		}
		tmp_leaf = (struct gfs2_leaf *)bh->b_data;
		nblk = be64_to_cpu(tmp_leaf->lf_next);
		if (blk != leaf_no)
			brelse(bh);

		gfs2_free_meta(dip, blk, 1);
		gfs2_add_inode_blocks(&dip->i_inode, -1);
	}

	/* Zero out the hash table slots that pointed at this leaf chain */
	error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
	if (error != size) {
		if (error >= 0)
			error = -EIO;
		goto out_end_trans;
	}

	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (error)
		goto out_end_trans;

	gfs2_trans_add_meta(dip->i_gl, dibh);
	/* On the last dealloc, make this a regular file in case we crash.
	   (We don't want to free these blocks a second time.) */
	if (last_dealloc)
		dip->i_inode.i_mode = S_IFREG;
	gfs2_dinode_out(dip, dibh->b_data);
	brelse(dibh);

out_end_trans:
	gfs2_trans_end(sdp);
out_rg_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
	gfs2_rlist_free(&rlist);
	gfs2_quota_unhold(dip);
out:
	kvfree(ht);
	return error;
}

/**
 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
 * @dip: the directory
 *
 * Dealloc all on-disk directory leaves to FREEMETA state
 * Change on-disk inode type to "regular file"
 *
 * Returns: errno
 */

int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
{
	struct buffer_head *bh;
	struct gfs2_leaf *leaf;
	u32 hsize, len;
	u32 index = 0, next_index;
	__be64 *lp;
	u64 leaf_no;
	int error = 0, last;

	hsize = 1 << dip->i_depth;

	lp = gfs2_dir_get_hash_table(dip);
	if (IS_ERR(lp))
		return PTR_ERR(lp);

	while (index < hsize) {
		leaf_no = be64_to_cpu(lp[index]);
		if (leaf_no) {
			error =
			    get_leaf(dip, leaf_no, &bh);
			if (error)
				goto out;
			leaf = (struct gfs2_leaf *)bh->b_data;
			/* 2^(i_depth - lf_depth) table slots alias this
			   leaf chain; deallocate them all in one call and
			   skip ahead past them */
			len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));

			next_index = (index & ~(len - 1)) + len;
			last = ((next_index >= hsize) ? 1 : 0);
			error = leaf_dealloc(dip, index, len, leaf_no, bh,
					     last);
			brelse(bh);
			if (error)
				goto out;
			index = next_index;
		} else
			index++;
	}

	if (index != hsize) {
		gfs2_consist_inode(dip);
		error = -EIO;
	}

out:

	return error;
}

/**
 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
 * @ip: the file being written to
 * @name: the filename that's going to be added
 * @da: The structure to return dir alloc info
 *
 * Returns: 0 if ok, -ve on error
 */

int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name,
			       struct gfs2_diradd *da)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf);
	struct gfs2_dirent *dent;
	struct buffer_head *bh;

	da->nr_blocks = 0;
	da->bh = NULL;
	da->dent = NULL;

	dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
	if (!dent) {
		/* No room: worst-case block reservation, except that a
		   still-linear directory whose entry would fit after
		   unstuffing only needs a single leaf block */
		da->nr_blocks = sdp->sd_max_dirres;
		if (!(ip->i_diskflags & GFS2_DIF_EXHASH) &&
		    (GFS2_DIRENT_SIZE(name->len) < extra))
			da->nr_blocks = 1;
		return 0;
	}
	if (IS_ERR(dent))
		return PTR_ERR(dent);

	/* Room exists: optionally hand the found slot back to the caller
	   (who then owns the bh reference) so gfs2_dir_add can reuse it */
	if (da->save_loc) {
		da->bh = bh;
		da->dent = dent;
	} else {
		brelse(bh);
	}
	return 0;
}