1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 /* 11 * Implements Extendible Hashing as described in: 12 * "Extendible Hashing" by Fagin, et al in 13 * __ACM Trans. on Database Systems__, Sept 1979. 14 * 15 * 16 * Here's the layout of dirents which is essentially the same as that of ext2 17 * within a single block. The field de_name_len is the number of bytes 18 * actually required for the name (no null terminator). The field de_rec_len 19 * is the number of bytes allocated to the dirent. The offset of the next 20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is 21 * deleted, the preceding dirent inherits its allocated space, ie 22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained 23 * by adding de_rec_len to the current dirent, this essentially causes the 24 * deleted dirent to get jumped over when iterating through all the dirents. 25 * 26 * When deleting the first dirent in a block, there is no previous dirent so 27 * the field de_ino is set to zero to designate it as deleted. When allocating 28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the 29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first 30 * dirent is allocated. Otherwise it must go through all the 'used' dirents 31 * searching for one in which the amount of total space minus the amount of 32 * used space will provide enough space for the new dirent. 33 * 34 * There are two types of blocks in which dirents reside. In a stuffed dinode, 35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of 36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the 37 * beginning of the leaf block. The dirents reside in leaves when 38 * 39 * dip->i_diskflags & GFS2_DIF_EXHASH is true 40 * 41 * Otherwise, the dirents are "linear", within a single stuffed dinode block. 42 * 43 * When the dirents are in leaves, the actual contents of the directory file are 44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The 45 * dirents are NOT in the directory file itself. There can be more than one 46 * block pointer in the array that points to the same leaf. In fact, when a 47 * directory is first converted from linear to exhash, all of the pointers 48 * point to the same leaf. 49 * 50 * When a leaf is completely full, the size of the hash table can be 51 * doubled unless it is already at the maximum size which is hard coded into 52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, 53 * but never before the maximum hash table size has been reached. 54 */ 55 56 #include <linux/slab.h> 57 #include <linux/spinlock.h> 58 #include <linux/buffer_head.h> 59 #include <linux/sort.h> 60 #include <linux/gfs2_ondisk.h> 61 #include <linux/crc32.h> 62 #include <linux/vmalloc.h> 63 64 #include "gfs2.h" 65 #include "incore.h" 66 #include "dir.h" 67 #include "glock.h" 68 #include "inode.h" 69 #include "meta_io.h" 70 #include "quota.h" 71 #include "rgrp.h" 72 #include "trans.h" 73 #include "bmap.h" 74 #include "util.h" 75 76 #define IS_LEAF 1 /* Hashed (leaf) directory */ 77 #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ 78 79 #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) 80 #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) 81 82 struct qstr gfs2_qdot __read_mostly; 83 struct qstr gfs2_qdotdot __read_mostly; 84 85 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 86 const struct qstr *name, void *opaque); 87 88 int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 89 struct buffer_head **bhp) 90 { 91 struct buffer_head *bh; 92 93 bh = gfs2_meta_new(ip->i_gl, block); 94 gfs2_trans_add_bh(ip->i_gl, bh, 1); 95 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 96 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 97 *bhp = bh; 98 return 0; 99 } 100 101 static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, 102 struct buffer_head **bhp) 103 { 104 struct buffer_head *bh; 105 int error; 106 107 error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh); 108 if (error) 109 return error; 110 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) { 111 brelse(bh); 112 return -EIO; 113 } 114 *bhp = bh; 115 return 0; 116 } 117 118 static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, 119 unsigned int offset, unsigned int size) 120 { 121 struct buffer_head *dibh; 122 int error; 123 124 error = gfs2_meta_inode_buffer(ip, &dibh); 125 if (error) 126 return error; 127 128 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 129 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 130 if (ip->i_inode.i_size < offset + size) 131 i_size_write(&ip->i_inode, offset + size); 132 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 133 gfs2_dinode_out(ip, dibh->b_data); 134 135 brelse(dibh); 136 137 return size; 138 } 139 140 141 142 /** 143 * gfs2_dir_write_data - Write directory information to the inode 144 * @ip: The GFS2 inode 145 * @buf: The buffer containing information to be written 146 * @offset: The file offset to start writing at 147 * @size: The amount of data to write 148 * 149 * Returns: The number of bytes correctly written or error code 150 */ 151 static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, 152 u64 offset, unsigned int size) 153 { 154 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 155 struct buffer_head *dibh; 156 u64 lblock, dblock; 157 u32 extlen = 0; 158 unsigned int o; 159 int copied = 0; 160 int error = 0; 161 int new = 0; 162 163 if (!size) 164 return 0; 165 166 if (gfs2_is_stuffed(ip) && 167 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) 168 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, 169 size); 170 171 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) 172 return -EINVAL; 173 174 if (gfs2_is_stuffed(ip)) { 175 error = gfs2_unstuff_dinode(ip, NULL); 176 if (error) 177 return error; 178 } 179 180 lblock = offset; 181 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); 182 183 while (copied < size) { 184 unsigned int amount; 185 struct buffer_head *bh; 186 187 amount = size - copied; 188 if (amount > sdp->sd_sb.sb_bsize - o) 189 amount = sdp->sd_sb.sb_bsize - o; 190 191 if (!extlen) { 192 new = 1; 193 error = gfs2_extent_map(&ip->i_inode, lblock, &new, 194 &dblock, &extlen); 195 if (error) 196 goto fail; 197 error = -EIO; 198 if (gfs2_assert_withdraw(sdp, dblock)) 199 goto fail; 200 } 201 202 if (amount == sdp->sd_jbsize || new) 203 error = gfs2_dir_get_new_buffer(ip, dblock, &bh); 204 else 205 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh); 206 207 if (error) 208 goto fail; 209 210 gfs2_trans_add_bh(ip->i_gl, bh, 1); 211 memcpy(bh->b_data + o, buf, amount); 212 brelse(bh); 213 214 buf += amount; 215 copied += amount; 216 lblock++; 217 dblock++; 218 extlen--; 219 220 o = sizeof(struct gfs2_meta_header); 221 } 222 223 out: 224 error = gfs2_meta_inode_buffer(ip, &dibh); 225 if (error) 226 return error; 227 228 if (ip->i_inode.i_size < offset + copied) 229 i_size_write(&ip->i_inode, offset + copied); 230 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 231 232 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 233 gfs2_dinode_out(ip, dibh->b_data); 234 brelse(dibh); 235 236 return copied; 237 fail: 238 if (copied) 239 goto out; 240 return error; 241 } 242 243 static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf, 244 unsigned int size) 245 { 246 struct buffer_head *dibh; 247 int error; 248 249 error = gfs2_meta_inode_buffer(ip, &dibh); 250 if (!error) { 251 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); 252 brelse(dibh); 253 } 254 255 return (error) ? error : size; 256 } 257 258 259 /** 260 * gfs2_dir_read_data - Read a data from a directory inode 261 * @ip: The GFS2 Inode 262 * @buf: The buffer to place result into 263 * @size: Amount of data to transfer 264 * 265 * Returns: The amount of data actually copied or the error 266 */ 267 static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf, 268 unsigned int size) 269 { 270 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 271 u64 lblock, dblock; 272 u32 extlen = 0; 273 unsigned int o; 274 int copied = 0; 275 int error = 0; 276 277 if (gfs2_is_stuffed(ip)) 278 return gfs2_dir_read_stuffed(ip, buf, size); 279 280 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) 281 return -EINVAL; 282 283 lblock = 0; 284 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); 285 286 while (copied < size) { 287 unsigned int amount; 288 struct buffer_head *bh; 289 int new; 290 291 amount = size - copied; 292 if (amount > sdp->sd_sb.sb_bsize - o) 293 amount = sdp->sd_sb.sb_bsize - o; 294 295 if (!extlen) { 296 new = 0; 297 error = gfs2_extent_map(&ip->i_inode, lblock, &new, 298 &dblock, &extlen); 299 if (error || !dblock) 300 goto fail; 301 BUG_ON(extlen < 1); 302 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); 303 } else { 304 error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); 305 if (error) 306 goto fail; 307 } 308 error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD); 309 if (error) { 310 brelse(bh); 311 goto fail; 312 } 313 dblock++; 314 extlen--; 315 memcpy(buf, bh->b_data + o, amount); 316 brelse(bh); 317 buf += (amount/sizeof(__be64)); 318 copied += amount; 319 lblock++; 320 o = sizeof(struct gfs2_meta_header); 321 } 322 323 return copied; 324 fail: 325 return (copied) ? copied : error; 326 } 327 328 /** 329 * gfs2_dir_get_hash_table - Get pointer to the dir hash table 330 * @ip: The inode in question 331 * 332 * Returns: The hash table or an error 333 */ 334 335 static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) 336 { 337 struct inode *inode = &ip->i_inode; 338 int ret; 339 u32 hsize; 340 __be64 *hc; 341 342 BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH)); 343 344 hc = ip->i_hash_cache; 345 if (hc) 346 return hc; 347 348 hsize = 1 << ip->i_depth; 349 hsize *= sizeof(__be64); 350 if (hsize != i_size_read(&ip->i_inode)) { 351 gfs2_consist_inode(ip); 352 return ERR_PTR(-EIO); 353 } 354 355 hc = kmalloc(hsize, GFP_NOFS); 356 ret = -ENOMEM; 357 if (hc == NULL) 358 return ERR_PTR(-ENOMEM); 359 360 ret = gfs2_dir_read_data(ip, hc, hsize); 361 if (ret < 0) { 362 kfree(hc); 363 return ERR_PTR(ret); 364 } 365 366 spin_lock(&inode->i_lock); 367 if (ip->i_hash_cache) 368 kfree(hc); 369 else 370 ip->i_hash_cache = hc; 371 spin_unlock(&inode->i_lock); 372 373 return ip->i_hash_cache; 374 } 375 376 /** 377 * gfs2_dir_hash_inval - Invalidate dir hash 378 * @ip: The directory inode 379 * 380 * Must be called with an exclusive glock, or during glock invalidation. 381 */ 382 void gfs2_dir_hash_inval(struct gfs2_inode *ip) 383 { 384 __be64 *hc = ip->i_hash_cache; 385 ip->i_hash_cache = NULL; 386 kfree(hc); 387 } 388 389 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) 390 { 391 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; 392 } 393 394 static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent, 395 const struct qstr *name, int ret) 396 { 397 if (!gfs2_dirent_sentinel(dent) && 398 be32_to_cpu(dent->de_hash) == name->hash && 399 be16_to_cpu(dent->de_name_len) == name->len && 400 memcmp(dent+1, name->name, name->len) == 0) 401 return ret; 402 return 0; 403 } 404 405 static int gfs2_dirent_find(const struct gfs2_dirent *dent, 406 const struct qstr *name, 407 void *opaque) 408 { 409 return __gfs2_dirent_find(dent, name, 1); 410 } 411 412 static int gfs2_dirent_prev(const struct gfs2_dirent *dent, 413 const struct qstr *name, 414 void *opaque) 415 { 416 return __gfs2_dirent_find(dent, name, 2); 417 } 418 419 /* 420 * name->name holds ptr to start of block. 421 * name->len holds size of block. 422 */ 423 static int gfs2_dirent_last(const struct gfs2_dirent *dent, 424 const struct qstr *name, 425 void *opaque) 426 { 427 const char *start = name->name; 428 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len); 429 if (name->len == (end - start)) 430 return 1; 431 return 0; 432 } 433 434 static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, 435 const struct qstr *name, 436 void *opaque) 437 { 438 unsigned required = GFS2_DIRENT_SIZE(name->len); 439 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 440 unsigned totlen = be16_to_cpu(dent->de_rec_len); 441 442 if (gfs2_dirent_sentinel(dent)) 443 actual = 0; 444 if (totlen - actual >= required) 445 return 1; 446 return 0; 447 } 448 449 struct dirent_gather { 450 const struct gfs2_dirent **pdent; 451 unsigned offset; 452 }; 453 454 static int gfs2_dirent_gather(const struct gfs2_dirent *dent, 455 const struct qstr *name, 456 void *opaque) 457 { 458 struct dirent_gather *g = opaque; 459 if (!gfs2_dirent_sentinel(dent)) { 460 g->pdent[g->offset++] = dent; 461 } 462 return 0; 463 } 464 465 /* 466 * Other possible things to check: 467 * - Inode located within filesystem size (and on valid block) 468 * - Valid directory entry type 469 * Not sure how heavy-weight we want to make this... could also check 470 * hash is correct for example, but that would take a lot of extra time. 471 * For now the most important thing is to check that the various sizes 472 * are correct. 473 */ 474 static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, 475 unsigned int size, unsigned int len, int first) 476 { 477 const char *msg = "gfs2_dirent too small"; 478 if (unlikely(size < sizeof(struct gfs2_dirent))) 479 goto error; 480 msg = "gfs2_dirent misaligned"; 481 if (unlikely(offset & 0x7)) 482 goto error; 483 msg = "gfs2_dirent points beyond end of block"; 484 if (unlikely(offset + size > len)) 485 goto error; 486 msg = "zero inode number"; 487 if (unlikely(!first && gfs2_dirent_sentinel(dent))) 488 goto error; 489 msg = "name length is greater than space in dirent"; 490 if (!gfs2_dirent_sentinel(dent) && 491 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) > 492 size)) 493 goto error; 494 return 0; 495 error: 496 printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, 497 first ? "first in block" : "not first in block"); 498 return -EIO; 499 } 500 501 static int gfs2_dirent_offset(const void *buf) 502 { 503 const struct gfs2_meta_header *h = buf; 504 int offset; 505 506 BUG_ON(buf == NULL); 507 508 switch(be32_to_cpu(h->mh_type)) { 509 case GFS2_METATYPE_LF: 510 offset = sizeof(struct gfs2_leaf); 511 break; 512 case GFS2_METATYPE_DI: 513 offset = sizeof(struct gfs2_dinode); 514 break; 515 default: 516 goto wrong_type; 517 } 518 return offset; 519 wrong_type: 520 printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", 521 be32_to_cpu(h->mh_type)); 522 return -1; 523 } 524 525 static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, 526 unsigned int len, gfs2_dscan_t scan, 527 const struct qstr *name, 528 void *opaque) 529 { 530 struct gfs2_dirent *dent, *prev; 531 unsigned offset; 532 unsigned size; 533 int ret = 0; 534 535 ret = gfs2_dirent_offset(buf); 536 if (ret < 0) 537 goto consist_inode; 538 539 offset = ret; 540 prev = NULL; 541 dent = buf + offset; 542 size = be16_to_cpu(dent->de_rec_len); 543 if (gfs2_check_dirent(dent, offset, size, len, 1)) 544 goto consist_inode; 545 do { 546 ret = scan(dent, name, opaque); 547 if (ret) 548 break; 549 offset += size; 550 if (offset == len) 551 break; 552 prev = dent; 553 dent = buf + offset; 554 size = be16_to_cpu(dent->de_rec_len); 555 if (gfs2_check_dirent(dent, offset, size, len, 0)) 556 goto consist_inode; 557 } while(1); 558 559 switch(ret) { 560 case 0: 561 return NULL; 562 case 1: 563 return dent; 564 case 2: 565 return prev ? prev : dent; 566 default: 567 BUG_ON(ret > 0); 568 return ERR_PTR(ret); 569 } 570 571 consist_inode: 572 gfs2_consist_inode(GFS2_I(inode)); 573 return ERR_PTR(-EIO); 574 } 575 576 static int dirent_check_reclen(struct gfs2_inode *dip, 577 const struct gfs2_dirent *d, const void *end_p) 578 { 579 const void *ptr = d; 580 u16 rec_len = be16_to_cpu(d->de_rec_len); 581 582 if (unlikely(rec_len < sizeof(struct gfs2_dirent))) 583 goto broken; 584 ptr += rec_len; 585 if (ptr < end_p) 586 return rec_len; 587 if (ptr == end_p) 588 return -ENOENT; 589 broken: 590 gfs2_consist_inode(dip); 591 return -EIO; 592 } 593 594 /** 595 * dirent_next - Next dirent 596 * @dip: the directory 597 * @bh: The buffer 598 * @dent: Pointer to list of dirents 599 * 600 * Returns: 0 on success, error code otherwise 601 */ 602 603 static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, 604 struct gfs2_dirent **dent) 605 { 606 struct gfs2_dirent *cur = *dent, *tmp; 607 char *bh_end = bh->b_data + bh->b_size; 608 int ret; 609 610 ret = dirent_check_reclen(dip, cur, bh_end); 611 if (ret < 0) 612 return ret; 613 614 tmp = (void *)cur + ret; 615 ret = dirent_check_reclen(dip, tmp, bh_end); 616 if (ret == -EIO) 617 return ret; 618 619 /* Only the first dent could ever have de_inum.no_addr == 0 */ 620 if (gfs2_dirent_sentinel(tmp)) { 621 gfs2_consist_inode(dip); 622 return -EIO; 623 } 624 625 *dent = tmp; 626 return 0; 627 } 628 629 /** 630 * dirent_del - Delete a dirent 631 * @dip: The GFS2 inode 632 * @bh: The buffer 633 * @prev: The previous dirent 634 * @cur: The current dirent 635 * 636 */ 637 638 static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, 639 struct gfs2_dirent *prev, struct gfs2_dirent *cur) 640 { 641 u16 cur_rec_len, prev_rec_len; 642 643 if (gfs2_dirent_sentinel(cur)) { 644 gfs2_consist_inode(dip); 645 return; 646 } 647 648 gfs2_trans_add_bh(dip->i_gl, bh, 1); 649 650 /* If there is no prev entry, this is the first entry in the block. 651 The de_rec_len is already as big as it needs to be. Just zero 652 out the inode number and return. */ 653 654 if (!prev) { 655 cur->de_inum.no_addr = 0; 656 cur->de_inum.no_formal_ino = 0; 657 return; 658 } 659 660 /* Combine this dentry with the previous one. */ 661 662 prev_rec_len = be16_to_cpu(prev->de_rec_len); 663 cur_rec_len = be16_to_cpu(cur->de_rec_len); 664 665 if ((char *)prev + prev_rec_len != (char *)cur) 666 gfs2_consist_inode(dip); 667 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size) 668 gfs2_consist_inode(dip); 669 670 prev_rec_len += cur_rec_len; 671 prev->de_rec_len = cpu_to_be16(prev_rec_len); 672 } 673 674 /* 675 * Takes a dent from which to grab space as an argument. Returns the 676 * newly created dent. 677 */ 678 static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, 679 struct gfs2_dirent *dent, 680 const struct qstr *name, 681 struct buffer_head *bh) 682 { 683 struct gfs2_inode *ip = GFS2_I(inode); 684 struct gfs2_dirent *ndent; 685 unsigned offset = 0, totlen; 686 687 if (!gfs2_dirent_sentinel(dent)) 688 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 689 totlen = be16_to_cpu(dent->de_rec_len); 690 BUG_ON(offset + name->len > totlen); 691 gfs2_trans_add_bh(ip->i_gl, bh, 1); 692 ndent = (struct gfs2_dirent *)((char *)dent + offset); 693 dent->de_rec_len = cpu_to_be16(offset); 694 gfs2_qstr2dirent(name, totlen - offset, ndent); 695 return ndent; 696 } 697 698 static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, 699 struct buffer_head *bh, 700 const struct qstr *name) 701 { 702 struct gfs2_dirent *dent; 703 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 704 gfs2_dirent_find_space, name, NULL); 705 if (!dent || IS_ERR(dent)) 706 return dent; 707 return gfs2_init_dirent(inode, dent, name, bh); 708 } 709 710 static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, 711 struct buffer_head **bhp) 712 { 713 int error; 714 715 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); 716 if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { 717 /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ 718 error = -EIO; 719 } 720 721 return error; 722 } 723 724 /** 725 * get_leaf_nr - Get a leaf number associated with the index 726 * @dip: The GFS2 inode 727 * @index: 728 * @leaf_out: 729 * 730 * Returns: 0 on success, error code otherwise 731 */ 732 733 static int get_leaf_nr(struct gfs2_inode *dip, u32 index, 734 u64 *leaf_out) 735 { 736 __be64 *hash; 737 738 hash = gfs2_dir_get_hash_table(dip); 739 if (IS_ERR(hash)) 740 return PTR_ERR(hash); 741 *leaf_out = be64_to_cpu(*(hash + index)); 742 return 0; 743 } 744 745 static int get_first_leaf(struct gfs2_inode *dip, u32 index, 746 struct buffer_head **bh_out) 747 { 748 u64 leaf_no; 749 int error; 750 751 error = get_leaf_nr(dip, index, &leaf_no); 752 if (!error) 753 error = get_leaf(dip, leaf_no, bh_out); 754 755 return error; 756 } 757 758 static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, 759 const struct qstr *name, 760 gfs2_dscan_t scan, 761 struct buffer_head **pbh) 762 { 763 struct buffer_head *bh; 764 struct gfs2_dirent *dent; 765 struct gfs2_inode *ip = GFS2_I(inode); 766 int error; 767 768 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 769 struct gfs2_leaf *leaf; 770 unsigned hsize = 1 << ip->i_depth; 771 unsigned index; 772 u64 ln; 773 if (hsize * sizeof(u64) != i_size_read(inode)) { 774 gfs2_consist_inode(ip); 775 return ERR_PTR(-EIO); 776 } 777 778 index = name->hash >> (32 - ip->i_depth); 779 error = get_first_leaf(ip, index, &bh); 780 if (error) 781 return ERR_PTR(error); 782 do { 783 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 784 scan, name, NULL); 785 if (dent) 786 goto got_dent; 787 leaf = (struct gfs2_leaf *)bh->b_data; 788 ln = be64_to_cpu(leaf->lf_next); 789 brelse(bh); 790 if (!ln) 791 break; 792 793 error = get_leaf(ip, ln, &bh); 794 } while(!error); 795 796 return error ? ERR_PTR(error) : NULL; 797 } 798 799 800 error = gfs2_meta_inode_buffer(ip, &bh); 801 if (error) 802 return ERR_PTR(error); 803 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); 804 got_dent: 805 if (unlikely(dent == NULL || IS_ERR(dent))) { 806 brelse(bh); 807 bh = NULL; 808 } 809 *pbh = bh; 810 return dent; 811 } 812 813 static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) 814 { 815 struct gfs2_inode *ip = GFS2_I(inode); 816 unsigned int n = 1; 817 u64 bn; 818 int error; 819 struct buffer_head *bh; 820 struct gfs2_leaf *leaf; 821 struct gfs2_dirent *dent; 822 struct qstr name = { .name = "", .len = 0, .hash = 0 }; 823 824 error = gfs2_alloc_block(ip, &bn, &n); 825 if (error) 826 return NULL; 827 bh = gfs2_meta_new(ip->i_gl, bn); 828 if (!bh) 829 return NULL; 830 831 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 832 gfs2_trans_add_bh(ip->i_gl, bh, 1); 833 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 834 leaf = (struct gfs2_leaf *)bh->b_data; 835 leaf->lf_depth = cpu_to_be16(depth); 836 leaf->lf_entries = 0; 837 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); 838 leaf->lf_next = 0; 839 memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); 840 dent = (struct gfs2_dirent *)(leaf+1); 841 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); 842 *pbh = bh; 843 return leaf; 844 } 845 846 /** 847 * dir_make_exhash - Convert a stuffed directory into an ExHash directory 848 * @dip: The GFS2 inode 849 * 850 * Returns: 0 on success, error code otherwise 851 */ 852 853 static int dir_make_exhash(struct inode *inode) 854 { 855 struct gfs2_inode *dip = GFS2_I(inode); 856 struct gfs2_sbd *sdp = GFS2_SB(inode); 857 struct gfs2_dirent *dent; 858 struct qstr args; 859 struct buffer_head *bh, *dibh; 860 struct gfs2_leaf *leaf; 861 int y; 862 u32 x; 863 __be64 *lp; 864 u64 bn; 865 int error; 866 867 error = gfs2_meta_inode_buffer(dip, &dibh); 868 if (error) 869 return error; 870 871 /* Turn over a new leaf */ 872 873 leaf = new_leaf(inode, &bh, 0); 874 if (!leaf) 875 return -ENOSPC; 876 bn = bh->b_blocknr; 877 878 gfs2_assert(sdp, dip->i_entries < (1 << 16)); 879 leaf->lf_entries = cpu_to_be16(dip->i_entries); 880 881 /* Copy dirents */ 882 883 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, 884 sizeof(struct gfs2_dinode)); 885 886 /* Find last entry */ 887 888 x = 0; 889 args.len = bh->b_size - sizeof(struct gfs2_dinode) + 890 sizeof(struct gfs2_leaf); 891 args.name = bh->b_data; 892 dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size, 893 gfs2_dirent_last, &args, NULL); 894 if (!dent) { 895 brelse(bh); 896 brelse(dibh); 897 return -EIO; 898 } 899 if (IS_ERR(dent)) { 900 brelse(bh); 901 brelse(dibh); 902 return PTR_ERR(dent); 903 } 904 905 /* Adjust the last dirent's record length 906 (Remember that dent still points to the last entry.) */ 907 908 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) + 909 sizeof(struct gfs2_dinode) - 910 sizeof(struct gfs2_leaf)); 911 912 brelse(bh); 913 914 /* We're done with the new leaf block, now setup the new 915 hash table. */ 916 917 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 918 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 919 920 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 921 922 for (x = sdp->sd_hash_ptrs; x--; lp++) 923 *lp = cpu_to_be64(bn); 924 925 i_size_write(inode, sdp->sd_sb.sb_bsize / 2); 926 gfs2_add_inode_blocks(&dip->i_inode, 1); 927 dip->i_diskflags |= GFS2_DIF_EXHASH; 928 929 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; 930 dip->i_depth = y; 931 932 gfs2_dinode_out(dip, dibh->b_data); 933 934 brelse(dibh); 935 936 return 0; 937 } 938 939 /** 940 * dir_split_leaf - Split a leaf block into two 941 * @dip: The GFS2 inode 942 * @index: 943 * @leaf_no: 944 * 945 * Returns: 0 on success, error code on failure 946 */ 947 948 static int dir_split_leaf(struct inode *inode, const struct qstr *name) 949 { 950 struct gfs2_inode *dip = GFS2_I(inode); 951 struct buffer_head *nbh, *obh, *dibh; 952 struct gfs2_leaf *nleaf, *oleaf; 953 struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new; 954 u32 start, len, half_len, divider; 955 u64 bn, leaf_no; 956 __be64 *lp; 957 u32 index; 958 int x, moved = 0; 959 int error; 960 961 index = name->hash >> (32 - dip->i_depth); 962 error = get_leaf_nr(dip, index, &leaf_no); 963 if (error) 964 return error; 965 966 /* Get the old leaf block */ 967 error = get_leaf(dip, leaf_no, &obh); 968 if (error) 969 return error; 970 971 oleaf = (struct gfs2_leaf *)obh->b_data; 972 if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) { 973 brelse(obh); 974 return 1; /* can't split */ 975 } 976 977 gfs2_trans_add_bh(dip->i_gl, obh, 1); 978 979 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 980 if (!nleaf) { 981 brelse(obh); 982 return -ENOSPC; 983 } 984 bn = nbh->b_blocknr; 985 986 /* Compute the start and len of leaf pointers in the hash table. */ 987 len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); 988 half_len = len >> 1; 989 if (!half_len) { 990 printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); 991 gfs2_consist_inode(dip); 992 error = -EIO; 993 goto fail_brelse; 994 } 995 996 start = (index & ~(len - 1)); 997 998 /* Change the pointers. 999 Don't bother distinguishing stuffed from non-stuffed. 1000 This code is complicated enough already. */ 1001 lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS); 1002 if (!lp) { 1003 error = -ENOMEM; 1004 goto fail_brelse; 1005 } 1006 1007 /* Change the pointers */ 1008 for (x = 0; x < half_len; x++) 1009 lp[x] = cpu_to_be64(bn); 1010 1011 gfs2_dir_hash_inval(dip); 1012 1013 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), 1014 half_len * sizeof(u64)); 1015 if (error != half_len * sizeof(u64)) { 1016 if (error >= 0) 1017 error = -EIO; 1018 goto fail_lpfree; 1019 } 1020 1021 kfree(lp); 1022 1023 /* Compute the divider */ 1024 divider = (start + half_len) << (32 - dip->i_depth); 1025 1026 /* Copy the entries */ 1027 dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf)); 1028 1029 do { 1030 next = dent; 1031 if (dirent_next(dip, obh, &next)) 1032 next = NULL; 1033 1034 if (!gfs2_dirent_sentinel(dent) && 1035 be32_to_cpu(dent->de_hash) < divider) { 1036 struct qstr str; 1037 str.name = (char*)(dent+1); 1038 str.len = be16_to_cpu(dent->de_name_len); 1039 str.hash = be32_to_cpu(dent->de_hash); 1040 new = gfs2_dirent_alloc(inode, nbh, &str); 1041 if (IS_ERR(new)) { 1042 error = PTR_ERR(new); 1043 break; 1044 } 1045 1046 new->de_inum = dent->de_inum; /* No endian worries */ 1047 new->de_type = dent->de_type; /* No endian worries */ 1048 be16_add_cpu(&nleaf->lf_entries, 1); 1049 1050 dirent_del(dip, obh, prev, dent); 1051 1052 if (!oleaf->lf_entries) 1053 gfs2_consist_inode(dip); 1054 be16_add_cpu(&oleaf->lf_entries, -1); 1055 1056 if (!prev) 1057 prev = dent; 1058 1059 moved = 1; 1060 } else { 1061 prev = dent; 1062 } 1063 dent = next; 1064 } while (dent); 1065 1066 oleaf->lf_depth = nleaf->lf_depth; 1067 1068 error = gfs2_meta_inode_buffer(dip, &dibh); 1069 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1070 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1071 gfs2_add_inode_blocks(&dip->i_inode, 1); 1072 gfs2_dinode_out(dip, dibh->b_data); 1073 brelse(dibh); 1074 } 1075 1076 brelse(obh); 1077 brelse(nbh); 1078 1079 return error; 1080 1081 fail_lpfree: 1082 kfree(lp); 1083 1084 fail_brelse: 1085 brelse(obh); 1086 brelse(nbh); 1087 return error; 1088 } 1089 1090 /** 1091 * dir_double_exhash - Double size of ExHash table 1092 * @dip: The GFS2 dinode 1093 * 1094 * Returns: 0 on success, error code on failure 1095 */ 1096 1097 static int dir_double_exhash(struct gfs2_inode *dip) 1098 { 1099 struct buffer_head *dibh; 1100 u32 hsize; 1101 u32 hsize_bytes; 1102 __be64 *hc; 1103 __be64 *hc2, *h; 1104 int x; 1105 int error = 0; 1106 1107 hsize = 1 << dip->i_depth; 1108 hsize_bytes = hsize * sizeof(__be64); 1109 1110 hc = gfs2_dir_get_hash_table(dip); 1111 if (IS_ERR(hc)) 1112 return PTR_ERR(hc); 1113 1114 h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); 1115 if (!hc2) 1116 return -ENOMEM; 1117 1118 error = gfs2_meta_inode_buffer(dip, &dibh); 1119 if (error) 1120 goto out_kfree; 1121 1122 for (x = 0; x < hsize; x++) { 1123 *h++ = *hc; 1124 *h++ = *hc; 1125 hc++; 1126 } 1127 1128 error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2); 1129 if (error != (hsize_bytes * 2)) 1130 goto fail; 1131 1132 gfs2_dir_hash_inval(dip); 1133 dip->i_hash_cache = hc2; 1134 dip->i_depth++; 1135 gfs2_dinode_out(dip, dibh->b_data); 1136 brelse(dibh); 1137 return 0; 1138 1139 fail: 1140 /* Replace original hash table & size */ 1141 gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes); 1142 i_size_write(&dip->i_inode, hsize_bytes); 1143 gfs2_dinode_out(dip, dibh->b_data); 1144 brelse(dibh); 1145 out_kfree: 1146 kfree(hc2); 1147 return error; 1148 } 1149 1150 /** 1151 * compare_dents - compare directory entries by hash value 1152 * @a: first dent 1153 * @b: second dent 1154 * 1155 * When comparing the hash entries of @a to @b: 1156 * gt: returns 1 1157 * lt: returns -1 1158 * eq: returns 0 1159 */ 1160 1161 static int compare_dents(const void *a, const void *b) 1162 { 1163 const struct gfs2_dirent *dent_a, *dent_b; 1164 u32 hash_a, hash_b; 1165 int ret = 0; 1166 1167 dent_a = *(const struct gfs2_dirent **)a; 1168 hash_a = be32_to_cpu(dent_a->de_hash); 1169 1170 dent_b = *(const struct gfs2_dirent **)b; 1171 hash_b = be32_to_cpu(dent_b->de_hash); 1172 1173 if (hash_a > hash_b) 1174 ret = 1; 1175 else if (hash_a < hash_b) 1176 ret = -1; 1177 else { 1178 unsigned int len_a = be16_to_cpu(dent_a->de_name_len); 1179 unsigned int len_b = be16_to_cpu(dent_b->de_name_len); 1180 1181 if (len_a > len_b) 1182 ret = 1; 1183 else if (len_a < len_b) 1184 ret = -1; 1185 else 1186 ret = memcmp(dent_a + 1, dent_b + 1, len_a); 1187 } 1188 1189 return ret; 1190 } 1191 1192 /** 1193 * do_filldir_main - read out directory entries 1194 * @dip: The GFS2 inode 1195 * @offset: The offset in the file to read from 1196 * @opaque: opaque data to pass to filldir 1197 * @filldir: The function to pass entries to 1198 * @darr: an array of struct gfs2_dirent pointers to read 1199 * @entries: the number of entries in darr 1200 * @copied: pointer to int that's non-zero if a entry has been copied out 1201 * 1202 * Jump through some hoops to make sure that if there are hash collsions, 1203 * they are read out at the beginning of a buffer. We want to minimize 1204 * the possibility that they will fall into different readdir buffers or 1205 * that someone will want to seek to that location. 1206 * 1207 * Returns: errno, >0 on exception from filldir 1208 */ 1209 1210 static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, 1211 void *opaque, filldir_t filldir, 1212 const struct gfs2_dirent **darr, u32 entries, 1213 int *copied) 1214 { 1215 const struct gfs2_dirent *dent, *dent_next; 1216 u64 off, off_next; 1217 unsigned int x, y; 1218 int run = 0; 1219 int error = 0; 1220 1221 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); 1222 1223 dent_next = darr[0]; 1224 off_next = be32_to_cpu(dent_next->de_hash); 1225 off_next = gfs2_disk_hash2offset(off_next); 1226 1227 for (x = 0, y = 1; x < entries; x++, y++) { 1228 dent = dent_next; 1229 off = off_next; 1230 1231 if (y < entries) { 1232 dent_next = darr[y]; 1233 off_next = be32_to_cpu(dent_next->de_hash); 1234 off_next = gfs2_disk_hash2offset(off_next); 1235 1236 if (off < *offset) 1237 continue; 1238 *offset = off; 1239 1240 if (off_next == off) { 1241 if (*copied && !run) 1242 return 1; 1243 run = 1; 1244 } else 1245 run = 0; 1246 } else { 1247 if (off < *offset) 1248 continue; 1249 *offset = off; 1250 } 1251 1252 error = filldir(opaque, (const char *)(dent + 1), 1253 be16_to_cpu(dent->de_name_len), 1254 off, be64_to_cpu(dent->de_inum.no_addr), 1255 be16_to_cpu(dent->de_type)); 1256 if (error) 1257 return 1; 1258 1259 *copied = 1; 1260 } 1261 1262 /* Increment the *offset by one, so the next time we come into the 1263 do_filldir fxn, we get the next entry instead of the last one in the 1264 current leaf */ 1265 1266 (*offset)++; 1267 1268 return 0; 1269 } 1270 1271 static void *gfs2_alloc_sort_buffer(unsigned size) 1272 { 1273 void *ptr = NULL; 1274 1275 if (size < KMALLOC_MAX_SIZE) 1276 ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN); 1277 if (!ptr) 1278 ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL); 1279 return ptr; 1280 } 1281 1282 static void gfs2_free_sort_buffer(void *ptr) 1283 { 1284 if (is_vmalloc_addr(ptr)) 1285 vfree(ptr); 1286 else 1287 kfree(ptr); 1288 } 1289 1290 static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, 1291 filldir_t filldir, int *copied, unsigned *depth, 1292 u64 leaf_no) 1293 { 1294 struct gfs2_inode *ip = GFS2_I(inode); 1295 struct gfs2_sbd *sdp = GFS2_SB(inode); 1296 struct buffer_head *bh; 1297 struct gfs2_leaf *lf; 1298 unsigned entries = 0, entries2 = 0; 1299 unsigned leaves = 0; 1300 const struct gfs2_dirent **darr, *dent; 1301 struct dirent_gather g; 1302 struct buffer_head **larr; 1303 int leaf = 0; 1304 int error, i; 1305 u64 lfn = leaf_no; 1306 1307 do { 1308 error = get_leaf(ip, lfn, &bh); 1309 if (error) 1310 goto out; 1311 lf = (struct gfs2_leaf *)bh->b_data; 1312 if (leaves == 0) 1313 *depth = be16_to_cpu(lf->lf_depth); 1314 entries += be16_to_cpu(lf->lf_entries); 1315 leaves++; 1316 lfn = be64_to_cpu(lf->lf_next); 1317 brelse(bh); 1318 } while(lfn); 1319 1320 if (!entries) 1321 return 0; 1322 1323 error = -ENOMEM; 1324 /* 1325 * The extra 99 entries are not normally used, but are a buffer 1326 * zone in case the number of entries in the leaf is corrupt. 1327 * 99 is the maximum number of entries that can fit in a single 1328 * leaf block. 1329 */ 1330 larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *)); 1331 if (!larr) 1332 goto out; 1333 darr = (const struct gfs2_dirent **)(larr + leaves); 1334 g.pdent = darr; 1335 g.offset = 0; 1336 lfn = leaf_no; 1337 1338 do { 1339 error = get_leaf(ip, lfn, &bh); 1340 if (error) 1341 goto out_free; 1342 lf = (struct gfs2_leaf *)bh->b_data; 1343 lfn = be64_to_cpu(lf->lf_next); 1344 if (lf->lf_entries) { 1345 entries2 += be16_to_cpu(lf->lf_entries); 1346 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 1347 gfs2_dirent_gather, NULL, &g); 1348 error = PTR_ERR(dent); 1349 if (IS_ERR(dent)) 1350 goto out_free; 1351 if (entries2 != g.offset) { 1352 fs_warn(sdp, "Number of entries corrupt in dir " 1353 "leaf %llu, entries2 (%u) != " 1354 "g.offset (%u)\n", 1355 (unsigned long long)bh->b_blocknr, 1356 entries2, g.offset); 1357 1358 error = -EIO; 1359 goto out_free; 1360 } 1361 error = 0; 1362 larr[leaf++] = bh; 1363 } else { 1364 brelse(bh); 1365 } 1366 } while(lfn); 1367 1368 BUG_ON(entries2 != entries); 1369 error = do_filldir_main(ip, offset, opaque, filldir, darr, 1370 entries, copied); 1371 out_free: 1372 for(i = 0; i < leaf; i++) 1373 brelse(larr[i]); 1374 gfs2_free_sort_buffer(larr); 1375 out: 1376 return error; 1377 } 1378 1379 1380 /** 1381 * dir_e_read - Reads the entries from a directory into a filldir buffer 1382 * @dip: dinode pointer 1383 * @offset: the hash of the last entry read shifted to the right once 1384 * @opaque: buffer for the filldir function to fill 1385 * @filldir: points to the filldir function to use 1386 * 1387 * Returns: errno 1388 */ 1389 1390 static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, 1391 filldir_t filldir) 1392 { 1393 struct gfs2_inode *dip = GFS2_I(inode); 1394 u32 hsize, len = 0; 1395 u32 hash, index; 1396 __be64 *lp; 1397 int copied = 0; 1398 int error = 0; 1399 unsigned depth = 0; 1400 1401 hsize = 1 << dip->i_depth; 1402 hash = gfs2_dir_offset2hash(*offset); 1403 index = hash >> (32 - dip->i_depth); 1404 1405 lp = gfs2_dir_get_hash_table(dip); 1406 if (IS_ERR(lp)) 1407 return PTR_ERR(lp); 1408 1409 while (index < hsize) { 1410 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, 1411 &copied, &depth, 1412 be64_to_cpu(lp[index])); 1413 if (error) 1414 break; 1415 1416 len = 1 << (dip->i_depth - depth); 1417 index = (index & ~(len - 1)) + len; 1418 } 1419 1420 if (error > 0) 1421 error = 0; 1422 return error; 1423 } 1424 1425 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 1426 filldir_t filldir) 1427 { 1428 struct gfs2_inode *dip = GFS2_I(inode); 1429 struct gfs2_sbd *sdp = GFS2_SB(inode); 1430 struct dirent_gather g; 1431 const struct gfs2_dirent **darr, *dent; 1432 struct buffer_head *dibh; 1433 int copied = 0; 1434 int error; 1435 1436 if (!dip->i_entries) 1437 return 0; 1438 1439 if (dip->i_diskflags & GFS2_DIF_EXHASH) 1440 return dir_e_read(inode, offset, opaque, filldir); 1441 1442 if (!gfs2_is_stuffed(dip)) { 1443 gfs2_consist_inode(dip); 1444 return -EIO; 1445 } 1446 1447 error = gfs2_meta_inode_buffer(dip, &dibh); 1448 if (error) 1449 return error; 1450 1451 error = -ENOMEM; 1452 /* 96 is max number of dirents which can be stuffed into an inode */ 1453 darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS); 1454 if (darr) { 1455 g.pdent = darr; 1456 g.offset = 0; 1457 dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size, 1458 gfs2_dirent_gather, NULL, &g); 1459 if (IS_ERR(dent)) { 1460 error = PTR_ERR(dent); 1461 goto out; 1462 } 1463 if (dip->i_entries != g.offset) { 1464 fs_warn(sdp, "Number of entries corrupt in dir %llu, " 1465 "ip->i_entries (%u) != g.offset (%u)\n", 1466 (unsigned long long)dip->i_no_addr, 1467 dip->i_entries, 1468 g.offset); 1469 error = -EIO; 1470 goto out; 1471 } 1472 error = do_filldir_main(dip, offset, opaque, filldir, darr, 1473 dip->i_entries, &copied); 1474 out: 1475 kfree(darr); 1476 } 1477 1478 if (error > 0) 1479 error = 0; 1480 1481 brelse(dibh); 1482 1483 return error; 1484 } 1485 1486 /** 1487 * gfs2_dir_search - Search a directory 1488 * @dip: The GFS2 inode 1489 * @filename: 1490 * @inode: 1491 * 1492 * This routine searches a directory for a file or another directory. 1493 * Assumes a glock is held on dip. 1494 * 1495 * Returns: errno 1496 */ 1497 1498 struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) 1499 { 1500 struct buffer_head *bh; 1501 struct gfs2_dirent *dent; 1502 struct inode *inode; 1503 1504 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1505 if (dent) { 1506 if (IS_ERR(dent)) 1507 return ERR_CAST(dent); 1508 inode = gfs2_inode_lookup(dir->i_sb, 1509 be16_to_cpu(dent->de_type), 1510 be64_to_cpu(dent->de_inum.no_addr), 1511 be64_to_cpu(dent->de_inum.no_formal_ino), 0); 1512 brelse(bh); 1513 return inode; 1514 } 1515 return ERR_PTR(-ENOENT); 1516 } 1517 1518 int gfs2_dir_check(struct inode *dir, const struct qstr *name, 1519 const struct gfs2_inode *ip) 1520 { 1521 struct buffer_head *bh; 1522 struct gfs2_dirent *dent; 1523 int ret = -ENOENT; 1524 1525 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1526 if (dent) { 1527 if (IS_ERR(dent)) 1528 return PTR_ERR(dent); 1529 if (ip) { 1530 if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr) 1531 goto out; 1532 if (be64_to_cpu(dent->de_inum.no_formal_ino) != 1533 ip->i_no_formal_ino) 1534 goto out; 1535 if (unlikely(IF2DT(ip->i_inode.i_mode) != 1536 be16_to_cpu(dent->de_type))) { 1537 gfs2_consist_inode(GFS2_I(dir)); 1538 ret = -EIO; 1539 goto out; 1540 } 1541 } 1542 ret = 0; 1543 out: 1544 brelse(bh); 1545 } 1546 return ret; 1547 } 1548 1549 static int dir_new_leaf(struct inode *inode, const struct qstr *name) 1550 { 1551 struct buffer_head *bh, *obh; 1552 struct gfs2_inode *ip = GFS2_I(inode); 1553 struct gfs2_leaf *leaf, *oleaf; 1554 int error; 1555 u32 index; 1556 u64 bn; 1557 1558 index = name->hash >> (32 - ip->i_depth); 1559 error = get_first_leaf(ip, index, &obh); 1560 if (error) 1561 return error; 1562 do { 1563 oleaf = (struct gfs2_leaf *)obh->b_data; 1564 bn = be64_to_cpu(oleaf->lf_next); 1565 if (!bn) 1566 break; 1567 brelse(obh); 1568 error = get_leaf(ip, bn, &obh); 1569 if (error) 1570 return error; 1571 } while(1); 1572 1573 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1574 1575 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1576 if (!leaf) { 1577 brelse(obh); 1578 return -ENOSPC; 1579 } 1580 oleaf->lf_next = cpu_to_be64(bh->b_blocknr); 1581 brelse(bh); 1582 brelse(obh); 1583 1584 error = gfs2_meta_inode_buffer(ip, &bh); 1585 if (error) 1586 return error; 1587 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1588 gfs2_add_inode_blocks(&ip->i_inode, 1); 1589 gfs2_dinode_out(ip, bh->b_data); 1590 brelse(bh); 1591 return 0; 1592 } 1593 1594 /** 1595 * gfs2_dir_add - Add new filename into directory 1596 * @dip: The GFS2 inode 1597 * @filename: The new name 1598 * @inode: The inode number of the entry 1599 * @type: The type of the entry 1600 * 1601 * Returns: 0 on success, error code on failure 1602 */ 1603 1604 int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1605 const struct gfs2_inode *nip) 1606 { 1607 struct gfs2_inode *ip = GFS2_I(inode); 1608 struct buffer_head *bh; 1609 struct gfs2_dirent *dent; 1610 struct gfs2_leaf *leaf; 1611 int error; 1612 1613 while(1) { 1614 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, 1615 &bh); 1616 if (dent) { 1617 if (IS_ERR(dent)) 1618 return PTR_ERR(dent); 1619 dent = gfs2_init_dirent(inode, dent, name, bh); 1620 gfs2_inum_out(nip, dent); 1621 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); 1622 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1623 leaf = (struct gfs2_leaf *)bh->b_data; 1624 be16_add_cpu(&leaf->lf_entries, 1); 1625 } 1626 brelse(bh); 1627 error = gfs2_meta_inode_buffer(ip, &bh); 1628 if (error) 1629 break; 1630 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1631 ip->i_entries++; 1632 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1633 if (S_ISDIR(nip->i_inode.i_mode)) 1634 inc_nlink(&ip->i_inode); 1635 gfs2_dinode_out(ip, bh->b_data); 1636 brelse(bh); 1637 error = 0; 1638 break; 1639 } 1640 if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) { 1641 error = dir_make_exhash(inode); 1642 if (error) 1643 break; 1644 continue; 1645 } 1646 error = dir_split_leaf(inode, name); 1647 if (error == 0) 1648 continue; 1649 if (error < 0) 1650 break; 1651 if (ip->i_depth < GFS2_DIR_MAX_DEPTH) { 1652 error = dir_double_exhash(ip); 1653 if (error) 1654 break; 1655 error = dir_split_leaf(inode, name); 1656 if (error < 0) 1657 break; 1658 if (error == 0) 1659 continue; 1660 } 1661 error = dir_new_leaf(inode, name); 1662 if (!error) 1663 continue; 1664 error = -ENOSPC; 1665 break; 1666 } 1667 return error; 1668 } 1669 1670 1671 /** 1672 * gfs2_dir_del - Delete a directory entry 1673 * @dip: The GFS2 inode 1674 * @filename: The filename 1675 * 1676 * Returns: 0 on success, error code on failure 1677 */ 1678 1679 int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) 1680 { 1681 const struct qstr *name = &dentry->d_name; 1682 struct gfs2_dirent *dent, *prev = NULL; 1683 struct buffer_head *bh; 1684 1685 /* Returns _either_ the entry (if its first in block) or the 1686 previous entry otherwise */ 1687 dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh); 1688 if (!dent) { 1689 gfs2_consist_inode(dip); 1690 return -EIO; 1691 } 1692 if (IS_ERR(dent)) { 1693 gfs2_consist_inode(dip); 1694 return PTR_ERR(dent); 1695 } 1696 /* If not first in block, adjust pointers accordingly */ 1697 if (gfs2_dirent_find(dent, name, NULL) == 0) { 1698 prev = dent; 1699 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len)); 1700 } 1701 1702 dirent_del(dip, bh, prev, dent); 1703 if (dip->i_diskflags & GFS2_DIF_EXHASH) { 1704 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; 1705 u16 entries = be16_to_cpu(leaf->lf_entries); 1706 if (!entries) 1707 gfs2_consist_inode(dip); 1708 leaf->lf_entries = cpu_to_be16(--entries); 1709 } 1710 brelse(bh); 1711 1712 if (!dip->i_entries) 1713 gfs2_consist_inode(dip); 1714 dip->i_entries--; 1715 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1716 if (S_ISDIR(dentry->d_inode->i_mode)) 1717 drop_nlink(&dip->i_inode); 1718 mark_inode_dirty(&dip->i_inode); 1719 1720 return 0; 1721 } 1722 1723 /** 1724 * gfs2_dir_mvino - Change inode number of directory entry 1725 * @dip: The GFS2 inode 1726 * @filename: 1727 * @new_inode: 1728 * 1729 * This routine changes the inode number of a directory entry. It's used 1730 * by rename to change ".." when a directory is moved. 1731 * Assumes a glock is held on dvp. 1732 * 1733 * Returns: errno 1734 */ 1735 1736 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 1737 const struct gfs2_inode *nip, unsigned int new_type) 1738 { 1739 struct buffer_head *bh; 1740 struct gfs2_dirent *dent; 1741 int error; 1742 1743 dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh); 1744 if (!dent) { 1745 gfs2_consist_inode(dip); 1746 return -EIO; 1747 } 1748 if (IS_ERR(dent)) 1749 return PTR_ERR(dent); 1750 1751 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1752 gfs2_inum_out(nip, dent); 1753 dent->de_type = cpu_to_be16(new_type); 1754 1755 if (dip->i_diskflags & GFS2_DIF_EXHASH) { 1756 brelse(bh); 1757 error = gfs2_meta_inode_buffer(dip, &bh); 1758 if (error) 1759 return error; 1760 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1761 } 1762 1763 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1764 gfs2_dinode_out(dip, bh->b_data); 1765 brelse(bh); 1766 return 0; 1767 } 1768 1769 /** 1770 * leaf_dealloc - Deallocate a directory leaf 1771 * @dip: the directory 1772 * @index: the hash table offset in the directory 1773 * @len: the number of pointers to this leaf 1774 * @leaf_no: the leaf number 1775 * @leaf_bh: buffer_head for the starting leaf 1776 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0 1777 * 1778 * Returns: errno 1779 */ 1780 1781 static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1782 u64 leaf_no, struct buffer_head *leaf_bh, 1783 int last_dealloc) 1784 { 1785 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1786 struct gfs2_leaf *tmp_leaf; 1787 struct gfs2_rgrp_list rlist; 1788 struct buffer_head *bh, *dibh; 1789 u64 blk, nblk; 1790 unsigned int rg_blocks = 0, l_blocks = 0; 1791 char *ht; 1792 unsigned int x, size = len * sizeof(u64); 1793 int error; 1794 1795 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1796 1797 ht = kzalloc(size, GFP_NOFS); 1798 if (!ht) 1799 return -ENOMEM; 1800 1801 if (!gfs2_alloc_get(dip)) { 1802 error = -ENOMEM; 1803 goto out; 1804 } 1805 1806 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1807 if (error) 1808 goto out_put; 1809 1810 /* Count the number of leaves */ 1811 bh = leaf_bh; 1812 1813 for (blk = leaf_no; blk; blk = nblk) { 1814 if (blk != leaf_no) { 1815 error = get_leaf(dip, blk, &bh); 1816 if (error) 1817 goto out_rlist; 1818 } 1819 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1820 nblk = be64_to_cpu(tmp_leaf->lf_next); 1821 if (blk != leaf_no) 1822 brelse(bh); 1823 1824 gfs2_rlist_add(dip, &rlist, blk); 1825 l_blocks++; 1826 } 1827 1828 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); 1829 1830 for (x = 0; x < rlist.rl_rgrps; x++) { 1831 struct gfs2_rgrpd *rgd; 1832 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1833 rg_blocks += rgd->rd_length; 1834 } 1835 1836 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1837 if (error) 1838 goto out_rlist; 1839 1840 error = gfs2_trans_begin(sdp, 1841 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) + 1842 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks); 1843 if (error) 1844 goto out_rg_gunlock; 1845 1846 bh = leaf_bh; 1847 1848 for (blk = leaf_no; blk; blk = nblk) { 1849 if (blk != leaf_no) { 1850 error = get_leaf(dip, blk, &bh); 1851 if (error) 1852 goto out_end_trans; 1853 } 1854 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1855 nblk = be64_to_cpu(tmp_leaf->lf_next); 1856 if (blk != leaf_no) 1857 brelse(bh); 1858 1859 gfs2_free_meta(dip, blk, 1); 1860 gfs2_add_inode_blocks(&dip->i_inode, -1); 1861 } 1862 1863 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); 1864 if (error != size) { 1865 if (error >= 0) 1866 error = -EIO; 1867 goto out_end_trans; 1868 } 1869 1870 error = gfs2_meta_inode_buffer(dip, &dibh); 1871 if (error) 1872 goto out_end_trans; 1873 1874 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1875 /* On the last dealloc, make this a regular file in case we crash. 1876 (We don't want to free these blocks a second time.) */ 1877 if (last_dealloc) 1878 dip->i_inode.i_mode = S_IFREG; 1879 gfs2_dinode_out(dip, dibh->b_data); 1880 brelse(dibh); 1881 1882 out_end_trans: 1883 gfs2_trans_end(sdp); 1884 out_rg_gunlock: 1885 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); 1886 out_rlist: 1887 gfs2_rlist_free(&rlist); 1888 gfs2_quota_unhold(dip); 1889 out_put: 1890 gfs2_alloc_put(dip); 1891 out: 1892 kfree(ht); 1893 return error; 1894 } 1895 1896 /** 1897 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory 1898 * @dip: the directory 1899 * 1900 * Dealloc all on-disk directory leaves to FREEMETA state 1901 * Change on-disk inode type to "regular file" 1902 * 1903 * Returns: errno 1904 */ 1905 1906 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) 1907 { 1908 struct buffer_head *bh; 1909 struct gfs2_leaf *leaf; 1910 u32 hsize, len; 1911 u32 index = 0, next_index; 1912 __be64 *lp; 1913 u64 leaf_no; 1914 int error = 0, last; 1915 1916 hsize = 1 << dip->i_depth; 1917 1918 lp = gfs2_dir_get_hash_table(dip); 1919 if (IS_ERR(lp)) 1920 return PTR_ERR(lp); 1921 1922 while (index < hsize) { 1923 leaf_no = be64_to_cpu(lp[index]); 1924 if (leaf_no) { 1925 error = get_leaf(dip, leaf_no, &bh); 1926 if (error) 1927 goto out; 1928 leaf = (struct gfs2_leaf *)bh->b_data; 1929 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); 1930 1931 next_index = (index & ~(len - 1)) + len; 1932 last = ((next_index >= hsize) ? 1 : 0); 1933 error = leaf_dealloc(dip, index, len, leaf_no, bh, 1934 last); 1935 brelse(bh); 1936 if (error) 1937 goto out; 1938 index = next_index; 1939 } else 1940 index++; 1941 } 1942 1943 if (index != hsize) { 1944 gfs2_consist_inode(dip); 1945 error = -EIO; 1946 } 1947 1948 out: 1949 1950 return error; 1951 } 1952 1953 /** 1954 * gfs2_diradd_alloc_required - find if adding entry will require an allocation 1955 * @ip: the file being written to 1956 * @filname: the filename that's going to be added 1957 * 1958 * Returns: 1 if alloc required, 0 if not, -ve on error 1959 */ 1960 1961 int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) 1962 { 1963 struct gfs2_dirent *dent; 1964 struct buffer_head *bh; 1965 1966 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); 1967 if (!dent) { 1968 return 1; 1969 } 1970 if (IS_ERR(dent)) 1971 return PTR_ERR(dent); 1972 brelse(bh); 1973 return 0; 1974 } 1975 1976