1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 /* 11 * Implements Extendible Hashing as described in: 12 * "Extendible Hashing" by Fagin, et al in 13 * __ACM Trans. on Database Systems__, Sept 1979. 14 * 15 * 16 * Here's the layout of dirents which is essentially the same as that of ext2 17 * within a single block. The field de_name_len is the number of bytes 18 * actually required for the name (no null terminator). The field de_rec_len 19 * is the number of bytes allocated to the dirent. The offset of the next 20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is 21 * deleted, the preceding dirent inherits its allocated space, ie 22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained 23 * by adding de_rec_len to the current dirent, this essentially causes the 24 * deleted dirent to get jumped over when iterating through all the dirents. 25 * 26 * When deleting the first dirent in a block, there is no previous dirent so 27 * the field de_ino is set to zero to designate it as deleted. When allocating 28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the 29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first 30 * dirent is allocated. Otherwise it must go through all the 'used' dirents 31 * searching for one in which the amount of total space minus the amount of 32 * used space will provide enough space for the new dirent. 33 * 34 * There are two types of blocks in which dirents reside. In a stuffed dinode, 35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of 36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the 37 * beginning of the leaf block. The dirents reside in leaves when 38 * 39 * dip->i_di.di_flags & GFS2_DIF_EXHASH is true 40 * 41 * Otherwise, the dirents are "linear", within a single stuffed dinode block. 42 * 43 * When the dirents are in leaves, the actual contents of the directory file are 44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The 45 * dirents are NOT in the directory file itself. There can be more than one 46 * block pointer in the array that points to the same leaf. In fact, when a 47 * directory is first converted from linear to exhash, all of the pointers 48 * point to the same leaf. 49 * 50 * When a leaf is completely full, the size of the hash table can be 51 * doubled unless it is already at the maximum size which is hard coded into 52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, 53 * but never before the maximum hash table size has been reached. 54 */ 55 56 #include <linux/sched.h> 57 #include <linux/slab.h> 58 #include <linux/spinlock.h> 59 #include <linux/buffer_head.h> 60 #include <linux/sort.h> 61 #include <linux/gfs2_ondisk.h> 62 #include <linux/crc32.h> 63 #include <linux/vmalloc.h> 64 #include <linux/lm_interface.h> 65 66 #include "gfs2.h" 67 #include "incore.h" 68 #include "dir.h" 69 #include "glock.h" 70 #include "inode.h" 71 #include "meta_io.h" 72 #include "quota.h" 73 #include "rgrp.h" 74 #include "trans.h" 75 #include "bmap.h" 76 #include "util.h" 77 78 #define IS_LEAF 1 /* Hashed (leaf) directory */ 79 #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ 80 81 #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) 82 #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) 83 84 typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, 85 u64 leaf_no, void *data); 86 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 87 const struct qstr *name, void *opaque); 88 89 90 int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 91 struct buffer_head **bhp) 92 { 93 struct buffer_head *bh; 94 95 bh = gfs2_meta_new(ip->i_gl, block); 96 gfs2_trans_add_bh(ip->i_gl, bh, 1); 97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 99 *bhp = bh; 100 return 0; 101 } 102 103 static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block, 104 struct buffer_head **bhp) 105 { 106 struct buffer_head *bh; 107 int error; 108 109 error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh); 110 if (error) 111 return error; 112 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) { 113 brelse(bh); 114 return -EIO; 115 } 116 *bhp = bh; 117 return 0; 118 } 119 120 static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, 121 unsigned int offset, unsigned int size) 122 { 123 struct buffer_head *dibh; 124 int error; 125 126 error = gfs2_meta_inode_buffer(ip, &dibh); 127 if (error) 128 return error; 129 130 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 132 if (ip->i_di.di_size < offset + size) 133 ip->i_di.di_size = offset + size; 134 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 135 gfs2_dinode_out(ip, dibh->b_data); 136 137 brelse(dibh); 138 139 return size; 140 } 141 142 143 144 /** 145 * gfs2_dir_write_data - Write directory information to the inode 146 * @ip: The GFS2 inode 147 * @buf: The buffer containing information to be written 148 * @offset: The file offset to start writing at 149 * @size: The amount of data to write 150 * 151 * Returns: The number of bytes correctly written or error code 152 */ 153 static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, 154 u64 offset, unsigned int size) 155 { 156 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 157 struct buffer_head *dibh; 158 u64 lblock, dblock; 159 u32 extlen = 0; 160 unsigned int o; 161 int copied = 0; 162 int error = 0; 163 164 if (!size) 165 return 0; 166 167 if (gfs2_is_stuffed(ip) && 168 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) 169 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset, 170 size); 171 172 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) 173 return -EINVAL; 174 175 if (gfs2_is_stuffed(ip)) { 176 error = gfs2_unstuff_dinode(ip, NULL); 177 if (error) 178 return error; 179 } 180 181 lblock = offset; 182 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); 183 184 while (copied < size) { 185 unsigned int amount; 186 struct buffer_head *bh; 187 int new = 0; 188 189 amount = size - copied; 190 if (amount > sdp->sd_sb.sb_bsize - o) 191 amount = sdp->sd_sb.sb_bsize - o; 192 193 if (!extlen) { 194 new = 1; 195 error = gfs2_extent_map(&ip->i_inode, lblock, &new, 196 &dblock, &extlen); 197 if (error) 198 goto fail; 199 error = -EIO; 200 if (gfs2_assert_withdraw(sdp, dblock)) 201 goto fail; 202 } 203 204 if (amount == sdp->sd_jbsize || new) 205 error = gfs2_dir_get_new_buffer(ip, dblock, &bh); 206 else 207 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh); 208 209 if (error) 210 goto fail; 211 212 gfs2_trans_add_bh(ip->i_gl, bh, 1); 213 memcpy(bh->b_data + o, buf, amount); 214 brelse(bh); 215 216 buf += amount; 217 copied += amount; 218 lblock++; 219 dblock++; 220 extlen--; 221 222 o = sizeof(struct gfs2_meta_header); 223 } 224 225 out: 226 error = gfs2_meta_inode_buffer(ip, &dibh); 227 if (error) 228 return error; 229 230 if (ip->i_di.di_size < offset + copied) 231 ip->i_di.di_size = offset + copied; 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 233 234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 235 gfs2_dinode_out(ip, dibh->b_data); 236 brelse(dibh); 237 238 return copied; 239 fail: 240 if (copied) 241 goto out; 242 return error; 243 } 244 245 static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf, 246 u64 offset, unsigned int size) 247 { 248 struct buffer_head *dibh; 249 int error; 250 251 error = gfs2_meta_inode_buffer(ip, &dibh); 252 if (!error) { 253 offset += sizeof(struct gfs2_dinode); 254 memcpy(buf, dibh->b_data + offset, size); 255 brelse(dibh); 256 } 257 258 return (error) ? error : size; 259 } 260 261 262 /** 263 * gfs2_dir_read_data - Read a data from a directory inode 264 * @ip: The GFS2 Inode 265 * @buf: The buffer to place result into 266 * @offset: File offset to begin jdata_readng from 267 * @size: Amount of data to transfer 268 * 269 * Returns: The amount of data actually copied or the error 270 */ 271 static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, 272 unsigned int size, unsigned ra) 273 { 274 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 275 u64 lblock, dblock; 276 u32 extlen = 0; 277 unsigned int o; 278 int copied = 0; 279 int error = 0; 280 281 if (offset >= ip->i_di.di_size) 282 return 0; 283 284 if (offset + size > ip->i_di.di_size) 285 size = ip->i_di.di_size - offset; 286 287 if (!size) 288 return 0; 289 290 if (gfs2_is_stuffed(ip)) 291 return gfs2_dir_read_stuffed(ip, buf, offset, size); 292 293 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) 294 return -EINVAL; 295 296 lblock = offset; 297 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); 298 299 while (copied < size) { 300 unsigned int amount; 301 struct buffer_head *bh; 302 int new; 303 304 amount = size - copied; 305 if (amount > sdp->sd_sb.sb_bsize - o) 306 amount = sdp->sd_sb.sb_bsize - o; 307 308 if (!extlen) { 309 new = 0; 310 error = gfs2_extent_map(&ip->i_inode, lblock, &new, 311 &dblock, &extlen); 312 if (error || !dblock) 313 goto fail; 314 BUG_ON(extlen < 1); 315 if (!ra) 316 extlen = 1; 317 bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); 318 } else { 319 error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh); 320 if (error) 321 goto fail; 322 } 323 error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD); 324 if (error) { 325 brelse(bh); 326 goto fail; 327 } 328 dblock++; 329 extlen--; 330 memcpy(buf, bh->b_data + o, amount); 331 brelse(bh); 332 buf += amount; 333 copied += amount; 334 lblock++; 335 o = sizeof(struct gfs2_meta_header); 336 } 337 338 return copied; 339 fail: 340 return (copied) ? copied : error; 341 } 342 343 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) 344 { 345 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; 346 } 347 348 static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent, 349 const struct qstr *name, int ret) 350 { 351 if (!gfs2_dirent_sentinel(dent) && 352 be32_to_cpu(dent->de_hash) == name->hash && 353 be16_to_cpu(dent->de_name_len) == name->len && 354 memcmp(dent+1, name->name, name->len) == 0) 355 return ret; 356 return 0; 357 } 358 359 static int gfs2_dirent_find(const struct gfs2_dirent *dent, 360 const struct qstr *name, 361 void *opaque) 362 { 363 return __gfs2_dirent_find(dent, name, 1); 364 } 365 366 static int gfs2_dirent_prev(const struct gfs2_dirent *dent, 367 const struct qstr *name, 368 void *opaque) 369 { 370 return __gfs2_dirent_find(dent, name, 2); 371 } 372 373 /* 374 * name->name holds ptr to start of block. 375 * name->len holds size of block. 376 */ 377 static int gfs2_dirent_last(const struct gfs2_dirent *dent, 378 const struct qstr *name, 379 void *opaque) 380 { 381 const char *start = name->name; 382 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len); 383 if (name->len == (end - start)) 384 return 1; 385 return 0; 386 } 387 388 static int gfs2_dirent_find_space(const struct gfs2_dirent *dent, 389 const struct qstr *name, 390 void *opaque) 391 { 392 unsigned required = GFS2_DIRENT_SIZE(name->len); 393 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 394 unsigned totlen = be16_to_cpu(dent->de_rec_len); 395 396 if (gfs2_dirent_sentinel(dent)) 397 actual = GFS2_DIRENT_SIZE(0); 398 if (totlen - actual >= required) 399 return 1; 400 return 0; 401 } 402 403 struct dirent_gather { 404 const struct gfs2_dirent **pdent; 405 unsigned offset; 406 }; 407 408 static int gfs2_dirent_gather(const struct gfs2_dirent *dent, 409 const struct qstr *name, 410 void *opaque) 411 { 412 struct dirent_gather *g = opaque; 413 if (!gfs2_dirent_sentinel(dent)) { 414 g->pdent[g->offset++] = dent; 415 } 416 return 0; 417 } 418 419 /* 420 * Other possible things to check: 421 * - Inode located within filesystem size (and on valid block) 422 * - Valid directory entry type 423 * Not sure how heavy-weight we want to make this... could also check 424 * hash is correct for example, but that would take a lot of extra time. 425 * For now the most important thing is to check that the various sizes 426 * are correct. 427 */ 428 static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, 429 unsigned int size, unsigned int len, int first) 430 { 431 const char *msg = "gfs2_dirent too small"; 432 if (unlikely(size < sizeof(struct gfs2_dirent))) 433 goto error; 434 msg = "gfs2_dirent misaligned"; 435 if (unlikely(offset & 0x7)) 436 goto error; 437 msg = "gfs2_dirent points beyond end of block"; 438 if (unlikely(offset + size > len)) 439 goto error; 440 msg = "zero inode number"; 441 if (unlikely(!first && gfs2_dirent_sentinel(dent))) 442 goto error; 443 msg = "name length is greater than space in dirent"; 444 if (!gfs2_dirent_sentinel(dent) && 445 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) > 446 size)) 447 goto error; 448 return 0; 449 error: 450 printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, 451 first ? "first in block" : "not first in block"); 452 return -EIO; 453 } 454 455 static int gfs2_dirent_offset(const void *buf) 456 { 457 const struct gfs2_meta_header *h = buf; 458 int offset; 459 460 BUG_ON(buf == NULL); 461 462 switch(be32_to_cpu(h->mh_type)) { 463 case GFS2_METATYPE_LF: 464 offset = sizeof(struct gfs2_leaf); 465 break; 466 case GFS2_METATYPE_DI: 467 offset = sizeof(struct gfs2_dinode); 468 break; 469 default: 470 goto wrong_type; 471 } 472 return offset; 473 wrong_type: 474 printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", 475 be32_to_cpu(h->mh_type)); 476 return -1; 477 } 478 479 static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf, 480 unsigned int len, gfs2_dscan_t scan, 481 const struct qstr *name, 482 void *opaque) 483 { 484 struct gfs2_dirent *dent, *prev; 485 unsigned offset; 486 unsigned size; 487 int ret = 0; 488 489 ret = gfs2_dirent_offset(buf); 490 if (ret < 0) 491 goto consist_inode; 492 493 offset = ret; 494 prev = NULL; 495 dent = buf + offset; 496 size = be16_to_cpu(dent->de_rec_len); 497 if (gfs2_check_dirent(dent, offset, size, len, 1)) 498 goto consist_inode; 499 do { 500 ret = scan(dent, name, opaque); 501 if (ret) 502 break; 503 offset += size; 504 if (offset == len) 505 break; 506 prev = dent; 507 dent = buf + offset; 508 size = be16_to_cpu(dent->de_rec_len); 509 if (gfs2_check_dirent(dent, offset, size, len, 0)) 510 goto consist_inode; 511 } while(1); 512 513 switch(ret) { 514 case 0: 515 return NULL; 516 case 1: 517 return dent; 518 case 2: 519 return prev ? prev : dent; 520 default: 521 BUG_ON(ret > 0); 522 return ERR_PTR(ret); 523 } 524 525 consist_inode: 526 gfs2_consist_inode(GFS2_I(inode)); 527 return ERR_PTR(-EIO); 528 } 529 530 531 /** 532 * dirent_first - Return the first dirent 533 * @dip: the directory 534 * @bh: The buffer 535 * @dent: Pointer to list of dirents 536 * 537 * return first dirent whether bh points to leaf or stuffed dinode 538 * 539 * Returns: IS_LEAF, IS_DINODE, or -errno 540 */ 541 542 static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, 543 struct gfs2_dirent **dent) 544 { 545 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; 546 547 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { 548 if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh)) 549 return -EIO; 550 *dent = (struct gfs2_dirent *)(bh->b_data + 551 sizeof(struct gfs2_leaf)); 552 return IS_LEAF; 553 } else { 554 if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI)) 555 return -EIO; 556 *dent = (struct gfs2_dirent *)(bh->b_data + 557 sizeof(struct gfs2_dinode)); 558 return IS_DINODE; 559 } 560 } 561 562 static int dirent_check_reclen(struct gfs2_inode *dip, 563 const struct gfs2_dirent *d, const void *end_p) 564 { 565 const void *ptr = d; 566 u16 rec_len = be16_to_cpu(d->de_rec_len); 567 568 if (unlikely(rec_len < sizeof(struct gfs2_dirent))) 569 goto broken; 570 ptr += rec_len; 571 if (ptr < end_p) 572 return rec_len; 573 if (ptr == end_p) 574 return -ENOENT; 575 broken: 576 gfs2_consist_inode(dip); 577 return -EIO; 578 } 579 580 /** 581 * dirent_next - Next dirent 582 * @dip: the directory 583 * @bh: The buffer 584 * @dent: Pointer to list of dirents 585 * 586 * Returns: 0 on success, error code otherwise 587 */ 588 589 static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, 590 struct gfs2_dirent **dent) 591 { 592 struct gfs2_dirent *cur = *dent, *tmp; 593 char *bh_end = bh->b_data + bh->b_size; 594 int ret; 595 596 ret = dirent_check_reclen(dip, cur, bh_end); 597 if (ret < 0) 598 return ret; 599 600 tmp = (void *)cur + ret; 601 ret = dirent_check_reclen(dip, tmp, bh_end); 602 if (ret == -EIO) 603 return ret; 604 605 /* Only the first dent could ever have de_inum.no_addr == 0 */ 606 if (gfs2_dirent_sentinel(tmp)) { 607 gfs2_consist_inode(dip); 608 return -EIO; 609 } 610 611 *dent = tmp; 612 return 0; 613 } 614 615 /** 616 * dirent_del - Delete a dirent 617 * @dip: The GFS2 inode 618 * @bh: The buffer 619 * @prev: The previous dirent 620 * @cur: The current dirent 621 * 622 */ 623 624 static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, 625 struct gfs2_dirent *prev, struct gfs2_dirent *cur) 626 { 627 u16 cur_rec_len, prev_rec_len; 628 629 if (gfs2_dirent_sentinel(cur)) { 630 gfs2_consist_inode(dip); 631 return; 632 } 633 634 gfs2_trans_add_bh(dip->i_gl, bh, 1); 635 636 /* If there is no prev entry, this is the first entry in the block. 637 The de_rec_len is already as big as it needs to be. Just zero 638 out the inode number and return. */ 639 640 if (!prev) { 641 cur->de_inum.no_addr = 0; 642 cur->de_inum.no_formal_ino = 0; 643 return; 644 } 645 646 /* Combine this dentry with the previous one. */ 647 648 prev_rec_len = be16_to_cpu(prev->de_rec_len); 649 cur_rec_len = be16_to_cpu(cur->de_rec_len); 650 651 if ((char *)prev + prev_rec_len != (char *)cur) 652 gfs2_consist_inode(dip); 653 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size) 654 gfs2_consist_inode(dip); 655 656 prev_rec_len += cur_rec_len; 657 prev->de_rec_len = cpu_to_be16(prev_rec_len); 658 } 659 660 /* 661 * Takes a dent from which to grab space as an argument. Returns the 662 * newly created dent. 663 */ 664 static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, 665 struct gfs2_dirent *dent, 666 const struct qstr *name, 667 struct buffer_head *bh) 668 { 669 struct gfs2_inode *ip = GFS2_I(inode); 670 struct gfs2_dirent *ndent; 671 unsigned offset = 0, totlen; 672 673 if (!gfs2_dirent_sentinel(dent)) 674 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 675 totlen = be16_to_cpu(dent->de_rec_len); 676 BUG_ON(offset + name->len > totlen); 677 gfs2_trans_add_bh(ip->i_gl, bh, 1); 678 ndent = (struct gfs2_dirent *)((char *)dent + offset); 679 dent->de_rec_len = cpu_to_be16(offset); 680 gfs2_qstr2dirent(name, totlen - offset, ndent); 681 return ndent; 682 } 683 684 static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode, 685 struct buffer_head *bh, 686 const struct qstr *name) 687 { 688 struct gfs2_dirent *dent; 689 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 690 gfs2_dirent_find_space, name, NULL); 691 if (!dent || IS_ERR(dent)) 692 return dent; 693 return gfs2_init_dirent(inode, dent, name, bh); 694 } 695 696 static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, 697 struct buffer_head **bhp) 698 { 699 int error; 700 701 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); 702 if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { 703 /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ 704 error = -EIO; 705 } 706 707 return error; 708 } 709 710 /** 711 * get_leaf_nr - Get a leaf number associated with the index 712 * @dip: The GFS2 inode 713 * @index: 714 * @leaf_out: 715 * 716 * Returns: 0 on success, error code otherwise 717 */ 718 719 static int get_leaf_nr(struct gfs2_inode *dip, u32 index, 720 u64 *leaf_out) 721 { 722 __be64 leaf_no; 723 int error; 724 725 error = gfs2_dir_read_data(dip, (char *)&leaf_no, 726 index * sizeof(__be64), 727 sizeof(__be64), 0); 728 if (error != sizeof(u64)) 729 return (error < 0) ? error : -EIO; 730 731 *leaf_out = be64_to_cpu(leaf_no); 732 733 return 0; 734 } 735 736 static int get_first_leaf(struct gfs2_inode *dip, u32 index, 737 struct buffer_head **bh_out) 738 { 739 u64 leaf_no; 740 int error; 741 742 error = get_leaf_nr(dip, index, &leaf_no); 743 if (!error) 744 error = get_leaf(dip, leaf_no, bh_out); 745 746 return error; 747 } 748 749 static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, 750 const struct qstr *name, 751 gfs2_dscan_t scan, 752 struct buffer_head **pbh) 753 { 754 struct buffer_head *bh; 755 struct gfs2_dirent *dent; 756 struct gfs2_inode *ip = GFS2_I(inode); 757 int error; 758 759 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { 760 struct gfs2_leaf *leaf; 761 unsigned hsize = 1 << ip->i_di.di_depth; 762 unsigned index; 763 u64 ln; 764 if (hsize * sizeof(u64) != ip->i_di.di_size) { 765 gfs2_consist_inode(ip); 766 return ERR_PTR(-EIO); 767 } 768 769 index = name->hash >> (32 - ip->i_di.di_depth); 770 error = get_first_leaf(ip, index, &bh); 771 if (error) 772 return ERR_PTR(error); 773 do { 774 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 775 scan, name, NULL); 776 if (dent) 777 goto got_dent; 778 leaf = (struct gfs2_leaf *)bh->b_data; 779 ln = be64_to_cpu(leaf->lf_next); 780 brelse(bh); 781 if (!ln) 782 break; 783 784 error = get_leaf(ip, ln, &bh); 785 } while(!error); 786 787 return error ? ERR_PTR(error) : NULL; 788 } 789 790 791 error = gfs2_meta_inode_buffer(ip, &bh); 792 if (error) 793 return ERR_PTR(error); 794 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL); 795 got_dent: 796 if (unlikely(dent == NULL || IS_ERR(dent))) { 797 brelse(bh); 798 bh = NULL; 799 } 800 *pbh = bh; 801 return dent; 802 } 803 804 static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) 805 { 806 struct gfs2_inode *ip = GFS2_I(inode); 807 u64 bn = gfs2_alloc_meta(ip); 808 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); 809 struct gfs2_leaf *leaf; 810 struct gfs2_dirent *dent; 811 struct qstr name = { .name = "", .len = 0, .hash = 0 }; 812 if (!bh) 813 return NULL; 814 815 gfs2_trans_add_bh(ip->i_gl, bh, 1); 816 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 817 leaf = (struct gfs2_leaf *)bh->b_data; 818 leaf->lf_depth = cpu_to_be16(depth); 819 leaf->lf_entries = 0; 820 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); 821 leaf->lf_next = 0; 822 memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); 823 dent = (struct gfs2_dirent *)(leaf+1); 824 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); 825 *pbh = bh; 826 return leaf; 827 } 828 829 /** 830 * dir_make_exhash - Convert a stuffed directory into an ExHash directory 831 * @dip: The GFS2 inode 832 * 833 * Returns: 0 on success, error code otherwise 834 */ 835 836 static int dir_make_exhash(struct inode *inode) 837 { 838 struct gfs2_inode *dip = GFS2_I(inode); 839 struct gfs2_sbd *sdp = GFS2_SB(inode); 840 struct gfs2_dirent *dent; 841 struct qstr args; 842 struct buffer_head *bh, *dibh; 843 struct gfs2_leaf *leaf; 844 int y; 845 u32 x; 846 __be64 *lp; 847 u64 bn; 848 int error; 849 850 error = gfs2_meta_inode_buffer(dip, &dibh); 851 if (error) 852 return error; 853 854 /* Turn over a new leaf */ 855 856 leaf = new_leaf(inode, &bh, 0); 857 if (!leaf) 858 return -ENOSPC; 859 bn = bh->b_blocknr; 860 861 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16)); 862 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries); 863 864 /* Copy dirents */ 865 866 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, 867 sizeof(struct gfs2_dinode)); 868 869 /* Find last entry */ 870 871 x = 0; 872 args.len = bh->b_size - sizeof(struct gfs2_dinode) + 873 sizeof(struct gfs2_leaf); 874 args.name = bh->b_data; 875 dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size, 876 gfs2_dirent_last, &args, NULL); 877 if (!dent) { 878 brelse(bh); 879 brelse(dibh); 880 return -EIO; 881 } 882 if (IS_ERR(dent)) { 883 brelse(bh); 884 brelse(dibh); 885 return PTR_ERR(dent); 886 } 887 888 /* Adjust the last dirent's record length 889 (Remember that dent still points to the last entry.) */ 890 891 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) + 892 sizeof(struct gfs2_dinode) - 893 sizeof(struct gfs2_leaf)); 894 895 brelse(bh); 896 897 /* We're done with the new leaf block, now setup the new 898 hash table. */ 899 900 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 901 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 902 903 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 904 905 for (x = sdp->sd_hash_ptrs; x--; lp++) 906 *lp = cpu_to_be64(bn); 907 908 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; 909 dip->i_di.di_blocks++; 910 gfs2_set_inode_blocks(&dip->i_inode); 911 dip->i_di.di_flags |= GFS2_DIF_EXHASH; 912 913 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; 914 dip->i_di.di_depth = y; 915 916 gfs2_dinode_out(dip, dibh->b_data); 917 918 brelse(dibh); 919 920 return 0; 921 } 922 923 /** 924 * dir_split_leaf - Split a leaf block into two 925 * @dip: The GFS2 inode 926 * @index: 927 * @leaf_no: 928 * 929 * Returns: 0 on success, error code on failure 930 */ 931 932 static int dir_split_leaf(struct inode *inode, const struct qstr *name) 933 { 934 struct gfs2_inode *dip = GFS2_I(inode); 935 struct buffer_head *nbh, *obh, *dibh; 936 struct gfs2_leaf *nleaf, *oleaf; 937 struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new; 938 u32 start, len, half_len, divider; 939 u64 bn, leaf_no; 940 __be64 *lp; 941 u32 index; 942 int x, moved = 0; 943 int error; 944 945 index = name->hash >> (32 - dip->i_di.di_depth); 946 error = get_leaf_nr(dip, index, &leaf_no); 947 if (error) 948 return error; 949 950 /* Get the old leaf block */ 951 error = get_leaf(dip, leaf_no, &obh); 952 if (error) 953 return error; 954 955 oleaf = (struct gfs2_leaf *)obh->b_data; 956 if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) { 957 brelse(obh); 958 return 1; /* can't split */ 959 } 960 961 gfs2_trans_add_bh(dip->i_gl, obh, 1); 962 963 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 964 if (!nleaf) { 965 brelse(obh); 966 return -ENOSPC; 967 } 968 bn = nbh->b_blocknr; 969 970 /* Compute the start and len of leaf pointers in the hash table. */ 971 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth)); 972 half_len = len >> 1; 973 if (!half_len) { 974 printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index); 975 gfs2_consist_inode(dip); 976 error = -EIO; 977 goto fail_brelse; 978 } 979 980 start = (index & ~(len - 1)); 981 982 /* Change the pointers. 983 Don't bother distinguishing stuffed from non-stuffed. 984 This code is complicated enough already. */ 985 lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL); 986 /* Change the pointers */ 987 for (x = 0; x < half_len; x++) 988 lp[x] = cpu_to_be64(bn); 989 990 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), 991 half_len * sizeof(u64)); 992 if (error != half_len * sizeof(u64)) { 993 if (error >= 0) 994 error = -EIO; 995 goto fail_lpfree; 996 } 997 998 kfree(lp); 999 1000 /* Compute the divider */ 1001 divider = (start + half_len) << (32 - dip->i_di.di_depth); 1002 1003 /* Copy the entries */ 1004 dirent_first(dip, obh, &dent); 1005 1006 do { 1007 next = dent; 1008 if (dirent_next(dip, obh, &next)) 1009 next = NULL; 1010 1011 if (!gfs2_dirent_sentinel(dent) && 1012 be32_to_cpu(dent->de_hash) < divider) { 1013 struct qstr str; 1014 str.name = (char*)(dent+1); 1015 str.len = be16_to_cpu(dent->de_name_len); 1016 str.hash = be32_to_cpu(dent->de_hash); 1017 new = gfs2_dirent_alloc(inode, nbh, &str); 1018 if (IS_ERR(new)) { 1019 error = PTR_ERR(new); 1020 break; 1021 } 1022 1023 new->de_inum = dent->de_inum; /* No endian worries */ 1024 new->de_type = dent->de_type; /* No endian worries */ 1025 nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1); 1026 1027 dirent_del(dip, obh, prev, dent); 1028 1029 if (!oleaf->lf_entries) 1030 gfs2_consist_inode(dip); 1031 oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1); 1032 1033 if (!prev) 1034 prev = dent; 1035 1036 moved = 1; 1037 } else { 1038 prev = dent; 1039 } 1040 dent = next; 1041 } while (dent); 1042 1043 oleaf->lf_depth = nleaf->lf_depth; 1044 1045 error = gfs2_meta_inode_buffer(dip, &dibh); 1046 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1047 dip->i_di.di_blocks++; 1048 gfs2_set_inode_blocks(&dip->i_inode); 1049 gfs2_dinode_out(dip, dibh->b_data); 1050 brelse(dibh); 1051 } 1052 1053 brelse(obh); 1054 brelse(nbh); 1055 1056 return error; 1057 1058 fail_lpfree: 1059 kfree(lp); 1060 1061 fail_brelse: 1062 brelse(obh); 1063 brelse(nbh); 1064 return error; 1065 } 1066 1067 /** 1068 * dir_double_exhash - Double size of ExHash table 1069 * @dip: The GFS2 dinode 1070 * 1071 * Returns: 0 on success, error code on failure 1072 */ 1073 1074 static int dir_double_exhash(struct gfs2_inode *dip) 1075 { 1076 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1077 struct buffer_head *dibh; 1078 u32 hsize; 1079 u64 *buf; 1080 u64 *from, *to; 1081 u64 block; 1082 int x; 1083 int error = 0; 1084 1085 hsize = 1 << dip->i_di.di_depth; 1086 if (hsize * sizeof(u64) != dip->i_di.di_size) { 1087 gfs2_consist_inode(dip); 1088 return -EIO; 1089 } 1090 1091 /* Allocate both the "from" and "to" buffers in one big chunk */ 1092 1093 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL); 1094 1095 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { 1096 error = gfs2_dir_read_data(dip, (char *)buf, 1097 block * sdp->sd_hash_bsize, 1098 sdp->sd_hash_bsize, 1); 1099 if (error != sdp->sd_hash_bsize) { 1100 if (error >= 0) 1101 error = -EIO; 1102 goto fail; 1103 } 1104 1105 from = buf; 1106 to = (u64 *)((char *)buf + sdp->sd_hash_bsize); 1107 1108 for (x = sdp->sd_hash_ptrs; x--; from++) { 1109 *to++ = *from; /* No endianess worries */ 1110 *to++ = *from; 1111 } 1112 1113 error = gfs2_dir_write_data(dip, 1114 (char *)buf + sdp->sd_hash_bsize, 1115 block * sdp->sd_sb.sb_bsize, 1116 sdp->sd_sb.sb_bsize); 1117 if (error != sdp->sd_sb.sb_bsize) { 1118 if (error >= 0) 1119 error = -EIO; 1120 goto fail; 1121 } 1122 } 1123 1124 kfree(buf); 1125 1126 error = gfs2_meta_inode_buffer(dip, &dibh); 1127 if (!gfs2_assert_withdraw(sdp, !error)) { 1128 dip->i_di.di_depth++; 1129 gfs2_dinode_out(dip, dibh->b_data); 1130 brelse(dibh); 1131 } 1132 1133 return error; 1134 1135 fail: 1136 kfree(buf); 1137 return error; 1138 } 1139 1140 /** 1141 * compare_dents - compare directory entries by hash value 1142 * @a: first dent 1143 * @b: second dent 1144 * 1145 * When comparing the hash entries of @a to @b: 1146 * gt: returns 1 1147 * lt: returns -1 1148 * eq: returns 0 1149 */ 1150 1151 static int compare_dents(const void *a, const void *b) 1152 { 1153 const struct gfs2_dirent *dent_a, *dent_b; 1154 u32 hash_a, hash_b; 1155 int ret = 0; 1156 1157 dent_a = *(const struct gfs2_dirent **)a; 1158 hash_a = be32_to_cpu(dent_a->de_hash); 1159 1160 dent_b = *(const struct gfs2_dirent **)b; 1161 hash_b = be32_to_cpu(dent_b->de_hash); 1162 1163 if (hash_a > hash_b) 1164 ret = 1; 1165 else if (hash_a < hash_b) 1166 ret = -1; 1167 else { 1168 unsigned int len_a = be16_to_cpu(dent_a->de_name_len); 1169 unsigned int len_b = be16_to_cpu(dent_b->de_name_len); 1170 1171 if (len_a > len_b) 1172 ret = 1; 1173 else if (len_a < len_b) 1174 ret = -1; 1175 else 1176 ret = memcmp(dent_a + 1, dent_b + 1, len_a); 1177 } 1178 1179 return ret; 1180 } 1181 1182 /** 1183 * do_filldir_main - read out directory entries 1184 * @dip: The GFS2 inode 1185 * @offset: The offset in the file to read from 1186 * @opaque: opaque data to pass to filldir 1187 * @filldir: The function to pass entries to 1188 * @darr: an array of struct gfs2_dirent pointers to read 1189 * @entries: the number of entries in darr 1190 * @copied: pointer to int that's non-zero if a entry has been copied out 1191 * 1192 * Jump through some hoops to make sure that if there are hash collsions, 1193 * they are read out at the beginning of a buffer. We want to minimize 1194 * the possibility that they will fall into different readdir buffers or 1195 * that someone will want to seek to that location. 1196 * 1197 * Returns: errno, >0 on exception from filldir 1198 */ 1199 1200 static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, 1201 void *opaque, filldir_t filldir, 1202 const struct gfs2_dirent **darr, u32 entries, 1203 int *copied) 1204 { 1205 const struct gfs2_dirent *dent, *dent_next; 1206 u64 off, off_next; 1207 unsigned int x, y; 1208 int run = 0; 1209 int error = 0; 1210 1211 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); 1212 1213 dent_next = darr[0]; 1214 off_next = be32_to_cpu(dent_next->de_hash); 1215 off_next = gfs2_disk_hash2offset(off_next); 1216 1217 for (x = 0, y = 1; x < entries; x++, y++) { 1218 dent = dent_next; 1219 off = off_next; 1220 1221 if (y < entries) { 1222 dent_next = darr[y]; 1223 off_next = be32_to_cpu(dent_next->de_hash); 1224 off_next = gfs2_disk_hash2offset(off_next); 1225 1226 if (off < *offset) 1227 continue; 1228 *offset = off; 1229 1230 if (off_next == off) { 1231 if (*copied && !run) 1232 return 1; 1233 run = 1; 1234 } else 1235 run = 0; 1236 } else { 1237 if (off < *offset) 1238 continue; 1239 *offset = off; 1240 } 1241 1242 error = filldir(opaque, (const char *)(dent + 1), 1243 be16_to_cpu(dent->de_name_len), 1244 off, be64_to_cpu(dent->de_inum.no_addr), 1245 be16_to_cpu(dent->de_type)); 1246 if (error) 1247 return 1; 1248 1249 *copied = 1; 1250 } 1251 1252 /* Increment the *offset by one, so the next time we come into the 1253 do_filldir fxn, we get the next entry instead of the last one in the 1254 current leaf */ 1255 1256 (*offset)++; 1257 1258 return 0; 1259 } 1260 1261 static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, 1262 filldir_t filldir, int *copied, unsigned *depth, 1263 u64 leaf_no) 1264 { 1265 struct gfs2_inode *ip = GFS2_I(inode); 1266 struct buffer_head *bh; 1267 struct gfs2_leaf *lf; 1268 unsigned entries = 0; 1269 unsigned leaves = 0; 1270 const struct gfs2_dirent **darr, *dent; 1271 struct dirent_gather g; 1272 struct buffer_head **larr; 1273 int leaf = 0; 1274 int error, i; 1275 u64 lfn = leaf_no; 1276 1277 do { 1278 error = get_leaf(ip, lfn, &bh); 1279 if (error) 1280 goto out; 1281 lf = (struct gfs2_leaf *)bh->b_data; 1282 if (leaves == 0) 1283 *depth = be16_to_cpu(lf->lf_depth); 1284 entries += be16_to_cpu(lf->lf_entries); 1285 leaves++; 1286 lfn = be64_to_cpu(lf->lf_next); 1287 brelse(bh); 1288 } while(lfn); 1289 1290 if (!entries) 1291 return 0; 1292 1293 error = -ENOMEM; 1294 larr = vmalloc((leaves + entries) * sizeof(void *)); 1295 if (!larr) 1296 goto out; 1297 darr = (const struct gfs2_dirent **)(larr + leaves); 1298 g.pdent = darr; 1299 g.offset = 0; 1300 lfn = leaf_no; 1301 1302 do { 1303 error = get_leaf(ip, lfn, &bh); 1304 if (error) 1305 goto out_kfree; 1306 lf = (struct gfs2_leaf *)bh->b_data; 1307 lfn = be64_to_cpu(lf->lf_next); 1308 if (lf->lf_entries) { 1309 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, 1310 gfs2_dirent_gather, NULL, &g); 1311 error = PTR_ERR(dent); 1312 if (IS_ERR(dent)) { 1313 goto out_kfree; 1314 } 1315 error = 0; 1316 larr[leaf++] = bh; 1317 } else { 1318 brelse(bh); 1319 } 1320 } while(lfn); 1321 1322 error = do_filldir_main(ip, offset, opaque, filldir, darr, 1323 entries, copied); 1324 out_kfree: 1325 for(i = 0; i < leaf; i++) 1326 brelse(larr[i]); 1327 vfree(larr); 1328 out: 1329 return error; 1330 } 1331 1332 /** 1333 * dir_e_read - Reads the entries from a directory into a filldir buffer 1334 * @dip: dinode pointer 1335 * @offset: the hash of the last entry read shifted to the right once 1336 * @opaque: buffer for the filldir function to fill 1337 * @filldir: points to the filldir function to use 1338 * 1339 * Returns: errno 1340 */ 1341 1342 static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, 1343 filldir_t filldir) 1344 { 1345 struct gfs2_inode *dip = GFS2_I(inode); 1346 struct gfs2_sbd *sdp = GFS2_SB(inode); 1347 u32 hsize, len = 0; 1348 u32 ht_offset, lp_offset, ht_offset_cur = -1; 1349 u32 hash, index; 1350 __be64 *lp; 1351 int copied = 0; 1352 int error = 0; 1353 unsigned depth = 0; 1354 1355 hsize = 1 << dip->i_di.di_depth; 1356 if (hsize * sizeof(u64) != dip->i_di.di_size) { 1357 gfs2_consist_inode(dip); 1358 return -EIO; 1359 } 1360 1361 hash = gfs2_dir_offset2hash(*offset); 1362 index = hash >> (32 - dip->i_di.di_depth); 1363 1364 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); 1365 if (!lp) 1366 return -ENOMEM; 1367 1368 while (index < hsize) { 1369 lp_offset = index & (sdp->sd_hash_ptrs - 1); 1370 ht_offset = index - lp_offset; 1371 1372 if (ht_offset_cur != ht_offset) { 1373 error = gfs2_dir_read_data(dip, (char *)lp, 1374 ht_offset * sizeof(__be64), 1375 sdp->sd_hash_bsize, 1); 1376 if (error != sdp->sd_hash_bsize) { 1377 if (error >= 0) 1378 error = -EIO; 1379 goto out; 1380 } 1381 ht_offset_cur = ht_offset; 1382 } 1383 1384 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, 1385 &copied, &depth, 1386 be64_to_cpu(lp[lp_offset])); 1387 if (error) 1388 break; 1389 1390 len = 1 << (dip->i_di.di_depth - depth); 1391 index = (index & ~(len - 1)) + len; 1392 } 1393 1394 out: 1395 kfree(lp); 1396 if (error > 0) 1397 error = 0; 1398 return error; 1399 } 1400 1401 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 1402 filldir_t filldir) 1403 { 1404 struct gfs2_inode *dip = GFS2_I(inode); 1405 struct dirent_gather g; 1406 const struct gfs2_dirent **darr, *dent; 1407 struct buffer_head *dibh; 1408 int copied = 0; 1409 int error; 1410 1411 if (!dip->i_di.di_entries) 1412 return 0; 1413 1414 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) 1415 return dir_e_read(inode, offset, opaque, filldir); 1416 1417 if (!gfs2_is_stuffed(dip)) { 1418 gfs2_consist_inode(dip); 1419 return -EIO; 1420 } 1421 1422 error = gfs2_meta_inode_buffer(dip, &dibh); 1423 if (error) 1424 return error; 1425 1426 error = -ENOMEM; 1427 darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *), 1428 GFP_KERNEL); 1429 if (darr) { 1430 g.pdent = darr; 1431 g.offset = 0; 1432 dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size, 1433 gfs2_dirent_gather, NULL, &g); 1434 if (IS_ERR(dent)) { 1435 error = PTR_ERR(dent); 1436 goto out; 1437 } 1438 error = do_filldir_main(dip, offset, opaque, filldir, darr, 1439 dip->i_di.di_entries, &copied); 1440 out: 1441 kfree(darr); 1442 } 1443 1444 if (error > 0) 1445 error = 0; 1446 1447 brelse(dibh); 1448 1449 return error; 1450 } 1451 1452 /** 1453 * gfs2_dir_search - Search a directory 1454 * @dip: The GFS2 inode 1455 * @filename: 1456 * @inode: 1457 * 1458 * This routine searches a directory for a file or another directory. 1459 * Assumes a glock is held on dip. 1460 * 1461 * Returns: errno 1462 */ 1463 1464 int gfs2_dir_search(struct inode *dir, const struct qstr *name, 1465 struct gfs2_inum_host *inum, unsigned int *type) 1466 { 1467 struct buffer_head *bh; 1468 struct gfs2_dirent *dent; 1469 1470 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); 1471 if (dent) { 1472 if (IS_ERR(dent)) 1473 return PTR_ERR(dent); 1474 if (inum) 1475 gfs2_inum_in(inum, (char *)&dent->de_inum); 1476 if (type) 1477 *type = be16_to_cpu(dent->de_type); 1478 brelse(bh); 1479 return 0; 1480 } 1481 return -ENOENT; 1482 } 1483 1484 static int dir_new_leaf(struct inode *inode, const struct qstr *name) 1485 { 1486 struct buffer_head *bh, *obh; 1487 struct gfs2_inode *ip = GFS2_I(inode); 1488 struct gfs2_leaf *leaf, *oleaf; 1489 int error; 1490 u32 index; 1491 u64 bn; 1492 1493 index = name->hash >> (32 - ip->i_di.di_depth); 1494 error = get_first_leaf(ip, index, &obh); 1495 if (error) 1496 return error; 1497 do { 1498 oleaf = (struct gfs2_leaf *)obh->b_data; 1499 bn = be64_to_cpu(oleaf->lf_next); 1500 if (!bn) 1501 break; 1502 brelse(obh); 1503 error = get_leaf(ip, bn, &obh); 1504 if (error) 1505 return error; 1506 } while(1); 1507 1508 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1509 1510 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1511 if (!leaf) { 1512 brelse(obh); 1513 return -ENOSPC; 1514 } 1515 oleaf->lf_next = cpu_to_be64(bh->b_blocknr); 1516 brelse(bh); 1517 brelse(obh); 1518 1519 error = gfs2_meta_inode_buffer(ip, &bh); 1520 if (error) 1521 return error; 1522 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1523 ip->i_di.di_blocks++; 1524 gfs2_set_inode_blocks(&ip->i_inode); 1525 gfs2_dinode_out(ip, bh->b_data); 1526 brelse(bh); 1527 return 0; 1528 } 1529 1530 /** 1531 * gfs2_dir_add - Add new filename into directory 1532 * @dip: The GFS2 inode 1533 * @filename: The new name 1534 * @inode: The inode number of the entry 1535 * @type: The type of the entry 1536 * 1537 * Returns: 0 on success, error code on failure 1538 */ 1539 1540 int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1541 const struct gfs2_inum_host *inum, unsigned type) 1542 { 1543 struct gfs2_inode *ip = GFS2_I(inode); 1544 struct buffer_head *bh; 1545 struct gfs2_dirent *dent; 1546 struct gfs2_leaf *leaf; 1547 int error; 1548 1549 while(1) { 1550 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, 1551 &bh); 1552 if (dent) { 1553 if (IS_ERR(dent)) 1554 return PTR_ERR(dent); 1555 dent = gfs2_init_dirent(inode, dent, name, bh); 1556 gfs2_inum_out(inum, (char *)&dent->de_inum); 1557 dent->de_type = cpu_to_be16(type); 1558 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { 1559 leaf = (struct gfs2_leaf *)bh->b_data; 1560 leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1); 1561 } 1562 brelse(bh); 1563 error = gfs2_meta_inode_buffer(ip, &bh); 1564 if (error) 1565 break; 1566 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1567 ip->i_di.di_entries++; 1568 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC; 1569 gfs2_dinode_out(ip, bh->b_data); 1570 brelse(bh); 1571 error = 0; 1572 break; 1573 } 1574 if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) { 1575 error = dir_make_exhash(inode); 1576 if (error) 1577 break; 1578 continue; 1579 } 1580 error = dir_split_leaf(inode, name); 1581 if (error == 0) 1582 continue; 1583 if (error < 0) 1584 break; 1585 if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) { 1586 error = dir_double_exhash(ip); 1587 if (error) 1588 break; 1589 error = dir_split_leaf(inode, name); 1590 if (error < 0) 1591 break; 1592 if (error == 0) 1593 continue; 1594 } 1595 error = dir_new_leaf(inode, name); 1596 if (!error) 1597 continue; 1598 error = -ENOSPC; 1599 break; 1600 } 1601 return error; 1602 } 1603 1604 1605 /** 1606 * gfs2_dir_del - Delete a directory entry 1607 * @dip: The GFS2 inode 1608 * @filename: The filename 1609 * 1610 * Returns: 0 on success, error code on failure 1611 */ 1612 1613 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) 1614 { 1615 struct gfs2_dirent *dent, *prev = NULL; 1616 struct buffer_head *bh; 1617 int error; 1618 1619 /* Returns _either_ the entry (if its first in block) or the 1620 previous entry otherwise */ 1621 dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh); 1622 if (!dent) { 1623 gfs2_consist_inode(dip); 1624 return -EIO; 1625 } 1626 if (IS_ERR(dent)) { 1627 gfs2_consist_inode(dip); 1628 return PTR_ERR(dent); 1629 } 1630 /* If not first in block, adjust pointers accordingly */ 1631 if (gfs2_dirent_find(dent, name, NULL) == 0) { 1632 prev = dent; 1633 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len)); 1634 } 1635 1636 dirent_del(dip, bh, prev, dent); 1637 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) { 1638 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; 1639 u16 entries = be16_to_cpu(leaf->lf_entries); 1640 if (!entries) 1641 gfs2_consist_inode(dip); 1642 leaf->lf_entries = cpu_to_be16(--entries); 1643 } 1644 brelse(bh); 1645 1646 error = gfs2_meta_inode_buffer(dip, &bh); 1647 if (error) 1648 return error; 1649 1650 if (!dip->i_di.di_entries) 1651 gfs2_consist_inode(dip); 1652 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1653 dip->i_di.di_entries--; 1654 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; 1655 gfs2_dinode_out(dip, bh->b_data); 1656 brelse(bh); 1657 mark_inode_dirty(&dip->i_inode); 1658 1659 return error; 1660 } 1661 1662 /** 1663 * gfs2_dir_mvino - Change inode number of directory entry 1664 * @dip: The GFS2 inode 1665 * @filename: 1666 * @new_inode: 1667 * 1668 * This routine changes the inode number of a directory entry. It's used 1669 * by rename to change ".." when a directory is moved. 1670 * Assumes a glock is held on dvp. 1671 * 1672 * Returns: errno 1673 */ 1674 1675 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 1676 struct gfs2_inum_host *inum, unsigned int new_type) 1677 { 1678 struct buffer_head *bh; 1679 struct gfs2_dirent *dent; 1680 int error; 1681 1682 dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh); 1683 if (!dent) { 1684 gfs2_consist_inode(dip); 1685 return -EIO; 1686 } 1687 if (IS_ERR(dent)) 1688 return PTR_ERR(dent); 1689 1690 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1691 gfs2_inum_out(inum, (char *)&dent->de_inum); 1692 dent->de_type = cpu_to_be16(new_type); 1693 1694 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) { 1695 brelse(bh); 1696 error = gfs2_meta_inode_buffer(dip, &bh); 1697 if (error) 1698 return error; 1699 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1700 } 1701 1702 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC; 1703 gfs2_dinode_out(dip, bh->b_data); 1704 brelse(bh); 1705 return 0; 1706 } 1707 1708 /** 1709 * foreach_leaf - call a function for each leaf in a directory 1710 * @dip: the directory 1711 * @lc: the function to call for each each 1712 * @data: private data to pass to it 1713 * 1714 * Returns: errno 1715 */ 1716 1717 static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) 1718 { 1719 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1720 struct buffer_head *bh; 1721 struct gfs2_leaf *leaf; 1722 u32 hsize, len; 1723 u32 ht_offset, lp_offset, ht_offset_cur = -1; 1724 u32 index = 0; 1725 __be64 *lp; 1726 u64 leaf_no; 1727 int error = 0; 1728 1729 hsize = 1 << dip->i_di.di_depth; 1730 if (hsize * sizeof(u64) != dip->i_di.di_size) { 1731 gfs2_consist_inode(dip); 1732 return -EIO; 1733 } 1734 1735 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); 1736 if (!lp) 1737 return -ENOMEM; 1738 1739 while (index < hsize) { 1740 lp_offset = index & (sdp->sd_hash_ptrs - 1); 1741 ht_offset = index - lp_offset; 1742 1743 if (ht_offset_cur != ht_offset) { 1744 error = gfs2_dir_read_data(dip, (char *)lp, 1745 ht_offset * sizeof(__be64), 1746 sdp->sd_hash_bsize, 1); 1747 if (error != sdp->sd_hash_bsize) { 1748 if (error >= 0) 1749 error = -EIO; 1750 goto out; 1751 } 1752 ht_offset_cur = ht_offset; 1753 } 1754 1755 leaf_no = be64_to_cpu(lp[lp_offset]); 1756 if (leaf_no) { 1757 error = get_leaf(dip, leaf_no, &bh); 1758 if (error) 1759 goto out; 1760 leaf = (struct gfs2_leaf *)bh->b_data; 1761 len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth)); 1762 brelse(bh); 1763 1764 error = lc(dip, index, len, leaf_no, data); 1765 if (error) 1766 goto out; 1767 1768 index = (index & ~(len - 1)) + len; 1769 } else 1770 index++; 1771 } 1772 1773 if (index != hsize) { 1774 gfs2_consist_inode(dip); 1775 error = -EIO; 1776 } 1777 1778 out: 1779 kfree(lp); 1780 1781 return error; 1782 } 1783 1784 /** 1785 * leaf_dealloc - Deallocate a directory leaf 1786 * @dip: the directory 1787 * @index: the hash table offset in the directory 1788 * @len: the number of pointers to this leaf 1789 * @leaf_no: the leaf number 1790 * @data: not used 1791 * 1792 * Returns: errno 1793 */ 1794 1795 static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1796 u64 leaf_no, void *data) 1797 { 1798 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1799 struct gfs2_leaf *tmp_leaf; 1800 struct gfs2_rgrp_list rlist; 1801 struct buffer_head *bh, *dibh; 1802 u64 blk, nblk; 1803 unsigned int rg_blocks = 0, l_blocks = 0; 1804 char *ht; 1805 unsigned int x, size = len * sizeof(u64); 1806 int error; 1807 1808 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1809 1810 ht = kzalloc(size, GFP_KERNEL); 1811 if (!ht) 1812 return -ENOMEM; 1813 1814 gfs2_alloc_get(dip); 1815 1816 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1817 if (error) 1818 goto out; 1819 1820 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh); 1821 if (error) 1822 goto out_qs; 1823 1824 /* Count the number of leaves */ 1825 1826 for (blk = leaf_no; blk; blk = nblk) { 1827 error = get_leaf(dip, blk, &bh); 1828 if (error) 1829 goto out_rlist; 1830 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1831 nblk = be64_to_cpu(tmp_leaf->lf_next); 1832 brelse(bh); 1833 1834 gfs2_rlist_add(sdp, &rlist, blk); 1835 l_blocks++; 1836 } 1837 1838 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); 1839 1840 for (x = 0; x < rlist.rl_rgrps; x++) { 1841 struct gfs2_rgrpd *rgd; 1842 rgd = rlist.rl_ghs[x].gh_gl->gl_object; 1843 rg_blocks += rgd->rd_ri.ri_length; 1844 } 1845 1846 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); 1847 if (error) 1848 goto out_rlist; 1849 1850 error = gfs2_trans_begin(sdp, 1851 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) + 1852 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks); 1853 if (error) 1854 goto out_rg_gunlock; 1855 1856 for (blk = leaf_no; blk; blk = nblk) { 1857 error = get_leaf(dip, blk, &bh); 1858 if (error) 1859 goto out_end_trans; 1860 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1861 nblk = be64_to_cpu(tmp_leaf->lf_next); 1862 brelse(bh); 1863 1864 gfs2_free_meta(dip, blk, 1); 1865 1866 if (!dip->i_di.di_blocks) 1867 gfs2_consist_inode(dip); 1868 dip->i_di.di_blocks--; 1869 gfs2_set_inode_blocks(&dip->i_inode); 1870 } 1871 1872 error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); 1873 if (error != size) { 1874 if (error >= 0) 1875 error = -EIO; 1876 goto out_end_trans; 1877 } 1878 1879 error = gfs2_meta_inode_buffer(dip, &dibh); 1880 if (error) 1881 goto out_end_trans; 1882 1883 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1884 gfs2_dinode_out(dip, dibh->b_data); 1885 brelse(dibh); 1886 1887 out_end_trans: 1888 gfs2_trans_end(sdp); 1889 out_rg_gunlock: 1890 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); 1891 out_rlist: 1892 gfs2_rlist_free(&rlist); 1893 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh); 1894 out_qs: 1895 gfs2_quota_unhold(dip); 1896 out: 1897 gfs2_alloc_put(dip); 1898 kfree(ht); 1899 return error; 1900 } 1901 1902 /** 1903 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory 1904 * @dip: the directory 1905 * 1906 * Dealloc all on-disk directory leaves to FREEMETA state 1907 * Change on-disk inode type to "regular file" 1908 * 1909 * Returns: errno 1910 */ 1911 1912 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) 1913 { 1914 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1915 struct buffer_head *bh; 1916 int error; 1917 1918 /* Dealloc on-disk leaves to FREEMETA state */ 1919 error = foreach_leaf(dip, leaf_dealloc, NULL); 1920 if (error) 1921 return error; 1922 1923 /* Make this a regular file in case we crash. 1924 (We don't want to free these blocks a second time.) */ 1925 1926 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1927 if (error) 1928 return error; 1929 1930 error = gfs2_meta_inode_buffer(dip, &bh); 1931 if (!error) { 1932 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1933 ((struct gfs2_dinode *)bh->b_data)->di_mode = 1934 cpu_to_be32(S_IFREG); 1935 brelse(bh); 1936 } 1937 1938 gfs2_trans_end(sdp); 1939 1940 return error; 1941 } 1942 1943 /** 1944 * gfs2_diradd_alloc_required - find if adding entry will require an allocation 1945 * @ip: the file being written to 1946 * @filname: the filename that's going to be added 1947 * 1948 * Returns: 1 if alloc required, 0 if not, -ve on error 1949 */ 1950 1951 int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) 1952 { 1953 struct gfs2_dirent *dent; 1954 struct buffer_head *bh; 1955 1956 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); 1957 if (!dent) { 1958 return 1; 1959 } 1960 if (IS_ERR(dent)) 1961 return PTR_ERR(dent); 1962 brelse(bh); 1963 return 0; 1964 } 1965 1966