1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/posix_acl.h> 16 #include <linux/sort.h> 17 #include <linux/gfs2_ondisk.h> 18 #include <linux/crc32.h> 19 #include <linux/security.h> 20 #include <linux/time.h> 21 22 #include "gfs2.h" 23 #include "incore.h" 24 #include "acl.h" 25 #include "bmap.h" 26 #include "dir.h" 27 #include "eattr.h" 28 #include "glock.h" 29 #include "glops.h" 30 #include "inode.h" 31 #include "log.h" 32 #include "meta_io.h" 33 #include "ops_address.h" 34 #include "quota.h" 35 #include "rgrp.h" 36 #include "trans.h" 37 #include "util.h" 38 39 struct gfs2_inum_range_host { 40 u64 ir_start; 41 u64 ir_length; 42 }; 43 44 static int iget_test(struct inode *inode, void *opaque) 45 { 46 struct gfs2_inode *ip = GFS2_I(inode); 47 u64 *no_addr = opaque; 48 49 if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) 50 return 1; 51 52 return 0; 53 } 54 55 static int iget_set(struct inode *inode, void *opaque) 56 { 57 struct gfs2_inode *ip = GFS2_I(inode); 58 u64 *no_addr = opaque; 59 60 inode->i_ino = (unsigned long)*no_addr; 61 ip->i_no_addr = *no_addr; 62 set_bit(GIF_USER, &ip->i_flags); 63 return 0; 64 } 65 66 struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 67 { 68 unsigned long hash = (unsigned long)no_addr; 69 return ilookup5(sb, hash, iget_test, &no_addr); 70 } 71 72 static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 73 { 74 unsigned long hash = (unsigned long)no_addr; 75 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 76 } 77 78 struct gfs2_skip_data { 79 u64 no_addr; 80 int skipped; 81 }; 82 83 static int iget_skip_test(struct inode *inode, void *opaque) 84 { 85 struct gfs2_inode *ip = GFS2_I(inode); 86 struct gfs2_skip_data *data = opaque; 87 88 if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ 89 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 90 data->skipped = 1; 91 return 0; 92 } 93 return 1; 94 } 95 return 0; 96 } 97 98 static int iget_skip_set(struct inode *inode, void *opaque) 99 { 100 struct gfs2_inode *ip = GFS2_I(inode); 101 struct gfs2_skip_data *data = opaque; 102 103 if (data->skipped) 104 return 1; 105 inode->i_ino = (unsigned long)(data->no_addr); 106 ip->i_no_addr = data->no_addr; 107 set_bit(GIF_USER, &ip->i_flags); 108 return 0; 109 } 110 111 static struct inode *gfs2_iget_skip(struct super_block *sb, 112 u64 no_addr) 113 { 114 struct gfs2_skip_data data; 115 unsigned long hash = (unsigned long)no_addr; 116 117 data.no_addr = no_addr; 118 data.skipped = 0; 119 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); 120 } 121 122 /** 123 * GFS2 lookup code fills in vfs inode contents based on info obtained 124 * from directory entry inside gfs2_inode_lookup(). This has caused issues 125 * with NFS code path since its get_dentry routine doesn't have the relevant 126 * directory entry when gfs2_inode_lookup() is invoked. Part of the code 127 * segment inside gfs2_inode_lookup code needs to get moved around. 128 * 129 * Clean up I_LOCK and I_NEW as well. 130 **/ 131 132 void gfs2_set_iop(struct inode *inode) 133 { 134 struct gfs2_sbd *sdp = GFS2_SB(inode); 135 umode_t mode = inode->i_mode; 136 137 if (S_ISREG(mode)) { 138 inode->i_op = &gfs2_file_iops; 139 if (gfs2_localflocks(sdp)) 140 inode->i_fop = gfs2_file_fops_nolock; 141 else 142 inode->i_fop = gfs2_file_fops; 143 } else if (S_ISDIR(mode)) { 144 inode->i_op = &gfs2_dir_iops; 145 if (gfs2_localflocks(sdp)) 146 inode->i_fop = gfs2_dir_fops_nolock; 147 else 148 inode->i_fop = gfs2_dir_fops; 149 } else if (S_ISLNK(mode)) { 150 inode->i_op = &gfs2_symlink_iops; 151 } else { 152 inode->i_op = &gfs2_file_iops; 153 init_special_inode(inode, inode->i_mode, inode->i_rdev); 154 } 155 156 unlock_new_inode(inode); 157 } 158 159 /** 160 * gfs2_inode_lookup - Lookup an inode 161 * @sb: The super block 162 * @no_addr: The inode number 163 * @type: The type of the inode 164 * @skip_freeing: set this not return an inode if it is currently being freed. 165 * 166 * Returns: A VFS inode, or an error 167 */ 168 169 struct inode *gfs2_inode_lookup(struct super_block *sb, 170 unsigned int type, 171 u64 no_addr, 172 u64 no_formal_ino, int skip_freeing) 173 { 174 struct inode *inode; 175 struct gfs2_inode *ip; 176 struct gfs2_glock *io_gl; 177 int error; 178 179 if (skip_freeing) 180 inode = gfs2_iget_skip(sb, no_addr); 181 else 182 inode = gfs2_iget(sb, no_addr); 183 ip = GFS2_I(inode); 184 185 if (!inode) 186 return ERR_PTR(-ENOBUFS); 187 188 if (inode->i_state & I_NEW) { 189 struct gfs2_sbd *sdp = GFS2_SB(inode); 190 ip->i_no_formal_ino = no_formal_ino; 191 192 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 193 if (unlikely(error)) 194 goto fail; 195 ip->i_gl->gl_object = ip; 196 197 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); 198 if (unlikely(error)) 199 goto fail_put; 200 201 set_bit(GIF_INVALID, &ip->i_flags); 202 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); 203 if (unlikely(error)) 204 goto fail_iopen; 205 ip->i_iopen_gh.gh_gl->gl_object = ip; 206 207 gfs2_glock_put(io_gl); 208 209 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 210 goto gfs2_nfsbypass; 211 212 inode->i_mode = DT2IF(type); 213 214 /* 215 * We must read the inode in order to work out its type in 216 * this case. Note that this doesn't happen often as we normally 217 * know the type beforehand. This code path only occurs during 218 * unlinked inode recovery (where it is safe to do this glock, 219 * which is not true in the general case). 220 */ 221 if (type == DT_UNKNOWN) { 222 struct gfs2_holder gh; 223 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 224 if (unlikely(error)) 225 goto fail_glock; 226 /* Inode is now uptodate */ 227 gfs2_glock_dq_uninit(&gh); 228 } 229 230 gfs2_set_iop(inode); 231 } 232 233 gfs2_nfsbypass: 234 return inode; 235 fail_glock: 236 gfs2_glock_dq(&ip->i_iopen_gh); 237 fail_iopen: 238 gfs2_glock_put(io_gl); 239 fail_put: 240 ip->i_gl->gl_object = NULL; 241 gfs2_glock_put(ip->i_gl); 242 fail: 243 iget_failed(inode); 244 return ERR_PTR(error); 245 } 246 247 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 248 { 249 const struct gfs2_dinode *str = buf; 250 struct timespec atime; 251 u16 height, depth; 252 253 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) 254 goto corrupt; 255 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); 256 ip->i_inode.i_mode = be32_to_cpu(str->di_mode); 257 ip->i_inode.i_rdev = 0; 258 switch (ip->i_inode.i_mode & S_IFMT) { 259 case S_IFBLK: 260 case S_IFCHR: 261 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), 262 be32_to_cpu(str->di_minor)); 263 break; 264 }; 265 266 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 267 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 268 /* 269 * We will need to review setting the nlink count here in the 270 * light of the forthcoming ro bind mount work. This is a reminder 271 * to do that. 272 */ 273 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); 274 ip->i_disksize = be64_to_cpu(str->di_size); 275 i_size_write(&ip->i_inode, ip->i_disksize); 276 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 277 atime.tv_sec = be64_to_cpu(str->di_atime); 278 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); 279 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) 280 ip->i_inode.i_atime = atime; 281 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); 282 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); 283 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); 284 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); 285 286 ip->i_goal = be64_to_cpu(str->di_goal_meta); 287 ip->i_generation = be64_to_cpu(str->di_generation); 288 289 ip->i_diskflags = be32_to_cpu(str->di_flags); 290 gfs2_set_inode_flags(&ip->i_inode); 291 height = be16_to_cpu(str->di_height); 292 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 293 goto corrupt; 294 ip->i_height = (u8)height; 295 296 depth = be16_to_cpu(str->di_depth); 297 if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) 298 goto corrupt; 299 ip->i_depth = (u8)depth; 300 ip->i_entries = be32_to_cpu(str->di_entries); 301 302 ip->i_eattr = be64_to_cpu(str->di_eattr); 303 if (S_ISREG(ip->i_inode.i_mode)) 304 gfs2_set_aops(&ip->i_inode); 305 306 return 0; 307 corrupt: 308 if (gfs2_consist_inode(ip)) 309 gfs2_dinode_print(ip); 310 return -EIO; 311 } 312 313 /** 314 * gfs2_inode_refresh - Refresh the incore copy of the dinode 315 * @ip: The GFS2 inode 316 * 317 * Returns: errno 318 */ 319 320 int gfs2_inode_refresh(struct gfs2_inode *ip) 321 { 322 struct buffer_head *dibh; 323 int error; 324 325 error = gfs2_meta_inode_buffer(ip, &dibh); 326 if (error) 327 return error; 328 329 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { 330 brelse(dibh); 331 return -EIO; 332 } 333 334 error = gfs2_dinode_in(ip, dibh->b_data); 335 brelse(dibh); 336 clear_bit(GIF_INVALID, &ip->i_flags); 337 338 return error; 339 } 340 341 int gfs2_dinode_dealloc(struct gfs2_inode *ip) 342 { 343 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 344 struct gfs2_alloc *al; 345 struct gfs2_rgrpd *rgd; 346 int error; 347 348 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { 349 if (gfs2_consist_inode(ip)) 350 gfs2_dinode_print(ip); 351 return -EIO; 352 } 353 354 al = gfs2_alloc_get(ip); 355 if (!al) 356 return -ENOMEM; 357 358 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 359 if (error) 360 goto out; 361 362 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 363 if (error) 364 goto out_qs; 365 366 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 367 if (!rgd) { 368 gfs2_consist_inode(ip); 369 error = -EIO; 370 goto out_rindex_relse; 371 } 372 373 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 374 &al->al_rgd_gh); 375 if (error) 376 goto out_rindex_relse; 377 378 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); 379 if (error) 380 goto out_rg_gunlock; 381 382 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 383 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); 384 385 gfs2_free_di(rgd, ip); 386 387 gfs2_trans_end(sdp); 388 389 out_rg_gunlock: 390 gfs2_glock_dq_uninit(&al->al_rgd_gh); 391 out_rindex_relse: 392 gfs2_glock_dq_uninit(&al->al_ri_gh); 393 out_qs: 394 gfs2_quota_unhold(ip); 395 out: 396 gfs2_alloc_put(ip); 397 return error; 398 } 399 400 /** 401 * gfs2_change_nlink - Change nlink count on inode 402 * @ip: The GFS2 inode 403 * @diff: The change in the nlink count required 404 * 405 * Returns: errno 406 */ 407 int gfs2_change_nlink(struct gfs2_inode *ip, int diff) 408 { 409 struct buffer_head *dibh; 410 u32 nlink; 411 int error; 412 413 BUG_ON(diff != 1 && diff != -1); 414 nlink = ip->i_inode.i_nlink + diff; 415 416 /* If we are reducing the nlink count, but the new value ends up being 417 bigger than the old one, we must have underflowed. */ 418 if (diff < 0 && nlink > ip->i_inode.i_nlink) { 419 if (gfs2_consist_inode(ip)) 420 gfs2_dinode_print(ip); 421 return -EIO; 422 } 423 424 error = gfs2_meta_inode_buffer(ip, &dibh); 425 if (error) 426 return error; 427 428 if (diff > 0) 429 inc_nlink(&ip->i_inode); 430 else 431 drop_nlink(&ip->i_inode); 432 433 ip->i_inode.i_ctime = CURRENT_TIME; 434 435 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 436 gfs2_dinode_out(ip, dibh->b_data); 437 brelse(dibh); 438 mark_inode_dirty(&ip->i_inode); 439 440 if (ip->i_inode.i_nlink == 0) 441 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ 442 443 return error; 444 } 445 446 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 447 { 448 struct qstr qstr; 449 struct inode *inode; 450 gfs2_str2qstr(&qstr, name); 451 inode = gfs2_lookupi(dip, &qstr, 1); 452 /* gfs2_lookupi has inconsistent callers: vfs 453 * related routines expect NULL for no entry found, 454 * gfs2_lookup_simple callers expect ENOENT 455 * and do not check for NULL. 456 */ 457 if (inode == NULL) 458 return ERR_PTR(-ENOENT); 459 else 460 return inode; 461 } 462 463 464 /** 465 * gfs2_lookupi - Look up a filename in a directory and return its inode 466 * @d_gh: An initialized holder for the directory glock 467 * @name: The name of the inode to look for 468 * @is_root: If 1, ignore the caller's permissions 469 * @i_gh: An uninitialized holder for the new inode glock 470 * 471 * This can be called via the VFS filldir function when NFS is doing 472 * a readdirplus and the inode which its intending to stat isn't 473 * already in cache. In this case we must not take the directory glock 474 * again, since the readdir call will have already taken that lock. 475 * 476 * Returns: errno 477 */ 478 479 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 480 int is_root) 481 { 482 struct super_block *sb = dir->i_sb; 483 struct gfs2_inode *dip = GFS2_I(dir); 484 struct gfs2_holder d_gh; 485 int error = 0; 486 struct inode *inode = NULL; 487 int unlock = 0; 488 489 if (!name->len || name->len > GFS2_FNAMESIZE) 490 return ERR_PTR(-ENAMETOOLONG); 491 492 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || 493 (name->len == 2 && memcmp(name->name, "..", 2) == 0 && 494 dir == sb->s_root->d_inode)) { 495 igrab(dir); 496 return dir; 497 } 498 499 if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { 500 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); 501 if (error) 502 return ERR_PTR(error); 503 unlock = 1; 504 } 505 506 if (!is_root) { 507 error = gfs2_permission(dir, MAY_EXEC); 508 if (error) 509 goto out; 510 } 511 512 inode = gfs2_dir_search(dir, name); 513 if (IS_ERR(inode)) 514 error = PTR_ERR(inode); 515 out: 516 if (unlock) 517 gfs2_glock_dq_uninit(&d_gh); 518 if (error == -ENOENT) 519 return NULL; 520 return inode ? inode : ERR_PTR(error); 521 } 522 523 static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) 524 { 525 const struct gfs2_inum_range *str = buf; 526 527 ir->ir_start = be64_to_cpu(str->ir_start); 528 ir->ir_length = be64_to_cpu(str->ir_length); 529 } 530 531 static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) 532 { 533 struct gfs2_inum_range *str = buf; 534 535 str->ir_start = cpu_to_be64(ir->ir_start); 536 str->ir_length = cpu_to_be64(ir->ir_length); 537 } 538 539 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) 540 { 541 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 542 struct buffer_head *bh; 543 struct gfs2_inum_range_host ir; 544 int error; 545 546 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 547 if (error) 548 return error; 549 mutex_lock(&sdp->sd_inum_mutex); 550 551 error = gfs2_meta_inode_buffer(ip, &bh); 552 if (error) { 553 mutex_unlock(&sdp->sd_inum_mutex); 554 gfs2_trans_end(sdp); 555 return error; 556 } 557 558 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 559 560 if (ir.ir_length) { 561 *formal_ino = ir.ir_start++; 562 ir.ir_length--; 563 gfs2_trans_add_bh(ip->i_gl, bh, 1); 564 gfs2_inum_range_out(&ir, 565 bh->b_data + sizeof(struct gfs2_dinode)); 566 brelse(bh); 567 mutex_unlock(&sdp->sd_inum_mutex); 568 gfs2_trans_end(sdp); 569 return 0; 570 } 571 572 brelse(bh); 573 574 mutex_unlock(&sdp->sd_inum_mutex); 575 gfs2_trans_end(sdp); 576 577 return 1; 578 } 579 580 static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) 581 { 582 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); 583 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); 584 struct gfs2_holder gh; 585 struct buffer_head *bh; 586 struct gfs2_inum_range_host ir; 587 int error; 588 589 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 590 if (error) 591 return error; 592 593 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); 594 if (error) 595 goto out; 596 mutex_lock(&sdp->sd_inum_mutex); 597 598 error = gfs2_meta_inode_buffer(ip, &bh); 599 if (error) 600 goto out_end_trans; 601 602 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 603 604 if (!ir.ir_length) { 605 struct buffer_head *m_bh; 606 u64 x, y; 607 __be64 z; 608 609 error = gfs2_meta_inode_buffer(m_ip, &m_bh); 610 if (error) 611 goto out_brelse; 612 613 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)); 614 x = y = be64_to_cpu(z); 615 ir.ir_start = x; 616 ir.ir_length = GFS2_INUM_QUANTUM; 617 x += GFS2_INUM_QUANTUM; 618 if (x < y) 619 gfs2_consist_inode(m_ip); 620 z = cpu_to_be64(x); 621 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 622 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z; 623 624 brelse(m_bh); 625 } 626 627 *formal_ino = ir.ir_start++; 628 ir.ir_length--; 629 630 gfs2_trans_add_bh(ip->i_gl, bh, 1); 631 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); 632 633 out_brelse: 634 brelse(bh); 635 out_end_trans: 636 mutex_unlock(&sdp->sd_inum_mutex); 637 gfs2_trans_end(sdp); 638 out: 639 gfs2_glock_dq_uninit(&gh); 640 return error; 641 } 642 643 static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum) 644 { 645 int error; 646 647 error = pick_formal_ino_1(sdp, inum); 648 if (error <= 0) 649 return error; 650 651 error = pick_formal_ino_2(sdp, inum); 652 653 return error; 654 } 655 656 /** 657 * create_ok - OK to create a new on-disk inode here? 658 * @dip: Directory in which dinode is to be created 659 * @name: Name of new dinode 660 * @mode: 661 * 662 * Returns: errno 663 */ 664 665 static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 666 unsigned int mode) 667 { 668 int error; 669 670 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 671 if (error) 672 return error; 673 674 /* Don't create entries in an unlinked directory */ 675 if (!dip->i_inode.i_nlink) 676 return -EPERM; 677 678 error = gfs2_dir_check(&dip->i_inode, name, NULL); 679 switch (error) { 680 case -ENOENT: 681 error = 0; 682 break; 683 case 0: 684 return -EEXIST; 685 default: 686 return error; 687 } 688 689 if (dip->i_entries == (u32)-1) 690 return -EFBIG; 691 if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) 692 return -EMLINK; 693 694 return 0; 695 } 696 697 static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 698 unsigned int *uid, unsigned int *gid) 699 { 700 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 701 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 702 if (S_ISDIR(*mode)) 703 *mode |= S_ISUID; 704 else if (dip->i_inode.i_uid != current_fsuid()) 705 *mode &= ~07111; 706 *uid = dip->i_inode.i_uid; 707 } else 708 *uid = current_fsuid(); 709 710 if (dip->i_inode.i_mode & S_ISGID) { 711 if (S_ISDIR(*mode)) 712 *mode |= S_ISGID; 713 *gid = dip->i_inode.i_gid; 714 } else 715 *gid = current_fsgid(); 716 } 717 718 static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) 719 { 720 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 721 int error; 722 723 if (gfs2_alloc_get(dip) == NULL) 724 return -ENOMEM; 725 726 dip->i_alloc->al_requested = RES_DINODE; 727 error = gfs2_inplace_reserve(dip); 728 if (error) 729 goto out; 730 731 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); 732 if (error) 733 goto out_ipreserv; 734 735 *no_addr = gfs2_alloc_di(dip, generation); 736 737 gfs2_trans_end(sdp); 738 739 out_ipreserv: 740 gfs2_inplace_release(dip); 741 out: 742 gfs2_alloc_put(dip); 743 return error; 744 } 745 746 /** 747 * init_dinode - Fill in a new dinode structure 748 * @dip: the directory this inode is being created in 749 * @gl: The glock covering the new inode 750 * @inum: the inode number 751 * @mode: the file permissions 752 * @uid: 753 * @gid: 754 * 755 */ 756 757 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 758 const struct gfs2_inum_host *inum, unsigned int mode, 759 unsigned int uid, unsigned int gid, 760 const u64 *generation, dev_t dev, struct buffer_head **bhp) 761 { 762 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 763 struct gfs2_dinode *di; 764 struct buffer_head *dibh; 765 struct timespec tv = CURRENT_TIME; 766 767 dibh = gfs2_meta_new(gl, inum->no_addr); 768 gfs2_trans_add_bh(gl, dibh, 1); 769 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 770 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 771 di = (struct gfs2_dinode *)dibh->b_data; 772 773 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); 774 di->di_num.no_addr = cpu_to_be64(inum->no_addr); 775 di->di_mode = cpu_to_be32(mode); 776 di->di_uid = cpu_to_be32(uid); 777 di->di_gid = cpu_to_be32(gid); 778 di->di_nlink = 0; 779 di->di_size = 0; 780 di->di_blocks = cpu_to_be64(1); 781 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 782 di->di_major = cpu_to_be32(MAJOR(dev)); 783 di->di_minor = cpu_to_be32(MINOR(dev)); 784 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 785 di->di_generation = cpu_to_be64(*generation); 786 di->di_flags = 0; 787 788 if (S_ISREG(mode)) { 789 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || 790 gfs2_tune_get(sdp, gt_new_files_jdata)) 791 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); 792 } else if (S_ISDIR(mode)) { 793 di->di_flags |= cpu_to_be32(dip->i_diskflags & 794 GFS2_DIF_INHERIT_JDATA); 795 } 796 797 di->__pad1 = 0; 798 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 799 di->di_height = 0; 800 di->__pad2 = 0; 801 di->__pad3 = 0; 802 di->di_depth = 0; 803 di->di_entries = 0; 804 memset(&di->__pad4, 0, sizeof(di->__pad4)); 805 di->di_eattr = 0; 806 di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); 807 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 808 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 809 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 810 811 set_buffer_uptodate(dibh); 812 813 *bhp = dibh; 814 } 815 816 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 817 unsigned int mode, const struct gfs2_inum_host *inum, 818 const u64 *generation, dev_t dev, struct buffer_head **bhp) 819 { 820 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 821 unsigned int uid, gid; 822 int error; 823 824 munge_mode_uid_gid(dip, &mode, &uid, &gid); 825 if (!gfs2_alloc_get(dip)) 826 return -ENOMEM; 827 828 error = gfs2_quota_lock(dip, uid, gid); 829 if (error) 830 goto out; 831 832 error = gfs2_quota_check(dip, uid, gid); 833 if (error) 834 goto out_quota; 835 836 error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); 837 if (error) 838 goto out_quota; 839 840 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 841 gfs2_quota_change(dip, +1, uid, gid); 842 gfs2_trans_end(sdp); 843 844 out_quota: 845 gfs2_quota_unlock(dip); 846 out: 847 gfs2_alloc_put(dip); 848 return error; 849 } 850 851 static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, 852 struct gfs2_inode *ip) 853 { 854 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 855 struct gfs2_alloc *al; 856 int alloc_required; 857 struct buffer_head *dibh; 858 int error; 859 860 al = gfs2_alloc_get(dip); 861 if (!al) 862 return -ENOMEM; 863 864 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 865 if (error) 866 goto fail; 867 868 error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); 869 if (alloc_required < 0) 870 goto fail_quota_locks; 871 if (alloc_required) { 872 error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); 873 if (error) 874 goto fail_quota_locks; 875 876 al->al_requested = sdp->sd_max_dirres; 877 878 error = gfs2_inplace_reserve(dip); 879 if (error) 880 goto fail_quota_locks; 881 882 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 883 al->al_rgd->rd_length + 884 2 * RES_DINODE + 885 RES_STATFS + RES_QUOTA, 0); 886 if (error) 887 goto fail_ipreserv; 888 } else { 889 error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); 890 if (error) 891 goto fail_quota_locks; 892 } 893 894 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 895 if (error) 896 goto fail_end_trans; 897 898 error = gfs2_meta_inode_buffer(ip, &dibh); 899 if (error) 900 goto fail_end_trans; 901 ip->i_inode.i_nlink = 1; 902 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 903 gfs2_dinode_out(ip, dibh->b_data); 904 brelse(dibh); 905 return 0; 906 907 fail_end_trans: 908 gfs2_trans_end(sdp); 909 910 fail_ipreserv: 911 if (dip->i_alloc->al_rgd) 912 gfs2_inplace_release(dip); 913 914 fail_quota_locks: 915 gfs2_quota_unlock(dip); 916 917 fail: 918 gfs2_alloc_put(dip); 919 return error; 920 } 921 922 static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 923 { 924 int err; 925 size_t len; 926 void *value; 927 char *name; 928 struct gfs2_ea_request er; 929 930 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 931 &name, &value, &len); 932 933 if (err) { 934 if (err == -EOPNOTSUPP) 935 return 0; 936 return err; 937 } 938 939 memset(&er, 0, sizeof(struct gfs2_ea_request)); 940 941 er.er_type = GFS2_EATYPE_SECURITY; 942 er.er_name = name; 943 er.er_data = value; 944 er.er_name_len = strlen(name); 945 er.er_data_len = len; 946 947 err = gfs2_ea_set_i(ip, &er); 948 949 kfree(value); 950 kfree(name); 951 952 return err; 953 } 954 955 /** 956 * gfs2_createi - Create a new inode 957 * @ghs: An array of two holders 958 * @name: The name of the new file 959 * @mode: the permissions on the new inode 960 * 961 * @ghs[0] is an initialized holder for the directory 962 * @ghs[1] is the holder for the inode lock 963 * 964 * If the return value is not NULL, the glocks on both the directory and the new 965 * file are held. A transaction has been started and an inplace reservation 966 * is held, as well. 967 * 968 * Returns: An inode 969 */ 970 971 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 972 unsigned int mode, dev_t dev) 973 { 974 struct inode *inode = NULL; 975 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 976 struct inode *dir = &dip->i_inode; 977 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 978 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 979 int error; 980 u64 generation; 981 struct buffer_head *bh = NULL; 982 983 if (!name->len || name->len > GFS2_FNAMESIZE) 984 return ERR_PTR(-ENAMETOOLONG); 985 986 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 987 error = gfs2_glock_nq(ghs); 988 if (error) 989 goto fail; 990 991 error = create_ok(dip, name, mode); 992 if (error) 993 goto fail_gunlock; 994 995 error = pick_formal_ino(sdp, &inum.no_formal_ino); 996 if (error) 997 goto fail_gunlock; 998 999 error = alloc_dinode(dip, &inum.no_addr, &generation); 1000 if (error) 1001 goto fail_gunlock; 1002 1003 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 1004 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 1005 if (error) 1006 goto fail_gunlock; 1007 1008 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 1009 if (error) 1010 goto fail_gunlock2; 1011 1012 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 1013 inum.no_addr, 1014 inum.no_formal_ino, 0); 1015 if (IS_ERR(inode)) 1016 goto fail_gunlock2; 1017 1018 error = gfs2_inode_refresh(GFS2_I(inode)); 1019 if (error) 1020 goto fail_gunlock2; 1021 1022 error = gfs2_acl_create(dip, GFS2_I(inode)); 1023 if (error) 1024 goto fail_gunlock2; 1025 1026 error = gfs2_security_init(dip, GFS2_I(inode)); 1027 if (error) 1028 goto fail_gunlock2; 1029 1030 error = link_dinode(dip, name, GFS2_I(inode)); 1031 if (error) 1032 goto fail_gunlock2; 1033 1034 if (bh) 1035 brelse(bh); 1036 return inode; 1037 1038 fail_gunlock2: 1039 gfs2_glock_dq_uninit(ghs + 1); 1040 if (inode && !IS_ERR(inode)) 1041 iput(inode); 1042 fail_gunlock: 1043 gfs2_glock_dq(ghs); 1044 fail: 1045 if (bh) 1046 brelse(bh); 1047 return ERR_PTR(error); 1048 } 1049 1050 /** 1051 * gfs2_rmdiri - Remove a directory 1052 * @dip: The parent directory of the directory to be removed 1053 * @name: The name of the directory to be removed 1054 * @ip: The GFS2 inode of the directory to be removed 1055 * 1056 * Assumes Glocks on dip and ip are held 1057 * 1058 * Returns: errno 1059 */ 1060 1061 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 1062 struct gfs2_inode *ip) 1063 { 1064 struct qstr dotname; 1065 int error; 1066 1067 if (ip->i_entries != 2) { 1068 if (gfs2_consist_inode(ip)) 1069 gfs2_dinode_print(ip); 1070 return -EIO; 1071 } 1072 1073 error = gfs2_dir_del(dip, name); 1074 if (error) 1075 return error; 1076 1077 error = gfs2_change_nlink(dip, -1); 1078 if (error) 1079 return error; 1080 1081 gfs2_str2qstr(&dotname, "."); 1082 error = gfs2_dir_del(ip, &dotname); 1083 if (error) 1084 return error; 1085 1086 gfs2_str2qstr(&dotname, ".."); 1087 error = gfs2_dir_del(ip, &dotname); 1088 if (error) 1089 return error; 1090 1091 /* It looks odd, but it really should be done twice */ 1092 error = gfs2_change_nlink(ip, -1); 1093 if (error) 1094 return error; 1095 1096 error = gfs2_change_nlink(ip, -1); 1097 if (error) 1098 return error; 1099 1100 return error; 1101 } 1102 1103 /* 1104 * gfs2_unlink_ok - check to see that a inode is still in a directory 1105 * @dip: the directory 1106 * @name: the name of the file 1107 * @ip: the inode 1108 * 1109 * Assumes that the lock on (at least) @dip is held. 1110 * 1111 * Returns: 0 if the parent/child relationship is correct, errno if it isn't 1112 */ 1113 1114 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, 1115 const struct gfs2_inode *ip) 1116 { 1117 int error; 1118 1119 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1120 return -EPERM; 1121 1122 if ((dip->i_inode.i_mode & S_ISVTX) && 1123 dip->i_inode.i_uid != current_fsuid() && 1124 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 1125 return -EPERM; 1126 1127 if (IS_APPEND(&dip->i_inode)) 1128 return -EPERM; 1129 1130 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 1131 if (error) 1132 return error; 1133 1134 error = gfs2_dir_check(&dip->i_inode, name, ip); 1135 if (error) 1136 return error; 1137 1138 return 0; 1139 } 1140 1141 /** 1142 * gfs2_readlinki - return the contents of a symlink 1143 * @ip: the symlink's inode 1144 * @buf: a pointer to the buffer to be filled 1145 * @len: a pointer to the length of @buf 1146 * 1147 * If @buf is too small, a piece of memory is kmalloc()ed and needs 1148 * to be freed by the caller. 1149 * 1150 * Returns: errno 1151 */ 1152 1153 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) 1154 { 1155 struct gfs2_holder i_gh; 1156 struct buffer_head *dibh; 1157 unsigned int x; 1158 int error; 1159 1160 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); 1161 error = gfs2_glock_nq(&i_gh); 1162 if (error) { 1163 gfs2_holder_uninit(&i_gh); 1164 return error; 1165 } 1166 1167 if (!ip->i_disksize) { 1168 gfs2_consist_inode(ip); 1169 error = -EIO; 1170 goto out; 1171 } 1172 1173 error = gfs2_meta_inode_buffer(ip, &dibh); 1174 if (error) 1175 goto out; 1176 1177 x = ip->i_disksize + 1; 1178 if (x > *len) { 1179 *buf = kmalloc(x, GFP_NOFS); 1180 if (!*buf) { 1181 error = -ENOMEM; 1182 goto out_brelse; 1183 } 1184 } 1185 1186 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); 1187 *len = x; 1188 1189 out_brelse: 1190 brelse(dibh); 1191 out: 1192 gfs2_glock_dq_uninit(&i_gh); 1193 return error; 1194 } 1195 1196 static int 1197 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1198 { 1199 struct buffer_head *dibh; 1200 int error; 1201 1202 error = gfs2_meta_inode_buffer(ip, &dibh); 1203 if (!error) { 1204 error = inode_setattr(&ip->i_inode, attr); 1205 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1206 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1207 gfs2_dinode_out(ip, dibh->b_data); 1208 brelse(dibh); 1209 } 1210 return error; 1211 } 1212 1213 /** 1214 * gfs2_setattr_simple - 1215 * @ip: 1216 * @attr: 1217 * 1218 * Called with a reference on the vnode. 1219 * 1220 * Returns: errno 1221 */ 1222 1223 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1224 { 1225 int error; 1226 1227 if (current->journal_info) 1228 return __gfs2_setattr_simple(ip, attr); 1229 1230 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); 1231 if (error) 1232 return error; 1233 1234 error = __gfs2_setattr_simple(ip, attr); 1235 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1236 return error; 1237 } 1238 1239 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1240 { 1241 struct gfs2_dinode *str = buf; 1242 1243 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1244 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1245 str->di_header.__pad0 = 0; 1246 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1247 str->di_header.__pad1 = 0; 1248 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1249 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1250 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1251 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1252 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1253 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1254 str->di_size = cpu_to_be64(ip->i_disksize); 1255 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1256 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1257 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1258 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1259 1260 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1261 str->di_goal_data = cpu_to_be64(ip->i_goal); 1262 str->di_generation = cpu_to_be64(ip->i_generation); 1263 1264 str->di_flags = cpu_to_be32(ip->i_diskflags); 1265 str->di_height = cpu_to_be16(ip->i_height); 1266 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1267 !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 1268 GFS2_FORMAT_DE : 0); 1269 str->di_depth = cpu_to_be16(ip->i_depth); 1270 str->di_entries = cpu_to_be32(ip->i_entries); 1271 1272 str->di_eattr = cpu_to_be64(ip->i_eattr); 1273 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1274 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1275 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1276 } 1277 1278 void gfs2_dinode_print(const struct gfs2_inode *ip) 1279 { 1280 printk(KERN_INFO " no_formal_ino = %llu\n", 1281 (unsigned long long)ip->i_no_formal_ino); 1282 printk(KERN_INFO " no_addr = %llu\n", 1283 (unsigned long long)ip->i_no_addr); 1284 printk(KERN_INFO " i_disksize = %llu\n", 1285 (unsigned long long)ip->i_disksize); 1286 printk(KERN_INFO " blocks = %llu\n", 1287 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1288 printk(KERN_INFO " i_goal = %llu\n", 1289 (unsigned long long)ip->i_goal); 1290 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); 1291 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1292 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1293 printk(KERN_INFO " i_entries = %u\n", ip->i_entries); 1294 printk(KERN_INFO " i_eattr = %llu\n", 1295 (unsigned long long)ip->i_eattr); 1296 } 1297 1298