/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_acl.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_alloc.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_vnodeops.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"

#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/namei.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/fiemap.h>
#include <linux/slab.h>

/*
 * Set the security xattrs handed to us by the LSM on a newly created inode.
 */
static int
xfs_initxattrs(
	struct inode		*inode,
	const struct xattr	*xattr_array,
	void			*fs_info)
{
	const struct xattr	*xattr;
	struct xfs_inode	*ip = XFS_I(inode);
	int			error = 0;

	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
		error = xfs_attr_set(ip, xattr->name, xattr->value,
				     xattr->value_len, ATTR_SECURE);
		if (error < 0)
			break;
	}
	return error;
}

/*
 * Hook in SELinux.  This is not quite correct yet; what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
 * these attrs can be journalled at inode creation time (along with the
 * inode, of course, such that log replay can't cause these to be lost).
 */

STATIC int
xfs_init_security(
	struct inode	*inode,
	struct inode	*dir,
	const struct qstr *qstr)
{
	return security_inode_init_security(inode, dir, qstr,
					    &xfs_initxattrs, NULL);
}

/* Convert a VFS dentry name into the form the XFS core expects. */
static void
xfs_dentry_to_name(
	struct xfs_name	*namep,
	struct dentry	*dentry)
{
	namep->name = dentry->d_name.name;
	namep->len = dentry->d_name.len;
}

STATIC void
xfs_cleanup_inode(
	struct inode	*dir,
	struct inode	*inode,
	struct dentry	*dentry)
{
	struct xfs_name	teardown;

	/* Oh, the horror.
	 * If we can't add the ACL or we fail in
	 * xfs_init_security we must back out.
	 * ENOSPC can hit here, among other things.
	 */
	xfs_dentry_to_name(&teardown, dentry);

	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
	iput(inode);
}

STATIC int
xfs_vn_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	umode_t		mode,
	dev_t		rdev)
{
	struct inode	*inode;
	struct xfs_inode *ip = NULL;
	struct posix_acl *default_acl = NULL;
	struct xfs_name	name;
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
	if (S_ISCHR(mode) || S_ISBLK(mode)) {
		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
			return -EINVAL;
		rdev = sysv_encode_dev(rdev);
	} else {
		rdev = 0;
	}

	if (IS_POSIXACL(dir)) {
		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
		if (IS_ERR(default_acl))
			return PTR_ERR(default_acl);

		if (!default_acl)
			mode &= ~current_umask();
	}

	xfs_dentry_to_name(&name, dentry);
	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
	if (unlikely(error))
		goto out_free_acl;

	inode = VFS_I(ip);

	error = xfs_init_security(inode, dir, &dentry->d_name);
	if (unlikely(error))
		goto out_cleanup_inode;

	if (default_acl) {
		error = -xfs_inherit_acl(inode, default_acl);
		default_acl = NULL;
		if (unlikely(error))
			goto out_cleanup_inode;
	}

	d_instantiate(dentry, inode);
	return -error;

 out_cleanup_inode:
	xfs_cleanup_inode(dir, inode, dentry);
 out_free_acl:
	posix_acl_release(default_acl);
	return -error;
}

STATIC int
xfs_vn_create(
	struct inode	*dir,
	struct dentry	*dentry,
	umode_t		mode,
	bool		flags)
{
	return xfs_vn_mknod(dir, dentry, mode, 0);
}

STATIC int
xfs_vn_mkdir(
	struct inode	*dir,
	struct dentry	*dentry,
	umode_t		mode)
{
	return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
}

STATIC struct dentry *
xfs_vn_lookup(
	struct inode	*dir,
	struct dentry	*dentry,
	unsigned int	flags)
{
	struct xfs_inode *cip;
	struct xfs_name	name;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

	xfs_dentry_to_name(&name, dentry);
	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
	if (unlikely(error)) {
		if (unlikely(error != ENOENT))
			return ERR_PTR(-error);
		d_add(dentry, NULL);
		return NULL;
	}

	return d_splice_alias(VFS_I(cip), dentry);
}

STATIC struct dentry *
xfs_vn_ci_lookup(
	struct inode	*dir,
	struct dentry	*dentry,
	unsigned int	flags)
{
	struct xfs_inode *ip;
	struct xfs_name	xname;
	struct xfs_name	ci_name;
	struct qstr	dname;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

	xfs_dentry_to_name(&xname, dentry);
	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
	if (unlikely(error)) {
		if (unlikely(error != ENOENT))
			return ERR_PTR(-error);
		/*
		 * call d_add(dentry, NULL) here when d_drop_negative_children
		 * is called in xfs_vn_mknod (ie. allow negative dentries
		 * with CI filesystems).
		 */
		return NULL;
	}

	/* if exact match, just splice and exit */
	if (!ci_name.name)
		return d_splice_alias(VFS_I(ip), dentry);

	/* else case-insensitive match... */
	dname.name = ci_name.name;
	dname.len = ci_name.len;
	dentry = d_add_ci(dentry, VFS_I(ip), &dname);
	kmem_free(ci_name.name);
	return dentry;
}

STATIC int
xfs_vn_link(
	struct dentry	*old_dentry,
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*inode = old_dentry->d_inode;
	struct xfs_name	name;
	int		error;

	xfs_dentry_to_name(&name, dentry);

	error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
	if (unlikely(error))
		return -error;

	ihold(inode);
	d_instantiate(dentry, inode);
	return 0;
}

STATIC int
xfs_vn_unlink(
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct xfs_name	name;
	int		error;

	xfs_dentry_to_name(&name, dentry);

	error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
	if (error)
		return error;

	/*
	 * With unlink, the VFS makes the dentry "negative": no inode,
	 * but still hashed. This is incompatible with case-insensitive
	 * mode, so invalidate (unhash) the dentry in CI-mode.
	 */
	if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
		d_invalidate(dentry);
	return 0;
}

STATIC int
xfs_vn_symlink(
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
	struct inode	*inode;
	struct xfs_inode *cip = NULL;
	struct xfs_name	name;
	int		error;
	umode_t		mode;

	mode = S_IFLNK |
		(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
	xfs_dentry_to_name(&name, dentry);

	error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
	if (unlikely(error))
		goto out;

	inode = VFS_I(cip);

	error = xfs_init_security(inode, dir, &dentry->d_name);
	if (unlikely(error))
		goto out_cleanup_inode;

	d_instantiate(dentry, inode);
	return 0;

 out_cleanup_inode:
	xfs_cleanup_inode(dir, inode, dentry);
 out:
	return -error;
}

STATIC int
xfs_vn_rename(
	struct inode	*odir,
	struct dentry	*odentry,
	struct inode	*ndir,
	struct dentry	*ndentry)
{
	struct inode	*new_inode = ndentry->d_inode;
	struct xfs_name	oname;
	struct xfs_name	nname;

	xfs_dentry_to_name(&oname, odentry);
	xfs_dentry_to_name(&nname, ndentry);

	return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
			   XFS_I(ndir), &nname, new_inode ?
						XFS_I(new_inode) : NULL);
}

/*
 * careful here - this function can get called recursively, so
 * we need to be very careful about how much stack we use.
 * uio is kmalloced for this reason...
 */
STATIC void *
xfs_vn_follow_link(
	struct dentry		*dentry,
	struct nameidata	*nd)
{
	char			*link;
	int			error = -ENOMEM;

	link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
	if (!link)
		goto out_err;

	error = -xfs_readlink(XFS_I(dentry->d_inode), link);
	if (unlikely(error))
		goto out_kfree;

	nd_set_link(nd, link);
	return NULL;

 out_kfree:
	kfree(link);
 out_err:
	nd_set_link(nd, ERR_PTR(error));
	return NULL;
}

STATIC void
xfs_vn_put_link(
	struct dentry	*dentry,
	struct nameidata *nd,
	void		*p)
{
	char		*s = nd_get_link(nd);

	if (!IS_ERR(s))
		kfree(s);
}

STATIC int
xfs_vn_getattr(
	struct vfsmount		*mnt,
	struct dentry		*dentry,
	struct kstat		*stat)
{
	struct inode		*inode = dentry->d_inode;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	trace_xfs_getattr(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	stat->size = XFS_ISIZE(ip);
	stat->dev = inode->i_sb->s_dev;
	stat->mode = ip->i_d.di_mode;
	stat->nlink = ip->i_d.di_nlink;
	stat->uid = ip->i_d.di_uid;
	stat->gid = ip->i_d.di_gid;
	stat->ino = ip->i_ino;
	stat->atime = inode->i_atime;
	stat->mtime = inode->i_mtime;
	stat->ctime = inode->i_ctime;
	stat->blocks =
		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);

	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		stat->blksize = BLKDEV_IOSIZE;
		stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
				   sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		if (XFS_IS_REALTIME_INODE(ip)) {
			/*
			 * If the file blocks are being allocated from a
			 * realtime volume, then return the inode's realtime
			 * extent size or the realtime volume's extent size.
			 */
			stat->blksize =
				xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
		} else
			stat->blksize = xfs_preferred_iosize(mp);
		stat->rdev = 0;
		break;
	}

	return 0;
}

/*
 * Change attributes other than the file size: ownership, mode and timestamps.
 */
int
xfs_setattr_nonsize(
	struct xfs_inode	*ip,
	struct iattr		*iattr,
	int			flags)
{
	xfs_mount_t		*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			mask = iattr->ia_valid;
	xfs_trans_t		*tp;
	int			error;
	uid_t			uid = 0, iuid = 0;
	gid_t			gid = 0, igid = 0;
	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;

	trace_xfs_setattr(ip);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = -inode_change_ok(inode, iattr);
	if (error)
		return XFS_ERROR(error);

	ASSERT((mask & ATTR_SIZE) == 0);

	/*
	 * If disk quotas are on, we make sure that the dquots do exist on
	 * disk before we start any other transactions.  Trying to do this
	 * later is messy.  We don't care to take a readlock to look at the
	 * ids in the inode here, because we can't hold it across the
	 * trans_reserve.  If the IDs do change before we take the ilock,
	 * we're covered because the i_*dquot fields will get updated anyway.
	 */
	if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
		uint	qflags = 0;

		if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
			uid = iattr->ia_uid;
			qflags |= XFS_QMOPT_UQUOTA;
		} else {
			uid = ip->i_d.di_uid;
		}
		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
			gid = iattr->ia_gid;
			qflags |= XFS_QMOPT_GQUOTA;
		} else {
			gid = ip->i_d.di_gid;
		}

		/*
		 * We take a reference when we initialize udqp and gdqp,
		 * so it is important that we never blindly double trip on
		 * the same variable. See xfs_create() for an example.
		 */
		ASSERT(udqp == NULL);
		ASSERT(gdqp == NULL);
		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
					   qflags, &udqp, &gdqp);
		if (error)
			return error;
	}

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
	if (error)
		goto out_dqrele;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & (ATTR_UID|ATTR_GID)) {
		/*
		 * These IDs could have changed since we last looked at them.
		 * But, we're assured that if the ownership did change
		 * while we didn't have the inode locked, inode's dquot(s)
		 * would have changed also.
		 */
		iuid = ip->i_d.di_uid;
		igid = ip->i_d.di_gid;
		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;

		/*
		 * Do a quota reservation only if uid/gid is actually
		 * going to change.
		 */
		if (XFS_IS_QUOTA_RUNNING(mp) &&
		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
			ASSERT(tp);
			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
						capable(CAP_FOWNER) ?
						XFS_QMOPT_FORCE_RES : 0);
			if (error)	/* out of quota */
				goto out_trans_cancel;
		}
	}

	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & (ATTR_UID|ATTR_GID)) {
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
		    !capable(CAP_FSETID))
			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);

		/*
		 * Change the ownerships and register quota modifications
		 * in the transaction.
		 */
		if (iuid != uid) {
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
				ASSERT(mask & ATTR_UID);
				ASSERT(udqp);
				olddquot1 = xfs_qm_vop_chown(tp, ip,
							&ip->i_udquot, udqp);
			}
			ip->i_d.di_uid = uid;
			inode->i_uid = uid;
		}
		if (igid != gid) {
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
				ASSERT(!XFS_IS_PQUOTA_ON(mp));
				ASSERT(mask & ATTR_GID);
				ASSERT(gdqp);
				olddquot2 = xfs_qm_vop_chown(tp, ip,
							&ip->i_gdquot, gdqp);
			}
			ip->i_d.di_gid = gid;
			inode->i_gid = gid;
		}
	}

	/*
	 * Change file access modes.
	 */
	if (mask & ATTR_MODE) {
		umode_t mode = iattr->ia_mode;

		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
			mode &= ~S_ISGID;

		ip->i_d.di_mode &= S_IFMT;
		ip->i_d.di_mode |= mode & ~S_IFMT;

		inode->i_mode &= S_IFMT;
		inode->i_mode |= mode & ~S_IFMT;
	}

	/*
	 * Change file access or modified times.
	 */
	if (mask & ATTR_ATIME) {
		inode->i_atime = iattr->ia_atime;
		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
	}
	if (mask & ATTR_CTIME) {
		inode->i_ctime = iattr->ia_ctime;
		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
	}
	if (mask & ATTR_MTIME) {
		inode->i_mtime = iattr->ia_mtime;
		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
	}

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	XFS_STATS_INC(xs_ig_attrchg);

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/*
	 * Release any dquot(s) the inode had kept before chown.
	 */
	xfs_qm_dqrele(olddquot1);
	xfs_qm_dqrele(olddquot2);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	if (error)
		return XFS_ERROR(error);

	/*
	 * XXX(hch):  Updating the ACL entries is not atomic vs the i_mode
	 *	      update.  We could avoid this with linked transactions
	 *	      and passing down the transaction pointer all the way
	 *	      to attr_set.  No previous user of the generic
	 *	      Posix ACL code seems to care about this issue either.
	 */
	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
		error = -xfs_acl_chmod(inode);
		if (error)
			return XFS_ERROR(error);
	}

	return 0;

 out_trans_cancel:
	xfs_trans_cancel(tp, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out_dqrele:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	return error;
}

/*
 * Truncate file.  Must have write permission and not be a directory.
 */
int
xfs_setattr_size(
	struct xfs_inode	*ip,
	struct iattr		*iattr,
	int			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	int			mask = iattr->ia_valid;
	xfs_off_t		oldsize, newsize;
	struct xfs_trans	*tp;
	int			error;
	uint			lock_flags = 0;
	uint			commit_flags = 0;

	trace_xfs_setattr(ip);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = -inode_change_ok(inode, iattr);
	if (error)
		return XFS_ERROR(error);

	ASSERT(S_ISREG(ip->i_d.di_mode));
	ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
			ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
			ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);

	if (!(flags & XFS_ATTR_NOLOCK)) {
		lock_flags |= XFS_IOLOCK_EXCL;
		xfs_ilock(ip, lock_flags);
	}

	oldsize = inode->i_size;
	newsize = iattr->ia_size;

	/*
	 * Short circuit the truncate case for zero length files.
	 */
	if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
		if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
			goto out_unlock;

		/*
		 * Use the regular setattr path to update the timestamps.
		 */
		xfs_iunlock(ip, lock_flags);
		iattr->ia_valid &= ~ATTR_SIZE;
		return xfs_setattr_nonsize(ip, iattr, 0);
	}

	/*
	 * Make sure that the dquots are attached to the inode.
	 */
	error = xfs_qm_dqattach(ip, 0);
	if (error)
		goto out_unlock;

	/*
	 * Now we can make the changes.  Before we join the inode to the
	 * transaction, take care of the part of the truncation that must be
	 * done without the inode lock.
	 * This needs to be done before joining the inode to the transaction,
	 * because the inode cannot be unlocked once it is a part of the
	 * transaction.
	 */
	if (newsize > oldsize) {
		/*
		 * Do the first part of growing a file: zero any data in the
		 * last block that is beyond the old EOF.  We need to do this
		 * before the inode is joined to the transaction to modify
		 * i_size.
		 */
		error = xfs_zero_eof(ip, newsize, oldsize);
		if (error)
			goto out_unlock;
	}

	/*
	 * We are going to log the inode size change in this transaction so
	 * any previous writes that are beyond the on disk EOF and the new
	 * EOF that have not been written out need to be written here.  If we
	 * do not write the data out, we expose ourselves to the null files
	 * problem.
	 *
	 * Only flush from the on disk size to the smaller of the in memory
	 * file size or the new size as that's the range we really care about
	 * here and prevents waiting for other data not within the range we
	 * care about here.
	 */
	if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
		error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
					FI_NONE);
		if (error)
			goto out_unlock;
	}

	/*
	 * Wait for all direct I/O to complete.
	 */
	inode_dio_wait(inode);

	error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
	if (error)
		goto out_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);
	if (error)
		goto out_trans_cancel;

	truncate_setsize(inode, newsize);

	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
	lock_flags |= XFS_ILOCK_EXCL;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
		iattr->ia_ctime = iattr->ia_mtime =
			current_fs_time(inode->i_sb);
		mask |= ATTR_CTIME | ATTR_MTIME;
	}

	/*
	 * The first thing we do is set the size to new_size permanently on
	 * disk.  This way we don't have to worry about anyone ever being able
	 * to look at the data being freed even in the face of a crash.
	 * What we're getting around here is the case where we free a block, it
	 * is allocated to another file, it is written to, and then we crash.
	 * If the new data gets written to the file but the log buffers
	 * containing the free and reallocation don't, then we'd end up with
	 * garbage in the blocks being freed.  As long as we make the new size
	 * permanent before actually freeing any blocks it doesn't matter if
	 * they get written to.
	 */
	ip->i_d.di_size = newsize;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	if (newsize <= oldsize) {
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
		if (error)
			goto out_trans_abort;

		/*
		 * Truncated "down", so we're removing references to old data
		 * here - if we delay flushing for a long time, we expose
		 * ourselves unduly to the notorious NULL files problem.  So,
		 * we mark this inode and flush it when the file is closed,
		 * and do not wait the usual (long) time for writeout.
		 */
		xfs_iflags_set(ip, XFS_ITRUNCATED);
	}

	if (mask & ATTR_CTIME) {
		inode->i_ctime = iattr->ia_ctime;
		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
	}
	if (mask & ATTR_MTIME) {
		inode->i_mtime = iattr->ia_mtime;
		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
	}

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	XFS_STATS_INC(xs_ig_attrchg);

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 out_unlock:
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
	return error;

 out_trans_abort:
	commit_flags |= XFS_TRANS_ABORT;
 out_trans_cancel:
	xfs_trans_cancel(tp, commit_flags);
	goto out_unlock;
}

STATIC int
xfs_vn_setattr(
	struct dentry	*dentry,
	struct iattr	*iattr)
{
	if (iattr->ia_valid & ATTR_SIZE)
		return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
}

STATIC int
xfs_vn_update_time(
	struct inode	*inode,
	struct timespec	*now,
	int		flags)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	trace_xfs_update_time(ip);

	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return -error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (flags & S_CTIME) {
		inode->i_ctime = *now;
		ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
		ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
	}
	if (flags & S_MTIME) {
		inode->i_mtime = *now;
		ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
		ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
	}
	if (flags & S_ATIME) {
		inode->i_atime = *now;
		ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
		ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
	}
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	return -xfs_trans_commit(tp, 0);
}

#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)

/*
 * Call fiemap helper to fill in user data.
 * Returns positive errors to xfs_getbmap.
 */
STATIC int
xfs_fiemap_format(
	void			**arg,
	struct getbmapx		*bmv,
	int			*full)
{
	int			error;
	struct fiemap_extent_info *fieinfo = *arg;
	u32			fiemap_flags = 0;
	u64			logical, physical, length;

	/* Do nothing for a hole */
	if (bmv->bmv_block == -1LL)
		return 0;

	logical = BBTOB(bmv->bmv_offset);
	physical = BBTOB(bmv->bmv_block);
	length = BBTOB(bmv->bmv_length);

	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
		fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
		physical = 0;	/* no block yet */
	}
	if (bmv->bmv_oflags & BMV_OF_LAST)
		fiemap_flags |= FIEMAP_EXTENT_LAST;

	error = fiemap_fill_next_extent(fieinfo, logical, physical,
					length, fiemap_flags);
	if (error > 0) {
		error = 0;
		*full = 1;	/* user array now full */
	}

	return -error;
}

STATIC int
xfs_vn_fiemap(
	struct inode		*inode,
	struct fiemap_extent_info *fieinfo,
	u64			start,
	u64			length)
{
	xfs_inode_t		*ip = XFS_I(inode);
	struct getbmapx		bm;
	int			error;

	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
	if (error)
		return error;

	/* Set up bmap header for xfs internal routine */
	bm.bmv_offset = BTOBB(start);
	/* Special case for whole file */
	if (length == FIEMAP_MAX_OFFSET)
		bm.bmv_length = -1LL;
	else
		bm.bmv_length = BTOBB(length);

	/* We add one because in getbmap world count includes the header */
	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
					fieinfo->fi_extents_max + 1;
	bm.bmv_count = min_t(__s32, bm.bmv_count,
			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
		bm.bmv_iflags |= BMV_IF_ATTRFORK;
	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
		bm.bmv_iflags |= BMV_IF_DELALLOC;

	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
	if (error)
		return -error;

	return 0;
}

static const struct inode_operations xfs_inode_operations = {
	.get_acl		= xfs_get_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.setxattr		= generic_setxattr,
	.getxattr		= generic_getxattr,
	.removexattr		= generic_removexattr,
	.listxattr		= xfs_vn_listxattr,
	.fiemap			= xfs_vn_fiemap,
	.update_time		= xfs_vn_update_time,
};

static const struct inode_operations xfs_dir_inode_operations = {
	.create			= xfs_vn_create,
	.lookup			= xfs_vn_lookup,
	.link			= xfs_vn_link,
	.unlink			= xfs_vn_unlink,
	.symlink		= xfs_vn_symlink,
	.mkdir			= xfs_vn_mkdir,
	/*
	 * Yes, XFS uses the same method for rmdir and unlink.
	 *
	 * There are some subtle differences deeper in the code,
	 * but we use S_ISDIR to check for those.
	 */
	.rmdir			= xfs_vn_unlink,
	.mknod			= xfs_vn_mknod,
	.rename			= xfs_vn_rename,
	.get_acl		= xfs_get_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.setxattr		= generic_setxattr,
	.getxattr		= generic_getxattr,
	.removexattr		= generic_removexattr,
	.listxattr		= xfs_vn_listxattr,
	.update_time		= xfs_vn_update_time,
};

static const struct inode_operations xfs_dir_ci_inode_operations = {
	.create			= xfs_vn_create,
	.lookup			= xfs_vn_ci_lookup,
	.link			= xfs_vn_link,
	.unlink			= xfs_vn_unlink,
	.symlink		= xfs_vn_symlink,
	.mkdir			= xfs_vn_mkdir,
	/*
	 * Yes, XFS uses the same method for rmdir and unlink.
	 *
	 * There are some subtle differences deeper in the code,
	 * but we use S_ISDIR to check for those.
	 */
	.rmdir			= xfs_vn_unlink,
	.mknod			= xfs_vn_mknod,
	.rename			= xfs_vn_rename,
	.get_acl		= xfs_get_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.setxattr		= generic_setxattr,
	.getxattr		= generic_getxattr,
	.removexattr		= generic_removexattr,
	.listxattr		= xfs_vn_listxattr,
	.update_time		= xfs_vn_update_time,
};

static const struct inode_operations xfs_symlink_inode_operations = {
	.readlink		= generic_readlink,
	.follow_link		= xfs_vn_follow_link,
	.put_link		= xfs_vn_put_link,
	.get_acl		= xfs_get_acl,
	.getattr		= xfs_vn_getattr,
	.setattr		= xfs_vn_setattr,
	.setxattr		= generic_setxattr,
	.getxattr		= generic_getxattr,
	.removexattr		= generic_removexattr,
	.listxattr		= xfs_vn_listxattr,
	.update_time		= xfs_vn_update_time,
};

/*
 * Propagate the on-disk XFS inode flags (immutable, append-only, sync,
 * noatime) into the corresponding VFS inode flags.
 */
STATIC void
xfs_diflags_to_iflags(
	struct inode		*inode,
	struct xfs_inode	*ip)
{
	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;
	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;
	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
		inode->i_flags |= S_SYNC;
	else
		inode->i_flags &= ~S_SYNC;
	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
		inode->i_flags |= S_NOATIME;
	else
		inode->i_flags &= ~S_NOATIME;
}

/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;

	inode->i_ino = ip->i_ino;
	inode->i_state = I_NEW;

	inode_sb_list_add(inode);
	/* make the inode look hashed for the writeback code */
	hlist_add_fake(&inode->i_hash);

	inode->i_mode = ip->i_d.di_mode;
	set_nlink(inode, ip->i_d.di_nlink);
	inode->i_uid = ip->i_d.di_uid;
	inode->i_gid = ip->i_d.di_gid;

	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		inode->i_rdev =
			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
			      sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		inode->i_rdev = 0;
		break;
	}

	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
	xfs_diflags_to_iflags(inode, ip);

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	case S_IFDIR:
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &xfs_symlink_inode_operations;
		if (!(ip->i_df.if_flags & XFS_IFINLINE))
			inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}

	/*
	 * If there is no attribute fork no ACL can exist on this inode,
	 * and it can't have any file capabilities attached to it either.
	 */
	if (!XFS_IFORK_Q(ip)) {
		inode_has_no_xattr(inode);
		cache_no_acl(inode);
	}

	xfs_iflags_clear(ip, XFS_INEW);
	barrier();

	unlock_new_inode(inode);
}