/*
 *  linux/fs/namei.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * Some corrections by tytso.
 */

/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
 * lookup logic.
 */
/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
 */

#include <linux/init.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/ima.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/device_cgroup.h>
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <asm/uaccess.h>

#include "internal.h"
#include "mount.h"

/* [Feb-1997 T. Schoebel-Theuer]
 * Fundamental changes in the pathname lookup mechanisms (namei)
 * were necessary because of omirr. The reason is that omirr needs
 * to know the _real_ pathname, not the user-supplied one, in case
 * of symlinks (and also when transname replacements occur).
 *
 * The new code replaces the old recursive symlink resolution with
 * an iterative one (in case of non-nested symlink chains). It does
 * this with calls to <fs>_follow_link().
 * As a side effect, dir_namei(), _namei() and follow_link() are now
 * replaced with a single function lookup_dentry() that can handle all
 * the special cases of the former code.
 *
 * With the new dcache, the pathname is stored at each inode, at least as
 * long as the refcount of the inode is positive. As a side effect, the
 * size of the dcache depends on the inode cache and thus is dynamic.
 *
 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
 * resolution to correspond with current state of the code.
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm. Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link(). Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in the form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent. The new semantics also affects mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.
 *
 * See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *      inside the path - always follow.
 *      in the last component in creation/removal/renaming - never follow.
 *      if LOOKUP_FOLLOW passed - follow.
 *      if the pathname has trailing slashes - follow.
 *      otherwise - don't follow.
 * (applied in that order).
 *
 * [Jun 2000 AV] Inconsistent behaviour of open() in case of flags==O_CREAT
 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
 * During the 2.4 we need to fix the userland stuff depending on it -
 * hopefully we will be able to get rid of that wart in 2.5. So far only
 * XEmacs seems to be relying on it...
 */
/*
 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
 * any extra contention...
 */

/* In order to reduce some races, while at the same time doing additional
 * checking and hopefully speeding things up, we copy filenames to the
 * kernel data space before using them.
 *
 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 * PATH_MAX includes the nul terminator --RR.
 */
static char *getname_flags(const char __user *filename, int flags, int *empty)
{
        char *result = __getname(), *err;
        int len;

        if (unlikely(!result))
                return ERR_PTR(-ENOMEM);

        len = strncpy_from_user(result, filename, PATH_MAX);
        err = ERR_PTR(len);
        if (unlikely(len < 0))
                goto error;

        /* The empty path is special. */
        if (unlikely(!len)) {
                if (empty)
                        *empty = 1;
                err = ERR_PTR(-ENOENT);
                if (!(flags & LOOKUP_EMPTY))
                        goto error;
        }

        err = ERR_PTR(-ENAMETOOLONG);
        if (likely(len < PATH_MAX)) {
                audit_getname(result);
                return result;
        }

error:
        __putname(result);
        return err;
}

char *getname(const char __user * filename)
{
        return getname_flags(filename, 0, NULL);
}

#ifdef CONFIG_AUDITSYSCALL
void putname(const char *name)
{
        if (unlikely(!audit_dummy_context()))
                audit_putname(name);
        else
                __putname(name);
}
EXPORT_SYMBOL(putname);
#endif

static int check_acl(struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
        struct posix_acl *acl;

        if (mask & MAY_NOT_BLOCK) {
                acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
                if (!acl)
                        return -EAGAIN;
                /* no ->get_acl() calls in RCU mode... */
                if (acl == ACL_NOT_CACHED)
                        return -ECHILD;
                return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
        }

        acl = get_cached_acl(inode, ACL_TYPE_ACCESS);

        /*
         * A filesystem can force an ACL callback by just never filling the
         * ACL cache. But normally you'd fill the cache either at inode
         * instantiation time, or on the first ->get_acl call.
         *
         * If the filesystem doesn't have a get_acl() function at all, we'll
         * just create the negative cache entry.
         */
        if (acl == ACL_NOT_CACHED) {
                if (inode->i_op->get_acl) {
                        acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS);
                        if (IS_ERR(acl))
                                return PTR_ERR(acl);
                } else {
                        set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
                        return -EAGAIN;
                }
        }

        if (acl) {
                int error = posix_acl_permission(inode, acl, mask);
                posix_acl_release(acl);
                return error;
        }
#endif

        return -EAGAIN;
}

/*
 * This does the basic permission checking
 */
static int acl_permission_check(struct inode *inode, int mask)
{
        unsigned int mode = inode->i_mode;

        if (likely(uid_eq(current_fsuid(), inode->i_uid)))
                mode >>= 6;
        else {
                if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
                        int error = check_acl(inode, mask);
                        if (error != -EAGAIN)
                                return error;
                }

                if (in_group_p(inode->i_gid))
                        mode >>= 3;
        }

        /*
         * If the DACs are ok we don't need any capability check.
         */
        if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
                return 0;
        return -EACCES;
}

/**
 * generic_permission - check for access rights on a Posix-like filesystem
 * @inode:      inode to check access rights for
 * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
 *
 * Used to check for read/write/execute permissions on a file.
 * We use "fsuid" for this, letting us set arbitrary permissions
 * for filesystem access without changing the "normal" uids which
 * are used for other things.
 *
 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
 * request cannot be satisfied (e.g. requires blocking or too much complexity).
 * It would then be called again in ref-walk mode.
 */
int generic_permission(struct inode *inode, int mask)
{
        int ret;

        /*
         * Do the basic permission checks.
         */
        ret = acl_permission_check(inode, mask);
        if (ret != -EACCES)
                return ret;

        if (S_ISDIR(inode->i_mode)) {
                /* DACs are overridable for directories */
                if (inode_capable(inode, CAP_DAC_OVERRIDE))
                        return 0;
                if (!(mask & MAY_WRITE))
                        if (inode_capable(inode, CAP_DAC_READ_SEARCH))
                                return 0;
                return -EACCES;
        }
        /*
         * Read/write DACs are always overridable.
         * Executable DACs are overridable when there is
         * at least one exec bit set.
         */
        if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
                if (inode_capable(inode, CAP_DAC_OVERRIDE))
                        return 0;

        /*
         * Searching includes executable on directories, else just read.
         */
        mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
        if (mask == MAY_READ)
                if (inode_capable(inode, CAP_DAC_READ_SEARCH))
                        return 0;

        return -EACCES;
}

/*
 * We _really_ want to just do "generic_permission()" without
 * even looking at the inode->i_op values. So we keep a cache
 * flag in inode->i_opflags, that says "this has no special
 * permission function, use the fast case".
302 */ 303 static inline int do_inode_permission(struct inode *inode, int mask) 304 { 305 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { 306 if (likely(inode->i_op->permission)) 307 return inode->i_op->permission(inode, mask); 308 309 /* This gets set once for the inode lifetime */ 310 spin_lock(&inode->i_lock); 311 inode->i_opflags |= IOP_FASTPERM; 312 spin_unlock(&inode->i_lock); 313 } 314 return generic_permission(inode, mask); 315 } 316 317 /** 318 * inode_permission - check for access rights to a given inode 319 * @inode: inode to check permission on 320 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) 321 * 322 * Used to check for read/write/execute permissions on an inode. 323 * We use "fsuid" for this, letting us set arbitrary permissions 324 * for filesystem access without changing the "normal" uids which 325 * are used for other things. 326 * 327 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. 328 */ 329 int inode_permission(struct inode *inode, int mask) 330 { 331 int retval; 332 333 if (unlikely(mask & MAY_WRITE)) { 334 umode_t mode = inode->i_mode; 335 336 /* 337 * Nobody gets write access to a read-only fs. 338 */ 339 if (IS_RDONLY(inode) && 340 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 341 return -EROFS; 342 343 /* 344 * Nobody gets write access to an immutable file. 345 */ 346 if (IS_IMMUTABLE(inode)) 347 return -EACCES; 348 } 349 350 retval = do_inode_permission(inode, mask); 351 if (retval) 352 return retval; 353 354 retval = devcgroup_inode_permission(inode, mask); 355 if (retval) 356 return retval; 357 358 return security_inode_permission(inode, mask); 359 } 360 361 /** 362 * path_get - get a reference to a path 363 * @path: path to get the reference to 364 * 365 * Given a path increment the reference count to the dentry and the vfsmount. 366 */ 367 void path_get(struct path *path) 368 { 369 mntget(path->mnt); 370 dget(path->dentry); 371 } 372 EXPORT_SYMBOL(path_get); 373 374 /** 375 * path_put - put a reference to a path 376 * @path: path to put the reference to 377 * 378 * Given a path decrement the reference count to the dentry and the vfsmount. 379 */ 380 void path_put(struct path *path) 381 { 382 dput(path->dentry); 383 mntput(path->mnt); 384 } 385 EXPORT_SYMBOL(path_put); 386 387 /* 388 * Path walking has 2 modes, rcu-walk and ref-walk (see 389 * Documentation/filesystems/path-lookup.txt). In situations when we can't 390 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab 391 * normal reference counts on dentries and vfsmounts to transition to rcu-walk 392 * mode. Refcounts are grabbed at the last known good point before rcu-walk 393 * got stuck, so ref-walk may continue from there. If this is not successful 394 * (eg. a seqcount has changed), then failure is returned and it's up to caller 395 * to restart the path walk from the beginning in ref-walk mode. 396 */ 397 398 /** 399 * unlazy_walk - try to switch to ref-walk mode. 400 * @nd: nameidata pathwalk data 401 * @dentry: child of nd->path.dentry or NULL 402 * Returns: 0 on success, -ECHILD on failure 403 * 404 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry 405 * for ref-walk mode. @dentry must be a path found by a do_lookup call on 406 * @nd or NULL. Must be called from rcu-walk context. 
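/*
 * Illustrative sketch (not part of the original file): a typical caller
 * pairs path_get()/path_put() around any use of a struct path it did not
 * resolve itself, and asks inode_permission() before touching the inode.
 * The helper name and the MAY_READ check here are examples only.
 */
static int example_check_read(struct path *path)
{
        int err;

        path_get(path);         /* pin the dentry and the vfsmount */
        err = inode_permission(path->dentry->d_inode, MAY_READ);
        path_put(path);         /* drop both references again */
        return err;
}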
407 */ 408 static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) 409 { 410 struct fs_struct *fs = current->fs; 411 struct dentry *parent = nd->path.dentry; 412 int want_root = 0; 413 414 BUG_ON(!(nd->flags & LOOKUP_RCU)); 415 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 416 want_root = 1; 417 spin_lock(&fs->lock); 418 if (nd->root.mnt != fs->root.mnt || 419 nd->root.dentry != fs->root.dentry) 420 goto err_root; 421 } 422 spin_lock(&parent->d_lock); 423 if (!dentry) { 424 if (!__d_rcu_to_refcount(parent, nd->seq)) 425 goto err_parent; 426 BUG_ON(nd->inode != parent->d_inode); 427 } else { 428 if (dentry->d_parent != parent) 429 goto err_parent; 430 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 431 if (!__d_rcu_to_refcount(dentry, nd->seq)) 432 goto err_child; 433 /* 434 * If the sequence check on the child dentry passed, then 435 * the child has not been removed from its parent. This 436 * means the parent dentry must be valid and able to take 437 * a reference at this point. 438 */ 439 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); 440 BUG_ON(!parent->d_count); 441 parent->d_count++; 442 spin_unlock(&dentry->d_lock); 443 } 444 spin_unlock(&parent->d_lock); 445 if (want_root) { 446 path_get(&nd->root); 447 spin_unlock(&fs->lock); 448 } 449 mntget(nd->path.mnt); 450 451 rcu_read_unlock(); 452 br_read_unlock(vfsmount_lock); 453 nd->flags &= ~LOOKUP_RCU; 454 return 0; 455 456 err_child: 457 spin_unlock(&dentry->d_lock); 458 err_parent: 459 spin_unlock(&parent->d_lock); 460 err_root: 461 if (want_root) 462 spin_unlock(&fs->lock); 463 return -ECHILD; 464 } 465 466 /** 467 * release_open_intent - free up open intent resources 468 * @nd: pointer to nameidata 469 */ 470 void release_open_intent(struct nameidata *nd) 471 { 472 struct file *file = nd->intent.open.file; 473 474 if (file && !IS_ERR(file)) { 475 if (file->f_path.dentry == NULL) 476 put_filp(file); 477 else 478 fput(file); 479 } 480 } 481 482 static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) 483 { 484 return dentry->d_op->d_revalidate(dentry, nd); 485 } 486 487 /** 488 * complete_walk - successful completion of path walk 489 * @nd: pointer nameidata 490 * 491 * If we had been in RCU mode, drop out of it and legitimize nd->path. 492 * Revalidate the final result, unless we'd already done that during 493 * the path walk or the filesystem doesn't ask for it. Return 0 on 494 * success, -error on failure. In case of failure caller does not 495 * need to drop nd->path. 
496 */ 497 static int complete_walk(struct nameidata *nd) 498 { 499 struct dentry *dentry = nd->path.dentry; 500 int status; 501 502 if (nd->flags & LOOKUP_RCU) { 503 nd->flags &= ~LOOKUP_RCU; 504 if (!(nd->flags & LOOKUP_ROOT)) 505 nd->root.mnt = NULL; 506 spin_lock(&dentry->d_lock); 507 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { 508 spin_unlock(&dentry->d_lock); 509 rcu_read_unlock(); 510 br_read_unlock(vfsmount_lock); 511 return -ECHILD; 512 } 513 BUG_ON(nd->inode != dentry->d_inode); 514 spin_unlock(&dentry->d_lock); 515 mntget(nd->path.mnt); 516 rcu_read_unlock(); 517 br_read_unlock(vfsmount_lock); 518 } 519 520 if (likely(!(nd->flags & LOOKUP_JUMPED))) 521 return 0; 522 523 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 524 return 0; 525 526 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) 527 return 0; 528 529 /* Note: we do not d_invalidate() */ 530 status = d_revalidate(dentry, nd); 531 if (status > 0) 532 return 0; 533 534 if (!status) 535 status = -ESTALE; 536 537 path_put(&nd->path); 538 return status; 539 } 540 541 static __always_inline void set_root(struct nameidata *nd) 542 { 543 if (!nd->root.mnt) 544 get_fs_root(current->fs, &nd->root); 545 } 546 547 static int link_path_walk(const char *, struct nameidata *); 548 549 static __always_inline void set_root_rcu(struct nameidata *nd) 550 { 551 if (!nd->root.mnt) { 552 struct fs_struct *fs = current->fs; 553 unsigned seq; 554 555 do { 556 seq = read_seqcount_begin(&fs->seq); 557 nd->root = fs->root; 558 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); 559 } while (read_seqcount_retry(&fs->seq, seq)); 560 } 561 } 562 563 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 564 { 565 int ret; 566 567 if (IS_ERR(link)) 568 goto fail; 569 570 if (*link == '/') { 571 set_root(nd); 572 path_put(&nd->path); 573 nd->path = nd->root; 574 path_get(&nd->root); 575 nd->flags |= LOOKUP_JUMPED; 576 } 577 nd->inode = nd->path.dentry->d_inode; 578 579 ret = link_path_walk(link, nd); 580 return ret; 581 fail: 582 path_put(&nd->path); 583 return PTR_ERR(link); 584 } 585 586 static void path_put_conditional(struct path *path, struct nameidata *nd) 587 { 588 dput(path->dentry); 589 if (path->mnt != nd->path.mnt) 590 mntput(path->mnt); 591 } 592 593 static inline void path_to_nameidata(const struct path *path, 594 struct nameidata *nd) 595 { 596 if (!(nd->flags & LOOKUP_RCU)) { 597 dput(nd->path.dentry); 598 if (nd->path.mnt != path->mnt) 599 mntput(nd->path.mnt); 600 } 601 nd->path.mnt = path->mnt; 602 nd->path.dentry = path->dentry; 603 } 604 605 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) 606 { 607 struct inode *inode = link->dentry->d_inode; 608 if (!IS_ERR(cookie) && inode->i_op->put_link) 609 inode->i_op->put_link(link->dentry, nd, cookie); 610 path_put(link); 611 } 612 613 static __always_inline int 614 follow_link(struct path *link, struct nameidata *nd, void **p) 615 { 616 int error; 617 struct dentry *dentry = link->dentry; 618 619 BUG_ON(nd->flags & LOOKUP_RCU); 620 621 if (link->mnt == nd->path.mnt) 622 mntget(link->mnt); 623 624 if (unlikely(current->total_link_count >= 40)) { 625 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ 626 path_put(&nd->path); 627 return -ELOOP; 628 } 629 cond_resched(); 630 current->total_link_count++; 631 632 touch_atime(link); 633 nd_set_link(nd, NULL); 634 635 error = security_inode_follow_link(link->dentry, nd); 636 if (error) { 637 *p = ERR_PTR(error); /* no ->put_link(), please */ 638 
path_put(&nd->path); 639 return error; 640 } 641 642 nd->last_type = LAST_BIND; 643 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 644 error = PTR_ERR(*p); 645 if (!IS_ERR(*p)) { 646 char *s = nd_get_link(nd); 647 error = 0; 648 if (s) 649 error = __vfs_follow_link(nd, s); 650 else if (nd->last_type == LAST_BIND) { 651 nd->flags |= LOOKUP_JUMPED; 652 nd->inode = nd->path.dentry->d_inode; 653 if (nd->inode->i_op->follow_link) { 654 /* stepped on a _really_ weird one */ 655 path_put(&nd->path); 656 error = -ELOOP; 657 } 658 } 659 } 660 return error; 661 } 662 663 static int follow_up_rcu(struct path *path) 664 { 665 struct mount *mnt = real_mount(path->mnt); 666 struct mount *parent; 667 struct dentry *mountpoint; 668 669 parent = mnt->mnt_parent; 670 if (&parent->mnt == path->mnt) 671 return 0; 672 mountpoint = mnt->mnt_mountpoint; 673 path->dentry = mountpoint; 674 path->mnt = &parent->mnt; 675 return 1; 676 } 677 678 int follow_up(struct path *path) 679 { 680 struct mount *mnt = real_mount(path->mnt); 681 struct mount *parent; 682 struct dentry *mountpoint; 683 684 br_read_lock(vfsmount_lock); 685 parent = mnt->mnt_parent; 686 if (&parent->mnt == path->mnt) { 687 br_read_unlock(vfsmount_lock); 688 return 0; 689 } 690 mntget(&parent->mnt); 691 mountpoint = dget(mnt->mnt_mountpoint); 692 br_read_unlock(vfsmount_lock); 693 dput(path->dentry); 694 path->dentry = mountpoint; 695 mntput(path->mnt); 696 path->mnt = &parent->mnt; 697 return 1; 698 } 699 700 /* 701 * Perform an automount 702 * - return -EISDIR to tell follow_managed() to stop and return the path we 703 * were called with. 704 */ 705 static int follow_automount(struct path *path, unsigned flags, 706 bool *need_mntput) 707 { 708 struct vfsmount *mnt; 709 int err; 710 711 if (!path->dentry->d_op || !path->dentry->d_op->d_automount) 712 return -EREMOTE; 713 714 /* We don't want to mount if someone's just doing a stat - 715 * unless they're stat'ing a directory and appended a '/' to 716 * the name. 717 * 718 * We do, however, want to mount if someone wants to open or 719 * create a file of any type under the mountpoint, wants to 720 * traverse through the mountpoint or wants to open the 721 * mounted directory. Also, autofs may mark negative dentries 722 * as being automount points. These will need the attentions 723 * of the daemon to instantiate them before they can be used. 724 */ 725 if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | 726 LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && 727 path->dentry->d_inode) 728 return -EISDIR; 729 730 current->total_link_count++; 731 if (current->total_link_count >= 40) 732 return -ELOOP; 733 734 mnt = path->dentry->d_op->d_automount(path); 735 if (IS_ERR(mnt)) { 736 /* 737 * The filesystem is allowed to return -EISDIR here to indicate 738 * it doesn't want to automount. For instance, autofs would do 739 * this so that its userspace daemon can mount on this dentry. 740 * 741 * However, we can only permit this if it's a terminal point in 742 * the path being looked up; if it wasn't then the remainder of 743 * the path is inaccessible and we should say so. 
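/*
 * Illustrative sketch (not part of the original file): the
 * ->follow_link()/->put_link() protocol that follow_link() above drives.
 * A filesystem either hands the target string to the VFS via nd_set_link()
 * and returns a cookie for ->put_link(), or walks the link itself.
 * "examplefs" and the use of i_private to hold the target are hypothetical.
 */
static void *examplefs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
        /* hand the stored target to the VFS; no cleanup cookie needed */
        nd_set_link(nd, (char *)dentry->d_inode->i_private);
        return NULL;
}

static void examplefs_put_link(struct dentry *dentry, struct nameidata *nd,
                               void *cookie)
{
        /* nothing was allocated in ->follow_link(), so nothing to free */
}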
744 */ 745 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT)) 746 return -EREMOTE; 747 return PTR_ERR(mnt); 748 } 749 750 if (!mnt) /* mount collision */ 751 return 0; 752 753 if (!*need_mntput) { 754 /* lock_mount() may release path->mnt on error */ 755 mntget(path->mnt); 756 *need_mntput = true; 757 } 758 err = finish_automount(mnt, path); 759 760 switch (err) { 761 case -EBUSY: 762 /* Someone else made a mount here whilst we were busy */ 763 return 0; 764 case 0: 765 path_put(path); 766 path->mnt = mnt; 767 path->dentry = dget(mnt->mnt_root); 768 return 0; 769 default: 770 return err; 771 } 772 773 } 774 775 /* 776 * Handle a dentry that is managed in some way. 777 * - Flagged for transit management (autofs) 778 * - Flagged as mountpoint 779 * - Flagged as automount point 780 * 781 * This may only be called in refwalk mode. 782 * 783 * Serialization is taken care of in namespace.c 784 */ 785 static int follow_managed(struct path *path, unsigned flags) 786 { 787 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ 788 unsigned managed; 789 bool need_mntput = false; 790 int ret = 0; 791 792 /* Given that we're not holding a lock here, we retain the value in a 793 * local variable for each dentry as we look at it so that we don't see 794 * the components of that value change under us */ 795 while (managed = ACCESS_ONCE(path->dentry->d_flags), 796 managed &= DCACHE_MANAGED_DENTRY, 797 unlikely(managed != 0)) { 798 /* Allow the filesystem to manage the transit without i_mutex 799 * being held. */ 800 if (managed & DCACHE_MANAGE_TRANSIT) { 801 BUG_ON(!path->dentry->d_op); 802 BUG_ON(!path->dentry->d_op->d_manage); 803 ret = path->dentry->d_op->d_manage(path->dentry, false); 804 if (ret < 0) 805 break; 806 } 807 808 /* Transit to a mounted filesystem. */ 809 if (managed & DCACHE_MOUNTED) { 810 struct vfsmount *mounted = lookup_mnt(path); 811 if (mounted) { 812 dput(path->dentry); 813 if (need_mntput) 814 mntput(path->mnt); 815 path->mnt = mounted; 816 path->dentry = dget(mounted->mnt_root); 817 need_mntput = true; 818 continue; 819 } 820 821 /* Something is mounted on this dentry in another 822 * namespace and/or whatever was mounted there in this 823 * namespace got unmounted before we managed to get the 824 * vfsmount_lock */ 825 } 826 827 /* Handle an automount point */ 828 if (managed & DCACHE_NEED_AUTOMOUNT) { 829 ret = follow_automount(path, flags, &need_mntput); 830 if (ret < 0) 831 break; 832 continue; 833 } 834 835 /* We didn't change the current path point */ 836 break; 837 } 838 839 if (need_mntput && path->mnt == mnt) 840 mntput(path->mnt); 841 if (ret == -EISDIR) 842 ret = 0; 843 return ret < 0 ? ret : need_mntput; 844 } 845 846 int follow_down_one(struct path *path) 847 { 848 struct vfsmount *mounted; 849 850 mounted = lookup_mnt(path); 851 if (mounted) { 852 dput(path->dentry); 853 mntput(path->mnt); 854 path->mnt = mounted; 855 path->dentry = dget(mounted->mnt_root); 856 return 1; 857 } 858 return 0; 859 } 860 861 static inline bool managed_dentry_might_block(struct dentry *dentry) 862 { 863 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && 864 dentry->d_op->d_manage(dentry, true) < 0); 865 } 866 867 /* 868 * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if 869 * we meet a managed dentry that would need blocking. 
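/*
 * Illustrative sketch (not part of the original file): follow_down_one()
 * above steps into a single mount stacked on a dentry; looping until it
 * returns 0 lands on the topmost mount of the pile, which is the same
 * effect follow_mount() below achieves with lookup_mnt() directly.
 * The helper name is an example only.
 */
static void example_walk_mount_pile(struct path *path)
{
        while (follow_down_one(path))
                ;       /* each step swaps in the covering mount's root */
}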
870 */ 871 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, 872 struct inode **inode) 873 { 874 for (;;) { 875 struct mount *mounted; 876 /* 877 * Don't forget we might have a non-mountpoint managed dentry 878 * that wants to block transit. 879 */ 880 if (unlikely(managed_dentry_might_block(path->dentry))) 881 return false; 882 883 if (!d_mountpoint(path->dentry)) 884 break; 885 886 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 887 if (!mounted) 888 break; 889 path->mnt = &mounted->mnt; 890 path->dentry = mounted->mnt.mnt_root; 891 nd->flags |= LOOKUP_JUMPED; 892 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 893 /* 894 * Update the inode too. We don't need to re-check the 895 * dentry sequence number here after this d_inode read, 896 * because a mount-point is always pinned. 897 */ 898 *inode = path->dentry->d_inode; 899 } 900 return true; 901 } 902 903 static void follow_mount_rcu(struct nameidata *nd) 904 { 905 while (d_mountpoint(nd->path.dentry)) { 906 struct mount *mounted; 907 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); 908 if (!mounted) 909 break; 910 nd->path.mnt = &mounted->mnt; 911 nd->path.dentry = mounted->mnt.mnt_root; 912 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 913 } 914 } 915 916 static int follow_dotdot_rcu(struct nameidata *nd) 917 { 918 set_root_rcu(nd); 919 920 while (1) { 921 if (nd->path.dentry == nd->root.dentry && 922 nd->path.mnt == nd->root.mnt) { 923 break; 924 } 925 if (nd->path.dentry != nd->path.mnt->mnt_root) { 926 struct dentry *old = nd->path.dentry; 927 struct dentry *parent = old->d_parent; 928 unsigned seq; 929 930 seq = read_seqcount_begin(&parent->d_seq); 931 if (read_seqcount_retry(&old->d_seq, nd->seq)) 932 goto failed; 933 nd->path.dentry = parent; 934 nd->seq = seq; 935 break; 936 } 937 if (!follow_up_rcu(&nd->path)) 938 break; 939 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 940 } 941 follow_mount_rcu(nd); 942 nd->inode = nd->path.dentry->d_inode; 943 return 0; 944 945 failed: 946 nd->flags &= ~LOOKUP_RCU; 947 if (!(nd->flags & LOOKUP_ROOT)) 948 nd->root.mnt = NULL; 949 rcu_read_unlock(); 950 br_read_unlock(vfsmount_lock); 951 return -ECHILD; 952 } 953 954 /* 955 * Follow down to the covering mount currently visible to userspace. At each 956 * point, the filesystem owning that dentry may be queried as to whether the 957 * caller is permitted to proceed or not. 958 */ 959 int follow_down(struct path *path) 960 { 961 unsigned managed; 962 int ret; 963 964 while (managed = ACCESS_ONCE(path->dentry->d_flags), 965 unlikely(managed & DCACHE_MANAGED_DENTRY)) { 966 /* Allow the filesystem to manage the transit without i_mutex 967 * being held. 968 * 969 * We indicate to the filesystem if someone is trying to mount 970 * something here. This gives autofs the chance to deny anyone 971 * other than its daemon the right to mount on its 972 * superstructure. 973 * 974 * The filesystem may sleep at this point. 975 */ 976 if (managed & DCACHE_MANAGE_TRANSIT) { 977 BUG_ON(!path->dentry->d_op); 978 BUG_ON(!path->dentry->d_op->d_manage); 979 ret = path->dentry->d_op->d_manage( 980 path->dentry, false); 981 if (ret < 0) 982 return ret == -EISDIR ? 0 : ret; 983 } 984 985 /* Transit to a mounted filesystem. 
*/ 986 if (managed & DCACHE_MOUNTED) { 987 struct vfsmount *mounted = lookup_mnt(path); 988 if (!mounted) 989 break; 990 dput(path->dentry); 991 mntput(path->mnt); 992 path->mnt = mounted; 993 path->dentry = dget(mounted->mnt_root); 994 continue; 995 } 996 997 /* Don't handle automount points here */ 998 break; 999 } 1000 return 0; 1001 } 1002 1003 /* 1004 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() 1005 */ 1006 static void follow_mount(struct path *path) 1007 { 1008 while (d_mountpoint(path->dentry)) { 1009 struct vfsmount *mounted = lookup_mnt(path); 1010 if (!mounted) 1011 break; 1012 dput(path->dentry); 1013 mntput(path->mnt); 1014 path->mnt = mounted; 1015 path->dentry = dget(mounted->mnt_root); 1016 } 1017 } 1018 1019 static void follow_dotdot(struct nameidata *nd) 1020 { 1021 set_root(nd); 1022 1023 while(1) { 1024 struct dentry *old = nd->path.dentry; 1025 1026 if (nd->path.dentry == nd->root.dentry && 1027 nd->path.mnt == nd->root.mnt) { 1028 break; 1029 } 1030 if (nd->path.dentry != nd->path.mnt->mnt_root) { 1031 /* rare case of legitimate dget_parent()... */ 1032 nd->path.dentry = dget_parent(nd->path.dentry); 1033 dput(old); 1034 break; 1035 } 1036 if (!follow_up(&nd->path)) 1037 break; 1038 } 1039 follow_mount(&nd->path); 1040 nd->inode = nd->path.dentry->d_inode; 1041 } 1042 1043 /* 1044 * This looks up the name in dcache, possibly revalidates the old dentry and 1045 * allocates a new one if not found or not valid. In the need_lookup argument 1046 * returns whether i_op->lookup is necessary. 1047 * 1048 * dir->d_inode->i_mutex must be held 1049 */ 1050 static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, 1051 struct nameidata *nd, bool *need_lookup) 1052 { 1053 struct dentry *dentry; 1054 int error; 1055 1056 *need_lookup = false; 1057 dentry = d_lookup(dir, name); 1058 if (dentry) { 1059 if (d_need_lookup(dentry)) { 1060 *need_lookup = true; 1061 } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { 1062 error = d_revalidate(dentry, nd); 1063 if (unlikely(error <= 0)) { 1064 if (error < 0) { 1065 dput(dentry); 1066 return ERR_PTR(error); 1067 } else if (!d_invalidate(dentry)) { 1068 dput(dentry); 1069 dentry = NULL; 1070 } 1071 } 1072 } 1073 } 1074 1075 if (!dentry) { 1076 dentry = d_alloc(dir, name); 1077 if (unlikely(!dentry)) 1078 return ERR_PTR(-ENOMEM); 1079 1080 *need_lookup = true; 1081 } 1082 return dentry; 1083 } 1084 1085 /* 1086 * Call i_op->lookup on the dentry. The dentry must be negative but may be 1087 * hashed if it was pouplated with DCACHE_NEED_LOOKUP. 1088 * 1089 * dir->d_inode->i_mutex must be held 1090 */ 1091 static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, 1092 struct nameidata *nd) 1093 { 1094 struct dentry *old; 1095 1096 /* Don't create child dentry for a dead directory. */ 1097 if (unlikely(IS_DEADDIR(dir))) { 1098 dput(dentry); 1099 return ERR_PTR(-ENOENT); 1100 } 1101 1102 old = dir->i_op->lookup(dir, dentry, nd); 1103 if (unlikely(old)) { 1104 dput(dentry); 1105 dentry = old; 1106 } 1107 return dentry; 1108 } 1109 1110 static struct dentry *__lookup_hash(struct qstr *name, 1111 struct dentry *base, struct nameidata *nd) 1112 { 1113 bool need_lookup; 1114 struct dentry *dentry; 1115 1116 dentry = lookup_dcache(name, base, nd, &need_lookup); 1117 if (!need_lookup) 1118 return dentry; 1119 1120 return lookup_real(base->d_inode, dentry, nd); 1121 } 1122 1123 /* 1124 * It's more convoluted than I'd like it to be, but... 
it's still fairly 1125 * small and for now I'd prefer to have fast path as straight as possible. 1126 * It _is_ time-critical. 1127 */ 1128 static int do_lookup(struct nameidata *nd, struct qstr *name, 1129 struct path *path, struct inode **inode) 1130 { 1131 struct vfsmount *mnt = nd->path.mnt; 1132 struct dentry *dentry, *parent = nd->path.dentry; 1133 int need_reval = 1; 1134 int status = 1; 1135 int err; 1136 1137 /* 1138 * Rename seqlock is not required here because in the off chance 1139 * of a false negative due to a concurrent rename, we're going to 1140 * do the non-racy lookup, below. 1141 */ 1142 if (nd->flags & LOOKUP_RCU) { 1143 unsigned seq; 1144 dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); 1145 if (!dentry) 1146 goto unlazy; 1147 1148 /* 1149 * This sequence count validates that the inode matches 1150 * the dentry name information from lookup. 1151 */ 1152 *inode = dentry->d_inode; 1153 if (read_seqcount_retry(&dentry->d_seq, seq)) 1154 return -ECHILD; 1155 1156 /* 1157 * This sequence count validates that the parent had no 1158 * changes while we did the lookup of the dentry above. 1159 * 1160 * The memory barrier in read_seqcount_begin of child is 1161 * enough, we can use __read_seqcount_retry here. 1162 */ 1163 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1164 return -ECHILD; 1165 nd->seq = seq; 1166 1167 if (unlikely(d_need_lookup(dentry))) 1168 goto unlazy; 1169 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1170 status = d_revalidate(dentry, nd); 1171 if (unlikely(status <= 0)) { 1172 if (status != -ECHILD) 1173 need_reval = 0; 1174 goto unlazy; 1175 } 1176 } 1177 path->mnt = mnt; 1178 path->dentry = dentry; 1179 if (unlikely(!__follow_mount_rcu(nd, path, inode))) 1180 goto unlazy; 1181 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) 1182 goto unlazy; 1183 return 0; 1184 unlazy: 1185 if (unlazy_walk(nd, dentry)) 1186 return -ECHILD; 1187 } else { 1188 dentry = __d_lookup(parent, name); 1189 } 1190 1191 if (unlikely(!dentry)) 1192 goto need_lookup; 1193 1194 if (unlikely(d_need_lookup(dentry))) { 1195 dput(dentry); 1196 goto need_lookup; 1197 } 1198 1199 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) 1200 status = d_revalidate(dentry, nd); 1201 if (unlikely(status <= 0)) { 1202 if (status < 0) { 1203 dput(dentry); 1204 return status; 1205 } 1206 if (!d_invalidate(dentry)) { 1207 dput(dentry); 1208 goto need_lookup; 1209 } 1210 } 1211 done: 1212 path->mnt = mnt; 1213 path->dentry = dentry; 1214 err = follow_managed(path, nd->flags); 1215 if (unlikely(err < 0)) { 1216 path_put_conditional(path, nd); 1217 return err; 1218 } 1219 if (err) 1220 nd->flags |= LOOKUP_JUMPED; 1221 *inode = path->dentry->d_inode; 1222 return 0; 1223 1224 need_lookup: 1225 BUG_ON(nd->inode != parent->d_inode); 1226 1227 mutex_lock(&parent->d_inode->i_mutex); 1228 dentry = __lookup_hash(name, parent, nd); 1229 mutex_unlock(&parent->d_inode->i_mutex); 1230 if (IS_ERR(dentry)) 1231 return PTR_ERR(dentry); 1232 goto done; 1233 } 1234 1235 static inline int may_lookup(struct nameidata *nd) 1236 { 1237 if (nd->flags & LOOKUP_RCU) { 1238 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); 1239 if (err != -ECHILD) 1240 return err; 1241 if (unlazy_walk(nd, NULL)) 1242 return -ECHILD; 1243 } 1244 return inode_permission(nd->inode, MAY_EXEC); 1245 } 1246 1247 static inline int handle_dots(struct nameidata *nd, int type) 1248 { 1249 if (type == LAST_DOTDOT) { 1250 if (nd->flags & LOOKUP_RCU) { 1251 if (follow_dotdot_rcu(nd)) 1252 return 
-ECHILD; 1253 } else 1254 follow_dotdot(nd); 1255 } 1256 return 0; 1257 } 1258 1259 static void terminate_walk(struct nameidata *nd) 1260 { 1261 if (!(nd->flags & LOOKUP_RCU)) { 1262 path_put(&nd->path); 1263 } else { 1264 nd->flags &= ~LOOKUP_RCU; 1265 if (!(nd->flags & LOOKUP_ROOT)) 1266 nd->root.mnt = NULL; 1267 rcu_read_unlock(); 1268 br_read_unlock(vfsmount_lock); 1269 } 1270 } 1271 1272 /* 1273 * Do we need to follow links? We _really_ want to be able 1274 * to do this check without having to look at inode->i_op, 1275 * so we keep a cache of "no, this doesn't need follow_link" 1276 * for the common case. 1277 */ 1278 static inline int should_follow_link(struct inode *inode, int follow) 1279 { 1280 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { 1281 if (likely(inode->i_op->follow_link)) 1282 return follow; 1283 1284 /* This gets set once for the inode lifetime */ 1285 spin_lock(&inode->i_lock); 1286 inode->i_opflags |= IOP_NOFOLLOW; 1287 spin_unlock(&inode->i_lock); 1288 } 1289 return 0; 1290 } 1291 1292 static inline int walk_component(struct nameidata *nd, struct path *path, 1293 struct qstr *name, int type, int follow) 1294 { 1295 struct inode *inode; 1296 int err; 1297 /* 1298 * "." and ".." are special - ".." especially so because it has 1299 * to be able to know about the current root directory and 1300 * parent relationships. 1301 */ 1302 if (unlikely(type != LAST_NORM)) 1303 return handle_dots(nd, type); 1304 err = do_lookup(nd, name, path, &inode); 1305 if (unlikely(err)) { 1306 terminate_walk(nd); 1307 return err; 1308 } 1309 if (!inode) { 1310 path_to_nameidata(path, nd); 1311 terminate_walk(nd); 1312 return -ENOENT; 1313 } 1314 if (should_follow_link(inode, follow)) { 1315 if (nd->flags & LOOKUP_RCU) { 1316 if (unlikely(unlazy_walk(nd, path->dentry))) { 1317 terminate_walk(nd); 1318 return -ECHILD; 1319 } 1320 } 1321 BUG_ON(inode != path->dentry->d_inode); 1322 return 1; 1323 } 1324 path_to_nameidata(path, nd); 1325 nd->inode = inode; 1326 return 0; 1327 } 1328 1329 /* 1330 * This limits recursive symlink follows to 8, while 1331 * limiting consecutive symlinks to 40. 1332 * 1333 * Without that kind of total limit, nasty chains of consecutive 1334 * symlinks can cause almost arbitrarily long lookups. 1335 */ 1336 static inline int nested_symlink(struct path *path, struct nameidata *nd) 1337 { 1338 int res; 1339 1340 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { 1341 path_put_conditional(path, nd); 1342 path_put(&nd->path); 1343 return -ELOOP; 1344 } 1345 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 1346 1347 nd->depth++; 1348 current->link_count++; 1349 1350 do { 1351 struct path link = *path; 1352 void *cookie; 1353 1354 res = follow_link(&link, nd, &cookie); 1355 if (!res) 1356 res = walk_component(nd, path, &nd->last, 1357 nd->last_type, LOOKUP_FOLLOW); 1358 put_link(nd, &link, cookie); 1359 } while (res > 0); 1360 1361 current->link_count--; 1362 nd->depth--; 1363 return res; 1364 } 1365 1366 /* 1367 * We really don't want to look at inode->i_op->lookup 1368 * when we don't have to. So we keep a cache bit in 1369 * the inode ->i_opflags field that says "yes, we can 1370 * do lookup on this inode". 
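/*
 * Illustrative sketch (not part of the original file): the i_opflags
 * caching pattern used by should_follow_link() above (and by
 * do_inode_permission() and can_lookup() elsewhere in this file), restated
 * in isolation.  The first caller inspects i_op and sets a sticky flag
 * under i_lock; the flag is never cleared for the inode's lifetime, so a
 * racing reader at worst takes the slow path one extra time.  The helper
 * name is an example only.
 */
static inline int example_is_symlink(struct inode *inode)
{
        if (likely(inode->i_opflags & IOP_NOFOLLOW))
                return 0;                       /* cached: no ->follow_link */
        if (inode->i_op->follow_link)
                return 1;
        spin_lock(&inode->i_lock);
        inode->i_opflags |= IOP_NOFOLLOW;       /* set once, never cleared */
        spin_unlock(&inode->i_lock);
        return 0;
}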
1371 */ 1372 static inline int can_lookup(struct inode *inode) 1373 { 1374 if (likely(inode->i_opflags & IOP_LOOKUP)) 1375 return 1; 1376 if (likely(!inode->i_op->lookup)) 1377 return 0; 1378 1379 /* We do this once for the lifetime of the inode */ 1380 spin_lock(&inode->i_lock); 1381 inode->i_opflags |= IOP_LOOKUP; 1382 spin_unlock(&inode->i_lock); 1383 return 1; 1384 } 1385 1386 /* 1387 * We can do the critical dentry name comparison and hashing 1388 * operations one word at a time, but we are limited to: 1389 * 1390 * - Architectures with fast unaligned word accesses. We could 1391 * do a "get_unaligned()" if this helps and is sufficiently 1392 * fast. 1393 * 1394 * - Little-endian machines (so that we can generate the mask 1395 * of low bytes efficiently). Again, we *could* do a byte 1396 * swapping load on big-endian architectures if that is not 1397 * expensive enough to make the optimization worthless. 1398 * 1399 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we 1400 * do not trap on the (extremely unlikely) case of a page 1401 * crossing operation. 1402 * 1403 * - Furthermore, we need an efficient 64-bit compile for the 1404 * 64-bit case in order to generate the "number of bytes in 1405 * the final mask". Again, that could be replaced with a 1406 * efficient population count instruction or similar. 1407 */ 1408 #ifdef CONFIG_DCACHE_WORD_ACCESS 1409 1410 #include <asm/word-at-a-time.h> 1411 1412 #ifdef CONFIG_64BIT 1413 1414 static inline unsigned int fold_hash(unsigned long hash) 1415 { 1416 hash += hash >> (8*sizeof(int)); 1417 return hash; 1418 } 1419 1420 #else /* 32-bit case */ 1421 1422 #define fold_hash(x) (x) 1423 1424 #endif 1425 1426 unsigned int full_name_hash(const unsigned char *name, unsigned int len) 1427 { 1428 unsigned long a, mask; 1429 unsigned long hash = 0; 1430 1431 for (;;) { 1432 a = load_unaligned_zeropad(name); 1433 if (len < sizeof(unsigned long)) 1434 break; 1435 hash += a; 1436 hash *= 9; 1437 name += sizeof(unsigned long); 1438 len -= sizeof(unsigned long); 1439 if (!len) 1440 goto done; 1441 } 1442 mask = ~(~0ul << len*8); 1443 hash += mask & a; 1444 done: 1445 return fold_hash(hash); 1446 } 1447 EXPORT_SYMBOL(full_name_hash); 1448 1449 /* 1450 * Calculate the length and hash of the path component, and 1451 * return the length of the component; 1452 */ 1453 static inline unsigned long hash_name(const char *name, unsigned int *hashp) 1454 { 1455 unsigned long a, b, adata, bdata, mask, hash, len; 1456 const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; 1457 1458 hash = a = 0; 1459 len = -sizeof(unsigned long); 1460 do { 1461 hash = (hash + a) * 9; 1462 len += sizeof(unsigned long); 1463 a = load_unaligned_zeropad(name+len); 1464 b = a ^ REPEAT_BYTE('/'); 1465 } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); 1466 1467 adata = prep_zero_mask(a, adata, &constants); 1468 bdata = prep_zero_mask(b, bdata, &constants); 1469 1470 mask = create_zero_mask(adata | bdata); 1471 1472 hash += a & zero_bytemask(mask); 1473 *hashp = fold_hash(hash); 1474 1475 return len + find_zero(mask); 1476 } 1477 1478 #else 1479 1480 unsigned int full_name_hash(const unsigned char *name, unsigned int len) 1481 { 1482 unsigned long hash = init_name_hash(); 1483 while (len--) 1484 hash = partial_name_hash(*name++, hash); 1485 return end_name_hash(hash); 1486 } 1487 EXPORT_SYMBOL(full_name_hash); 1488 1489 /* 1490 * We know there's a real path component here of at least 1491 * one character. 
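/*
 * Illustrative sketch (not part of the original file): callers outside the
 * fast path build a struct qstr by hand and hash it with full_name_hash(),
 * exactly as lookup_one_len() does further down.  The helper name is an
 * example only.
 */
static void example_init_qstr(struct qstr *q, const char *name)
{
        q->name = name;
        q->len = strlen(name);
        q->hash = full_name_hash(q->name, q->len);
}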
1492 */ 1493 static inline unsigned long hash_name(const char *name, unsigned int *hashp) 1494 { 1495 unsigned long hash = init_name_hash(); 1496 unsigned long len = 0, c; 1497 1498 c = (unsigned char)*name; 1499 do { 1500 len++; 1501 hash = partial_name_hash(c, hash); 1502 c = (unsigned char)name[len]; 1503 } while (c && c != '/'); 1504 *hashp = end_name_hash(hash); 1505 return len; 1506 } 1507 1508 #endif 1509 1510 /* 1511 * Name resolution. 1512 * This is the basic name resolution function, turning a pathname into 1513 * the final dentry. We expect 'base' to be positive and a directory. 1514 * 1515 * Returns 0 and nd will have valid dentry and mnt on success. 1516 * Returns error and drops reference to input namei data on failure. 1517 */ 1518 static int link_path_walk(const char *name, struct nameidata *nd) 1519 { 1520 struct path next; 1521 int err; 1522 1523 while (*name=='/') 1524 name++; 1525 if (!*name) 1526 return 0; 1527 1528 /* At this point we know we have a real path component. */ 1529 for(;;) { 1530 struct qstr this; 1531 long len; 1532 int type; 1533 1534 err = may_lookup(nd); 1535 if (err) 1536 break; 1537 1538 len = hash_name(name, &this.hash); 1539 this.name = name; 1540 this.len = len; 1541 1542 type = LAST_NORM; 1543 if (name[0] == '.') switch (len) { 1544 case 2: 1545 if (name[1] == '.') { 1546 type = LAST_DOTDOT; 1547 nd->flags |= LOOKUP_JUMPED; 1548 } 1549 break; 1550 case 1: 1551 type = LAST_DOT; 1552 } 1553 if (likely(type == LAST_NORM)) { 1554 struct dentry *parent = nd->path.dentry; 1555 nd->flags &= ~LOOKUP_JUMPED; 1556 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { 1557 err = parent->d_op->d_hash(parent, nd->inode, 1558 &this); 1559 if (err < 0) 1560 break; 1561 } 1562 } 1563 1564 if (!name[len]) 1565 goto last_component; 1566 /* 1567 * If it wasn't NUL, we know it was '/'. Skip that 1568 * slash, and continue until no more slashes. 1569 */ 1570 do { 1571 len++; 1572 } while (unlikely(name[len] == '/')); 1573 if (!name[len]) 1574 goto last_component; 1575 name += len; 1576 1577 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1578 if (err < 0) 1579 return err; 1580 1581 if (err) { 1582 err = nested_symlink(&next, nd); 1583 if (err) 1584 return err; 1585 } 1586 if (can_lookup(nd->inode)) 1587 continue; 1588 err = -ENOTDIR; 1589 break; 1590 /* here ends the main loop */ 1591 1592 last_component: 1593 nd->last = this; 1594 nd->last_type = type; 1595 return 0; 1596 } 1597 terminate_walk(nd); 1598 return err; 1599 } 1600 1601 static int path_init(int dfd, const char *name, unsigned int flags, 1602 struct nameidata *nd, struct file **fp) 1603 { 1604 int retval = 0; 1605 int fput_needed; 1606 struct file *file; 1607 1608 nd->last_type = LAST_ROOT; /* if there are only slashes... 
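/*
 * Illustrative sketch (not part of the original file): the component
 * classification that link_path_walk() above performs inline.  Only "."
 * and ".." are special; everything else is LAST_NORM and goes through the
 * dcache/lookup machinery.  The helper name is an example only, and the
 * LOOKUP_JUMPED bookkeeping for ".." is left out here.
 */
static int example_classify(const char *name, unsigned int len)
{
        if (name[0] == '.') {
                if (len == 1)
                        return LAST_DOT;
                if (len == 2 && name[1] == '.')
                        return LAST_DOTDOT;
        }
        return LAST_NORM;
}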
*/ 1609 nd->flags = flags | LOOKUP_JUMPED; 1610 nd->depth = 0; 1611 if (flags & LOOKUP_ROOT) { 1612 struct inode *inode = nd->root.dentry->d_inode; 1613 if (*name) { 1614 if (!inode->i_op->lookup) 1615 return -ENOTDIR; 1616 retval = inode_permission(inode, MAY_EXEC); 1617 if (retval) 1618 return retval; 1619 } 1620 nd->path = nd->root; 1621 nd->inode = inode; 1622 if (flags & LOOKUP_RCU) { 1623 br_read_lock(vfsmount_lock); 1624 rcu_read_lock(); 1625 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1626 } else { 1627 path_get(&nd->path); 1628 } 1629 return 0; 1630 } 1631 1632 nd->root.mnt = NULL; 1633 1634 if (*name=='/') { 1635 if (flags & LOOKUP_RCU) { 1636 br_read_lock(vfsmount_lock); 1637 rcu_read_lock(); 1638 set_root_rcu(nd); 1639 } else { 1640 set_root(nd); 1641 path_get(&nd->root); 1642 } 1643 nd->path = nd->root; 1644 } else if (dfd == AT_FDCWD) { 1645 if (flags & LOOKUP_RCU) { 1646 struct fs_struct *fs = current->fs; 1647 unsigned seq; 1648 1649 br_read_lock(vfsmount_lock); 1650 rcu_read_lock(); 1651 1652 do { 1653 seq = read_seqcount_begin(&fs->seq); 1654 nd->path = fs->pwd; 1655 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1656 } while (read_seqcount_retry(&fs->seq, seq)); 1657 } else { 1658 get_fs_pwd(current->fs, &nd->path); 1659 } 1660 } else { 1661 struct dentry *dentry; 1662 1663 file = fget_raw_light(dfd, &fput_needed); 1664 retval = -EBADF; 1665 if (!file) 1666 goto out_fail; 1667 1668 dentry = file->f_path.dentry; 1669 1670 if (*name) { 1671 retval = -ENOTDIR; 1672 if (!S_ISDIR(dentry->d_inode->i_mode)) 1673 goto fput_fail; 1674 1675 retval = inode_permission(dentry->d_inode, MAY_EXEC); 1676 if (retval) 1677 goto fput_fail; 1678 } 1679 1680 nd->path = file->f_path; 1681 if (flags & LOOKUP_RCU) { 1682 if (fput_needed) 1683 *fp = file; 1684 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1685 br_read_lock(vfsmount_lock); 1686 rcu_read_lock(); 1687 } else { 1688 path_get(&file->f_path); 1689 fput_light(file, fput_needed); 1690 } 1691 } 1692 1693 nd->inode = nd->path.dentry->d_inode; 1694 return 0; 1695 1696 fput_fail: 1697 fput_light(file, fput_needed); 1698 out_fail: 1699 return retval; 1700 } 1701 1702 static inline int lookup_last(struct nameidata *nd, struct path *path) 1703 { 1704 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) 1705 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 1706 1707 nd->flags &= ~LOOKUP_PARENT; 1708 return walk_component(nd, path, &nd->last, nd->last_type, 1709 nd->flags & LOOKUP_FOLLOW); 1710 } 1711 1712 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1713 static int path_lookupat(int dfd, const char *name, 1714 unsigned int flags, struct nameidata *nd) 1715 { 1716 struct file *base = NULL; 1717 struct path path; 1718 int err; 1719 1720 /* 1721 * Path walking is largely split up into 2 different synchronisation 1722 * schemes, rcu-walk and ref-walk (explained in 1723 * Documentation/filesystems/path-lookup.txt). These share much of the 1724 * path walk code, but some things particularly setup, cleanup, and 1725 * following mounts are sufficiently divergent that functions are 1726 * duplicated. Typically there is a function foo(), and its RCU 1727 * analogue, foo_rcu(). 1728 * 1729 * -ECHILD is the error number of choice (just to avoid clashes) that 1730 * is returned if some aspect of an rcu-walk fails. Such an error must 1731 * be handled by restarting a traditional ref-walk (which will always 1732 * be able to complete). 
1733 */ 1734 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base); 1735 1736 if (unlikely(err)) 1737 return err; 1738 1739 current->total_link_count = 0; 1740 err = link_path_walk(name, nd); 1741 1742 if (!err && !(flags & LOOKUP_PARENT)) { 1743 err = lookup_last(nd, &path); 1744 while (err > 0) { 1745 void *cookie; 1746 struct path link = path; 1747 nd->flags |= LOOKUP_PARENT; 1748 err = follow_link(&link, nd, &cookie); 1749 if (!err) 1750 err = lookup_last(nd, &path); 1751 put_link(nd, &link, cookie); 1752 } 1753 } 1754 1755 if (!err) 1756 err = complete_walk(nd); 1757 1758 if (!err && nd->flags & LOOKUP_DIRECTORY) { 1759 if (!nd->inode->i_op->lookup) { 1760 path_put(&nd->path); 1761 err = -ENOTDIR; 1762 } 1763 } 1764 1765 if (base) 1766 fput(base); 1767 1768 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { 1769 path_put(&nd->root); 1770 nd->root.mnt = NULL; 1771 } 1772 return err; 1773 } 1774 1775 static int do_path_lookup(int dfd, const char *name, 1776 unsigned int flags, struct nameidata *nd) 1777 { 1778 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); 1779 if (unlikely(retval == -ECHILD)) 1780 retval = path_lookupat(dfd, name, flags, nd); 1781 if (unlikely(retval == -ESTALE)) 1782 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); 1783 1784 if (likely(!retval)) { 1785 if (unlikely(!audit_dummy_context())) { 1786 if (nd->path.dentry && nd->inode) 1787 audit_inode(name, nd->path.dentry); 1788 } 1789 } 1790 return retval; 1791 } 1792 1793 int kern_path_parent(const char *name, struct nameidata *nd) 1794 { 1795 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); 1796 } 1797 1798 int kern_path(const char *name, unsigned int flags, struct path *path) 1799 { 1800 struct nameidata nd; 1801 int res = do_path_lookup(AT_FDCWD, name, flags, &nd); 1802 if (!res) 1803 *path = nd.path; 1804 return res; 1805 } 1806 1807 /** 1808 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 1809 * @dentry: pointer to dentry of the base directory 1810 * @mnt: pointer to vfs mount of the base directory 1811 * @name: pointer to file name 1812 * @flags: lookup flags 1813 * @path: pointer to struct path to fill 1814 */ 1815 int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1816 const char *name, unsigned int flags, 1817 struct path *path) 1818 { 1819 struct nameidata nd; 1820 int err; 1821 nd.root.dentry = dentry; 1822 nd.root.mnt = mnt; 1823 BUG_ON(flags & LOOKUP_PARENT); 1824 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 1825 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); 1826 if (!err) 1827 *path = nd.path; 1828 return err; 1829 } 1830 1831 /* 1832 * Restricted form of lookup. Doesn't follow links, single-component only, 1833 * needs parent already locked. Doesn't follow mounts. 1834 * SMP-safe. 1835 */ 1836 static struct dentry *lookup_hash(struct nameidata *nd) 1837 { 1838 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1839 } 1840 1841 /** 1842 * lookup_one_len - filesystem helper to lookup single pathname component 1843 * @name: pathname component to lookup 1844 * @base: base directory to lookup from 1845 * @len: maximum length @len should be interpreted to 1846 * 1847 * Note that this routine is purely a helper for filesystem usage and should 1848 * not be called by generic code. Also note that by using this function the 1849 * nameidata argument is passed to the filesystem methods and a filesystem 1850 * using this helper needs to be prepared for that. 
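/*
 * Illustrative sketch (not part of the original file): kern_path() and
 * vfs_path_lookup(), defined above, are the usual in-kernel entry points.
 * The pathname and the LOOKUP_FOLLOW choice are examples; the caller owns
 * the resulting reference and must drop it with path_put().
 */
static int example_resolve(const char *pathname)
{
        struct path path;
        int err;

        err = kern_path(pathname, LOOKUP_FOLLOW, &path);
        if (err)
                return err;
        /* ... use path.dentry / path.mnt here ... */
        path_put(&path);
        return 0;
}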
1851 */ 1852 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1853 { 1854 struct qstr this; 1855 unsigned int c; 1856 int err; 1857 1858 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1859 1860 this.name = name; 1861 this.len = len; 1862 this.hash = full_name_hash(name, len); 1863 if (!len) 1864 return ERR_PTR(-EACCES); 1865 1866 while (len--) { 1867 c = *(const unsigned char *)name++; 1868 if (c == '/' || c == '\0') 1869 return ERR_PTR(-EACCES); 1870 } 1871 /* 1872 * See if the low-level filesystem might want 1873 * to use its own hash.. 1874 */ 1875 if (base->d_flags & DCACHE_OP_HASH) { 1876 int err = base->d_op->d_hash(base, base->d_inode, &this); 1877 if (err < 0) 1878 return ERR_PTR(err); 1879 } 1880 1881 err = inode_permission(base->d_inode, MAY_EXEC); 1882 if (err) 1883 return ERR_PTR(err); 1884 1885 return __lookup_hash(&this, base, NULL); 1886 } 1887 1888 int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 1889 struct path *path, int *empty) 1890 { 1891 struct nameidata nd; 1892 char *tmp = getname_flags(name, flags, empty); 1893 int err = PTR_ERR(tmp); 1894 if (!IS_ERR(tmp)) { 1895 1896 BUG_ON(flags & LOOKUP_PARENT); 1897 1898 err = do_path_lookup(dfd, tmp, flags, &nd); 1899 putname(tmp); 1900 if (!err) 1901 *path = nd.path; 1902 } 1903 return err; 1904 } 1905 1906 int user_path_at(int dfd, const char __user *name, unsigned flags, 1907 struct path *path) 1908 { 1909 return user_path_at_empty(dfd, name, flags, path, NULL); 1910 } 1911 1912 static int user_path_parent(int dfd, const char __user *path, 1913 struct nameidata *nd, char **name) 1914 { 1915 char *s = getname(path); 1916 int error; 1917 1918 if (IS_ERR(s)) 1919 return PTR_ERR(s); 1920 1921 error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd); 1922 if (error) 1923 putname(s); 1924 else 1925 *name = s; 1926 1927 return error; 1928 } 1929 1930 /* 1931 * It's inline, so penalty for filesystems that don't use sticky bit is 1932 * minimal. 1933 */ 1934 static inline int check_sticky(struct inode *dir, struct inode *inode) 1935 { 1936 kuid_t fsuid = current_fsuid(); 1937 1938 if (!(dir->i_mode & S_ISVTX)) 1939 return 0; 1940 if (uid_eq(inode->i_uid, fsuid)) 1941 return 0; 1942 if (uid_eq(dir->i_uid, fsuid)) 1943 return 0; 1944 return !inode_capable(inode, CAP_FOWNER); 1945 } 1946 1947 /* 1948 * Check whether we can remove a link victim from directory dir, check 1949 * whether the type of victim is right. 1950 * 1. We can't do it if dir is read-only (done in permission()) 1951 * 2. We should have write and exec permissions on dir 1952 * 3. We can't remove anything from append-only dir 1953 * 4. We can't do anything with immutable dir (done in permission()) 1954 * 5. If the sticky bit on dir is set we should either 1955 * a. be owner of dir, or 1956 * b. be owner of victim, or 1957 * c. have CAP_FOWNER capability 1958 * 6. If the victim is append-only or immutable we can't do antyhing with 1959 * links pointing to it. 1960 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 1961 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1962 * 9. We can't remove a root or mountpoint. 1963 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1964 * nfs_async_unlink(). 
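/*
 * Illustrative sketch (not part of the original file): lookup_one_len(),
 * defined above, requires the parent's i_mutex, as its WARN_ON_ONCE()
 * checks.  A typical filesystem caller looks like this; the helper name is
 * an example, and a negative dentry (NULL ->d_inode) is a normal result
 * that still needs dput().
 */
static struct dentry *example_child(struct dentry *dir, const char *name)
{
        struct dentry *child;

        mutex_lock(&dir->d_inode->i_mutex);
        child = lookup_one_len(name, dir, strlen(name));
        mutex_unlock(&dir->d_inode->i_mutex);
        return child;           /* ERR_PTR() on error, dput() when done */
}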
1965 */ 1966 static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1967 { 1968 int error; 1969 1970 if (!victim->d_inode) 1971 return -ENOENT; 1972 1973 BUG_ON(victim->d_parent->d_inode != dir); 1974 audit_inode_child(victim, dir); 1975 1976 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 1977 if (error) 1978 return error; 1979 if (IS_APPEND(dir)) 1980 return -EPERM; 1981 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1982 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 1983 return -EPERM; 1984 if (isdir) { 1985 if (!S_ISDIR(victim->d_inode->i_mode)) 1986 return -ENOTDIR; 1987 if (IS_ROOT(victim)) 1988 return -EBUSY; 1989 } else if (S_ISDIR(victim->d_inode->i_mode)) 1990 return -EISDIR; 1991 if (IS_DEADDIR(dir)) 1992 return -ENOENT; 1993 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1994 return -EBUSY; 1995 return 0; 1996 } 1997 1998 /* Check whether we can create an object with dentry child in directory 1999 * dir. 2000 * 1. We can't do it if child already exists (open has special treatment for 2001 * this case, but since we are inlined it's OK) 2002 * 2. We can't do it if dir is read-only (done in permission()) 2003 * 3. We should have write and exec permissions on dir 2004 * 4. We can't do it if dir is immutable (done in permission()) 2005 */ 2006 static inline int may_create(struct inode *dir, struct dentry *child) 2007 { 2008 if (child->d_inode) 2009 return -EEXIST; 2010 if (IS_DEADDIR(dir)) 2011 return -ENOENT; 2012 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 2013 } 2014 2015 /* 2016 * p1 and p2 should be directories on the same fs. 2017 */ 2018 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) 2019 { 2020 struct dentry *p; 2021 2022 if (p1 == p2) { 2023 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 2024 return NULL; 2025 } 2026 2027 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 2028 2029 p = d_ancestor(p2, p1); 2030 if (p) { 2031 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); 2032 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); 2033 return p; 2034 } 2035 2036 p = d_ancestor(p1, p2); 2037 if (p) { 2038 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 2039 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 2040 return p; 2041 } 2042 2043 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 2044 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 2045 return NULL; 2046 } 2047 2048 void unlock_rename(struct dentry *p1, struct dentry *p2) 2049 { 2050 mutex_unlock(&p1->d_inode->i_mutex); 2051 if (p1 != p2) { 2052 mutex_unlock(&p2->d_inode->i_mutex); 2053 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 2054 } 2055 } 2056 2057 int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2058 struct nameidata *nd) 2059 { 2060 int error = may_create(dir, dentry); 2061 2062 if (error) 2063 return error; 2064 2065 if (!dir->i_op->create) 2066 return -EACCES; /* shouldn't it be ENOSYS? */ 2067 mode &= S_IALLUGO; 2068 mode |= S_IFREG; 2069 error = security_inode_create(dir, dentry, mode); 2070 if (error) 2071 return error; 2072 error = dir->i_op->create(dir, dentry, mode, nd); 2073 if (!error) 2074 fsnotify_create(dir, dentry); 2075 return error; 2076 } 2077 2078 static int may_open(struct path *path, int acc_mode, int flag) 2079 { 2080 struct dentry *dentry = path->dentry; 2081 struct inode *inode = dentry->d_inode; 2082 int error; 2083 2084 /* O_PATH? 
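/*
 * Illustrative sketch (not part of the original file): lock_rename() and
 * unlock_rename(), defined above, are how two parent directories on the
 * same filesystem get locked in a deadlock-free order.  The helper name is
 * an example only.
 */
static struct dentry *example_lock_two_dirs(struct dentry *d1, struct dentry *d2)
{
        /*
         * For d1 != d2 this takes ->s_vfs_rename_mutex and both i_mutexes
         * in a safe order; a non-NULL return names the directory that is an
         * ancestor of the other.  Pair with unlock_rename(d1, d2) when done.
         */
        return lock_rename(d1, d2);
}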
*/ 2085 if (!acc_mode) 2086 return 0; 2087 2088 if (!inode) 2089 return -ENOENT; 2090 2091 switch (inode->i_mode & S_IFMT) { 2092 case S_IFLNK: 2093 return -ELOOP; 2094 case S_IFDIR: 2095 if (acc_mode & MAY_WRITE) 2096 return -EISDIR; 2097 break; 2098 case S_IFBLK: 2099 case S_IFCHR: 2100 if (path->mnt->mnt_flags & MNT_NODEV) 2101 return -EACCES; 2102 /*FALLTHRU*/ 2103 case S_IFIFO: 2104 case S_IFSOCK: 2105 flag &= ~O_TRUNC; 2106 break; 2107 } 2108 2109 error = inode_permission(inode, acc_mode); 2110 if (error) 2111 return error; 2112 2113 /* 2114 * An append-only file must be opened in append mode for writing. 2115 */ 2116 if (IS_APPEND(inode)) { 2117 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 2118 return -EPERM; 2119 if (flag & O_TRUNC) 2120 return -EPERM; 2121 } 2122 2123 /* O_NOATIME can only be set by the owner or superuser */ 2124 if (flag & O_NOATIME && !inode_owner_or_capable(inode)) 2125 return -EPERM; 2126 2127 return 0; 2128 } 2129 2130 static int handle_truncate(struct file *filp) 2131 { 2132 struct path *path = &filp->f_path; 2133 struct inode *inode = path->dentry->d_inode; 2134 int error = get_write_access(inode); 2135 if (error) 2136 return error; 2137 /* 2138 * Refuse to truncate files with mandatory locks held on them. 2139 */ 2140 error = locks_verify_locked(inode); 2141 if (!error) 2142 error = security_path_truncate(path); 2143 if (!error) { 2144 error = do_truncate(path->dentry, 0, 2145 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, 2146 filp); 2147 } 2148 put_write_access(inode); 2149 return error; 2150 } 2151 2152 static inline int open_to_namei_flags(int flag) 2153 { 2154 if ((flag & O_ACCMODE) == 3) 2155 flag--; 2156 return flag; 2157 } 2158 2159 /* 2160 * Handle the last step of open() 2161 */ 2162 static struct file *do_last(struct nameidata *nd, struct path *path, 2163 const struct open_flags *op, const char *pathname) 2164 { 2165 struct dentry *dir = nd->path.dentry; 2166 struct dentry *dentry; 2167 int open_flag = op->open_flag; 2168 int will_truncate = open_flag & O_TRUNC; 2169 int want_write = 0; 2170 int acc_mode = op->acc_mode; 2171 struct file *filp; 2172 int error; 2173 2174 nd->flags &= ~LOOKUP_PARENT; 2175 nd->flags |= op->intent; 2176 2177 switch (nd->last_type) { 2178 case LAST_DOTDOT: 2179 case LAST_DOT: 2180 error = handle_dots(nd, nd->last_type); 2181 if (error) 2182 return ERR_PTR(error); 2183 /* fallthrough */ 2184 case LAST_ROOT: 2185 error = complete_walk(nd); 2186 if (error) 2187 return ERR_PTR(error); 2188 audit_inode(pathname, nd->path.dentry); 2189 if (open_flag & O_CREAT) { 2190 error = -EISDIR; 2191 goto exit; 2192 } 2193 goto ok; 2194 case LAST_BIND: 2195 error = complete_walk(nd); 2196 if (error) 2197 return ERR_PTR(error); 2198 audit_inode(pathname, dir); 2199 goto ok; 2200 } 2201 2202 if (!(open_flag & O_CREAT)) { 2203 int symlink_ok = 0; 2204 if (nd->last.name[nd->last.len]) 2205 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 2206 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) 2207 symlink_ok = 1; 2208 /* we _can_ be in RCU mode here */ 2209 error = walk_component(nd, path, &nd->last, LAST_NORM, 2210 !symlink_ok); 2211 if (error < 0) 2212 return ERR_PTR(error); 2213 if (error) /* symlink */ 2214 return NULL; 2215 /* sayonara */ 2216 error = complete_walk(nd); 2217 if (error) 2218 return ERR_PTR(error); 2219 2220 error = -ENOTDIR; 2221 if (nd->flags & LOOKUP_DIRECTORY) { 2222 if (!nd->inode->i_op->lookup) 2223 goto exit; 2224 } 2225 audit_inode(pathname, nd->path.dentry); 2226 goto ok; 2227 } 2228 2229 /* create side of 
things */ 2230 /* 2231 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been 2232 * cleared when we got to the last component we are about to look up 2233 */ 2234 error = complete_walk(nd); 2235 if (error) 2236 return ERR_PTR(error); 2237 2238 audit_inode(pathname, dir); 2239 error = -EISDIR; 2240 /* trailing slashes? */ 2241 if (nd->last.name[nd->last.len]) 2242 goto exit; 2243 2244 mutex_lock(&dir->d_inode->i_mutex); 2245 2246 dentry = lookup_hash(nd); 2247 error = PTR_ERR(dentry); 2248 if (IS_ERR(dentry)) { 2249 mutex_unlock(&dir->d_inode->i_mutex); 2250 goto exit; 2251 } 2252 2253 path->dentry = dentry; 2254 path->mnt = nd->path.mnt; 2255 2256 /* Negative dentry, just create the file */ 2257 if (!dentry->d_inode) { 2258 umode_t mode = op->mode; 2259 if (!IS_POSIXACL(dir->d_inode)) 2260 mode &= ~current_umask(); 2261 /* 2262 * This write is needed to ensure that a 2263 * rw->ro transition does not occur between 2264 * the time when the file is created and when 2265 * a permanent write count is taken through 2266 * the 'struct file' in nameidata_to_filp(). 2267 */ 2268 error = mnt_want_write(nd->path.mnt); 2269 if (error) 2270 goto exit_mutex_unlock; 2271 want_write = 1; 2272 /* Don't check for write permission, don't truncate */ 2273 open_flag &= ~O_TRUNC; 2274 will_truncate = 0; 2275 acc_mode = MAY_OPEN; 2276 error = security_path_mknod(&nd->path, dentry, mode, 0); 2277 if (error) 2278 goto exit_mutex_unlock; 2279 error = vfs_create(dir->d_inode, dentry, mode, nd); 2280 if (error) 2281 goto exit_mutex_unlock; 2282 mutex_unlock(&dir->d_inode->i_mutex); 2283 dput(nd->path.dentry); 2284 nd->path.dentry = dentry; 2285 goto common; 2286 } 2287 2288 /* 2289 * It already exists. 2290 */ 2291 mutex_unlock(&dir->d_inode->i_mutex); 2292 audit_inode(pathname, path->dentry); 2293 2294 error = -EEXIST; 2295 if (open_flag & O_EXCL) 2296 goto exit_dput; 2297 2298 error = follow_managed(path, nd->flags); 2299 if (error < 0) 2300 goto exit_dput; 2301 2302 if (error) 2303 nd->flags |= LOOKUP_JUMPED; 2304 2305 error = -ENOENT; 2306 if (!path->dentry->d_inode) 2307 goto exit_dput; 2308 2309 if (path->dentry->d_inode->i_op->follow_link) 2310 return NULL; 2311 2312 path_to_nameidata(path, nd); 2313 nd->inode = path->dentry->d_inode; 2314 /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... 
*/ 2315 error = complete_walk(nd); 2316 if (error) 2317 return ERR_PTR(error); 2318 error = -EISDIR; 2319 if (S_ISDIR(nd->inode->i_mode)) 2320 goto exit; 2321 ok: 2322 if (!S_ISREG(nd->inode->i_mode)) 2323 will_truncate = 0; 2324 2325 if (will_truncate) { 2326 error = mnt_want_write(nd->path.mnt); 2327 if (error) 2328 goto exit; 2329 want_write = 1; 2330 } 2331 common: 2332 error = may_open(&nd->path, acc_mode, open_flag); 2333 if (error) 2334 goto exit; 2335 filp = nameidata_to_filp(nd); 2336 if (!IS_ERR(filp)) { 2337 error = ima_file_check(filp, op->acc_mode); 2338 if (error) { 2339 fput(filp); 2340 filp = ERR_PTR(error); 2341 } 2342 } 2343 if (!IS_ERR(filp)) { 2344 if (will_truncate) { 2345 error = handle_truncate(filp); 2346 if (error) { 2347 fput(filp); 2348 filp = ERR_PTR(error); 2349 } 2350 } 2351 } 2352 out: 2353 if (want_write) 2354 mnt_drop_write(nd->path.mnt); 2355 path_put(&nd->path); 2356 return filp; 2357 2358 exit_mutex_unlock: 2359 mutex_unlock(&dir->d_inode->i_mutex); 2360 exit_dput: 2361 path_put_conditional(path, nd); 2362 exit: 2363 filp = ERR_PTR(error); 2364 goto out; 2365 } 2366 2367 static struct file *path_openat(int dfd, const char *pathname, 2368 struct nameidata *nd, const struct open_flags *op, int flags) 2369 { 2370 struct file *base = NULL; 2371 struct file *filp; 2372 struct path path; 2373 int error; 2374 2375 filp = get_empty_filp(); 2376 if (!filp) 2377 return ERR_PTR(-ENFILE); 2378 2379 filp->f_flags = op->open_flag; 2380 nd->intent.open.file = filp; 2381 nd->intent.open.flags = open_to_namei_flags(op->open_flag); 2382 nd->intent.open.create_mode = op->mode; 2383 2384 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); 2385 if (unlikely(error)) 2386 goto out_filp; 2387 2388 current->total_link_count = 0; 2389 error = link_path_walk(pathname, nd); 2390 if (unlikely(error)) 2391 goto out_filp; 2392 2393 filp = do_last(nd, &path, op, pathname); 2394 while (unlikely(!filp)) { /* trailing symlink */ 2395 struct path link = path; 2396 void *cookie; 2397 if (!(nd->flags & LOOKUP_FOLLOW)) { 2398 path_put_conditional(&path, nd); 2399 path_put(&nd->path); 2400 filp = ERR_PTR(-ELOOP); 2401 break; 2402 } 2403 nd->flags |= LOOKUP_PARENT; 2404 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); 2405 error = follow_link(&link, nd, &cookie); 2406 if (unlikely(error)) 2407 filp = ERR_PTR(error); 2408 else 2409 filp = do_last(nd, &path, op, pathname); 2410 put_link(nd, &link, cookie); 2411 } 2412 out: 2413 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) 2414 path_put(&nd->root); 2415 if (base) 2416 fput(base); 2417 release_open_intent(nd); 2418 return filp; 2419 2420 out_filp: 2421 filp = ERR_PTR(error); 2422 goto out; 2423 } 2424 2425 struct file *do_filp_open(int dfd, const char *pathname, 2426 const struct open_flags *op, int flags) 2427 { 2428 struct nameidata nd; 2429 struct file *filp; 2430 2431 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); 2432 if (unlikely(filp == ERR_PTR(-ECHILD))) 2433 filp = path_openat(dfd, pathname, &nd, op, flags); 2434 if (unlikely(filp == ERR_PTR(-ESTALE))) 2435 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); 2436 return filp; 2437 } 2438 2439 struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, 2440 const char *name, const struct open_flags *op, int flags) 2441 { 2442 struct nameidata nd; 2443 struct file *file; 2444 2445 nd.root.mnt = mnt; 2446 nd.root.dentry = dentry; 2447 2448 flags |= LOOKUP_ROOT; 2449 2450 if (dentry->d_inode->i_op->follow_link && 
op->intent & LOOKUP_OPEN) 2451 return ERR_PTR(-ELOOP); 2452 2453 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); 2454 if (unlikely(file == ERR_PTR(-ECHILD))) 2455 file = path_openat(-1, name, &nd, op, flags); 2456 if (unlikely(file == ERR_PTR(-ESTALE))) 2457 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); 2458 return file; 2459 } 2460 2461 struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir) 2462 { 2463 struct dentry *dentry = ERR_PTR(-EEXIST); 2464 struct nameidata nd; 2465 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); 2466 if (error) 2467 return ERR_PTR(error); 2468 2469 /* 2470 * Yucky last component or no last component at all? 2471 * (foo/., foo/.., /////) 2472 */ 2473 if (nd.last_type != LAST_NORM) 2474 goto out; 2475 nd.flags &= ~LOOKUP_PARENT; 2476 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2477 nd.intent.open.flags = O_EXCL; 2478 2479 /* 2480 * Do the final lookup. 2481 */ 2482 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2483 dentry = lookup_hash(&nd); 2484 if (IS_ERR(dentry)) 2485 goto fail; 2486 2487 if (dentry->d_inode) 2488 goto eexist; 2489 /* 2490 * Special case - lookup gave negative, but... we had foo/bar/ 2491 * From the vfs_mknod() POV we just have a negative dentry - 2492 * all is fine. Let's be bastards - you had / on the end, you've 2493 * been asking for (non-existent) directory. -ENOENT for you. 2494 */ 2495 if (unlikely(!is_dir && nd.last.name[nd.last.len])) { 2496 dput(dentry); 2497 dentry = ERR_PTR(-ENOENT); 2498 goto fail; 2499 } 2500 *path = nd.path; 2501 return dentry; 2502 eexist: 2503 dput(dentry); 2504 dentry = ERR_PTR(-EEXIST); 2505 fail: 2506 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2507 out: 2508 path_put(&nd.path); 2509 return dentry; 2510 } 2511 EXPORT_SYMBOL(kern_path_create); 2512 2513 struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) 2514 { 2515 char *tmp = getname(pathname); 2516 struct dentry *res; 2517 if (IS_ERR(tmp)) 2518 return ERR_CAST(tmp); 2519 res = kern_path_create(dfd, tmp, path, is_dir); 2520 putname(tmp); 2521 return res; 2522 } 2523 EXPORT_SYMBOL(user_path_create); 2524 2525 int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 2526 { 2527 int error = may_create(dir, dentry); 2528 2529 if (error) 2530 return error; 2531 2532 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2533 return -EPERM; 2534 2535 if (!dir->i_op->mknod) 2536 return -EPERM; 2537 2538 error = devcgroup_inode_mknod(mode, dev); 2539 if (error) 2540 return error; 2541 2542 error = security_inode_mknod(dir, dentry, mode, dev); 2543 if (error) 2544 return error; 2545 2546 error = dir->i_op->mknod(dir, dentry, mode, dev); 2547 if (!error) 2548 fsnotify_create(dir, dentry); 2549 return error; 2550 } 2551 2552 static int may_mknod(umode_t mode) 2553 { 2554 switch (mode & S_IFMT) { 2555 case S_IFREG: 2556 case S_IFCHR: 2557 case S_IFBLK: 2558 case S_IFIFO: 2559 case S_IFSOCK: 2560 case 0: /* zero mode translates to S_IFREG */ 2561 return 0; 2562 case S_IFDIR: 2563 return -EPERM; 2564 default: 2565 return -EINVAL; 2566 } 2567 } 2568 2569 SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, 2570 unsigned, dev) 2571 { 2572 struct dentry *dentry; 2573 struct path path; 2574 int error; 2575 2576 if (S_ISDIR(mode)) 2577 return -EPERM; 2578 2579 dentry = user_path_create(dfd, filename, &path, 0); 2580 if (IS_ERR(dentry)) 2581 return PTR_ERR(dentry); 
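/*
 * Editorial note: when the target filesystem advertises POSIX ACL support
 * (IS_POSIXACL), the umask is assumed to be applied later by the
 * filesystem's own ACL machinery rather than stripped here by the VFS;
 * only non-ACL filesystems get the umask applied at this point below.
 */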
2582
2583 if (!IS_POSIXACL(path.dentry->d_inode))
2584 mode &= ~current_umask();
2585 error = may_mknod(mode);
2586 if (error)
2587 goto out_dput;
2588 error = mnt_want_write(path.mnt);
2589 if (error)
2590 goto out_dput;
2591 error = security_path_mknod(&path, dentry, mode, dev);
2592 if (error)
2593 goto out_drop_write;
2594 switch (mode & S_IFMT) {
2595 case 0: case S_IFREG:
2596 error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
2597 break;
2598 case S_IFCHR: case S_IFBLK:
2599 error = vfs_mknod(path.dentry->d_inode,dentry,mode,
2600 new_decode_dev(dev));
2601 break;
2602 case S_IFIFO: case S_IFSOCK:
2603 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
2604 break;
2605 }
2606 out_drop_write:
2607 mnt_drop_write(path.mnt);
2608 out_dput:
2609 dput(dentry);
2610 mutex_unlock(&path.dentry->d_inode->i_mutex);
2611 path_put(&path);
2612
2613 return error;
2614 }
2615
2616 SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
2617 {
2618 return sys_mknodat(AT_FDCWD, filename, mode, dev);
2619 }
2620
2621 int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2622 {
2623 int error = may_create(dir, dentry);
2624 unsigned max_links = dir->i_sb->s_max_links;
2625
2626 if (error)
2627 return error;
2628
2629 if (!dir->i_op->mkdir)
2630 return -EPERM;
2631
2632 mode &= (S_IRWXUGO|S_ISVTX);
2633 error = security_inode_mkdir(dir, dentry, mode);
2634 if (error)
2635 return error;
2636
2637 if (max_links && dir->i_nlink >= max_links)
2638 return -EMLINK;
2639
2640 error = dir->i_op->mkdir(dir, dentry, mode);
2641 if (!error)
2642 fsnotify_mkdir(dir, dentry);
2643 return error;
2644 }
2645
2646 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
2647 {
2648 struct dentry *dentry;
2649 struct path path;
2650 int error;
2651
2652 dentry = user_path_create(dfd, pathname, &path, 1);
2653 if (IS_ERR(dentry))
2654 return PTR_ERR(dentry);
2655
2656 if (!IS_POSIXACL(path.dentry->d_inode))
2657 mode &= ~current_umask();
2658 error = mnt_want_write(path.mnt);
2659 if (error)
2660 goto out_dput;
2661 error = security_path_mkdir(&path, dentry, mode);
2662 if (error)
2663 goto out_drop_write;
2664 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
2665 out_drop_write:
2666 mnt_drop_write(path.mnt);
2667 out_dput:
2668 dput(dentry);
2669 mutex_unlock(&path.dentry->d_inode->i_mutex);
2670 path_put(&path);
2671 return error;
2672 }
2673
2674 SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
2675 {
2676 return sys_mkdirat(AT_FDCWD, pathname, mode);
2677 }
2678
2679 /*
2680 * The dentry_unhash() helper will try to drop the dentry early: we
2681 * should have a usage count of 1 if we're the only user of this
2682 * dentry, and if that is true (possibly after pruning the dcache),
2683 * then we drop the dentry now.
2684 *
2685 * A low-level filesystem can, if it chooses, legally
2686 * do a
2687 *
2688 * if (!d_unhashed(dentry))
2689 * return -EBUSY;
2690 *
2691 * if it cannot handle the case of removing a directory
2692 * that is still in use by something else..
2693 */ 2694 void dentry_unhash(struct dentry *dentry) 2695 { 2696 shrink_dcache_parent(dentry); 2697 spin_lock(&dentry->d_lock); 2698 if (dentry->d_count == 1) 2699 __d_drop(dentry); 2700 spin_unlock(&dentry->d_lock); 2701 } 2702 2703 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2704 { 2705 int error = may_delete(dir, dentry, 1); 2706 2707 if (error) 2708 return error; 2709 2710 if (!dir->i_op->rmdir) 2711 return -EPERM; 2712 2713 dget(dentry); 2714 mutex_lock(&dentry->d_inode->i_mutex); 2715 2716 error = -EBUSY; 2717 if (d_mountpoint(dentry)) 2718 goto out; 2719 2720 error = security_inode_rmdir(dir, dentry); 2721 if (error) 2722 goto out; 2723 2724 shrink_dcache_parent(dentry); 2725 error = dir->i_op->rmdir(dir, dentry); 2726 if (error) 2727 goto out; 2728 2729 dentry->d_inode->i_flags |= S_DEAD; 2730 dont_mount(dentry); 2731 2732 out: 2733 mutex_unlock(&dentry->d_inode->i_mutex); 2734 dput(dentry); 2735 if (!error) 2736 d_delete(dentry); 2737 return error; 2738 } 2739 2740 static long do_rmdir(int dfd, const char __user *pathname) 2741 { 2742 int error = 0; 2743 char * name; 2744 struct dentry *dentry; 2745 struct nameidata nd; 2746 2747 error = user_path_parent(dfd, pathname, &nd, &name); 2748 if (error) 2749 return error; 2750 2751 switch(nd.last_type) { 2752 case LAST_DOTDOT: 2753 error = -ENOTEMPTY; 2754 goto exit1; 2755 case LAST_DOT: 2756 error = -EINVAL; 2757 goto exit1; 2758 case LAST_ROOT: 2759 error = -EBUSY; 2760 goto exit1; 2761 } 2762 2763 nd.flags &= ~LOOKUP_PARENT; 2764 2765 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2766 dentry = lookup_hash(&nd); 2767 error = PTR_ERR(dentry); 2768 if (IS_ERR(dentry)) 2769 goto exit2; 2770 if (!dentry->d_inode) { 2771 error = -ENOENT; 2772 goto exit3; 2773 } 2774 error = mnt_want_write(nd.path.mnt); 2775 if (error) 2776 goto exit3; 2777 error = security_path_rmdir(&nd.path, dentry); 2778 if (error) 2779 goto exit4; 2780 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2781 exit4: 2782 mnt_drop_write(nd.path.mnt); 2783 exit3: 2784 dput(dentry); 2785 exit2: 2786 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2787 exit1: 2788 path_put(&nd.path); 2789 putname(name); 2790 return error; 2791 } 2792 2793 SYSCALL_DEFINE1(rmdir, const char __user *, pathname) 2794 { 2795 return do_rmdir(AT_FDCWD, pathname); 2796 } 2797 2798 int vfs_unlink(struct inode *dir, struct dentry *dentry) 2799 { 2800 int error = may_delete(dir, dentry, 0); 2801 2802 if (error) 2803 return error; 2804 2805 if (!dir->i_op->unlink) 2806 return -EPERM; 2807 2808 mutex_lock(&dentry->d_inode->i_mutex); 2809 if (d_mountpoint(dentry)) 2810 error = -EBUSY; 2811 else { 2812 error = security_inode_unlink(dir, dentry); 2813 if (!error) { 2814 error = dir->i_op->unlink(dir, dentry); 2815 if (!error) 2816 dont_mount(dentry); 2817 } 2818 } 2819 mutex_unlock(&dentry->d_inode->i_mutex); 2820 2821 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 2822 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 2823 fsnotify_link_count(dentry->d_inode); 2824 d_delete(dentry); 2825 } 2826 2827 return error; 2828 } 2829 2830 /* 2831 * Make sure that the actual truncation of the file will occur outside its 2832 * directory's i_mutex. Truncate can take a long time if there is a lot of 2833 * writeout happening, and we don't want to prevent access to the directory 2834 * while waiting on the I/O. 
2835 */ 2836 static long do_unlinkat(int dfd, const char __user *pathname) 2837 { 2838 int error; 2839 char *name; 2840 struct dentry *dentry; 2841 struct nameidata nd; 2842 struct inode *inode = NULL; 2843 2844 error = user_path_parent(dfd, pathname, &nd, &name); 2845 if (error) 2846 return error; 2847 2848 error = -EISDIR; 2849 if (nd.last_type != LAST_NORM) 2850 goto exit1; 2851 2852 nd.flags &= ~LOOKUP_PARENT; 2853 2854 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2855 dentry = lookup_hash(&nd); 2856 error = PTR_ERR(dentry); 2857 if (!IS_ERR(dentry)) { 2858 /* Why not before? Because we want correct error value */ 2859 if (nd.last.name[nd.last.len]) 2860 goto slashes; 2861 inode = dentry->d_inode; 2862 if (!inode) 2863 goto slashes; 2864 ihold(inode); 2865 error = mnt_want_write(nd.path.mnt); 2866 if (error) 2867 goto exit2; 2868 error = security_path_unlink(&nd.path, dentry); 2869 if (error) 2870 goto exit3; 2871 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2872 exit3: 2873 mnt_drop_write(nd.path.mnt); 2874 exit2: 2875 dput(dentry); 2876 } 2877 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2878 if (inode) 2879 iput(inode); /* truncate the inode here */ 2880 exit1: 2881 path_put(&nd.path); 2882 putname(name); 2883 return error; 2884 2885 slashes: 2886 error = !dentry->d_inode ? -ENOENT : 2887 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 2888 goto exit2; 2889 } 2890 2891 SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) 2892 { 2893 if ((flag & ~AT_REMOVEDIR) != 0) 2894 return -EINVAL; 2895 2896 if (flag & AT_REMOVEDIR) 2897 return do_rmdir(dfd, pathname); 2898 2899 return do_unlinkat(dfd, pathname); 2900 } 2901 2902 SYSCALL_DEFINE1(unlink, const char __user *, pathname) 2903 { 2904 return do_unlinkat(AT_FDCWD, pathname); 2905 } 2906 2907 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) 2908 { 2909 int error = may_create(dir, dentry); 2910 2911 if (error) 2912 return error; 2913 2914 if (!dir->i_op->symlink) 2915 return -EPERM; 2916 2917 error = security_inode_symlink(dir, dentry, oldname); 2918 if (error) 2919 return error; 2920 2921 error = dir->i_op->symlink(dir, dentry, oldname); 2922 if (!error) 2923 fsnotify_create(dir, dentry); 2924 return error; 2925 } 2926 2927 SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, 2928 int, newdfd, const char __user *, newname) 2929 { 2930 int error; 2931 char *from; 2932 struct dentry *dentry; 2933 struct path path; 2934 2935 from = getname(oldname); 2936 if (IS_ERR(from)) 2937 return PTR_ERR(from); 2938 2939 dentry = user_path_create(newdfd, newname, &path, 0); 2940 error = PTR_ERR(dentry); 2941 if (IS_ERR(dentry)) 2942 goto out_putname; 2943 2944 error = mnt_want_write(path.mnt); 2945 if (error) 2946 goto out_dput; 2947 error = security_path_symlink(&path, dentry, from); 2948 if (error) 2949 goto out_drop_write; 2950 error = vfs_symlink(path.dentry->d_inode, dentry, from); 2951 out_drop_write: 2952 mnt_drop_write(path.mnt); 2953 out_dput: 2954 dput(dentry); 2955 mutex_unlock(&path.dentry->d_inode->i_mutex); 2956 path_put(&path); 2957 out_putname: 2958 putname(from); 2959 return error; 2960 } 2961 2962 SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname) 2963 { 2964 return sys_symlinkat(oldname, AT_FDCWD, newname); 2965 } 2966 2967 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2968 { 2969 struct inode *inode = old_dentry->d_inode; 2970 unsigned max_links = 
dir->i_sb->s_max_links;
2971 int error;
2972
2973 if (!inode)
2974 return -ENOENT;
2975
2976 error = may_create(dir, new_dentry);
2977 if (error)
2978 return error;
2979
2980 if (dir->i_sb != inode->i_sb)
2981 return -EXDEV;
2982
2983 /*
2984 * A link to an append-only or immutable file cannot be created.
2985 */
2986 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2987 return -EPERM;
2988 if (!dir->i_op->link)
2989 return -EPERM;
2990 if (S_ISDIR(inode->i_mode))
2991 return -EPERM;
2992
2993 error = security_inode_link(old_dentry, dir, new_dentry);
2994 if (error)
2995 return error;
2996
2997 mutex_lock(&inode->i_mutex);
2998 /* Make sure we don't allow creating a hardlink to an unlinked file */
2999 if (inode->i_nlink == 0)
3000 error = -ENOENT;
3001 else if (max_links && inode->i_nlink >= max_links)
3002 error = -EMLINK;
3003 else
3004 error = dir->i_op->link(old_dentry, dir, new_dentry);
3005 mutex_unlock(&inode->i_mutex);
3006 if (!error)
3007 fsnotify_link(dir, inode, new_dentry);
3008 return error;
3009 }
3010
3011 /*
3012 * Hardlinks are often used in delicate situations. We avoid
3013 * security-related surprises by not following symlinks on the
3014 * newname. --KAB
3015 *
3016 * We don't follow them on the oldname either to be compatible
3017 * with linux 2.0, and to avoid hard-linking to directories
3018 * and other special files. --ADM
3019 */
3020 SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3021 int, newdfd, const char __user *, newname, int, flags)
3022 {
3023 struct dentry *new_dentry;
3024 struct path old_path, new_path;
3025 int how = 0;
3026 int error;
3027
3028 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3029 return -EINVAL;
3030 /*
3031 * To use null names we require CAP_DAC_READ_SEARCH.
3032 * This ensures that not everyone will be able to create
3033 * a hard link using the passed file descriptor.
3034 */
3035 if (flags & AT_EMPTY_PATH) {
3036 if (!capable(CAP_DAC_READ_SEARCH))
3037 return -ENOENT;
3038 how = LOOKUP_EMPTY;
3039 }
3040
3041 if (flags & AT_SYMLINK_FOLLOW)
3042 how |= LOOKUP_FOLLOW;
3043
3044 error = user_path_at(olddfd, oldname, how, &old_path);
3045 if (error)
3046 return error;
3047
3048 new_dentry = user_path_create(newdfd, newname, &new_path, 0);
3049 error = PTR_ERR(new_dentry);
3050 if (IS_ERR(new_dentry))
3051 goto out;
3052
3053 error = -EXDEV;
3054 if (old_path.mnt != new_path.mnt)
3055 goto out_dput;
3056 error = mnt_want_write(new_path.mnt);
3057 if (error)
3058 goto out_dput;
3059 error = security_path_link(old_path.dentry, &new_path, new_dentry);
3060 if (error)
3061 goto out_drop_write;
3062 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
3063 out_drop_write:
3064 mnt_drop_write(new_path.mnt);
3065 out_dput:
3066 dput(new_dentry);
3067 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
3068 path_put(&new_path);
3069 out:
3070 path_put(&old_path);
3071
3072 return error;
3073 }
3074
3075 SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
3076 {
3077 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
3078 }
3079
3080 /*
3081 * The worst of all namespace operations - renaming a directory. "Perverted"
3082 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
3083 * Problems:
3084 * a) we can get into loop creation. Check is done in is_subdir().
3085 * b) race potential - two innocent renames can create a loop together.
3086 * That's where 4.4 screws up. Current fix: serialization on
3087 * sb->s_vfs_rename_mutex.
We might be more accurate, but that's another 3088 * story. 3089 * c) we have to lock _three_ objects - parents and victim (if it exists). 3090 * And that - after we got ->i_mutex on parents (until then we don't know 3091 * whether the target exists). Solution: try to be smart with locking 3092 * order for inodes. We rely on the fact that tree topology may change 3093 * only under ->s_vfs_rename_mutex _and_ that parent of the object we 3094 * move will be locked. Thus we can rank directories by the tree 3095 * (ancestors first) and rank all non-directories after them. 3096 * That works since everybody except rename does "lock parent, lookup, 3097 * lock child" and rename is under ->s_vfs_rename_mutex. 3098 * HOWEVER, it relies on the assumption that any object with ->lookup() 3099 * has no more than 1 dentry. If "hybrid" objects will ever appear, 3100 * we'd better make sure that there's no link(2) for them. 3101 * d) conversion from fhandle to dentry may come in the wrong moment - when 3102 * we are removing the target. Solution: we will have to grab ->i_mutex 3103 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 3104 * ->i_mutex on parents, which works but leads to some truly excessive 3105 * locking]. 3106 */ 3107 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 3108 struct inode *new_dir, struct dentry *new_dentry) 3109 { 3110 int error = 0; 3111 struct inode *target = new_dentry->d_inode; 3112 unsigned max_links = new_dir->i_sb->s_max_links; 3113 3114 /* 3115 * If we are going to change the parent - check write permissions, 3116 * we'll need to flip '..'. 3117 */ 3118 if (new_dir != old_dir) { 3119 error = inode_permission(old_dentry->d_inode, MAY_WRITE); 3120 if (error) 3121 return error; 3122 } 3123 3124 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 3125 if (error) 3126 return error; 3127 3128 dget(new_dentry); 3129 if (target) 3130 mutex_lock(&target->i_mutex); 3131 3132 error = -EBUSY; 3133 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) 3134 goto out; 3135 3136 error = -EMLINK; 3137 if (max_links && !target && new_dir != old_dir && 3138 new_dir->i_nlink >= max_links) 3139 goto out; 3140 3141 if (target) 3142 shrink_dcache_parent(new_dentry); 3143 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 3144 if (error) 3145 goto out; 3146 3147 if (target) { 3148 target->i_flags |= S_DEAD; 3149 dont_mount(new_dentry); 3150 } 3151 out: 3152 if (target) 3153 mutex_unlock(&target->i_mutex); 3154 dput(new_dentry); 3155 if (!error) 3156 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 3157 d_move(old_dentry,new_dentry); 3158 return error; 3159 } 3160 3161 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 3162 struct inode *new_dir, struct dentry *new_dentry) 3163 { 3164 struct inode *target = new_dentry->d_inode; 3165 int error; 3166 3167 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 3168 if (error) 3169 return error; 3170 3171 dget(new_dentry); 3172 if (target) 3173 mutex_lock(&target->i_mutex); 3174 3175 error = -EBUSY; 3176 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 3177 goto out; 3178 3179 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 3180 if (error) 3181 goto out; 3182 3183 if (target) 3184 dont_mount(new_dentry); 3185 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 3186 d_move(old_dentry, new_dentry); 3187 out: 3188 if (target) 3189 
mutex_unlock(&target->i_mutex); 3190 dput(new_dentry); 3191 return error; 3192 } 3193 3194 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 3195 struct inode *new_dir, struct dentry *new_dentry) 3196 { 3197 int error; 3198 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 3199 const unsigned char *old_name; 3200 3201 if (old_dentry->d_inode == new_dentry->d_inode) 3202 return 0; 3203 3204 error = may_delete(old_dir, old_dentry, is_dir); 3205 if (error) 3206 return error; 3207 3208 if (!new_dentry->d_inode) 3209 error = may_create(new_dir, new_dentry); 3210 else 3211 error = may_delete(new_dir, new_dentry, is_dir); 3212 if (error) 3213 return error; 3214 3215 if (!old_dir->i_op->rename) 3216 return -EPERM; 3217 3218 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 3219 3220 if (is_dir) 3221 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 3222 else 3223 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 3224 if (!error) 3225 fsnotify_move(old_dir, new_dir, old_name, is_dir, 3226 new_dentry->d_inode, old_dentry); 3227 fsnotify_oldname_free(old_name); 3228 3229 return error; 3230 } 3231 3232 SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, 3233 int, newdfd, const char __user *, newname) 3234 { 3235 struct dentry *old_dir, *new_dir; 3236 struct dentry *old_dentry, *new_dentry; 3237 struct dentry *trap; 3238 struct nameidata oldnd, newnd; 3239 char *from; 3240 char *to; 3241 int error; 3242 3243 error = user_path_parent(olddfd, oldname, &oldnd, &from); 3244 if (error) 3245 goto exit; 3246 3247 error = user_path_parent(newdfd, newname, &newnd, &to); 3248 if (error) 3249 goto exit1; 3250 3251 error = -EXDEV; 3252 if (oldnd.path.mnt != newnd.path.mnt) 3253 goto exit2; 3254 3255 old_dir = oldnd.path.dentry; 3256 error = -EBUSY; 3257 if (oldnd.last_type != LAST_NORM) 3258 goto exit2; 3259 3260 new_dir = newnd.path.dentry; 3261 if (newnd.last_type != LAST_NORM) 3262 goto exit2; 3263 3264 oldnd.flags &= ~LOOKUP_PARENT; 3265 newnd.flags &= ~LOOKUP_PARENT; 3266 newnd.flags |= LOOKUP_RENAME_TARGET; 3267 3268 trap = lock_rename(new_dir, old_dir); 3269 3270 old_dentry = lookup_hash(&oldnd); 3271 error = PTR_ERR(old_dentry); 3272 if (IS_ERR(old_dentry)) 3273 goto exit3; 3274 /* source must exist */ 3275 error = -ENOENT; 3276 if (!old_dentry->d_inode) 3277 goto exit4; 3278 /* unless the source is a directory trailing slashes give -ENOTDIR */ 3279 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 3280 error = -ENOTDIR; 3281 if (oldnd.last.name[oldnd.last.len]) 3282 goto exit4; 3283 if (newnd.last.name[newnd.last.len]) 3284 goto exit4; 3285 } 3286 /* source should not be ancestor of target */ 3287 error = -EINVAL; 3288 if (old_dentry == trap) 3289 goto exit4; 3290 new_dentry = lookup_hash(&newnd); 3291 error = PTR_ERR(new_dentry); 3292 if (IS_ERR(new_dentry)) 3293 goto exit4; 3294 /* target should not be an ancestor of source */ 3295 error = -ENOTEMPTY; 3296 if (new_dentry == trap) 3297 goto exit5; 3298 3299 error = mnt_want_write(oldnd.path.mnt); 3300 if (error) 3301 goto exit5; 3302 error = security_path_rename(&oldnd.path, old_dentry, 3303 &newnd.path, new_dentry); 3304 if (error) 3305 goto exit6; 3306 error = vfs_rename(old_dir->d_inode, old_dentry, 3307 new_dir->d_inode, new_dentry); 3308 exit6: 3309 mnt_drop_write(oldnd.path.mnt); 3310 exit5: 3311 dput(new_dentry); 3312 exit4: 3313 dput(old_dentry); 3314 exit3: 3315 unlock_rename(new_dir, old_dir); 3316 exit2: 3317 path_put(&newnd.path); 3318 putname(to); 3319 exit1: 3320 
path_put(&oldnd.path); 3321 putname(from); 3322 exit: 3323 return error; 3324 } 3325 3326 SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) 3327 { 3328 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); 3329 } 3330 3331 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 3332 { 3333 int len; 3334 3335 len = PTR_ERR(link); 3336 if (IS_ERR(link)) 3337 goto out; 3338 3339 len = strlen(link); 3340 if (len > (unsigned) buflen) 3341 len = buflen; 3342 if (copy_to_user(buffer, link, len)) 3343 len = -EFAULT; 3344 out: 3345 return len; 3346 } 3347 3348 /* 3349 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 3350 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 3351 * using) it for any given inode is up to filesystem. 3352 */ 3353 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 3354 { 3355 struct nameidata nd; 3356 void *cookie; 3357 int res; 3358 3359 nd.depth = 0; 3360 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 3361 if (IS_ERR(cookie)) 3362 return PTR_ERR(cookie); 3363 3364 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 3365 if (dentry->d_inode->i_op->put_link) 3366 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 3367 return res; 3368 } 3369 3370 int vfs_follow_link(struct nameidata *nd, const char *link) 3371 { 3372 return __vfs_follow_link(nd, link); 3373 } 3374 3375 /* get the link contents into pagecache */ 3376 static char *page_getlink(struct dentry * dentry, struct page **ppage) 3377 { 3378 char *kaddr; 3379 struct page *page; 3380 struct address_space *mapping = dentry->d_inode->i_mapping; 3381 page = read_mapping_page(mapping, 0, NULL); 3382 if (IS_ERR(page)) 3383 return (char*)page; 3384 *ppage = page; 3385 kaddr = kmap(page); 3386 nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1); 3387 return kaddr; 3388 } 3389 3390 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 3391 { 3392 struct page *page = NULL; 3393 char *s = page_getlink(dentry, &page); 3394 int res = vfs_readlink(dentry,buffer,buflen,s); 3395 if (page) { 3396 kunmap(page); 3397 page_cache_release(page); 3398 } 3399 return res; 3400 } 3401 3402 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 3403 { 3404 struct page *page = NULL; 3405 nd_set_link(nd, page_getlink(dentry, &page)); 3406 return page; 3407 } 3408 3409 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 3410 { 3411 struct page *page = cookie; 3412 3413 if (page) { 3414 kunmap(page); 3415 page_cache_release(page); 3416 } 3417 } 3418 3419 /* 3420 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS 3421 */ 3422 int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) 3423 { 3424 struct address_space *mapping = inode->i_mapping; 3425 struct page *page; 3426 void *fsdata; 3427 int err; 3428 char *kaddr; 3429 unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; 3430 if (nofs) 3431 flags |= AOP_FLAG_NOFS; 3432 3433 retry: 3434 err = pagecache_write_begin(NULL, mapping, 0, len-1, 3435 flags, &page, &fsdata); 3436 if (err) 3437 goto fail; 3438 3439 kaddr = kmap_atomic(page); 3440 memcpy(kaddr, symname, len-1); 3441 kunmap_atomic(kaddr); 3442 3443 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, 3444 page, fsdata); 3445 if (err < 0) 3446 goto fail; 3447 if (err < len-1) 3448 goto retry; 3449 3450 mark_inode_dirty(inode); 3451 return 0; 3452 fail: 
3453 return err; 3454 } 3455 3456 int page_symlink(struct inode *inode, const char *symname, int len) 3457 { 3458 return __page_symlink(inode, symname, len, 3459 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); 3460 } 3461 3462 const struct inode_operations page_symlink_inode_operations = { 3463 .readlink = generic_readlink, 3464 .follow_link = page_follow_link_light, 3465 .put_link = page_put_link, 3466 }; 3467 3468 EXPORT_SYMBOL(user_path_at); 3469 EXPORT_SYMBOL(follow_down_one); 3470 EXPORT_SYMBOL(follow_down); 3471 EXPORT_SYMBOL(follow_up); 3472 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 3473 EXPORT_SYMBOL(getname); 3474 EXPORT_SYMBOL(lock_rename); 3475 EXPORT_SYMBOL(lookup_one_len); 3476 EXPORT_SYMBOL(page_follow_link_light); 3477 EXPORT_SYMBOL(page_put_link); 3478 EXPORT_SYMBOL(page_readlink); 3479 EXPORT_SYMBOL(__page_symlink); 3480 EXPORT_SYMBOL(page_symlink); 3481 EXPORT_SYMBOL(page_symlink_inode_operations); 3482 EXPORT_SYMBOL(kern_path); 3483 EXPORT_SYMBOL(vfs_path_lookup); 3484 EXPORT_SYMBOL(inode_permission); 3485 EXPORT_SYMBOL(unlock_rename); 3486 EXPORT_SYMBOL(vfs_create); 3487 EXPORT_SYMBOL(vfs_follow_link); 3488 EXPORT_SYMBOL(vfs_link); 3489 EXPORT_SYMBOL(vfs_mkdir); 3490 EXPORT_SYMBOL(vfs_mknod); 3491 EXPORT_SYMBOL(generic_permission); 3492 EXPORT_SYMBOL(vfs_readlink); 3493 EXPORT_SYMBOL(vfs_rename); 3494 EXPORT_SYMBOL(vfs_rmdir); 3495 EXPORT_SYMBOL(vfs_symlink); 3496 EXPORT_SYMBOL(vfs_unlink); 3497 EXPORT_SYMBOL(dentry_unhash); 3498 EXPORT_SYMBOL(generic_readlink); 3499
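/*
 * Usage sketch (editorial addition, not kernel code): the pagecache symlink
 * helpers exported above are meant to be wired together by a filesystem's
 * ->symlink() method roughly as follows.  This is a minimal illustration
 * only; "myfs_new_inode" and "myfs_aops" stand in for filesystem-specific
 * inode allocation and address_space operations and are not VFS interfaces.
 * Real filesystems also handle quota, journaling and link-count cleanup on
 * the error path.
 *
 *	static int myfs_symlink(struct inode *dir, struct dentry *dentry,
 *				const char *symname)
 *	{
 *		struct inode *inode;
 *		int err;
 *
 *		inode = myfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *
 *		inode->i_op = &page_symlink_inode_operations;
 *		inode->i_mapping->a_ops = &myfs_aops;
 *
 *		err = page_symlink(inode, symname, strlen(symname) + 1);
 *		if (err) {
 *			iput(inode);
 *			return err;
 *		}
 *
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 *
 * generic_readlink(), page_follow_link_light() and page_put_link() then
 * read the target back out of page 0 of the symlink's page cache.
 */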