1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/fsnotify.h> 25 #include <linux/smp_lock.h> 26 #include <linux/personality.h> 27 #include <linux/security.h> 28 #include <linux/syscalls.h> 29 #include <linux/mount.h> 30 #include <linux/audit.h> 31 #include <linux/capability.h> 32 #include <linux/file.h> 33 #include <linux/fcntl.h> 34 #include <linux/namei.h> 35 #include <asm/namei.h> 36 #include <asm/uaccess.h> 37 38 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 39 40 /* [Feb-1997 T. Schoebel-Theuer] 41 * Fundamental changes in the pathname lookup mechanisms (namei) 42 * were necessary because of omirr. The reason is that omirr needs 43 * to know the _real_ pathname, not the user-supplied one, in case 44 * of symlinks (and also when transname replacements occur). 45 * 46 * The new code replaces the old recursive symlink resolution with 47 * an iterative one (in case of non-nested symlink chains). It does 48 * this with calls to <fs>_follow_link(). 49 * As a side effect, dir_namei(), _namei() and follow_link() are now 50 * replaced with a single function lookup_dentry() that can handle all 51 * the special cases of the former code. 52 * 53 * With the new dcache, the pathname is stored at each inode, at least as 54 * long as the refcount of the inode is positive. As a side effect, the 55 * size of the dcache depends on the inode cache and thus is dynamic. 56 * 57 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 58 * resolution to correspond with current state of the code. 
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
98 * 99 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 100 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 101 * During the 2.4 we need to fix the userland stuff depending on it - 102 * hopefully we will be able to get rid of that wart in 2.5. So far only 103 * XEmacs seems to be relying on it... 104 */ 105 /* 106 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 107 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives 108 * any extra contention... 109 */ 110 111 /* In order to reduce some races, while at the same time doing additional 112 * checking and hopefully speeding things up, we copy filenames to the 113 * kernel data space before using them.. 114 * 115 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 116 * PATH_MAX includes the nul terminator --RR. 117 */ 118 static int do_getname(const char __user *filename, char *page) 119 { 120 int retval; 121 unsigned long len = PATH_MAX; 122 123 if (!segment_eq(get_fs(), KERNEL_DS)) { 124 if ((unsigned long) filename >= TASK_SIZE) 125 return -EFAULT; 126 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 127 len = TASK_SIZE - (unsigned long) filename; 128 } 129 130 retval = strncpy_from_user(page, filename, len); 131 if (retval > 0) { 132 if (retval < len) 133 return 0; 134 return -ENAMETOOLONG; 135 } else if (!retval) 136 retval = -ENOENT; 137 return retval; 138 } 139 140 char * getname(const char __user * filename) 141 { 142 char *tmp, *result; 143 144 result = ERR_PTR(-ENOMEM); 145 tmp = __getname(); 146 if (tmp) { 147 int retval = do_getname(filename, tmp); 148 149 result = tmp; 150 if (retval < 0) { 151 __putname(tmp); 152 result = ERR_PTR(retval); 153 } 154 } 155 audit_getname(result); 156 return result; 157 } 158 159 #ifdef CONFIG_AUDITSYSCALL 160 void putname(const char *name) 161 { 162 if (unlikely(!audit_dummy_context())) 163 audit_putname(name); 164 else 165 __putname(name); 166 } 167 
EXPORT_SYMBOL(putname); 168 #endif 169 170 171 /** 172 * generic_permission - check for access rights on a Posix-like filesystem 173 * @inode: inode to check access rights for 174 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 175 * @check_acl: optional callback to check for Posix ACLs 176 * 177 * Used to check for read/write/execute permissions on a file. 178 * We use "fsuid" for this, letting us set arbitrary permissions 179 * for filesystem access without changing the "normal" uids which 180 * are used for other things.. 181 */ 182 int generic_permission(struct inode *inode, int mask, 183 int (*check_acl)(struct inode *inode, int mask)) 184 { 185 umode_t mode = inode->i_mode; 186 187 if (current->fsuid == inode->i_uid) 188 mode >>= 6; 189 else { 190 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 191 int error = check_acl(inode, mask); 192 if (error == -EACCES) 193 goto check_capabilities; 194 else if (error != -EAGAIN) 195 return error; 196 } 197 198 if (in_group_p(inode->i_gid)) 199 mode >>= 3; 200 } 201 202 /* 203 * If the DACs are ok we don't need any capability check. 204 */ 205 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 206 return 0; 207 208 check_capabilities: 209 /* 210 * Read/write DACs are always overridable. 211 * Executable DACs are overridable if at least one exec bit is set. 212 */ 213 if (!(mask & MAY_EXEC) || 214 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 215 if (capable(CAP_DAC_OVERRIDE)) 216 return 0; 217 218 /* 219 * Searching includes executable on directories, else just read. 220 */ 221 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 222 if (capable(CAP_DAC_READ_SEARCH)) 223 return 0; 224 225 return -EACCES; 226 } 227 228 int permission(struct inode *inode, int mask, struct nameidata *nd) 229 { 230 umode_t mode = inode->i_mode; 231 int retval, submask; 232 233 if (mask & MAY_WRITE) { 234 235 /* 236 * Nobody gets write access to a read-only fs. 
237 */ 238 if (IS_RDONLY(inode) && 239 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 240 return -EROFS; 241 242 /* 243 * Nobody gets write access to an immutable file. 244 */ 245 if (IS_IMMUTABLE(inode)) 246 return -EACCES; 247 } 248 249 250 /* 251 * MAY_EXEC on regular files requires special handling: We override 252 * filesystem execute permissions if the mode bits aren't set. 253 */ 254 if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO)) 255 return -EACCES; 256 257 /* Ordinary permission routines do not understand MAY_APPEND. */ 258 submask = mask & ~MAY_APPEND; 259 if (inode->i_op && inode->i_op->permission) 260 retval = inode->i_op->permission(inode, submask, nd); 261 else 262 retval = generic_permission(inode, submask, NULL); 263 if (retval) 264 return retval; 265 266 return security_inode_permission(inode, mask, nd); 267 } 268 269 /** 270 * vfs_permission - check for access rights to a given path 271 * @nd: lookup result that describes the path 272 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 273 * 274 * Used to check for read/write/execute permissions on a path. 275 * We use "fsuid" for this, letting us set arbitrary permissions 276 * for filesystem access without changing the "normal" uids which 277 * are used for other things. 278 */ 279 int vfs_permission(struct nameidata *nd, int mask) 280 { 281 return permission(nd->dentry->d_inode, mask, nd); 282 } 283 284 /** 285 * file_permission - check for additional access rights to a given file 286 * @file: file to check access rights for 287 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 288 * 289 * Used to check for read/write/execute permissions on an already opened 290 * file. 291 * 292 * Note: 293 * Do not use this function in new code. All access checks should 294 * be done using vfs_permission(). 
295 */ 296 int file_permission(struct file *file, int mask) 297 { 298 return permission(file->f_dentry->d_inode, mask, NULL); 299 } 300 301 /* 302 * get_write_access() gets write permission for a file. 303 * put_write_access() releases this write permission. 304 * This is used for regular files. 305 * We cannot support write (and maybe mmap read-write shared) accesses and 306 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 307 * can have the following values: 308 * 0: no writers, no VM_DENYWRITE mappings 309 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 310 * > 0: (i_writecount) users are writing to the file. 311 * 312 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 313 * except for the cases where we don't hold i_writecount yet. Then we need to 314 * use {get,deny}_write_access() - these functions check the sign and refuse 315 * to do the change if sign is wrong. Exclusion between them is provided by 316 * the inode->i_lock spinlock. 
317 */ 318 319 int get_write_access(struct inode * inode) 320 { 321 spin_lock(&inode->i_lock); 322 if (atomic_read(&inode->i_writecount) < 0) { 323 spin_unlock(&inode->i_lock); 324 return -ETXTBSY; 325 } 326 atomic_inc(&inode->i_writecount); 327 spin_unlock(&inode->i_lock); 328 329 return 0; 330 } 331 332 int deny_write_access(struct file * file) 333 { 334 struct inode *inode = file->f_dentry->d_inode; 335 336 spin_lock(&inode->i_lock); 337 if (atomic_read(&inode->i_writecount) > 0) { 338 spin_unlock(&inode->i_lock); 339 return -ETXTBSY; 340 } 341 atomic_dec(&inode->i_writecount); 342 spin_unlock(&inode->i_lock); 343 344 return 0; 345 } 346 347 void path_release(struct nameidata *nd) 348 { 349 dput(nd->dentry); 350 mntput(nd->mnt); 351 } 352 353 /* 354 * umount() mustn't call path_release()/mntput() as that would clear 355 * mnt_expiry_mark 356 */ 357 void path_release_on_umount(struct nameidata *nd) 358 { 359 dput(nd->dentry); 360 mntput_no_expire(nd->mnt); 361 } 362 363 /** 364 * release_open_intent - free up open intent resources 365 * @nd: pointer to nameidata 366 */ 367 void release_open_intent(struct nameidata *nd) 368 { 369 if (nd->intent.open.file->f_dentry == NULL) 370 put_filp(nd->intent.open.file); 371 else 372 fput(nd->intent.open.file); 373 } 374 375 static inline struct dentry * 376 do_revalidate(struct dentry *dentry, struct nameidata *nd) 377 { 378 int status = dentry->d_op->d_revalidate(dentry, nd); 379 if (unlikely(status <= 0)) { 380 /* 381 * The dentry failed validation. 382 * If d_revalidate returned 0 attempt to invalidate 383 * the dentry otherwise d_revalidate is asking us 384 * to return a fail status. 385 */ 386 if (!status) { 387 if (!d_invalidate(dentry)) { 388 dput(dentry); 389 dentry = NULL; 390 } 391 } else { 392 dput(dentry); 393 dentry = ERR_PTR(status); 394 } 395 } 396 return dentry; 397 } 398 399 /* 400 * Internal lookup() using the new generic dcache. 
401 * SMP-safe 402 */ 403 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 404 { 405 struct dentry * dentry = __d_lookup(parent, name); 406 407 /* lockess __d_lookup may fail due to concurrent d_move() 408 * in some unrelated directory, so try with d_lookup 409 */ 410 if (!dentry) 411 dentry = d_lookup(parent, name); 412 413 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 414 dentry = do_revalidate(dentry, nd); 415 416 return dentry; 417 } 418 419 /* 420 * Short-cut version of permission(), for calling by 421 * path_walk(), when dcache lock is held. Combines parts 422 * of permission() and generic_permission(), and tests ONLY for 423 * MAY_EXEC permission. 424 * 425 * If appropriate, check DAC only. If not appropriate, or 426 * short-cut DAC fails, then call permission() to do more 427 * complete permission check. 428 */ 429 static int exec_permission_lite(struct inode *inode, 430 struct nameidata *nd) 431 { 432 umode_t mode = inode->i_mode; 433 434 if (inode->i_op && inode->i_op->permission) 435 return -EAGAIN; 436 437 if (current->fsuid == inode->i_uid) 438 mode >>= 6; 439 else if (in_group_p(inode->i_gid)) 440 mode >>= 3; 441 442 if (mode & MAY_EXEC) 443 goto ok; 444 445 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 446 goto ok; 447 448 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 449 goto ok; 450 451 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 452 goto ok; 453 454 return -EACCES; 455 ok: 456 return security_inode_permission(inode, MAY_EXEC, nd); 457 } 458 459 /* 460 * This is called when everything else fails, and we actually have 461 * to go to the low-level filesystem to find out what we should do.. 462 * 463 * We get the directory semaphore, and after getting that we also 464 * make sure that nobody added the entry to the dcache in the meantime.. 
465 * SMP-safe 466 */ 467 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 468 { 469 struct dentry * result; 470 struct inode *dir = parent->d_inode; 471 472 mutex_lock(&dir->i_mutex); 473 /* 474 * First re-do the cached lookup just in case it was created 475 * while we waited for the directory semaphore.. 476 * 477 * FIXME! This could use version numbering or similar to 478 * avoid unnecessary cache lookups. 479 * 480 * The "dcache_lock" is purely to protect the RCU list walker 481 * from concurrent renames at this point (we mustn't get false 482 * negatives from the RCU list walk here, unlike the optimistic 483 * fast walk). 484 * 485 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 486 */ 487 result = d_lookup(parent, name); 488 if (!result) { 489 struct dentry * dentry = d_alloc(parent, name); 490 result = ERR_PTR(-ENOMEM); 491 if (dentry) { 492 result = dir->i_op->lookup(dir, dentry, nd); 493 if (result) 494 dput(dentry); 495 else 496 result = dentry; 497 } 498 mutex_unlock(&dir->i_mutex); 499 return result; 500 } 501 502 /* 503 * Uhhuh! Nasty case: the cache was re-populated while 504 * we waited on the semaphore. Need to revalidate. 
505 */ 506 mutex_unlock(&dir->i_mutex); 507 if (result->d_op && result->d_op->d_revalidate) { 508 result = do_revalidate(result, nd); 509 if (!result) 510 result = ERR_PTR(-ENOENT); 511 } 512 return result; 513 } 514 515 static int __emul_lookup_dentry(const char *, struct nameidata *); 516 517 /* SMP-safe */ 518 static __always_inline int 519 walk_init_root(const char *name, struct nameidata *nd) 520 { 521 struct fs_struct *fs = current->fs; 522 523 read_lock(&fs->lock); 524 if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 525 nd->mnt = mntget(fs->altrootmnt); 526 nd->dentry = dget(fs->altroot); 527 read_unlock(&fs->lock); 528 if (__emul_lookup_dentry(name,nd)) 529 return 0; 530 read_lock(&fs->lock); 531 } 532 nd->mnt = mntget(fs->rootmnt); 533 nd->dentry = dget(fs->root); 534 read_unlock(&fs->lock); 535 return 1; 536 } 537 538 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 539 { 540 int res = 0; 541 char *name; 542 if (IS_ERR(link)) 543 goto fail; 544 545 if (*link == '/') { 546 path_release(nd); 547 if (!walk_init_root(link, nd)) 548 /* weird __emul_prefix() stuff did it */ 549 goto out; 550 } 551 res = link_path_walk(link, nd); 552 out: 553 if (nd->depth || res || nd->last_type!=LAST_NORM) 554 return res; 555 /* 556 * If it is an iterative symlinks resolution in open_namei() we 557 * have to copy the last component. And all that crap because of 558 * bloody create() on broken symlinks. Furrfu... 
559 */ 560 name = __getname(); 561 if (unlikely(!name)) { 562 path_release(nd); 563 return -ENOMEM; 564 } 565 strcpy(name, nd->last.name); 566 nd->last.name = name; 567 return 0; 568 fail: 569 path_release(nd); 570 return PTR_ERR(link); 571 } 572 573 struct path { 574 struct vfsmount *mnt; 575 struct dentry *dentry; 576 }; 577 578 static inline void dput_path(struct path *path, struct nameidata *nd) 579 { 580 dput(path->dentry); 581 if (path->mnt != nd->mnt) 582 mntput(path->mnt); 583 } 584 585 static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 586 { 587 dput(nd->dentry); 588 if (nd->mnt != path->mnt) 589 mntput(nd->mnt); 590 nd->mnt = path->mnt; 591 nd->dentry = path->dentry; 592 } 593 594 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 595 { 596 int error; 597 void *cookie; 598 struct dentry *dentry = path->dentry; 599 600 touch_atime(path->mnt, dentry); 601 nd_set_link(nd, NULL); 602 603 if (path->mnt != nd->mnt) { 604 path_to_nameidata(path, nd); 605 dget(dentry); 606 } 607 mntget(path->mnt); 608 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 609 error = PTR_ERR(cookie); 610 if (!IS_ERR(cookie)) { 611 char *s = nd_get_link(nd); 612 error = 0; 613 if (s) 614 error = __vfs_follow_link(nd, s); 615 if (dentry->d_inode->i_op->put_link) 616 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 617 } 618 dput(dentry); 619 mntput(path->mnt); 620 621 return error; 622 } 623 624 /* 625 * This limits recursive symlink follows to 8, while 626 * limiting consecutive symlinks to 40. 627 * 628 * Without that kind of total limit, nasty chains of consecutive 629 * symlinks can cause almost arbitrarily long lookups. 
630 */ 631 static inline int do_follow_link(struct path *path, struct nameidata *nd) 632 { 633 int err = -ELOOP; 634 if (current->link_count >= MAX_NESTED_LINKS) 635 goto loop; 636 if (current->total_link_count >= 40) 637 goto loop; 638 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 639 cond_resched(); 640 err = security_inode_follow_link(path->dentry, nd); 641 if (err) 642 goto loop; 643 current->link_count++; 644 current->total_link_count++; 645 nd->depth++; 646 err = __do_follow_link(path, nd); 647 current->link_count--; 648 nd->depth--; 649 return err; 650 loop: 651 dput_path(path, nd); 652 path_release(nd); 653 return err; 654 } 655 656 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 657 { 658 struct vfsmount *parent; 659 struct dentry *mountpoint; 660 spin_lock(&vfsmount_lock); 661 parent=(*mnt)->mnt_parent; 662 if (parent == *mnt) { 663 spin_unlock(&vfsmount_lock); 664 return 0; 665 } 666 mntget(parent); 667 mountpoint=dget((*mnt)->mnt_mountpoint); 668 spin_unlock(&vfsmount_lock); 669 dput(*dentry); 670 *dentry = mountpoint; 671 mntput(*mnt); 672 *mnt = parent; 673 return 1; 674 } 675 676 /* no need for dcache_lock, as serialization is taken care in 677 * namespace.c 678 */ 679 static int __follow_mount(struct path *path) 680 { 681 int res = 0; 682 while (d_mountpoint(path->dentry)) { 683 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); 684 if (!mounted) 685 break; 686 dput(path->dentry); 687 if (res) 688 mntput(path->mnt); 689 path->mnt = mounted; 690 path->dentry = dget(mounted->mnt_root); 691 res = 1; 692 } 693 return res; 694 } 695 696 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) 697 { 698 while (d_mountpoint(*dentry)) { 699 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 700 if (!mounted) 701 break; 702 dput(*dentry); 703 mntput(*mnt); 704 *mnt = mounted; 705 *dentry = dget(mounted->mnt_root); 706 } 707 } 708 709 /* no need for dcache_lock, as serialization is taken care in 710 * namespace.c 711 */ 
712 int follow_down(struct vfsmount **mnt, struct dentry **dentry) 713 { 714 struct vfsmount *mounted; 715 716 mounted = lookup_mnt(*mnt, *dentry); 717 if (mounted) { 718 dput(*dentry); 719 mntput(*mnt); 720 *mnt = mounted; 721 *dentry = dget(mounted->mnt_root); 722 return 1; 723 } 724 return 0; 725 } 726 727 static __always_inline void follow_dotdot(struct nameidata *nd) 728 { 729 struct fs_struct *fs = current->fs; 730 731 while(1) { 732 struct vfsmount *parent; 733 struct dentry *old = nd->dentry; 734 735 read_lock(&fs->lock); 736 if (nd->dentry == fs->root && 737 nd->mnt == fs->rootmnt) { 738 read_unlock(&fs->lock); 739 break; 740 } 741 read_unlock(&fs->lock); 742 spin_lock(&dcache_lock); 743 if (nd->dentry != nd->mnt->mnt_root) { 744 nd->dentry = dget(nd->dentry->d_parent); 745 spin_unlock(&dcache_lock); 746 dput(old); 747 break; 748 } 749 spin_unlock(&dcache_lock); 750 spin_lock(&vfsmount_lock); 751 parent = nd->mnt->mnt_parent; 752 if (parent == nd->mnt) { 753 spin_unlock(&vfsmount_lock); 754 break; 755 } 756 mntget(parent); 757 nd->dentry = dget(nd->mnt->mnt_mountpoint); 758 spin_unlock(&vfsmount_lock); 759 dput(old); 760 mntput(nd->mnt); 761 nd->mnt = parent; 762 } 763 follow_mount(&nd->mnt, &nd->dentry); 764 } 765 766 /* 767 * It's more convoluted than I'd like it to be, but... it's still fairly 768 * small and for now I'd prefer to have fast path as straight as possible. 769 * It _is_ time-critical. 
770 */ 771 static int do_lookup(struct nameidata *nd, struct qstr *name, 772 struct path *path) 773 { 774 struct vfsmount *mnt = nd->mnt; 775 struct dentry *dentry = __d_lookup(nd->dentry, name); 776 777 if (!dentry) 778 goto need_lookup; 779 if (dentry->d_op && dentry->d_op->d_revalidate) 780 goto need_revalidate; 781 done: 782 path->mnt = mnt; 783 path->dentry = dentry; 784 __follow_mount(path); 785 return 0; 786 787 need_lookup: 788 dentry = real_lookup(nd->dentry, name, nd); 789 if (IS_ERR(dentry)) 790 goto fail; 791 goto done; 792 793 need_revalidate: 794 dentry = do_revalidate(dentry, nd); 795 if (!dentry) 796 goto need_lookup; 797 if (IS_ERR(dentry)) 798 goto fail; 799 goto done; 800 801 fail: 802 return PTR_ERR(dentry); 803 } 804 805 /* 806 * Name resolution. 807 * This is the basic name resolution function, turning a pathname into 808 * the final dentry. We expect 'base' to be positive and a directory. 809 * 810 * Returns 0 and nd will have valid dentry and mnt on success. 811 * Returns error and drops reference to input namei data on failure. 812 */ 813 static fastcall int __link_path_walk(const char * name, struct nameidata *nd) 814 { 815 struct path next; 816 struct inode *inode; 817 int err; 818 unsigned int lookup_flags = nd->flags; 819 820 while (*name=='/') 821 name++; 822 if (!*name) 823 goto return_reval; 824 825 inode = nd->dentry->d_inode; 826 if (nd->depth) 827 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 828 829 /* At this point we know we have a real path component. 
*/ 830 for(;;) { 831 unsigned long hash; 832 struct qstr this; 833 unsigned int c; 834 835 nd->flags |= LOOKUP_CONTINUE; 836 err = exec_permission_lite(inode, nd); 837 if (err == -EAGAIN) 838 err = vfs_permission(nd, MAY_EXEC); 839 if (err) 840 break; 841 842 this.name = name; 843 c = *(const unsigned char *)name; 844 845 hash = init_name_hash(); 846 do { 847 name++; 848 hash = partial_name_hash(c, hash); 849 c = *(const unsigned char *)name; 850 } while (c && (c != '/')); 851 this.len = name - (const char *) this.name; 852 this.hash = end_name_hash(hash); 853 854 /* remove trailing slashes? */ 855 if (!c) 856 goto last_component; 857 while (*++name == '/'); 858 if (!*name) 859 goto last_with_slashes; 860 861 /* 862 * "." and ".." are special - ".." especially so because it has 863 * to be able to know about the current root directory and 864 * parent relationships. 865 */ 866 if (this.name[0] == '.') switch (this.len) { 867 default: 868 break; 869 case 2: 870 if (this.name[1] != '.') 871 break; 872 follow_dotdot(nd); 873 inode = nd->dentry->d_inode; 874 /* fallthrough */ 875 case 1: 876 continue; 877 } 878 /* 879 * See if the low-level filesystem might want 880 * to use its own hash.. 881 */ 882 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 883 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 884 if (err < 0) 885 break; 886 } 887 /* This does the actual lookups.. 
*/ 888 err = do_lookup(nd, &this, &next); 889 if (err) 890 break; 891 892 err = -ENOENT; 893 inode = next.dentry->d_inode; 894 if (!inode) 895 goto out_dput; 896 err = -ENOTDIR; 897 if (!inode->i_op) 898 goto out_dput; 899 900 if (inode->i_op->follow_link) { 901 err = do_follow_link(&next, nd); 902 if (err) 903 goto return_err; 904 err = -ENOENT; 905 inode = nd->dentry->d_inode; 906 if (!inode) 907 break; 908 err = -ENOTDIR; 909 if (!inode->i_op) 910 break; 911 } else 912 path_to_nameidata(&next, nd); 913 err = -ENOTDIR; 914 if (!inode->i_op->lookup) 915 break; 916 continue; 917 /* here ends the main loop */ 918 919 last_with_slashes: 920 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 921 last_component: 922 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 923 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 924 if (lookup_flags & LOOKUP_PARENT) 925 goto lookup_parent; 926 if (this.name[0] == '.') switch (this.len) { 927 default: 928 break; 929 case 2: 930 if (this.name[1] != '.') 931 break; 932 follow_dotdot(nd); 933 inode = nd->dentry->d_inode; 934 /* fallthrough */ 935 case 1: 936 goto return_reval; 937 } 938 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 939 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 940 if (err < 0) 941 break; 942 } 943 err = do_lookup(nd, &this, &next); 944 if (err) 945 break; 946 inode = next.dentry->d_inode; 947 if ((lookup_flags & LOOKUP_FOLLOW) 948 && inode && inode->i_op && inode->i_op->follow_link) { 949 err = do_follow_link(&next, nd); 950 if (err) 951 goto return_err; 952 inode = nd->dentry->d_inode; 953 } else 954 path_to_nameidata(&next, nd); 955 err = -ENOENT; 956 if (!inode) 957 break; 958 if (lookup_flags & LOOKUP_DIRECTORY) { 959 err = -ENOTDIR; 960 if (!inode->i_op || !inode->i_op->lookup) 961 break; 962 } 963 goto return_base; 964 lookup_parent: 965 nd->last = this; 966 nd->last_type = LAST_NORM; 967 if (this.name[0] != '.') 968 goto return_base; 969 if (this.len == 1) 970 nd->last_type = LAST_DOT; 971 else 
if (this.len == 2 && this.name[1] == '.') 972 nd->last_type = LAST_DOTDOT; 973 else 974 goto return_base; 975 return_reval: 976 /* 977 * We bypassed the ordinary revalidation routines. 978 * We may need to check the cached dentry for staleness. 979 */ 980 if (nd->dentry && nd->dentry->d_sb && 981 (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 982 err = -ESTALE; 983 /* Note: we do not d_invalidate() */ 984 if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) 985 break; 986 } 987 return_base: 988 return 0; 989 out_dput: 990 dput_path(&next, nd); 991 break; 992 } 993 path_release(nd); 994 return_err: 995 return err; 996 } 997 998 /* 999 * Wrapper to retry pathname resolution whenever the underlying 1000 * file system returns an ESTALE. 1001 * 1002 * Retry the whole path once, forcing real lookup requests 1003 * instead of relying on the dcache. 1004 */ 1005 int fastcall link_path_walk(const char *name, struct nameidata *nd) 1006 { 1007 struct nameidata save = *nd; 1008 int result; 1009 1010 /* make sure the stuff we saved doesn't go away */ 1011 dget(save.dentry); 1012 mntget(save.mnt); 1013 1014 result = __link_path_walk(name, nd); 1015 if (result == -ESTALE) { 1016 *nd = save; 1017 dget(nd->dentry); 1018 mntget(nd->mnt); 1019 nd->flags |= LOOKUP_REVAL; 1020 result = __link_path_walk(name, nd); 1021 } 1022 1023 dput(save.dentry); 1024 mntput(save.mnt); 1025 1026 return result; 1027 } 1028 1029 int fastcall path_walk(const char * name, struct nameidata *nd) 1030 { 1031 current->total_link_count = 0; 1032 return link_path_walk(name, nd); 1033 } 1034 1035 /* 1036 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 1037 * everything is done. Returns 0 and drops input nd, if lookup failed; 1038 */ 1039 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 1040 { 1041 if (path_walk(name, nd)) 1042 return 0; /* something went wrong... 
*/

	if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
		struct dentry *old_dentry = nd->dentry;
		struct vfsmount *old_mnt = nd->mnt;
		struct qstr last = nd->last;
		int last_type = nd->last_type;
		struct fs_struct *fs = current->fs;

		/*
		 * NAME was not found in alternate root or it's a directory.
		 * Try to find it in the normal root:
		 */
		nd->last_type = LAST_ROOT;
		read_lock(&fs->lock);
		nd->mnt = mntget(fs->rootmnt);
		nd->dentry = dget(fs->root);
		read_unlock(&fs->lock);
		if (path_walk(name, nd) == 0) {
			if (nd->dentry->d_inode) {
				dput(old_dentry);
				mntput(old_mnt);
				return 1;
			}
			path_release(nd);
		}
		nd->dentry = old_dentry;
		nd->mnt = old_mnt;
		nd->last = last;
		nd->last_type = last_type;
	}
	return 1;
}

/*
 * set_fs_altroot - (re)install the alternate root of the current task.
 *
 * Looks up the prefix returned by __emul_prefix() (as a directory, following
 * links, with LOOKUP_NOALT) and stores the resulting mount/dentry pair in
 * current->fs->altrootmnt / ->altroot under fs->lock.  When there is no
 * prefix, or the lookup fails, the alternate root is cleared (both NULL).
 *
 * On a successful lookup nd is not released here: the references it holds
 * are the ones now stored in fs.  The references of the previous alternate
 * root, if any, are dropped after the lock has been released.
 */
void set_fs_altroot(void)
{
	char *emul = __emul_prefix();
	struct nameidata nd;
	struct vfsmount *mnt = NULL, *oldmnt;
	struct dentry *dentry = NULL, *olddentry;
	int err;
	struct fs_struct *fs = current->fs;

	if (!emul)
		goto set_it;
	err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
	if (!err) {
		mnt = nd.mnt;
		dentry = nd.dentry;
	}
set_it:
	/* swap old and new under the write lock, drop the old refs outside */
	write_lock(&fs->lock);
	oldmnt = fs->altrootmnt;
	olddentry = fs->altroot;
	fs->altrootmnt = mnt;
	fs->altroot = dentry;
	write_unlock(&fs->lock);
	if (olddentry) {
		dput(olddentry);
		mntput(oldmnt);
	}
}

/*
 * Returns 0 and nd will be valid on success; returns error, otherwise.
 *
 * Chooses the starting point of the walk and then calls link_path_walk():
 *  - name starts with '/': if an alternate root is set and LOOKUP_NOALT is
 *    not given, __emul_lookup_dentry() is tried first; if it reports
 *    failure the walk starts from fs->root;
 *  - relative name and dfd == AT_FDCWD: start from fs->pwd;
 *  - otherwise start from the file open as descriptor dfd.  Fails with
 *    -EBADF if dfd is not open, -ENOTDIR if it is not a directory, or with
 *    the error of file_permission(file, MAY_EXEC).
 *
 * On success the resulting inode (if any) is handed to audit_inode() unless
 * the audit context is a dummy one.
 */
static int fastcall do_path_lookup(int dfd, const char *name,
				unsigned int flags, struct nameidata *nd)
{
	int retval = 0;
	int fput_needed;
	struct file *file;
	struct fs_struct *fs = current->fs;

	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags;
	nd->depth = 0;

	if (*name=='/') {
		read_lock(&fs->lock);
		if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
			nd->mnt = mntget(fs->altrootmnt);
			nd->dentry = dget(fs->altroot);
			/* the lock is not held across the altroot walk */
			read_unlock(&fs->lock);
			if (__emul_lookup_dentry(name,nd))
				goto out; /* found in altroot */
			/* retake the lock before reading the real root */
			read_lock(&fs->lock);
		}
		nd->mnt = mntget(fs->rootmnt);
		nd->dentry = dget(fs->root);
		read_unlock(&fs->lock);
	} else if (dfd == AT_FDCWD) {
		read_lock(&fs->lock);
		nd->mnt = mntget(fs->pwdmnt);
		nd->dentry = dget(fs->pwd);
		read_unlock(&fs->lock);
	} else {
		struct dentry *dentry;

		file = fget_light(dfd, &fput_needed);
		retval = -EBADF;
		if (!file)
			goto out_fail;

		dentry = file->f_dentry;

		retval = -ENOTDIR;
		if (!S_ISDIR(dentry->d_inode->i_mode))
			goto fput_fail;

		retval = file_permission(file, MAY_EXEC);
		if (retval)
			goto fput_fail;

		/* take our own references before the file is released */
		nd->mnt = mntget(file->f_vfsmnt);
		nd->dentry = dget(dentry);

		fput_light(file, fput_needed);
	}
	current->total_link_count = 0;
	retval = link_path_walk(name, nd);
out:
	/* also reached from the altroot case above with retval still 0 */
	if (likely(retval == 0)) {
		if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
				nd->dentry->d_inode))
		audit_inode(name, nd->dentry->d_inode);
	}
out_fail:
	return retval;

fput_fail:
	fput_light(file, fput_needed);
	goto out_fail;
}

/*
 * path_lookup - do_path_lookup() relative to the current working directory
 * (dfd fixed to AT_FDCWD).
 */
int fastcall path_lookup(const char *name, unsigned int flags,
			struct nameidata *nd)
{
	return do_path_lookup(AT_FDCWD, name, flags, nd);
}

/*
 * Common helper for lookups that carry an open intent.
 *
 * Allocates an empty struct file with get_empty_filp() (-ENFILE if that
 * fails), records it together with open_flags and create_mode in
 * nd->intent.open and runs do_path_lookup() with LOOKUP_OPEN added.
 *
 * Afterwards:
 *  - if intent.open.file has been replaced by an error pointer and the
 *    lookup itself succeeded, that error is returned and nd is released;
 *  - if the lookup failed while the file is still valid, the intent is
 *    released via release_open_intent().
 */
static int __path_lookup_intent_open(int dfd, const char *name,
		unsigned int lookup_flags, struct nameidata *nd,
		int open_flags, int create_mode)
{
	struct file *filp = get_empty_filp();
	int err;

	if (filp == NULL)
		return -ENFILE;
	nd->intent.open.file = filp;
	nd->intent.open.flags = open_flags;
	nd->intent.open.create_mode = create_mode;
	err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
	if (IS_ERR(nd->intent.open.file)) {
		if (err == 0) {
			err = PTR_ERR(nd->intent.open.file);
			path_release(nd);
		}
	} else if (err != 0)
		release_open_intent(nd);
	return err;
}

/**
 * path_lookup_open - lookup a file path with open intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * The create mode of the intent is set to 0.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
		struct nameidata *nd, int open_flags)
{
	return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
			open_flags, 0);
}

/**
 * path_lookup_create - lookup a file path with open + create intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags (LOOKUP_CREATE is added here)
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 * @create_mode: create intent mode, stored in nd->intent.open.create_mode
 */
static int path_lookup_create(int dfd, const char *name,
			      unsigned int lookup_flags, struct nameidata *nd,
			      int open_flags, int create_mode)
{
	return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
			nd, open_flags, create_mode);
}

/*
 * Same as path_lookup_open() relative to the current working directory,
 * but the name comes from user space: it is copied in with getname() and
 * released with putname() once the lookup is done.  A getname() failure is
 * returned as is.
 */
int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
		struct nameidata *nd, int open_flags)
{
	char *tmp = getname(name);
	int err = PTR_ERR(tmp);

	if (!IS_ERR(tmp)) {
		err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
		putname(tmp);
	}
	return err;
}

/*
 * Restricted form of lookup. Doesn't follow links, single-component only,
 * needs parent already locked.
Doesn't follow mounts. 1252 * SMP-safe. 1253 */ 1254 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) 1255 { 1256 struct dentry * dentry; 1257 struct inode *inode; 1258 int err; 1259 1260 inode = base->d_inode; 1261 err = permission(inode, MAY_EXEC, nd); 1262 dentry = ERR_PTR(err); 1263 if (err) 1264 goto out; 1265 1266 /* 1267 * See if the low-level filesystem might want 1268 * to use its own hash.. 1269 */ 1270 if (base->d_op && base->d_op->d_hash) { 1271 err = base->d_op->d_hash(base, name); 1272 dentry = ERR_PTR(err); 1273 if (err < 0) 1274 goto out; 1275 } 1276 1277 dentry = cached_lookup(base, name, nd); 1278 if (!dentry) { 1279 struct dentry *new = d_alloc(base, name); 1280 dentry = ERR_PTR(-ENOMEM); 1281 if (!new) 1282 goto out; 1283 dentry = inode->i_op->lookup(inode, new, nd); 1284 if (!dentry) 1285 dentry = new; 1286 else 1287 dput(new); 1288 } 1289 out: 1290 return dentry; 1291 } 1292 1293 static struct dentry *lookup_hash(struct nameidata *nd) 1294 { 1295 return __lookup_hash(&nd->last, nd->dentry, nd); 1296 } 1297 1298 /* SMP-safe */ 1299 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) 1300 { 1301 unsigned long hash; 1302 struct qstr this; 1303 unsigned int c; 1304 1305 this.name = name; 1306 this.len = len; 1307 if (!len) 1308 goto access; 1309 1310 hash = init_name_hash(); 1311 while (len--) { 1312 c = *(const unsigned char *)name++; 1313 if (c == '/' || c == '\0') 1314 goto access; 1315 hash = partial_name_hash(c, hash); 1316 } 1317 this.hash = end_name_hash(hash); 1318 1319 return __lookup_hash(&this, base, NULL); 1320 access: 1321 return ERR_PTR(-EACCES); 1322 } 1323 1324 /* 1325 * namei() 1326 * 1327 * is used by most simple commands to get the inode of a specified name. 1328 * Open, link etc use their own routines, but this is enough for things 1329 * like 'chmod' etc. 1330 * 1331 * namei exists in two versions: namei/lnamei. 
The only difference is
 * that namei follows links, while lnamei does not.
 * SMP-safe
 *
 * Copies the user-space name in with getname(), runs do_path_lookup()
 * relative to @dfd, and releases the copy with putname().  A getname()
 * failure is returned unchanged.
 */
int fastcall __user_walk_fd(int dfd, const char __user *name, unsigned flags,
			    struct nameidata *nd)
{
	char *tmp = getname(name);
	int err = PTR_ERR(tmp);

	if (!IS_ERR(tmp)) {
		err = do_path_lookup(dfd, tmp, flags, nd);
		putname(tmp);
	}
	return err;
}

/* __user_walk_fd() relative to the current working directory. */
int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
{
	return __user_walk_fd(AT_FDCWD, name, flags, nd);
}

/*
 * It's inline, so penalty for filesystems that don't use sticky bit is
 * minimal.
 *
 * Returns non-zero when the sticky bit on @dir forbids the current task
 * from touching @inode: the bit is set, the task's fsuid owns neither
 * @inode nor @dir, and the task lacks CAP_FOWNER.  capable() is only
 * consulted once the ownership tests have failed.
 */
static inline int check_sticky(struct inode *dir, struct inode *inode)
{
	if (!(dir->i_mode & S_ISVTX))
		return 0;
	if (inode->i_uid == current->fsuid)
		return 0;
	if (dir->i_uid == current->fsuid)
		return 0;
	return !capable(CAP_FOWNER);
}

/*
 *	Check whether we can remove a link victim from directory dir, check
 *  whether the type of victim is right.
 *  1. We can't do it if dir is read-only (done in permission())
 *  2. We should have write and exec permissions on dir
 *  3. We can't remove anything from append-only dir
 *  4. We can't do anything with immutable dir (done in permission())
 *  5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
 *  6. If the victim is append-only or immutable we can't do anything with
 *     links pointing to it.
 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 *  9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
1386 */ 1387 static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1388 { 1389 int error; 1390 1391 if (!victim->d_inode) 1392 return -ENOENT; 1393 1394 BUG_ON(victim->d_parent->d_inode != dir); 1395 audit_inode_child(victim->d_name.name, victim->d_inode, dir); 1396 1397 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1398 if (error) 1399 return error; 1400 if (IS_APPEND(dir)) 1401 return -EPERM; 1402 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1403 IS_IMMUTABLE(victim->d_inode)) 1404 return -EPERM; 1405 if (isdir) { 1406 if (!S_ISDIR(victim->d_inode->i_mode)) 1407 return -ENOTDIR; 1408 if (IS_ROOT(victim)) 1409 return -EBUSY; 1410 } else if (S_ISDIR(victim->d_inode->i_mode)) 1411 return -EISDIR; 1412 if (IS_DEADDIR(dir)) 1413 return -ENOENT; 1414 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1415 return -EBUSY; 1416 return 0; 1417 } 1418 1419 /* Check whether we can create an object with dentry child in directory 1420 * dir. 1421 * 1. We can't do it if child already exists (open has special treatment for 1422 * this case, but since we are inlined it's OK) 1423 * 2. We can't do it if dir is read-only (done in permission()) 1424 * 3. We should have write and exec permissions on dir 1425 * 4. We can't do it if dir is immutable (done in permission()) 1426 */ 1427 static inline int may_create(struct inode *dir, struct dentry *child, 1428 struct nameidata *nd) 1429 { 1430 if (child->d_inode) 1431 return -EEXIST; 1432 if (IS_DEADDIR(dir)) 1433 return -ENOENT; 1434 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1435 } 1436 1437 /* 1438 * O_DIRECTORY translates into forcing a directory lookup. 1439 */ 1440 static inline int lookup_flags(unsigned int f) 1441 { 1442 unsigned long retval = LOOKUP_FOLLOW; 1443 1444 if (f & O_NOFOLLOW) 1445 retval &= ~LOOKUP_FOLLOW; 1446 1447 if (f & O_DIRECTORY) 1448 retval |= LOOKUP_DIRECTORY; 1449 1450 return retval; 1451 } 1452 1453 /* 1454 * p1 and p2 should be directories on the same fs. 
*/
/*
 * lock_rename - take the directory locks needed to rename between p1 and p2.
 *
 * Same directory: only its i_mutex is taken (I_MUTEX_PARENT) and NULL is
 * returned.  Otherwise s_vfs_rename_mutex of p1's superblock is taken
 * first, then both parent chains are walked towards the root to find out
 * whether one directory is an ancestor of the other.  The ancestor is
 * locked first (I_MUTEX_PARENT), the descendant second (I_MUTEX_CHILD),
 * and the dentry on the descendant's chain that is a direct child of the
 * ancestor is returned.  If neither is an ancestor, p1 is locked before p2
 * and NULL is returned.
 */
struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
{
	struct dentry *p;

	if (p1 == p2) {
		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
		return NULL;
	}

	mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);

	/* is p2 an ancestor of p1? */
	for (p = p1; p->d_parent != p; p = p->d_parent) {
		if (p->d_parent == p2) {
			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
			return p;
		}
	}

	/* is p1 an ancestor of p2? */
	for (p = p2; p->d_parent != p; p = p->d_parent) {
		if (p->d_parent == p1) {
			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
			return p;
		}
	}

	/* unrelated directories */
	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
	return NULL;
}

/*
 * unlock_rename - undo lock_rename(): drop p1's i_mutex and, when the two
 * directories differ, p2's i_mutex and the superblock's
 * s_vfs_rename_mutex as well.
 */
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
	mutex_unlock(&p1->d_inode->i_mutex);
	if (p1 != p2) {
		mutex_unlock(&p2->d_inode->i_mutex);
		mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
	}
}

/*
 * vfs_create - create a regular file for @dentry in directory @dir.
 *
 * Checks may_create(), requires the directory to implement ->create
 * (-EACCES otherwise), reduces @mode to the S_IALLUGO bits plus S_IFREG,
 * asks the security hook, and calls the filesystem.  fsnotify_create() is
 * called on success.  Returns 0 or a negative errno.
 */
int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
		struct nameidata *nd)
{
	int error = may_create(dir, dentry, nd);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->create)
		return -EACCES;	/* shouldn't it be ENOSYS? */
	mode &= S_IALLUGO;
	mode |= S_IFREG;
	error = security_inode_create(dir, dentry, mode);
	if (error)
		return error;
	DQUOT_INIT(dir);
	error = dir->i_op->create(dir, dentry, mode, nd);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

/*
 * may_open - final checks (and O_TRUNC handling) before opening nd->dentry.
 *
 * @acc_mode is the permission mask passed to vfs_permission(); @flag holds
 * the open flags.  Returns 0 or a negative errno:
 *   -ENOENT  negative dentry
 *   -ELOOP   the object is a symlink
 *   -EISDIR  directory opened with FMODE_WRITE
 *   -EACCES  device node on a MNT_NODEV mount
 *   -EROFS   write open of an IS_RDONLY inode that is not a FIFO, socket
 *            or device node
 *   -EPERM   append-only / O_NOATIME rule violations
 * plus whatever vfs_permission(), break_lease(), get_write_access(),
 * locks_verify_locked() or do_truncate() return.
 */
int may_open(struct nameidata *nd, int acc_mode, int flag)
{
	struct dentry *dentry = nd->dentry;
	struct inode *inode = dentry->d_inode;
	int error;

	if (!inode)
		return -ENOENT;

	if (S_ISLNK(inode->i_mode))
		return -ELOOP;
	
	if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
		return -EISDIR;

	error = vfs_permission(nd, acc_mode);
	if (error)
		return error;

	/*
	 * FIFO's, sockets and device files are special: they don't
	 * actually live on the filesystem itself, and as such you
	 * can write to them even if the filesystem is read-only.
	 */
	if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
	    	flag &= ~O_TRUNC;
	} else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
		if (nd->mnt->mnt_flags & MNT_NODEV)
			return -EACCES;

		flag &= ~O_TRUNC;
	} else if (IS_RDONLY(inode) && (flag & FMODE_WRITE))
		return -EROFS;
	/*
	 * An append-only file must be opened in append mode for writing.
	 */
	if (IS_APPEND(inode)) {
		if  ((flag & FMODE_WRITE) && !(flag & O_APPEND))
			return -EPERM;
		if (flag & O_TRUNC)
			return -EPERM;
	}

	/* O_NOATIME can only be set by the owner or superuser */
	if (flag & O_NOATIME)
		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
			return -EPERM;

	/*
	 * Ensure there are no outstanding leases on the file.
	 */
	error = break_lease(inode, flag);
	if (error)
		return error;

	if (flag & O_TRUNC) {
		/* write access is held only for the duration of the truncate */
		error = get_write_access(inode);
		if (error)
			return error;

		/*
		 * Refuse to truncate files with mandatory locks held on them.
		 */
		error = locks_verify_locked(inode);
		if (!error) {
			DQUOT_INIT(inode);
			
			error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
		}
		put_write_access(inode);
		if (error)
			return error;
	} else
		if (flag & FMODE_WRITE)
			DQUOT_INIT(inode);

	return 0;
}

/*
 * open_namei_create - create the file for a negative last component.
 *
 * nd->dentry is the parent directory and path->dentry the negative dentry
 * to fill.  The umask is applied unless the directory has IS_POSIXACL set.
 * The parent's i_mutex, taken by open_namei() before calling here, is
 * released in this function whether or not vfs_create() succeeds, and nd
 * is moved from the parent to the child (the parent reference is dropped).
 * After a successful create, may_open() is called with acc_mode 0 and
 * O_TRUNC masked off.
 */
static int open_namei_create(struct nameidata *nd, struct path *path,
				int flag, int mode)
{
	int error;
	struct dentry *dir = nd->dentry;

	if (!IS_POSIXACL(dir->d_inode))
		mode &= ~current->fs->umask;
	error = vfs_create(dir->d_inode, path->dentry, mode, nd);
	mutex_unlock(&dir->d_inode->i_mutex);
	dput(nd->dentry);
	nd->dentry = path->dentry;
	if (error)
		return error;
	/* Don't check for write permission, don't truncate */
	return may_open(nd, 0, flag & ~O_TRUNC);
}

/*
 *	open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
* SMP-safe
 */
int open_namei(int dfd, const char *pathname, int flag,
		int mode, struct nameidata *nd)
{
	int acc_mode, error;
	struct path path;
	struct dentry *dir;
	int count = 0;		/* symlinks followed on the last component */

	acc_mode = ACC_MODE(flag);

	/* O_TRUNC implies we need access checks for write permissions */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/* Allow the LSM permission hook to distinguish append 
	   access from general write access. */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {
		error = path_lookup_open(dfd, pathname, lookup_flags(flag),
					 nd, flag);
		if (error)
			return error;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
	error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;

	dir = nd->dentry;
	nd->flags &= ~LOOKUP_PARENT;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;

do_last:
	/* entered with dir->d_inode->i_mutex held */
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}

	if (IS_ERR(nd->intent.open.file)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		error = PTR_ERR(nd->intent.open.file);
		goto exit_dput;
	}

	/* Negative dentry, just create the file */
	if (!path.dentry->d_inode) {
		/* open_namei_create() drops the directory mutex itself */
		error = open_namei_create(nd, &path, flag, mode);
		if (error)
			goto exit;
		return 0;
	}

	/*
	 * It already exists.
	 */
	mutex_unlock(&dir->d_inode->i_mutex);
	audit_inode_update(path.dentry->d_inode);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	if (__follow_mount(&path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
	}

	error = -ENOENT;
	if (!path.dentry->d_inode)
		goto exit_dput;
	if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
		goto do_link;

	path_to_nameidata(&path, nd);
	error = -EISDIR;
	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
		goto exit;
ok:
	error = may_open(nd, acc_mode, flag);
	if (error)
		goto exit;
	return 0;

exit_dput:
	dput_path(&path, nd);
exit:
	/* the intent file may already have been turned into an error pointer */
	if (!IS_ERR(nd->intent.open.file))
		release_open_intent(nd);
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	nd->flags |= LOOKUP_PARENT;
	error = security_inode_follow_link(path.dentry, nd);
	if (error)
		goto exit_dput;
	error = __do_follow_link(&path, nd);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
		release_open_intent(nd);
		return error;
	}
	nd->flags &= ~LOOKUP_PARENT;
	if (nd->last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	if (nd->last.name[nd->last.len]) {
		__putname(nd->last.name);
		goto exit;
	}
	/* give up with -ELOOP once count has reached 32 */
	error = -ELOOP;
	if (count++==32) {
		__putname(nd->last.name);
		goto exit;
	}
	dir = nd->dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	path.dentry = lookup_hash(nd);
	path.mnt = nd->mnt;
	__putname(nd->last.name);
	goto do_last;
}

/**
 * lookup_create - look up the last component in preparation for creating it
 * @nd: nameidata info
 * @is_dir: directory flag
 *
 * Locks the parent directory and looks up the last component with
 * LOOKUP_CREATE set and an O_EXCL open intent.  Nothing is created here:
 * the returned dentry is what the callers in this file pass on to
 * vfs_create(), vfs_mknod(), vfs_mkdir(), vfs_symlink() or vfs_link().
 * Is SMP-safe.
 *
 * Returns the dentry, ERR_PTR(-EEXIST) when the last component is not a
 * normal name, ERR_PTR(-ENOENT) when a non-directory was requested with
 * trailing slashes and the name does not exist, or the error from the
 * lookup.
 *
 * Returns with nd->dentry->d_inode->i_mutex locked.  The mutex is taken
 * before any check, so it is held on the error returns as well.
 */
struct dentry *lookup_create(struct nameidata *nd, int is_dir)
{
	struct dentry *dentry = ERR_PTR(-EEXIST);

	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	/*
	 * Yucky last component or no last component at all?
	 * (foo/., foo/.., /////)
	 */
	if (nd->last_type != LAST_NORM)
		goto fail;
	nd->flags &= ~LOOKUP_PARENT;
	nd->flags |= LOOKUP_CREATE;
	nd->intent.open.flags = O_EXCL;

	/*
	 * Do the final lookup.
	 */
	dentry = lookup_hash(nd);
	if (IS_ERR(dentry))
		goto fail;

	/*
	 * Special case - lookup gave negative, but... we had foo/bar/
	 * From the vfs_mknod() POV we just have a negative dentry -
	 * all is fine. Let's be bastards - you had / on the end, you've
	 * been asking for (non-existent) directory. -ENOENT for you.
	 */
	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
		goto enoent;
	return dentry;
enoent:
	dput(dentry);
	dentry = ERR_PTR(-ENOENT);
fail:
	return dentry;
}
EXPORT_SYMBOL_GPL(lookup_create);

/*
 * vfs_mknod - create a special file (or other node) for @dentry in @dir.
 *
 * Character and block devices require CAP_MKNOD.  Returns -EPERM when the
 * directory has no ->mknod method, otherwise the result of may_create(),
 * the security hook or the filesystem.  fsnotify_create() is called on
 * success.
 */
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	int error = may_create(dir, dentry, NULL);

	if (error)
		return error;

	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
		return -EPERM;

	if (!dir->i_op || !dir->i_op->mknod)
		return -EPERM;

	error = security_inode_mknod(dir, dentry, mode, dev);
	if (error)
		return error;

	DQUOT_INIT(dir);
	error = dir->i_op->mknod(dir, dentry, mode, dev);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

/*
 * mknodat(2): create a filesystem node relative to directory descriptor
 * @dfd.  Directories are refused with -EPERM.  Regular files (or a zero
 * type) go through vfs_create(), device nodes through vfs_mknod() with the
 * decoded device number, FIFOs and sockets through vfs_mknod() with
 * device 0; any other type yields -EINVAL.
 */
asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
				unsigned dev)
{
	int error = 0;
	char * tmp;
	struct dentry * dentry;
	struct nameidata nd;

	if (S_ISDIR(mode))
		return -EPERM;
	tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	/* returns with the parent's i_mutex held, even on failure */
	dentry = lookup_create(&nd, 0);
	error = PTR_ERR(dentry);

	if (!IS_POSIXACL(nd.dentry->d_inode))
		mode &= ~current->fs->umask;
	if (!IS_ERR(dentry)) {
		switch (mode & S_IFMT) {
		case 0: case S_IFREG:
			error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd);
			break;
		case S_IFCHR: case S_IFBLK:
			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,
					new_decode_dev(dev));
			break;
		case S_IFIFO: case S_IFSOCK:
			error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0);
			break;
		case S_IFDIR:
			error = -EPERM;
			break;
		default:
			error = -EINVAL;
		}
		dput(dentry);
	}
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out:
	putname(tmp);

	return error;
}

/* mknod(2): sys_mknodat() relative to the current working directory. */
asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
{
	return sys_mknodat(AT_FDCWD, filename, mode, dev);
}

/*
 * vfs_mkdir - create directory @dentry in @dir.
 *
 * @mode is reduced to the permission bits plus the sticky bit.  Returns
 * -EPERM when the directory has no ->mkdir method, otherwise the result of
 * may_create(), the security hook or the filesystem.  fsnotify_mkdir() is
 * called on success.
 */
int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	int error = may_create(dir, dentry, NULL);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->mkdir)
		return -EPERM;

	mode &= (S_IRWXUGO|S_ISVTX);
	error = security_inode_mkdir(dir, dentry, mode);
	if (error)
		return error;

	DQUOT_INIT(dir);
	error = dir->i_op->mkdir(dir, dentry, mode);
	if (!error)
		fsnotify_mkdir(dir, dentry);
	return error;
}

/*
 * mkdirat(2): create a directory relative to directory descriptor @dfd.
 * The umask is applied unless the parent has IS_POSIXACL set.
 */
asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
{
	int error = 0;
	char * tmp;
	struct dentry *dentry;
	struct nameidata nd;

	tmp = getname(pathname);
	error = PTR_ERR(tmp);
	if (IS_ERR(tmp))
		goto out_err;

	error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	/* returns with the parent's i_mutex held, even on failure */
	dentry = lookup_create(&nd, 1);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

	if (!IS_POSIXACL(nd.dentry->d_inode))
		mode &= ~current->fs->umask;
	error = vfs_mkdir(nd.dentry->d_inode, dentry, mode);
	dput(dentry);
out_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out:
	putname(tmp);
out_err:
	return error;
}

/* mkdir(2): sys_mkdirat() relative to the current working directory. */
asmlinkage long sys_mkdir(const char __user *pathname, int mode)
{
	return sys_mkdirat(AT_FDCWD, pathname, mode);
}

/*
 * We try to drop the dentry early: we should have
 * a usage count of 2 if we're the only user of this
 * dentry, and if that is true (possibly after pruning
 * the dcache), then we drop the dentry now.
*
 * A low-level filesystem can, if it chooses, legally
 * do a
 *
 *	if (!d_unhashed(dentry))
 *		return -EBUSY;
 *
 * if it cannot handle the case of removing a directory
 * that is still in use by something else..
 *
 * The reference taken by the dget() below is not dropped here; the
 * callers in this file (vfs_rmdir(), vfs_rename_dir()) dput() it once the
 * operation is over.
 */
void dentry_unhash(struct dentry *dentry)
{
	dget(dentry);
	if (atomic_read(&dentry->d_count))
		shrink_dcache_parent(dentry);
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count) == 2)
		__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
}

/*
 * vfs_rmdir - remove directory @dentry from @dir.
 *
 * Returns -EPERM when the parent has no ->rmdir method and -EBUSY when
 * the victim is a mountpoint; otherwise the result of may_delete(), the
 * security hook or the filesystem.  The victim's i_mutex is held around
 * the filesystem call; on success the inode is flagged S_DEAD and the
 * dentry is d_delete()d.
 */
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
{
	int error = may_delete(dir, dentry, 1);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->rmdir)
		return -EPERM;

	DQUOT_INIT(dir);

	mutex_lock(&dentry->d_inode->i_mutex);
	dentry_unhash(dentry);
	if (d_mountpoint(dentry))
		error = -EBUSY;
	else {
		error = security_inode_rmdir(dir, dentry);
		if (!error) {
			error = dir->i_op->rmdir(dir, dentry);
			if (!error)
				dentry->d_inode->i_flags |= S_DEAD;
		}
	}
	mutex_unlock(&dentry->d_inode->i_mutex);
	if (!error) {
		d_delete(dentry);
	}
	/* balances the dget() done in dentry_unhash() */
	dput(dentry);

	return error;
}

/*
 * Common code of rmdir(2) and unlinkat(2) with AT_REMOVEDIR.
 *
 * Looks up the parent of @pathname relative to @dfd, rejects "..", "."
 * and the root as the last component (-ENOTEMPTY, -EINVAL and -EBUSY
 * respectively), then looks the victim up under the parent's i_mutex and
 * calls vfs_rmdir().
 */
static long do_rmdir(int dfd, const char __user *pathname)
{
	int error = 0;
	char * name;
	struct dentry *dentry;
	struct nameidata nd;

	name = getname(pathname);
	if(IS_ERR(name))
		return PTR_ERR(name);

	error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
	if (error)
		goto exit;

	switch(nd.last_type) {
		case LAST_DOTDOT:
			error = -ENOTEMPTY;
			goto exit1;
		case LAST_DOT:
			error = -EINVAL;
			goto exit1;
		case LAST_ROOT:
			error = -EBUSY;
			goto exit1;
	}
	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	dentry = lookup_hash(&nd);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto exit2;
	error = vfs_rmdir(nd.dentry->d_inode, dentry);
	dput(dentry);
exit2:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
exit1:
	path_release(&nd);
exit:
	putname(name);
	return error;
}

/* rmdir(2): do_rmdir() relative to the current working directory. */
asmlinkage long sys_rmdir(const char __user *pathname)
{
	return do_rmdir(AT_FDCWD, pathname);
}

/*
 * vfs_unlink - remove the non-directory @dentry from @dir.
 *
 * Returns -EPERM when the parent has no ->unlink method and -EBUSY when
 * the victim is a mountpoint; otherwise the result of may_delete(), the
 * security hook or the filesystem.  The victim's i_mutex is held around
 * the filesystem call.
 */
int vfs_unlink(struct inode *dir, struct dentry *dentry)
{
	int error = may_delete(dir, dentry, 0);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->unlink)
		return -EPERM;

	DQUOT_INIT(dir);

	mutex_lock(&dentry->d_inode->i_mutex);
	if (d_mountpoint(dentry))
		error = -EBUSY;
	else {
		error = security_inode_unlink(dir, dentry);
		if (!error)
			error = dir->i_op->unlink(dir, dentry);
	}
	mutex_unlock(&dentry->d_inode->i_mutex);

	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
	if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
		d_delete(dentry);
	}

	return error;
}

/*
 * Make sure that the actual truncation of the file will occur outside its
 * directory's i_mutex.  Truncate can take a long time if there is a lot of
 * writeout happening, and we don't want to prevent access to the directory
 * while waiting on the I/O.
*/
/*
 * Common code of unlink(2) and unlinkat(2) without AT_REMOVEDIR.
 *
 * A last component that is not a normal name gives -EISDIR.  A name with
 * trailing slashes gives -ENOENT, -EISDIR or -ENOTDIR depending on what the
 * lookup found.  An extra inode reference is taken before vfs_unlink() and
 * dropped only after the parent's i_mutex has been released.
 */
static long do_unlinkat(int dfd, const char __user *pathname)
{
	int error = 0;
	char * name;
	struct dentry *dentry;
	struct nameidata nd;
	struct inode *inode = NULL;

	name = getname(pathname);
	if(IS_ERR(name))
		return PTR_ERR(name);

	error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
	if (error)
		goto exit;
	error = -EISDIR;
	if (nd.last_type != LAST_NORM)
		goto exit1;
	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	dentry = lookup_hash(&nd);
	error = PTR_ERR(dentry);
	if (!IS_ERR(dentry)) {
		/* Why not before? Because we want correct error value */
		if (nd.last.name[nd.last.len])
			goto slashes;
		inode = dentry->d_inode;
		if (inode)
			atomic_inc(&inode->i_count);
		error = vfs_unlink(nd.dentry->d_inode, dentry);
	exit2:
		dput(dentry);
	}
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	if (inode)
		iput(inode);	/* truncate the inode here */
exit1:
	path_release(&nd);
exit:
	putname(name);
	return error;

slashes:
	/* jumps back to exit2, inside the block above, to drop the dentry */
	error = !dentry->d_inode ? -ENOENT :
		S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
	goto exit2;
}

/*
 * unlinkat(2): AT_REMOVEDIR is the only flag accepted (-EINVAL otherwise);
 * with it the call behaves like rmdir, without it like unlink.
 */
asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag)
{
	if ((flag & ~AT_REMOVEDIR) != 0)
		return -EINVAL;

	if (flag & AT_REMOVEDIR)
		return do_rmdir(dfd, pathname);

	return do_unlinkat(dfd, pathname);
}

/* unlink(2): do_unlinkat() relative to the current working directory. */
asmlinkage long sys_unlink(const char __user *pathname)
{
	return do_unlinkat(AT_FDCWD, pathname);
}

/*
 * vfs_symlink - create symlink @dentry in @dir pointing at @oldname.
 *
 * Returns -EPERM when the directory has no ->symlink method, otherwise the
 * result of may_create(), the security hook or the filesystem.  @mode is
 * not used by this function.  fsnotify_create() is called on success.
 */
int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode)
{
	int error = may_create(dir, dentry, NULL);

	if (error)
		return error;

	if (!dir->i_op || !dir->i_op->symlink)
		return -EPERM;

	error = security_inode_symlink(dir, dentry, oldname);
	if (error)
		return error;

	DQUOT_INIT(dir);
	error = dir->i_op->symlink(dir, dentry, oldname);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}

/*
 * symlinkat(2): create a symlink named @newname (relative to @newdfd)
 * whose contents are @oldname.  Both strings are copied in from user space
 * and released on every exit path.
 */
asmlinkage long sys_symlinkat(const char __user *oldname,
			      int newdfd, const char __user *newname)
{
	int error = 0;
	char * from;
	char * to;
	struct dentry *dentry;
	struct nameidata nd;

	from = getname(oldname);
	if(IS_ERR(from))
		return PTR_ERR(from);
	to = getname(newname);
	error = PTR_ERR(to);
	if (IS_ERR(to))
		goto out_putname;

	error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	/* returns with the parent's i_mutex held, even on failure */
	dentry = lookup_create(&nd, 0);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

	error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO);
	dput(dentry);
out_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
	path_release(&nd);
out:
	putname(to);
out_putname:
	putname(from);
	return error;
}

/* symlink(2): sys_symlinkat() relative to the current working directory. */
asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname)
{
	return sys_symlinkat(oldname, AT_FDCWD, newname);
}

/*
 * vfs_link - create hard link @new_dentry in @dir to the inode of
 * @old_dentry.
 *
 * Returns -ENOENT for a negative source, -EXDEV when source and directory
 * are on different superblocks, and -EPERM for append-only or immutable
 * sources, for directories, and when the directory has no ->link method;
 * otherwise the result of may_create(), the security hook or the
 * filesystem.  The source inode's i_mutex is held around the filesystem
 * call.
 */
int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
{
	struct inode *inode = old_dentry->d_inode;
	int error;

	if (!inode)
		return -ENOENT;

	error = may_create(dir, new_dentry, NULL);
	if (error)
		return error;

	if (dir->i_sb != inode->i_sb)
		return -EXDEV;

	/*
	 * A link to an append-only or immutable file cannot be created.
	 */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return -EPERM;
	if (!dir->i_op || !dir->i_op->link)
		return -EPERM;
	if (S_ISDIR(old_dentry->d_inode->i_mode))
		return -EPERM;

	error = security_inode_link(old_dentry, dir, new_dentry);
	if (error)
		return error;

	mutex_lock(&old_dentry->d_inode->i_mutex);
	DQUOT_INIT(dir);
	error = dir->i_op->link(old_dentry, dir, new_dentry);
	mutex_unlock(&old_dentry->d_inode->i_mutex);
	if (!error)
		fsnotify_create(dir, new_dentry);
	return error;
}

/*
 * Hardlinks are often used in delicate situations.  We avoid
 * security-related surprises by not following symlinks on the
 * newname.  --KAB
 *
 * We don't follow them on the oldname either to be compatible
 * with linux 2.0, and to avoid hard-linking to directories
 * and other special files.  --ADM
 *
 * The one exception is a caller passing AT_SYMLINK_FOLLOW in @flags, in
 * which case the oldname is looked up with LOOKUP_FOLLOW.  Any other flag
 * bit gives -EINVAL, and source and target on different mounts give
 * -EXDEV.
 */
asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
			   int newdfd, const char __user *newname,
			   int flags)
{
	struct dentry *new_dentry;
	struct nameidata nd, old_nd;
	int error;
	char * to;

	if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
		return -EINVAL;

	to = getname(newname);
	if (IS_ERR(to))
		return PTR_ERR(to);

	error = __user_walk_fd(olddfd, oldname,
			       flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
			       &old_nd);
	if (error)
		goto exit;
	error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	error = -EXDEV;
	if (old_nd.mnt != nd.mnt)
		goto out_release;
	/* returns with the parent's i_mutex held, even on failure */
	new_dentry = lookup_create(&nd, 0);
	error = PTR_ERR(new_dentry);
	if (IS_ERR(new_dentry))
		goto out_unlock;
	error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
	dput(new_dentry);
out_unlock:
	mutex_unlock(&nd.dentry->d_inode->i_mutex);
out_release:
	path_release(&nd);
out:
	path_release(&old_nd);
exit:
	putname(to);

	return error;
}

/* link(2): sys_linkat() with both names relative to the cwd and no flags. */
asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
{
	return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
}

/*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *	a) we can get into loop creation. Check is done in is_subdir().
 *	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4 screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_mutex. We might be more accurate, but that's another
 *	   story.
 *	c) we have to lock _three_ objects - parents and victim (if it exists).
 *	   And that - after we got ->i_mutex on parents (until then we don't know
 *	   whether the target exists).  Solution: try to be smart with locking
 *	   order for inodes.  We rely on the fact that tree topology may change
 *	   only under ->s_vfs_rename_mutex _and_ that parent of the object we
 *	   move will be locked.  Thus we can rank directories by the tree
 *	   (ancestors first) and rank all non-directories after them.
 *	   That works since everybody except rename does "lock parent, lookup,
 *	   lock child" and rename is under ->s_vfs_rename_mutex.
2366 * HOWEVER, it relies on the assumption that any object with ->lookup() 2367 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2368 * we'd better make sure that there's no link(2) for them. 2369 * d) some filesystems don't support opened-but-unlinked directories, 2370 * either because of layout or because they are not ready to deal with 2371 * all cases correctly. The latter will be fixed (taking this sort of 2372 * stuff into VFS), but the former is not going away. Solution: the same 2373 * trick as in rmdir(). 2374 * e) conversion from fhandle to dentry may come in the wrong moment - when 2375 * we are removing the target. Solution: we will have to grab ->i_mutex 2376 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2377 * ->i_mutex on parents, which works but leads to some truely excessive 2378 * locking]. 2379 */ 2380 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2381 struct inode *new_dir, struct dentry *new_dentry) 2382 { 2383 int error = 0; 2384 struct inode *target; 2385 2386 /* 2387 * If we are going to change the parent - check write permissions, 2388 * we'll need to flip '..'. 
2389 */ 2390 if (new_dir != old_dir) { 2391 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2392 if (error) 2393 return error; 2394 } 2395 2396 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2397 if (error) 2398 return error; 2399 2400 target = new_dentry->d_inode; 2401 if (target) { 2402 mutex_lock(&target->i_mutex); 2403 dentry_unhash(new_dentry); 2404 } 2405 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2406 error = -EBUSY; 2407 else 2408 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2409 if (target) { 2410 if (!error) 2411 target->i_flags |= S_DEAD; 2412 mutex_unlock(&target->i_mutex); 2413 if (d_unhashed(new_dentry)) 2414 d_rehash(new_dentry); 2415 dput(new_dentry); 2416 } 2417 if (!error) 2418 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2419 d_move(old_dentry,new_dentry); 2420 return error; 2421 } 2422 2423 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2424 struct inode *new_dir, struct dentry *new_dentry) 2425 { 2426 struct inode *target; 2427 int error; 2428 2429 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2430 if (error) 2431 return error; 2432 2433 dget(new_dentry); 2434 target = new_dentry->d_inode; 2435 if (target) 2436 mutex_lock(&target->i_mutex); 2437 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2438 error = -EBUSY; 2439 else 2440 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2441 if (!error) { 2442 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2443 d_move(old_dentry, new_dentry); 2444 } 2445 if (target) 2446 mutex_unlock(&target->i_mutex); 2447 dput(new_dentry); 2448 return error; 2449 } 2450 2451 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2452 struct inode *new_dir, struct dentry *new_dentry) 2453 { 2454 int error; 2455 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2456 const char *old_name; 2457 2458 if 
(old_dentry->d_inode == new_dentry->d_inode) 2459 return 0; 2460 2461 error = may_delete(old_dir, old_dentry, is_dir); 2462 if (error) 2463 return error; 2464 2465 if (!new_dentry->d_inode) 2466 error = may_create(new_dir, new_dentry, NULL); 2467 else 2468 error = may_delete(new_dir, new_dentry, is_dir); 2469 if (error) 2470 return error; 2471 2472 if (!old_dir->i_op || !old_dir->i_op->rename) 2473 return -EPERM; 2474 2475 DQUOT_INIT(old_dir); 2476 DQUOT_INIT(new_dir); 2477 2478 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2479 2480 if (is_dir) 2481 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2482 else 2483 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2484 if (!error) { 2485 const char *new_name = old_dentry->d_name.name; 2486 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, 2487 new_dentry->d_inode, old_dentry->d_inode); 2488 } 2489 fsnotify_oldname_free(old_name); 2490 2491 return error; 2492 } 2493 2494 static int do_rename(int olddfd, const char *oldname, 2495 int newdfd, const char *newname) 2496 { 2497 int error = 0; 2498 struct dentry * old_dir, * new_dir; 2499 struct dentry * old_dentry, *new_dentry; 2500 struct dentry * trap; 2501 struct nameidata oldnd, newnd; 2502 2503 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); 2504 if (error) 2505 goto exit; 2506 2507 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); 2508 if (error) 2509 goto exit1; 2510 2511 error = -EXDEV; 2512 if (oldnd.mnt != newnd.mnt) 2513 goto exit2; 2514 2515 old_dir = oldnd.dentry; 2516 error = -EBUSY; 2517 if (oldnd.last_type != LAST_NORM) 2518 goto exit2; 2519 2520 new_dir = newnd.dentry; 2521 if (newnd.last_type != LAST_NORM) 2522 goto exit2; 2523 2524 trap = lock_rename(new_dir, old_dir); 2525 2526 old_dentry = lookup_hash(&oldnd); 2527 error = PTR_ERR(old_dentry); 2528 if (IS_ERR(old_dentry)) 2529 goto exit3; 2530 /* source must exist */ 2531 error = -ENOENT; 2532 if (!old_dentry->d_inode) 
2533 goto exit4; 2534 /* unless the source is a directory trailing slashes give -ENOTDIR */ 2535 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2536 error = -ENOTDIR; 2537 if (oldnd.last.name[oldnd.last.len]) 2538 goto exit4; 2539 if (newnd.last.name[newnd.last.len]) 2540 goto exit4; 2541 } 2542 /* source should not be ancestor of target */ 2543 error = -EINVAL; 2544 if (old_dentry == trap) 2545 goto exit4; 2546 new_dentry = lookup_hash(&newnd); 2547 error = PTR_ERR(new_dentry); 2548 if (IS_ERR(new_dentry)) 2549 goto exit4; 2550 /* target should not be an ancestor of source */ 2551 error = -ENOTEMPTY; 2552 if (new_dentry == trap) 2553 goto exit5; 2554 2555 error = vfs_rename(old_dir->d_inode, old_dentry, 2556 new_dir->d_inode, new_dentry); 2557 exit5: 2558 dput(new_dentry); 2559 exit4: 2560 dput(old_dentry); 2561 exit3: 2562 unlock_rename(new_dir, old_dir); 2563 exit2: 2564 path_release(&newnd); 2565 exit1: 2566 path_release(&oldnd); 2567 exit: 2568 return error; 2569 } 2570 2571 asmlinkage long sys_renameat(int olddfd, const char __user *oldname, 2572 int newdfd, const char __user *newname) 2573 { 2574 int error; 2575 char * from; 2576 char * to; 2577 2578 from = getname(oldname); 2579 if(IS_ERR(from)) 2580 return PTR_ERR(from); 2581 to = getname(newname); 2582 error = PTR_ERR(to); 2583 if (!IS_ERR(to)) { 2584 error = do_rename(olddfd, from, newdfd, to); 2585 putname(to); 2586 } 2587 putname(from); 2588 return error; 2589 } 2590 2591 asmlinkage long sys_rename(const char __user *oldname, const char __user *newname) 2592 { 2593 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); 2594 } 2595 2596 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2597 { 2598 int len; 2599 2600 len = PTR_ERR(link); 2601 if (IS_ERR(link)) 2602 goto out; 2603 2604 len = strlen(link); 2605 if (len > (unsigned) buflen) 2606 len = buflen; 2607 if (copy_to_user(buffer, link, len)) 2608 len = -EFAULT; 2609 out: 2610 return len; 2611 } 2612 2613 
/* 2614 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2615 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 2616 * using) it for any given inode is up to filesystem. 2617 */ 2618 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2619 { 2620 struct nameidata nd; 2621 void *cookie; 2622 2623 nd.depth = 0; 2624 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 2625 if (!IS_ERR(cookie)) { 2626 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2627 if (dentry->d_inode->i_op->put_link) 2628 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 2629 cookie = ERR_PTR(res); 2630 } 2631 return PTR_ERR(cookie); 2632 } 2633 2634 int vfs_follow_link(struct nameidata *nd, const char *link) 2635 { 2636 return __vfs_follow_link(nd, link); 2637 } 2638 2639 /* get the link contents into pagecache */ 2640 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2641 { 2642 struct page * page; 2643 struct address_space *mapping = dentry->d_inode->i_mapping; 2644 page = read_mapping_page(mapping, 0, NULL); 2645 if (IS_ERR(page)) 2646 goto sync_fail; 2647 wait_on_page_locked(page); 2648 if (!PageUptodate(page)) 2649 goto async_fail; 2650 *ppage = page; 2651 return kmap(page); 2652 2653 async_fail: 2654 page_cache_release(page); 2655 return ERR_PTR(-EIO); 2656 2657 sync_fail: 2658 return (char*)page; 2659 } 2660 2661 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2662 { 2663 struct page *page = NULL; 2664 char *s = page_getlink(dentry, &page); 2665 int res = vfs_readlink(dentry,buffer,buflen,s); 2666 if (page) { 2667 kunmap(page); 2668 page_cache_release(page); 2669 } 2670 return res; 2671 } 2672 2673 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2674 { 2675 struct page *page = NULL; 2676 nd_set_link(nd, page_getlink(dentry, &page)); 2677 return page; 2678 } 2679 2680 void page_put_link(struct dentry *dentry, struct 
nameidata *nd, void *cookie) 2681 { 2682 struct page *page = cookie; 2683 2684 if (page) { 2685 kunmap(page); 2686 page_cache_release(page); 2687 } 2688 } 2689 2690 int __page_symlink(struct inode *inode, const char *symname, int len, 2691 gfp_t gfp_mask) 2692 { 2693 struct address_space *mapping = inode->i_mapping; 2694 struct page *page; 2695 int err = -ENOMEM; 2696 char *kaddr; 2697 2698 retry: 2699 page = find_or_create_page(mapping, 0, gfp_mask); 2700 if (!page) 2701 goto fail; 2702 err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); 2703 if (err == AOP_TRUNCATED_PAGE) { 2704 page_cache_release(page); 2705 goto retry; 2706 } 2707 if (err) 2708 goto fail_map; 2709 kaddr = kmap_atomic(page, KM_USER0); 2710 memcpy(kaddr, symname, len-1); 2711 kunmap_atomic(kaddr, KM_USER0); 2712 err = mapping->a_ops->commit_write(NULL, page, 0, len-1); 2713 if (err == AOP_TRUNCATED_PAGE) { 2714 page_cache_release(page); 2715 goto retry; 2716 } 2717 if (err) 2718 goto fail_map; 2719 /* 2720 * Notice that we are _not_ going to block here - end of page is 2721 * unmapped, so this will only try to map the rest of page, see 2722 * that it is unmapped (typically even will not look into inode - 2723 * ->i_size will be enough for everything) and zero it out. 2724 * OTOH it's obviously correct and should make the page up-to-date. 
2725 */ 2726 if (!PageUptodate(page)) { 2727 err = mapping->a_ops->readpage(NULL, page); 2728 if (err != AOP_TRUNCATED_PAGE) 2729 wait_on_page_locked(page); 2730 } else { 2731 unlock_page(page); 2732 } 2733 page_cache_release(page); 2734 if (err < 0) 2735 goto fail; 2736 mark_inode_dirty(inode); 2737 return 0; 2738 fail_map: 2739 unlock_page(page); 2740 page_cache_release(page); 2741 fail: 2742 return err; 2743 } 2744 2745 int page_symlink(struct inode *inode, const char *symname, int len) 2746 { 2747 return __page_symlink(inode, symname, len, 2748 mapping_gfp_mask(inode->i_mapping)); 2749 } 2750 2751 struct inode_operations page_symlink_inode_operations = { 2752 .readlink = generic_readlink, 2753 .follow_link = page_follow_link_light, 2754 .put_link = page_put_link, 2755 }; 2756 2757 EXPORT_SYMBOL(__user_walk); 2758 EXPORT_SYMBOL(__user_walk_fd); 2759 EXPORT_SYMBOL(follow_down); 2760 EXPORT_SYMBOL(follow_up); 2761 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2762 EXPORT_SYMBOL(getname); 2763 EXPORT_SYMBOL(lock_rename); 2764 EXPORT_SYMBOL(lookup_one_len); 2765 EXPORT_SYMBOL(page_follow_link_light); 2766 EXPORT_SYMBOL(page_put_link); 2767 EXPORT_SYMBOL(page_readlink); 2768 EXPORT_SYMBOL(__page_symlink); 2769 EXPORT_SYMBOL(page_symlink); 2770 EXPORT_SYMBOL(page_symlink_inode_operations); 2771 EXPORT_SYMBOL(path_lookup); 2772 EXPORT_SYMBOL(path_release); 2773 EXPORT_SYMBOL(path_walk); 2774 EXPORT_SYMBOL(permission); 2775 EXPORT_SYMBOL(vfs_permission); 2776 EXPORT_SYMBOL(file_permission); 2777 EXPORT_SYMBOL(unlock_rename); 2778 EXPORT_SYMBOL(vfs_create); 2779 EXPORT_SYMBOL(vfs_follow_link); 2780 EXPORT_SYMBOL(vfs_link); 2781 EXPORT_SYMBOL(vfs_mkdir); 2782 EXPORT_SYMBOL(vfs_mknod); 2783 EXPORT_SYMBOL(generic_permission); 2784 EXPORT_SYMBOL(vfs_readlink); 2785 EXPORT_SYMBOL(vfs_rename); 2786 EXPORT_SYMBOL(vfs_rmdir); 2787 EXPORT_SYMBOL(vfs_symlink); 2788 EXPORT_SYMBOL(vfs_unlink); 2789 EXPORT_SYMBOL(dentry_unhash); 2790 EXPORT_SYMBOL(generic_readlink); 2791