1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/dnotify.h> 25 #include <linux/smp_lock.h> 26 #include <linux/personality.h> 27 #include <linux/security.h> 28 #include <linux/syscalls.h> 29 #include <linux/mount.h> 30 #include <linux/audit.h> 31 #include <asm/namei.h> 32 #include <asm/uaccess.h> 33 34 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 35 36 /* [Feb-1997 T. Schoebel-Theuer] 37 * Fundamental changes in the pathname lookup mechanisms (namei) 38 * were necessary because of omirr. The reason is that omirr needs 39 * to know the _real_ pathname, not the user-supplied one, in case 40 * of symlinks (and also when transname replacements occur). 41 * 42 * The new code replaces the old recursive symlink resolution with 43 * an iterative one (in case of non-nested symlink chains). It does 44 * this with calls to <fs>_follow_link(). 45 * As a side effect, dir_namei(), _namei() and follow_link() are now 46 * replaced with a single function lookup_dentry() that can handle all 47 * the special cases of the former code. 48 * 49 * With the new dcache, the pathname is stored at each inode, at least as 50 * long as the refcount of the inode is positive. As a side effect, the 51 * size of the dcache depends on the inode cache and thus is dynamic. 52 * 53 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 54 * resolution to correspond with current state of the code. 55 * 56 * Note that the symlink resolution is not *completely* iterative. 
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
94 * 95 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 96 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 97 * During the 2.4 we need to fix the userland stuff depending on it - 98 * hopefully we will be able to get rid of that wart in 2.5. So far only 99 * XEmacs seems to be relying on it... 100 */ 101 /* 102 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 103 * implemented. Let's see if raised priority of ->s_vfs_rename_sem gives 104 * any extra contention... 105 */ 106 107 /* In order to reduce some races, while at the same time doing additional 108 * checking and hopefully speeding things up, we copy filenames to the 109 * kernel data space before using them.. 110 * 111 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 112 * PATH_MAX includes the nul terminator --RR. 113 */ 114 static inline int do_getname(const char __user *filename, char *page) 115 { 116 int retval; 117 unsigned long len = PATH_MAX; 118 119 if (!segment_eq(get_fs(), KERNEL_DS)) { 120 if ((unsigned long) filename >= TASK_SIZE) 121 return -EFAULT; 122 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 123 len = TASK_SIZE - (unsigned long) filename; 124 } 125 126 retval = strncpy_from_user(page, filename, len); 127 if (retval > 0) { 128 if (retval < len) 129 return 0; 130 return -ENAMETOOLONG; 131 } else if (!retval) 132 retval = -ENOENT; 133 return retval; 134 } 135 136 char * getname(const char __user * filename) 137 { 138 char *tmp, *result; 139 140 result = ERR_PTR(-ENOMEM); 141 tmp = __getname(); 142 if (tmp) { 143 int retval = do_getname(filename, tmp); 144 145 result = tmp; 146 if (retval < 0) { 147 __putname(tmp); 148 result = ERR_PTR(retval); 149 } 150 } 151 audit_getname(result); 152 return result; 153 } 154 155 #ifdef CONFIG_AUDITSYSCALL 156 void putname(const char *name) 157 { 158 if (unlikely(current->audit_context)) 159 audit_putname(name); 160 else 161 __putname(name); 162 } 163 
EXPORT_SYMBOL(putname); 164 #endif 165 166 167 /** 168 * generic_permission - check for access rights on a Posix-like filesystem 169 * @inode: inode to check access rights for 170 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 171 * @check_acl: optional callback to check for Posix ACLs 172 * 173 * Used to check for read/write/execute permissions on a file. 174 * We use "fsuid" for this, letting us set arbitrary permissions 175 * for filesystem access without changing the "normal" uids which 176 * are used for other things.. 177 */ 178 int generic_permission(struct inode *inode, int mask, 179 int (*check_acl)(struct inode *inode, int mask)) 180 { 181 umode_t mode = inode->i_mode; 182 183 if (current->fsuid == inode->i_uid) 184 mode >>= 6; 185 else { 186 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 187 int error = check_acl(inode, mask); 188 if (error == -EACCES) 189 goto check_capabilities; 190 else if (error != -EAGAIN) 191 return error; 192 } 193 194 if (in_group_p(inode->i_gid)) 195 mode >>= 3; 196 } 197 198 /* 199 * If the DACs are ok we don't need any capability check. 200 */ 201 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 202 return 0; 203 204 check_capabilities: 205 /* 206 * Read/write DACs are always overridable. 207 * Executable DACs are overridable if at least one exec bit is set. 208 */ 209 if (!(mask & MAY_EXEC) || 210 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 211 if (capable(CAP_DAC_OVERRIDE)) 212 return 0; 213 214 /* 215 * Searching includes executable on directories, else just read. 216 */ 217 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 218 if (capable(CAP_DAC_READ_SEARCH)) 219 return 0; 220 221 return -EACCES; 222 } 223 224 int permission(struct inode *inode, int mask, struct nameidata *nd) 225 { 226 int retval, submask; 227 228 if (mask & MAY_WRITE) { 229 umode_t mode = inode->i_mode; 230 231 /* 232 * Nobody gets write access to a read-only fs. 
233 */ 234 if (IS_RDONLY(inode) && 235 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 236 return -EROFS; 237 238 /* 239 * Nobody gets write access to an immutable file. 240 */ 241 if (IS_IMMUTABLE(inode)) 242 return -EACCES; 243 } 244 245 246 /* Ordinary permission routines do not understand MAY_APPEND. */ 247 submask = mask & ~MAY_APPEND; 248 if (inode->i_op && inode->i_op->permission) 249 retval = inode->i_op->permission(inode, submask, nd); 250 else 251 retval = generic_permission(inode, submask, NULL); 252 if (retval) 253 return retval; 254 255 return security_inode_permission(inode, mask, nd); 256 } 257 258 /* 259 * get_write_access() gets write permission for a file. 260 * put_write_access() releases this write permission. 261 * This is used for regular files. 262 * We cannot support write (and maybe mmap read-write shared) accesses and 263 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 264 * can have the following values: 265 * 0: no writers, no VM_DENYWRITE mappings 266 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 267 * > 0: (i_writecount) users are writing to the file. 268 * 269 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 270 * except for the cases where we don't hold i_writecount yet. Then we need to 271 * use {get,deny}_write_access() - these functions check the sign and refuse 272 * to do the change if sign is wrong. Exclusion between them is provided by 273 * the inode->i_lock spinlock. 
274 */ 275 276 int get_write_access(struct inode * inode) 277 { 278 spin_lock(&inode->i_lock); 279 if (atomic_read(&inode->i_writecount) < 0) { 280 spin_unlock(&inode->i_lock); 281 return -ETXTBSY; 282 } 283 atomic_inc(&inode->i_writecount); 284 spin_unlock(&inode->i_lock); 285 286 return 0; 287 } 288 289 int deny_write_access(struct file * file) 290 { 291 struct inode *inode = file->f_dentry->d_inode; 292 293 spin_lock(&inode->i_lock); 294 if (atomic_read(&inode->i_writecount) > 0) { 295 spin_unlock(&inode->i_lock); 296 return -ETXTBSY; 297 } 298 atomic_dec(&inode->i_writecount); 299 spin_unlock(&inode->i_lock); 300 301 return 0; 302 } 303 304 void path_release(struct nameidata *nd) 305 { 306 dput(nd->dentry); 307 mntput(nd->mnt); 308 } 309 310 /* 311 * umount() mustn't call path_release()/mntput() as that would clear 312 * mnt_expiry_mark 313 */ 314 void path_release_on_umount(struct nameidata *nd) 315 { 316 dput(nd->dentry); 317 _mntput(nd->mnt); 318 } 319 320 /* 321 * Internal lookup() using the new generic dcache. 322 * SMP-safe 323 */ 324 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 325 { 326 struct dentry * dentry = __d_lookup(parent, name); 327 328 /* lockess __d_lookup may fail due to concurrent d_move() 329 * in some unrelated directory, so try with d_lookup 330 */ 331 if (!dentry) 332 dentry = d_lookup(parent, name); 333 334 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 335 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { 336 dput(dentry); 337 dentry = NULL; 338 } 339 } 340 return dentry; 341 } 342 343 /* 344 * Short-cut version of permission(), for calling by 345 * path_walk(), when dcache lock is held. Combines parts 346 * of permission() and generic_permission(), and tests ONLY for 347 * MAY_EXEC permission. 348 * 349 * If appropriate, check DAC only. 
If not appropriate, or 350 * short-cut DAC fails, then call permission() to do more 351 * complete permission check. 352 */ 353 static inline int exec_permission_lite(struct inode *inode, 354 struct nameidata *nd) 355 { 356 umode_t mode = inode->i_mode; 357 358 if (inode->i_op && inode->i_op->permission) 359 return -EAGAIN; 360 361 if (current->fsuid == inode->i_uid) 362 mode >>= 6; 363 else if (in_group_p(inode->i_gid)) 364 mode >>= 3; 365 366 if (mode & MAY_EXEC) 367 goto ok; 368 369 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 370 goto ok; 371 372 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 373 goto ok; 374 375 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 376 goto ok; 377 378 return -EACCES; 379 ok: 380 return security_inode_permission(inode, MAY_EXEC, nd); 381 } 382 383 /* 384 * This is called when everything else fails, and we actually have 385 * to go to the low-level filesystem to find out what we should do.. 386 * 387 * We get the directory semaphore, and after getting that we also 388 * make sure that nobody added the entry to the dcache in the meantime.. 389 * SMP-safe 390 */ 391 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 392 { 393 struct dentry * result; 394 struct inode *dir = parent->d_inode; 395 396 down(&dir->i_sem); 397 /* 398 * First re-do the cached lookup just in case it was created 399 * while we waited for the directory semaphore.. 400 * 401 * FIXME! This could use version numbering or similar to 402 * avoid unnecessary cache lookups. 403 * 404 * The "dcache_lock" is purely to protect the RCU list walker 405 * from concurrent renames at this point (we mustn't get false 406 * negatives from the RCU list walk here, unlike the optimistic 407 * fast walk). 
408 * 409 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 410 */ 411 result = d_lookup(parent, name); 412 if (!result) { 413 struct dentry * dentry = d_alloc(parent, name); 414 result = ERR_PTR(-ENOMEM); 415 if (dentry) { 416 result = dir->i_op->lookup(dir, dentry, nd); 417 if (result) 418 dput(dentry); 419 else 420 result = dentry; 421 } 422 up(&dir->i_sem); 423 return result; 424 } 425 426 /* 427 * Uhhuh! Nasty case: the cache was re-populated while 428 * we waited on the semaphore. Need to revalidate. 429 */ 430 up(&dir->i_sem); 431 if (result->d_op && result->d_op->d_revalidate) { 432 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { 433 dput(result); 434 result = ERR_PTR(-ENOENT); 435 } 436 } 437 return result; 438 } 439 440 static int __emul_lookup_dentry(const char *, struct nameidata *); 441 442 /* SMP-safe */ 443 static inline int 444 walk_init_root(const char *name, struct nameidata *nd) 445 { 446 read_lock(¤t->fs->lock); 447 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 448 nd->mnt = mntget(current->fs->altrootmnt); 449 nd->dentry = dget(current->fs->altroot); 450 read_unlock(¤t->fs->lock); 451 if (__emul_lookup_dentry(name,nd)) 452 return 0; 453 read_lock(¤t->fs->lock); 454 } 455 nd->mnt = mntget(current->fs->rootmnt); 456 nd->dentry = dget(current->fs->root); 457 read_unlock(¤t->fs->lock); 458 return 1; 459 } 460 461 static inline int __vfs_follow_link(struct nameidata *nd, const char *link) 462 { 463 int res = 0; 464 char *name; 465 if (IS_ERR(link)) 466 goto fail; 467 468 if (*link == '/') { 469 path_release(nd); 470 if (!walk_init_root(link, nd)) 471 /* weird __emul_prefix() stuff did it */ 472 goto out; 473 } 474 res = link_path_walk(link, nd); 475 out: 476 if (nd->depth || res || nd->last_type!=LAST_NORM) 477 return res; 478 /* 479 * If it is an iterative symlinks resolution in open_namei() we 480 * have to copy the last component. 
And all that crap because of 481 * bloody create() on broken symlinks. Furrfu... 482 */ 483 name = __getname(); 484 if (unlikely(!name)) { 485 path_release(nd); 486 return -ENOMEM; 487 } 488 strcpy(name, nd->last.name); 489 nd->last.name = name; 490 return 0; 491 fail: 492 path_release(nd); 493 return PTR_ERR(link); 494 } 495 496 static inline int __do_follow_link(struct dentry *dentry, struct nameidata *nd) 497 { 498 int error; 499 500 touch_atime(nd->mnt, dentry); 501 nd_set_link(nd, NULL); 502 error = dentry->d_inode->i_op->follow_link(dentry, nd); 503 if (!error) { 504 char *s = nd_get_link(nd); 505 if (s) 506 error = __vfs_follow_link(nd, s); 507 if (dentry->d_inode->i_op->put_link) 508 dentry->d_inode->i_op->put_link(dentry, nd); 509 } 510 511 return error; 512 } 513 514 /* 515 * This limits recursive symlink follows to 8, while 516 * limiting consecutive symlinks to 40. 517 * 518 * Without that kind of total limit, nasty chains of consecutive 519 * symlinks can cause almost arbitrarily long lookups. 
520 */ 521 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) 522 { 523 int err = -ELOOP; 524 if (current->link_count >= MAX_NESTED_LINKS) 525 goto loop; 526 if (current->total_link_count >= 40) 527 goto loop; 528 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 529 cond_resched(); 530 err = security_inode_follow_link(dentry, nd); 531 if (err) 532 goto loop; 533 current->link_count++; 534 current->total_link_count++; 535 nd->depth++; 536 err = __do_follow_link(dentry, nd); 537 current->link_count--; 538 nd->depth--; 539 return err; 540 loop: 541 path_release(nd); 542 return err; 543 } 544 545 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 546 { 547 struct vfsmount *parent; 548 struct dentry *mountpoint; 549 spin_lock(&vfsmount_lock); 550 parent=(*mnt)->mnt_parent; 551 if (parent == *mnt) { 552 spin_unlock(&vfsmount_lock); 553 return 0; 554 } 555 mntget(parent); 556 mountpoint=dget((*mnt)->mnt_mountpoint); 557 spin_unlock(&vfsmount_lock); 558 dput(*dentry); 559 *dentry = mountpoint; 560 mntput(*mnt); 561 *mnt = parent; 562 return 1; 563 } 564 565 /* no need for dcache_lock, as serialization is taken care in 566 * namespace.c 567 */ 568 static int follow_mount(struct vfsmount **mnt, struct dentry **dentry) 569 { 570 int res = 0; 571 while (d_mountpoint(*dentry)) { 572 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 573 if (!mounted) 574 break; 575 mntput(*mnt); 576 *mnt = mounted; 577 dput(*dentry); 578 *dentry = dget(mounted->mnt_root); 579 res = 1; 580 } 581 return res; 582 } 583 584 /* no need for dcache_lock, as serialization is taken care in 585 * namespace.c 586 */ 587 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) 588 { 589 struct vfsmount *mounted; 590 591 mounted = lookup_mnt(*mnt, *dentry); 592 if (mounted) { 593 mntput(*mnt); 594 *mnt = mounted; 595 dput(*dentry); 596 *dentry = dget(mounted->mnt_root); 597 return 1; 598 } 599 return 0; 600 } 601 602 int follow_down(struct vfsmount **mnt, 
struct dentry **dentry) 603 { 604 return __follow_down(mnt,dentry); 605 } 606 607 static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry) 608 { 609 while(1) { 610 struct vfsmount *parent; 611 struct dentry *old = *dentry; 612 613 read_lock(¤t->fs->lock); 614 if (*dentry == current->fs->root && 615 *mnt == current->fs->rootmnt) { 616 read_unlock(¤t->fs->lock); 617 break; 618 } 619 read_unlock(¤t->fs->lock); 620 spin_lock(&dcache_lock); 621 if (*dentry != (*mnt)->mnt_root) { 622 *dentry = dget((*dentry)->d_parent); 623 spin_unlock(&dcache_lock); 624 dput(old); 625 break; 626 } 627 spin_unlock(&dcache_lock); 628 spin_lock(&vfsmount_lock); 629 parent = (*mnt)->mnt_parent; 630 if (parent == *mnt) { 631 spin_unlock(&vfsmount_lock); 632 break; 633 } 634 mntget(parent); 635 *dentry = dget((*mnt)->mnt_mountpoint); 636 spin_unlock(&vfsmount_lock); 637 dput(old); 638 mntput(*mnt); 639 *mnt = parent; 640 } 641 follow_mount(mnt, dentry); 642 } 643 644 struct path { 645 struct vfsmount *mnt; 646 struct dentry *dentry; 647 }; 648 649 /* 650 * It's more convoluted than I'd like it to be, but... it's still fairly 651 * small and for now I'd prefer to have fast path as straight as possible. 652 * It _is_ time-critical. 
653 */ 654 static int do_lookup(struct nameidata *nd, struct qstr *name, 655 struct path *path) 656 { 657 struct vfsmount *mnt = nd->mnt; 658 struct dentry *dentry = __d_lookup(nd->dentry, name); 659 660 if (!dentry) 661 goto need_lookup; 662 if (dentry->d_op && dentry->d_op->d_revalidate) 663 goto need_revalidate; 664 done: 665 path->mnt = mnt; 666 path->dentry = dentry; 667 return 0; 668 669 need_lookup: 670 dentry = real_lookup(nd->dentry, name, nd); 671 if (IS_ERR(dentry)) 672 goto fail; 673 goto done; 674 675 need_revalidate: 676 if (dentry->d_op->d_revalidate(dentry, nd)) 677 goto done; 678 if (d_invalidate(dentry)) 679 goto done; 680 dput(dentry); 681 goto need_lookup; 682 683 fail: 684 return PTR_ERR(dentry); 685 } 686 687 /* 688 * Name resolution. 689 * This is the basic name resolution function, turning a pathname into 690 * the final dentry. We expect 'base' to be positive and a directory. 691 * 692 * Returns 0 and nd will have valid dentry and mnt on success. 693 * Returns error and drops reference to input namei data on failure. 694 */ 695 static fastcall int __link_path_walk(const char * name, struct nameidata *nd) 696 { 697 struct path next; 698 struct inode *inode; 699 int err; 700 unsigned int lookup_flags = nd->flags; 701 702 while (*name=='/') 703 name++; 704 if (!*name) 705 goto return_reval; 706 707 inode = nd->dentry->d_inode; 708 if (nd->depth) 709 lookup_flags = LOOKUP_FOLLOW; 710 711 /* At this point we know we have a real path component. 
*/ 712 for(;;) { 713 unsigned long hash; 714 struct qstr this; 715 unsigned int c; 716 717 err = exec_permission_lite(inode, nd); 718 if (err == -EAGAIN) { 719 err = permission(inode, MAY_EXEC, nd); 720 } 721 if (err) 722 break; 723 724 this.name = name; 725 c = *(const unsigned char *)name; 726 727 hash = init_name_hash(); 728 do { 729 name++; 730 hash = partial_name_hash(c, hash); 731 c = *(const unsigned char *)name; 732 } while (c && (c != '/')); 733 this.len = name - (const char *) this.name; 734 this.hash = end_name_hash(hash); 735 736 /* remove trailing slashes? */ 737 if (!c) 738 goto last_component; 739 while (*++name == '/'); 740 if (!*name) 741 goto last_with_slashes; 742 743 /* 744 * "." and ".." are special - ".." especially so because it has 745 * to be able to know about the current root directory and 746 * parent relationships. 747 */ 748 if (this.name[0] == '.') switch (this.len) { 749 default: 750 break; 751 case 2: 752 if (this.name[1] != '.') 753 break; 754 follow_dotdot(&nd->mnt, &nd->dentry); 755 inode = nd->dentry->d_inode; 756 /* fallthrough */ 757 case 1: 758 continue; 759 } 760 /* 761 * See if the low-level filesystem might want 762 * to use its own hash.. 763 */ 764 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 765 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 766 if (err < 0) 767 break; 768 } 769 nd->flags |= LOOKUP_CONTINUE; 770 /* This does the actual lookups.. */ 771 err = do_lookup(nd, &this, &next); 772 if (err) 773 break; 774 /* Check mountpoints.. 
*/ 775 follow_mount(&next.mnt, &next.dentry); 776 777 err = -ENOENT; 778 inode = next.dentry->d_inode; 779 if (!inode) 780 goto out_dput; 781 err = -ENOTDIR; 782 if (!inode->i_op) 783 goto out_dput; 784 785 if (inode->i_op->follow_link) { 786 mntget(next.mnt); 787 err = do_follow_link(next.dentry, nd); 788 dput(next.dentry); 789 mntput(next.mnt); 790 if (err) 791 goto return_err; 792 err = -ENOENT; 793 inode = nd->dentry->d_inode; 794 if (!inode) 795 break; 796 err = -ENOTDIR; 797 if (!inode->i_op) 798 break; 799 } else { 800 dput(nd->dentry); 801 nd->mnt = next.mnt; 802 nd->dentry = next.dentry; 803 } 804 err = -ENOTDIR; 805 if (!inode->i_op->lookup) 806 break; 807 continue; 808 /* here ends the main loop */ 809 810 last_with_slashes: 811 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 812 last_component: 813 nd->flags &= ~LOOKUP_CONTINUE; 814 if (lookup_flags & LOOKUP_PARENT) 815 goto lookup_parent; 816 if (this.name[0] == '.') switch (this.len) { 817 default: 818 break; 819 case 2: 820 if (this.name[1] != '.') 821 break; 822 follow_dotdot(&nd->mnt, &nd->dentry); 823 inode = nd->dentry->d_inode; 824 /* fallthrough */ 825 case 1: 826 goto return_reval; 827 } 828 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 829 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 830 if (err < 0) 831 break; 832 } 833 err = do_lookup(nd, &this, &next); 834 if (err) 835 break; 836 follow_mount(&next.mnt, &next.dentry); 837 inode = next.dentry->d_inode; 838 if ((lookup_flags & LOOKUP_FOLLOW) 839 && inode && inode->i_op && inode->i_op->follow_link) { 840 mntget(next.mnt); 841 err = do_follow_link(next.dentry, nd); 842 dput(next.dentry); 843 mntput(next.mnt); 844 if (err) 845 goto return_err; 846 inode = nd->dentry->d_inode; 847 } else { 848 dput(nd->dentry); 849 nd->mnt = next.mnt; 850 nd->dentry = next.dentry; 851 } 852 err = -ENOENT; 853 if (!inode) 854 break; 855 if (lookup_flags & LOOKUP_DIRECTORY) { 856 err = -ENOTDIR; 857 if (!inode->i_op || !inode->i_op->lookup) 858 
break; 859 } 860 goto return_base; 861 lookup_parent: 862 nd->last = this; 863 nd->last_type = LAST_NORM; 864 if (this.name[0] != '.') 865 goto return_base; 866 if (this.len == 1) 867 nd->last_type = LAST_DOT; 868 else if (this.len == 2 && this.name[1] == '.') 869 nd->last_type = LAST_DOTDOT; 870 else 871 goto return_base; 872 return_reval: 873 /* 874 * We bypassed the ordinary revalidation routines. 875 * We may need to check the cached dentry for staleness. 876 */ 877 if (nd->dentry && nd->dentry->d_sb && 878 (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 879 err = -ESTALE; 880 /* Note: we do not d_invalidate() */ 881 if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) 882 break; 883 } 884 return_base: 885 return 0; 886 out_dput: 887 dput(next.dentry); 888 break; 889 } 890 path_release(nd); 891 return_err: 892 return err; 893 } 894 895 /* 896 * Wrapper to retry pathname resolution whenever the underlying 897 * file system returns an ESTALE. 898 * 899 * Retry the whole path once, forcing real lookup requests 900 * instead of relying on the dcache. 901 */ 902 int fastcall link_path_walk(const char *name, struct nameidata *nd) 903 { 904 struct nameidata save = *nd; 905 int result; 906 907 /* make sure the stuff we saved doesn't go away */ 908 dget(save.dentry); 909 mntget(save.mnt); 910 911 result = __link_path_walk(name, nd); 912 if (result == -ESTALE) { 913 *nd = save; 914 dget(nd->dentry); 915 mntget(nd->mnt); 916 nd->flags |= LOOKUP_REVAL; 917 result = __link_path_walk(name, nd); 918 } 919 920 dput(save.dentry); 921 mntput(save.mnt); 922 923 return result; 924 } 925 926 int fastcall path_walk(const char * name, struct nameidata *nd) 927 { 928 current->total_link_count = 0; 929 return link_path_walk(name, nd); 930 } 931 932 /* 933 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 934 * everything is done. 
Returns 0 and drops input nd, if lookup failed; 935 */ 936 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 937 { 938 if (path_walk(name, nd)) 939 return 0; /* something went wrong... */ 940 941 if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { 942 struct dentry *old_dentry = nd->dentry; 943 struct vfsmount *old_mnt = nd->mnt; 944 struct qstr last = nd->last; 945 int last_type = nd->last_type; 946 /* 947 * NAME was not found in alternate root or it's a directory. Try to find 948 * it in the normal root: 949 */ 950 nd->last_type = LAST_ROOT; 951 read_lock(¤t->fs->lock); 952 nd->mnt = mntget(current->fs->rootmnt); 953 nd->dentry = dget(current->fs->root); 954 read_unlock(¤t->fs->lock); 955 if (path_walk(name, nd) == 0) { 956 if (nd->dentry->d_inode) { 957 dput(old_dentry); 958 mntput(old_mnt); 959 return 1; 960 } 961 path_release(nd); 962 } 963 nd->dentry = old_dentry; 964 nd->mnt = old_mnt; 965 nd->last = last; 966 nd->last_type = last_type; 967 } 968 return 1; 969 } 970 971 void set_fs_altroot(void) 972 { 973 char *emul = __emul_prefix(); 974 struct nameidata nd; 975 struct vfsmount *mnt = NULL, *oldmnt; 976 struct dentry *dentry = NULL, *olddentry; 977 int err; 978 979 if (!emul) 980 goto set_it; 981 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); 982 if (!err) { 983 mnt = nd.mnt; 984 dentry = nd.dentry; 985 } 986 set_it: 987 write_lock(¤t->fs->lock); 988 oldmnt = current->fs->altrootmnt; 989 olddentry = current->fs->altroot; 990 current->fs->altrootmnt = mnt; 991 current->fs->altroot = dentry; 992 write_unlock(¤t->fs->lock); 993 if (olddentry) { 994 dput(olddentry); 995 mntput(oldmnt); 996 } 997 } 998 999 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1000 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) 1001 { 1002 int retval = 0; 1003 1004 nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ 1005 nd->flags = flags; 1006 nd->depth = 0; 1007 1008 read_lock(¤t->fs->lock); 1009 if (*name=='/') { 1010 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 1011 nd->mnt = mntget(current->fs->altrootmnt); 1012 nd->dentry = dget(current->fs->altroot); 1013 read_unlock(¤t->fs->lock); 1014 if (__emul_lookup_dentry(name,nd)) 1015 goto out; /* found in altroot */ 1016 read_lock(¤t->fs->lock); 1017 } 1018 nd->mnt = mntget(current->fs->rootmnt); 1019 nd->dentry = dget(current->fs->root); 1020 } else { 1021 nd->mnt = mntget(current->fs->pwdmnt); 1022 nd->dentry = dget(current->fs->pwd); 1023 } 1024 read_unlock(¤t->fs->lock); 1025 current->total_link_count = 0; 1026 retval = link_path_walk(name, nd); 1027 out: 1028 if (unlikely(current->audit_context 1029 && nd && nd->dentry && nd->dentry->d_inode)) 1030 audit_inode(name, nd->dentry->d_inode); 1031 return retval; 1032 } 1033 1034 /* 1035 * Restricted form of lookup. Doesn't follow links, single-component only, 1036 * needs parent already locked. Doesn't follow mounts. 1037 * SMP-safe. 1038 */ 1039 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) 1040 { 1041 struct dentry * dentry; 1042 struct inode *inode; 1043 int err; 1044 1045 inode = base->d_inode; 1046 err = permission(inode, MAY_EXEC, nd); 1047 dentry = ERR_PTR(err); 1048 if (err) 1049 goto out; 1050 1051 /* 1052 * See if the low-level filesystem might want 1053 * to use its own hash.. 
1054 */ 1055 if (base->d_op && base->d_op->d_hash) { 1056 err = base->d_op->d_hash(base, name); 1057 dentry = ERR_PTR(err); 1058 if (err < 0) 1059 goto out; 1060 } 1061 1062 dentry = cached_lookup(base, name, nd); 1063 if (!dentry) { 1064 struct dentry *new = d_alloc(base, name); 1065 dentry = ERR_PTR(-ENOMEM); 1066 if (!new) 1067 goto out; 1068 dentry = inode->i_op->lookup(inode, new, nd); 1069 if (!dentry) 1070 dentry = new; 1071 else 1072 dput(new); 1073 } 1074 out: 1075 return dentry; 1076 } 1077 1078 struct dentry * lookup_hash(struct qstr *name, struct dentry * base) 1079 { 1080 return __lookup_hash(name, base, NULL); 1081 } 1082 1083 /* SMP-safe */ 1084 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) 1085 { 1086 unsigned long hash; 1087 struct qstr this; 1088 unsigned int c; 1089 1090 this.name = name; 1091 this.len = len; 1092 if (!len) 1093 goto access; 1094 1095 hash = init_name_hash(); 1096 while (len--) { 1097 c = *(const unsigned char *)name++; 1098 if (c == '/' || c == '\0') 1099 goto access; 1100 hash = partial_name_hash(c, hash); 1101 } 1102 this.hash = end_name_hash(hash); 1103 1104 return lookup_hash(&this, base); 1105 access: 1106 return ERR_PTR(-EACCES); 1107 } 1108 1109 /* 1110 * namei() 1111 * 1112 * is used by most simple commands to get the inode of a specified name. 1113 * Open, link etc use their own routines, but this is enough for things 1114 * like 'chmod' etc. 1115 * 1116 * namei exists in two versions: namei/lnamei. The only difference is 1117 * that namei follows links, while lnamei does not. 1118 * SMP-safe 1119 */ 1120 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1121 { 1122 char *tmp = getname(name); 1123 int err = PTR_ERR(tmp); 1124 1125 if (!IS_ERR(tmp)) { 1126 err = path_lookup(tmp, flags, nd); 1127 putname(tmp); 1128 } 1129 return err; 1130 } 1131 1132 /* 1133 * It's inline, so penalty for filesystems that don't use sticky bit is 1134 * minimal. 
1135 */ 1136 static inline int check_sticky(struct inode *dir, struct inode *inode) 1137 { 1138 if (!(dir->i_mode & S_ISVTX)) 1139 return 0; 1140 if (inode->i_uid == current->fsuid) 1141 return 0; 1142 if (dir->i_uid == current->fsuid) 1143 return 0; 1144 return !capable(CAP_FOWNER); 1145 } 1146 1147 /* 1148 * Check whether we can remove a link victim from directory dir, check 1149 * whether the type of victim is right. 1150 * 1. We can't do it if dir is read-only (done in permission()) 1151 * 2. We should have write and exec permissions on dir 1152 * 3. We can't remove anything from append-only dir 1153 * 4. We can't do anything with immutable dir (done in permission()) 1154 * 5. If the sticky bit on dir is set we should either 1155 * a. be owner of dir, or 1156 * b. be owner of victim, or 1157 * c. have CAP_FOWNER capability 1158 * 6. If the victim is append-only or immutable we can't do antyhing with 1159 * links pointing to it. 1160 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 1161 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1162 * 9. We can't remove a root or mountpoint. 1163 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1164 * nfs_async_unlink(). 
1165 */ 1166 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1167 { 1168 int error; 1169 1170 if (!victim->d_inode) 1171 return -ENOENT; 1172 1173 BUG_ON(victim->d_parent->d_inode != dir); 1174 1175 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1176 if (error) 1177 return error; 1178 if (IS_APPEND(dir)) 1179 return -EPERM; 1180 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1181 IS_IMMUTABLE(victim->d_inode)) 1182 return -EPERM; 1183 if (isdir) { 1184 if (!S_ISDIR(victim->d_inode->i_mode)) 1185 return -ENOTDIR; 1186 if (IS_ROOT(victim)) 1187 return -EBUSY; 1188 } else if (S_ISDIR(victim->d_inode->i_mode)) 1189 return -EISDIR; 1190 if (IS_DEADDIR(dir)) 1191 return -ENOENT; 1192 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1193 return -EBUSY; 1194 return 0; 1195 } 1196 1197 /* Check whether we can create an object with dentry child in directory 1198 * dir. 1199 * 1. We can't do it if child already exists (open has special treatment for 1200 * this case, but since we are inlined it's OK) 1201 * 2. We can't do it if dir is read-only (done in permission()) 1202 * 3. We should have write and exec permissions on dir 1203 * 4. We can't do it if dir is immutable (done in permission()) 1204 */ 1205 static inline int may_create(struct inode *dir, struct dentry *child, 1206 struct nameidata *nd) 1207 { 1208 if (child->d_inode) 1209 return -EEXIST; 1210 if (IS_DEADDIR(dir)) 1211 return -ENOENT; 1212 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1213 } 1214 1215 /* 1216 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security 1217 * reasons. 1218 * 1219 * O_DIRECTORY translates into forcing a directory lookup. 
1220 */ 1221 static inline int lookup_flags(unsigned int f) 1222 { 1223 unsigned long retval = LOOKUP_FOLLOW; 1224 1225 if (f & O_NOFOLLOW) 1226 retval &= ~LOOKUP_FOLLOW; 1227 1228 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1229 retval &= ~LOOKUP_FOLLOW; 1230 1231 if (f & O_DIRECTORY) 1232 retval |= LOOKUP_DIRECTORY; 1233 1234 return retval; 1235 } 1236 1237 /* 1238 * p1 and p2 should be directories on the same fs. 1239 */ 1240 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) 1241 { 1242 struct dentry *p; 1243 1244 if (p1 == p2) { 1245 down(&p1->d_inode->i_sem); 1246 return NULL; 1247 } 1248 1249 down(&p1->d_inode->i_sb->s_vfs_rename_sem); 1250 1251 for (p = p1; p->d_parent != p; p = p->d_parent) { 1252 if (p->d_parent == p2) { 1253 down(&p2->d_inode->i_sem); 1254 down(&p1->d_inode->i_sem); 1255 return p; 1256 } 1257 } 1258 1259 for (p = p2; p->d_parent != p; p = p->d_parent) { 1260 if (p->d_parent == p1) { 1261 down(&p1->d_inode->i_sem); 1262 down(&p2->d_inode->i_sem); 1263 return p; 1264 } 1265 } 1266 1267 down(&p1->d_inode->i_sem); 1268 down(&p2->d_inode->i_sem); 1269 return NULL; 1270 } 1271 1272 void unlock_rename(struct dentry *p1, struct dentry *p2) 1273 { 1274 up(&p1->d_inode->i_sem); 1275 if (p1 != p2) { 1276 up(&p2->d_inode->i_sem); 1277 up(&p1->d_inode->i_sb->s_vfs_rename_sem); 1278 } 1279 } 1280 1281 int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1282 struct nameidata *nd) 1283 { 1284 int error = may_create(dir, dentry, nd); 1285 1286 if (error) 1287 return error; 1288 1289 if (!dir->i_op || !dir->i_op->create) 1290 return -EACCES; /* shouldn't it be ENOSYS? 
*/ 1291 mode &= S_IALLUGO; 1292 mode |= S_IFREG; 1293 error = security_inode_create(dir, dentry, mode); 1294 if (error) 1295 return error; 1296 DQUOT_INIT(dir); 1297 error = dir->i_op->create(dir, dentry, mode, nd); 1298 if (!error) { 1299 inode_dir_notify(dir, DN_CREATE); 1300 security_inode_post_create(dir, dentry, mode); 1301 } 1302 return error; 1303 } 1304 1305 int may_open(struct nameidata *nd, int acc_mode, int flag) 1306 { 1307 struct dentry *dentry = nd->dentry; 1308 struct inode *inode = dentry->d_inode; 1309 int error; 1310 1311 if (!inode) 1312 return -ENOENT; 1313 1314 if (S_ISLNK(inode->i_mode)) 1315 return -ELOOP; 1316 1317 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 1318 return -EISDIR; 1319 1320 error = permission(inode, acc_mode, nd); 1321 if (error) 1322 return error; 1323 1324 /* 1325 * FIFO's, sockets and device files are special: they don't 1326 * actually live on the filesystem itself, and as such you 1327 * can write to them even if the filesystem is read-only. 1328 */ 1329 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1330 flag &= ~O_TRUNC; 1331 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1332 if (nd->mnt->mnt_flags & MNT_NODEV) 1333 return -EACCES; 1334 1335 flag &= ~O_TRUNC; 1336 } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) 1337 return -EROFS; 1338 /* 1339 * An append-only file must be opened in append mode for writing. 1340 */ 1341 if (IS_APPEND(inode)) { 1342 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1343 return -EPERM; 1344 if (flag & O_TRUNC) 1345 return -EPERM; 1346 } 1347 1348 /* O_NOATIME can only be set by the owner or superuser */ 1349 if (flag & O_NOATIME) 1350 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) 1351 return -EPERM; 1352 1353 /* 1354 * Ensure there are no outstanding leases on the file. 
1355 */ 1356 error = break_lease(inode, flag); 1357 if (error) 1358 return error; 1359 1360 if (flag & O_TRUNC) { 1361 error = get_write_access(inode); 1362 if (error) 1363 return error; 1364 1365 /* 1366 * Refuse to truncate files with mandatory locks held on them. 1367 */ 1368 error = locks_verify_locked(inode); 1369 if (!error) { 1370 DQUOT_INIT(inode); 1371 1372 error = do_truncate(dentry, 0); 1373 } 1374 put_write_access(inode); 1375 if (error) 1376 return error; 1377 } else 1378 if (flag & FMODE_WRITE) 1379 DQUOT_INIT(inode); 1380 1381 return 0; 1382 } 1383 1384 /* 1385 * open_namei() 1386 * 1387 * namei for open - this is in fact almost the whole open-routine. 1388 * 1389 * Note that the low bits of "flag" aren't the same as in the open 1390 * system call - they are 00 - no permissions needed 1391 * 01 - read permission needed 1392 * 10 - write permission needed 1393 * 11 - read/write permissions needed 1394 * which is a lot more logical, and also allows the "no perm" needed 1395 * for symlinks (where the permissions are checked later). 1396 * SMP-safe 1397 */ 1398 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) 1399 { 1400 int acc_mode, error = 0; 1401 struct dentry *dentry; 1402 struct dentry *dir; 1403 int count = 0; 1404 1405 acc_mode = ACC_MODE(flag); 1406 1407 /* Allow the LSM permission hook to distinguish append 1408 access from general write access. */ 1409 if (flag & O_APPEND) 1410 acc_mode |= MAY_APPEND; 1411 1412 /* Fill in the open() intent data */ 1413 nd->intent.open.flags = flag; 1414 nd->intent.open.create_mode = mode; 1415 1416 /* 1417 * The simplest case - just a plain lookup. 1418 */ 1419 if (!(flag & O_CREAT)) { 1420 error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); 1421 if (error) 1422 return error; 1423 goto ok; 1424 } 1425 1426 /* 1427 * Create - we need to know the parent. 
1428 */ 1429 error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); 1430 if (error) 1431 return error; 1432 1433 /* 1434 * We have the parent and last component. First of all, check 1435 * that we are not asked to creat(2) an obvious directory - that 1436 * will not do. 1437 */ 1438 error = -EISDIR; 1439 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1440 goto exit; 1441 1442 dir = nd->dentry; 1443 nd->flags &= ~LOOKUP_PARENT; 1444 down(&dir->d_inode->i_sem); 1445 dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1446 1447 do_last: 1448 error = PTR_ERR(dentry); 1449 if (IS_ERR(dentry)) { 1450 up(&dir->d_inode->i_sem); 1451 goto exit; 1452 } 1453 1454 /* Negative dentry, just create the file */ 1455 if (!dentry->d_inode) { 1456 if (!IS_POSIXACL(dir->d_inode)) 1457 mode &= ~current->fs->umask; 1458 error = vfs_create(dir->d_inode, dentry, mode, nd); 1459 up(&dir->d_inode->i_sem); 1460 dput(nd->dentry); 1461 nd->dentry = dentry; 1462 if (error) 1463 goto exit; 1464 /* Don't check for write permission, don't truncate */ 1465 acc_mode = 0; 1466 flag &= ~O_TRUNC; 1467 goto ok; 1468 } 1469 1470 /* 1471 * It already exists. 
1472 */ 1473 up(&dir->d_inode->i_sem); 1474 1475 error = -EEXIST; 1476 if (flag & O_EXCL) 1477 goto exit_dput; 1478 1479 if (d_mountpoint(dentry)) { 1480 error = -ELOOP; 1481 if (flag & O_NOFOLLOW) 1482 goto exit_dput; 1483 while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); 1484 } 1485 error = -ENOENT; 1486 if (!dentry->d_inode) 1487 goto exit_dput; 1488 if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) 1489 goto do_link; 1490 1491 dput(nd->dentry); 1492 nd->dentry = dentry; 1493 error = -EISDIR; 1494 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) 1495 goto exit; 1496 ok: 1497 error = may_open(nd, acc_mode, flag); 1498 if (error) 1499 goto exit; 1500 return 0; 1501 1502 exit_dput: 1503 dput(dentry); 1504 exit: 1505 path_release(nd); 1506 return error; 1507 1508 do_link: 1509 error = -ELOOP; 1510 if (flag & O_NOFOLLOW) 1511 goto exit_dput; 1512 /* 1513 * This is subtle. Instead of calling do_follow_link() we do the 1514 * thing by hands. The reason is that this way we have zero link_count 1515 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1516 * After that we have the parent and last component, i.e. 1517 * we are in the same situation as after the first path_walk(). 1518 * Well, almost - if the last component is normal we get its copy 1519 * stored in nd->last.name and we will have to putname() it when we 1520 * are done. Procfs-like symlinks just set LAST_BIND. 
1521 */ 1522 nd->flags |= LOOKUP_PARENT; 1523 error = security_inode_follow_link(dentry, nd); 1524 if (error) 1525 goto exit_dput; 1526 error = __do_follow_link(dentry, nd); 1527 dput(dentry); 1528 if (error) 1529 return error; 1530 nd->flags &= ~LOOKUP_PARENT; 1531 if (nd->last_type == LAST_BIND) { 1532 dentry = nd->dentry; 1533 goto ok; 1534 } 1535 error = -EISDIR; 1536 if (nd->last_type != LAST_NORM) 1537 goto exit; 1538 if (nd->last.name[nd->last.len]) { 1539 putname(nd->last.name); 1540 goto exit; 1541 } 1542 error = -ELOOP; 1543 if (count++==32) { 1544 putname(nd->last.name); 1545 goto exit; 1546 } 1547 dir = nd->dentry; 1548 down(&dir->d_inode->i_sem); 1549 dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1550 putname(nd->last.name); 1551 goto do_last; 1552 } 1553 1554 /** 1555 * lookup_create - lookup a dentry, creating it if it doesn't exist 1556 * @nd: nameidata info 1557 * @is_dir: directory flag 1558 * 1559 * Simple function to lookup and return a dentry and create it 1560 * if it doesn't exist. Is SMP-safe. 
1561 */ 1562 struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1563 { 1564 struct dentry *dentry; 1565 1566 down(&nd->dentry->d_inode->i_sem); 1567 dentry = ERR_PTR(-EEXIST); 1568 if (nd->last_type != LAST_NORM) 1569 goto fail; 1570 nd->flags &= ~LOOKUP_PARENT; 1571 dentry = lookup_hash(&nd->last, nd->dentry); 1572 if (IS_ERR(dentry)) 1573 goto fail; 1574 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) 1575 goto enoent; 1576 return dentry; 1577 enoent: 1578 dput(dentry); 1579 dentry = ERR_PTR(-ENOENT); 1580 fail: 1581 return dentry; 1582 } 1583 1584 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1585 { 1586 int error = may_create(dir, dentry, NULL); 1587 1588 if (error) 1589 return error; 1590 1591 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 1592 return -EPERM; 1593 1594 if (!dir->i_op || !dir->i_op->mknod) 1595 return -EPERM; 1596 1597 error = security_inode_mknod(dir, dentry, mode, dev); 1598 if (error) 1599 return error; 1600 1601 DQUOT_INIT(dir); 1602 error = dir->i_op->mknod(dir, dentry, mode, dev); 1603 if (!error) { 1604 inode_dir_notify(dir, DN_CREATE); 1605 security_inode_post_mknod(dir, dentry, mode, dev); 1606 } 1607 return error; 1608 } 1609 1610 asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) 1611 { 1612 int error = 0; 1613 char * tmp; 1614 struct dentry * dentry; 1615 struct nameidata nd; 1616 1617 if (S_ISDIR(mode)) 1618 return -EPERM; 1619 tmp = getname(filename); 1620 if (IS_ERR(tmp)) 1621 return PTR_ERR(tmp); 1622 1623 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1624 if (error) 1625 goto out; 1626 dentry = lookup_create(&nd, 0); 1627 error = PTR_ERR(dentry); 1628 1629 if (!IS_POSIXACL(nd.dentry->d_inode)) 1630 mode &= ~current->fs->umask; 1631 if (!IS_ERR(dentry)) { 1632 switch (mode & S_IFMT) { 1633 case 0: case S_IFREG: 1634 error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); 1635 break; 1636 case S_IFCHR: case S_IFBLK: 1637 error = 
vfs_mknod(nd.dentry->d_inode,dentry,mode, 1638 new_decode_dev(dev)); 1639 break; 1640 case S_IFIFO: case S_IFSOCK: 1641 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); 1642 break; 1643 case S_IFDIR: 1644 error = -EPERM; 1645 break; 1646 default: 1647 error = -EINVAL; 1648 } 1649 dput(dentry); 1650 } 1651 up(&nd.dentry->d_inode->i_sem); 1652 path_release(&nd); 1653 out: 1654 putname(tmp); 1655 1656 return error; 1657 } 1658 1659 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1660 { 1661 int error = may_create(dir, dentry, NULL); 1662 1663 if (error) 1664 return error; 1665 1666 if (!dir->i_op || !dir->i_op->mkdir) 1667 return -EPERM; 1668 1669 mode &= (S_IRWXUGO|S_ISVTX); 1670 error = security_inode_mkdir(dir, dentry, mode); 1671 if (error) 1672 return error; 1673 1674 DQUOT_INIT(dir); 1675 error = dir->i_op->mkdir(dir, dentry, mode); 1676 if (!error) { 1677 inode_dir_notify(dir, DN_CREATE); 1678 security_inode_post_mkdir(dir,dentry, mode); 1679 } 1680 return error; 1681 } 1682 1683 asmlinkage long sys_mkdir(const char __user * pathname, int mode) 1684 { 1685 int error = 0; 1686 char * tmp; 1687 1688 tmp = getname(pathname); 1689 error = PTR_ERR(tmp); 1690 if (!IS_ERR(tmp)) { 1691 struct dentry *dentry; 1692 struct nameidata nd; 1693 1694 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1695 if (error) 1696 goto out; 1697 dentry = lookup_create(&nd, 1); 1698 error = PTR_ERR(dentry); 1699 if (!IS_ERR(dentry)) { 1700 if (!IS_POSIXACL(nd.dentry->d_inode)) 1701 mode &= ~current->fs->umask; 1702 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); 1703 dput(dentry); 1704 } 1705 up(&nd.dentry->d_inode->i_sem); 1706 path_release(&nd); 1707 out: 1708 putname(tmp); 1709 } 1710 1711 return error; 1712 } 1713 1714 /* 1715 * We try to drop the dentry early: we should have 1716 * a usage count of 2 if we're the only user of this 1717 * dentry, and if that is true (possibly after pruning 1718 * the dcache), then we drop the dentry now. 
1719 * 1720 * A low-level filesystem can, if it choses, legally 1721 * do a 1722 * 1723 * if (!d_unhashed(dentry)) 1724 * return -EBUSY; 1725 * 1726 * if it cannot handle the case of removing a directory 1727 * that is still in use by something else.. 1728 */ 1729 void dentry_unhash(struct dentry *dentry) 1730 { 1731 dget(dentry); 1732 if (atomic_read(&dentry->d_count)) 1733 shrink_dcache_parent(dentry); 1734 spin_lock(&dcache_lock); 1735 spin_lock(&dentry->d_lock); 1736 if (atomic_read(&dentry->d_count) == 2) 1737 __d_drop(dentry); 1738 spin_unlock(&dentry->d_lock); 1739 spin_unlock(&dcache_lock); 1740 } 1741 1742 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 1743 { 1744 int error = may_delete(dir, dentry, 1); 1745 1746 if (error) 1747 return error; 1748 1749 if (!dir->i_op || !dir->i_op->rmdir) 1750 return -EPERM; 1751 1752 DQUOT_INIT(dir); 1753 1754 down(&dentry->d_inode->i_sem); 1755 dentry_unhash(dentry); 1756 if (d_mountpoint(dentry)) 1757 error = -EBUSY; 1758 else { 1759 error = security_inode_rmdir(dir, dentry); 1760 if (!error) { 1761 error = dir->i_op->rmdir(dir, dentry); 1762 if (!error) 1763 dentry->d_inode->i_flags |= S_DEAD; 1764 } 1765 } 1766 up(&dentry->d_inode->i_sem); 1767 if (!error) { 1768 inode_dir_notify(dir, DN_DELETE); 1769 d_delete(dentry); 1770 } 1771 dput(dentry); 1772 1773 return error; 1774 } 1775 1776 asmlinkage long sys_rmdir(const char __user * pathname) 1777 { 1778 int error = 0; 1779 char * name; 1780 struct dentry *dentry; 1781 struct nameidata nd; 1782 1783 name = getname(pathname); 1784 if(IS_ERR(name)) 1785 return PTR_ERR(name); 1786 1787 error = path_lookup(name, LOOKUP_PARENT, &nd); 1788 if (error) 1789 goto exit; 1790 1791 switch(nd.last_type) { 1792 case LAST_DOTDOT: 1793 error = -ENOTEMPTY; 1794 goto exit1; 1795 case LAST_DOT: 1796 error = -EINVAL; 1797 goto exit1; 1798 case LAST_ROOT: 1799 error = -EBUSY; 1800 goto exit1; 1801 } 1802 down(&nd.dentry->d_inode->i_sem); 1803 dentry = lookup_hash(&nd.last, 
nd.dentry); 1804 error = PTR_ERR(dentry); 1805 if (!IS_ERR(dentry)) { 1806 error = vfs_rmdir(nd.dentry->d_inode, dentry); 1807 dput(dentry); 1808 } 1809 up(&nd.dentry->d_inode->i_sem); 1810 exit1: 1811 path_release(&nd); 1812 exit: 1813 putname(name); 1814 return error; 1815 } 1816 1817 int vfs_unlink(struct inode *dir, struct dentry *dentry) 1818 { 1819 int error = may_delete(dir, dentry, 0); 1820 1821 if (error) 1822 return error; 1823 1824 if (!dir->i_op || !dir->i_op->unlink) 1825 return -EPERM; 1826 1827 DQUOT_INIT(dir); 1828 1829 down(&dentry->d_inode->i_sem); 1830 if (d_mountpoint(dentry)) 1831 error = -EBUSY; 1832 else { 1833 error = security_inode_unlink(dir, dentry); 1834 if (!error) 1835 error = dir->i_op->unlink(dir, dentry); 1836 } 1837 up(&dentry->d_inode->i_sem); 1838 1839 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 1840 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 1841 d_delete(dentry); 1842 inode_dir_notify(dir, DN_DELETE); 1843 } 1844 return error; 1845 } 1846 1847 /* 1848 * Make sure that the actual truncation of the file will occur outside its 1849 * directory's i_sem. Truncate can take a long time if there is a lot of 1850 * writeout happening, and we don't want to prevent access to the directory 1851 * while waiting on the I/O. 1852 */ 1853 asmlinkage long sys_unlink(const char __user * pathname) 1854 { 1855 int error = 0; 1856 char * name; 1857 struct dentry *dentry; 1858 struct nameidata nd; 1859 struct inode *inode = NULL; 1860 1861 name = getname(pathname); 1862 if(IS_ERR(name)) 1863 return PTR_ERR(name); 1864 1865 error = path_lookup(name, LOOKUP_PARENT, &nd); 1866 if (error) 1867 goto exit; 1868 error = -EISDIR; 1869 if (nd.last_type != LAST_NORM) 1870 goto exit1; 1871 down(&nd.dentry->d_inode->i_sem); 1872 dentry = lookup_hash(&nd.last, nd.dentry); 1873 error = PTR_ERR(dentry); 1874 if (!IS_ERR(dentry)) { 1875 /* Why not before? 
Because we want correct error value */ 1876 if (nd.last.name[nd.last.len]) 1877 goto slashes; 1878 inode = dentry->d_inode; 1879 if (inode) 1880 atomic_inc(&inode->i_count); 1881 error = vfs_unlink(nd.dentry->d_inode, dentry); 1882 exit2: 1883 dput(dentry); 1884 } 1885 up(&nd.dentry->d_inode->i_sem); 1886 if (inode) 1887 iput(inode); /* truncate the inode here */ 1888 exit1: 1889 path_release(&nd); 1890 exit: 1891 putname(name); 1892 return error; 1893 1894 slashes: 1895 error = !dentry->d_inode ? -ENOENT : 1896 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 1897 goto exit2; 1898 } 1899 1900 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 1901 { 1902 int error = may_create(dir, dentry, NULL); 1903 1904 if (error) 1905 return error; 1906 1907 if (!dir->i_op || !dir->i_op->symlink) 1908 return -EPERM; 1909 1910 error = security_inode_symlink(dir, dentry, oldname); 1911 if (error) 1912 return error; 1913 1914 DQUOT_INIT(dir); 1915 error = dir->i_op->symlink(dir, dentry, oldname); 1916 if (!error) { 1917 inode_dir_notify(dir, DN_CREATE); 1918 security_inode_post_symlink(dir, dentry, oldname); 1919 } 1920 return error; 1921 } 1922 1923 asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname) 1924 { 1925 int error = 0; 1926 char * from; 1927 char * to; 1928 1929 from = getname(oldname); 1930 if(IS_ERR(from)) 1931 return PTR_ERR(from); 1932 to = getname(newname); 1933 error = PTR_ERR(to); 1934 if (!IS_ERR(to)) { 1935 struct dentry *dentry; 1936 struct nameidata nd; 1937 1938 error = path_lookup(to, LOOKUP_PARENT, &nd); 1939 if (error) 1940 goto out; 1941 dentry = lookup_create(&nd, 0); 1942 error = PTR_ERR(dentry); 1943 if (!IS_ERR(dentry)) { 1944 error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); 1945 dput(dentry); 1946 } 1947 up(&nd.dentry->d_inode->i_sem); 1948 path_release(&nd); 1949 out: 1950 putname(to); 1951 } 1952 putname(from); 1953 return error; 1954 } 1955 1956 int 
vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 1957 { 1958 struct inode *inode = old_dentry->d_inode; 1959 int error; 1960 1961 if (!inode) 1962 return -ENOENT; 1963 1964 error = may_create(dir, new_dentry, NULL); 1965 if (error) 1966 return error; 1967 1968 if (dir->i_sb != inode->i_sb) 1969 return -EXDEV; 1970 1971 /* 1972 * A link to an append-only or immutable file cannot be created. 1973 */ 1974 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1975 return -EPERM; 1976 if (!dir->i_op || !dir->i_op->link) 1977 return -EPERM; 1978 if (S_ISDIR(old_dentry->d_inode->i_mode)) 1979 return -EPERM; 1980 1981 error = security_inode_link(old_dentry, dir, new_dentry); 1982 if (error) 1983 return error; 1984 1985 down(&old_dentry->d_inode->i_sem); 1986 DQUOT_INIT(dir); 1987 error = dir->i_op->link(old_dentry, dir, new_dentry); 1988 up(&old_dentry->d_inode->i_sem); 1989 if (!error) { 1990 inode_dir_notify(dir, DN_CREATE); 1991 security_inode_post_link(old_dentry, dir, new_dentry); 1992 } 1993 return error; 1994 } 1995 1996 /* 1997 * Hardlinks are often used in delicate situations. We avoid 1998 * security-related surprises by not following symlinks on the 1999 * newname. --KAB 2000 * 2001 * We don't follow them on the oldname either to be compatible 2002 * with linux 2.0, and to avoid hard-linking to directories 2003 * and other special files. 
--ADM
 */
/*
 * sys_link() - link(2) entry point.
 *
 * Looks up @oldname with lookup flags 0 (so a trailing symlink is not
 * followed), resolves the parent of @newname, and fails with -EXDEV when
 * the two lookups ended on different vfsmounts.  Otherwise obtains the
 * new dentry with lookup_create(&nd, 0) and calls vfs_link().
 */
asmlinkage long sys_link(const char __user * oldname, const char __user * newname)
{
	struct dentry *new_dentry;
	struct nameidata nd, old_nd;
	int error;
	char * to;

	to = getname(newname);
	if (IS_ERR(to))
		return PTR_ERR(to);

	error = __user_walk(oldname, 0, &old_nd);
	if (error)
		goto exit;
	error = path_lookup(to, LOOKUP_PARENT, &nd);
	if (error)
		goto out;
	error = -EXDEV;
	if (old_nd.mnt != nd.mnt)
		goto out_release;
	new_dentry = lookup_create(&nd, 0);
	error = PTR_ERR(new_dentry);
	if (!IS_ERR(new_dentry)) {
		error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
		dput(new_dentry);
	}
	/* i_sem was taken by lookup_create(), even if it returned an error */
	up(&nd.dentry->d_inode->i_sem);
out_release:
	path_release(&nd);
out:
	path_release(&old_nd);
exit:
	putname(to);

	return error;
}

/*
 * The worst of all namespace operations - renaming directory. "Perverted"
 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
 * Problems:
 *	a) we can get into loop creation. Check is done in is_subdir().
 *	b) race potential - two innocent renames can create a loop together.
 *	   That's where 4.4 screws up. Current fix: serialization on
 *	   sb->s_vfs_rename_sem. We might be more accurate, but that's another
 *	   story.
 *	c) we have to lock _three_ objects - parents and victim (if it exists).
 *	   And that - after we got ->i_sem on parents (until then we don't know
 *	   whether the target exists).  Solution: try to be smart with locking
 *	   order for inodes.  We rely on the fact that tree topology may change
 *	   only under ->s_vfs_rename_sem _and_ that parent of the object we
 *	   move will be locked.  Thus we can rank directories by the tree
 *	   (ancestors first) and rank all non-directories after them.
2058 * That works since everybody except rename does "lock parent, lookup, 2059 * lock child" and rename is under ->s_vfs_rename_sem. 2060 * HOWEVER, it relies on the assumption that any object with ->lookup() 2061 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2062 * we'd better make sure that there's no link(2) for them. 2063 * d) some filesystems don't support opened-but-unlinked directories, 2064 * either because of layout or because they are not ready to deal with 2065 * all cases correctly. The latter will be fixed (taking this sort of 2066 * stuff into VFS), but the former is not going away. Solution: the same 2067 * trick as in rmdir(). 2068 * e) conversion from fhandle to dentry may come in the wrong moment - when 2069 * we are removing the target. Solution: we will have to grab ->i_sem 2070 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2071 * ->i_sem on parents, which works but leads to some truely excessive 2072 * locking]. 2073 */ 2074 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2075 struct inode *new_dir, struct dentry *new_dentry) 2076 { 2077 int error = 0; 2078 struct inode *target; 2079 2080 /* 2081 * If we are going to change the parent - check write permissions, 2082 * we'll need to flip '..'. 
2083 */ 2084 if (new_dir != old_dir) { 2085 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2086 if (error) 2087 return error; 2088 } 2089 2090 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2091 if (error) 2092 return error; 2093 2094 target = new_dentry->d_inode; 2095 if (target) { 2096 down(&target->i_sem); 2097 dentry_unhash(new_dentry); 2098 } 2099 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2100 error = -EBUSY; 2101 else 2102 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2103 if (target) { 2104 if (!error) 2105 target->i_flags |= S_DEAD; 2106 up(&target->i_sem); 2107 if (d_unhashed(new_dentry)) 2108 d_rehash(new_dentry); 2109 dput(new_dentry); 2110 } 2111 if (!error) { 2112 d_move(old_dentry,new_dentry); 2113 security_inode_post_rename(old_dir, old_dentry, 2114 new_dir, new_dentry); 2115 } 2116 return error; 2117 } 2118 2119 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2120 struct inode *new_dir, struct dentry *new_dentry) 2121 { 2122 struct inode *target; 2123 int error; 2124 2125 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2126 if (error) 2127 return error; 2128 2129 dget(new_dentry); 2130 target = new_dentry->d_inode; 2131 if (target) 2132 down(&target->i_sem); 2133 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2134 error = -EBUSY; 2135 else 2136 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2137 if (!error) { 2138 /* The following d_move() should become unconditional */ 2139 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) 2140 d_move(old_dentry, new_dentry); 2141 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry); 2142 } 2143 if (target) 2144 up(&target->i_sem); 2145 dput(new_dentry); 2146 return error; 2147 } 2148 2149 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2150 struct inode *new_dir, struct dentry *new_dentry) 2151 { 2152 int error; 2153 
int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2154 2155 if (old_dentry->d_inode == new_dentry->d_inode) 2156 return 0; 2157 2158 error = may_delete(old_dir, old_dentry, is_dir); 2159 if (error) 2160 return error; 2161 2162 if (!new_dentry->d_inode) 2163 error = may_create(new_dir, new_dentry, NULL); 2164 else 2165 error = may_delete(new_dir, new_dentry, is_dir); 2166 if (error) 2167 return error; 2168 2169 if (!old_dir->i_op || !old_dir->i_op->rename) 2170 return -EPERM; 2171 2172 DQUOT_INIT(old_dir); 2173 DQUOT_INIT(new_dir); 2174 2175 if (is_dir) 2176 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2177 else 2178 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2179 if (!error) { 2180 if (old_dir == new_dir) 2181 inode_dir_notify(old_dir, DN_RENAME); 2182 else { 2183 inode_dir_notify(old_dir, DN_DELETE); 2184 inode_dir_notify(new_dir, DN_CREATE); 2185 } 2186 } 2187 return error; 2188 } 2189 2190 static inline int do_rename(const char * oldname, const char * newname) 2191 { 2192 int error = 0; 2193 struct dentry * old_dir, * new_dir; 2194 struct dentry * old_dentry, *new_dentry; 2195 struct dentry * trap; 2196 struct nameidata oldnd, newnd; 2197 2198 error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); 2199 if (error) 2200 goto exit; 2201 2202 error = path_lookup(newname, LOOKUP_PARENT, &newnd); 2203 if (error) 2204 goto exit1; 2205 2206 error = -EXDEV; 2207 if (oldnd.mnt != newnd.mnt) 2208 goto exit2; 2209 2210 old_dir = oldnd.dentry; 2211 error = -EBUSY; 2212 if (oldnd.last_type != LAST_NORM) 2213 goto exit2; 2214 2215 new_dir = newnd.dentry; 2216 if (newnd.last_type != LAST_NORM) 2217 goto exit2; 2218 2219 trap = lock_rename(new_dir, old_dir); 2220 2221 old_dentry = lookup_hash(&oldnd.last, old_dir); 2222 error = PTR_ERR(old_dentry); 2223 if (IS_ERR(old_dentry)) 2224 goto exit3; 2225 /* source must exist */ 2226 error = -ENOENT; 2227 if (!old_dentry->d_inode) 2228 goto exit4; 2229 /* unless the source is a directory trailing 
slashes give -ENOTDIR */ 2230 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2231 error = -ENOTDIR; 2232 if (oldnd.last.name[oldnd.last.len]) 2233 goto exit4; 2234 if (newnd.last.name[newnd.last.len]) 2235 goto exit4; 2236 } 2237 /* source should not be ancestor of target */ 2238 error = -EINVAL; 2239 if (old_dentry == trap) 2240 goto exit4; 2241 new_dentry = lookup_hash(&newnd.last, new_dir); 2242 error = PTR_ERR(new_dentry); 2243 if (IS_ERR(new_dentry)) 2244 goto exit4; 2245 /* target should not be an ancestor of source */ 2246 error = -ENOTEMPTY; 2247 if (new_dentry == trap) 2248 goto exit5; 2249 2250 error = vfs_rename(old_dir->d_inode, old_dentry, 2251 new_dir->d_inode, new_dentry); 2252 exit5: 2253 dput(new_dentry); 2254 exit4: 2255 dput(old_dentry); 2256 exit3: 2257 unlock_rename(new_dir, old_dir); 2258 exit2: 2259 path_release(&newnd); 2260 exit1: 2261 path_release(&oldnd); 2262 exit: 2263 return error; 2264 } 2265 2266 asmlinkage long sys_rename(const char __user * oldname, const char __user * newname) 2267 { 2268 int error; 2269 char * from; 2270 char * to; 2271 2272 from = getname(oldname); 2273 if(IS_ERR(from)) 2274 return PTR_ERR(from); 2275 to = getname(newname); 2276 error = PTR_ERR(to); 2277 if (!IS_ERR(to)) { 2278 error = do_rename(from,to); 2279 putname(to); 2280 } 2281 putname(from); 2282 return error; 2283 } 2284 2285 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2286 { 2287 int len; 2288 2289 len = PTR_ERR(link); 2290 if (IS_ERR(link)) 2291 goto out; 2292 2293 len = strlen(link); 2294 if (len > (unsigned) buflen) 2295 len = buflen; 2296 if (copy_to_user(buffer, link, len)) 2297 len = -EFAULT; 2298 out: 2299 return len; 2300 } 2301 2302 /* 2303 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2304 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 2305 * using) it for any given inode is up to filesystem. 
2306 */ 2307 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2308 { 2309 struct nameidata nd; 2310 int res; 2311 nd.depth = 0; 2312 res = dentry->d_inode->i_op->follow_link(dentry, &nd); 2313 if (!res) { 2314 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2315 if (dentry->d_inode->i_op->put_link) 2316 dentry->d_inode->i_op->put_link(dentry, &nd); 2317 } 2318 return res; 2319 } 2320 2321 int vfs_follow_link(struct nameidata *nd, const char *link) 2322 { 2323 return __vfs_follow_link(nd, link); 2324 } 2325 2326 /* get the link contents into pagecache */ 2327 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2328 { 2329 struct page * page; 2330 struct address_space *mapping = dentry->d_inode->i_mapping; 2331 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, 2332 NULL); 2333 if (IS_ERR(page)) 2334 goto sync_fail; 2335 wait_on_page_locked(page); 2336 if (!PageUptodate(page)) 2337 goto async_fail; 2338 *ppage = page; 2339 return kmap(page); 2340 2341 async_fail: 2342 page_cache_release(page); 2343 return ERR_PTR(-EIO); 2344 2345 sync_fail: 2346 return (char*)page; 2347 } 2348 2349 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2350 { 2351 struct page *page = NULL; 2352 char *s = page_getlink(dentry, &page); 2353 int res = vfs_readlink(dentry,buffer,buflen,s); 2354 if (page) { 2355 kunmap(page); 2356 page_cache_release(page); 2357 } 2358 return res; 2359 } 2360 2361 int page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2362 { 2363 struct page *page; 2364 nd_set_link(nd, page_getlink(dentry, &page)); 2365 return 0; 2366 } 2367 2368 void page_put_link(struct dentry *dentry, struct nameidata *nd) 2369 { 2370 if (!IS_ERR(nd_get_link(nd))) { 2371 struct page *page; 2372 page = find_get_page(dentry->d_inode->i_mapping, 0); 2373 if (!page) 2374 BUG(); 2375 kunmap(page); 2376 page_cache_release(page); 2377 page_cache_release(page); 2378 } 2379 } 
2380 2381 int page_symlink(struct inode *inode, const char *symname, int len) 2382 { 2383 struct address_space *mapping = inode->i_mapping; 2384 struct page *page = grab_cache_page(mapping, 0); 2385 int err = -ENOMEM; 2386 char *kaddr; 2387 2388 if (!page) 2389 goto fail; 2390 err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); 2391 if (err) 2392 goto fail_map; 2393 kaddr = kmap_atomic(page, KM_USER0); 2394 memcpy(kaddr, symname, len-1); 2395 kunmap_atomic(kaddr, KM_USER0); 2396 mapping->a_ops->commit_write(NULL, page, 0, len-1); 2397 /* 2398 * Notice that we are _not_ going to block here - end of page is 2399 * unmapped, so this will only try to map the rest of page, see 2400 * that it is unmapped (typically even will not look into inode - 2401 * ->i_size will be enough for everything) and zero it out. 2402 * OTOH it's obviously correct and should make the page up-to-date. 2403 */ 2404 if (!PageUptodate(page)) { 2405 err = mapping->a_ops->readpage(NULL, page); 2406 wait_on_page_locked(page); 2407 } else { 2408 unlock_page(page); 2409 } 2410 page_cache_release(page); 2411 if (err < 0) 2412 goto fail; 2413 mark_inode_dirty(inode); 2414 return 0; 2415 fail_map: 2416 unlock_page(page); 2417 page_cache_release(page); 2418 fail: 2419 return err; 2420 } 2421 2422 struct inode_operations page_symlink_inode_operations = { 2423 .readlink = generic_readlink, 2424 .follow_link = page_follow_link_light, 2425 .put_link = page_put_link, 2426 }; 2427 2428 EXPORT_SYMBOL(__user_walk); 2429 EXPORT_SYMBOL(follow_down); 2430 EXPORT_SYMBOL(follow_up); 2431 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2432 EXPORT_SYMBOL(getname); 2433 EXPORT_SYMBOL(lock_rename); 2434 EXPORT_SYMBOL(lookup_hash); 2435 EXPORT_SYMBOL(lookup_one_len); 2436 EXPORT_SYMBOL(page_follow_link_light); 2437 EXPORT_SYMBOL(page_put_link); 2438 EXPORT_SYMBOL(page_readlink); 2439 EXPORT_SYMBOL(page_symlink); 2440 EXPORT_SYMBOL(page_symlink_inode_operations); 2441 EXPORT_SYMBOL(path_lookup); 2442 
/* Further symbols from this file made available to modules. */
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(generic_readlink);