1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/fsnotify.h> 25 #include <linux/smp_lock.h> 26 #include <linux/personality.h> 27 #include <linux/security.h> 28 #include <linux/syscalls.h> 29 #include <linux/mount.h> 30 #include <linux/audit.h> 31 #include <asm/namei.h> 32 #include <asm/uaccess.h> 33 34 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 35 36 /* [Feb-1997 T. Schoebel-Theuer] 37 * Fundamental changes in the pathname lookup mechanisms (namei) 38 * were necessary because of omirr. The reason is that omirr needs 39 * to know the _real_ pathname, not the user-supplied one, in case 40 * of symlinks (and also when transname replacements occur). 41 * 42 * The new code replaces the old recursive symlink resolution with 43 * an iterative one (in case of non-nested symlink chains). It does 44 * this with calls to <fs>_follow_link(). 45 * As a side effect, dir_namei(), _namei() and follow_link() are now 46 * replaced with a single function lookup_dentry() that can handle all 47 * the special cases of the former code. 48 * 49 * With the new dcache, the pathname is stored at each inode, at least as 50 * long as the refcount of the inode is positive. As a side effect, the 51 * size of the dcache depends on the inode cache and thus is dynamic. 52 * 53 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 54 * resolution to correspond with current state of the code. 55 * 56 * Note that the symlink resolution is not *completely* iterative. 
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
94 * 95 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 96 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 97 * During the 2.4 we need to fix the userland stuff depending on it - 98 * hopefully we will be able to get rid of that wart in 2.5. So far only 99 * XEmacs seems to be relying on it... 100 */ 101 /* 102 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 103 * implemented. Let's see if raised priority of ->s_vfs_rename_sem gives 104 * any extra contention... 105 */ 106 107 /* In order to reduce some races, while at the same time doing additional 108 * checking and hopefully speeding things up, we copy filenames to the 109 * kernel data space before using them.. 110 * 111 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 112 * PATH_MAX includes the nul terminator --RR. 113 */ 114 static inline int do_getname(const char __user *filename, char *page) 115 { 116 int retval; 117 unsigned long len = PATH_MAX; 118 119 if (!segment_eq(get_fs(), KERNEL_DS)) { 120 if ((unsigned long) filename >= TASK_SIZE) 121 return -EFAULT; 122 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 123 len = TASK_SIZE - (unsigned long) filename; 124 } 125 126 retval = strncpy_from_user(page, filename, len); 127 if (retval > 0) { 128 if (retval < len) 129 return 0; 130 return -ENAMETOOLONG; 131 } else if (!retval) 132 retval = -ENOENT; 133 return retval; 134 } 135 136 char * getname(const char __user * filename) 137 { 138 char *tmp, *result; 139 140 result = ERR_PTR(-ENOMEM); 141 tmp = __getname(); 142 if (tmp) { 143 int retval = do_getname(filename, tmp); 144 145 result = tmp; 146 if (retval < 0) { 147 __putname(tmp); 148 result = ERR_PTR(retval); 149 } 150 } 151 audit_getname(result); 152 return result; 153 } 154 155 #ifdef CONFIG_AUDITSYSCALL 156 void putname(const char *name) 157 { 158 if (unlikely(current->audit_context)) 159 audit_putname(name); 160 else 161 __putname(name); 162 } 163 
EXPORT_SYMBOL(putname); 164 #endif 165 166 167 /** 168 * generic_permission - check for access rights on a Posix-like filesystem 169 * @inode: inode to check access rights for 170 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 171 * @check_acl: optional callback to check for Posix ACLs 172 * 173 * Used to check for read/write/execute permissions on a file. 174 * We use "fsuid" for this, letting us set arbitrary permissions 175 * for filesystem access without changing the "normal" uids which 176 * are used for other things.. 177 */ 178 int generic_permission(struct inode *inode, int mask, 179 int (*check_acl)(struct inode *inode, int mask)) 180 { 181 umode_t mode = inode->i_mode; 182 183 if (current->fsuid == inode->i_uid) 184 mode >>= 6; 185 else { 186 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 187 int error = check_acl(inode, mask); 188 if (error == -EACCES) 189 goto check_capabilities; 190 else if (error != -EAGAIN) 191 return error; 192 } 193 194 if (in_group_p(inode->i_gid)) 195 mode >>= 3; 196 } 197 198 /* 199 * If the DACs are ok we don't need any capability check. 200 */ 201 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 202 return 0; 203 204 check_capabilities: 205 /* 206 * Read/write DACs are always overridable. 207 * Executable DACs are overridable if at least one exec bit is set. 208 */ 209 if (!(mask & MAY_EXEC) || 210 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 211 if (capable(CAP_DAC_OVERRIDE)) 212 return 0; 213 214 /* 215 * Searching includes executable on directories, else just read. 216 */ 217 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 218 if (capable(CAP_DAC_READ_SEARCH)) 219 return 0; 220 221 return -EACCES; 222 } 223 224 int permission(struct inode *inode, int mask, struct nameidata *nd) 225 { 226 int retval, submask; 227 228 if (mask & MAY_WRITE) { 229 umode_t mode = inode->i_mode; 230 231 /* 232 * Nobody gets write access to a read-only fs. 
233 */ 234 if (IS_RDONLY(inode) && 235 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 236 return -EROFS; 237 238 /* 239 * Nobody gets write access to an immutable file. 240 */ 241 if (IS_IMMUTABLE(inode)) 242 return -EACCES; 243 } 244 245 246 /* Ordinary permission routines do not understand MAY_APPEND. */ 247 submask = mask & ~MAY_APPEND; 248 if (inode->i_op && inode->i_op->permission) 249 retval = inode->i_op->permission(inode, submask, nd); 250 else 251 retval = generic_permission(inode, submask, NULL); 252 if (retval) 253 return retval; 254 255 return security_inode_permission(inode, mask, nd); 256 } 257 258 /* 259 * get_write_access() gets write permission for a file. 260 * put_write_access() releases this write permission. 261 * This is used for regular files. 262 * We cannot support write (and maybe mmap read-write shared) accesses and 263 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode 264 * can have the following values: 265 * 0: no writers, no VM_DENYWRITE mappings 266 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 267 * > 0: (i_writecount) users are writing to the file. 268 * 269 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 270 * except for the cases where we don't hold i_writecount yet. Then we need to 271 * use {get,deny}_write_access() - these functions check the sign and refuse 272 * to do the change if sign is wrong. Exclusion between them is provided by 273 * the inode->i_lock spinlock. 
274 */ 275 276 int get_write_access(struct inode * inode) 277 { 278 spin_lock(&inode->i_lock); 279 if (atomic_read(&inode->i_writecount) < 0) { 280 spin_unlock(&inode->i_lock); 281 return -ETXTBSY; 282 } 283 atomic_inc(&inode->i_writecount); 284 spin_unlock(&inode->i_lock); 285 286 return 0; 287 } 288 289 int deny_write_access(struct file * file) 290 { 291 struct inode *inode = file->f_dentry->d_inode; 292 293 spin_lock(&inode->i_lock); 294 if (atomic_read(&inode->i_writecount) > 0) { 295 spin_unlock(&inode->i_lock); 296 return -ETXTBSY; 297 } 298 atomic_dec(&inode->i_writecount); 299 spin_unlock(&inode->i_lock); 300 301 return 0; 302 } 303 304 void path_release(struct nameidata *nd) 305 { 306 dput(nd->dentry); 307 mntput(nd->mnt); 308 } 309 310 /* 311 * umount() mustn't call path_release()/mntput() as that would clear 312 * mnt_expiry_mark 313 */ 314 void path_release_on_umount(struct nameidata *nd) 315 { 316 dput(nd->dentry); 317 mntput_no_expire(nd->mnt); 318 } 319 320 /* 321 * Internal lookup() using the new generic dcache. 322 * SMP-safe 323 */ 324 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 325 { 326 struct dentry * dentry = __d_lookup(parent, name); 327 328 /* lockess __d_lookup may fail due to concurrent d_move() 329 * in some unrelated directory, so try with d_lookup 330 */ 331 if (!dentry) 332 dentry = d_lookup(parent, name); 333 334 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { 335 if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { 336 dput(dentry); 337 dentry = NULL; 338 } 339 } 340 return dentry; 341 } 342 343 /* 344 * Short-cut version of permission(), for calling by 345 * path_walk(), when dcache lock is held. Combines parts 346 * of permission() and generic_permission(), and tests ONLY for 347 * MAY_EXEC permission. 348 * 349 * If appropriate, check DAC only. 
If not appropriate, or 350 * short-cut DAC fails, then call permission() to do more 351 * complete permission check. 352 */ 353 static inline int exec_permission_lite(struct inode *inode, 354 struct nameidata *nd) 355 { 356 umode_t mode = inode->i_mode; 357 358 if (inode->i_op && inode->i_op->permission) 359 return -EAGAIN; 360 361 if (current->fsuid == inode->i_uid) 362 mode >>= 6; 363 else if (in_group_p(inode->i_gid)) 364 mode >>= 3; 365 366 if (mode & MAY_EXEC) 367 goto ok; 368 369 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 370 goto ok; 371 372 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 373 goto ok; 374 375 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 376 goto ok; 377 378 return -EACCES; 379 ok: 380 return security_inode_permission(inode, MAY_EXEC, nd); 381 } 382 383 /* 384 * This is called when everything else fails, and we actually have 385 * to go to the low-level filesystem to find out what we should do.. 386 * 387 * We get the directory semaphore, and after getting that we also 388 * make sure that nobody added the entry to the dcache in the meantime.. 389 * SMP-safe 390 */ 391 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 392 { 393 struct dentry * result; 394 struct inode *dir = parent->d_inode; 395 396 down(&dir->i_sem); 397 /* 398 * First re-do the cached lookup just in case it was created 399 * while we waited for the directory semaphore.. 400 * 401 * FIXME! This could use version numbering or similar to 402 * avoid unnecessary cache lookups. 403 * 404 * The "dcache_lock" is purely to protect the RCU list walker 405 * from concurrent renames at this point (we mustn't get false 406 * negatives from the RCU list walk here, unlike the optimistic 407 * fast walk). 
408 * 409 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 410 */ 411 result = d_lookup(parent, name); 412 if (!result) { 413 struct dentry * dentry = d_alloc(parent, name); 414 result = ERR_PTR(-ENOMEM); 415 if (dentry) { 416 result = dir->i_op->lookup(dir, dentry, nd); 417 if (result) 418 dput(dentry); 419 else 420 result = dentry; 421 } 422 up(&dir->i_sem); 423 return result; 424 } 425 426 /* 427 * Uhhuh! Nasty case: the cache was re-populated while 428 * we waited on the semaphore. Need to revalidate. 429 */ 430 up(&dir->i_sem); 431 if (result->d_op && result->d_op->d_revalidate) { 432 if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { 433 dput(result); 434 result = ERR_PTR(-ENOENT); 435 } 436 } 437 return result; 438 } 439 440 static int __emul_lookup_dentry(const char *, struct nameidata *); 441 442 /* SMP-safe */ 443 static inline int 444 walk_init_root(const char *name, struct nameidata *nd) 445 { 446 read_lock(¤t->fs->lock); 447 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 448 nd->mnt = mntget(current->fs->altrootmnt); 449 nd->dentry = dget(current->fs->altroot); 450 read_unlock(¤t->fs->lock); 451 if (__emul_lookup_dentry(name,nd)) 452 return 0; 453 read_lock(¤t->fs->lock); 454 } 455 nd->mnt = mntget(current->fs->rootmnt); 456 nd->dentry = dget(current->fs->root); 457 read_unlock(¤t->fs->lock); 458 return 1; 459 } 460 461 static inline int __vfs_follow_link(struct nameidata *nd, const char *link) 462 { 463 int res = 0; 464 char *name; 465 if (IS_ERR(link)) 466 goto fail; 467 468 if (*link == '/') { 469 path_release(nd); 470 if (!walk_init_root(link, nd)) 471 /* weird __emul_prefix() stuff did it */ 472 goto out; 473 } 474 res = link_path_walk(link, nd); 475 out: 476 if (nd->depth || res || nd->last_type!=LAST_NORM) 477 return res; 478 /* 479 * If it is an iterative symlinks resolution in open_namei() we 480 * have to copy the last component. 
And all that crap because of 481 * bloody create() on broken symlinks. Furrfu... 482 */ 483 name = __getname(); 484 if (unlikely(!name)) { 485 path_release(nd); 486 return -ENOMEM; 487 } 488 strcpy(name, nd->last.name); 489 nd->last.name = name; 490 return 0; 491 fail: 492 path_release(nd); 493 return PTR_ERR(link); 494 } 495 496 struct path { 497 struct vfsmount *mnt; 498 struct dentry *dentry; 499 }; 500 501 static inline int __do_follow_link(struct path *path, struct nameidata *nd) 502 { 503 int error; 504 struct dentry *dentry = path->dentry; 505 506 touch_atime(path->mnt, dentry); 507 nd_set_link(nd, NULL); 508 509 if (path->mnt == nd->mnt) 510 mntget(path->mnt); 511 error = dentry->d_inode->i_op->follow_link(dentry, nd); 512 if (!error) { 513 char *s = nd_get_link(nd); 514 if (s) 515 error = __vfs_follow_link(nd, s); 516 if (dentry->d_inode->i_op->put_link) 517 dentry->d_inode->i_op->put_link(dentry, nd); 518 } 519 dput(dentry); 520 mntput(path->mnt); 521 522 return error; 523 } 524 525 /* 526 * This limits recursive symlink follows to 8, while 527 * limiting consecutive symlinks to 40. 528 * 529 * Without that kind of total limit, nasty chains of consecutive 530 * symlinks can cause almost arbitrarily long lookups. 
531 */ 532 static inline int do_follow_link(struct path *path, struct nameidata *nd) 533 { 534 int err = -ELOOP; 535 if (current->link_count >= MAX_NESTED_LINKS) 536 goto loop; 537 if (current->total_link_count >= 40) 538 goto loop; 539 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 540 cond_resched(); 541 err = security_inode_follow_link(path->dentry, nd); 542 if (err) 543 goto loop; 544 current->link_count++; 545 current->total_link_count++; 546 nd->depth++; 547 err = __do_follow_link(path, nd); 548 current->link_count--; 549 nd->depth--; 550 return err; 551 loop: 552 dput(path->dentry); 553 if (path->mnt != nd->mnt) 554 mntput(path->mnt); 555 path_release(nd); 556 return err; 557 } 558 559 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 560 { 561 struct vfsmount *parent; 562 struct dentry *mountpoint; 563 spin_lock(&vfsmount_lock); 564 parent=(*mnt)->mnt_parent; 565 if (parent == *mnt) { 566 spin_unlock(&vfsmount_lock); 567 return 0; 568 } 569 mntget(parent); 570 mountpoint=dget((*mnt)->mnt_mountpoint); 571 spin_unlock(&vfsmount_lock); 572 dput(*dentry); 573 *dentry = mountpoint; 574 mntput(*mnt); 575 *mnt = parent; 576 return 1; 577 } 578 579 /* no need for dcache_lock, as serialization is taken care in 580 * namespace.c 581 */ 582 static int __follow_mount(struct path *path) 583 { 584 int res = 0; 585 while (d_mountpoint(path->dentry)) { 586 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); 587 if (!mounted) 588 break; 589 dput(path->dentry); 590 if (res) 591 mntput(path->mnt); 592 path->mnt = mounted; 593 path->dentry = dget(mounted->mnt_root); 594 res = 1; 595 } 596 return res; 597 } 598 599 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) 600 { 601 while (d_mountpoint(*dentry)) { 602 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 603 if (!mounted) 604 break; 605 dput(*dentry); 606 mntput(*mnt); 607 *mnt = mounted; 608 *dentry = dget(mounted->mnt_root); 609 } 610 } 611 612 /* no need for dcache_lock, as 
serialization is taken care in 613 * namespace.c 614 */ 615 int follow_down(struct vfsmount **mnt, struct dentry **dentry) 616 { 617 struct vfsmount *mounted; 618 619 mounted = lookup_mnt(*mnt, *dentry); 620 if (mounted) { 621 dput(*dentry); 622 mntput(*mnt); 623 *mnt = mounted; 624 *dentry = dget(mounted->mnt_root); 625 return 1; 626 } 627 return 0; 628 } 629 630 static inline void follow_dotdot(struct nameidata *nd) 631 { 632 while(1) { 633 struct vfsmount *parent; 634 struct dentry *old = nd->dentry; 635 636 read_lock(¤t->fs->lock); 637 if (nd->dentry == current->fs->root && 638 nd->mnt == current->fs->rootmnt) { 639 read_unlock(¤t->fs->lock); 640 break; 641 } 642 read_unlock(¤t->fs->lock); 643 spin_lock(&dcache_lock); 644 if (nd->dentry != nd->mnt->mnt_root) { 645 nd->dentry = dget(nd->dentry->d_parent); 646 spin_unlock(&dcache_lock); 647 dput(old); 648 break; 649 } 650 spin_unlock(&dcache_lock); 651 spin_lock(&vfsmount_lock); 652 parent = nd->mnt->mnt_parent; 653 if (parent == nd->mnt) { 654 spin_unlock(&vfsmount_lock); 655 break; 656 } 657 mntget(parent); 658 nd->dentry = dget(nd->mnt->mnt_mountpoint); 659 spin_unlock(&vfsmount_lock); 660 dput(old); 661 mntput(nd->mnt); 662 nd->mnt = parent; 663 } 664 follow_mount(&nd->mnt, &nd->dentry); 665 } 666 667 /* 668 * It's more convoluted than I'd like it to be, but... it's still fairly 669 * small and for now I'd prefer to have fast path as straight as possible. 670 * It _is_ time-critical. 
671 */ 672 static int do_lookup(struct nameidata *nd, struct qstr *name, 673 struct path *path) 674 { 675 struct vfsmount *mnt = nd->mnt; 676 struct dentry *dentry = __d_lookup(nd->dentry, name); 677 678 if (!dentry) 679 goto need_lookup; 680 if (dentry->d_op && dentry->d_op->d_revalidate) 681 goto need_revalidate; 682 done: 683 path->mnt = mnt; 684 path->dentry = dentry; 685 __follow_mount(path); 686 return 0; 687 688 need_lookup: 689 dentry = real_lookup(nd->dentry, name, nd); 690 if (IS_ERR(dentry)) 691 goto fail; 692 goto done; 693 694 need_revalidate: 695 if (dentry->d_op->d_revalidate(dentry, nd)) 696 goto done; 697 if (d_invalidate(dentry)) 698 goto done; 699 dput(dentry); 700 goto need_lookup; 701 702 fail: 703 return PTR_ERR(dentry); 704 } 705 706 /* 707 * Name resolution. 708 * This is the basic name resolution function, turning a pathname into 709 * the final dentry. We expect 'base' to be positive and a directory. 710 * 711 * Returns 0 and nd will have valid dentry and mnt on success. 712 * Returns error and drops reference to input namei data on failure. 713 */ 714 static fastcall int __link_path_walk(const char * name, struct nameidata *nd) 715 { 716 struct path next; 717 struct inode *inode; 718 int err; 719 unsigned int lookup_flags = nd->flags; 720 721 while (*name=='/') 722 name++; 723 if (!*name) 724 goto return_reval; 725 726 inode = nd->dentry->d_inode; 727 if (nd->depth) 728 lookup_flags = LOOKUP_FOLLOW; 729 730 /* At this point we know we have a real path component. 
*/ 731 for(;;) { 732 unsigned long hash; 733 struct qstr this; 734 unsigned int c; 735 736 err = exec_permission_lite(inode, nd); 737 if (err == -EAGAIN) { 738 err = permission(inode, MAY_EXEC, nd); 739 } 740 if (err) 741 break; 742 743 this.name = name; 744 c = *(const unsigned char *)name; 745 746 hash = init_name_hash(); 747 do { 748 name++; 749 hash = partial_name_hash(c, hash); 750 c = *(const unsigned char *)name; 751 } while (c && (c != '/')); 752 this.len = name - (const char *) this.name; 753 this.hash = end_name_hash(hash); 754 755 /* remove trailing slashes? */ 756 if (!c) 757 goto last_component; 758 while (*++name == '/'); 759 if (!*name) 760 goto last_with_slashes; 761 762 /* 763 * "." and ".." are special - ".." especially so because it has 764 * to be able to know about the current root directory and 765 * parent relationships. 766 */ 767 if (this.name[0] == '.') switch (this.len) { 768 default: 769 break; 770 case 2: 771 if (this.name[1] != '.') 772 break; 773 follow_dotdot(nd); 774 inode = nd->dentry->d_inode; 775 /* fallthrough */ 776 case 1: 777 continue; 778 } 779 /* 780 * See if the low-level filesystem might want 781 * to use its own hash.. 782 */ 783 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 784 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 785 if (err < 0) 786 break; 787 } 788 nd->flags |= LOOKUP_CONTINUE; 789 /* This does the actual lookups.. 
*/ 790 err = do_lookup(nd, &this, &next); 791 if (err) 792 break; 793 794 err = -ENOENT; 795 inode = next.dentry->d_inode; 796 if (!inode) 797 goto out_dput; 798 err = -ENOTDIR; 799 if (!inode->i_op) 800 goto out_dput; 801 802 if (inode->i_op->follow_link) { 803 err = do_follow_link(&next, nd); 804 if (err) 805 goto return_err; 806 err = -ENOENT; 807 inode = nd->dentry->d_inode; 808 if (!inode) 809 break; 810 err = -ENOTDIR; 811 if (!inode->i_op) 812 break; 813 } else { 814 dput(nd->dentry); 815 if (nd->mnt != next.mnt) 816 mntput(nd->mnt); 817 nd->mnt = next.mnt; 818 nd->dentry = next.dentry; 819 } 820 err = -ENOTDIR; 821 if (!inode->i_op->lookup) 822 break; 823 continue; 824 /* here ends the main loop */ 825 826 last_with_slashes: 827 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 828 last_component: 829 nd->flags &= ~LOOKUP_CONTINUE; 830 if (lookup_flags & LOOKUP_PARENT) 831 goto lookup_parent; 832 if (this.name[0] == '.') switch (this.len) { 833 default: 834 break; 835 case 2: 836 if (this.name[1] != '.') 837 break; 838 follow_dotdot(nd); 839 inode = nd->dentry->d_inode; 840 /* fallthrough */ 841 case 1: 842 goto return_reval; 843 } 844 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { 845 err = nd->dentry->d_op->d_hash(nd->dentry, &this); 846 if (err < 0) 847 break; 848 } 849 err = do_lookup(nd, &this, &next); 850 if (err) 851 break; 852 inode = next.dentry->d_inode; 853 if ((lookup_flags & LOOKUP_FOLLOW) 854 && inode && inode->i_op && inode->i_op->follow_link) { 855 err = do_follow_link(&next, nd); 856 if (err) 857 goto return_err; 858 inode = nd->dentry->d_inode; 859 } else { 860 dput(nd->dentry); 861 if (nd->mnt != next.mnt) 862 mntput(nd->mnt); 863 nd->mnt = next.mnt; 864 nd->dentry = next.dentry; 865 } 866 err = -ENOENT; 867 if (!inode) 868 break; 869 if (lookup_flags & LOOKUP_DIRECTORY) { 870 err = -ENOTDIR; 871 if (!inode->i_op || !inode->i_op->lookup) 872 break; 873 } 874 goto return_base; 875 lookup_parent: 876 nd->last = this; 877 nd->last_type 
= LAST_NORM; 878 if (this.name[0] != '.') 879 goto return_base; 880 if (this.len == 1) 881 nd->last_type = LAST_DOT; 882 else if (this.len == 2 && this.name[1] == '.') 883 nd->last_type = LAST_DOTDOT; 884 else 885 goto return_base; 886 return_reval: 887 /* 888 * We bypassed the ordinary revalidation routines. 889 * We may need to check the cached dentry for staleness. 890 */ 891 if (nd->dentry && nd->dentry->d_sb && 892 (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 893 err = -ESTALE; 894 /* Note: we do not d_invalidate() */ 895 if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) 896 break; 897 } 898 return_base: 899 return 0; 900 out_dput: 901 dput(next.dentry); 902 if (nd->mnt != next.mnt) 903 mntput(next.mnt); 904 break; 905 } 906 path_release(nd); 907 return_err: 908 return err; 909 } 910 911 /* 912 * Wrapper to retry pathname resolution whenever the underlying 913 * file system returns an ESTALE. 914 * 915 * Retry the whole path once, forcing real lookup requests 916 * instead of relying on the dcache. 917 */ 918 int fastcall link_path_walk(const char *name, struct nameidata *nd) 919 { 920 struct nameidata save = *nd; 921 int result; 922 923 /* make sure the stuff we saved doesn't go away */ 924 dget(save.dentry); 925 mntget(save.mnt); 926 927 result = __link_path_walk(name, nd); 928 if (result == -ESTALE) { 929 *nd = save; 930 dget(nd->dentry); 931 mntget(nd->mnt); 932 nd->flags |= LOOKUP_REVAL; 933 result = __link_path_walk(name, nd); 934 } 935 936 dput(save.dentry); 937 mntput(save.mnt); 938 939 return result; 940 } 941 942 int fastcall path_walk(const char * name, struct nameidata *nd) 943 { 944 current->total_link_count = 0; 945 return link_path_walk(name, nd); 946 } 947 948 /* 949 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 950 * everything is done. 
Returns 0 and drops input nd, if lookup failed; 951 */ 952 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 953 { 954 if (path_walk(name, nd)) 955 return 0; /* something went wrong... */ 956 957 if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { 958 struct dentry *old_dentry = nd->dentry; 959 struct vfsmount *old_mnt = nd->mnt; 960 struct qstr last = nd->last; 961 int last_type = nd->last_type; 962 /* 963 * NAME was not found in alternate root or it's a directory. Try to find 964 * it in the normal root: 965 */ 966 nd->last_type = LAST_ROOT; 967 read_lock(¤t->fs->lock); 968 nd->mnt = mntget(current->fs->rootmnt); 969 nd->dentry = dget(current->fs->root); 970 read_unlock(¤t->fs->lock); 971 if (path_walk(name, nd) == 0) { 972 if (nd->dentry->d_inode) { 973 dput(old_dentry); 974 mntput(old_mnt); 975 return 1; 976 } 977 path_release(nd); 978 } 979 nd->dentry = old_dentry; 980 nd->mnt = old_mnt; 981 nd->last = last; 982 nd->last_type = last_type; 983 } 984 return 1; 985 } 986 987 void set_fs_altroot(void) 988 { 989 char *emul = __emul_prefix(); 990 struct nameidata nd; 991 struct vfsmount *mnt = NULL, *oldmnt; 992 struct dentry *dentry = NULL, *olddentry; 993 int err; 994 995 if (!emul) 996 goto set_it; 997 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); 998 if (!err) { 999 mnt = nd.mnt; 1000 dentry = nd.dentry; 1001 } 1002 set_it: 1003 write_lock(¤t->fs->lock); 1004 oldmnt = current->fs->altrootmnt; 1005 olddentry = current->fs->altroot; 1006 current->fs->altrootmnt = mnt; 1007 current->fs->altroot = dentry; 1008 write_unlock(¤t->fs->lock); 1009 if (olddentry) { 1010 dput(olddentry); 1011 mntput(oldmnt); 1012 } 1013 } 1014 1015 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1016 int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) 1017 { 1018 int retval = 0; 1019 1020 nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ 1021 nd->flags = flags; 1022 nd->depth = 0; 1023 1024 read_lock(¤t->fs->lock); 1025 if (*name=='/') { 1026 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { 1027 nd->mnt = mntget(current->fs->altrootmnt); 1028 nd->dentry = dget(current->fs->altroot); 1029 read_unlock(¤t->fs->lock); 1030 if (__emul_lookup_dentry(name,nd)) 1031 goto out; /* found in altroot */ 1032 read_lock(¤t->fs->lock); 1033 } 1034 nd->mnt = mntget(current->fs->rootmnt); 1035 nd->dentry = dget(current->fs->root); 1036 } else { 1037 nd->mnt = mntget(current->fs->pwdmnt); 1038 nd->dentry = dget(current->fs->pwd); 1039 } 1040 read_unlock(¤t->fs->lock); 1041 current->total_link_count = 0; 1042 retval = link_path_walk(name, nd); 1043 out: 1044 if (unlikely(current->audit_context 1045 && nd && nd->dentry && nd->dentry->d_inode)) 1046 audit_inode(name, nd->dentry->d_inode); 1047 return retval; 1048 } 1049 1050 /* 1051 * Restricted form of lookup. Doesn't follow links, single-component only, 1052 * needs parent already locked. Doesn't follow mounts. 1053 * SMP-safe. 1054 */ 1055 static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) 1056 { 1057 struct dentry * dentry; 1058 struct inode *inode; 1059 int err; 1060 1061 inode = base->d_inode; 1062 err = permission(inode, MAY_EXEC, nd); 1063 dentry = ERR_PTR(err); 1064 if (err) 1065 goto out; 1066 1067 /* 1068 * See if the low-level filesystem might want 1069 * to use its own hash.. 
1070 */ 1071 if (base->d_op && base->d_op->d_hash) { 1072 err = base->d_op->d_hash(base, name); 1073 dentry = ERR_PTR(err); 1074 if (err < 0) 1075 goto out; 1076 } 1077 1078 dentry = cached_lookup(base, name, nd); 1079 if (!dentry) { 1080 struct dentry *new = d_alloc(base, name); 1081 dentry = ERR_PTR(-ENOMEM); 1082 if (!new) 1083 goto out; 1084 dentry = inode->i_op->lookup(inode, new, nd); 1085 if (!dentry) 1086 dentry = new; 1087 else 1088 dput(new); 1089 } 1090 out: 1091 return dentry; 1092 } 1093 1094 struct dentry * lookup_hash(struct qstr *name, struct dentry * base) 1095 { 1096 return __lookup_hash(name, base, NULL); 1097 } 1098 1099 /* SMP-safe */ 1100 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) 1101 { 1102 unsigned long hash; 1103 struct qstr this; 1104 unsigned int c; 1105 1106 this.name = name; 1107 this.len = len; 1108 if (!len) 1109 goto access; 1110 1111 hash = init_name_hash(); 1112 while (len--) { 1113 c = *(const unsigned char *)name++; 1114 if (c == '/' || c == '\0') 1115 goto access; 1116 hash = partial_name_hash(c, hash); 1117 } 1118 this.hash = end_name_hash(hash); 1119 1120 return lookup_hash(&this, base); 1121 access: 1122 return ERR_PTR(-EACCES); 1123 } 1124 1125 /* 1126 * namei() 1127 * 1128 * is used by most simple commands to get the inode of a specified name. 1129 * Open, link etc use their own routines, but this is enough for things 1130 * like 'chmod' etc. 1131 * 1132 * namei exists in two versions: namei/lnamei. The only difference is 1133 * that namei follows links, while lnamei does not. 1134 * SMP-safe 1135 */ 1136 int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1137 { 1138 char *tmp = getname(name); 1139 int err = PTR_ERR(tmp); 1140 1141 if (!IS_ERR(tmp)) { 1142 err = path_lookup(tmp, flags, nd); 1143 putname(tmp); 1144 } 1145 return err; 1146 } 1147 1148 /* 1149 * It's inline, so penalty for filesystems that don't use sticky bit is 1150 * minimal. 
1151 */ 1152 static inline int check_sticky(struct inode *dir, struct inode *inode) 1153 { 1154 if (!(dir->i_mode & S_ISVTX)) 1155 return 0; 1156 if (inode->i_uid == current->fsuid) 1157 return 0; 1158 if (dir->i_uid == current->fsuid) 1159 return 0; 1160 return !capable(CAP_FOWNER); 1161 } 1162 1163 /* 1164 * Check whether we can remove a link victim from directory dir, check 1165 * whether the type of victim is right. 1166 * 1. We can't do it if dir is read-only (done in permission()) 1167 * 2. We should have write and exec permissions on dir 1168 * 3. We can't remove anything from append-only dir 1169 * 4. We can't do anything with immutable dir (done in permission()) 1170 * 5. If the sticky bit on dir is set we should either 1171 * a. be owner of dir, or 1172 * b. be owner of victim, or 1173 * c. have CAP_FOWNER capability 1174 * 6. If the victim is append-only or immutable we can't do antyhing with 1175 * links pointing to it. 1176 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 1177 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1178 * 9. We can't remove a root or mountpoint. 1179 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1180 * nfs_async_unlink(). 
1181 */ 1182 static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1183 { 1184 int error; 1185 1186 if (!victim->d_inode) 1187 return -ENOENT; 1188 1189 BUG_ON(victim->d_parent->d_inode != dir); 1190 1191 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1192 if (error) 1193 return error; 1194 if (IS_APPEND(dir)) 1195 return -EPERM; 1196 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1197 IS_IMMUTABLE(victim->d_inode)) 1198 return -EPERM; 1199 if (isdir) { 1200 if (!S_ISDIR(victim->d_inode->i_mode)) 1201 return -ENOTDIR; 1202 if (IS_ROOT(victim)) 1203 return -EBUSY; 1204 } else if (S_ISDIR(victim->d_inode->i_mode)) 1205 return -EISDIR; 1206 if (IS_DEADDIR(dir)) 1207 return -ENOENT; 1208 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1209 return -EBUSY; 1210 return 0; 1211 } 1212 1213 /* Check whether we can create an object with dentry child in directory 1214 * dir. 1215 * 1. We can't do it if child already exists (open has special treatment for 1216 * this case, but since we are inlined it's OK) 1217 * 2. We can't do it if dir is read-only (done in permission()) 1218 * 3. We should have write and exec permissions on dir 1219 * 4. We can't do it if dir is immutable (done in permission()) 1220 */ 1221 static inline int may_create(struct inode *dir, struct dentry *child, 1222 struct nameidata *nd) 1223 { 1224 if (child->d_inode) 1225 return -EEXIST; 1226 if (IS_DEADDIR(dir)) 1227 return -ENOENT; 1228 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1229 } 1230 1231 /* 1232 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security 1233 * reasons. 1234 * 1235 * O_DIRECTORY translates into forcing a directory lookup. 
1236 */ 1237 static inline int lookup_flags(unsigned int f) 1238 { 1239 unsigned long retval = LOOKUP_FOLLOW; 1240 1241 if (f & O_NOFOLLOW) 1242 retval &= ~LOOKUP_FOLLOW; 1243 1244 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1245 retval &= ~LOOKUP_FOLLOW; 1246 1247 if (f & O_DIRECTORY) 1248 retval |= LOOKUP_DIRECTORY; 1249 1250 return retval; 1251 } 1252 1253 /* 1254 * p1 and p2 should be directories on the same fs. 1255 */ 1256 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) 1257 { 1258 struct dentry *p; 1259 1260 if (p1 == p2) { 1261 down(&p1->d_inode->i_sem); 1262 return NULL; 1263 } 1264 1265 down(&p1->d_inode->i_sb->s_vfs_rename_sem); 1266 1267 for (p = p1; p->d_parent != p; p = p->d_parent) { 1268 if (p->d_parent == p2) { 1269 down(&p2->d_inode->i_sem); 1270 down(&p1->d_inode->i_sem); 1271 return p; 1272 } 1273 } 1274 1275 for (p = p2; p->d_parent != p; p = p->d_parent) { 1276 if (p->d_parent == p1) { 1277 down(&p1->d_inode->i_sem); 1278 down(&p2->d_inode->i_sem); 1279 return p; 1280 } 1281 } 1282 1283 down(&p1->d_inode->i_sem); 1284 down(&p2->d_inode->i_sem); 1285 return NULL; 1286 } 1287 1288 void unlock_rename(struct dentry *p1, struct dentry *p2) 1289 { 1290 up(&p1->d_inode->i_sem); 1291 if (p1 != p2) { 1292 up(&p2->d_inode->i_sem); 1293 up(&p1->d_inode->i_sb->s_vfs_rename_sem); 1294 } 1295 } 1296 1297 int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1298 struct nameidata *nd) 1299 { 1300 int error = may_create(dir, dentry, nd); 1301 1302 if (error) 1303 return error; 1304 1305 if (!dir->i_op || !dir->i_op->create) 1306 return -EACCES; /* shouldn't it be ENOSYS? 
*/ 1307 mode &= S_IALLUGO; 1308 mode |= S_IFREG; 1309 error = security_inode_create(dir, dentry, mode); 1310 if (error) 1311 return error; 1312 DQUOT_INIT(dir); 1313 error = dir->i_op->create(dir, dentry, mode, nd); 1314 if (!error) { 1315 fsnotify_create(dir, dentry->d_name.name); 1316 security_inode_post_create(dir, dentry, mode); 1317 } 1318 return error; 1319 } 1320 1321 int may_open(struct nameidata *nd, int acc_mode, int flag) 1322 { 1323 struct dentry *dentry = nd->dentry; 1324 struct inode *inode = dentry->d_inode; 1325 int error; 1326 1327 if (!inode) 1328 return -ENOENT; 1329 1330 if (S_ISLNK(inode->i_mode)) 1331 return -ELOOP; 1332 1333 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) 1334 return -EISDIR; 1335 1336 error = permission(inode, acc_mode, nd); 1337 if (error) 1338 return error; 1339 1340 /* 1341 * FIFO's, sockets and device files are special: they don't 1342 * actually live on the filesystem itself, and as such you 1343 * can write to them even if the filesystem is read-only. 1344 */ 1345 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1346 flag &= ~O_TRUNC; 1347 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1348 if (nd->mnt->mnt_flags & MNT_NODEV) 1349 return -EACCES; 1350 1351 flag &= ~O_TRUNC; 1352 } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) 1353 return -EROFS; 1354 /* 1355 * An append-only file must be opened in append mode for writing. 1356 */ 1357 if (IS_APPEND(inode)) { 1358 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1359 return -EPERM; 1360 if (flag & O_TRUNC) 1361 return -EPERM; 1362 } 1363 1364 /* O_NOATIME can only be set by the owner or superuser */ 1365 if (flag & O_NOATIME) 1366 if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) 1367 return -EPERM; 1368 1369 /* 1370 * Ensure there are no outstanding leases on the file. 
1371 */ 1372 error = break_lease(inode, flag); 1373 if (error) 1374 return error; 1375 1376 if (flag & O_TRUNC) { 1377 error = get_write_access(inode); 1378 if (error) 1379 return error; 1380 1381 /* 1382 * Refuse to truncate files with mandatory locks held on them. 1383 */ 1384 error = locks_verify_locked(inode); 1385 if (!error) { 1386 DQUOT_INIT(inode); 1387 1388 error = do_truncate(dentry, 0); 1389 } 1390 put_write_access(inode); 1391 if (error) 1392 return error; 1393 } else 1394 if (flag & FMODE_WRITE) 1395 DQUOT_INIT(inode); 1396 1397 return 0; 1398 } 1399 1400 /* 1401 * open_namei() 1402 * 1403 * namei for open - this is in fact almost the whole open-routine. 1404 * 1405 * Note that the low bits of "flag" aren't the same as in the open 1406 * system call - they are 00 - no permissions needed 1407 * 01 - read permission needed 1408 * 10 - write permission needed 1409 * 11 - read/write permissions needed 1410 * which is a lot more logical, and also allows the "no perm" needed 1411 * for symlinks (where the permissions are checked later). 1412 * SMP-safe 1413 */ 1414 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) 1415 { 1416 int acc_mode, error = 0; 1417 struct path path; 1418 struct dentry *dir; 1419 int count = 0; 1420 1421 acc_mode = ACC_MODE(flag); 1422 1423 /* Allow the LSM permission hook to distinguish append 1424 access from general write access. */ 1425 if (flag & O_APPEND) 1426 acc_mode |= MAY_APPEND; 1427 1428 /* Fill in the open() intent data */ 1429 nd->intent.open.flags = flag; 1430 nd->intent.open.create_mode = mode; 1431 1432 /* 1433 * The simplest case - just a plain lookup. 1434 */ 1435 if (!(flag & O_CREAT)) { 1436 error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); 1437 if (error) 1438 return error; 1439 goto ok; 1440 } 1441 1442 /* 1443 * Create - we need to know the parent. 
1444 */ 1445 error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); 1446 if (error) 1447 return error; 1448 1449 /* 1450 * We have the parent and last component. First of all, check 1451 * that we are not asked to creat(2) an obvious directory - that 1452 * will not do. 1453 */ 1454 error = -EISDIR; 1455 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1456 goto exit; 1457 1458 dir = nd->dentry; 1459 nd->flags &= ~LOOKUP_PARENT; 1460 down(&dir->d_inode->i_sem); 1461 path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1462 path.mnt = nd->mnt; 1463 1464 do_last: 1465 error = PTR_ERR(path.dentry); 1466 if (IS_ERR(path.dentry)) { 1467 up(&dir->d_inode->i_sem); 1468 goto exit; 1469 } 1470 1471 /* Negative dentry, just create the file */ 1472 if (!path.dentry->d_inode) { 1473 if (!IS_POSIXACL(dir->d_inode)) 1474 mode &= ~current->fs->umask; 1475 error = vfs_create(dir->d_inode, path.dentry, mode, nd); 1476 up(&dir->d_inode->i_sem); 1477 dput(nd->dentry); 1478 nd->dentry = path.dentry; 1479 if (error) 1480 goto exit; 1481 /* Don't check for write permission, don't truncate */ 1482 acc_mode = 0; 1483 flag &= ~O_TRUNC; 1484 goto ok; 1485 } 1486 1487 /* 1488 * It already exists. 
1489 */ 1490 up(&dir->d_inode->i_sem); 1491 1492 error = -EEXIST; 1493 if (flag & O_EXCL) 1494 goto exit_dput; 1495 1496 if (__follow_mount(&path)) { 1497 error = -ELOOP; 1498 if (flag & O_NOFOLLOW) 1499 goto exit_dput; 1500 } 1501 error = -ENOENT; 1502 if (!path.dentry->d_inode) 1503 goto exit_dput; 1504 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1505 goto do_link; 1506 1507 dput(nd->dentry); 1508 nd->dentry = path.dentry; 1509 if (nd->mnt != path.mnt) 1510 mntput(nd->mnt); 1511 nd->mnt = path.mnt; 1512 error = -EISDIR; 1513 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1514 goto exit; 1515 ok: 1516 error = may_open(nd, acc_mode, flag); 1517 if (error) 1518 goto exit; 1519 return 0; 1520 1521 exit_dput: 1522 dput(path.dentry); 1523 if (nd->mnt != path.mnt) 1524 mntput(path.mnt); 1525 exit: 1526 path_release(nd); 1527 return error; 1528 1529 do_link: 1530 error = -ELOOP; 1531 if (flag & O_NOFOLLOW) 1532 goto exit_dput; 1533 /* 1534 * This is subtle. Instead of calling do_follow_link() we do the 1535 * thing by hands. The reason is that this way we have zero link_count 1536 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1537 * After that we have the parent and last component, i.e. 1538 * we are in the same situation as after the first path_walk(). 1539 * Well, almost - if the last component is normal we get its copy 1540 * stored in nd->last.name and we will have to putname() it when we 1541 * are done. Procfs-like symlinks just set LAST_BIND. 
1542 */ 1543 nd->flags |= LOOKUP_PARENT; 1544 error = security_inode_follow_link(path.dentry, nd); 1545 if (error) 1546 goto exit_dput; 1547 error = __do_follow_link(&path, nd); 1548 if (error) 1549 return error; 1550 nd->flags &= ~LOOKUP_PARENT; 1551 if (nd->last_type == LAST_BIND) 1552 goto ok; 1553 error = -EISDIR; 1554 if (nd->last_type != LAST_NORM) 1555 goto exit; 1556 if (nd->last.name[nd->last.len]) { 1557 putname(nd->last.name); 1558 goto exit; 1559 } 1560 error = -ELOOP; 1561 if (count++==32) { 1562 putname(nd->last.name); 1563 goto exit; 1564 } 1565 dir = nd->dentry; 1566 down(&dir->d_inode->i_sem); 1567 path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); 1568 path.mnt = nd->mnt; 1569 putname(nd->last.name); 1570 goto do_last; 1571 } 1572 1573 /** 1574 * lookup_create - lookup a dentry, creating it if it doesn't exist 1575 * @nd: nameidata info 1576 * @is_dir: directory flag 1577 * 1578 * Simple function to lookup and return a dentry and create it 1579 * if it doesn't exist. Is SMP-safe. 1580 * 1581 * Returns with nd->dentry->d_inode->i_sem locked. 1582 */ 1583 struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1584 { 1585 struct dentry *dentry = ERR_PTR(-EEXIST); 1586 1587 down(&nd->dentry->d_inode->i_sem); 1588 /* 1589 * Yucky last component or no last component at all? 1590 * (foo/., foo/.., /////) 1591 */ 1592 if (nd->last_type != LAST_NORM) 1593 goto fail; 1594 nd->flags &= ~LOOKUP_PARENT; 1595 1596 /* 1597 * Do the final lookup. 1598 */ 1599 dentry = lookup_hash(&nd->last, nd->dentry); 1600 if (IS_ERR(dentry)) 1601 goto fail; 1602 1603 /* 1604 * Special case - lookup gave negative, but... we had foo/bar/ 1605 * From the vfs_mknod() POV we just have a negative dentry - 1606 * all is fine. Let's be bastards - you had / on the end, you've 1607 * been asking for (non-existent) directory. -ENOENT for you. 
1608 */ 1609 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) 1610 goto enoent; 1611 return dentry; 1612 enoent: 1613 dput(dentry); 1614 dentry = ERR_PTR(-ENOENT); 1615 fail: 1616 return dentry; 1617 } 1618 EXPORT_SYMBOL_GPL(lookup_create); 1619 1620 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1621 { 1622 int error = may_create(dir, dentry, NULL); 1623 1624 if (error) 1625 return error; 1626 1627 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 1628 return -EPERM; 1629 1630 if (!dir->i_op || !dir->i_op->mknod) 1631 return -EPERM; 1632 1633 error = security_inode_mknod(dir, dentry, mode, dev); 1634 if (error) 1635 return error; 1636 1637 DQUOT_INIT(dir); 1638 error = dir->i_op->mknod(dir, dentry, mode, dev); 1639 if (!error) { 1640 fsnotify_create(dir, dentry->d_name.name); 1641 security_inode_post_mknod(dir, dentry, mode, dev); 1642 } 1643 return error; 1644 } 1645 1646 asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) 1647 { 1648 int error = 0; 1649 char * tmp; 1650 struct dentry * dentry; 1651 struct nameidata nd; 1652 1653 if (S_ISDIR(mode)) 1654 return -EPERM; 1655 tmp = getname(filename); 1656 if (IS_ERR(tmp)) 1657 return PTR_ERR(tmp); 1658 1659 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1660 if (error) 1661 goto out; 1662 dentry = lookup_create(&nd, 0); 1663 error = PTR_ERR(dentry); 1664 1665 if (!IS_POSIXACL(nd.dentry->d_inode)) 1666 mode &= ~current->fs->umask; 1667 if (!IS_ERR(dentry)) { 1668 switch (mode & S_IFMT) { 1669 case 0: case S_IFREG: 1670 error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); 1671 break; 1672 case S_IFCHR: case S_IFBLK: 1673 error = vfs_mknod(nd.dentry->d_inode,dentry,mode, 1674 new_decode_dev(dev)); 1675 break; 1676 case S_IFIFO: case S_IFSOCK: 1677 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); 1678 break; 1679 case S_IFDIR: 1680 error = -EPERM; 1681 break; 1682 default: 1683 error = -EINVAL; 1684 } 1685 dput(dentry); 1686 } 1687 
up(&nd.dentry->d_inode->i_sem); 1688 path_release(&nd); 1689 out: 1690 putname(tmp); 1691 1692 return error; 1693 } 1694 1695 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1696 { 1697 int error = may_create(dir, dentry, NULL); 1698 1699 if (error) 1700 return error; 1701 1702 if (!dir->i_op || !dir->i_op->mkdir) 1703 return -EPERM; 1704 1705 mode &= (S_IRWXUGO|S_ISVTX); 1706 error = security_inode_mkdir(dir, dentry, mode); 1707 if (error) 1708 return error; 1709 1710 DQUOT_INIT(dir); 1711 error = dir->i_op->mkdir(dir, dentry, mode); 1712 if (!error) { 1713 fsnotify_mkdir(dir, dentry->d_name.name); 1714 security_inode_post_mkdir(dir,dentry, mode); 1715 } 1716 return error; 1717 } 1718 1719 asmlinkage long sys_mkdir(const char __user * pathname, int mode) 1720 { 1721 int error = 0; 1722 char * tmp; 1723 1724 tmp = getname(pathname); 1725 error = PTR_ERR(tmp); 1726 if (!IS_ERR(tmp)) { 1727 struct dentry *dentry; 1728 struct nameidata nd; 1729 1730 error = path_lookup(tmp, LOOKUP_PARENT, &nd); 1731 if (error) 1732 goto out; 1733 dentry = lookup_create(&nd, 1); 1734 error = PTR_ERR(dentry); 1735 if (!IS_ERR(dentry)) { 1736 if (!IS_POSIXACL(nd.dentry->d_inode)) 1737 mode &= ~current->fs->umask; 1738 error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); 1739 dput(dentry); 1740 } 1741 up(&nd.dentry->d_inode->i_sem); 1742 path_release(&nd); 1743 out: 1744 putname(tmp); 1745 } 1746 1747 return error; 1748 } 1749 1750 /* 1751 * We try to drop the dentry early: we should have 1752 * a usage count of 2 if we're the only user of this 1753 * dentry, and if that is true (possibly after pruning 1754 * the dcache), then we drop the dentry now. 1755 * 1756 * A low-level filesystem can, if it choses, legally 1757 * do a 1758 * 1759 * if (!d_unhashed(dentry)) 1760 * return -EBUSY; 1761 * 1762 * if it cannot handle the case of removing a directory 1763 * that is still in use by something else.. 
1764 */ 1765 void dentry_unhash(struct dentry *dentry) 1766 { 1767 dget(dentry); 1768 if (atomic_read(&dentry->d_count)) 1769 shrink_dcache_parent(dentry); 1770 spin_lock(&dcache_lock); 1771 spin_lock(&dentry->d_lock); 1772 if (atomic_read(&dentry->d_count) == 2) 1773 __d_drop(dentry); 1774 spin_unlock(&dentry->d_lock); 1775 spin_unlock(&dcache_lock); 1776 } 1777 1778 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 1779 { 1780 int error = may_delete(dir, dentry, 1); 1781 1782 if (error) 1783 return error; 1784 1785 if (!dir->i_op || !dir->i_op->rmdir) 1786 return -EPERM; 1787 1788 DQUOT_INIT(dir); 1789 1790 down(&dentry->d_inode->i_sem); 1791 dentry_unhash(dentry); 1792 if (d_mountpoint(dentry)) 1793 error = -EBUSY; 1794 else { 1795 error = security_inode_rmdir(dir, dentry); 1796 if (!error) { 1797 error = dir->i_op->rmdir(dir, dentry); 1798 if (!error) 1799 dentry->d_inode->i_flags |= S_DEAD; 1800 } 1801 } 1802 up(&dentry->d_inode->i_sem); 1803 if (!error) { 1804 fsnotify_rmdir(dentry, dentry->d_inode, dir); 1805 d_delete(dentry); 1806 } 1807 dput(dentry); 1808 1809 return error; 1810 } 1811 1812 asmlinkage long sys_rmdir(const char __user * pathname) 1813 { 1814 int error = 0; 1815 char * name; 1816 struct dentry *dentry; 1817 struct nameidata nd; 1818 1819 name = getname(pathname); 1820 if(IS_ERR(name)) 1821 return PTR_ERR(name); 1822 1823 error = path_lookup(name, LOOKUP_PARENT, &nd); 1824 if (error) 1825 goto exit; 1826 1827 switch(nd.last_type) { 1828 case LAST_DOTDOT: 1829 error = -ENOTEMPTY; 1830 goto exit1; 1831 case LAST_DOT: 1832 error = -EINVAL; 1833 goto exit1; 1834 case LAST_ROOT: 1835 error = -EBUSY; 1836 goto exit1; 1837 } 1838 down(&nd.dentry->d_inode->i_sem); 1839 dentry = lookup_hash(&nd.last, nd.dentry); 1840 error = PTR_ERR(dentry); 1841 if (!IS_ERR(dentry)) { 1842 error = vfs_rmdir(nd.dentry->d_inode, dentry); 1843 dput(dentry); 1844 } 1845 up(&nd.dentry->d_inode->i_sem); 1846 exit1: 1847 path_release(&nd); 1848 exit: 1849 
putname(name); 1850 return error; 1851 } 1852 1853 int vfs_unlink(struct inode *dir, struct dentry *dentry) 1854 { 1855 int error = may_delete(dir, dentry, 0); 1856 1857 if (error) 1858 return error; 1859 1860 if (!dir->i_op || !dir->i_op->unlink) 1861 return -EPERM; 1862 1863 DQUOT_INIT(dir); 1864 1865 down(&dentry->d_inode->i_sem); 1866 if (d_mountpoint(dentry)) 1867 error = -EBUSY; 1868 else { 1869 error = security_inode_unlink(dir, dentry); 1870 if (!error) 1871 error = dir->i_op->unlink(dir, dentry); 1872 } 1873 up(&dentry->d_inode->i_sem); 1874 1875 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 1876 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 1877 fsnotify_unlink(dentry, dir); 1878 d_delete(dentry); 1879 } 1880 1881 return error; 1882 } 1883 1884 /* 1885 * Make sure that the actual truncation of the file will occur outside its 1886 * directory's i_sem. Truncate can take a long time if there is a lot of 1887 * writeout happening, and we don't want to prevent access to the directory 1888 * while waiting on the I/O. 1889 */ 1890 asmlinkage long sys_unlink(const char __user * pathname) 1891 { 1892 int error = 0; 1893 char * name; 1894 struct dentry *dentry; 1895 struct nameidata nd; 1896 struct inode *inode = NULL; 1897 1898 name = getname(pathname); 1899 if(IS_ERR(name)) 1900 return PTR_ERR(name); 1901 1902 error = path_lookup(name, LOOKUP_PARENT, &nd); 1903 if (error) 1904 goto exit; 1905 error = -EISDIR; 1906 if (nd.last_type != LAST_NORM) 1907 goto exit1; 1908 down(&nd.dentry->d_inode->i_sem); 1909 dentry = lookup_hash(&nd.last, nd.dentry); 1910 error = PTR_ERR(dentry); 1911 if (!IS_ERR(dentry)) { 1912 /* Why not before? 
Because we want correct error value */ 1913 if (nd.last.name[nd.last.len]) 1914 goto slashes; 1915 inode = dentry->d_inode; 1916 if (inode) 1917 atomic_inc(&inode->i_count); 1918 error = vfs_unlink(nd.dentry->d_inode, dentry); 1919 exit2: 1920 dput(dentry); 1921 } 1922 up(&nd.dentry->d_inode->i_sem); 1923 if (inode) 1924 iput(inode); /* truncate the inode here */ 1925 exit1: 1926 path_release(&nd); 1927 exit: 1928 putname(name); 1929 return error; 1930 1931 slashes: 1932 error = !dentry->d_inode ? -ENOENT : 1933 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 1934 goto exit2; 1935 } 1936 1937 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 1938 { 1939 int error = may_create(dir, dentry, NULL); 1940 1941 if (error) 1942 return error; 1943 1944 if (!dir->i_op || !dir->i_op->symlink) 1945 return -EPERM; 1946 1947 error = security_inode_symlink(dir, dentry, oldname); 1948 if (error) 1949 return error; 1950 1951 DQUOT_INIT(dir); 1952 error = dir->i_op->symlink(dir, dentry, oldname); 1953 if (!error) { 1954 fsnotify_create(dir, dentry->d_name.name); 1955 security_inode_post_symlink(dir, dentry, oldname); 1956 } 1957 return error; 1958 } 1959 1960 asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname) 1961 { 1962 int error = 0; 1963 char * from; 1964 char * to; 1965 1966 from = getname(oldname); 1967 if(IS_ERR(from)) 1968 return PTR_ERR(from); 1969 to = getname(newname); 1970 error = PTR_ERR(to); 1971 if (!IS_ERR(to)) { 1972 struct dentry *dentry; 1973 struct nameidata nd; 1974 1975 error = path_lookup(to, LOOKUP_PARENT, &nd); 1976 if (error) 1977 goto out; 1978 dentry = lookup_create(&nd, 0); 1979 error = PTR_ERR(dentry); 1980 if (!IS_ERR(dentry)) { 1981 error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); 1982 dput(dentry); 1983 } 1984 up(&nd.dentry->d_inode->i_sem); 1985 path_release(&nd); 1986 out: 1987 putname(to); 1988 } 1989 putname(from); 1990 return error; 1991 } 1992 1993 
int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 1994 { 1995 struct inode *inode = old_dentry->d_inode; 1996 int error; 1997 1998 if (!inode) 1999 return -ENOENT; 2000 2001 error = may_create(dir, new_dentry, NULL); 2002 if (error) 2003 return error; 2004 2005 if (dir->i_sb != inode->i_sb) 2006 return -EXDEV; 2007 2008 /* 2009 * A link to an append-only or immutable file cannot be created. 2010 */ 2011 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 2012 return -EPERM; 2013 if (!dir->i_op || !dir->i_op->link) 2014 return -EPERM; 2015 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2016 return -EPERM; 2017 2018 error = security_inode_link(old_dentry, dir, new_dentry); 2019 if (error) 2020 return error; 2021 2022 down(&old_dentry->d_inode->i_sem); 2023 DQUOT_INIT(dir); 2024 error = dir->i_op->link(old_dentry, dir, new_dentry); 2025 up(&old_dentry->d_inode->i_sem); 2026 if (!error) { 2027 fsnotify_create(dir, new_dentry->d_name.name); 2028 security_inode_post_link(old_dentry, dir, new_dentry); 2029 } 2030 return error; 2031 } 2032 2033 /* 2034 * Hardlinks are often used in delicate situations. We avoid 2035 * security-related surprises by not following symlinks on the 2036 * newname. --KAB 2037 * 2038 * We don't follow them on the oldname either to be compatible 2039 * with linux 2.0, and to avoid hard-linking to directories 2040 * and other special files. 
--ADM 2041 */ 2042 asmlinkage long sys_link(const char __user * oldname, const char __user * newname) 2043 { 2044 struct dentry *new_dentry; 2045 struct nameidata nd, old_nd; 2046 int error; 2047 char * to; 2048 2049 to = getname(newname); 2050 if (IS_ERR(to)) 2051 return PTR_ERR(to); 2052 2053 error = __user_walk(oldname, 0, &old_nd); 2054 if (error) 2055 goto exit; 2056 error = path_lookup(to, LOOKUP_PARENT, &nd); 2057 if (error) 2058 goto out; 2059 error = -EXDEV; 2060 if (old_nd.mnt != nd.mnt) 2061 goto out_release; 2062 new_dentry = lookup_create(&nd, 0); 2063 error = PTR_ERR(new_dentry); 2064 if (!IS_ERR(new_dentry)) { 2065 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); 2066 dput(new_dentry); 2067 } 2068 up(&nd.dentry->d_inode->i_sem); 2069 out_release: 2070 path_release(&nd); 2071 out: 2072 path_release(&old_nd); 2073 exit: 2074 putname(to); 2075 2076 return error; 2077 } 2078 2079 /* 2080 * The worst of all namespace operations - renaming directory. "Perverted" 2081 * doesn't even start to describe it. Somebody in UCB had a heck of a trip... 2082 * Problems: 2083 * a) we can get into loop creation. Check is done in is_subdir(). 2084 * b) race potential - two innocent renames can create a loop together. 2085 * That's where 4.4 screws up. Current fix: serialization on 2086 * sb->s_vfs_rename_sem. We might be more accurate, but that's another 2087 * story. 2088 * c) we have to lock _three_ objects - parents and victim (if it exists). 2089 * And that - after we got ->i_sem on parents (until then we don't know 2090 * whether the target exists). Solution: try to be smart with locking 2091 * order for inodes. We rely on the fact that tree topology may change 2092 * only under ->s_vfs_rename_sem _and_ that parent of the object we 2093 * move will be locked. Thus we can rank directories by the tree 2094 * (ancestors first) and rank all non-directories after them. 
2095 * That works since everybody except rename does "lock parent, lookup, 2096 * lock child" and rename is under ->s_vfs_rename_sem. 2097 * HOWEVER, it relies on the assumption that any object with ->lookup() 2098 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2099 * we'd better make sure that there's no link(2) for them. 2100 * d) some filesystems don't support opened-but-unlinked directories, 2101 * either because of layout or because they are not ready to deal with 2102 * all cases correctly. The latter will be fixed (taking this sort of 2103 * stuff into VFS), but the former is not going away. Solution: the same 2104 * trick as in rmdir(). 2105 * e) conversion from fhandle to dentry may come in the wrong moment - when 2106 * we are removing the target. Solution: we will have to grab ->i_sem 2107 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2108 * ->i_sem on parents, which works but leads to some truely excessive 2109 * locking]. 2110 */ 2111 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2112 struct inode *new_dir, struct dentry *new_dentry) 2113 { 2114 int error = 0; 2115 struct inode *target; 2116 2117 /* 2118 * If we are going to change the parent - check write permissions, 2119 * we'll need to flip '..'. 
2120 */ 2121 if (new_dir != old_dir) { 2122 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2123 if (error) 2124 return error; 2125 } 2126 2127 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2128 if (error) 2129 return error; 2130 2131 target = new_dentry->d_inode; 2132 if (target) { 2133 down(&target->i_sem); 2134 dentry_unhash(new_dentry); 2135 } 2136 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2137 error = -EBUSY; 2138 else 2139 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2140 if (target) { 2141 if (!error) 2142 target->i_flags |= S_DEAD; 2143 up(&target->i_sem); 2144 if (d_unhashed(new_dentry)) 2145 d_rehash(new_dentry); 2146 dput(new_dentry); 2147 } 2148 if (!error) { 2149 d_move(old_dentry,new_dentry); 2150 security_inode_post_rename(old_dir, old_dentry, 2151 new_dir, new_dentry); 2152 } 2153 return error; 2154 } 2155 2156 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2157 struct inode *new_dir, struct dentry *new_dentry) 2158 { 2159 struct inode *target; 2160 int error; 2161 2162 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2163 if (error) 2164 return error; 2165 2166 dget(new_dentry); 2167 target = new_dentry->d_inode; 2168 if (target) 2169 down(&target->i_sem); 2170 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2171 error = -EBUSY; 2172 else 2173 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2174 if (!error) { 2175 /* The following d_move() should become unconditional */ 2176 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) 2177 d_move(old_dentry, new_dentry); 2178 security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry); 2179 } 2180 if (target) 2181 up(&target->i_sem); 2182 dput(new_dentry); 2183 return error; 2184 } 2185 2186 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2187 struct inode *new_dir, struct dentry *new_dentry) 2188 { 2189 int 
error; 2190 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2191 const char *old_name; 2192 2193 if (old_dentry->d_inode == new_dentry->d_inode) 2194 return 0; 2195 2196 error = may_delete(old_dir, old_dentry, is_dir); 2197 if (error) 2198 return error; 2199 2200 if (!new_dentry->d_inode) 2201 error = may_create(new_dir, new_dentry, NULL); 2202 else 2203 error = may_delete(new_dir, new_dentry, is_dir); 2204 if (error) 2205 return error; 2206 2207 if (!old_dir->i_op || !old_dir->i_op->rename) 2208 return -EPERM; 2209 2210 DQUOT_INIT(old_dir); 2211 DQUOT_INIT(new_dir); 2212 2213 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2214 2215 if (is_dir) 2216 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2217 else 2218 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2219 if (!error) { 2220 const char *new_name = old_dentry->d_name.name; 2221 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir); 2222 } 2223 fsnotify_oldname_free(old_name); 2224 2225 return error; 2226 } 2227 2228 static inline int do_rename(const char * oldname, const char * newname) 2229 { 2230 int error = 0; 2231 struct dentry * old_dir, * new_dir; 2232 struct dentry * old_dentry, *new_dentry; 2233 struct dentry * trap; 2234 struct nameidata oldnd, newnd; 2235 2236 error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); 2237 if (error) 2238 goto exit; 2239 2240 error = path_lookup(newname, LOOKUP_PARENT, &newnd); 2241 if (error) 2242 goto exit1; 2243 2244 error = -EXDEV; 2245 if (oldnd.mnt != newnd.mnt) 2246 goto exit2; 2247 2248 old_dir = oldnd.dentry; 2249 error = -EBUSY; 2250 if (oldnd.last_type != LAST_NORM) 2251 goto exit2; 2252 2253 new_dir = newnd.dentry; 2254 if (newnd.last_type != LAST_NORM) 2255 goto exit2; 2256 2257 trap = lock_rename(new_dir, old_dir); 2258 2259 old_dentry = lookup_hash(&oldnd.last, old_dir); 2260 error = PTR_ERR(old_dentry); 2261 if (IS_ERR(old_dentry)) 2262 goto exit3; 2263 /* source must exist */ 2264 error = -ENOENT; 2265 
if (!old_dentry->d_inode) 2266 goto exit4; 2267 /* unless the source is a directory trailing slashes give -ENOTDIR */ 2268 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2269 error = -ENOTDIR; 2270 if (oldnd.last.name[oldnd.last.len]) 2271 goto exit4; 2272 if (newnd.last.name[newnd.last.len]) 2273 goto exit4; 2274 } 2275 /* source should not be ancestor of target */ 2276 error = -EINVAL; 2277 if (old_dentry == trap) 2278 goto exit4; 2279 new_dentry = lookup_hash(&newnd.last, new_dir); 2280 error = PTR_ERR(new_dentry); 2281 if (IS_ERR(new_dentry)) 2282 goto exit4; 2283 /* target should not be an ancestor of source */ 2284 error = -ENOTEMPTY; 2285 if (new_dentry == trap) 2286 goto exit5; 2287 2288 error = vfs_rename(old_dir->d_inode, old_dentry, 2289 new_dir->d_inode, new_dentry); 2290 exit5: 2291 dput(new_dentry); 2292 exit4: 2293 dput(old_dentry); 2294 exit3: 2295 unlock_rename(new_dir, old_dir); 2296 exit2: 2297 path_release(&newnd); 2298 exit1: 2299 path_release(&oldnd); 2300 exit: 2301 return error; 2302 } 2303 2304 asmlinkage long sys_rename(const char __user * oldname, const char __user * newname) 2305 { 2306 int error; 2307 char * from; 2308 char * to; 2309 2310 from = getname(oldname); 2311 if(IS_ERR(from)) 2312 return PTR_ERR(from); 2313 to = getname(newname); 2314 error = PTR_ERR(to); 2315 if (!IS_ERR(to)) { 2316 error = do_rename(from,to); 2317 putname(to); 2318 } 2319 putname(from); 2320 return error; 2321 } 2322 2323 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2324 { 2325 int len; 2326 2327 len = PTR_ERR(link); 2328 if (IS_ERR(link)) 2329 goto out; 2330 2331 len = strlen(link); 2332 if (len > (unsigned) buflen) 2333 len = buflen; 2334 if (copy_to_user(buffer, link, len)) 2335 len = -EFAULT; 2336 out: 2337 return len; 2338 } 2339 2340 /* 2341 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2342 * have ->follow_link() touching nd only in nd_set_link(). 
Using (or not 2343 * using) it for any given inode is up to filesystem. 2344 */ 2345 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2346 { 2347 struct nameidata nd; 2348 int res; 2349 nd.depth = 0; 2350 res = dentry->d_inode->i_op->follow_link(dentry, &nd); 2351 if (!res) { 2352 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2353 if (dentry->d_inode->i_op->put_link) 2354 dentry->d_inode->i_op->put_link(dentry, &nd); 2355 } 2356 return res; 2357 } 2358 2359 int vfs_follow_link(struct nameidata *nd, const char *link) 2360 { 2361 return __vfs_follow_link(nd, link); 2362 } 2363 2364 /* get the link contents into pagecache */ 2365 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2366 { 2367 struct page * page; 2368 struct address_space *mapping = dentry->d_inode->i_mapping; 2369 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, 2370 NULL); 2371 if (IS_ERR(page)) 2372 goto sync_fail; 2373 wait_on_page_locked(page); 2374 if (!PageUptodate(page)) 2375 goto async_fail; 2376 *ppage = page; 2377 return kmap(page); 2378 2379 async_fail: 2380 page_cache_release(page); 2381 return ERR_PTR(-EIO); 2382 2383 sync_fail: 2384 return (char*)page; 2385 } 2386 2387 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2388 { 2389 struct page *page = NULL; 2390 char *s = page_getlink(dentry, &page); 2391 int res = vfs_readlink(dentry,buffer,buflen,s); 2392 if (page) { 2393 kunmap(page); 2394 page_cache_release(page); 2395 } 2396 return res; 2397 } 2398 2399 int page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2400 { 2401 struct page *page; 2402 nd_set_link(nd, page_getlink(dentry, &page)); 2403 return 0; 2404 } 2405 2406 void page_put_link(struct dentry *dentry, struct nameidata *nd) 2407 { 2408 if (!IS_ERR(nd_get_link(nd))) { 2409 struct page *page; 2410 page = find_get_page(dentry->d_inode->i_mapping, 0); 2411 if (!page) 2412 BUG(); 2413 kunmap(page); 2414 
page_cache_release(page); 2415 page_cache_release(page); 2416 } 2417 } 2418 2419 int page_symlink(struct inode *inode, const char *symname, int len) 2420 { 2421 struct address_space *mapping = inode->i_mapping; 2422 struct page *page = grab_cache_page(mapping, 0); 2423 int err = -ENOMEM; 2424 char *kaddr; 2425 2426 if (!page) 2427 goto fail; 2428 err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); 2429 if (err) 2430 goto fail_map; 2431 kaddr = kmap_atomic(page, KM_USER0); 2432 memcpy(kaddr, symname, len-1); 2433 kunmap_atomic(kaddr, KM_USER0); 2434 mapping->a_ops->commit_write(NULL, page, 0, len-1); 2435 /* 2436 * Notice that we are _not_ going to block here - end of page is 2437 * unmapped, so this will only try to map the rest of page, see 2438 * that it is unmapped (typically even will not look into inode - 2439 * ->i_size will be enough for everything) and zero it out. 2440 * OTOH it's obviously correct and should make the page up-to-date. 2441 */ 2442 if (!PageUptodate(page)) { 2443 err = mapping->a_ops->readpage(NULL, page); 2444 wait_on_page_locked(page); 2445 } else { 2446 unlock_page(page); 2447 } 2448 page_cache_release(page); 2449 if (err < 0) 2450 goto fail; 2451 mark_inode_dirty(inode); 2452 return 0; 2453 fail_map: 2454 unlock_page(page); 2455 page_cache_release(page); 2456 fail: 2457 return err; 2458 } 2459 2460 struct inode_operations page_symlink_inode_operations = { 2461 .readlink = generic_readlink, 2462 .follow_link = page_follow_link_light, 2463 .put_link = page_put_link, 2464 }; 2465 2466 EXPORT_SYMBOL(__user_walk); 2467 EXPORT_SYMBOL(follow_down); 2468 EXPORT_SYMBOL(follow_up); 2469 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2470 EXPORT_SYMBOL(getname); 2471 EXPORT_SYMBOL(lock_rename); 2472 EXPORT_SYMBOL(lookup_hash); 2473 EXPORT_SYMBOL(lookup_one_len); 2474 EXPORT_SYMBOL(page_follow_link_light); 2475 EXPORT_SYMBOL(page_put_link); 2476 EXPORT_SYMBOL(page_readlink); 2477 EXPORT_SYMBOL(page_symlink); 2478 
EXPORT_SYMBOL(page_symlink_inode_operations); 2479 EXPORT_SYMBOL(path_lookup); 2480 EXPORT_SYMBOL(path_release); 2481 EXPORT_SYMBOL(path_walk); 2482 EXPORT_SYMBOL(permission); 2483 EXPORT_SYMBOL(unlock_rename); 2484 EXPORT_SYMBOL(vfs_create); 2485 EXPORT_SYMBOL(vfs_follow_link); 2486 EXPORT_SYMBOL(vfs_link); 2487 EXPORT_SYMBOL(vfs_mkdir); 2488 EXPORT_SYMBOL(vfs_mknod); 2489 EXPORT_SYMBOL(generic_permission); 2490 EXPORT_SYMBOL(vfs_readlink); 2491 EXPORT_SYMBOL(vfs_rename); 2492 EXPORT_SYMBOL(vfs_rmdir); 2493 EXPORT_SYMBOL(vfs_symlink); 2494 EXPORT_SYMBOL(vfs_unlink); 2495 EXPORT_SYMBOL(dentry_unhash); 2496 EXPORT_SYMBOL(generic_readlink); 2497