1 /* 2 * linux/fs/namei.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 /* 8 * Some corrections by tytso. 9 */ 10 11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname 12 * lookup logic. 13 */ 14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 15 */ 16 17 #include <linux/init.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/fs.h> 21 #include <linux/namei.h> 22 #include <linux/quotaops.h> 23 #include <linux/pagemap.h> 24 #include <linux/fsnotify.h> 25 #include <linux/personality.h> 26 #include <linux/security.h> 27 #include <linux/syscalls.h> 28 #include <linux/mount.h> 29 #include <linux/audit.h> 30 #include <linux/capability.h> 31 #include <linux/file.h> 32 #include <linux/fcntl.h> 33 #include <linux/device_cgroup.h> 34 #include <asm/namei.h> 35 #include <asm/uaccess.h> 36 37 #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 38 39 /* [Feb-1997 T. Schoebel-Theuer] 40 * Fundamental changes in the pathname lookup mechanisms (namei) 41 * were necessary because of omirr. The reason is that omirr needs 42 * to know the _real_ pathname, not the user-supplied one, in case 43 * of symlinks (and also when transname replacements occur). 44 * 45 * The new code replaces the old recursive symlink resolution with 46 * an iterative one (in case of non-nested symlink chains). It does 47 * this with calls to <fs>_follow_link(). 48 * As a side effect, dir_namei(), _namei() and follow_link() are now 49 * replaced with a single function lookup_dentry() that can handle all 50 * the special cases of the former code. 51 * 52 * With the new dcache, the pathname is stored at each inode, at least as 53 * long as the refcount of the inode is positive. As a side effect, the 54 * size of the dcache depends on the inode cache and thus is dynamic. 55 * 56 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink 57 * resolution to correspond with current state of the code. 
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm.  Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link().  Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent.  The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a nonexistent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */

/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
 * semantics.  See the comments in "open_namei" and "do_link" below.
 *
 * [10-Sep-98 Alan Modra] Another symlink change.
 */

/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
 *	inside the path - always follow.
 *	in the last component in creation/removal/renaming - never follow.
 *	if LOOKUP_FOLLOW passed - follow.
 *	if the pathname has trailing slashes - follow.
 *	otherwise - don't follow.
 * (applied in that order).
97 * 98 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT 99 * restored for 2.4. This is the last surviving part of old 4.2BSD bug. 100 * During the 2.4 we need to fix the userland stuff depending on it - 101 * hopefully we will be able to get rid of that wart in 2.5. So far only 102 * XEmacs seems to be relying on it... 103 */ 104 /* 105 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) 106 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives 107 * any extra contention... 108 */ 109 110 static int __link_path_walk(const char *name, struct nameidata *nd); 111 112 /* In order to reduce some races, while at the same time doing additional 113 * checking and hopefully speeding things up, we copy filenames to the 114 * kernel data space before using them.. 115 * 116 * POSIX.1 2.4: an empty pathname is invalid (ENOENT). 117 * PATH_MAX includes the nul terminator --RR. 118 */ 119 static int do_getname(const char __user *filename, char *page) 120 { 121 int retval; 122 unsigned long len = PATH_MAX; 123 124 if (!segment_eq(get_fs(), KERNEL_DS)) { 125 if ((unsigned long) filename >= TASK_SIZE) 126 return -EFAULT; 127 if (TASK_SIZE - (unsigned long) filename < PATH_MAX) 128 len = TASK_SIZE - (unsigned long) filename; 129 } 130 131 retval = strncpy_from_user(page, filename, len); 132 if (retval > 0) { 133 if (retval < len) 134 return 0; 135 return -ENAMETOOLONG; 136 } else if (!retval) 137 retval = -ENOENT; 138 return retval; 139 } 140 141 char * getname(const char __user * filename) 142 { 143 char *tmp, *result; 144 145 result = ERR_PTR(-ENOMEM); 146 tmp = __getname(); 147 if (tmp) { 148 int retval = do_getname(filename, tmp); 149 150 result = tmp; 151 if (retval < 0) { 152 __putname(tmp); 153 result = ERR_PTR(retval); 154 } 155 } 156 audit_getname(result); 157 return result; 158 } 159 160 #ifdef CONFIG_AUDITSYSCALL 161 void putname(const char *name) 162 { 163 if (unlikely(!audit_dummy_context())) 164 
audit_putname(name); 165 else 166 __putname(name); 167 } 168 EXPORT_SYMBOL(putname); 169 #endif 170 171 172 /** 173 * generic_permission - check for access rights on a Posix-like filesystem 174 * @inode: inode to check access rights for 175 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 176 * @check_acl: optional callback to check for Posix ACLs 177 * 178 * Used to check for read/write/execute permissions on a file. 179 * We use "fsuid" for this, letting us set arbitrary permissions 180 * for filesystem access without changing the "normal" uids which 181 * are used for other things.. 182 */ 183 int generic_permission(struct inode *inode, int mask, 184 int (*check_acl)(struct inode *inode, int mask)) 185 { 186 umode_t mode = inode->i_mode; 187 188 if (current->fsuid == inode->i_uid) 189 mode >>= 6; 190 else { 191 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 192 int error = check_acl(inode, mask); 193 if (error == -EACCES) 194 goto check_capabilities; 195 else if (error != -EAGAIN) 196 return error; 197 } 198 199 if (in_group_p(inode->i_gid)) 200 mode >>= 3; 201 } 202 203 /* 204 * If the DACs are ok we don't need any capability check. 205 */ 206 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 207 return 0; 208 209 check_capabilities: 210 /* 211 * Read/write DACs are always overridable. 212 * Executable DACs are overridable if at least one exec bit is set. 213 */ 214 if (!(mask & MAY_EXEC) || 215 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 216 if (capable(CAP_DAC_OVERRIDE)) 217 return 0; 218 219 /* 220 * Searching includes executable on directories, else just read. 
221 */ 222 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 223 if (capable(CAP_DAC_READ_SEARCH)) 224 return 0; 225 226 return -EACCES; 227 } 228 229 int permission(struct inode *inode, int mask, struct nameidata *nd) 230 { 231 int retval, submask; 232 struct vfsmount *mnt = NULL; 233 234 if (nd) 235 mnt = nd->path.mnt; 236 237 if (mask & MAY_WRITE) { 238 umode_t mode = inode->i_mode; 239 240 /* 241 * Nobody gets write access to a read-only fs. 242 */ 243 if (IS_RDONLY(inode) && 244 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) 245 return -EROFS; 246 247 /* 248 * Nobody gets write access to an immutable file. 249 */ 250 if (IS_IMMUTABLE(inode)) 251 return -EACCES; 252 } 253 254 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 255 /* 256 * MAY_EXEC on regular files is denied if the fs is mounted 257 * with the "noexec" flag. 258 */ 259 if (mnt && (mnt->mnt_flags & MNT_NOEXEC)) 260 return -EACCES; 261 } 262 263 /* Ordinary permission routines do not understand MAY_APPEND. */ 264 submask = mask & ~MAY_APPEND; 265 if (inode->i_op && inode->i_op->permission) { 266 retval = inode->i_op->permission(inode, submask, nd); 267 if (!retval) { 268 /* 269 * Exec permission on a regular file is denied if none 270 * of the execute bits are set. 271 * 272 * This check should be done by the ->permission() 273 * method. 
274 */ 275 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && 276 !(inode->i_mode & S_IXUGO)) 277 return -EACCES; 278 } 279 } else { 280 retval = generic_permission(inode, submask, NULL); 281 } 282 if (retval) 283 return retval; 284 285 retval = devcgroup_inode_permission(inode, mask); 286 if (retval) 287 return retval; 288 289 return security_inode_permission(inode, mask, nd); 290 } 291 292 /** 293 * vfs_permission - check for access rights to a given path 294 * @nd: lookup result that describes the path 295 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 296 * 297 * Used to check for read/write/execute permissions on a path. 298 * We use "fsuid" for this, letting us set arbitrary permissions 299 * for filesystem access without changing the "normal" uids which 300 * are used for other things. 301 */ 302 int vfs_permission(struct nameidata *nd, int mask) 303 { 304 return permission(nd->path.dentry->d_inode, mask, nd); 305 } 306 307 /** 308 * file_permission - check for additional access rights to a given file 309 * @file: file to check access rights for 310 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 311 * 312 * Used to check for read/write/execute permissions on an already opened 313 * file. 314 * 315 * Note: 316 * Do not use this function in new code. All access checks should 317 * be done using vfs_permission(). 318 */ 319 int file_permission(struct file *file, int mask) 320 { 321 return permission(file->f_path.dentry->d_inode, mask, NULL); 322 } 323 324 /* 325 * get_write_access() gets write permission for a file. 326 * put_write_access() releases this write permission. 327 * This is used for regular files. 328 * We cannot support write (and maybe mmap read-write shared) accesses and 329 * MAP_DENYWRITE mmappings simultaneously. 
The i_writecount field of an inode 330 * can have the following values: 331 * 0: no writers, no VM_DENYWRITE mappings 332 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist 333 * > 0: (i_writecount) users are writing to the file. 334 * 335 * Normally we operate on that counter with atomic_{inc,dec} and it's safe 336 * except for the cases where we don't hold i_writecount yet. Then we need to 337 * use {get,deny}_write_access() - these functions check the sign and refuse 338 * to do the change if sign is wrong. Exclusion between them is provided by 339 * the inode->i_lock spinlock. 340 */ 341 342 int get_write_access(struct inode * inode) 343 { 344 spin_lock(&inode->i_lock); 345 if (atomic_read(&inode->i_writecount) < 0) { 346 spin_unlock(&inode->i_lock); 347 return -ETXTBSY; 348 } 349 atomic_inc(&inode->i_writecount); 350 spin_unlock(&inode->i_lock); 351 352 return 0; 353 } 354 355 int deny_write_access(struct file * file) 356 { 357 struct inode *inode = file->f_path.dentry->d_inode; 358 359 spin_lock(&inode->i_lock); 360 if (atomic_read(&inode->i_writecount) > 0) { 361 spin_unlock(&inode->i_lock); 362 return -ETXTBSY; 363 } 364 atomic_dec(&inode->i_writecount); 365 spin_unlock(&inode->i_lock); 366 367 return 0; 368 } 369 370 /** 371 * path_get - get a reference to a path 372 * @path: path to get the reference to 373 * 374 * Given a path increment the reference count to the dentry and the vfsmount. 375 */ 376 void path_get(struct path *path) 377 { 378 mntget(path->mnt); 379 dget(path->dentry); 380 } 381 EXPORT_SYMBOL(path_get); 382 383 /** 384 * path_put - put a reference to a path 385 * @path: path to put the reference to 386 * 387 * Given a path decrement the reference count to the dentry and the vfsmount. 
388 */ 389 void path_put(struct path *path) 390 { 391 dput(path->dentry); 392 mntput(path->mnt); 393 } 394 EXPORT_SYMBOL(path_put); 395 396 /** 397 * release_open_intent - free up open intent resources 398 * @nd: pointer to nameidata 399 */ 400 void release_open_intent(struct nameidata *nd) 401 { 402 if (nd->intent.open.file->f_path.dentry == NULL) 403 put_filp(nd->intent.open.file); 404 else 405 fput(nd->intent.open.file); 406 } 407 408 static inline struct dentry * 409 do_revalidate(struct dentry *dentry, struct nameidata *nd) 410 { 411 int status = dentry->d_op->d_revalidate(dentry, nd); 412 if (unlikely(status <= 0)) { 413 /* 414 * The dentry failed validation. 415 * If d_revalidate returned 0 attempt to invalidate 416 * the dentry otherwise d_revalidate is asking us 417 * to return a fail status. 418 */ 419 if (!status) { 420 if (!d_invalidate(dentry)) { 421 dput(dentry); 422 dentry = NULL; 423 } 424 } else { 425 dput(dentry); 426 dentry = ERR_PTR(status); 427 } 428 } 429 return dentry; 430 } 431 432 /* 433 * Internal lookup() using the new generic dcache. 434 * SMP-safe 435 */ 436 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 437 { 438 struct dentry * dentry = __d_lookup(parent, name); 439 440 /* lockess __d_lookup may fail due to concurrent d_move() 441 * in some unrelated directory, so try with d_lookup 442 */ 443 if (!dentry) 444 dentry = d_lookup(parent, name); 445 446 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 447 dentry = do_revalidate(dentry, nd); 448 449 return dentry; 450 } 451 452 /* 453 * Short-cut version of permission(), for calling by 454 * path_walk(), when dcache lock is held. Combines parts 455 * of permission() and generic_permission(), and tests ONLY for 456 * MAY_EXEC permission. 457 * 458 * If appropriate, check DAC only. If not appropriate, or 459 * short-cut DAC fails, then call permission() to do more 460 * complete permission check. 
461 */ 462 static int exec_permission_lite(struct inode *inode, 463 struct nameidata *nd) 464 { 465 umode_t mode = inode->i_mode; 466 467 if (inode->i_op && inode->i_op->permission) 468 return -EAGAIN; 469 470 if (current->fsuid == inode->i_uid) 471 mode >>= 6; 472 else if (in_group_p(inode->i_gid)) 473 mode >>= 3; 474 475 if (mode & MAY_EXEC) 476 goto ok; 477 478 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) 479 goto ok; 480 481 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) 482 goto ok; 483 484 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 485 goto ok; 486 487 return -EACCES; 488 ok: 489 return security_inode_permission(inode, MAY_EXEC, nd); 490 } 491 492 /* 493 * This is called when everything else fails, and we actually have 494 * to go to the low-level filesystem to find out what we should do.. 495 * 496 * We get the directory semaphore, and after getting that we also 497 * make sure that nobody added the entry to the dcache in the meantime.. 498 * SMP-safe 499 */ 500 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) 501 { 502 struct dentry * result; 503 struct inode *dir = parent->d_inode; 504 505 mutex_lock(&dir->i_mutex); 506 /* 507 * First re-do the cached lookup just in case it was created 508 * while we waited for the directory semaphore.. 509 * 510 * FIXME! This could use version numbering or similar to 511 * avoid unnecessary cache lookups. 512 * 513 * The "dcache_lock" is purely to protect the RCU list walker 514 * from concurrent renames at this point (we mustn't get false 515 * negatives from the RCU list walk here, unlike the optimistic 516 * fast walk). 
517 * 518 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup 519 */ 520 result = d_lookup(parent, name); 521 if (!result) { 522 struct dentry * dentry = d_alloc(parent, name); 523 result = ERR_PTR(-ENOMEM); 524 if (dentry) { 525 result = dir->i_op->lookup(dir, dentry, nd); 526 if (result) 527 dput(dentry); 528 else 529 result = dentry; 530 } 531 mutex_unlock(&dir->i_mutex); 532 return result; 533 } 534 535 /* 536 * Uhhuh! Nasty case: the cache was re-populated while 537 * we waited on the semaphore. Need to revalidate. 538 */ 539 mutex_unlock(&dir->i_mutex); 540 if (result->d_op && result->d_op->d_revalidate) { 541 result = do_revalidate(result, nd); 542 if (!result) 543 result = ERR_PTR(-ENOENT); 544 } 545 return result; 546 } 547 548 static int __emul_lookup_dentry(const char *, struct nameidata *); 549 550 /* SMP-safe */ 551 static __always_inline int 552 walk_init_root(const char *name, struct nameidata *nd) 553 { 554 struct fs_struct *fs = current->fs; 555 556 read_lock(&fs->lock); 557 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { 558 nd->path = fs->altroot; 559 path_get(&fs->altroot); 560 read_unlock(&fs->lock); 561 if (__emul_lookup_dentry(name,nd)) 562 return 0; 563 read_lock(&fs->lock); 564 } 565 nd->path = fs->root; 566 path_get(&fs->root); 567 read_unlock(&fs->lock); 568 return 1; 569 } 570 571 /* 572 * Wrapper to retry pathname resolution whenever the underlying 573 * file system returns an ESTALE. 574 * 575 * Retry the whole path once, forcing real lookup requests 576 * instead of relying on the dcache. 
577 */ 578 static __always_inline int link_path_walk(const char *name, struct nameidata *nd) 579 { 580 struct path save = nd->path; 581 int result; 582 583 /* make sure the stuff we saved doesn't go away */ 584 path_get(&save); 585 586 result = __link_path_walk(name, nd); 587 if (result == -ESTALE) { 588 /* nd->path had been dropped */ 589 nd->path = save; 590 path_get(&nd->path); 591 nd->flags |= LOOKUP_REVAL; 592 result = __link_path_walk(name, nd); 593 } 594 595 path_put(&save); 596 597 return result; 598 } 599 600 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 601 { 602 int res = 0; 603 char *name; 604 if (IS_ERR(link)) 605 goto fail; 606 607 if (*link == '/') { 608 path_put(&nd->path); 609 if (!walk_init_root(link, nd)) 610 /* weird __emul_prefix() stuff did it */ 611 goto out; 612 } 613 res = link_path_walk(link, nd); 614 out: 615 if (nd->depth || res || nd->last_type!=LAST_NORM) 616 return res; 617 /* 618 * If it is an iterative symlinks resolution in open_namei() we 619 * have to copy the last component. And all that crap because of 620 * bloody create() on broken symlinks. Furrfu... 
621 */ 622 name = __getname(); 623 if (unlikely(!name)) { 624 path_put(&nd->path); 625 return -ENOMEM; 626 } 627 strcpy(name, nd->last.name); 628 nd->last.name = name; 629 return 0; 630 fail: 631 path_put(&nd->path); 632 return PTR_ERR(link); 633 } 634 635 static void path_put_conditional(struct path *path, struct nameidata *nd) 636 { 637 dput(path->dentry); 638 if (path->mnt != nd->path.mnt) 639 mntput(path->mnt); 640 } 641 642 static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 643 { 644 dput(nd->path.dentry); 645 if (nd->path.mnt != path->mnt) 646 mntput(nd->path.mnt); 647 nd->path.mnt = path->mnt; 648 nd->path.dentry = path->dentry; 649 } 650 651 static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 652 { 653 int error; 654 void *cookie; 655 struct dentry *dentry = path->dentry; 656 657 touch_atime(path->mnt, dentry); 658 nd_set_link(nd, NULL); 659 660 if (path->mnt != nd->path.mnt) { 661 path_to_nameidata(path, nd); 662 dget(dentry); 663 } 664 mntget(path->mnt); 665 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 666 error = PTR_ERR(cookie); 667 if (!IS_ERR(cookie)) { 668 char *s = nd_get_link(nd); 669 error = 0; 670 if (s) 671 error = __vfs_follow_link(nd, s); 672 if (dentry->d_inode->i_op->put_link) 673 dentry->d_inode->i_op->put_link(dentry, nd, cookie); 674 } 675 path_put(path); 676 677 return error; 678 } 679 680 /* 681 * This limits recursive symlink follows to 8, while 682 * limiting consecutive symlinks to 40. 683 * 684 * Without that kind of total limit, nasty chains of consecutive 685 * symlinks can cause almost arbitrarily long lookups. 
686 */ 687 static inline int do_follow_link(struct path *path, struct nameidata *nd) 688 { 689 int err = -ELOOP; 690 if (current->link_count >= MAX_NESTED_LINKS) 691 goto loop; 692 if (current->total_link_count >= 40) 693 goto loop; 694 BUG_ON(nd->depth >= MAX_NESTED_LINKS); 695 cond_resched(); 696 err = security_inode_follow_link(path->dentry, nd); 697 if (err) 698 goto loop; 699 current->link_count++; 700 current->total_link_count++; 701 nd->depth++; 702 err = __do_follow_link(path, nd); 703 current->link_count--; 704 nd->depth--; 705 return err; 706 loop: 707 path_put_conditional(path, nd); 708 path_put(&nd->path); 709 return err; 710 } 711 712 int follow_up(struct vfsmount **mnt, struct dentry **dentry) 713 { 714 struct vfsmount *parent; 715 struct dentry *mountpoint; 716 spin_lock(&vfsmount_lock); 717 parent=(*mnt)->mnt_parent; 718 if (parent == *mnt) { 719 spin_unlock(&vfsmount_lock); 720 return 0; 721 } 722 mntget(parent); 723 mountpoint=dget((*mnt)->mnt_mountpoint); 724 spin_unlock(&vfsmount_lock); 725 dput(*dentry); 726 *dentry = mountpoint; 727 mntput(*mnt); 728 *mnt = parent; 729 return 1; 730 } 731 732 /* no need for dcache_lock, as serialization is taken care in 733 * namespace.c 734 */ 735 static int __follow_mount(struct path *path) 736 { 737 int res = 0; 738 while (d_mountpoint(path->dentry)) { 739 struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry); 740 if (!mounted) 741 break; 742 dput(path->dentry); 743 if (res) 744 mntput(path->mnt); 745 path->mnt = mounted; 746 path->dentry = dget(mounted->mnt_root); 747 res = 1; 748 } 749 return res; 750 } 751 752 static void follow_mount(struct vfsmount **mnt, struct dentry **dentry) 753 { 754 while (d_mountpoint(*dentry)) { 755 struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); 756 if (!mounted) 757 break; 758 dput(*dentry); 759 mntput(*mnt); 760 *mnt = mounted; 761 *dentry = dget(mounted->mnt_root); 762 } 763 } 764 765 /* no need for dcache_lock, as serialization is taken care in 766 * 
namespace.c 767 */ 768 int follow_down(struct vfsmount **mnt, struct dentry **dentry) 769 { 770 struct vfsmount *mounted; 771 772 mounted = lookup_mnt(*mnt, *dentry); 773 if (mounted) { 774 dput(*dentry); 775 mntput(*mnt); 776 *mnt = mounted; 777 *dentry = dget(mounted->mnt_root); 778 return 1; 779 } 780 return 0; 781 } 782 783 static __always_inline void follow_dotdot(struct nameidata *nd) 784 { 785 struct fs_struct *fs = current->fs; 786 787 while(1) { 788 struct vfsmount *parent; 789 struct dentry *old = nd->path.dentry; 790 791 read_lock(&fs->lock); 792 if (nd->path.dentry == fs->root.dentry && 793 nd->path.mnt == fs->root.mnt) { 794 read_unlock(&fs->lock); 795 break; 796 } 797 read_unlock(&fs->lock); 798 spin_lock(&dcache_lock); 799 if (nd->path.dentry != nd->path.mnt->mnt_root) { 800 nd->path.dentry = dget(nd->path.dentry->d_parent); 801 spin_unlock(&dcache_lock); 802 dput(old); 803 break; 804 } 805 spin_unlock(&dcache_lock); 806 spin_lock(&vfsmount_lock); 807 parent = nd->path.mnt->mnt_parent; 808 if (parent == nd->path.mnt) { 809 spin_unlock(&vfsmount_lock); 810 break; 811 } 812 mntget(parent); 813 nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); 814 spin_unlock(&vfsmount_lock); 815 dput(old); 816 mntput(nd->path.mnt); 817 nd->path.mnt = parent; 818 } 819 follow_mount(&nd->path.mnt, &nd->path.dentry); 820 } 821 822 /* 823 * It's more convoluted than I'd like it to be, but... it's still fairly 824 * small and for now I'd prefer to have fast path as straight as possible. 825 * It _is_ time-critical. 
826 */ 827 static int do_lookup(struct nameidata *nd, struct qstr *name, 828 struct path *path) 829 { 830 struct vfsmount *mnt = nd->path.mnt; 831 struct dentry *dentry = __d_lookup(nd->path.dentry, name); 832 833 if (!dentry) 834 goto need_lookup; 835 if (dentry->d_op && dentry->d_op->d_revalidate) 836 goto need_revalidate; 837 done: 838 path->mnt = mnt; 839 path->dentry = dentry; 840 __follow_mount(path); 841 return 0; 842 843 need_lookup: 844 dentry = real_lookup(nd->path.dentry, name, nd); 845 if (IS_ERR(dentry)) 846 goto fail; 847 goto done; 848 849 need_revalidate: 850 dentry = do_revalidate(dentry, nd); 851 if (!dentry) 852 goto need_lookup; 853 if (IS_ERR(dentry)) 854 goto fail; 855 goto done; 856 857 fail: 858 return PTR_ERR(dentry); 859 } 860 861 /* 862 * Name resolution. 863 * This is the basic name resolution function, turning a pathname into 864 * the final dentry. We expect 'base' to be positive and a directory. 865 * 866 * Returns 0 and nd will have valid dentry and mnt on success. 867 * Returns error and drops reference to input namei data on failure. 868 */ 869 static int __link_path_walk(const char *name, struct nameidata *nd) 870 { 871 struct path next; 872 struct inode *inode; 873 int err; 874 unsigned int lookup_flags = nd->flags; 875 876 while (*name=='/') 877 name++; 878 if (!*name) 879 goto return_reval; 880 881 inode = nd->path.dentry->d_inode; 882 if (nd->depth) 883 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 884 885 /* At this point we know we have a real path component. 
*/ 886 for(;;) { 887 unsigned long hash; 888 struct qstr this; 889 unsigned int c; 890 891 nd->flags |= LOOKUP_CONTINUE; 892 err = exec_permission_lite(inode, nd); 893 if (err == -EAGAIN) 894 err = vfs_permission(nd, MAY_EXEC); 895 if (err) 896 break; 897 898 this.name = name; 899 c = *(const unsigned char *)name; 900 901 hash = init_name_hash(); 902 do { 903 name++; 904 hash = partial_name_hash(c, hash); 905 c = *(const unsigned char *)name; 906 } while (c && (c != '/')); 907 this.len = name - (const char *) this.name; 908 this.hash = end_name_hash(hash); 909 910 /* remove trailing slashes? */ 911 if (!c) 912 goto last_component; 913 while (*++name == '/'); 914 if (!*name) 915 goto last_with_slashes; 916 917 /* 918 * "." and ".." are special - ".." especially so because it has 919 * to be able to know about the current root directory and 920 * parent relationships. 921 */ 922 if (this.name[0] == '.') switch (this.len) { 923 default: 924 break; 925 case 2: 926 if (this.name[1] != '.') 927 break; 928 follow_dotdot(nd); 929 inode = nd->path.dentry->d_inode; 930 /* fallthrough */ 931 case 1: 932 continue; 933 } 934 /* 935 * See if the low-level filesystem might want 936 * to use its own hash.. 937 */ 938 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 939 err = nd->path.dentry->d_op->d_hash(nd->path.dentry, 940 &this); 941 if (err < 0) 942 break; 943 } 944 /* This does the actual lookups.. 
*/ 945 err = do_lookup(nd, &this, &next); 946 if (err) 947 break; 948 949 err = -ENOENT; 950 inode = next.dentry->d_inode; 951 if (!inode) 952 goto out_dput; 953 err = -ENOTDIR; 954 if (!inode->i_op) 955 goto out_dput; 956 957 if (inode->i_op->follow_link) { 958 err = do_follow_link(&next, nd); 959 if (err) 960 goto return_err; 961 err = -ENOENT; 962 inode = nd->path.dentry->d_inode; 963 if (!inode) 964 break; 965 err = -ENOTDIR; 966 if (!inode->i_op) 967 break; 968 } else 969 path_to_nameidata(&next, nd); 970 err = -ENOTDIR; 971 if (!inode->i_op->lookup) 972 break; 973 continue; 974 /* here ends the main loop */ 975 976 last_with_slashes: 977 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 978 last_component: 979 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 980 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 981 if (lookup_flags & LOOKUP_PARENT) 982 goto lookup_parent; 983 if (this.name[0] == '.') switch (this.len) { 984 default: 985 break; 986 case 2: 987 if (this.name[1] != '.') 988 break; 989 follow_dotdot(nd); 990 inode = nd->path.dentry->d_inode; 991 /* fallthrough */ 992 case 1: 993 goto return_reval; 994 } 995 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 996 err = nd->path.dentry->d_op->d_hash(nd->path.dentry, 997 &this); 998 if (err < 0) 999 break; 1000 } 1001 err = do_lookup(nd, &this, &next); 1002 if (err) 1003 break; 1004 inode = next.dentry->d_inode; 1005 if ((lookup_flags & LOOKUP_FOLLOW) 1006 && inode && inode->i_op && inode->i_op->follow_link) { 1007 err = do_follow_link(&next, nd); 1008 if (err) 1009 goto return_err; 1010 inode = nd->path.dentry->d_inode; 1011 } else 1012 path_to_nameidata(&next, nd); 1013 err = -ENOENT; 1014 if (!inode) 1015 break; 1016 if (lookup_flags & LOOKUP_DIRECTORY) { 1017 err = -ENOTDIR; 1018 if (!inode->i_op || !inode->i_op->lookup) 1019 break; 1020 } 1021 goto return_base; 1022 lookup_parent: 1023 nd->last = this; 1024 nd->last_type = LAST_NORM; 1025 if (this.name[0] != '.') 1026 goto 
return_base; 1027 if (this.len == 1) 1028 nd->last_type = LAST_DOT; 1029 else if (this.len == 2 && this.name[1] == '.') 1030 nd->last_type = LAST_DOTDOT; 1031 else 1032 goto return_base; 1033 return_reval: 1034 /* 1035 * We bypassed the ordinary revalidation routines. 1036 * We may need to check the cached dentry for staleness. 1037 */ 1038 if (nd->path.dentry && nd->path.dentry->d_sb && 1039 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { 1040 err = -ESTALE; 1041 /* Note: we do not d_invalidate() */ 1042 if (!nd->path.dentry->d_op->d_revalidate( 1043 nd->path.dentry, nd)) 1044 break; 1045 } 1046 return_base: 1047 return 0; 1048 out_dput: 1049 path_put_conditional(&next, nd); 1050 break; 1051 } 1052 path_put(&nd->path); 1053 return_err: 1054 return err; 1055 } 1056 1057 static int path_walk(const char *name, struct nameidata *nd) 1058 { 1059 current->total_link_count = 0; 1060 return link_path_walk(name, nd); 1061 } 1062 1063 /* 1064 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if 1065 * everything is done. Returns 0 and drops input nd, if lookup failed; 1066 */ 1067 static int __emul_lookup_dentry(const char *name, struct nameidata *nd) 1068 { 1069 if (path_walk(name, nd)) 1070 return 0; /* something went wrong... */ 1071 1072 if (!nd->path.dentry->d_inode || 1073 S_ISDIR(nd->path.dentry->d_inode->i_mode)) { 1074 struct path old_path = nd->path; 1075 struct qstr last = nd->last; 1076 int last_type = nd->last_type; 1077 struct fs_struct *fs = current->fs; 1078 1079 /* 1080 * NAME was not found in alternate root or it's a directory. 
1081 * Try to find it in the normal root: 1082 */ 1083 nd->last_type = LAST_ROOT; 1084 read_lock(&fs->lock); 1085 nd->path = fs->root; 1086 path_get(&fs->root); 1087 read_unlock(&fs->lock); 1088 if (path_walk(name, nd) == 0) { 1089 if (nd->path.dentry->d_inode) { 1090 path_put(&old_path); 1091 return 1; 1092 } 1093 path_put(&nd->path); 1094 } 1095 nd->path = old_path; 1096 nd->last = last; 1097 nd->last_type = last_type; 1098 } 1099 return 1; 1100 } 1101 1102 void set_fs_altroot(void) 1103 { 1104 char *emul = __emul_prefix(); 1105 struct nameidata nd; 1106 struct path path = {}, old_path; 1107 int err; 1108 struct fs_struct *fs = current->fs; 1109 1110 if (!emul) 1111 goto set_it; 1112 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); 1113 if (!err) 1114 path = nd.path; 1115 set_it: 1116 write_lock(&fs->lock); 1117 old_path = fs->altroot; 1118 fs->altroot = path; 1119 write_unlock(&fs->lock); 1120 if (old_path.dentry) 1121 path_put(&old_path); 1122 } 1123 1124 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1125 static int do_path_lookup(int dfd, const char *name, 1126 unsigned int flags, struct nameidata *nd) 1127 { 1128 int retval = 0; 1129 int fput_needed; 1130 struct file *file; 1131 struct fs_struct *fs = current->fs; 1132 1133 nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ 1134 nd->flags = flags; 1135 nd->depth = 0; 1136 1137 if (*name=='/') { 1138 read_lock(&fs->lock); 1139 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { 1140 nd->path = fs->altroot; 1141 path_get(&fs->altroot); 1142 read_unlock(&fs->lock); 1143 if (__emul_lookup_dentry(name,nd)) 1144 goto out; /* found in altroot */ 1145 read_lock(&fs->lock); 1146 } 1147 nd->path = fs->root; 1148 path_get(&fs->root); 1149 read_unlock(&fs->lock); 1150 } else if (dfd == AT_FDCWD) { 1151 read_lock(&fs->lock); 1152 nd->path = fs->pwd; 1153 path_get(&fs->pwd); 1154 read_unlock(&fs->lock); 1155 } else { 1156 struct dentry *dentry; 1157 1158 file = fget_light(dfd, &fput_needed); 1159 retval = -EBADF; 1160 if (!file) 1161 goto out_fail; 1162 1163 dentry = file->f_path.dentry; 1164 1165 retval = -ENOTDIR; 1166 if (!S_ISDIR(dentry->d_inode->i_mode)) 1167 goto fput_fail; 1168 1169 retval = file_permission(file, MAY_EXEC); 1170 if (retval) 1171 goto fput_fail; 1172 1173 nd->path = file->f_path; 1174 path_get(&file->f_path); 1175 1176 fput_light(file, fput_needed); 1177 } 1178 1179 retval = path_walk(name, nd); 1180 out: 1181 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1182 nd->path.dentry->d_inode)) 1183 audit_inode(name, nd->path.dentry); 1184 out_fail: 1185 return retval; 1186 1187 fput_fail: 1188 fput_light(file, fput_needed); 1189 goto out_fail; 1190 } 1191 1192 int path_lookup(const char *name, unsigned int flags, 1193 struct nameidata *nd) 1194 { 1195 return do_path_lookup(AT_FDCWD, name, flags, nd); 1196 } 1197 1198 /** 1199 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 1200 * @dentry: pointer to dentry of the base directory 1201 * @mnt: pointer to vfs mount of the base directory 1202 * @name: pointer to file name 1203 * @flags: lookup flags 1204 * @nd: pointer to nameidata 1205 */ 1206 int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1207 const char *name, unsigned int flags, 1208 struct nameidata *nd) 1209 
{ 1210 int retval; 1211 1212 /* same as do_path_lookup */ 1213 nd->last_type = LAST_ROOT; 1214 nd->flags = flags; 1215 nd->depth = 0; 1216 1217 nd->path.dentry = dentry; 1218 nd->path.mnt = mnt; 1219 path_get(&nd->path); 1220 1221 retval = path_walk(name, nd); 1222 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1223 nd->path.dentry->d_inode)) 1224 audit_inode(name, nd->path.dentry); 1225 1226 return retval; 1227 1228 } 1229 1230 static int __path_lookup_intent_open(int dfd, const char *name, 1231 unsigned int lookup_flags, struct nameidata *nd, 1232 int open_flags, int create_mode) 1233 { 1234 struct file *filp = get_empty_filp(); 1235 int err; 1236 1237 if (filp == NULL) 1238 return -ENFILE; 1239 nd->intent.open.file = filp; 1240 nd->intent.open.flags = open_flags; 1241 nd->intent.open.create_mode = create_mode; 1242 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); 1243 if (IS_ERR(nd->intent.open.file)) { 1244 if (err == 0) { 1245 err = PTR_ERR(nd->intent.open.file); 1246 path_put(&nd->path); 1247 } 1248 } else if (err != 0) 1249 release_open_intent(nd); 1250 return err; 1251 } 1252 1253 /** 1254 * path_lookup_open - lookup a file path with open intent 1255 * @dfd: the directory to use as base, or AT_FDCWD 1256 * @name: pointer to file name 1257 * @lookup_flags: lookup intent flags 1258 * @nd: pointer to nameidata 1259 * @open_flags: open intent flags 1260 */ 1261 int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, 1262 struct nameidata *nd, int open_flags) 1263 { 1264 return __path_lookup_intent_open(dfd, name, lookup_flags, nd, 1265 open_flags, 0); 1266 } 1267 1268 /** 1269 * path_lookup_create - lookup a file path with open + create intent 1270 * @dfd: the directory to use as base, or AT_FDCWD 1271 * @name: pointer to file name 1272 * @lookup_flags: lookup intent flags 1273 * @nd: pointer to nameidata 1274 * @open_flags: open intent flags 1275 * @create_mode: create intent flags 1276 */ 1277 static int 
path_lookup_create(int dfd, const char *name, 1278 unsigned int lookup_flags, struct nameidata *nd, 1279 int open_flags, int create_mode) 1280 { 1281 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, 1282 nd, open_flags, create_mode); 1283 } 1284 1285 int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, 1286 struct nameidata *nd, int open_flags) 1287 { 1288 char *tmp = getname(name); 1289 int err = PTR_ERR(tmp); 1290 1291 if (!IS_ERR(tmp)) { 1292 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0); 1293 putname(tmp); 1294 } 1295 return err; 1296 } 1297 1298 static struct dentry *__lookup_hash(struct qstr *name, 1299 struct dentry *base, struct nameidata *nd) 1300 { 1301 struct dentry *dentry; 1302 struct inode *inode; 1303 int err; 1304 1305 inode = base->d_inode; 1306 1307 /* 1308 * See if the low-level filesystem might want 1309 * to use its own hash.. 1310 */ 1311 if (base->d_op && base->d_op->d_hash) { 1312 err = base->d_op->d_hash(base, name); 1313 dentry = ERR_PTR(err); 1314 if (err < 0) 1315 goto out; 1316 } 1317 1318 dentry = cached_lookup(base, name, nd); 1319 if (!dentry) { 1320 struct dentry *new = d_alloc(base, name); 1321 dentry = ERR_PTR(-ENOMEM); 1322 if (!new) 1323 goto out; 1324 dentry = inode->i_op->lookup(inode, new, nd); 1325 if (!dentry) 1326 dentry = new; 1327 else 1328 dput(new); 1329 } 1330 out: 1331 return dentry; 1332 } 1333 1334 /* 1335 * Restricted form of lookup. Doesn't follow links, single-component only, 1336 * needs parent already locked. Doesn't follow mounts. 1337 * SMP-safe. 
1338 */ 1339 static struct dentry *lookup_hash(struct nameidata *nd) 1340 { 1341 int err; 1342 1343 err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); 1344 if (err) 1345 return ERR_PTR(err); 1346 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1347 } 1348 1349 static int __lookup_one_len(const char *name, struct qstr *this, 1350 struct dentry *base, int len) 1351 { 1352 unsigned long hash; 1353 unsigned int c; 1354 1355 this->name = name; 1356 this->len = len; 1357 if (!len) 1358 return -EACCES; 1359 1360 hash = init_name_hash(); 1361 while (len--) { 1362 c = *(const unsigned char *)name++; 1363 if (c == '/' || c == '\0') 1364 return -EACCES; 1365 hash = partial_name_hash(c, hash); 1366 } 1367 this->hash = end_name_hash(hash); 1368 return 0; 1369 } 1370 1371 /** 1372 * lookup_one_len - filesystem helper to lookup single pathname component 1373 * @name: pathname component to lookup 1374 * @base: base directory to lookup from 1375 * @len: maximum length @len should be interpreted to 1376 * 1377 * Note that this routine is purely a helper for filesystem usage and should 1378 * not be called by generic code. Also note that by using this function the 1379 * nameidata argument is passed to the filesystem methods and a filesystem 1380 * using this helper needs to be prepared for that. 1381 */ 1382 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1383 { 1384 int err; 1385 struct qstr this; 1386 1387 err = __lookup_one_len(name, &this, base, len); 1388 if (err) 1389 return ERR_PTR(err); 1390 1391 err = permission(base->d_inode, MAY_EXEC, NULL); 1392 if (err) 1393 return ERR_PTR(err); 1394 return __lookup_hash(&this, base, NULL); 1395 } 1396 1397 /** 1398 * lookup_one_noperm - bad hack for sysfs 1399 * @name: pathname component to lookup 1400 * @base: base directory to lookup from 1401 * 1402 * This is a variant of lookup_one_len that doesn't perform any permission 1403 * checks. 
It's a horrible hack to work around the braindead sysfs 1404 * architecture and should not be used anywhere else. 1405 * 1406 * DON'T USE THIS FUNCTION EVER, thanks. 1407 */ 1408 struct dentry *lookup_one_noperm(const char *name, struct dentry *base) 1409 { 1410 int err; 1411 struct qstr this; 1412 1413 err = __lookup_one_len(name, &this, base, strlen(name)); 1414 if (err) 1415 return ERR_PTR(err); 1416 return __lookup_hash(&this, base, NULL); 1417 } 1418 1419 int __user_walk_fd(int dfd, const char __user *name, unsigned flags, 1420 struct nameidata *nd) 1421 { 1422 char *tmp = getname(name); 1423 int err = PTR_ERR(tmp); 1424 1425 if (!IS_ERR(tmp)) { 1426 err = do_path_lookup(dfd, tmp, flags, nd); 1427 putname(tmp); 1428 } 1429 return err; 1430 } 1431 1432 int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1433 { 1434 return __user_walk_fd(AT_FDCWD, name, flags, nd); 1435 } 1436 1437 /* 1438 * It's inline, so penalty for filesystems that don't use sticky bit is 1439 * minimal. 1440 */ 1441 static inline int check_sticky(struct inode *dir, struct inode *inode) 1442 { 1443 if (!(dir->i_mode & S_ISVTX)) 1444 return 0; 1445 if (inode->i_uid == current->fsuid) 1446 return 0; 1447 if (dir->i_uid == current->fsuid) 1448 return 0; 1449 return !capable(CAP_FOWNER); 1450 } 1451 1452 /* 1453 * Check whether we can remove a link victim from directory dir, check 1454 * whether the type of victim is right. 1455 * 1. We can't do it if dir is read-only (done in permission()) 1456 * 2. We should have write and exec permissions on dir 1457 * 3. We can't remove anything from append-only dir 1458 * 4. We can't do anything with immutable dir (done in permission()) 1459 * 5. If the sticky bit on dir is set we should either 1460 * a. be owner of dir, or 1461 * b. be owner of victim, or 1462 * c. have CAP_FOWNER capability 1463 * 6. If the victim is append-only or immutable we can't do antyhing with 1464 * links pointing to it. 1465 * 7. 
If we were asked to remove a directory and victim isn't one - ENOTDIR. 1466 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 1467 * 9. We can't remove a root or mountpoint. 1468 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 1469 * nfs_async_unlink(). 1470 */ 1471 static int may_delete(struct inode *dir,struct dentry *victim,int isdir) 1472 { 1473 int error; 1474 1475 if (!victim->d_inode) 1476 return -ENOENT; 1477 1478 BUG_ON(victim->d_parent->d_inode != dir); 1479 audit_inode_child(victim->d_name.name, victim, dir); 1480 1481 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1482 if (error) 1483 return error; 1484 if (IS_APPEND(dir)) 1485 return -EPERM; 1486 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1487 IS_IMMUTABLE(victim->d_inode)) 1488 return -EPERM; 1489 if (isdir) { 1490 if (!S_ISDIR(victim->d_inode->i_mode)) 1491 return -ENOTDIR; 1492 if (IS_ROOT(victim)) 1493 return -EBUSY; 1494 } else if (S_ISDIR(victim->d_inode->i_mode)) 1495 return -EISDIR; 1496 if (IS_DEADDIR(dir)) 1497 return -ENOENT; 1498 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 1499 return -EBUSY; 1500 return 0; 1501 } 1502 1503 /* Check whether we can create an object with dentry child in directory 1504 * dir. 1505 * 1. We can't do it if child already exists (open has special treatment for 1506 * this case, but since we are inlined it's OK) 1507 * 2. We can't do it if dir is read-only (done in permission()) 1508 * 3. We should have write and exec permissions on dir 1509 * 4. We can't do it if dir is immutable (done in permission()) 1510 */ 1511 static inline int may_create(struct inode *dir, struct dentry *child, 1512 struct nameidata *nd) 1513 { 1514 if (child->d_inode) 1515 return -EEXIST; 1516 if (IS_DEADDIR(dir)) 1517 return -ENOENT; 1518 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1519 } 1520 1521 /* 1522 * O_DIRECTORY translates into forcing a directory lookup. 
1523 */ 1524 static inline int lookup_flags(unsigned int f) 1525 { 1526 unsigned long retval = LOOKUP_FOLLOW; 1527 1528 if (f & O_NOFOLLOW) 1529 retval &= ~LOOKUP_FOLLOW; 1530 1531 if (f & O_DIRECTORY) 1532 retval |= LOOKUP_DIRECTORY; 1533 1534 return retval; 1535 } 1536 1537 /* 1538 * p1 and p2 should be directories on the same fs. 1539 */ 1540 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) 1541 { 1542 struct dentry *p; 1543 1544 if (p1 == p2) { 1545 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1546 return NULL; 1547 } 1548 1549 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 1550 1551 for (p = p1; p->d_parent != p; p = p->d_parent) { 1552 if (p->d_parent == p2) { 1553 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); 1554 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); 1555 return p; 1556 } 1557 } 1558 1559 for (p = p2; p->d_parent != p; p = p->d_parent) { 1560 if (p->d_parent == p1) { 1561 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1562 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 1563 return p; 1564 } 1565 } 1566 1567 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1568 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 1569 return NULL; 1570 } 1571 1572 void unlock_rename(struct dentry *p1, struct dentry *p2) 1573 { 1574 mutex_unlock(&p1->d_inode->i_mutex); 1575 if (p1 != p2) { 1576 mutex_unlock(&p2->d_inode->i_mutex); 1577 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 1578 } 1579 } 1580 1581 int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1582 struct nameidata *nd) 1583 { 1584 int error = may_create(dir, dentry, nd); 1585 1586 if (error) 1587 return error; 1588 1589 if (!dir->i_op || !dir->i_op->create) 1590 return -EACCES; /* shouldn't it be ENOSYS? 
*/ 1591 mode &= S_IALLUGO; 1592 mode |= S_IFREG; 1593 error = security_inode_create(dir, dentry, mode); 1594 if (error) 1595 return error; 1596 DQUOT_INIT(dir); 1597 error = dir->i_op->create(dir, dentry, mode, nd); 1598 if (!error) 1599 fsnotify_create(dir, dentry); 1600 return error; 1601 } 1602 1603 int may_open(struct nameidata *nd, int acc_mode, int flag) 1604 { 1605 struct dentry *dentry = nd->path.dentry; 1606 struct inode *inode = dentry->d_inode; 1607 int error; 1608 1609 if (!inode) 1610 return -ENOENT; 1611 1612 if (S_ISLNK(inode->i_mode)) 1613 return -ELOOP; 1614 1615 if (S_ISDIR(inode->i_mode) && (acc_mode & MAY_WRITE)) 1616 return -EISDIR; 1617 1618 /* 1619 * FIFO's, sockets and device files are special: they don't 1620 * actually live on the filesystem itself, and as such you 1621 * can write to them even if the filesystem is read-only. 1622 */ 1623 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 1624 flag &= ~O_TRUNC; 1625 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { 1626 if (nd->path.mnt->mnt_flags & MNT_NODEV) 1627 return -EACCES; 1628 1629 flag &= ~O_TRUNC; 1630 } 1631 1632 error = vfs_permission(nd, acc_mode); 1633 if (error) 1634 return error; 1635 /* 1636 * An append-only file must be opened in append mode for writing. 1637 */ 1638 if (IS_APPEND(inode)) { 1639 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1640 return -EPERM; 1641 if (flag & O_TRUNC) 1642 return -EPERM; 1643 } 1644 1645 /* O_NOATIME can only be set by the owner or superuser */ 1646 if (flag & O_NOATIME) 1647 if (!is_owner_or_cap(inode)) 1648 return -EPERM; 1649 1650 /* 1651 * Ensure there are no outstanding leases on the file. 1652 */ 1653 error = break_lease(inode, flag); 1654 if (error) 1655 return error; 1656 1657 if (flag & O_TRUNC) { 1658 error = get_write_access(inode); 1659 if (error) 1660 return error; 1661 1662 /* 1663 * Refuse to truncate files with mandatory locks held on them. 
1664 */ 1665 error = locks_verify_locked(inode); 1666 if (!error) { 1667 DQUOT_INIT(inode); 1668 1669 error = do_truncate(dentry, 0, 1670 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, 1671 NULL); 1672 } 1673 put_write_access(inode); 1674 if (error) 1675 return error; 1676 } else 1677 if (flag & FMODE_WRITE) 1678 DQUOT_INIT(inode); 1679 1680 return 0; 1681 } 1682 1683 /* 1684 * Be careful about ever adding any more callers of this 1685 * function. Its flags must be in the namei format, not 1686 * what get passed to sys_open(). 1687 */ 1688 static int __open_namei_create(struct nameidata *nd, struct path *path, 1689 int flag, int mode) 1690 { 1691 int error; 1692 struct dentry *dir = nd->path.dentry; 1693 1694 if (!IS_POSIXACL(dir->d_inode)) 1695 mode &= ~current->fs->umask; 1696 error = vfs_create(dir->d_inode, path->dentry, mode, nd); 1697 mutex_unlock(&dir->d_inode->i_mutex); 1698 dput(nd->path.dentry); 1699 nd->path.dentry = path->dentry; 1700 if (error) 1701 return error; 1702 /* Don't check for write permission, don't truncate */ 1703 return may_open(nd, 0, flag & ~O_TRUNC); 1704 } 1705 1706 /* 1707 * Note that while the flag value (low two bits) for sys_open means: 1708 * 00 - read-only 1709 * 01 - write-only 1710 * 10 - read-write 1711 * 11 - special 1712 * it is changed into 1713 * 00 - no permissions needed 1714 * 01 - read-permission 1715 * 10 - write-permission 1716 * 11 - read-write 1717 * for the internal routines (ie open_namei()/follow_link() etc) 1718 * This is more logical, and also allows the 00 "no perm needed" 1719 * to be used for symlinks (where the permissions are checked 1720 * later). 1721 * 1722 */ 1723 static inline int open_to_namei_flags(int flag) 1724 { 1725 if ((flag+1) & O_ACCMODE) 1726 flag++; 1727 return flag; 1728 } 1729 1730 static int open_will_write_to_fs(int flag, struct inode *inode) 1731 { 1732 /* 1733 * We'll never write to the fs underlying 1734 * a device file. 
1735 */ 1736 if (special_file(inode->i_mode)) 1737 return 0; 1738 return (flag & O_TRUNC); 1739 } 1740 1741 /* 1742 * Note that the low bits of the passed in "open_flag" 1743 * are not the same as in the local variable "flag". See 1744 * open_to_namei_flags() for more details. 1745 */ 1746 struct file *do_filp_open(int dfd, const char *pathname, 1747 int open_flag, int mode) 1748 { 1749 struct file *filp; 1750 struct nameidata nd; 1751 int acc_mode, error; 1752 struct path path; 1753 struct dentry *dir; 1754 int count = 0; 1755 int will_write; 1756 int flag = open_to_namei_flags(open_flag); 1757 1758 acc_mode = ACC_MODE(flag); 1759 1760 /* O_TRUNC implies we need access checks for write permissions */ 1761 if (flag & O_TRUNC) 1762 acc_mode |= MAY_WRITE; 1763 1764 /* Allow the LSM permission hook to distinguish append 1765 access from general write access. */ 1766 if (flag & O_APPEND) 1767 acc_mode |= MAY_APPEND; 1768 1769 /* 1770 * The simplest case - just a plain lookup. 1771 */ 1772 if (!(flag & O_CREAT)) { 1773 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1774 &nd, flag); 1775 if (error) 1776 return ERR_PTR(error); 1777 goto ok; 1778 } 1779 1780 /* 1781 * Create - we need to know the parent. 1782 */ 1783 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, 1784 &nd, flag, mode); 1785 if (error) 1786 return ERR_PTR(error); 1787 1788 /* 1789 * We have the parent and last component. First of all, check 1790 * that we are not asked to creat(2) an obvious directory - that 1791 * will not do. 
1792 */ 1793 error = -EISDIR; 1794 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1795 goto exit; 1796 1797 dir = nd.path.dentry; 1798 nd.flags &= ~LOOKUP_PARENT; 1799 mutex_lock(&dir->d_inode->i_mutex); 1800 path.dentry = lookup_hash(&nd); 1801 path.mnt = nd.path.mnt; 1802 1803 do_last: 1804 error = PTR_ERR(path.dentry); 1805 if (IS_ERR(path.dentry)) { 1806 mutex_unlock(&dir->d_inode->i_mutex); 1807 goto exit; 1808 } 1809 1810 if (IS_ERR(nd.intent.open.file)) { 1811 error = PTR_ERR(nd.intent.open.file); 1812 goto exit_mutex_unlock; 1813 } 1814 1815 /* Negative dentry, just create the file */ 1816 if (!path.dentry->d_inode) { 1817 /* 1818 * This write is needed to ensure that a 1819 * ro->rw transition does not occur between 1820 * the time when the file is created and when 1821 * a permanent write count is taken through 1822 * the 'struct file' in nameidata_to_filp(). 1823 */ 1824 error = mnt_want_write(nd.path.mnt); 1825 if (error) 1826 goto exit_mutex_unlock; 1827 error = __open_namei_create(&nd, &path, flag, mode); 1828 if (error) { 1829 mnt_drop_write(nd.path.mnt); 1830 goto exit; 1831 } 1832 filp = nameidata_to_filp(&nd, open_flag); 1833 mnt_drop_write(nd.path.mnt); 1834 return filp; 1835 } 1836 1837 /* 1838 * It already exists. 1839 */ 1840 mutex_unlock(&dir->d_inode->i_mutex); 1841 audit_inode(pathname, path.dentry); 1842 1843 error = -EEXIST; 1844 if (flag & O_EXCL) 1845 goto exit_dput; 1846 1847 if (__follow_mount(&path)) { 1848 error = -ELOOP; 1849 if (flag & O_NOFOLLOW) 1850 goto exit_dput; 1851 } 1852 1853 error = -ENOENT; 1854 if (!path.dentry->d_inode) 1855 goto exit_dput; 1856 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1857 goto do_link; 1858 1859 path_to_nameidata(&path, &nd); 1860 error = -EISDIR; 1861 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1862 goto exit; 1863 ok: 1864 /* 1865 * Consider: 1866 * 1. may_open() truncates a file 1867 * 2. 
a rw->ro mount transition occurs 1868 * 3. nameidata_to_filp() fails due to 1869 * the ro mount. 1870 * That would be inconsistent, and should 1871 * be avoided. Taking this mnt write here 1872 * ensures that (2) can not occur. 1873 */ 1874 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode); 1875 if (will_write) { 1876 error = mnt_want_write(nd.path.mnt); 1877 if (error) 1878 goto exit; 1879 } 1880 error = may_open(&nd, acc_mode, flag); 1881 if (error) { 1882 if (will_write) 1883 mnt_drop_write(nd.path.mnt); 1884 goto exit; 1885 } 1886 filp = nameidata_to_filp(&nd, open_flag); 1887 /* 1888 * It is now safe to drop the mnt write 1889 * because the filp has had a write taken 1890 * on its behalf. 1891 */ 1892 if (will_write) 1893 mnt_drop_write(nd.path.mnt); 1894 return filp; 1895 1896 exit_mutex_unlock: 1897 mutex_unlock(&dir->d_inode->i_mutex); 1898 exit_dput: 1899 path_put_conditional(&path, &nd); 1900 exit: 1901 if (!IS_ERR(nd.intent.open.file)) 1902 release_open_intent(&nd); 1903 path_put(&nd.path); 1904 return ERR_PTR(error); 1905 1906 do_link: 1907 error = -ELOOP; 1908 if (flag & O_NOFOLLOW) 1909 goto exit_dput; 1910 /* 1911 * This is subtle. Instead of calling do_follow_link() we do the 1912 * thing by hands. The reason is that this way we have zero link_count 1913 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1914 * After that we have the parent and last component, i.e. 1915 * we are in the same situation as after the first path_walk(). 1916 * Well, almost - if the last component is normal we get its copy 1917 * stored in nd->last.name and we will have to putname() it when we 1918 * are done. Procfs-like symlinks just set LAST_BIND. 1919 */ 1920 nd.flags |= LOOKUP_PARENT; 1921 error = security_inode_follow_link(path.dentry, &nd); 1922 if (error) 1923 goto exit_dput; 1924 error = __do_follow_link(&path, &nd); 1925 if (error) { 1926 /* Does someone understand code flow here? Or it is only 1927 * me so stupid? 
Anathema to whoever designed this non-sense 1928 * with "intent.open". 1929 */ 1930 release_open_intent(&nd); 1931 return ERR_PTR(error); 1932 } 1933 nd.flags &= ~LOOKUP_PARENT; 1934 if (nd.last_type == LAST_BIND) 1935 goto ok; 1936 error = -EISDIR; 1937 if (nd.last_type != LAST_NORM) 1938 goto exit; 1939 if (nd.last.name[nd.last.len]) { 1940 __putname(nd.last.name); 1941 goto exit; 1942 } 1943 error = -ELOOP; 1944 if (count++==32) { 1945 __putname(nd.last.name); 1946 goto exit; 1947 } 1948 dir = nd.path.dentry; 1949 mutex_lock(&dir->d_inode->i_mutex); 1950 path.dentry = lookup_hash(&nd); 1951 path.mnt = nd.path.mnt; 1952 __putname(nd.last.name); 1953 goto do_last; 1954 } 1955 1956 /** 1957 * filp_open - open file and return file pointer 1958 * 1959 * @filename: path to open 1960 * @flags: open flags as per the open(2) second argument 1961 * @mode: mode for the new file if O_CREAT is set, else ignored 1962 * 1963 * This is the helper to open a file from kernelspace if you really 1964 * have to. But in generally you should not do this, so please move 1965 * along, nothing to see here.. 1966 */ 1967 struct file *filp_open(const char *filename, int flags, int mode) 1968 { 1969 return do_filp_open(AT_FDCWD, filename, flags, mode); 1970 } 1971 EXPORT_SYMBOL(filp_open); 1972 1973 /** 1974 * lookup_create - lookup a dentry, creating it if it doesn't exist 1975 * @nd: nameidata info 1976 * @is_dir: directory flag 1977 * 1978 * Simple function to lookup and return a dentry and create it 1979 * if it doesn't exist. Is SMP-safe. 1980 * 1981 * Returns with nd->path.dentry->d_inode->i_mutex locked. 1982 */ 1983 struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1984 { 1985 struct dentry *dentry = ERR_PTR(-EEXIST); 1986 1987 mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 1988 /* 1989 * Yucky last component or no last component at all? 
1990 * (foo/., foo/.., /////) 1991 */ 1992 if (nd->last_type != LAST_NORM) 1993 goto fail; 1994 nd->flags &= ~LOOKUP_PARENT; 1995 nd->flags |= LOOKUP_CREATE; 1996 nd->intent.open.flags = O_EXCL; 1997 1998 /* 1999 * Do the final lookup. 2000 */ 2001 dentry = lookup_hash(nd); 2002 if (IS_ERR(dentry)) 2003 goto fail; 2004 2005 if (dentry->d_inode) 2006 goto eexist; 2007 /* 2008 * Special case - lookup gave negative, but... we had foo/bar/ 2009 * From the vfs_mknod() POV we just have a negative dentry - 2010 * all is fine. Let's be bastards - you had / on the end, you've 2011 * been asking for (non-existent) directory. -ENOENT for you. 2012 */ 2013 if (unlikely(!is_dir && nd->last.name[nd->last.len])) { 2014 dput(dentry); 2015 dentry = ERR_PTR(-ENOENT); 2016 } 2017 return dentry; 2018 eexist: 2019 dput(dentry); 2020 dentry = ERR_PTR(-EEXIST); 2021 fail: 2022 return dentry; 2023 } 2024 EXPORT_SYMBOL_GPL(lookup_create); 2025 2026 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2027 { 2028 int error = may_create(dir, dentry, NULL); 2029 2030 if (error) 2031 return error; 2032 2033 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2034 return -EPERM; 2035 2036 if (!dir->i_op || !dir->i_op->mknod) 2037 return -EPERM; 2038 2039 error = devcgroup_inode_mknod(mode, dev); 2040 if (error) 2041 return error; 2042 2043 error = security_inode_mknod(dir, dentry, mode, dev); 2044 if (error) 2045 return error; 2046 2047 DQUOT_INIT(dir); 2048 error = dir->i_op->mknod(dir, dentry, mode, dev); 2049 if (!error) 2050 fsnotify_create(dir, dentry); 2051 return error; 2052 } 2053 2054 static int may_mknod(mode_t mode) 2055 { 2056 switch (mode & S_IFMT) { 2057 case S_IFREG: 2058 case S_IFCHR: 2059 case S_IFBLK: 2060 case S_IFIFO: 2061 case S_IFSOCK: 2062 case 0: /* zero mode translates to S_IFREG */ 2063 return 0; 2064 case S_IFDIR: 2065 return -EPERM; 2066 default: 2067 return -EINVAL; 2068 } 2069 } 2070 2071 asmlinkage long sys_mknodat(int dfd, const 
char __user *filename, int mode, 2072 unsigned dev) 2073 { 2074 int error = 0; 2075 char * tmp; 2076 struct dentry * dentry; 2077 struct nameidata nd; 2078 2079 if (S_ISDIR(mode)) 2080 return -EPERM; 2081 tmp = getname(filename); 2082 if (IS_ERR(tmp)) 2083 return PTR_ERR(tmp); 2084 2085 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); 2086 if (error) 2087 goto out; 2088 dentry = lookup_create(&nd, 0); 2089 if (IS_ERR(dentry)) { 2090 error = PTR_ERR(dentry); 2091 goto out_unlock; 2092 } 2093 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2094 mode &= ~current->fs->umask; 2095 error = may_mknod(mode); 2096 if (error) 2097 goto out_dput; 2098 error = mnt_want_write(nd.path.mnt); 2099 if (error) 2100 goto out_dput; 2101 switch (mode & S_IFMT) { 2102 case 0: case S_IFREG: 2103 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2104 break; 2105 case S_IFCHR: case S_IFBLK: 2106 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, 2107 new_decode_dev(dev)); 2108 break; 2109 case S_IFIFO: case S_IFSOCK: 2110 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2111 break; 2112 } 2113 mnt_drop_write(nd.path.mnt); 2114 out_dput: 2115 dput(dentry); 2116 out_unlock: 2117 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2118 path_put(&nd.path); 2119 out: 2120 putname(tmp); 2121 2122 return error; 2123 } 2124 2125 asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev) 2126 { 2127 return sys_mknodat(AT_FDCWD, filename, mode, dev); 2128 } 2129 2130 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2131 { 2132 int error = may_create(dir, dentry, NULL); 2133 2134 if (error) 2135 return error; 2136 2137 if (!dir->i_op || !dir->i_op->mkdir) 2138 return -EPERM; 2139 2140 mode &= (S_IRWXUGO|S_ISVTX); 2141 error = security_inode_mkdir(dir, dentry, mode); 2142 if (error) 2143 return error; 2144 2145 DQUOT_INIT(dir); 2146 error = dir->i_op->mkdir(dir, dentry, mode); 2147 if (!error) 2148 fsnotify_mkdir(dir, dentry); 2149 return 
error; 2150 } 2151 2152 asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) 2153 { 2154 int error = 0; 2155 char * tmp; 2156 struct dentry *dentry; 2157 struct nameidata nd; 2158 2159 tmp = getname(pathname); 2160 error = PTR_ERR(tmp); 2161 if (IS_ERR(tmp)) 2162 goto out_err; 2163 2164 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); 2165 if (error) 2166 goto out; 2167 dentry = lookup_create(&nd, 1); 2168 error = PTR_ERR(dentry); 2169 if (IS_ERR(dentry)) 2170 goto out_unlock; 2171 2172 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2173 mode &= ~current->fs->umask; 2174 error = mnt_want_write(nd.path.mnt); 2175 if (error) 2176 goto out_dput; 2177 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2178 mnt_drop_write(nd.path.mnt); 2179 out_dput: 2180 dput(dentry); 2181 out_unlock: 2182 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2183 path_put(&nd.path); 2184 out: 2185 putname(tmp); 2186 out_err: 2187 return error; 2188 } 2189 2190 asmlinkage long sys_mkdir(const char __user *pathname, int mode) 2191 { 2192 return sys_mkdirat(AT_FDCWD, pathname, mode); 2193 } 2194 2195 /* 2196 * We try to drop the dentry early: we should have 2197 * a usage count of 2 if we're the only user of this 2198 * dentry, and if that is true (possibly after pruning 2199 * the dcache), then we drop the dentry now. 2200 * 2201 * A low-level filesystem can, if it choses, legally 2202 * do a 2203 * 2204 * if (!d_unhashed(dentry)) 2205 * return -EBUSY; 2206 * 2207 * if it cannot handle the case of removing a directory 2208 * that is still in use by something else.. 
2209 */ 2210 void dentry_unhash(struct dentry *dentry) 2211 { 2212 dget(dentry); 2213 shrink_dcache_parent(dentry); 2214 spin_lock(&dcache_lock); 2215 spin_lock(&dentry->d_lock); 2216 if (atomic_read(&dentry->d_count) == 2) 2217 __d_drop(dentry); 2218 spin_unlock(&dentry->d_lock); 2219 spin_unlock(&dcache_lock); 2220 } 2221 2222 int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2223 { 2224 int error = may_delete(dir, dentry, 1); 2225 2226 if (error) 2227 return error; 2228 2229 if (!dir->i_op || !dir->i_op->rmdir) 2230 return -EPERM; 2231 2232 DQUOT_INIT(dir); 2233 2234 mutex_lock(&dentry->d_inode->i_mutex); 2235 dentry_unhash(dentry); 2236 if (d_mountpoint(dentry)) 2237 error = -EBUSY; 2238 else { 2239 error = security_inode_rmdir(dir, dentry); 2240 if (!error) { 2241 error = dir->i_op->rmdir(dir, dentry); 2242 if (!error) 2243 dentry->d_inode->i_flags |= S_DEAD; 2244 } 2245 } 2246 mutex_unlock(&dentry->d_inode->i_mutex); 2247 if (!error) { 2248 d_delete(dentry); 2249 } 2250 dput(dentry); 2251 2252 return error; 2253 } 2254 2255 static long do_rmdir(int dfd, const char __user *pathname) 2256 { 2257 int error = 0; 2258 char * name; 2259 struct dentry *dentry; 2260 struct nameidata nd; 2261 2262 name = getname(pathname); 2263 if(IS_ERR(name)) 2264 return PTR_ERR(name); 2265 2266 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); 2267 if (error) 2268 goto exit; 2269 2270 switch(nd.last_type) { 2271 case LAST_DOTDOT: 2272 error = -ENOTEMPTY; 2273 goto exit1; 2274 case LAST_DOT: 2275 error = -EINVAL; 2276 goto exit1; 2277 case LAST_ROOT: 2278 error = -EBUSY; 2279 goto exit1; 2280 } 2281 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2282 dentry = lookup_hash(&nd); 2283 error = PTR_ERR(dentry); 2284 if (IS_ERR(dentry)) 2285 goto exit2; 2286 error = mnt_want_write(nd.path.mnt); 2287 if (error) 2288 goto exit3; 2289 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2290 mnt_drop_write(nd.path.mnt); 2291 exit3: 2292 dput(dentry); 2293 
exit2: 2294 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2295 exit1: 2296 path_put(&nd.path); 2297 exit: 2298 putname(name); 2299 return error; 2300 } 2301 2302 asmlinkage long sys_rmdir(const char __user *pathname) 2303 { 2304 return do_rmdir(AT_FDCWD, pathname); 2305 } 2306 2307 int vfs_unlink(struct inode *dir, struct dentry *dentry) 2308 { 2309 int error = may_delete(dir, dentry, 0); 2310 2311 if (error) 2312 return error; 2313 2314 if (!dir->i_op || !dir->i_op->unlink) 2315 return -EPERM; 2316 2317 DQUOT_INIT(dir); 2318 2319 mutex_lock(&dentry->d_inode->i_mutex); 2320 if (d_mountpoint(dentry)) 2321 error = -EBUSY; 2322 else { 2323 error = security_inode_unlink(dir, dentry); 2324 if (!error) 2325 error = dir->i_op->unlink(dir, dentry); 2326 } 2327 mutex_unlock(&dentry->d_inode->i_mutex); 2328 2329 /* We don't d_delete() NFS sillyrenamed files--they still exist. */ 2330 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { 2331 fsnotify_link_count(dentry->d_inode); 2332 d_delete(dentry); 2333 } 2334 2335 return error; 2336 } 2337 2338 /* 2339 * Make sure that the actual truncation of the file will occur outside its 2340 * directory's i_mutex. Truncate can take a long time if there is a lot of 2341 * writeout happening, and we don't want to prevent access to the directory 2342 * while waiting on the I/O. 2343 */ 2344 static long do_unlinkat(int dfd, const char __user *pathname) 2345 { 2346 int error = 0; 2347 char * name; 2348 struct dentry *dentry; 2349 struct nameidata nd; 2350 struct inode *inode = NULL; 2351 2352 name = getname(pathname); 2353 if(IS_ERR(name)) 2354 return PTR_ERR(name); 2355 2356 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); 2357 if (error) 2358 goto exit; 2359 error = -EISDIR; 2360 if (nd.last_type != LAST_NORM) 2361 goto exit1; 2362 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2363 dentry = lookup_hash(&nd); 2364 error = PTR_ERR(dentry); 2365 if (!IS_ERR(dentry)) { 2366 /* Why not before? 
Because we want correct error value */ 2367 if (nd.last.name[nd.last.len]) 2368 goto slashes; 2369 inode = dentry->d_inode; 2370 if (inode) 2371 atomic_inc(&inode->i_count); 2372 error = mnt_want_write(nd.path.mnt); 2373 if (error) 2374 goto exit2; 2375 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2376 mnt_drop_write(nd.path.mnt); 2377 exit2: 2378 dput(dentry); 2379 } 2380 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2381 if (inode) 2382 iput(inode); /* truncate the inode here */ 2383 exit1: 2384 path_put(&nd.path); 2385 exit: 2386 putname(name); 2387 return error; 2388 2389 slashes: 2390 error = !dentry->d_inode ? -ENOENT : 2391 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; 2392 goto exit2; 2393 } 2394 2395 asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag) 2396 { 2397 if ((flag & ~AT_REMOVEDIR) != 0) 2398 return -EINVAL; 2399 2400 if (flag & AT_REMOVEDIR) 2401 return do_rmdir(dfd, pathname); 2402 2403 return do_unlinkat(dfd, pathname); 2404 } 2405 2406 asmlinkage long sys_unlink(const char __user *pathname) 2407 { 2408 return do_unlinkat(AT_FDCWD, pathname); 2409 } 2410 2411 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2412 { 2413 int error = may_create(dir, dentry, NULL); 2414 2415 if (error) 2416 return error; 2417 2418 if (!dir->i_op || !dir->i_op->symlink) 2419 return -EPERM; 2420 2421 error = security_inode_symlink(dir, dentry, oldname); 2422 if (error) 2423 return error; 2424 2425 DQUOT_INIT(dir); 2426 error = dir->i_op->symlink(dir, dentry, oldname); 2427 if (!error) 2428 fsnotify_create(dir, dentry); 2429 return error; 2430 } 2431 2432 asmlinkage long sys_symlinkat(const char __user *oldname, 2433 int newdfd, const char __user *newname) 2434 { 2435 int error = 0; 2436 char * from; 2437 char * to; 2438 struct dentry *dentry; 2439 struct nameidata nd; 2440 2441 from = getname(oldname); 2442 if(IS_ERR(from)) 2443 return PTR_ERR(from); 2444 to = getname(newname); 2445 
error = PTR_ERR(to); 2446 if (IS_ERR(to)) 2447 goto out_putname; 2448 2449 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2450 if (error) 2451 goto out; 2452 dentry = lookup_create(&nd, 0); 2453 error = PTR_ERR(dentry); 2454 if (IS_ERR(dentry)) 2455 goto out_unlock; 2456 2457 error = mnt_want_write(nd.path.mnt); 2458 if (error) 2459 goto out_dput; 2460 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2461 mnt_drop_write(nd.path.mnt); 2462 out_dput: 2463 dput(dentry); 2464 out_unlock: 2465 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2466 path_put(&nd.path); 2467 out: 2468 putname(to); 2469 out_putname: 2470 putname(from); 2471 return error; 2472 } 2473 2474 asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname) 2475 { 2476 return sys_symlinkat(oldname, AT_FDCWD, newname); 2477 } 2478 2479 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) 2480 { 2481 struct inode *inode = old_dentry->d_inode; 2482 int error; 2483 2484 if (!inode) 2485 return -ENOENT; 2486 2487 error = may_create(dir, new_dentry, NULL); 2488 if (error) 2489 return error; 2490 2491 if (dir->i_sb != inode->i_sb) 2492 return -EXDEV; 2493 2494 /* 2495 * A link to an append-only or immutable file cannot be created. 2496 */ 2497 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 2498 return -EPERM; 2499 if (!dir->i_op || !dir->i_op->link) 2500 return -EPERM; 2501 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2502 return -EPERM; 2503 2504 error = security_inode_link(old_dentry, dir, new_dentry); 2505 if (error) 2506 return error; 2507 2508 mutex_lock(&old_dentry->d_inode->i_mutex); 2509 DQUOT_INIT(dir); 2510 error = dir->i_op->link(old_dentry, dir, new_dentry); 2511 mutex_unlock(&old_dentry->d_inode->i_mutex); 2512 if (!error) 2513 fsnotify_link(dir, old_dentry->d_inode, new_dentry); 2514 return error; 2515 } 2516 2517 /* 2518 * Hardlinks are often used in delicate situations. 
We avoid 2519 * security-related surprises by not following symlinks on the 2520 * newname. --KAB 2521 * 2522 * We don't follow them on the oldname either to be compatible 2523 * with linux 2.0, and to avoid hard-linking to directories 2524 * and other special files. --ADM 2525 */ 2526 asmlinkage long sys_linkat(int olddfd, const char __user *oldname, 2527 int newdfd, const char __user *newname, 2528 int flags) 2529 { 2530 struct dentry *new_dentry; 2531 struct nameidata nd, old_nd; 2532 int error; 2533 char * to; 2534 2535 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2536 return -EINVAL; 2537 2538 to = getname(newname); 2539 if (IS_ERR(to)) 2540 return PTR_ERR(to); 2541 2542 error = __user_walk_fd(olddfd, oldname, 2543 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, 2544 &old_nd); 2545 if (error) 2546 goto exit; 2547 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2548 if (error) 2549 goto out; 2550 error = -EXDEV; 2551 if (old_nd.path.mnt != nd.path.mnt) 2552 goto out_release; 2553 new_dentry = lookup_create(&nd, 0); 2554 error = PTR_ERR(new_dentry); 2555 if (IS_ERR(new_dentry)) 2556 goto out_unlock; 2557 error = mnt_want_write(nd.path.mnt); 2558 if (error) 2559 goto out_dput; 2560 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2561 mnt_drop_write(nd.path.mnt); 2562 out_dput: 2563 dput(new_dentry); 2564 out_unlock: 2565 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2566 out_release: 2567 path_put(&nd.path); 2568 out: 2569 path_put(&old_nd.path); 2570 exit: 2571 putname(to); 2572 2573 return error; 2574 } 2575 2576 asmlinkage long sys_link(const char __user *oldname, const char __user *newname) 2577 { 2578 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 2579 } 2580 2581 /* 2582 * The worst of all namespace operations - renaming directory. "Perverted" 2583 * doesn't even start to describe it. Somebody in UCB had a heck of a trip... 2584 * Problems: 2585 * a) we can get into loop creation. Check is done in is_subdir(). 
2586 * b) race potential - two innocent renames can create a loop together. 2587 * That's where 4.4 screws up. Current fix: serialization on 2588 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another 2589 * story. 2590 * c) we have to lock _three_ objects - parents and victim (if it exists). 2591 * And that - after we got ->i_mutex on parents (until then we don't know 2592 * whether the target exists). Solution: try to be smart with locking 2593 * order for inodes. We rely on the fact that tree topology may change 2594 * only under ->s_vfs_rename_mutex _and_ that parent of the object we 2595 * move will be locked. Thus we can rank directories by the tree 2596 * (ancestors first) and rank all non-directories after them. 2597 * That works since everybody except rename does "lock parent, lookup, 2598 * lock child" and rename is under ->s_vfs_rename_mutex. 2599 * HOWEVER, it relies on the assumption that any object with ->lookup() 2600 * has no more than 1 dentry. If "hybrid" objects will ever appear, 2601 * we'd better make sure that there's no link(2) for them. 2602 * d) some filesystems don't support opened-but-unlinked directories, 2603 * either because of layout or because they are not ready to deal with 2604 * all cases correctly. The latter will be fixed (taking this sort of 2605 * stuff into VFS), but the former is not going away. Solution: the same 2606 * trick as in rmdir(). 2607 * e) conversion from fhandle to dentry may come in the wrong moment - when 2608 * we are removing the target. Solution: we will have to grab ->i_mutex 2609 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2610 * ->i_mutex on parents, which works but leads to some truely excessive 2611 * locking]. 
2612 */ 2613 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2614 struct inode *new_dir, struct dentry *new_dentry) 2615 { 2616 int error = 0; 2617 struct inode *target; 2618 2619 /* 2620 * If we are going to change the parent - check write permissions, 2621 * we'll need to flip '..'. 2622 */ 2623 if (new_dir != old_dir) { 2624 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2625 if (error) 2626 return error; 2627 } 2628 2629 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2630 if (error) 2631 return error; 2632 2633 target = new_dentry->d_inode; 2634 if (target) { 2635 mutex_lock(&target->i_mutex); 2636 dentry_unhash(new_dentry); 2637 } 2638 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2639 error = -EBUSY; 2640 else 2641 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2642 if (target) { 2643 if (!error) 2644 target->i_flags |= S_DEAD; 2645 mutex_unlock(&target->i_mutex); 2646 if (d_unhashed(new_dentry)) 2647 d_rehash(new_dentry); 2648 dput(new_dentry); 2649 } 2650 if (!error) 2651 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2652 d_move(old_dentry,new_dentry); 2653 return error; 2654 } 2655 2656 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, 2657 struct inode *new_dir, struct dentry *new_dentry) 2658 { 2659 struct inode *target; 2660 int error; 2661 2662 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); 2663 if (error) 2664 return error; 2665 2666 dget(new_dentry); 2667 target = new_dentry->d_inode; 2668 if (target) 2669 mutex_lock(&target->i_mutex); 2670 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2671 error = -EBUSY; 2672 else 2673 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2674 if (!error) { 2675 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2676 d_move(old_dentry, new_dentry); 2677 } 2678 if (target) 2679 
mutex_unlock(&target->i_mutex); 2680 dput(new_dentry); 2681 return error; 2682 } 2683 2684 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 2685 struct inode *new_dir, struct dentry *new_dentry) 2686 { 2687 int error; 2688 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2689 const char *old_name; 2690 2691 if (old_dentry->d_inode == new_dentry->d_inode) 2692 return 0; 2693 2694 error = may_delete(old_dir, old_dentry, is_dir); 2695 if (error) 2696 return error; 2697 2698 if (!new_dentry->d_inode) 2699 error = may_create(new_dir, new_dentry, NULL); 2700 else 2701 error = may_delete(new_dir, new_dentry, is_dir); 2702 if (error) 2703 return error; 2704 2705 if (!old_dir->i_op || !old_dir->i_op->rename) 2706 return -EPERM; 2707 2708 DQUOT_INIT(old_dir); 2709 DQUOT_INIT(new_dir); 2710 2711 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2712 2713 if (is_dir) 2714 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2715 else 2716 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2717 if (!error) { 2718 const char *new_name = old_dentry->d_name.name; 2719 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, 2720 new_dentry->d_inode, old_dentry); 2721 } 2722 fsnotify_oldname_free(old_name); 2723 2724 return error; 2725 } 2726 2727 static int do_rename(int olddfd, const char *oldname, 2728 int newdfd, const char *newname) 2729 { 2730 int error = 0; 2731 struct dentry * old_dir, * new_dir; 2732 struct dentry * old_dentry, *new_dentry; 2733 struct dentry * trap; 2734 struct nameidata oldnd, newnd; 2735 2736 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); 2737 if (error) 2738 goto exit; 2739 2740 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); 2741 if (error) 2742 goto exit1; 2743 2744 error = -EXDEV; 2745 if (oldnd.path.mnt != newnd.path.mnt) 2746 goto exit2; 2747 2748 old_dir = oldnd.path.dentry; 2749 error = -EBUSY; 2750 if (oldnd.last_type != LAST_NORM) 2751 goto exit2; 2752 2753 
new_dir = newnd.path.dentry; 2754 if (newnd.last_type != LAST_NORM) 2755 goto exit2; 2756 2757 trap = lock_rename(new_dir, old_dir); 2758 2759 old_dentry = lookup_hash(&oldnd); 2760 error = PTR_ERR(old_dentry); 2761 if (IS_ERR(old_dentry)) 2762 goto exit3; 2763 /* source must exist */ 2764 error = -ENOENT; 2765 if (!old_dentry->d_inode) 2766 goto exit4; 2767 /* unless the source is a directory trailing slashes give -ENOTDIR */ 2768 if (!S_ISDIR(old_dentry->d_inode->i_mode)) { 2769 error = -ENOTDIR; 2770 if (oldnd.last.name[oldnd.last.len]) 2771 goto exit4; 2772 if (newnd.last.name[newnd.last.len]) 2773 goto exit4; 2774 } 2775 /* source should not be ancestor of target */ 2776 error = -EINVAL; 2777 if (old_dentry == trap) 2778 goto exit4; 2779 new_dentry = lookup_hash(&newnd); 2780 error = PTR_ERR(new_dentry); 2781 if (IS_ERR(new_dentry)) 2782 goto exit4; 2783 /* target should not be an ancestor of source */ 2784 error = -ENOTEMPTY; 2785 if (new_dentry == trap) 2786 goto exit5; 2787 2788 error = mnt_want_write(oldnd.path.mnt); 2789 if (error) 2790 goto exit5; 2791 error = vfs_rename(old_dir->d_inode, old_dentry, 2792 new_dir->d_inode, new_dentry); 2793 mnt_drop_write(oldnd.path.mnt); 2794 exit5: 2795 dput(new_dentry); 2796 exit4: 2797 dput(old_dentry); 2798 exit3: 2799 unlock_rename(new_dir, old_dir); 2800 exit2: 2801 path_put(&newnd.path); 2802 exit1: 2803 path_put(&oldnd.path); 2804 exit: 2805 return error; 2806 } 2807 2808 asmlinkage long sys_renameat(int olddfd, const char __user *oldname, 2809 int newdfd, const char __user *newname) 2810 { 2811 int error; 2812 char * from; 2813 char * to; 2814 2815 from = getname(oldname); 2816 if(IS_ERR(from)) 2817 return PTR_ERR(from); 2818 to = getname(newname); 2819 error = PTR_ERR(to); 2820 if (!IS_ERR(to)) { 2821 error = do_rename(olddfd, from, newdfd, to); 2822 putname(to); 2823 } 2824 putname(from); 2825 return error; 2826 } 2827 2828 asmlinkage long sys_rename(const char __user *oldname, const char __user *newname) 
2829 { 2830 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); 2831 } 2832 2833 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 2834 { 2835 int len; 2836 2837 len = PTR_ERR(link); 2838 if (IS_ERR(link)) 2839 goto out; 2840 2841 len = strlen(link); 2842 if (len > (unsigned) buflen) 2843 len = buflen; 2844 if (copy_to_user(buffer, link, len)) 2845 len = -EFAULT; 2846 out: 2847 return len; 2848 } 2849 2850 /* 2851 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 2852 * have ->follow_link() touching nd only in nd_set_link(). Using (or not 2853 * using) it for any given inode is up to filesystem. 2854 */ 2855 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2856 { 2857 struct nameidata nd; 2858 void *cookie; 2859 int res; 2860 2861 nd.depth = 0; 2862 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 2863 if (IS_ERR(cookie)) 2864 return PTR_ERR(cookie); 2865 2866 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2867 if (dentry->d_inode->i_op->put_link) 2868 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 2869 return res; 2870 } 2871 2872 int vfs_follow_link(struct nameidata *nd, const char *link) 2873 { 2874 return __vfs_follow_link(nd, link); 2875 } 2876 2877 /* get the link contents into pagecache */ 2878 static char *page_getlink(struct dentry * dentry, struct page **ppage) 2879 { 2880 struct page * page; 2881 struct address_space *mapping = dentry->d_inode->i_mapping; 2882 page = read_mapping_page(mapping, 0, NULL); 2883 if (IS_ERR(page)) 2884 return (char*)page; 2885 *ppage = page; 2886 return kmap(page); 2887 } 2888 2889 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 2890 { 2891 struct page *page = NULL; 2892 char *s = page_getlink(dentry, &page); 2893 int res = vfs_readlink(dentry,buffer,buflen,s); 2894 if (page) { 2895 kunmap(page); 2896 page_cache_release(page); 2897 } 2898 return res; 2899 } 2900 2901 void 
*page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 2902 { 2903 struct page *page = NULL; 2904 nd_set_link(nd, page_getlink(dentry, &page)); 2905 return page; 2906 } 2907 2908 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 2909 { 2910 struct page *page = cookie; 2911 2912 if (page) { 2913 kunmap(page); 2914 page_cache_release(page); 2915 } 2916 } 2917 2918 int __page_symlink(struct inode *inode, const char *symname, int len, 2919 gfp_t gfp_mask) 2920 { 2921 struct address_space *mapping = inode->i_mapping; 2922 struct page *page; 2923 void *fsdata; 2924 int err; 2925 char *kaddr; 2926 2927 retry: 2928 err = pagecache_write_begin(NULL, mapping, 0, len-1, 2929 AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); 2930 if (err) 2931 goto fail; 2932 2933 kaddr = kmap_atomic(page, KM_USER0); 2934 memcpy(kaddr, symname, len-1); 2935 kunmap_atomic(kaddr, KM_USER0); 2936 2937 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, 2938 page, fsdata); 2939 if (err < 0) 2940 goto fail; 2941 if (err < len-1) 2942 goto retry; 2943 2944 mark_inode_dirty(inode); 2945 return 0; 2946 fail: 2947 return err; 2948 } 2949 2950 int page_symlink(struct inode *inode, const char *symname, int len) 2951 { 2952 return __page_symlink(inode, symname, len, 2953 mapping_gfp_mask(inode->i_mapping)); 2954 } 2955 2956 const struct inode_operations page_symlink_inode_operations = { 2957 .readlink = generic_readlink, 2958 .follow_link = page_follow_link_light, 2959 .put_link = page_put_link, 2960 }; 2961 2962 EXPORT_SYMBOL(__user_walk); 2963 EXPORT_SYMBOL(__user_walk_fd); 2964 EXPORT_SYMBOL(follow_down); 2965 EXPORT_SYMBOL(follow_up); 2966 EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2967 EXPORT_SYMBOL(getname); 2968 EXPORT_SYMBOL(lock_rename); 2969 EXPORT_SYMBOL(lookup_one_len); 2970 EXPORT_SYMBOL(page_follow_link_light); 2971 EXPORT_SYMBOL(page_put_link); 2972 EXPORT_SYMBOL(page_readlink); 2973 EXPORT_SYMBOL(__page_symlink); 2974 
EXPORT_SYMBOL(page_symlink); 2975 EXPORT_SYMBOL(page_symlink_inode_operations); 2976 EXPORT_SYMBOL(path_lookup); 2977 EXPORT_SYMBOL(vfs_path_lookup); 2978 EXPORT_SYMBOL(permission); 2979 EXPORT_SYMBOL(vfs_permission); 2980 EXPORT_SYMBOL(file_permission); 2981 EXPORT_SYMBOL(unlock_rename); 2982 EXPORT_SYMBOL(vfs_create); 2983 EXPORT_SYMBOL(vfs_follow_link); 2984 EXPORT_SYMBOL(vfs_link); 2985 EXPORT_SYMBOL(vfs_mkdir); 2986 EXPORT_SYMBOL(vfs_mknod); 2987 EXPORT_SYMBOL(generic_permission); 2988 EXPORT_SYMBOL(vfs_readlink); 2989 EXPORT_SYMBOL(vfs_rename); 2990 EXPORT_SYMBOL(vfs_rmdir); 2991 EXPORT_SYMBOL(vfs_symlink); 2992 EXPORT_SYMBOL(vfs_unlink); 2993 EXPORT_SYMBOL(dentry_unhash); 2994 EXPORT_SYMBOL(generic_readlink); 2995