/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/config.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>

extern int __init init_rootfs(void);

#ifdef CONFIG_SYSFS
extern int __init sysfs_init(void);
#else
static inline int sysfs_init(void)
{
        return 0;
}
#endif

/* spinlock for vfsmount related operations, in place of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

static struct list_head *mount_hashtable;
static int hash_mask __read_mostly, hash_bits __read_mostly;
static kmem_cache_t *mnt_cache;

static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
        unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES);
        tmp += ((unsigned long) dentry / L1_CACHE_BYTES);
        tmp = tmp + (tmp >> hash_bits);
        return tmp & hash_mask;
}
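
/*
 * Illustrative sizing example (not part of the original file), assuming
 * 4K pages and a 64-bit box where sizeof(struct list_head) == 16: the
 * single page allocated in mnt_init() below yields
 *
 *	nr_hash   = PAGE_SIZE / sizeof(struct list_head) = 256
 *	hash_bits = 8, hash_mask = 0xff
 *
 * hash() divides both pointers by L1_CACHE_BYTES (slab objects tend to
 * be cache-line aligned, so the low bits carry no information), folds
 * the high bits back into the low ones, and masks the sum down to one
 * of the 256 buckets.
 */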

struct vfsmount *alloc_vfsmnt(const char *name)
{
        struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
        if (mnt) {
                memset(mnt, 0, sizeof(struct vfsmount));
                atomic_set(&mnt->mnt_count, 1);
                INIT_LIST_HEAD(&mnt->mnt_hash);
                INIT_LIST_HEAD(&mnt->mnt_child);
                INIT_LIST_HEAD(&mnt->mnt_mounts);
                INIT_LIST_HEAD(&mnt->mnt_list);
                INIT_LIST_HEAD(&mnt->mnt_expire);
                if (name) {
                        int size = strlen(name) + 1;
                        char *newname = kmalloc(size, GFP_KERNEL);
                        if (newname) {
                                memcpy(newname, name, size);
                                mnt->mnt_devname = newname;
                        }
                }
        }
        return mnt;
}

void free_vfsmnt(struct vfsmount *mnt)
{
        kfree(mnt->mnt_devname);
        kmem_cache_free(mnt_cache, mnt);
}

/*
 * Now, lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
        struct list_head *head = mount_hashtable + hash(mnt, dentry);
        struct list_head *tmp = head;
        struct vfsmount *p, *found = NULL;

        spin_lock(&vfsmount_lock);
        for (;;) {
                tmp = tmp->next;
                p = NULL;
                if (tmp == head)
                        break;
                p = list_entry(tmp, struct vfsmount, mnt_hash);
                if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
                        found = mntget(p);
                        break;
                }
        }
        spin_unlock(&vfsmount_lock);
        return found;
}
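
/*
 * Illustrative caller sketch (not part of the original file): the
 * reference lookup_mnt() takes belongs to the caller, who must drop it
 * with mntput() when done, e.g.
 *
 *	struct vfsmount *child = lookup_mnt(parent, dentry);
 *	if (child) {
 *		do_something(child->mnt_root);
 *		mntput(child);
 *	}
 *
 * (do_something() is a placeholder, not a real helper.)
 */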

static inline int check_mnt(struct vfsmount *mnt)
{
        return mnt->mnt_namespace == current->namespace;
}

static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
{
        old_nd->dentry = mnt->mnt_mountpoint;
        old_nd->mnt = mnt->mnt_parent;
        mnt->mnt_parent = mnt;
        mnt->mnt_mountpoint = mnt->mnt_root;
        list_del_init(&mnt->mnt_child);
        list_del_init(&mnt->mnt_hash);
        old_nd->dentry->d_mounted--;
}

static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
        mnt->mnt_parent = mntget(nd->mnt);
        mnt->mnt_mountpoint = dget(nd->dentry);
        list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
        list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
        nd->dentry->d_mounted++;
}

static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
        struct list_head *next = p->mnt_mounts.next;
        if (next == &p->mnt_mounts) {
                while (1) {
                        if (p == root)
                                return NULL;
                        next = p->mnt_child.next;
                        if (next != &p->mnt_parent->mnt_mounts)
                                break;
                        p = p->mnt_parent;
                }
        }
        return list_entry(next, struct vfsmount, mnt_child);
}

static struct vfsmount *
clone_mnt(struct vfsmount *old, struct dentry *root)
{
        struct super_block *sb = old->mnt_sb;
        struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);

        if (mnt) {
                mnt->mnt_flags = old->mnt_flags;
                atomic_inc(&sb->s_active);
                mnt->mnt_sb = sb;
                mnt->mnt_root = dget(root);
                mnt->mnt_mountpoint = mnt->mnt_root;
                mnt->mnt_parent = mnt;
                mnt->mnt_namespace = current->namespace;

                /* stick the duplicate mount on the same expiry list
                 * as the original if that was on one */
                spin_lock(&vfsmount_lock);
                if (!list_empty(&old->mnt_expire))
                        list_add(&mnt->mnt_expire, &old->mnt_expire);
                spin_unlock(&vfsmount_lock);
        }
        return mnt;
}

void __mntput(struct vfsmount *mnt)
{
        struct super_block *sb = mnt->mnt_sb;
        dput(mnt->mnt_root);
        free_vfsmnt(mnt);
        deactivate_super(sb);
}

EXPORT_SYMBOL(__mntput);

/* iterator */
static void *m_start(struct seq_file *m, loff_t *pos)
{
        struct namespace *n = m->private;
        struct list_head *p;
        loff_t l = *pos;

        down_read(&n->sem);
        list_for_each(p, &n->list)
                if (!l--)
                        return list_entry(p, struct vfsmount, mnt_list);
        return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct namespace *n = m->private;
        struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
        (*pos)++;
        return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
}

static void m_stop(struct seq_file *m, void *v)
{
        struct namespace *n = m->private;
        up_read(&n->sem);
}

static inline void mangle(struct seq_file *m, const char *s)
{
        seq_escape(m, s, " \t\n\\");
}

static int show_vfsmnt(struct seq_file *m, void *v)
{
        struct vfsmount *mnt = v;
        int err = 0;
        static struct proc_fs_info {
                int flag;
                char *str;
        } fs_info[] = {
                { MS_SYNCHRONOUS, ",sync" },
                { MS_DIRSYNC, ",dirsync" },
                { MS_MANDLOCK, ",mand" },
                { MS_NOATIME, ",noatime" },
                { MS_NODIRATIME, ",nodiratime" },
                { 0, NULL }
        };
        static struct proc_fs_info mnt_info[] = {
                { MNT_NOSUID, ",nosuid" },
                { MNT_NODEV, ",nodev" },
                { MNT_NOEXEC, ",noexec" },
                { 0, NULL }
        };
        struct proc_fs_info *fs_infop;

        mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
        seq_putc(m, ' ');
        seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
        seq_putc(m, ' ');
        mangle(m, mnt->mnt_sb->s_type->name);
        seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
        for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
                if (mnt->mnt_sb->s_flags & fs_infop->flag)
                        seq_puts(m, fs_infop->str);
        }
        for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
                if (mnt->mnt_flags & fs_infop->flag)
                        seq_puts(m, fs_infop->str);
        }
        if (mnt->mnt_sb->s_op->show_options)
                err = mnt->mnt_sb->s_op->show_options(m, mnt);
        seq_puts(m, " 0 0\n");
        return err;
}

struct seq_operations mounts_op = {
        .start  = m_start,
        .next   = m_next,
        .stop   = m_stop,
        .show   = show_vfsmnt
};

/**
 * may_umount_tree - check if a mount tree is busy
 * @mnt: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *mnt)
{
        struct list_head *next;
        struct vfsmount *this_parent = mnt;
        int actual_refs;
        int minimum_refs;

        spin_lock(&vfsmount_lock);
        actual_refs = atomic_read(&mnt->mnt_count);
        minimum_refs = 2;
repeat:
        next = this_parent->mnt_mounts.next;
resume:
        while (next != &this_parent->mnt_mounts) {
                struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child);

                next = next->next;

                actual_refs += atomic_read(&p->mnt_count);
                minimum_refs += 2;

                if (!list_empty(&p->mnt_mounts)) {
                        this_parent = p;
                        goto repeat;
                }
        }

        if (this_parent != mnt) {
                next = this_parent->mnt_child.next;
                this_parent = this_parent->mnt_parent;
                goto resume;
        }
        spin_unlock(&vfsmount_lock);

        if (actual_refs > minimum_refs)
                return -EBUSY;

        return 0;
}

EXPORT_SYMBOL(may_umount_tree);
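
/*
 * Illustrative arithmetic (not part of the original file): for an idle
 * tree of n mounts the floor works out to minimum_refs == 2 * n.  The
 * references come from each mount being attached to the tree, from the
 * mntget() each child takes on its parent in attach_mnt(), and from
 * the reference the caller of may_umount_tree() holds on the root via
 * its own path lookup.  Open files, cwds and chroots inside the tree
 * add further mnt_count references, so actual_refs then exceeds
 * minimum_refs and the tree is reported busy (-EBUSY).
 */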

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
        if (atomic_read(&mnt->mnt_count) > 2)
                return -EBUSY;
        return 0;
}

EXPORT_SYMBOL(may_umount);

static void umount_tree(struct vfsmount *mnt)
{
        struct vfsmount *p;
        LIST_HEAD(kill);

        for (p = mnt; p; p = next_mnt(p, mnt)) {
                list_del(&p->mnt_list);
                list_add(&p->mnt_list, &kill);
                p->mnt_namespace = NULL;
        }

        while (!list_empty(&kill)) {
                mnt = list_entry(kill.next, struct vfsmount, mnt_list);
                list_del_init(&mnt->mnt_list);
                list_del_init(&mnt->mnt_expire);
                if (mnt->mnt_parent == mnt) {
                        spin_unlock(&vfsmount_lock);
                } else {
                        struct nameidata old_nd;
                        detach_mnt(mnt, &old_nd);
                        spin_unlock(&vfsmount_lock);
                        path_release(&old_nd);
                }
                mntput(mnt);
                spin_lock(&vfsmount_lock);
        }
}

static int do_umount(struct vfsmount *mnt, int flags)
{
        struct super_block *sb = mnt->mnt_sb;
        int retval;

        retval = security_sb_umount(mnt, flags);
        if (retval)
                return retval;

        /*
         * Allow userspace to request a mountpoint be expired rather than
         * unmounting unconditionally. Unmount only happens if:
         *  (1) the mark is already set (the mark is cleared by mntput())
         *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
         */
        if (flags & MNT_EXPIRE) {
                if (mnt == current->fs->rootmnt ||
                    flags & (MNT_FORCE | MNT_DETACH))
                        return -EINVAL;

                if (atomic_read(&mnt->mnt_count) != 2)
                        return -EBUSY;

                if (!xchg(&mnt->mnt_expiry_mark, 1))
                        return -EAGAIN;
        }

        /*
         * If we may have to abort operations to get out of this
         * mount, and they will themselves hold resources we must
         * allow the fs to do things. In the Unix tradition of
         * 'Gee that's tricky, let's do it in userspace' the umount_begin
         * might fail to complete on the first run through as other tasks
         * must return, and the like. That's for the mount program to worry
         * about for the moment.
         */

        lock_kernel();
        if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
                sb->s_op->umount_begin(sb);
        unlock_kernel();

        /*
         * No sense to grab the lock for this test, but test itself looks
         * somewhat bogus. Suggestions for better replacement?
         * Ho-hum... In principle, we might treat that as umount + switch
         * to rootfs. GC would eventually take care of the old vfsmount.
         * Actually it makes sense, especially if rootfs would contain a
         * /reboot - static binary that would close all descriptors and
         * call reboot(2). Then init(8) could umount root and exec /reboot.
         */
        if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
                /*
                 * Special case for "unmounting" root ...
                 * we just try to remount it readonly.
                 */
                down_write(&sb->s_umount);
                if (!(sb->s_flags & MS_RDONLY)) {
                        lock_kernel();
                        DQUOT_OFF(sb);
                        retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
                        unlock_kernel();
                }
                up_write(&sb->s_umount);
                return retval;
        }

        down_write(&current->namespace->sem);
        spin_lock(&vfsmount_lock);

        if (atomic_read(&sb->s_active) == 1) {
                /* last instance - try to be smart */
                spin_unlock(&vfsmount_lock);
                lock_kernel();
                DQUOT_OFF(sb);
                acct_auto_close(sb);
                unlock_kernel();
                security_sb_umount_close(mnt);
                spin_lock(&vfsmount_lock);
        }
        retval = -EBUSY;
        if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
                if (!list_empty(&mnt->mnt_list))
                        umount_tree(mnt);
                retval = 0;
        }
        spin_unlock(&vfsmount_lock);
        if (retval)
                security_sb_umount_busy(mnt);
        up_write(&current->namespace->sem);
        return retval;
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */

asmlinkage long sys_umount(char __user *name, int flags)
{
        struct nameidata nd;
        int retval;

        retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
        if (retval)
                goto out;
        retval = -EINVAL;
        if (nd.dentry != nd.mnt->mnt_root)
                goto dput_and_out;
        if (!check_mnt(nd.mnt))
                goto dput_and_out;

        retval = -EPERM;
        if (!capable(CAP_SYS_ADMIN))
                goto dput_and_out;

        retval = do_umount(nd.mnt, flags);
dput_and_out:
        path_release_on_umount(&nd);
out:
        return retval;
}
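
/*
 * Illustrative userspace sketch (not part of the original file) of the
 * MNT_EXPIRE protocol implemented in do_umount() above: the first
 * umount2() call sets the expiry mark and fails with EAGAIN; mntput()
 * clears the mark on any use in between, so a second call succeeds
 * only if the mount stayed untouched:
 *
 *	if (umount2("/mnt/auto", MNT_EXPIRE) == -1 && errno == EAGAIN) {
 *		sleep(interval);
 *		umount2("/mnt/auto", MNT_EXPIRE);   (0 if still unused)
 *	}
 */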

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */

asmlinkage long sys_oldumount(char __user *name)
{
        return sys_umount(name, 0);
}

#endif

static int mount_is_safe(struct nameidata *nd)
{
        if (capable(CAP_SYS_ADMIN))
                return 0;
        return -EPERM;
#ifdef notyet
        if (S_ISLNK(nd->dentry->d_inode->i_mode))
                return -EPERM;
        if (nd->dentry->d_inode->i_mode & S_ISVTX) {
                if (current->uid != nd->dentry->d_inode->i_uid)
                        return -EPERM;
        }
        if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
                return -EPERM;
        return 0;
#endif
}

static int
lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
{
        while (1) {
                if (d == dentry)
                        return 1;
                if (d == NULL || d == d->d_parent)
                        return 0;
                d = d->d_parent;
        }
}

static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
{
        struct vfsmount *res, *p, *q, *r, *s;
        struct list_head *h;
        struct nameidata nd;

        res = q = clone_mnt(mnt, dentry);
        if (!q)
                goto Enomem;
        q->mnt_mountpoint = mnt->mnt_mountpoint;

        p = mnt;
        for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
                r = list_entry(h, struct vfsmount, mnt_child);
                if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
                        continue;

                for (s = r; s; s = next_mnt(s, r)) {
                        while (p != s->mnt_parent) {
                                p = p->mnt_parent;
                                q = q->mnt_parent;
                        }
                        p = s;
                        nd.mnt = q;
                        nd.dentry = p->mnt_mountpoint;
                        q = clone_mnt(p, p->mnt_root);
                        if (!q)
                                goto Enomem;
                        spin_lock(&vfsmount_lock);
                        list_add_tail(&q->mnt_list, &res->mnt_list);
                        attach_mnt(q, &nd);
                        spin_unlock(&vfsmount_lock);
                }
        }
        return res;
Enomem:
        if (res) {
                spin_lock(&vfsmount_lock);
                umount_tree(res);
                spin_unlock(&vfsmount_lock);
        }
        return NULL;
}

static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
        int err;
        if (mnt->mnt_sb->s_flags & MS_NOUSER)
                return -EINVAL;

        if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
            S_ISDIR(mnt->mnt_root->d_inode->i_mode))
                return -ENOTDIR;

        err = -ENOENT;
        down(&nd->dentry->d_inode->i_sem);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out_unlock;

        err = security_sb_check_sb(mnt, nd);
        if (err)
                goto out_unlock;

        err = -ENOENT;
        spin_lock(&vfsmount_lock);
        if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
                struct list_head head;

                attach_mnt(mnt, nd);
                list_add_tail(&head, &mnt->mnt_list);
                list_splice(&head, current->namespace->list.prev);
                mntget(mnt);
                err = 0;
        }
        spin_unlock(&vfsmount_lock);
out_unlock:
        up(&nd->dentry->d_inode->i_sem);
        if (!err)
                security_sb_post_addmount(mnt, nd);
        return err;
}
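
/*
 * Illustrative picture (not part of the original file) of what
 * copy_tree() does: if mount A has /proc mounted at A/proc and a disk
 * at A/usr, copy_tree(A, A->mnt_root) builds a fresh mount A' with
 * clones of both submounts attached at the matching dentries.  Passing
 * a dentry below A's root instead copies only those submounts whose
 * mountpoints live under that dentry -- that is what the
 * lives_below_in_same_fs() filter above checks.
 */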

/*
 * do loopback mount.
 */
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
        struct nameidata old_nd;
        struct vfsmount *mnt = NULL;
        int err = mount_is_safe(nd);
        if (err)
                return err;
        if (!old_name || !*old_name)
                return -EINVAL;
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;

        down_write(&current->namespace->sem);
        err = -EINVAL;
        if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
                err = -ENOMEM;
                if (recurse)
                        mnt = copy_tree(old_nd.mnt, old_nd.dentry);
                else
                        mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
        }

        if (mnt) {
                /* stop bind mounts from expiring */
                spin_lock(&vfsmount_lock);
                list_del_init(&mnt->mnt_expire);
                spin_unlock(&vfsmount_lock);

                err = graft_tree(mnt, nd);
                if (err) {
                        spin_lock(&vfsmount_lock);
                        umount_tree(mnt);
                        spin_unlock(&vfsmount_lock);
                } else
                        mntput(mnt);
        }

        up_write(&current->namespace->sem);
        path_release(&old_nd);
        return err;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
                      void *data)
{
        int err;
        struct super_block *sb = nd->mnt->mnt_sb;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (!check_mnt(nd->mnt))
                return -EINVAL;

        if (nd->dentry != nd->mnt->mnt_root)
                return -EINVAL;

        down_write(&sb->s_umount);
        err = do_remount_sb(sb, flags, data, 0);
        if (!err)
                nd->mnt->mnt_flags = mnt_flags;
        up_write(&sb->s_umount);
        if (!err)
                security_sb_post_remount(nd->mnt, flags, data);
        return err;
}

static int do_move_mount(struct nameidata *nd, char *old_name)
{
        struct nameidata old_nd, parent_nd;
        struct vfsmount *p;
        int err = 0;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (!old_name || !*old_name)
                return -EINVAL;
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;

        down_write(&current->namespace->sem);
        while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
                ;
        err = -EINVAL;
        if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
                goto out;

        err = -ENOENT;
        down(&nd->dentry->d_inode->i_sem);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out1;

        spin_lock(&vfsmount_lock);
        if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
                goto out2;

        err = -EINVAL;
        if (old_nd.dentry != old_nd.mnt->mnt_root)
                goto out2;

        if (old_nd.mnt == old_nd.mnt->mnt_parent)
                goto out2;

        if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
            S_ISDIR(old_nd.dentry->d_inode->i_mode))
                goto out2;

        err = -ELOOP;
        for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
                if (p == old_nd.mnt)
                        goto out2;
        err = 0;

        detach_mnt(old_nd.mnt, &parent_nd);
        attach_mnt(old_nd.mnt, nd);

        /* if the mount is moved, it should no longer expire
         * automatically */
        list_del_init(&old_nd.mnt->mnt_expire);
out2:
        spin_unlock(&vfsmount_lock);
out1:
        up(&nd->dentry->d_inode->i_sem);
out:
        up_write(&current->namespace->sem);
        if (!err)
                path_release(&parent_nd);
        path_release(&old_nd);
        return err;
}
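
/*
 * Illustrative mount(2) calls (not part of the original file) that end
 * up in the three handlers above once do_mount() below has picked the
 * flags apart:
 *
 *	mount("/src", "/dst", NULL, MS_BIND, NULL);              -> do_loopback()
 *	mount("/src", "/dst", NULL, MS_BIND | MS_REC, NULL);     -> do_loopback(), recursive
 *	mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL); -> do_remount()
 *	mount("/old", "/new", NULL, MS_MOVE, NULL);              -> do_move_mount()
 */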

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct nameidata *nd, char *type, int flags,
                        int mnt_flags, char *name, void *data)
{
        struct vfsmount *mnt;

        if (!type || !memchr(type, 0, PAGE_SIZE))
                return -EINVAL;

        /* we need capabilities... */
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        mnt = do_kern_mount(type, flags, name, data);
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);

        return do_add_mount(mnt, nd, mnt_flags, NULL);
}

/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
                 int mnt_flags, struct list_head *fslist)
{
        int err;

        down_write(&current->namespace->sem);
        /* Something was mounted here while we slept */
        while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
                ;
        err = -EINVAL;
        if (!check_mnt(nd->mnt))
                goto unlock;

        /* Refuse the same filesystem on the same mount point */
        err = -EBUSY;
        if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
            nd->mnt->mnt_root == nd->dentry)
                goto unlock;

        err = -EINVAL;
        if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
                goto unlock;

        newmnt->mnt_flags = mnt_flags;
        newmnt->mnt_namespace = current->namespace;
        err = graft_tree(newmnt, nd);

        if (err == 0 && fslist) {
                /* add to the specified expiration list */
                spin_lock(&vfsmount_lock);
                list_add_tail(&newmnt->mnt_expire, fslist);
                spin_unlock(&vfsmount_lock);
        }

unlock:
        up_write(&current->namespace->sem);
        mntput(newmnt);
        return err;
}

EXPORT_SYMBOL_GPL(do_add_mount);

static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
{
        spin_lock(&vfsmount_lock);

        /*
         * Check if mount is still attached, if not, let whoever holds it deal
         * with the sucker
         */
        if (mnt->mnt_parent == mnt) {
                spin_unlock(&vfsmount_lock);
                return;
        }

        /*
         * Check that it is still dead: the count should now be 2 - as
         * contributed by the vfsmount parent and the mntget above
         */
        if (atomic_read(&mnt->mnt_count) == 2) {
                struct nameidata old_nd;

                /* delete from the namespace */
                list_del_init(&mnt->mnt_list);
                mnt->mnt_namespace = NULL;
                detach_mnt(mnt, &old_nd);
                spin_unlock(&vfsmount_lock);
                path_release(&old_nd);

                /*
                 * Now lay it to rest if this was the last ref on the superblock
                 */
                if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
                        /* last instance - try to be smart */
                        lock_kernel();
                        DQUOT_OFF(mnt->mnt_sb);
                        acct_auto_close(mnt->mnt_sb);
                        unlock_kernel();
                }
                mntput(mnt);
        } else {
                /*
                 * Someone brought it back to life whilst we didn't have any
                 * locks held so return it to the expiration list
                 */
                list_add_tail(&mnt->mnt_expire, mounts);
                spin_unlock(&vfsmount_lock);
        }
}
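
/*
 * Illustrative sketch (not part of the original file) of how an
 * automounting filesystem is expected to combine do_add_mount() and
 * mark_mounts_for_expiry() below; the list name and the interval are
 * hypothetical:
 *
 *	static LIST_HEAD(example_automount_list);
 *
 *	err = do_add_mount(newmnt, nd, 0, &example_automount_list);
 *
 * and then, from a periodic timer or work item:
 *
 *	mark_mounts_for_expiry(&example_automount_list);
 *
 * Two consecutive passes with no use of the mount in between (mntput()
 * clears mnt_expiry_mark) are needed before it is actually discarded.
 */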

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
        struct namespace *namespace;
        struct vfsmount *mnt, *next;
        LIST_HEAD(graveyard);

        if (list_empty(mounts))
                return;

        spin_lock(&vfsmount_lock);

        /* extract from the expiration list every vfsmount that matches the
         * following criteria:
         * - only referenced by its parent vfsmount
         * - still marked for expiry (marked on the last call here; marks are
         *   cleared by mntput())
         */
        list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
                if (!xchg(&mnt->mnt_expiry_mark, 1) ||
                    atomic_read(&mnt->mnt_count) != 1)
                        continue;

                mntget(mnt);
                list_move(&mnt->mnt_expire, &graveyard);
        }

        /*
         * go through the vfsmounts we've just consigned to the graveyard to
         * - check that they're still dead
         * - delete the vfsmount from the appropriate namespace under lock
         * - dispose of the corpse
         */
        while (!list_empty(&graveyard)) {
                mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
                list_del_init(&mnt->mnt_expire);

                /* don't do anything if the namespace is dead - all the
                 * vfsmounts from it are going away anyway */
                namespace = mnt->mnt_namespace;
                if (!namespace || !namespace->root)
                        continue;
                get_namespace(namespace);

                spin_unlock(&vfsmount_lock);
                down_write(&namespace->sem);
                expire_mount(mnt, mounts);
                up_write(&namespace->sem);

                mntput(mnt);
                put_namespace(namespace);

                spin_lock(&vfsmount_lock);
        }

        spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault. But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long
exact_copy_from_user(void *to, const void __user *from, unsigned long n)
{
        char *t = to;
        const char __user *f = from;
        char c;

        if (!access_ok(VERIFY_READ, from, n))
                return n;

        while (n) {
                if (__get_user(c, f)) {
                        memset(t, 0, n);
                        break;
                }
                *t++ = c;
                f++;
                n--;
        }
        return n;
}

int copy_mount_options(const void __user *data, unsigned long *where)
{
        int i;
        unsigned long page;
        unsigned long size;

        *where = 0;
        if (!data)
                return 0;

        if (!(page = __get_free_page(GFP_KERNEL)))
                return -ENOMEM;

        /* We only care that *some* data at the address the user
         * gave us is valid. Just in case, we'll zero
         * the remainder of the page.
         */
        /* copy_from_user cannot cross TASK_SIZE ! */
        size = TASK_SIZE - (unsigned long)data;
        if (size > PAGE_SIZE)
                size = PAGE_SIZE;

        i = size - exact_copy_from_user((void *)page, data, size);
        if (!i) {
                free_page(page);
                return -EFAULT;
        }
        if (i != PAGE_SIZE)
                memset((char *)page + i, 0, PAGE_SIZE - i);
        *where = page;
        return 0;
}
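
/*
 * Worked example (not part of the original file) of the convention
 * above: exact_copy_from_user() returns the number of bytes it could
 * NOT copy, so with size == 4096 and a fault after 100 bytes it
 * returns 3996 and i == 4096 - 3996 == 100.  i == 0 means nothing was
 * readable and the caller gets -EFAULT; on a partial copy the tail of
 * the page is zeroed before *where is handed back, and the caller must
 * free_page(*where) when finished, as sys_mount() below does.
 */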

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
              unsigned long flags, void *data_page)
{
        struct nameidata nd;
        int retval = 0;
        int mnt_flags = 0;

        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;

        /* Basic sanity checks */

        if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                return -EINVAL;
        if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
                return -EINVAL;

        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;

        /* Separate the per-mountpoint flags */
        if (flags & MS_NOSUID)
                mnt_flags |= MNT_NOSUID;
        if (flags & MS_NODEV)
                mnt_flags |= MNT_NODEV;
        if (flags & MS_NOEXEC)
                mnt_flags |= MNT_NOEXEC;
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);

        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
        if (retval)
                return retval;

        retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
        if (retval)
                goto dput_out;

        if (flags & MS_REMOUNT)
                retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
                                    data_page);
        else if (flags & MS_BIND)
                retval = do_loopback(&nd, dev_name, flags & MS_REC);
        else if (flags & MS_MOVE)
                retval = do_move_mount(&nd, dev_name);
        else
                retval = do_new_mount(&nd, type_page, flags, mnt_flags,
                                      dev_name, data_page);
dput_out:
        path_release(&nd);
        return retval;
}
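
/*
 * Worked example (not part of the original file) of the magic discard
 * above, using the values MS_MGC_VAL == 0xC0ED0000 and
 * MS_MGC_MSK == 0xffff0000 from <linux/fs.h>: an old libc passing
 * flags == 0xC0ED0001 (magic | MS_RDONLY) has its top half stripped,
 * leaving plain 0x00000001, while a modern caller passing MS_RDONLY
 * directly is untouched because its top half is not 0xC0ED.
 */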

int copy_namespace(int flags, struct task_struct *tsk)
{
        struct namespace *namespace = tsk->namespace;
        struct namespace *new_ns;
        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
        struct fs_struct *fs = tsk->fs;
        struct vfsmount *p, *q;

        if (!namespace)
                return 0;

        get_namespace(namespace);

        if (!(flags & CLONE_NEWNS))
                return 0;

        if (!capable(CAP_SYS_ADMIN)) {
                put_namespace(namespace);
                return -EPERM;
        }

        new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
        if (!new_ns)
                goto out;

        atomic_set(&new_ns->count, 1);
        init_rwsem(&new_ns->sem);
        INIT_LIST_HEAD(&new_ns->list);

        down_write(&tsk->namespace->sem);
        /* First pass: copy the tree topology */
        new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
        if (!new_ns->root) {
                up_write(&tsk->namespace->sem);
                kfree(new_ns);
                goto out;
        }
        spin_lock(&vfsmount_lock);
        list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
        spin_unlock(&vfsmount_lock);

        /*
         * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
         * as belonging to new namespace. We have already acquired a private
         * fs_struct, so tsk->fs->lock is not needed.
         */
        p = namespace->root;
        q = new_ns->root;
        while (p) {
                q->mnt_namespace = new_ns;
                if (fs) {
                        if (p == fs->rootmnt) {
                                rootmnt = p;
                                fs->rootmnt = mntget(q);
                        }
                        if (p == fs->pwdmnt) {
                                pwdmnt = p;
                                fs->pwdmnt = mntget(q);
                        }
                        if (p == fs->altrootmnt) {
                                altrootmnt = p;
                                fs->altrootmnt = mntget(q);
                        }
                }
                p = next_mnt(p, namespace->root);
                q = next_mnt(q, new_ns->root);
        }
        up_write(&tsk->namespace->sem);

        tsk->namespace = new_ns;

        if (rootmnt)
                mntput(rootmnt);
        if (pwdmnt)
                mntput(pwdmnt);
        if (altrootmnt)
                mntput(altrootmnt);

        put_namespace(namespace);
        return 0;

out:
        put_namespace(namespace);
        return -ENOMEM;
}
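
/*
 * Illustrative userspace trigger (not part of the original file) for
 * the path above:
 *
 *	clone(child_fn, stack_top, CLONE_NEWNS | SIGCHLD, NULL);
 *
 * gives the child a private copy of the mount tree built by
 * copy_namespace() (CAP_SYS_ADMIN required); mounts and umounts in the
 * child are then invisible to the parent, and vice versa.  child_fn
 * and stack_top are placeholders.
 */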

asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
                          char __user *type, unsigned long flags,
                          void __user *data)
{
        int retval;
        unsigned long data_page;
        unsigned long type_page;
        unsigned long dev_page;
        char *dir_page;

        retval = copy_mount_options(type, &type_page);
        if (retval < 0)
                return retval;

        dir_page = getname(dir_name);
        retval = PTR_ERR(dir_page);
        if (IS_ERR(dir_page))
                goto out1;

        retval = copy_mount_options(dev_name, &dev_page);
        if (retval < 0)
                goto out2;

        retval = copy_mount_options(data, &data_page);
        if (retval < 0)
                goto out3;

        lock_kernel();
        retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
                          flags, (void *)data_page);
        unlock_kernel();
        free_page(data_page);

out3:
        free_page(dev_page);
out2:
        putname(dir_page);
out1:
        free_page(type_page);
        return retval;
}

/*
 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
 * It can block. Requires the big lock held.
 */
void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
                 struct dentry *dentry)
{
        struct dentry *old_root;
        struct vfsmount *old_rootmnt;
        write_lock(&fs->lock);
        old_root = fs->root;
        old_rootmnt = fs->rootmnt;
        fs->rootmnt = mntget(mnt);
        fs->root = dget(dentry);
        write_unlock(&fs->lock);
        if (old_root) {
                dput(old_root);
                mntput(old_rootmnt);
        }
}

/*
 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
 * It can block. Requires the big lock held.
 */
void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
                struct dentry *dentry)
{
        struct dentry *old_pwd;
        struct vfsmount *old_pwdmnt;

        write_lock(&fs->lock);
        old_pwd = fs->pwd;
        old_pwdmnt = fs->pwdmnt;
        fs->pwdmnt = mntget(mnt);
        fs->pwd = dget(dentry);
        write_unlock(&fs->lock);

        if (old_pwd) {
                dput(old_pwd);
                mntput(old_pwdmnt);
        }
}

static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
{
        struct task_struct *g, *p;
        struct fs_struct *fs;

        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
                task_lock(p);
                fs = p->fs;
                if (fs) {
                        atomic_inc(&fs->count);
                        task_unlock(p);
                        if (fs->root == old_nd->dentry && fs->rootmnt == old_nd->mnt)
                                set_fs_root(fs, new_nd->mnt, new_nd->dentry);
                        if (fs->pwd == old_nd->dentry && fs->pwdmnt == old_nd->mnt)
                                set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
                        put_fs_struct(fs);
                } else
                        task_unlock(p);
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
}

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
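
/*
 * Illustrative initrd-style sequence (not part of the original file)
 * that satisfies the restrictions above -- put_old lies underneath
 * new_root:
 *
 *	chdir("/new_root");
 *	pivot_root("/new_root", "/new_root/old_root");
 *	chroot(".");
 *	chdir("/");
 *	...
 *	umount("/old_root");	(later, once nothing uses it)
 */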

asmlinkage long sys_pivot_root(const char __user *new_root,
                               const char __user *put_old)
{
        struct vfsmount *tmp;
        struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
        int error;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        lock_kernel();

        error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
                            &new_nd);
        if (error)
                goto out0;
        error = -EINVAL;
        if (!check_mnt(new_nd.mnt))
                goto out1;

        error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
                            &old_nd);
        if (error)
                goto out1;

        error = security_sb_pivotroot(&old_nd, &new_nd);
        if (error) {
                path_release(&old_nd);
                goto out1;
        }

        read_lock(&current->fs->lock);
        user_nd.mnt = mntget(current->fs->rootmnt);
        user_nd.dentry = dget(current->fs->root);
        read_unlock(&current->fs->lock);
        down_write(&current->namespace->sem);
        down(&old_nd.dentry->d_inode->i_sem);
        error = -EINVAL;
        if (!check_mnt(user_nd.mnt))
                goto out2;
        error = -ENOENT;
        if (IS_DEADDIR(new_nd.dentry->d_inode))
                goto out2;
        if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
                goto out2;
        if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
                goto out2;
        error = -EBUSY;
        if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt)
                goto out2; /* loop, on the same file system  */
        error = -EINVAL;
        if (user_nd.mnt->mnt_root != user_nd.dentry)
                goto out2; /* not a mountpoint */
        if (user_nd.mnt->mnt_parent == user_nd.mnt)
                goto out2; /* not attached */
        if (new_nd.mnt->mnt_root != new_nd.dentry)
                goto out2; /* not a mountpoint */
        if (new_nd.mnt->mnt_parent == new_nd.mnt)
                goto out2; /* not attached */
        tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
        spin_lock(&vfsmount_lock);
        if (tmp != new_nd.mnt) {
                for (;;) {
                        if (tmp->mnt_parent == tmp)
                                goto out3; /* already mounted on put_old */
                        if (tmp->mnt_parent == new_nd.mnt)
                                break;
                        tmp = tmp->mnt_parent;
                }
                if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
                        goto out3;
        } else if (!is_subdir(old_nd.dentry, new_nd.dentry))
                goto out3;
        detach_mnt(new_nd.mnt, &parent_nd);
        detach_mnt(user_nd.mnt, &root_parent);
        attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
        attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
        spin_unlock(&vfsmount_lock);
        chroot_fs_refs(&user_nd, &new_nd);
        security_sb_post_pivotroot(&user_nd, &new_nd);
        error = 0;
        path_release(&root_parent);
        path_release(&parent_nd);
out2:
        up(&old_nd.dentry->d_inode->i_sem);
        up_write(&current->namespace->sem);
        path_release(&user_nd);
        path_release(&old_nd);
out1:
        path_release(&new_nd);
out0:
        unlock_kernel();
        return error;
out3:
        spin_unlock(&vfsmount_lock);
        goto out2;
}

static void __init init_mount_tree(void)
{
        struct vfsmount *mnt;
        struct namespace *namespace;
        struct task_struct *g, *p;

        mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
        if (IS_ERR(mnt))
                panic("Can't create rootfs");
        namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
        if (!namespace)
                panic("Can't allocate initial namespace");
        atomic_set(&namespace->count, 1);
        INIT_LIST_HEAD(&namespace->list);
        init_rwsem(&namespace->sem);
        list_add(&mnt->mnt_list, &namespace->list);
        namespace->root = mnt;
        mnt->mnt_namespace = namespace;

        init_task.namespace = namespace;
        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
                get_namespace(namespace);
                p->namespace = namespace;
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);

        set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
        set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
}

void __init mnt_init(unsigned long mempages)
{
        struct list_head *d;
        unsigned int nr_hash;
        int i;

        mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
                                      0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                      NULL, NULL);

        mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

        if (!mount_hashtable)
                panic("Failed to allocate mount hash table\n");

        /*
         * Find the power-of-two list-heads that can fit into the allocation..
         * We don't guarantee that "sizeof(struct list_head)" is necessarily
         * a power-of-two.
         */
        nr_hash = PAGE_SIZE / sizeof(struct list_head);
        hash_bits = 0;
        do {
                hash_bits++;
        } while ((nr_hash >> hash_bits) != 0);
        hash_bits--;

        /*
         * Re-calculate the actual number of entries and the mask
         * from the number of bits we can fit.
         */
        nr_hash = 1UL << hash_bits;
        hash_mask = nr_hash - 1;

        printk("Mount-cache hash table entries: %d\n", nr_hash);

        /* And initialize the newly allocated array */
        d = mount_hashtable;
        i = nr_hash;
        do {
                INIT_LIST_HEAD(d);
                d++;
                i--;
        } while (i);
        sysfs_init();
        init_rootfs();
        init_mount_tree();
}

void __put_namespace(struct namespace *namespace)
{
        struct vfsmount *root = namespace->root;
        namespace->root = NULL;
        spin_unlock(&vfsmount_lock);
        down_write(&namespace->sem);
        spin_lock(&vfsmount_lock);
        umount_tree(root);
        spin_unlock(&vfsmount_lock);
        up_write(&namespace->sem);
        kfree(namespace);
}
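
/*
 * Note on the bare spin_unlock() above (not part of the original
 * file): __put_namespace() is reached from put_namespace(), which in
 * this era does roughly
 *
 *	if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock))
 *		__put_namespace(namespace);
 *
 * so the function is entered with vfsmount_lock held and releases it
 * itself before taking the namespace semaphore.
 */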