/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/config.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>

extern int __init init_rootfs(void);

#ifdef CONFIG_SYSFS
extern int __init sysfs_init(void);
#else
static inline int sysfs_init(void)
{
        return 0;
}
#endif

/* spinlock for vfsmount related operations, in place of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

static struct list_head *mount_hashtable;
static int hash_mask, hash_bits;
static kmem_cache_t *mnt_cache;

static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
        unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
        tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
        tmp = tmp + (tmp >> hash_bits);
        return tmp & hash_mask;
}
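/*
 * Illustrative arithmetic for hash() (a sketch with made-up addresses,
 * assuming a 4K page and an 8-byte struct list_head, so mnt_init()
 * below ends up with nr_hash = 512, hash_bits = 9, hash_mask = 511):
 * with mnt at 0xc1234080, dentry at 0xc1567040 and L1_CACHE_BYTES = 32,
 *
 *	tmp  = 0xc1234080/32 + 0xc1567040/32
 *	     = 0x06091a04 + 0x060ab382 = 0x0c13cd86
 *	tmp += tmp >> 9;	// fold the high bits back in
 *	return tmp & 511;	// index into mount_hashtable
 *
 * Dividing by L1_CACHE_BYTES first discards the low bits, which are
 * identical for cache-aligned allocations and would otherwise make the
 * hash lopsided.
 */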
struct vfsmount *alloc_vfsmnt(const char *name)
{
        struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
        if (mnt) {
                memset(mnt, 0, sizeof(struct vfsmount));
                atomic_set(&mnt->mnt_count, 1);
                INIT_LIST_HEAD(&mnt->mnt_hash);
                INIT_LIST_HEAD(&mnt->mnt_child);
                INIT_LIST_HEAD(&mnt->mnt_mounts);
                INIT_LIST_HEAD(&mnt->mnt_list);
                INIT_LIST_HEAD(&mnt->mnt_fslink);
                if (name) {
                        int size = strlen(name) + 1;
                        char *newname = kmalloc(size, GFP_KERNEL);
                        if (newname) {
                                memcpy(newname, name, size);
                                mnt->mnt_devname = newname;
                        }
                }
        }
        return mnt;
}

void free_vfsmnt(struct vfsmount *mnt)
{
        kfree(mnt->mnt_devname);
        kmem_cache_free(mnt_cache, mnt);
}

/*
 * Now, lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
        struct list_head *head = mount_hashtable + hash(mnt, dentry);
        struct list_head *tmp = head;
        struct vfsmount *p, *found = NULL;

        spin_lock(&vfsmount_lock);
        for (;;) {
                tmp = tmp->next;
                p = NULL;
                if (tmp == head)
                        break;
                p = list_entry(tmp, struct vfsmount, mnt_hash);
                if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
                        found = mntget(p);
                        break;
                }
        }
        spin_unlock(&vfsmount_lock);
        return found;
}

static inline int check_mnt(struct vfsmount *mnt)
{
        return mnt->mnt_namespace == current->namespace;
}

static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
{
        old_nd->dentry = mnt->mnt_mountpoint;
        old_nd->mnt = mnt->mnt_parent;
        mnt->mnt_parent = mnt;
        mnt->mnt_mountpoint = mnt->mnt_root;
        list_del_init(&mnt->mnt_child);
        list_del_init(&mnt->mnt_hash);
        old_nd->dentry->d_mounted--;
}

static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
{
        mnt->mnt_parent = mntget(nd->mnt);
        mnt->mnt_mountpoint = dget(nd->dentry);
        list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
        list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
        nd->dentry->d_mounted++;
}

static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
        struct list_head *next = p->mnt_mounts.next;
        if (next == &p->mnt_mounts) {
                while (1) {
                        if (p == root)
                                return NULL;
                        next = p->mnt_child.next;
                        if (next != &p->mnt_parent->mnt_mounts)
                                break;
                        p = p->mnt_parent;
                }
        }
        return list_entry(next, struct vfsmount, mnt_child);
}

static struct vfsmount *
clone_mnt(struct vfsmount *old, struct dentry *root)
{
        struct super_block *sb = old->mnt_sb;
        struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);

        if (mnt) {
                mnt->mnt_flags = old->mnt_flags;
                atomic_inc(&sb->s_active);
                mnt->mnt_sb = sb;
                mnt->mnt_root = dget(root);
                mnt->mnt_mountpoint = mnt->mnt_root;
                mnt->mnt_parent = mnt;
                mnt->mnt_namespace = old->mnt_namespace;

                /* stick the duplicate mount on the same expiry list
                 * as the original if that was on one */
                spin_lock(&vfsmount_lock);
                if (!list_empty(&old->mnt_fslink))
                        list_add(&mnt->mnt_fslink, &old->mnt_fslink);
                spin_unlock(&vfsmount_lock);
        }
        return mnt;
}

void __mntput(struct vfsmount *mnt)
{
        struct super_block *sb = mnt->mnt_sb;
        dput(mnt->mnt_root);
        free_vfsmnt(mnt);
        deactivate_super(sb);
}

EXPORT_SYMBOL(__mntput);

/* iterator */
static void *m_start(struct seq_file *m, loff_t *pos)
{
        struct namespace *n = m->private;
        struct list_head *p;
        loff_t l = *pos;

        down_read(&n->sem);
        list_for_each(p, &n->list)
                if (!l--)
                        return list_entry(p, struct vfsmount, mnt_list);
        return NULL;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct namespace *n = m->private;
        struct list_head *p = ((struct vfsmount *)v)->mnt_list.next;
        (*pos)++;
        return p == &n->list ? NULL : list_entry(p, struct vfsmount, mnt_list);
}
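/*
 * Taken together, m_start/m_next/m_stop implement the seq_file iterator
 * contract for the mounts_op table below (wired up to /proc/mounts with
 * m->private pointing at the reader's namespace): ->start takes n->sem
 * for reading and returns the *pos'th mount, ->next advances the
 * cursor, ->stop drops the semaphore.  Roughly, the seq_file core does
 * the equivalent of (illustrative sketch, not code from this file):
 *
 *	void *v = m_start(m, &pos);
 *	while (v) {
 *		show_vfsmnt(m, v);
 *		v = m_next(m, v, &pos);
 *	}
 *	m_stop(m, v);
 *
 * so the namespace stays pinned for the duration of one read pass.
 */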
static void m_stop(struct seq_file *m, void *v)
{
        struct namespace *n = m->private;
        up_read(&n->sem);
}

static inline void mangle(struct seq_file *m, const char *s)
{
        seq_escape(m, s, " \t\n\\");
}

static int show_vfsmnt(struct seq_file *m, void *v)
{
        struct vfsmount *mnt = v;
        int err = 0;
        static struct proc_fs_info {
                int flag;
                char *str;
        } fs_info[] = {
                { MS_SYNCHRONOUS, ",sync" },
                { MS_DIRSYNC, ",dirsync" },
                { MS_MANDLOCK, ",mand" },
                { MS_NOATIME, ",noatime" },
                { MS_NODIRATIME, ",nodiratime" },
                { 0, NULL }
        };
        static struct proc_fs_info mnt_info[] = {
                { MNT_NOSUID, ",nosuid" },
                { MNT_NODEV, ",nodev" },
                { MNT_NOEXEC, ",noexec" },
                { 0, NULL }
        };
        struct proc_fs_info *fs_infop;

        mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
        seq_putc(m, ' ');
        seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
        seq_putc(m, ' ');
        mangle(m, mnt->mnt_sb->s_type->name);
        seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
        for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
                if (mnt->mnt_sb->s_flags & fs_infop->flag)
                        seq_puts(m, fs_infop->str);
        }
        for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
                if (mnt->mnt_flags & fs_infop->flag)
                        seq_puts(m, fs_infop->str);
        }
        if (mnt->mnt_sb->s_op->show_options)
                err = mnt->mnt_sb->s_op->show_options(m, mnt);
        seq_puts(m, " 0 0\n");
        return err;
}

struct seq_operations mounts_op = {
        .start  = m_start,
        .next   = m_next,
        .stop   = m_stop,
        .show   = show_vfsmnt
};
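/*
 * For a typical ext3 root mounted nosuid, show_vfsmnt() above emits one
 * /proc/mounts line of the form (illustrative values):
 *
 *	/dev/hda1 / ext3 rw,nosuid 0 0
 *
 * i.e. device, mountpoint, fs type, superblock flags followed by
 * per-mountpoint flags, then a constant " 0 0" standing in for the
 * dump/fsck fields of /etc/fstab.
 */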
/**
 * may_umount_tree - check if a mount tree is busy
 * @mnt: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *mnt)
{
        struct list_head *next;
        struct vfsmount *this_parent = mnt;
        int actual_refs;
        int minimum_refs;

        spin_lock(&vfsmount_lock);
        actual_refs = atomic_read(&mnt->mnt_count);
        minimum_refs = 2;
repeat:
        next = this_parent->mnt_mounts.next;
resume:
        while (next != &this_parent->mnt_mounts) {
                struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child);

                next = next->next;

                actual_refs += atomic_read(&p->mnt_count);
                minimum_refs += 2;

                if (!list_empty(&p->mnt_mounts)) {
                        this_parent = p;
                        goto repeat;
                }
        }

        if (this_parent != mnt) {
                next = this_parent->mnt_child.next;
                this_parent = this_parent->mnt_parent;
                goto resume;
        }
        spin_unlock(&vfsmount_lock);

        if (actual_refs > minimum_refs)
                return -EBUSY;

        return 0;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
        if (atomic_read(&mnt->mnt_count) > 2)
                return -EBUSY;
        return 0;
}

EXPORT_SYMBOL(may_umount);

static void umount_tree(struct vfsmount *mnt)
{
        struct vfsmount *p;
        LIST_HEAD(kill);

        for (p = mnt; p; p = next_mnt(p, mnt)) {
                list_del(&p->mnt_list);
                list_add(&p->mnt_list, &kill);
        }

        while (!list_empty(&kill)) {
                mnt = list_entry(kill.next, struct vfsmount, mnt_list);
                list_del_init(&mnt->mnt_list);
                list_del_init(&mnt->mnt_fslink);
                if (mnt->mnt_parent == mnt) {
                        spin_unlock(&vfsmount_lock);
                } else {
                        struct nameidata old_nd;
                        detach_mnt(mnt, &old_nd);
                        spin_unlock(&vfsmount_lock);
                        path_release(&old_nd);
                }
                mntput(mnt);
                spin_lock(&vfsmount_lock);
        }
}
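/*
 * The MNT_EXPIRE handling below gives userspace a two-call protocol
 * (this is a walk-through of the code, not an extra interface): the
 * first umount(path, MNT_EXPIRE) on an otherwise idle mount sets
 * mnt_expiry_mark and fails with -EAGAIN; if nothing touches the mount
 * in the meantime (any mntput() clears the mark), a second identical
 * call finds the mark still set and proceeds with the unmount.  A busy
 * mount (mnt_count != 2) gets -EBUSY instead.
 */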
static int do_umount(struct vfsmount *mnt, int flags)
{
        struct super_block *sb = mnt->mnt_sb;
        int retval;

        retval = security_sb_umount(mnt, flags);
        if (retval)
                return retval;

        /*
         * Allow userspace to request a mountpoint be expired rather than
         * unmounting unconditionally. Unmount only happens if:
         *  (1) the mark is already set (the mark is cleared by mntput())
         *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
         */
        if (flags & MNT_EXPIRE) {
                if (mnt == current->fs->rootmnt ||
                    flags & (MNT_FORCE | MNT_DETACH))
                        return -EINVAL;

                if (atomic_read(&mnt->mnt_count) != 2)
                        return -EBUSY;

                if (!xchg(&mnt->mnt_expiry_mark, 1))
                        return -EAGAIN;
        }

        /*
         * If we may have to abort operations to get out of this
         * mount, and they will themselves hold resources we must
         * allow the fs to do things. In the Unix tradition of
         * 'Gee that's tricky, let's do it in userspace' the umount_begin
         * might fail to complete on the first run through as other tasks
         * must return, and the like. That's for the mount program to worry
         * about for the moment.
         */

        lock_kernel();
        if ((flags & MNT_FORCE) && sb->s_op->umount_begin)
                sb->s_op->umount_begin(sb);
        unlock_kernel();

        /*
         * No sense to grab the lock for this test, but the test itself looks
         * somewhat bogus. Suggestions for a better replacement?
         * Ho-hum... In principle, we might treat that as umount + switch
         * to rootfs. GC would eventually take care of the old vfsmount.
         * Actually it makes sense, especially if rootfs would contain a
         * /reboot - a static binary that would close all descriptors and
         * call reboot(2). Then init(8) could umount root and exec /reboot.
         */
        if (mnt == current->fs->rootmnt && !(flags & MNT_DETACH)) {
                /*
                 * Special case for "unmounting" root ...
                 * we just try to remount it readonly.
                 */
                down_write(&sb->s_umount);
                if (!(sb->s_flags & MS_RDONLY)) {
                        lock_kernel();
                        DQUOT_OFF(sb);
                        retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
                        unlock_kernel();
                }
                up_write(&sb->s_umount);
                return retval;
        }

        down_write(&current->namespace->sem);
        spin_lock(&vfsmount_lock);

        if (atomic_read(&sb->s_active) == 1) {
                /* last instance - try to be smart */
                spin_unlock(&vfsmount_lock);
                lock_kernel();
                DQUOT_OFF(sb);
                acct_auto_close(sb);
                unlock_kernel();
                security_sb_umount_close(mnt);
                spin_lock(&vfsmount_lock);
        }
        retval = -EBUSY;
        if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
                if (!list_empty(&mnt->mnt_list))
                        umount_tree(mnt);
                retval = 0;
        }
        spin_unlock(&vfsmount_lock);
        if (retval)
                security_sb_umount_busy(mnt);
        up_write(&current->namespace->sem);
        return retval;
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD.
 */

asmlinkage long sys_umount(char __user *name, int flags)
{
        struct nameidata nd;
        int retval;

        retval = __user_walk(name, LOOKUP_FOLLOW, &nd);
        if (retval)
                goto out;
        retval = -EINVAL;
        if (nd.dentry != nd.mnt->mnt_root)
                goto dput_and_out;
        if (!check_mnt(nd.mnt))
                goto dput_and_out;

        retval = -EPERM;
        if (!capable(CAP_SYS_ADMIN))
                goto dput_and_out;

        retval = do_umount(nd.mnt, flags);
dput_and_out:
        path_release_on_umount(&nd);
out:
        return retval;
}
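/*
 * Note that sys_umount() drops its lookup reference with
 * path_release_on_umount() rather than plain path_release(): the
 * ordinary mntput() hidden in the latter clears mnt_expiry_mark (see
 * the comment in do_umount() above), which would defeat the two-call
 * MNT_EXPIRE protocol by unmarking the very mount being umounted.
 */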
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * The 2.0 compatible umount. No flags.
 */

asmlinkage long sys_oldumount(char __user *name)
{
        return sys_umount(name, 0);
}

#endif

static int mount_is_safe(struct nameidata *nd)
{
        if (capable(CAP_SYS_ADMIN))
                return 0;
        return -EPERM;
#ifdef notyet
        if (S_ISLNK(nd->dentry->d_inode->i_mode))
                return -EPERM;
        if (nd->dentry->d_inode->i_mode & S_ISVTX) {
                if (current->uid != nd->dentry->d_inode->i_uid)
                        return -EPERM;
        }
        if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
                return -EPERM;
        return 0;
#endif
}

static int
lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
{
        while (1) {
                if (d == dentry)
                        return 1;
                if (d == NULL || d == d->d_parent)
                        return 0;
                d = d->d_parent;
        }
}
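/*
 * copy_tree() below clones the subtree rooted at @dentry on @mnt,
 * together with every submount whose mountpoint sits at or below
 * @dentry (that is what the lives_below_in_same_fs() test above
 * checks).  The clones are stitched together with attach_mnt() so the
 * copy mirrors the original parent/child topology; on allocation
 * failure the partial copy is torn down via umount_tree() and NULL is
 * returned.
 */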
static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
{
        struct vfsmount *res, *p, *q, *r, *s;
        struct list_head *h;
        struct nameidata nd;

        res = q = clone_mnt(mnt, dentry);
        if (!q)
                goto Enomem;
        q->mnt_mountpoint = mnt->mnt_mountpoint;

        p = mnt;
        for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
                r = list_entry(h, struct vfsmount, mnt_child);
                if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
                        continue;

                for (s = r; s; s = next_mnt(s, r)) {
                        while (p != s->mnt_parent) {
                                p = p->mnt_parent;
                                q = q->mnt_parent;
                        }
                        p = s;
                        nd.mnt = q;
                        nd.dentry = p->mnt_mountpoint;
                        q = clone_mnt(p, p->mnt_root);
                        if (!q)
                                goto Enomem;
                        spin_lock(&vfsmount_lock);
                        list_add_tail(&q->mnt_list, &res->mnt_list);
                        attach_mnt(q, &nd);
                        spin_unlock(&vfsmount_lock);
                }
        }
        return res;
Enomem:
        if (res) {
                spin_lock(&vfsmount_lock);
                umount_tree(res);
                spin_unlock(&vfsmount_lock);
        }
        return NULL;
}

static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
        int err;
        if (mnt->mnt_sb->s_flags & MS_NOUSER)
                return -EINVAL;

        if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
            S_ISDIR(mnt->mnt_root->d_inode->i_mode))
                return -ENOTDIR;

        err = -ENOENT;
        down(&nd->dentry->d_inode->i_sem);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out_unlock;

        err = security_sb_check_sb(mnt, nd);
        if (err)
                goto out_unlock;

        err = -ENOENT;
        spin_lock(&vfsmount_lock);
        if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
                struct list_head head;

                attach_mnt(mnt, nd);
                list_add_tail(&head, &mnt->mnt_list);
                list_splice(&head, current->namespace->list.prev);
                mntget(mnt);
                err = 0;
        }
        spin_unlock(&vfsmount_lock);
out_unlock:
        up(&nd->dentry->d_inode->i_sem);
        if (!err)
                security_sb_post_addmount(mnt, nd);
        return err;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
        struct nameidata old_nd;
        struct vfsmount *mnt = NULL;
        int err = mount_is_safe(nd);
        if (err)
                return err;
        if (!old_name || !*old_name)
                return -EINVAL;
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;

        down_write(&current->namespace->sem);
        err = -EINVAL;
        if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
                err = -ENOMEM;
                if (recurse)
                        mnt = copy_tree(old_nd.mnt, old_nd.dentry);
                else
                        mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
        }

        if (mnt) {
                /* stop bind mounts from expiring */
                spin_lock(&vfsmount_lock);
                list_del_init(&mnt->mnt_fslink);
                spin_unlock(&vfsmount_lock);

                err = graft_tree(mnt, nd);
                if (err) {
                        spin_lock(&vfsmount_lock);
                        umount_tree(mnt);
                        spin_unlock(&vfsmount_lock);
                } else
                        mntput(mnt);
        }

        up_write(&current->namespace->sem);
        path_release(&old_nd);
        return err;
}

/*
 * change filesystem flags. dir should be the physical root of the filesystem.
 * If you've mounted a non-root directory somewhere and want to do a remount
 * on it - tough luck.
 */

static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
                      void *data)
{
        int err;
        struct super_block *sb = nd->mnt->mnt_sb;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (!check_mnt(nd->mnt))
                return -EINVAL;

        if (nd->dentry != nd->mnt->mnt_root)
                return -EINVAL;

        down_write(&sb->s_umount);
        err = do_remount_sb(sb, flags, data, 0);
        if (!err)
                nd->mnt->mnt_flags = mnt_flags;
        up_write(&sb->s_umount);
        if (!err)
                security_sb_post_remount(nd->mnt, flags, data);
        return err;
}

static int do_move_mount(struct nameidata *nd, char *old_name)
{
        struct nameidata old_nd, parent_nd;
        struct vfsmount *p;
        int err = 0;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (!old_name || !*old_name)
                return -EINVAL;
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;

        down_write(&current->namespace->sem);
        while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
                ;
        err = -EINVAL;
        if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt))
                goto out;

        err = -ENOENT;
        down(&nd->dentry->d_inode->i_sem);
        if (IS_DEADDIR(nd->dentry->d_inode))
                goto out1;

        spin_lock(&vfsmount_lock);
        if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
                goto out2;

        err = -EINVAL;
        if (old_nd.dentry != old_nd.mnt->mnt_root)
                goto out2;

        if (old_nd.mnt == old_nd.mnt->mnt_parent)
                goto out2;

        if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
            S_ISDIR(old_nd.dentry->d_inode->i_mode))
                goto out2;

        err = -ELOOP;
        for (p = nd->mnt; p->mnt_parent != p; p = p->mnt_parent)
                if (p == old_nd.mnt)
                        goto out2;
        err = 0;

        detach_mnt(old_nd.mnt, &parent_nd);
        attach_mnt(old_nd.mnt, nd);

        /* if the mount is moved, it should no longer be expired
         * automatically */
        list_del_init(&old_nd.mnt->mnt_fslink);
out2:
        spin_unlock(&vfsmount_lock);
out1:
        up(&nd->dentry->d_inode->i_sem);
out:
        up_write(&current->namespace->sem);
        if (!err)
                path_release(&parent_nd);
        path_release(&old_nd);
        return err;
}
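/*
 * How userspace reaches the helpers above, via the dispatch in
 * do_mount() below (illustrative mount(8) spellings):
 *
 *	mount --bind  /src /dst     -> MS_BIND          -> do_loopback()
 *	mount --rbind /src /dst     -> MS_BIND | MS_REC -> do_loopback(), recursive
 *	mount --move  /src /dst     -> MS_MOVE          -> do_move_mount()
 *	mount -o remount,ro /dst    -> MS_REMOUNT       -> do_remount()
 */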
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct nameidata *nd, char *type, int flags,
                        int mnt_flags, char *name, void *data)
{
        struct vfsmount *mnt;

        if (!type || !memchr(type, 0, PAGE_SIZE))
                return -EINVAL;

        /* we need capabilities... */
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        mnt = do_kern_mount(type, flags, name, data);
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);

        return do_add_mount(mnt, nd, mnt_flags, NULL);
}

/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
                 int mnt_flags, struct list_head *fslist)
{
        int err;

        down_write(&current->namespace->sem);
        /* Something was mounted here while we slept */
        while (d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
                ;
        err = -EINVAL;
        if (!check_mnt(nd->mnt))
                goto unlock;

        /* Refuse the same filesystem on the same mount point */
        err = -EBUSY;
        if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
            nd->mnt->mnt_root == nd->dentry)
                goto unlock;

        err = -EINVAL;
        if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
                goto unlock;

        newmnt->mnt_flags = mnt_flags;
        err = graft_tree(newmnt, nd);

        if (err == 0 && fslist) {
                /* add to the specified expiration list */
                spin_lock(&vfsmount_lock);
                list_add_tail(&newmnt->mnt_fslink, fslist);
                spin_unlock(&vfsmount_lock);
        }

unlock:
        up_write(&current->namespace->sem);
        mntput(newmnt);
        return err;
}

EXPORT_SYMBOL_GPL(do_add_mount);
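/*
 * Sketch of how an in-kernel filesystem is expected to pair
 * do_add_mount() with mark_mounts_for_expiry() below for automounted
 * submounts (illustrative only - the list and the timer are made-up
 * names, not part of this file):
 *
 *	static LIST_HEAD(example_expiry_list);
 *
 *	err = do_add_mount(newmnt, nd, mnt_flags, &example_expiry_list);
 *
 * and then, from a periodic timer or workqueue:
 *
 *	mark_mounts_for_expiry(&example_expiry_list);
 *
 * A submount that stays unused across two successive calls is
 * unmounted automatically.
 */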
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
        struct namespace *namespace;
        struct vfsmount *mnt, *next;
        LIST_HEAD(graveyard);

        if (list_empty(mounts))
                return;

        spin_lock(&vfsmount_lock);

        /* extract from the expiration list every vfsmount that matches the
         * following criteria:
         * - only referenced by its parent vfsmount
         * - still marked for expiry (marked on the last call here; marks are
         *   cleared by mntput())
         */
        list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) {
                if (!xchg(&mnt->mnt_expiry_mark, 1) ||
                    atomic_read(&mnt->mnt_count) != 1)
                        continue;

                mntget(mnt);
                list_move(&mnt->mnt_fslink, &graveyard);
        }

        /*
         * go through the vfsmounts we've just consigned to the graveyard to
         * - check that they're still dead
         * - delete the vfsmount from the appropriate namespace under lock
         * - dispose of the corpse
         */
        while (!list_empty(&graveyard)) {
                mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
                list_del_init(&mnt->mnt_fslink);

                /* don't do anything if the namespace is dead - all the
                 * vfsmounts from it are going away anyway */
                namespace = mnt->mnt_namespace;
                if (!namespace || atomic_read(&namespace->count) <= 0)
                        continue;
                get_namespace(namespace);

                spin_unlock(&vfsmount_lock);
                down_write(&namespace->sem);
                spin_lock(&vfsmount_lock);

                /* check that it is still dead: the count should now be 2 - as
                 * contributed by the vfsmount parent and the mntget above */
                if (atomic_read(&mnt->mnt_count) == 2) {
                        struct vfsmount *xdmnt;
                        struct dentry *xdentry;

                        /* delete from the namespace */
                        list_del_init(&mnt->mnt_list);
                        list_del_init(&mnt->mnt_child);
                        list_del_init(&mnt->mnt_hash);
                        mnt->mnt_mountpoint->d_mounted--;

                        xdentry = mnt->mnt_mountpoint;
                        mnt->mnt_mountpoint = mnt->mnt_root;
                        xdmnt = mnt->mnt_parent;
                        mnt->mnt_parent = mnt;

                        spin_unlock(&vfsmount_lock);

                        mntput(xdmnt);
                        dput(xdentry);

                        /* now lay it to rest if this was the last ref on the
                         * superblock */
                        if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
                                /* last instance - try to be smart */
                                lock_kernel();
                                DQUOT_OFF(mnt->mnt_sb);
                                acct_auto_close(mnt->mnt_sb);
                                unlock_kernel();
                        }

                        mntput(mnt);
                } else {
                        /* someone brought it back to life whilst we didn't
                         * have any locks held so return it to the expiration
                         * list */
                        list_add_tail(&mnt->mnt_fslink, mounts);
                        spin_unlock(&vfsmount_lock);
                }

                up_write(&namespace->sem);

                mntput(mnt);
                put_namespace(namespace);

                spin_lock(&vfsmount_lock);
        }

        spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault. But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long
exact_copy_from_user(void *to, const void __user *from, unsigned long n)
{
        char *t = to;
        const char __user *f = from;
        char c;

        if (!access_ok(VERIFY_READ, from, n))
                return n;

        while (n) {
                if (__get_user(c, f)) {
                        memset(t, 0, n);
                        break;
                }
                *t++ = c;
                f++;
                n--;
        }
        return n;
}

int copy_mount_options(const void __user *data, unsigned long *where)
{
        int i;
        unsigned long page;
        unsigned long size;

        *where = 0;
        if (!data)
                return 0;

        if (!(page = __get_free_page(GFP_KERNEL)))
                return -ENOMEM;

        /* We only care that *some* data at the address the user
         * gave us is valid. Just in case, we'll zero
         * the remainder of the page.
         */
        /* copy_from_user cannot cross TASK_SIZE! */
        size = TASK_SIZE - (unsigned long)data;
        if (size > PAGE_SIZE)
                size = PAGE_SIZE;

        i = size - exact_copy_from_user((void *)page, data, size);
        if (!i) {
                free_page(page);
                return -EFAULT;
        }
        if (i != PAGE_SIZE)
                memset((char *)page + i, 0, PAGE_SIZE - i);
        *where = page;
        return 0;
}
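/*
 * Worked example for the TASK_SIZE clamp above (illustrative
 * addresses): with a 3G/1G split (TASK_SIZE = 0xc0000000) and
 * data = 0xbffffe00, only 0x200 bytes are copyable, so size is clamped
 * to 512 rather than PAGE_SIZE, and the trailing 3584 bytes of the page
 * are zeroed by the memset().  -EFAULT is returned only when not a
 * single byte could be copied.
 */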
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
              unsigned long flags, void *data_page)
{
        struct nameidata nd;
        int retval = 0;
        int mnt_flags = 0;

        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;

        /* Basic sanity checks */

        if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                return -EINVAL;
        if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
                return -EINVAL;

        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;

        /* Separate the per-mountpoint flags */
        if (flags & MS_NOSUID)
                mnt_flags |= MNT_NOSUID;
        if (flags & MS_NODEV)
                mnt_flags |= MNT_NODEV;
        if (flags & MS_NOEXEC)
                mnt_flags |= MNT_NOEXEC;
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE);

        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
        if (retval)
                return retval;

        retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
        if (retval)
                goto dput_out;

        if (flags & MS_REMOUNT)
                retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
                                    data_page);
        else if (flags & MS_BIND)
                retval = do_loopback(&nd, dev_name, flags & MS_REC);
        else if (flags & MS_MOVE)
                retval = do_move_mount(&nd, dev_name);
        else
                retval = do_new_mount(&nd, type_page, flags, mnt_flags,
                                      dev_name, data_page);
dput_out:
        path_release(&nd);
        return retval;
}
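/*
 * Example of the flag separation above: a call such as
 *
 *	mount("/dev/hda2", "/home", "ext3", MS_MGC_VAL | MS_NOSUID, "");
 *
 * gets the 0xC0ED magic stripped from the top half of the flags word,
 * has MS_NOSUID moved into mnt_flags as MNT_NOSUID (a property of this
 * mountpoint, not of the superblock), and then falls through to
 * do_new_mount() since none of MS_REMOUNT/MS_BIND/MS_MOVE are set.
 */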
int copy_namespace(int flags, struct task_struct *tsk)
{
        struct namespace *namespace = tsk->namespace;
        struct namespace *new_ns;
        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
        struct fs_struct *fs = tsk->fs;
        struct vfsmount *p, *q;

        if (!namespace)
                return 0;

        get_namespace(namespace);

        if (!(flags & CLONE_NEWNS))
                return 0;

        if (!capable(CAP_SYS_ADMIN)) {
                put_namespace(namespace);
                return -EPERM;
        }

        new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
        if (!new_ns)
                goto out;

        atomic_set(&new_ns->count, 1);
        init_rwsem(&new_ns->sem);
        INIT_LIST_HEAD(&new_ns->list);

        down_write(&tsk->namespace->sem);
        /* First pass: copy the tree topology */
        new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
        if (!new_ns->root) {
                up_write(&tsk->namespace->sem);
                kfree(new_ns);
                goto out;
        }
        spin_lock(&vfsmount_lock);
        list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
        spin_unlock(&vfsmount_lock);

        /*
         * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
         * as belonging to the new namespace. We have already acquired a
         * private fs_struct, so tsk->fs->lock is not needed.
         */
        p = namespace->root;
        q = new_ns->root;
        while (p) {
                q->mnt_namespace = new_ns;
                if (fs) {
                        if (p == fs->rootmnt) {
                                rootmnt = p;
                                fs->rootmnt = mntget(q);
                        }
                        if (p == fs->pwdmnt) {
                                pwdmnt = p;
                                fs->pwdmnt = mntget(q);
                        }
                        if (p == fs->altrootmnt) {
                                altrootmnt = p;
                                fs->altrootmnt = mntget(q);
                        }
                }
                p = next_mnt(p, namespace->root);
                q = next_mnt(q, new_ns->root);
        }
        up_write(&tsk->namespace->sem);

        tsk->namespace = new_ns;

        if (rootmnt)
                mntput(rootmnt);
        if (pwdmnt)
                mntput(pwdmnt);
        if (altrootmnt)
                mntput(altrootmnt);

        put_namespace(namespace);
        return 0;

out:
        put_namespace(namespace);
        return -ENOMEM;
}

asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
                          char __user *type, unsigned long flags,
                          void __user *data)
{
        int retval;
        unsigned long data_page;
        unsigned long type_page;
        unsigned long dev_page;
        char *dir_page;

        retval = copy_mount_options(type, &type_page);
        if (retval < 0)
                return retval;

        dir_page = getname(dir_name);
        retval = PTR_ERR(dir_page);
        if (IS_ERR(dir_page))
                goto out1;

        retval = copy_mount_options(dev_name, &dev_page);
        if (retval < 0)
                goto out2;

        retval = copy_mount_options(data, &data_page);
        if (retval < 0)
                goto out3;

        lock_kernel();
        retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
                          flags, (void *)data_page);
        unlock_kernel();
        free_page(data_page);

out3:
        free_page(dev_page);
out2:
        putname(dir_page);
out1:
        free_page(type_page);
        return retval;
}
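/*
 * Note the asymmetry in sys_mount() above: dir_name goes through
 * getname() because it is always required and always a path string,
 * while dev_name, type and data go through copy_mount_options(), which
 * tolerates NULL, does not insist on NUL termination (data may be a
 * binary blob), and simply grabs whatever fits in one page.
 */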
/*
 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
 * It can block. Requires the big lock held.
 */
void set_fs_root(struct fs_struct *fs, struct vfsmount *mnt,
                 struct dentry *dentry)
{
        struct dentry *old_root;
        struct vfsmount *old_rootmnt;

        write_lock(&fs->lock);
        old_root = fs->root;
        old_rootmnt = fs->rootmnt;
        fs->rootmnt = mntget(mnt);
        fs->root = dget(dentry);
        write_unlock(&fs->lock);
        if (old_root) {
                dput(old_root);
                mntput(old_rootmnt);
        }
}

/*
 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
 * It can block. Requires the big lock held.
 */
void set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
                struct dentry *dentry)
{
        struct dentry *old_pwd;
        struct vfsmount *old_pwdmnt;

        write_lock(&fs->lock);
        old_pwd = fs->pwd;
        old_pwdmnt = fs->pwdmnt;
        fs->pwdmnt = mntget(mnt);
        fs->pwd = dget(dentry);
        write_unlock(&fs->lock);

        if (old_pwd) {
                dput(old_pwd);
                mntput(old_pwdmnt);
        }
}
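/*
 * chroot_fs_refs() below is what makes sys_pivot_root() global: every
 * task whose root or cwd still references the old root {mnt,dentry}
 * pair is re-pointed at the new one, taking fresh references via
 * set_fs_root()/set_fs_pwd() above.
 */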
static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
{
        struct task_struct *g, *p;
        struct fs_struct *fs;

        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
                task_lock(p);
                fs = p->fs;
                if (fs) {
                        atomic_inc(&fs->count);
                        task_unlock(p);
                        if (fs->root == old_nd->dentry &&
                            fs->rootmnt == old_nd->mnt)
                                set_fs_root(fs, new_nd->mnt, new_nd->dentry);
                        if (fs->pwd == old_nd->dentry &&
                            fs->pwdmnt == old_nd->mnt)
                                set_fs_pwd(fs, new_nd->mnt, new_nd->dentry);
                        put_fs_struct(fs);
                } else
                        task_unlock(p);
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
}

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
asmlinkage long sys_pivot_root(const char __user *new_root,
                               const char __user *put_old)
{
        struct vfsmount *tmp;
        struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
        int error;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        lock_kernel();

        error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
                            &new_nd);
        if (error)
                goto out0;
        error = -EINVAL;
        if (!check_mnt(new_nd.mnt))
                goto out1;

        error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
                            &old_nd);
        if (error)
                goto out1;

        error = security_sb_pivotroot(&old_nd, &new_nd);
        if (error) {
                path_release(&old_nd);
                goto out1;
        }

        read_lock(&current->fs->lock);
        user_nd.mnt = mntget(current->fs->rootmnt);
        user_nd.dentry = dget(current->fs->root);
        read_unlock(&current->fs->lock);
        down_write(&current->namespace->sem);
        down(&old_nd.dentry->d_inode->i_sem);
        error = -EINVAL;
        if (!check_mnt(user_nd.mnt))
                goto out2;
        error = -ENOENT;
        if (IS_DEADDIR(new_nd.dentry->d_inode))
                goto out2;
        if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
                goto out2;
        if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
                goto out2;
        error = -EBUSY;
        if (new_nd.mnt == user_nd.mnt || old_nd.mnt == user_nd.mnt)
                goto out2; /* loop, on the same file system */
        error = -EINVAL;
        if (user_nd.mnt->mnt_root != user_nd.dentry)
                goto out2; /* not a mountpoint */
        if (new_nd.mnt->mnt_root != new_nd.dentry)
                goto out2; /* not a mountpoint */
        tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
        spin_lock(&vfsmount_lock);
        if (tmp != new_nd.mnt) {
                for (;;) {
                        if (tmp->mnt_parent == tmp)
                                goto out3; /* already mounted on put_old */
                        if (tmp->mnt_parent == new_nd.mnt)
                                break;
                        tmp = tmp->mnt_parent;
                }
                if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
                        goto out3;
        } else if (!is_subdir(old_nd.dentry, new_nd.dentry))
                goto out3;
        detach_mnt(new_nd.mnt, &parent_nd);
        detach_mnt(user_nd.mnt, &root_parent);
        attach_mnt(user_nd.mnt, &old_nd);       /* mount old root on put_old */
        attach_mnt(new_nd.mnt, &root_parent);   /* mount new_root on / */
        spin_unlock(&vfsmount_lock);
        chroot_fs_refs(&user_nd, &new_nd);
        security_sb_post_pivotroot(&user_nd, &new_nd);
        error = 0;
        path_release(&root_parent);
        path_release(&parent_nd);
out2:
        up(&old_nd.dentry->d_inode->i_sem);
        up_write(&current->namespace->sem);
        path_release(&user_nd);
        path_release(&old_nd);
out1:
        path_release(&new_nd);
out0:
        unlock_kernel();
        return error;
out3:
        spin_unlock(&vfsmount_lock);
        goto out2;
}
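/*
 * Boot-time setup: mount rootfs, wrap it in the first struct namespace,
 * point every early task at that namespace, and make it the root and
 * cwd of current->fs.  init_mount_tree() runs once, from mnt_init()
 * below.
 */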
static void __init init_mount_tree(void)
{
        struct vfsmount *mnt;
        struct namespace *namespace;
        struct task_struct *g, *p;

        mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
        if (IS_ERR(mnt))
                panic("Can't create rootfs");
        namespace = kmalloc(sizeof(*namespace), GFP_KERNEL);
        if (!namespace)
                panic("Can't allocate initial namespace");
        atomic_set(&namespace->count, 1);
        INIT_LIST_HEAD(&namespace->list);
        init_rwsem(&namespace->sem);
        list_add(&mnt->mnt_list, &namespace->list);
        namespace->root = mnt;
        mnt->mnt_namespace = namespace;

        init_task.namespace = namespace;
        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
                get_namespace(namespace);
                p->namespace = namespace;
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);

        set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
        set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
}

void __init mnt_init(unsigned long mempages)
{
        struct list_head *d;
        unsigned int nr_hash;
        int i;

        mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
                                      0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                      NULL, NULL);

        mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

        if (!mount_hashtable)
                panic("Failed to allocate mount hash table\n");

        /*
         * Find the power-of-two list-heads that can fit into the allocation..
         * We don't guarantee that "sizeof(struct list_head)" is necessarily
         * a power-of-two.
         */
        nr_hash = PAGE_SIZE / sizeof(struct list_head);
        hash_bits = 0;
        do {
                hash_bits++;
        } while ((nr_hash >> hash_bits) != 0);
        hash_bits--;

        /*
         * Re-calculate the actual number of entries and the mask
         * from the number of bits we can fit.
         */
        nr_hash = 1UL << hash_bits;
        hash_mask = nr_hash - 1;

        printk("Mount-cache hash table entries: %d\n", nr_hash);

        /* And initialize the newly allocated array */
        d = mount_hashtable;
        i = nr_hash;
        do {
                INIT_LIST_HEAD(d);
                d++;
                i--;
        } while (i);

        sysfs_init();
        init_rootfs();
        init_mount_tree();
}

void __put_namespace(struct namespace *namespace)
{
        struct vfsmount *mnt;

        down_write(&namespace->sem);
        spin_lock(&vfsmount_lock);

        list_for_each_entry(mnt, &namespace->list, mnt_list) {
                mnt->mnt_namespace = NULL;
        }

        umount_tree(namespace->root);
        spin_unlock(&vfsmount_lock);
        up_write(&namespace->sem);
        kfree(namespace);
}